SQL Server Forums
Profile | Register | Active Topics | Members | Search | Forum FAQ
 
Register Now and get your question answered!
Username:
Password:
Save Password
Forgot your Password?

 All Forums
 General SQL Server Forums
 Script Library
 Strip those RTF tags away
 New Topic  Reply to Topic
 Printer Friendly
Author Previous Topic Topic Next Topic  

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 09/26/2007 :  12:16:08  Show Profile  Visit SwePeso's Homepage  Reply with Quote
This algorithm can be used to strip out HTML tags too.
With reference to http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=89973
and http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=90000
CREATE FUNCTION	dbo.fnParseRTF
(
	@rtf VARCHAR(8000)
)
RETURNS VARCHAR(8000)
AS
BEGIN
	DECLARE	@Stage TABLE
		(
			Chr CHAR(1),
			Pos INT
		)

	INSERT		@Stage
			(
				Chr,
				Pos
			)
	SELECT		SUBSTRING(@rtf, Number, 1),
			Number
	FROM		master..spt_values
	WHERE		Type = 'p'
			AND SUBSTRING(@rtf, Number, 1) IN ('{', '}')

	DECLARE	@Pos1 INT,
		@Pos2 INT

	SELECT	@Pos1 = MIN(Pos),
		@Pos2 = MAX(Pos)
	FROM	@Stage

	DELETE
	FROM	@Stage
	WHERE	Pos IN (@Pos1, @Pos2)

	WHILE 1 = 1
		BEGIN
			SELECT TOP 1	@Pos1 = s1.Pos,
					@Pos2 = s2.Pos
			FROM		@Stage AS s1
			INNER JOIN	@Stage AS s2 ON s2.Pos > s1.Pos
			WHERE		s1.Chr = '{'
					AND s2.Chr = '}'
			ORDER BY	s2.Pos - s1.Pos

			IF @@ROWCOUNT = 0
				BREAK

			DELETE
			FROM	@Stage
			WHERE	Pos IN (@Pos1, @Pos2)

			UPDATE	@Stage
			SET	Pos = Pos - @Pos2 + @Pos1 - 1
			WHERE	Pos > @Pos2

			SET @rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, '')
		END

	SET	@Pos1 = PATINDEX('%\cf[0123456789][0123456789 ]%', @rtf)

	WHILE @Pos1 > 0
		SELECT	@Pos2 = CHARINDEX(' ', @rtf, @Pos1 + 1),
			@rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, ''),
			@Pos1 = PATINDEX('%\cf[0123456789][0123456789 ]%', @rtf)

	SELECT	@rtf = REPLACE(@rtf, '\pard', ''),
		@rtf = REPLACE(@rtf, '\par', ''),
		@rtf = LEFT(@rtf, LEN(@rtf) - 1)

	SELECT	@rtf = REPLACE(@rtf, '\b0 ', ''),
		@rtf = REPLACE(@rtf, '\b ', '')

	SELECT	@rtf = STUFF(@rtf, 1, CHARINDEX(' ', @rtf), '')

	RETURN	@rtf
END

E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 09/26/2007 12:16:42

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 09/26/2007 :  12:16:25  Show Profile  Visit SwePeso's Homepage  Reply with Quote
DECLARE	@Sample TABLE (ID INT, data VARCHAR(8000))

INSERT	@Sample
SELECT	1, '{\rtf1\ansi\ansicpg1252\deff0\deflang1053{\fonttbl{\f0\fswiss\fcharset0 Arial;}}
{\colortbl ;\red255\green0\blue0;\red0\green128\blue0;\red0\green0\blue255;}
{\*\generator Msftedit 5.41.15.1507;}\viewkind4\uc1\pard\f0\fs20 P\cf1 e\cf0 ter\par
\cf2 L\cf0 ar\b s\b0 s\cf3 o\cf0 n\par
}' UNION ALL
SELECT	2, '{\rtf1\ansi\deff0{\fonttbl{\f0\fnil\fcharset0 Tahoma;}}
{\colortbl ;\red0\green0\blue0;}
{\*\generator Riched20 5.50.99.2014;}\viewkind4\uc1\pard\cf1\f0\fs18\lang1033 ~200 LF streambank stabilization to provide structural protection\par}'

SELECT	ID,
	dbo.fnParseRTF(data)
FROM	@Sample

E 12°55'05.25"
N 56°04'39.16"

Edited by - SwePeso on 09/26/2007 12:17:00
Go to Top of Page

Kristen
Test

United Kingdom
22431 Posts

Posted - 09/26/2007 :  15:13:40  Show Profile  Reply with Quote
Blimey; Noble effort mate!

I fiddled about for 10 minutes and decided I had better things to do with my time!

Here's a pathetic example with extended charters in it(some "pretty" quotes) for you to play with some more ...

UNION ALL
SELECT	3, '{\rtf1\ansi\ansicpg1252\deff0\deflang1033{\fonttbl{\f0\fswiss\fcharset0 Arial;}{\f1\froman\fcharset0 Times New Roman;}}
\viewkind4\uc1\pard\f0\fs20 This is an \lang2057\f1\fs24\ldblquote quoted text\rdblquote  of pretty-quotes\lang1033\f0\fs20\par
}'

Kristen

Edited by - Kristen on 09/26/2007 15:15:36
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 10/25/2007 :  06:46:52  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Oh well... Found this on another forum
create function dbo.RTF2TXT(@in varchar(8000)) RETURNS varchar(8000) AS 
BEGIN

DECLARE @object int
DECLARE @hr int
DECLARE @out varchar(8000)

-- Create an object that points to the SQL Server
EXEC @hr = sp_OACreate 'RICHTEXT.RichtextCtrl', @object OUT
EXEC @hr = sp_OASetProperty @object, 'TextRTF', @in
EXEC @hr = sp_OAGetProperty @object, 'Text', @out OUT
EXEC @hr = sp_OADestroy @object
return @out

END
GO

Mladen, maybe a possible project for you to do in a CLR?



E 12°55'05.25"
N 56°04'39.16"
Go to Top of Page

Kristen
Test

United Kingdom
22431 Posts

Posted - 10/26/2007 :  05:49:14  Show Profile  Reply with Quote
Hahaha ... get the COM object to do it all for you, eh?!
Go to Top of Page

spirit1
Cybernetic Yak Master

Slovenia
11751 Posts

Posted - 10/26/2007 :  05:57:59  Show Profile  Visit spirit1's Homepage  Reply with Quote
I'm still waiting on that salesman...

_______________________________________________
Causing trouble since 1980
blog: http://weblogs.sqlteam.com/mladenp
SSMS Add-in that does a few things: www.ssmstoolspack.com
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
30281 Posts

Posted - 10/26/2007 :  06:19:58  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Start with this.
I am interested to know how hard it is to write and compile.
And then testing speed.



E 12°55'05.25"
N 56°04'39.16"
Go to Top of Page

spirit1
Cybernetic Yak Master

Slovenia
11751 Posts

Posted - 10/26/2007 :  07:50:10  Show Profile  Visit spirit1's Homepage  Reply with Quote
give me a rtf sample.

_______________________________________________
Causing trouble since 1980
blog: http://weblogs.sqlteam.com/mladenp
SSMS Add-in that does a few things: www.ssmstoolspack.com
Go to Top of Page

yosiasz
Flowing Fount of Yak Knowledge

USA
1635 Posts

Posted - 04/08/2009 :  10:17:04  Show Profile  Click to see yosiasz's MSN Messenger address  Reply with Quote
Wow,

I know this post is old but I have been looking for this for ages! Now the project I am working on has rtf with non Roman/non Latin script so the results are coming out with ?????????? How I can get around that. I could dump the data to Microsoft Word with the Font I need and maybe I might be able to see it there?

Any help would be much appreciated. Let me know if I need to create a new thread for this.

Thanks

<><><><><><><><><><><><><><><><><><><><><><><><><>
If you don't have the passion to help people, you have no passion
Go to Top of Page

stevebo
Starting Member

1 Posts

Posted - 02/23/2010 :  13:52:50  Show Profile  Reply with Quote
For those of you getting an error with this script, it doesn't handle nulls well. Try adding this code

BEGIN

if @rtf is null or @rtf = ''
return ''


DECLARE @Stage TABLE

Also, the script is pretty good, but doesn't do so well with trailing rtf tags. I don't have the life force (or knowledge) to fix it though.
Go to Top of Page

onetrue
Starting Member

1 Posts

Posted - 03/01/2013 :  10:16:35  Show Profile  Reply with Quote
there is a tiny bug if the @rtf is with zero length
here is the correct:

drop FUNCTION dbo.fnParseRTF
go
CREATE FUNCTION dbo.fnParseRTF
(
@rtf VARCHAR(8000)
)
RETURNS VARCHAR(8000)
AS
BEGIN


DECLARE @Stage TABLE
(
Chr CHAR(1),
Pos INT
)

INSERT @Stage
(
Chr,
Pos
)
SELECT SUBSTRING(@rtf, Number, 1),
Number
FROM master..spt_values
WHERE Type = 'p'
AND SUBSTRING(@rtf, Number, 1) IN ('{', '}')

DECLARE @Pos1 INT,
@Pos2 INT

SELECT @Pos1 = MIN(Pos),
@Pos2 = MAX(Pos)
FROM @Stage

DELETE
FROM @Stage
WHERE Pos IN (@Pos1, @Pos2)

WHILE 1 = 1
BEGIN
SELECT TOP 1 @Pos1 = s1.Pos, @Pos2 = s2.Pos
FROM @Stage AS s1
INNER JOIN @Stage AS s2 ON s2.Pos > s1.Pos
WHERE s1.Chr = '{'
AND s2.Chr = '}'
ORDER BY s2.Pos - s1.Pos

IF @@ROWCOUNT = 0
BREAK

DELETE
FROM @Stage
WHERE Pos IN (@Pos1, @Pos2)

UPDATE @Stage
SET Pos = Pos - @Pos2 + @Pos1 - 1
WHERE Pos > @Pos2

SET @rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, '')
END

SET @Pos1 = PATINDEX('%\cf[0123456789][0123456789 ]%', @rtf)

WHILE @Pos1 > 0
SELECT @Pos2 = CHARINDEX(' ', @rtf, @Pos1 + 1), @rtf = STUFF(@rtf, @Pos1, @Pos2 - @Pos1 + 1, ''), @Pos1 = PATINDEX('%\cf[0123456789][0123456789 ]%', @rtf)

SELECT @rtf = REPLACE(@rtf, '\pard', ''), @rtf = REPLACE(@rtf, '\par', ''), @rtf = case when LEN(@rtf)>0 then LEFT(@rtf, LEN(@rtf) - 1) else @rtf end

SELECT @rtf = REPLACE(@rtf, '\b0 ', ''), @rtf = REPLACE(@rtf, '\b ', '')

SELECT @rtf = STUFF(@rtf, 1, CHARINDEX(' ', @rtf), '')


RETURN @rtf
end
Go to Top of Page
  Previous Topic Topic Next Topic  
 New Topic  Reply to Topic
 Printer Friendly
Jump To:
SQL Server Forums © 2000-2009 SQLTeam Publishing, LLC Go To Top Of Page
This page was generated in 0.08 seconds. Powered By: Snitz Forums 2000