-- This table contains the tags to be replaced. The % in <head%>
-- will take care of any extra information in the tag that you needn't worry
-- about as a whole. In any case, this table contains all the tags that needs
-- to be search & replaced.
create table #html ( tag varchar(30) )
insert #html values ( '<html>' )
insert #html values ( '<head%>' )
insert #html values ( '<title%>' )
insert #html values ( '<link%>' )
insert #html values ( '</title>' )
insert #html values ( '</head>' )
insert #html values ( '<body%>' )
insert #html values ( '</html>' )
go
-- A simple table with the HTML strings
create table #t ( id tinyint identity , string varchar(255) )
insert #t values (
-- '<HTML><HEAD><TITLE>Some Name</TITLE>
<LINK REL="stylesheet" HREF="/style.css" TYPE="text/css" ></HEAD>
<BODY BGCOLOR="FFFFFF" VLINK="#444444">
Some HTML text after the body</HTML>'
)
insert #t values (
'<HTML><HEAD><TITLE>Another Name</TITLE>
--
<LINK REL="stylesheet" HREF="/style.css"></HEAD>
<BODY BGCOLOR="FFFFFF" VLINK="#444444">Another HTML text after the body</HTML>'
)
go
-- This is the code to strip the tags out.
-- It finds the starting location of each tag in the HTML string ,
-- finds the length of the tag with the extra properties if any. This is
-- done by locating the end of the tag namely '>'. The same is done
-- in a loop till all tags are replaced.
begin tran
while exists(select * from #t join #html on patindex('%' + tag + '%' , string ) > 0 )
update #t
set string = stuff( string , patindex('%' + tag + '%' , string ) ,
charindex( '>' , string , patindex('%' + tag + '%' , string ) )
- patindex('%' + tag + '%' , string ) + 1 , '' )
from #t join #html
on patindex('%' + tag + '%' , string ) > 0
select * from #t
rollback