Please start any new threads on our new site at https://forums.sqlteam.com. We've got lots of great SQL Server experts to answer whatever question you can come up with.

 All Forums
 SQL Server 2005 Forums
 Transact-SQL (2005)
 Damerau-Levenshtein-Distance (now working)

Author  Topic 

dms-distance
Starting Member

8 Posts

Posted - 2011-05-25 : 03:44:51
hi there,
recently i found a implementation of the damerau-levenshtein-distance in sql on http://blogs.geekdojo.net/nathan/archive/2010/01/22/2022299405.aspx
but i had to transform it to ms sql, so now it looks like this:
CREATE function dbo.DamerauLevenshteinDistance(@str1 varchar(48), @str2 varchar(48)) 
RETURNS int
AS
BEGIN
Declare @str1_len int,
@str2_len int

select @str1_len = len(@str1),
@str2_len = len(@str2)

if (@str1 IS NULL AND @str2 IS NULL)
return 0

if (@str1 IS null)
return @str2_len

if (@str2 IS null)
return @str1_len

-- matrix that stores cumulative scores as strings are traversed
DECLARE @i int,
@j int,
@cost int

declare @d table(i int NULL,j int NULL,value1 int NULL)

-- fill in scores for first row and column
SET @i = 0
WHILE (@i <= @str1_len)
BEGIN
INSERT into @d (i,j,value1) VALUES(@i,0,@i)
SET @i = @i+1
END --while
SET @j = 0

WHILE (@j <= @str2_len)
BEGIN
INSERT into @d (i,j,value1) VALUES(0,@j,@j)
SET @j = @j+1
END --while

DECLARE @newvalue int,
@x int,
@y int,
@z int,
@tcost int,
@a int,
@b int

SET @i = 1
WHILE (@i <= @str1_len)
BEGIN
SET @j = 1
WHILE (@j <= @str2_len)
BEGIN
if (SUBSTRING(@str1,@i,1) = SUBSTRING(@str2,@j,1))
set @cost = 0
else
set @cost = 1
-- addition, subtraction, substitution costs
set @x=(SELECT value1 + 1 from @d WHERE i=@i - 1 AND j=@j)
set @y=(SELECT value1 + 1 from @d WHERE i=@i AND j=@j-1)
set @z=(SELECT DISTINCT value1 from @d WHERE i=@i-1 AND j=@j-1)
set @z = @z + @cost
set @newvalue = dbo.Minimum(dbo.Minimum(@x, @y),@z)
INSERT into @d (i,j,value1) VALUES(@i,@j,@newvalue)
if (@i > 1 AND @j > 1 AND SUBSTRING(@str1,@i,1) = SUBSTRING
(@str2,@j - 1,1) AND SUBSTRING(@str1,@i - 1,1) = SUBSTRING(@str2,@j,1))
BEGIN
-- transposition cost
set @a=(SELECT value1 from @d WHERE i=@i AND j=@j)
set @b=(SELECT DISTINCT value1 from @d WHERE i=@i - 2 AND j=@j-2)
set @b = @b +@cost
set @tcost=dbo.Minimum(@a,@b)
Update @d SET value1 = @tcost WHERE i=@i AND j=@j
END
SET @j = @j+1
END --while
SET @i = @i+1
END --while
declare @distance int
set @distance =(select value1 from @d WHERE i=@str1_len AND j=@str2_len)
return @distance
END


now i get this error: subquery returned more than one value

so how may i get this function work?
thx in advance

edit: change the code

raghuveer125
Constraint Violating Yak Guru

285 Posts

Posted - 2011-05-25 : 04:15:10
You cant use temp Table in Function But you can yous variable Table

Like
Declare @d TABLE (i int NULL,j int NULL,value1 int NULL)

In Love... With Me!
Go to Top of Page

raghuveer125
Constraint Violating Yak Guru

285 Posts

Posted - 2011-05-25 : 04:16:25
Try this


--Declare @Table Table(i int, Fname nvarchar(23))
CREATE function dbo.DamerauLevenshteinDistance(@str1 varchar(48), @str2 varchar(48))
RETURNS int
AS
BEGIN
Declare @str1_len int,
@str2_len int

select @str1_len = len(@str1),
@str2_len = len(@str2)

if (@str1 IS NULL AND @str2 IS NULL)
return 0

if (@str1 IS null)
return @str2_len

if (@str2 IS null)
return @str1_len

-- matrix that stores cumulative scores as strings are traversed
DECLARE @i int,
@j int,
@cost int

Declare @d TABLE (i int NULL,j int NULL,value1 int NULL)



-- fill in scores for first row and column
SET @i = 0
WHILE (@i <= @str1_len)
BEGIN
INSERT into @d (i,j,value1) VALUES(@i,0,@i)
SET @i = @i+1
END --while
SET @j = 0

WHILE (@j <= @str2_len)
BEGIN
INSERT into @d (i,j,value1) VALUES(0,@j,@j)
SET @j = @j+1
END --while

DECLARE @newvalue int,
@x int,
@y int,
@z int,
@tcost int,
@a int,
@b int

SET @i = 1
WHILE (@i <= @str1_len)
BEGIN
SET @j = 1
WHILE (@j <= @str2_len)
BEGIN
if (SUBSTRING(@str1,@i,1) = SUBSTRING(@str2,@j,1))
set @cost = 0
else
set @cost = 1
-- addition, subtraction, substitution costs
set @x=(SELECT value1 + 1 from @d WHERE @i=@i - 1 AND @j=@j)
set @y=(SELECT value1 + 1 from @d WHERE @i=@i AND @j=@j-1)
set @z=(SELECT DISTINCT value1 from @d WHERE @i=@i-1 AND @j=@j-1)
set @z = @z + @cost
set @newvalue = dbo.Minimum(dbo.Minimum(@x, @y),@z)
INSERT into @d (i,j,value1) VALUES(@i,@j,@newvalue)
if (@i > 1 AND @j > 1 AND SUBSTRING(@str1,@i,1) = SUBSTRING
(@str2,@j - 1,1) AND SUBSTRING(@str1,@i - 1,1) = SUBSTRING(@str2,@j,1))
BEGIN
-- transposition cost
set @a=(SELECT value1 from @d WHERE @i=@i AND @j=@j)
set @b=(SELECT DISTINCT value1 from @d WHERE @i=@i - 2 AND @j=@j-2)
set @b = @b +@cost
set @tcost=dbo.Minimum(@a,@b)
Update @d SET value1 = @tcost WHERE @i=@i AND @j=@j
END
SET @j = @j+1
END --while
SET @i = @i+1
END --while
declare @distance int
set @distance =(select value1 from @d WHERE i=@str1_len AND j=@str2_len)
return @distance
END

In Love... With Me!
Go to Top of Page

dms-distance
Starting Member

8 Posts

Posted - 2011-05-25 : 04:19:22
hi tanks, for your help! now i get an error by running this function. i changed the first post.

subquery returned more than one value.

but i only want to get back the distance as int
Go to Top of Page

raghuveer125
Constraint Violating Yak Guru

285 Posts

Posted - 2011-05-25 : 05:06:28
Is this your function

dbo.Minimum

In Love... With Me!
Go to Top of Page

dms-distance
Starting Member

8 Posts

Posted - 2011-05-25 : 05:10:54
quote:
Originally posted by raghuveer125

Is this your function

dbo.Minimum

In Love... With Me!



hi, this is the function minimum:
CREATE FUNCTION dbo.Minimum(@val1 float, @val2 float) 
RETURNS Float
AS
BEGIN
IF (@val1 < @val2)
RETURN @val1
RETURN @val2
END


is this the reason? so how can i fix this problem?
Go to Top of Page

Seventhnight
Master Smack Fu Yak Hacker

2878 Posts

Posted - 2011-05-25 : 06:01:50
I think this has already been done here also... http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=51540

Corey

I Has Returned!!
Go to Top of Page

dms-distance
Starting Member

8 Posts

Posted - 2011-05-25 : 06:04:09
quote:
Originally posted by Seventhnight

I think this has already been done here also... http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=51540

Corey

I Has Returned!!



i think this is only the levenshtein-distance
Go to Top of Page

dms-distance
Starting Member

8 Posts

Posted - 2011-05-26 : 02:57:31
edit: i think i works correctly now:
CREATE function dbo.DamerauLevenshteinDistance(@str1 varchar(48), @str2 varchar(48)) 
RETURNS int
AS
begin
Declare @str1_len int,
@str2_len int,
@distance int

select @str1_len = len(@str1),
@str2_len = len(@str2)

if ((@str1 IS NULL OR @str1 = '') AND (@str2 IS NULL OR @str2 = ''))
--return 0
set @distance = 0

if (@str1 IS NULL OR @str1 = '')
--return @str2_len
set @distance = @str2_len

if (@str2 IS NULL OR @str2 = '')
--return @str1_len
set @distance = @str1_len

if ((@str1 IS NOT NULL OR @str1 <> '') AND (@str2 IS NOT NULL OR @str2 <> ''))
begin
-- matrix that stores cumulative scores as strings are traversed
Declare @i int,
@j int,
@cost int

Declare @d table(i int NULL,j int NULL,value1 int NULL)

-- fill in scores for first row and column
set @i = 0
while (@i <= @str1_len)
begin
insert into @d (i,j,value1) values(@i,0,@i)
set @i = @i+1
end --while
set @j = 0

while (@j <= @str2_len)
begin
insert into @d (i,j,value1) values(0,@j,@j)
set @j = @j+1
end --while

Declare @newvalue int,
@x int,
@y int,
@z int,
@tcost int,
@a int,
@b int

set @i = 1
while (@i <= @str1_len)
begin
set @j = 1
while (@j <= @str2_len)
begin
if (substring(@str1,@i,1) = substring(@str2,@j,1))
set @cost = 0
else
set @cost = 1
-- addition, subtraction, substitution costs
set @x=(select value1 + 1 from @d where i=@i - 1 AND j=@j)
set @y=(select value1 + 1 from @d where i=@i AND j=@j-1)
set @z=(select distinct value1 from @d where i=@i-1 AND j=@j-1)
set @z = @z + @cost
set @newvalue = dbo.Minimum(dbo.Minimum(@x, @y),@z)
insert into @d (i,j,value1) values(@i,@j,@newvalue)
if (@i > 1 AND @j > 1 AND substring(@str1,@i,1) = substring
(@str2,@j - 1,1) AND substring(@str1,@i - 1,1) = substring(@str2,@j,1))
begin
-- transposition cost
set @a=(select value1 from @d where i=@i AND j=@j)
set @b=(select distinct value1 from @d where i=@i - 2 AND j=@j-2)
set @b = @b +@cost
set @tcost=dbo.Minimum(@a,@b)
Update @d set value1 = @tcost where i=@i AND j=@j
end
set @j = @j+1
end --while
set @i = @i+1

end --while
set @distance =(select value1 from @d where i=@str1_len AND j=@str2_len)
end

return @distance
end
Go to Top of Page

mmoschel
Starting Member

1 Post

Posted - 2013-11-15 : 10:34:08
Hi Team,

Shouldn't the following return 2 instead of 3? Is anyone else seeing a result of 3 for this function?

SELECT DBO.DamerauLevenshteinDistance('CA','ABC')

Thanks in advance,
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

30421 Posts

Posted - 2013-11-16 : 08:02:01
Do it with SQLCLR
http://sourceforge.net/projects/simmetrics/



Microsoft SQL Server MVP, MCT, MCSE, MCSA, MCP, MCITP, MCTS, MCDBA
Go to Top of Page

ybayoglu
Starting Member

1 Post

Posted - 2014-01-09 : 10:49:28
quote:
Originally posted by mmoschel

Hi Team,

Shouldn't the following return 2 instead of 3? Is anyone else seeing a result of 3 for this function?

SELECT DBO.DamerauLevenshteinDistance('CA','ABC')

Thanks in advance,



i think you consider first transpose 'CA' to 'AC' and then insert B to between A and C; but you can not mix transpose operation and insertation. thats why 3 is corret result.
somebody please warn me, if i am wrong.
Go to Top of Page
   

- Advertisement -