SQL Server Forums
Profile | Register | Active Topics | Members | Search | Forum FAQ
 
Register Now and get your question answered!
Username:
Password:
Save Password
Forgot your Password?

 All Forums
 SQL Server 2005 Forums
 Transact-SQL (2005)
 Damerau-Levenshtein-Distance (now working)
 New Topic  Reply to Topic
 Printer Friendly
Author Previous Topic Topic Next Topic  

dms-distance
Starting Member

8 Posts

Posted - 05/25/2011 :  03:44:51  Show Profile  Reply with Quote
hi there,
recently i found a implementation of the damerau-levenshtein-distance in sql on http://blogs.geekdojo.net/nathan/archive/2010/01/22/2022299405.aspx
but i had to transform it to ms sql, so now it looks like this:
CREATE function dbo.DamerauLevenshteinDistance(@str1 varchar(48), @str2 varchar(48)) 
RETURNS int 
AS 
BEGIN 
Declare @str1_len        int, 
        @str2_len        int 

select        @str1_len        = len(@str1), 
              @str2_len        = len(@str2) 

if (@str1 IS NULL AND @str2 IS NULL) 
return 0 

if (@str1 IS null) 
return @str2_len 

if (@str2 IS null) 
return @str1_len 

-- matrix that stores cumulative scores as strings are traversed 
DECLARE @i int,                 
                @j int, 
                @cost int 

declare @d table(i int NULL,j int NULL,value1 int NULL) 

-- fill in scores for first row and column 
SET @i = 0 
WHILE (@i <= @str1_len) 
        BEGIN 
                INSERT into @d (i,j,value1) VALUES(@i,0,@i) 
                SET @i = @i+1 
        END --while 
SET @j = 0 

WHILE (@j <= @str2_len) 
        BEGIN 
                INSERT into @d (i,j,value1) VALUES(0,@j,@j) 
                SET @j = @j+1 
        END --while 

DECLARE @newvalue int, 
                @x int, 
                @y int, 
                @z int, 
                @tcost int, 
                @a int, 
                @b int 

SET @i = 1 
WHILE (@i <= @str1_len) 
        BEGIN 
                SET @j = 1 
                WHILE (@j <= @str2_len) 
                        BEGIN 
                                if (SUBSTRING(@str1,@i,1) = SUBSTRING(@str2,@j,1)) 
                                        set @cost = 0 
                                else 
                                        set @cost = 1 
                                -- addition, subtraction, substitution costs 
                                set @x=(SELECT value1 + 1 from @d WHERE i=@i - 1 AND j=@j) 
                                set @y=(SELECT value1 + 1 from @d WHERE i=@i AND j=@j-1) 
                                set @z=(SELECT DISTINCT value1 from @d WHERE i=@i-1 AND j=@j-1) 
                                set @z = @z + @cost 
                                set @newvalue = dbo.Minimum(dbo.Minimum(@x, @y),@z) 
                                INSERT into @d (i,j,value1) VALUES(@i,@j,@newvalue) 
                                if (@i > 1 AND @j > 1 AND SUBSTRING(@str1,@i,1) = SUBSTRING 
                                (@str2,@j - 1,1) AND SUBSTRING(@str1,@i - 1,1) = SUBSTRING(@str2,@j,1)) 
                                        BEGIN 
                                                -- transposition cost 
                                                set @a=(SELECT value1 from @d WHERE i=@i AND j=@j) 
                                                set @b=(SELECT DISTINCT value1 from @d WHERE i=@i - 2 AND j=@j-2) 
                                                set @b = @b +@cost 
                                                set @tcost=dbo.Minimum(@a,@b) 
                                                Update @d SET value1 = @tcost WHERE i=@i AND j=@j 
                                        END 
                                SET @j = @j+1 
                        END --while 
                SET @i = @i+1 
        END --while 
declare @distance int 
        set @distance =(select value1 from @d WHERE i=@str1_len AND j=@str2_len) 
return @distance 
END 


now i get this error: subquery returned more than one value

so how may i get this function work?
thx in advance

edit: change the code

Edited by - dms-distance on 05/26/2011 05:10:58

raghuveer125
Constraint Violating Yak Guru

India
283 Posts

Posted - 05/25/2011 :  04:15:10  Show Profile  Reply with Quote
You cant use temp Table in Function But you can yous variable Table

Like
Declare @d TABLE (i int NULL,j int NULL,value1 int NULL)

In Love... With Me!
Go to Top of Page

raghuveer125
Constraint Violating Yak Guru

India
283 Posts

Posted - 05/25/2011 :  04:16:25  Show Profile  Reply with Quote
Try this


--Declare @Table Table(i int, Fname nvarchar(23))
CREATE function dbo.DamerauLevenshteinDistance(@str1 varchar(48), @str2 varchar(48))
RETURNS int
AS
BEGIN
Declare @str1_len int,
@str2_len int

select @str1_len = len(@str1),
@str2_len = len(@str2)

if (@str1 IS NULL AND @str2 IS NULL)
return 0

if (@str1 IS null)
return @str2_len

if (@str2 IS null)
return @str1_len

-- matrix that stores cumulative scores as strings are traversed
DECLARE @i int,
@j int,
@cost int

Declare @d TABLE (i int NULL,j int NULL,value1 int NULL)



-- fill in scores for first row and column
SET @i = 0
WHILE (@i <= @str1_len)
BEGIN
INSERT into @d (i,j,value1) VALUES(@i,0,@i)
SET @i = @i+1
END --while
SET @j = 0

WHILE (@j <= @str2_len)
BEGIN
INSERT into @d (i,j,value1) VALUES(0,@j,@j)
SET @j = @j+1
END --while

DECLARE @newvalue int,
@x int,
@y int,
@z int,
@tcost int,
@a int,
@b int

SET @i = 1
WHILE (@i <= @str1_len)
BEGIN
SET @j = 1
WHILE (@j <= @str2_len)
BEGIN
if (SUBSTRING(@str1,@i,1) = SUBSTRING(@str2,@j,1))
set @cost = 0
else
set @cost = 1
-- addition, subtraction, substitution costs
set @x=(SELECT value1 + 1 from @d WHERE @i=@i - 1 AND @j=@j)
set @y=(SELECT value1 + 1 from @d WHERE @i=@i AND @j=@j-1)
set @z=(SELECT DISTINCT value1 from @d WHERE @i=@i-1 AND @j=@j-1)
set @z = @z + @cost
set @newvalue = dbo.Minimum(dbo.Minimum(@x, @y),@z)
INSERT into @d (i,j,value1) VALUES(@i,@j,@newvalue)
if (@i > 1 AND @j > 1 AND SUBSTRING(@str1,@i,1) = SUBSTRING
(@str2,@j - 1,1) AND SUBSTRING(@str1,@i - 1,1) = SUBSTRING(@str2,@j,1))
BEGIN
-- transposition cost
set @a=(SELECT value1 from @d WHERE @i=@i AND @j=@j)
set @b=(SELECT DISTINCT value1 from @d WHERE @i=@i - 2 AND @j=@j-2)
set @b = @b +@cost
set @tcost=dbo.Minimum(@a,@b)
Update @d SET value1 = @tcost WHERE @i=@i AND @j=@j
END
SET @j = @j+1
END --while
SET @i = @i+1
END --while
declare @distance int
set @distance =(select value1 from @d WHERE i=@str1_len AND j=@str2_len)
return @distance
END

In Love... With Me!
Go to Top of Page

dms-distance
Starting Member

8 Posts

Posted - 05/25/2011 :  04:19:22  Show Profile  Reply with Quote
hi tanks, for your help! now i get an error by running this function. i changed the first post.

subquery returned more than one value.

but i only want to get back the distance as int

Edited by - dms-distance on 05/25/2011 04:20:53
Go to Top of Page

raghuveer125
Constraint Violating Yak Guru

India
283 Posts

Posted - 05/25/2011 :  05:06:28  Show Profile  Reply with Quote
Is this your function

dbo.Minimum

In Love... With Me!
Go to Top of Page

dms-distance
Starting Member

8 Posts

Posted - 05/25/2011 :  05:10:54  Show Profile  Reply with Quote
quote:
Originally posted by raghuveer125

Is this your function

dbo.Minimum

In Love... With Me!



hi, this is the function minimum:
CREATE FUNCTION dbo.Minimum(@val1 float, @val2 float) 
RETURNS Float 
AS 
BEGIN 
    IF (@val1 < @val2) 
        RETURN @val1
    RETURN @val2
END


is this the reason? so how can i fix this problem?
Go to Top of Page

Seventhnight
Flowing Fount of Yak Knowledge

USA
2878 Posts

Posted - 05/25/2011 :  06:01:50  Show Profile  Visit Seventhnight's Homepage  Reply with Quote
I think this has already been done here also... http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=51540

Corey

I Has Returned!!
Go to Top of Page

dms-distance
Starting Member

8 Posts

Posted - 05/25/2011 :  06:04:09  Show Profile  Reply with Quote
quote:
Originally posted by Seventhnight

I think this has already been done here also... http://www.sqlteam.com/forums/topic.asp?TOPIC_ID=51540

Corey

I Has Returned!!



i think this is only the levenshtein-distance
Go to Top of Page

dms-distance
Starting Member

8 Posts

Posted - 05/26/2011 :  02:57:31  Show Profile  Reply with Quote
edit: i think i works correctly now:
CREATE function dbo.DamerauLevenshteinDistance(@str1 varchar(48), @str2 varchar(48)) 
RETURNS int 
AS 
begin 
Declare @str1_len        int, 
        @str2_len        int, 
        @distance int 

select        @str1_len        = len(@str1), 
              @str2_len        = len(@str2) 

if ((@str1 IS NULL OR @str1 = '') AND (@str2 IS NULL OR @str2 = '')) 
--return 0 
set @distance = 0 

if (@str1 IS NULL OR @str1 = '') 
--return @str2_len 
set @distance = @str2_len 

if (@str2 IS NULL OR @str2 = '') 
--return @str1_len 
set @distance = @str1_len 

if ((@str1 IS NOT NULL OR @str1 <> '') AND (@str2 IS NOT NULL OR @str2 <> '')) 
begin 
        -- matrix that stores cumulative scores as strings are traversed 
        Declare @i int,                 
                        @j int, 
                        @cost int 

        Declare @d table(i int NULL,j int NULL,value1 int NULL) 

        -- fill in scores for first row and column 
        set @i = 0 
        while (@i <= @str1_len) 
                begin 
                        insert into @d (i,j,value1) values(@i,0,@i) 
                        set @i = @i+1 
                end --while 
        set @j = 0 

        while (@j <= @str2_len) 
                begin 
                        insert into @d (i,j,value1) values(0,@j,@j) 
                        set @j = @j+1 
                end --while 

        Declare @newvalue int, 
                        @x int, 
                        @y int, 
                        @z int, 
                        @tcost int, 
                        @a int, 
                        @b int 

        set @i = 1 
        while (@i <= @str1_len) 
                begin 
                        set @j = 1 
                        while (@j <= @str2_len) 
                                begin 
                                        if (substring(@str1,@i,1) = substring(@str2,@j,1)) 
                                                set @cost = 0 
                                        else 
                                                set @cost = 1 
                                        -- addition, subtraction, substitution costs 
                                        set @x=(select value1 + 1 from @d where i=@i - 1 AND j=@j) 
                                        set @y=(select value1 + 1 from @d where i=@i AND j=@j-1) 
                                        set @z=(select distinct value1 from @d where i=@i-1 AND j=@j-1) 
                                        set @z = @z + @cost 
                                        set @newvalue = dbo.Minimum(dbo.Minimum(@x, @y),@z) 
                                        insert into @d (i,j,value1) values(@i,@j,@newvalue) 
                                        if (@i > 1 AND @j > 1 AND substring(@str1,@i,1) = substring 
                                        (@str2,@j - 1,1) AND substring(@str1,@i - 1,1) = substring(@str2,@j,1)) 
                                                begin 
                                                        -- transposition cost 
                                                        set @a=(select value1 from @d where i=@i AND j=@j) 
                                                        set @b=(select distinct value1 from @d where i=@i - 2 AND j=@j-2) 
                                                        set @b = @b +@cost 
                                                        set @tcost=dbo.Minimum(@a,@b) 
                                                        Update @d set value1 = @tcost where i=@i AND j=@j 
                                                end 
                                        set @j = @j+1 
                                end --while 
                        set @i = @i+1 

                        end --while 
        set @distance =(select value1 from @d where i=@str1_len AND j=@str2_len) 
end 

return        @distance 
end 

Edited by - dms-distance on 05/26/2011 05:10:38
Go to Top of Page

mmoschel
Starting Member

USA
1 Posts

Posted - 11/15/2013 :  10:34:08  Show Profile  Reply with Quote
Hi Team,

Shouldn't the following return 2 instead of 3? Is anyone else seeing a result of 3 for this function?

SELECT DBO.DamerauLevenshteinDistance('CA','ABC')

Thanks in advance,
Go to Top of Page

SwePeso
Patron Saint of Lost Yaks

Sweden
29910 Posts

Posted - 11/16/2013 :  08:02:01  Show Profile  Visit SwePeso's Homepage  Reply with Quote
Do it with SQLCLR
http://sourceforge.net/projects/simmetrics/



Microsoft SQL Server MVP, MCT, MCSE, MCSA, MCP, MCITP, MCTS, MCDBA

Edited by - SwePeso on 11/16/2013 08:02:18
Go to Top of Page

ybayoglu
Starting Member

Turkey
1 Posts

Posted - 01/09/2014 :  10:49:28  Show Profile  Reply with Quote
quote:
Originally posted by mmoschel

Hi Team,

Shouldn't the following return 2 instead of 3? Is anyone else seeing a result of 3 for this function?

SELECT DBO.DamerauLevenshteinDistance('CA','ABC')

Thanks in advance,



i think you consider first transpose 'CA' to 'AC' and then insert B to between A and C; but you can not mix transpose operation and insertation. thats why 3 is corret result.
somebody please warn me, if i am wrong.
Go to Top of Page
  Previous Topic Topic Next Topic  
 New Topic  Reply to Topic
 Printer Friendly
Jump To:
SQL Server Forums © 2000-2009 SQLTeam Publishing, LLC Go To Top Of Page
This page was generated in 0.16 seconds. Powered By: Snitz Forums 2000