aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fuzzel.lua104
-rw-r--r--fuzzel_min.lua2
2 files changed, 55 insertions, 51 deletions
diff --git a/fuzzel.lua b/fuzzel.lua
index fc6d10d..664e8fc 100644
--- a/fuzzel.lua
+++ b/fuzzel.lua
@@ -6,28 +6,28 @@
Some helper functions for calculateing distance between two strings
Provides:
- fuzzel.LevenshtienDistance_extended(string_first, string_second, number_addcost, number_substituecost, number_deletecost)
- Calculates the Levenshtien Distance between two strings, useing the costs given. "Real" Levenshtien Distance uses values 1,1,1 for costs.
+ fuzzel.LevenshteinDistance_extended(string_first, string_second, number_addcost, number_substituecost, number_deletecost)
+ Calculates the Levenshtein Distance between two strings, using the costs given. "Real" Levenshtein Distance uses values 1,1,1 for costs.
returns number_distance
- fuzzel.LevenshtienDistance(string_first, strings_second)
- Calculates the "real" Levenshtien Distance
+ fuzzel.LevenshteinDistance(string_first, string_second)
+ Calculates the "real" Levenshtein Distance
returns number_distance
fuzzel.LevensteinRatio(string_first, string_second)
- The Levenshtien Ratio divided by the first string's length. Useing a ratio is a decent way to determin if a spelling is "close enough"
+ The Levenshtein Distance divided by the first string's length. Using a ratio is a decent way to determine if a spelling is "close enough"
returns number_distance
- fuzzel.DamerauLevenshtienDistance_extended(string_first, string_second, number_addcost, number_substituecost, number_deletecost, number_transpositioncost)
- Damerau-Levenshtien Distance is almost exactly like Levenshtien Distance, with the caveat that two letters next to each other, with swapped positions only counts as "one" cost (in "real" Damerau-Levenshtien Distance)
+ fuzzel.DamerauLevenshteinDistance_extended(string_first, string_second, number_addcost, number_substituecost, number_deletecost, number_transpositioncost)
+ Damerau-Levenshtein Distance is almost exactly like Levenshtein Distance, with the caveat that two letters next to each other with swapped positions only count as "one" cost (in "real" Damerau-Levenshtein Distance)
returns number
- fuzzel.DamerauLevenshtienDistance(stirng_first, strings_second)
- Calculates the "real" Damerau-Levenshtien Distance
+ fuzzel.DamerauLevenshteinDistance(string_first, string_second)
+ Calculates the "real" Damerau-Levenshtein Distance
returns number
- fuzzel.DamerauLevenshtienRatio(string_first, string_second)
- The Damerau-Levenshtien Distance divided by the first string's length
+ fuzzel.DamerauLevenshteinRatio(string_first, string_second)
+ The Damerau-Levenshtein Distance divided by the first string's length
returns number_ratio
fuzzel.HammingDistance(string_first, string_second)
@@ -39,19 +39,19 @@
returns number_ratio
fuzzel.FuzzyFindDistance(string_needle, vararg_in)
- in may be either a table, or a list of arguments. fuzzel.FuzzySearchDistance will find the string that most closely resembles needle, based on Damerau-Levenshtien Distance
+ in may be either a table, or a list of arguments. fuzzel.FuzzyFindDistance will find the string that most closely resembles needle, based on Damerau-Levenshtein Distance
returns string_closest, number_distance
fuzzel.FuzzyFindRatio(string_needle, vararg_in)
- in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtien ratio.
+ in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtein ratio.
returns string_closest, nubmer_ratio
fuzzel.FuzzySortDistance(string_needle, vararg_in)
- Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtien Distance
+ Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtein Distance
returns table_sorted
fuzzel.FuzzySortRatio(string needle, vararg_in)
- Same as above, but uses Damerau-Levenshtien Ratio instead
+ Same as above, but uses Damerau-Levenshtein Ratio instead
returns table_sorted
Example:
@@ -85,25 +85,30 @@
3 : Brown Fox
Some easy-to-use mnemonics
- fuzzel.ld_e = fuzzel.LevenshtienDistance_extended
- fuzzel.ld = fuzzel.LevenshtienDistance
+ fuzzel.ld_e = fuzzel.LevenshteinDistance_extended
+ fuzzel.ld = fuzzel.LevenshteinDistance
fuzzel.lr = fuzzel.LevensteinRatio
- fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended
- fuzzel.dld = fuzzel.DamerauLevenshtienDistance
- fuzzel.dlr = fuzzel.DamerauLevenshtienRatio
+ fuzzel.dld_e = fuzzel.DamerauLevenshteinDistance_extended
+ fuzzel.dld = fuzzel.DamerauLevenshteinDistance
+ fuzzel.dlr = fuzzel.DamerauLevenshteinRatio
fuzzel.hd = fuzzel.HammingDistance
fuzzel.hr = fuzzel.HammingRatio
fuzzel.ffd = fuzzel.FuzzyFindDistance
fuzzel.ffr = fuzzel.FuzzyFindRatio
fuzzel.fsd = fuzzel.FuzzySortDistance
fuzzel.fsr = fuzzel.FuzzySortRatio
-]]
+
+]]--You probably don't want to touch anything past this point
--Assign locals to these to the minifier can compress the file better
local strlen,chrat,min,asrt,prs,iprs,typ,upack,tblins,tblsrt = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort
local fuzzel = {}
+--A clever way to allow the minifier to minify function names: each public function name is built as a string and stored in a local variable, which is then used to index the fuzzel table.
+local da, le, di, ra, fu, fi, so, ex, ha = "Damerau", "Levenshtein", "Distance", "Ratio", "Fuzzy", "Find", "Sort", "_extended", "Hamming"
+local LevenshteinDistance_extended,LevenshteinDistance,LevenshteinRatio,DamerauLevenshteinDistance_extended,DamerauLevenshteinDistance,DamerauLevenshteinRatio,FuzzyFindDistance,FuzzyFindRatio,FuzzySortDistance,FuzzySortRatio,HammingDistance,HammingRatio = le..di..ex,le..di,le..ra,da..le..di..ex,da..le..di,da..le..ra,fu..fi..di,fu..fi..ra,fu..so..di,fu..so..ra,ha..di,ha..ra
+
local function genericDistance( stringa, stringb, addcost, subcost, delcost, ...)
--Length of each string
local salen, sblen = strlen(stringa), strlen(stringb)
@@ -145,38 +150,37 @@ local function genericDistance( stringa, stringb, addcost, subcost, delcost, ...
return dyntbl[salen][sblen]
end
-function fuzzel.LevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost)
- return fuzzel.genericDistance(stringa, stringb, addcost, subcost, delcost)
+fuzzel[LevenshteinDistance_extended] = function(stringa, stringb, addcost, subcost, delcost)
+ return genericDistance(stringa, stringb, addcost, subcost, delcost)
end
-fuzzel.ld_e = fuzzel.LevenshtienDistance_extended
+fuzzel.ld_e = fuzzel[LevenshteinDistance_extended]
-function fuzzel.LevenshtienDistance(stringa,stringb)
+fuzzel[LevenshteinDistance] = function(stringa,stringb)
return fuzzel.ld_e(stringa,stringb,1,1,1)
end
-fuzzel.ld = fuzzel.LevenshtienDistance
-
+fuzzel.ld = fuzzel[LevenshteinDistance]
-function fuzzel.LevenshteinRatio(stringa,stringb)
+fuzzel[LevenshteinRatio] = function(stringa,stringb)
return fuzzel.ld(stringa,stringb) / strlen(stringa)
end
-fuzzel.lr = fuzzel.LevensteinRatio
+fuzzel.lr = fuzzel[LevenshteinRatio]
-function fuzzel.DamerauLevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost, trncost)
+fuzzel[DamerauLevenshteinDistance_extended] = function(stringa, stringb, addcost, subcost, delcost, trncost)
return genericDistance(stringa,stringb,addcost,subcost,delcost,true,trncost)
end
-fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended
+fuzzel.dld_e = fuzzel[DamerauLevenshteinDistance_extended]
-function fuzzel.DamerauLevenshtienDistance(stringa,stringb)
+fuzzel[DamerauLevenshteinDistance] = function(stringa,stringb)
return fuzzel.dld_e(stringa,stringb,1,1,1,1)
end
-fuzzel.dld = fuzzel.DamerauLevenshtienDistance
+fuzzel.dld = fuzzel[DamerauLevenshteinDistance]
-function fuzzel.DamerauLevenshtienRatio(stringa,stringb)
+fuzzel[DamerauLevenshteinRatio] = function(stringa,stringb)
return fuzzel.dld(stringa,stringb) / strlen(stringa)
end
-fuzzel.dlr = fuzzel.DamerauLevenshtienRatio
+fuzzel.dlr = fuzzel[DamerauLevenshteinRatio]
-function fuzzel.HammingDistance(stringa,stringb)
+fuzzel[HammingDistance] = function(stringa,stringb)
local len = strlen(stringa)
asrt(len == strlen(stringb),"Hamming Distance cannot be calculated on two strings of different lengths:\"" .. stringa .. "\" \"" .. stringb .. "\"")
local dist = 0
@@ -185,12 +189,12 @@ function fuzzel.HammingDistance(stringa,stringb)
end
return dist
end
-fuzzel.hd = fuzzel.HammingDistance
+fuzzel.hd = fuzzel[HammingDistance]
-function fuzzel.HammingRatio(stringa,stringb)
- return fuzzel.HammingDistance(stringa,stringb) / strlen(stringa)
+fuzzel[HammingRatio] = function(stringa,stringb)
+ return fuzzel.hd(stringa,stringb) / strlen(stringa)
end
-fuzzel.hr = fuzzel.HammingRatio
+fuzzel.hr = fuzzel[HammingRatio]
local function FuzzySearch(str,func,...)
--Allow varargs, or a table
@@ -209,12 +213,12 @@ local function FuzzySearch(str,func,...)
return sout, tmin
end
-function fuzzel.FuzzyFindDistance(str,...)
+fuzzel[FuzzyFindDistance] = function(str,...)
return upack{FuzzySearch(str,fuzzel.dld,...)}
end
-fuzzel.ffd = fuzzel.FuzzyFindDistance
+fuzzel.ffd = fuzzel[FuzzyFindDistance]
-function fuzzel.FuzzyFindRatio(str,...)
+fuzzel[FuzzyFindRatio] = function(str,...)
return upack{FuzzySearch(str,fuzzel.dlr,...)}
end
@@ -246,16 +250,16 @@ local function FuzzySort(str, func, ...)
end
return otbl
end
-fuzzel.ffr = fuzzel.FuzzyFindRatio
+fuzzel.ffr = fuzzel[FuzzyFindRatio]
-function fuzzel.FuzzySortDistance(str,...)
- return upack{FuzzySort(str,fuzzel.dld,...)}
+fuzzel[FuzzySortDistance] = function(str,...)
+ return FuzzySort(str,fuzzel.dld,...)
end
-fuzzel.fsd = fuzzel.FuzzySortDistance
+fuzzel.fsd = fuzzel[FuzzySortDistance]
-function fuzzel.FuzzySortRatio(str,...)
- return upack{FuzzySort(str,fuzzel.dlr,...)}
+fuzzel[FuzzySortRatio] = function(str,...)
+ return FuzzySort(str,fuzzel.dlr,...)
end
-fuzzel.fsr = fuzzel.FuzzySortRatio
+fuzzel.fsr = fuzzel[FuzzySortRatio]
return fuzzel
diff --git a/fuzzel_min.lua b/fuzzel_min.lua
index 961932d..13edd2a 100644
--- a/fuzzel_min.lua
+++ b/fuzzel_min.lua
@@ -1 +1 @@
-local a,b,c,d,e,f,g,h,i,j=string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort;local k={}local function l(m,n,o,p,q,...)local r,s=a(m),a(n)local t={}for u=0,r do t[u]={}for v=0,s do t[u][v]=0 end end;for u=1,r do t[u][0]=u end;for v=1,s do t[0][v]=v end;for v=1,s do for u=1,r do local w=b(m,u)local x=b(n,v)t[u][v]=c(t[u-1][v]+q,t[u][v-1]+o,t[u-1][v-1]+(w==x and 0 or p))if arg[1]and u>1 and v>1 and w==b(n,v-1)and b(m,u-1)==x then t[u][v]=c(t[u][v],t[u-2][v-2]+(w==x and 0 or arg[2]))end end end;return t[r][s]end;function k.LevenshtienDistance_extended(m,n,o,p,q)return k.genericDistance(m,n,o,p,q)end;k.ld_e=k.LevenshtienDistance_extended;function k.LevenshtienDistance(m,n)return k.ld_e(m,n,1,1,1)end;k.ld=k.LevenshtienDistance;function k.LevenshteinRatio(m,n)return k.ld(m,n)/a(m)end;k.lr=k.LevensteinRatio;function k.DamerauLevenshtienDistance_extended(m,n,o,p,q,y)return l(m,n,o,p,q,true,y)end;k.dld_e=k.DamerauLevenshtienDistance_extended;function k.DamerauLevenshtienDistance(m,n)return k.dld_e(m,n,1,1,1,1)end;k.dld=k.DamerauLevenshtienDistance;function k.DamerauLevenshtienRatio(m,n)return k.dld(m,n)/a(m)end;k.dlr=k.DamerauLevenshtienRatio;function k.HammingDistance(m,n)local z=a(m)d(z==a(n),"Hamming Distance cannot be calculated on two strings of different lengths:\""..m.."\" \""..n.."\"")local A=0;for u=1,z do A=A+(b(m,u)~=b(n,u)and 1 or 0)end;return A end;k.hd=k.HammingDistance;function k.HammingRatio(m,n)return k.HammingDistance(m,n)/a(m)end;k.hr=k.HammingRatio;local function B(C,D,...)local E=g(arg[1])=="table"and arg[1]or arg;local F=D(E[1],C)local G=E[1]for H,I in e(E)do local J=D(I,C)if J<F then F=J;G=I end end;return G,F end;function k.FuzzyFindDistance(C,...)return h{B(C,k.dld,...)}end;k.ffd=k.FuzzyFindDistance;function k.FuzzyFindRatio(C,...)return h{B(C,k.dlr,...)}end;local function K(C,D,...)local E=g(arg[1])=="table"and arg[1]or arg;local L={}local M={}for H,I in e(E)do local A=D(C,I)if L[A]==nil then 
L[A]={}i(M,A)end;i(L[A],I)end;j(M)local N={}for H,I in f(M)do for u,v in e(L[I])do i(N,v)end end;return N end;k.ffr=k.FuzzyFindRatio;function k.FuzzySortDistance(C,...)return h{K(C,k.dld,...)}end;k.fsd=k.FuzzySortDistance;function k.FuzzySortRatio(C,...)return h{K(C,k.dlr,...)}end;k.fsr=k.FuzzySortRatio;return k
+local a,b,c,d,e,f,g,h,i,j=string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort;local k={}local l,m,n,o,p,q,r,s,t="Damerau","Levenshtein","Distance","Ratio","Fuzzy","Find","Sort","_extended","Hamming"local u,v,w,x,y,z,A,B,C,D,E,F=m..n..s,m..n,m..o,l..m..n..s,l..m..n,l..m..o,p..q..n,p..q..o,p..r..n,p..r..o,t..n,t..o;local function G(H,I,J,K,L,...)local M,N=a(H),a(I)local O={}for P=0,M do O[P]={}for Q=0,N do O[P][Q]=0 end end;for P=1,M do O[P][0]=P end;for Q=1,N do O[0][Q]=Q end;for Q=1,N do for P=1,M do local R=b(H,P)local S=b(I,Q)O[P][Q]=c(O[P-1][Q]+L,O[P][Q-1]+J,O[P-1][Q-1]+(R==S and 0 or K))if arg[1]and P>1 and Q>1 and R==b(I,Q-1)and b(H,P-1)==S then O[P][Q]=c(O[P][Q],O[P-2][Q-2]+(R==S and 0 or arg[2]))end end end;return O[M][N]end;k[u]=function(H,I,J,K,L)return G(H,I,J,K,L)end;k.ld_e=k[u]k[v]=function(H,I)return k.ld_e(H,I,1,1,1)end;k.ld=k[v]k[w]=function(H,I)return k.ld(H,I)/a(H)end;k.lr=k[w]k[x]=function(H,I,J,K,L,T)return G(H,I,J,K,L,true,T)end;k.dld_e=k[x]k[y]=function(H,I)return k.dld_e(H,I,1,1,1,1)end;k.dld=k[y]k[z]=function(H,I)return k.dld(H,I)/a(H)end;k.dlr=k[z]k[E]=function(H,I)local U=a(H)d(U==a(I),"Hamming Distance cannot be calculated on two strings of different lengths:\""..H.."\" \""..I.."\"")local V=0;for P=1,U do V=V+(b(H,P)~=b(I,P)and 1 or 0)end;return V end;k.hd=k[E]k[F]=function(H,I)return k.hd(H,I)/a(H)end;k.hr=k[F]local function W(X,Y,...)local Z=g(arg[1])=="table"and arg[1]or arg;local _=Y(Z[1],X)local a0=Z[1]for a1,a2 in e(Z)do local a3=Y(a2,X)if a3<_ then _=a3;a0=a2 end end;return a0,_ end;k[A]=function(X,...)return h{W(X,k.dld,...)}end;k.ffd=k[A]k[B]=function(X,...)return h{W(X,k.dlr,...)}end;local function a4(X,Y,...)local Z=g(arg[1])=="table"and arg[1]or arg;local a5={}local a6={}for a1,a2 in e(Z)do local V=Y(X,a2)if a5[V]==nil then a5[V]={}i(a6,V)end;i(a5[V],a2)end;j(a6)local a7={}for a1,a2 in f(a6)do for P,Q in e(a5[a2])do i(a7,Q)end end;return a7 end;k.ffr=k[B]k[C]=function(X,...)return 
a4(X,k.dld,...)end;k.fsd=k[C]k[D]=function(X,...)return a4(X,k.dlr,...)end;k.fsr=k[D]return k