aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlexander Pickering <alexandermpickering@gmail.com>2016-06-05 20:48:01 -0400
committerAlexander Pickering <alexandermpickering@gmail.com>2016-06-05 20:48:01 -0400
commit9294725b11e8a0c35c630749cc52b108b76857ec (patch)
tree654aed1950c133a8dad4f9e1f75c295e45baa199
parent55674013e729a8ecb929bf5060e53859bd44b997 (diff)
downloadfuzzel-9294725b11e8a0c35c630749cc52b108b76857ec.tar.gz
fuzzel-9294725b11e8a0c35c630749cc52b108b76857ec.tar.bz2
fuzzel-9294725b11e8a0c35c630749cc52b108b76857ec.zip
Added Fuzzy sorting functions
-rw-r--r--FuzzelExample.lua23
-rw-r--r--fuzzel.lua127
-rw-r--r--fuzzel_min.lua3
3 files changed, 93 insertions, 60 deletions
diff --git a/FuzzelExample.lua b/FuzzelExample.lua
deleted file mode 100644
index 5bf62cf..0000000
--- a/FuzzelExample.lua
+++ /dev/null
@@ -1,23 +0,0 @@
---Include the module
-local fuzzel = dofile("fuzzel.lua")
-
---A function that takes a table and returns a function that takes a string, and returns the closesting match in the table.
-function suggestoption(tbl_options)
- return function(str)
- local closest = fuzzel.FuzzySearchDistance(str,tbl_options)
- return closest
- end
-end
-
---A couple of options
-local options = {
- "Fat Cat",
- "Lazy Dog",
- "Brown Fox",
-}
-
---Create the function, with our options
-local suggestfunc = suggestoption(options)
-
---And use it, to see what option closest matches "Brown Cat"
-print(suggestfunc("Brown Cat"))
diff --git a/fuzzel.lua b/fuzzel.lua
index bc90984..0496a68 100644
--- a/fuzzel.lua
+++ b/fuzzel.lua
@@ -38,38 +38,66 @@
The hamming distance divided by the length of the first string
returns number_ratio
- fuzzel.FuzzySearchDistance(string_needle, vararg_in)
+ fuzzel.FuzzyFindDistance(string_needle, vararg_in)
in may be either a table, or a list of arguments. fuzzel.FuzzySearchDistance will find the string that most closely resembles needle, based on Damerau-Levenshtien Distance
returns string_closest, number_distance
- fuzzel.FuzzySearchRatio(string_needle, vararg_in)
+ fuzzel.FuzzyFindRatio(string_needle, vararg_in)
in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtien ratio.
returns string_closest, nubmer_ratio
+ fuzzel.FuzzySortDistance(string_needle, vararg_in)
+ Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtien Distance
+ returns table_sorted
+
+ fuzzel.FuzzySortRatio(string needle, vararg_in)
+ Same as above, but uses Damerau-Levenshtien Ratio instead
+ returns table_sorted
+
Example:
Returns a function that will return the closest string to the string it was passed
-----------------FuzzelExample.lua------------------
+ --Include the module
local fuzzel = dofile("fuzzel.lua")
- function suggestoption(tbl_options)
- return function(str)
- local closest = fuzzel.FuzzySearchDistance(str,tbl_options)
- return closest
- end
- end
-
+ --A couple of options
local options = {
"Fat Cat",
"Lazy Dog",
"Brown Fox",
}
- local suggestfunc = suggestoption(options)
- suggestfunc("Brown Cat")
- -------------End FuzzelExample.lua------------------
+ --And use it, to see what option closest matches "Brown Cat"
+ print("\"Lulzy Cat\" is close to \"" .. fuzzel.FuzzyFindDistance("Lulzy Cat",options) .. "\"")
+ --Sort the options to see the order in which they most closely match "Brown Cat"
+ print("\"Frag God\" is closest to:")
+ for k,v in ipairs(fuzzel.FuzzySortRatio("Frag God",options)) do
+ print(k .. "\t:\t" .. v)
+ end
+ -------------End FuzzelExample.lua------------------
+ Outputs:
+ "Lulzy Cat" is close to "Fat Cat"
+ "Frag God" is closest to:
+ 1 : Fat Cat
+ 2 : Lazy Dog
+ 3 : Brown Fox
+
+ Some easy-to-use mnemonics
+ fuzzel.ld_e = fuzzel.LevenshtienDistance_extended
+ fuzzel.ld = fuzzel.LevenshtienDistance
+ fuzzel.lr = fuzzel.LevensteinRatio
+ fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended
+ fuzzel.dld = fuzzel.DamerauLevenshtienDistance
+ fuzzel.dlr = fuzzel.DamerauLevenshtienRatio
+ fuzzel.hd = fuzzel.HammingDistance
+ fuzzel.hr = fuzzel.HammingRatio
+ fuzzel.ffd = fuzzel.FuzzyFindDistance
+ fuzzel.ffr = fuzzel.FuzzyFindRatio
+ fuzzel.fsd = fuzzel.FuzzySortDistance
+ fuzzel.fsr = fuzzel.FuzzySortRatio
]]
-local strlen,chrat,min,asrt,prs,iprs,typ,upack = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack
+local strlen,chrat,min,asrt,prs,iprs,typ,upack,tblins,tblsrt = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort
local fuzzel = {}
@@ -117,29 +145,34 @@ end
function fuzzel.LevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost)
return fuzzel.genericDistance(stringa, stringb, addcost, subcost, delcost)
end
+fuzzel.ld_e = fuzzel.LevenshtienDistance_extended
function fuzzel.LevenshtienDistance(stringa,stringb)
- return fuzzel.LevenshtienDistance_extended(stringa,stringb,1,1,1)
+ return fuzzel.ld_e(stringa,stringb,1,1,1)
end
+fuzzel.ld = fuzzel.LevenshtienDistance
+
---The distance as a ratio of stringa's length
function fuzzel.LevenshteinRatio(stringa,stringb)
- return fuzzel.LevenshtienDistance(stringa,stringb) / strlen(stringa)
+ return fuzzel.ld(stringa,stringb) / strlen(stringa)
end
---Almost the same as LevenshtienDistance, but considers two characters swaped as only "one" mistake
+fuzzel.lr = fuzzel.LevensteinRatio
+
function fuzzel.DamerauLevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost, trncost)
return genericDistance(stringa,stringb,addcost,subcost,delcost,true,trncost)
end
+fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended
function fuzzel.DamerauLevenshtienDistance(stringa,stringb)
- return fuzzel.DamerauLevenshtienDistance_extended(stringa,stringb,1,1,1,1)
+ return fuzzel.dld_e(stringa,stringb,1,1,1,1)
end
+fuzzel.dld = fuzzel.DamerauLevenshtienDistance
function fuzzel.DamerauLevenshtienRatio(stringa,stringb)
- return fuzzel.DamerauLevenshtienDistance(stringa,stringb) / strlen(stringa)
+ return fuzzel.dld(stringa,stringb) / strlen(stringa)
end
+fuzzel.dlr = fuzzel.DamerauLevenshtienRatio
---Purely the number of mistakes
function fuzzel.HammingDistance(stringa,stringb)
local len = strlen(stringa)
asrt(len == strlen(stringb),"Hamming Distance cannot be calculated on two strings of different lengths:\"" .. stringa .. "\" \"" .. stringb .. "\"")
@@ -149,10 +182,12 @@ function fuzzel.HammingDistance(stringa,stringb)
end
return dist
end
+fuzzel.hd = fuzzel.HammingDistance
function fuzzel.HammingRatio(stringa,stringb)
return fuzzel.HammingDistance(stringa,stringb) / strlen(stringa)
end
+fuzzel.hr = fuzzel.HammingRatio
local function FuzzySearch(str,func,...)
local looparg = typ(arg[1]) == "table" and arg[1] or arg
@@ -168,24 +203,46 @@ local function FuzzySearch(str,func,...)
return sout, tmin
end
-function fuzzel.FuzzySearchDistance(str,...)
- return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienDistance,...)}
+function fuzzel.FuzzyFindDistance(str,...)
+ return upack{FuzzySearch(str,fuzzel.dld,...)}
end
+fuzzel.ffd = fuzzel.FuzzyFindDistance
-function fuzzel.FuzzySearchRatio(str,...)
- return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienRatio,...)}
+function fuzzel.FuzzyFindRatio(str,...)
+ return upack{FuzzySearch(str,fuzzel.dlr,...)}
end
---Some easy-to-use mnemonics
-fuzzel.ld_e = fuzzel.LevenshtienDistance_extended
-fuzzel.ld = fuzzel.LevenshtienDistance
-fuzzel.lr = fuzzel.LevensteinRatio
-fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended
-fuzzel.dld = fuzzel.DamerauLevenshtienDistance
-fuzzel.dlr = fuzzel.DamerauLevenshtienRatio
-fuzzel.hd = fuzzel.HammingDistance
-fuzzel.hr = fuzzel.HammingRatio
-fuzzel.fsd = fuzzel.FuzzySearchDistance
-fuzzel.fsr = fuzzel.FuzzySearchRatio
+local function FuzzySort(str, func, ...)
+ local looparg = typ(arg[1]) == "table" and arg[1] or arg
+ local usorted = {}
+ for k,v in prs(looparg) do
+ local dist = func(str,v)
+ usorted[dist] = usorted[dist] or {}
+ tblins(usorted[dist],v)
+ end
+ local sorted = {}
+ for k,v in prs(usorted) do
+ tblins(sorted,k)
+ end
+ tblsrt(sorted)
+ local otbl = {}
+ for k,v in prs(sorted) do
+ for i,j in prs(usorted[v]) do
+ tblins(otbl,j)
+ end
+ end
+ return otbl
+end
+fuzzel.ffr = fuzzel.FuzzyFindRatio
+
+function fuzzel.FuzzySortDistance(str,...)
+ return upack{FuzzySort(str,fuzzel.dld,...)}
+end
+fuzzel.fsd = fuzzel.FuzzySortDistance
+
+function fuzzel.FuzzySortRatio(str,...)
+ return upack{FuzzySort(str,fuzzel.dlr,...)}
+end
+fuzzel.fsr = fuzzel.FuzzySortRatio
return fuzzel
diff --git a/fuzzel_min.lua b/fuzzel_min.lua
index 4e845b3..2482a7a 100644
--- a/fuzzel_min.lua
+++ b/fuzzel_min.lua
@@ -1,2 +1 @@
---This file has been minified! For the original, see cogarr.net/source/
-local a,b,c,d,e,f,g,h=string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack;local i={}local function j(k,l,m,n,o,...)local p,q=a(k),a(l)local r={}for s=0,p do r[s]={}for t=0,q do r[s][t]=0 end end;for s=1,p do r[s][0]=s end;for t=1,q do r[0][t]=t end;for t=1,q do for s=1,p do local u=b(k,s)local v=b(l,t)r[s][t]=c(r[s-1][t]+o,r[s][t-1]+m,r[s-1][t-1]+(u==v and 0 or n))if arg[1]and s>1 and t>1 and u==b(l,t-1)and b(k,s-1)==v then r[s][t]=c(r[s][t],r[s-2][t-2]+(u==v and 0 or arg[2]))end end end;return r[p][q]end;function i.LevenshtienDistance_extended(k,l,m,n,o)return i.genericDistance(k,l,m,n,o)end;function i.LevenshtienDistance(k,l)return i.LevenshtienDistance_extended(k,l,1,1,1)end;function i.LevenshteinRatio(k,l)return i.LevenshtienDistance(k,l)/a(k)end;function i.DamerauLevenshtienDistance_extended(k,l,m,n,o,w)return j(k,l,m,n,o,true,w)end;function i.DamerauLevenshtienDistance(k,l)return i.DamerauLevenshtienDistance_extended(k,l,1,1,1,1)end;function i.DamerauLevenshtienRatio(k,l)return i.DamerauLevenshtienDistance(k,l)/a(k)end;function i.HammingDistance(k,l)local x=a(k)d(x==a(l),"Hamming Distance cannot be calculated on two strings of different lengths:\""..k.."\" \""..l.."\"")local y=0;for s=1,x do y=y+(b(k,s)~=b(l,s)and 1)end;return y end;function i.HammingRatio(k,l)return i.HammingDistance(k,l)/a(k)end;local function z(A,B,...)local C=g(arg[1])=="table"and arg[1]or arg;local D=B(C[1],A)local E=C[1]for F,G in e(C)do local H=B(G,A)if H<D then D=H;E=G end end;return E,D end;function i.FuzzySearchDistance(A,...)return h{z(A,i.DamerauLevenshtienDistance,...)}end;function i.FuzzySearchRatio(A,...)return h{z(A,i.DamerauLevenshtienRatio,...)}end;i.ld_e=i.LevenshtienDistance_extended;i.ld=i.LevenshtienDistance;i.lr=i.LevensteinRatio;i.dld_e=i.DamerauLevenshtienDistance_extended;i.dld=i.DamerauLevenshtienDistance;i.dlr=i.DamerauLevenshtienRatio;i.hd=i.HammingDistance;i.hr=i.HammingRatio;i.fsd=i.FuzzySearchDistance;i.fsr=i.FuzzySearchRatio;return i
+local a,b,c,d,e,f,g,h,i,j=string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort;local k={}local function l(m,n,o,p,q,...)local r,s=a(m),a(n)local t={}for u=0,r do t[u]={}for v=0,s do t[u][v]=0 end end;for u=1,r do t[u][0]=u end;for v=1,s do t[0][v]=v end;for v=1,s do for u=1,r do local w=b(m,u)local x=b(n,v)t[u][v]=c(t[u-1][v]+q,t[u][v-1]+o,t[u-1][v-1]+(w==x and 0 or p))if arg[1]and u>1 and v>1 and w==b(n,v-1)and b(m,u-1)==x then t[u][v]=c(t[u][v],t[u-2][v-2]+(w==x and 0 or arg[2]))end end end;return t[r][s]end;function k.LevenshtienDistance_extended(m,n,o,p,q)return k.genericDistance(m,n,o,p,q)end;k.ld_e=k.LevenshtienDistance_extended;function k.LevenshtienDistance(m,n)return k.ld_e(m,n,1,1,1)end;k.ld=k.LevenshtienDistance;function k.LevenshteinRatio(m,n)return k.ld(m,n)/a(m)end;k.lr=k.LevensteinRatio;function k.DamerauLevenshtienDistance_extended(m,n,o,p,q,y)return l(m,n,o,p,q,true,y)end;k.dld_e=k.DamerauLevenshtienDistance_extended;function k.DamerauLevenshtienDistance(m,n)return k.dld_e(m,n,1,1,1,1)end;k.dld=k.DamerauLevenshtienDistance;function k.DamerauLevenshtienRatio(m,n)return k.dld(m,n)/a(m)end;k.dlr=k.DamerauLevenshtienRatio;function k.HammingDistance(m,n)local z=a(m)d(z==a(n),"Hamming Distance cannot be calculated on two strings of different lengths:\""..m.."\" \""..n.."\"")local A=0;for u=1,z do A=A+(b(m,u)~=b(n,u)and 1)end;return A end;k.hd=k.HammingDistance;function k.HammingRatio(m,n)return k.HammingDistance(m,n)/a(m)end;k.hr=k.HammingRatio;local function B(C,D,...)local E=g(arg[1])=="table"and arg[1]or arg;local F=D(E[1],C)local G=E[1]for H,I in e(E)do local J=D(I,C)if J<F then F=J;G=I end end;return G,F end;function k.FuzzyFindDistance(C,...)return h{B(C,k.dld,...)}end;k.ffd=k.FuzzyFindDistance;function k.FuzzyFindRatio(C,...)return h{B(C,k.dlr,...)}end;local function K(C,D,...)local E=g(arg[1])=="table"and arg[1]or arg;local L={}for H,I in e(E)do local A=D(C,I)L[A]=L[A]or{}i(L[A],I)end;local M={}for H,I in e(L)do i(M,H)end;j(M)local N={}for H,I in e(M)do for u,v in e(L[I])do i(N,v)end end;return N end;k.ffr=k.FuzzyFindRatio;function k.FuzzySortDistance(C,...)return h{K(C,k.dld,...)}end;k.fsd=k.FuzzySortDistance;function k.FuzzySortRatio(C,...)return h{K(C,k.dlr,...)}end;k.fsr=k.FuzzySortRatio;return k