From 9294725b11e8a0c35c630749cc52b108b76857ec Mon Sep 17 00:00:00 2001 From: Alexander Pickering Date: Sun, 5 Jun 2016 20:48:01 -0400 Subject: Added Fuzzy sorting functions --- FuzzelExample.lua | 23 ---------- fuzzel.lua | 127 +++++++++++++++++++++++++++++++++++++++--------------- fuzzel_min.lua | 3 +- 3 files changed, 93 insertions(+), 60 deletions(-) delete mode 100644 FuzzelExample.lua diff --git a/FuzzelExample.lua b/FuzzelExample.lua deleted file mode 100644 index 5bf62cf..0000000 --- a/FuzzelExample.lua +++ /dev/null @@ -1,23 +0,0 @@ ---Include the module -local fuzzel = dofile("fuzzel.lua") - ---A function that takes a table and returns a function that takes a string, and returns the closesting match in the table. -function suggestoption(tbl_options) - return function(str) - local closest = fuzzel.FuzzySearchDistance(str,tbl_options) - return closest - end -end - ---A couple of options -local options = { - "Fat Cat", - "Lazy Dog", - "Brown Fox", -} - ---Create the function, with our options -local suggestfunc = suggestoption(options) - ---And use it, to see what option closest matches "Brown Cat" -print(suggestfunc("Brown Cat")) diff --git a/fuzzel.lua b/fuzzel.lua index bc90984..0496a68 100644 --- a/fuzzel.lua +++ b/fuzzel.lua @@ -38,38 +38,66 @@ The hamming distance divided by the length of the first string returns number_ratio - fuzzel.FuzzySearchDistance(string_needle, vararg_in) + fuzzel.FuzzyFindDistance(string_needle, vararg_in) in may be either a table, or a list of arguments. fuzzel.FuzzySearchDistance will find the string that most closely resembles needle, based on Damerau-Levenshtien Distance returns string_closest, number_distance - fuzzel.FuzzySearchRatio(string_needle, vararg_in) + fuzzel.FuzzyFindRatio(string_needle, vararg_in) in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtien ratio. returns string_closest, nubmer_ratio + fuzzel.FuzzySortDistance(string_needle, vararg_in) + Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtien Distance + returns table_sorted + + fuzzel.FuzzySortRatio(string needle, vararg_in) + Same as above, but uses Damerau-Levenshtien Ratio instead + returns table_sorted + Example: Returns a function that will return the closest string to the string it was passed -----------------FuzzelExample.lua------------------ + --Include the module local fuzzel = dofile("fuzzel.lua") - function suggestoption(tbl_options) - return function(str) - local closest = fuzzel.FuzzySearchDistance(str,tbl_options) - return closest - end - end - + --A couple of options local options = { "Fat Cat", "Lazy Dog", "Brown Fox", } - local suggestfunc = suggestoption(options) - suggestfunc("Brown Cat") - -------------End FuzzelExample.lua------------------ + --And use it, to see what option closest matches "Brown Cat" + print("\"Lulzy Cat\" is close to \"" .. fuzzel.FuzzyFindDistance("Lulzy Cat",options) .. "\"") + --Sort the options to see the order in which they most closely match "Brown Cat" + print("\"Frag God\" is closest to:") + for k,v in ipairs(fuzzel.FuzzySortRatio("Frag God",options)) do + print(k .. "\t:\t" .. v) + end + -------------End FuzzelExample.lua------------------ + Outputs: + "Lulzy Cat" is close to "Fat Cat" + "Frag God" is closest to: + 1 : Fat Cat + 2 : Lazy Dog + 3 : Brown Fox + + Some easy-to-use mnemonics + fuzzel.ld_e = fuzzel.LevenshtienDistance_extended + fuzzel.ld = fuzzel.LevenshtienDistance + fuzzel.lr = fuzzel.LevensteinRatio + fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended + fuzzel.dld = fuzzel.DamerauLevenshtienDistance + fuzzel.dlr = fuzzel.DamerauLevenshtienRatio + fuzzel.hd = fuzzel.HammingDistance + fuzzel.hr = fuzzel.HammingRatio + fuzzel.ffd = fuzzel.FuzzyFindDistance + fuzzel.ffr = fuzzel.FuzzyFindRatio + fuzzel.fsd = fuzzel.FuzzySortDistance + fuzzel.fsr = fuzzel.FuzzySortRatio ]] -local strlen,chrat,min,asrt,prs,iprs,typ,upack = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack +local strlen,chrat,min,asrt,prs,iprs,typ,upack,tblins,tblsrt = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort local fuzzel = {} @@ -117,29 +145,34 @@ end function fuzzel.LevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost) return fuzzel.genericDistance(stringa, stringb, addcost, subcost, delcost) end +fuzzel.ld_e = fuzzel.LevenshtienDistance_extended function fuzzel.LevenshtienDistance(stringa,stringb) - return fuzzel.LevenshtienDistance_extended(stringa,stringb,1,1,1) + return fuzzel.ld_e(stringa,stringb,1,1,1) end +fuzzel.ld = fuzzel.LevenshtienDistance + ---The distance as a ratio of stringa's length function fuzzel.LevenshteinRatio(stringa,stringb) - return fuzzel.LevenshtienDistance(stringa,stringb) / strlen(stringa) + return fuzzel.ld(stringa,stringb) / strlen(stringa) end ---Almost the same as LevenshtienDistance, but considers two characters swaped as only "one" mistake +fuzzel.lr = fuzzel.LevensteinRatio + function fuzzel.DamerauLevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost, trncost) return genericDistance(stringa,stringb,addcost,subcost,delcost,true,trncost) end +fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended function fuzzel.DamerauLevenshtienDistance(stringa,stringb) - return fuzzel.DamerauLevenshtienDistance_extended(stringa,stringb,1,1,1,1) + return fuzzel.dld_e(stringa,stringb,1,1,1,1) end +fuzzel.dld = fuzzel.DamerauLevenshtienDistance function fuzzel.DamerauLevenshtienRatio(stringa,stringb) - return fuzzel.DamerauLevenshtienDistance(stringa,stringb) / strlen(stringa) + return fuzzel.dld(stringa,stringb) / strlen(stringa) end +fuzzel.dlr = fuzzel.DamerauLevenshtienRatio ---Purely the number of mistakes function fuzzel.HammingDistance(stringa,stringb) local len = strlen(stringa) asrt(len == strlen(stringb),"Hamming Distance cannot be calculated on two strings of different lengths:\"" .. stringa .. "\" \"" .. stringb .. "\"") @@ -149,10 +182,12 @@ function fuzzel.HammingDistance(stringa,stringb) end return dist end +fuzzel.hd = fuzzel.HammingDistance function fuzzel.HammingRatio(stringa,stringb) return fuzzel.HammingDistance(stringa,stringb) / strlen(stringa) end +fuzzel.hr = fuzzel.HammingRatio local function FuzzySearch(str,func,...) local looparg = typ(arg[1]) == "table" and arg[1] or arg @@ -168,24 +203,46 @@ local function FuzzySearch(str,func,...) return sout, tmin end -function fuzzel.FuzzySearchDistance(str,...) - return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienDistance,...)} +function fuzzel.FuzzyFindDistance(str,...) + return upack{FuzzySearch(str,fuzzel.dld,...)} end +fuzzel.ffd = fuzzel.FuzzyFindDistance -function fuzzel.FuzzySearchRatio(str,...) - return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienRatio,...)} +function fuzzel.FuzzyFindRatio(str,...) + return upack{FuzzySearch(str,fuzzel.dlr,...)} end ---Some easy-to-use mnemonics -fuzzel.ld_e = fuzzel.LevenshtienDistance_extended -fuzzel.ld = fuzzel.LevenshtienDistance -fuzzel.lr = fuzzel.LevensteinRatio -fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended -fuzzel.dld = fuzzel.DamerauLevenshtienDistance -fuzzel.dlr = fuzzel.DamerauLevenshtienRatio -fuzzel.hd = fuzzel.HammingDistance -fuzzel.hr = fuzzel.HammingRatio -fuzzel.fsd = fuzzel.FuzzySearchDistance -fuzzel.fsr = fuzzel.FuzzySearchRatio +local function FuzzySort(str, func, ...) + local looparg = typ(arg[1]) == "table" and arg[1] or arg + local usorted = {} + for k,v in prs(looparg) do + local dist = func(str,v) + usorted[dist] = usorted[dist] or {} + tblins(usorted[dist],v) + end + local sorted = {} + for k,v in prs(usorted) do + tblins(sorted,k) + end + tblsrt(sorted) + local otbl = {} + for k,v in prs(sorted) do + for i,j in prs(usorted[v]) do + tblins(otbl,j) + end + end + return otbl +end +fuzzel.ffr = fuzzel.FuzzyFindRatio + +function fuzzel.FuzzySortDistance(str,...) + return upack{FuzzySort(str,fuzzel.dld,...)} +end +fuzzel.fsd = fuzzel.FuzzySortDistance + +function fuzzel.FuzzySortRatio(str,...) + return upack{FuzzySort(str,fuzzel.dlr,...)} +end +fuzzel.fsr = fuzzel.FuzzySortRatio return fuzzel diff --git a/fuzzel_min.lua b/fuzzel_min.lua index 4e845b3..2482a7a 100644 --- a/fuzzel_min.lua +++ b/fuzzel_min.lua @@ -1,2 +1 @@ ---This file has been minified! For the original, see cogarr.net/source/ -local a,b,c,d,e,f,g,h=string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack;local i={}local function j(k,l,m,n,o,...)local p,q=a(k),a(l)local r={}for s=0,p do r[s]={}for t=0,q do r[s][t]=0 end end;for s=1,p do r[s][0]=s end;for t=1,q do r[0][t]=t end;for t=1,q do for s=1,p do local u=b(k,s)local v=b(l,t)r[s][t]=c(r[s-1][t]+o,r[s][t-1]+m,r[s-1][t-1]+(u==v and 0 or n))if arg[1]and s>1 and t>1 and u==b(l,t-1)and b(k,s-1)==v then r[s][t]=c(r[s][t],r[s-2][t-2]+(u==v and 0 or arg[2]))end end end;return r[p][q]end;function i.LevenshtienDistance_extended(k,l,m,n,o)return i.genericDistance(k,l,m,n,o)end;function i.LevenshtienDistance(k,l)return i.LevenshtienDistance_extended(k,l,1,1,1)end;function i.LevenshteinRatio(k,l)return i.LevenshtienDistance(k,l)/a(k)end;function i.DamerauLevenshtienDistance_extended(k,l,m,n,o,w)return j(k,l,m,n,o,true,w)end;function i.DamerauLevenshtienDistance(k,l)return i.DamerauLevenshtienDistance_extended(k,l,1,1,1,1)end;function i.DamerauLevenshtienRatio(k,l)return i.DamerauLevenshtienDistance(k,l)/a(k)end;function i.HammingDistance(k,l)local x=a(k)d(x==a(l),"Hamming Distance cannot be calculated on two strings of different lengths:\""..k.."\" \""..l.."\"")local y=0;for s=1,x do y=y+(b(k,s)~=b(l,s)and 1)end;return y end;function i.HammingRatio(k,l)return i.HammingDistance(k,l)/a(k)end;local function z(A,B,...)local C=g(arg[1])=="table"and arg[1]or arg;local D=B(C[1],A)local E=C[1]for F,G in e(C)do local H=B(G,A)if H1 and v>1 and w==b(n,v-1)and b(m,u-1)==x then t[u][v]=c(t[u][v],t[u-2][v-2]+(w==x and 0 or arg[2]))end end end;return t[r][s]end;function k.LevenshtienDistance_extended(m,n,o,p,q)return k.genericDistance(m,n,o,p,q)end;k.ld_e=k.LevenshtienDistance_extended;function k.LevenshtienDistance(m,n)return k.ld_e(m,n,1,1,1)end;k.ld=k.LevenshtienDistance;function k.LevenshteinRatio(m,n)return k.ld(m,n)/a(m)end;k.lr=k.LevensteinRatio;function k.DamerauLevenshtienDistance_extended(m,n,o,p,q,y)return l(m,n,o,p,q,true,y)end;k.dld_e=k.DamerauLevenshtienDistance_extended;function k.DamerauLevenshtienDistance(m,n)return k.dld_e(m,n,1,1,1,1)end;k.dld=k.DamerauLevenshtienDistance;function k.DamerauLevenshtienRatio(m,n)return k.dld(m,n)/a(m)end;k.dlr=k.DamerauLevenshtienRatio;function k.HammingDistance(m,n)local z=a(m)d(z==a(n),"Hamming Distance cannot be calculated on two strings of different lengths:\""..m.."\" \""..n.."\"")local A=0;for u=1,z do A=A+(b(m,u)~=b(n,u)and 1)end;return A end;k.hd=k.HammingDistance;function k.HammingRatio(m,n)return k.HammingDistance(m,n)/a(m)end;k.hr=k.HammingRatio;local function B(C,D,...)local E=g(arg[1])=="table"and arg[1]or arg;local F=D(E[1],C)local G=E[1]for H,I in e(E)do local J=D(I,C)if J