diff options
| author | Alexander Pickering <alexandermpickering@gmail.com> | 2016-06-05 20:48:01 -0400 |
|---|---|---|
| committer | Alexander Pickering <alexandermpickering@gmail.com> | 2016-06-05 20:48:01 -0400 |
| commit | 9294725b11e8a0c35c630749cc52b108b76857ec (patch) | |
| tree | 654aed1950c133a8dad4f9e1f75c295e45baa199 | |
| parent | 55674013e729a8ecb929bf5060e53859bd44b997 (diff) | |
| download | fuzzel-9294725b11e8a0c35c630749cc52b108b76857ec.tar.gz fuzzel-9294725b11e8a0c35c630749cc52b108b76857ec.tar.bz2 fuzzel-9294725b11e8a0c35c630749cc52b108b76857ec.zip | |
Added Fuzzy sorting functions
| -rw-r--r-- | FuzzelExample.lua | 23 | ||||
| -rw-r--r-- | fuzzel.lua | 127 | ||||
| -rw-r--r-- | fuzzel_min.lua | 3 |
3 files changed, 93 insertions, 60 deletions
diff --git a/FuzzelExample.lua b/FuzzelExample.lua deleted file mode 100644 index 5bf62cf..0000000 --- a/FuzzelExample.lua +++ /dev/null @@ -1,23 +0,0 @@ ---Include the module -local fuzzel = dofile("fuzzel.lua") - ---A function that takes a table and returns a function that takes a string, and returns the closesting match in the table. -function suggestoption(tbl_options) - return function(str) - local closest = fuzzel.FuzzySearchDistance(str,tbl_options) - return closest - end -end - ---A couple of options -local options = { - "Fat Cat", - "Lazy Dog", - "Brown Fox", -} - ---Create the function, with our options -local suggestfunc = suggestoption(options) - ---And use it, to see what option closest matches "Brown Cat" -print(suggestfunc("Brown Cat")) @@ -38,38 +38,66 @@ The hamming distance divided by the length of the first string returns number_ratio - fuzzel.FuzzySearchDistance(string_needle, vararg_in) + fuzzel.FuzzyFindDistance(string_needle, vararg_in) in may be either a table, or a list of arguments. fuzzel.FuzzySearchDistance will find the string that most closely resembles needle, based on Damerau-Levenshtien Distance returns string_closest, number_distance - fuzzel.FuzzySearchRatio(string_needle, vararg_in) + fuzzel.FuzzyFindRatio(string_needle, vararg_in) in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtien ratio. returns string_closest, nubmer_ratio + fuzzel.FuzzySortDistance(string_needle, vararg_in) + Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtien Distance + returns table_sorted + + fuzzel.FuzzySortRatio(string needle, vararg_in) + Same as above, but uses Damerau-Levenshtien Ratio instead + returns table_sorted + Example: Returns a function that will return the closest string to the string it was passed -----------------FuzzelExample.lua------------------ + --Include the module local fuzzel = dofile("fuzzel.lua") - function suggestoption(tbl_options) - return function(str) - local closest = fuzzel.FuzzySearchDistance(str,tbl_options) - return closest - end - end - + --A couple of options local options = { "Fat Cat", "Lazy Dog", "Brown Fox", } - local suggestfunc = suggestoption(options) - suggestfunc("Brown Cat") - -------------End FuzzelExample.lua------------------ + --And use it, to see what option closest matches "Brown Cat" + print("\"Lulzy Cat\" is close to \"" .. fuzzel.FuzzyFindDistance("Lulzy Cat",options) .. "\"") + --Sort the options to see the order in which they most closely match "Brown Cat" + print("\"Frag God\" is closest to:") + for k,v in ipairs(fuzzel.FuzzySortRatio("Frag God",options)) do + print(k .. "\t:\t" .. v) + end + -------------End FuzzelExample.lua------------------ + Outputs: + "Lulzy Cat" is close to "Fat Cat" + "Frag God" is closest to: + 1 : Fat Cat + 2 : Lazy Dog + 3 : Brown Fox + + Some easy-to-use mnemonics + fuzzel.ld_e = fuzzel.LevenshtienDistance_extended + fuzzel.ld = fuzzel.LevenshtienDistance + fuzzel.lr = fuzzel.LevensteinRatio + fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended + fuzzel.dld = fuzzel.DamerauLevenshtienDistance + fuzzel.dlr = fuzzel.DamerauLevenshtienRatio + fuzzel.hd = fuzzel.HammingDistance + fuzzel.hr = fuzzel.HammingRatio + fuzzel.ffd = fuzzel.FuzzyFindDistance + fuzzel.ffr = fuzzel.FuzzyFindRatio + fuzzel.fsd = fuzzel.FuzzySortDistance + fuzzel.fsr = fuzzel.FuzzySortRatio ]] -local strlen,chrat,min,asrt,prs,iprs,typ,upack = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack +local strlen,chrat,min,asrt,prs,iprs,typ,upack,tblins,tblsrt = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort local fuzzel = {} @@ -117,29 +145,34 @@ end function fuzzel.LevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost) return fuzzel.genericDistance(stringa, stringb, addcost, subcost, delcost) end +fuzzel.ld_e = fuzzel.LevenshtienDistance_extended function fuzzel.LevenshtienDistance(stringa,stringb) - return fuzzel.LevenshtienDistance_extended(stringa,stringb,1,1,1) + return fuzzel.ld_e(stringa,stringb,1,1,1) end +fuzzel.ld = fuzzel.LevenshtienDistance + ---The distance as a ratio of stringa's length function fuzzel.LevenshteinRatio(stringa,stringb) - return fuzzel.LevenshtienDistance(stringa,stringb) / strlen(stringa) + return fuzzel.ld(stringa,stringb) / strlen(stringa) end ---Almost the same as LevenshtienDistance, but considers two characters swaped as only "one" mistake +fuzzel.lr = fuzzel.LevensteinRatio + function fuzzel.DamerauLevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost, trncost) return genericDistance(stringa,stringb,addcost,subcost,delcost,true,trncost) end +fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended function fuzzel.DamerauLevenshtienDistance(stringa,stringb) - return fuzzel.DamerauLevenshtienDistance_extended(stringa,stringb,1,1,1,1) + return fuzzel.dld_e(stringa,stringb,1,1,1,1) end +fuzzel.dld = fuzzel.DamerauLevenshtienDistance function fuzzel.DamerauLevenshtienRatio(stringa,stringb) - return fuzzel.DamerauLevenshtienDistance(stringa,stringb) / strlen(stringa) + return fuzzel.dld(stringa,stringb) / strlen(stringa) end +fuzzel.dlr = fuzzel.DamerauLevenshtienRatio ---Purely the number of mistakes function fuzzel.HammingDistance(stringa,stringb) local len = strlen(stringa) asrt(len == strlen(stringb),"Hamming Distance cannot be calculated on two strings of different lengths:\"" .. stringa .. "\" \"" .. stringb .. "\"") @@ -149,10 +182,12 @@ function fuzzel.HammingDistance(stringa,stringb) end return dist end +fuzzel.hd = fuzzel.HammingDistance function fuzzel.HammingRatio(stringa,stringb) return fuzzel.HammingDistance(stringa,stringb) / strlen(stringa) end +fuzzel.hr = fuzzel.HammingRatio local function FuzzySearch(str,func,...) local looparg = typ(arg[1]) == "table" and arg[1] or arg @@ -168,24 +203,46 @@ local function FuzzySearch(str,func,...) return sout, tmin end -function fuzzel.FuzzySearchDistance(str,...) - return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienDistance,...)} +function fuzzel.FuzzyFindDistance(str,...) + return upack{FuzzySearch(str,fuzzel.dld,...)} end +fuzzel.ffd = fuzzel.FuzzyFindDistance -function fuzzel.FuzzySearchRatio(str,...) - return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienRatio,...)} +function fuzzel.FuzzyFindRatio(str,...) + return upack{FuzzySearch(str,fuzzel.dlr,...)} end ---Some easy-to-use mnemonics -fuzzel.ld_e = fuzzel.LevenshtienDistance_extended -fuzzel.ld = fuzzel.LevenshtienDistance -fuzzel.lr = fuzzel.LevensteinRatio -fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended -fuzzel.dld = fuzzel.DamerauLevenshtienDistance -fuzzel.dlr = fuzzel.DamerauLevenshtienRatio -fuzzel.hd = fuzzel.HammingDistance -fuzzel.hr = fuzzel.HammingRatio -fuzzel.fsd = fuzzel.FuzzySearchDistance -fuzzel.fsr = fuzzel.FuzzySearchRatio +local function FuzzySort(str, func, ...) + local looparg = typ(arg[1]) == "table" and arg[1] or arg + local usorted = {} + for k,v in prs(looparg) do + local dist = func(str,v) + usorted[dist] = usorted[dist] or {} + tblins(usorted[dist],v) + end + local sorted = {} + for k,v in prs(usorted) do + tblins(sorted,k) + end + tblsrt(sorted) + local otbl = {} + for k,v in prs(sorted) do + for i,j in prs(usorted[v]) do + tblins(otbl,j) + end + end + return otbl +end +fuzzel.ffr = fuzzel.FuzzyFindRatio + +function fuzzel.FuzzySortDistance(str,...) + return upack{FuzzySort(str,fuzzel.dld,...)} +end +fuzzel.fsd = fuzzel.FuzzySortDistance + +function fuzzel.FuzzySortRatio(str,...) + return upack{FuzzySort(str,fuzzel.dlr,...)} +end +fuzzel.fsr = fuzzel.FuzzySortRatio return fuzzel diff --git a/fuzzel_min.lua b/fuzzel_min.lua index 4e845b3..2482a7a 100644 --- a/fuzzel_min.lua +++ b/fuzzel_min.lua @@ -1,2 +1 @@ ---This file has been minified! For the original, see cogarr.net/source/ -local a,b,c,d,e,f,g,h=string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack;local i={}local function j(k,l,m,n,o,...)local p,q=a(k),a(l)local r={}for s=0,p do r[s]={}for t=0,q do r[s][t]=0 end end;for s=1,p do r[s][0]=s end;for t=1,q do r[0][t]=t end;for t=1,q do for s=1,p do local u=b(k,s)local v=b(l,t)r[s][t]=c(r[s-1][t]+o,r[s][t-1]+m,r[s-1][t-1]+(u==v and 0 or n))if arg[1]and s>1 and t>1 and u==b(l,t-1)and b(k,s-1)==v then r[s][t]=c(r[s][t],r[s-2][t-2]+(u==v and 0 or arg[2]))end end end;return r[p][q]end;function i.LevenshtienDistance_extended(k,l,m,n,o)return i.genericDistance(k,l,m,n,o)end;function i.LevenshtienDistance(k,l)return i.LevenshtienDistance_extended(k,l,1,1,1)end;function i.LevenshteinRatio(k,l)return i.LevenshtienDistance(k,l)/a(k)end;function i.DamerauLevenshtienDistance_extended(k,l,m,n,o,w)return j(k,l,m,n,o,true,w)end;function i.DamerauLevenshtienDistance(k,l)return i.DamerauLevenshtienDistance_extended(k,l,1,1,1,1)end;function i.DamerauLevenshtienRatio(k,l)return i.DamerauLevenshtienDistance(k,l)/a(k)end;function i.HammingDistance(k,l)local x=a(k)d(x==a(l),"Hamming Distance cannot be calculated on two strings of different lengths:\""..k.."\" \""..l.."\"")local y=0;for s=1,x do y=y+(b(k,s)~=b(l,s)and 1)end;return y end;function i.HammingRatio(k,l)return i.HammingDistance(k,l)/a(k)end;local function z(A,B,...)local C=g(arg[1])=="table"and arg[1]or arg;local D=B(C[1],A)local E=C[1]for F,G in e(C)do local H=B(G,A)if H<D then D=H;E=G end end;return E,D end;function i.FuzzySearchDistance(A,...)return h{z(A,i.DamerauLevenshtienDistance,...)}end;function i.FuzzySearchRatio(A,...)return h{z(A,i.DamerauLevenshtienRatio,...)}end;i.ld_e=i.LevenshtienDistance_extended;i.ld=i.LevenshtienDistance;i.lr=i.LevensteinRatio;i.dld_e=i.DamerauLevenshtienDistance_extended;i.dld=i.DamerauLevenshtienDistance;i.dlr=i.DamerauLevenshtienRatio;i.hd=i.HammingDistance;i.hr=i.HammingRatio;i.fsd=i.FuzzySearchDistance;i.fsr=i.FuzzySearchRatio;return i +local a,b,c,d,e,f,g,h,i,j=string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort;local k={}local function l(m,n,o,p,q,...)local r,s=a(m),a(n)local t={}for u=0,r do t[u]={}for v=0,s do t[u][v]=0 end end;for u=1,r do t[u][0]=u end;for v=1,s do t[0][v]=v end;for v=1,s do for u=1,r do local w=b(m,u)local x=b(n,v)t[u][v]=c(t[u-1][v]+q,t[u][v-1]+o,t[u-1][v-1]+(w==x and 0 or p))if arg[1]and u>1 and v>1 and w==b(n,v-1)and b(m,u-1)==x then t[u][v]=c(t[u][v],t[u-2][v-2]+(w==x and 0 or arg[2]))end end end;return t[r][s]end;function k.LevenshtienDistance_extended(m,n,o,p,q)return k.genericDistance(m,n,o,p,q)end;k.ld_e=k.LevenshtienDistance_extended;function k.LevenshtienDistance(m,n)return k.ld_e(m,n,1,1,1)end;k.ld=k.LevenshtienDistance;function k.LevenshteinRatio(m,n)return k.ld(m,n)/a(m)end;k.lr=k.LevensteinRatio;function k.DamerauLevenshtienDistance_extended(m,n,o,p,q,y)return l(m,n,o,p,q,true,y)end;k.dld_e=k.DamerauLevenshtienDistance_extended;function k.DamerauLevenshtienDistance(m,n)return k.dld_e(m,n,1,1,1,1)end;k.dld=k.DamerauLevenshtienDistance;function k.DamerauLevenshtienRatio(m,n)return k.dld(m,n)/a(m)end;k.dlr=k.DamerauLevenshtienRatio;function k.HammingDistance(m,n)local z=a(m)d(z==a(n),"Hamming Distance cannot be calculated on two strings of different lengths:\""..m.."\" \""..n.."\"")local A=0;for u=1,z do A=A+(b(m,u)~=b(n,u)and 1)end;return A end;k.hd=k.HammingDistance;function k.HammingRatio(m,n)return k.HammingDistance(m,n)/a(m)end;k.hr=k.HammingRatio;local function B(C,D,...)local E=g(arg[1])=="table"and arg[1]or arg;local F=D(E[1],C)local G=E[1]for H,I in e(E)do local J=D(I,C)if J<F then F=J;G=I end end;return G,F end;function k.FuzzyFindDistance(C,...)return h{B(C,k.dld,...)}end;k.ffd=k.FuzzyFindDistance;function k.FuzzyFindRatio(C,...)return h{B(C,k.dlr,...)}end;local function K(C,D,...)local E=g(arg[1])=="table"and arg[1]or arg;local L={}for H,I in e(E)do local A=D(C,I)L[A]=L[A]or{}i(L[A],I)end;local M={}for H,I in e(L)do i(M,H)end;j(M)local N={}for H,I in e(M)do for u,v in e(L[I])do i(N,v)end end;return N end;k.ffr=k.FuzzyFindRatio;function k.FuzzySortDistance(C,...)return h{K(C,k.dld,...)}end;k.fsd=k.FuzzySortDistance;function k.FuzzySortRatio(C,...)return h{K(C,k.dlr,...)}end;k.fsr=k.FuzzySortRatio;return k |
