diff options
Diffstat (limited to 'fuzzel.lua')
| -rw-r--r-- | fuzzel.lua | 127 |
1 files changed, 92 insertions, 35 deletions
@@ -38,38 +38,66 @@ The hamming distance divided by the length of the first string returns number_ratio - fuzzel.FuzzySearchDistance(string_needle, vararg_in) + fuzzel.FuzzyFindDistance(string_needle, vararg_in) in may be either a table, or a list of arguments. fuzzel.FuzzySearchDistance will find the string that most closely resembles needle, based on Damerau-Levenshtien Distance returns string_closest, number_distance - fuzzel.FuzzySearchRatio(string_needle, vararg_in) + fuzzel.FuzzyFindRatio(string_needle, vararg_in) in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtien ratio. returns string_closest, nubmer_ratio + fuzzel.FuzzySortDistance(string_needle, vararg_in) + Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtien Distance + returns table_sorted + + fuzzel.FuzzySortRatio(string needle, vararg_in) + Same as above, but uses Damerau-Levenshtien Ratio instead + returns table_sorted + Example: Returns a function that will return the closest string to the string it was passed -----------------FuzzelExample.lua------------------ + --Include the module local fuzzel = dofile("fuzzel.lua") - function suggestoption(tbl_options) - return function(str) - local closest = fuzzel.FuzzySearchDistance(str,tbl_options) - return closest - end - end - + --A couple of options local options = { "Fat Cat", "Lazy Dog", "Brown Fox", } - local suggestfunc = suggestoption(options) - suggestfunc("Brown Cat") - -------------End FuzzelExample.lua------------------ + --And use it, to see what option closest matches "Brown Cat" + print("\"Lulzy Cat\" is close to \"" .. fuzzel.FuzzyFindDistance("Lulzy Cat",options) .. "\"") + --Sort the options to see the order in which they most closely match "Brown Cat" + print("\"Frag God\" is closest to:") + for k,v in ipairs(fuzzel.FuzzySortRatio("Frag God",options)) do + print(k .. "\t:\t" .. v) + end + -------------End FuzzelExample.lua------------------ + Outputs: + "Lulzy Cat" is close to "Fat Cat" + "Frag God" is closest to: + 1 : Fat Cat + 2 : Lazy Dog + 3 : Brown Fox + + Some easy-to-use mnemonics + fuzzel.ld_e = fuzzel.LevenshtienDistance_extended + fuzzel.ld = fuzzel.LevenshtienDistance + fuzzel.lr = fuzzel.LevensteinRatio + fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended + fuzzel.dld = fuzzel.DamerauLevenshtienDistance + fuzzel.dlr = fuzzel.DamerauLevenshtienRatio + fuzzel.hd = fuzzel.HammingDistance + fuzzel.hr = fuzzel.HammingRatio + fuzzel.ffd = fuzzel.FuzzyFindDistance + fuzzel.ffr = fuzzel.FuzzyFindRatio + fuzzel.fsd = fuzzel.FuzzySortDistance + fuzzel.fsr = fuzzel.FuzzySortRatio ]] -local strlen,chrat,min,asrt,prs,iprs,typ,upack = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack +local strlen,chrat,min,asrt,prs,iprs,typ,upack,tblins,tblsrt = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort local fuzzel = {} @@ -117,29 +145,34 @@ end function fuzzel.LevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost) return fuzzel.genericDistance(stringa, stringb, addcost, subcost, delcost) end +fuzzel.ld_e = fuzzel.LevenshtienDistance_extended function fuzzel.LevenshtienDistance(stringa,stringb) - return fuzzel.LevenshtienDistance_extended(stringa,stringb,1,1,1) + return fuzzel.ld_e(stringa,stringb,1,1,1) end +fuzzel.ld = fuzzel.LevenshtienDistance + ---The distance as a ratio of stringa's length function fuzzel.LevenshteinRatio(stringa,stringb) - return fuzzel.LevenshtienDistance(stringa,stringb) / strlen(stringa) + return fuzzel.ld(stringa,stringb) / strlen(stringa) end ---Almost the same as LevenshtienDistance, but considers two characters swaped as only "one" mistake +fuzzel.lr = fuzzel.LevensteinRatio + function fuzzel.DamerauLevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost, trncost) return genericDistance(stringa,stringb,addcost,subcost,delcost,true,trncost) end +fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended function fuzzel.DamerauLevenshtienDistance(stringa,stringb) - return fuzzel.DamerauLevenshtienDistance_extended(stringa,stringb,1,1,1,1) + return fuzzel.dld_e(stringa,stringb,1,1,1,1) end +fuzzel.dld = fuzzel.DamerauLevenshtienDistance function fuzzel.DamerauLevenshtienRatio(stringa,stringb) - return fuzzel.DamerauLevenshtienDistance(stringa,stringb) / strlen(stringa) + return fuzzel.dld(stringa,stringb) / strlen(stringa) end +fuzzel.dlr = fuzzel.DamerauLevenshtienRatio ---Purely the number of mistakes function fuzzel.HammingDistance(stringa,stringb) local len = strlen(stringa) asrt(len == strlen(stringb),"Hamming Distance cannot be calculated on two strings of different lengths:\"" .. stringa .. "\" \"" .. stringb .. "\"") @@ -149,10 +182,12 @@ function fuzzel.HammingDistance(stringa,stringb) end return dist end +fuzzel.hd = fuzzel.HammingDistance function fuzzel.HammingRatio(stringa,stringb) return fuzzel.HammingDistance(stringa,stringb) / strlen(stringa) end +fuzzel.hr = fuzzel.HammingRatio local function FuzzySearch(str,func,...) local looparg = typ(arg[1]) == "table" and arg[1] or arg @@ -168,24 +203,46 @@ local function FuzzySearch(str,func,...) return sout, tmin end -function fuzzel.FuzzySearchDistance(str,...) - return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienDistance,...)} +function fuzzel.FuzzyFindDistance(str,...) + return upack{FuzzySearch(str,fuzzel.dld,...)} end +fuzzel.ffd = fuzzel.FuzzyFindDistance -function fuzzel.FuzzySearchRatio(str,...) - return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienRatio,...)} +function fuzzel.FuzzyFindRatio(str,...) + return upack{FuzzySearch(str,fuzzel.dlr,...)} end ---Some easy-to-use mnemonics -fuzzel.ld_e = fuzzel.LevenshtienDistance_extended -fuzzel.ld = fuzzel.LevenshtienDistance -fuzzel.lr = fuzzel.LevensteinRatio -fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended -fuzzel.dld = fuzzel.DamerauLevenshtienDistance -fuzzel.dlr = fuzzel.DamerauLevenshtienRatio -fuzzel.hd = fuzzel.HammingDistance -fuzzel.hr = fuzzel.HammingRatio -fuzzel.fsd = fuzzel.FuzzySearchDistance -fuzzel.fsr = fuzzel.FuzzySearchRatio +local function FuzzySort(str, func, ...) + local looparg = typ(arg[1]) == "table" and arg[1] or arg + local usorted = {} + for k,v in prs(looparg) do + local dist = func(str,v) + usorted[dist] = usorted[dist] or {} + tblins(usorted[dist],v) + end + local sorted = {} + for k,v in prs(usorted) do + tblins(sorted,k) + end + tblsrt(sorted) + local otbl = {} + for k,v in prs(sorted) do + for i,j in prs(usorted[v]) do + tblins(otbl,j) + end + end + return otbl +end +fuzzel.ffr = fuzzel.FuzzyFindRatio + +function fuzzel.FuzzySortDistance(str,...) + return upack{FuzzySort(str,fuzzel.dld,...)} +end +fuzzel.fsd = fuzzel.FuzzySortDistance + +function fuzzel.FuzzySortRatio(str,...) + return upack{FuzzySort(str,fuzzel.dlr,...)} +end +fuzzel.fsr = fuzzel.FuzzySortRatio return fuzzel |
