aboutsummaryrefslogtreecommitdiff
path: root/fuzzel.lua
diff options
context:
space:
mode:
authorAlexander Pickering <alexandermpickering@gmail.com>2016-06-05 20:48:01 -0400
committerAlexander Pickering <alexandermpickering@gmail.com>2016-06-05 20:48:01 -0400
commit9294725b11e8a0c35c630749cc52b108b76857ec (patch)
tree654aed1950c133a8dad4f9e1f75c295e45baa199 /fuzzel.lua
parent55674013e729a8ecb929bf5060e53859bd44b997 (diff)
downloadfuzzel-9294725b11e8a0c35c630749cc52b108b76857ec.tar.gz
fuzzel-9294725b11e8a0c35c630749cc52b108b76857ec.tar.bz2
fuzzel-9294725b11e8a0c35c630749cc52b108b76857ec.zip
Added Fuzzy sorting functions
Diffstat (limited to 'fuzzel.lua')
-rw-r--r--fuzzel.lua127
1 files changed, 92 insertions, 35 deletions
diff --git a/fuzzel.lua b/fuzzel.lua
index bc90984..0496a68 100644
--- a/fuzzel.lua
+++ b/fuzzel.lua
@@ -38,38 +38,66 @@
The hamming distance divided by the length of the first string
returns number_ratio
- fuzzel.FuzzySearchDistance(string_needle, vararg_in)
+ fuzzel.FuzzyFindDistance(string_needle, vararg_in)
in may be either a table, or a list of arguments. fuzzel.FuzzySearchDistance will find the string that most closely resembles needle, based on Damerau-Levenshtien Distance
returns string_closest, number_distance
- fuzzel.FuzzySearchRatio(string_needle, vararg_in)
+ fuzzel.FuzzyFindRatio(string_needle, vararg_in)
in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtien ratio.
returns string_closest, nubmer_ratio
+ fuzzel.FuzzySortDistance(string_needle, vararg_in)
+ Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtien Distance
+ returns table_sorted
+
+ fuzzel.FuzzySortRatio(string needle, vararg_in)
+ Same as above, but uses Damerau-Levenshtien Ratio instead
+ returns table_sorted
+
Example:
Returns a function that will return the closest string to the string it was passed
-----------------FuzzelExample.lua------------------
+ --Include the module
local fuzzel = dofile("fuzzel.lua")
- function suggestoption(tbl_options)
- return function(str)
- local closest = fuzzel.FuzzySearchDistance(str,tbl_options)
- return closest
- end
- end
-
+ --A couple of options
local options = {
"Fat Cat",
"Lazy Dog",
"Brown Fox",
}
- local suggestfunc = suggestoption(options)
- suggestfunc("Brown Cat")
- -------------End FuzzelExample.lua------------------
+ --And use it, to see what option closest matches "Brown Cat"
+ print("\"Lulzy Cat\" is close to \"" .. fuzzel.FuzzyFindDistance("Lulzy Cat",options) .. "\"")
+ --Sort the options to see the order in which they most closely match "Brown Cat"
+ print("\"Frag God\" is closest to:")
+ for k,v in ipairs(fuzzel.FuzzySortRatio("Frag God",options)) do
+ print(k .. "\t:\t" .. v)
+ end
+ -------------End FuzzelExample.lua------------------
+ Outputs:
+ "Lulzy Cat" is close to "Fat Cat"
+ "Frag God" is closest to:
+ 1 : Fat Cat
+ 2 : Lazy Dog
+ 3 : Brown Fox
+
+ Some easy-to-use mnemonics
+ fuzzel.ld_e = fuzzel.LevenshtienDistance_extended
+ fuzzel.ld = fuzzel.LevenshtienDistance
+ fuzzel.lr = fuzzel.LevensteinRatio
+ fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended
+ fuzzel.dld = fuzzel.DamerauLevenshtienDistance
+ fuzzel.dlr = fuzzel.DamerauLevenshtienRatio
+ fuzzel.hd = fuzzel.HammingDistance
+ fuzzel.hr = fuzzel.HammingRatio
+ fuzzel.ffd = fuzzel.FuzzyFindDistance
+ fuzzel.ffr = fuzzel.FuzzyFindRatio
+ fuzzel.fsd = fuzzel.FuzzySortDistance
+ fuzzel.fsr = fuzzel.FuzzySortRatio
]]
-local strlen,chrat,min,asrt,prs,iprs,typ,upack = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack
+local strlen,chrat,min,asrt,prs,iprs,typ,upack,tblins,tblsrt = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort
local fuzzel = {}
@@ -117,29 +145,34 @@ end
function fuzzel.LevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost)
return fuzzel.genericDistance(stringa, stringb, addcost, subcost, delcost)
end
+fuzzel.ld_e = fuzzel.LevenshtienDistance_extended
function fuzzel.LevenshtienDistance(stringa,stringb)
- return fuzzel.LevenshtienDistance_extended(stringa,stringb,1,1,1)
+ return fuzzel.ld_e(stringa,stringb,1,1,1)
end
+fuzzel.ld = fuzzel.LevenshtienDistance
+
---The distance as a ratio of stringa's length
function fuzzel.LevenshteinRatio(stringa,stringb)
- return fuzzel.LevenshtienDistance(stringa,stringb) / strlen(stringa)
+ return fuzzel.ld(stringa,stringb) / strlen(stringa)
end
---Almost the same as LevenshtienDistance, but considers two characters swaped as only "one" mistake
+fuzzel.lr = fuzzel.LevensteinRatio
+
function fuzzel.DamerauLevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost, trncost)
return genericDistance(stringa,stringb,addcost,subcost,delcost,true,trncost)
end
+fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended
function fuzzel.DamerauLevenshtienDistance(stringa,stringb)
- return fuzzel.DamerauLevenshtienDistance_extended(stringa,stringb,1,1,1,1)
+ return fuzzel.dld_e(stringa,stringb,1,1,1,1)
end
+fuzzel.dld = fuzzel.DamerauLevenshtienDistance
function fuzzel.DamerauLevenshtienRatio(stringa,stringb)
- return fuzzel.DamerauLevenshtienDistance(stringa,stringb) / strlen(stringa)
+ return fuzzel.dld(stringa,stringb) / strlen(stringa)
end
+fuzzel.dlr = fuzzel.DamerauLevenshtienRatio
---Purely the number of mistakes
function fuzzel.HammingDistance(stringa,stringb)
local len = strlen(stringa)
asrt(len == strlen(stringb),"Hamming Distance cannot be calculated on two strings of different lengths:\"" .. stringa .. "\" \"" .. stringb .. "\"")
@@ -149,10 +182,12 @@ function fuzzel.HammingDistance(stringa,stringb)
end
return dist
end
+fuzzel.hd = fuzzel.HammingDistance
function fuzzel.HammingRatio(stringa,stringb)
return fuzzel.HammingDistance(stringa,stringb) / strlen(stringa)
end
+fuzzel.hr = fuzzel.HammingRatio
local function FuzzySearch(str,func,...)
local looparg = typ(arg[1]) == "table" and arg[1] or arg
@@ -168,24 +203,46 @@ local function FuzzySearch(str,func,...)
return sout, tmin
end
-function fuzzel.FuzzySearchDistance(str,...)
- return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienDistance,...)}
+function fuzzel.FuzzyFindDistance(str,...)
+ return upack{FuzzySearch(str,fuzzel.dld,...)}
end
+fuzzel.ffd = fuzzel.FuzzyFindDistance
-function fuzzel.FuzzySearchRatio(str,...)
- return upack{FuzzySearch(str,fuzzel.DamerauLevenshtienRatio,...)}
+function fuzzel.FuzzyFindRatio(str,...)
+ return upack{FuzzySearch(str,fuzzel.dlr,...)}
end
---Some easy-to-use mnemonics
-fuzzel.ld_e = fuzzel.LevenshtienDistance_extended
-fuzzel.ld = fuzzel.LevenshtienDistance
-fuzzel.lr = fuzzel.LevensteinRatio
-fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended
-fuzzel.dld = fuzzel.DamerauLevenshtienDistance
-fuzzel.dlr = fuzzel.DamerauLevenshtienRatio
-fuzzel.hd = fuzzel.HammingDistance
-fuzzel.hr = fuzzel.HammingRatio
-fuzzel.fsd = fuzzel.FuzzySearchDistance
-fuzzel.fsr = fuzzel.FuzzySearchRatio
+local function FuzzySort(str, func, ...)
+ local looparg = typ(arg[1]) == "table" and arg[1] or arg
+ local usorted = {}
+ for k,v in prs(looparg) do
+ local dist = func(str,v)
+ usorted[dist] = usorted[dist] or {}
+ tblins(usorted[dist],v)
+ end
+ local sorted = {}
+ for k,v in prs(usorted) do
+ tblins(sorted,k)
+ end
+ tblsrt(sorted)
+ local otbl = {}
+ for k,v in prs(sorted) do
+ for i,j in prs(usorted[v]) do
+ tblins(otbl,j)
+ end
+ end
+ return otbl
+end
+fuzzel.ffr = fuzzel.FuzzyFindRatio
+
+function fuzzel.FuzzySortDistance(str,...)
+ return upack{FuzzySort(str,fuzzel.dld,...)}
+end
+fuzzel.fsd = fuzzel.FuzzySortDistance
+
+function fuzzel.FuzzySortRatio(str,...)
+ return upack{FuzzySort(str,fuzzel.dlr,...)}
+end
+fuzzel.fsr = fuzzel.FuzzySortRatio
return fuzzel