diff options
| author | Alexander Pickering <alexandermpickering@gmail.com> | 2016-07-10 17:04:29 -0400 |
|---|---|---|
| committer | Alexander Pickering <alexandermpickering@gmail.com> | 2016-07-10 17:04:29 -0400 |
| commit | 1de5f9ac6f038bfed2230cc1272b253794b2f41a (patch) | |
| tree | 15bc9d515f1f48c036522afb7cc71f60243849a9 /gamemode/shared/fuzzel.lua | |
| download | artery-1de5f9ac6f038bfed2230cc1272b253794b2f41a.tar.gz artery-1de5f9ac6f038bfed2230cc1272b253794b2f41a.tar.bz2 artery-1de5f9ac6f038bfed2230cc1272b253794b2f41a.zip | |
Initial commit
Diffstat (limited to 'gamemode/shared/fuzzel.lua')
| -rw-r--r-- | gamemode/shared/fuzzel.lua | 286 |
1 files changed, 286 insertions, 0 deletions
diff --git a/gamemode/shared/fuzzel.lua b/gamemode/shared/fuzzel.lua new file mode 100644 index 0000000..6aad45f --- /dev/null +++ b/gamemode/shared/fuzzel.lua @@ -0,0 +1,286 @@ +--[[ + Fuzzel v1.3 - Alexander "Apickx" Pickering + Entered into the public domain June 2, 2016 + You are not required to, but consider putting a link to the source in your file's comments! + + Some helper functions for calculateing distance between two strings + + Provides: + fuzzel.LevenshteinDistance_extended(string_first, string_second, number_addcost, number_substituecost, number_deletecost) + Calculates the Levenshtein Distance between two strings, useing the costs given. "Real" Levenshtein Distance uses values 1,1,1 for costs. + returns number_distance + + fuzzel.LevenshteinDistance(string_first, strings_second) + Calculates the "real" Levenshtein Distance + returns number_distance + + fuzzel.LevensteinRatio(string_first, string_second) + The Levenshtein Ratio divided by the first string's length. Useing a ratio is a decent way to determin if a spelling is "close enough" + returns number_distance + + fuzzel.DamerauLevenshteinDistance_extended(string_first, string_second, number_addcost, number_substituecost, number_deletecost, number_transpositioncost) + Damerau-Levenshtein Distance is almost exactly like Levenshtein Distance, with the caveat that two letters next to each other, with swapped positions only counts as "one" cost (in "real" Damerau-Levenshtein Distance) + returns number + + fuzzel.DamerauLevenshteinDistance(stirng_first, strings_second) + Calculates the "real" Damerau-Levenshtein Distance + returns number + + fuzzel.DamerauLevenshteinRatio(string_first, string_second) + The Damerau-Levenshtein Distance divided by the first string's length + returns number_ratio + + fuzzel.HammingDistance(string_first, string_second) + Purely the number of substitutions needed to change one string into another. Note that both strings must be the same length. + returns number_distance + + fuzzel.HammingRatio(string_first, string_second) + The hamming distance divided by the length of the first string + returns number_ratio + + fuzzel.FuzzyFindDistance(string_needle, vararg_in) + in may be either a table, or a list of arguments. fuzzel.FuzzySearchDistance will find the string that most closely resembles needle, based on Damerau-Levenshtein Distance. If multiple options have the same distance, it will return the first one encountered (This may not be in any sort of order!) + returns string_closest, number_distance + + fuzzel.FuzzyFindRatio(string_needle, vararg_in) + in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtein ratio. + returns string_closest, nubmer_ratio + + fuzzel.FuzzySortDistance(string_needle, vararg_in) + Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtein Distance + returns table_sorted + + fuzzel.FuzzySortRatio(string needle, vararg_in) + Same as above, but uses Damerau-Levenshtein Ratio instead + returns table_sorted + + fuzzel.FuzzyAutocompleteDistance(string_needle, vararg in) + vararg_in can be either a table, or a list of entries, this will fuzzy sort based on the length of the input, which makes it better at autocompletion than fuzzy sorting. Uses Damerau-Levenshtein Distance. + returns table_sorted + + fuzzel.FuzzyAutocompleteRatio(string_needle, vararg_in) + Same as above, but uses DamerauLevenshteinRatio + returns table_sorted + + Example: + Returns a function that will return the closest string to the string it was passed + -----------------FuzzelExample.lua------------------ + --Include the module + local fuzzel = require("fuzzel.lua") + + --A couple of options + local options = { + "Fat Cat", + "Lazy Dog", + "Brown Fox", + } + + --And use it, to see what option closest matches "Lulzy Cat" + local close,distance = fuzzel.FuzzyFindDistance("Lulzy Cat", options) + print("\"Lulzy Cat\" is close to \"" .. close .. "\", distance:" .. distance) + + --Sort the options to see the order in which they most closely match "Frag God" + print("\"Frag God\" is closest to:") + for k,v in ipairs(fuzzel.FuzzySortRatio("Frag God",options)) do + print(k .. "\t:\t" .. v) + end + -------------End FuzzelExample.lua------------------ + Outputs: + "Lulzy Cat" is close to "Fat Cat" + "Frag God" is closest to: + 1 : Fat Cat + 2 : Lazy Dog + 3 : Brown Fox + + Some easy-to-use mnemonics + fuzzel.ld_e = fuzzel.LevenshteinDistance_extended + fuzzel.ld = fuzzel.LevenshteinDistance + fuzzel.lr = fuzzel.LevensteinRatio + fuzzel.dld_e = fuzzel.DamerauLevenshteinDistance_extended + fuzzel.dld = fuzzel.DamerauLevenshteinDistance + fuzzel.dlr = fuzzel.DamerauLevenshteinRatio + fuzzel.hd = fuzzel.HammingDistance + fuzzel.hr = fuzzel.HammingRatio + fuzzel.ffd = fuzzel.FuzzyFindDistance + fuzzel.ffr = fuzzel.FuzzyFindRatio + fuzzel.fsd = fuzzel.FuzzySortDistance + fuzzel.fsr = fuzzel.FuzzySortRatio + fuzzel.fad = fuzzel.FuzzyAutocompleteDistance + fuzzel.far = fuzzel.FuzzyAutocompleteRatio + +]]--You probably don't want to touch anything past this point + +--Assign locals to these to the minifier can compress the file better +local strlen,chrat,min,asrt,prs,iprs,typ,upack,tblins,tblsrt,strsub,tru,fal = string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort,string.sub,true,false + +local fuzzel = {} + +--A clever way to allow the minifier to minify function names, this basically just assigns variables with their string equivalent. +local da, le, di, ra, fu, fi, so, ex, ha, au = "Damerau", "Levenshtein", "Distance", "Ratio", "Fuzzy", "Find", "Sort", "_extended", "Hamming", "Autocomplete" +local LevenshteinDistance_extended,LevenshteinDistance,LevenshteinRatio,DamerauLevenshteinDistance_extended,DamerauLevenshteinDistance,DamerauLevenshteinRatio,FuzzyFindDistance,FuzzyFindRatio,FuzzySortDistance,FuzzySortRatio,HammingDistance,HammingRatio,FuzzyAutocompleteDistance,FuzzyAutocompleteRatio = le..di..ex,le..di,le..ra,da..le..di..ex,da..le..di,da..le..ra,fu..fi..di,fu..fi..ra,fu..so..di,fu..so..ra,ha..di,ha..ra,fu..au..di,fu..au..ra + +local function genericDistance( stringa, stringb, addcost, subcost, delcost, ...) + local arg = {...} + + --Length of each string + local salen, sblen = strlen(stringa), strlen(stringb) + + --Create a 0 matrix the size of len(a) x len(b) + local dyntbl = {} + for i = 0,salen do + dyntbl[i] = {} + for j = 0,sblen do + dyntbl[i][j] = 0 + end + end + + --Initalize the matrix + for i = 1,salen do + dyntbl[i][0] = i + end + for j = 1,sblen do + dyntbl[0][j] = j + end + + --And build up the matrix based on costs-so-far + for j = 1,sblen do + for i = 1,salen do + local ca,cb = chrat(stringa,i),chrat(stringb,j) + dyntbl[i][j] = min( + dyntbl[i-1][j] + delcost, --deletion + dyntbl[i][j-1] + addcost, --insertion + dyntbl[i-1][j-1] + (ca == cb and 0 or subcost) --substituion + ) + if arg[1] and i > 1 and j > 1 and ca == chrat(stringb,j-1) and chrat(stringa,i-1) == cb then + dyntbl[i][j] = min(dyntbl[i][j], + dyntbl[i-2][j-2] + (ca == cb and 0 or arg[2])) --transposition + end + end + end + + return dyntbl[salen][sblen] +end + +fuzzel[LevenshteinDistance_extended] = function(stringa, stringb, addcost, subcost, delcost) + return genericDistance(stringa, stringb, addcost, subcost, delcost) +end +fuzzel.ld_e = fuzzel[LevenshteinDistance_extended] + +fuzzel[LevenshteinDistance] = function(stringa,stringb) + return fuzzel.ld_e(stringa,stringb,1,1,1) +end +fuzzel.ld = fuzzel[LevenshteinDistance] + +fuzzel[LevenshteinRatio] = function(stringa,stringb) + return fuzzel.ld(stringa,stringb) / strlen(stringa) +end +fuzzel.lr = fuzzel[LevenshteinRatio] + +fuzzel[DamerauLevenshteinDistance_extended] = function(stringa, stringb, addcost, subcost, delcost, trncost) + return genericDistance(stringa,stringb,addcost,subcost,delcost,tru,trncost) +end +fuzzel.dld_e = fuzzel[DamerauLevenshteinDistance_extended] + +fuzzel[DamerauLevenshteinDistance] = function(stringa,stringb) + return fuzzel.dld_e(stringa,stringb,1,1,1,1) +end +fuzzel.dld = fuzzel[DamerauLevenshteinDistance] + +fuzzel[DamerauLevenshteinRatio] = function(stringa,stringb) + return fuzzel.dld(stringa,stringb) / strlen(stringa) +end +fuzzel.dlr = fuzzel[DamerauLevenshteinRatio] + +fuzzel[HammingDistance] = function(stringa,stringb) + local len,dist = strlen(stringa),0 + asrt(len == strlen(stringb), ha.." "..di.." cannot be calculated on two strings of different lengths:\"" .. stringa .. "\" \"" .. stringb .. "\"") + for i = 1,len do + dist = dist + ((chrat(stringa,i) ~= chrat(stringb,i)) and 1 or 0) + end + return dist +end +fuzzel.hd = fuzzel[HammingDistance] + +fuzzel[HammingRatio] = function(stringa,stringb) + return fuzzel.hd(stringa,stringb) / strlen(stringa) +end +fuzzel.hr = fuzzel[HammingRatio] + +local function FuzzySearch(str,func,...) + local arg = {...} + + --Allow varargs, or a table + local looparg = typ(arg[1]) == "table" and arg[1] or arg + + --Find the string with the shortest distance to the string we were supplied + local tmin,sout = func(looparg[1],str),looparg[1] + for k,v in prs(looparg) do + local t = func(v,str) + if t <= tmin then + tmin,sout = t,k + end + end + return looparg[sout], tmin +end + +fuzzel[FuzzyFindDistance] = function(str,...) + return upack{FuzzySearch(str,fuzzel.dld,...)} +end +fuzzel.ffd = fuzzel[FuzzyFindDistance] + +fuzzel[FuzzyFindRatio] = function(str,...) + return upack{FuzzySearch(str,fuzzel.dlr,...)} +end + +local function FuzzySort(str, func, short, ...) + local arg = {...} + + --allow varargs, or a table + local looparg = typ(arg[1]) == "table" and arg[1] or arg + + --Roughly sort everything by it's distance to the string + local usorted,sorted,otbl,slen = {},{},{},strlen(str) + for k,v in prs(looparg) do + local sstr = short and strsub(v,0,slen) or v + local dist = func(str,sstr) + if usorted[dist] == nil then + usorted[dist] = {} + tblins(sorted,dist) + end + tblins(usorted[dist],v) + end + + --Actually sort them into something can can be iterated with ipairs + tblsrt(sorted) + + --Then build our output table + for k,v in iprs(sorted) do + for i,j in prs(usorted[v]) do + tblins(otbl,j) + end + end + return otbl +end +fuzzel.ffr = fuzzel[FuzzyFindRatio] + +fuzzel[FuzzySortDistance] = function(str,...) + return FuzzySort(str,fuzzel.dld,fal,...) +end +fuzzel.fsd = fuzzel[FuzzySortDistance] + +fuzzel[FuzzySortRatio] = function(str,...) + return FuzzySort(str,fuzzel.dlr,fal,...) +end +fuzzel.fsr = fuzzel[FuzzySortRatio] + +fuzzel[FuzzyAutocompleteDistance] = function(str, ...) + return FuzzySort(str,fuzzel.dld,tru,...) +end +fuzzel.fad = fuzzel[FuzzyAutocompleteDistance] + +fuzzel[FuzzyAutocompleteRatio] = function(str,...) + return FuzzySort(str,fuzzel.dlr,tru,...) +end +fuzzel.far = fuzzel[FuzzyAutocompleteRatio] + +return fuzzel |
