diff options
Diffstat (limited to 'fuzzel.lua')
| -rw-r--r-- | fuzzel.lua | 104 |
1 files changed, 54 insertions, 50 deletions
@@ -6,28 +6,28 @@ Some helper functions for calculating distance between two strings Provides: - fuzzel.LevenshtienDistance_extended(string_first, string_second, number_addcost, number_substituecost, number_deletecost) - Calculates the Levenshtien Distance between two strings, useing the costs given. "Real" Levenshtien Distance uses values 1,1,1 for costs. + fuzzel.LevenshteinDistance_extended(string_first, string_second, number_addcost, number_substitutecost, number_deletecost) + Calculates the Levenshtein Distance between two strings, using the costs given. "Real" Levenshtein Distance uses values 1,1,1 for costs. returns number_distance - fuzzel.LevenshtienDistance(string_first, strings_second) - Calculates the "real" Levenshtien Distance + fuzzel.LevenshteinDistance(string_first, string_second) + Calculates the "real" Levenshtein Distance returns number_distance fuzzel.LevenshteinRatio(string_first, string_second) - The Levenshtien Ratio divided by the first string's length. Useing a ratio is a decent way to determin if a spelling is "close enough" + The Levenshtein Distance divided by the first string's length. 
Using a ratio is a decent way to determine if a spelling is "close enough" returns number_ratio - fuzzel.DamerauLevenshtienDistance_extended(string_first, string_second, number_addcost, number_substituecost, number_deletecost, number_transpositioncost) - Damerau-Levenshtien Distance is almost exactly like Levenshtien Distance, with the caveat that two letters next to each other, with swapped positions only counts as "one" cost (in "real" Damerau-Levenshtien Distance) + fuzzel.DamerauLevenshteinDistance_extended(string_first, string_second, number_addcost, number_substitutecost, number_deletecost, number_transpositioncost) + Damerau-Levenshtein Distance is almost exactly like Levenshtein Distance, with the caveat that two letters next to each other with swapped positions only count as "one" cost (in "real" Damerau-Levenshtein Distance) returns number - fuzzel.DamerauLevenshtienDistance(stirng_first, strings_second) - Calculates the "real" Damerau-Levenshtien Distance + fuzzel.DamerauLevenshteinDistance(string_first, string_second) + Calculates the "real" Damerau-Levenshtein Distance returns number - fuzzel.DamerauLevenshtienRatio(string_first, string_second) - The Damerau-Levenshtien Distance divided by the first string's length + fuzzel.DamerauLevenshteinRatio(string_first, string_second) + The Damerau-Levenshtein Distance divided by the first string's length returns number_ratio fuzzel.HammingDistance(string_first, string_second) @@ -39,19 +39,19 @@ returns number_ratio fuzzel.FuzzyFindDistance(string_needle, vararg_in) - in may be either a table, or a list of arguments. fuzzel.FuzzySearchDistance will find the string that most closely resembles needle, based on Damerau-Levenshtien Distance + in may be either a table, or a list of arguments. 
fuzzel.FuzzyFindDistance will find the string that most closely resembles needle, based on Damerau-Levenshtein Distance returns string_closest, number_distance fuzzel.FuzzyFindRatio(string_needle, vararg_in) - in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtien ratio. + in may be either a table, or a list of arguments. Same as above, except it returns the string with the closest Damerau-Levenshtein ratio. returns string_closest, number_ratio fuzzel.FuzzySortDistance(string_needle, vararg_in) - Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtien Distance + Sorts either the table, or the arguments, and returns a table. Uses Damerau-Levenshtein Distance returns table_sorted fuzzel.FuzzySortRatio(string_needle, vararg_in) - Same as above, but uses Damerau-Levenshtien Ratio instead + Same as above, but uses Damerau-Levenshtein Ratio instead returns table_sorted Example: @@ -85,25 +85,30 @@ 3 : Brown Fox Some easy-to-use mnemonics - fuzzel.ld_e = fuzzel.LevenshtienDistance_extended - fuzzel.ld = fuzzel.LevenshtienDistance + fuzzel.ld_e = fuzzel.LevenshteinDistance_extended + fuzzel.ld = fuzzel.LevenshteinDistance fuzzel.lr = fuzzel.LevenshteinRatio - fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended - fuzzel.dld = fuzzel.DamerauLevenshtienDistance - fuzzel.dlr = fuzzel.DamerauLevenshtienRatio + fuzzel.dld_e = fuzzel.DamerauLevenshteinDistance_extended + fuzzel.dld = fuzzel.DamerauLevenshteinDistance + fuzzel.dlr = fuzzel.DamerauLevenshteinRatio fuzzel.hd = fuzzel.HammingDistance fuzzel.hr = fuzzel.HammingRatio fuzzel.ffd = fuzzel.FuzzyFindDistance fuzzel.ffr = fuzzel.FuzzyFindRatio fuzzel.fsd = fuzzel.FuzzySortDistance fuzzel.fsr = fuzzel.FuzzySortRatio -]] + +]]--You probably don't want to touch anything past this point --Assign locals to these so the minifier can compress the file better local strlen,chrat,min,asrt,prs,iprs,typ,upack,tblins,tblsrt 
= string.len,string.byte,math.min,assert,pairs,ipairs,type,unpack,table.insert,table.sort local fuzzel = {} +--A clever way to allow the minifier to minify function names, this basically just assigns variables with their string equivalent. +local da, le, di, ra, fu, fi, so, ex, ha = "Damerau", "Levenshtein", "Distance", "Ratio", "Fuzzy", "Find", "Sort", "_extended", "Hamming" +local LevenshteinDistance_extended,LevenshteinDistance,LevenshteinRatio,DamerauLevenshteinDistance_extended,DamerauLevenshteinDistance,DamerauLevenshteinRatio,FuzzyFindDistance,FuzzyFindRatio,FuzzySortDistance,FuzzySortRatio,HammingDistance,HammingRatio = le..di..ex,le..di,le..ra,da..le..di..ex,da..le..di,da..le..ra,fu..fi..di,fu..fi..ra,fu..so..di,fu..so..ra,ha..di,ha..ra + local function genericDistance( stringa, stringb, addcost, subcost, delcost, ...) --Length of each string local salen, sblen = strlen(stringa), strlen(stringb) @@ -145,38 +150,37 @@ local function genericDistance( stringa, stringb, addcost, subcost, delcost, ... 
return dyntbl[salen][sblen] end -function fuzzel.LevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost) - return fuzzel.genericDistance(stringa, stringb, addcost, subcost, delcost) +fuzzel[LevenshteinDistance_extended] = function(stringa, stringb, addcost, subcost, delcost) + return genericDistance(stringa, stringb, addcost, subcost, delcost) end -fuzzel.ld_e = fuzzel.LevenshtienDistance_extended +fuzzel.ld_e = fuzzel[LevenshteinDistance_extended] -function fuzzel.LevenshtienDistance(stringa,stringb) +fuzzel[LevenshteinDistance] = function(stringa,stringb) return fuzzel.ld_e(stringa,stringb,1,1,1) end -fuzzel.ld = fuzzel.LevenshtienDistance - +fuzzel.ld = fuzzel[LevenshteinDistance] -function fuzzel.LevenshteinRatio(stringa,stringb) +fuzzel[LevenshteinRatio] = function(stringa,stringb) return fuzzel.ld(stringa,stringb) / strlen(stringa) end -fuzzel.lr = fuzzel.LevensteinRatio +fuzzel.lr = fuzzel[LevenshteinRatio] -function fuzzel.DamerauLevenshtienDistance_extended(stringa, stringb, addcost, subcost, delcost, trncost) +fuzzel[DamerauLevenshteinDistance_extended] = function(stringa, stringb, addcost, subcost, delcost, trncost) return genericDistance(stringa,stringb,addcost,subcost,delcost,true,trncost) end -fuzzel.dld_e = fuzzel.DamerauLevenshtienDistance_extended +fuzzel.dld_e = fuzzel[DamerauLevenshteinDistance_extended] -function fuzzel.DamerauLevenshtienDistance(stringa,stringb) +fuzzel[DamerauLevenshteinDistance] = function(stringa,stringb) return fuzzel.dld_e(stringa,stringb,1,1,1,1) end -fuzzel.dld = fuzzel.DamerauLevenshtienDistance +fuzzel.dld = fuzzel[DamerauLevenshteinDistance] -function fuzzel.DamerauLevenshtienRatio(stringa,stringb) +fuzzel[DamerauLevenshteinRatio] = function(stringa,stringb) return fuzzel.dld(stringa,stringb) / strlen(stringa) end -fuzzel.dlr = fuzzel.DamerauLevenshtienRatio +fuzzel.dlr = fuzzel[DamerauLevenshteinRatio] -function fuzzel.HammingDistance(stringa,stringb) +fuzzel[HammingDistance] = 
function(stringa,stringb) local len = strlen(stringa) asrt(len == strlen(stringb),"Hamming Distance cannot be calculated on two strings of different lengths:\"" .. stringa .. "\" \"" .. stringb .. "\"") local dist = 0 @@ -185,12 +189,12 @@ function fuzzel.HammingDistance(stringa,stringb) end return dist end -fuzzel.hd = fuzzel.HammingDistance +fuzzel.hd = fuzzel[HammingDistance] -function fuzzel.HammingRatio(stringa,stringb) - return fuzzel.HammingDistance(stringa,stringb) / strlen(stringa) +fuzzel[HammingRatio] = function(stringa,stringb) + return fuzzel.hd(stringa,stringb) / strlen(stringa) end -fuzzel.hr = fuzzel.HammingRatio +fuzzel.hr = fuzzel[HammingRatio] local function FuzzySearch(str,func,...) --Allow varargs, or a table @@ -209,12 +213,12 @@ local function FuzzySearch(str,func,...) return sout, tmin end -function fuzzel.FuzzyFindDistance(str,...) +fuzzel[FuzzyFindDistance] = function(str,...) return upack{FuzzySearch(str,fuzzel.dld,...)} end -fuzzel.ffd = fuzzel.FuzzyFindDistance +fuzzel.ffd = fuzzel[FuzzyFindDistance] -function fuzzel.FuzzyFindRatio(str,...) +fuzzel[FuzzyFindRatio] = function(str,...) return upack{FuzzySearch(str,fuzzel.dlr,...)} end @@ -246,16 +250,16 @@ local function FuzzySort(str, func, ...) end return otbl end -fuzzel.ffr = fuzzel.FuzzyFindRatio +fuzzel.ffr = fuzzel[FuzzyFindRatio] -function fuzzel.FuzzySortDistance(str,...) - return upack{FuzzySort(str,fuzzel.dld,...)} +fuzzel[FuzzySortDistance] = function(str,...) + return FuzzySort(str,fuzzel.dld,...) end -fuzzel.fsd = fuzzel.FuzzySortDistance +fuzzel.fsd = fuzzel[FuzzySortDistance] -function fuzzel.FuzzySortRatio(str,...) - return upack{FuzzySort(str,fuzzel.dlr,...)} +fuzzel[FuzzySortRatio] = function(str,...) + return FuzzySort(str,fuzzel.dlr,...) end -fuzzel.fsr = fuzzel.FuzzySortRatio +fuzzel.fsr = fuzzel[FuzzySortRatio] return fuzzel |
