From 774b296d3e49b8be3b0feaee8b5d3154fcec73b6 Mon Sep 17 00:00:00 2001 From: Alexander Pickering Date: Fri, 1 Jul 2016 22:08:45 -0400 Subject: Initial commit --- src/amalg.cache | 3 + src/glum.lua | 444 +++++++++++++++++++++++++++++ src/parser.lua | 848 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/scope.lua | 74 +++++ 4 files changed, 1369 insertions(+) create mode 100644 src/amalg.cache create mode 100644 src/glum.lua create mode 100644 src/parser.lua create mode 100644 src/scope.lua (limited to 'src') diff --git a/src/amalg.cache b/src/amalg.cache new file mode 100644 index 0000000..de81b62 --- /dev/null +++ b/src/amalg.cache @@ -0,0 +1,3 @@ +return { + [ "lpeg" ] = "C", +} diff --git a/src/glum.lua b/src/glum.lua new file mode 100644 index 0000000..ff05953 --- /dev/null +++ b/src/glum.lua @@ -0,0 +1,444 @@ +--[[ +This module allows you to minify gLua code + Use: + local x = require("glum.lua") + local str =" + --Here is some code to be minified!\n + for a=1,10,2 do\n + print(a)\n + end + " + print(x.minify(str)) + Dependencies: + lua-parser +]] +local parser = dofile("../src/parser.lua") +local lpeg = require("lpeg") +lpeg.locale(lpeg) + +local glum = {} + +--- Creates a deep copy of a table. +-- Creates a deep copy, will even copy metatables. +-- @tab orig the original table to copy +-- @return a copy of the table +local function deepcopy(orig) end --Creates a deep copy of a table + +local function getnextvarname(latname) end --generates the next valid variable name from the last valid variable name. 
+
+--Forward declarations. These must be plain locals (not "local function ... end"
+--stubs): the handlers in `syntax` below capture `stringfor` as an upvalue, so the
+--real definition further down has to assign to this very variable.
+local printtable --A debugging function, a replacement for glua PrintTable
+
+local stringfor --Returns the string for the given abstract syntax tree, within the scope of tbl
+
+local removespaces --Removes extra spaces and semicolons in string
+
+
+--- Creates a deep copy of a value.
+-- Tables are copied recursively (keys, values and the metatable); any other
+-- value is returned unchanged. Cyclic tables are not supported (the recursion
+-- would never terminate).
+-- @param orig the value to copy
+-- @return the copy
+local function deepcopy(orig)
+    if type(orig) ~= "table" then
+        return orig -- number, string, boolean, etc
+    end
+    local copy = {}
+    for orig_key, orig_value in next, orig, nil do
+        copy[deepcopy(orig_key)] = deepcopy(orig_value)
+    end
+    return setmetatable(copy, deepcopy(getmetatable(orig)))
+end
+
+--A list of reserved words that cannot be used as variable names
+local nonames = {"if","for","end","do","local","then","else","elseif","return","goto","function","nil","false","true","repeat","break","and","or","not","in","until","while","continue"}
+local reservednames = {}
+for k,v in ipairs(nonames) do
+    reservednames[v] = true
+end
+
+--Returns true when str can be written as a bare name (t.str instead of t["str"])
+local function isname(str)
+    return type(str) == "string"
+        and string.find(str, "^[A-Za-z_][A-Za-z0-9_]*$") ~= nil
+        and not reservednames[str]
+end
+
+--A function that generates the next valid variable name from the last valid variable name.
+--Names are counted like numbers whose digits run a..z, A..Z with the least
+--significant digit first. "Z" is deliberately absent from varchars: it is the
+--last digit, so it can not be incremented and a name made only of "Z"s (or the
+--empty name) rolls over to a run of "a"s that is one character longer.
+local varchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXY"
+local function getnextvarname(lname)
+    local place = string.find(lname,"[" .. varchars .. "]")
+    local length = string.len(lname)
+    local output
+    if place == nil then
+        output = string.rep("a", length + 1)
+    else
+        local lbyte = string.byte(lname,place)
+        -- "z" (122) wraps around to "A" (65), everything else moves one byte up
+        local newchar = string.char(lbyte + (lbyte < 122 and 1 or -57))
+        local after = string.sub(lname, place + 1, length)
+        local before = string.rep("a", place-1)
+        output = before .. newchar .. after
+    end
+    -- never hand out a keyword or the name of a global visible to the minifier
+    while reservednames[output] or _G[output] do
+        output = getnextvarname(output)
+    end
+    return output
+end
+
+--A debugging function, a replacement for glua PrintTable
+--tabset is the nesting depth (defaults to 0); each level adds one tab.
+function printtable(tbl, tabset)
+    tabset = tabset or 0
+    local indent = string.rep("\t", tabset + 1)
+    for k,v in pairs(tbl) do
+        -- tostring() so that boolean / table keys do not raise a concat error
+        io.write(indent, tostring(k), ":")
+        if type(v) == "table" then
+            io.write("\n")
+            printtable(v, tabset + 1)
+        else
+            io.write(tostring(v), "\n")
+        end
+    end
+end
+
+--Source text for every binary operator tag the parser can produce
+local binop = {
+    ["or"] = " or ", ["and"] = " and ",
+    ["ne"] = "~=", ["eq"] = "==",
+    ["le"] = "<=", ["ge"] = ">=",
+    ["lt"] = "<", ["gt"] = ">",
+    ["bor"] = "|", ["bxor"] = "~",
+    ["band"] = "&", ["shl"] = "<<",
+    ["shr"] = ">>", ["concat"] = "..",
+    ["add"] = "+", ["sub"] = "-",
+    ["mul"] = "*", ["div"] = "/",
+    ["mod"] = "%", ["pow"] = "^",
+}
+--Source text for every unary operator tag.
+--"not" needs separators on both sides; "-" needs a leading space so that
+--"a - -b" is not emitted as "a--b", which would start a comment.
+local uniop = {
+    ["len"] = "#", ["not"] = " not ",
+    ["unm"] = " -", ["bnot"] = "~",
+}
+
+--Reserves the next free minified name in scope tbl and returns it
+local function newname(tbl)
+    local name = getnextvarname(tbl.lname)
+    tbl.lname = name
+    return name
+end
+
+--Returns the minified name of the local variable `name` in scope tbl,
+--reserving a new one when the variable has not been seen yet
+local function localname(name, tbl)
+    if tbl.ids[name] == nil then
+        tbl.ids[name] = newname(tbl)
+    end
+    return tbl.ids[name]
+end
+
+--Declares every `Id in the list ast inside scope tbl and returns the renamed,
+--comma separated list. Works for tagged `NameList nodes as well as for the
+--untagged parameter lists the parser builds for "()" and "(...)".
+local function namelist(ast, tbl)
+    local names = {}
+    for k = 1,#ast do
+        if ast[k].tag == "Id" then
+            names[k] = localname(ast[k][1], tbl)
+        else
+            names[k] = stringfor(ast[k], tbl)
+        end
+    end
+    return table.concat(names, ",")
+end
+
+--Returns the code for the statement list ast, evaluated in a fresh copy of
+--oldtbl so that nothing declared inside leaks out. String constants that are
+--first used inside the block are declared as locals at the top of the block.
+--tail is an optional function(scope) whose result is appended while the
+--block's scope is still alive (used for the condition of repeat ... until).
+local function blockstring(ast, oldtbl, tail)
+    local tbl = deepcopy(oldtbl)
+    oldtbl.block = true -- kept from the original code; nothing in this file reads it
+    local codeblocks = {}
+    for k = 1,#ast do
+        codeblocks[k] = stringfor(ast[k],tbl)
+    end
+    if tail ~= nil then
+        codeblocks[#codeblocks + 1] = tail(tbl)
+    end
+    local code = table.concat(codeblocks)
+    local fresh = {}
+    for str,var in pairs(tbl.strings) do
+        if oldtbl.strings[str] ~= var then
+            fresh[#fresh + 1] = {var = var, str = str}
+        end
+    end
+    if #fresh == 0 then
+        return code
+    end
+    -- pairs() has no defined order; sort so the same input always minifies the same way
+    table.sort(fresh, function(a, b) return a.var < b.var end)
+    local lhss,rhss = {},{}
+    for k,entry in ipairs(fresh) do
+        lhss[k] = entry.var
+        rhss[k] = string.format("%q",entry.str)
+    end
+    return table.concat({" local ", table.concat(lhss,","), "=", table.concat(rhss,","), ";", code})
+end
+
+--Returns "(parameters)body end " for a `Function node. The parameters are
+--renamed inside a scope of their own so they do not rename outer variables.
+local function funcbody(ast, tbl)
+    local scope = deepcopy(tbl)
+    local params = namelist(ast[1], scope)
+    local code = stringfor(ast[2], scope)
+    return table.concat({"(",params,")",code," end "})
+end
+
+--Abandon all hope, ye who enter here
+--Refer to the comments at the top of parser.lua for what each function should do.
+--If willox ever decides to add new language features, they need to be added to BOTH parser.lua and here.
+--Every handler receives the node (ast) and the current scope (tbl) and returns the code for the node.
+local syntax = {
+    ["Call"] = function(ast,tbl)
+        local exprname = stringfor(ast[1],tbl)
+        local argnames = {}
+        for k = 2,#ast do
+            argnames[k-1] = stringfor(ast[k],tbl)
+        end
+        return table.concat({exprname,"(",table.concat(argnames,","),")"})
+    end,
+    ["Invoke"] = function(ast,tbl)
+        local func = stringfor(ast[1],tbl)
+        local invargs = {}
+        for k = 3,#ast do
+            invargs[#invargs + 1] = stringfor(ast[k],tbl)
+        end
+        local method = ast[2]
+        --Keep obj:name(...) when that is the shorter form, and always when the
+        --receiver is not a plain identifier: the bracket form below mentions
+        --the receiver twice and would evaluate it twice.
+        if method.tag == "String" and (ast[1].tag ~= "Id" or #method[1] < (#func + 2)) then
+            return table.concat({func,":",method[1],"(",table.concat(invargs,","),")"})
+        end
+        local inv = stringfor(method,tbl)
+        table.insert(invargs, 1, func) -- explicit self; also avoids a dangling "," without arguments
+        return table.concat({func,"[",inv,"](",table.concat(invargs,","),")"})
+    end,
+    ["String"] = function(ast,tbl)
+        --strings are replaced by a local that the enclosing block declares
+        if tbl.strings[ast[1]] == nil then
+            tbl.strings[ast[1]] = newname(tbl)
+        end
+        return tbl.strings[ast[1]]
+    end,
+    ["Id"] = function(ast,tbl)
+        if tbl.ids[ast[1]] == nil then
+            return ast[1] -- not a known local, leave the name alone
+        end
+        return tbl.ids[ast[1]]
+    end,
+    ["Index"] = function(ast,tbl)
+        local globalvar = stringfor(ast[1],tbl)
+        local key = ast[2]
+        --t.key is only legal when the key is an identifier, t["a b"] must stay bracketed
+        if key.tag == "String" and isname(key[1]) then
+            return table.concat({globalvar, ".", key[1]})
+        end
+        return table.concat({globalvar, "[", stringfor(key,tbl), "]"})
+    end,
+    ["Paren"] = function(ast,tbl)
+        return table.concat({"(", stringfor(ast[1],tbl), ")"})
+    end,
+    ["Dots"] = function(ast,tbl)
+        return "..."
+    end,
+    ["Forin"] = function(ast,tbl)
+        --the iterator expressions belong to the enclosing scope
+        local el = stringfor(ast[2],tbl)
+        local nadd = deepcopy(tbl)
+        local nl = stringfor(ast[1],nadd)
+        local code = stringfor(ast[3],nadd)
+        return table.concat({" for ", nl, " in ", el, " do ", code, " end "})
+    end,
+    ["NameList"] = function(ast,tbl)
+        return namelist(ast,tbl)
+    end,
+    ["ExpList"] = function(ast,tbl)
+        local exprs = {}
+        for k = 1,#ast do
+            exprs[k] = stringfor(ast[k],tbl)
+        end
+        return table.concat(exprs,",")
+    end,
+    ["Nil"] = function(ast,tbl)
+        return "nil"
+    end,
+    ["True"] = function(ast,tbl)
+        return "true"
+    end,
+    ["False"] = function(ast,tbl)
+        return "false"
+    end,
+    ["Return"] = function(ast,tbl)
+        local retargs = {}
+        for k,v in ipairs(ast) do
+            retargs[k] = stringfor(v,tbl)
+        end
+        return " return " .. table.concat(retargs,",")
+    end,
+    ["If"] = function(ast,tbl)
+        local codeblocks = {}
+        codeblocks[1] = table.concat({" if ",stringfor(ast[1],tbl)," then ",stringfor(ast[2],tbl)})
+        --the remaining children come in (condition, block) pairs ...
+        for k = 3,#ast-1,2 do
+            local expr = stringfor(ast[k],tbl)
+            local block = stringfor(ast[k + 1],tbl)
+            codeblocks[#codeblocks + 1] = table.concat({" elseif " , expr , " then " , block})
+        end
+        --... and an odd child count means a trailing else block
+        if #ast % 2 == 1 then
+            codeblocks[#codeblocks + 1] = " else " .. stringfor(ast[#ast],tbl)
+        end
+        codeblocks[#codeblocks + 1] = " end "
+        return table.concat(codeblocks)
+    end,
+    ["Fornum"] = function(ast,tbl)
+        --the bounds are evaluated before the loop variable exists
+        local start = stringfor(ast[2],tbl)
+        local endnum = stringfor(ast[3],tbl)
+        local hasstep = ast[4].tag ~= "Block"
+        local incstr = ""
+        if hasstep then
+            local incrementer = stringfor(ast[4],tbl)
+            if incrementer ~= "1" then -- a step of 1 is the default
+                incstr = "," .. incrementer
+            end
+        end
+        --the loop variable only lives inside the loop
+        local scope = deepcopy(tbl)
+        local var
+        if ast[1].tag == "Id" then
+            var = localname(ast[1][1],scope)
+        else
+            var = stringfor(ast[1],scope)
+        end
+        local code = stringfor(hasstep and ast[5] or ast[4],scope)
+        return table.concat({" for ",var,"=",start,",",endnum,incstr," do ",code," end "})
+    end,
+    ["Op"] = function(ast,tbl)
+        local opname = ast[1]
+        if uniop[opname] ~= nil then
+            local operand = stringfor(ast[2],tbl)
+            --the parser turns "a ~= b" into not(a == b), which needs the parentheses back
+            if ast[2].tag == "Op" then
+                operand = "(" .. operand .. ")"
+            end
+            return uniop[opname] .. operand
+        end
+        local op = binop[opname]
+        if op == nil then
+            error("Attempted to use unknown operator:" .. tostring(opname))
+        end
+        --"1..x" would be read as a malformed number
+        if opname == "concat" and ast[2].tag == "Number" then
+            op = " .."
+        end
+        local lhs = stringfor(ast[2],tbl)
+        local rhs = stringfor(ast[3],tbl)
+        return table.concat({lhs,op,rhs})
+    end,
+    ["Pair"] = function(ast,tbl)
+        local lhs = stringfor(ast[1],tbl)
+        local rhs = stringfor(ast[2],tbl)
+        return table.concat({"[",lhs,"]=",rhs})
+    end,
+    ["Table"] = function(ast,tbl)
+        local fields = {}
+        for k = 1, #ast do
+            fields[k] = stringfor(ast[k],tbl)
+        end
+        return table.concat({"{",table.concat(fields,","),"}"})
+    end,
+    ["Number"] = function(ast,tbl)
+        return tostring(ast[1])
+    end,
+    ["Local"] = function(ast,tbl)
+        --the parser stores an empty, untagged table when there is no "= ..." part
+        local rhs
+        if ast[2].tag ~= nil then
+            --the values are evaluated before the new names exist ("local x = x")
+            rhs = stringfor(ast[2],tbl)
+        end
+        local lhs = stringfor(ast[1],tbl)
+        if rhs ~= nil then
+            return table.concat({" local ",lhs,"=",rhs,";"})
+        end
+        return table.concat({" local ",lhs,";"})
+    end,
+    ["VarList"] = function(ast,tbl)
+        local vars = {}
+        for k = 1,#ast do
+            vars[k] = stringfor(ast[k],tbl)
+        end
+        return table.concat(vars,",")
+    end,
+    ["Set"] = function(ast,tbl)
+        --Walk the elements instead of stringifying the lists: "function a.b() end"
+        --arrives as a `Set whose two lists carry no tag.
+        local lhs = {}
+        for k = 1,#ast[1] do
+            lhs[k] = stringfor(ast[1][k],tbl)
+        end
+        local rhs = {}
+        for k = 1,#ast[2] do
+            rhs[k] = stringfor(ast[2][k],tbl)
+        end
+        return table.concat({table.concat(lhs,","),"=",table.concat(rhs,","),";"})
+    end,
+    --Labels are emitted under their own name. They do not share a namespace with
+    --variables, and renaming them per scope copy could give a forward goto from a
+    --nested block a different name than the label it jumps to.
+    ["Label"] = function(ast,tbl)
+        return "::" .. ast[1] .. "::"
+    end,
+    ["Goto"] = function(ast,tbl)
+        return " goto " .. ast[1] .. " "
+    end,
+    ["Function"] = function(ast,tbl)
+        return " function" .. funcbody(ast,tbl)
+    end,
+    ["Localrec"] = function(ast,tbl)
+        --ast[1] is { `Id } and ast[2] is { `Function }; the name is declared first
+        --so that the function can call itself
+        local ident = localname(ast[1][1][1],tbl)
+        return table.concat({" local function ",ident,funcbody(ast[2][1],tbl)})
+    end,
+    ["Continue"] = function(ast,tbl)
+        return " continue "
+    end,
+    ["While"] = function(ast,tbl)
+        local expr = stringfor(ast[1],tbl)
+        local block = stringfor(ast[2],tbl)
+        return table.concat({" while " , expr , " do " , block , " end "})
+    end,
+    ["Repeat"] = function(ast,tbl)
+        --the condition can see the locals of the body, so it is built inside the body's scope
+        local body = blockstring(ast[1],tbl,function(scope)
+            return " until " .. stringfor(ast[2],scope)
+        end)
+        return table.concat({" repeat ",body," "})
+    end,
+    ["Do"] = function(ast,tbl)
+        --the parser re-tags the inner block itself as `Do
+        return table.concat({" do ",blockstring(ast,tbl)," end "})
+    end,
+    ["Break"] = function(ast,tbl)
+        return " break "
+    end,
+    ["Block"] = function(ast,oldtbl)
+        return blockstring(ast,oldtbl)
+    end,
+}
+
+--Returns the string for the given abstract syntax tree, within the scope of tbl
+--Raises an error for node tags that have no handler in `syntax`.
+function stringfor(ast,tbl)
+    local handler = syntax[ast.tag]
+    if handler == nil then
+        error("Attempted to use unknown tag type:" .. tostring(ast.tag))
+    end
+    return handler(ast,tbl)
+end
+
+--Patterns and replacements applied, in this order, to everything that is not a string literal
+local removables = {
+    {"%s*%)%s*",")"}, --Spaces before or after )
+    {"%s*%(%s*","("}, --Spaces before or after (
+    {"%s*;%s*",";"}, --Spaces before or after ;
+    {"%s*,%s*",","}, --Spaces before or after ,
+    {";+",";"}, --Multiple ; in a row
+    {"^%s*",""}, --Spaces at the beginning of the file
+    {"%s*$",""}, --Spaces at the end of the file
+    {"%s+"," "}, --Multiple spaces in a row
+}
+
+--Applies every entry of removables to a piece of code that holds no string literal
+local function compact(code)
+    --Order is important
+    for k,v in ipairs(removables) do
+        code = string.gsub(code,v[1],v[2])
+    end
+    return code
+end
+
+--Removes extra spaces and duplicated ; from a string
+--The double quoted literals written by string.format("%q") are copied through
+--untouched, so the contents of the program's strings are never rewritten.
+function removespaces(str)
+    local pieces = {}
+    local pos, len = 1, string.len(str)
+    while pos <= len do
+        local open = string.find(str,'"',pos,true)
+        if open == nil then
+            pieces[#pieces + 1] = compact(string.sub(str,pos))
+            break
+        end
+        pieces[#pieces + 1] = compact(string.sub(str,pos,open - 1))
+        --find the closing quote, stepping over backslash escapes
+        local close = open + 1
+        while close <= len do
+            local char = string.sub(str,close,close)
+            if char == "\\" then
+                close = close + 2
+            elseif char == '"' then
+                break
+            else
+                close = close + 1
+            end
+        end
+        pieces[#pieces + 1] = string.sub(str,open,close)
+        pos = close + 1
+    end
+    return table.concat(pieces)
+end
+
+--- Minifies a chunk of gLua source code.
+-- @param str the source code
+-- @param name passed through to parser.parse together with str
+-- @return the minified source code
+-- Raises the parser's message as an error when parser.parse returns no tree.
+glum.minify = function(str, name)
+    local ast, error_msg = parser.parse(str, name)
+    if not ast then
+        error(error_msg)
+    end
+    local localvar = {
+        ["strings"] = {}, --string constant -> name of the local that holds it
+        ["ids"] = {}, --source name of a local -> minified name
+        ["lname"] = "", --the last minified name that was handed out
+        ["nids"] = {}, --kept for compatibility; labels are no longer renamed
+    }
+    return removespaces(stringfor(ast,localvar))
+end
+
+return glum
diff --git a/src/parser.lua b/src/parser.lua
new file mode 100644
index 0000000..c36baf3
--- /dev/null
+++ b/src/parser.lua
@@ -0,0 +1,848 @@
+--[[
+This module implements a parser for Lua 5.2 with LPeg,
+and generates an Abstract Syntax Tree in the Metalua format.
+For more information about Metalua, please, visit:
+https://github.com/fab13n/metalua-parser
+
+block: { stat* }
+
+stat:
+ `Do{ stat* }
+ | `Set{ {lhs+} {expr+} } -- lhs1, lhs2... = e1, e2...
+ | `While{ expr block } -- while e do b end
+ | `Repeat{ block expr } -- repeat b until e
+ | `If{ (expr block)+ block? } -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
+ | `Fornum{ ident expr expr expr? block } -- for ident = e, e[, e] do b end
+ | `Forin{ {ident+} {expr+} block } -- for i1, i2... in e1, e2... do b end
+ | `Local{ {ident+} {expr+}? } -- local i1, i2... = e1, e2...
+ | `Localrec{ ident expr } -- only used for 'local function'
+ | `Goto{ } -- goto str
+ | `Label{ } -- ::str::
+ | `Return{ } -- return e1, e2...
+ | `Break -- break + | apply + +expr: + `Nil + | `Dots + | `True + | `False + | `Number{ } + | `String{ } + | `Function{ { `Id{ }* `Dots? } block } + | `Table{ ( `Pair{ expr expr } | expr )* } + | `Op{ opid expr expr? } + | `Paren{ expr } -- significant to cut multiple values returns + | apply + | lhs + +apply: + `Call{ expr expr* } + | `Invoke{ expr `String{ } expr* } + +lhs: `Id{ } | `Index{ expr expr } + +opid: 'add' | 'sub' | 'mul' | 'div' | 'idiv' | 'mod' | 'pow' | 'concat' + | 'band' | 'bor' | 'bxor' | 'shl' | 'shr' | 'eq' | 'lt' | 'le' + | 'and' | 'or' | 'not' | 'unm' | 'len' | 'bnot' +]] +local parser = {} +local lpeg = require("lpeg") +local scope = dofile("../src/scope.lua") + +lpeg.locale(lpeg) + +local P, S, V = lpeg.P, lpeg.S, lpeg.V +local C, Carg, Cb, Cc = lpeg.C, lpeg.Carg, lpeg.Cb, lpeg.Cc +local Cf, Cg, Cmt, Cp, Ct = lpeg.Cf, lpeg.Cg, lpeg.Cmt, lpeg.Cp, lpeg.Ct +local alpha, digit, alnum = lpeg.alpha, lpeg.digit, lpeg.alnum +local xdigit = lpeg.xdigit +local space = lpeg.space + +local lineno = scope.lineno +local new_scope, end_scope = scope.new_scope, scope.end_scope +local new_function, end_function = scope.new_function, scope.end_function +local begin_loop, end_loop = scope.begin_loop, scope.end_loop +local insideloop = scope.insideloop + +--some stuff for getting glua comments +local BEGIN_COMMENT = lpeg.P("/*") +local END_COMMENT = lpeg.P("*/") +local NOT_BEGIN = (1 - BEGIN_COMMENT)^0 +local NOT_END = (1 - END_COMMENT)^0 +local FULL_COMMENT_CONTENTS = BEGIN_COMMENT * NOT_END * END_COMMENT + +-- error message auxiliary functions + +-- creates an error message for the input string +local function syntaxerror (errorinfo, pos, msg) + local l, c = lineno(errorinfo.subject, pos) + local error_msg = "%s:%d:%d: syntax error, %s" + return string.format(error_msg, errorinfo.filename, l, c, msg) +end + +-- gets the farthest failure position +local function getffp (s, i, t) + return t.ffp or i, t +end + +-- gets the table that contains the error 
information +local function geterrorinfo () + return Cmt(Carg(1), getffp) * (C(V"OneWord") + Cc("EOF")) / + function (t, u) + t.unexpected = u + return t + end +end + +-- creates an errror message using the farthest failure position +local function errormsg () + return geterrorinfo() / + function (t) + local p = t.ffp or 1 + local msg = "unexpected '%s', expecting %s" + msg = string.format(msg, t.unexpected, t.expected) + return nil, syntaxerror(t, p, msg) + end +end + +-- reports a syntactic error +local function report_error () + return errormsg() +end + +-- sets the farthest failure position and the expected tokens +local function setffp (s, i, t, n) + if not t.ffp or i > t.ffp then + t.ffp = i + t.list = {} ; t.list[n] = n + t.expected = "'" .. n .. "'" + elseif i == t.ffp then + if not t.list[n] then + t.list[n] = n + t.expected = "'" .. n .. "', " .. t.expected + end + end + return false +end + +local function updateffp (name) + return Cmt(Carg(1) * Cc(name), setffp) +end + +-- regular combinators and auxiliary functions + +local function token (pat, name) + return pat * V"Skip" + updateffp(name) * P(false) +end + +local function symb (str) + return token (P(str), str) +end + +local function kw (str) + return token (P(str) * -V"idRest", str) +end + +local function taggedCap (tag, pat) + return Ct(Cg(Cp(), "pos") * Cg(Cc(tag), "tag") * pat) +end + +local function unaryop (op, e) + return { tag = "Op", pos = e.pos, [1] = op, [2] = e } +end + +local function binaryop (e1, op, e2) + if not op then + return e1 + elseif op == "add" or + op == "sub" or + op == "mul" or + op == "div" or + op == "idiv" or + op == "mod" or + op == "pow" or + op == "concat" or + op == "band" or + op == "bor" or + op == "bxor" or + op == "shl" or + op == "shr" or + op == "eq" or + op == "lt" or + op == "le" or + op == "and" or + op == "or" then + return { tag = "Op", pos = e1.pos, [1] = op, [2] = e1, [3] = e2 } + elseif op == "ne" then + return unaryop ("not", { tag = "Op", pos = e1.pos, 
[1] = "eq", [2] = e1, [3] = e2 }) + elseif op == "gt" then + return { tag = "Op", pos = e1.pos, [1] = "lt", [2] = e2, [3] = e1 } + elseif op == "ge" then + return { tag = "Op", pos = e1.pos, [1] = "le", [2] = e2, [3] = e1 } + end +end + +local function chainl (pat, sep, a) + return Cf(pat * Cg(sep * pat)^0, binaryop) + a +end + +local function chainl1 (pat, sep) + return Cf(pat * Cg(sep * pat)^0, binaryop) +end + +local function sepby (pat, sep, tag) + return taggedCap(tag, (pat * (sep * pat)^0)^-1) +end + +local function sepby1 (pat, sep, tag) + return taggedCap(tag, pat * (sep * pat)^0) +end + +local function fix_str (str) + str = string.gsub(str, "\\a", "\a") + str = string.gsub(str, "\\b", "\b") + str = string.gsub(str, "\\f", "\f") + str = string.gsub(str, "\\n", "\n") + str = string.gsub(str, "\\r", "\r") + str = string.gsub(str, "\\t", "\t") + str = string.gsub(str, "\\v", "\v") + str = string.gsub(str, "\\\n", "\n") + str = string.gsub(str, "\\\r", "\n") + str = string.gsub(str, "\\'", "'") + str = string.gsub(str, '\\"', '"') + str = string.gsub(str, '\\\\', '\\') + return str +end + +-- grammar + +local G = { V"Lua", + Lua = V"Shebang"^-1 * V"Skip" * V"Chunk" * -1 + report_error(); + -- parser + Chunk = V"Block"; + StatList = (symb(";") + V"Stat")^0; + Var = V"Id"; + Id = taggedCap("Id", token(V"Name", "Name")); + FunctionDef = kw("function") * V"FuncBody"; + FieldSep = symb(",") + symb(";"); + Field = taggedCap("Pair", (symb("[") * V"Expr" * symb("]") * symb("=") * V"Expr") + + (taggedCap("String", token(V"Name", "Name")) * symb("=") * V"Expr")) + + V"Expr"; + FieldList = (V"Field" * (V"FieldSep" * V"Field")^0 * V"FieldSep"^-1)^-1; + Constructor = taggedCap("Table", symb("{") * V"FieldList" * symb("}")); + NameList = sepby1(V"Id", symb(","), "NameList"); + ExpList = sepby1(V"Expr", symb(","), "ExpList"); + FuncArgs = symb("(") * (V"Expr" * (symb(",") * V"Expr")^0)^-1 * symb(")") + + V"Constructor" + + taggedCap("String", token(V"String", "String")); + 
Expr = V"SubExpr_1"; + SubExpr_1 = chainl1(V"SubExpr_2", V"OrOp"); + SubExpr_2 = chainl1(V"SubExpr_3", V"AndOp"); + SubExpr_3 = chainl1(V"SubExpr_4", V"RelOp"); + SubExpr_4 = chainl1(V"SubExpr_5", V"BOrOp"); + SubExpr_5 = chainl1(V"SubExpr_6", V"BXorOp"); + SubExpr_6 = chainl1(V"SubExpr_7", V"BAndOp"); + SubExpr_7 = chainl1(V"SubExpr_8", V"ShiftOp"); + SubExpr_8 = V"SubExpr_9" * V"ConOp" * V"SubExpr_8" / binaryop + + V"SubExpr_9"; + SubExpr_9 = chainl1(V"SubExpr_10", V"AddOp"); + SubExpr_10 = chainl1(V"SubExpr_11", V"MulOp"); + SubExpr_11 = V"UnOp" * V"SubExpr_11" / unaryop + + V"SubExpr_12"; + SubExpr_12 = V"SimpleExp" * (V"PowOp" * V"SubExpr_11")^-1 / binaryop; + SimpleExp = taggedCap("Number", token(V"Number", "Number")) + + taggedCap("String", token(V"String", "String")) + + taggedCap("Nil", kw("nil")) + + taggedCap("False", kw("false")) + + taggedCap("True", kw("true")) + + taggedCap("Dots", symb("...")) + + V"FunctionDef" + + V"Constructor" + + V"SuffixedExp"; + SuffixedExp = Cf(V"PrimaryExp" * ( + taggedCap("DotIndex", symb(".") * taggedCap("String", token(V"Name", "Name"))) + + taggedCap("ArrayIndex", symb("[") * V"Expr" * symb("]")) + + taggedCap("Invoke", Cg(symb(":") * taggedCap("String", token(V"Name", "Name")) * V"FuncArgs")) + + taggedCap("Call", V"FuncArgs") + )^0, function (t1, t2) + if t2 then + if t2.tag == "Call" or t2.tag == "Invoke" then + local t = {tag = t2.tag, pos = t1.pos, [1] = t1} + for k, v in ipairs(t2) do + table.insert(t, v) + end + return t + else + return {tag = "Index", pos = t1.pos, [1] = t1, [2] = t2[1]} + end + end + return t1 + end); + PrimaryExp = V"Var" + + taggedCap("Paren", symb("(") * V"Expr" * symb(")")); + Block = taggedCap("Block", V"StatList" * V"RetStat"^-1); + IfStat = taggedCap("If", + kw("if") * V"Expr" * kw("then") * V"Block" * + (kw("elseif") * V"Expr" * kw("then") * V"Block")^0 * + (kw("else") * V"Block")^-1 * + kw("end")); + WhileStat = taggedCap("While", kw("while") * V"Expr" * + kw("do") * V"Block" * 
kw("end")); + DoStat = kw("do") * V"Block" * kw("end") / + function (t) + t.tag = "Do" + return t + end; + ForBody = kw("do") * V"Block"; + ForNum = taggedCap("Fornum", + V"Id" * symb("=") * V"Expr" * symb(",") * + V"Expr" * (symb(",") * V"Expr")^-1 * + V"ForBody"); + ForGen = taggedCap("Forin", V"NameList" * kw("in") * V"ExpList" * V"ForBody"); + ForStat = kw("for") * (V"ForNum" + V"ForGen") * kw("end"); + RepeatStat = taggedCap("Repeat", kw("repeat") * V"Block" * + kw("until") * V"Expr"); + FuncName = Cf(V"Id" * (symb(".") * taggedCap("String", token(V"Name", "Name")))^0, + function (t1, t2) + if t2 then + return {tag = "Index", pos = t1.pos, [1] = t1, [2] = t2} + end + return t1 + end) * (symb(":") * taggedCap("String", token(V"Name", "Name")))^-1 / + function (t1, t2) + if t2 then + return {tag = "Index", pos = t1.pos, is_method = true, [1] = t1, [2] = t2} + end + return t1 + end; + ParList = V"NameList" * (symb(",") * symb("...") * taggedCap("Dots", Cp()))^-1 / + function (t, v) + if v then table.insert(t, v) end + return t + end + + symb("...") * taggedCap("Dots", Cp()) / + function (v) + return {v} + end + + P(true) / function () return {} end; + -- Cc({}) generates a strange bug when parsing [[function t:a() end ; function t.a() end]] + -- the bug is to add the parameter self to the second function definition + --FuncBody = taggedCap("Function", symb("(") * (V"ParList" + Cc({})) * symb(")") * V"Block" * kw("end")); + FuncBody = taggedCap("Function", symb("(") * V"ParList" * symb(")") * V"Block" * kw("end")); + FuncStat = taggedCap("Set", kw("function") * V"FuncName" * V"FuncBody") / + function (t) + if t[1].is_method then table.insert(t[2][1], 1, {tag = "Id", [1] = "self"}) end + t[1] = {t[1]} + t[2] = {t[2]} + return t + end; + LocalFunc = taggedCap("Localrec", kw("function") * V"Id" * V"FuncBody") / + function (t) + t[1] = {t[1]} + t[2] = {t[2]} + return t + end; + LocalAssign = taggedCap("Local", V"NameList" * ((symb("=") * V"ExpList") + Ct(Cc()))); + 
LocalStat = kw("local") * (V"LocalFunc" + V"LocalAssign"); + LabelStat = taggedCap("Label", symb("::") * token(V"Name", "Name") * symb("::")); + BreakStat = taggedCap("Break", kw("break")); + ContinueStat = taggedCap("Continue", kw("continue")); + GoToStat = taggedCap("Goto", kw("goto") * token(V"Name", "Name")); + RetStat = taggedCap("Return", kw("return") * (V"Expr" * (symb(",") * V"Expr")^0)^-1 * symb(";")^-1); + ExprStat = Cmt( + (V"SuffixedExp" * + (Cc(function (...) + local vl = {...} + local el = vl[#vl] + table.remove(vl) + for k, v in ipairs(vl) do + if v.tag == "Id" or v.tag == "Index" then + vl[k] = v + else + -- invalid assignment + return false + end + end + vl.tag = "VarList" + vl.pos = vl[1].pos + return true, {tag = "Set", pos = vl.pos, [1] = vl, [2] = el} + end) * V"Assignment")) + + + (V"SuffixedExp" * + (Cc(function (s) + if s.tag == "Call" or + s.tag == "Invoke" then + return true, s + end + -- invalid statement + return false + end))) + , function (s, i, s1, f, ...) return f(s1, ...) 
end); + Assignment = ((symb(",") * V"SuffixedExp")^1)^-1 * symb("=") * V"ExpList"; + Stat = V"IfStat" + V"WhileStat" + V"DoStat" + V"ForStat" + + V"RepeatStat" + V"FuncStat" + V"LocalStat" + V"LabelStat" + + V"BreakStat" + V"GoToStat" + V"ExprStat" + V"ContinueStat"; + -- lexer + Space = space^1; + Equals = P"="^0; + Open = "[" * Cg(V"Equals", "init") * "[" * P"\n"^-1; + Close = "]" * C(V"Equals") * "]"; + CloseEQ = Cmt(V"Close" * Cb("init"), + function (s, i, a, b) return a == b end); + LongString = V"Open" * C((P(1) - V"CloseEQ")^0) * V"Close" / + function (s, o) return s end; + Comment = P"--" * V"LongString" / function () return end + + P"--" * (P(1) - P"\n")^0 + + P"//" * (P(1) - P"\n")^0 + + C(FULL_COMMENT_CONTENTS) / function() return end; + Skip = (V"Space" + V"Comment")^0; + idStart = alpha + P("_"); + idRest = alnum + P("_"); + Keywords = P("and") + "break" + "do" + "elseif" + "else" + "end" + + "false" + "for" + "function" + "goto" + "if" + "in" + + "local" + "nil" + "not" + "or" + "repeat" + "return" + + "then" + "true" + "until" + "while" + "continue"; + Reserved = V"Keywords" * -V"idRest"; + Identifier = V"idStart" * V"idRest"^0; + Name = -V"Reserved" * C(V"Identifier") * -V"idRest"; + Hex = (P("0x") + P("0X")) * xdigit^1; + Expo = S("eE") * S("+-")^-1 * digit^1; + Float = (((digit^1 * P(".") * digit^0) + + (P(".") * digit^1)) * V"Expo"^-1) + + (digit^1 * V"Expo"); + Int = digit^1; + Number = C(V"Hex" + V"Float" + V"Int") / + function (n) return tonumber(n) end; + ShortString = P'"' * C(((P'\\' * P(1)) + (P(1) - P'"'))^0) * P'"' + + P"'" * C(((P"\\" * P(1)) + (P(1) - P"'"))^0) * P"'"; + String = V"LongString" + (V"ShortString" / function (s) return fix_str(s) end); + OrOp = kw("or") / "or" + + symb("||") / "or"; + AndOp = kw("and") / "and" + + symb("&&") / "and"; + RelOp = symb("~=") / "ne" + + symb("==") / "eq" + + symb("<=") / "le" + + symb(">=") / "ge" + + symb("<") / "lt" + + symb(">") / "gt" + + symb("!=") / "ne"; + BOrOp = symb("|") / "bor"; + 
BXorOp = symb("~") / "bxor"; + BAndOp = symb("&") / "band"; + ShiftOp = symb("<<") / "shl" + + symb(">>") / "shr"; + ConOp = symb("..") / "concat"; + AddOp = symb("+") / "add" + + symb("-") / "sub"; + MulOp = symb("*") / "mul" + + --symb("//") / "idiv" + + symb("/") / "div" + + symb("%") / "mod"; + UnOp = kw("not") / "not" + + symb("-") / "unm" + + symb("#") / "len" + + symb("~") / "bnot" + + symb("!") / "not"; + PowOp = symb("^") / "pow"; + Shebang = P"#" * (P(1) - P"\n")^0 * P"\n"; + -- for error reporting + OneWord = V"Name" + V"Number" + V"String" + V"Reserved" + P("...") + P(1); +} + +local function exist_label (env, scope, stm) + local l = stm[1] + for s=scope, 0, -1 do + if env[s]["label"][l] then return true end + end + return false +end + +local function set_label (env, label, pos) + local scope = env.scope + local l = env[scope]["label"][label] + if not l then + env[scope]["label"][label] = { name = label, pos = pos } + return true + else + local msg = "label '%s' already defined at line %d" + local line = lineno(env.errorinfo.subject, l.pos) + msg = string.format(msg, label, line) + return nil, syntaxerror(env.errorinfo, pos, msg) + end +end + +local function set_pending_goto (env, stm) + local scope = env.scope + table.insert(env[scope]["goto"], stm) + return true +end + +local function verify_pending_gotos (env) + for s=env.maxscope, 0, -1 do + for k, v in ipairs(env[s]["goto"]) do + if not exist_label(env, s, v) then + local msg = "no visible label '%s' for " + msg = string.format(msg, v[1]) + return nil, syntaxerror(env.errorinfo, v.pos, msg) + end + end + end + return true +end + +local function set_vararg (env, is_vararg) + env["function"][env.fscope].is_vararg = is_vararg +end + +local traverse_stm, traverse_exp, traverse_var +local traverse_block, traverse_explist, traverse_varlist, traverse_parlist + +function traverse_parlist (env, parlist) + local len = #parlist + local is_vararg = false + if len > 0 and parlist[len].tag == "Dots" then + 
is_vararg = true
+  end
+  set_vararg(env, is_vararg)
+  return true
+end
+
+-- Every traverse_* checker below follows the same convention: it returns
+-- true on success, or a falsy status plus a message as soon as one of its
+-- sub-checks fails. On failure the checker returns early, before the
+-- matching end_scope/end_loop/end_function call.
+
+-- Checks a `Function node: exp[1] is the parameter list, exp[2] the body.
+-- A function record and a scope are opened around the two checks.
+local function traverse_function (env, exp)
+  new_function(env)
+  new_scope(env)
+  local status, msg = traverse_parlist(env, exp[1])
+  if not status then return status, msg end
+  status, msg = traverse_block(env, exp[2])
+  if not status then return status, msg end
+  end_scope(env)
+  end_function(env)
+  return true
+end
+
+-- Checks the operands of an `Op node: exp[2] always, exp[3] only when it is
+-- present (exp[1] holds the operator id and is not visited).
+local function traverse_op (env, exp)
+  local status, msg = traverse_exp(env, exp[2])
+  if not status then return status, msg end
+  if exp[3] then
+    status, msg = traverse_exp(env, exp[3])
+    if not status then return status, msg end
+  end
+  return true
+end
+
+-- Checks the single expression wrapped by a `Paren node.
+local function traverse_paren (env, exp)
+  local status, msg = traverse_exp(env, exp[1])
+  if not status then return status, msg end
+  return true
+end
+
+-- Checks every field of a table constructor. A `Pair field has its key
+-- (v[1]) and value (v[2]) checked; any other field is checked as a plain
+-- expression.
+local function traverse_table (env, fieldlist)
+  for _, v in ipairs(fieldlist) do
+    local tag = v.tag
+    if tag == "Pair" then
+      local status, msg = traverse_exp(env, v[1])
+      if not status then return status, msg end
+      status, msg = traverse_exp(env, v[2])
+      if not status then return status, msg end
+    else
+      local status, msg = traverse_exp(env, v)
+      if not status then return status, msg end
+    end
+  end
+  return true
+end
+
+-- Rejects '...' when the current function record (env.fscope) is not
+-- flagged is_vararg.
+local function traverse_vararg (env, exp)
+  if not env["function"][env.fscope].is_vararg then
+    local msg = "cannot use '...' outside a vararg function"
+    return nil, syntaxerror(env.errorinfo, exp.pos, msg)
+  end
+  return true
+end
+
+-- Checks a `Call node: call[1] is the called expression, call[2..] are the
+-- arguments.
+local function traverse_call (env, call)
+  local status, msg = traverse_exp(env, call[1])
+  if not status then return status, msg end
+  for i=2, #call do
+    status, msg = traverse_exp(env, call[i])
+    if not status then return status, msg end
+  end
+  return true
+end
+
+-- Checks an `Invoke node: invoke[1] is the receiver and invoke[3..] are the
+-- arguments. invoke[2] (the method name) is skipped.
+local function traverse_invoke (env, invoke)
+  local status, msg = traverse_exp(env, invoke[1])
+  if not status then return status, msg end
+  for i=3, #invoke do
+    status, msg = traverse_exp(env, invoke[i])
+    if not status then return status, msg end
+  end
+  return true
+end
+
+-- Checks a `Set node: the assignment targets (stm[1]) and then the assigned
+-- expressions (stm[2]).
+local function traverse_assignment (env, stm)
+  local status, msg = traverse_varlist(env, stm[1])
+  if not status then return status, msg end
+  status, msg = traverse_explist(env, stm[2])
+  if not status then return status, msg end
+  return true
+end
+
+-- Rejects a break statement that is not enclosed by a loop.
+-- The message names the statement so the report is self-explanatory.
+local function traverse_break (env, stm)
+  if not insideloop(env) then
+    local msg = "<break> not inside a loop"
+    return nil, syntaxerror(env.errorinfo, stm.pos, msg)
+  end
+  return true
+end
+
+-- Rejects a continue statement that is not enclosed by a loop.
+-- The message names the statement so the report is self-explanatory.
+local function traverse_continue (env, stm)
+  if not insideloop(env) then
+    local msg = "<continue> not inside a loop"
+    return nil, syntaxerror(env.errorinfo, stm.pos, msg)
+  end
+  return true
+end
+
+-- Checks a `Forin node inside a loop and a scope: the iterator expressions
+-- (stm[2]) and the body (stm[3]). The name list stm[1] is not visited.
+local function traverse_forin (env, stm)
+  begin_loop(env)
+  new_scope(env)
+  local status, msg = traverse_explist(env, stm[2])
+  if not status then return status, msg end
+  status, msg = traverse_block(env, stm[3])
+  if not status then return status, msg end
+  end_scope(env)
+  end_loop(env)
+  return true
+end
+
+-- Checks a `Fornum node inside a loop and a scope. stm[2] and stm[3] are
+-- always expressions. When stm[5] exists, stm[4] is a third expression and
+-- stm[5] the body; otherwise stm[4] is the body.
+local function traverse_fornum (env, stm)
+  local status, msg
+  begin_loop(env)
+  new_scope(env)
+  status, msg = traverse_exp(env, stm[2])
+  if not status then return status, msg end
+  status, msg = traverse_exp(env, stm[3])
+  if not status then return status, msg end
+  if stm[5] then
+    status, msg = traverse_exp(env, stm[4])
+    if not status then return status, msg end
+    status, msg = traverse_block(env, stm[5])
+    if not status then return status, msg end
+  else
+    status, msg = traverse_block(env, stm[4])
+    if not status then return status, msg end
+  end
+  end_scope(env)
+  end_loop(env)
+  return true
+end
+
+-- Hands a `Goto node to set_pending_goto and forwards its failure, if any.
+local function traverse_goto (env, stm)
+  local status, msg = set_pending_goto(env, stm)
+  if not status then return status, msg end
+  return true
+end
+
+-- Checks an `If node, laid out as (condition, block) pairs optionally
+-- followed by one trailing else block. An odd element count means the last
+-- element is that else block.
+local function traverse_if (env, stm)
+  local len = #stm
+  -- Stopping at len-1 visits every complete pair whether len is even or odd.
+  for i=1, len-1, 2 do
+    local status, msg = traverse_exp(env, stm[i])
+    if not status then return status, msg end
+    status, msg = traverse_block(env, stm[i+1])
+    if not status then return status, msg end
+  end
+  if len % 2 == 1 then
+    local status, msg = traverse_block(env, stm[len])
+    if not status then return status, msg end
+  end
+  return true
+end
+
+-- Hands a `Label node (its name stm[1] and position stm.pos) to set_label
+-- and forwards its failure, if any.
+local function traverse_label (env, stm)
+  local status, msg = set_label(env, stm[1], stm.pos)
+  if not status then return status, msg end
+  return true
+end
+
+-- Checks the initialiser expressions (stm[2]) of a `Local node.
+local function traverse_let (env, stm)
+  local status, msg = traverse_explist(env, stm[2])
+  if not status then return status, msg end
+  return true
+end
+
+-- Checks the first expression of stm[2] in a `Localrec node.
+local function traverse_letrec (env, stm)
+  local status, msg = traverse_exp(env, stm[2][1])
+  if not status then return status, msg end
+  return true
+end
+
+-- Checks a `Repeat node inside a loop: the body (stm[1]) and then the
+-- condition (stm[2]).
+local function traverse_repeat (env, stm)
+  begin_loop(env)
+  local status, msg = traverse_block(env, stm[1])
+  if not status then return status, msg end
+  status, msg = traverse_exp(env, stm[2])
+  if not status then return status, msg end
+  end_loop(env)
+  return true
+end
+
+-- Checks the returned expressions; the `Return node itself is the list.
+local function traverse_return (env, stm)
+  local status, msg = traverse_explist(env, stm)
+  if not status then return status, msg end
+  return true
+end
+
+-- Checks a `While node inside a loop: the condition (stm[1]) and then the
+-- body (stm[2]).
+local function traverse_while (env, stm)
+  begin_loop(env)
+  local status, msg = traverse_exp(env, stm[1])
+  if not status then return status, msg end
+  status, msg = traverse_block(env, stm[2])
+  if not status then return status, msg end
+  end_loop(env)
+  return true
+end
+
+-- NOTE(review): traverse_var, traverse_varlist, traverse_exp,
+-- traverse_explist, traverse_stm and traverse_block are assigned without
+-- `local`. Presumably they are forward-declared as locals earlier in the
+-- file; confirm, otherwise these statements create globals.
+
+-- Checks a variable node: an `Id needs no further work, an `Index has both
+-- sub-expressions checked. Any other tag raises an error.
+function traverse_var (env, var)
+  local tag = var.tag
+  if tag == "Id" then -- `Id{ <string> }
+    return true
+  elseif tag == "Index" then -- `Index{ expr expr }
+    local status, msg = traverse_exp(env, var[1])
+    if not status then return status, msg end
+    status, msg = traverse_exp(env, var[2])
+    if not status then return status, msg end
+    return true
+  else
+    -- tostring keeps the intended message even when the node has no tag.
+    error("expecting a variable, but got a " .. tostring(tag))
+  end
+end
+
+-- Checks every variable of a list, stopping at the first failure.
+function traverse_varlist (env, varlist)
+  for _, v in ipairs(varlist) do
+    local status, msg = traverse_var(env, v)
+    if not status then return status, msg end
+  end
+  return true
+end
+
+-- Dispatches an expression node to the checker for its tag. Literal nodes
+-- need no check. An unknown tag raises an error.
+function traverse_exp (env, exp)
+  local tag = exp.tag
+  if tag == "Nil" or
+     tag == "True" or
+     tag == "False" or
+     tag == "Number" or -- `Number{ <number> }
+     tag == "String" then -- `String{ <string> }
+    return true
+  elseif tag == "Dots" then
+    return traverse_vararg(env, exp)
+  elseif tag == "Function" then -- `Function{ { `Id{ <string> }* `Dots? } block }
+    return traverse_function(env, exp)
+  elseif tag == "Table" then -- `Table{ ( `Pair{ expr expr } | expr )* }
+    return traverse_table(env, exp)
+  elseif tag == "Op" then -- `Op{ opid expr expr? }
+    return traverse_op(env, exp)
+  elseif tag == "Paren" then -- `Paren{ expr }
+    return traverse_paren(env, exp)
+  elseif tag == "Call" then -- `Call{ expr expr* }
+    return traverse_call(env, exp)
+  elseif tag == "Invoke" then -- `Invoke{ expr `String{ <string> } expr* }
+    return traverse_invoke(env, exp)
+  elseif tag == "Id" or -- `Id{ <string> }
+         tag == "Index" then -- `Index{ expr expr }
+    return traverse_var(env, exp)
+  else
+    -- tostring keeps the intended message even when the node has no tag.
+    error("expecting an expression, but got a " .. tostring(tag))
+  end
+end
+
+-- Checks every expression of a list, stopping at the first failure.
+function traverse_explist (env, explist)
+  for _, v in ipairs(explist) do
+    local status, msg = traverse_exp(env, v)
+    if not status then return status, msg end
+  end
+  return true
+end
+
+-- Dispatches a statement node to the checker for its tag. An unknown tag
+-- raises an error.
+function traverse_stm (env, stm)
+  local tag = stm.tag
+  if tag == "Do" then -- `Do{ stat* }
+    return traverse_block(env, stm)
+  elseif tag == "Set" then -- `Set{ {lhs+} {expr+} }
+    return traverse_assignment(env, stm)
+  elseif tag == "While" then -- `While{ expr block }
+    return traverse_while(env, stm)
+  elseif tag == "Repeat" then -- `Repeat{ block expr }
+    return traverse_repeat(env, stm)
+  elseif tag == "If" then -- `If{ (expr block)+ block? }
+    return traverse_if(env, stm)
+  elseif tag == "Fornum" then -- `Fornum{ ident expr expr expr? block }
+    return traverse_fornum(env, stm)
+  elseif tag == "Forin" then -- `Forin{ {ident+} {expr+} block }
+    return traverse_forin(env, stm)
+  elseif tag == "Local" then -- `Local{ {ident+} {expr+}? }
+    return traverse_let(env, stm)
+  elseif tag == "Localrec" then -- `Localrec{ ident expr }
+    return traverse_letrec(env, stm)
+  elseif tag == "Goto" then -- `Goto{ <string> }
+    return traverse_goto(env, stm)
+  elseif tag == "Label" then -- `Label{ <string> }
+    return traverse_label(env, stm)
+  elseif tag == "Return" then -- `Return{ <expr>* }
+    return traverse_return(env, stm)
+  elseif tag == "Break" then
+    return traverse_break(env, stm)
+  elseif tag == "Continue" then
+    return traverse_continue(env,stm)
+  elseif tag == "Call" then -- `Call{ expr expr* }
+    return traverse_call(env, stm)
+  elseif tag == "Invoke" then -- `Invoke{ expr `String{ <string> } expr* }
+    return traverse_invoke(env, stm)
+  else
+    -- tostring keeps the intended message even when the node has no tag.
+    error("expecting a statement, but got a " .. tostring(tag))
+  end
+end
+
+-- Checks every statement of a block inside a scope of its own.
+function traverse_block (env, block)
+  new_scope(env)
+  for _, v in ipairs(block) do
+    local status, msg = traverse_stm(env, v)
+    if not status then return status, msg end
+  end
+  end_scope(env)
+  return true
+end
+
+
+-- Runs all checks over a whole syntax tree.
+-- ast and errorinfo must both be tables (asserted). A fresh env is built for
+-- each call, so state left behind by a failed check is never reused. The
+-- top-level chunk is registered as a function that accepts '...'.
+-- Returns ast on success, or a falsy status plus a message.
+local function traverse (ast, errorinfo)
+  assert(type(ast) == "table")
+  assert(type(errorinfo) == "table")
+  local env = { errorinfo = errorinfo, ["function"] = {} }
+  new_function(env)
+  set_vararg(env, true)
+  local status, msg = traverse_block(env, ast)
+  if not status then return status, msg end
+  end_function(env)
+  status, msg = verify_pending_gotos(env)
+  if not status then return status, msg end
+  return ast
+end
+
+-- Parses subject with grammar G and then validates the resulting tree.
+-- filename is only stored in errorinfo, which is passed to the grammar as an
+-- extra match argument and to traverse.
+-- Returns the tree, or a falsy value plus an error message.
+function parser.parse (subject, filename)
+  local errorinfo = { subject = subject, filename = filename }
+  --lpeg.setmaxstack(1000)
+  local ast, error_msg = lpeg.match(G, subject, nil, errorinfo)
+  if not ast then return ast, error_msg end
+  return traverse(ast, errorinfo)
+end
+
+return parser
diff --git a/src/scope.lua b/src/scope.lua
new file mode 100644
index 0000000..dd19392
--- /dev/null
+++ b/src/scope.lua
@@ -0,0 +1,74 @@
+--[[
+This module implements functions that handle scoping rules
+]]
+local scope = {}
+
+-- Converts position i in string s into a line number and a column.
+-- The text up to i is split on "\n"; the line number is the count of
+-- pieces and the column is the length of the last piece (never below 1).
+-- Position 1 is answered directly as line 1, column 1.
+function scope.lineno (s, i)
+  if i == 1 then return 1, 1 end
+  local l, lastline = 0, ""
+  -- The appended newline makes the final, unterminated line match too.
+  s = s:sub(1, i) .. "\n"
+  for line in s:gmatch("[^\n]*[\n]") do
+    l = l + 1
+    lastline = line
+  end
+  -- lastline still carries its trailing newline, hence the -1.
+  local c = lastline:len() - 1
+  return l, c ~= 0 and c or 1
+end
+
+-- Enters a new scope level (0 on first use, previous + 1 afterwards),
+-- records it in env.maxscope and gives it empty "label", "local" and "goto"
+-- tables. Any tables already stored at that level are replaced.
+function scope.new_scope (env)
+  if not env.scope then
+    env.scope = 0
+  else
+    env.scope = env.scope + 1
+  end
+  -- Named `level` so the module table `scope` is not shadowed.
+  local level = env.scope
+  env.maxscope = level
+  env[level] = {}
+  env[level]["label"] = {}
+  env[level]["local"] = {}
+  env[level]["goto"] = {}
+end
+
+-- Moves one scope level deeper without creating its tables.
+function scope.begin_scope (env)
+  env.scope = env.scope + 1
+end
+
+-- Moves back to the enclosing scope level.
+function scope.end_scope (env)
+  env.scope = env.scope - 1
+end
+
+-- Enters a new function level (0 on first use, previous + 1 afterwards) and
+-- stores an empty record for it in env["function"].
+function scope.new_function (env)
+  if not env.fscope then
+    env.fscope = 0
+  else
+    env.fscope = env.fscope + 1
+  end
+  local fscope = env.fscope
+  env["function"][fscope] = {}
+end
+
+-- Moves one function level deeper without creating its record.
+function scope.begin_function (env)
+  env.fscope = env.fscope + 1
+end
+
+-- Moves back to the enclosing function level.
+function scope.end_function (env)
+  env.fscope = env.fscope - 1
+end
+
+-- Increments the loop nesting counter, starting it at 1 on first use.
+function scope.begin_loop (env)
+  env.loop = (env.loop or 0) + 1
+end
+
+-- Decrements the loop nesting counter; begin_loop must have run before.
+function scope.end_loop (env)
+  env.loop = env.loop - 1
+end
+
+-- Tells whether at least one loop is currently open.
+-- Always returns a boolean, also before any loop was ever entered.
+function scope.insideloop (env)
+  return env.loop ~= nil and env.loop > 0
+end
+
+return scope
-- cgit v1.2.3-70-g09d2