Reduction of moose.lua sizing working now!

This commit is contained in:
FlightControl_Master
2017-09-26 18:47:33 +02:00
parent 11067d4bfd
commit 5558c26db7
160 changed files with 36080 additions and 229 deletions

View File

@@ -0,0 +1,181 @@
---------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Convert between various code representation formats. Atomic
-- converters are written in extenso, others are composed automatically
-- by chaining the atomic ones together in a closure.
--
-- Supported formats are:
--
-- * srcfile: the name of a file containing sources.
-- * src: these sources as a single string.
-- * lexstream: a stream of lexemes.
-- * ast: an abstract syntax tree.
-- * proto: a (Yueliang) struture containing a high level
-- representation of bytecode. Largely based on the
-- Proto structure in Lua's VM
-- * bytecode: a string dump of the function, as taken by
-- loadstring() and produced by string.dump().
-- * function: an executable lua function in RAM.
--
--------------------------------------------------------------------------------
local checks = require 'checks'
local M = { }
--------------------------------------------------------------------------------
-- Order of the transformations. if 'a' is on the left of 'b', then a 'a' can
-- be transformed into a 'b' (but not the other way around).
-- M.sequence goes for numbers to format names, M.order goes from format
-- names to numbers.
--------------------------------------------------------------------------------
M.sequence = {
'srcfile', 'src', 'lexstream', 'ast', 'proto', 'bytecode', 'function' }
local arg_types = {
srcfile = { 'string', '?string' },
src = { 'string', '?string' },
lexstream = { 'lexer.stream', '?string' },
ast = { 'table', '?string' },
proto = { 'table', '?string' },
bytecode = { 'string', '?string' },
}
if false then
-- if defined, runs on every newly-generated AST
function M.check_ast(ast)
local function rec(x, n, parent)
if not x.lineinfo and parent.lineinfo then
local pp = require 'metalua.pprint'
pp.printf("WARNING: Missing lineinfo in child #%s `%s{...} of node at %s",
n, x.tag or '', tostring(parent.lineinfo))
end
for i, child in ipairs(x) do
if type(child)=='table' then rec(child, i, x) end
end
end
rec(ast, -1, { })
end
end
M.order= { }; for a,b in pairs(M.sequence) do M.order[b]=a end
local CONV = { } -- conversion metatable __index
function CONV :srcfile_to_src(x, name)
checks('metalua.compiler', 'string', '?string')
name = name or '@'..x
local f, msg = io.open (x, 'rb')
if not f then error(msg) end
local r, msg = f :read '*a'
if not r then error("Cannot read file '"..x.."': "..msg) end
f :close()
return r, name
end
function CONV :src_to_lexstream(src, name)
checks('metalua.compiler', 'string', '?string')
local r = self.parser.lexer :newstream (src, name)
return r, name
end
function CONV :lexstream_to_ast(lx, name)
checks('metalua.compiler', 'lexer.stream', '?string')
local r = self.parser.chunk(lx)
r.source = name
if M.check_ast then M.check_ast (r) end
return r, name
end
local bytecode_compiler = nil -- cache to avoid repeated `pcall(require(...))`
local function get_bytecode_compiler()
if bytecode_compiler then return bytecode_compiler else
local status, result = pcall(require, 'metalua.compiler.bytecode')
if status then
bytecode_compiler = result
return result
elseif string.match(result, "not found") then
error "Compilation only available with full Metalua"
else error (result) end
end
end
function CONV :ast_to_proto(ast, name)
checks('metalua.compiler', 'table', '?string')
return get_bytecode_compiler().ast_to_proto(ast, name), name
end
function CONV :proto_to_bytecode(proto, name)
return get_bytecode_compiler().proto_to_bytecode(proto), name
end
function CONV :bytecode_to_function(bc, name)
checks('metalua.compiler', 'string', '?string')
return loadstring(bc, name)
end
-- Create all sensible combinations
for i=1,#M.sequence do
local src = M.sequence[i]
for j=i+2, #M.sequence do
local dst = M.sequence[j]
local dst_name = src.."_to_"..dst
local my_arg_types = arg_types[src]
local functions = { }
for k=i, j-1 do
local name = M.sequence[k].."_to_"..M.sequence[k+1]
local f = assert(CONV[name], name)
table.insert (functions, f)
end
CONV[dst_name] = function(self, a, b)
checks('metalua.compiler', unpack(my_arg_types))
for _, f in ipairs(functions) do
a, b = f(self, a, b)
end
return a, b
end
--printf("Created M.%s out of %s", dst_name, table.concat(n, ', '))
end
end
--------------------------------------------------------------------------------
-- This one goes in the "wrong" direction, cannot be composed.
--------------------------------------------------------------------------------
function CONV :function_to_bytecode(...) return string.dump(...) end
function CONV :ast_to_src(...)
require 'metalua.loader' -- ast_to_string isn't written in plain lua
return require 'metalua.compiler.ast_to_src' (...)
end
local MT = { __index=CONV, __type='metalua.compiler' }
function M.new()
local parser = require 'metalua.compiler.parser' .new()
local self = { parser = parser }
setmetatable(self, MT)
return self
end
return M

View File

@@ -0,0 +1,682 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-{ extension ('match', ...) }
local M = { }
M.__index = M
M.__call = |self, ...| self:run(...)
local pp=require 'metalua.pprint'
--------------------------------------------------------------------------------
-- Instanciate a new AST->source synthetizer
--------------------------------------------------------------------------------
function M.new ()
local self = {
_acc = { }, -- Accumulates pieces of source as strings
current_indent = 0, -- Current level of line indentation
indent_step = " " -- Indentation symbol, normally spaces or '\t'
}
return setmetatable (self, M)
end
--------------------------------------------------------------------------------
-- Run a synthetizer on the `ast' arg and return the source as a string.
-- Can also be used as a static method `M.run (ast)'; in this case,
-- a temporary Metizer is instanciated on the fly.
--------------------------------------------------------------------------------
function M:run (ast)
if not ast then
self, ast = M.new(), self
end
self._acc = { }
self:node (ast)
return table.concat (self._acc)
end
--------------------------------------------------------------------------------
-- Accumulate a piece of source file in the synthetizer.
--------------------------------------------------------------------------------
function M:acc (x)
if x then table.insert (self._acc, x) end
end
--------------------------------------------------------------------------------
-- Accumulate an indented newline.
-- Jumps an extra line if indentation is 0, so that
-- toplevel definitions are separated by an extra empty line.
--------------------------------------------------------------------------------
function M:nl ()
if self.current_indent == 0 then self:acc "\n" end
self:acc ("\n" .. self.indent_step:rep (self.current_indent))
end
--------------------------------------------------------------------------------
-- Increase indentation and accumulate a new line.
--------------------------------------------------------------------------------
function M:nlindent ()
self.current_indent = self.current_indent + 1
self:nl ()
end
--------------------------------------------------------------------------------
-- Decrease indentation and accumulate a new line.
--------------------------------------------------------------------------------
function M:nldedent ()
self.current_indent = self.current_indent - 1
self:acc ("\n" .. self.indent_step:rep (self.current_indent))
end
--------------------------------------------------------------------------------
-- Keywords, which are illegal as identifiers.
--------------------------------------------------------------------------------
local keywords_list = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function", "if",
"in", "local", "nil", "not", "or",
"repeat", "return", "then", "true", "until",
"while" }
local keywords = { }
for _, kw in pairs(keywords_list) do keywords[kw]=true end
--------------------------------------------------------------------------------
-- Return true iff string `id' is a legal identifier name.
--------------------------------------------------------------------------------
local function is_ident (id)
return string['match'](id, "^[%a_][%w_]*$") and not keywords[id]
end
--------------------------------------------------------------------------------
-- Return true iff ast represents a legal function name for
-- syntax sugar ``function foo.bar.gnat() ... end'':
-- a series of nested string indexes, with an identifier as
-- the innermost node.
--------------------------------------------------------------------------------
local function is_idx_stack (ast)
match ast with
| `Id{ _ } -> return true
| `Index{ left, `String{ _ } } -> return is_idx_stack (left)
| _ -> return false
end
end
--------------------------------------------------------------------------------
-- Operator precedences, in increasing order.
-- This is not directly used, it's used to generate op_prec below.
--------------------------------------------------------------------------------
local op_preprec = {
{ "or", "and" },
{ "lt", "le", "eq", "ne" },
{ "concat" },
{ "add", "sub" },
{ "mul", "div", "mod" },
{ "unary", "not", "len" },
{ "pow" },
{ "index" } }
--------------------------------------------------------------------------------
-- operator --> precedence table, generated from op_preprec.
--------------------------------------------------------------------------------
local op_prec = { }
for prec, ops in ipairs (op_preprec) do
for _, op in ipairs (ops) do
op_prec[op] = prec
end
end
--------------------------------------------------------------------------------
-- operator --> source representation.
--------------------------------------------------------------------------------
local op_symbol = {
add = " + ", sub = " - ", mul = " * ",
div = " / ", mod = " % ", pow = " ^ ",
concat = " .. ", eq = " == ", ne = " ~= ",
lt = " < ", le = " <= ", ["and"] = " and ",
["or"] = " or ", ["not"] = "not ", len = "# " }
--------------------------------------------------------------------------------
-- Accumulate the source representation of AST `node' in
-- the synthetizer. Most of the work is done by delegating to
-- the method having the name of the AST tag.
-- If something can't be converted to normal sources, it's
-- instead dumped as a `-{ ... }' splice in the source accumulator.
--------------------------------------------------------------------------------
function M:node (node)
assert (self~=M and self._acc)
if node==nil then self:acc'<<error>>'
elseif not self.custom_printer or not self.custom_printer (self, node) then
if not node.tag then -- tagless (henceunindented) block.
self:list (node, self.nl)
else
local f = M[node.tag]
if type (f) == "function" then -- Delegate to tag method.
f (self, node, unpack (node))
elseif type (f) == "string" then -- tag string.
self:acc (f)
else -- No appropriate method, fall back to splice dumping.
-- This cannot happen in a plain Lua AST.
self:acc " -{ "
self:acc (pp.tostring (node, {metalua_tag=1, hide_hash=1}), 80)
self:acc " }"
end
end
end
end
function M:block(body)
if not self.custom_printer or not self.custom_printer (self, body) then
self:nlindent ()
self:list (body, self.nl)
self:nldedent ()
end
end
--------------------------------------------------------------------------------
-- Convert every node in the AST list `list' passed as 1st arg.
-- `sep' is an optional separator to be accumulated between each list element,
-- it can be a string or a synth method.
-- `start' is an optional number (default == 1), indicating which is the
-- first element of list to be converted, so that we can skip the begining
-- of a list.
--------------------------------------------------------------------------------
function M:list (list, sep, start)
for i = start or 1, # list do
self:node (list[i])
if list[i + 1] then
if not sep then
elseif type (sep) == "function" then sep (self)
elseif type (sep) == "string" then self:acc (sep)
else error "Invalid list separator" end
end
end
end
--------------------------------------------------------------------------------
--
-- Tag methods.
-- ------------
--
-- Specific AST node dumping methods, associated to their node kinds
-- by their name, which is the corresponding AST tag.
-- synth:node() is in charge of delegating a node's treatment to the
-- appropriate tag method.
--
-- Such tag methods are called with the AST node as 1st arg.
-- As a convenience, the n node's children are passed as args #2 ... n+1.
--
-- There are several things that could be refactored into common subroutines
-- here: statement blocks dumping, function dumping...
-- However, given their small size and linear execution
-- (they basically perform series of :acc(), :node(), :list(),
-- :nl(), :nlindent() and :nldedent() calls), it seems more readable
-- to avoid multiplication of such tiny functions.
--
-- To make sense out of these, you need to know metalua's AST syntax, as
-- found in the reference manual or in metalua/doc/ast.txt.
--
--------------------------------------------------------------------------------
function M:Do (node)
self:acc "do"
self:block (node)
self:acc "end"
end
function M:Set (node)
match node with
| `Set{ { `Index{ lhs, `String{ method } } },
{ `Function{ { `Id "self", ... } == params, body } } }
if is_idx_stack (lhs) and is_ident (method) ->
-- ``function foo:bar(...) ... end'' --
self:acc "function "
self:node (lhs)
self:acc ":"
self:acc (method)
self:acc " ("
self:list (params, ", ", 2)
self:acc ")"
self:block (body)
self:acc "end"
| `Set{ { lhs }, { `Function{ params, body } } } if is_idx_stack (lhs) ->
-- ``function foo(...) ... end'' --
self:acc "function "
self:node (lhs)
self:acc " ("
self:list (params, ", ")
self:acc ")"
self:block (body)
self:acc "end"
| `Set{ { `Id{ lhs1name } == lhs1, ... } == lhs, rhs }
if not is_ident (lhs1name) ->
-- ``foo, ... = ...'' when foo is *not* a valid identifier.
-- In that case, the spliced 1st variable must get parentheses,
-- to be distinguished from a statement splice.
-- This cannot happen in a plain Lua AST.
self:acc "("
self:node (lhs1)
self:acc ")"
if lhs[2] then -- more than one lhs variable
self:acc ", "
self:list (lhs, ", ", 2)
end
self:acc " = "
self:list (rhs, ", ")
| `Set{ lhs, rhs } ->
-- ``... = ...'', no syntax sugar --
self:list (lhs, ", ")
self:acc " = "
self:list (rhs, ", ")
| `Set{ lhs, rhs, annot } ->
-- ``... = ...'', no syntax sugar, annotation --
local n = #lhs
for i=1,n do
local ell, a = lhs[i], annot[i]
self:node (ell)
if a then
self:acc ' #'
self:node(a)
end
if i~=n then self:acc ', ' end
end
self:acc " = "
self:list (rhs, ", ")
end
end
function M:While (node, cond, body)
self:acc "while "
self:node (cond)
self:acc " do"
self:block (body)
self:acc "end"
end
function M:Repeat (node, body, cond)
self:acc "repeat"
self:block (body)
self:acc "until "
self:node (cond)
end
function M:If (node)
for i = 1, #node-1, 2 do
-- for each ``if/then'' and ``elseif/then'' pair --
local cond, body = node[i], node[i+1]
self:acc (i==1 and "if " or "elseif ")
self:node (cond)
self:acc " then"
self:block (body)
end
-- odd number of children --> last one is an `else' clause --
if #node%2 == 1 then
self:acc "else"
self:block (node[#node])
end
self:acc "end"
end
function M:Fornum (node, var, first, last)
local body = node[#node]
self:acc "for "
self:node (var)
self:acc " = "
self:node (first)
self:acc ", "
self:node (last)
if #node==5 then -- 5 children --> child #4 is a step increment.
self:acc ", "
self:node (node[4])
end
self:acc " do"
self:block (body)
self:acc "end"
end
function M:Forin (node, vars, generators, body)
self:acc "for "
self:list (vars, ", ")
self:acc " in "
self:list (generators, ", ")
self:acc " do"
self:block (body)
self:acc "end"
end
function M:Local (node, lhs, rhs, annots)
if next (lhs) then
self:acc "local "
if annots then
local n = #lhs
for i=1, n do
self:node (lhs)
local a = annots[i]
if a then
self:acc ' #'
self:node (a)
end
if i~=n then self:acc ', ' end
end
else
self:list (lhs, ", ")
end
if rhs[1] then
self:acc " = "
self:list (rhs, ", ")
end
else -- Can't create a local statement with 0 variables in plain Lua
self:acc (pp.tostring (node, {metalua_tag=1, hide_hash=1, fix_indent=2}))
end
end
function M:Localrec (node, lhs, rhs)
match node with
| `Localrec{ { `Id{name} }, { `Function{ params, body } } }
if is_ident (name) ->
-- ``local function name() ... end'' --
self:acc "local function "
self:acc (name)
self:acc " ("
self:list (params, ", ")
self:acc ")"
self:block (body)
self:acc "end"
| _ ->
-- Other localrec are unprintable ==> splice them --
-- This cannot happen in a plain Lua AST. --
self:acc "-{ "
self:acc (pp.tostring (node, {metalua_tag=1, hide_hash=1, fix_indent=2}))
self:acc " }"
end
end
function M:Call (node, f)
-- single string or table literal arg ==> no need for parentheses. --
local parens
match node with
| `Call{ _, `String{_} }
| `Call{ _, `Table{...}} -> parens = false
| _ -> parens = true
end
self:node (f)
self:acc (parens and " (" or " ")
self:list (node, ", ", 2) -- skip `f'.
self:acc (parens and ")")
end
function M:Invoke (node, f, method)
-- single string or table literal arg ==> no need for parentheses. --
local parens
match node with
| `Invoke{ _, _, `String{_} }
| `Invoke{ _, _, `Table{...}} -> parens = false
| _ -> parens = true
end
self:node (f)
self:acc ":"
self:acc (method[1])
self:acc (parens and " (" or " ")
self:list (node, ", ", 3) -- Skip args #1 and #2, object and method name.
self:acc (parens and ")")
end
function M:Return (node)
self:acc "return "
self:list (node, ", ")
end
M.Break = "break"
M.Nil = "nil"
M.False = "false"
M.True = "true"
M.Dots = "..."
function M:Number (node, n)
self:acc (tostring (n))
end
function M:String (node, str)
-- format "%q" prints '\n' in an umpractical way IMO,
-- so this is fixed with the :gsub( ) call.
self:acc (string.format ("%q", str):gsub ("\\\n", "\\n"))
end
function M:Function (node, params, body, annots)
self:acc "function ("
if annots then
local n = #params
for i=1,n do
local p, a = params[i], annots[i]
self:node(p)
if annots then
self:acc " #"
self:node(a)
end
if i~=n then self:acc ', ' end
end
else
self:list (params, ", ")
end
self:acc ")"
self:block (body)
self:acc "end"
end
function M:Table (node)
if not node[1] then self:acc "{ }" else
self:acc "{"
if #node > 1 then self:nlindent () else self:acc " " end
for i, elem in ipairs (node) do
match elem with
| `Pair{ `String{ key }, value } if is_ident (key) ->
-- ``key = value''. --
self:acc (key)
self:acc " = "
self:node (value)
| `Pair{ key, value } ->
-- ``[key] = value''. --
self:acc "["
self:node (key)
self:acc "] = "
self:node (value)
| _ ->
-- ``value''. --
self:node (elem)
end
if node [i+1] then
self:acc ","
self:nl ()
end
end
if #node > 1 then self:nldedent () else self:acc " " end
self:acc "}"
end
end
function M:Op (node, op, a, b)
-- Transform ``not (a == b)'' into ``a ~= b''. --
match node with
| `Op{ "not", `Op{ "eq", _a, _b } }
| `Op{ "not", `Paren{ `Op{ "eq", _a, _b } } } ->
op, a, b = "ne", _a, _b
| _ ->
end
if b then -- binary operator.
local left_paren, right_paren
match a with
| `Op{ op_a, ...} if op_prec[op] >= op_prec[op_a] -> left_paren = true
| _ -> left_paren = false
end
match b with -- FIXME: might not work with right assoc operators ^ and ..
| `Op{ op_b, ...} if op_prec[op] >= op_prec[op_b] -> right_paren = true
| _ -> right_paren = false
end
self:acc (left_paren and "(")
self:node (a)
self:acc (left_paren and ")")
self:acc (op_symbol [op])
self:acc (right_paren and "(")
self:node (b)
self:acc (right_paren and ")")
else -- unary operator.
local paren
match a with
| `Op{ op_a, ... } if op_prec[op] >= op_prec[op_a] -> paren = true
| _ -> paren = false
end
self:acc (op_symbol[op])
self:acc (paren and "(")
self:node (a)
self:acc (paren and ")")
end
end
function M:Paren (node, content)
self:acc "("
self:node (content)
self:acc ")"
end
function M:Index (node, table, key)
local paren_table
-- Check precedence, see if parens are needed around the table --
match table with
| `Op{ op, ... } if op_prec[op] < op_prec.index -> paren_table = true
| _ -> paren_table = false
end
self:acc (paren_table and "(")
self:node (table)
self:acc (paren_table and ")")
match key with
| `String{ field } if is_ident (field) ->
-- ``table.key''. --
self:acc "."
self:acc (field)
| _ ->
-- ``table [key]''. --
self:acc "["
self:node (key)
self:acc "]"
end
end
function M:Id (node, name)
if is_ident (name) then
self:acc (name)
else -- Unprintable identifier, fall back to splice representation.
-- This cannot happen in a plain Lua AST.
self:acc "-{`Id "
self:String (node, name)
self:acc "}"
end
end
M.TDyn = '*'
M.TDynbar = '**'
M.TPass = 'pass'
M.TField = 'field'
M.TIdbar = M.TId
M.TReturn = M.Return
function M:TId (node, name) self:acc(name) end
function M:TCatbar(node, te, tebar)
self:acc'('
self:node(te)
self:acc'|'
self:tebar(tebar)
self:acc')'
end
function M:TFunction(node, p, r)
self:tebar(p)
self:acc '->'
self:tebar(r)
end
function M:TTable (node, default, pairs)
self:acc '['
self:list (pairs, ', ')
if default.tag~='TField' then
self:acc '|'
self:node (default)
end
self:acc ']'
end
function M:TPair (node, k, v)
self:node (k)
self:acc '='
self:node (v)
end
function M:TIdbar (node, name)
self :acc (name)
end
function M:TCatbar (node, a, b)
self:node(a)
self:acc ' ++ '
self:node(b)
end
function M:tebar(node)
if node.tag then self:node(node) else
self:acc '('
self:list(node, ', ')
self:acc ')'
end
end
function M:TUnkbar(node, name)
self:acc '~~'
self:acc (name)
end
function M:TUnk(node, name)
self:acc '~'
self:acc (name)
end
for name, tag in pairs{ const='TConst', var='TVar', currently='TCurrently', just='TJust' } do
M[tag] = function(self, node, te)
self:acc (name..' ')
self:node(te)
end
end
return M

View File

@@ -0,0 +1,29 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local compile = require 'metalua.compiler.bytecode.compile'
local ldump = require 'metalua.compiler.bytecode.ldump'
local M = { }
M.ast_to_proto = compile.ast_to_proto
M.proto_to_bytecode = ldump.dump_string
M.proto_to_file = ldump.dump_file
return M

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,448 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2005-2013 Kein-Hong Man, Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Kein-Hong Man - Initial implementation for Lua 5.0, part of Yueliang
-- Fabien Fleutot - Port to Lua 5.1, integration with Metalua
--
-------------------------------------------------------------------------------
--[[--------------------------------------------------------------------
ldump.lua
Save bytecodes in Lua
This file is part of Yueliang.
Copyright (c) 2005 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
------------------------------------------------------------------------
[FF] Slightly modified, mainly to produce Lua 5.1 bytecode.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- Notes:
-- * LUA_NUMBER (double), byte order (little endian) and some other
-- header values hard-coded; see other notes below...
-- * One significant difference is that instructions are still in table
-- form (with OP/A/B/C/Bx fields) and luaP:Instruction() is needed to
-- convert them into 4-char strings
-- * Deleted:
-- luaU:DumpVector: folded into DumpLines, DumpCode
-- * Added:
-- luaU:endianness() (from lundump.c)
-- luaU:make_setS: create a chunk writer that writes to a string
-- luaU:make_setF: create a chunk writer that writes to a file
-- (lua.h contains a typedef for a Chunkwriter pointer, and
-- a Lua-based implementation exists, writer() in lstrlib.c)
-- luaU:from_double(x): encode double value for writing
-- luaU:from_int(x): encode integer value for writing
-- (error checking is limited for these conversion functions)
-- (double conversion does not support denormals or NaNs)
-- luaU:ttype(o) (from lobject.h)
----------------------------------------------------------------------]]
local luaP = require 'metalua.compiler.bytecode.lopcodes'
local M = { }
local format = { }
format.header = string.dump(function()end):sub(1, 12)
format.little_endian, format.int_size,
format.size_t_size, format.instr_size,
format.number_size, format.integral = format.header:byte(7, 12)
format.little_endian = format.little_endian~=0
format.integral = format.integral ~=0
assert(format.integral or format.number_size==8, "Number format not supported by dumper")
assert(format.little_endian, "Big endian architectures not supported by dumper")
--requires luaP
local luaU = { }
M.luaU = luaU
luaU.format = format
-- constants used by dumper
luaU.LUA_TNIL = 0
luaU.LUA_TBOOLEAN = 1
luaU.LUA_TNUMBER = 3 -- (all in lua.h)
luaU.LUA_TSTRING = 4
luaU.LUA_TNONE = -1
-- definitions for headers of binary files
--luaU.LUA_SIGNATURE = "\27Lua" -- binary files start with "<esc>Lua"
--luaU.VERSION = 81 -- 0x50; last format change was in 5.0
--luaU.FORMAT_VERSION = 0 -- 0 is official version. yeah I know I'm a liar.
-- a multiple of PI for testing native format
-- multiplying by 1E7 gives non-trivial integer values
--luaU.TEST_NUMBER = 3.14159265358979323846E7
--[[--------------------------------------------------------------------
-- Additional functions to handle chunk writing
-- * to use make_setS and make_setF, see test_ldump.lua elsewhere
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- works like the lobject.h version except that TObject used in these
-- scripts only has a 'value' field, no 'tt' field (native types used)
------------------------------------------------------------------------
function luaU:ttype(o)
local tt = type(o.value)
if tt == "number" then return self.LUA_TNUMBER
elseif tt == "string" then return self.LUA_TSTRING
elseif tt == "nil" then return self.LUA_TNIL
elseif tt == "boolean" then return self.LUA_TBOOLEAN
else
return self.LUA_TNONE -- the rest should not appear
end
end
------------------------------------------------------------------------
-- create a chunk writer that writes to a string
-- * returns the writer function and a table containing the string
-- * to get the final result, look in buff.data
------------------------------------------------------------------------
function luaU:make_setS()
local buff = {}
buff.data = ""
local writer =
function(s, buff) -- chunk writer
if not s then return end
buff.data = buff.data..s
end
return writer, buff
end
------------------------------------------------------------------------
-- create a chunk writer that writes to a file
-- * returns the writer function and a table containing the file handle
-- * if a nil is passed, then writer should close the open file
------------------------------------------------------------------------
function luaU:make_setF(filename)
local buff = {}
buff.h = io.open(filename, "wb")
if not buff.h then return nil end
local writer =
function(s, buff) -- chunk writer
if not buff.h then return end
if not s then buff.h:close(); return end
buff.h:write(s)
end
return writer, buff
end
-----------------------------------------------------------------------
-- converts a IEEE754 double number to an 8-byte little-endian string
-- * luaU:from_double() and luaU:from_int() are from ChunkBake project
-- * supports +/- Infinity, but not denormals or NaNs
-----------------------------------------------------------------------
function luaU:from_double(x)
local function grab_byte(v)
return math.floor(v / 256),
string.char(math.mod(math.floor(v), 256))
end
local sign = 0
if x < 0 then sign = 1; x = -x end
local mantissa, exponent = math.frexp(x)
if x == 0 then -- zero
mantissa, exponent = 0, 0
elseif x == 1/0 then
mantissa, exponent = 0, 2047
else
mantissa = (mantissa * 2 - 1) * math.ldexp(0.5, 53)
exponent = exponent + 1022
end
local v, byte = "" -- convert to bytes
x = mantissa
for i = 1,6 do
x, byte = grab_byte(x); v = v..byte -- 47:0
end
x, byte = grab_byte(exponent * 16 + x); v = v..byte -- 55:48
x, byte = grab_byte(sign * 128 + x); v = v..byte -- 63:56
return v
end
-----------------------------------------------------------------------
-- converts a number to a little-endian 32-bit integer string
-- * input value assumed to not overflow, can be signed/unsigned
-----------------------------------------------------------------------
function luaU:from_int(x, size)
local v = ""
x = math.floor(x)
if x >= 0 then
for i = 1, size do
v = v..string.char(math.mod(x, 256)); x = math.floor(x / 256)
end
else -- x < 0
x = -x
local carry = 1
for i = 1, size do
local c = 255 - math.mod(x, 256) + carry
if c == 256 then c = 0; carry = 1 else carry = 0 end
v = v..string.char(c); x = math.floor(x / 256)
end
end
return v
end
--[[--------------------------------------------------------------------
-- Functions to make a binary chunk
-- * many functions have the size parameter removed, since output is
-- in the form of a string and some sizes are implicit or hard-coded
-- * luaU:DumpVector has been deleted (used in DumpCode & DumpLines)
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- dump a block of literal bytes
------------------------------------------------------------------------
function luaU:DumpLiteral(s, D) self:DumpBlock(s, D) end
--[[--------------------------------------------------------------------
-- struct DumpState:
-- L -- lua_State (not used in this script)
-- write -- lua_Chunkwriter (chunk writer function)
-- data -- void* (chunk writer context or data already written)
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- dumps a block of bytes
-- * lua_unlock(D.L), lua_lock(D.L) deleted
------------------------------------------------------------------------
function luaU:DumpBlock(b, D) D.write(b, D.data) end
------------------------------------------------------------------------
-- dumps a single byte
------------------------------------------------------------------------
function luaU:DumpByte(y, D)
self:DumpBlock(string.char(y), D)
end
------------------------------------------------------------------------
-- dumps a signed integer of size `format.int_size` (for int)
------------------------------------------------------------------------
function luaU:DumpInt(x, D)
self:DumpBlock(self:from_int(x, format.int_size), D)
end
------------------------------------------------------------------------
-- dumps an unsigned integer of size `format.size_t_size` (for size_t)
------------------------------------------------------------------------
function luaU:DumpSize(x, D)
self:DumpBlock(self:from_int(x, format.size_t_size), D)
end
------------------------------------------------------------------------
-- dumps a LUA_NUMBER; can be an int or double depending on the VM.
------------------------------------------------------------------------
function luaU:DumpNumber(x, D)
if format.integral then
self:DumpBlock(self:from_int(x, format.number_size), D)
else
self:DumpBlock(self:from_double(x), D)
end
end
------------------------------------------------------------------------
-- dumps a Lua string
------------------------------------------------------------------------
function luaU:DumpString(s, D)
if s == nil then
self:DumpSize(0, D)
else
s = s.."\0" -- include trailing '\0'
self:DumpSize(string.len(s), D)
self:DumpBlock(s, D)
end
end
------------------------------------------------------------------------
-- dumps instruction block from function prototype
------------------------------------------------------------------------
function luaU:DumpCode(f, D)
local n = f.sizecode
self:DumpInt(n, D)
--was DumpVector
for i = 0, n - 1 do
self:DumpBlock(luaP:Instruction(f.code[i]), D)
end
end
------------------------------------------------------------------------
-- dumps local variable names from function prototype
------------------------------------------------------------------------
function luaU:DumpLocals(f, D)
local n = f.sizelocvars
self:DumpInt(n, D)
for i = 0, n - 1 do
-- Dirty temporary fix:
-- `Stat{ } keeps properly count of the number of local vars,
-- but fails to keep score of their debug info (names).
-- It therefore might happen that #f.localvars < f.sizelocvars, or
-- that a variable's startpc and endpc fields are left unset.
-- FIXME: This might not be needed anymore, check the bug report
-- by J. Belmonte.
local var = f.locvars[i]
if not var then break end
-- printf("[DUMPLOCALS] dumping local var #%i = %s", i, table.tostring(var))
self:DumpString(var.varname, D)
self:DumpInt(var.startpc or 0, D)
self:DumpInt(var.endpc or 0, D)
end
end
------------------------------------------------------------------------
-- dumps line information from function prototype
------------------------------------------------------------------------
function luaU:DumpLines(f, D)
local n = f.sizelineinfo
self:DumpInt(n, D)
--was DumpVector
for i = 0, n - 1 do
self:DumpInt(f.lineinfo[i], D) -- was DumpBlock
--print(i, f.lineinfo[i])
end
end
------------------------------------------------------------------------
-- dump upvalue names from function prototype
------------------------------------------------------------------------
function luaU:DumpUpvalues(f, D)
local n = f.sizeupvalues
self:DumpInt(n, D)
for i = 0, n - 1 do
self:DumpString(f.upvalues[i], D)
end
end
------------------------------------------------------------------------
-- dump constant pool from function prototype
-- * nvalue(o) and tsvalue(o) macros removed
------------------------------------------------------------------------
function luaU:DumpConstants(f, D)
local n = f.sizek
self:DumpInt(n, D)
for i = 0, n - 1 do
local o = f.k[i] -- TObject
local tt = self:ttype(o)
assert (tt >= 0)
self:DumpByte(tt, D)
if tt == self.LUA_TNUMBER then
self:DumpNumber(o.value, D)
elseif tt == self.LUA_TSTRING then
self:DumpString(o.value, D)
elseif tt == self.LUA_TBOOLEAN then
self:DumpByte (o.value and 1 or 0, D)
elseif tt == self.LUA_TNIL then
else
assert(false) -- cannot happen
end
end
end
function luaU:DumpProtos (f, D)
local n = f.sizep
assert (n)
self:DumpInt(n, D)
for i = 0, n - 1 do
self:DumpFunction(f.p[i], f.source, D)
end
end
function luaU:DumpDebug(f, D)
self:DumpLines(f, D)
self:DumpLocals(f, D)
self:DumpUpvalues(f, D)
end
------------------------------------------------------------------------
-- dump child function prototypes from function prototype
--FF completely reworked for 5.1 format
------------------------------------------------------------------------
function luaU:DumpFunction(f, p, D)
-- print "Dumping function:"
-- table.print(f, 60)
local source = f.source
if source == p then source = nil end
self:DumpString(source, D)
self:DumpInt(f.lineDefined, D)
self:DumpInt(f.lastLineDefined or 42, D)
self:DumpByte(f.nups, D)
self:DumpByte(f.numparams, D)
self:DumpByte(f.is_vararg, D)
self:DumpByte(f.maxstacksize, D)
self:DumpCode(f, D)
self:DumpConstants(f, D)
self:DumpProtos( f, D)
self:DumpDebug(f, D)
end
------------------------------------------------------------------------
-- dump Lua header section (some sizes hard-coded)
--FF: updated for version 5.1
------------------------------------------------------------------------
function luaU:DumpHeader(D)
self:DumpLiteral(format.header, D)
end
------------------------------------------------------------------------
-- dump function as precompiled chunk
-- * w, data are created from make_setS, make_setF
--FF: suppressed extraneous [L] param
------------------------------------------------------------------------
function luaU:dump (Main, w, data)
local D = {} -- DumpState
D.write = w
D.data = data
self:DumpHeader(D)
self:DumpFunction(Main, nil, D)
-- added: for a chunk writer writing to a file, this final call with
-- nil data is to indicate to the writer to close the file
D.write(nil, D.data)
end
------------------------------------------------------------------------
-- find byte order (from lundump.c)
-- * hard-coded to little-endian
------------------------------------------------------------------------
function luaU:endianness()
return 1
end
-- FIXME: ugly concat-base generation in [make_setS], bufferize properly!
function M.dump_string (proto)
local writer, buff = luaU:make_setS()
luaU:dump (proto, writer, buff)
return buff.data
end
-- FIXME: [make_setS] sucks, perform synchronous file writing
-- Now unused
function M.dump_file (proto, filename)
local writer, buff = luaU:make_setS()
luaU:dump (proto, writer, buff)
local file = io.open (filename, "wb")
file:write (buff.data)
io.close(file)
--if UNIX_SHARPBANG then os.execute ("chmod a+x "..filename) end
end
return M

View File

@@ -0,0 +1,442 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2005-2013 Kein-Hong Man, Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Kein-Hong Man - Initial implementation for Lua 5.0, part of Yueliang
-- Fabien Fleutot - Port to Lua 5.1, integration with Metalua
--
-------------------------------------------------------------------------------
--[[--------------------------------------------------------------------
$Id$
lopcodes.lua
Lua 5 virtual machine opcodes in Lua
This file is part of Yueliang.
Copyright (c) 2005 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
------------------------------------------------------------------------
[FF] Slightly modified, mainly to produce Lua 5.1 bytecode.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- Notes:
-- * an Instruction is a table with OP, A, B, C, Bx elements; this
-- should allow instruction handling to work with doubles and ints
-- * Added:
-- luaP:Instruction(i): convert field elements to a 4-char string
-- luaP:DecodeInst(x): convert 4-char string into field elements
-- * WARNING luaP:Instruction outputs instructions encoded in little-
-- endian form and field size and positions are hard-coded
----------------------------------------------------------------------]]
local function debugf() end
local luaP = { }
--[[
===========================================================================
We assume that instructions are unsigned numbers.
All instructions have an opcode in the first 6 bits.
Instructions can have the following fields:
'A' : 8 bits
'B' : 9 bits
'C' : 9 bits
'Bx' : 18 bits ('B' and 'C' together)
'sBx' : signed Bx
A signed argument is represented in excess K; that is, the number
value is the unsigned value minus K. K is exactly the maximum value
for that argument (so that -max is represented by 0, and +max is
represented by 2*max), which is half the maximum for the corresponding
unsigned argument.
===========================================================================
--]]
luaP.OpMode = {"iABC", "iABx", "iAsBx"} -- basic instruction format
------------------------------------------------------------------------
-- size and position of opcode arguments.
-- * WARNING size and position is hard-coded elsewhere in this script
------------------------------------------------------------------------
luaP.SIZE_C = 9
luaP.SIZE_B = 9
luaP.SIZE_Bx = luaP.SIZE_C + luaP.SIZE_B
luaP.SIZE_A = 8
luaP.SIZE_OP = 6
luaP.POS_C = luaP.SIZE_OP
luaP.POS_B = luaP.POS_C + luaP.SIZE_C
luaP.POS_Bx = luaP.POS_C
luaP.POS_A = luaP.POS_B + luaP.SIZE_B
--FF from 5.1
luaP.BITRK = 2^(luaP.SIZE_B - 1)
function luaP:ISK(x) return x >= self.BITRK end
luaP.MAXINDEXRK = luaP.BITRK - 1
function luaP:RKASK(x)
if x < self.BITRK then return x+self.BITRK else return x end
end
------------------------------------------------------------------------
-- limits for opcode arguments.
-- we use (signed) int to manipulate most arguments,
-- so they must fit in BITS_INT-1 bits (-1 for sign)
------------------------------------------------------------------------
-- removed "#if SIZE_Bx < BITS_INT-1" test, assume this script is
-- running on a Lua VM with double or int as LUA_NUMBER
luaP.MAXARG_Bx = math.ldexp(1, luaP.SIZE_Bx) - 1
luaP.MAXARG_sBx = math.floor(luaP.MAXARG_Bx / 2) -- 'sBx' is signed
luaP.MAXARG_A = math.ldexp(1, luaP.SIZE_A) - 1
luaP.MAXARG_B = math.ldexp(1, luaP.SIZE_B) - 1
luaP.MAXARG_C = math.ldexp(1, luaP.SIZE_C) - 1
-- creates a mask with 'n' 1 bits at position 'p'
-- MASK1(n,p) deleted
-- creates a mask with 'n' 0 bits at position 'p'
-- MASK0(n,p) deleted
--[[--------------------------------------------------------------------
Visual representation for reference:
31 | | | 0 bit position
+-----+-----+-----+----------+
| B | C | A | Opcode | iABC format
+-----+-----+-----+----------+
- 9 - 9 - 8 - 6 - field sizes
+-----+-----+-----+----------+
| [s]Bx | A | Opcode | iABx | iAsBx format
+-----+-----+-----+----------+
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- the following macros help to manipulate instructions
-- * changed to a table object representation, very clean compared to
-- the [nightmare] alternatives of using a number or a string
------------------------------------------------------------------------
-- these accept or return opcodes in the form of string names
function luaP:GET_OPCODE(i) return self.ROpCode[i.OP] end
function luaP:SET_OPCODE(i, o) i.OP = self.OpCode[o] end
function luaP:GETARG_A(i) return i.A end
function luaP:SETARG_A(i, u) i.A = u end
function luaP:GETARG_B(i) return i.B end
function luaP:SETARG_B(i, b) i.B = b end
function luaP:GETARG_C(i) return i.C end
function luaP:SETARG_C(i, b) i.C = b end
function luaP:GETARG_Bx(i) return i.Bx end
function luaP:SETARG_Bx(i, b) i.Bx = b end
function luaP:GETARG_sBx(i) return i.Bx - self.MAXARG_sBx end
function luaP:SETARG_sBx(i, b) i.Bx = b + self.MAXARG_sBx end
function luaP:CREATE_ABC(o,a,b,c)
return {OP = self.OpCode[o], A = a, B = b, C = c}
end
function luaP:CREATE_ABx(o,a,bc)
return {OP = self.OpCode[o], A = a, Bx = bc}
end
------------------------------------------------------------------------
-- Bit shuffling stuffs
------------------------------------------------------------------------
if false and pcall (require, 'bit') then
------------------------------------------------------------------------
-- Return a 4-char string little-endian encoded form of an instruction
------------------------------------------------------------------------
function luaP:Instruction(i)
--FIXME
end
else
------------------------------------------------------------------------
-- Version without bit manipulation library.
------------------------------------------------------------------------
local p2 = {1,2,4,8,16,32,64,128,256, 512, 1024, 2048, 4096}
-- keeps [n] bits from [x]
local function keep (x, n) return x % p2[n+1] end
-- shifts bits of [x] [n] places to the right
local function srb (x,n) return math.floor (x / p2[n+1]) end
-- shifts bits of [x] [n] places to the left
local function slb (x,n) return x * p2[n+1] end
------------------------------------------------------------------------
-- Return a 4-char string little-endian encoded form of an instruction
------------------------------------------------------------------------
function luaP:Instruction(i)
-- printf("Instr->string: %s %s", self.opnames[i.OP], table.tostring(i))
local c0, c1, c2, c3
-- change to OP/A/B/C format if needed
if i.Bx then i.C = keep (i.Bx, 9); i.B = srb (i.Bx, 9) end
-- c0 = 6B from opcode + 2LSB from A (flushed to MSB)
c0 = i.OP + slb (keep (i.A, 2), 6)
-- c1 = 6MSB from A + 2LSB from C (flushed to MSB)
c1 = srb (i.A, 2) + slb (keep (i.C, 2), 6)
-- c2 = 7MSB from C + 1LSB from B (flushed to MSB)
c2 = srb (i.C, 2) + slb (keep (i.B, 1), 7)
-- c3 = 8MSB from B
c3 = srb (i.B, 1)
--printf ("Instruction: %s %s", self.opnames[i.OP], tostringv (i))
--printf ("Bin encoding: %x %x %x %x", c0, c1, c2, c3)
return string.char(c0, c1, c2, c3)
end
end
------------------------------------------------------------------------
-- decodes a 4-char little-endian string into an instruction struct
------------------------------------------------------------------------
function luaP:DecodeInst(x)
error "Not implemented"
end
------------------------------------------------------------------------
-- invalid register that fits in 8 bits
------------------------------------------------------------------------
luaP.NO_REG = luaP.MAXARG_A
------------------------------------------------------------------------
-- R(x) - register
-- Kst(x) - constant (in constant table)
-- RK(x) == if x < MAXSTACK then R(x) else Kst(x-MAXSTACK)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- grep "ORDER OP" if you change these enums
------------------------------------------------------------------------
--[[--------------------------------------------------------------------
Lua virtual machine opcodes (enum OpCode):
------------------------------------------------------------------------
name args description
------------------------------------------------------------------------
OP_MOVE A B R(A) := R(B)
OP_LOADK A Bx R(A) := Kst(Bx)
OP_LOADBOOL A B C R(A) := (Bool)B; if (C) PC++
OP_LOADNIL A B R(A) := ... := R(B) := nil
OP_GETUPVAL A B R(A) := UpValue[B]
OP_GETGLOBAL A Bx R(A) := Gbl[Kst(Bx)]
OP_GETTABLE A B C R(A) := R(B)[RK(C)]
OP_SETGLOBAL A Bx Gbl[Kst(Bx)] := R(A)
OP_SETUPVAL A B UpValue[B] := R(A)
OP_SETTABLE A B C R(A)[RK(B)] := RK(C)
OP_NEWTABLE A B C R(A) := {} (size = B,C)
OP_SELF A B C R(A+1) := R(B); R(A) := R(B)[RK(C)]
OP_ADD A B C R(A) := RK(B) + RK(C)
OP_SUB A B C R(A) := RK(B) - RK(C)
OP_MUL A B C R(A) := RK(B) * RK(C)
OP_DIV A B C R(A) := RK(B) / RK(C)
OP_POW A B C R(A) := RK(B) ^ RK(C)
OP_UNM A B R(A) := -R(B)
OP_NOT A B R(A) := not R(B)
OP_CONCAT A B C R(A) := R(B).. ... ..R(C)
OP_JMP sBx PC += sBx
OP_EQ A B C if ((RK(B) == RK(C)) ~= A) then pc++
OP_LT A B C if ((RK(B) < RK(C)) ~= A) then pc++
OP_LE A B C if ((RK(B) <= RK(C)) ~= A) then pc++
OP_TEST A B C if (R(B) <=> C) then R(A) := R(B) else pc++
OP_CALL A B C R(A), ... ,R(A+C-2) := R(A)(R(A+1), ... ,R(A+B-1))
OP_TAILCALL A B C return R(A)(R(A+1), ... ,R(A+B-1))
OP_RETURN A B return R(A), ... ,R(A+B-2) (see note)
OP_FORLOOP A sBx R(A)+=R(A+2); if R(A) <?= R(A+1) then PC+= sBx
OP_TFORLOOP A C R(A+2), ... ,R(A+2+C) := R(A)(R(A+1), R(A+2));
if R(A+2) ~= nil then pc++
OP_TFORPREP A sBx if type(R(A)) == table then R(A+1):=R(A), R(A):=next;
PC += sBx
OP_SETLIST A Bx R(A)[Bx-Bx%FPF+i] := R(A+i), 1 <= i <= Bx%FPF+1
OP_SETLISTO A Bx (see note)
OP_CLOSE A close all variables in the stack up to (>=) R(A)
OP_CLOSURE A Bx R(A) := closure(KPROTO[Bx], R(A), ... ,R(A+n))
----------------------------------------------------------------------]]
luaP.opnames = {} -- opcode names
luaP.OpCode = {} -- lookup name -> number
luaP.ROpCode = {} -- lookup number -> name
local i = 0
for v in string.gfind([[
MOVE -- 0
LOADK
LOADBOOL
LOADNIL
GETUPVAL
GETGLOBAL -- 5
GETTABLE
SETGLOBAL
SETUPVAL
SETTABLE
NEWTABLE -- 10
SELF
ADD
SUB
MUL
DIV -- 15
MOD
POW
UNM
NOT
LEN -- 20
CONCAT
JMP
EQ
LT
LE -- 25
TEST
TESTSET
CALL
TAILCALL
RETURN -- 30
FORLOOP
FORPREP
TFORLOOP
SETLIST
CLOSE -- 35
CLOSURE
VARARG
]], "[%a]+") do
local n = "OP_"..v
luaP.opnames[i] = v
luaP.OpCode[n] = i
luaP.ROpCode[i] = n
i = i + 1
end
luaP.NUM_OPCODES = i
--[[
===========================================================================
Notes:
(1) In OP_CALL, if (B == 0) then B = top. C is the number of returns - 1,
and can be 0: OP_CALL then sets 'top' to last_result+1, so
next open instruction (OP_CALL, OP_RETURN, OP_SETLIST) may use 'top'.
(2) In OP_RETURN, if (B == 0) then return up to 'top'
(3) For comparisons, B specifies what conditions the test should accept.
(4) All 'skips' (pc++) assume that next instruction is a jump
(5) OP_SETLISTO is used when the last item in a table constructor is a
function, so the number of elements set is up to top of stack
===========================================================================
--]]
------------------------------------------------------------------------
-- masks for instruction properties
------------------------------------------------------------------------
-- was enum OpModeMask:
luaP.OpModeBreg = 2 -- B is a register
luaP.OpModeBrk = 3 -- B is a register/constant
luaP.OpModeCrk = 4 -- C is a register/constant
luaP.OpModesetA = 5 -- instruction set register A
luaP.OpModeK = 6 -- Bx is a constant
luaP.OpModeT = 1 -- operator is a test
------------------------------------------------------------------------
-- get opcode mode, e.g. "iABC"
------------------------------------------------------------------------
function luaP:getOpMode(m)
--printv(m)
--printv(self.OpCode[m])
--printv(self.opmodes [self.OpCode[m]+1])
return self.OpMode[tonumber(string.sub(self.opmodes[self.OpCode[m] + 1], 7, 7))]
end
------------------------------------------------------------------------
-- test an instruction property flag
-- * b is a string, e.g. "OpModeBreg"
------------------------------------------------------------------------
function luaP:testOpMode(m, b)
return (string.sub(self.opmodes[self.OpCode[m] + 1], self[b], self[b]) == "1")
end
-- number of list items to accumulate before a SETLIST instruction
-- (must be a power of 2)
-- * used in lparser, lvm, ldebug, ltests
luaP.LFIELDS_PER_FLUSH = 50 --FF updated to match 5.1
-- luaP_opnames[] is set above, as the luaP.opnames table
-- opmode(t,b,bk,ck,sa,k,m) deleted
--[[--------------------------------------------------------------------
Legend for luaP:opmodes:
1 T -> T (is a test?)
2 B -> B is a register
3 b -> B is an RK register/constant combination
4 C -> C is an RK register/constant combination
5 A -> register A is set by the opcode
6 K -> Bx is a constant
7 m -> 1 if iABC layout,
2 if iABx layout,
3 if iAsBx layout
----------------------------------------------------------------------]]
luaP.opmodes = {
-- TBbCAKm opcode
"0100101", -- OP_MOVE 0
"0000112", -- OP_LOADK
"0000101", -- OP_LOADBOOL
"0100101", -- OP_LOADNIL
"0000101", -- OP_GETUPVAL
"0000112", -- OP_GETGLOBAL 5
"0101101", -- OP_GETTABLE
"0000012", -- OP_SETGLOBAL
"0000001", -- OP_SETUPVAL
"0011001", -- OP_SETTABLE
"0000101", -- OP_NEWTABLE 10
"0101101", -- OP_SELF
"0011101", -- OP_ADD
"0011101", -- OP_SUB
"0011101", -- OP_MUL
"0011101", -- OP_DIV 15
"0011101", -- OP_MOD
"0011101", -- OP_POW
"0100101", -- OP_UNM
"0100101", -- OP_NOT
"0100101", -- OP_LEN 20
"0101101", -- OP_CONCAT
"0000003", -- OP_JMP
"1011001", -- OP_EQ
"1011001", -- OP_LT
"1011001", -- OP_LE 25
"1000101", -- OP_TEST
"1100101", -- OP_TESTSET
"0000001", -- OP_CALL
"0000001", -- OP_TAILCALL
"0000001", -- OP_RETURN 30
"0000003", -- OP_FORLOOP
"0000103", -- OP_FORPREP
"1000101", -- OP_TFORLOOP
"0000001", -- OP_SETLIST
"0000001", -- OP_CLOSE 35
"0000102", -- OP_CLOSURE
"0000101" -- OP_VARARG
}
return luaP

View File

@@ -0,0 +1,86 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--*-lua-*-----------------------------------------------------------------------
-- Override Lua's default compilation functions, so that they support Metalua
-- rather than only plain Lua
--------------------------------------------------------------------------------
local mlc = require 'metalua.compiler'
local M = { }
-- Original versions
local original_lua_versions = {
load = load,
loadfile = loadfile,
loadstring = loadstring,
dofile = dofile }
local lua_loadstring = loadstring
local lua_loadfile = loadfile
function M.loadstring(str, name)
if type(str) ~= 'string' then error 'string expected' end
if str:match '^\027LuaQ' then return lua_loadstring(str) end
local n = str:match '^#![^\n]*\n()'
if n then str=str:sub(n, -1) end
-- FIXME: handle erroneous returns (return nil + error msg)
return mlc.new():src_to_function(str, name)
end
function M.loadfile(filename)
local f, err_msg = io.open(filename, 'rb')
if not f then return nil, err_msg end
local success, src = pcall( f.read, f, '*a')
pcall(f.close, f)
if success then return M.loadstring (src, '@'..filename)
else return nil, src end
end
function M.load(f, name)
local acc = { }
while true do
local x = f()
if not x then break end
assert(type(x)=='string', "function passed to load() must return strings")
table.insert(acc, x)
end
return M.loadstring(table.concat(acc))
end
function M.dostring(src)
local f, msg = M.loadstring(src)
if not f then error(msg) end
return f()
end
function M.dofile(name)
local f, msg = M.loadfile(name)
if not f then error(msg) end
return f()
end
-- Export replacement functions as globals
for name, f in pairs(M) do _G[name] = f end
-- To be done *after* exportation
M.lua = original_lua_versions
return M

View File

@@ -0,0 +1,42 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
-- Export all public APIs from sub-modules, squashed into a flat spacename
local MT = { __type='metalua.compiler.parser' }
local MODULE_REL_NAMES = { "annot.grammar", "expr", "meta", "misc",
"stat", "table", "ext" }
local function new()
local M = {
lexer = require "metalua.compiler.parser.lexer" ();
extensions = { } }
for _, rel_name in ipairs(MODULE_REL_NAMES) do
local abs_name = "metalua.compiler.parser."..rel_name
local extender = require (abs_name)
if not M.extensions[abs_name] then
if type (extender) == 'function' then extender(M) end
M.extensions[abs_name] = extender
end
end
return setmetatable(M, MT)
end
return { new = new }

View File

@@ -0,0 +1,48 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local checks = require 'checks'
local gg = require 'metalua.grammar.generator'
local M = { }
function M.opt(mlc, primary, a_type)
checks('table', 'table|function', 'string')
return gg.sequence{
primary,
gg.onkeyword{ "#", function() return assert(mlc.annot[a_type]) end },
builder = function(x)
local t, annot = unpack(x)
return annot and { tag='Annot', t, annot } or t
end }
end
-- split a list of "foo" and "`Annot{foo, annot}" into a list of "foo"
-- and a list of "annot".
-- No annot list is returned if none of the elements were annotated.
function M.split(lst)
local x, a, some = { }, { }, false
for i, p in ipairs(lst) do
if p.tag=='Annot' then
some, x[i], a[i] = true, unpack(p)
else x[i] = p end
end
if some then return x, a else return lst end
end
return M

View File

@@ -0,0 +1,112 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local gg = require 'metalua.grammar.generator'
return function(M)
local _M = gg.future(M)
M.lexer :add '->'
local A = { }
local _A = gg.future(A)
M.annot = A
-- Type identifier: Lua keywords such as `"nil"` allowed.
function M.annot.tid(lx)
local w = lx :next()
local t = w.tag
if t=='Keyword' and w[1] :match '^[%a_][%w_]*$' or w.tag=='Id'
then return {tag='TId'; lineinfo=w.lineinfo; w[1]}
else return gg.parse_error (lx, 'tid expected') end
end
local field_types = { var='TVar'; const='TConst';
currently='TCurrently'; field='TField' }
-- TODO check lineinfo
function M.annot.tf(lx)
local tk = lx:next()
local w = tk[1]
local tag = field_types[w]
if not tag then error ('Invalid field type '..w)
elseif tag=='TField' then return {tag='TField'} else
local te = M.te(lx)
return {tag=tag; te}
end
end
M.annot.tebar_content = gg.list{
name = 'tebar content',
primary = _A.te,
separators = { ",", ";" },
terminators = ")" }
M.annot.tebar = gg.multisequence{
name = 'annot.tebar',
--{ '*', builder = 'TDynbar' }, -- maybe not user-available
{ '(', _A.tebar_content, ')',
builder = function(x) return x[1] end },
{ _A.te }
}
M.annot.te = gg.multisequence{
name = 'annot.te',
{ _A.tid, builder=function(x) return x[1] end },
{ '*', builder = 'TDyn' },
{ "[",
gg.list{
primary = gg.sequence{
_M.expr, "=", _A.tf,
builder = 'TPair'
},
separators = { ",", ";" },
terminators = { "]", "|" } },
gg.onkeyword{ "|", _A.tf },
"]",
builder = function(x)
local fields, other = unpack(x)
return { tag='TTable', other or {tag='TField'}, fields }
end }, -- "[ ... ]"
{ '(', _A.tebar_content, ')', '->', '(', _A.tebar_content, ')',
builder = function(x)
local p, r = unpack(x)
return {tag='TFunction', p, r }
end } }
M.annot.ts = gg.multisequence{
name = 'annot.ts',
{ 'return', _A.tebar_content, builder='TReturn' },
{ _A.tid, builder = function(x)
if x[1][1]=='pass' then return {tag='TPass'}
else error "Bad statement type" end
end } }
-- TODO: add parsers for statements:
-- #return tebar
-- #alias = te
-- #ell = tf
--[[
M.annot.stat_annot = gg.sequence{
gg.list{ primary=_A.tid, separators='.' },
'=',
XXX??,
builder = 'Annot' }
--]]
return M.annot
end

View File

@@ -0,0 +1,206 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Exported API:
-- * [mlp.expr()]
-- * [mlp.expr_list()]
-- * [mlp.func_val()]
--
-------------------------------------------------------------------------------
local pp = require 'metalua.pprint'
local gg = require 'metalua.grammar.generator'
local annot = require 'metalua.compiler.parser.annot.generator'
return function(M)
local _M = gg.future(M)
local _table = gg.future(M, 'table')
local _meta = gg.future(M, 'meta') -- TODO move to ext?
local _annot = gg.future(M, 'annot') -- TODO move to annot
--------------------------------------------------------------------------------
-- Non-empty expression list. Actually, this isn't used here, but that's
-- handy to give to users.
--------------------------------------------------------------------------------
M.expr_list = gg.list{ primary=_M.expr, separators="," }
--------------------------------------------------------------------------------
-- Helpers for function applications / method applications
--------------------------------------------------------------------------------
M.func_args_content = gg.list{
name = "function arguments",
primary = _M.expr,
separators = ",",
terminators = ")" }
-- Used to parse methods
M.method_args = gg.multisequence{
name = "function argument(s)",
{ "{", _table.content, "}" },
{ "(", _M.func_args_content, ")", builder = unpack },
{ "+{", _meta.quote_content, "}" },
-- TODO lineinfo?
function(lx) local r = M.opt_string(lx); return r and {r} or { } end }
--------------------------------------------------------------------------------
-- [func_val] parses a function, from opening parameters parenthese to
-- "end" keyword included. Used for anonymous functions as well as
-- function declaration statements (both local and global).
--
-- It's wrapped in a [_func_val] eta expansion, so that when expr
-- parser uses the latter, they will notice updates of [func_val]
-- definitions.
--------------------------------------------------------------------------------
M.func_params_content = gg.list{
name="function parameters",
gg.multisequence{ { "...", builder = "Dots" }, annot.opt(M, _M.id, 'te') },
separators = ",", terminators = {")", "|"} }
-- TODO move to annot
M.func_val = gg.sequence{
name = "function body",
"(", _M.func_params_content, ")", _M.block, "end",
builder = function(x)
local params, body = unpack(x)
local annots, some = { }, false
for i, p in ipairs(params) do
if p.tag=='Annot' then
params[i], annots[i], some = p[1], p[2], true
else annots[i] = false end
end
if some then return { tag='Function', params, body, annots }
else return { tag='Function', params, body } end
end }
local func_val = function(lx) return M.func_val(lx) end
--------------------------------------------------------------------------------
-- Default parser for primary expressions
--------------------------------------------------------------------------------
function M.id_or_literal (lx)
local a = lx:next()
if a.tag~="Id" and a.tag~="String" and a.tag~="Number" then
local msg
if a.tag=='Eof' then
msg = "End of file reached when an expression was expected"
elseif a.tag=='Keyword' then
msg = "An expression was expected, and `"..a[1]..
"' can't start an expression"
else
msg = "Unexpected expr token " .. pp.tostring (a)
end
gg.parse_error (lx, msg)
end
return a
end
--------------------------------------------------------------------------------
-- Builder generator for operators. Wouldn't be worth it if "|x|" notation
-- were allowed, but then lua 5.1 wouldn't compile it
--------------------------------------------------------------------------------
-- opf1 = |op| |_,a| `Op{ op, a }
local function opf1 (op) return
function (_,a) return { tag="Op", op, a } end end
-- opf2 = |op| |a,_,b| `Op{ op, a, b }
local function opf2 (op) return
function (a,_,b) return { tag="Op", op, a, b } end end
-- opf2r = |op| |a,_,b| `Op{ op, b, a } -- (args reversed)
local function opf2r (op) return
function (a,_,b) return { tag="Op", op, b, a } end end
local function op_ne(a, _, b)
-- This version allows to remove the "ne" operator from the AST definition.
-- However, it doesn't always produce the exact same bytecode as Lua 5.1.
return { tag="Op", "not",
{ tag="Op", "eq", a, b, lineinfo= {
first = a.lineinfo.first, last = b.lineinfo.last } } }
end
--------------------------------------------------------------------------------
--
-- complete expression
--
--------------------------------------------------------------------------------
-- FIXME: set line number. In [expr] transformers probably
M.expr = gg.expr {
name = "expression",
primary = gg.multisequence{
name = "expr primary",
{ "(", _M.expr, ")", builder = "Paren" },
{ "function", _M.func_val, builder = unpack },
{ "-{", _meta.splice_content, "}", builder = unpack },
{ "+{", _meta.quote_content, "}", builder = unpack },
{ "nil", builder = "Nil" },
{ "true", builder = "True" },
{ "false", builder = "False" },
{ "...", builder = "Dots" },
{ "{", _table.content, "}", builder = unpack },
_M.id_or_literal },
infix = {
name = "expr infix op",
{ "+", prec = 60, builder = opf2 "add" },
{ "-", prec = 60, builder = opf2 "sub" },
{ "*", prec = 70, builder = opf2 "mul" },
{ "/", prec = 70, builder = opf2 "div" },
{ "%", prec = 70, builder = opf2 "mod" },
{ "^", prec = 90, builder = opf2 "pow", assoc = "right" },
{ "..", prec = 40, builder = opf2 "concat", assoc = "right" },
{ "==", prec = 30, builder = opf2 "eq" },
{ "~=", prec = 30, builder = op_ne },
{ "<", prec = 30, builder = opf2 "lt" },
{ "<=", prec = 30, builder = opf2 "le" },
{ ">", prec = 30, builder = opf2r "lt" },
{ ">=", prec = 30, builder = opf2r "le" },
{ "and",prec = 20, builder = opf2 "and" },
{ "or", prec = 10, builder = opf2 "or" } },
prefix = {
name = "expr prefix op",
{ "not", prec = 80, builder = opf1 "not" },
{ "#", prec = 80, builder = opf1 "len" },
{ "-", prec = 80, builder = opf1 "unm" } },
suffix = {
name = "expr suffix op",
{ "[", _M.expr, "]", builder = function (tab, idx)
return {tag="Index", tab, idx[1]} end},
{ ".", _M.id, builder = function (tab, field)
return {tag="Index", tab, _M.id2string(field[1])} end },
{ "(", _M.func_args_content, ")", builder = function(f, args)
return {tag="Call", f, unpack(args[1])} end },
{ "{", _table.content, "}", builder = function (f, arg)
return {tag="Call", f, arg[1]} end},
{ ":", _M.id, _M.method_args, builder = function (obj, post)
local m_name, args = unpack(post)
return {tag="Invoke", obj, _M.id2string(m_name), unpack(args)} end},
{ "+{", _meta.quote_content, "}", builder = function (f, arg)
return {tag="Call", f, arg[1] } end },
default = { name="opt_string_arg", parse = _M.opt_string, builder = function(f, arg)
return {tag="Call", f, arg } end } } }
return M
end

View File

@@ -0,0 +1,96 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Non-Lua syntax extensions
--
--------------------------------------------------------------------------------
local gg = require 'metalua.grammar.generator'
return function(M)
local _M = gg.future(M)
---------------------------------------------------------------------------
-- Algebraic Datatypes
----------------------------------------------------------------------------
local function adt (lx)
local node = _M.id (lx)
local tagval = node[1]
-- tagkey = `Pair{ `String "key", `String{ -{tagval} } }
local tagkey = { tag="Pair", {tag="String", "tag"}, {tag="String", tagval} }
if lx:peek().tag == "String" or lx:peek().tag == "Number" then
-- TODO support boolean litterals
return { tag="Table", tagkey, lx:next() }
elseif lx:is_keyword (lx:peek(), "{") then
local x = M.table.table (lx)
table.insert (x, 1, tagkey)
return x
else return { tag="Table", tagkey } end
end
M.adt = gg.sequence{ "`", adt, builder = unpack }
M.expr.primary :add(M.adt)
----------------------------------------------------------------------------
-- Anonymous lambda
----------------------------------------------------------------------------
M.lambda_expr = gg.sequence{
"|", _M.func_params_content, "|", _M.expr,
builder = function (x)
local li = x[2].lineinfo
return { tag="Function", x[1],
{ {tag="Return", x[2], lineinfo=li }, lineinfo=li } }
end }
M.expr.primary :add (M.lambda_expr)
--------------------------------------------------------------------------------
-- Allows to write "a `f` b" instead of "f(a, b)". Taken from Haskell.
--------------------------------------------------------------------------------
function M.expr_in_backquotes (lx) return M.expr(lx, 35) end -- 35=limited precedence
M.expr.infix :add{ name = "infix function",
"`", _M.expr_in_backquotes, "`", prec = 35, assoc="left",
builder = function(a, op, b) return {tag="Call", op[1], a, b} end }
--------------------------------------------------------------------------------
-- C-style op+assignments
-- TODO: no protection against side-effects in LHS vars.
--------------------------------------------------------------------------------
local function op_assign(kw, op)
local function rhs(a, b) return { tag="Op", op, a, b } end
local function f(a,b)
if #a ~= #b then gg.parse_error "assymetric operator+assignment" end
local right = { }
local r = { tag="Set", a, right }
for i=1, #a do right[i] = { tag="Op", op, a[i], b[i] } end
return r
end
M.lexer :add (kw)
M.assignments[kw] = f
end
local ops = { add='+='; sub='-='; mul='*='; div='/=' }
for ast_op_name, keyword in pairs(ops) do op_assign(keyword, ast_op_name) end
return M
end

View File

@@ -0,0 +1,43 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2014 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
----------------------------------------------------------------------
-- Generate a new lua-specific lexer, derived from the generic lexer.
----------------------------------------------------------------------
local generic_lexer = require 'metalua.grammar.lexer'
return function()
local lexer = generic_lexer.lexer :clone()
local keywords = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function",
"goto", -- Lua5.2
"if",
"in", "local", "nil", "not", "or", "repeat",
"return", "then", "true", "until", "while",
"...", "..", "==", ">=", "<=", "~=",
"::", -- Lua5,2
"+{", "-{" } -- Metalua
for _, w in ipairs(keywords) do lexer :add (w) end
return lexer
end

View File

@@ -0,0 +1,138 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2014 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-- Compile-time metaprogramming features: splicing ASTs generated during compilation,
-- AST quasi-quoting helpers.
local gg = require 'metalua.grammar.generator'
return function(M)
local _M = gg.future(M)
M.meta={ }
local _MM = gg.future(M.meta)
--------------------------------------------------------------------------------
-- External splicing: compile an AST into a chunk, load and evaluate
-- that chunk, and replace the chunk by its result (which must also be
-- an AST).
--------------------------------------------------------------------------------
-- TODO: that's not part of the parser
function M.meta.eval (ast)
-- TODO: should there be one mlc per splice, or per parser instance?
local mlc = require 'metalua.compiler'.new()
local f = mlc :ast_to_function (ast, '=splice')
local result=f(M) -- splices act on the current parser
return result
end
----------------------------------------------------------------------------
-- Going from an AST to an AST representing that AST
-- the only hash-part key being lifted is `"tag"`.
-- Doesn't lift subtrees protected inside a `Splice{ ... }.
-- e.g. change `Foo{ 123 } into
-- `Table{ `Pair{ `String "tag", `String "foo" }, `Number 123 }
----------------------------------------------------------------------------
local function lift (t)
--print("QUOTING:", table.tostring(t, 60,'nohash'))
local cases = { }
function cases.table (t)
local mt = { tag = "Table" }
--table.insert (mt, { tag = "Pair", quote "quote", { tag = "True" } })
if t.tag == "Splice" then
assert (#t==1, "Invalid splice")
local sp = t[1]
return sp
elseif t.tag then
table.insert (mt, { tag="Pair", lift "tag", lift(t.tag) })
end
for _, v in ipairs (t) do
table.insert (mt, lift(v))
end
return mt
end
function cases.number (t) return { tag = "Number", t, quote = true } end
function cases.string (t) return { tag = "String", t, quote = true } end
function cases.boolean (t) return { tag = t and "True" or "False", t, quote = true } end
local f = cases [type(t)]
if f then return f(t) else error ("Cannot quote an AST containing "..tostring(t)) end
end
M.meta.lift = lift
--------------------------------------------------------------------------------
-- when this variable is false, code inside [-{...}] is compiled and
-- avaluated immediately. When it's true (supposedly when we're
-- parsing data inside a quasiquote), [-{foo}] is replaced by
-- [`Splice{foo}], which will be unpacked by [quote()].
--------------------------------------------------------------------------------
local in_a_quote = false
--------------------------------------------------------------------------------
-- Parse the inside of a "-{ ... }"
--------------------------------------------------------------------------------
function M.meta.splice_content (lx)
local parser_name = "expr"
if lx:is_keyword (lx:peek(2), ":") then
local a = lx:next()
lx:next() -- skip ":"
assert (a.tag=="Id", "Invalid splice parser name")
parser_name = a[1]
end
-- TODO FIXME running a new parser with the old lexer?!
local parser = require 'metalua.compiler.parser'.new()
local ast = parser [parser_name](lx)
if in_a_quote then -- only prevent quotation in this subtree
--printf("SPLICE_IN_QUOTE:\n%s", _G.table.tostring(ast, "nohash", 60))
return { tag="Splice", ast }
else -- convert in a block, eval, replace with result
if parser_name == "expr" then ast = { { tag="Return", ast } }
elseif parser_name == "stat" then ast = { ast }
elseif parser_name ~= "block" then
error ("splice content must be an expr, stat or block") end
--printf("EXEC THIS SPLICE:\n%s", _G.table.tostring(ast, "nohash", 60))
return M.meta.eval (ast)
end
end
M.meta.splice = gg.sequence{ "-{", _MM.splice_content, "}", builder=unpack }
--------------------------------------------------------------------------------
-- Parse the inside of a "+{ ... }"
--------------------------------------------------------------------------------
function M.meta.quote_content (lx)
local parser
if lx:is_keyword (lx:peek(2), ":") then -- +{parser: content }
local parser_name = M.id(lx)[1]
parser = M[parser_name]
lx:next() -- skip ":"
else -- +{ content }
parser = M.expr
end
local prev_iq = in_a_quote
in_a_quote = true
--print("IN_A_QUOTE")
local content = parser (lx)
local q_content = M.meta.lift (content)
in_a_quote = prev_iq
return q_content
end
return M
end

View File

@@ -0,0 +1,176 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Summary: metalua parser, miscellaneous utility functions.
--
-------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
-- * [mlp.fget()]
-- * [mlp.id()]
-- * [mlp.opt_id()]
-- * [mlp.id_list()]
-- * [mlp.string()]
-- * [mlp.opt_string()]
-- * [mlp.id2string()]
--
--------------------------------------------------------------------------------
local pp = require 'metalua.pprint'
local gg = require 'metalua.grammar.generator'
-- TODO: replace splice-aware versions with naive ones, move etensions in ./meta
return function(M)
local _M = gg.future(M)
--[[ metaprog-free versions:
function M.id(lx)
if lx:peek().tag~='Id' then gg.parse_error(lx, "Identifier expected")
else return lx:next() end
end
function M.opt_id(lx)
if lx:peek().tag~='Id' then return lx:next() else return false end
end
function M.string(lx)
if lx:peek().tag~='String' then gg.parse_error(lx, "String expected")
else return lx:next() end
end
function M.opt_string(lx)
if lx:peek().tag~='String' then return lx:next() else return false end
end
--------------------------------------------------------------------------------
-- Converts an identifier into a string. Hopefully one day it'll handle
-- splices gracefully, but that proves quite tricky.
--------------------------------------------------------------------------------
function M.id2string (id)
if id.tag == "Id" then id.tag = "String"; return id
else error ("Identifier expected: "..table.tostring(id, 'nohash')) end
end
--]]
--------------------------------------------------------------------------------
-- Try to read an identifier (possibly as a splice), or return [false] if no
-- id is found.
--------------------------------------------------------------------------------
function M.opt_id (lx)
local a = lx:peek();
if lx:is_keyword (a, "-{") then
local v = M.meta.splice(lx)
if v.tag ~= "Id" and v.tag ~= "Splice" then
gg.parse_error(lx, "Bad id splice")
end
return v
elseif a.tag == "Id" then return lx:next()
else return false end
end
--------------------------------------------------------------------------------
-- Mandatory reading of an id: causes an error if it can't read one.
--------------------------------------------------------------------------------
function M.id (lx)
return M.opt_id (lx) or gg.parse_error(lx,"Identifier expected")
end
--------------------------------------------------------------------------------
-- Common helper function
--------------------------------------------------------------------------------
M.id_list = gg.list { primary = _M.id, separators = "," }
--------------------------------------------------------------------------------
-- Converts an identifier into a string. Hopefully one day it'll handle
-- splices gracefully, but that proves quite tricky.
--------------------------------------------------------------------------------
function M.id2string (id)
--print("id2string:", disp.ast(id))
if id.tag == "Id" then id.tag = "String"; return id
elseif id.tag == "Splice" then
error ("id2string on splice not implemented")
-- Evaluating id[1] will produce `Id{ xxx },
-- and we want it to produce `String{ xxx }.
-- The following is the plain notation of:
-- +{ `String{ `Index{ `Splice{ -{id[1]} }, `Number 1 } } }
return { tag="String", { tag="Index", { tag="Splice", id[1] },
{ tag="Number", 1 } } }
else error ("Identifier expected: "..pp.tostring (id, {metalua_tag=1, hide_hash=1})) end
end
--------------------------------------------------------------------------------
-- Read a string, possibly spliced, or return an error if it can't
--------------------------------------------------------------------------------
function M.string (lx)
local a = lx:peek()
if lx:is_keyword (a, "-{") then
local v = M.meta.splice(lx)
if v.tag ~= "String" and v.tag ~= "Splice" then
gg.parse_error(lx,"Bad string splice")
end
return v
elseif a.tag == "String" then return lx:next()
else error "String expected" end
end
--------------------------------------------------------------------------------
-- Try to read a string, or return false if it can't. No splice allowed.
--------------------------------------------------------------------------------
function M.opt_string (lx)
return lx:peek().tag == "String" and lx:next()
end
--------------------------------------------------------------------------------
-- Chunk reader: block + Eof
--------------------------------------------------------------------------------
function M.skip_initial_sharp_comment (lx)
-- Dirty hack: I'm happily fondling lexer's private parts
-- FIXME: redundant with lexer:newstream()
lx :sync()
local i = lx.src:match ("^#.-\n()", lx.i)
if i then
lx.i = i
lx.column_offset = i
lx.line = lx.line and lx.line + 1 or 1
end
end
local function chunk (lx)
if lx:peek().tag == 'Eof' then
return { } -- handle empty files
else
M.skip_initial_sharp_comment (lx)
local chunk = M.block (lx)
if lx:peek().tag ~= "Eof" then
gg.parse_error(lx, "End-of-file expected")
end
return chunk
end
end
-- chunk is wrapped in a sequence so that it has a "transformer" field.
M.chunk = gg.sequence { chunk, builder = unpack }
return M
end

View File

@@ -0,0 +1,279 @@
------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Summary: metalua parser, statement/block parser. This is part of the
-- definition of module [mlp].
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Exports API:
-- * [mlp.stat()]
-- * [mlp.block()]
-- * [mlp.for_header()]
--
-------------------------------------------------------------------------------
local lexer = require 'metalua.grammar.lexer'
local gg = require 'metalua.grammar.generator'
local annot = require 'metalua.compiler.parser.annot.generator'
--------------------------------------------------------------------------------
-- List of all keywords that indicate the end of a statement block. Users are
-- likely to extend this list when designing extensions.
--------------------------------------------------------------------------------
return function(M)
local _M = gg.future(M)
M.block_terminators = { "else", "elseif", "end", "until", ")", "}", "]" }
-- FIXME: this must be handled from within GG!!!
-- FIXME: there's no :add method in the list anyway. Added by gg.list?!
function M.block_terminators :add(x)
if type (x) == "table" then for _, y in ipairs(x) do self :add (y) end
else table.insert (self, x) end
end
----------------------------------------------------------------------------
-- list of statements, possibly followed by semicolons
----------------------------------------------------------------------------
M.block = gg.list {
name = "statements block",
terminators = M.block_terminators,
primary = function (lx)
-- FIXME use gg.optkeyword()
local x = M.stat (lx)
if lx:is_keyword (lx:peek(), ";") then lx:next() end
return x
end }
----------------------------------------------------------------------------
-- Helper function for "return <expr_list>" parsing.
-- Called when parsing return statements.
-- The specific test for initial ";" is because it's not a block terminator,
-- so without it gg.list would choke on "return ;" statements.
-- We don't make a modified copy of block_terminators because this list
-- is sometimes modified at runtime, and the return parser would get out of
-- sync if it was relying on a copy.
----------------------------------------------------------------------------
local return_expr_list_parser = gg.multisequence{
{ ";" , builder = function() return { } end },
default = gg.list {
_M.expr, separators = ",", terminators = M.block_terminators } }
local for_vars_list = gg.list{
name = "for variables list",
primary = _M.id,
separators = ",",
terminators = "in" }
----------------------------------------------------------------------------
-- for header, between [for] and [do] (exclusive).
-- Return the `Forxxx{...} AST, without the body element (the last one).
----------------------------------------------------------------------------
function M.for_header (lx)
local vars = M.id_list(lx)
if lx :is_keyword (lx:peek(), "=") then
if #vars ~= 1 then
gg.parse_error (lx, "numeric for only accepts one variable")
end
lx:next() -- skip "="
local exprs = M.expr_list (lx)
if #exprs < 2 or #exprs > 3 then
gg.parse_error (lx, "numeric for requires 2 or 3 boundaries")
end
return { tag="Fornum", vars[1], unpack (exprs) }
else
if not lx :is_keyword (lx :next(), "in") then
gg.parse_error (lx, '"=" or "in" expected in for loop')
end
local exprs = M.expr_list (lx)
return { tag="Forin", vars, exprs }
end
end
----------------------------------------------------------------------------
-- Function def parser helper: id ( . id ) *
----------------------------------------------------------------------------
local function fn_builder (list)
local acc = list[1]
local first = acc.lineinfo.first
for i = 2, #list do
local index = M.id2string(list[i])
local li = lexer.new_lineinfo(first, index.lineinfo.last)
acc = { tag="Index", acc, index, lineinfo=li }
end
return acc
end
local func_name = gg.list{ _M.id, separators = ".", builder = fn_builder }
----------------------------------------------------------------------------
-- Function def parser helper: ( : id )?
----------------------------------------------------------------------------
local method_name = gg.onkeyword{ name = "method invocation", ":", _M.id,
transformers = { function(x) return x and x.tag=='Id' and M.id2string(x) end } }
----------------------------------------------------------------------------
-- Function def builder
----------------------------------------------------------------------------
local function funcdef_builder(x)
local name, method, func = unpack(x)
if method then
name = { tag="Index", name, method,
lineinfo = {
first = name.lineinfo.first,
last = method.lineinfo.last } }
table.insert (func[1], 1, {tag="Id", "self"})
end
local r = { tag="Set", {name}, {func} }
r[1].lineinfo = name.lineinfo
r[2].lineinfo = func.lineinfo
return r
end
----------------------------------------------------------------------------
-- if statement builder
----------------------------------------------------------------------------
local function if_builder (x)
local cond_block_pairs, else_block, r = x[1], x[2], {tag="If"}
local n_pairs = #cond_block_pairs
for i = 1, n_pairs do
local cond, block = unpack(cond_block_pairs[i])
r[2*i-1], r[2*i] = cond, block
end
if else_block then table.insert(r, #r+1, else_block) end
return r
end
--------------------------------------------------------------------------------
-- produce a list of (expr,block) pairs
--------------------------------------------------------------------------------
local elseifs_parser = gg.list {
gg.sequence { _M.expr, "then", _M.block , name='elseif parser' },
separators = "elseif",
terminators = { "else", "end" }
}
local annot_expr = gg.sequence {
_M.expr,
gg.onkeyword{ "#", gg.future(M, 'annot').tf },
builder = function(x)
local e, a = unpack(x)
if a then return { tag='Annot', e, a }
else return e end
end }
local annot_expr_list = gg.list {
primary = annot.opt(M, _M.expr, 'tf'), separators = ',' }
------------------------------------------------------------------------
-- assignments and calls: statements that don't start with a keyword
------------------------------------------------------------------------
local function assign_or_call_stat_parser (lx)
local e = annot_expr_list (lx)
local a = lx:is_keyword(lx:peek())
local op = a and M.assignments[a]
-- TODO: refactor annotations
if op then
--FIXME: check that [e] is a LHS
lx :next()
local annots
e, annots = annot.split(e)
local v = M.expr_list (lx)
if type(op)=="string" then return { tag=op, e, v, annots }
else return op (e, v) end
else
assert (#e > 0)
if #e > 1 then
gg.parse_error (lx,
"comma is not a valid statement separator; statement can be "..
"separated by semicolons, or not separated at all")
elseif e[1].tag ~= "Call" and e[1].tag ~= "Invoke" then
local typename
if e[1].tag == 'Id' then
typename = '("'..e[1][1]..'") is an identifier'
elseif e[1].tag == 'Op' then
typename = "is an arithmetic operation"
else typename = "is of type '"..(e[1].tag or "<list>").."'" end
gg.parse_error (lx,
"This expression %s; "..
"a statement was expected, and only function and method call "..
"expressions can be used as statements", typename);
end
return e[1]
end
end
M.local_stat_parser = gg.multisequence{
-- local function <name> <func_val>
{ "function", _M.id, _M.func_val, builder =
function(x)
local vars = { x[1], lineinfo = x[1].lineinfo }
local vals = { x[2], lineinfo = x[2].lineinfo }
return { tag="Localrec", vars, vals }
end },
-- local <id_list> ( = <expr_list> )?
default = gg.sequence{
gg.list{
primary = annot.opt(M, _M.id, 'tf'),
separators = ',' },
gg.onkeyword{ "=", _M.expr_list },
builder = function(x)
local annotated_left, right = unpack(x)
local left, annotations = annot.split(annotated_left)
return {tag="Local", left, right or { }, annotations }
end } }
------------------------------------------------------------------------
-- statement
------------------------------------------------------------------------
M.stat = gg.multisequence {
name = "statement",
{ "do", _M.block, "end", builder =
function (x) return { tag="Do", unpack (x[1]) } end },
{ "for", _M.for_header, "do", _M.block, "end", builder =
function (x) x[1][#x[1]+1] = x[2]; return x[1] end },
{ "function", func_name, method_name, _M.func_val, builder=funcdef_builder },
{ "while", _M.expr, "do", _M.block, "end", builder = "While" },
{ "repeat", _M.block, "until", _M.expr, builder = "Repeat" },
{ "local", _M.local_stat_parser, builder = unpack },
{ "return", return_expr_list_parser, builder =
function(x) x[1].tag='Return'; return x[1] end },
{ "break", builder = function() return { tag="Break" } end },
{ "-{", gg.future(M, 'meta').splice_content, "}", builder = unpack },
{ "if", gg.nonempty(elseifs_parser), gg.onkeyword{ "else", M.block }, "end",
builder = if_builder },
default = assign_or_call_stat_parser }
M.assignments = {
["="] = "Set"
}
function M.assignments:add(k, v) self[k] = v end
return M
end

View File

@@ -0,0 +1,77 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
-- * [M.table_bracket_field()]
-- * [M.table_field()]
-- * [M.table_content()]
-- * [M.table()]
--
-- KNOWN BUG: doesn't handle final ";" or "," before final "}"
--
--------------------------------------------------------------------------------
local gg = require 'metalua.grammar.generator'
return function(M)
M.table = { }
local _table = gg.future(M.table)
local _expr = gg.future(M).expr
--------------------------------------------------------------------------------
-- `[key] = value` table field definition
--------------------------------------------------------------------------------
M.table.bracket_pair = gg.sequence{ "[", _expr, "]", "=", _expr, builder = "Pair" }
--------------------------------------------------------------------------------
-- table element parser: list value, `id = value` pair or `[value] = value` pair.
--------------------------------------------------------------------------------
function M.table.element (lx)
if lx :is_keyword (lx :peek(), "[") then return M.table.bracket_pair(lx) end
local e = M.expr (lx)
if not lx :is_keyword (lx :peek(), "=") then return e end
lx :next(); -- skip the "="
local key = M.id2string(e) -- will fail on non-identifiers
local val = M.expr(lx)
local r = { tag="Pair", key, val }
r.lineinfo = { first = key.lineinfo.first, last = val.lineinfo.last }
return r
end
-----------------------------------------------------------------------------
-- table constructor, without enclosing braces; returns a full table object
-----------------------------------------------------------------------------
M.table.content = gg.list {
-- eta expansion to allow patching the element definition
primary = _table.element,
separators = { ",", ";" },
terminators = "}",
builder = "Table" }
--------------------------------------------------------------------------------
-- complete table constructor including [{...}]
--------------------------------------------------------------------------------
-- TODO beware, stat and expr use only table.content, this can't be patched.
M.table.table = gg.sequence{ "{", _table.content, "}", builder = unpack }
return M
end

View File

@@ -0,0 +1,282 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
--
-- This extension implements list comprehensions, similar to Haskell and
-- Python syntax, to easily describe lists.
--
-- * x[a ... b] is the list { x[a], x[a+1], ..., x[b] }
-- * { f()..., b } contains all the elements returned by f(), then b
-- (allows to expand list fields other than the last one)
-- * list comprehensions a la python, with "for" and "if" suffixes:
-- {i+10*j for i=1,3 for j=1,3 if i~=j} is { 21, 31, 12, 32, 13, 23 }
--
-------------------------------------------------------------------------------
-{ extension ("match", ...) }
local SUPPORT_IMPROVED_LOOPS = true
local SUPPORT_IMPROVED_INDEXES = false -- depends on deprecated table.isub
local SUPPORT_CONTINUE = true
local SUPPORT_COMP_LISTS = true
assert (SUPPORT_IMPROVED_LOOPS or not SUPPORT_CONTINUE,
"Can't support 'continue' without improved loop headers")
local gg = require 'metalua.grammar.generator'
local Q = require 'metalua.treequery'
local function dots_list_suffix_builder (x) return `DotsSuffix{ x } end
local function for_list_suffix_builder (list_element, suffix)
local new_header = suffix[1]
match list_element with
| `Comp{ _, acc } -> table.insert (acc, new_header); return list_element
| _ -> return `Comp{ list_element, { new_header } }
end
end
local function if_list_suffix_builder (list_element, suffix)
local new_header = `If{ suffix[1] }
match list_element with
| `Comp{ _, acc } -> table.insert (acc, new_header); return list_element
| _ -> return `Comp{ list_element, { new_header } }
end
end
-- Builds a statement from a table element, which adds this element to
-- a table `t`, potentially thanks to an alias `tinsert` to
-- `table.insert`.
-- @param core the part around which the loops are built.
-- either `DotsSuffix{expr}, `Pair{ expr } or a plain expression
-- @param list comprehension suffixes, in the order in which they appear
-- either `Forin{ ... } or `Fornum{ ...} or `If{ ... }. In each case,
-- it misses a last child node as its body.
-- @param t a variable containing the table to fill
-- @param tinsert a variable containing `table.insert`.
--
-- @return fill a statement which fills empty table `t` with the denoted element
local function comp_list_builder(core, list, t, tinsert)
local filler
-- 1 - Build the loop's core: if it has suffix "...", every elements of the
-- multi-return must be inserted, hence the extra [for] loop.
match core with
| `DotsSuffix{ element } ->
local x = gg.gensym()
filler = +{stat: for _, -{x} in pairs{ -{element} } do (-{tinsert})(-{t}, -{x}) end }
| `Pair{ key, value } ->
--filler = +{ -{t}[-{key}] = -{value} }
filler = `Set{ { `Index{ t, key } }, { value } }
| _ -> filler = +{ (-{tinsert})(-{t}, -{core}) }
end
-- 2 - Stack the `if` and `for` control structures, from outside to inside.
-- This is done in a destructive way for the elements of [list].
for i = #list, 1, -1 do
table.insert (list[i], {filler})
filler = list[i]
end
return filler
end
local function table_content_builder (list)
local special = false -- Does the table need a special builder?
for _, element in ipairs(list) do
local etag = element.tag
if etag=='Comp' or etag=='DotsSuffix' then special=true; break end
end
if not special then list.tag='Table'; return list end
local t, tinsert = gg.gensym 'table', gg.gensym 'table_insert'
local filler_block = { +{stat: local -{t}, -{tinsert} = { }, table.insert } }
for _, element in ipairs(list) do
local filler
match element with
| `Comp{ core, comp } -> filler = comp_list_builder(core, comp, t, tinsert)
| _ -> filler = comp_list_builder(element, { }, t, tinsert)
end
table.insert(filler_block, filler)
end
return `Stat{ filler_block, t }
end
--------------------------------------------------------------------------------
-- Back-end for improved index operator.
local function index_builder(a, suffix)
match suffix[1] with
-- Single index, no range: keep the native semantics
| { { e, false } } -> return `Index{ a, e }
-- Either a range, or multiple indexes, or both
| ranges ->
local r = `Call{ +{table.isub}, a }
local function acc (x,y) table.insert (r,x); table.insert (r,y) end
for _, seq in ipairs (ranges) do
match seq with
| { e, false } -> acc(e,e)
| { e, f } -> acc(e,f)
end
end
return r
end
end
-------------------------------------------------------------------
-- Find continue statements in a loop body, change them into goto
-- end-of-body.
local function transform_continue_statements(body)
local continue_statements = Q(body)
:if_unknown() -- tolerate unknown 'Continue' statements
:not_under ('Forin', 'Fornum', 'While', 'Repeat')
:filter ('Continue')
:list()
if next(continue_statements) then
local continue_label = gg.gensym 'continue' [1]
table.insert(body, `Label{ continue_label })
for _, statement in ipairs(continue_statements) do
statement.tag = 'Goto'
statement[1] = continue_label
end
return true
else return false end
end
-------------------------------------------------------------------------------
-- Back-end for loops with a multi-element header
local function loop_builder(x)
local first, elements, body = unpack(x)
-- Change continue statements into gotos.
if SUPPORT_CONTINUE then transform_continue_statements(body) end
-------------------------------------------------------------------
-- If it's a regular loop, don't bloat the code
if not next(elements) then
table.insert(first, body)
return first
end
-------------------------------------------------------------------
-- There's no reason to treat the first element in a special way
table.insert(elements, 1, first)
-------------------------------------------------------------------
-- Change breaks into gotos that escape all loops at once.
local exit_label = nil
local function break_to_goto(break_node)
if not exit_label then exit_label = gg.gensym 'break' [1] end
break_node = break_node or { }
break_node.tag = 'Goto'
break_node[1] = exit_label
return break_node
end
Q(body)
:not_under('Function', 'Forin', 'Fornum', 'While', 'Repeat')
:filter('Break')
:foreach (break_to_goto)
-------------------------------------------------------------------
-- Compile all headers elements, from last to first.
-- invariant: `body` is a block (not a statement)
local result = body
for i = #elements, 1, -1 do
local e = elements[i]
match e with
| `If{ cond } ->
result = { `If{ cond, result } }
| `Until{ cond } ->
result = +{block: if -{cond} then -{break_to_goto()} else -{result} end }
| `While{ cond } ->
if i==1 then result = { `While{ cond, result } } -- top-level while
else result = +{block: if -{cond} then -{result} else -{break_to_goto()} end } end
| `Forin{ ... } | `Fornum{ ... } ->
table.insert (e, result); result={e}
| _-> require'metalua.pprint'.printf("Bad loop header element %s", e)
end
end
-------------------------------------------------------------------
-- If some breaks had to be changed into gotos, insert the label
if exit_label then result = { result, `Label{ exit_label } } end
return result
end
--------------------------------------------------------------------------------
-- Improved "[...]" index operator:
-- * support for multi-indexes ("foo[bar, gnat]")
-- * support for ranges ("foo[bar ... gnat]")
--------------------------------------------------------------------------------
local function extend(M)
local _M = gg.future(M)
if SUPPORT_COMP_LISTS then
-- support for "for" / "if" comprehension suffixes in literal tables
local original_table_element = M.table.element
M.table.element = gg.expr{ name="table cell",
primary = original_table_element,
suffix = { name="table cell suffix",
{ "...", builder = dots_list_suffix_builder },
{ "for", _M.for_header, builder = for_list_suffix_builder },
{ "if", _M.expr, builder = if_list_suffix_builder } } }
M.table.content.builder = table_content_builder
end
if SUPPORT_IMPROVED_INDEXES then
-- Support for ranges and multiple indices in bracket suffixes
M.expr.suffix:del '['
M.expr.suffix:add{ name="table index/range",
"[", gg.list{
gg.sequence { _M.expr, gg.onkeyword{ "...", _M.expr } } ,
separators = { ",", ";" } },
"]", builder = index_builder }
end
if SUPPORT_IMPROVED_LOOPS then
local original_for_header = M.for_header
M.stat :del 'for'
M.stat :del 'while'
M.loop_suffix = gg.multisequence{
{ 'while', _M.expr, builder = |x| `Until{ `Op{ 'not', x[1] } } },
{ 'until', _M.expr, builder = |x| `Until{ x[1] } },
{ 'if', _M.expr, builder = |x| `If{ x[1] } },
{ 'for', original_for_header, builder = |x| x[1] } }
M.loop_suffix_list = gg.list{ _M.loop_suffix, terminators='do' }
M.stat :add{
'for', original_for_header, _M.loop_suffix_list, 'do', _M.block, 'end',
builder = loop_builder }
M.stat :add{
'while', _M.expr, _M.loop_suffix_list, 'do', _M.block, 'end',
builder = |x| loop_builder{ `While{x[1]}, x[2], x[3] } }
end
if SUPPORT_CONTINUE then
M.lexer :add 'continue'
M.stat :add{ 'continue', builder='Continue' }
end
end
return extend

View File

@@ -0,0 +1,400 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Glossary:
--
-- * term_seq: the tested stuff, a sequence of terms
-- * pattern_element: might match one term of a term seq. Represented
-- as expression ASTs.
-- * pattern_seq: might match a term_seq
-- * pattern_group: several pattern seqs, one of them might match
-- the term seq.
-- * case: pattern_group * guard option * block
-- * match_statement: tested term_seq * case list
--
-- Hence a complete match statement is a:
--
-- { list(expr), list{ list(list(expr)), expr or false, block } }
--
-- Implementation hints
-- ====================
--
-- The implementation is made as modular as possible, so that parts
-- can be reused in other extensions. The priviledged way to share
-- contextual information across functions is through the 'cfg' table
-- argument. Its fields include:
--
-- * code: code generated from pattern. A pattern_(element|seq|group)
-- is compiled as a sequence of instructions which will jump to
-- label [cfg.on_failure] if the tested term doesn't match.
--
-- * on_failure: name of the label where the code will jump if the
-- pattern doesn't match
--
-- * locals: names of local variables used by the pattern. This
-- includes bound variables, and temporary variables used to
-- destructure tables. Names are stored as keys of the table,
-- values are meaningless.
--
-- * after_success: label where the code must jump after a pattern
-- succeeded to capture a term, and the guard suceeded if there is
-- any, and the conditional block has run.
--
-- * ntmp: number of temporary variables used to destructurate table
-- in the current case.
--
-- Code generation is performed by acc_xxx() functions, which accumulate
-- code in cfg.code:
--
-- * acc_test(test, cfg) will generate a jump to cfg.on_failure
-- *when the test returns TRUE*
--
-- * acc_stat accumulates a statement
--
-- * acc_assign accumulate an assignment statement, and makes sure that
-- the LHS variable the registered as local in cfg.locals.
--
-------------------------------------------------------------------------------
-- TODO: hygiene wrt type()
-- TODO: cfg.ntmp isn't reset as often as it could. I'm not even sure
-- the corresponding locals are declared.
local checks = require 'checks'
local gg = require 'metalua.grammar.generator'
local pp = require 'metalua.pprint'
----------------------------------------------------------------------
-- This would have been best done through library 'metalua.walk',
-- but walk depends on match, so we have to break the dependency.
-- It replaces all instances of `...' in `ast' with `term', unless
-- it appears in a function.
----------------------------------------------------------------------
local function replace_dots (ast, term)
local function rec (node)
for i, child in ipairs(node) do
if type(child)~="table" then -- pass
elseif child.tag=='Dots' then
if term=='ambiguous' then
error ("You can't use `...' on the right of a match case when it appears "..
"more than once on the left")
else node[i] = term end
elseif child.tag=='Function' then return nil
else rec(child) end
end
end
return rec(ast)
end
local tmpvar_base = gg.gensym 'submatch.' [1]
local function next_tmpvar(cfg)
assert (cfg.ntmp, "No cfg.ntmp imbrication level in the match compiler")
cfg.ntmp = cfg.ntmp+1
return `Id{ tmpvar_base .. cfg.ntmp }
end
-- Code accumulators
local acc_stat = |x,cfg| table.insert (cfg.code, x)
local acc_test = |x,cfg| acc_stat(+{stat: if -{x} then -{`Goto{cfg.on_failure}} end}, cfg)
-- lhs :: `Id{ string }
-- rhs :: expr
local function acc_assign (lhs, rhs, cfg)
assert(lhs.tag=='Id')
cfg.locals[lhs[1]] = true
acc_stat (`Set{ {lhs}, {rhs} }, cfg)
end
local literal_tags = { String=1, Number=1, True=1, False=1, Nil=1 }
-- pattern :: `Id{ string }
-- term :: expr
local function id_pattern_element_builder (pattern, term, cfg)
assert (pattern.tag == "Id")
if pattern[1] == "_" then
-- "_" is used as a dummy var ==> no assignment, no == checking
cfg.locals._ = true
elseif cfg.locals[pattern[1]] then
-- This var is already bound ==> test for equality
acc_test (+{ -{term} ~= -{pattern} }, cfg)
else
-- Free var ==> bind it, and remember it for latter linearity checking
acc_assign (pattern, term, cfg)
cfg.locals[pattern[1]] = true
end
end
-- mutually recursive with table_pattern_element_builder
local pattern_element_builder
-- pattern :: pattern and `Table{ }
-- term :: expr
local function table_pattern_element_builder (pattern, term, cfg)
local seen_dots, len = false, 0
acc_test (+{ type( -{term} ) ~= "table" }, cfg)
for i = 1, #pattern do
local key, sub_pattern
if pattern[i].tag=="Pair" then -- Explicit key/value pair
key, sub_pattern = unpack (pattern[i])
assert (literal_tags[key.tag], "Invalid key")
else -- Implicit key
len, key, sub_pattern = len+1, `Number{ len+1 }, pattern[i]
end
-- '...' can only appear in final position
-- Could be fixed actually...
assert (not seen_dots, "Wrongly placed `...' ")
if sub_pattern.tag == "Id" then
-- Optimization: save a useless [ v(n+1)=v(n).key ]
id_pattern_element_builder (sub_pattern, `Index{ term, key }, cfg)
if sub_pattern[1] ~= "_" then
acc_test (+{ -{sub_pattern} == nil }, cfg)
end
elseif sub_pattern.tag == "Dots" then
-- Remember where the capture is, and thatt arity checking shouldn't occur
seen_dots = true
else
-- Business as usual:
local v2 = next_tmpvar(cfg)
acc_assign (v2, `Index{ term, key }, cfg)
pattern_element_builder (sub_pattern, v2, cfg)
-- TODO: restore ntmp?
end
end
if seen_dots then -- remember how to retrieve `...'
-- FIXME: check, but there might be cases where the variable -{term}
-- will be overridden in contrieved tables.
-- ==> save it now, and clean the setting statement if unused
if cfg.dots_replacement then cfg.dots_replacement = 'ambiguous'
else cfg.dots_replacement = +{ select (-{`Number{len}}, unpack(-{term})) } end
else -- Check arity
acc_test (+{ #-{term} ~= -{`Number{len}} }, cfg)
end
end
-- mutually recursive with pattern_element_builder
local eq_pattern_element_builder, regexp_pattern_element_builder
-- Concatenate code in [cfg.code], that will jump to label
-- [cfg.on_failure] if [pattern] doesn't match [term]. [pattern]
-- should be an identifier, or at least cheap to compute and
-- side-effects free.
--
-- pattern :: pattern_element
-- term :: expr
function pattern_element_builder (pattern, term, cfg)
if literal_tags[pattern.tag] then
acc_test (+{ -{term} ~= -{pattern} }, cfg)
elseif "Id" == pattern.tag then
id_pattern_element_builder (pattern, term, cfg)
elseif "Op" == pattern.tag and "div" == pattern[1] then
regexp_pattern_element_builder (pattern, term, cfg)
elseif "Op" == pattern.tag and "eq" == pattern[1] then
eq_pattern_element_builder (pattern, term, cfg)
elseif "Table" == pattern.tag then
table_pattern_element_builder (pattern, term, cfg)
else
error ("Invalid pattern at "..
tostring(pattern.lineinfo)..
": "..pp.tostring(pattern, {hide_hash=true}))
end
end
function eq_pattern_element_builder (pattern, term, cfg)
local _, pat1, pat2 = unpack (pattern)
local ntmp_save = cfg.ntmp
pattern_element_builder (pat1, term, cfg)
cfg.ntmp = ntmp_save
pattern_element_builder (pat2, term, cfg)
end
-- pattern :: `Op{ 'div', string, list{`Id string} or `Id{ string }}
-- term :: expr
local function regexp_pattern_element_builder (pattern, term, cfg)
local op, regexp, sub_pattern = unpack(pattern)
-- Sanity checks --
assert (op=='div', "Don't know what to do with that op in a pattern")
assert (regexp.tag=="String",
"Left hand side operand for '/' in a pattern must be "..
"a literal string representing a regular expression")
if sub_pattern.tag=="Table" then
for _, x in ipairs(sub_pattern) do
assert (x.tag=="Id" or x.tag=='Dots',
"Right hand side operand for '/' in a pattern must be "..
"a list of identifiers")
end
else
assert (sub_pattern.tag=="Id",
"Right hand side operand for '/' in a pattern must be "..
"an identifier or a list of identifiers")
end
-- Regexp patterns can only match strings
acc_test (+{ type(-{term}) ~= 'string' }, cfg)
-- put all captures in a list
local capt_list = +{ { string.strmatch(-{term}, -{regexp}) } }
-- save them in a var_n for recursive decomposition
local v2 = next_tmpvar(cfg)
acc_stat (+{stat: local -{v2} = -{capt_list} }, cfg)
-- was capture successful?
acc_test (+{ not next (-{v2}) }, cfg)
pattern_element_builder (sub_pattern, v2, cfg)
end
-- Jumps to [cfg.on_faliure] if pattern_seq doesn't match
-- term_seq.
local function pattern_seq_builder (pattern_seq, term_seq, cfg)
if #pattern_seq ~= #term_seq then error ("Bad seq arity") end
cfg.locals = { } -- reset bound variables between alternatives
for i=1, #pattern_seq do
cfg.ntmp = 1 -- reset the tmp var generator
pattern_element_builder(pattern_seq[i], term_seq[i], cfg)
end
end
--------------------------------------------------
-- for each case i:
-- pattern_seq_builder_i:
-- * on failure, go to on_failure_i
-- * on success, go to on_success
-- label on_success:
-- block
-- goto after_success
-- label on_failure_i
--------------------------------------------------
local function case_builder (case, term_seq, cfg)
local patterns_group, guard, block = unpack(case)
local on_success = gg.gensym 'on_success' [1]
for i = 1, #patterns_group do
local pattern_seq = patterns_group[i]
cfg.on_failure = gg.gensym 'match_fail' [1]
cfg.dots_replacement = false
pattern_seq_builder (pattern_seq, term_seq, cfg)
if i<#patterns_group then
acc_stat (`Goto{on_success}, cfg)
acc_stat (`Label{cfg.on_failure}, cfg)
end
end
acc_stat (`Label{on_success}, cfg)
if guard then acc_test (+{not -{guard}}, cfg) end
if cfg.dots_replacement then
replace_dots (block, cfg.dots_replacement)
end
block.tag = 'Do'
acc_stat (block, cfg)
acc_stat (`Goto{cfg.after_success}, cfg)
acc_stat (`Label{cfg.on_failure}, cfg)
end
local function match_builder (x)
local term_seq, cases = unpack(x)
local cfg = {
code = `Do{ },
after_success = gg.gensym "_after_success" }
-- Some sharing issues occur when modifying term_seq,
-- so it's replaced by a copy new_term_seq.
-- TODO: clean that up, and re-suppress the useless copies
-- (cf. remarks about capture bug below).
local new_term_seq = { }
local match_locals
-- Make sure that all tested terms are variables or literals
for i=1, #term_seq do
local t = term_seq[i]
-- Capture problem: the following would compile wrongly:
-- `match x with x -> end'
-- Temporary workaround: suppress the condition, so that
-- all external variables are copied into unique names.
--if t.tag ~= 'Id' and not literal_tags[t.tag] then
local v = gg.gensym 'v'
if not match_locals then match_locals = `Local{ {v}, {t} } else
table.insert(match_locals[1], v)
table.insert(match_locals[2], t)
end
new_term_seq[i] = v
--end
end
term_seq = new_term_seq
if match_locals then acc_stat(match_locals, cfg) end
for i=1, #cases do
local case_cfg = {
after_success = cfg.after_success,
code = `Do{ }
-- locals = { } -- unnecessary, done by pattern_seq_builder
}
case_builder (cases[i], term_seq, case_cfg)
if next (case_cfg.locals) then
local case_locals = { }
table.insert (case_cfg.code, 1, `Local{ case_locals, { } })
for v, _ in pairs (case_cfg.locals) do
table.insert (case_locals, `Id{ v })
end
end
acc_stat(case_cfg.code, cfg)
end
local li = `String{tostring(cases.lineinfo)}
acc_stat(+{error('mismatch at '..-{li})}, cfg)
acc_stat(`Label{cfg.after_success}, cfg)
return cfg.code
end
----------------------------------------------------------------------
-- Syntactical front-end
----------------------------------------------------------------------
local function extend(M)
local _M = gg.future(M)
checks('metalua.compiler.parser')
M.lexer:add{ "match", "with", "->" }
M.block.terminators:add "|"
local match_cases_list_parser = gg.list{ name = "match cases list",
gg.sequence{ name = "match case",
gg.list{ name = "match case patterns list",
primary = _M.expr_list,
separators = "|",
terminators = { "->", "if" } },
gg.onkeyword{ "if", _M.expr, consume = true },
"->",
_M.block },
separators = "|",
terminators = "end" }
M.stat:add{ name = "match statement",
"match",
_M.expr_list,
"with", gg.optkeyword "|",
match_cases_list_parser,
"end",
builder = |x| match_builder{ x[1], x[3] } }
end
return extend

View File

@@ -0,0 +1,834 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Summary: parser generator. Collection of higher order functors,
-- which allow to build and combine parsers. Relies on a lexer
-- that supports the same API as the one exposed in mll.lua.
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
--
-- Parser generators:
-- * [gg.sequence()]
-- * [gg.multisequence()]
-- * [gg.expr()]
-- * [gg.list()]
-- * [gg.onkeyword()]
-- * [gg.optkeyword()]
--
-- Other functions:
-- * [gg.parse_error()]
-- * [gg.make_parser()]
-- * [gg.is_parser()]
--
--------------------------------------------------------------------------------
local M = { }
local checks = require 'checks'
local lexer = require 'metalua.grammar.lexer'
local pp = require 'metalua.pprint'
--------------------------------------------------------------------------------
-- Symbol generator: [gensym()] returns a guaranteed-to-be-unique identifier.
-- The main purpose is to avoid variable capture in macros.
--
-- If a string is passed as an argument, theis string will be part of the
-- id name (helpful for macro debugging)
--------------------------------------------------------------------------------
local gensymidx = 0
function M.gensym (arg)
gensymidx = gensymidx + 1
return { tag="Id", string.format(".%i.%s", gensymidx, arg or "")}
end
-------------------------------------------------------------------------------
-- parser metatable, which maps __call to method parse, and adds some
-- error tracing boilerplate.
-------------------------------------------------------------------------------
local parser_metatable = { }
function parser_metatable :__call (lx, ...)
return self :parse (lx, ...)
end
-------------------------------------------------------------------------------
-- Turn a table into a parser, mainly by setting the metatable.
-------------------------------------------------------------------------------
function M.make_parser(kind, p)
p.kind = kind
if not p.transformers then p.transformers = { } end
function p.transformers:add (x)
table.insert (self, x)
end
setmetatable (p, parser_metatable)
return p
end
-------------------------------------------------------------------------------
-- Return true iff [x] is a parser.
-- If it's a gg-generated parser, return the name of its kind.
-------------------------------------------------------------------------------
function M.is_parser (x)
return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind
end
-------------------------------------------------------------------------------
-- Parse a sequence, without applying builder nor transformers.
-------------------------------------------------------------------------------
local function raw_parse_sequence (lx, p)
local r = { }
for i=1, #p do
local e=p[i]
if type(e) == "string" then
local kw = lx :next()
if not lx :is_keyword (kw, e) then
M.parse_error(
lx, "A keyword was expected, probably `%s'.", e)
end
elseif M.is_parser (e) then
table.insert (r, e(lx))
else -- Invalid parser definition, this is *not* a parsing error
error(string.format(
"Sequence `%s': element #%i is neither a string nor a parser: %s",
p.name, i, pp.tostring(e)))
end
end
return r
end
-------------------------------------------------------------------------------
-- Parse a multisequence, without applying multisequence transformers.
-- The sequences are completely parsed.
-------------------------------------------------------------------------------
local function raw_parse_multisequence (lx, sequence_table, default)
local seq_parser = sequence_table[lx:is_keyword(lx:peek())]
if seq_parser then return seq_parser (lx)
elseif default then return default (lx)
else return false end
end
-------------------------------------------------------------------------------
-- Applies all transformers listed in parser on ast.
-------------------------------------------------------------------------------
local function transform (ast, parser, fli, lli)
if parser.transformers then
for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end
end
if type(ast) == 'table' then
local ali = ast.lineinfo
if not ali or ali.first~=fli or ali.last~=lli then
ast.lineinfo = lexer.new_lineinfo(fli, lli)
end
end
return ast
end
-------------------------------------------------------------------------------
-- Generate a tracable parsing error (not implemented yet)
-------------------------------------------------------------------------------
function M.parse_error(lx, fmt, ...)
local li = lx:lineinfo_left()
local file, line, column, offset, positions
if li then
file, line, column, offset = li.source, li.line, li.column, li.offset
positions = { first = li, last = li }
else
line, column, offset = -1, -1, -1
end
local msg = string.format("line %i, char %i: "..fmt, line, column, ...)
if file and file~='?' then msg = "file "..file..", "..msg end
local src = lx.src
if offset>0 and src then
local i, j = offset, offset
while src:sub(i,i) ~= '\n' and i>=0 do i=i-1 end
while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end
local srcline = src:sub (i+1, j-1)
local idx = string.rep (" ", column).."^"
msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx)
end
--lx :kill()
error(msg)
end
-------------------------------------------------------------------------------
--
-- Sequence parser generator
--
-------------------------------------------------------------------------------
-- Input fields:
--
-- * [builder]: how to build an AST out of sequence parts. let [x] be the list
-- of subparser results (keywords are simply omitted). [builder] can be:
-- - [nil], in which case the result of parsing is simply [x]
-- - a string, which is then put as a tag on [x]
-- - a function, which takes [x] as a parameter and returns an AST.
--
-- * [name]: the name of the parser. Used for debug messages
--
-- * [transformers]: a list of AST->AST functions, applied in order on ASTs
-- returned by the parser.
--
-- * Table-part entries corresponds to keywords (strings) and subparsers
-- (function and callable objects).
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "sequence"
-- * [name] is set, if it wasn't in the input.
--
-------------------------------------------------------------------------------
function M.sequence (p)
M.make_parser ("sequence", p)
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p:parse (lx)
-- Raw parsing:
local fli = lx:lineinfo_right()
local seq = raw_parse_sequence (lx, self)
local lli = lx:lineinfo_left()
-- Builder application:
local builder, tb = self.builder, type (self.builder)
if tb == "string" then seq.tag = builder
elseif tb == "function" or builder and builder.__call then seq = builder(seq)
elseif builder == nil then -- nothing
else error ("Invalid builder of type "..tb.." in sequence") end
seq = transform (seq, self, fli, lli)
assert (not seq or seq.lineinfo)
return seq
end
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
-- Try to build a proper name
if p.name then
-- don't touch existing name
elseif type(p[1])=="string" then -- find name based on 1st keyword
if #p==1 then p.name=p[1]
elseif type(p[#p])=="string" then
p.name = p[1] .. " ... " .. p[#p]
else p.name = p[1] .. " ..." end
else -- can't find a decent name
p.name = "unnamed_sequence"
end
return p
end --</sequence>
-------------------------------------------------------------------------------
--
-- Multiple, keyword-driven, sequence parser generator
--
-------------------------------------------------------------------------------
-- in [p], useful fields are:
--
-- * [transformers]: as usual
--
-- * [name]: as usual
--
-- * Table-part entries must be sequence parsers, or tables which can
-- be turned into a sequence parser by [gg.sequence]. These
-- sequences must start with a keyword, and this initial keyword
-- must be different for each sequence. The table-part entries will
-- be removed after [gg.multisequence] returns.
--
-- * [default]: the parser to run if the next keyword in the lexer is
-- none of the registered initial keywords. If there's no default
-- parser and no suitable initial keyword, the multisequence parser
-- simply returns [false].
--
-- After creation, the following fields are added:
--
-- * [parse] the parsing function lexer->AST
--
-- * [sequences] the table of sequences, indexed by initial keywords.
--
-- * [add] method takes a sequence parser or a config table for
-- [gg.sequence], and adds/replaces the corresponding sequence
-- parser. If the keyword was already used, the former sequence is
-- removed and a warning is issued.
--
-- * [get] method returns a sequence by its initial keyword
--
-- * [kind] == "multisequence"
--
-------------------------------------------------------------------------------
function M.multisequence (p)
M.make_parser ("multisequence", p)
-------------------------------------------------------------------
-- Add a sequence (might be just a config table for [gg.sequence])
-------------------------------------------------------------------
function p :add (s)
-- compile if necessary:
local keyword = type(s)=='table' and s[1]
if type(s)=='table' and not M.is_parser(s) then M.sequence(s) end
if M.is_parser(s)~='sequence' or type(keyword)~='string' then
if self.default then -- two defaults
error ("In a multisequence parser, all but one sequences "..
"must start with a keyword")
else self.default = s end -- first default
else
if self.sequences[keyword] then -- duplicate keyword
-- TODO: warn that initial keyword `keyword` is overloaded in multiseq
end
self.sequences[keyword] = s
end
end -- </multisequence.add>
-------------------------------------------------------------------
-- Get the sequence starting with this keyword. [kw :: string]
-------------------------------------------------------------------
function p :get (kw) return self.sequences [kw] end
-------------------------------------------------------------------
-- Remove the sequence starting with keyword [kw :: string]
-------------------------------------------------------------------
function p :del (kw)
if not self.sequences[kw] then
-- TODO: warn that we try to delete a non-existent entry
end
local removed = self.sequences[kw]
self.sequences[kw] = nil
return removed
end
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p :parse (lx)
local fli = lx:lineinfo_right()
local x = raw_parse_multisequence (lx, self.sequences, self.default)
local lli = lx:lineinfo_left()
return transform (x, self, fli, lli)
end
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
-- Register the sequences passed to the constructor. They're going
-- from the array part of the parser to the hash part of field
-- [sequences]
p.sequences = { }
for i=1, #p do p :add (p[i]); p[i] = nil end
-- FIXME: why is this commented out?
--if p.default and not is_parser(p.default) then sequence(p.default) end
return p
end --</multisequence>
-------------------------------------------------------------------------------
--
-- Expression parser generator
--
-------------------------------------------------------------------------------
--
-- Expression configuration relies on three tables: [prefix], [infix]
-- and [suffix]. Moreover, the primary parser can be replaced by a
-- table: in this case the [primary] table will be passed to
-- [gg.multisequence] to create a parser.
--
-- Each of these tables is a modified multisequence parser: the
-- differences with respect to regular multisequence config tables are:
--
-- * the builder takes specific parameters:
-- - for [prefix], it takes the result of the prefix sequence parser,
-- and the prefixed expression
-- - for [infix], it takes the left-hand-side expression, the results
-- of the infix sequence parser, and the right-hand-side expression.
-- - for [suffix], it takes the suffixed expression, and the result
-- of the suffix sequence parser.
--
-- * the default field is a list, with parameters:
-- - [parser] the raw parsing function
-- - [transformers], as usual
-- - [prec], the operator's precedence
-- - [assoc] for [infix] table, the operator's associativity, which
-- can be "left", "right" or "flat" (default to left)
--
-- In [p], useful fields are:
-- * [transformers]: as usual
-- * [name]: as usual
-- * [primary]: the atomic expression parser, or a multisequence config
-- table (mandatory)
-- * [prefix]: prefix operators config table, see above.
-- * [infix]: infix operators config table, see above.
-- * [suffix]: suffix operators config table, see above.
--
-- After creation, these fields are added:
-- * [kind] == "expr"
-- * [parse] as usual
-- * each table is turned into a multisequence, and therefore has an
-- [add] method
--
-------------------------------------------------------------------------------
function M.expr (p)
M.make_parser ("expr", p)
-------------------------------------------------------------------
-- parser method.
-- In addition to the lexer, it takes an optional precedence:
-- it won't read expressions whose precedence is lower or equal
-- to [prec].
-------------------------------------------------------------------
function p :parse (lx, prec)
prec = prec or 0
------------------------------------------------------
-- Extract the right parser and the corresponding
-- options table, for (pre|in|suff)fix operators.
-- Options include prec, assoc, transformers.
------------------------------------------------------
local function get_parser_info (tab)
local p2 = tab :get (lx :is_keyword (lx :peek()))
if p2 then -- keyword-based sequence found
local function parser(lx) return raw_parse_sequence(lx, p2) end
return parser, p2
else -- Got to use the default parser
local d = tab.default
if d then return d.parse or d.parser, d
else return false, false end
end
end
------------------------------------------------------
-- Look for a prefix sequence. Multiple prefixes are
-- handled through the recursive [p.parse] call.
-- Notice the double-transform: one for the primary
-- expr, and one for the one with the prefix op.
------------------------------------------------------
local function handle_prefix ()
local fli = lx :lineinfo_right()
local p2_func, p2 = get_parser_info (self.prefix)
local op = p2_func and p2_func (lx)
if op then -- Keyword-based sequence found
local ili = lx :lineinfo_right() -- Intermediate LineInfo
local e = p2.builder (op, self :parse (lx, p2.prec))
local lli = lx :lineinfo_left()
return transform (transform (e, p2, ili, lli), self, fli, lli)
else -- No prefix found, get a primary expression
local e = self.primary(lx)
local lli = lx :lineinfo_left()
return transform (e, self, fli, lli)
end
end --</expr.parse.handle_prefix>
------------------------------------------------------
-- Look for an infix sequence+right-hand-side operand.
-- Return the whole binary expression result,
-- or false if no operator was found.
------------------------------------------------------
local function handle_infix (e)
local p2_func, p2 = get_parser_info (self.infix)
if not p2 then return false end
-----------------------------------------
-- Handle flattening operators: gather all operands
-- of the series in [list]; when a different operator
-- is found, stop, build from [list], [transform] and
-- return.
-----------------------------------------
if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then
local fli = lx:lineinfo_right()
local pflat, list = p2, { e }
repeat
local op = p2_func(lx)
if not op then break end
table.insert (list, self:parse (lx, p2.prec))
local _ -- We only care about checking that p2==pflat
_, p2 = get_parser_info (self.infix)
until p2 ~= pflat
local e2 = pflat.builder (list)
local lli = lx:lineinfo_left()
return transform (transform (e2, pflat, fli, lli), self, fli, lli)
-----------------------------------------
-- Handle regular infix operators: [e] the LHS is known,
-- just gather the operator and [e2] the RHS.
-- Result goes in [e3].
-----------------------------------------
elseif p2.prec and p2.prec>prec or
p2.prec==prec and p2.assoc=="right" then
local fli = e.lineinfo.first -- lx:lineinfo_right()
local op = p2_func(lx)
if not op then return false end
local e2 = self:parse (lx, p2.prec)
local e3 = p2.builder (e, op, e2)
local lli = lx:lineinfo_left()
return transform (transform (e3, p2, fli, lli), self, fli, lli)
-----------------------------------------
-- Check for non-associative operators, and complain if applicable.
-----------------------------------------
elseif p2.assoc=="none" and p2.prec==prec then
M.parse_error (lx, "non-associative operator!")
-----------------------------------------
-- No infix operator suitable at that precedence
-----------------------------------------
else return false end
end --</expr.parse.handle_infix>
------------------------------------------------------
-- Look for a suffix sequence.
-- Return the result of suffix operator on [e],
-- or false if no operator was found.
------------------------------------------------------
local function handle_suffix (e)
-- FIXME bad fli, must take e.lineinfo.first
local p2_func, p2 = get_parser_info (self.suffix)
if not p2 then return false end
if not p2.prec or p2.prec>=prec then
--local fli = lx:lineinfo_right()
local fli = e.lineinfo.first
local op = p2_func(lx)
if not op then return false end
local lli = lx:lineinfo_left()
e = p2.builder (e, op)
e = transform (transform (e, p2, fli, lli), self, fli, lli)
return e
end
return false
end --</expr.parse.handle_suffix>
------------------------------------------------------
-- Parser body: read suffix and (infix+operand)
-- extensions as long as we're able to fetch more at
-- this precedence level.
------------------------------------------------------
local e = handle_prefix()
repeat
local x = handle_suffix (e); e = x or e
local y = handle_infix (e); e = y or e
until not (x or y)
-- No transform: it already happened in operators handling
return e
end --</expr.parse>
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
if not p.primary then p.primary=p[1]; p[1]=nil end
for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do
if not p[t] then p[t] = { } end
if not M.is_parser(p[t]) then M.multisequence(p[t]) end
end
function p:add(...) return self.primary:add(...) end
return p
end --</expr>
-------------------------------------------------------------------------------
--
-- List parser generator
--
-------------------------------------------------------------------------------
-- In [p], the following fields can be provided in input:
--
-- * [builder]: takes list of subparser results, returns AST
-- * [transformers]: as usual
-- * [name]: as usual
--
-- * [terminators]: list of strings representing the keywords which
-- might mark the end of the list. When non-empty, the list is
-- allowed to be empty. A string is treated as a single-element
-- table, whose element is that string, e.g. ["do"] is the same as
-- [{"do"}].
--
-- * [separators]: list of strings representing the keywords which can
-- separate elements of the list. When non-empty, one of these
-- keyword has to be found between each element. Lack of a separator
-- indicates the end of the list. A string is treated as a
-- single-element table, whose element is that string, e.g. ["do"]
-- is the same as [{"do"}]. If [terminators] is empty/nil, then
-- [separators] has to be non-empty.
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "list"
--
-------------------------------------------------------------------------------
function M.list (p)
M.make_parser ("list", p)
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p :parse (lx)
------------------------------------------------------
-- Used to quickly check whether there's a terminator
-- or a separator immediately ahead
------------------------------------------------------
local function peek_is_in (keywords)
return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end
local x = { }
local fli = lx :lineinfo_right()
-- if there's a terminator to start with, don't bother trying
local is_empty_list = self.terminators and (peek_is_in (self.terminators) or lx:peek().tag=="Eof")
if not is_empty_list then
repeat
local item = self.primary(lx)
table.insert (x, item) -- read one element
until
-- There's a separator list specified, and next token isn't in it.
-- Otherwise, consume it with [lx:next()]
self.separators and not(peek_is_in (self.separators) and lx:next()) or
-- Terminator token ahead
peek_is_in (self.terminators) or
-- Last reason: end of file reached
lx:peek().tag=="Eof"
end
local lli = lx:lineinfo_left()
-- Apply the builder. It can be a string, or a callable value,
-- or simply nothing.
local b = self.builder
if b then
if type(b)=="string" then x.tag = b -- b is a string, use it as a tag
elseif type(b)=="function" then x=b(x)
else
local bmt = getmetatable(b)
if bmt and bmt.__call then x=b(x) end
end
end
return transform (x, self, fli, lli)
end --</list.parse>
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
if not p.primary then p.primary = p[1]; p[1] = nil end
if type(p.terminators) == "string" then p.terminators = { p.terminators }
elseif p.terminators and #p.terminators == 0 then p.terminators = nil end
if type(p.separators) == "string" then p.separators = { p.separators }
elseif p.separators and #p.separators == 0 then p.separators = nil end
return p
end --</list>
-------------------------------------------------------------------------------
--
-- Keyword-conditioned parser generator
--
-------------------------------------------------------------------------------
--
-- Only apply a parser if a given keyword is found. The result of
-- [gg.onkeyword] parser is the result of the subparser (modulo
-- [transformers] applications).
--
-- lineinfo: the keyword is *not* included in the boundaries of the
-- resulting lineinfo. A review of all usages of gg.onkeyword() in the
-- implementation of metalua has shown that it was the appropriate choice
-- in every case.
--
-- Input fields:
--
-- * [name]: as usual
--
-- * [transformers]: as usual
--
-- * [peek]: if non-nil, the conditioning keyword is left in the lexeme
-- stream instead of being consumed.
--
-- * [primary]: the subparser.
--
-- * [keywords]: list of strings representing triggering keywords.
--
-- * Table-part entries can contain strings, and/or exactly one parser.
-- Strings are put in [keywords], and the parser is put in [primary].
--
-- After the call, the following fields will be set:
--
-- * [parse] the parsing method
-- * [kind] == "onkeyword"
-- * [primary]
-- * [keywords]
--
-------------------------------------------------------------------------------
function M.onkeyword (p)
M.make_parser ("onkeyword", p)
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p :parse (lx)
if lx :is_keyword (lx:peek(), unpack(self.keywords)) then
local fli = lx:lineinfo_right()
if not self.peek then lx:next() end
local content = self.primary (lx)
local lli = lx:lineinfo_left()
local li = content.lineinfo or { }
fli, lli = li.first or fli, li.last or lli
return transform (content, p, fli, lli)
else return false end
end
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
if not p.keywords then p.keywords = { } end
for _, x in ipairs(p) do
if type(x)=="string" then table.insert (p.keywords, x)
else assert (not p.primary and M.is_parser (x)); p.primary = x end
end
assert (next (p.keywords), "Missing trigger keyword in gg.onkeyword")
assert (p.primary, 'no primary parser in gg.onkeyword')
return p
end --</onkeyword>
-------------------------------------------------------------------------------
--
-- Optional keyword consummer pseudo-parser generator
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function. That function parses
-- one of the keywords passed as parameters, and returns it. It returns
-- [false] if no matching keyword is found.
--
-- Notice that tokens returned by lexer already carry lineinfo, therefore
-- there's no need to add them, as done usually through transform() calls.
-------------------------------------------------------------------------------
function M.optkeyword (...)
local args = {...}
if type (args[1]) == "table" then
assert (#args == 1)
args = args[1]
end
for _, v in ipairs(args) do assert (type(v)=="string") end
return function (lx)
local x = lx:is_keyword (lx:peek(), unpack (args))
if x then lx:next(); return x
else return false end
end
end
-------------------------------------------------------------------------------
--
-- Run a parser with a special lexer
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function.
-- First argument is the lexer class to be used with the parser,
-- 2nd is the parser itself.
-- The resulting parser returns whatever the argument parser does.
--
-------------------------------------------------------------------------------
function M.with_lexer(new_lexer, parser)
-------------------------------------------------------------------
-- Most gg functions take their parameters in a table, so it's
-- better to silently accept when with_lexer{ } is called with
-- its arguments in a list:
-------------------------------------------------------------------
if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then
return M.with_lexer(unpack(new_lexer))
end
-------------------------------------------------------------------
-- Save the current lexer, switch it for the new one, run the parser,
-- restore the previous lexer, even if the parser caused an error.
-------------------------------------------------------------------
return function (lx)
local old_lexer = getmetatable(lx)
lx:sync()
setmetatable(lx, new_lexer)
local status, result = pcall(parser, lx)
lx:sync()
setmetatable(lx, old_lexer)
if status then return result else error(result) end
end
end
--------------------------------------------------------------------------------
--
-- Make sure a parser is used and returns successfully.
--
--------------------------------------------------------------------------------
function M.nonempty(primary)
local p = M.make_parser('non-empty list', { primary = primary, name=primary.name })
function p :parse (lx)
local fli = lx:lineinfo_right()
local content = self.primary (lx)
local lli = lx:lineinfo_left()
local li = content.lineinfo or { }
fli, lli = li.first or fli, li.last or lli
if #content == 0 then
M.parse_error (lx, "`%s' must not be empty.", self.name or "list")
else
return transform (content, self, fli, lli)
end
end
return p
end
local FUTURE_MT = { }
function FUTURE_MT:__tostring() return "<Proxy parser module>" end
function FUTURE_MT:__newindex(key, value) error "don't write in futures" end
function FUTURE_MT :__index (parser_name)
return function(...)
local p, m = rawget(self, '__path'), self.__module
if p then for _, name in ipairs(p) do
m=rawget(m, name)
if not m then error ("Submodule '"..name.."' undefined") end
end end
local f = rawget(m, parser_name)
if not f then error ("Parser '"..parser_name.."' undefined") end
return f(...)
end
end
function M.future(module, ...)
checks('table')
local path = ... and {...}
if path then for _, x in ipairs(path) do
assert(type(x)=='string', "Bad future arg")
end end
local self = { __module = module,
__path = path }
return setmetatable(self, FUTURE_MT)
end
return M

View File

@@ -0,0 +1,672 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
local checks = require 'checks'
local M = { }
local lexer = { alpha={ }, sym={ } }
lexer.__index=lexer
lexer.__type='lexer.stream'
M.lexer = lexer
local debugf = function() end
-- local debugf=printf
----------------------------------------------------------------------
-- Some locale settings produce bad results, e.g. French locale
-- expect float numbers to use commas instead of periods.
-- TODO: change number parser into something loclae-independent,
-- locales are nasty.
----------------------------------------------------------------------
os.setlocale('C')
local MT = { }
M.metatables=MT
----------------------------------------------------------------------
-- Create a new metatable, for a new class of objects.
----------------------------------------------------------------------
local function new_metatable(name)
local mt = { __type = 'lexer.'..name };
mt.__index = mt
MT[name] = mt
end
----------------------------------------------------------------------
-- Position: represent a point in a source file.
----------------------------------------------------------------------
new_metatable 'position'
local position_idx=1
function M.new_position(line, column, offset, source)
checks('number', 'number', 'number', 'string')
local id = position_idx; position_idx = position_idx+1
return setmetatable({line=line, column=column, offset=offset,
source=source, id=id}, MT.position)
end
function MT.position :__tostring()
return string.format("<%s%s|L%d|C%d|K%d>",
self.comments and "C|" or "",
self.source, self.line, self.column, self.offset)
end
----------------------------------------------------------------------
-- Position factory: convert offsets into line/column/offset positions.
----------------------------------------------------------------------
new_metatable 'position_factory'
function M.new_position_factory(src, src_name)
-- assert(type(src)=='string')
-- assert(type(src_name)=='string')
local lines = { 1 }
for offset in src :gmatch '\n()' do table.insert(lines, offset) end
local max = #src+1
table.insert(lines, max+1) -- +1 includes Eof
return setmetatable({ src_name=src_name, line2offset=lines, max=max },
MT.position_factory)
end
function MT.position_factory :get_position (offset)
-- assert(type(offset)=='number')
assert(offset<=self.max)
local line2offset = self.line2offset
local left = self.last_left or 1
if offset<line2offset[left] then left=1 end
local right = left+1
if line2offset[right]<=offset then right = right+1 end
if line2offset[right]<=offset then right = #line2offset end
while true do
-- print (" trying lines "..left.."/"..right..", offsets "..line2offset[left]..
-- "/"..line2offset[right].." for offset "..offset)
-- assert(line2offset[left]<=offset)
-- assert(offset<line2offset[right])
-- assert(left<right)
if left+1==right then break end
local middle = math.floor((left+right)/2)
if line2offset[middle]<=offset then left=middle else right=middle end
end
-- assert(left+1==right)
-- printf("found that offset %d is between %d and %d, hence on line %d",
-- offset, line2offset[left], line2offset[right], left)
local line = left
local column = offset - line2offset[line] + 1
self.last_left = left
return M.new_position(line, column, offset, self.src_name)
end
----------------------------------------------------------------------
-- Lineinfo: represent a node's range in a source file;
-- embed information about prefix and suffix comments.
----------------------------------------------------------------------
new_metatable 'lineinfo'
function M.new_lineinfo(first, last)
checks('lexer.position', 'lexer.position')
return setmetatable({first=first, last=last}, MT.lineinfo)
end
function MT.lineinfo :__tostring()
local fli, lli = self.first, self.last
local line = fli.line; if line~=lli.line then line =line ..'-'..lli.line end
local column = fli.column; if column~=lli.column then column=column..'-'..lli.column end
local offset = fli.offset; if offset~=lli.offset then offset=offset..'-'..lli.offset end
return string.format("<%s%s|L%s|C%s|K%s%s>",
fli.comments and "C|" or "",
fli.source, line, column, offset,
lli.comments and "|C" or "")
end
----------------------------------------------------------------------
-- Token: atomic Lua language element, with a category, a content,
-- and some lineinfo relating it to its original source.
----------------------------------------------------------------------
new_metatable 'token'
function M.new_token(tag, content, lineinfo)
--printf("TOKEN `%s{ %q, lineinfo = %s} boundaries %d, %d",
-- tag, content, tostring(lineinfo), lineinfo.first.id, lineinfo.last.id)
return setmetatable({tag=tag, lineinfo=lineinfo, content}, MT.token)
end
function MT.token :__tostring()
--return string.format("`%s{ %q, %s }", self.tag, self[1], tostring(self.lineinfo))
return string.format("`%s %q", self.tag, self[1])
end
----------------------------------------------------------------------
-- Comment: series of comment blocks with associated lineinfo.
-- To be attached to the tokens just before and just after them.
----------------------------------------------------------------------
new_metatable 'comment'
function M.new_comment(lines)
local first = lines[1].lineinfo.first
local last = lines[#lines].lineinfo.last
local lineinfo = M.new_lineinfo(first, last)
return setmetatable({lineinfo=lineinfo, unpack(lines)}, MT.comment)
end
function MT.comment :text()
local last_line = self[1].lineinfo.last.line
local acc = { }
for i, line in ipairs(self) do
local nreturns = line.lineinfo.first.line - last_line
table.insert(acc, ("\n"):rep(nreturns))
table.insert(acc, line[1])
end
return table.concat(acc)
end
function M.new_comment_line(text, lineinfo, nequals)
checks('string', 'lexer.lineinfo', '?number')
return { lineinfo = lineinfo, text, nequals }
end
----------------------------------------------------------------------
-- Patterns used by [lexer :extract] to decompose the raw string into
-- correctly tagged tokens.
----------------------------------------------------------------------
lexer.patterns = {
spaces = "^[ \r\n\t]*()",
short_comment = "^%-%-([^\n]*)\n?()",
--final_short_comment = "^%-%-([^\n]*)()$",
long_comment = "^%-%-%[(=*)%[\n?(.-)%]%1%]()",
long_string = "^%[(=*)%[\n?(.-)%]%1%]()",
number_mantissa = { "^%d+%.?%d*()", "^%d*%.%d+()" },
number_mantissa_hex = { "^%x+%.?%x*()", "^%x*%.%x+()" }, --Lua5.1 and Lua5.2
number_exponant = "^[eE][%+%-]?%d+()",
number_exponant_hex = "^[pP][%+%-]?%d+()", --Lua5.2
number_hex = "^0[xX]()",
word = "^([%a_][%w_]*)()"
}
----------------------------------------------------------------------
-- unescape a whole string, applying [unesc_digits] and
-- [unesc_letter] as many times as required.
----------------------------------------------------------------------
local function unescape_string (s)
-- Turn the digits of an escape sequence into the corresponding
-- character, e.g. [unesc_digits("123") == string.char(123)].
local function unesc_digits (backslashes, digits)
if #backslashes%2==0 then
-- Even number of backslashes, they escape each other, not the digits.
-- Return them so that unesc_letter() can treat them
return backslashes..digits
else
-- Remove the odd backslash, which escapes the number sequence.
-- The rest will be returned and parsed by unesc_letter()
backslashes = backslashes :sub (1,-2)
end
local k, j, i = digits :reverse() :byte(1, 3)
local z = string.byte "0"
local code = (k or z) + 10*(j or z) + 100*(i or z) - 111*z
if code > 255 then
error ("Illegal escape sequence '\\"..digits..
"' in string: ASCII codes must be in [0..255]")
end
local c = string.char (code)
if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\092b" --> "\\b")
return backslashes..c
end
-- Turn hex digits of escape sequence into char.
local function unesc_hex(backslashes, digits)
if #backslashes%2==0 then
return backslashes..'x'..digits
else
backslashes = backslashes :sub (1,-2)
end
local c = string.char(tonumber(digits,16))
if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\x5cb" --> "\\b")
return backslashes..c
end
-- Handle Lua 5.2 \z sequences
local function unesc_z(backslashes, more)
if #backslashes%2==0 then
return backslashes..more
else
return backslashes :sub (1,-2)
end
end
-- Take a letter [x], and returns the character represented by the
-- sequence ['\\'..x], e.g. [unesc_letter "n" == "\n"].
local function unesc_letter(x)
local t = {
a = "\a", b = "\b", f = "\f",
n = "\n", r = "\r", t = "\t", v = "\v",
["\\"] = "\\", ["'"] = "'", ['"'] = '"', ["\n"] = "\n" }
return t[x] or x
end
s = s: gsub ("(\\+)(z%s*)", unesc_z) -- Lua 5.2
s = s: gsub ("(\\+)([0-9][0-9]?[0-9]?)", unesc_digits)
s = s: gsub ("(\\+)x([0-9a-fA-F][0-9a-fA-F])", unesc_hex) -- Lua 5.2
s = s: gsub ("\\(%D)",unesc_letter)
return s
end
lexer.extractors = {
"extract_long_comment", "extract_short_comment",
"extract_short_string", "extract_word", "extract_number",
"extract_long_string", "extract_symbol" }
----------------------------------------------------------------------
-- Really extract next token from the raw string
-- (and update the index).
-- loc: offset of the position just after spaces and comments
-- previous_i: offset in src before extraction began
----------------------------------------------------------------------
function lexer :extract ()
local attached_comments = { }
local function gen_token(...)
local token = M.new_token(...)
if #attached_comments>0 then -- attach previous comments to token
local comments = M.new_comment(attached_comments)
token.lineinfo.first.comments = comments
if self.lineinfo_last_extracted then
self.lineinfo_last_extracted.comments = comments
end
attached_comments = { }
end
token.lineinfo.first.facing = self.lineinfo_last_extracted
self.lineinfo_last_extracted.facing = assert(token.lineinfo.first)
self.lineinfo_last_extracted = assert(token.lineinfo.last)
return token
end
while true do -- loop until a non-comment token is found
-- skip whitespaces
self.i = self.src:match (self.patterns.spaces, self.i)
if self.i>#self.src then
local fli = self.posfact :get_position (#self.src+1)
local lli = self.posfact :get_position (#self.src+1) -- ok?
local tok = gen_token("Eof", "eof", M.new_lineinfo(fli, lli))
tok.lineinfo.last.facing = lli
return tok
end
local i_first = self.i -- loc = position after whitespaces
-- try every extractor until a token is found
for _, extractor in ipairs(self.extractors) do
local tag, content, xtra = self [extractor] (self)
if tag then
local fli = self.posfact :get_position (i_first)
local lli = self.posfact :get_position (self.i-1)
local lineinfo = M.new_lineinfo(fli, lli)
if tag=='Comment' then
local prev_comment = attached_comments[#attached_comments]
if not xtra -- new comment is short
and prev_comment and not prev_comment[2] -- prev comment is short
and prev_comment.lineinfo.last.line+1==fli.line then -- adjascent lines
-- concat with previous comment
prev_comment[1] = prev_comment[1].."\n"..content -- TODO quadratic, BAD!
prev_comment.lineinfo.last = lli
else -- accumulate comment
local comment = M.new_comment_line(content, lineinfo, xtra)
table.insert(attached_comments, comment)
end
break -- back to skipping spaces
else -- not a comment: real token, then
return gen_token(tag, content, lineinfo)
end -- if token is a comment
end -- if token found
end -- for each extractor
end -- while token is a comment
end -- :extract()
----------------------------------------------------------------------
-- Extract a short comment.
----------------------------------------------------------------------
function lexer :extract_short_comment()
-- TODO: handle final_short_comment
local content, j = self.src :match (self.patterns.short_comment, self.i)
if content then self.i=j; return 'Comment', content, nil end
end
----------------------------------------------------------------------
-- Extract a long comment.
----------------------------------------------------------------------
function lexer :extract_long_comment()
local equals, content, j = self.src:match (self.patterns.long_comment, self.i)
if j then self.i = j; return "Comment", content, #equals end
end
----------------------------------------------------------------------
-- Extract a '...' or "..." short string.
----------------------------------------------------------------------
function lexer :extract_short_string()
local k = self.src :sub (self.i,self.i) -- first char
if k~=[[']] and k~=[["]] then return end -- no match'
local i = self.i + 1
local j = i
while true do
local x,y; x, j, y = self.src :match ("([\\\r\n"..k.."])()(.?)", j) -- next interesting char
if x == '\\' then
if y == 'z' then -- Lua 5.2 \z
j = self.src :match ("^%s*()", j+1)
else
j=j+1 -- escaped char
end
elseif x == k then break -- end of string
else
assert (not x or x=='\r' or x=='\n')
return nil, 'Unterminated string'
end
end
self.i = j
return 'String', unescape_string (self.src :sub (i,j-2))
end
----------------------------------------------------------------------
-- Extract Id or Keyword.
----------------------------------------------------------------------
function lexer :extract_word()
local word, j = self.src:match (self.patterns.word, self.i)
if word then
self.i = j
return (self.alpha [word] and 'Keyword' or 'Id'), word
end
end
----------------------------------------------------------------------
-- Extract Number.
----------------------------------------------------------------------
function lexer :extract_number()
local j = self.src:match(self.patterns.number_hex, self.i)
if j then
j = self.src:match (self.patterns.number_mantissa_hex[1], j) or
self.src:match (self.patterns.number_mantissa_hex[2], j)
if j then
j = self.src:match (self.patterns.number_exponant_hex, j) or j
end
else
j = self.src:match (self.patterns.number_mantissa[1], self.i) or
self.src:match (self.patterns.number_mantissa[2], self.i)
if j then
j = self.src:match (self.patterns.number_exponant, j) or j
end
end
if not j then return end
-- Number found, interpret with tonumber() and return it
local str = self.src:sub (self.i, j-1)
-- :TODO: tonumber on Lua5.2 floating hex may or may not work on Lua5.1
local n = tonumber (str)
if not n then error(str.." is not a valid number according to tonumber()") end
self.i = j
return 'Number', n
end
----------------------------------------------------------------------
-- Extract long string.
----------------------------------------------------------------------
function lexer :extract_long_string()
local _, content, j = self.src :match (self.patterns.long_string, self.i)
if j then self.i = j; return 'String', content end
end
----------------------------------------------------------------------
-- Extract symbol.
----------------------------------------------------------------------
function lexer :extract_symbol()
local k = self.src:sub (self.i,self.i)
local symk = self.sym [k] -- symbols starting with `k`
if not symk then
self.i = self.i + 1
return 'Keyword', k
end
for _, sym in pairs (symk) do
if sym == self.src:sub (self.i, self.i + #sym - 1) then
self.i = self.i + #sym
return 'Keyword', sym
end
end
self.i = self.i+1
return 'Keyword', k
end
----------------------------------------------------------------------
-- Add a keyword to the list of keywords recognized by the lexer.
----------------------------------------------------------------------
function lexer :add (w, ...)
assert(not ..., "lexer :add() takes only one arg, although possibly a table")
if type (w) == "table" then
for _, x in ipairs (w) do self :add (x) end
else
if w:match (self.patterns.word .. "$") then self.alpha [w] = true
elseif w:match "^%p%p+$" then
local k = w:sub(1,1)
local list = self.sym [k]
if not list then list = { }; self.sym [k] = list end
table.insert (list, w)
elseif w:match "^%p$" then return
else error "Invalid keyword" end
end
end
----------------------------------------------------------------------
-- Return the [n]th next token, without consuming it.
-- [n] defaults to 1. If it goes pass the end of the stream, an EOF
-- token is returned.
----------------------------------------------------------------------
function lexer :peek (n)
if not n then n=1 end
if n > #self.peeked then
for i = #self.peeked+1, n do
self.peeked [i] = self :extract()
end
end
return self.peeked [n]
end
----------------------------------------------------------------------
-- Return the [n]th next token, removing it as well as the 0..n-1
-- previous tokens. [n] defaults to 1. If it goes pass the end of the
-- stream, an EOF token is returned.
----------------------------------------------------------------------
function lexer :next (n)
n = n or 1
self :peek (n)
local a
for i=1,n do
a = table.remove (self.peeked, 1)
-- TODO: is this used anywhere? I think not. a.lineinfo.last may be nil.
--self.lastline = a.lineinfo.last.line
end
self.lineinfo_last_consumed = a.lineinfo.last
return a
end
----------------------------------------------------------------------
-- Returns an object which saves the stream's current state.
----------------------------------------------------------------------
-- FIXME there are more fields than that to save
function lexer :save () return { self.i; {unpack(self.peeked) } } end
----------------------------------------------------------------------
-- Restore the stream's state, as saved by method [save].
----------------------------------------------------------------------
-- FIXME there are more fields than that to restore
function lexer :restore (s) self.i=s[1]; self.peeked=s[2] end
----------------------------------------------------------------------
-- Resynchronize: cancel any token in self.peeked, by emptying the
-- list and resetting the indexes
----------------------------------------------------------------------
function lexer :sync()
local p1 = self.peeked[1]
if p1 then
local li_first = p1.lineinfo.first
if li_first.comments then li_first=li_first.comments.lineinfo.first end
self.i = li_first.offset
self.column_offset = self.i - li_first.column
self.peeked = { }
self.attached_comments = p1.lineinfo.first.comments or { }
end
end
----------------------------------------------------------------------
-- Take the source and offset of an old lexer.
----------------------------------------------------------------------
function lexer :takeover(old)
self :sync(); old :sync()
for _, field in ipairs{ 'i', 'src', 'attached_comments', 'posfact' } do
self[field] = old[field]
end
return self
end
----------------------------------------------------------------------
-- Return the current position in the sources. This position is between
-- two tokens, and can be within a space / comment area, and therefore
-- have a non-null width. :lineinfo_left() returns the beginning of the
-- separation area, :lineinfo_right() returns the end of that area.
--
-- ____ last consummed token ____ first unconsummed token
-- / /
-- XXXXX <spaces and comments> YYYYY
-- \____ \____
-- :lineinfo_left() :lineinfo_right()
----------------------------------------------------------------------
function lexer :lineinfo_right()
return self :peek(1).lineinfo.first
end
function lexer :lineinfo_left()
return self.lineinfo_last_consumed
end
----------------------------------------------------------------------
-- Create a new lexstream.
----------------------------------------------------------------------
function lexer :newstream (src_or_stream, name)
name = name or "?"
if type(src_or_stream)=='table' then -- it's a stream
return setmetatable ({ }, self) :takeover (src_or_stream)
elseif type(src_or_stream)=='string' then -- it's a source string
local src = src_or_stream
local pos1 = M.new_position(1, 1, 1, name)
local stream = {
src_name = name; -- Name of the file
src = src; -- The source, as a single string
peeked = { }; -- Already peeked, but not discarded yet, tokens
i = 1; -- Character offset in src
attached_comments = { },-- comments accumulator
lineinfo_last_extracted = pos1,
lineinfo_last_consumed = pos1,
posfact = M.new_position_factory (src_or_stream, name)
}
setmetatable (stream, self)
-- Skip initial sharp-bang for Unix scripts
-- FIXME: redundant with mlp.chunk()
if src and src :match "^#!" then
local endofline = src :find "\n"
stream.i = endofline and (endofline + 1) or #src
end
return stream
else
assert(false, ":newstream() takes a source string or a stream, not a "..
type(src_or_stream))
end
end
----------------------------------------------------------------------
-- If there's no ... args, return the token a (whose truth value is
-- true) if it's a `Keyword{ }, or nil. If there are ... args, they
-- have to be strings. if the token a is a keyword, and it's content
-- is one of the ... args, then returns it (it's truth value is
-- true). If no a keyword or not in ..., return nil.
----------------------------------------------------------------------
function lexer :is_keyword (a, ...)
if not a or a.tag ~= "Keyword" then return false end
local words = {...}
if #words == 0 then return a[1] end
for _, w in ipairs (words) do
if w == a[1] then return w end
end
return false
end
----------------------------------------------------------------------
-- Cause an error if the next token isn't a keyword whose content
-- is listed among ... args (which have to be strings).
----------------------------------------------------------------------
function lexer :check (...)
local words = {...}
local a = self :next()
local function err ()
error ("Got " .. tostring (a) ..
", expected one of these keywords : '" ..
table.concat (words,"', '") .. "'") end
if not a or a.tag ~= "Keyword" then err () end
if #words == 0 then return a[1] end
for _, w in ipairs (words) do
if w == a[1] then return w end
end
err ()
end
----------------------------------------------------------------------
--
----------------------------------------------------------------------
function lexer :clone()
local alpha_clone, sym_clone = { }, { }
for word in pairs(self.alpha) do alpha_clone[word]=true end
for letter, list in pairs(self.sym) do sym_clone[letter] = { unpack(list) } end
local clone = { alpha=alpha_clone, sym=sym_clone }
setmetatable(clone, self)
clone.__index = clone
return clone
end
----------------------------------------------------------------------
-- Cancel everything left in a lexer, all subsequent attempts at
-- `:peek()` or `:next()` will return `Eof`.
----------------------------------------------------------------------
function lexer :kill()
self.i = #self.src+1
self.peeked = { }
self.attached_comments = { }
self.lineinfo_last = self.posfact :get_position (#self.src+1)
end
return M

View File

@@ -0,0 +1,133 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local M = require "package" -- extend Lua's basic "package" module
local checks = require 'checks'
M.metalua_extension_prefix = 'metalua.extension.'
-- Initialize package.mpath from package.path
M.mpath = M.mpath or os.getenv 'LUA_MPATH' or
(M.path..";") :gsub("%.(lua[:;])", ".m%1") :sub(1, -2)
M.mcache = M.mcache or os.getenv 'LUA_MCACHE'
----------------------------------------------------------------------
-- resc(k) returns "%"..k if it's a special regular expression char,
-- or just k if it's normal.
----------------------------------------------------------------------
local regexp_magic = { }
for k in ("^$()%.[]*+-?") :gmatch "." do regexp_magic[k]="%"..k end
local function resc(k) return regexp_magic[k] or k end
----------------------------------------------------------------------
-- Take a Lua module name, return the open file and its name,
-- or <false> and an error message.
----------------------------------------------------------------------
function M.findfile(name, path_string)
local config_regexp = ("([^\n])\n"):rep(5):sub(1, -2)
local dir_sep, path_sep, path_mark, execdir, igmark =
M.config :match (config_regexp)
name = name:gsub ('%.', dir_sep)
local errors = { }
local path_pattern = string.format('[^%s]+', resc(path_sep))
for path in path_string:gmatch (path_pattern) do
--printf('path = %s, rpath_mark=%s, name=%s', path, resc(path_mark), name)
local filename = path:gsub (resc (path_mark), name)
--printf('filename = %s', filename)
local file = io.open (filename, 'rb')
if file then return file, filename end
table.insert(errors, string.format("\tno file %q", filename))
end
return false, '\n'..table.concat(errors, "\n")..'\n'
end
----------------------------------------------------------------------
-- Before compiling a metalua source module, try to find and load
-- a more recent bytecode dump. Requires lfs
----------------------------------------------------------------------
local function metalua_cache_loader(name, src_filename, src)
if not M.mcache:find('%?') then
-- This is highly suspicious...
print("WARNING: no '?' character in $LUA_MCACHE/package.mcache")
end
local mlc = require 'metalua.compiler'.new()
local lfs = require 'lfs'
local dir_sep = M.config:sub(1,1)
local dst_filename = M.mcache :gsub ('%?', (name:gsub('%.', dir_sep)))
local src_a = lfs.attributes(src_filename)
local src_date = src_a and src_a.modification or 0
local dst_a = lfs.attributes(dst_filename)
local dst_date = dst_a and dst_a.modification or 0
local delta = dst_date - src_date
local bytecode, file, msg
if delta <= 0 then
--print ("(need to recompile "..src_filename.." into "..dst_filename..")")
bytecode = mlc :src_to_bytecode (src, '@'..src_filename)
for x in dst_filename :gmatch('()'..dir_sep) do
lfs.mkdir(dst_filename:sub(1,x))
end
file, msg = io.open(dst_filename, 'wb')
if not file then error(msg) end
file :write (bytecode)
file :close()
else
file, msg = io.open(dst_filename, 'rb')
if not file then error(msg) end
bytecode = file :read '*a'
file :close()
end
return mlc :bytecode_to_function (bytecode, '@'..src_filename)
end
----------------------------------------------------------------------
-- Load a metalua source file.
----------------------------------------------------------------------
function M.metalua_loader (name)
local file, filename_or_msg = M.findfile (name, M.mpath)
if not file then return filename_or_msg end
local luastring = file:read '*a'
file:close()
if M.mcache and pcall(require, 'lfs') then
return metalua_cache_loader(name, filename_or_msg, luastring)
else return require 'metalua.compiler'.new() :src_to_function (luastring, '@'..filename_or_msg) end
end
----------------------------------------------------------------------
-- Placed after lua/luac loader, so precompiled files have
-- higher precedence.
----------------------------------------------------------------------
table.insert(M.loaders, M.metalua_loader)
----------------------------------------------------------------------
-- Load an extension.
----------------------------------------------------------------------
function extension (name, mlp)
local complete_name = M.metalua_extension_prefix..name
local extend_func = require (complete_name)
if not mlp.extensions[complete_name] then
local ast =extend_func(mlp)
mlp.extensions[complete_name] =extend_func
return ast
end
end
return M

View File

@@ -0,0 +1,295 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
----------------------------------------------------------------------
----------------------------------------------------------------------
----------------------------------------------------------------------
--
-- Lua objects pretty-printer
--
----------------------------------------------------------------------
----------------------------------------------------------------------
local M = { }
M.DEFAULT_CFG = {
hide_hash = false; -- Print the non-array part of tables?
metalua_tag = true; -- Use Metalua's backtick syntax sugar?
fix_indent = nil; -- If a number, number of indentation spaces;
-- If false, indent to the previous brace.
line_max = nil; -- If a number, tries to avoid making lines with
-- more than this number of chars.
initial_indent = 0; -- If a number, starts at this level of indentation
keywords = { }; -- Set of keywords which must not use Lua's field
-- shortcuts {["foo"]=...} -> {foo=...}
}
local function valid_id(cfg, x)
if type(x) ~= "string" then return false end
if not x:match "^[a-zA-Z_][a-zA-Z0-9_]*$" then return false end
if cfg.keywords and cfg.keywords[x] then return false end
return true
end
local __tostring_cache = setmetatable({ }, {__mode='k'})
-- Retrieve the string produced by `__tostring` metamethod if present,
-- return `false` otherwise. Cached in `__tostring_cache`.
local function __tostring(x)
local the_string = __tostring_cache[x]
if the_string~=nil then return the_string end
local mt = getmetatable(x)
if mt then
local __tostring = mt.__tostring
if __tostring then
the_string = __tostring(x)
__tostring_cache[x] = the_string
return the_string
end
end
if x~=nil then __tostring_cache[x] = false end -- nil is an illegal key
return false
end
local xlen -- mutually recursive with `xlen_type`
local xlen_cache = setmetatable({ }, {__mode='k'})
-- Helpers for the `xlen` function
local xlen_type = {
["nil"] = function ( ) return 3 end;
number = function (x) return #tostring(x) end;
boolean = function (x) return x and 4 or 5 end;
string = function (x) return #string.format("%q",x) end;
}
function xlen_type.table (adt, cfg, nested)
local custom_string = __tostring(adt)
if custom_string then return #custom_string end
-- Circular referenced objects are printed with the plain
-- `tostring` function in nested positions.
if nested [adt] then return #tostring(adt) end
nested [adt] = true
local has_tag = cfg.metalua_tag and valid_id(cfg, adt.tag)
local alen = #adt
local has_arr = alen>0
local has_hash = false
local x = 0
if not cfg.hide_hash then
-- first pass: count hash-part
for k, v in pairs(adt) do
if k=="tag" and has_tag then
-- this is the tag -> do nothing!
elseif type(k)=="number" and k<=alen and math.fmod(k,1)==0 and k>0 then
-- array-part pair -> do nothing!
else
has_hash = true
if valid_id(cfg, k) then x=x+#k
else x = x + xlen (k, cfg, nested) + 2 end -- count surrounding brackets
x = x + xlen (v, cfg, nested) + 5 -- count " = " and ", "
end
end
end
for i = 1, alen do x = x + xlen (adt[i], nested) + 2 end -- count ", "
nested[adt] = false -- No more nested calls
if not (has_tag or has_arr or has_hash) then return 3 end
if has_tag then x=x+#adt.tag+1 end
if not (has_arr or has_hash) then return x end
if not has_hash and alen==1 and type(adt[1])~="table" then
return x-2 -- substract extraneous ", "
end
return x+2 -- count "{ " and " }", substract extraneous ", "
end
-- Compute the number of chars it would require to display the table
-- on a single line. Helps to decide whether some carriage returns are
-- required. Since the size of each sub-table is required many times,
-- it's cached in [xlen_cache].
xlen = function (x, cfg, nested)
-- no need to compute length for 1-line prints
if not cfg.line_max then return 0 end
nested = nested or { }
if x==nil then return #"nil" end
local len = xlen_cache[x]
if len then return len end
local f = xlen_type[type(x)]
if not f then return #tostring(x) end
len = f (x, cfg, nested)
xlen_cache[x] = len
return len
end
local function consider_newline(p, len)
if not p.cfg.line_max then return end
if p.current_offset + len <= p.cfg.line_max then return end
if p.indent < p.current_offset then
p:acc "\n"; p:acc ((" "):rep(p.indent))
p.current_offset = p.indent
end
end
local acc_value
local acc_type = {
["nil"] = function(p) p:acc("nil") end;
number = function(p, adt) p:acc (tostring (adt)) end;
string = function(p, adt) p:acc ((string.format ("%q", adt):gsub("\\\n", "\\n"))) end;
boolean = function(p, adt) p:acc (adt and "true" or "false") end }
-- Indentation:
-- * if `cfg.fix_indent` is set to a number:
-- * add this number of space for each level of depth
-- * return to the line as soon as it flushes things further left
-- * if not, tabulate to one space after the opening brace.
-- * as a result, it never saves right-space to return before first element
function acc_type.table(p, adt)
if p.nested[adt] then p:acc(tostring(adt)); return end
p.nested[adt] = true
local has_tag = p.cfg.metalua_tag and valid_id(p.cfg, adt.tag)
local alen = #adt
local has_arr = alen>0
local has_hash = false
local previous_indent = p.indent
if has_tag then p:acc("`"); p:acc(adt.tag) end
local function indent(p)
if not p.cfg.fix_indent then p.indent = p.current_offset
else p.indent = p.indent + p.cfg.fix_indent end
end
-- First pass: handle hash-part
if not p.cfg.hide_hash then
for k, v in pairs(adt) do
if has_tag and k=='tag' then -- pass the 'tag' field
elseif type(k)=="number" and k<=alen and k>0 and math.fmod(k,1)==0 then
-- pass array-part keys (consecutive ints less than `#adt`)
else -- hash-part keys
if has_hash then p:acc ", " else -- 1st hash-part pair ever found
p:acc "{ "; indent(p)
end
-- Determine whether a newline is required
local is_id, expected_len=valid_id(p.cfg, k)
if is_id then expected_len=#k+xlen(v, p.cfg, p.nested)+#" = , "
else expected_len = xlen(k, p.cfg, p.nested)+xlen(v, p.cfg, p.nested)+#"[] = , " end
consider_newline(p, expected_len)
-- Print the key
if is_id then p:acc(k); p:acc " = " else
p:acc "["; acc_value (p, k); p:acc "] = "
end
acc_value (p, v) -- Print the value
has_hash = true
end
end
end
-- Now we know whether there's a hash-part, an array-part, and a tag.
-- Tag and hash-part are already printed if they're present.
if not has_tag and not has_hash and not has_arr then p:acc "{ }";
elseif has_tag and not has_hash and not has_arr then -- nothing, tag already in acc
else
assert (has_hash or has_arr) -- special case { } already handled
local no_brace = false
if has_hash and has_arr then p:acc ", "
elseif has_tag and not has_hash and alen==1 and type(adt[1])~="table" then
-- No brace required; don't print "{", remember not to print "}"
p:acc (" "); acc_value (p, adt[1]) -- indent= indent+(cfg.fix_indent or 0))
no_brace = true
elseif not has_hash then
-- Braces required, but not opened by hash-part handler yet
p:acc "{ "; indent(p)
end
-- 2nd pass: array-part
if not no_brace and has_arr then
local expected_len = xlen(adt[1], p.cfg, p.nested)
consider_newline(p, expected_len)
acc_value(p, adt[1]) -- indent+(cfg.fix_indent or 0)
for i=2, alen do
p:acc ", ";
consider_newline(p, xlen(adt[i], p.cfg, p.nested))
acc_value (p, adt[i]) --indent+(cfg.fix_indent or 0)
end
end
if not no_brace then p:acc " }" end
end
p.nested[adt] = false -- No more nested calls
p.indent = previous_indent
end
function acc_value(p, v)
local custom_string = __tostring(v)
if custom_string then p:acc(custom_string) else
local f = acc_type[type(v)]
if f then f(p, v) else p:acc(tostring(v)) end
end
end
-- FIXME: new_indent seems to be always nil?!s detection
-- FIXME: accumulator function should be configurable,
-- so that print() doesn't need to bufferize the whole string
-- before starting to print.
function M.tostring(t, cfg)
cfg = cfg or M.DEFAULT_CFG or { }
local p = {
cfg = cfg;
indent = 0;
current_offset = cfg.initial_indent or 0;
buffer = { };
nested = { };
acc = function(self, str)
table.insert(self.buffer, str)
self.current_offset = self.current_offset + #str
end;
}
acc_value(p, t)
return table.concat(p.buffer)
end
function M.print(...) return print(M.tostring(...)) end
function M.sprintf(fmt, ...)
local args={...}
for i, v in pairs(args) do
local t=type(v)
if t=='table' then args[i]=M.tostring(v)
elseif t=='nil' then args[i]='nil' end
end
return string.format(fmt, unpack(args))
end
function M.printf(...) print(M.sprintf(...)) end
return M

View File

@@ -0,0 +1,108 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-- Keep these global:
PRINT_AST = true
LINE_WIDTH = 60
PROMPT = "M> "
PROMPT2 = ">> "
local pp=require 'metalua.pprint'
local M = { }
mlc = require 'metalua.compiler'.new()
local readline
do -- set readline() to a line reader, either editline otr a default
local status, editline = pcall(require, 'editline')
if status then
local rl_handle = editline.init 'metalua'
readline = |p| rl_handle:read(p)
else
local status, rl = pcall(require, 'readline')
if status then
rl.set_options{histfile='~/.metalua_history', keeplines=100, completion=false }
readline = rl.readline
else -- neither editline nor readline available
function readline (p)
io.write (p)
io.flush ()
return io.read '*l'
end
end
end
end
local function reached_eof(lx, msg)
return lx:peek().tag=='Eof' or msg:find "token `Eof"
end
function M.run()
pp.printf ("Metalua, interactive REPLoop.\n"..
"(c) 2006-2013 <metalua@gmail.com>")
local lines = { }
while true do
local src, lx, ast, f, results, success
repeat
local line = readline(next(lines) and PROMPT2 or PROMPT)
if not line then print(); os.exit(0) end -- line==nil iff eof on stdin
if not next(lines) then
line = line:gsub('^%s*=', 'return ')
end
table.insert(lines, line)
src = table.concat (lines, "\n")
until #line>0
lx = mlc :src_to_lexstream(src)
success, ast = pcall(mlc.lexstream_to_ast, mlc, lx)
if success then
success, f = pcall(mlc.ast_to_function, mlc, ast, '=stdin')
if success then
results = { xpcall(f, debug.traceback) }
success = table.remove (results, 1)
if success then
-- Success!
for _, x in ipairs(results) do
pp.print(x, {line_max=LINE_WIDTH, metalua_tag=true})
end
lines = { }
else
print "Evaluation error:"
print (results[1])
lines = { }
end
else
print "Can't compile into bytecode:"
print (f)
lines = { }
end
else
-- If lx has been read entirely, try to read
-- another line before failing.
if not reached_eof(lx, ast) then
print "Can't compile source into AST:"
print (ast)
lines = { }
end
end
end
end
return M

View File

@@ -0,0 +1,488 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
local walk = require 'metalua.treequery.walk'
local M = { }
-- support for old-style modules
treequery = M
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- multimap helper mmap: associate a key to a set of values
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
local function mmap_add (mmap, node, x)
if node==nil then return false end
local set = mmap[node]
if set then set[x] = true
else mmap[node] = {[x]=true} end
end
-- currently unused, I throw the whole set away
local function mmap_remove (mmap, node, x)
local set = mmap[node]
if not set then return false
elseif not set[x] then return false
elseif next(set) then set[x]=nil
else mmap[node] = nil end
return true
end
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- TreeQuery object.
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
local ACTIVE_SCOPE = setmetatable({ }, {__mode="k"})
-- treequery metatable
local Q = { }; Q.__index = Q
--- treequery constructor
-- the resultingg object will allow to filter ans operate on the AST
-- @param root the AST to visit
-- @return a treequery visitor instance
function M.treequery(root)
return setmetatable({
root = root,
unsatisfied = 0,
predicates = { },
until_up = { },
from_up = { },
up_f = false,
down_f = false,
filters = { },
}, Q)
end
-- helper to share the implementations of positional filters
local function add_pos_filter(self, position, inverted, inclusive, f, ...)
if type(f)=='string' then f = M.has_tag(f, ...) end
if not inverted then self.unsatisfied += 1 end
local x = {
pred = f,
position = position,
satisfied = false,
inverted = inverted or false,
inclusive = inclusive or false }
table.insert(self.predicates, x)
return self
end
function Q :if_unknown(f)
self.unknown_handler = f or (||nil)
return self
end
-- TODO: offer an API for inclusive pos_filters
--- select nodes which are after one which satisfies predicate f
Q.after = |self, f, ...| add_pos_filter(self, 'after', false, false, f, ...)
--- select nodes which are not after one which satisfies predicate f
Q.not_after = |self, f, ...| add_pos_filter(self, 'after', true, false, f, ...)
--- select nodes which are under one which satisfies predicate f
Q.under = |self, f, ...| add_pos_filter(self, 'under', false, false, f, ...)
--- select nodes which are not under one which satisfies predicate f
Q.not_under = |self, f, ...| add_pos_filter(self, 'under', true, false, f, ...)
--- select nodes which satisfy predicate f
function Q :filter(f, ...)
if type(f)=='string' then f = M.has_tag(f, ...) end
table.insert(self.filters, f);
return self
end
--- select nodes which satisfy predicate f
function Q :filter_not(f, ...)
if type(f)=='string' then f = M.has_tag(f, ...) end
table.insert(self.filters, |...| not f(...))
return self
end
-- private helper: apply filters and execute up/down callbacks when applicable
function Q :execute()
local cfg = { }
-- TODO: optimize away not_under & not_after by pruning the tree
function cfg.down(...)
--printf ("[down]\t%s\t%s", self.unsatisfied, table.tostring((...)))
ACTIVE_SCOPE[...] = cfg.scope
local satisfied = self.unsatisfied==0
for _, x in ipairs(self.predicates) do
if not x.satisfied and x.pred(...) then
x.satisfied = true
local node, parent = ...
local inc = x.inverted and 1 or -1
if x.position=='under' then
-- satisfied from after we get down this node...
self.unsatisfied += inc
-- ...until before we get up this node
mmap_add(self.until_up, node, x)
elseif x.position=='after' then
-- satisfied from after we get up this node...
mmap_add(self.from_up, node, x)
-- ...until before we get up this node's parent
mmap_add(self.until_up, parent, x)
elseif x.position=='under_or_after' then
-- satisfied from after we get down this node...
self.satisfied += inc
-- ...until before we get up this node's parent...
mmap_add(self.until_up, parent, x)
else
error "position not understood"
end -- position
if x.inclusive then satisfied = self.unsatisfied==0 end
end -- predicate passed
end -- for predicates
if satisfied then
for _, f in ipairs(self.filters) do
if not f(...) then satisfied=false; break end
end
if satisfied and self.down_f then self.down_f(...) end
end
end
function cfg.up(...)
--printf ("[up]\t%s", table.tostring((...)))
-- Remove predicates which are due before we go up this node
local preds = self.until_up[...]
if preds then
for x, _ in pairs(preds) do
local inc = x.inverted and -1 or 1
self.unsatisfied += inc
x.satisfied = false
end
self.until_up[...] = nil
end
-- Execute the up callback
-- TODO: cache the filter passing result from the down callback
-- TODO: skip if there's no callback
local satisfied = self.unsatisfied==0
if satisfied then
for _, f in ipairs(self.filters) do
if not f(self, ...) then satisfied=false; break end
end
if satisfied and self.up_f then self.up_f(...) end
end
-- Set predicate which are due after we go up this node
local preds = self.from_up[...]
if preds then
for p, _ in pairs(preds) do
local inc = p.inverted and 1 or -1
self.unsatisfied += inc
end
self.from_up[...] = nil
end
ACTIVE_SCOPE[...] = nil
end
function cfg.binder(id_node, ...)
--printf(" >>> Binder called on %s, %s", table.tostring(id_node),
-- table.tostring{...}:sub(2,-2))
cfg.down(id_node, ...)
cfg.up(id_node, ...)
--printf("down/up on binder done")
end
cfg.unknown = self.unknown_handler
--function cfg.occurrence (binder, occ)
-- if binder then OCC2BIND[occ] = binder[1] end
--printf(" >>> %s is an occurrence of %s", occ[1], table.tostring(binder and binder[2]))
--end
--function cfg.binder(...) cfg.down(...); cfg.up(...) end
return walk.guess(cfg, self.root)
end
--- Execute a function on each selected node
-- @down: function executed when we go down a node, i.e. before its children
-- have been examined.
-- @up: function executed when we go up a node, i.e. after its children
-- have been examined.
function Q :foreach(down, up)
if not up and not down then
error "iterator missing"
end
self.up_f = up
self.down_f = down
return self :execute()
end
--- Return the list of nodes selected by a given treequery.
function Q :list()
local acc = { }
self :foreach(|x| table.insert(acc, x))
return acc
end
--- Return the first matching element
-- TODO: dirty hack, to implement properly with a 'break' return.
-- Also, it won't behave correctly if a predicate causes an error,
-- or if coroutines are involved.
function Q :first()
local result = { }
local function f(...) result = {...}; error() end
pcall(|| self :foreach(f))
return unpack(result)
end
--- Pretty printer for queries
function Q :__tostring() return "<treequery>" end
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- Predicates.
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--- Return a predicate which is true if the tested node's tag is among the
-- one listed as arguments
-- @param ... a sequence of tag names
function M.has_tag(...)
local args = {...}
if #args==1 then
local tag = ...
return (|node| node.tag==tag)
--return function(self, node) printf("node %s has_tag %s?", table.tostring(node), tag); return node.tag==tag end
else
local tags = { }
for _, tag in ipairs(args) do tags[tag]=true end
return function(node)
local node_tag = node.tag
return node_tag and tags[node_tag]
end
end
end
--- Predicate to test whether a node represents an expression.
M.is_expr = M.has_tag('Nil', 'Dots', 'True', 'False', 'Number','String',
'Function', 'Table', 'Op', 'Paren', 'Call', 'Invoke',
'Id', 'Index')
-- helper for is_stat
local STAT_TAGS = { Do=1, Set=1, While=1, Repeat=1, If=1, Fornum=1,
Forin=1, Local=1, Localrec=1, Return=1, Break=1 }
--- Predicate to test whether a node represents a statement.
-- It is context-aware, i.e. it recognizes `Call and `Invoke nodes
-- used in a statement context as such.
function M.is_stat(node, parent)
local tag = node.tag
if not tag then return false
elseif STAT_TAGS[tag] then return true
elseif tag=='Call' or tag=='Invoke' then return parent and parent.tag==nil
else return false end
end
--- Predicate to test whether a node represents a statements block.
function M.is_block(node) return node.tag==nil end
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- Variables and scopes.
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
local BINDER_PARENT_TAG = {
Local=true, Localrec=true, Forin=true, Function=true }
--- Test whether a node is a binder. This is local predicate, although it
-- might need to inspect the parent node.
function M.is_binder(node, parent)
--printf('is_binder(%s, %s)', table.tostring(node), table.tostring(parent))
if node.tag ~= 'Id' or not parent then return false end
if parent.tag=='Fornum' then return parent[1]==node end
if not BINDER_PARENT_TAG[parent.tag] then return false end
for _, binder in ipairs(parent[1]) do
if binder==node then return true end
end
return false
end
--- Retrieve the binder associated to an occurrence within root.
-- @param occurrence an Id node representing an occurrence in `root`.
-- @param root the tree in which `node` and its binder occur.
-- @return the binder node, and its ancestors up to root if found.
-- @return nil if node is global (or not an occurrence) in `root`.
function M.binder(occurrence, root)
local cfg, id_name, result = { }, occurrence[1], { }
function cfg.occurrence(id)
if id == occurrence then result = cfg.scope :get(id_name) end
-- TODO: break the walker
end
walk.guess(cfg, root)
return unpack(result)
end
--- Predicate to filter occurrences of a given binder.
-- Warning: it relies on internal scope book-keeping,
-- and for this reason, it only works as query method argument.
-- It won't work outside of a query.
-- @param binder the binder whose occurrences must be kept by predicate
-- @return a predicate
-- function M.is_occurrence_of(binder)
-- return function(node, ...)
-- if node.tag ~= 'Id' then return nil end
-- if M.is_binder(node, ...) then return nil end
-- local scope = ACTIVE_SCOPE[node]
-- if not scope then return nil end
-- local result = scope :get (node[1]) or { }
-- if result[1] ~= binder then return nil end
-- return unpack(result)
-- end
-- end
function M.is_occurrence_of(binder)
return function(node, ...)
local b = M.get_binder(node)
return b and b==binder
end
end
function M.get_binder(occurrence, ...)
if occurrence.tag ~= 'Id' then return nil end
if M.is_binder(occurrence, ...) then return nil end
local scope = ACTIVE_SCOPE[occurrence]
local binder_hierarchy = scope :get(occurrence[1])
return unpack (binder_hierarchy or { })
end
--- Transform a predicate on a node into a predicate on this node's
-- parent. For instance if p tests whether a node has property P,
-- then parent(p) tests whether this node's parent has property P.
-- The ancestor level is precised with n, with 1 being the node itself,
-- 2 its parent, 3 its grand-parent etc.
-- @param[optional] n the parent to examine, default=2
-- @param pred the predicate to transform
-- @return a predicate
function M.parent(n, pred, ...)
if type(n)~='number' then n, pred = 2, n end
if type(pred)=='string' then pred = M.has_tag(pred, ...) end
return function(self, ...)
return select(n, ...) and pred(self, select(n, ...))
end
end
--- Transform a predicate on a node into a predicate on this node's
-- n-th child.
-- @param n the child's index number
-- @param pred the predicate to transform
-- @return a predicate
function M.child(n, pred)
return function(node, ...)
local child = node[n]
return child and pred(child, node, ...)
end
end
--- Predicate to test the position of a node in its parent.
-- The predicate succeeds if the node is the n-th child of its parent,
-- and a <= n <= b.
-- nth(a) is equivalent to nth(a, a).
-- Negative indices are admitted, and count from the last child,
-- as done for instance by string.sub().
--
-- TODO: This is wrong, this tests the table relationship rather than the
-- AST node relationship.
-- Must build a getindex helper, based on pattern matching, then build
-- the predicate around it.
--
-- @param a lower bound
-- @param a upper bound
-- @return a predicate
function M.is_nth(a, b)
b = b or a
return function(self, node, parent)
if not parent then return false end
local nchildren = #parent
local a = a<=0 and nchildren+a+1 or a
if a>nchildren then return false end
local b = b<=0 and nchildren+b+1 or b>nchildren and nchildren or b
for i=a,b do if parent[i]==node then return true end end
return false
end
end
--- Returns a list of the direct children of AST node `ast`.
-- Children are only expressions, statements and blocks,
-- not intermediates such as `Pair` nodes or internal lists
-- in `Local` or `Set` nodes.
-- Children are returned in parsing order, which isn't necessarily
-- the same as source code order. For instance, the right-hand-side
-- of a `Local` node is listed before the left-hand-side, because
-- semantically the right is evaluated before the variables on the
-- left enter scope.
--
-- @param ast the node whose children are needed
-- @return a list of the direct children of `ast`
function M.children(ast)
local acc = { }
local cfg = { }
function cfg.down(x)
if x~=ast then table.insert(acc, x); return 'break' end
end
walk.guess(cfg, ast)
return acc
end
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- Comments parsing.
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
local comment_extractor = |which_side| function (node)
local x = node.lineinfo
x = x and x[which_side]
x = x and x.comments
if not x then return nil end
local lines = { }
for _, record in ipairs(x) do
table.insert(lines, record[1])
end
return table.concat(lines, '\n')
end
M.comment_prefix = comment_extractor 'first'
M.comment_suffix = comment_extractor 'last'
--- Shortcut for the query constructor
function M :__call(...) return self.treequery(...) end
setmetatable(M, M)
return M

View File

@@ -0,0 +1,266 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-- Low level AST traversal library.
--
-- This library is a helper for the higher-level `treequery` library.
-- It walks through every node of an AST, depth-first, and executes
-- some callbacks contained in its `cfg` config table:
--
-- * `cfg.down(...)` is called when it walks down a node, and receive as
-- parameters the node just entered, followed by its parent, grand-parent
-- etc. until the root node.
--
-- * `cfg.up(...)` is called when it walks back up a node, and receive as
-- parameters the node just entered, followed by its parent, grand-parent
-- etc. until the root node.
--
-- * `cfg.occurrence(binder, id_node, ...)` is called when it visits
-- an `` `Id{ }`` node which isn't a local variable creator. binder
-- is a reference to its binder with its context. The binder is the
-- `` `Id{ }`` node which created this local variable. By "binder
-- and its context", we mean a list starting with the `` `Id{ }``,
-- and followed by every ancestor of the binder node, up until the
-- common root node. `binder` is nil if the variable is global.
-- `id_node` is followed by its ancestor, up until the root node.
--
-- `cfg.scope` is maintained during the traversal, associating a
-- variable name to the binder which creates it in the context of the
-- node currently visited.
--
-- `walk.traverse.xxx` functions are in charge of the recursive
-- descent into children nodes. They're private helpers. They are also
-- in charge of calling appropriate `cfg.xxx` callbacks.
-{ extension ("match", ...) }
local pp = require 'metalua.pprint'
local M = { traverse = { }; tags = { }; debug = false }
local function table_transpose(t)
local tt = { }; for a, b in pairs(t) do tt[b]=a end; return tt
end
--------------------------------------------------------------------------------
-- Standard tags: can be used to guess the type of an AST, or to check
-- that the type of an AST is respected.
--------------------------------------------------------------------------------
M.tags.stat = table_transpose{
'Do', 'Set', 'While', 'Repeat', 'Local', 'Localrec', 'Return',
'Fornum', 'Forin', 'If', 'Break', 'Goto', 'Label',
'Call', 'Invoke' }
M.tags.expr = table_transpose{
'Paren', 'Call', 'Invoke', 'Index', 'Op', 'Function', 'Stat',
'Table', 'Nil', 'Dots', 'True', 'False', 'Number', 'String', 'Id' }
--------------------------------------------------------------------------------
-- These [M.traverse.xxx()] functions are in charge of actually going through
-- ASTs. At each node, they make sure to call the appropriate walker.
--------------------------------------------------------------------------------
function M.traverse.stat (cfg, x, ...)
if M.debug then pp.printf("traverse stat %s", x) end
local ancestors = {...}
local B = |y| M.block (cfg, y, x, unpack(ancestors)) -- Block
local S = |y| M.stat (cfg, y, x, unpack(ancestors)) -- Statement
local E = |y| M.expr (cfg, y, x, unpack(ancestors)) -- Expression
local EL = |y| M.expr_list (cfg, y, x, unpack(ancestors)) -- Expression List
local IL = |y| M.binder_list (cfg, y, x, unpack(ancestors)) -- Id binders List
local OS = || cfg.scope :save() -- Open scope
local CS = || cfg.scope :restore() -- Close scope
match x with
| {...} if x.tag == nil -> for _, y in ipairs(x) do M.stat(cfg, y, ...) end
-- no tag --> node not inserted in the history ancestors
| `Do{...} -> OS(x); for _, y in ipairs(x) do S(y) end; CS(x)
| `Set{ lhs, rhs } -> EL(lhs); EL(rhs)
| `While{ cond, body } -> E(cond); OS(); B(body); CS()
| `Repeat{ body, cond } -> OS(body); B(body); E(cond); CS(body)
| `Local{ lhs } -> IL(lhs)
| `Local{ lhs, rhs } -> EL(rhs); IL(lhs)
| `Localrec{ lhs, rhs } -> IL(lhs); EL(rhs)
| `Fornum{ i, a, b, body } -> E(a); E(b); OS(); IL{i}; B(body); CS()
| `Fornum{ i, a, b, c, body } -> E(a); E(b); E(c); OS(); IL{i}; B(body); CS()
| `Forin{ i, rhs, body } -> EL(rhs); OS(); IL(i); B(body); CS()
| `If{...} ->
for i=1, #x-1, 2 do
E(x[i]); OS(); B(x[i+1]); CS()
end
if #x%2 == 1 then
OS(); B(x[#x]); CS()
end
| `Call{...}|`Invoke{...}|`Return{...} -> EL(x)
| `Break | `Goto{ _ } | `Label{ _ } -> -- nothing
| { tag=tag, ...} if M.tags.stat[tag]->
M.malformed (cfg, x, unpack (ancestors))
| _ ->
M.unknown (cfg, x, unpack (ancestors))
end
end
function M.traverse.expr (cfg, x, ...)
if M.debug then pp.printf("traverse expr %s", x) end
local ancestors = {...}
local B = |y| M.block (cfg, y, x, unpack(ancestors)) -- Block
local S = |y| M.stat (cfg, y, x, unpack(ancestors)) -- Statement
local E = |y| M.expr (cfg, y, x, unpack(ancestors)) -- Expression
local EL = |y| M.expr_list (cfg, y, x, unpack(ancestors)) -- Expression List
local IL = |y| M.binder_list (cfg, y, x, unpack(ancestors)) -- Id binders list
local OS = || cfg.scope :save() -- Open scope
local CS = || cfg.scope :restore() -- Close scope
match x with
| `Paren{ e } -> E(e)
| `Call{...} | `Invoke{...} -> EL(x)
| `Index{ a, b } -> E(a); E(b)
| `Op{ opid, ... } -> E(x[2]); if #x==3 then E(x[3]) end
| `Function{ params, body } -> OS(body); IL(params); B(body); CS(body)
| `Stat{ b, e } -> OS(b); B(b); E(e); CS(b)
| `Id{ name } -> M.occurrence(cfg, x, unpack(ancestors))
| `Table{ ... } ->
for i = 1, #x do match x[i] with
| `Pair{ k, v } -> E(k); E(v)
| v -> E(v)
end end
| `Nil|`Dots|`True|`False|`Number{_}|`String{_} -> -- terminal node
| { tag=tag, ...} if M.tags.expr[tag]-> M.malformed (cfg, x, unpack (ancestors))
| _ -> M.unknown (cfg, x, unpack (ancestors))
end
end
function M.traverse.block (cfg, x, ...)
assert(type(x)=='table', "traverse.block() expects a table")
if x.tag then M.malformed(cfg, x, ...)
else for _, y in ipairs(x) do M.stat(cfg, y, x, ...) end
end
end
function M.traverse.expr_list (cfg, x, ...)
assert(type(x)=='table', "traverse.expr_list() expects a table")
-- x doesn't appear in the ancestors
for _, y in ipairs(x) do M.expr(cfg, y, ...) end
end
function M.malformed(cfg, x, ...)
local f = cfg.malformed or cfg.error
if f then f(x, ...) else
error ("Malformed node of tag "..(x.tag or '(nil)'))
end
end
function M.unknown(cfg, x, ...)
local f = cfg.unknown or cfg.error
if f then f(x, ...) else
error ("Unknown node tag "..(x.tag or '(nil)'))
end
end
function M.occurrence(cfg, x, ...)
if cfg.occurrence then cfg.occurrence(cfg.scope :get(x[1]), x, ...) end
end
-- TODO: Is it useful to call each error handling function?
function M.binder_list (cfg, id_list, ...)
local f = cfg.binder
local ferror = cfg.error or cfg.malformed or cfg.unknown
for i, id_node in ipairs(id_list) do
local down, up = cfg.down, cfg.up
if id_node.tag == 'Id' then
cfg.scope :set (id_node[1], { id_node, ... })
if down then down(id_node, ...) end
if f then f(id_node, ...) end
if up then up(id_node, ...) end
elseif i==#id_list and id_node.tag=='Dots' then
if down then down(id_node, ...) end
if up then up(id_node, ...) end
-- Do nothing, those are valid `Dots
elseif ferror then
-- Traverse error handling function
ferror(id_node, ...)
else
error("Invalid binders list")
end
end
end
----------------------------------------------------------------------
-- Generic walker generator.
-- * if `cfg' has an entry matching the tree name, use this entry
-- * if not, try to use the entry whose name matched the ast kind
-- * if an entry is a table, look for 'up' and 'down' entries
-- * if it is a function, consider it as a `down' traverser.
----------------------------------------------------------------------
local walker_builder = function(traverse)
assert(traverse)
return function (cfg, ...)
if not cfg.scope then cfg.scope = M.newscope() end
local down, up = cfg.down, cfg.up
local broken = down and down(...)
if broken ~= 'break' then M.traverse[traverse] (cfg, ...) end
if up then up(...) end
end
end
----------------------------------------------------------------------
-- Declare [M.stat], [M.expr], [M.block].
-- `M.binder_list` is not here, because `cfg.up` and `cfg.down` must
-- be called on individual binders, not on the list itself.
-- It's therefore handled in `traverse.binder_list()`
----------------------------------------------------------------------
for _, w in ipairs{ "stat", "expr", "block" } do --, "malformed", "unknown" } do
M[w] = walker_builder (w, M.traverse[w])
end
-- Don't call up/down callbacks on expr lists
M.expr_list = M.traverse.expr_list
----------------------------------------------------------------------
-- Try to guess the type of the AST then choose the right walkker.
----------------------------------------------------------------------
function M.guess (cfg, x, ...)
assert(type(x)=='table', "arg #2 in a walker must be an AST")
if M.tags.expr[x.tag] then return M.expr(cfg, x, ...) end
if M.tags.stat[x.tag] then return M.stat(cfg, x, ...) end
if not x.tag then return M.block(cfg, x, ...) end
error ("Can't guess the AST type from tag "..(x.tag or '<none>'))
end
local S = { }; S.__index = S
function M.newscope()
local instance = { current = { } }
instance.stack = { instance.current }
setmetatable (instance, S)
return instance
end
function S :save(...)
local current_copy = { }
for a, b in pairs(self.current) do current_copy[a]=b end
table.insert (self.stack, current_copy)
if ... then return self :add(...) end
end
function S :restore() self.current = table.remove (self.stack) end
function S :get (var_name) return self.current[var_name] end
function S :set (key, val) self.current[key] = val end
return M