Reduction of moose.lua sizing working now!

FlightControl_Master committed on 2017-09-26 18:47:33 +02:00
parent 11067d4bfd
commit 5558c26db7
160 changed files with 36080 additions and 229 deletions

View File

@ -3,7 +3,7 @@
<listAttribute key="org.eclipse.debug.ui.favoriteGroups">
<listEntry value="org.eclipse.ui.externaltools.launchGroup"/>
</listAttribute>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/Moose_Framework/Utils/luarocks/lua5.1.exe}"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/Moose_Framework/Utils/Generate_Moose.bat}"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_TOOL_ARGUMENTS" value="&quot;Moose_Create.lua&quot; &#13;&#10;&quot;D&quot;&#13;&#10;&quot;${current_date}&quot; &#13;&#10;&quot;${workspace_loc:/Moose_Framework//Moose Development/Moose}&quot; &#13;&#10;&quot;${workspace_loc:/Moose_Framework/Moose Mission Setup}&quot;"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="${workspace_loc:/Moose_Framework/Moose Mission Setup}"/>
</launchConfiguration>

View File

@ -3,7 +3,7 @@
<listAttribute key="org.eclipse.debug.ui.favoriteGroups">
<listEntry value="org.eclipse.ui.externaltools.launchGroup"/>
</listAttribute>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/Moose_Framework/Utils/luarocks/lua5.1.exe}"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/Moose_Framework/Utils/Generate_Moose.bat}"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_TOOL_ARGUMENTS" value="&quot;Moose_Create.lua&quot; &#13;&#10;&quot;S&quot;&#13;&#10;&quot;${current_date}&quot; &#13;&#10;&quot;${workspace_loc:/Moose_Framework//Moose Development/Moose}&quot; &#13;&#10;&quot;${workspace_loc:/Moose_Framework/Moose Mission Setup}&quot;"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="${workspace_loc:/Moose_Framework/Moose Mission Setup}"/>
</launchConfiguration>
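Decoded from the XML escaping, both updated launch configurations now point at the new Generate_Moose.bat wrapper instead of lua5.1.exe directly, and effectively run (workspace paths abbreviated):

  Generate_Moose.bat "Moose_Create.lua" "D" "${current_date}" ".../Moose Development/Moose" ".../Moose Mission Setup"

with the Moose Mission Setup folder as the working directory. The only difference between the two configurations is the second argument: "D" in the first and "S" in the second, presumably selecting the dynamic versus the static flavour of the generated include.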

View File

@ -442,7 +442,7 @@ function MISSILETRAINER._MenuMessages( MenuParameters )
if MenuParameters.Distance ~= nil then
self.Distance = MenuParameters.Distance
MESSAGE:New( "Hit detection distance set to " .. self.Distance * 1000 .. " meters", 15, "Menu" ):ToAll()
MESSAGE:New( "Hit detection distance set to " .. ( self.Distance * 1000 ) .. " meters", 15, "Menu" ):ToAll()
end
end
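For context, this change only adds parentheses for readability: in Lua, '*' binds more tightly than the concatenation operator '..', so both versions build the same message. A quick sketch (the local Distance is illustrative):

  -- '*' has higher precedence than '..', so these two lines print the same text
  local Distance = 1.5
  print( "Hit detection distance set to " .. Distance * 1000 .. " meters" )
  print( "Hit detection distance set to " .. ( Distance * 1000 ) .. " meters" )
  -- both print: Hit detection distance set to 1500 meters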

View File

@ -1,90 +1,85 @@
env.info( '*** MOOSE DYNAMIC INCLUDE START *** ' )
env.info( 'Moose Generation Timestamp: 20170924_2152' )
local base = _G
__Moose = {}
__Moose.Include = function( IncludeFile )
if not __Moose.Includes[ IncludeFile ] then
__Moose.Includes[IncludeFile] = IncludeFile
local f = assert( base.loadfile( __Moose.ProgramPath .. IncludeFile ) )
if f == nil then
error ("Moose: Could not load Moose file " .. IncludeFile )
else
env.info( "Moose: " .. IncludeFile .. " dynamically loaded from " .. __Moose.ProgramPath )
return f()
end
end
env.info('*** MOOSE DYNAMIC INCLUDE START *** ')
env.info('Moose Generation Timestamp: 20170926_1846')
local base=_G
__Moose={}
__Moose.Include=function(IncludeFile)
if not __Moose.Includes[IncludeFile]then
__Moose.Includes[IncludeFile]=IncludeFile
local f=assert(base.loadfile(__Moose.ProgramPath..IncludeFile))
if f==nil then
error("Moose: Could not load Moose file "..IncludeFile)
else
env.info("Moose: "..IncludeFile.." dynamically loaded from "..__Moose.ProgramPath)
return f()
end
__Moose.ProgramPath = "Scripts/Moose/"
__Moose.Includes = {}
__Moose.Include( 'Utilities/Routines.lua' )
__Moose.Include( 'Utilities/Utils.lua' )
__Moose.Include( 'Core/Base.lua' )
__Moose.Include( 'Core/Report.lua' )
__Moose.Include( 'Core/Scheduler.lua' )
__Moose.Include( 'Core/ScheduleDispatcher.lua' )
__Moose.Include( 'Core/Event.lua' )
__Moose.Include( 'Core/Settings.lua' )
__Moose.Include( 'Core/Menu.lua' )
__Moose.Include( 'Core/Zone.lua' )
__Moose.Include( 'Core/Database.lua' )
__Moose.Include( 'Core/Set.lua' )
__Moose.Include( 'Core/Point.lua' )
__Moose.Include( 'Core/Message.lua' )
__Moose.Include( 'Core/Fsm.lua' )
__Moose.Include( 'Core/Radio.lua' )
__Moose.Include( 'Core/SpawnStatic.lua' )
__Moose.Include( 'Core/Cargo.lua' )
__Moose.Include( 'Core/Spot.lua' )
__Moose.Include( 'Wrapper/Object.lua' )
__Moose.Include( 'Wrapper/Identifiable.lua' )
__Moose.Include( 'Wrapper/Positionable.lua' )
__Moose.Include( 'Wrapper/Controllable.lua' )
__Moose.Include( 'Wrapper/Group.lua' )
__Moose.Include( 'Wrapper/Unit.lua' )
__Moose.Include( 'Wrapper/Client.lua' )
__Moose.Include( 'Wrapper/Static.lua' )
__Moose.Include( 'Wrapper/Airbase.lua' )
__Moose.Include( 'Wrapper/Scenery.lua' )
__Moose.Include( 'Functional/Scoring.lua' )
__Moose.Include( 'Functional/CleanUp.lua' )
__Moose.Include( 'Functional/Spawn.lua' )
__Moose.Include( 'Functional/Movement.lua' )
__Moose.Include( 'Functional/Sead.lua' )
__Moose.Include( 'Functional/Escort.lua' )
__Moose.Include( 'Functional/MissileTrainer.lua' )
__Moose.Include( 'Functional/AirbasePolice.lua' )
__Moose.Include( 'Functional/Detection.lua' )
__Moose.Include( 'Functional/Designate.lua' )
__Moose.Include( 'Functional/RAT.lua' )
__Moose.Include( 'AI/AI_Balancer.lua' )
__Moose.Include( 'AI/AI_A2A.lua' )
__Moose.Include( 'AI/AI_A2A_Patrol.lua' )
__Moose.Include( 'AI/AI_A2A_Cap.lua' )
__Moose.Include( 'AI/AI_A2A_Gci.lua' )
__Moose.Include( 'AI/AI_A2A_Dispatcher.lua' )
__Moose.Include( 'AI/AI_Patrol.lua' )
__Moose.Include( 'AI/AI_Cap.lua' )
__Moose.Include( 'AI/AI_Cas.lua' )
__Moose.Include( 'AI/AI_Bai.lua' )
__Moose.Include( 'AI/AI_Formation.lua' )
__Moose.Include( 'Actions/Act_Assign.lua' )
__Moose.Include( 'Actions/Act_Route.lua' )
__Moose.Include( 'Actions/Act_Account.lua' )
__Moose.Include( 'Actions/Act_Assist.lua' )
__Moose.Include( 'Tasking/CommandCenter.lua' )
__Moose.Include( 'Tasking/Mission.lua' )
__Moose.Include( 'Tasking/Task.lua' )
__Moose.Include( 'Tasking/DetectionManager.lua' )
__Moose.Include( 'Tasking/Task_A2G_Dispatcher.lua' )
__Moose.Include( 'Tasking/Task_A2G.lua' )
__Moose.Include( 'Tasking/Task_A2A_Dispatcher.lua' )
__Moose.Include( 'Tasking/Task_A2A.lua' )
__Moose.Include( 'Tasking/Task_Cargo.lua' )
__Moose.Include( 'Moose.lua' )
BASE:TraceOnOff( true )
env.info( '*** MOOSE INCLUDE END *** ' )
end
end
__Moose.ProgramPath="Scripts/Moose/"
__Moose.Includes={}
__Moose.Include('Utilities/Routines.lua')
__Moose.Include('Utilities/Utils.lua')
__Moose.Include('Core/Base.lua')
__Moose.Include('Core/Report.lua')
__Moose.Include('Core/Scheduler.lua')
__Moose.Include('Core/ScheduleDispatcher.lua')
__Moose.Include('Core/Event.lua')
__Moose.Include('Core/Settings.lua')
__Moose.Include('Core/Menu.lua')
__Moose.Include('Core/Zone.lua')
__Moose.Include('Core/Database.lua')
__Moose.Include('Core/Set.lua')
__Moose.Include('Core/Point.lua')
__Moose.Include('Core/Message.lua')
__Moose.Include('Core/Fsm.lua')
__Moose.Include('Core/Radio.lua')
__Moose.Include('Core/SpawnStatic.lua')
__Moose.Include('Core/Cargo.lua')
__Moose.Include('Core/Spot.lua')
__Moose.Include('Wrapper/Object.lua')
__Moose.Include('Wrapper/Identifiable.lua')
__Moose.Include('Wrapper/Positionable.lua')
__Moose.Include('Wrapper/Controllable.lua')
__Moose.Include('Wrapper/Group.lua')
__Moose.Include('Wrapper/Unit.lua')
__Moose.Include('Wrapper/Client.lua')
__Moose.Include('Wrapper/Static.lua')
__Moose.Include('Wrapper/Airbase.lua')
__Moose.Include('Wrapper/Scenery.lua')
__Moose.Include('Functional/Scoring.lua')
__Moose.Include('Functional/CleanUp.lua')
__Moose.Include('Functional/Spawn.lua')
__Moose.Include('Functional/Movement.lua')
__Moose.Include('Functional/Sead.lua')
__Moose.Include('Functional/Escort.lua')
__Moose.Include('Functional/MissileTrainer.lua')
__Moose.Include('Functional/AirbasePolice.lua')
__Moose.Include('Functional/Detection.lua')
__Moose.Include('Functional/Designate.lua')
__Moose.Include('Functional/RAT.lua')
__Moose.Include('AI/AI_Balancer.lua')
__Moose.Include('AI/AI_A2A.lua')
__Moose.Include('AI/AI_A2A_Patrol.lua')
__Moose.Include('AI/AI_A2A_Cap.lua')
__Moose.Include('AI/AI_A2A_Gci.lua')
__Moose.Include('AI/AI_A2A_Dispatcher.lua')
__Moose.Include('AI/AI_Patrol.lua')
__Moose.Include('AI/AI_Cap.lua')
__Moose.Include('AI/AI_Cas.lua')
__Moose.Include('AI/AI_Bai.lua')
__Moose.Include('AI/AI_Formation.lua')
__Moose.Include('Actions/Act_Assign.lua')
__Moose.Include('Actions/Act_Route.lua')
__Moose.Include('Actions/Act_Account.lua')
__Moose.Include('Actions/Act_Assist.lua')
__Moose.Include('Tasking/CommandCenter.lua')
__Moose.Include('Tasking/Mission.lua')
__Moose.Include('Tasking/Task.lua')
__Moose.Include('Tasking/DetectionManager.lua')
__Moose.Include('Tasking/Task_A2G_Dispatcher.lua')
__Moose.Include('Tasking/Task_A2G.lua')
__Moose.Include('Tasking/Task_A2A_Dispatcher.lua')
__Moose.Include('Tasking/Task_A2A.lua')
__Moose.Include('Tasking/Task_Cargo.lua')
__Moose.Include('Moose.lua')
BASE:TraceOnOff(true)
env.info('*** MOOSE INCLUDE END *** ')
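For context, the dynamic include loader shown above caches files by name in __Moose.Includes, so each Moose source file is loaded and executed at most once per mission. A small sketch of the resulting behaviour:

  -- first call loads and runs Scripts/Moose/Core/Base.lua via loadfile()
  __Moose.Include( 'Core/Base.lua' )
  -- a second call finds the name in __Moose.Includes and returns without reloading
  __Moose.Include( 'Core/Base.lua' )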

Utils/Generate_Moose.bat Normal file
View File

@ -0,0 +1,5 @@
%~dp0luarocks\lua5.1.exe %1 %2 %3 %4 %5
call %~dp0LuaSrcDiet.bat --basic --opt-emptylines %5\Moose.lua
del %5\Moose.lua
copy %5\Moose_.lua %5\Moose.lua
del Moose_.lua
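This new batch file is where the size reduction in the commit title comes from: it first generates Moose.lua by running Moose_Create.lua with the bundled lua5.1.exe, then minifies it with LuaSrcDiet, which (judging by the copy step) writes the result as Moose_.lua, and finally replaces the generated Moose.lua with that minified copy. With the placeholders filled in as they arrive from the launch configuration above (%~dp0 is the Utils folder, %5 the Moose Mission Setup output folder; paths abbreviated), the sequence is roughly:

  Utils\luarocks\lua5.1.exe Moose_Create.lua D <date> "...\Moose Development\Moose" "...\Moose Mission Setup"
  call Utils\LuaSrcDiet.bat --basic --opt-emptylines "...\Moose Mission Setup\Moose.lua"
  del "...\Moose Mission Setup\Moose.lua"
  copy "...\Moose Mission Setup\Moose_.lua" "...\Moose Mission Setup\Moose.lua"
  del Moose_.lua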

Binary file not shown.

View File

@ -0,0 +1,178 @@
#!/usr/bin/lua
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
-- Check interpreter version
if _VERSION ~= "Lua 5.1" then
print("Luadocumentor is only compatible with Lua 5.1")
return
end
--
-- Defining help message.
--
-- This message is compliant to 'lapp', which will match options and arguments
-- from command line.
local help = [[luadocumentor v0.1.4: tool for Lua Documentation Language
-f, --format (default doc) Define output format :
* doc: Will produce HTML documentation from specified file(s) or directories.
* api: Will produce API file(s) from specified file(s) or directories.
-d, --dir (default docs) Define an output directory. If the given directory doesn't exist, it will be created.
-h, --help Display the help.
-n, --noheuristic Do not use code analysis, use only comments to generate documentation.
-s, --style (default !) The path of your own css file, if you don't want to use the default one. (useful only for the doc format)
[directories|files] Define the paths or the directories of inputs files. Only Lua or C files containing a @module tag will be considered.
]]
local docgenerator = require 'docgenerator'
local lddextractor = require 'lddextractor'
local lapp = require 'pl.lapp'
local args = lapp( help )
if not args or #args < 1 then
print('No directory provided')
return
elseif args.help then
-- Just print help
print( help )
return
end
--
-- define css file name
--
local cssfilename = "stylesheet.css"
--
-- Parse files from given folders
--
-- Check if all folders exist
local fs = require 'fs.lfs'
local allpresent, missing = fs.checkdirectory(args)
-- Some of given directories are absent
if missing then
-- List missing directories
print 'Unable to open'
for _, file in ipairs( missing ) do
print('\t'.. file)
end
return
end
-- Get files from given directories
local filestoparse, error = fs.filelist( args )
if not filestoparse then
print ( error )
return
end
--
-- Generate documentation only files
--
if args.format == 'api' then
for _, filename in ipairs( filestoparse ) do
-- Loading file content
print('Dealing with "'..filename..'".')
local file, error = io.open(filename, 'r')
if not file then
print ('Unable to open "'..filename.."'.\n"..error)
else
local code = file:read('*all')
file:close()
--
-- Creating comment file
--
local commentfile, error = lddextractor.generatecommentfile(filename, code)
-- Getting module name
-- Optimize me
local module, moduleerror = lddextractor.generateapimodule(filename, code)
if not commentfile then
print('Unable to create documentation file for "'..filename..'"\n'..error)
elseif not module or not module.name then
local error = moduleerror and '\n'..moduleerror or ''
print('Unable to compute module name for "'..filename..'".'..error)
else
--
-- Flush documentation file on disk
--
local path = args.dir..fs.separator..module.name..'.lua'
local status, err = fs.fill(path, commentfile)
if not status then
print(err)
end
end
end
end
print('Done')
return
end
-- Deal only supported output types
if args.format ~= 'doc' then
print ('"'..args.format..'" format is not handled.')
return
end
-- Generate html from files
local parsedfiles, unparsed = docgenerator.generatedocforfiles(filestoparse, cssfilename,args.noheuristic)
-- Show warnings on unparsed files
if #unparsed > 0 then
for _, faultyfile in ipairs( unparsed ) do
print( faultyfile )
end
end
-- This loop is just for counting parsed files
-- TODO: Find a more elegant way to do it
local parsedfilescount = 0
for _, p in pairs(parsedfiles) do
parsedfilescount = parsedfilescount + 1
end
print (parsedfilescount .. ' file(s) parsed.')
-- Create html files
local generated = 0
for _, apifile in pairs ( parsedfiles ) do
local status, err = fs.fill(args.dir..fs.separator..apifile.name..'.html', apifile.body)
if status then
generated = generated + 1
else
print( 'Unable to create '..apifile.name..'.html on disk.')
end
end
print (generated .. ' file(s) generated.')
-- Copying css
local csscontent
if args.style == '!' then
csscontent = require 'defaultcss'
else
local css, error = io.open(args.style, 'r')
if not css then
print('Unable to open "'..args.style .. '".\n'..error)
return
end
csscontent = css:read("*all")
css:close()
end
local status, error = fs.fill(args.dir..fs.separator..cssfilename, csscontent)
if not status then
print(error)
return
end
print('Adding css')
print('Done')
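Based on the help text at the top of this script, a typical invocation would be something like lua5.1 luadocumentor.lua -f doc -d docs <one or more Lua source directories> (the names here are illustrative); with -f doc it writes one HTML page per documented module plus stylesheet.css into the output directory, while -f api emits documentation-only .lua files instead.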

View File

@ -0,0 +1,198 @@
Eclipse Public License - v 1.0
THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
1. DEFINITIONS
"Contribution" means:
a) in the case of the initial Contributor, the initial code and documentation
distributed under this Agreement, and
b) in the case of each subsequent Contributor:
i) changes to the Program, and
ii) additions to the Program;
where such changes and/or additions to the Program originate from and are
distributed by that particular Contributor. A Contribution 'originates' from
a Contributor if it was added to the Program by such Contributor itself or
anyone acting on such Contributor's behalf. Contributions do not include
additions to the Program which: (i) are separate modules of software
distributed in conjunction with the Program under their own license
agreement, and (ii) are not derivative works of the Program.
"Contributor" means any person or entity that distributes the Program.
"Licensed Patents" mean patent claims licensable by a Contributor which are
necessarily infringed by the use or sale of its Contribution alone or when
combined with the Program.
"Program" means the Contributions distributed in accordance with this Agreement.
"Recipient" means anyone who receives the Program under this Agreement,
including all Contributors.
2. GRANT OF RIGHTS
a) Subject to the terms of this Agreement, each Contributor hereby grants
Recipient a non-exclusive, worldwide, royalty-free copyright license to
reproduce, prepare derivative works of, publicly display, publicly perform,
distribute and sublicense the Contribution of such Contributor, if any, and
such derivative works, in source code and object code form.
b) Subject to the terms of this Agreement, each Contributor hereby grants
Recipient a non-exclusive, worldwide, royalty-free patent license under
Licensed Patents to make, use, sell, offer to sell, import and otherwise
transfer the Contribution of such Contributor, if any, in source code and
object code form. This patent license shall apply to the combination of the
Contribution and the Program if, at the time the Contribution is added by
the Contributor, such addition of the Contribution causes such combination
to be covered by the Licensed Patents. The patent license shall not apply
to any other combinations which include the Contribution. No hardware per
se is licensed hereunder.
c) Recipient understands that although each Contributor grants the licenses to
its Contributions set forth herein, no assurances are provided by any
Contributor that the Program does not infringe the patent or other
intellectual property rights of any other entity. Each Contributor
disclaims any liability to Recipient for claims brought by any other entity
based on infringement of intellectual property rights or otherwise. As a
condition to exercising the rights and licenses granted hereunder, each
Recipient hereby assumes sole responsibility to secure any other
intellectual property rights needed, if any. For example, if a third party
patent license is required to allow Recipient to distribute the Program, it
is Recipient's responsibility to acquire that license before distributing
the Program.
d) Each Contributor represents that to its knowledge it has sufficient
copyright rights in its Contribution, if any, to grant the copyright
license set forth in this Agreement.
3. REQUIREMENTS
A Contributor may choose to distribute the Program in object code form under its
own license agreement, provided that:
a) it complies with the terms and conditions of this Agreement; and
b) its license agreement:
i) effectively disclaims on behalf of all Contributors all warranties and
conditions, express and implied, including warranties or conditions of
title and non-infringement, and implied warranties or conditions of
merchantability and fitness for a particular purpose;
ii) effectively excludes on behalf of all Contributors all liability for
damages, including direct, indirect, special, incidental and
consequential damages, such as lost profits;
iii) states that any provisions which differ from this Agreement are offered
by that Contributor alone and not by any other party; and
iv) states that source code for the Program is available from such
Contributor, and informs licensees how to obtain it in a reasonable
manner on or through a medium customarily used for software exchange.
When the Program is made available in source code form:
a) it must be made available under this Agreement; and
b) a copy of this Agreement must be included with each copy of the Program.
Contributors may not remove or alter any copyright notices contained within
the Program.
Each Contributor must identify itself as the originator of its Contribution, if
any, in a manner that reasonably allows subsequent Recipients to identify the
originator of the Contribution.
4. COMMERCIAL DISTRIBUTION
Commercial distributors of software may accept certain responsibilities with
respect to end users, business partners and the like. While this license is
intended to facilitate the commercial use of the Program, the Contributor who
includes the Program in a commercial product offering should do so in a manner
which does not create potential liability for other Contributors. Therefore, if
a Contributor includes the Program in a commercial product offering, such
Contributor ("Commercial Contributor") hereby agrees to defend and indemnify
every other Contributor ("Indemnified Contributor") against any losses, damages
and costs (collectively "Losses") arising from claims, lawsuits and other legal
actions brought by a third party against the Indemnified Contributor to the
extent caused by the acts or omissions of such Commercial Contributor in
connection with its distribution of the Program in a commercial product
offering. The obligations in this section do not apply to any claims or Losses
relating to any actual or alleged intellectual property infringement. In order
to qualify, an Indemnified Contributor must: a) promptly notify the Commercial
Contributor in writing of such claim, and b) allow the Commercial Contributor to
control, and cooperate with the Commercial Contributor in, the defense and any
related settlement negotiations. The Indemnified Contributor may participate in
any such claim at its own expense.
For example, a Contributor might include the Program in a commercial product
offering, Product X. That Contributor is then a Commercial Contributor. If that
Commercial Contributor then makes performance claims, or offers warranties
related to Product X, those performance claims and warranties are such
Commercial Contributor's responsibility alone. Under this section, the
Commercial Contributor would have to defend claims against the other
Contributors related to those performance claims and warranties, and if a court
requires any other Contributor to pay any damages as a result, the Commercial
Contributor must pay those damages.
5. NO WARRANTY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR
IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE,
NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each
Recipient is solely responsible for determining the appropriateness of using and
distributing the Program and assumes all risks associated with its exercise of
rights under this Agreement , including but not limited to the risks and costs
of program errors, compliance with applicable laws, damage to or loss of data,
programs or equipment, and unavailability or interruption of operations.
6. DISCLAIMER OF LIABILITY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST
PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS
GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
7. GENERAL
If any provision of this Agreement is invalid or unenforceable under applicable
law, it shall not affect the validity or enforceability of the remainder of the
terms of this Agreement, and without further action by the parties hereto, such
provision shall be reformed to the minimum extent necessary to make such
provision valid and enforceable.
If Recipient institutes patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Program itself
(excluding combinations of the Program with other software or hardware)
infringes such Recipient's patent(s), then such Recipient's rights granted under
Section 2(b) shall terminate as of the date such litigation is filed.
All Recipient's rights under this Agreement shall terminate if it fails to
comply with any of the material terms or conditions of this Agreement and does
not cure such failure in a reasonable period of time after becoming aware of
such noncompliance. If all Recipient's rights under this Agreement terminate,
Recipient agrees to cease use and distribution of the Program as soon as
reasonably practicable. However, Recipient's obligations under this Agreement
and any licenses granted by Recipient relating to the Program shall continue and
survive.
Everyone is permitted to copy and distribute copies of this Agreement, but in
order to avoid inconsistency the Agreement is copyrighted and may only be
modified in the following manner. The Agreement Steward reserves the right to
publish new versions (including revisions) of this Agreement from time to time.
No one other than the Agreement Steward has the right to modify this Agreement.
The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation
may assign the responsibility to serve as the Agreement Steward to a suitable
separate entity. Each new version of the Agreement will be given a
distinguishing version number. The Program (including Contributions) may always
be distributed subject to the version of the Agreement under which it was
received. In addition, after a new version of the Agreement is published,
Contributor may elect to distribute the Program (including its Contributions)
under the new version. Except as expressly stated in Sections 2(a) and 2(b)
above, Recipient receives no rights or licenses to the intellectual property of
any Contributor under this Agreement, whether expressly, by implication,
estoppel or otherwise. All rights in the Program not expressly granted under
this Agreement are reserved.
This Agreement is governed by the laws of the State of New York and the
intellectual property laws of the United States of America. No party to this
Agreement will bring a legal action under this Agreement more than one year
after the cause of action arose. Each party waives its rights to a jury trial in
any resulting litigation.

View File

@ -0,0 +1,7 @@
# Lua Documentor
LuaDocumentor allows users to generate HTML and API files from code documented
using the Lua documentation language.
Documentation is
[available here](http://wiki.eclipse.org/Koneki/LDT/User_Area/LuaDocumentor).

View File

@ -0,0 +1,57 @@
package = 'LuaDocumentor'
version = '0.1.5-1'
description = {
summary = 'LuaDocumentor allow users to generate HTML and API files from code documented using Lua documentation language.',
detailed = [[
This is an example for the LuaRocks tutorial.
Here we would put a detailed, typically
paragraph-long description.
]],
homepage = 'http://wiki.eclipse.org/Koneki/LDT/User_Area/LuaDocumentor',
license = 'EPL'
}
source = {
url = 'git://github.com/LuaDevelopmentTools/luadocumentor.git',
tag = 'v0.1.5-1'
}
dependencies = {
'lua ~> 5.1',
'luafilesystem ~> 1.6',
'markdown ~> 0.32',
'metalua-compiler ~> 0.7',
'penlight ~> 0.9'
}
build = {
type = 'builtin',
install = {
bin = {
luadocumentor = 'luadocumentor.lua'
},
lua = {
['models.internalmodelbuilder'] = 'models/internalmodelbuilder.mlua'
}
},
modules = {
defaultcss = 'defaultcss.lua',
docgenerator = 'docgenerator.lua',
extractors = 'extractors.lua',
lddextractor = 'lddextractor.lua',
templateengine = 'templateengine.lua',
['fs.lfs'] = 'fs/lfs.lua',
['models.apimodel'] = 'models/apimodel.lua',
['models.apimodelbuilder'] = 'models/apimodelbuilder.lua',
['models.internalmodel'] = 'models/internalmodel.lua',
['models.ldparser'] = 'models/ldparser.lua',
['template.file'] = 'template/file.lua',
['template.index'] = 'template/index.lua',
['template.index.recordtypedef'] = 'template/index/recordtypedef.lua',
['template.item'] = 'template/item.lua',
['template.page'] = 'template/page.lua',
['template.recordtypedef'] = 'template/recordtypedef.lua',
['template.usage'] = 'template/usage.lua',
['template.utils'] = 'template/utils.lua',
}
}

View File

@ -0,0 +1,39 @@
rock_manifest = {
bin = {
luadocumentor = "bc5cc07f56db2cf1dbe80f0827332873"
},
doc = {
LICENSE = "52a21f73ac77fd790dc40dc5acda0fc2",
["README.md"] = "fcef1f43c69f3559b347d854b2626deb"
},
lua = {
["defaultcss.lua"] = "dd9b2b89e5080972bbb52056247c0c65",
["docgenerator.lua"] = "92d0a3947d88226340014d2f033be37f",
["extractors.lua"] = "74191695e5217706ee355925e5ca40fa",
fs = {
["lfs.lua"] = "4d00f9bc942b02a86ccea16544d3e85d"
},
["lddextractor.lua"] = "56edde775a5d57818aa0a07b4f723536",
models = {
["apimodel.lua"] = "3c401de18691b1222b0ad253958260ee",
["apimodelbuilder.lua"] = "4c4a3c0b48b404973542dd99f994eb2c",
["internalmodel.lua"] = "a1a21e50af8db0f0a0b9d164ccc08853",
["internalmodelbuilder.mlua"] = "ff95dfca573ccc1c19a79434e96a492d",
["ldparser.lua"] = "538904a3adbfff4ff83deda029847323"
},
template = {
["file.lua"] = "41f095bc049ef161060d8e3b4ac9de63",
index = {
["recordtypedef.lua"] = "0977ff0048a837389c2ac10285eb1ce1"
},
["index.lua"] = "5a3b3cface3b1fd9cb2d56f1edd5487b",
["item.lua"] = "5d5a6d9bffd8935c4ed283105ede331b",
["page.lua"] = "351f4a7215272f7e448faeece4945bc0",
["recordtypedef.lua"] = "69938e1d60e94eed7f95b0999f1386ca",
["usage.lua"] = "979503deb84877cb221130a5be7c1535",
["utils.lua"] = "ad97fb4e3de9fb6480b25cdd877b50d9"
},
["templateengine.lua"] = "09bfc6350e14f4ab509d14fb0fb295c0"
},
["luadocumentor-0.1.5-1.rockspec"] = "4ba1b88898dce89e7fd8fb6a700496a4"
}

View File

@ -0,0 +1,212 @@
body {
margin-left: 1em;
margin-right: 1em;
font-family: arial, helvetica, geneva, sans-serif;
background-color:#ffffff; margin:0px;
}
code {
font-family: "Andale Mono", monospace;
}
tt {
font-family: "Andale Mono", monospace;
}
body, td, th { font-size: 11pt; }
h1, h2, h3, h4 { margin-left: 0em; }
textarea, pre, tt { font-size:10pt; }
body, td, th { color:#000000; }
small { font-size:0.85em; }
h1 { font-size:1.5em; }
h2 { font-size:1.25em; }
h3 { font-size:1.15em; }
h4 { font-size:1.06em; }
a:link { font-weight:bold; color: #004080; text-decoration: none; }
a:visited { font-weight:bold; color: #006699; text-decoration: none; }
a:link:hover { text-decoration:underline; }
hr { color:#cccccc }
img { border-width: 0px; }
h3 { padding-top: 1em; }
p { margin-left: 1em; }
p.name {
font-family: "Andale Mono", monospace;
padding-top: 1em;
margin-left: 0em;
}
blockquote { margin-left: 3em; }
.example {
background-color: rgb(245, 245, 245);
border-top-width: 1px;
border-right-width: 1px;
border-bottom-width: 1px;
border-left-width: 1px;
border-top-style: solid;
border-right-style: solid;
border-bottom-style: solid;
border-left-style: solid;
border-top-color: silver;
border-right-color: silver;
border-bottom-color: silver;
border-left-color: silver;
padding: 1em;
margin-left: 1em;
margin-right: 1em;
font-family: "Andale Mono", monospace;
font-size: smaller;
}
hr {
margin-left: 0em;
background: #00007f;
border: 0px;
height: 1px;
}
ul { list-style-type: disc; }
table.index { border: 1px #00007f; }
table.index td { text-align: left; vertical-align: top; }
table.index ul { padding-top: 0em; margin-top: 0em; }
table {
border: 1px solid black;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
}
th {
border: 1px solid black;
padding: 0.5em;
}
td {
border: 1px solid black;
padding: 0.5em;
}
div.header, div.footer { margin-left: 0em; }
#container {
margin-left: 1em;
margin-right: 1em;
background-color: #f0f0f0;
}
#product {
text-align: center;
border-bottom: 1px solid #cccccc;
background-color: #ffffff;
}
#product big {
font-size: 2em;
}
#product_logo {
}
#product_name {
}
#product_description {
}
#main {
background-color: #f0f0f0;
border-left: 2px solid #cccccc;
}
#navigation {
float: left;
width: 12em;
margin: 0;
vertical-align: top;
background-color: #f0f0f0;
overflow:visible;
}
#navigation h1 {
background-color:#e7e7e7;
font-size:1.1em;
color:#000000;
text-align:left;
margin:0px;
padding:0.2em;
border-top:1px solid #dddddd;
border-bottom:1px solid #dddddd;
}
#navigation ul {
font-size:1em;
list-style-type: none;
padding: 0;
margin: 1px;
}
#navigation li {
text-indent: -1em;
margin: 0em 0em 0em 0.5em;
display: block;
padding: 3px 0px 0px 12px;
}
#navigation li li a {
padding: 0px 3px 0px -1em;
}
#content {
margin-left: 12em;
padding: 1em;
border-left: 2px solid #cccccc;
border-right: 2px solid #cccccc;
background-color: #ffffff;
}
#about {
clear: both;
margin: 0;
padding: 5px;
border-top: 2px solid #cccccc;
background-color: #ffffff;
}
@media print {
body {
font: 10pt "Times New Roman", "TimeNR", Times, serif;
}
a {
font-weight:bold; color: #004080; text-decoration: underline;
}
#main {
background-color: #ffffff; border-left: 0px;
}
#container {
margin-left: 2%; margin-right: 2%; background-color: #ffffff;
}
#content {
margin-left: 0px; padding: 1em; border-left: 0px; border-right: 0px; background-color: #ffffff;
}
#navigation {
display: none;
}
#product_logo {
display: none;
}
#about img {
display: none;
}
.example {
font-family: "Andale Mono", monospace;
font-size: 8pt;
page-break-inside: avoid;
}
}

View File

@ -0,0 +1,103 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>LuaFileSystem</title>
<link rel="stylesheet" href="doc.css" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<div id="container">
<div id="product">
<div id="product_logo">
<a href="http://www.keplerproject.org">
<img alt="LuaFileSystem" src="luafilesystem.png"/>
</a>
</div>
<div id="product_name"><big><strong>LuaFileSystem</strong></big></div>
<div id="product_description">File System Library for the Lua Programming Language</div>
</div> <!-- id="product" -->
<div id="main">
<div id="navigation">
<h1>LuaFileSystem</h1>
<ul>
<li><a href="index.html">Home</a>
<ul>
<li><a href="index.html#overview">Overview</a></li>
<li><a href="index.html#status">Status</a></li>
<li><a href="index.html#download">Download</a></li>
<li><a href="index.html#history">History</a></li>
<li><a href="index.html#credits">Credits</a></li>
<li><a href="index.html#contact">Contact us</a></li>
</ul>
</li>
<li><a href="manual.html">Manual</a>
<ul>
<li><a href="manual.html#introduction">Introduction</a></li>
<li><a href="manual.html#building">Building</a></li>
<li><a href="manual.html#installation">Installation</a></li>
<li><a href="manual.html#reference">Reference</a></li>
</ul>
</li>
<li><strong>Examples</strong></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Project</a>
<ul>
<li><a href="https://github.com/keplerproject/luafilesystem/issues">Bug Tracker</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Git</a></li>
</ul>
</li>
<li><a href="license.html">License</a></li>
</ul>
</div> <!-- id="navigation" -->
<div id="content">
<h2><a name="example"></a>Examples</h2>
<h3>Directory iterator</h3>
<p>The following example iterates over a directory and recursively lists the
attributes for each file inside it.</p>
<pre class="example">
local lfs = require"lfs"
function attrdir (path)
for file in lfs.dir(path) do
if file ~= "." and file ~= ".." then
local f = path..'/'..file
print ("\t "..f)
local attr = lfs.attributes (f)
assert (type(attr) == "table")
if attr.mode == "directory" then
attrdir (f)
else
for name, value in pairs(attr) do
print (name, value)
end
end
end
end
end
attrdir (".")
</pre>
</div> <!-- id="content" -->
</div> <!-- id="main" -->
<div id="about">
<p><a href="http://validator.w3.org/check?uri=referer">Valid XHTML 1.0!</a></p>
<p><small>$Id: examples.html,v 1.8 2007/12/14 15:28:04 carregal Exp $</small></p>
</div> <!-- id="about" -->
</div> <!-- id="container" -->
</body>
</html>

View File

@ -0,0 +1,218 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>LuaFileSystem</title>
<link rel="stylesheet" href="doc.css" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<div id="container">
<div id="product">
<div id="product_logo">
<a href="http://www.keplerproject.org">
<img alt="LuaFileSystem" src="luafilesystem.png"/>
</a>
</div>
<div id="product_name"><big><strong>LuaFileSystem</strong></big></div>
<div id="product_description">File System Library for the Lua Programming Language</div>
</div> <!-- id="product" -->
<div id="main">
<div id="navigation">
<h1>LuaFileSystem</h1>
<ul>
<li><strong>Home</strong>
<ul>
<li><a href="index.html#overview">Overview</a></li>
<li><a href="index.html#status">Status</a></li>
<li><a href="index.html#download">Download</a></li>
<li><a href="index.html#history">History</a></li>
<li><a href="index.html#credits">Credits</a></li>
<li><a href="index.html#contact">Contact us</a></li>
</ul>
</li>
<li><a href="manual.html">Manual</a>
<ul>
<li><a href="manual.html#introduction">Introduction</a></li>
<li><a href="manual.html#building">Building</a></li>
<li><a href="manual.html#installation">Installation</a></li>
<li><a href="manual.html#reference">Reference</a></li>
</ul>
</li>
<li><a href="examples.html">Examples</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Project</a>
<ul>
<li><a href="https://github.com/keplerproject/luafilesystem/issues">Bug Tracker</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Git</a></li>
</ul>
</li>
<li><a href="license.html">License</a></li>
</ul>
</div> <!-- id="navigation" -->
<div id="content">
<h2><a name="overview"></a>Overview</h2>
<p>LuaFileSystem is a <a href="http://www.lua.org">Lua</a> library
developed to complement the set of functions related to file
systems offered by the standard Lua distribution.</p>
<p>LuaFileSystem offers a portable way to access
the underlying directory structure and file attributes.</p>
<p>LuaFileSystem is free software and uses the same
<a href="license.html">license</a> as Lua 5.1.</p>
<h2><a name="status"></a>Status</h2>
<p>Current version is 1.6.3. It works with Lua 5.1, 5.2 and 5.3.</p>
<h2><a name="download"></a>Download</h2>
<p>LuaFileSystem source can be downloaded from its
<a href="http://github.com/keplerproject/luafilesystem">Github</a>
page.</p>
<h2><a name="history"></a>History</h2>
<dl class="history">
<dt><strong>Version 1.6.3</strong> [15/Jan/2015]</dt>
<dd><ul>
<li>Lua 5.3 support.</li>
<li>Assorted bugfixes.</li>
</ul></dd>
<dt><strong>Version 1.6.2</strong> [??/Oct/2012]</dt>
<dd><ul>
<li>Full Lua 5.2 compatibility (with Lua 5.1 fallbacks)</li>
</ul></dd>
<dt><strong>Version 1.6.1</strong> [01/Oct/2012]</dt>
<dd><ul>
<li>fix build for Lua 5.2</li>
</ul></dd>
<dt><strong>Version 1.6.0</strong> [26/Sep/2012]</dt>
<dd><ul>
<li>getcwd fix for Android</li>
<li>support for Lua 5.2</li>
<li>add lfs.link</li>
<li>other bug fixes</li>
</ul></dd>
<dt><strong>Version 1.5.0</strong> [20/Oct/2009]</dt>
<dd><ul>
<li>Added explicit next and close methods to second return value of lfs.dir
(the directory object), for explicit iteration or explicit closing.</li>
<li>Added directory locking via lfs.lock_dir function (see the <a href="manual.html">manual</a>).</li>
</ul></dd>
<dt><strong>Version 1.4.2</strong> [03/Feb/2009]</dt>
<dd>
<ul>
<li>fixed bug [<a href="http://luaforge.net/tracker/?func=detail&amp;group_id=66&amp;aid=13198&amp;atid=356">#13198</a>]
lfs.attributes(filename, 'size') overflow on files > 2 Gb again (bug report and patch by KUBO Takehiro).</li>
<li>fixed bug [<a href="http://luaforge.net/tracker/?group_id=66&amp;atid=356&amp;func=detail&amp;aid=39794">#39794</a>]
Compile error on Solaris 10 (bug report and patch by Aaron B).</li>
<li>fixed compilation problems with Borland C.</li>
</ul>
</dd>
<dt><strong>Version 1.4.1</strong> [07/May/2008]</dt>
<dd>
<ul>
<li>documentation review</li>
<li>fixed Windows compilation issues</li>
<li>fixed bug in the Windows tests (patch by Shmuel Zeigerman)</li>
<li>fixed bug [<a href="http://luaforge.net/tracker/?func=detail&amp;group_id=66&amp;aid=2185&amp;atid=356">#2185</a>]
<code>lfs.attributes(filename, 'size')</code> overflow on files > 2 Gb
</li>
</ul>
</dd>
<dt><strong>Version 1.4.0</strong> [13/Feb/2008]</dt>
<dd>
<ul>
<li>added function
<a href="manual.html#setmode"><code>lfs.setmode</code></a>
(works only in Windows systems).</li>
<li><a href="manual.html#attributes"><code>lfs.attributes</code></a>
raises an error if attribute does not exist</li>
</ul>
</dd>
<dt><strong>Version 1.3.0</strong> [26/Oct/2007]</dt>
<dd>
<ul>
<li>added function
<a href="manual.html#symlinkattributes"><code>lfs.symlinkattributes</code></a>
(works only in non Windows systems).</li>
</ul>
</dd>
<dt><strong>Version 1.2.1</strong> [08/May/2007]</dt>
<dd>
<ul>
<li>compatible only with Lua 5.1 (Lua 5.0 support was dropped)</li>
</ul>
</dd>
<dt><strong>Version 1.2</strong> [15/Mar/2006]</dt>
<dd>
<ul>
<li>added optional argument to
<a href="manual.html#attributes"><code>lfs.attributes</code></a></li>
<li>added function
<a href="manual.html#rmdir"><code>lfs.rmdir</code></a></li>
<li>bug correction on <a href="manual.html#dir"><code>lfs.dir</code></a></li>
</ul>
</dd>
<dt><strong>Version 1.1</strong> [30/May/2005]</dt>
<dd>
<ul>
<li>added function <a href="manual.html#touch"><code>lfs.touch</code></a>.</li>
</ul>
</dd>
<dt><strong>Version 1.0</strong> [21/Jan/2005]</dt>
<dd />
<dt><strong>Version 1.0 Beta</strong> [10/Nov/2004]</dt>
<dd />
</dl>
<h2><a name="credits"></a>Credits</h2>
<p>LuaFileSystem was designed by Roberto Ierusalimschy,
Andr&eacute; Carregal and Tom&aacute;s Guisasola as part of the
<a href="http://www.keplerproject.org">Kepler Project</a>,
which holds its copyright. LuaFileSystem is currently maintained by F&aacute;bio Mascarenhas.</p>
<h2><a name="contact"></a>Contact us</h2>
<p>For more information please
<a href="mailto:info-NO-SPAM-THANKS@keplerproject.org">contact us</a>.
Comments are welcome!</p>
<p>You can also reach other Kepler developers and users on the Kepler Project
<a href="http://luaforge.net/mail/?group_id=104">mailing list</a>.</p>
</div> <!-- id="content" -->
</div> <!-- id="main" -->
<div id="about">
<p><a href="http://validator.w3.org/check?uri=referer">Valid XHTML 1.0!</a></p>
<p><small>$Id: index.html,v 1.44 2009/02/04 21:21:33 carregal Exp $</small></p>
</div> <!-- id="about" -->
</div> <!-- id="container" -->
</body>
</html>

View File

@ -0,0 +1,122 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>LuaFileSystem</title>
<link rel="stylesheet" href="doc.css" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<div id="container">
<div id="product">
<div id="product_logo">
<a href="http://www.keplerproject.org">
<img alt="LuaFileSystem" src="luafilesystem.png"/>
</a>
</div>
<div id="product_name"><big><strong>LuaFileSystem</strong></big></div>
<div id="product_description">File System Library for the Lua Programming Language</div>
</div> <!-- id="product" -->
<div id="main">
<div id="navigation">
<h1>LuaFileSystem</h1>
<ul>
<li><a href="index.html">Home</a>
<ul>
<li><a href="index.html#overview">Overview</a></li>
<li><a href="index.html#status">Status</a></li>
<li><a href="index.html#download">Download</a></li>
<li><a href="index.html#history">History</a></li>
<li><a href="index.html#credits">Credits</a></li>
<li><a href="index.html#contact">Contact us</a></li>
</ul>
</li>
<li><a href="manual.html">Manual</a>
<ul>
<li><a href="manual.html#introduction">Introduction</a></li>
<li><a href="manual.html#building">Building</a></li>
<li><a href="manual.html#installation">Installation</a></li>
<li><a href="manual.html#reference">Reference</a></li>
</ul>
</li>
<li><a href="examples.html">Examples</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Project</a>
<ul>
<li><a href="https://github.com/keplerproject/luafilesystem/issues/">Bug Tracker</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Git</a></li>
</ul>
</li>
<li><strong>License</strong></li>
</ul>
</div> <!-- id="navigation" -->
<div id="content">
<h1>License</h1>
<p>
LuaFileSystem is free software: it can be used for both academic
and commercial purposes at absolutely no cost. There are no
royalties or GNU-like "copyleft" restrictions. LuaFileSystem
qualifies as
<a href="http://www.opensource.org/docs/definition.html">Open Source</a>
software.
Its licenses are compatible with
<a href="http://www.gnu.org/licenses/gpl.html">GPL</a>.
LuaFileSystem is not in the public domain and the
<a href="http://www.keplerproject.org">Kepler Project</a>
keeps its copyright.
The legal details are below.
</p>
<p>The spirit of the license is that you are free to use
LuaFileSystem for any purpose at no cost without having to ask us.
The only requirement is that if you do use LuaFileSystem, then you
should give us credit by including the appropriate copyright notice
somewhere in your product or its documentation.</p>
<p>The LuaFileSystem library is designed and implemented by Roberto
Ierusalimschy, Andr&eacute; Carregal and Tom&aacute;s Guisasola.
The implementation is not derived from licensed software.</p>
<hr/>
<p>Copyright &copy; 2003 Kepler Project.</p>
<p>Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use, copy,
modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:</p>
<p>The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.</p>
<p>THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.</p>
</div> <!-- id="content" -->
</div> <!-- id="main" -->
<div id="about">
<p><a href="http://validator.w3.org/check?uri=referer">Valid XHTML 1.0!</a></p>
<p><small>$Id: license.html,v 1.13 2008/02/11 22:42:21 carregal Exp $</small></p>
</div><!-- id="about" -->
</div><!-- id="container" -->
</body>
</html>

Binary file not shown.


View File

@ -0,0 +1,280 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>LuaFileSystem</title>
<link rel="stylesheet" href="doc.css" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<div id="container">
<div id="product">
<div id="product_logo">
<a href="http://www.keplerproject.org"><img alt="LuaFileSystem" src="luafilesystem.png"/></a>
</div>
<div id="product_name"><big><strong>LuaFileSystem</strong></big></div>
<div id="product_description">File System Library for the Lua Programming Language</div>
</div> <!-- id="product" -->
<div id="main">
<div id="navigation">
<h1>LuaFileSystem</h1>
<ul>
<li><a href="index.html">Home</a>
<ul>
<li><a href="index.html#overview">Overview</a></li>
<li><a href="index.html#status">Status</a></li>
<li><a href="index.html#download">Download</a></li>
<li><a href="index.html#history">History</a></li>
<li><a href="index.html#credits">Credits</a></li>
<li><a href="index.html#contact">Contact us</a></li>
</ul>
</li>
<li><strong>Manual</strong>
<ul>
<li><a href="manual.html#introduction">Introduction</a></li>
<li><a href="manual.html#building">Building</a></li>
<li><a href="manual.html#installation">Installation</a></li>
<li><a href="manual.html#reference">Reference</a></li>
</ul>
</li>
<li><a href="examples.html">Examples</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Project</a>
<ul>
<li><a href="https://github.com/keplerproject/luafilesystem/issues">Bug Tracker</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Git</a></li>
</ul>
</li>
<li><a href="license.html">License</a></li>
</ul>
</div> <!-- id="navigation" -->
<div id="content">
<h2><a name="introduction"></a>Introduction</h2>
<p>LuaFileSystem is a <a href="http://www.lua.org">Lua</a> library
developed to complement the set of functions related to file
systems offered by the standard Lua distribution.</p>
<p>LuaFileSystem offers a portable way to access
the underlying directory structure and file attributes.</p>
<h2><a name="building"></a>Building</h2>
<p>
LuaFileSystem should be built with Lua 5.1 so the language library
and header files for the target version must be installed properly.
</p>
<p>
LuaFileSystem offers a Makefile and a separate configuration file,
<code>config</code>,
which should be edited to suit your installation before running
<code>make</code>.
The file has some definitions like paths to the external libraries,
compiler options and the like.
</p>
<p>On Windows, the C runtime used to compile LuaFileSystem must be the same
runtime that Lua uses, or some LuaFileSystem functions will not work.</p>
<h2><a name="installation"></a>Installation</h2>
<p>The easiest way to install LuaFileSystem is to use LuaRocks:</p>
<pre class="example">
luarocks install luafilesystem
</pre>
<p>If you prefer to install LuaFileSystem manually, the compiled binary should be copied to a directory in your
<a href="http://www.lua.org/manual/5.1/manual.html#pdf-package.cpath">C path</a>.</p>
<h2><a name="reference"></a>Reference</h2>
<p>
LuaFileSystem offers the following functions:
</p>
<dl class="reference">
<dt><a name="attributes"></a><strong><code>lfs.attributes (filepath [, aname])</code></strong></dt>
<dd>Returns a table with the file attributes corresponding to
<code>filepath</code> (or <code>nil</code> followed by an error message
in case of error).
If the second optional argument is given, then only the value of the
named attribute is returned (this use is equivalent to
<code>lfs.attributes(filepath).aname</code>, but the table is not created
and only one attribute is retrieved from the O.S.).
The attributes are described as follows;
attribute <code>mode</code> is a string, all the others are numbers,
and the time related attributes use the same time reference of
<a href="http://www.lua.org/manual/5.1/manual.html#pdf-os.time"><code>os.time</code></a>:
<dl>
<dt><strong><code>dev</code></strong></dt>
<dd>on Unix systems, this represents the device that the inode resides on. On Windows systems,
represents the drive number of the disk containing the file</dd>
<dt><strong><code>ino</code></strong></dt>
<dd>on Unix systems, this represents the inode number. On Windows systems this has no meaning</dd>
<dt><strong><code>mode</code></strong></dt>
<dd>string representing the associated protection mode (the values could be
<code>file</code>, <code>directory</code>, <code>link</code>, <code>socket</code>,
<code>named pipe</code>, <code>char device</code>, <code>block device</code> or
<code>other</code>)</dd>
<dt><strong><code>nlink</code></strong></dt>
<dd>number of hard links to the file</dd>
<dt><strong><code>uid</code></strong></dt>
<dd>user-id of owner (Unix only, always 0 on Windows)</dd>
<dt><strong><code>gid</code></strong></dt>
<dd>group-id of owner (Unix only, always 0 on Windows)</dd>
<dt><strong><code>rdev</code></strong></dt>
<dd>on Unix systems, represents the device type, for special file inodes.
On Windows systems represents the same as <code>dev</code></dd>
<dt><strong><code>access</code></strong></dt>
<dd>time of last access</dd>
<dt><strong><code>modification</code></strong></dt>
<dd>time of last data modification</dd>
<dt><strong><code>change</code></strong></dt>
<dd>time of last file status change</dd>
<dt><strong><code>size</code></strong></dt>
<dd>file size, in bytes</dd>
<dt><strong><code>blocks</code></strong></dt>
<dd>blocks allocated for the file (Unix only)</dd>
<dt><strong><code>blksize</code></strong></dt>
<dd>optimal file system I/O blocksize; (Unix only)</dd>
</dl>
This function uses <code>stat</code> internally thus if the given
<code>filepath</code> is a symbolic link, it is followed (if it points to
another link the chain is followed recursively) and the information
is about the file it refers to.
To obtain information about the link itself, see function
<a href="#symlinkattributes">lfs.symlinkattributes</a>.
</dd>
<dt><a name="chdir"></a><strong><code>lfs.chdir (path)</code></strong></dt>
<dd>Changes the current working directory to the given
<code>path</code>.<br />
Returns <code>true</code> in case of success or <code>nil</code> plus an
error string.</dd>
<dt><a name="chdir"></a><strong><code>lfs.lock_dir(path, [seconds_stale])</code></strong></dt>
<dd>Creates a lockfile (called lockfile.lfs) in <code>path</code> if it does not
exist and returns the lock. If the lock already exists checks if
it's stale, using the second parameter (default for the second
parameter is <code>INT_MAX</code>, which in practice means the lock will never
be stale). To free the lock call <code>lock:free()</code>. <br/>
In case of any errors it returns nil and the error message. In
particular, if the lock exists and is not stale it returns the
"File exists" message.</dd>
<dt><a name="getcwd"></a><strong><code>lfs.currentdir ()</code></strong></dt>
<dd>Returns a string with the current working directory or <code>nil</code>
plus an error string.</dd>
<dt><a name="dir"></a><strong><code>iter, dir_obj = lfs.dir (path)</code></strong></dt>
<dd>
Lua iterator over the entries of a given directory.
Each time the iterator is called with <code>dir_obj</code> it returns a directory entry's name as a string, or
<code>nil</code> if there are no more entries. You can also iterate by calling <code>dir_obj:next()</code>, and
explicitly close the directory before the iteration is finished with <code>dir_obj:close()</code>.
Raises an error if <code>path</code> is not a directory.
</dd>
<dt><a name="lock"></a><strong><code>lfs.lock (filehandle, mode[, start[, length]])</code></strong></dt>
<dd>Locks a file or a part of it. This function works on <em>open files</em>; the
file handle should be specified as the first argument.
The string <code>mode</code> could be either
<code>r</code> (for a read/shared lock) or <code>w</code> (for a
write/exclusive lock). The optional arguments <code>start</code>
and <code>length</code> can be used to specify a starting point and
its length; both should be numbers.<br />
Returns <code>true</code> if the operation was successful; in
case of error, it returns <code>nil</code> plus an error string.
</dd>
<dt><a name="link"></a><strong><code>lfs.link (old, new[, symlink])</code></strong></dt>
<dd>Creates a link. The first argument is the object to link to
and the second is the name of the link. If the optional third
argument is true, the link will be a symbolic link (by default, a
hard link is created).
</dd>
<dt><a name="mkdir"></a><strong><code>lfs.mkdir (dirname)</code></strong></dt>
<dd>Creates a new directory. The argument is the name of the new
directory.<br />
Returns <code>true</code> if the operation was successful;
in case of error, it returns <code>nil</code> plus an error string.
</dd>
<dt><a name="rmdir"></a><strong><code>lfs.rmdir (dirname)</code></strong></dt>
<dd>Removes an existing directory. The argument is the name of the directory.<br />
Returns <code>true</code> if the operation was successful;
in case of error, it returns <code>nil</code> plus an error string.</dd>
<dt><a name="setmode"></a><strong><code>lfs.setmode (file, mode)</code></strong></dt>
<dd>Sets the writing mode for a file. The mode string can be either <code>"binary"</code> or <code>"text"</code>.
Returns <code>true</code> followed by the previous mode string for the file, or
<code>nil</code> followed by an error string in case of errors.
On non-Windows platforms, where the two modes are identical,
setting the mode has no effect, and the mode is always returned as <code>binary</code>.
</dd>
<dt><a name="symlinkattributes"></a><strong><code>lfs.symlinkattributes (filepath [, aname])</code></strong></dt>
<dd>Identical to <a href="#attributes">lfs.attributes</a> except that
it obtains information about the link itself (not the file it refers to).
On Windows this function does not yet support links, and is identical to
<code>lfs.attributes</code>.
</dd>
<dt><a name="touch"></a><strong><code>lfs.touch (filepath [, atime [, mtime]])</code></strong></dt>
<dd>Set access and modification times of a file. This function is
a binding to the <code>utime</code> function. The first argument is the
filename, the second argument (<code>atime</code>) is the access time,
and the third argument (<code>mtime</code>) is the modification time.
Both times are provided in seconds (which should be generated with
Lua standard function <code>os.time</code>).
If the modification time is omitted, the access time provided is used;
if both times are omitted, the current time is used.<br />
Returns <code>true</code> if the operation was successful;
in case of error, it returns <code>nil</code> plus an error string.
</dd>
<dt><a name="unlock"></a><strong><code>lfs.unlock (filehandle[, start[, length]])</code></strong></dt>
<dd>Unlocks a file or a part of it. This function works on
<em>open files</em>; the file handle should be specified as the first
argument. The optional arguments <code>start</code> and
<code>length</code> can be used to specify a starting point and its
length; both should be numbers.<br />
Returns <code>true</code> if the operation was successful;
in case of error, it returns <code>nil</code> plus an error string.
</dd>
</dl>
</div> <!-- id="content" -->
</div> <!-- id="main" -->
<div id="about">
<p><a href="http://validator.w3.org/check?uri=referer">Valid XHTML 1.0!</a></p>
<p><small>$Id: manual.html,v 1.45 2009/06/03 20:53:55 mascarenhas Exp $</small></p>
</div> <!-- id="about" -->
</div> <!-- id="container" -->
</body>
</html>

View File

@ -0,0 +1,29 @@
package = "LuaFileSystem"
version = "1.6.3-2"
source = {
url = "git://github.com/keplerproject/luafilesystem",
tag = "v_1_6_3"
}
description = {
summary = "File System Library for the Lua Programming Language",
detailed = [[
LuaFileSystem is a Lua library developed to complement the set of
functions related to file systems offered by the standard Lua
distribution. LuaFileSystem offers a portable way to access the
underlying directory structure and file attributes.
]],
homepage = "http://keplerproject.github.io/luafilesystem",
license = "MIT/X11"
}
dependencies = {
"lua >= 5.1"
}
build = {
type = "builtin",
modules = {
lfs = "src/lfs.c"
},
copy_directories = {
"doc", "tests"
}
}

View File

@ -0,0 +1,19 @@
rock_manifest = {
doc = {
us = {
["doc.css"] = "d0a913514fb190240b3b4033d105cbc0",
["examples.html"] = "5832f72021728374cf57b621d62ce0ff",
["index.html"] = "96885bdda963939f0a363b5fa6b16b59",
["license.html"] = "e3a756835cb7c8ae277d5e513c8e32ee",
["luafilesystem.png"] = "81e923e976e99f894ea0aa8b52baff29",
["manual.html"] = "d6473799b73ce486c3ea436586cb3b34"
}
},
lib = {
["lfs.dll"] = "c0e2145e1ef2815ae5fae01454291b66"
},
["luafilesystem-1.6.3-2.rockspec"] = "eb0ef7c190516892eb8357af799eea5f",
tests = {
["test.lua"] = "7b4ddb5bdb7e0b1b1ed0150d473535c9"
}
}

View File

@ -0,0 +1,175 @@
#!/usr/bin/env lua5.1
local tmp = "/tmp"
local sep = string.match (package.config, "[^\n]+")
local upper = ".."
local lfs = require"lfs"
print (lfs._VERSION)
io.write(".")
io.flush()
function attrdir (path)
for file in lfs.dir(path) do
if file ~= "." and file ~= ".." then
local f = path..sep..file
print ("\t=> "..f.." <=")
local attr = lfs.attributes (f)
assert (type(attr) == "table")
if attr.mode == "directory" then
attrdir (f)
else
for name, value in pairs(attr) do
print (name, value)
end
end
end
end
end
-- Checking changing directories
local current = assert (lfs.currentdir())
local reldir = string.gsub (current, "^.*%"..sep.."([^"..sep.."])$", "%1")
assert (lfs.chdir (upper), "could not change to upper directory")
assert (lfs.chdir (reldir), "could not change back to current directory")
assert (lfs.currentdir() == current, "error trying to change directories")
assert (lfs.chdir ("this couldn't be an actual directory") == nil, "could change to a non-existent directory")
io.write(".")
io.flush()
-- Changing creating and removing directories
local tmpdir = current..sep.."lfs_tmp_dir"
local tmpfile = tmpdir..sep.."tmp_file"
-- Test for existence of a previous lfs_tmp_dir
-- that may have resulted from an interrupted test execution and remove it
if lfs.chdir (tmpdir) then
assert (lfs.chdir (upper), "could not change to upper directory")
assert (os.remove (tmpfile), "could not remove file from previous test")
assert (lfs.rmdir (tmpdir), "could not remove directory from previous test")
end
io.write(".")
io.flush()
-- tries to create a directory
assert (lfs.mkdir (tmpdir), "could not make a new directory")
local attrib, errmsg = lfs.attributes (tmpdir)
if not attrib then
error ("could not get attributes of file `"..tmpdir.."':\n"..errmsg)
end
local f = io.open(tmpfile, "w")
f:close()
io.write(".")
io.flush()
-- Change access time
local testdate = os.time({ year = 2007, day = 10, month = 2, hour=0})
assert (lfs.touch (tmpfile, testdate))
local new_att = assert (lfs.attributes (tmpfile))
assert (new_att.access == testdate, "could not set access time")
assert (new_att.modification == testdate, "could not set modification time")
io.write(".")
io.flush()
-- Change access and modification time
local testdate1 = os.time({ year = 2007, day = 10, month = 2, hour=0})
local testdate2 = os.time({ year = 2007, day = 11, month = 2, hour=0})
assert (lfs.touch (tmpfile, testdate2, testdate1))
local new_att = assert (lfs.attributes (tmpfile))
assert (new_att.access == testdate2, "could not set access time")
assert (new_att.modification == testdate1, "could not set modification time")
io.write(".")
io.flush()
-- Checking link (does not work on Windows)
if lfs.link (tmpfile, "_a_link_for_test_", true) then
assert (lfs.attributes"_a_link_for_test_".mode == "file")
assert (lfs.symlinkattributes"_a_link_for_test_".mode == "link")
assert (lfs.link (tmpfile, "_a_hard_link_for_test_"))
assert (lfs.attributes (tmpfile, "nlink") == 2)
assert (os.remove"_a_link_for_test_")
assert (os.remove"_a_hard_link_for_test_")
end
io.write(".")
io.flush()
-- Checking text/binary modes (only has an effect in Windows)
local f = io.open(tmpfile, "w")
local result, mode = lfs.setmode(f, "binary")
assert(result) -- on non-Windows platforms, mode is always returned as "binary"
result, mode = lfs.setmode(f, "text")
assert(result and mode == "binary")
f:close()
io.write(".")
io.flush()
-- Restore access time to current value
assert (lfs.touch (tmpfile, attrib.access, attrib.modification))
new_att = assert (lfs.attributes (tmpfile))
assert (new_att.access == attrib.access)
assert (new_att.modification == attrib.modification)
io.write(".")
io.flush()
-- Check consistency of lfs.attributes values
local attr = lfs.attributes (tmpfile)
for key, value in pairs(attr) do
assert (value == lfs.attributes (tmpfile, key),
"lfs.attributes values not consistent")
end
-- Remove new file and directory
assert (os.remove (tmpfile), "could not remove new file")
assert (lfs.rmdir (tmpdir), "could not remove new directory")
assert (lfs.mkdir (tmpdir..sep.."lfs_tmp_dir") == nil, "could create a directory inside a non-existent one")
io.write(".")
io.flush()
-- Trying to get attributes of a non-existent file
assert (lfs.attributes ("this couldn't be an actual file") == nil, "could get attributes of a non-existent file")
assert (type(lfs.attributes (upper)) == "table", "couldn't get attributes of upper directory")
io.write(".")
io.flush()
-- Stressing directory iterator
count = 0
for i = 1, 4000 do
for file in lfs.dir (tmp) do
count = count + 1
end
end
io.write(".")
io.flush()
-- Stressing directory iterator, explicit version
count = 0
for i = 1, 4000 do
local iter, dir = lfs.dir(tmp)
local file = dir:next()
while file do
count = count + 1
file = dir:next()
end
assert(not pcall(dir.next, dir))
end
io.write(".")
io.flush()
-- directory explicit close
local iter, dir = lfs.dir(tmp)
dir:close()
assert(not pcall(dir.next, dir))
print"Ok!"

View File

@ -0,0 +1,653 @@
#!/usr/bin/env lua
---------
-- LuaSrcDiet
--
-- Compresses Lua source code by removing unnecessary characters.
-- For Lua 5.1+ source code.
--
-- **Notes:**
--
-- * Remember to update version and date information below (MSG_TITLE).
-- * TODO: passing data tables around is a horrific mess.
-- * TODO: implement pcall() to properly handle lexer and other errors.
-- * TODO: need some automatic testing for a semblance of sanity.
-- * TODO: the plugin module is highly experimental and unstable.
----
local equiv = require "luasrcdiet.equiv"
local fs = require "luasrcdiet.fs"
local llex = require "luasrcdiet.llex"
local lparser = require "luasrcdiet.lparser"
local luasrcdiet = require "luasrcdiet.init"
local optlex = require "luasrcdiet.optlex"
local optparser = require "luasrcdiet.optparser"
local byte = string.byte
local concat = table.concat
local find = string.find
local fmt = string.format
local gmatch = string.gmatch
local match = string.match
local print = print
local rep = string.rep
local sub = string.sub
local plugin
local LUA_VERSION = match(_VERSION, " (5%.[123])$") or "5.1"
-- Is --opt-binequiv available for this Lua version?
local BIN_EQUIV_AVAIL = LUA_VERSION == "5.1" and not package.loaded.jit
---------------------- Messages and textual data ----------------------
local MSG_TITLE = fmt([[
LuaSrcDiet: Puts your Lua 5.1+ source code on a diet
Version %s <%s>
]], luasrcdiet._VERSION, luasrcdiet._HOMEPAGE)
local MSG_USAGE = [[
usage: luasrcdiet [options] [filenames]
example:
>luasrcdiet myscript.lua -o myscript_.lua
options:
-v, --version prints version information
-h, --help prints usage information
-o <file> specify file name to write output
-s <suffix> suffix for output files (default '_')
--keep <msg> keep block comment with <msg> inside
--plugin <module> run <module> in plugin/ directory
- stop handling arguments
(optimization levels)
--none all optimizations off (normalizes EOLs only)
--basic lexer-based optimizations only
--maximum maximize reduction of source
(informational)
--quiet process files quietly
--read-only read file and print token stats only
--dump-lexer dump raw tokens from lexer to stdout
--dump-parser dump variable tracking tables from parser
--details extra info (strings, numbers, locals)
features (to disable, insert 'no' prefix like --noopt-comments):
%s
default settings:
%s]]
-- Optimization options, for ease of switching on and off.
--
-- * Positive to enable optimization, negative (no) to disable.
-- * These options should follow --opt-* and --noopt-* style for now.
local OPTION = [[
--opt-comments,'remove comments and block comments'
--opt-whitespace,'remove whitespace excluding EOLs'
--opt-emptylines,'remove empty lines'
--opt-eols,'all above, plus remove unnecessary EOLs'
--opt-strings,'optimize strings and long strings'
--opt-numbers,'optimize numbers'
--opt-locals,'optimize local variable names'
--opt-entropy,'tries to reduce symbol entropy of locals'
--opt-srcequiv,'insist on source (lexer stream) equivalence'
--opt-binequiv,'insist on binary chunk equivalence (only for PUC Lua 5.1)'
--opt-experimental,'apply experimental optimizations'
]]
-- Preset configuration.
local DEFAULT_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-numbers --opt-locals
--opt-srcequiv --noopt-binequiv
]]
-- Override configurations: MUST explicitly enable/disable everything.
local BASIC_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals --noopt-entropy
--opt-srcequiv --noopt-binequiv
]]
local MAXIMUM_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-eols --opt-strings --opt-numbers
--opt-locals --opt-entropy
--opt-srcequiv
]] .. (BIN_EQUIV_AVAIL and ' --opt-binequiv' or ' --noopt-binequiv')
local NONE_CONFIG = [[
--noopt-comments --noopt-whitespace --noopt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals --noopt-entropy
--opt-srcequiv --noopt-binequiv
]]
local DEFAULT_SUFFIX = "_" -- default suffix for file renaming
local PLUGIN_SUFFIX = "luasrcdiet.plugin." -- relative location of plugins
------------- Startup and initialize option list handling -------------
--- Simple error message handler; change to error if traceback wanted.
--
-- @tparam string msg The message to print.
local function die(msg)
print("LuaSrcDiet (error): "..msg); os.exit(1)
end
--die = error--DEBUG
-- Prepare text for list of optimizations, prepare lookup table.
local MSG_OPTIONS = ""
do
local WIDTH = 24
local o = {}
for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
local msg = " "..op
msg = msg..rep(" ", WIDTH - #msg)..desc.."\n"
MSG_OPTIONS = MSG_OPTIONS..msg
o[op] = true
o["--no"..sub(op, 3)] = true
end
OPTION = o -- replace OPTION with lookup table
end
MSG_USAGE = fmt(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)
--------- Global variable initialization, option set handling ---------
local suffix = DEFAULT_SUFFIX -- file suffix
local option = {} -- program options
local stat_c, stat_l -- statistics tables
--- Sets option lookup table based on a text list of options.
--
-- Note: additional forced settings for --opt-eols is done in optlex.lua.
--
-- @tparam string CONFIG
local function set_options(CONFIG)
for op in gmatch(CONFIG, "(%-%-%S+)") do
if sub(op, 3, 4) == "no" and -- handle negative options
OPTION["--"..sub(op, 5)] then
option[sub(op, 5)] = false
else
option[sub(op, 3)] = true
end
end
end
-------------------------- Support functions --------------------------
-- List of token types, parser-significant types are up to TTYPE_GRAMMAR
-- while the rest are not used by parsers; arranged for stats display.
local TTYPES = {
"TK_KEYWORD", "TK_NAME", "TK_NUMBER", -- grammar
"TK_STRING", "TK_LSTRING", "TK_OP",
"TK_EOS",
"TK_COMMENT", "TK_LCOMMENT", -- non-grammar
"TK_EOL", "TK_SPACE",
}
local TTYPE_GRAMMAR = 7
local EOLTYPES = { -- EOL names for token dump
["\n"] = "LF", ["\r"] = "CR",
["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
}
--- Reads source code from the file.
--
-- @tparam string fname Path of the file to read.
-- @treturn string Content of the file.
local function load_file(fname)
local data, err = fs.read_file(fname, "rb")
if not data then die(err) end
return data
end
--- Saves source code to the file.
--
-- @tparam string fname Path of the destination file.
-- @tparam string dat The data to write into the file.
local function save_file(fname, dat)
local ok, err = fs.write_file(fname, dat, "wb")
if not ok then die(err) end
end
------------------ Functions to deal with statistics ------------------
--- Initializes the statistics table.
local function stat_init()
stat_c, stat_l = {}, {}
for i = 1, #TTYPES do
local ttype = TTYPES[i]
stat_c[ttype], stat_l[ttype] = 0, 0
end
end
--- Adds a token to the statistics table.
--
-- @tparam string tok The token.
-- @param seminfo
local function stat_add(tok, seminfo)
stat_c[tok] = stat_c[tok] + 1
stat_l[tok] = stat_l[tok] + #seminfo
end
--- Computes totals for the statistics table, returns average table.
--
-- @treturn table
local function stat_calc()
local function avg(c, l) -- safe average function
if c == 0 then return 0 end
return l / c
end
local stat_a = {}
local c, l = 0, 0
for i = 1, TTYPE_GRAMMAR do -- total grammar tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
end
stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l
stat_a.TOTAL_TOK = avg(c, l)
c, l = 0, 0
for i = 1, #TTYPES do -- total all tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype])
end
stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l
stat_a.TOTAL_ALL = avg(c, l)
return stat_a
end
----------------------------- Main tasks -----------------------------
--- A simple token dumper, minimal translation of seminfo data.
--
-- @tparam string srcfl Path of the source file.
local function dump_tokens(srcfl)
-- Load file and process source input into tokens.
local z = load_file(srcfl)
local toklist, seminfolist = llex.lex(z)
-- Display output.
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
if tok == "TK_OP" and byte(seminfo) < 32 then
seminfo = "("..byte(seminfo)..")"
elseif tok == "TK_EOL" then
seminfo = EOLTYPES[seminfo]
else
seminfo = "'"..seminfo.."'"
end
print(tok.." "..seminfo)
end--for
end
--- Dumps globalinfo and localinfo tables.
--
-- @tparam string srcfl Path of the source file.
local function dump_parser(srcfl)
-- Load file and process source input into tokens,
local z = load_file(srcfl)
local toklist, seminfolist, toklnlist = llex.lex(z)
-- Do parser optimization here.
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
local globalinfo, localinfo = xinfo.globalinfo, xinfo.localinfo
-- Display output.
local hl = rep("-", 72)
print("*** Local/Global Variable Tracker Tables ***")
print(hl.."\n GLOBALS\n"..hl)
-- global tables have a list of xref numbers only
for i = 1, #globalinfo do
local obj = globalinfo[i]
local msg = "("..i..") '"..obj.name.."' -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
-- Local tables have xref numbers and a few other special
-- numbers that are specially named: decl (declaration xref),
-- act (activation xref), rem (removal xref).
print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
for i = 1, #localinfo do
local obj = localinfo[i]
local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl..
" act:"..obj.act.." rem:"..obj.rem
if obj.is_special then
msg = msg.." is_special"
end
msg = msg.." -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
print(hl.."\n")
end
--- Reads source file(s) and reports some statistics.
--
-- @tparam string srcfl Path of the source file.
local function read_only(srcfl)
-- Load file and process source input into tokens.
local z = load_file(srcfl)
local toklist, seminfolist = llex.lex(z)
print(MSG_TITLE)
print("Statistics for: "..srcfl.."\n")
-- Collect statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
-- Display output.
local function figures(tt)
return stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f"
local hl = rep("-", 42)
print(fmt(tabf1, "Lexical", "Input", "Input", "Input"))
print(fmt(tabf1, "Elements", "Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl.."\n")
end
--- Processes source file(s), writes output and reports some statistics.
--
-- @tparam string srcfl Path of the source file.
-- @tparam string destfl Path of the destination file where to write optimized source.
local function process_file(srcfl, destfl)
-- handle quiet option
local function print(...) --luacheck: ignore 431
if option.QUIET then return end
_G.print(...)
end
if plugin and plugin.init then -- plugin init
option.EXIT = false
plugin.init(option, srcfl, destfl)
if option.EXIT then return end
end
print(MSG_TITLE) -- title message
-- Load file and process source input into tokens.
local z = load_file(srcfl)
if plugin and plugin.post_load then -- plugin post-load
z = plugin.post_load(z) or z
if option.EXIT then return end
end
local toklist, seminfolist, toklnlist = llex.lex(z)
if plugin and plugin.post_lex then -- plugin post-lex
plugin.post_lex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
-- Collect 'before' statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat1_a = stat_calc()
local stat1_c, stat1_l = stat_c, stat_l
-- Do parser optimization here.
optparser.print = print -- hack
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
if plugin and plugin.post_parse then -- plugin post-parse
plugin.post_parse(xinfo.globalinfo, xinfo.localinfo)
if option.EXIT then return end
end
optparser.optimize(option, toklist, seminfolist, xinfo)
if plugin and plugin.post_optparse then -- plugin post-optparse
plugin.post_optparse()
if option.EXIT then return end
end
-- Do lexer optimization here, save output file.
local warn = optlex.warn -- use this as a general warning lookup
optlex.print = print -- hack
toklist, seminfolist, toklnlist
= optlex.optimize(option, toklist, seminfolist, toklnlist)
if plugin and plugin.post_optlex then -- plugin post-optlex
plugin.post_optlex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
local dat = concat(seminfolist)
-- Depending on options selected, embedded EOLs in long strings and
-- long comments may not have been translated to \n, tack a warning.
if find(dat, "\r\n", 1, 1) or
find(dat, "\n\r", 1, 1) then
warn.MIXEDEOL = true
end
-- Test source and binary chunk equivalence.
equiv.init(option, llex, warn)
equiv.source(z, dat)
if BIN_EQUIV_AVAIL then
equiv.binary(z, dat)
end
local smsg = "before and after lexer streams are NOT equivalent!"
local bmsg = "before and after binary chunks are NOT equivalent!"
-- for reporting, die if option was selected, else just warn
if warn.SRC_EQUIV then
if option["opt-srcequiv"] then die(smsg) end
else
print("*** SRCEQUIV: token streams are sort of equivalent")
if option["opt-locals"] then
print("(but no identifier comparisons since --opt-locals enabled)")
end
print()
end
if warn.BIN_EQUIV then
if option["opt-binequiv"] then die(bmsg) end
elseif BIN_EQUIV_AVAIL then
print("*** BINEQUIV: binary chunks are sort of equivalent")
print()
end
-- Save optimized source stream to output file.
save_file(destfl, dat)
-- Collect 'after' statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
-- Display output.
print("Statistics for: "..srcfl.." -> "..destfl.."\n")
local function figures(tt)
return stat1_c[tt], stat1_l[tt], stat1_a[tt],
stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s",
"%-16s%8d%8d%10.2f%8d%8d%10.2f"
local hl = rep("-", 68)
print("*** lexer-based optimizations summary ***\n"..hl)
print(fmt(tabf1, "Lexical",
"Input", "Input", "Input",
"Output", "Output", "Output"))
print(fmt(tabf1, "Elements",
"Count", "Bytes", "Average",
"Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl)
-- Report warning flags from optimizing process.
if warn.LSTRING then
print("* WARNING: "..warn.LSTRING)
elseif warn.MIXEDEOL then
print("* WARNING: ".."output still contains some CRLF or LFCR line endings")
elseif warn.SRC_EQUIV then
print("* WARNING: "..smsg)
elseif warn.BIN_EQUIV then
print("* WARNING: "..bmsg)
end
print()
end
---------------------------- Main functions ---------------------------
local arg = {...} -- program arguments
set_options(DEFAULT_CONFIG) -- set to default options at beginning
--- Does per-file handling, ship off to tasks.
--
-- @tparam {string,...} fspec List of source files.
local function do_files(fspec)
for i = 1, #fspec do
local srcfl = fspec[i]
local destfl
-- Find and replace extension for filenames.
local extb, exte = find(srcfl, "%.[^%.%\\%/]*$")
local basename, extension = srcfl, ""
if extb and extb > 1 then
basename = sub(srcfl, 1, extb - 1)
extension = sub(srcfl, extb, exte)
end
destfl = basename..suffix..extension
if #fspec == 1 and option.OUTPUT_FILE then
destfl = option.OUTPUT_FILE
end
if srcfl == destfl then
die("output filename identical to input filename")
end
-- Perform requested operations.
if option.DUMP_LEXER then
dump_tokens(srcfl)
elseif option.DUMP_PARSER then
dump_parser(srcfl)
elseif option.READ_ONLY then
read_only(srcfl)
else
process_file(srcfl, destfl)
end
end--for
end
--- The main function.
local function main()
local fspec = {}
local argn, i = #arg, 1
if argn == 0 then
option.HELP = true
end
-- Handle arguments.
while i <= argn do
local o, p = arg[i], arg[i + 1]
local dash = match(o, "^%-%-?")
if dash == "-" then -- single-dash options
if o == "-h" then
option.HELP = true; break
elseif o == "-v" then
option.VERSION = true; break
elseif o == "-s" then
if not p then die("-s option needs suffix specification") end
suffix = p
i = i + 1
elseif o == "-o" then
if not p then die("-o option needs a file name") end
option.OUTPUT_FILE = p
i = i + 1
elseif o == "-" then
break -- ignore rest of args
else
die("unrecognized option "..o)
end
elseif dash == "--" then -- double-dash options
if o == "--help" then
option.HELP = true; break
elseif o == "--version" then
option.VERSION = true; break
elseif o == "--keep" then
if not p then die("--keep option needs a string to match for") end
option.KEEP = p
i = i + 1
elseif o == "--plugin" then
if not p then die("--plugin option needs a module name") end
if option.PLUGIN then die("only one plugin can be specified") end
option.PLUGIN = p
plugin = require(PLUGIN_SUFFIX..p)
i = i + 1
elseif o == "--quiet" then
option.QUIET = true
elseif o == "--read-only" then
option.READ_ONLY = true
elseif o == "--basic" then
set_options(BASIC_CONFIG)
elseif o == "--maximum" then
set_options(MAXIMUM_CONFIG)
elseif o == "--none" then
set_options(NONE_CONFIG)
elseif o == "--dump-lexer" then
option.DUMP_LEXER = true
elseif o == "--dump-parser" then
option.DUMP_PARSER = true
elseif o == "--details" then
option.DETAILS = true
elseif OPTION[o] then -- lookup optimization options
set_options(o)
else
die("unrecognized option "..o)
end
else
fspec[#fspec + 1] = o -- potential filename
end
i = i + 1
end--while
if option.HELP then
print(MSG_TITLE..MSG_USAGE); return true
elseif option.VERSION then
print(MSG_TITLE); return true
end
if option["opt-binequiv"] and not BIN_EQUIV_AVAIL then
die("--opt-binequiv is available only for PUC Lua 5.1!")
end
if #fspec > 0 then
if #fspec > 1 and option.OUTPUT_FILE then
die("with -o, only one source file can be specified")
end
do_files(fspec)
return true
else
die("nothing to do!")
end
end
-- entry point -> main() -> do_files()
if not main() then
die("Please run with option -h or --help for usage information")
end

View File

@ -0,0 +1,300 @@
= Features and Usage
Kein-Hong Man
2011-09-13
== Features
LuaSrcDiet features include the following:
* Predefined default, _--basic_ (token-only) and _--maximum_ settings.
* Avoid deleting a block comment with a certain message with _--keep_; this is for copyright or license texts.
* Special handling for `#!` (shbang) lines and for implicit `self` parameters in functions.
* Dumping of raw information using _--dump-lexer_ and _--dump-parser_.
See the `samples` directory.
* An HTML plugin: outputs files that highlight globals and locals, useful for eliminating globals. See the `samples` directory.
* An SLOC plugin: counts significant lines of Lua code, like SLOCCount.
* Source and binary equivalence testing with _--opt-srcequiv_ and _--opt-binequiv_.
List of optimizations:
* Line endings are always normalized to LF, except those embedded in comments or strings.
* _--opt-comments_: Removal of comments and comment blocks.
* _--opt-whitespace_: Removal of whitespace, excluding end-of-line characters.
* _--opt-emptylines_: Removal of empty lines.
* _--opt-eols_: Removal of unnecessary end-of-line characters.
* _--opt-strings_: Rewrite strings and long strings. See the `samples` directory.
* _--opt-numbers_: Rewrite numbers. See the `samples` directory.
* _--opt-locals_: Rename local variable names. Does not rename field or method names.
* _--opt-entropy_: Tries to improve symbol entropy when renaming locals by calculating actual letter frequencies.
* _--opt-experimental_: Apply experimental optimizations.
LuaSrcDiet tries to allow each option to be enabled or disabled separately, but they are not completely orthogonal.
If comment removal is disabled, LuaSrcDiet only removes trailing whitespace.
Trailing whitespace is not removed in long strings; a warning is generated instead.
If empty line removal is disabled, LuaSrcDiet keeps all significant code on the same lines.
Thus, a user is able to debug using the original sources as a reference since the line numbering is unchanged.
String optimization deals mainly with optimizing escape sequences, but delimiters can be switched between single quotes and double quotes if the source size of the string can be reduced.
For long strings and long comments, LuaSrcDiet also tries to reduce the `=` separators in the
delimiters if possible.
For number optimization, LuaSrcDiet saves space by trying to generate the shortest possible sequence, and in the process it does not produce “proper” scientific notation (e.g. 1.23e5) but does away with the decimal point (e.g. 123e3) instead.
The local variable name optimizer uses a full parser of Lua 5.1 source code, thus it can rename all local variables, including upvalues and function parameters.
It should handle the implicit `self` parameter gracefully.
In addition, local variable names are either renamed into the shortest possible names following English frequent letter usage or are arranged by calculating entropy with the _--opt-entropy_ option.
Variable names are reused whenever possible, reducing the number of unique variable names.
For example, for `LuaSrcDiet.lua` (version 0.11.0), 683 local identifiers representing 88 unique names were optimized into 32 unique names, all of which are one character in length, saving over 2600 bytes.
If you need some kind of reassurance that your app will still work at reduced size, see the section on verification below.
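As a purely illustrative sketch of the string, number and local-name optimizations described above (not actual LuaSrcDiet output; exact results depend on the chosen options and the name generator), a fragment such as:
[source, lua]
----
local function scale(input_value)
  local factor = 1.23e5        -- rewritten without the decimal point
  return input_value * factor
end
----
might come out looking roughly like:
[source, lua]
----
local function b(e)local a=123e3 return e*a end
----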
== Usage
LuaSrcDiet needs a Lua 5.1.x (preferably Lua 5.1.4) binary to run.
On Unix machines, one can use the following command line:
[source, sh]
LuaSrcDiet myscript.lua -o myscript_.lua
On Windows machines, the above command line can be used on Cygwin, or you can run Lua with the LuaSrcDiet script like this:
[source, sh]
lua LuaSrcDiet.lua myscript.lua -o myscript_.lua
When run without arguments, LuaSrcDiet prints a list of options.
Also, you can check the `Makefile` for some examples of command lines to use.
For example, for maximum code size reduction and maximum verbosity, use:
[source, sh]
LuaSrcDiet --maximum --details myscript.lua -o myscript_.lua
=== Output Example
A sample output of LuaSrcDiet 0.11.0 for processing `llex.lua` at _--maximum_ settings is as follows:
----
Statistics for: LuaSrcDiet.lua -> sample/LuaSrcDiet.lua
*** local variable optimization summary ***
----------------------------------------------------------
Variable Unique Decl. Token Size Average
Types Names Count Count Bytes Bytes
----------------------------------------------------------
Global 10 0 19 95 5.00
----------------------------------------------------------
Local (in) 88 153 683 3340 4.89
TOTAL (in) 98 153 702 3435 4.89
----------------------------------------------------------
Local (out) 32 153 683 683 1.00
TOTAL (out) 42 153 702 778 1.11
----------------------------------------------------------
*** lexer-based optimizations summary ***
--------------------------------------------------------------------
Lexical Input Input Input Output Output Output
Elements Count Bytes Average Count Bytes Average
--------------------------------------------------------------------
TK_KEYWORD 374 1531 4.09 374 1531 4.09
TK_NAME 795 3963 4.98 795 1306 1.64
TK_NUMBER 54 59 1.09 54 59 1.09
TK_STRING 152 1725 11.35 152 1717 11.30
TK_LSTRING 7 1976 282.29 7 1976 282.29
TK_OP 997 1092 1.10 997 1092 1.10
TK_EOS 1 0 0.00 1 0 0.00
--------------------------------------------------------------------
TK_COMMENT 140 6884 49.17 1 18 18.00
TK_LCOMMENT 7 1723 246.14 0 0 0.00
TK_EOL 543 543 1.00 197 197 1.00
TK_SPACE 1270 2465 1.94 263 263 1.00
--------------------------------------------------------------------
Total Elements 4340 21961 5.06 2841 8159 2.87
--------------------------------------------------------------------
Total Tokens 2380 10346 4.35 2380 7681 3.23
--------------------------------------------------------------------
----
Overall, the file size is reduced by more than 9 kiB.
Tokens in the above report can be classified into “real” or actual tokens, and “fake” or whitespace tokens.
The number of “real” tokens remained the same.
Short comments and long comments were completely eliminated.
The number of line endings was reduced by 59, while all but 152 whitespace characters were optimized away.
So, token separators (whitespace, including line endings) now take up just 10 % of the total file size.
No optimization of number tokens was possible, while 2 bytes were saved for string tokens.
For local variable name optimization, the report shows that 38 unique local variable names were reduced to 20 unique names.
The number of identifier tokens should stay the same (there is currently no optimization option to optimize away non-essential or unused “real” tokens).
Since there can be at most 53 single-character identifiers, all local variables are now one character in length.
Over 600 bytes was saved.
_--details_ will give a longer report and much more information.
A sample output of LuaSrcDiet 0.12.0 for processing the one-file `LuaSrcDiet.lua` program itself at _--maximum_ and _--opt-experimental_ settings is as follows:
----
*** local variable optimization summary ***
----------------------------------------------------------
Variable Unique Decl. Token Size Average
Types Names Count Count Bytes Bytes
----------------------------------------------------------
Global 27 0 51 280 5.49
----------------------------------------------------------
Local (in) 482 1063 4889 21466 4.39
TOTAL (in) 509 1063 4940 21746 4.40
----------------------------------------------------------
Local (out) 55 1063 4889 4897 1.00
TOTAL (out) 82 1063 4940 5177 1.05
----------------------------------------------------------
*** BINEQUIV: binary chunks are sort of equivalent
Statistics for: LuaSrcDiet.lua -> app_experimental.lua
*** lexer-based optimizations summary ***
--------------------------------------------------------------------
Lexical Input Input Input Output Output Output
Elements Count Bytes Average Count Bytes Average
--------------------------------------------------------------------
TK_KEYWORD 3083 12247 3.97 3083 12247 3.97
TK_NAME 5401 24121 4.47 5401 7552 1.40
TK_NUMBER 467 494 1.06 467 494 1.06
TK_STRING 787 7983 10.14 787 7974 10.13
TK_LSTRING 14 3453 246.64 14 3453 246.64
TK_OP 6381 6861 1.08 6171 6651 1.08
TK_EOS 1 0 0.00 1 0 0.00
--------------------------------------------------------------------
TK_COMMENT 1611 72339 44.90 1 18 18.00
TK_LCOMMENT 18 4404 244.67 0 0 0.00
TK_EOL 4419 4419 1.00 1778 1778 1.00
TK_SPACE 10439 24475 2.34 2081 2081 1.00
--------------------------------------------------------------------
Total Elements 32621 160796 4.93 19784 42248 2.14
--------------------------------------------------------------------
Total Tokens 16134 55159 3.42 15924 38371 2.41
--------------------------------------------------------------------
* WARNING: before and after lexer streams are NOT equivalent!
----
The command line was:
[source, sh]
lua LuaSrcDiet.lua LuaSrcDiet.lua -o app_experimental.lua --maximum --opt-experimental --noopt-srcequiv
The important thing to note is that while the binary chunks are equivalent, the source lexer streams are not equivalent.
Hence, the _--noopt-srcequiv_ makes LuaSrcDiet report a warning for failing the source equivalence test.
`LuaSrcDiet.lua` was reduced from 157 kiB to about 41.3 kiB.
The _--opt-experimental_ option saves an extra 205 bytes over standard _--maximum_.
Note the reduction in `TK_OP` count due to a reduction in semicolons and parentheses.
`TK_SPACE` has actually increased a bit due to semicolons that are changed into single spaces; some of these spaces could not be removed.
For more performance numbers, see the <<performance-stats#, Performance Statistics>> page.
== Verification
Code size reduction can be quite a hairy thing (even I peer at the results in suspicion), so some kind of verification is desirable for users who expect processed files to _not_ blow up.
Since LuaSrcDiet has been talked about as a tool to reduce code size in projects such as WoW add-ons, `eLua` and `nspire`, adding a verification step will reduce risk for all users of LuaSrcDiet.
LuaSrcDiet performs two kinds of equivalence testing as of version 0.12.0.
The two tests can be very, very loosely termed as _source equivalence testing_ and _binary equivalence testing_.
They are controlled by the _--opt-srcequiv_ and _--opt-binequiv_ options and are enabled by default.
Testing behaviour can be summarized as follows:
* Both tests are always executed.
The options control the resulting actions taken.
* Both options are normally enabled.
This will make any failing test throw an error.
* When an option is disabled, LuaSrcDiet will at most print a warning.
* For passing results, see the following subsections that describe what the tests actually do.
You only need to disable a testing option for experimental optimizations (see the following section for more information on this).
For anything up to and including _--maximum_, both tests should pass.
If any test fails under these conditions, then something has gone wrong with LuaSrcDiet, and I would be interested to know what has blown up.
=== _--opt-srcequiv_ Source Equivalence
The source equivalence test uses LuaSrcDiet's lexer to read and compare the _before_ and _after_ lexer token streams.
Numbers and strings are dumped as binary chunks using `loadstring()` and `string.dump()` and the results compared.
If your file passes this test, it means that a Lua 5.1.x binary should see the exact same token streams for both _before_ and _after_ files.
That is, the parser in Lua will see the same lexer sequence coming from the source for both files and thus they _should_ be equivalent.
Touch wood.
Heh.
However, if you are _cross-compiling_, it may be possible for this test to fail.
Experienced Lua developers can modify `equiv.lua` to handle such cases.
=== _--opt-binequiv_ Binary Equivalence
The binary equivalence test uses `loadstring()` and `string.dump()` to generate binary chunks of the entire _before_ and _after_ files.
Also, any shbang (`#!`) lines are removed prior to generation of the binary chunks.
The binary chunks are then run through a fake `undump` routine to verify the integrity of the binary chunks and to compare all parts that ought to be identical.
On a per-function prototype basis (where _ignored_ means that any difference between the two binary chunks is ignored):
* All debug information is ignored.
* The source name is ignored.
* Any line number data is ignored.
For example, `linedefined` and `lastlinedefined`.
The rest of the two binary chunks must be identical.
So, while the two are not binary-exact, they can be loosely termed as “equivalent” and should run in exactly the same manner.
Sort of.
You get the idea.
This test may also cause problems if you are _cross-compiling_.
== Experimental Stuff
The _--opt-experimental_ option applies experimental optimizations that generally make changes to “real” tokens.
Such changes may or may not lead to the result failing binary chunk equivalence testing.
They would likely fail source lexer stream equivalence testing, so the _--noopt-srcequiv_ option needs to be applied so that LuaSrcDiet just gives a warning instead of an error.
For sample files, see the `samples` directory.
Currently implemented experimental optimizations are as follows:
=== Semicolon Operator Removal
The semicolon (`;`) operator is an optional operator that is used to separate statements.
The optimization turns all of these operators into single spaces, which are then run through whitespace removal.
At worst, there will be no change to file size.
* _Fails_ source lexer stream equivalence.
* _Passes_ binary chunk equivalence.
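For instance (illustrative only), the statements:
[source, lua]
print(1); print(2);
become:
[source, lua]
print(1)print(2)
after the semicolons are turned into spaces and whitespace removal then deletes them, since an operator may abut a name.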
=== Function Call Syntax Sugar Optimization
This optimization turns function calls that take a single string or long string parameter into their syntax-sugar representation, which leaves out the parentheses.
Since strings can abut anything, each instance saves 2 bytes.
For example, the following:
[source, lua]
fish("cow")fish('cow')fish([[cow]])
is turned into:
[source, lua]
fish"cow"fish'cow'fish[[cow]]
* _Fails_ source lexer stream equivalence.
* _Passes_ binary chunk equivalence.
=== Other Experimental Optimizations
There are two more of these optimizations planned, before focus is turned to the Lua 5.2.x series:
* Simple `local` keyword removal.
Planned to work for a few kinds of patterns only.
* User directed name replacement, which will need user input to modify names or identifiers used in table keys and function methods or fields.

View File

@ -0,0 +1,128 @@
= Performance Statistics
Kein-Hong Man
2011-09-13
== Size Comparisons
The following is the result of processing `llex.lua` from LuaSrcDiet 0.11.0 using various optimization options:
|===
| LuaSrcDiet Option | Size (bytes)
| Original | 12,421
| Empty lines only | 12,395
| Whitespace only | 9,372
| Local rename only | 11,794
| _--basic_ setting | 3,835
| Program default | 3,208
| _--maximum_ setting | 3,130
|===
The program's default settings do not remove all unnecessary EOLs.
The _--basic_ setting is more conservative than the default settings; it disables optimization of strings and numbers and renaming of locals.
For version 0.12.0, the following is the result of processing `LuaSrcDiet.lua` using various optimization options:
|===
| LuaSrcDiet Option | Size (bytes)
| Original | 160,796
| _--basic_ setting | 60,219
| Program default | 43,650
| _--maximum_ setting | 42,453
| max + experimental | 42,248
|===
The above best size can go a lot lower with simple `local` keyword removal and user directed name replacement, which will be the subject of the next release of LuaSrcDiet.
== Compression and luac
File sizes of LuaSrcDiet 0.11.0 main files in various forms:
[cols="m,5*d", options="header,footer"]
|===
| Source File | Original Size (bytes) | `luac` normal (bytes) | `luac` stripped (bytes) | LuaSrcDiet _--basic_ (bytes) | LuaSrcDiet _--maximum_ (bytes)
| LuaSrcDiet.lua | 21,961 | 20,952 | 11,000 | 11,005 | 8,159
| llex.lua | 12,421 | 8,613 | 4,247 | 3,835 | 3,130
| lparser.lua | 41,757 | 27,215 | 12,506 | 11,755 | 7,666
| optlex.lua | 31,009 | 16,992 | 8,021 | 9,129 | 6,858
| optparser.lua | 16,511 | 9,021 | 3,520 | 5,087 | 2,999
| Total | 123,659 | 82,793 | 39,294 | 40,811 | 28,812
|===
* “LuaSrcDiet --maximum” has the smallest total file size.
* The ratio of “Original Size” to “LuaSrcDiet --maximum” is *4.3*.
* The ratio of “Original Size” to “luac stripped” is *3.1*.
* The ratio of “luac stripped” to “LuaSrcDiet --maximum” is *1.4*.
Compressibility of LuaSrcDiet 0.11.0 main files in various forms:
|===
| Compression Method | Original Size | `luac` normal | `luac` stripped | LuaSrcDiet _--basic_ | LuaSrcDiet _--maximum_
| Uncompressed originals | 123,659 | 82,793 | 39,294 | 40,811 | 28,812
| gzip -9 | 28,288 | 29,210 | 17,732 | 12,041 | 10,451
| bzip2 -9 | 24,407 | 27,232 | 16,856 | 11,480 | 9,815
| lzma (7-zip max) | 25,530 | 23,908 | 15,741 | 11,241 | 9,685
|===
* “LuaSrcDiet --maximum” has the smallest total file size (but a binary chunk loads faster and works with a smaller Lua executable).
* The ratio of “Original size” to “Original size + bzip2” is *5.1*.
* The ratio of “Original size” to “LuaSrcDiet --maximum + bzip2” is *12.6*.
* The ratio of “LuaSrcDiet --maximum” to “LuaSrcDiet --maximum + bzip2” is *2.9*.
* The ratio of “Original size” to “luac stripped + bzip2” is *7.3*.
* The ratio of “luac stripped” to “luac stripped + bzip2” is *2.3*.
* The ratio of “luac stripped + bzip2” to “LuaSrcDiet --maximum + bzip2” is *1.7*.
So, squeezed source code is smaller than stripped binary chunks and compresses better, at a ratio of 2.9 for squeezed source code versus 2.3 for stripped binary chunks.
Compressed binary chunks are still a very efficient way of storing Lua scripts, because using only binary chunks allows the parts of Lua needed to compile from source (`llex.o`, `lparser.o`, `lcode.o`, `ldump.o`) to be omitted, saving over 24 KB in the process.
Note that LuaSrcDiet _does not_ answer the question of whether embedding source code is better or embedding binary chunks is better.
It is simply a utility for producing smaller source code files and an exercise in processing Lua source code using a Lua-based lexer and parser skeleton.
== Compile Speed
The following is a primitive attempt to analyze in-memory Lua script loading performance (using the `loadstring` function in Lua).
The LuaSrcDiet 0.11.0 files (original, squeezed with _--maximum_ and stripped binary chunks versions) are loaded into memory first before a loop runs to repeatedly load the script files for 10 seconds.
A null loop is also performed (processing empty strings) and the time taken per null iteration is subtracted as a form of null adjustment.
Then, various performance parameters are calculated.
Note that `LuaSrcDiet.lua` was slightly modified (`#!` line removed) to let the `loadstring` function run.
The results below were obtained with a Lua 5.1.3 executable compiled using `make generic` on Cygwin/Windows XP SP2 on a Sempron 3000+ (1.8GHz).
The LuaSrcDiet 0.11.0 source files have 11,180 “real” tokens in total.
[cols="<h,4*d", options="header"]
|===
| | Null loop | Stripped binary chunk | Original Sources | Squeezed Sources
| Total Size (bytes) | 0 | 39,294 | 123,640 | 28,793
| Iterations | 312,155 | 9,680 | 1,306 | 1,592
| Duration (sec) | 10 | 10 | 10 | 10
| Time/iteration (msec) | 0.032 | 1.033 | 7.657 | 6.281
| _Time/iteration, null adjusted (msec)_ | | 1.001 | 7.625 | 6.249
| _Load rate (MiB/sec)_ | | 37.44 | 15.46 | 4.39
| Load time per byte (ns) | | 25.5 | 61.7 | 217.0
| Load time per token (ns) | | | 682 | 559
| Source time vs binary chunk time ratio | | 1.00 | 7.62 | 6.24
| Binary chunk rate vs. source rate ratio | | 1.00 | 2.42 | 8.53
|===
The above shows that stripped binary chunks are still, in many ways, the highest-performance form of fixed Lua scripts.
On a very average machine, scripts load at over 37 MiB/sec (in memory).
This is very comparable to the burst speeds of common desktop hard disks of 2008.
If instant response is paramount, stripped binary chunks have little competition.
By contrast, source code that is squeezed to the maximum using LuaSrcDiet can only muster an in-memory load rate of 4.4 MiB/sec.
The original sources load at about 15.5 MiB/sec, but most of the speed is from the lexer scanning over comments and whitespace.
A quick calculation indicates that the speed of the lexer over comments and whitespace can be as much as 65 MiB/sec, but note that the speed is all for naught.
What really matters are the real tokens, and the squeezed source code manages to load faster than the original sources by 18 %.
So, the loading of stripped binary chunks is faster than squeezed source code by a bit over 6×.
The 4.4 MiB/sec speed for squeezed source code is still quite respectable.
When an application considers the time taken to load data from the disk and perhaps the time taken to decompress, loading source code may be perfectly fine in terms of performance.
For programs that already embed source code, using LuaSrcDiet to squeeze the source code probably speeds loading up by a tiny bit in addition to making programs smaller.
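A rough sketch of the in-memory timing loop described above (Lua 5.1; the variables holding the three source variants are hypothetical, and results will obviously differ per machine):
[source, lua]
----
-- Repeatedly compile a source string for a fixed duration and report the rate.
local function measure(label, chunk_text, seconds)
  local iterations, start = 0, os.clock()
  while os.clock() - start < seconds do
    assert(loadstring(chunk_text))        -- use load() on Lua 5.2+
    iterations = iterations + 1
  end
  local elapsed = os.clock() - start
  print(string.format("%-16s %8d iterations, %.3f ms/iteration",
                      label, iterations, elapsed * 1000 / iterations))
end

-- measure("original", original_source, 10)
-- measure("squeezed", squeezed_source, 10)
-- measure("binary chunk", string.dump(assert(loadstring(original_source))), 10)
----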

View File

@ -0,0 +1,386 @@
= Technical Notes
Kein-Hong Man
2011-09-13
== Lexer Notes
The lexer (`llex.lua`) is a version of the native 5.1.x lexer from Yueliang 0.4.0, with significant modifications.
It does have several limitations:
* The decimal point must be `.` (period).
There is no localized decimal point replacement magic.
* There is no support for nested `[[`...`]]` long strings (no `LUA_COMPAT_LSTR`).
* The lexer may not properly lex source code with characters beyond the normal ASCII character set.
Identifiers with accented characters (or any character beyond a byte value of 127) cannot be recognized.
Instead of returning one token on each call, `llex.lua` processes an entire string (all data from an entire file) and returns.
Two lists (tokens and semantic information items) are set up in the module for use by the caller.
For maximum flexibility during processing, the lexer returns non-grammar lexical elements as tokens too.
Non-grammar elements, such as comments, whitespace, and line endings, are classified along with “normal” tokens.
The lexer classifies 7 kinds of grammar tokens and 4 kinds of non-grammar tokens, as follows:
[cols="m,d"]
|===
| Grammar Token | Description
| TK_KEYWORD | keywords
| TK_NAME | identifiers
| TK_NUMBER | numbers (unconverted, kept in original form)
| TK_STRING | strings (no translation is done, includes delimiters)
| TK_LSTRING | long strings (no translation is done, includes delimiters)
| TK_OP | operators and punctuation (most single-char, some double)
| TK_EOS | end-of-stream (there is only one for each file/stream)
|===
[cols="m,d"]
|===
| Whitespace Token | Description
| TK_SPACE | whitespace (generally, spaces, \t, \v and \f)
| TK_COMMENT | comments (includes delimiters, also includes special first line shbang, which is handled specially in the optimizer)
| TK_LCOMMENT | block comments (includes delimiters)
| TK_EOL | end-of-lines (excludes those embedded in strings)
|===
A list of tokens can be generated by using the _--dump-lexer_ option, like this:
[source, sh]
lua LuaSrcDiet.lua --dump-lexer llex.lua > dump_llex.dat
== Lexer Optimizations
We aim to keep lexer-based optimizations free of parser considerations, i.e. we allow for generalized optimization of token sequences.
The table below considers the requirements for all combinations of significant tokens (except `TK_EOS`).
Other tokens are whitespace-like.
Comments can be considered to be a special kind of whitespace, e.g. a short comment needs to have a following EOL token, if we do not want to optimize away short comments.
[cols="h,6*m", options="header"]
|===
| _1st → 2nd Token_ | Keyword | Name | Number | String | LString | Oper
| Keyword | [S] | [S] | [S] | - | - | -
| Name | [S] | [S] | [S] | - | - | -
| Number | [S] | [S] | [S] | - | - | [1]
| String | - | - | - | - | - | -
| LString | - | - | - | - | - | -
| Oper | - | - | [1] | - | - | [2]
|===
A dash (`-`) in the above means that the first token can abut the second token.
`*[S]*`:: Need at least one whitespace, set as either a space or kept as an EOL.
`*[1]*`::
Need a space if the operator is a `.`; all others are okay.
A `+` or `-` is used as part of a floating-point spec, but there does not appear to be any way of creating a float by joining a number with a `+` or `-` plus another number.
Since an `e` has to be somewhere in the first token, this can't be done.
`*[2]*`::
Normally there cannot be consecutive operators, but we plan to allow for generalized optimization of token sequences, i.e. even sequences that are grammatically illegal; so disallow adjacent operators if:
* the first is in `[=<>]` and the second is `=`
* disallow dot sequences to be adjacent, but `...` first okay
* disallow `[` followed by `=` or `[` (not optimal)
Also, a minus `-` cannot precede a Comment or LComment, because comments start with a `--` prefix.
Apart from that, all Comment or LComment tokens can abut a real token.
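A few concrete cases of the table above (illustrative only):
[source, lua]
----
-- [S]: keyword/name/number pairs need at least one space or EOL;
-- "local x" cannot become "localx".
local x = 1
-- Strings and long strings may abut anything, so these two calls are legal:
print"hi"print[[there]]
-- [1]: a number cannot be followed directly by a "." operator, and
-- [2]: pairs such as "<" followed by "=" must stay separated.
----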
== Local Variable Renaming
The following discusses the problem of local variable optimization, specifically _local variable renaming_ in order to reduce source code size.
=== TK_NAME Token Considerations
A `TK_NAME` token means a number of things, and some of these cannot be renamed without analyzing the source code.
We are interested in the use of `TK_NAME` in the following:
[loweralpha]
. global variable access,
. local variable declaration, including `local` statements, `local` functions, function parameters, implicit `self` locals,
. local variable access, including upvalue access.
`TK_NAME` is also used in parts of the grammar as constant strings; these tokens cannot be optimized without user assistance.
These include usage as:
[loweralpha, start=4]
. keys in `key=value` pairs in table construction,
. field or method names in `a:b` or `a.b` syntax forms.
For the local variable name optimization scheme used, we do not consider (d) and (e), and while global variables cannot be renamed without some kind of user assistance, they need to be considered or tracked as part of Lua's variable access scheme.
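A small hypothetical fragment covering the five usages:
[source, lua]
----
print(os.time())            -- (a) global accesses: print, os
local count = 0             -- (b) local declaration
local function bump(step)   -- (b) local function and its parameter
  count = count + step      -- (c) local/upvalue access
end
local t = { width = 10 }    -- (d) key in a key=value pair: not renamed
print(t.width)              -- (e) field name: not renamed
----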
=== Lifetime of a Local Variable
Consider the following example:
[source, lua]
local string, table = string, table
In the example, the two locals are assigned the values of the globals with the same names.
When Lua encounters the declaration portion:
[source, lua]
local string, table
the parser cannot immediately make the two local variables available to the following code.
In the parser and code generator, locals are inactive when entries are created.
They are activated only when the function `adjustlocalvars()` is called to activate the appropriate local variables.
NOTE: The terminology used here may not be identical to the terms used in the Dragon Book; it merely follows the LuaSrcDiet code, which was written before I had read the Dragon Book.
In the example, the two local variables are activated only after the whole statement has been parsed, that is, after the last `table` token.
Hence, the statement works as expected.
Also, once the two local variables go out of scope, `removevars()` is called to deactivate them, allowing other variables of the same name to become visible again.
Another example worth mentioning is:
[source, lua]
local a, a, a = 1, 2, 3
The above will assign 3 to `a`.
Thus, when optimizing local variable names, (1) we need to consider accesses of global variable names affecting the namespace, (2) for the local variable names themselves, we need to consider when they are declared, activated and removed, and (3) within the “live” time of locals, we need to know when they are accessed (since locals that are never accessed don't really matter).
=== Local Variable Tracking
Every local variable declaration is considered an object to be renamed.
From the parser, we have the original name of the local variable, the token positions for declaration, activation and removal, and the token position for all the `TK_NAME` tokens which references this local.
All instances of the implicit `self` local variable are also flagged as such.
In addition to local variable information, all global variable accesses are tabled, one object entry for one name, and each object has a corresponding list of token positions for the `TK_NAME` tokens, which is where the global variables were accessed.
The key criterion is: *Our act of renaming cannot change the visibility of any of these locals and globals at the time they are accessed*.
However, _their scope of visibility may change while they are not accessed_, so someone who tries to insert a variable reference somewhere into a program that has its locals renamed may find that it now refers to a different variable.
Of course, if every variable has a unique name, then there is no need for a name allocation algorithm, as there will be no conflict.
But, in order to maximize utilization of short identifier names to reduce the final code size, we want to reuse the names as much as possible.
In addition, fewer names will likely reduce symbol entropy and may slightly improve compressibility of the source code.
LuaSrcDiet avoids the use of non-ASCII letters, so there are only 53 single-character variable names.
=== Name Allocation Theory
To understand the renaming algorithm, first we need to establish how different local and global variables can operate happily without interfering with each other.
Consider three objects, local object A, local object B and global object G.
A and B involve declaration, activation and removal, and within the period each is active, there may be zero or more accesses of the local.
For G, there are only global variable accesses to look into.
Assume that we have assigned a new name to A and we wish to consider its effects on other locals and globals, for which we choose B and G as examples.
We assume local B has not been assigned a new name as we expect our algorithm to take care of collisions.
A's lifetime is something like this:
----
Decl Act Rem
+ +-------------------------------+
-------------------------------------------------
----
where “Decl” is the time of declaration, “Act” is the time of activation, and “Rem” is the time of removal.
Between “Act” and “Rem”, the local is alive or “live” and Lua can see it if its corresponding `TK_NAME` identifier comes up.
----
Decl Act Rem
+ +-------------------------------+
-------------------------------------------------
* * * *
(1) (2) (3) (4)
----
Recall that the key criterion is not to change the visibility of globals and locals while they are accessed.
Consider local and global accesses at (1), (2), (3) and (4).
A global G of the same name as A will only collide at (3), where Lua will see A and not G.
Since G must be accessed at (3) according to what the parser says, and we cannot modify the positions of “Decl”, “Act” and “Rem”, it follows that A cannot have the same name as G.
----
Decl Act Rem
+ +-----------------------+
---------------------------------
(1)+ +---+ (2)+ +---+ (3)+ +---+ (4)+ +---+
--------- --------- --------- ---------
----
For the case of A and B having the same names and colliding, consider the cases for which B is at (1), (2), (3) or (4) in the above.
(1) and (4) means that A and B are completely isolated from each other, hence in the two cases, A and B can safely use the same variable names.
To be specific, since we have assigned A, B is considered completely isolated from A if Bs activation-to-removal period is isolated from the time of As first access to last access, meaning Bs active time will never affect any of As accesses.
For (2) and (3), we have two cases where we need to consider which one has been activated first.
For (2), B is active before A, so A cannot impose on B.
But As accesses are valid while B is active, since A can override B.
For no collision in the case of (2), we simply need to ensure that the last access of B occurs before A is activated.
For (3), B is activated after A, hence B can override A’s accesses.
For no collision, all of As accesses cannot happen while B is active.
Thus position (3) follows the “A is never accessed when B is active” rule in a general way.
Local variables of a child function are in the position of (3).
To illustrate, the local B can use the same name as local A and live in a child function or block scope if each time A is accessed, Lua sees A and not B.
So we have to check all accesses of A and see whether they collide with the active period of B.
If A is not accessed during that period, then B can be active with the same name.
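A minimal sketch of case (3), again a made-up snippet rather than LuaSrcDiet code:
[source, lua]
----
local total = 0              -- outer local "A"
for _, v in ipairs{ 1, 2, 3 } do
  total = total + v          -- accesses of A
end
do
  -- "B" below is activated only after the whole statement has run, so the
  -- 'total' on the right-hand side is an access of A made before B is live.
  local doubled = total * 2
  print(doubled)             -- A is never accessed while B is active...
end
print(total)                 -- ...and this access happens after B is removed,
                             -- so B could safely be given the same name as A.
----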
The above appears to resolve all sorts of cases where the active times of A and B overlap.
Note that in the above, the allocator does not need to know how locals are separated according to function prototypes.
Perhaps the allocator can be simplified if knowledge of function structure is utilized.
This scheme was implemented in a hurry in 2008 — it could probably be simpler if Lua grammar is considered, but LuaSrcDiet mainly processes various index values in tables.
=== Name Allocation Algorithm
To begin with, the name generator is mostly separate from the name allocation algorithm.
The name generator returns the next shortest name for the algorithm to apply to local variables.
To attempt to reduce symbol entropy (which benefits compression algorithms), the name generator follows English letter-frequency order.
There is also an option to calculate an actual symbol entropy table from the input data.
Since there are 53 one-character identifiers and (53 * 63 - 4) two-character identifiers (minus a few keywords), there isnt a pressing need to optimally maximize name reuse.
The single-file version of LuaSrcDiet 0.12.0, at just over 3000 SLOC and 156 kiB in size, currently allocates around 55 unique local variable names.
In theory, we should need no more than 260 local identifiers by default.
Why?
Since `LUAI_MAXVARS` is 200 and `LUAI_MAXUPVALUES` is 60, at any block scope, there can be at most `(LUAI_MAXVARS + LUAI_MAXUPVALUES)` locals referenced, or 260.
Also, the identifiers of locals from outer scopes that are not referenced in inner scopes can be reused.
The net effect of this is that a local variable name allocation method should not allocate more than 260 identifier names for locals.
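Before looking at the allocation loop, a simplified name generator along these lines might look as follows; this is only a sketch of the idea, not the actual generator in LuaSrcDiet's optparser.lua, and the letter ordering shown is an assumption:
[source, lua]
----
-- First characters ordered roughly by English letter frequency, plus the
-- underscore; digits may only appear from the second character onward.
local FIRST = "etaoinshrdlucmfwypvbgkqjxzETAOINSHRDLUCMFWYPVBGKQJXZ_"
local OTHER = FIRST .. "0123456789"
local SKIP  = { self = true, ["do"] = true, ["if"] = true,
                ["in"] = true, ["or"] = true }   -- 'self' and short keywords
local function name_iter()
  local n = 0
  return function()
    repeat
      n = n + 1
      local name
      if n <= #FIRST then                        -- 53 one-character names
        name = FIRST:sub(n, n)
      else                                       -- 53 * 63 two-character names
        local i = n - #FIRST - 1
        local a = math.floor(i / #OTHER) + 1
        local b = i % #OTHER + 1
        assert(a <= #FIRST, "sketch only covers one- and two-character names")
        name = FIRST:sub(a, a) .. OTHER:sub(b, b)
      end
      if not SKIP[name] then return name end
    until false
  end
end
local next_name = name_iter()
print(next_name())   --> e
print(next_name())   --> t
----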
The current algorithm is a simple first-come first-served scheme:
[loweralpha]
. The local object that uses the most tokens is named first.
. Any other non-conflicting locals with respect to the first object are assigned the same name.
. Assigned locals are removed from consideration and the procedure is repeated for objects that have not been assigned new names.
. Steps (a) to (c) repeat until no local objects are left.
In addition, there are a few extra issues to take care of:
[loweralpha, start=5]
. Implicit `self` locals that have been flagged as such are already “assigned to” and so they are left unmodified.
. The name generator skips `self` to avoid conflicts.
This is not optimal but it is unlikely a script will use so many local variables as to reach `self`.
. Keywords are also skipped for the name generator.
. Global name conflict resolution.
For (h), global name conflict resolution is handled just after the new name is generated.
The name can still be used for some locals even if it conflicts with other locals.
To remove conflicts, the global variable accesses for the particular identifier name are checked.
Any local variables that are active when such a global access is made are marked to be skipped.
The rest of the local objects can then use that name.
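Putting steps (a) to (d) and the global-conflict handling (h) together, the allocation loop can be sketched as below. This is a paraphrase, not the code in optparser.lua; the object fields `accesses`, `act` and `rem`, and the helper `no_conflict`, are stand-ins for the parser's actual data structures:
[source, lua]
----
-- locals : array of objects { accesses = {tokpos, ...}, act = ..., rem = ... }
-- globals: map from identifier name to a list of access token positions
local function allocate(locals, globals, next_name, no_conflict)
  -- (a) the busiest local (most tokens) gets named first
  table.sort(locals, function(x, y) return #x.accesses > #y.accesses end)
  local pending = locals
  while #pending > 0 do
    local name = next_name()
    -- (h) skip locals that are active when a same-named global is accessed
    local usable = {}
    for _, obj in ipairs(pending) do
      local ok = true
      for _, pos in ipairs(globals[name] or {}) do
        if pos >= obj.act and pos <= obj.rem then ok = false; break end
      end
      if ok then usable[#usable + 1] = obj end
    end
    -- (b) hand the name to every usable local that does not conflict with
    --     the locals already holding it
    local assigned = {}
    for _, obj in ipairs(usable) do
      local clash = false
      for _, done in ipairs(assigned) do
        if not no_conflict(done, obj) then clash = true; break end
      end
      if not clash then
        obj.newname = name
        assigned[#assigned + 1] = obj
      end
    end
    -- (c)/(d) drop assigned locals and repeat with the next generated name
    local rest = {}
    for _, obj in ipairs(pending) do
      if not obj.newname then rest[#rest + 1] = obj end
    end
    pending = rest
  end
end
----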
The algorithm has additional code for handling locals that use the same name in the same scope.
This extends the basic algorithm that was discussed earlier.
For example:
[source, lua]
----
local foo = 10 -- <1>
...
local foo = 20 -- <2>
...
print(e)
----
Since we are considering name visibility, the first `foo` does not really cease to exist when the second `foo` is declared. Suppose it did, so that the first `foo` were removed before (2); then I should be able to use `e` as the name for the first `foo`, and after (2) it should not conflict with variables in the outer scope with the same name.
To illustrate:
[source, lua]
----
local e = 10 -- 'foo' renamed to 'e'
...
local t = 20 -- error if we assumed 'e' removed here
...
print(e)
----
Since `e` is a global in the example, we now have an error, as the name has been taken over by a local.
Thus, the first `foo` local must have its active time extend to the end of the current scope.
If there is no conflict between the first and second `foo`, the algorithm may still assign the same names to them.
The current fix to deal with the above chains local objects in order to find the removal position.
It may be possible to handle this in a cleaner manner; for now, LuaSrcDiet handles it as a fix to the basic algorithm.
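A sketch of that fix, with hypothetical field names (`shadowed_by`, `rem`) standing in for LuaSrcDiet's real bookkeeping:
[source, lua]
----
-- When several locals share a name in one scope, each earlier object is
-- chained to the object that shadows it; the effective removal position of
-- an earlier object is then taken from the end of the chain, i.e. the end
-- of the enclosing scope rather than the shadowing declaration.
local function effective_removal(obj)
  while obj.shadowed_by do
    obj = obj.shadowed_by
  end
  return obj.rem
end
----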
== Ideas
The following is a list of optimization ideas that do not require heavy-duty source code parsing and comprehension.
=== Lexer-Based Optimization Ideas
* Convert long strings to normal strings, and vice versa. +
_A little desperate for a few bytes, can be done, but not real keen on implementing it._
* Special number forms to take advantage of constant number folding. +
_For example, 65536 can be represented using 2^16^, and so on.
An expression must be evaluated in the same way, otherwise this seems unsafe._
* Warn if a number has too many digits. +
_Should we warn or “test and truncate”?
Not really an optimization that will see much use._
* Warn of opportunity for using a `local` to zap a bunch of globals. +
_Current recommendation is to use the HTML plugin to display globals in red.
The developer can then visually analyze the source code and make the appropriate fixes.
I think this is better than having the program guess the intentions of the developer._
* Spaces to tabs in comments, long comments, or long strings. +
_For long strings, we would need to know the user’s intention.
Would rather not implement._
=== Parser-Based Optimization Ideas
Heavy-duty optimizations will need more data to be generated by the parser.
A full AST may eventually be needed.
The most attractive idea that can be quickly implemented with a significant code size “win” is to reduce the number of `local` keywords.
* Remove unused ``local``s that can be removed in the source. +
_Need to consider unused ``local``s in multiple assignments._
* Simplify declaration of ``local``s that can be merged. +
_From:_
+
[source, lua]
----
-- separate locals
local foo
local bar
-- separate locals with assignments
local foo = 123
local bar = "pqr"
----
+
_To:_
+
[source, lua]
----
-- merged locals
local foo,bar
-- merged locals with assignments
local foo,bar=123,"pqr"
----
* Simplify declarations using `nil`. +
_From:_
[source, lua]
local foo, bar = nil, nil
+
_To:_
[source, lua]
local foo,bar
* Simplify ``return``s using `nil`. +
_How desirable is this? From Lua list discussions, it seems to be potentially unsafe unless all return locations are known and checked._
* Removal of optional semicolons in statements and removal of commas or semicolons in table constructors. +
_Yeah, this might save a few bytes._
* Remove table constructor elements using `nil`. +
_Not sure if this is safe to do._
* Simplify logical or relational operator expressions. +
_This is more suitable for an optimizing compiler project._

View File

@ -0,0 +1,41 @@
-- vim: set ft=lua:
package = 'LuaSrcDiet'
version = '0.3.0-2'
source = { url = 'https://github.com/jirutka/luasrcdiet/archive/v0.3.0/luasrcdiet-0.3.0.tar.gz', md5 = 'c0ff36ef66cd0568c96bc54e9253a8fa' }
description = {
summary = 'Compresses Lua source code by removing unnecessary characters',
detailed = [[
This is a revival of LuaSrcDiet originally written by Kein-Hong Man.]]
homepage = 'https://github.com/jirutka/luasrcdiet',
maintainer = 'Jakub Jirutka <jakub@jirutka.cz>',
license = 'MIT',
}
dependencies = {
'lua >= 5.1',
}
build = {
type = 'builtin',
modules = {
['luasrcdiet'] = 'luasrcdiet/init.lua',
['luasrcdiet.equiv'] = 'luasrcdiet/equiv.lua',
['luasrcdiet.fs'] = 'luasrcdiet/fs.lua',
['luasrcdiet.llex'] = 'luasrcdiet/llex.lua',
['luasrcdiet.lparser'] = 'luasrcdiet/lparser.lua',
['luasrcdiet.optlex'] = 'luasrcdiet/optlex.lua',
['luasrcdiet.optparser'] = 'luasrcdiet/optparser.lua',
['luasrcdiet.plugin.example'] = 'luasrcdiet/plugin/example.lua',
['luasrcdiet.plugin.html'] = 'luasrcdiet/plugin/html.lua',
['luasrcdiet.plugin.sloc'] = 'luasrcdiet/plugin/sloc.lua',
['luasrcdiet.utils'] = 'luasrcdiet/utils.lua',
},
install = {
bin = {
luasrcdiet = 'bin/luasrcdiet',
}
}
}

View File

@ -0,0 +1,28 @@
rock_manifest = {
bin = {
luasrcdiet = "6c318685d57f827cf5baf7037a5d6072"
},
doc = {
["features-and-usage.adoc"] = "157587c27a0c340d9d1dd06af9b339b5",
["performance-stats.adoc"] = "cf5f96a86e021a3a584089fafcabd056",
["tech-notes.adoc"] = "075bc34e667a0055e659e656baa2365a"
},
lua = {
luasrcdiet = {
["equiv.lua"] = "967a6b17573d229e326dbb740ad7fe8c",
["fs.lua"] = "53db7dfc50d026b683fad68ed70ead0f",
["init.lua"] = "c6f368e6cf311f3257067fed0fbcd06a",
["llex.lua"] = "ede897af261fc362a82d87fbad91ea2b",
["lparser.lua"] = "c1e1f04d412b79a040fd1c2b74112953",
["optlex.lua"] = "7c986da991a338494c36770b4a30fa9f",
["optparser.lua"] = "b125a271ac1c691dec68b63019b1b5da",
plugin = {
["example.lua"] = "86b5c1e9dc7959db6b221d6d5a0db3d1",
["html.lua"] = "c0d3336a133f0c8663f395ee98d54f6a",
["sloc.lua"] = "fb1a91b18b701ab83f21c87733be470a"
},
["utils.lua"] = "bd6c1e85c6a9bf3383d336a4797fb292"
}
},
["luasrcdiet-0.3.0-2.rockspec"] = "da70047e1b0cbdc1ff08d060327fa110"
}

View File

@ -0,0 +1,650 @@
commands = {
luadocumentor = {
"luadocumentor/0.1.5-1"
},
luasrcdiet = {
"luasrcdiet/0.3.0-2"
}
}
dependencies = {
luadocumentor = {
["0.1.5-1"] = {
{
constraints = {
{
op = "~>",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
},
{
constraints = {
{
op = "~>",
version = {
1, 6, string = "1.6"
}
}
},
name = "luafilesystem"
},
{
constraints = {
{
op = "~>",
version = {
0, 32, string = "0.32"
}
}
},
name = "markdown"
},
{
constraints = {
{
op = "~>",
version = {
0, 7, string = "0.7"
}
}
},
name = "metalua-compiler"
},
{
constraints = {
{
op = "~>",
version = {
0, 9, string = "0.9"
}
}
},
name = "penlight"
}
}
},
luafilesystem = {
["1.6.3-2"] = {
{
constraints = {
{
op = ">=",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
}
}
},
luasrcdiet = {
["0.3.0-2"] = {
{
constraints = {
{
op = ">=",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
}
}
},
markdown = {
["0.32-2"] = {
{
constraints = {
{
op = ">=",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
}
}
},
["metalua-compiler"] = {
["0.7.3-1"] = {
{
constraints = {
{
op = "~>",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
},
{
constraints = {
{
op = "~>",
version = {
1, 6, string = "1.6"
}
}
},
name = "luafilesystem"
},
{
constraints = {
{
op = ">=",
version = {
0, 7, 3, string = "0.7.3"
}
}
},
name = "metalua-parser"
}
}
},
["metalua-parser"] = {
["0.7.3-2"] = {
{
constraints = {
{
op = ">=",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
}
}
},
penlight = {
["0.9.8-1"] = {
{
constraints = {},
name = "luafilesystem"
}
}
}
}
modules = {
defaultcss = {
"luadocumentor/0.1.5-1"
},
docgenerator = {
"luadocumentor/0.1.5-1"
},
extractors = {
"luadocumentor/0.1.5-1"
},
["fs.lfs"] = {
"luadocumentor/0.1.5-1"
},
lddextractor = {
"luadocumentor/0.1.5-1"
},
lfs = {
"luafilesystem/1.6.3-2"
},
luasrcdiet = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.equiv"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.fs"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.llex"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.lparser"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.optlex"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.optparser"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.plugin.example"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.plugin.html"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.plugin.sloc"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.utils"] = {
"luasrcdiet/0.3.0-2"
},
markdown = {
"markdown/0.32-2"
},
["metalua.compiler"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.bytecode"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.bytecode.compile"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.bytecode.lcode"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.bytecode.ldump"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.bytecode.lopcodes"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.globals"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.parser"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.annot.generator"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.annot.grammar"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.expr"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.ext"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.lexer"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.meta"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.misc"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.stat"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.table"] = {
"metalua-parser/0.7.3-2"
},
["metalua.grammar.generator"] = {
"metalua-parser/0.7.3-2"
},
["metalua.grammar.lexer"] = {
"metalua-parser/0.7.3-2"
},
["metalua.loader"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.pprint"] = {
"metalua-parser/0.7.3-2"
},
["metalua/compiler/ast_to_src.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/extension/comprehension.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/extension/match.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/repl.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/treequery.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/treequery/walk.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["models.apimodel"] = {
"luadocumentor/0.1.5-1"
},
["models.apimodelbuilder"] = {
"luadocumentor/0.1.5-1"
},
["models.internalmodel"] = {
"luadocumentor/0.1.5-1"
},
["models.ldparser"] = {
"luadocumentor/0.1.5-1"
},
["models/internalmodelbuilder.mlua"] = {
"luadocumentor/0.1.5-1"
},
pl = {
"penlight/0.9.8-1"
},
["pl.Date"] = {
"penlight/0.9.8-1"
},
["pl.List"] = {
"penlight/0.9.8-1"
},
["pl.Map"] = {
"penlight/0.9.8-1"
},
["pl.MultiMap"] = {
"penlight/0.9.8-1"
},
["pl.OrderedMap"] = {
"penlight/0.9.8-1"
},
["pl.Set"] = {
"penlight/0.9.8-1"
},
["pl.app"] = {
"penlight/0.9.8-1"
},
["pl.array2d"] = {
"penlight/0.9.8-1"
},
["pl.class"] = {
"penlight/0.9.8-1"
},
["pl.comprehension"] = {
"penlight/0.9.8-1"
},
["pl.config"] = {
"penlight/0.9.8-1"
},
["pl.data"] = {
"penlight/0.9.8-1"
},
["pl.dir"] = {
"penlight/0.9.8-1"
},
["pl.file"] = {
"penlight/0.9.8-1"
},
["pl.func"] = {
"penlight/0.9.8-1"
},
["pl.input"] = {
"penlight/0.9.8-1"
},
["pl.lapp"] = {
"penlight/0.9.8-1"
},
["pl.lexer"] = {
"penlight/0.9.8-1"
},
["pl.luabalanced"] = {
"penlight/0.9.8-1"
},
["pl.operator"] = {
"penlight/0.9.8-1"
},
["pl.path"] = {
"penlight/0.9.8-1"
},
["pl.permute"] = {
"penlight/0.9.8-1"
},
["pl.platf.luajava"] = {
"penlight/0.9.8-1"
},
["pl.pretty"] = {
"penlight/0.9.8-1"
},
["pl.seq"] = {
"penlight/0.9.8-1"
},
["pl.sip"] = {
"penlight/0.9.8-1"
},
["pl.strict"] = {
"penlight/0.9.8-1"
},
["pl.stringio"] = {
"penlight/0.9.8-1"
},
["pl.stringx"] = {
"penlight/0.9.8-1"
},
["pl.tablex"] = {
"penlight/0.9.8-1"
},
["pl.template"] = {
"penlight/0.9.8-1"
},
["pl.test"] = {
"penlight/0.9.8-1"
},
["pl.text"] = {
"penlight/0.9.8-1"
},
["pl.utils"] = {
"penlight/0.9.8-1"
},
["pl.xml"] = {
"penlight/0.9.8-1"
},
["template.file"] = {
"luadocumentor/0.1.5-1"
},
["template.index"] = {
"luadocumentor/0.1.5-1"
},
["template.index.recordtypedef"] = {
"luadocumentor/0.1.5-1"
},
["template.item"] = {
"luadocumentor/0.1.5-1"
},
["template.page"] = {
"luadocumentor/0.1.5-1"
},
["template.recordtypedef"] = {
"luadocumentor/0.1.5-1"
},
["template.usage"] = {
"luadocumentor/0.1.5-1"
},
["template.utils"] = {
"luadocumentor/0.1.5-1"
},
templateengine = {
"luadocumentor/0.1.5-1"
}
}
repository = {
luadocumentor = {
["0.1.5-1"] = {
{
arch = "installed",
commands = {
luadocumentor = "luadocumentor"
},
dependencies = {
luafilesystem = "1.6.3-2",
markdown = "0.32-2",
["metalua-compiler"] = "0.7.3-1",
["metalua-parser"] = "0.7.3-2",
penlight = "0.9.8-1"
},
modules = {
defaultcss = "defaultcss.lua",
docgenerator = "docgenerator.lua",
extractors = "extractors.lua",
["fs.lfs"] = "fs/lfs.lua",
lddextractor = "lddextractor.lua",
["models.apimodel"] = "models/apimodel.lua",
["models.apimodelbuilder"] = "models/apimodelbuilder.lua",
["models.internalmodel"] = "models/internalmodel.lua",
["models.ldparser"] = "models/ldparser.lua",
["models/internalmodelbuilder.mlua"] = "models/internalmodelbuilder.mlua",
["template.file"] = "template/file.lua",
["template.index"] = "template/index.lua",
["template.index.recordtypedef"] = "template/index/recordtypedef.lua",
["template.item"] = "template/item.lua",
["template.page"] = "template/page.lua",
["template.recordtypedef"] = "template/recordtypedef.lua",
["template.usage"] = "template/usage.lua",
["template.utils"] = "template/utils.lua",
templateengine = "templateengine.lua"
}
}
}
},
luafilesystem = {
["1.6.3-2"] = {
{
arch = "installed",
commands = {},
dependencies = {},
modules = {
lfs = "lfs.dll"
}
}
}
},
luasrcdiet = {
["0.3.0-2"] = {
{
arch = "installed",
commands = {
luasrcdiet = "luasrcdiet"
},
dependencies = {},
modules = {
luasrcdiet = "luasrcdiet/init.lua",
["luasrcdiet.equiv"] = "luasrcdiet/equiv.lua",
["luasrcdiet.fs"] = "luasrcdiet/fs.lua",
["luasrcdiet.llex"] = "luasrcdiet/llex.lua",
["luasrcdiet.lparser"] = "luasrcdiet/lparser.lua",
["luasrcdiet.optlex"] = "luasrcdiet/optlex.lua",
["luasrcdiet.optparser"] = "luasrcdiet/optparser.lua",
["luasrcdiet.plugin.example"] = "luasrcdiet/plugin/example.lua",
["luasrcdiet.plugin.html"] = "luasrcdiet/plugin/html.lua",
["luasrcdiet.plugin.sloc"] = "luasrcdiet/plugin/sloc.lua",
["luasrcdiet.utils"] = "luasrcdiet/utils.lua"
}
}
}
},
markdown = {
["0.32-2"] = {
{
arch = "installed",
commands = {},
dependencies = {},
modules = {
markdown = "markdown.lua"
}
}
}
},
["metalua-compiler"] = {
["0.7.3-1"] = {
{
arch = "installed",
commands = {},
dependencies = {
luafilesystem = "1.6.3-2",
["metalua-parser"] = "0.7.3-2"
},
modules = {
["metalua.compiler.bytecode"] = "metalua/compiler/bytecode.lua",
["metalua.compiler.bytecode.compile"] = "metalua/compiler/bytecode/compile.lua",
["metalua.compiler.bytecode.lcode"] = "metalua/compiler/bytecode/lcode.lua",
["metalua.compiler.bytecode.ldump"] = "metalua/compiler/bytecode/ldump.lua",
["metalua.compiler.bytecode.lopcodes"] = "metalua/compiler/bytecode/lopcodes.lua",
["metalua.compiler.globals"] = "metalua/compiler/globals.lua",
["metalua.loader"] = "metalua/loader.lua",
["metalua/compiler/ast_to_src.mlua"] = "metalua/compiler/ast_to_src.mlua",
["metalua/extension/comprehension.mlua"] = "metalua/extension/comprehension.mlua",
["metalua/extension/match.mlua"] = "metalua/extension/match.mlua",
["metalua/repl.mlua"] = "metalua/repl.mlua",
["metalua/treequery.mlua"] = "metalua/treequery.mlua",
["metalua/treequery/walk.mlua"] = "metalua/treequery/walk.mlua"
}
}
}
},
["metalua-parser"] = {
["0.7.3-2"] = {
{
arch = "installed",
commands = {},
dependencies = {},
modules = {
["metalua.compiler"] = "metalua/compiler.lua",
["metalua.compiler.parser"] = "metalua/compiler/parser.lua",
["metalua.compiler.parser.annot.generator"] = "metalua/compiler/parser/annot/generator.lua",
["metalua.compiler.parser.annot.grammar"] = "metalua/compiler/parser/annot/grammar.lua",
["metalua.compiler.parser.expr"] = "metalua/compiler/parser/expr.lua",
["metalua.compiler.parser.ext"] = "metalua/compiler/parser/ext.lua",
["metalua.compiler.parser.lexer"] = "metalua/compiler/parser/lexer.lua",
["metalua.compiler.parser.meta"] = "metalua/compiler/parser/meta.lua",
["metalua.compiler.parser.misc"] = "metalua/compiler/parser/misc.lua",
["metalua.compiler.parser.stat"] = "metalua/compiler/parser/stat.lua",
["metalua.compiler.parser.table"] = "metalua/compiler/parser/table.lua",
["metalua.grammar.generator"] = "metalua/grammar/generator.lua",
["metalua.grammar.lexer"] = "metalua/grammar/lexer.lua",
["metalua.pprint"] = "metalua/pprint.lua"
}
}
}
},
penlight = {
["0.9.8-1"] = {
{
arch = "installed",
commands = {},
dependencies = {
luafilesystem = "1.6.3-2"
},
modules = {
pl = "pl/init.lua",
["pl.Date"] = "pl/Date.lua",
["pl.List"] = "pl/List.lua",
["pl.Map"] = "pl/Map.lua",
["pl.MultiMap"] = "pl/MultiMap.lua",
["pl.OrderedMap"] = "pl/OrderedMap.lua",
["pl.Set"] = "pl/Set.lua",
["pl.app"] = "pl/app.lua",
["pl.array2d"] = "pl/array2d.lua",
["pl.class"] = "pl/class.lua",
["pl.comprehension"] = "pl/comprehension.lua",
["pl.config"] = "pl/config.lua",
["pl.data"] = "pl/data.lua",
["pl.dir"] = "pl/dir.lua",
["pl.file"] = "pl/file.lua",
["pl.func"] = "pl/func.lua",
["pl.input"] = "pl/input.lua",
["pl.lapp"] = "pl/lapp.lua",
["pl.lexer"] = "pl/lexer.lua",
["pl.luabalanced"] = "pl/luabalanced.lua",
["pl.operator"] = "pl/operator.lua",
["pl.path"] = "pl/path.lua",
["pl.permute"] = "pl/permute.lua",
["pl.platf.luajava"] = "pl/platf/luajava.lua",
["pl.pretty"] = "pl/pretty.lua",
["pl.seq"] = "pl/seq.lua",
["pl.sip"] = "pl/sip.lua",
["pl.strict"] = "pl/strict.lua",
["pl.stringio"] = "pl/stringio.lua",
["pl.stringx"] = "pl/stringx.lua",
["pl.tablex"] = "pl/tablex.lua",
["pl.template"] = "pl/template.lua",
["pl.test"] = "pl/test.lua",
["pl.text"] = "pl/text.lua",
["pl.utils"] = "pl/utils.lua",
["pl.xml"] = "pl/xml.lua"
}
}
}
}
}

View File

@ -0,0 +1,23 @@
package = "Markdown"
version = "0.32-2"
source = {
url = "http://www.frykholm.se/files/markdown-0.32.tar.gz",
dir = "."
}
description = {
summary = "Markdown text-to-html markup system.",
detailed = [[
A pure-lua implementation of the Markdown text-to-html markup system.
]],
license = "MIT",
homepage = "http://www.frykholm.se/files/markdown.lua"
}
dependencies = {
"lua >= 5.1",
}
build = {
type = "none",
install = {
lua = { "markdown.lua" },
}
}

View File

@ -0,0 +1,6 @@
rock_manifest = {
lua = {
["markdown.lua"] = "0ea5f9d6d22a6c9aa4fdf63cf1d7d066"
},
["markdown-0.32-2.rockspec"] = "83f0335058d8fbd078d4f2c1ce941df0"
}

View File

@ -0,0 +1,104 @@
Metalua Compiler
================
## Metalua compiler
This module `metalua-compiler` depends on `metalua-parser`. Its main
feature is to compile ASTs into Lua 5.1 bytecode, allowing to convert
them into bytecode files and executable functions. This opens the
following possibilities:
* compiler objects generated with `require 'metalua.compiler'.new()`
support methods `:xxx_to_function()` and `:xxx_to_bytecode()`;
* Compile-time meta-programming: use of `-{...}` splices in source
code, to generate code during compilation;
* Some syntax extensions, such as structural pattern matching and
lists by comprehension;
* Some AST manipulation facilities such as `treequery`, which are
implemented with Metalua syntax extensions.
## What's new in Metalua 0.7
This is a major overhaul of the compiler's architecture. Some of the
most noteworthy changes are:
* No more installation or bootstrap script. Some Metalua source files
have been rewritten in plain Lua, and module sources have been
refactored, so that if you just drop the `metalua` folder somewhere
in your `LUA_PATH`, it works.
* The compiler can be cut in two parts:
* a parser which generates ASTs out of Lua sources, and should be
either portable or easily ported to Lua 5.2;
* a compiler, which can turn sources and AST into executable
Lua 5.1 bytecode and run it. It also supports compile-time
meta-programming, i.e. code included between `-{ ... }` is
executed during compilation, and the ASTs it produces are
included in the resulting bytecode.
* Both parts are packaged as separate LuaRocks, `metalua-parser` and
`metalua-compiler` respectively, so that you can install the former
without the latter.
* The parser is not a unique object anymore. Instead,
`require "metalua.compiler".new()` returns a different compiler
instance every time it's called. Compiler instances can be reused on
as many source files as wanted, but extending one instance's grammar
doesn't affect other compiler instances.
* The included standard library has been shed. There are too many standard
  libs in Lua, and none of them is standard enough; offering
  yet-another-one, coupled with a specific compiler, can only add to
  confusion.
* Many syntax extensions, which either were arguably more code samples
than actual production-ready tools, or relied too heavily on the
removed runtime standard libraries, have been removed.
* The remaining libraries and samples are:
* `metalua.compiler` converts sources into ASTs, bytecode,
functions, and ASTs back into sources.
* `metalua` compiles and/or executes files from the command line,
can start an interactive REPL session.
* `metalua.loader` adds a package loader which allows to use modules
written in Metalua, even from a plain Lua program.
* `metalua.treequery` is an advanced DSL allowing to search ASTs in
a smart way, e.g. "_search `return` statements which return a
`local` variable but aren't in a nested `function`_".
* `metalua.extension.comprehension` is a language extension which
supports lists by comprehension
(`even = { i for i=1, 100 if i%2==0 }`) and improved loops
(`for i=1, 10 for j=1,10 if i~=j do print(i,j) end`).
* `metalua.extension.match` is a language extension which offers
Haskell/ML structural pattern matching
(``match AST with `Function{ args, body } -> ... | `Number{ 0 } -> ...end``)
* **TODO Move basic extensions in a separate module.**
* To remove the compilation speed penalty associated with
metaprogramming, when environment variable `LUA_MCACHE` or Lua
variable `package.mcache` is defined and LuaFileSystem is available,
  the results of Metalua source compilations are cached. Unless the
source file is more recent than the latest cached bytecode file, the
latter is loaded instead of the former.
* The LuaRocks install for the full compiler lists dependencies on
  Readline, LuaFileSystem, and Alt-Getopts. Those projects are
optional, but having them automatically installed by LuaRocks offers
a better user experience.
* The license has changed from MIT to double license MIT + EPL. This
has been done in order to provide the IP guarantees expected by the
Eclipse Foundation, to include Metalua in Eclipse's
[Lua Development Tools](http://www.eclipse.org/koneki/ldt/).

View File

@ -0,0 +1,177 @@
Metalua Parser
==============
`metalua-parser` is a subset of the Metalua compiler, which turns
valid Lua source files and strings into abstract syntax trees
(AST). This README includes a description of this AST format. People
interested in Lua code analysis and generation are encouraged to
produce and/or consume this format to represent ASTs.
It has been designed for Lua 5.1. It hasn't been tested against
Lua 5.2, but should be easily ported.
## Usage
Module `metalua.compiler` has a `new()` function, which returns a
compiler instance. This instance has a set of methods of the form
`:xxx_to_yyy(input)`, where `xxx` and `yyy` must be one of the
following:
* `srcfile` the name of a Lua source file;
* `src` a string containing the Lua sources of a list of statements;
* `lexstream` a lexical tokens stream;
* `ast` an abstract syntax tree;
* `bytecode` a chunk of Lua bytecode that can be loaded in a Lua 5.1
VM (not available if you only installed the parser);
* `function` an executable Lua function.
Compiling into bytecode or executable functions requires the whole
Metalua compiler, not only the parser. The most frequently used
functions are `:src_to_ast(source_string)` and
`:srcfile_to_ast("path/to/source/file.lua")`.
mlc = require 'metalua.compiler'.new()
ast = mlc :src_to_ast[[ return 123 ]]
A compiler instance can be reused as much as you want; it's only
interesting to work with more than one compiler instance when you
start extending their grammars.
## Abstract Syntax Trees definition
### Notation
Trees are written below with some Metalua syntax sugar, which
increases their readability. The backquote symbol introduces a `tag`,
i.e. a string stored in the `"tag"` field of a table:
* `` `Foo{ 1, 2, 3 }`` is a shortcut for `{tag="Foo", 1, 2, 3}`;
* `` `Foo`` is a shortcut for `{tag="Foo"}`;
* `` `Foo 123`` is a shortcut for `` `Foo{ 123 }``, and therefore
`{tag="Foo", 123 }`; the expression after the tag must be a literal
number or string.
When using a Metalua interpreter or compiler, the backtick syntax is
supported and can be used directly. Metalua's pretty-printing helpers
also try to use backtick syntax whenever applicable.
### Tree elements
Tree elements are mainly categorized into statements `stat`,
expressions `expr` and lists of statements `block`. Auxiliary
definitions include function applications/method invocations `apply`,
which are both valid statements and expressions, and expressions
admissible on the left-hand side of an assignment statement `lhs`.
block: { stat* }
stat:
`Do{ stat* }
| `Set{ {lhs+} {expr+} } -- lhs1, lhs2... = e1, e2...
| `While{ expr block } -- while e do b end
| `Repeat{ block expr } -- repeat b until e
| `If{ (expr block)+ block? } -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
| `Fornum{ ident expr expr expr? block } -- for ident = e, e[, e] do b end
| `Forin{ {ident+} {expr+} block } -- for i1, i2... in e1, e2... do b end
| `Local{ {ident+} {expr+}? } -- local i1, i2... = e1, e2...
| `Localrec{ ident expr } -- only used for 'local function'
| `Goto{ <string> } -- goto str
| `Label{ <string> } -- ::str::
| `Return{ <expr*> } -- return e1, e2...
| `Break -- break
| apply
expr:
`Nil | `Dots | `True | `False
| `Number{ <number> }
| `String{ <string> }
| `Function{ { ident* `Dots? } block }
| `Table{ ( `Pair{ expr expr } | expr )* }
| `Op{ opid expr expr? }
| `Paren{ expr } -- significant to cut multiple values returns
| apply
| lhs
apply:
`Call{ expr expr* }
| `Invoke{ expr `String{ <string> } expr* }
ident: `Id{ <string> }
lhs: ident | `Index{ expr expr }
opid: 'add' | 'sub' | 'mul' | 'div'
| 'mod' | 'pow' | 'concat'| 'eq'
| 'lt' | 'le' | 'and' | 'or'
| 'not' | 'len'
### Meta-data (lineinfo)
ASTs also embed some metadata, allowing to map them to their source
representation. This information is stored in a `"lineinfo"` field
in each tree node, which points to the range of characters in the
source string which represents it, and to the content of any comment
that would appear immediately before or after that node.
Lineinfo objects have two fields, `"first"` and `"last"`, describing
respectively the beginning and the end of the subtree in the
sources. For instance, the sub-node ``Number{123}` produced by parsing
`[[return 123]]` will have `lineinfo.first` describing offset 8, and
`lineinfo.last` describing offset 10:
> mlc = require 'metalua.compiler'.new()
> ast = mlc :src_to_ast "return 123 -- comment"
> print(ast[1][1].lineinfo)
<?|L1|C8-10|K8-10|C>
>
A lineinfo keeps track of character offsets relative to the beginning
of the source string/file ("K8-10" above), line numbers (L1 above; a
lineinfo spanning on several lines would read something like "L1-10"),
columns i.e. offset within the line ("C8-10" above), and a filename if
available (the "?" mark above indicating that we have no file name, as
the AST comes from a string). The final "|C>" indicates that there's a
comment immediately after the node; an initial "<C|" would have meant
that there was a comment immediately before the node.
Positions represent either the end of a token and the beginning of an
inter-token space (`"last"` fields) or the beginning of a token, and
the end of an inter-token space (`"first"` fields). Inter-token spaces
might be empty. They can also contain comments, which might be useful
to link with surrounding tokens and AST subtrees.
Positions are chained with their "dual" one: a position at the
beginning of an inter-token space keeps a reference to the position at
the end of that inter-token space in its `"facing"` field, and
conversely, end-of-inter-token positions keep track of the inter-token
space beginning, also in `"facing"`. An inter-token space can be
empty, e.g. in `"2+2"`, in which case `lineinfo==lineinfo.facing`.
Comments are also kept in the `"comments"` field. If present, this
field contains a list of comments, with a `"lineinfo"` field
describing the span between the first and last comment. Each comment
is represented by a list of one string, with a `"lineinfo"` describing
the span of this comment only. Consecutive lines of `--` comments are
considered as one comment: `"-- foo\n-- bar\n"` parses as one comment
whose text is `"foo\nbar"`, whereas `"-- foo\n\n-- bar\n"` parses as
two comments `"foo"` and `"bar"`.
So for instance, if `f` is the AST of a function and I want to
retrieve the comment before the function, I'd do:
f_comment = f.lineinfo.first.comments[1][1]
The information in lineinfo positions, i.e. in each `"first"` and
`"last"` field, is held in the following fields:
* `"source"` the filename (optional);
* `"offset"` the 1-based offset relative to the beginning of the string/file;
* `"line"` the 1-based line number;
* `"column"` the 1-based offset within the line;
* `"facing"` the position at the opposite end of the inter-token space.
* `"comments"` the comments in the associated inter-token space (optional).
* `"id"` an arbitrary number, which uniquely identifies an inter-token
space within a given tokens stream.

View File

@ -0,0 +1,13 @@
Metalua
=======
Metalua is a Lua code analysis tool, as well as a compiler for a
superset of Lua 5.1 supporting Compile-Time Meta-Programming. It's
separated into two LuaRocks, `metalua-parser` and
`metalua-compiler`. The documentation of each rock can be found in
`README-parser.md` and `README-compiler.md`.
All the code in Metalua is released under dual licenses:
* MIT public license (same as Lua);
* EPL public license (same as Eclipse).

View File

@ -0,0 +1,47 @@
--*-lua-*--
package = "metalua-compiler"
version = "0.7.3-1"
source = {
url = "http://git.eclipse.org/c/koneki/org.eclipse.koneki.metalua.git/snapshot/org.eclipse.koneki.metalua-v0.7.3.tar.gz"
}
description = {
summary = "Metalua's compiler: converting (Meta)lua source strings and files into executable Lua 5.1 bytecode",
detailed = [[
    This is the Metalua compiler, packaged as a rock, depending
    on the separate metalua-parser AST generating library. It
compiles a superset of Lua 5.1 into bytecode, which can
then be loaded and executed by a Lua 5.1 VM. It also allows
to dump ASTs back into Lua source files.
]],
homepage = "http://git.eclipse.org/c/koneki/org.eclipse.koneki.metalua.git",
license = "EPL + MIT"
}
dependencies = {
"lua ~> 5.1", -- Lua 5.2 bytecode not supported
"luafilesystem ~> 1.6", -- Cached compilation based on file timestamps
"metalua-parser >= 0.7.3", -- AST production
}
build = {
type="builtin",
modules={
["metalua.compiler.bytecode"] = "metalua/compiler/bytecode.lua",
["metalua.compiler.globals"] = "metalua/compiler/globals.lua",
["metalua.compiler.bytecode.compile"] = "metalua/compiler/bytecode/compile.lua",
["metalua.compiler.bytecode.lcode"] = "metalua/compiler/bytecode/lcode.lua",
["metalua.compiler.bytecode.lopcodes"] = "metalua/compiler/bytecode/lopcodes.lua",
["metalua.compiler.bytecode.ldump"] = "metalua/compiler/bytecode/ldump.lua",
["metalua.loader"] = "metalua/loader.lua",
},
install={
lua={
["metalua.treequery"] = "metalua/treequery.mlua",
["metalua.compiler.ast_to_src"] = "metalua/compiler/ast_to_src.mlua",
["metalua.treequery.walk"] = "metalua/treequery/walk.mlua",
["metalua.extension.match"] = "metalua/extension/match.mlua",
["metalua.extension.comprehension"] = "metalua/extension/comprehension.mlua",
["metalua.repl"] = "metalua/repl.mlua",
}
}
}

View File

@ -0,0 +1,33 @@
rock_manifest = {
doc = {
["README-compiler.md"] = "292523d759247d210d32fb2f6153e0f4",
["README-parser.md"] = "b44e3673d96dd296f2c0e92a6c87ee18",
["README.md"] = "20bfb490cddef9e101e44688791abcda"
},
lua = {
metalua = {
compiler = {
["ast_to_src.mlua"] = "1309f76df37585ef8e1f67f748b07b22",
bytecode = {
["compile.lua"] = "430e4a6fac8b64b5ebb3ae585ebae75a",
["lcode.lua"] = "3ad8755ebe8ea8eca6b1d2846eec92c4",
["ldump.lua"] = "295e1d9657fb0126ce3471b3366da694",
["lopcodes.lua"] = "a0f15cfc93b026b0a868466d066f1d21"
},
["bytecode.lua"] = "1032e5233455fd4e504daf5d2893527b",
["globals.lua"] = "80ae19c6e640de0746348c91633c4c55"
},
extension = {
["comprehension.mlua"] = "426f5856896bda4c3763bd5f61410685",
["match.mlua"] = "79960265331e8b2f46199c2411a103de"
},
["loader.lua"] = "1cdbf6cdf6ca97c55540d068474f1d8a",
["repl.mlua"] = "729456f3a8cc073788acee564a0495f0",
treequery = {
["walk.mlua"] = "5159aaddbec55936f91ea4236f6451d3"
},
["treequery.mlua"] = "97ffcee0825ac3bc776d01566767b2e8"
}
},
["metalua-compiler-0.7.3-1.rockspec"] = "b3883b25641d862db6828300bb755d51"
}

View File

@ -0,0 +1,104 @@
Metalua Compiler
================
## Metalua compiler
This module `metalua-compiler` depends on `metalua-parser`. Its main
feature is to compile ASTs into Lua 5.1 bytecode, allowing to convert
them into bytecode files and executable functions. This opens the
following possibilities:
* compiler objects generated with `require 'metalua.compiler'.new()`
support methods `:xxx_to_function()` and `:xxx_to_bytecode()`;
* Compile-time meta-programming: use of `-{...}` splices in source
code, to generate code during compilation;
* Some syntax extensions, such as structural pattern matching and
lists by comprehension;
* Some AST manipulation facilities such as `treequery`, which are
implemented with Metalua syntax extensions.
## What's new in Metalua 0.7
This is a major overhaul of the compiler's architecture. Some of the
most noteworthy changes are:
* No more installation or bootstrap script. Some Metalua source files
have been rewritten in plain Lua, and module sources have been
refactored, so that if you just drop the `metalua` folder somewhere
in your `LUA_PATH`, it works.
* The compiler can be cut in two parts:
* a parser which generates ASTs out of Lua sources, and should be
either portable or easily ported to Lua 5.2;
* a compiler, which can turn sources and AST into executable
Lua 5.1 bytecode and run it. It also supports compile-time
meta-programming, i.e. code included between `-{ ... }` is
executed during compilation, and the ASTs it produces are
included in the resulting bytecode.
* Both parts are packaged as separate LuaRocks, `metalua-parser` and
`metalua-compiler` respectively, so that you can install the former
without the latter.
* The parser is not a unique object anymore. Instead,
`require "metalua.compiler".new()` returns a different compiler
instance every time it's called. Compiler instances can be reused on
as many source files as wanted, but extending one instance's grammar
doesn't affect other compiler instances.
* The included standard library has been shed. There are too many standard
  libs in Lua, and none of them is standard enough; offering
  yet-another-one, coupled with a specific compiler, can only add to
  confusion.
* Many syntax extensions, which either were arguably more code samples
than actual production-ready tools, or relied too heavily on the
removed runtime standard libraries, have been removed.
* The remaining libraries and samples are:
* `metalua.compiler` converts sources into ASTs, bytecode,
functions, and ASTs back into sources.
* `metalua` compiles and/or executes files from the command line,
can start an interactive REPL session.
* `metalua.loader` adds a package loader which allows to use modules
written in Metalua, even from a plain Lua program.
* `metalua.treequery` is an advanced DSL allowing to search ASTs in
a smart way, e.g. "_search `return` statements which return a
`local` variable but aren't in a nested `function`_".
* `metalua.extension.comprehension` is a language extension which
supports lists by comprehension
(`even = { i for i=1, 100 if i%2==0 }`) and improved loops
(`for i=1, 10 for j=1,10 if i~=j do print(i,j) end`).
* `metalua.extension.match` is a language extension which offers
Haskell/ML structural pattern matching
(``match AST with `Function{ args, body } -> ... | `Number{ 0 } -> ...end``)
* **TODO Move basic extensions in a separate module.**
* To remove the compilation speed penalty associated with
metaprogramming, when environment variable `LUA_MCACHE` or Lua
variable `package.mcache` is defined and LuaFileSystem is available,
  the results of Metalua source compilations are cached. Unless the
source file is more recent than the latest cached bytecode file, the
latter is loaded instead of the former.
* The LuaRocks install for the full compiler lists dependencies on
  Readline, LuaFileSystem, and Alt-Getopts. Those projects are
optional, but having them automatically installed by LuaRocks offers
a better user experience.
* The license has changed from MIT to double license MIT + EPL. This
has been done in order to provide the IP guarantees expected by the
Eclipse Foundation, to include Metalua in Eclipse's
[Lua Development Tools](http://www.eclipse.org/koneki/ldt/).

View File

@ -0,0 +1,177 @@
Metalua Parser
==============
`metalua-parser` is a subset of the Metalua compiler, which turns
valid Lua source files and strings into abstract syntax trees
(AST). This README includes a description of this AST format. People
interested in Lua code analysis and generation are encouraged to
produce and/or consume this format to represent ASTs.
It has been designed for Lua 5.1. It hasn't been tested against
Lua 5.2, but should be easily ported.
## Usage
Module `metalua.compiler` has a `new()` function, which returns a
compiler instance. This instance has a set of methods of the form
`:xxx_to_yyy(input)`, where `xxx` and `yyy` must be one of the
following:
* `srcfile` the name of a Lua source file;
* `src` a string containing the Lua sources of a list of statements;
* `lexstream` a lexical tokens stream;
* `ast` an abstract syntax tree;
* `bytecode` a chunk of Lua bytecode that can be loaded in a Lua 5.1
VM (not available if you only installed the parser);
* `function` an executable Lua function.
Compiling into bytecode or executable functions requires the whole
Metalua compiler, not only the parser. The most frequently used
functions are `:src_to_ast(source_string)` and
`:srcfile_to_ast("path/to/source/file.lua")`.
mlc = require 'metalua.compiler'.new()
ast = mlc :src_to_ast[[ return 123 ]]
A compiler instance can be reused as much as you want; it's only
interesting to work with more than one compiler instance when you
start extending their grammars.
## Abstract Syntax Trees definition
### Notation
Trees are written below with some Metalua syntax sugar, which
increases their readability. The backquote symbol introduces a `tag`,
i.e. a string stored in the `"tag"` field of a table:
* `` `Foo{ 1, 2, 3 }`` is a shortcut for `{tag="Foo", 1, 2, 3}`;
* `` `Foo`` is a shortcut for `{tag="Foo"}`;
* `` `Foo 123`` is a shortcut for `` `Foo{ 123 }``, and therefore
`{tag="Foo", 123 }`; the expression after the tag must be a literal
number or string.
When using a Metalua interpreter or compiler, the backtick syntax is
supported and can be used directly. Metalua's pretty-printing helpers
also try to use backtick syntax whenever applicable.
### Tree elements
Tree elements are mainly categorized into statements `stat`,
expressions `expr` and lists of statements `block`. Auxiliary
definitions include function applications/method invocations `apply`,
which are both valid statements and expressions, and expressions
admissible on the left-hand side of an assignment statement `lhs`.
block: { stat* }
stat:
`Do{ stat* }
| `Set{ {lhs+} {expr+} } -- lhs1, lhs2... = e1, e2...
| `While{ expr block } -- while e do b end
| `Repeat{ block expr } -- repeat b until e
| `If{ (expr block)+ block? } -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
| `Fornum{ ident expr expr expr? block } -- for ident = e, e[, e] do b end
| `Forin{ {ident+} {expr+} block } -- for i1, i2... in e1, e2... do b end
| `Local{ {ident+} {expr+}? } -- local i1, i2... = e1, e2...
| `Localrec{ ident expr } -- only used for 'local function'
| `Goto{ <string> } -- goto str
| `Label{ <string> } -- ::str::
| `Return{ <expr*> } -- return e1, e2...
| `Break -- break
| apply
expr:
`Nil | `Dots | `True | `False
| `Number{ <number> }
| `String{ <string> }
| `Function{ { ident* `Dots? } block }
| `Table{ ( `Pair{ expr expr } | expr )* }
| `Op{ opid expr expr? }
| `Paren{ expr } -- significant to cut multiple values returns
| apply
| lhs
apply:
`Call{ expr expr* }
| `Invoke{ expr `String{ <string> } expr* }
ident: `Id{ <string> }
lhs: ident | `Index{ expr expr }
opid: 'add' | 'sub' | 'mul' | 'div'
| 'mod' | 'pow' | 'concat'| 'eq'
| 'lt' | 'le' | 'and' | 'or'
| 'not' | 'len'
### Meta-data (lineinfo)
ASTs also embed some metadata, allowing to map them to their source
representation. This information is stored in a `"lineinfo"` field
in each tree node, which points to the range of characters in the
source string which represents it, and to the content of any comment
that would appear immediately before or after that node.
Lineinfo objects have two fields, `"first"` and `"last"`, describing
respectively the beginning and the end of the subtree in the
sources. For instance, the sub-node ``Number{123}` produced by parsing
`[[return 123]]` will have `lineinfo.first` describing offset 8, and
`lineinfo.last` describing offset 10:
> mlc = require 'metalua.compiler'.new()
> ast = mlc :src_to_ast "return 123 -- comment"
> print(ast[1][1].lineinfo)
<?|L1|C8-10|K8-10|C>
>
A lineinfo keeps track of character offsets relative to the beginning
of the source string/file ("K8-10" above), line numbers (L1 above; a
lineinfo spanning on several lines would read something like "L1-10"),
columns i.e. offset within the line ("C8-10" above), and a filename if
available (the "?" mark above indicating that we have no file name, as
the AST comes from a string). The final "|C>" indicates that there's a
comment immediately after the node; an initial "<C|" would have meant
that there was a comment immediately before the node.
Positions represent either the end of a token and the beginning of an
inter-token space (`"last"` fields) or the beginning of a token, and
the end of an inter-token space (`"first"` fields). Inter-token spaces
might be empty. They can also contain comments, which might be useful
to link with surrounding tokens and AST subtrees.
Positions are chained with their "dual" one: a position at the
beginning of an inter-token space keeps a reference to the position at
the end of that inter-token space in its `"facing"` field, and
conversely, end-of-inter-token positions keep track of the inter-token
space beginning, also in `"facing"`. An inter-token space can be
empty, e.g. in `"2+2"`, in which case `lineinfo==lineinfo.facing`.
Comments are also kept in the `"comments"` field. If present, this
field contains a list of comments, with a `"lineinfo"` field
describing the span between the first and last comment. Each comment
is represented by a list of one string, with a `"lineinfo"` describing
the span of this comment only. Consecutive lines of `--` comments are
considered as one comment: `"-- foo\n-- bar\n"` parses as one comment
whose text is `"foo\nbar"`, whereas `"-- foo\n\n-- bar\n"` parses as
two comments `"foo"` and `"bar"`.
So for instance, if `f` is the AST of a function and I want to
retrieve the comment before the function, I'd do:
f_comment = f.lineinfo.first.comments[1][1]
The information in lineinfo positions, i.e. in each `"first"` and
`"last"` field, is held in the following fields:
* `"source"` the filename (optional);
* `"offset"` the 1-based offset relative to the beginning of the string/file;
* `"line"` the 1-based line number;
* `"column"` the 1-based offset within the line;
* `"facing"` the position at the opposite end of the inter-token space.
* `"comments"` the comments in the associated inter-token space (optional).
* `"id"` an arbitrary number, which uniquely identifies an inter-token
space within a given tokens stream.

View File

@ -0,0 +1,13 @@
Metalua
=======
Metalua is a Lua code analysis tool, as well as a compiler for a
superset of Lua 5.1 supporting Compile-Time Meta-Programming. It's
separated into two LuaRocks, `metalua-parser` and
`metalua-compiler`. The documentation of each rock can be found in
`README-parser.md` and `README-compiler.md`.
All the code in Metalua is released under dual licenses:
* MIT public license (same as Lua);
* EPL public license (same as Eclipse).

View File

@ -0,0 +1,38 @@
--*-lua-*--
package = "metalua-parser"
version = "0.7.3-2"
source = {
url = "http://git.eclipse.org/c/koneki/org.eclipse.koneki.metalua.git/snapshot/org.eclipse.koneki.metalua-v0.7.3.tar.gz"
}
description = {
summary = "Metalua's parser: converting Lua source strings and files into AST",
detailed = [[
This is a subset of the full Metalua compiler. It defines and generates an AST
format for Lua programs, which offers a nice level of abstraction to reason about
and manipulate Lua programs.
]],
homepage = "http://git.eclipse.org/c/koneki/org.eclipse.koneki.metalua.git",
license = "EPL + MIT"
}
dependencies = {
"lua >= 5.1"
}
build = {
type="builtin",
modules={
["metalua.grammar.generator"] = "metalua/grammar/generator.lua",
["metalua.grammar.lexer"] = "metalua/grammar/lexer.lua",
["metalua.compiler.parser"] = "metalua/compiler/parser.lua",
["metalua.compiler.parser.table"] = "metalua/compiler/parser/table.lua",
["metalua.compiler.parser.ext"] = "metalua/compiler/parser/ext.lua",
["metalua.compiler.parser.annot.generator"] = "metalua/compiler/parser/annot/generator.lua",
["metalua.compiler.parser.annot.grammar"] = "metalua/compiler/parser/annot/grammar.lua",
["metalua.compiler.parser.stat"] = "metalua/compiler/parser/stat.lua",
["metalua.compiler.parser.misc"] = "metalua/compiler/parser/misc.lua",
["metalua.compiler.parser.lexer"] = "metalua/compiler/parser/lexer.lua",
["metalua.compiler.parser.meta"] = "metalua/compiler/parser/meta.lua",
["metalua.compiler.parser.expr"] = "metalua/compiler/parser/expr.lua",
["metalua.compiler"] = "metalua/compiler.lua",
["metalua.pprint"] = "metalua/pprint.lua",
}
}

View File

@ -0,0 +1,34 @@
rock_manifest = {
doc = {
["README-compiler.md"] = "292523d759247d210d32fb2f6153e0f4",
["README-parser.md"] = "b44e3673d96dd296f2c0e92a6c87ee18",
["README.md"] = "20bfb490cddef9e101e44688791abcda"
},
lua = {
metalua = {
compiler = {
parser = {
annot = {
["generator.lua"] = "d86f7507d66ba6a3692a6f8611e9939b",
["grammar.lua"] = "7d195bde7992efd9923771751b67b18f"
},
["expr.lua"] = "3a0b1984a6f92280e2e63b074fdcec10",
["ext.lua"] = "a99e31a07bc390b826f6653bcc47d89b",
["lexer.lua"] = "eac0f9d475d9dae4ea5a2724014cebec",
["meta.lua"] = "12870bceda6395695020b739196e2a92",
["misc.lua"] = "49d59f4fc1bfb77b36f78d4f87ae258f",
["stat.lua"] = "83f10ac899be12ca4df58bbe8645299f",
["table.lua"] = "5d2389e89603b7f78c731e6918aa1a9b"
},
["parser.lua"] = "e6ae68ce200de8071bb0fefad97f9b79"
},
["compiler.lua"] = "ca65ee9a3053581f4315821a31d0c1fd",
grammar = {
["generator.lua"] = "b8a29e817d6798c12f40a230a0f6d0af",
["lexer.lua"] = "7cb7c835479a9be884130eaacb9be60a"
},
["pprint.lua"] = "0b9bd8757b45c2d4be30106abcbd45b2"
}
},
["metalua-parser-0.7.3-2.rockspec"] = "a56680900b0b51701db7cd7abf49af92"
}

View File

@ -0,0 +1,66 @@
package = "penlight"
version = "0.9.8-1"
source = {
dir = "penlight-0.9.8",
url = "http://stevedonovan.github.com/files/penlight-0.9.8-core.zip",
}
description = {
summary = "Lua utility libraries loosely based on the Python standard libraries",
homepage = "http://stevedonovan.github.com/Penlight",
license = "MIT/X11",
maintainer = "steve.j.donovan@gmail.com",
detailed = [[
Penlight is a set of pure Lua libraries for making it easier to work with common tasks like
iterating over directories, reading configuration files and the like. Provides functional operations
on tables and sequences.
]]
}
dependencies = {
"luafilesystem",
}
build = {
type = "builtin",
modules = {
["pl.strict"] = "lua/pl/strict.lua",
["pl.dir"] = "lua/pl/dir.lua",
["pl.operator"] = "lua/pl/operator.lua",
["pl.input"] = "lua/pl/input.lua",
["pl.config"] = "lua/pl/config.lua",
["pl.seq"] = "lua/pl/seq.lua",
["pl.stringio"] = "lua/pl/stringio.lua",
["pl.text"] = "lua/pl/text.lua",
["pl.test"] = "lua/pl/test.lua",
["pl.tablex"] = "lua/pl/tablex.lua",
["pl.app"] = "lua/pl/app.lua",
["pl.stringx"] = "lua/pl/stringx.lua",
["pl.lexer"] = "lua/pl/lexer.lua",
["pl.utils"] = "lua/pl/utils.lua",
["pl.sip"] = "lua/pl/sip.lua",
["pl.permute"] = "lua/pl/permute.lua",
["pl.pretty"] = "lua/pl/pretty.lua",
["pl.class"] = "lua/pl/class.lua",
["pl.List"] = "lua/pl/List.lua",
["pl.data"] = "lua/pl/data.lua",
["pl.Date"] = "lua/pl/Date.lua",
["pl"] = "lua/pl/init.lua",
["pl.luabalanced"] = "lua/pl/luabalanced.lua",
["pl.comprehension"] = "lua/pl/comprehension.lua",
["pl.path"] = "lua/pl/path.lua",
["pl.array2d"] = "lua/pl/array2d.lua",
["pl.func"] = "lua/pl/func.lua",
["pl.lapp"] = "lua/pl/lapp.lua",
["pl.file"] = "lua/pl/file.lua",
['pl.template'] = "lua/pl/template.lua",
["pl.Map"] = "lua/pl/Map.lua",
["pl.MultiMap"] = "lua/pl/MultiMap.lua",
["pl.OrderedMap"] = "lua/pl/OrderedMap.lua",
["pl.Set"] = "lua/pl/Set.lua",
["pl.xml"] = "lua/pl/xml.lua",
["pl.platf.luajava"] = "lua/pl/platf/luajava.lua"
},
}

View File

@ -0,0 +1,45 @@
rock_manifest = {
lua = {
pl = {
["Date.lua"] = "d2131d59151ce978c4db6a648fcd275a",
["List.lua"] = "1236c5eb08956619daacd25a462a9682",
["Map.lua"] = "0297a536ac0595ac59e8828f8c867f53",
["MultiMap.lua"] = "e5f898fe2443e51c38825e9bc3d1aee5",
["OrderedMap.lua"] = "bd8e39c59e22c582a33e2f025d3ae914",
["Set.lua"] = "346ff7392fd4aeda418fb832e8da7a7f",
["app.lua"] = "23ffb79e69a3fd679013cf82d95ed792",
["array2d.lua"] = "77618ec2e2de4d6d237484dfd742cd73",
["class.lua"] = "6f58bf39e7f90711b6840ad6955d258e",
["comprehension.lua"] = "f8600ba945dde5d959194500a687c69f",
["config.lua"] = "9ea3ce0ac3cdf2ce0e17f1353f32abb6",
["data.lua"] = "be446ff813b5bcf30b4063601165df6a",
["dir.lua"] = "3d60d4c1caeaabe199fe361e4e9b14a4",
["file.lua"] = "f5c9527ea14b511d2cb9af80b219c562",
["func.lua"] = "cc50d73512b6d0518f6587b82844de8c",
["init.lua"] = "9232be7d8790d4f907972a00dec7949d",
["input.lua"] = "bab7c64ca9a740df5e9fb9909610bbc4",
["lapp.lua"] = "1cc81f048bc3fcd775c40cd9a2d601a7",
["lexer.lua"] = "da0db5e323a2d37545ccb02592d0d3c8",
["luabalanced.lua"] = "00b94a997a9ea4d73f54c10893f3b35f",
["operator.lua"] = "e606629c738966cf497bb938457adebd",
["path.lua"] = "b0714bc337c068b7252f64250fe59604",
["permute.lua"] = "b0ed9ba2787119ef99468329a54ea16a",
platf = {
["luajava.lua"] = "9c2898667281ad9501cc05a8e31a6f53"
},
["pretty.lua"] = "3ece64317ce05916eaba91fa96d9e7c0",
["seq.lua"] = "e99e420345ab11120a7b741d8184920a",
["sip.lua"] = "bde74f65e7246017d3ef034d178100ea",
["strict.lua"] = "720e939931dbbe42fad8fd4e7736435e",
["stringio.lua"] = "a8f4c786ea1b62f16ed05e6b09840044",
["stringx.lua"] = "43f57755969c6b4001316226506a3744",
["tablex.lua"] = "dec027cc3a3901766bd933c5fc0f3e93",
["template.lua"] = "f358175bbb84c401c6213c953ce295a4",
["test.lua"] = "1c45f7b1c438673f1eb668e2ca592f1c",
["text.lua"] = "c30f90cab2d00186a6432e408ba1fe14",
["utils.lua"] = "68cd38638a29b4ab5f1cc0eae38dce77",
["xml.lua"] = "e13ed468c450fccb9a8e858a0f787eef"
}
},
["penlight-0.9.8-1.rockspec"] = "96edac3ff1d0ac57cb45d6551a56a775"
}

View File

@ -0,0 +1,653 @@
#!/usr/bin/env lua
---------
-- LuaSrcDiet
--
-- Compresses Lua source code by removing unnecessary characters.
-- For Lua 5.1+ source code.
--
-- **Notes:**
--
-- * Remember to update version and date information below (MSG_TITLE).
-- * TODO: passing data tables around is a horrific mess.
-- * TODO: to implement pcall() to properly handle lexer etc. errors.
-- * TODO: need some automatic testing for a semblance of sanity.
-- * TODO: the plugin module is highly experimental and unstable.
----
local equiv = require "luasrcdiet.equiv"
local fs = require "luasrcdiet.fs"
local llex = require "luasrcdiet.llex"
local lparser = require "luasrcdiet.lparser"
local luasrcdiet = require "luasrcdiet.init"
local optlex = require "luasrcdiet.optlex"
local optparser = require "luasrcdiet.optparser"
local byte = string.byte
local concat = table.concat
local find = string.find
local fmt = string.format
local gmatch = string.gmatch
local match = string.match
local print = print
local rep = string.rep
local sub = string.sub
local plugin
local LUA_VERSION = match(_VERSION, " (5%.[123])$") or "5.1"
-- Is --opt-binequiv available for this Lua version?
local BIN_EQUIV_AVAIL = LUA_VERSION == "5.1" and not package.loaded.jit
---------------------- Messages and textual data ----------------------
local MSG_TITLE = fmt([[
LuaSrcDiet: Puts your Lua 5.1+ source code on a diet
Version %s <%s>
]], luasrcdiet._VERSION, luasrcdiet._HOMEPAGE)
local MSG_USAGE = [[
usage: luasrcdiet [options] [filenames]
example:
>luasrcdiet myscript.lua -o myscript_.lua
options:
-v, --version prints version information
-h, --help prints usage information
-o <file> specify file name to write output
-s <suffix> suffix for output files (default '_')
--keep <msg> keep block comment with <msg> inside
--plugin <module> run <module> in plugin/ directory
- stop handling arguments
(optimization levels)
--none all optimizations off (normalizes EOLs only)
--basic lexer-based optimizations only
--maximum maximize reduction of source
(informational)
--quiet process files quietly
--read-only read file and print token stats only
--dump-lexer dump raw tokens from lexer to stdout
--dump-parser dump variable tracking tables from parser
--details extra info (strings, numbers, locals)
features (to disable, insert 'no' prefix like --noopt-comments):
%s
default settings:
%s]]
-- Optimization options, for ease of switching on and off.
--
-- * Positive to enable optimization, negative (no) to disable.
-- * These options should follow --opt-* and --noopt-* style for now.
local OPTION = [[
--opt-comments,'remove comments and block comments'
--opt-whitespace,'remove whitespace excluding EOLs'
--opt-emptylines,'remove empty lines'
--opt-eols,'all above, plus remove unnecessary EOLs'
--opt-strings,'optimize strings and long strings'
--opt-numbers,'optimize numbers'
--opt-locals,'optimize local variable names'
--opt-entropy,'tries to reduce symbol entropy of locals'
--opt-srcequiv,'insist on source (lexer stream) equivalence'
--opt-binequiv,'insist on binary chunk equivalence (only for PUC Lua 5.1)'
--opt-experimental,'apply experimental optimizations'
]]
-- Preset configuration.
local DEFAULT_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-numbers --opt-locals
--opt-srcequiv --noopt-binequiv
]]
-- Override configurations: MUST explicitly enable/disable everything.
local BASIC_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals --noopt-entropy
--opt-srcequiv --noopt-binequiv
]]
local MAXIMUM_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-eols --opt-strings --opt-numbers
--opt-locals --opt-entropy
--opt-srcequiv
]] .. (BIN_EQUIV_AVAIL and ' --opt-binequiv' or ' --noopt-binequiv')
local NONE_CONFIG = [[
--noopt-comments --noopt-whitespace --noopt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals --noopt-entropy
--opt-srcequiv --noopt-binequiv
]]
local DEFAULT_SUFFIX = "_" -- default suffix for file renaming
local PLUGIN_SUFFIX = "luasrcdiet.plugin." -- relative location of plugins
------------- Startup and initialize option list handling -------------
--- Simple error message handler; change to error if traceback wanted.
--
-- @tparam string msg The message to print.
local function die(msg)
print("LuaSrcDiet (error): "..msg); os.exit(1)
end
--die = error--DEBUG
-- Prepare text for list of optimizations, prepare lookup table.
local MSG_OPTIONS = ""
do
local WIDTH = 24
local o = {}
for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
local msg = " "..op
msg = msg..rep(" ", WIDTH - #msg)..desc.."\n"
MSG_OPTIONS = MSG_OPTIONS..msg
o[op] = true
o["--no"..sub(op, 3)] = true
end
OPTION = o -- replace OPTION with lookup table
end
MSG_USAGE = fmt(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)
--------- Global variable initialization, option set handling ---------
local suffix = DEFAULT_SUFFIX -- file suffix
local option = {} -- program options
local stat_c, stat_l -- statistics tables
--- Sets option lookup table based on a text list of options.
--
-- Note: additional forced settings for --opt-eols is done in optlex.lua.
--
-- @tparam string CONFIG
local function set_options(CONFIG)
for op in gmatch(CONFIG, "(%-%-%S+)") do
if sub(op, 3, 4) == "no" and -- handle negative options
OPTION["--"..sub(op, 5)] then
option[sub(op, 5)] = false
else
option[sub(op, 3)] = true
end
end
end
-------------------------- Support functions --------------------------
-- List of token types, parser-significant types are up to TTYPE_GRAMMAR
-- while the rest are not used by parsers; arranged for stats display.
local TTYPES = {
"TK_KEYWORD", "TK_NAME", "TK_NUMBER", -- grammar
"TK_STRING", "TK_LSTRING", "TK_OP",
"TK_EOS",
"TK_COMMENT", "TK_LCOMMENT", -- non-grammar
"TK_EOL", "TK_SPACE",
}
local TTYPE_GRAMMAR = 7
local EOLTYPES = { -- EOL names for token dump
["\n"] = "LF", ["\r"] = "CR",
["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
}
--- Reads source code from the file.
--
-- @tparam string fname Path of the file to read.
-- @treturn string Content of the file.
local function load_file(fname)
local data, err = fs.read_file(fname, "rb")
if not data then die(err) end
return data
end
--- Saves source code to the file.
--
-- @tparam string fname Path of the destination file.
-- @tparam string dat The data to write into the file.
local function save_file(fname, dat)
local ok, err = fs.write_file(fname, dat, "wb")
if not ok then die(err) end
end
------------------ Functions to deal with statistics ------------------
--- Initializes the statistics table.
local function stat_init()
stat_c, stat_l = {}, {}
for i = 1, #TTYPES do
local ttype = TTYPES[i]
stat_c[ttype], stat_l[ttype] = 0, 0
end
end
--- Adds a token to the statistics table.
--
-- @tparam string tok The token.
-- @param seminfo
local function stat_add(tok, seminfo)
stat_c[tok] = stat_c[tok] + 1
stat_l[tok] = stat_l[tok] + #seminfo
end
--- Computes totals for the statistics table, returns average table.
--
-- @treturn table
local function stat_calc()
local function avg(c, l) -- safe average function
if c == 0 then return 0 end
return l / c
end
local stat_a = {}
local c, l = 0, 0
for i = 1, TTYPE_GRAMMAR do -- total grammar tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
end
stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l
stat_a.TOTAL_TOK = avg(c, l)
c, l = 0, 0
for i = 1, #TTYPES do -- total all tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype])
end
stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l
stat_a.TOTAL_ALL = avg(c, l)
return stat_a
end
----------------------------- Main tasks -----------------------------
--- A simple token dumper, minimal translation of seminfo data.
--
-- @tparam string srcfl Path of the source file.
local function dump_tokens(srcfl)
-- Load file and process source input into tokens.
local z = load_file(srcfl)
local toklist, seminfolist = llex.lex(z)
-- Display output.
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
if tok == "TK_OP" and byte(seminfo) < 32 then
seminfo = "("..byte(seminfo)..")"
elseif tok == "TK_EOL" then
seminfo = EOLTYPES[seminfo]
else
seminfo = "'"..seminfo.."'"
end
print(tok.." "..seminfo)
end--for
end
--- Dumps globalinfo and localinfo tables.
--
-- @tparam string srcfl Path of the source file.
local function dump_parser(srcfl)
-- Load file and process source input into tokens,
local z = load_file(srcfl)
local toklist, seminfolist, toklnlist = llex.lex(z)
-- Do parser optimization here.
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
local globalinfo, localinfo = xinfo.globalinfo, xinfo.localinfo
-- Display output.
local hl = rep("-", 72)
print("*** Local/Global Variable Tracker Tables ***")
print(hl.."\n GLOBALS\n"..hl)
-- global tables have a list of xref numbers only
for i = 1, #globalinfo do
local obj = globalinfo[i]
local msg = "("..i..") '"..obj.name.."' -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
-- Local tables have xref numbers and a few other special
-- numbers that are specially named: decl (declaration xref),
-- act (activation xref), rem (removal xref).
print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
for i = 1, #localinfo do
local obj = localinfo[i]
local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl..
" act:"..obj.act.." rem:"..obj.rem
if obj.is_special then
msg = msg.." is_special"
end
msg = msg.." -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
print(hl.."\n")
end
--- Reads source file(s) and reports some statistics.
--
-- @tparam string srcfl Path of the source file.
local function read_only(srcfl)
-- Load file and process source input into tokens.
local z = load_file(srcfl)
local toklist, seminfolist = llex.lex(z)
print(MSG_TITLE)
print("Statistics for: "..srcfl.."\n")
-- Collect statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
-- Display output.
local function figures(tt)
return stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f"
local hl = rep("-", 42)
print(fmt(tabf1, "Lexical", "Input", "Input", "Input"))
print(fmt(tabf1, "Elements", "Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl.."\n")
end
--- Processes source file(s), writes output and reports some statistics.
--
-- @tparam string srcfl Path of the source file.
-- @tparam string destfl Path of the destination file where to write optimized source.
local function process_file(srcfl, destfl)
-- handle quiet option
local function print(...) --luacheck: ignore 431
if option.QUIET then return end
_G.print(...)
end
if plugin and plugin.init then -- plugin init
option.EXIT = false
plugin.init(option, srcfl, destfl)
if option.EXIT then return end
end
print(MSG_TITLE) -- title message
-- Load file and process source input into tokens.
local z = load_file(srcfl)
if plugin and plugin.post_load then -- plugin post-load
z = plugin.post_load(z) or z
if option.EXIT then return end
end
local toklist, seminfolist, toklnlist = llex.lex(z)
if plugin and plugin.post_lex then -- plugin post-lex
plugin.post_lex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
-- Collect 'before' statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat1_a = stat_calc()
local stat1_c, stat1_l = stat_c, stat_l
-- Do parser optimization here.
optparser.print = print -- hack
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
if plugin and plugin.post_parse then -- plugin post-parse
plugin.post_parse(xinfo.globalinfo, xinfo.localinfo)
if option.EXIT then return end
end
optparser.optimize(option, toklist, seminfolist, xinfo)
if plugin and plugin.post_optparse then -- plugin post-optparse
plugin.post_optparse()
if option.EXIT then return end
end
-- Do lexer optimization here, save output file.
local warn = optlex.warn -- use this as a general warning lookup
optlex.print = print -- hack
toklist, seminfolist, toklnlist
= optlex.optimize(option, toklist, seminfolist, toklnlist)
if plugin and plugin.post_optlex then -- plugin post-optlex
plugin.post_optlex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
local dat = concat(seminfolist)
-- Depending on options selected, embedded EOLs in long strings and
-- long comments may not have been translated to \n, tack a warning.
if find(dat, "\r\n", 1, 1) or
find(dat, "\n\r", 1, 1) then
warn.MIXEDEOL = true
end
-- Test source and binary chunk equivalence.
equiv.init(option, llex, warn)
equiv.source(z, dat)
if BIN_EQUIV_AVAIL then
equiv.binary(z, dat)
end
local smsg = "before and after lexer streams are NOT equivalent!"
local bmsg = "before and after binary chunks are NOT equivalent!"
-- for reporting, die if option was selected, else just warn
if warn.SRC_EQUIV then
if option["opt-srcequiv"] then die(smsg) end
else
print("*** SRCEQUIV: token streams are sort of equivalent")
if option["opt-locals"] then
print("(but no identifier comparisons since --opt-locals enabled)")
end
print()
end
if warn.BIN_EQUIV then
if option["opt-binequiv"] then die(bmsg) end
elseif BIN_EQUIV_AVAIL then
print("*** BINEQUIV: binary chunks are sort of equivalent")
print()
end
-- Save optimized source stream to output file.
save_file(destfl, dat)
-- Collect 'after' statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
-- Display output.
print("Statistics for: "..srcfl.." -> "..destfl.."\n")
local function figures(tt)
return stat1_c[tt], stat1_l[tt], stat1_a[tt],
stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s",
"%-16s%8d%8d%10.2f%8d%8d%10.2f"
local hl = rep("-", 68)
print("*** lexer-based optimizations summary ***\n"..hl)
print(fmt(tabf1, "Lexical",
"Input", "Input", "Input",
"Output", "Output", "Output"))
print(fmt(tabf1, "Elements",
"Count", "Bytes", "Average",
"Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl)
-- Report warning flags from optimizing process.
if warn.LSTRING then
print("* WARNING: "..warn.LSTRING)
elseif warn.MIXEDEOL then
print("* WARNING: ".."output still contains some CRLF or LFCR line endings")
elseif warn.SRC_EQUIV then
print("* WARNING: "..smsg)
elseif warn.BIN_EQUIV then
print("* WARNING: "..bmsg)
end
print()
end
---------------------------- Main functions ---------------------------
local arg = {...} -- program arguments
set_options(DEFAULT_CONFIG) -- set to default options at beginning
--- Does per-file handling, ship off to tasks.
--
-- @tparam {string,...} fspec List of source files.
local function do_files(fspec)
for i = 1, #fspec do
local srcfl = fspec[i]
local destfl
-- Find and replace extension for filenames.
local extb, exte = find(srcfl, "%.[^%.%\\%/]*$")
local basename, extension = srcfl, ""
if extb and extb > 1 then
basename = sub(srcfl, 1, extb - 1)
extension = sub(srcfl, extb, exte)
end
destfl = basename..suffix..extension
if #fspec == 1 and option.OUTPUT_FILE then
destfl = option.OUTPUT_FILE
end
if srcfl == destfl then
die("output filename identical to input filename")
end
-- Perform requested operations.
if option.DUMP_LEXER then
dump_tokens(srcfl)
elseif option.DUMP_PARSER then
dump_parser(srcfl)
elseif option.READ_ONLY then
read_only(srcfl)
else
process_file(srcfl, destfl)
end
end--for
end
--- The main function.
local function main()
local fspec = {}
local argn, i = #arg, 1
if argn == 0 then
option.HELP = true
end
-- Handle arguments.
while i <= argn do
local o, p = arg[i], arg[i + 1]
local dash = match(o, "^%-%-?")
if dash == "-" then -- single-dash options
if o == "-h" then
option.HELP = true; break
elseif o == "-v" then
option.VERSION = true; break
elseif o == "-s" then
if not p then die("-s option needs suffix specification") end
suffix = p
i = i + 1
elseif o == "-o" then
if not p then die("-o option needs a file name") end
option.OUTPUT_FILE = p
i = i + 1
elseif o == "-" then
break -- ignore rest of args
else
die("unrecognized option "..o)
end
elseif dash == "--" then -- double-dash options
if o == "--help" then
option.HELP = true; break
elseif o == "--version" then
option.VERSION = true; break
elseif o == "--keep" then
if not p then die("--keep option needs a string to match for") end
option.KEEP = p
i = i + 1
elseif o == "--plugin" then
if not p then die("--plugin option needs a module name") end
if option.PLUGIN then die("only one plugin can be specified") end
option.PLUGIN = p
plugin = require(PLUGIN_SUFFIX..p)
i = i + 1
elseif o == "--quiet" then
option.QUIET = true
elseif o == "--read-only" then
option.READ_ONLY = true
elseif o == "--basic" then
set_options(BASIC_CONFIG)
elseif o == "--maximum" then
set_options(MAXIMUM_CONFIG)
elseif o == "--none" then
set_options(NONE_CONFIG)
elseif o == "--dump-lexer" then
option.DUMP_LEXER = true
elseif o == "--dump-parser" then
option.DUMP_PARSER = true
elseif o == "--details" then
option.DETAILS = true
elseif OPTION[o] then -- lookup optimization options
set_options(o)
else
die("unrecognized option "..o)
end
else
fspec[#fspec + 1] = o -- potential filename
end
i = i + 1
end--while
if option.HELP then
print(MSG_TITLE..MSG_USAGE); return true
elseif option.VERSION then
print(MSG_TITLE); return true
end
if option["opt-binequiv"] and not BIN_EQUIV_AVAIL then
die("--opt-binequiv is available only for PUC Lua 5.1!")
end
if #fspec > 0 then
if #fspec > 1 and option.OUTPUT_FILE then
die("with -o, only one source file can be specified")
end
do_files(fspec)
return true
else
die("nothing to do!")
end
end
-- entry point -> main() -> do_files()
if not main() then
die("Please run with option -h or --help for usage information")
end

View File

@ -0,0 +1,300 @@
= Features and Usage
Kein-Hong Man
2011-09-13
== Features
LuaSrcDiet features include the following:
* Predefined default, _--basic_ (token-only) and _--maximum_ settings.
* Avoid deleting a block comment with a certain message with _--keep_; this is for copyright or license texts.
* Special handling for `#!` (shbang) lines and, in functions, for the implicit `self` parameter.
* Dumping of raw information using _--dump-lexer_ and _--dump-parser_.
See the `samples` directory.
* An HTML plugin: outputs files that highlight globals and locals, useful for eliminating globals. See the `samples` directory.
* An SLOC plugin: counts significant lines of Lua code, like SLOCCount.
* Source and binary equivalence testing with _--opt-srcequiv_ and _--opt-binequiv_.
List of optimizations:
* Line endings are always normalized to LF, except those embedded in comments or strings.
* _--opt-comments_: Removal of comments and comment blocks.
* _--opt-whitespace_: Removal of whitespace, excluding end-of-line characters.
* _--opt-emptylines_: Removal of empty lines.
* _--opt-eols_: Removal of unnecessary end-of-line characters.
* _--opt-strings_: Rewrite strings and long strings. See the `samples` directory.
* _--opt-numbers_: Rewrite numbers. See the `samples` directory.
* _--opt-locals_: Rename local variable names. Does not rename field or method names.
* _--opt-entropy_: Tries to improve symbol entropy when renaming locals by calculating actual letter frequencies.
* _--opt-experimental_: Apply experimental optimizations.
LuaSrcDiet tries to allow each option to be enabled or disabled separately, but they are not completely orthogonal.
If comment removal is disabled, LuaSrcDiet only removes trailing whitespace.
Trailing whitespace is not removed in long strings; a warning is generated instead.
If empty line removal is disabled, LuaSrcDiet keeps all significant code on the same lines.
Thus, a user is able to debug using the original sources as a reference since the line numbering is unchanged.
String optimization deals mainly with optimizing escape sequences, but delimiters can be switched between single quotes and double quotes if the source size of the string can be reduced.
For long strings and long comments, LuaSrcDiet also tries to reduce the `=` separators in the
delimiters if possible.
For number optimization, LuaSrcDiet saves space by trying to generate the shortest possible sequence, and in the process it does not produce “proper” scientific notation (e.g. 1.23e5) but does away with the decimal point (e.g. 123e3) instead.
The local variable name optimizer uses a full parser of Lua 5.1 source code, so it can rename all local variables, including upvalues and function parameters.
It should handle the implicit `self` parameter gracefully.
In addition, local variable names are either renamed into the shortest possible names following English frequent letter usage or are arranged by calculating entropy with the _--opt-entropy_ option.
Variable names are reused whenever possible, reducing the number of unique variable names.
For example, for `LuaSrcDiet.lua` (version 0.11.0), 683 local identifiers representing 88 unique names were optimized into 32 unique names, all of which are one character in length, saving over 2600 bytes.
If you need some kind of reassurance that your app will still work at reduced size, see the section on verification below.
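To make the combined effect concrete, here is a small hand-made illustration (not taken from the `samples` directory; the names and spacing that LuaSrcDiet actually chooses may differ):

[source, lua]
----
-- before (illustrative input)
local message = "Hello"   -- a greeting
print( message )

-- roughly what --maximum produces: comments, whitespace and EOLs removed,
-- the local renamed to a one-character name
local e="Hello"print(e)
----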
== Usage
LuaSrcDiet needs a Lua 5.1.x (preferably Lua 5.1.4) binary to run.
On Unix machines, one can use the following command line:
[source, sh]
LuaSrcDiet myscript.lua -o myscript_.lua
On Windows machines, the above command line can be used on Cygwin, or you can run Lua with the LuaSrcDiet script like this:
[source, sh]
lua LuaSrcDiet.lua myscript.lua -o myscript_.lua
When run without arguments, LuaSrcDiet prints a list of options.
Also, you can check the `Makefile` for some examples of command lines to use.
For example, for maximum code size reduction and maximum verbosity, use:
[source, sh]
LuaSrcDiet --maximum --details myscript.lua -o myscript_.lua
=== Output Example
A sample output of LuaSrcDiet 0.11.0 for processing `llex.lua` at _--maximum_ settings is as follows:
----
Statistics for: LuaSrcDiet.lua -> sample/LuaSrcDiet.lua
*** local variable optimization summary ***
----------------------------------------------------------
Variable Unique Decl. Token Size Average
Types Names Count Count Bytes Bytes
----------------------------------------------------------
Global 10 0 19 95 5.00
----------------------------------------------------------
Local (in) 88 153 683 3340 4.89
TOTAL (in) 98 153 702 3435 4.89
----------------------------------------------------------
Local (out) 32 153 683 683 1.00
TOTAL (out) 42 153 702 778 1.11
----------------------------------------------------------
*** lexer-based optimizations summary ***
--------------------------------------------------------------------
Lexical Input Input Input Output Output Output
Elements Count Bytes Average Count Bytes Average
--------------------------------------------------------------------
TK_KEYWORD 374 1531 4.09 374 1531 4.09
TK_NAME 795 3963 4.98 795 1306 1.64
TK_NUMBER 54 59 1.09 54 59 1.09
TK_STRING 152 1725 11.35 152 1717 11.30
TK_LSTRING 7 1976 282.29 7 1976 282.29
TK_OP 997 1092 1.10 997 1092 1.10
TK_EOS 1 0 0.00 1 0 0.00
--------------------------------------------------------------------
TK_COMMENT 140 6884 49.17 1 18 18.00
TK_LCOMMENT 7 1723 246.14 0 0 0.00
TK_EOL 543 543 1.00 197 197 1.00
TK_SPACE 1270 2465 1.94 263 263 1.00
--------------------------------------------------------------------
Total Elements 4340 21961 5.06 2841 8159 2.87
--------------------------------------------------------------------
Total Tokens 2380 10346 4.35 2380 7681 3.23
--------------------------------------------------------------------
----
Overall, the file size is reduced by more than 9 kiB.
Tokens in the above report can be classified into “real” or actual tokens, and “fake” or whitespace tokens.
The number of “real” tokens remained the same.
Short comments and long comments were completely eliminated.
The number of line endings was reduced by 59, while all but 152 whitespace characters were optimized away.
So, token separators (whitespace, including line endings) now take up just 10% of the total file size.
No optimization of number tokens was possible, while 2 bytes were saved for string tokens.
For local variable name optimization, the report shows that 38 unique local variable names were reduced to 20 unique names.
The number of identifier tokens should stay the same (there is currently no optimization option to optimize away non-essential or unused “real” tokens).
Since there can be at most 53 single-character identifiers, all local variables are now one character in length.
Over 600 bytes was saved.
_--details_ will give a longer report and much more information.
A sample output of LuaSrcDiet 0.12.0 for processing the one-file `LuaSrcDiet.lua` program itself at _--maximum_ and _--opt-experimental_ settings is as follows:
----
*** local variable optimization summary ***
----------------------------------------------------------
Variable Unique Decl. Token Size Average
Types Names Count Count Bytes Bytes
----------------------------------------------------------
Global 27 0 51 280 5.49
----------------------------------------------------------
Local (in) 482 1063 4889 21466 4.39
TOTAL (in) 509 1063 4940 21746 4.40
----------------------------------------------------------
Local (out) 55 1063 4889 4897 1.00
TOTAL (out) 82 1063 4940 5177 1.05
----------------------------------------------------------
*** BINEQUIV: binary chunks are sort of equivalent
Statistics for: LuaSrcDiet.lua -> app_experimental.lua
*** lexer-based optimizations summary ***
--------------------------------------------------------------------
Lexical Input Input Input Output Output Output
Elements Count Bytes Average Count Bytes Average
--------------------------------------------------------------------
TK_KEYWORD 3083 12247 3.97 3083 12247 3.97
TK_NAME 5401 24121 4.47 5401 7552 1.40
TK_NUMBER 467 494 1.06 467 494 1.06
TK_STRING 787 7983 10.14 787 7974 10.13
TK_LSTRING 14 3453 246.64 14 3453 246.64
TK_OP 6381 6861 1.08 6171 6651 1.08
TK_EOS 1 0 0.00 1 0 0.00
--------------------------------------------------------------------
TK_COMMENT 1611 72339 44.90 1 18 18.00
TK_LCOMMENT 18 4404 244.67 0 0 0.00
TK_EOL 4419 4419 1.00 1778 1778 1.00
TK_SPACE 10439 24475 2.34 2081 2081 1.00
--------------------------------------------------------------------
Total Elements 32621 160796 4.93 19784 42248 2.14
--------------------------------------------------------------------
Total Tokens 16134 55159 3.42 15924 38371 2.41
--------------------------------------------------------------------
* WARNING: before and after lexer streams are NOT equivalent!
----
The command line was:
[source, sh]
lua LuaSrcDiet.lua LuaSrcDiet.lua -o app_experimental.lua --maximum --opt-experimental --noopt-srcequiv
The important thing to note is that while the binary chunks are equivalent, the source lexer streams are not equivalent.
Hence, the _--noopt-srcequiv_ option makes LuaSrcDiet report only a warning for the failed source equivalence test.
`LuaSrcDiet.lua` was reduced from 157 kiB to about 41.3 kiB.
The _--opt-experimental_ option saves an extra 205 bytes over standard _--maximum_.
Note the reduction in `TK_OP` count due to a reduction in semicolons and parentheses.
`TK_SPACE` has actually increased a bit due to semicolons that are changed into single spaces; some of these spaces could not be removed.
For more performance numbers, see the <<performance-stats#, Performance Statistics>> page.
== Verification
Code size reduction can be quite a hairy thing (even I peer at the results with suspicion), so some kind of verification is desirable for users who expect processed files to _not_ blow up.
Since LuaSrcDiet has been talked about as a tool to reduce code size in projects such as WoW add-ons, `eLua` and `nspire`, adding a verification step will reduce risk for all users of LuaSrcDiet.
LuaSrcDiet performs two kinds of equivalence testing as of version 0.12.0.
The two tests can be very, very loosely termed as _source equivalence testing_ and _binary equivalence testing_.
They are controlled by the _--opt-srcequiv_ and _--opt-binequiv_ options and are enabled by default.
Testing behaviour can be summarized as follows:
* Both tests are always executed.
The options control the resulting actions taken.
* Both options are normally enabled.
This makes any failing test throw an error.
* When an option is disabled, LuaSrcDiet will at most print a warning.
* For passing results, see the following subsections that describe what the tests actually do.
You only need to disable a testing option for experimental optimizations (see the following section for more information on this).
For anything up to and including _--maximum_, both tests should pass.
If any test fails under these conditions, then something has gone wrong with LuaSrcDiet, and I would be interested to know what has blown up.
=== _--opt-srcequiv_ Source Equivalence
The source equivalence test uses LuaSrcDiet's lexer to read and compare the _before_ and _after_ lexer token streams.
Numbers and strings are dumped as binary chunks using `loadstring()` and `string.dump()` and the results compared.
If your file passes this test, it means that a Lua 5.1.x binary should see the exact same token streams for both _before_ and _after_ files.
That is, the parser in Lua will see the same lexer sequence coming from the source for both files and thus they _should_ be equivalent.
Touch wood.
Heh.
However, if you are _cross-compiling_, it may be possible for this test to fail.
Experienced Lua developers can modify `equiv.lua` to handle such cases.
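As a rough illustration of the idea (a minimal sketch, not the actual `equiv.lua` code, and assuming plain PUC Lua 5.1), two literals can be compared by dumping tiny chunks built around them:

[source, lua]
----
-- Minimal sketch: are two literals interchangeable? (not the real equiv.lua logic)
local function literals_equal(before, after)
  -- the same chunk name is used so the dumps do not differ merely by source name
  local fa = assert(loadstring("return " .. before, "=chunk"))
  local fb = assert(loadstring("return " .. after, "=chunk"))
  return string.dump(fa) == string.dump(fb)   -- compare the compiled forms
end

print(literals_equal("1.23e5", "123e3"))  --> true: both denote 123000
----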
=== _--opt-binequiv_ Binary Equivalence
The binary equivalence test uses `loadstring()` and `string.dump()` to generate binary chunks of the entire _before_ and _after_ files.
Also, any shbang (`#!`) lines are removed prior to generation of the binary chunks.
The binary chunks are then run through a fake `undump` routine to verify the integrity of the binary chunks and to compare all parts that ought to be identical.
On a per-function prototype basis (where _ignored_ means that any difference between the two binary chunks is ignored):
* All debug information is ignored.
* The source name is ignored.
* Any line number data is ignored.
For example, `linedefined` and `lastlinedefined`.
The rest of the two binary chunks must be identical.
So, while the two are not binary-exact, they can be loosely termed as “equivalent” and should run in exactly the same manner.
Sort of.
You get the idea.
This test may also cause problems if you are _cross-compiling_.
== Experimental Stuff
The _--opt-experimental_ option applies experimental optimizations that, in general, make changes to “real” tokens.
Such changes may or may not lead to the result failing binary chunk equivalence testing.
They would likely fail source lexer stream equivalence testing, so the _--noopt-srcequiv_ option needs to be applied so that LuaSrcDiet just gives a warning instead of an error.
For sample files, see the `samples` directory.
Currently implemented experimental optimizations are as follows:
=== Semicolon Operator Removal
The semicolon (`;`) operator is an optional operator that is used to separate statements.
The optimization turns all of these operators into single spaces, which are then run through whitespace removal.
At worst, there will be no change to file size.
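A hand-made illustration of the effect (not from the `samples` directory):

[source, lua]
----
-- before: semicolons separate statements on one line
local a = 1; local b = 2; print(a + b)
-- after: each ';' becomes a space, then whitespace removal shrinks the result
local a=1 local b=2 print(a+b)
----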
* _Fails_ source lexer stream equivalence.
* _Passes_ binary chunk equivalence.
=== Function Call Syntax Sugar Optimization
This optimization turns function calls that take a single string or long string argument into their syntax-sugar representation, which leaves out the parentheses.
Since strings can abut anything, each instance saves 2 bytes.
For example, the following:
[source, lua]
fish("cow")fish('cow')fish([[cow]])
is turned into:
[source, lua]
fish"cow"fish'cow'fish[[cow]]
* _Fails_ source lexer stream equivalence.
* _Passes_ binary chunk equivalence.
=== Other Experimental Optimizations
There are two more of these optimizations planned, before focus is turned to the Lua 5.2.x series:
* Simple `local` keyword removal.
Planned to work for a few kinds of patterns only.
* User directed name replacement, which will need user input to modify names or identifiers used in table keys and function methods or fields.

View File

@ -0,0 +1,128 @@
= Performance Statistics
Kein-Hong Man
2011-09-13
== Size Comparisons
The following is the result of processing `llex.lua` from LuaSrcDiet 0.11.0 using various optimization options:
|===
| LuaSrcDiet Option | Size (bytes)
| Original | 12,421
| Empty lines only | 12,395
| Whitespace only | 9,372
| Local rename only | 11,794
| _--basic_ setting | 3,835
| Program default | 3,208
| _--maximum_ setting | 3,130
|===
The program's default settings do not remove all unnecessary EOLs.
The _--basic_ setting is more conservative than the default settings: it disables the optimization of strings and numbers and the renaming of locals.
For version 0.12.0, the following is the result of processing `LuaSrcDiet.lua` using various optimization options:
|===
| LuaSrcDiet Option | Size (bytes)
| Original | 160,796
| _--basic_ setting | 60,219
| Program default | 43,650
| _--maximum_ setting | 42,453
| max + experimental | 42,248
|===
The above best size can go a lot lower with simple `local` keyword removal and user directed name replacement, which will be the subject of the next release of LuaSrcDiet.
== Compression and luac
File sizes of LuaSrcDiet 0.11.0 main files in various forms:
[cols="m,5*d", options="header,footer"]
|===
| Source File | Original Size (bytes) | `luac` normal (bytes) | `luac` stripped (bytes) | LuaSrcDiet _--basic_ (bytes) | LuaSrcDiet _--maximum_ (bytes)
| LuaSrcDiet.lua | 21,961 | 20,952 | 11,000 | 11,005 | 8,159
| llex.lua | 12,421 | 8,613 | 4,247 | 3,835 | 3,130
| lparser.lua | 41,757 | 27,215 | 12,506 | 11,755 | 7,666
| optlex.lua | 31,009 | 16,992 | 8,021 | 9,129 | 6,858
| optparser.lua | 16,511 | 9,021 | 3,520 | 5,087 | 2,999
| Total | 123,659 | 82,793 | 39,294 | 40,811 | 28,812
|===
* “LuaSrcDiet --maximum” has the smallest total file size.
* The ratio of “Original Size” to “LuaSrcDiet --maximum” is *4.3*.
* The ratio of “Original Size” to “luac stripped” is *3.1*.
* The ratio of “luac stripped” to “LuaSrcDiet --maximum” is *1.4*.
Compressibility of LuaSrcDiet 0.11.0 main files in various forms:
|===
| Compression Method | Original Size | `luac` normal | `luac` stripped | LuaSrcDiet _--basic_ | LuaSrcDiet _--maximum_
| Uncompressed originals | 123,659 | 82,793 | 39,294 | 40,811 | 28,812
| gzip -9 | 28,288 | 29,210 | 17,732 | 12,041 | 10,451
| bzip2 -9 | 24,407 | 27,232 | 16,856 | 11,480 | 9,815
| lzma (7-zip max) | 25,530 | 23,908 | 15,741 | 11,241 | 9,685
|===
* “LuaSrcDiet --maximum” has the smallest total file size (but a binary chunk loads faster and works with a smaller Lua executable).
* The ratio of “Original size” to “Original size + bzip2” is *5.1*.
* The ratio of “Original size” to “LuaSrcDiet --maximum + bzip2” is *12.6*.
* The ratio of “LuaSrcDiet --maximum” to “LuaSrcDiet --maximum + bzip2” is *2.9*.
* The ratio of “Original size” to “luac stripped + bzip2” is *7.3*.
* The ratio of “luac stripped” to “luac stripped + bzip2” is *2.3*.
* The ratio of “luac stripped + bzip2” to “LuaSrcDiet --maximum + bzip2” is *1.7*.
So, squeezed source code is smaller than stripped binary chunks and compresses better, at a ratio of 2.9 for squeezed source code versus 2.3 for stripped binary chunks.
Compressed binary chunks are still a very efficient way of storing Lua scripts, because using only binary chunks allows the parts of Lua needed to compile from source (`llex.o`, `lparser.o`, `lcode.o`, `ldump.o`) to be omitted, saving over 24 KB in the process.
Note that LuaSrcDiet _does not_ answer the question of whether embedding source code is better or embedding binary chunks is better.
It is simply a utility for producing smaller source code files and an exercise in processing Lua source code using a Lua-based lexer and parser skeleton.
== Compile Speed
The following is a primitive attempt to analyze in-memory Lua script loading performance (using the `loadstring` function in Lua).
The LuaSrcDiet 0.11.0 files (original, squeezed with _--maximum_ and stripped binary chunks versions) are loaded into memory first before a loop runs to repeatedly load the script files for 10 seconds.
A null loop is also performed (processing empty strings) and the time taken per null iteration is subtracted as a form of null adjustment.
Then, various performance parameters are calculated.
Note that `LuaSrcDiet.lua` was slightly modified (`#!` line removed) to let the `loadstring` function run.
The results below were obtained with a Lua 5.1.3 executable compiled using `make generic` on Cygwin/Windows XP SP2 on a Sempron 3000+ (1.8GHz).
The LuaSrcDiet 0.11.0 source files have 11,180 “real” tokens in total.
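A rough sketch of the measurement loop described above (assumptions: Lua 5.1 with `loadstring`, the file already read into `src`; this is not the original benchmark script):

[source, lua]
----
-- Count how many times a source string can be compiled in ten seconds.
local function loads_per_10s(src)
  local count, start = 0, os.clock()
  while os.clock() - start < 10 do
    assert(loadstring(src))        -- compile only; the chunk is not run
    count = count + 1
  end
  return count
end
-- A null run with an empty string gives the per-iteration overhead to subtract.
----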
[cols="<h,4*d", options="header"]
|===
| | Null loop | Stripped binary chunk | Original Sources | Squeezed Sources
| Total Size (bytes) | 0 | 39,294 | 123,640 | 28,793
| Iterations | 312,155 | 9,680 | 1,306 | 1,592
| Duration (sec) | 10 | 10 | 10 | 10
| Time/iteration (msec) | 0.032 | 1.033 | 7.657 | 6.281
| _Time/iteration, null adjusted (msec)_ | | 1.001 | 7.625 | 6.249
| _Load rate (MiB/sec)_ | | 37.44 | 15.46 | 4.39
| Load time per byte (ns) | | 25.5 | 61.7 | 217.0
| Load time per token (ns) | | | 682 | 559
| Source time vs binary chunk time ratio | | 1.00 | 7.62 | 6.24
| Binary chunk rate vs. source rate ratio | | 1.00 | 2.42 | 8.53
|===
The above shows that stripped binary chunks are still, in many ways, the highest-performance form of fixed Lua scripts.
On a very average machine, scripts load at over 37 MiB/sec (in memory).
This is very comparable to the burst speeds of common desktop hard disks of 2008.
If instant response is paramount, stripped binary chunks have little competition.
By contrast, source code that is squeezed to the maximum using LuaSrcDiet can only muster an in-memory load rate of 4.4 MiB/sec.
The original sources load at about 15.5 MiB/sec, but most of the speed is from the lexer scanning over comments and whitespace.
A quick calculation indicates that the speed of the lexer over comments and whitespace can be as much as 65 MiB/sec, but note that the speed is all for naught.
What really matters are the real tokens, and the squeezed source code manages to load faster than the original sources by 18%.
So, the loading of stripped binary chunks is faster than squeezed source code by a bit over 6×.
The 4.4 MiB/sec speed for squeezed source code is still quite respectable.
When an application considers the time taken to load data from the disk and perhaps the time taken to decompress, loading source code may be perfectly fine in terms of performance.
For programs that already embed source code, using LuaSrcDiet to squeeze the source code probably speeds loading up by a tiny bit in addition to making programs smaller.

View File

@ -0,0 +1,386 @@
= Technical Notes
Kein-Hong Man
2011-09-13
== Lexer Notes
The lexer (`llex.lua`) is a version of the native 5.1.x lexer from Yueliang 0.4.0, with significant modifications.
It does have several limitations:
* The decimal point must be `.` (period).
There is no localized decimal point replacement magic.
* There is no support for nested `[[`...`]]` long strings (no `LUA_COMPAT_LSTR`).
* The lexer may not properly lex source code with characters beyond the normal ASCII character set.
Identifiers with accented characters (or any character beyond a byte value of 127) cannot be recognized.
Instead of returning one token on each call, `llex.lua` processes an entire string (all data from an entire file) and returns.
Two lists (tokens and semantic information items) are set up in the module for use by the caller.
For maximum flexibility during processing, the lexer returns non-grammar lexical elements as tokens too.
Non-grammar elements, such as comments, whitespace, line endings, are classified along with “normal” tokens.
The lexer classifies 7 kinds of grammar tokens and 4 kinds of non-grammar tokens, as follows:
[cols="m,d"]
|===
| Grammar Token | Description
| TK_KEYWORD | keywords
| TK_NAME | identifiers
| TK_NUMBER | numbers (unconverted, kept in original form)
| TK_STRING | strings (no translation is done, includes delimiters)
| TK_LSTRING | long strings (no translation is done, includes delimiters)
| TK_OP | operators and punctuation (most single-char, some double)
| TK_EOS | end-of-stream (there is only one for each file/stream)
|===
[cols="m,d"]
|===
| Whitespace Token | Description
| TK_SPACE | whitespace (generally, spaces, \t, \v and \f)
| TK_COMMENT | comments (includes delimiters, also includes special first line shbang, which is handled specially in the optimizer)
| TK_LCOMMENT | block comments (includes delimiters)
| TK_EOL | end-of-lines (excludes those embedded in strings)
|===
A list of tokens can be generated by using the _--dump-lexer_ option, like this:
[source, sh]
lua LuaSrcDiet.lua --dump-lexer llex.lua > dump_llex.dat
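For orientation, the command-line driver consumes the lexer in roughly this way (a sketch; the module name `luasrcdiet.llex` is taken from the rockspec, and `llex.lex` returns the token and semantic-info lists as in the driver script):

[source, lua]
----
local llex = require "luasrcdiet.llex"

local f = assert(io.open("llex.lua", "rb"))
local src = f:read("*a"); f:close()

-- One call lexes the whole file; tokens and their texts come back as parallel lists.
local toklist, seminfolist = llex.lex(src)
for i = 1, #toklist do
  print(toklist[i], seminfolist[i])
end
----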
== Lexer Optimizations
We aim to keep lexer-based optimizations free of parser considerations, i.e. we allow for generalized optimization of token sequences.
The table below considers the requirements for all combinations of significant tokens (except `TK_EOS`).
Other tokens are whitespace-like.
Comments can be considered to be a special kind of whitespace, e.g. a short comment needs to have a following EOL token, if we do not want to optimize away short comments.
[cols="h,6*m", options="header"]
|===
| _1st → 2nd Token_ | Keyword | Name | Number | String | LString | Oper
| Keyword | [S] | [S] | [S] | - | - | -
| Name | [S] | [S] | [S] | - | - | -
| Number | [S] | [S] | [S] | - | - | [1]
| String | - | - | - | - | - | -
| LString | - | - | - | - | - | -
| Oper | - | - | [1] | - | - | [2]
|===
A dash (`-`) in the above means that the first token can abut the second token.
`*[S]*`:: Need at least one whitespace, set as either a space or kept as an EOL.
`*[1]*`::
Need a space if operator is a `.`, all others okay.
A `+` or `-` is used as part of a floating-point spec, but there does not appear to be any way of creating a float by joining a number with a `+` or `-` plus another number.
Since an `e` has to be somewhere in the first token, this can't be done.
`*[2]*`::
Normally there cannot be consecutive operators, but we plan to allow for generalized optimization of token sequences, i.e. even sequences that are grammatically illegal; so disallow adjacent operators if:
* the first is in `[=<>]` and the second is `=`
* disallow dot sequences to be adjacent, but `...` first okay
* disallow `[` followed by `=` or `[` (not optimal)
Also, a minus `-` cannot precede a Comment or LComment, because comments start with a `--` prefix.
Apart from that, all Comment or LComment tokens can abut a real token.
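As a concrete illustration of the `[S]` and `[1]` rules (a hand-made example, not from the original notes):

[source, lua]
----
local n = 1      -- "local" and "n" need a space ([S]); "localn" would lex as one name
print(1 .. 2)    -- rule [1]: the number must not abut "..": "1.." reads as a malformed number
print(n)         -- a name may abut an operator like "(" with no space
----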
== Local Variable Renaming
The following discusses the problem of local variable optimization, specifically _local variable renaming_ in order to reduce source code size.
=== TK_NAME Token Considerations
A `TK_NAME` token means a number of things, and some of these cannot be renamed without analyzing the source code.
We are interested in the use of `TK_NAME` in the following:
[loweralpha]
. global variable access,
. local variable declaration, including `local` statements, `local` functions, function parameters, implicit `self` locals,
. local variable access, including upvalue access.
`TK_NAME` is also used in parts of the grammar as constant strings; these tokens cannot be optimized without user assistance.
These include usage as:
[loweralpha, start=4]
. keys in `key=value` pairs in table construction,
. field or method names in `a:b` or `a.b` syntax forms.
For the local variable name optimization scheme used, we do not consider (d) and (e), and while global variables cannot be renamed without some kind of user assistance, they need to be considered or tracked as part of Lua's variable access scheme.
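A small hand-made snippet showing the five roles (the labels correspond to the list above):

[source, lua]
----
limit = 10                 -- (a) global variable access: `limit`
local count = 0            -- (b) local variable declaration: `count`
count = count + limit      -- (c) local variable access (plus another global access)
local t = { key = count }  -- (d) `key` is a constant-string name in a table constructor
print(t.key)               -- (e) field name in the a.b syntax form
----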
=== Lifetime of a Local Variable
Consider the following example:
[source, lua]
local string, table = string, table
In the example, the two locals are assigned the values of the globals with the same names.
When Lua encounters the declaration portion:
[source, lua]
local string, table
the parser cannot immediately make the two local variables available to the following code.
In the parser and code generator, locals are inactive when entries are created.
They are activated only when the function `adjustlocalvars()` is called to activate the appropriate local variables.
NOTE: The terminology used here may not be identical to the terms used in the Dragon Book; it merely follows the LuaSrcDiet code, which was written before I had read the Dragon Book.
In the example, the two local variables are activated only after the whole statement has been parsed, that is, after the last `table` token.
Hence, the statement works as expected.
Also, once the two local variables goes out of scope, `removevars()` is called to deactivate them, allowing other variables of the same name to become visible again.
Another example worth mentioning is:
[source, lua]
local a, a, a = 1, 2, 3
The above will assign 3 to `a`.
Thus, when optimizing local variable names, (1) we need to consider accesses of global variable names affecting the namespace, (2) for the local variable names themselves, we need to consider when they are declared, activated and removed, and (3) within the “live” time of locals, we need to know when they are accessed (since locals that are never accessed don't really matter).
=== Local Variable Tracking
Every local variable declaration is considered an object to be renamed.
From the parser, we have the original name of the local variable, the token positions for declaration, activation and removal, and the token position for all the `TK_NAME` tokens which references this local.
All instances of the implicit `self` local variable are also flagged as such.
In addition to local variable information, all global variable accesses are tabled, one object entry for one name, and each object has a corresponding list of token positions for the `TK_NAME` tokens, which is where the global variables were accessed.
The key criterion is: *Our act of renaming cannot change the visibility of any of these locals and globals at the time they are accessed*.
However, _their scope of visibility may be changed during periods in which they are not accessed_, so someone who tries to insert a variable reference somewhere into a program that has its locals renamed may find that it now refers to a different variable.
Of course, if every variable has a unique name, then there is no need for a name allocation algorithm, as there will be no conflict.
But, in order to maximize utilization of short identifier names to reduce the final code size, we want to reuse the names as much as possible.
In addition, fewer names will likely reduce symbol entropy and may slightly improve compressibility of the source code.
LuaSrcDiet avoids the use of non-ASCII letters, so there are only 53 single-character variable names.
=== Name Allocation Theory
To understand the renaming algorithm, first we need to establish how different local and global variables can operate happily without interfering with each other.
Consider three objects, local object A, local object B and global object G.
A and B involve declaration, activation and removal, and within the period each is active, there may be zero or more accesses of the local.
For G, there are only global variable accesses to look into.
Assume that we have assigned a new name to A and we wish to consider its effects on other locals and globals, for which we choose B and G as examples.
We assume local B has not been assigned a new name as we expect our algorithm to take care of collisions.
A's lifetime is something like this:
----
Decl Act Rem
+ +-------------------------------+
-------------------------------------------------
----
where “Decl” is the time of declaration, “Act” is the time of activation, and “Rem” is the time of removal.
Between “Act” and “Rem”, the local is alive or “live” and Lua can see it if its corresponding `TK_NAME` identifier comes up.
----
Decl Act Rem
+ +-------------------------------+
-------------------------------------------------
* * * *
(1) (2) (3) (4)
----
Recall that the key criterion is not to change the visibility of globals and locals at the times they are accessed.
Consider local and global accesses at (1), (2), (3) and (4).
A global G of the same name as A will only collide at (3), where Lua will see A and not G.
Since G must be accessed at (3) according to what the parser says, and we cannot modify the positions of “Decl”, “Act” and “Rem”, it follows that A cannot have the same name as G.
----
Decl Act Rem
+ +-----------------------+
---------------------------------
(1)+ +---+ (2)+ +---+ (3)+ +---+ (4)+ +---+
--------- --------- --------- ---------
----
For the case of A and B having the same names and colliding, consider the cases for which B is at (1), (2), (3) or (4) in the above.
(1) and (4) mean that A and B are completely isolated from each other, hence in the two cases A and B can safely use the same variable names.
To be specific, since we have assigned A, B is considered completely isolated from A if B's activation-to-removal period is isolated from the time of A's first access to last access, meaning B's active time will never affect any of A's accesses.
For (2) and (3), we have two cases where we need to consider which one has been activated first.
For (2), B is active before A, so A cannot impose on B.
But A's accesses are valid while B is active, since A can override B.
For no collision in the case of (2), we simply need to ensure that the last access of B occurs before A is activated.
For (3), B is activated before A, hence B can override A's accesses.
For no collision, all of A's accesses cannot happen while B is active.
Thus position (3) follows the “A is never accessed when B is active” rule in a general way.
Local variables of a child function are in the position of (3).
To illustrate, the local B can use the same name as local A and live in a child function or block scope if each time A is accessed, Lua sees A and not B.
So we have to check all accesses of A and see whether they collide with the active period of B.
If A is not accessed during that period, then B can be active with the same name.
The above appears to resolve all sorts of cases where the active times of A and B overlap.
Note that in the above, the allocator does not need to know how locals are separated according to function prototypes.
Perhaps the allocator can be simplified if knowledge of function structure is utilized.
This scheme was implemented in a hurry in 2008 — it could probably be simpler if Lua grammar is considered, but LuaSrcDiet mainly processes various index values in tables.
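A hand-made illustration of the child-scope case discussed above (the names are chosen only for the example):

[source, lua]
----
local outer = 1        -- local A: declared and activated here
do
  local inner = 2      -- local B: active only inside this block
  print(inner)         -- A is never accessed while B is active...
end
print(outer)           -- ...and A's accesses happen while B is inactive,
                       -- so the renamer may safely give A and B the same one-character name
----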
=== Name Allocation Algorithm
To begin with, the name generator is mostly separate from the name allocation algorithm.
The name generator returns the next shortest name for the algorithm to apply to local variables.
To attempt to reduce symbol entropy (which benefits compression algorithms), the name generator follows English frequent letter usage.
There is also an option to calculate an actual symbol entropy table from the input data.
Since there are 53 one-character identifiers and (53 * 63 - 4) two-character identifiers (minus a few keywords), there isn't a pressing need to optimally maximize name reuse.
The single-file version of LuaSrcDiet 0.12.0, at just over 3000 SLOC and 156 kiB in size, currently allocates around 55 unique local variable names.
In theory, we should need no more than 260 local identifiers by default.
Why?
Since `LUAI_MAXVARS` is 200 and `LUAI_MAXUPVALUES` is 60, at any block scope, there can be at most `(LUAI_MAXVARS + LUAI_MAXUPVALUES)` locals referenced, or 260.
Also, those from outer scopes not referenced in inner scopes can reuse identifiers.
The net effect of this is that a local variable name allocation method should not allocate more than 260 identifier names for locals.
The current algorithm is a simple first-come first-served scheme:
[loweralpha]
. The local object that uses the most tokens is named first.
. Any other non-conflicting locals with respect to the first object are assigned the same name.
. Assigned locals are removed from consideration and the procedure is repeated for objects that have not been assigned new names.
. Steps (a) to (c) repeat until no local objects are left.
In addition, there are a few extra issues to take care of:
[loweralpha, start=5]
. Implicit `self` locals that have been flagged as such are already “assigned to” and so they are left unmodified.
. The name generator skips `self` to avoid conflicts.
This is not optimal but it is unlikely a script will use so many local variables as to reach `self`.
. Keywords are also skipped for the name generator.
. Global name conflict resolution.
For (h), global name conflict resolution is handled just after the new name is generated.
The name can still be used for some locals even if it conflicts with other locals.
To remove conflicts, global variable accesses for the particular identifier name are checked.
Any local variables that are active when a global access is made are marked to be skipped.
The rest of the local objects can then use that name.
The algorithm has additional code for handling locals that use the same name in the same scope.
This extends the basic algorithm that was discussed earlier.
For example:
[source, lua]
----
local foo = 10 -- <1>
...
local foo = 20 -- <2>
...
print(e)
----
Since we are considering name visibility, the first `foo` does not really cease to exist when the second `foo` is declared, because if we were to make that assumption, and the first `foo` is removed before (2), then I should be able to use `e` as the name for the first `foo` and after (2), it should not conflict with variables in the outer scope with the same name.
To illustrate:
[source, lua]
----
local e = 10 -- 'foo' renamed to 'e'
...
local t = 20 -- error if we assumed 'e' removed here
...
print(e)
----
Since `e` is a global in the example, we now have an error, as the name has been taken over by a local.
Thus, the first `foo` local must have its active time extend to the end of the current scope.
If there is no conflict between the first and second `foo`, the algorithm may still assign the same names to them.
The current fix to deal with the above chains local objects in order to find the removal position.
It may be possible to handle this in a cleaner manner; LuaSrcDiet handles it as a fix to the basic algorithm.
== Ideas
The following is a list of optimization ideas that do not require heavy-duty source code parsing and comprehension.
=== Lexer-Based Optimization Ideas
* Convert long strings to normal strings, and vice versa. +
_A little desperate for a few bytes; it can be done, but I am not really keen on implementing it._
* Special number forms to take advantage of constant number folding. +
_For example, 65536 can be represented using 2^16^, and so on.
An expression must be evaluated in the same way, otherwise this seems unsafe._
* Warn if a number has too many digits. +
_Should we warn or “test and truncate”?
Not really an optimization that will see much use._
* Warn of opportunity for using a `local` to zap a bunch of globals. +
_Current recommendation is to use the HTML plugin to display globals in red.
The developer can then visually analyze the source code and make the appropriate fixes.
I think this is better than having the program guess the intentions of the developer._
* Spaces to tabs in comments, long comments, or long strings. +
_For long strings, we need to know the user's intention.
Would rather not implement._
=== Parser-Based Optimization Ideas
Heavy-duty optimizations will need more data to be generated by the parser.
A full AST may eventually be needed.
The most attractive idea that can be quickly implemented with a significant code size “win” is to reduce the number of `local` keywords.
* Remove unused ``local``s that can be removed in the source. +
_Need to consider unused ``local``s in multiple assignments._
* Simplify declaration of ``local``s that can be merged. +
_From:_
+
[source, lua]
----
-- separate locals
local foo
local bar
-- separate locals with assignments
local foo = 123
local bar = "pqr"
----
+
_To:_
+
[source, lua]
----
-- merged locals
local foo,bar
-- merged locals with assignments
local foo,bar=123,"pqr"
----
* Simplify declarations using `nil`. +
_From:_
[source, lua]
local foo, bar = nil, nil
+
_To:_
[source, lua]
local foo,bar
* Simplify ``return``s using `nil`. +
_How desirable is this? From Lua list discussions, it seems to be potentially unsafe unless all return locations are known and checked._
* Removal of optional semicolons in statements and removal of commas or semicolons in table constructors. +
_Yeah, this might save a few bytes._
* Remove table constructor elements using `nil`. +
_Not sure if this is safe to do._
* Simplify logical or relational operator expressions. +
_This is more suitable for an optimizing compiler project._

View File

@ -0,0 +1,41 @@
-- vim: set ft=lua:
package = 'LuaSrcDiet'
version = '0.3.0-2'
source = { url = 'https://github.com/jirutka/luasrcdiet/archive/v0.3.0/luasrcdiet-0.3.0.tar.gz', md5 = 'c0ff36ef66cd0568c96bc54e9253a8fa' }
description = {
summary = 'Compresses Lua source code by removing unnecessary characters',
detailed = [[
This is revival of LuaSrcDiet originally written by Kein-Hong Man.]],
homepage = 'https://github.com/jirutka/luasrcdiet',
maintainer = 'Jakub Jirutka <jakub@jirutka.cz>',
license = 'MIT',
}
dependencies = {
'lua >= 5.1',
}
build = {
type = 'builtin',
modules = {
['luasrcdiet'] = 'luasrcdiet/init.lua',
['luasrcdiet.equiv'] = 'luasrcdiet/equiv.lua',
['luasrcdiet.fs'] = 'luasrcdiet/fs.lua',
['luasrcdiet.llex'] = 'luasrcdiet/llex.lua',
['luasrcdiet.lparser'] = 'luasrcdiet/lparser.lua',
['luasrcdiet.optlex'] = 'luasrcdiet/optlex.lua',
['luasrcdiet.optparser'] = 'luasrcdiet/optparser.lua',
['luasrcdiet.plugin.example'] = 'luasrcdiet/plugin/example.lua',
['luasrcdiet.plugin.html'] = 'luasrcdiet/plugin/html.lua',
['luasrcdiet.plugin.sloc'] = 'luasrcdiet/plugin/sloc.lua',
['luasrcdiet.utils'] = 'luasrcdiet/utils.lua',
},
install = {
bin = {
luasrcdiet = 'bin/luasrcdiet',
}
}
}

View File

@ -0,0 +1,28 @@
rock_manifest = {
bin = {
luasrcdiet = "6c318685d57f827cf5baf7037a5d6072"
},
doc = {
["features-and-usage.adoc"] = "157587c27a0c340d9d1dd06af9b339b5",
["performance-stats.adoc"] = "cf5f96a86e021a3a584089fafcabd056",
["tech-notes.adoc"] = "075bc34e667a0055e659e656baa2365a"
},
lua = {
luasrcdiet = {
["equiv.lua"] = "967a6b17573d229e326dbb740ad7fe8c",
["fs.lua"] = "53db7dfc50d026b683fad68ed70ead0f",
["init.lua"] = "c6f368e6cf311f3257067fed0fbcd06a",
["llex.lua"] = "ede897af261fc362a82d87fbad91ea2b",
["lparser.lua"] = "c1e1f04d412b79a040fd1c2b74112953",
["optlex.lua"] = "7c986da991a338494c36770b4a30fa9f",
["optparser.lua"] = "b125a271ac1c691dec68b63019b1b5da",
plugin = {
["example.lua"] = "86b5c1e9dc7959db6b221d6d5a0db3d1",
["html.lua"] = "c0d3336a133f0c8663f395ee98d54f6a",
["sloc.lua"] = "fb1a91b18b701ab83f21c87733be470a"
},
["utils.lua"] = "bd6c1e85c6a9bf3383d336a4797fb292"
}
},
["luasrcdiet-0.3.0-2.rockspec"] = "da70047e1b0cbdc1ff08d060327fa110"
}

View File

@ -0,0 +1,270 @@
return [[html {
color: #000;
background: #FFF;
}
body,div,dl,dt,dd,ul,ol,li,h1,h2,h3,h4,h5,h6,pre,code,form,fieldset,legend,input,button,textarea,p,blockquote,th,td {
margin: 0;
padding: 0;
}
table {
border-collapse: collapse;
border-spacing: 0;
}
fieldset,img {
border: 0;
}
address,caption,cite,code,dfn,em,strong,th,var,optgroup {
font-style: inherit;
font-weight: inherit;
}
del,ins {
text-decoration: none;
}
li {
list-style: bullet;
margin-left: 20px;
}
caption,th {
text-align: left;
}
h1,h2,h3,h4,h5,h6 {
font-size: 100%;
font-weight: bold;
}
q:before,q:after {
content: '';
}
abbr,acronym {
border: 0;
font-variant: normal;
}
sup {
vertical-align: baseline;
}
sub {
vertical-align: baseline;
}
legend {
color: #000;
}
input,button,textarea,select,optgroup,option {
font-family: inherit;
font-size: inherit;
font-style: inherit;
font-weight: inherit;
}
input,button,textarea,select {*font-size:100%;
}
/* END RESET */
body {
margin-left: 1em;
margin-right: 1em;
font-family: arial, helvetica, geneva, sans-serif;
background-color: #ffffff; margin: 0px;
}
code, tt { font-family: monospace; }
body, p, td, th { font-size: .95em; line-height: 1.2em;}
p, ul { margin: 10px 0 0 10px;}
strong { font-weight: bold;}
em { font-style: italic;}
h1 {
font-size: 1.5em;
margin: 25px 0 20px 0;
}
h2, h3, h4 { margin: 15px 0 10px 0; }
h2 { font-size: 1.25em; }
h3 { font-size: 1.15em; }
h4 { font-size: 1.06em; }
a:link { font-weight: bold; color: #004080; text-decoration: none; }
a:visited { font-weight: bold; color: #006699; text-decoration: none; }
a:link:hover { text-decoration: underline; }
hr {
color:#cccccc;
background: #00007f;
height: 1px;
}
blockquote { margin-left: 3em; }
ul { list-style-type: disc; }
p.name {
font-family: "Andale Mono", monospace;
padding-top: 1em;
}
p:first-child {
margin-top: 0px;
}
pre.example {
background-color: rgb(245, 245, 245);
border: 1px solid silver;
padding: 10px;
margin: 10px 0 10px 0;
font-family: "Andale Mono", monospace;
font-size: .85em;
}
pre {
background-color: rgb(245, 245, 245);
border: 1px solid silver;
padding: 10px;
margin: 10px 0 10px 0;
font-family: "Andale Mono", monospace;
}
table.index { border: 1px #00007f; }
table.index td { text-align: left; vertical-align: top; }
#container {
margin-left: 1em;
margin-right: 1em;
background-color: #f0f0f0;
}
#product {
text-align: center;
border-bottom: 1px solid #cccccc;
background-color: #ffffff;
}
#product big {
font-size: 2em;
}
#main {
background-color: #f0f0f0;
border-left: 2px solid #cccccc;
}
#navigation {
float: left;
width: 18em;
vertical-align: top;
background-color: #f0f0f0;
overflow: scroll;
position: fixed;
height:100%;
}
#navigation h2 {
background-color:#e7e7e7;
font-size:1.1em;
color:#000000;
text-align: left;
padding:0.2em;
border-top:1px solid #dddddd;
border-bottom:1px solid #dddddd;
}
#navigation ul
{
font-size:1em;
list-style-type: none;
margin: 1px 1px 10px 1px;
}
#navigation li {
text-indent: -1em;
display: block;
margin: 3px 0px 0px 22px;
}
#navigation li li a {
margin: 0px 3px 0px -1em;
}
#content {
margin-left: 18em;
padding: 1em;
border-left: 2px solid #cccccc;
border-right: 2px solid #cccccc;
background-color: #ffffff;
}
#about {
clear: both;
padding: 5px;
border-top: 2px solid #cccccc;
background-color: #ffffff;
}
@media print {
body {
font: 12pt "Times New Roman", "TimeNR", Times, serif;
}
a { font-weight: bold; color: #004080; text-decoration: underline; }
#main {
background-color: #ffffff;
border-left: 0px;
}
#container {
margin-left: 2%;
margin-right: 2%;
background-color: #ffffff;
}
#content {
padding: 1em;
background-color: #ffffff;
}
#navigation {
display: none;
}
pre.example {
font-family: "Andale Mono", monospace;
font-size: 10pt;
page-break-inside: avoid;
}
}
table.module_list {
border-width: 1px;
border-style: solid;
border-color: #cccccc;
border-collapse: collapse;
}
table.module_list td {
border-width: 1px;
padding: 3px;
border-style: solid;
border-color: #cccccc;
}
table.module_list td.name { background-color: #f0f0f0; }
table.module_list td.summary { width: 100%; }
table.function_list {
border-width: 1px;
border-style: solid;
border-color: #cccccc;
border-collapse: collapse;
}
table.function_list td {
border-width: 1px;
padding: 3px;
border-style: solid;
border-color: #cccccc;
}
table.function_list td.name { background-color: #f0f0f0; }
table.function_list td.summary { width: 100%; }
dl.table dt, dl.function dt {border-top: 1px solid #ccc; padding-top: 1em;}
dl.table dd, dl.function dd {padding-bottom: 1em; margin: 10px 0 0 20px;}
dl.table h3, dl.function h3 {font-size: .95em;}
]]

View File

@ -0,0 +1,87 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
--
-- Load documentation generator and update its path
--
local templateengine = require 'templateengine'
for name, def in pairs( require 'template.utils' ) do
templateengine.env [ name ] = def
end
-- Load documentation extractor and set handled languages
local lddextractor = require 'lddextractor'
local M = {}
M.defaultsitemainpagename = 'index'
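--
-- Generate documentation for the given file names: build an API model for
-- each file, add an index page summarizing all modules, then render every
-- module through the template engine. Returns the table of generated
-- modules and a list of error messages for files that could not be processed.
--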
function M.generatedocforfiles(filenames, cssname,noheuristic)
if not filenames then return nil, 'No files provided.' end
--
-- Generate API model elements for all files
--
local generatedfiles = {}
local wrongfiles = {}
for _, filename in pairs( filenames ) do
-- Load file content
local file, err = io.open(filename, 'r')
if not file then return nil, 'Unable to read "'..filename..'"\n'..err end
local code = file:read('*all')
file:close()
-- Get module for current file
local apimodule, err = lddextractor.generateapimodule(filename, code,noheuristic)
-- Handle modules with module name
if apimodule and apimodule.name then
generatedfiles[ apimodule.name ] = apimodule
elseif not apimodule then
-- Track faulty files
table.insert(wrongfiles, 'Unable to extract comments from "'..filename..'".\n'..err)
elseif not apimodule.name then
-- Do not generate documentation for unnamed modules
table.insert(wrongfiles, 'Unable to create documentation for "'..filename..'", no module name provided.')
end
end
--
-- Defining index, which will summarize all modules
--
local index = {
modules = generatedfiles,
name = M.defaultsitemainpagename,
tag='index'
}
generatedfiles[ M.defaultsitemainpagename ] = index
--
-- Define page cursor
--
local page = {
currentmodule = nil,
headers = { [[<link rel="stylesheet" href="]].. cssname ..[[" type="text/css"/>]] },
modules = generatedfiles,
tag = 'page'
}
--
-- Iterate over modules, generating complete doc pages
--
for _, module in pairs( generatedfiles ) do
-- Update current cursor page
page.currentmodule = module
-- Generate page
local content, error = templateengine.applytemplate(page)
if not content then return nil, error end
module.body = content
end
return generatedfiles, wrongfiles
end
return M

View File

@ -0,0 +1,102 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
local M = {}
require 'metalua.loader'
local compiler = require 'metalua.compiler'
local mlc = compiler.new()
local Q = require 'metalua.treequery'
-- Enable to retrieve all Javadoc-like comments from C code
function M.c(code)
if not code then return nil, 'No code provided' end
local comments = {}
-- Loop over comments stripping cosmetic '*'
for comment in code:gmatch('%s*/%*%*+(.-)%*+/') do
-- All Lua special comments are prefixed with a '-',
-- so we also prefix C comments to make them compliant
table.insert(comments, '-'..comment)
end
return comments
end
-- Enable to retrieve "---" comments from Lua code
function M.lua( code )
if not code then return nil, 'No code provided' end
-- manage shebang
if code then code = code:gsub("^(#.-\n)", function (s) return string.rep(' ',string.len(s)) end) end
-- check for errors
local f, err = loadstring(code,'source_to_check')
if not f then
return nil, 'Syntax error.\n' .. err
end
-- Get ast from file
local status, ast = pcall(mlc.src_to_ast, mlc, code)
--
-- Detect parsing errors
--
if not status then
return nil, 'There might be a syntax error.\n' .. ast
end
--
-- Extract commented nodes from AST
--
-- Function enabling commented node selection
local function acceptcommentednode(node)
return node.lineinfo and ( node.lineinfo.last.comments or node.lineinfo.first.comments )
end
-- Fetch commented node from AST
local commentednodes = Q(ast):filter( acceptcommentednode ):list()
-- Comment cache to avoid selecting same comment twice
local commentcache = {}
-- Will contain selected comments
local comments = {}
-- Loop over commented nodes
for _, node in ipairs( commentednodes ) do
-- A node can be related to comments before and after itself,
-- the following gathers them.
local commentlists = {}
if node.lineinfo and node.lineinfo.first.comments then
table.insert(commentlists, node.lineinfo.first.comments)
end
if node.lineinfo and node.lineinfo.last.comments then
table.insert(commentlists, node.lineinfo.last.comments)
end
-- Now that we have comments before and after the node,
-- collect them in a single table
for _, list in ipairs( commentlists ) do
for _, commenttable in ipairs(list) do
-- Only select special comments
local firstcomment = #commenttable > 0 and #commenttable[1] > 0 and commenttable[1]
if firstcomment and firstcomment:sub(1, 1) == '-' then
for _, comment in ipairs( commenttable ) do
-- Only comments which were not already collected
if not commentcache[comment] then
commentcache[comment] = true
table.insert(comments, comment)
end
end
end
end
end
end
return comments
end
return M

View File

@ -0,0 +1,130 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
local lfs = require 'lfs'
local M = {}
local function iswindows()
local p = io.popen("echo %os%")
if not p then
return false
end
local result = p:read("*l")
p:close()
return result == "Windows_NT"
end
M.separator = iswindows() and [[\]] or [[/]]
---
-- Will recursively browse given directories and list files encountered
-- @param tab Table, list where files will be added
-- @param dirorfiles list of path to browse in order to build list.
-- Files from this list will be added to <code>tab</code> list.
-- @return <code>tab</code> list, table containing all files from directories
-- and files contained in <code>dirorfile</code>
local function appendfiles(tab, dirorfile)
-- Nothing to process
if #dirorfile < 1 then return tab end
-- Append all files to list
local dirs = {}
for _, path in ipairs( dirorfile ) do
-- Determine element nature
local elementnature = lfs.attributes (path, "mode")
-- Handle files
if elementnature == 'file' then
table.insert(tab, path)
else if elementnature == 'directory' then
-- Check if folder is accessible
local status, error = pcall(lfs.dir, path)
if not status then return nil, error end
--
-- Handle folders
--
for diskelement in lfs.dir(path) do
-- Format current file name
local currentfilename
if path:sub(#path) == M.separator then
currentfilename = path .. diskelement
else
currentfilename = path .. M.separator .. diskelement
end
-- Handle folder elements
local nature, err = lfs.attributes (currentfilename, "mode")
-- Append file to current list
if nature == 'file' then
table.insert(tab, currentfilename)
elseif nature == 'directory' then
-- Avoid current and parent directory in order to avoid
-- endless recursion
if diskelement ~= '.' and diskelement ~= '..' then
-- Handle subfolders
table.insert(dirs, currentfilename)
end
end
end
end
end
end
-- If we only encountered files, going deeper is useless
if #dirs == 0 then return tab end
-- Append files from encountered directories
return appendfiles(tab, dirs)
end
---
-- Provide a list of files from a directory
-- @param list Table of directories to browse
-- @return table of string, path to files contained in given directories
function M.filelist(list)
if not list then return nil, 'No directory list provided' end
return appendfiles({}, list)
end
function M.checkdirectory( dirlist )
if not dirlist then return false end
local missingdirs = {}
for _, filename in ipairs( dirlist ) do
if not lfs.attributes(filename, 'mode') then
table.insert(missingdirs, filename)
end
end
if #missingdirs > 0 then
return false, missingdirs
end
return true
end
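---
-- Write given content to a file, creating its parent directory when missing
-- @param filename Path of the file to write
-- @param content String to write into the file
-- @return true on success, or nil and an error message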
function M.fill(filename, content)
--
-- Ensure parent directory exists
--
local parent = filename:gmatch([[(.*)]] .. M.separator ..[[(.+)]])()
local parentnature = lfs.attributes(parent, 'mode')
-- Create parent directory while absent
if not parentnature then
lfs.mkdir( parent )
elseif parentnature ~= 'directory' then
-- Notify that disk element already exists
return nil, parent..' is a '..parentnature..'.'
end
-- Create actual file
local file, error = io.open(filename, 'w')
if not file then
return nil, error
end
file:write( content )
file:close()
return true
end
return M

View File

@ -0,0 +1,113 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
require 'metalua.loader'
local compiler = require 'metalua.compiler'
local mlc = compiler.new()
local M = {}
--
-- Define default supported languages
--
M.supportedlanguages = {}
local extractors = require 'extractors'
-- Support Lua comment extracting
M.supportedlanguages['lua'] = extractors.lua
-- Support C comment extracting
for _,c in ipairs({'c', 'cpp', 'c++'}) do
M.supportedlanguages[c] = extractors.c
end
-- Extract comment from code,
-- type of code is deduced from filename extension
function M.extract(filename, code)
-- Check parameters
if not code then return nil, 'No code provided' end
if type(filename) ~= "string" then
return nil, 'No string for file name provided'
end
-- Extract file extension
local fileextension = filename:gmatch('.*%.(.*)')()
if not fileextension then
return nil, 'File '..filename..' has no extension, could not determine how to extract documentation.'
end
-- Check if it is possible to extract documentation from these files
local extractor = M.supportedlanguages[ fileextension ]
if not extractor then
return nil, 'Unable to extract documentation from '.. fileextension .. ' file.'
end
return extractor( code )
end
-- Generate a file gathering only comments from given code
function M.generatecommentfile(filename, code)
local comments, error = M.extract(filename, code)
if not comments then
return nil, 'Unable to generate comment file.\n'..error
end
local filecontent = {}
for _, comment in ipairs( comments ) do
table.insert(filecontent, "--[[")
table.insert(filecontent, comment)
table.insert(filecontent, "\n]]\n\n")
end
return table.concat(filecontent)..'return nil\n'
end
-- Create API Model module from a 'comment only' lua file
function M.generateapimodule(filename, code,noheuristic)
if not filename then return nil, 'No file name given.' end
if not code then return nil, 'No code provided.' end
if type(filename) ~= "string" then return nil, 'No string for file name provided' end
-- for non lua file get comment file
if filename:gmatch('.*%.(.*)')() ~= 'lua' then
local err
code, err = M.generatecommentfile(filename, code)
if not code then
return nil, 'Unable to create api module for "'..filename..'".\n'..err
end
else
-- manage shebang
if code then code = code:gsub("^(#.-\n)", function (s) return string.rep(' ',string.len(s)) end) end
-- check for errors
local f, err = loadstring(code,'source_to_check')
if not f then
return nil, 'File '..filename..' contains a syntax error.\n' .. err
end
end
local status, ast = pcall(mlc.src_to_ast, mlc, code)
if not status then
return nil, 'Unable to compute ast for "'..filename..'".\n'..ast
end
-- Extract module name as the filename without extension
local modulename
local matcher = string.gmatch(filename,'.*/(.*)%..*$')
if matcher then modulename = matcher() end
-- Create api model
local apimodelbuilder = require 'models.apimodelbuilder'
local _file, comment2apiobj = apimodelbuilder.createmoduleapi(ast, modulename)
-- Create internal model
if not noheuristic then
local internalmodelbuilder = require "models.internalmodelbuilder"
local _internalcontent = internalmodelbuilder.createinternalcontent(ast,_file,comment2apiobj, modulename)
end
return _file
end
return M

View File

@ -0,0 +1,465 @@
---------
-- Source and binary equivalency comparisons
--
-- **Notes:**
--
-- * Intended as an extra safety check for mission-critical code,
-- should give affirmative results if everything works.
-- * Heavy on load() and string.dump(), which may be slowish,
-- and may cause problems for cross-compiled applications.
-- * Optional detailed information dump is mainly for debugging,
-- reason being, if the two are not equivalent when they should be,
-- then some form of optimization has failed.
-- * source: IMPORTANT: TK_NAME not compared if opt-locals enabled.
-- * binary: IMPORTANT: Some shortcuts are taken with int and size_t
-- value reading -- if the functions break, then the binary chunk
-- is very large indeed.
-- * binary: There is a lack of diagnostic information when a compare
-- fails; you can use ChunkSpy and compare using visual diff.
----
local byte = string.byte
local dump = string.dump
local load = loadstring or load --luacheck: ignore 113
local sub = string.sub
local M = {}
local is_realtoken = { -- significant (grammar) tokens
TK_KEYWORD = true,
TK_NAME = true,
TK_NUMBER = true,
TK_STRING = true,
TK_LSTRING = true,
TK_OP = true,
TK_EOS = true,
}
local option, llex, warn
--- The initialization function.
--
-- @tparam {[string]=bool,...} _option
-- @tparam luasrcdiet.llex _llex
-- @tparam table _warn
function M.init(_option, _llex, _warn)
option = _option
llex = _llex
warn = _warn
end
--- Builds lists containing a 'normal' lexer stream.
--
-- @tparam string s The source code.
-- @treturn table
-- @treturn table
local function build_stream(s)
local stok, sseminfo = llex.lex(s) -- source list (with whitespace elements)
local tok, seminfo -- processed list (real elements only)
= {}, {}
for i = 1, #stok do
local t = stok[i]
if is_realtoken[t] then
tok[#tok + 1] = t
seminfo[#seminfo + 1] = sseminfo[i]
end
end--for
return tok, seminfo
end
-- Tests source (lexer stream) equivalence.
--
-- @tparam string z
-- @tparam string dat
function M.source(z, dat)
-- Returns a dumped string for seminfo compares.
local function dumpsem(s)
local sf = load("return "..s, "z")
if sf then
return dump(sf)
end
end
-- Marks and optionally reports non-equivalence.
local function bork(msg)
if option.DETAILS then print("SRCEQUIV: "..msg) end
warn.SRC_EQUIV = true
end
-- Get lexer streams for both source strings, compare.
local tok1, seminfo1 = build_stream(z) -- original
local tok2, seminfo2 = build_stream(dat) -- compressed
-- Compare shbang lines ignoring EOL.
local sh1 = z:match("^(#[^\r\n]*)")
local sh2 = dat:match("^(#[^\r\n]*)")
if sh1 or sh2 then
if not sh1 or not sh2 or sh1 ~= sh2 then
bork("shbang lines different")
end
end
-- Compare by simple count.
if #tok1 ~= #tok2 then
bork("count "..#tok1.." "..#tok2)
return
end
-- Compare each element the best we can.
for i = 1, #tok1 do
local t1, t2 = tok1[i], tok2[i]
local s1, s2 = seminfo1[i], seminfo2[i]
if t1 ~= t2 then -- by type
bork("type ["..i.."] "..t1.." "..t2)
break
end
if t1 == "TK_KEYWORD" or t1 == "TK_NAME" or t1 == "TK_OP" then
if t1 == "TK_NAME" and option["opt-locals"] then
-- can't compare identifiers of locals that are optimized
elseif s1 ~= s2 then -- by semantic info (simple)
bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
break
end
elseif t1 == "TK_EOS" then
-- no seminfo to compare
else-- "TK_NUMBER" or "TK_STRING" or "TK_LSTRING"
-- compare 'binary' form, so dump a function
local s1b,s2b = dumpsem(s1), dumpsem(s2)
if not s1b or not s2b or s1b ~= s2b then
bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
break
end
end
end--for
-- Successful comparison if end is reached with no borks.
end
--- Tests binary chunk equivalence (only for PUC Lua 5.1).
--
-- @tparam string z
-- @tparam string dat
function M.binary(z, dat)
local TNIL = 0 --luacheck: ignore 211
local TBOOLEAN = 1
local TNUMBER = 3
local TSTRING = 4
-- sizes of data types
local endian
local sz_int
local sz_sizet
local sz_inst
local sz_number
local getint
local getsizet
-- Marks and optionally reports non-equivalence.
local function bork(msg)
if option.DETAILS then print("BINEQUIV: "..msg) end
warn.BIN_EQUIV = true
end
-- Checks if bytes exist.
local function ensure(c, sz)
if c.i + sz - 1 > c.len then return end
return true
end
-- Skips some bytes.
local function skip(c, sz)
if not sz then sz = 1 end
c.i = c.i + sz
end
-- Returns a byte value.
local function getbyte(c)
local i = c.i
if i > c.len then return end
local d = sub(c.dat, i, i)
c.i = i + 1
return byte(d)
end
-- Return an int value (little-endian).
local function getint_l(c)
local n, scale = 0, 1
if not ensure(c, sz_int) then return end
for _ = 1, sz_int do
n = n + scale * getbyte(c)
scale = scale * 256
end
return n
end
-- Returns an int value (big-endian).
local function getint_b(c)
local n = 0
if not ensure(c, sz_int) then return end
for _ = 1, sz_int do
n = n * 256 + getbyte(c)
end
return n
end
-- Returns a size_t value (little-endian).
local function getsizet_l(c)
local n, scale = 0, 1
if not ensure(c, sz_sizet) then return end
for _ = 1, sz_sizet do
n = n + scale * getbyte(c)
scale = scale * 256
end
return n
end
-- Returns a size_t value (big-endian).
local function getsizet_b(c)
local n = 0
if not ensure(c, sz_sizet) then return end
for _ = 1, sz_sizet do
n = n * 256 + getbyte(c)
end
return n
end
-- Returns a block (as a string).
local function getblock(c, sz)
local i = c.i
local j = i + sz - 1
if j > c.len then return end
local d = sub(c.dat, i, j)
c.i = i + sz
return d
end
-- Returns a string.
local function getstring(c)
local n = getsizet(c)
if not n then return end
if n == 0 then return "" end
return getblock(c, n)
end
-- Compares byte value.
local function goodbyte(c1, c2)
local b1, b2 = getbyte(c1), getbyte(c2)
if not b1 or not b2 or b1 ~= b2 then
return
end
return b1
end
-- Compares byte value.
local function badbyte(c1, c2)
local b = goodbyte(c1, c2)
if not b then return true end
end
-- Compares int value.
local function goodint(c1, c2)
local i1, i2 = getint(c1), getint(c2)
if not i1 or not i2 or i1 ~= i2 then
return
end
return i1
end
-- Recursively-called function to compare function prototypes.
local function getfunc(c1, c2)
-- source name (ignored)
if not getstring(c1) or not getstring(c2) then
bork("bad source name"); return
end
-- linedefined (ignored)
if not getint(c1) or not getint(c2) then
bork("bad linedefined"); return
end
-- lastlinedefined (ignored)
if not getint(c1) or not getint(c2) then
bork("bad lastlinedefined"); return
end
if not (ensure(c1, 4) and ensure(c2, 4)) then
bork("prototype header broken")
end
-- nups (compared)
if badbyte(c1, c2) then
bork("bad nups"); return
end
-- numparams (compared)
if badbyte(c1, c2) then
bork("bad numparams"); return
end
-- is_vararg (compared)
if badbyte(c1, c2) then
bork("bad is_vararg"); return
end
-- maxstacksize (compared)
if badbyte(c1, c2) then
bork("bad maxstacksize"); return
end
-- code (compared)
local ncode = goodint(c1, c2)
if not ncode then
bork("bad ncode"); return
end
local code1 = getblock(c1, ncode * sz_inst)
local code2 = getblock(c2, ncode * sz_inst)
if not code1 or not code2 or code1 ~= code2 then
bork("bad code block"); return
end
-- constants (compared)
local nconst = goodint(c1, c2)
if not nconst then
bork("bad nconst"); return
end
for _ = 1, nconst do
local ctype = goodbyte(c1, c2)
if not ctype then
bork("bad const type"); return
end
if ctype == TBOOLEAN then
if badbyte(c1, c2) then
bork("bad boolean value"); return
end
elseif ctype == TNUMBER then
local num1 = getblock(c1, sz_number)
local num2 = getblock(c2, sz_number)
if not num1 or not num2 or num1 ~= num2 then
bork("bad number value"); return
end
elseif ctype == TSTRING then
local str1 = getstring(c1)
local str2 = getstring(c2)
if not str1 or not str2 or str1 ~= str2 then
bork("bad string value"); return
end
end
end
-- prototypes (compared recursively)
local nproto = goodint(c1, c2)
if not nproto then
bork("bad nproto"); return
end
for _ = 1, nproto do
if not getfunc(c1, c2) then
bork("bad function prototype"); return
end
end
-- debug information (ignored)
-- lineinfo (ignored)
local sizelineinfo1 = getint(c1)
if not sizelineinfo1 then
bork("bad sizelineinfo1"); return
end
local sizelineinfo2 = getint(c2)
if not sizelineinfo2 then
bork("bad sizelineinfo2"); return
end
if not getblock(c1, sizelineinfo1 * sz_int) then
bork("bad lineinfo1"); return
end
if not getblock(c2, sizelineinfo2 * sz_int) then
bork("bad lineinfo2"); return
end
-- locvars (ignored)
local sizelocvars1 = getint(c1)
if not sizelocvars1 then
bork("bad sizelocvars1"); return
end
local sizelocvars2 = getint(c2)
if not sizelocvars2 then
bork("bad sizelocvars2"); return
end
for _ = 1, sizelocvars1 do
if not getstring(c1) or not getint(c1) or not getint(c1) then
bork("bad locvars1"); return
end
end
for _ = 1, sizelocvars2 do
if not getstring(c2) or not getint(c2) or not getint(c2) then
bork("bad locvars2"); return
end
end
-- upvalues (ignored)
local sizeupvalues1 = getint(c1)
if not sizeupvalues1 then
bork("bad sizeupvalues1"); return
end
local sizeupvalues2 = getint(c2)
if not sizeupvalues2 then
bork("bad sizeupvalues2"); return
end
for _ = 1, sizeupvalues1 do
if not getstring(c1) then bork("bad upvalues1"); return end
end
for _ = 1, sizeupvalues2 do
if not getstring(c2) then bork("bad upvalues2"); return end
end
return true
end
-- Removes shbang line so that load runs.
local function zap_shbang(s)
local shbang = s:match("^(#[^\r\n]*\r?\n?)")
if shbang then -- cut out shbang
s = sub(s, #shbang + 1)
end
return s
end
-- Attempt to compile, then dump to get binary chunk string.
local cz = load(zap_shbang(z), "z")
if not cz then
bork("failed to compile original sources for binary chunk comparison")
return
end
local cdat = load(zap_shbang(dat), "z")
if not cdat then
bork("failed to compile compressed result for binary chunk comparison")
return
end
-- if load() works, dump assuming string.dump() is error-free
local c1 = { i = 1, dat = dump(cz) }
c1.len = #c1.dat
local c2 = { i = 1, dat = dump(cdat) }
c2.len = #c2.dat
-- Parse binary chunks to verify equivalence.
-- * For headers, handle sizes to allow a degree of flexibility.
-- * Assume a valid binary chunk is generated, since it was not
-- generated via external means.
if not (ensure(c1, 12) and ensure(c2, 12)) then
bork("header broken")
end
skip(c1, 6) -- skip signature(4), version, format
endian = getbyte(c1) -- 1 = little endian
sz_int = getbyte(c1) -- get data type sizes
sz_sizet = getbyte(c1)
sz_inst = getbyte(c1)
sz_number = getbyte(c1)
skip(c1) -- skip integral flag
skip(c2, 12) -- skip other header (assume similar)
if endian == 1 then -- set for endian sensitive data we need
getint = getint_l
getsizet = getsizet_l
else
getint = getint_b
getsizet = getsizet_b
end
getfunc(c1, c2) -- get prototype at root
if c1.i ~= c1.len + 1 then
bork("inconsistent binary chunk1"); return
elseif c2.i ~= c2.len + 1 then
bork("inconsistent binary chunk2"); return
end
-- Successful comparison if end is reached with no borks.
end
return M

View File

@ -0,0 +1,74 @@
---------
-- Utility functions for operations on a file system.
--
-- **Note: This module is not part of public API!**
----
local fmt = string.format
local open = io.open
local UTF8_BOM = '\239\187\191'
local function normalize_io_error (name, err)
if err:sub(1, #name + 2) == name..': ' then
err = err:sub(#name + 3)
end
return err
end
local M = {}
--- Reads the specified file and returns its content as string.
--
-- @tparam string filename Path of the file to read.
-- @tparam string mode The mode in which to open the file, see @{io.open} (default: "r").
-- @treturn[1] string A content of the file.
-- @treturn[2] nil
-- @treturn[2] string An error message.
function M.read_file (filename, mode)
local handler, err = open(filename, mode or 'r')
if not handler then
return nil, fmt('Could not open %s for reading: %s',
filename, normalize_io_error(filename, err))
end
local content, err = handler:read('*a') --luacheck: ignore 411
if not content then
return nil, fmt('Could not read %s: %s', filename, normalize_io_error(filename, err))
end
handler:close()
if content:sub(1, #UTF8_BOM) == UTF8_BOM then
content = content:sub(#UTF8_BOM + 1)
end
return content
end
--- Writes the given data to the specified file.
--
-- @tparam string filename Path of the file to write.
-- @tparam string data The data to write.
-- @tparam ?string mode The mode in which to open the file, see @{io.open} (default: "w").
-- @treturn[1] true
-- @treturn[2] nil
-- @treturn[2] string An error message.
function M.write_file (filename, data, mode)
local handler, err = open(filename, mode or 'w')
if not handler then
return nil, fmt('Could not open %s for writing: %s',
filename, normalize_io_error(filename, err))
end
local _, err = handler:write(data) --luacheck: ignore 411
if err then
return nil, fmt('Could not write %s: %s', filename, normalize_io_error(filename, err))
end
handler:flush()
handler:close()
return true
end
return M

View File

@ -0,0 +1,117 @@
---------
-- LuaSrcDiet API
----
local equiv = require 'luasrcdiet.equiv'
local llex = require 'luasrcdiet.llex'
local lparser = require 'luasrcdiet.lparser'
local optlex = require 'luasrcdiet.optlex'
local optparser = require 'luasrcdiet.optparser'
local utils = require 'luasrcdiet.utils'
local concat = table.concat
local merge = utils.merge
local _ -- placeholder
local function noop ()
return
end
local function opts_to_legacy (opts)
local res = {}
for key, val in pairs(opts) do
res['opt-'..key] = val
end
return res
end
local M = {}
--- The module's name.
M._NAME = 'luasrcdiet'
--- The module's version number.
M._VERSION = '0.3.0'
--- The module's homepage.
M._HOMEPAGE = 'https://github.com/jirutka/luasrcdiet'
--- All optimizations disabled.
M.NONE_OPTS = {
binequiv = false,
comments = false,
emptylines = false,
entropy = false,
eols = false,
experimental = false,
locals = false,
numbers = false,
srcequiv = false,
strings = false,
whitespace = false,
}
--- Basic optimizations enabled.
-- @table BASIC_OPTS
M.BASIC_OPTS = merge(M.NONE_OPTS, {
comments = true,
emptylines = true,
srcequiv = true,
whitespace = true,
})
--- Defaults.
-- @table DEFAULT_OPTS
M.DEFAULT_OPTS = merge(M.BASIC_OPTS, {
locals = true,
numbers = true,
})
--- Maximum optimizations enabled (all except experimental).
-- @table MAXIMUM_OPTS
M.MAXIMUM_OPTS = merge(M.DEFAULT_OPTS, {
entropy = true,
eols = true,
strings = true,
})
--- Optimizes the given Lua source code.
--
-- @tparam ?{[string]=bool,...} opts Optimizations to do (default is @{DEFAULT_OPTS}).
-- @tparam string source The Lua source code to optimize.
-- @treturn string Optimized source.
-- @raise if the source is malformed, source equivalence test failed, or some
-- other error occurred.
function M.optimize (opts, source)
assert(source and type(source) == 'string',
'bad argument #2: expected string, got a '..type(source))
opts = opts and merge(M.NONE_OPTS, opts) or M.DEFAULT_OPTS
local legacy_opts = opts_to_legacy(opts)
local toklist, seminfolist, toklnlist = llex.lex(source)
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
optparser.print = noop
optparser.optimize(legacy_opts, toklist, seminfolist, xinfo)
local warn = optlex.warn -- use this as a general warning lookup
optlex.print = noop
_, seminfolist = optlex.optimize(legacy_opts, toklist, seminfolist, toklnlist)
local optim_source = concat(seminfolist)
if opts.srcequiv and not opts.experimental then
equiv.init(legacy_opts, llex, warn)
equiv.source(source, optim_source)
if warn.SRC_EQUIV then
error('Source equivalence test failed!')
end
end
return optim_source
end
return M

View File

@ -0,0 +1,350 @@
---------
-- Lua 5.1+ lexical analyzer written in Lua.
--
-- This file is part of LuaSrcDiet, based on Yueliang material.
--
-- **Notes:**
--
-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
-- with significant modifications to handle LuaSrcDiet's needs:
-- (1) llex.error is an optional error function handler,
-- (2) seminfo for strings include their delimiters and no
-- translation operations are performed on them.
-- * Shbang handling has been added to support executable scripts.
-- * NO localized decimal point replacement magic.
-- * NO limit to number of lines.
-- * NO support for compatible long strings (LUA\_COMPAT_LSTR).
-- * Added goto keyword and double-colon operator (Lua 5.2+).
----
local find = string.find
local fmt = string.format
local match = string.match
local sub = string.sub
local tonumber = tonumber
local M = {}
local kw = {}
for v in ([[
and break do else elseif end false for function goto if in
local nil not or repeat return then true until while]]):gmatch("%S+") do
kw[v] = true
end
local z, -- source stream
sourceid, -- name of source
I, -- position of lexer
buff, -- buffer for strings
ln, -- line number
tok, -- lexed token list
seminfo, -- lexed semantic information list
tokln -- line numbers for messages
--- Adds information to token listing.
--
-- @tparam string token
-- @tparam string info
local function addtoken(token, info)
local i = #tok + 1
tok[i] = token
seminfo[i] = info
tokln[i] = ln
end
--- Handles line number incrementation and end-of-line characters.
--
-- @tparam int i Position of lexer in the source stream.
-- @tparam bool is_tok
-- @treturn int
local function inclinenumber(i, is_tok)
local old = sub(z, i, i)
i = i + 1 -- skip '\n' or '\r'
local c = sub(z, i, i)
if (c == "\n" or c == "\r") and (c ~= old) then
i = i + 1 -- skip '\n\r' or '\r\n'
old = old..c
end
if is_tok then addtoken("TK_EOL", old) end
ln = ln + 1
I = i
return i
end
--- Returns a chunk name or id, no truncation for long names.
--
-- @treturn string
local function chunkid()
if sourceid and match(sourceid, "^[=@]") then
return sub(sourceid, 2) -- remove first char
end
return "[string]"
end
--- Formats error message and throws error.
--
-- A simplified version, does not report what token was responsible.
--
-- @tparam string s
-- @tparam int line The line number.
-- @raise
local function errorline(s, line)
local e = M.error or error
e(fmt("%s:%d: %s", chunkid(), line or ln, s))
end
--- Counts separators (`=`) in a long string delimiter.
--
-- @tparam int i Position of lexer in the source stream.
-- @treturn int
local function skip_sep(i)
local s = sub(z, i, i)
i = i + 1
local count = #match(z, "=*", i)
i = i + count
I = i
return (sub(z, i, i) == s) and count or (-count) - 1
end
--- Reads a long string or long comment.
--
-- @tparam bool is_str
-- @tparam string sep
-- @treturn string
-- @raise if unfinished long string or comment.
local function read_long_string(is_str, sep)
local i = I + 1 -- skip 2nd '['
local c = sub(z, i, i)
if c == "\r" or c == "\n" then -- string starts with a newline?
i = inclinenumber(i) -- skip it
end
while true do
local p, _, r = find(z, "([\r\n%]])", i) -- (long range match)
if not p then
errorline(is_str and "unfinished long string" or
"unfinished long comment")
end
i = p
if r == "]" then -- delimiter test
if skip_sep(i) == sep then
buff = sub(z, buff, I)
I = I + 1 -- skip 2nd ']'
return buff
end
i = I
else -- newline
buff = buff.."\n"
i = inclinenumber(i)
end
end--while
end
--- Reads a string.
--
-- @tparam string del The delimiter.
-- @treturn string
-- @raise if unfinished string or too large escape sequence.
local function read_string(del)
local i = I
while true do
local p, _, r = find(z, "([\n\r\\\"\'])", i) -- (long range match)
if p then
if r == "\n" or r == "\r" then
errorline("unfinished string")
end
i = p
if r == "\\" then -- handle escapes
i = i + 1
r = sub(z, i, i)
if r == "" then break end -- (EOZ error)
p = find("abfnrtv\n\r", r, 1, true)
if p then -- special escapes
if p > 7 then
i = inclinenumber(i)
else
i = i + 1
end
elseif find(r, "%D") then -- other non-digits
i = i + 1
else -- \xxx sequence
local _, q, s = find(z, "^(%d%d?%d?)", i)
i = q + 1
if s + 1 > 256 then -- UCHAR_MAX
errorline("escape sequence too large")
end
end--if p
else
i = i + 1
if r == del then -- ending delimiter
I = i
return sub(z, buff, i - 1) -- return string
end
end--if r
else
break -- (error)
end--if p
end--while
errorline("unfinished string")
end
--- Initializes lexer for given source _z and source name _sourceid.
--
-- @tparam string _z The source code.
-- @tparam string _sourceid Name of the source.
local function init(_z, _sourceid)
z = _z -- source
sourceid = _sourceid -- name of source
I = 1 -- lexer's position in source
ln = 1 -- line number
tok = {} -- lexed token list*
seminfo = {} -- lexed semantic information list*
tokln = {} -- line numbers for messages*
-- Initial processing (shbang handling).
local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
if p then -- skip first line
I = I + #q
addtoken("TK_COMMENT", q)
if #r > 0 then inclinenumber(I, true) end
end
end
--- Runs lexer on the given source code.
--
-- @tparam string source The Lua source to scan.
-- @tparam ?string source_name Name of the source (optional).
-- @treturn {string,...} A list of lexed tokens.
-- @treturn {string,...} A list of semantic information (lexed strings).
-- @treturn {int,...} A list of line numbers.
function M.lex(source, source_name)
init(source, source_name)
while true do--outer
local i = I
-- inner loop allows break to be used to nicely section tests
while true do --luacheck: ignore 512
local p, _, r = find(z, "^([_%a][_%w]*)", i)
if p then
I = i + #r
if kw[r] then
addtoken("TK_KEYWORD", r) -- reserved word (keyword)
else
addtoken("TK_NAME", r) -- identifier
end
break -- (continue)
end
local p, _, r = find(z, "^(%.?)%d", i)
if p then -- numeral
if r == "." then i = i + 1 end
local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i) --luacheck: ignore 421
i = q + 1
if #r == 1 then -- optional exponent
if match(z, "^[%+%-]", i) then -- optional sign
i = i + 1
end
end
local _, q = find(z, "^[_%w]*", i)
I = q + 1
local v = sub(z, p, q) -- string equivalent
if not tonumber(v) then -- handles hex test also
errorline("malformed number")
end
addtoken("TK_NUMBER", v)
break -- (continue)
end
local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
if p then
if t == "\n" or t == "\r" then -- newline
inclinenumber(i, true)
else
I = q + 1 -- whitespace
addtoken("TK_SPACE", r)
end
break -- (continue)
end
local _, q = find(z, "^::", i)
if q then
I = q + 1
addtoken("TK_OP", "::")
break -- (continue)
end
local r = match(z, "^%p", i)
if r then
buff = i
local p = find("-[\"\'.=<>~", r, 1, true) --luacheck: ignore 421
if p then
-- two-level if block for punctuation/symbols
if p <= 2 then
if p == 1 then -- minus
local c = match(z, "^%-%-(%[?)", i)
if c then
i = i + 2
local sep = -1
if c == "[" then
sep = skip_sep(i)
end
if sep >= 0 then -- long comment
addtoken("TK_LCOMMENT", read_long_string(false, sep))
else -- short comment
I = find(z, "[\n\r]", i) or (#z + 1)
addtoken("TK_COMMENT", sub(z, buff, I - 1))
end
break -- (continue)
end
-- (fall through for "-")
else -- [ or long string
local sep = skip_sep(i)
if sep >= 0 then
addtoken("TK_LSTRING", read_long_string(true, sep))
elseif sep == -1 then
addtoken("TK_OP", "[")
else
errorline("invalid long string delimiter")
end
break -- (continue)
end
elseif p <= 5 then
if p < 5 then -- strings
I = i + 1
addtoken("TK_STRING", read_string(r))
break -- (continue)
end
r = match(z, "^%.%.?%.?", i) -- .|..|... dots
-- (fall through)
else -- relational
r = match(z, "^%p=?", i)
-- (fall through)
end
end
I = i + #r
addtoken("TK_OP", r) -- for other symbols, fall through
break -- (continue)
end
local r = sub(z, i, i)
if r ~= "" then
I = i + 1
addtoken("TK_OP", r) -- other single-char tokens
break
end
addtoken("TK_EOS", "") -- end of stream,
return tok, seminfo, tokln -- exit here
end--while inner
end--while outer
end
return M

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,852 @@
---------
-- This module does lexer-based optimizations.
--
-- **Notes:**
--
-- * TODO: General string delimiter conversion optimizer.
-- * TODO: (numbers) warn if overly significant digit.
----
local char = string.char
local find = string.find
local match = string.match
local rep = string.rep
local sub = string.sub
local tonumber = tonumber
local tostring = tostring
local print -- set in optimize()
local M = {}
-- error function, can override by setting own function into module
M.error = error
M.warn = {} -- table for warning flags
local stoks, sinfos, stoklns -- source lists
local is_realtoken = { -- significant (grammar) tokens
TK_KEYWORD = true,
TK_NAME = true,
TK_NUMBER = true,
TK_STRING = true,
TK_LSTRING = true,
TK_OP = true,
TK_EOS = true,
}
local is_faketoken = { -- whitespace (non-grammar) tokens
TK_COMMENT = true,
TK_LCOMMENT = true,
TK_EOL = true,
TK_SPACE = true,
}
local opt_details -- for extra information
--- Returns true if current token is at the start of a line.
--
-- It skips over deleted tokens via recursion.
--
-- @tparam int i
-- @treturn bool
local function atlinestart(i)
local tok = stoks[i - 1]
if i <= 1 or tok == "TK_EOL" then
return true
elseif tok == "" then
return atlinestart(i - 1)
end
return false
end
--- Returns true if current token is at the end of a line.
--
-- It skips over deleted tokens via recursion.
--
-- @tparam int i
-- @treturn bool
local function atlineend(i)
local tok = stoks[i + 1]
if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
return true
elseif tok == "" then
return atlineend(i + 1)
end
return false
end
--- Counts comment EOLs inside a long comment.
--
-- In order to keep line numbering, EOLs need to be reinserted.
--
-- @tparam string lcomment
-- @treturn int
local function commenteols(lcomment)
local sep = #match(lcomment, "^%-%-%[=*%[")
local z = sub(lcomment, sep + 1, -(sep - 1)) -- remove delims
local i, c = 1, 0
while true do
local p, _, r, s = find(z, "([\r\n])([\r\n]?)", i)
if not p then break end -- if no matches, done
i = p + 1
c = c + 1
if #s > 0 and r ~= s then -- skip CRLF or LFCR
i = i + 1
end
end
return c
end
--- Compares two tokens (i, j) and returns the whitespace required.
--
-- See documentation for a reference table of interactions.
--
-- Only two grammar/real tokens are being considered:
--
-- * if `""`, no separation is needed,
-- * if `" "`, then at least one whitespace (or EOL) is required.
--
-- Note: This doesn't work at the start or the end or for EOS!
--
-- @tparam int i
-- @tparam int j
-- @treturn string
local function checkpair(i, j)
local t1, t2 = stoks[i], stoks[j]
if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
t2 == "TK_STRING" or t2 == "TK_LSTRING" then
return ""
elseif t1 == "TK_OP" or t2 == "TK_OP" then
if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
(t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
return ""
end
if t1 == "TK_OP" and t2 == "TK_OP" then
-- for TK_OP/TK_OP pairs, see notes in technotes.txt
local op, op2 = sinfos[i], sinfos[j]
if (match(op, "^%.%.?$") and match(op2, "^%.")) or
(match(op, "^[~=<>]$") and op2 == "=") or
(op == "[" and (op2 == "[" or op2 == "=")) then
return " "
end
return ""
end
-- "TK_OP" + "TK_NUMBER" case
local op = sinfos[i]
if t2 == "TK_OP" then op = sinfos[j] end
if match(op, "^%.%.?%.?$") then
return " "
end
return ""
else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then
return " "
end
end
--- Repack tokens, removing deletions caused by optimization process.
local function repack_tokens()
local dtoks, dinfos, dtoklns = {}, {}, {}
local j = 1
for i = 1, #stoks do
local tok = stoks[i]
if tok ~= "" then
dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
j = j + 1
end
end
stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
end
--- Does number optimization.
--
-- Optimization using string formatting functions is one way of doing this,
-- but here, we consider all cases and handle them separately (possibly an
-- idiotic approach...).
--
-- Scientific notation being generated is not in canonical form, this may or
-- may not be a bad thing.
--
-- Note: Intermediate portions need to fit into a normal number range.
--
-- Optimizations can be divided based on number patterns:
--
-- * hexadecimal:
-- (1) no need to remove leading zeros, just skip to (2)
-- (2) convert to integer if size equal or smaller
-- * change if equal size -> lose the 'x' to reduce entropy
-- (3) number is then processed as an integer
-- (4) note: does not make 0[xX] consistent
-- * integer:
-- (1) reduce useless fractional part, if present, e.g. 123.000 -> 123.
-- (2) remove leading zeros, e.g. 000123
-- * float:
-- (1) split into digits dot digits
-- (2) if no integer portion, take as zero (can omit later)
-- (3) handle degenerate .000 case, after which the fractional part
-- must be non-zero (if zero, it's matched as float .0)
-- (4) remove trailing zeros for fractional portion
-- (5) p.q where p > 0 and q > 0 cannot be shortened any more
-- (6) otherwise p == 0 and the form is .q, e.g. .000123
-- (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
-- * scientific:
-- (1) split into (digits dot digits) [eE] ([+-] digits)
-- (2) if significand is zero, just use .0
-- (3) remove leading zeros for significand
-- (4) shift out trailing zeros for significand
-- (5) examine exponent and determine which format is best:
-- number with fraction, or scientific
--
-- Note: Number with fraction and scientific number is never converted
-- to integer, because Lua 5.3 distinguishes between integers and floats.
--
--
-- @tparam int i
local function do_number(i)
local before = sinfos[i] -- 'before'
local z = before -- working representation
local y -- 'after', if better
--------------------------------------------------------------------
if match(z, "^0[xX]") then -- hexadecimal number
local v = tostring(tonumber(z))
if #v <= #z then
z = v -- change to integer, AND continue
else
return -- no change; stick to hex
end
end
if match(z, "^%d+$") then -- integer
if tonumber(z) > 0 then
y = match(z, "^0*([1-9]%d*)$") -- remove leading zeros
else
y = "0" -- basic zero
end
elseif not match(z, "[eE]") then -- float
local p, q = match(z, "^(%d*)%.(%d*)$") -- split
if p == "" then p = 0 end -- int part zero
if q == "" then q = "0" end -- fraction part zero
if tonumber(q) == 0 and p == 0 then
y = ".0" -- degenerate .000 to .0
else
-- now, q > 0 holds and p is a number
local zeros_cnt = #match(q, "0*$") -- remove trailing zeros
if zeros_cnt > 0 then
q = sub(q, 1, #q - zeros_cnt)
end
-- if p > 0, nothing else we can do to simplify p.q case
if tonumber(p) > 0 then
y = p.."."..q
else
y = "."..q -- tentative, e.g. .000123
local v = #match(q, "^0*") -- # leading zeros
local w = #q - v -- # significant digits
local nv = tostring(#q)
-- e.g. compare 123e-6 versus .000123
if w + 2 + #nv < 1 + #q then
y = sub(q, -w).."e-"..nv
end
end
end
else -- scientific number
local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
ex = tonumber(ex)
-- if got ".", shift out fractional portion of significand
local p, q = match(sig, "^(%d*)%.(%d*)$")
if p then
ex = ex - #q
sig = p..q
end
if tonumber(sig) == 0 then
y = ".0" -- basic float zero
else
local v = #match(sig, "^0*") -- remove leading zeros
sig = sub(sig, v + 1)
v = #match(sig, "0*$") -- shift out trailing zeros
if v > 0 then
sig = sub(sig, 1, #sig - v)
ex = ex + v
end
-- examine exponent and determine which format is best
local nex = tostring(ex)
if ex >= 0 and (ex <= 1 + #nex) then -- a float
y = sig..rep("0", ex).."."
elseif ex < 0 and (ex >= -#sig) then -- fraction, e.g. .123
v = #sig + ex
y = sub(sig, 1, v).."."..sub(sig, v + 1)
elseif ex < 0 and (#nex >= -ex - #sig) then
-- e.g. compare 1234e-5 versus .01234
-- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
-- -> #nex >= -ex - #sig
v = -ex - #sig
y = "."..rep("0", v)..sig
else -- non-canonical scientific representation
y = sig.."e"..ex
end
end--if sig
end
if y and y ~= sinfos[i] then
if opt_details then
print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
opt_details = opt_details + 1
end
sinfos[i] = y
end
end
--- Does string optimization.
--
-- Note: It works on well-formed strings only!
--
-- Optimizations on characters can be summarized as follows:
--
-- \a\b\f\n\r\t\v -- no change
-- \\ -- no change
-- \"\' -- depends on delim, other can remove \
-- \[\] -- remove \
-- \<char> -- general escape, remove \ (Lua 5.1 only)
-- \<eol> -- normalize the EOL only
-- \ddd -- if \a\b\f\n\r\t\v, change to latter
-- if other < ascii 32, keep ddd but zap leading zeros
-- but cannot have following digits
-- if >= ascii 32, translate it into the literal, then also
-- do escapes for \\,\",\' cases
-- <other> -- no change
--
-- Switch delimiters if string becomes shorter.
--
-- @tparam int I
local function do_string(I)
local info = sinfos[I]
local delim = sub(info, 1, 1) -- delimiter used
local ndelim = (delim == "'") and '"' or "'" -- opposite " <-> '
local z = sub(info, 2, -2) -- actual string
local i = 1
local c_delim, c_ndelim = 0, 0 -- "/' counts
while i <= #z do
local c = sub(z, i, i)
if c == "\\" then -- escaped stuff
local j = i + 1
local d = sub(z, j, j)
local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
if not p then -- \<char> -- remove \ (Lua 5.1 only)
z = sub(z, 1, i - 1)..sub(z, j)
i = i + 1
elseif p <= 8 then -- \a\b\f\n\r\t\v\\
i = i + 2 -- no change
elseif p <= 10 then -- \<eol> -- normalize EOL
local eol = sub(z, j, j + 1)
if eol == "\r\n" or eol == "\n\r" then
z = sub(z, 1, i).."\n"..sub(z, j + 2)
elseif p == 10 then -- \r case
z = sub(z, 1, i).."\n"..sub(z, j + 1)
end
i = i + 2
elseif p <= 12 then -- \"\' -- remove \ for ndelim
if d == delim then
c_delim = c_delim + 1
i = i + 2
else
c_ndelim = c_ndelim + 1
z = sub(z, 1, i - 1)..sub(z, j)
i = i + 1
end
else -- \ddd -- various steps
local s = match(z, "^(%d%d?%d?)", j)
j = i + 1 + #s -- skip to location
local cv = tonumber(s)
local cc = char(cv)
p = find("\a\b\f\n\r\t\v", cc, 1, true)
if p then -- special escapes
s = "\\"..sub("abfnrtv", p, p)
elseif cv < 32 then -- normalized \ddd
if match(sub(z, j, j), "%d") then
-- if a digit follows, \ddd cannot be shortened
s = "\\"..s
else
s = "\\"..cv
end
elseif cc == delim then -- \<delim>
s = "\\"..cc
c_delim = c_delim + 1
elseif cc == "\\" then -- \\
s = "\\\\"
else -- literal character
s = cc
if cc == ndelim then
c_ndelim = c_ndelim + 1
end
end
z = sub(z, 1, i - 1)..s..sub(z, j)
i = i + #s
end--if p
else-- c ~= "\\" -- <other> -- no change
i = i + 1
if c == ndelim then -- count ndelim, for switching delimiters
c_ndelim = c_ndelim + 1
end
end--if c
end--while
-- Switching delimiters, a long-winded derivation:
-- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
-- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
-- simplifying the condition (1)>(2) --> c_delim > c_ndelim
if c_delim > c_ndelim then
i = 1
while i <= #z do
local p, _, r = find(z, "([\'\"])", i)
if not p then break end
if r == delim then -- \<delim> -> <delim>
z = sub(z, 1, p - 2)..sub(z, p)
i = p
else-- r == ndelim -- <ndelim> -> \<ndelim>
z = sub(z, 1, p - 1).."\\"..sub(z, p)
i = p + 2
end
end--while
delim = ndelim -- actually change delimiters
end
z = delim..z..delim
if z ~= sinfos[I] then
if opt_details then
print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
opt_details = opt_details + 1
end
sinfos[I] = z
end
end
--- Does long string optimization.
--
-- * remove first optional newline
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
--
-- Note: warning flagged if trailing whitespace found, not trimmed.
--
-- @tparam int I
local function do_lstring(I)
local info = sinfos[I]
local delim1 = match(info, "^%[=*%[") -- cut out delimiters
local sep = #delim1
local delim2 = sub(info, -sep, -1)
local z = sub(info, sep + 1, -(sep + 1)) -- lstring without delims
local y = ""
local i = 1
while true do
local p, _, r, s = find(z, "([\r\n])([\r\n]?)", i)
-- deal with a single line
local ln
if not p then
ln = sub(z, i)
elseif p >= i then
ln = sub(z, i, p - 1)
end
if ln ~= "" then
-- flag a warning if there are trailing spaces, won't optimize!
if match(ln, "%s+$") then
M.warn.LSTRING = "trailing whitespace in long string near line "..stoklns[I]
end
y = y..ln
end
if not p then -- done if no more EOLs
break
end
-- deal with line endings, normalize them
i = p + 1
if p then
if #s > 0 and r ~= s then -- skip CRLF or LFCR
i = i + 1
end
-- skip first newline, which can be safely deleted
if not(i == 1 and i == p) then
y = y.."\n"
end
end
end--while
-- handle possible deletion of one or more '=' separators
if sep >= 3 then
local chk, okay = sep - 1
-- loop to test ending delimiter with less of '=' down to zero
while chk >= 2 do
local delim = "%]"..rep("=", chk - 2).."%]"
if not match(y, delim) then okay = chk end
chk = chk - 1
end
if okay then -- change delimiters
sep = rep("=", okay - 2)
delim1, delim2 = "["..sep.."[", "]"..sep.."]"
end
end
sinfos[I] = delim1..y..delim2
end
--- Does long comment optimization.
--
-- * trim trailing whitespace
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
--
-- Note: It does not remove first optional newline.
--
-- @tparam int I
local function do_lcomment(I)
local info = sinfos[I]
local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
local sep = #delim1
local delim2 = sub(info, -(sep - 2), -1)
local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
local y = ""
local i = 1
while true do
local p, _, r, s = find(z, "([\r\n])([\r\n]?)", i)
-- deal with a single line, extract and check trailing whitespace
local ln
if not p then
ln = sub(z, i)
elseif p >= i then
ln = sub(z, i, p - 1)
end
if ln ~= "" then
-- trim trailing whitespace if non-empty line
local ws = match(ln, "%s*$")
if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
y = y..ln
end
if not p then -- done if no more EOLs
break
end
-- deal with line endings, normalize them
i = p + 1
if p then
if #s > 0 and r ~= s then -- skip CRLF or LFCR
i = i + 1
end
y = y.."\n"
end
end--while
-- handle possible deletion of one or more '=' separators
sep = sep - 2
if sep >= 3 then
local chk, okay = sep - 1
-- loop to test the ending delimiter with fewer '=' down to zero
while chk >= 2 do
local delim = "%]"..rep("=", chk - 2).."%]"
if not match(y, delim) then okay = chk end
chk = chk - 1
end
if okay then -- change delimiters
sep = rep("=", okay - 2)
delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
end
end
sinfos[I] = delim1..y..delim2
end
--- Does short comment optimization.
--
-- * trim trailing whitespace
--
-- @tparam int i
local function do_comment(i)
local info = sinfos[i]
local ws = match(info, "%s*$") -- just look from end of string
if #ws > 0 then
info = sub(info, 1, -(#ws + 1)) -- trim trailing whitespace
end
sinfos[i] = info
end
--- Returns true if string found in long comment.
--
-- This is a feature to keep copyright or license texts.
--
-- @tparam bool opt_keep
-- @tparam string info
-- @treturn bool
local function keep_lcomment(opt_keep, info)
if not opt_keep then return false end -- option not set
local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
local sep = #delim1
local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
if find(z, opt_keep, 1, true) then -- try to match
return true
end
end
--- The main entry point.
--
-- * currently, lexer processing has 2 passes
-- * processing is done on a line-oriented basis, which is easier to
-- grok due to the next point...
-- * since there are various options that can be enabled or disabled,
-- processing is a little messy or convoluted
--
-- @tparam {[string]=bool,...} option
-- @tparam {string,...} toklist
-- @tparam {string,...} semlist
-- @tparam {int,...} toklnlist
-- @treturn {string,...} toklist
-- @treturn {string,...} semlist
-- @treturn {int,...} toklnlist
function M.optimize(option, toklist, semlist, toklnlist)
-- Set option flags.
local opt_comments = option["opt-comments"]
local opt_whitespace = option["opt-whitespace"]
local opt_emptylines = option["opt-emptylines"]
local opt_eols = option["opt-eols"]
local opt_strings = option["opt-strings"]
local opt_numbers = option["opt-numbers"]
local opt_x = option["opt-experimental"]
local opt_keep = option.KEEP
opt_details = option.DETAILS and 0 -- upvalues for details display
print = M.print or _G.print
if opt_eols then -- forced settings, otherwise won't work properly
opt_comments = true
opt_whitespace = true
opt_emptylines = true
elseif opt_x then
opt_whitespace = true
end
-- Variable initialization.
stoks, sinfos, stoklns -- set source lists
= toklist, semlist, toklnlist
local i = 1 -- token position
local tok, info -- current token
local prev -- position of last grammar token
-- on same line (for TK_SPACE stuff)
-- Changes a token, info pair.
local function settoken(tok, info, I) --luacheck: ignore 431
I = I or i
stoks[I] = tok or ""
sinfos[I] = info or ""
end
-- Experimental optimization for ';' operator.
if opt_x then
while true do
tok, info = stoks[i], sinfos[i]
if tok == "TK_EOS" then -- end of stream/pass
break
elseif tok == "TK_OP" and info == ";" then
-- ';' operator found, since it is entirely optional, set it
-- as a space to let whitespace optimization do the rest
settoken("TK_SPACE", " ")
end
i = i + 1
end
repack_tokens()
end
-- Processing loop (PASS 1)
i = 1
while true do
tok, info = stoks[i], sinfos[i]
local atstart = atlinestart(i) -- set line begin flag
if atstart then prev = nil end
if tok == "TK_EOS" then -- end of stream/pass
break
elseif tok == "TK_KEYWORD" or -- keywords, identifiers,
tok == "TK_NAME" or -- operators
tok == "TK_OP" then
-- TK_KEYWORD and TK_OP can't be optimized without a big
-- optimization framework; it would be more of an optimizing
-- compiler, not a source code compressor
-- TK_NAME that are locals needs parser to analyze/optimize
prev = i
elseif tok == "TK_NUMBER" then -- numbers
if opt_numbers then
do_number(i) -- optimize
end
prev = i
elseif tok == "TK_STRING" or -- strings, long strings
tok == "TK_LSTRING" then
if opt_strings then
if tok == "TK_STRING" then
do_string(i) -- optimize
else
do_lstring(i) -- optimize
end
end
prev = i
elseif tok == "TK_COMMENT" then -- short comments
if opt_comments then
if i == 1 and sub(info, 1, 1) == "#" then
-- keep shbang comment, trim whitespace
do_comment(i)
else
-- safe to delete, as a TK_EOL (or TK_EOS) always follows
settoken() -- remove entirely
end
elseif opt_whitespace then -- trim whitespace only
do_comment(i)
end
elseif tok == "TK_LCOMMENT" then -- long comments
if keep_lcomment(opt_keep, info) then
-- if --keep, we keep a long comment if <msg> is found;
-- this is a feature to keep copyright or license texts
if opt_whitespace then -- trim whitespace only
do_lcomment(i)
end
prev = i
elseif opt_comments then
local eols = commenteols(info)
-- prepare opt_emptylines case first, if a disposable token
-- follows, current one is safe to dump, else keep a space;
-- it is implied that the operation is safe for '-', because
-- current is a TK_LCOMMENT, and must be separate from a '-'
if is_faketoken[stoks[i + 1]] then
settoken() -- remove entirely
tok = ""
else
settoken("TK_SPACE", " ")
end
-- if there are embedded EOLs to keep and opt_emptylines is
-- disabled, then switch the token into one or more EOLs
if not opt_emptylines and eols > 0 then
settoken("TK_EOL", rep("\n", eols))
end
-- if optimizing whitespaces, force reinterpretation of the
-- token to give a chance for the space to be optimized away
if opt_whitespace and tok ~= "" then
i = i - 1 -- to reinterpret
end
else -- disabled case
if opt_whitespace then -- trim whitespace only
do_lcomment(i)
end
prev = i
end
elseif tok == "TK_EOL" then -- line endings
if atstart and opt_emptylines then
settoken() -- remove entirely
elseif info == "\r\n" or info == "\n\r" then
-- normalize the rest of the EOLs for CRLF/LFCR only
-- (note that TK_LCOMMENT can change into several EOLs)
settoken("TK_EOL", "\n")
end
elseif tok == "TK_SPACE" then -- whitespace
if opt_whitespace then
if atstart or atlineend(i) then
-- delete leading and trailing whitespace
settoken() -- remove entirely
else
-- at this point, since leading whitespace has been removed,
-- there should be either a real token or a TK_LCOMMENT
-- prior to hitting this whitespace; the TK_LCOMMENT case
-- only happens if opt_comments is disabled; so prev ~= nil
local ptok = stoks[prev]
if ptok == "TK_LCOMMENT" then
-- previous TK_LCOMMENT can abut with anything
settoken() -- remove entirely
else
-- prev must be a grammar token; consecutive TK_SPACE
-- tokens are impossible when optimizing whitespace
local ntok = stoks[i + 1]
if is_faketoken[ntok] then
-- handle special case where a '-' cannot abut with
-- either a short comment or a long comment
if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
ptok == "TK_OP" and sinfos[prev] == "-" then
-- keep token
else
settoken() -- remove entirely
end
else--is_realtoken
-- check a pair of grammar tokens, if can abut, then
-- delete space token entirely, otherwise keep one space
local s = checkpair(prev, i + 1)
if s == "" then
settoken() -- remove entirely
else
settoken("TK_SPACE", " ")
end
end
end
end
end
else
error("unidentified token encountered")
end
i = i + 1
end--while
repack_tokens()
-- Processing loop (PASS 2)
if opt_eols then
i = 1
-- Aggressive EOL removal only works with most non-grammar tokens
-- optimized away because it is a rather simple scheme -- basically
-- it just checks 'real' token pairs around EOLs.
if stoks[1] == "TK_COMMENT" then
-- first comment still existing must be shbang, skip whole line
i = 3
end
while true do
tok = stoks[i]
if tok == "TK_EOS" then -- end of stream/pass
break
elseif tok == "TK_EOL" then -- consider each TK_EOL
local t1, t2 = stoks[i - 1], stoks[i + 1]
if is_realtoken[t1] and is_realtoken[t2] then -- sanity check
local s = checkpair(i - 1, i + 1)
if s == "" or t2 == "TK_EOS" then
settoken() -- remove entirely
end
end
end--if tok
i = i + 1
end--while
repack_tokens()
end
if opt_details and opt_details > 0 then print() end -- spacing
return stoks, sinfos, stoklns
end
return M

View File

@@ -0,0 +1,644 @@
---------
-- This module does parser-based optimizations.
--
-- **Notes:**
--
-- * The processing load is quite significant, but since this is an
-- off-line text processor, I believe we can wait a few seconds.
-- * TODO: Might process "local a,a,a" wrongly... need tests!
-- * TODO: Remove position handling if overlapped locals (rem < 0)
-- needs more study, to check behaviour.
-- * TODO: There are probably better ways to do allocation, e.g. by
-- choosing better methods to sort and pick locals...
-- * TODO: We don't need 53*63 two-letter identifiers; we can make
-- do with significantly fewer depending on how many are really
-- needed, and improve entropy; e.g. 13 needed -> choose 4*4 instead.
----
local byte = string.byte
local char = string.char
local concat = table.concat
local fmt = string.format
local pairs = pairs
local rep = string.rep
local sort = table.sort
local sub = string.sub
local M = {}
-- Letter frequencies for reducing symbol entropy (fixed version)
-- * Might help a wee bit when the output file is compressed
-- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies
-- * We use letter frequencies according to a Linotype keyboard, plus
-- the underscore, and both lower case and upper case letters.
-- * The arrangement below (LC, underscore, %d, UC) is arbitrary.
-- * This is certainly not optimal, but is quick-and-dirty and the
-- process has no significant overhead
local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ"
local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ"
-- Names or identifiers that must be skipped.
-- (The first two lines are for keywords.)
local SKIP_NAME = {}
for v in ([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while
self _ENV]]):gmatch("%S+") do
SKIP_NAME[v] = true
end
local toklist, seminfolist, -- token lists (lexer output)
tokpar, seminfopar, xrefpar, -- token lists (parser output)
globalinfo, localinfo, -- variable information tables
statinfo, -- statement type table
globaluniq, localuniq, -- unique name tables
var_new, -- index of new variable names
varlist -- list of output variables
--- Preprocesses information table to get lists of unique names.
--
-- @tparam {table,...} infotable
-- @treturn table
local function preprocess(infotable)
local uniqtable = {}
for i = 1, #infotable do -- enumerate info table
local obj = infotable[i]
local name = obj.name
if not uniqtable[name] then -- not found, start an entry
uniqtable[name] = {
decl = 0, token = 0, size = 0,
}
end
local uniq = uniqtable[name] -- count declarations, tokens, size
uniq.decl = uniq.decl + 1
local xref = obj.xref
local xcount = #xref
uniq.token = uniq.token + xcount
uniq.size = uniq.size + xcount * #name
if obj.decl then -- if local table, create first,last pairs
obj.id = i
obj.xcount = xcount
if xcount > 1 then -- if ==1, means local never accessed
obj.first = xref[2]
obj.last = xref[xcount]
end
else -- if global table, add a back ref
uniq.id = i
end
end--for
return uniqtable
end
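-- Illustrative example (not part of the original source): for a source where
-- the local name "count" is declared twice and appears at six token positions
-- in total, preprocess() yields
-- uniqtable["count"] = { decl = 2, token = 6, size = 30 }  -- 6 * #"count"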
--- Calculates actual symbol frequencies, in order to reduce entropy.
--
-- * This may help further reduce the size of compressed sources.
-- * Note that since parser-based optimization runs before lexer-based
-- optimization, the frequency table is not exact!
-- * Yes, this will miss --keep block comments too...
--
-- @tparam table option
local function recalc_for_entropy(option)
-- table of token classes to accept in calculating symbol frequency
local ACCEPT = {
TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true,
TK_STRING = true, TK_LSTRING = true,
}
if not option["opt-comments"] then
ACCEPT.TK_COMMENT = true
ACCEPT.TK_LCOMMENT = true
end
-- Create a new table and remove any original locals by filtering.
local filtered = {}
for i = 1, #toklist do
filtered[i] = seminfolist[i]
end
for i = 1, #localinfo do -- enumerate local info table
local obj = localinfo[i]
local xref = obj.xref
for j = 1, obj.xcount do
local p = xref[j]
filtered[p] = "" -- remove locals
end
end
local freq = {} -- reset symbol frequency table
for i = 0, 255 do freq[i] = 0 end
for i = 1, #toklist do -- gather symbol frequency
local tok, info = toklist[i], filtered[i]
if ACCEPT[tok] then
for j = 1, #info do
local c = byte(info, j)
freq[c] = freq[c] + 1
end
end--if
end--for
-- Re-sorts symbols according to actual frequencies.
--
-- @tparam string symbols
-- @treturn string
local function resort(symbols)
local symlist = {}
for i = 1, #symbols do -- prepare table to sort
local c = byte(symbols, i)
symlist[i] = { c = c, freq = freq[c], }
end
sort(symlist, function(v1, v2) -- sort selected symbols
return v1.freq > v2.freq
end)
local charlist = {} -- reconstitute the string
for i = 1, #symlist do
charlist[i] = char(symlist[i].c)
end
return concat(charlist)
end
LETTERS = resort(LETTERS) -- change letter arrangement
ALPHANUM = resort(ALPHANUM)
end
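-- Illustrative note (not part of the original source): if the filtered
-- sources use "x" much more often than "e", resort() moves "x" towards the
-- front of LETTERS, so new_var_name() below hands out "x" among the first
-- names and the renamed output may compress slightly better.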
--- Returns a string containing a new local variable name to use, and
-- a flag indicating whether it collides with a global variable.
--
-- Trapping keywords and other names like 'self' is done elsewhere.
--
-- @treturn string A new local variable name.
-- @treturn bool Whether the name collides with a global variable.
local function new_var_name()
local var
local cletters, calphanum = #LETTERS, #ALPHANUM
local v = var_new
if v < cletters then -- single char
v = v + 1
var = sub(LETTERS, v, v)
else -- longer names
local range, sz = cletters, 1 -- calculate # chars fit
repeat
v = v - range
range = range * calphanum
sz = sz + 1
until range > v
local n = v % cletters -- left side cycles faster
v = (v - n) / cletters -- do first char first
n = n + 1
var = sub(LETTERS, n, n)
while sz > 1 do
local m = v % calphanum
v = (v - m) / calphanum
m = m + 1
var = var..sub(ALPHANUM, m, m)
sz = sz - 1
end
end
var_new = var_new + 1
return var, globaluniq[var] ~= nil
end
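-- Illustrative note (not part of the original source): with the default
-- (unsorted) LETTERS/ALPHANUM tables, successive calls return the 53
-- single-character names "e", "t", "a", ..., "Z", then two-character names
-- with the first character cycling fastest: "ee", "te", "ae", ...;
-- keywords and names such as "self" are filtered out by the caller via
-- SKIP_NAME.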
--- Calculates and prints some statistics.
--
-- Note: probably better in main source, put here for now.
--
-- @tparam table globaluniq
-- @tparam table localuniq
-- @tparam table afteruniq
-- @tparam table option
local function stats_summary(globaluniq, localuniq, afteruniq, option) --luacheck: ignore 431
local print = M.print or print
local opt_details = option.DETAILS
if option.QUIET then return end
local uniq_g, uniq_li, uniq_lo = 0, 0, 0
local decl_g, decl_li, decl_lo = 0, 0, 0
local token_g, token_li, token_lo = 0, 0, 0
local size_g, size_li, size_lo = 0, 0, 0
local function avg(c, l) -- safe average function
if c == 0 then return 0 end
return l / c
end
-- Collect statistics (Note: globals do not have declarations!)
for _, uniq in pairs(globaluniq) do
uniq_g = uniq_g + 1
token_g = token_g + uniq.token
size_g = size_g + uniq.size
end
for _, uniq in pairs(localuniq) do
uniq_li = uniq_li + 1
decl_li = decl_li + uniq.decl
token_li = token_li + uniq.token
size_li = size_li + uniq.size
end
for _, uniq in pairs(afteruniq) do
uniq_lo = uniq_lo + 1
decl_lo = decl_lo + uniq.decl
token_lo = token_lo + uniq.token
size_lo = size_lo + uniq.size
end
local uniq_ti = uniq_g + uniq_li
local decl_ti = decl_g + decl_li
local token_ti = token_g + token_li
local size_ti = size_g + size_li
local uniq_to = uniq_g + uniq_lo
local decl_to = decl_g + decl_lo
local token_to = token_g + token_lo
local size_to = size_g + size_lo
-- Detailed stats: global list
if opt_details then
local sorted = {} -- sort table of unique global names by size
for name, uniq in pairs(globaluniq) do
uniq.name = name
sorted[#sorted + 1] = uniq
end
sort(sorted, function(v1, v2)
return v1.size > v2.size
end)
do
local tabf1, tabf2 = "%8s%8s%10s %s", "%8d%8d%10.2f %s"
local hl = rep("-", 44)
print("*** global variable list (sorted by size) ***\n"..hl)
print(fmt(tabf1, "Token", "Input", "Input", "Global"))
print(fmt(tabf1, "Count", "Bytes", "Average", "Name"))
print(hl)
for i = 1, #sorted do
local uniq = sorted[i]
print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name))
end
print(hl)
print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL"))
print(hl.."\n")
end
-- Detailed stats: local list
do
local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s"
local hl = rep("-", 70)
print("*** local variable list (sorted by allocation order) ***\n"..hl)
print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global"))
print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name"))
print(hl)
for i = 1, #varlist do -- iterate according to order assigned
local name = varlist[i]
local uniq = afteruniq[name]
local old_t, old_s = 0, 0
for j = 1, #localinfo do -- find corresponding old names and calculate
local obj = localinfo[j]
if obj.name == name then
old_t = old_t + obj.xcount
old_s = old_s + obj.xcount * #obj.oldname
end
end
print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s),
uniq.size, avg(uniq.token, uniq.size), name))
end
print(hl)
print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li),
size_lo, avg(token_lo, size_lo), "TOTAL"))
print(hl.."\n")
end
end--if opt_details
-- Display output
do
local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f"
local hl = rep("-", 58)
print("*** local variable optimization summary ***\n"..hl)
print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", "Average"))
print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes"))
print(hl)
print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g)))
print(hl)
print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li)))
print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti)))
print(hl)
print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo)))
print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to)))
print(hl.."\n")
end
end
--- Does experimental optimization for f("string") statements.
--
-- It's safe to delete parentheses without adding whitespace, as both
-- kinds of strings can abut with anything else.
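-- Illustrative example (not part of the original source):
--   print("hello")   becomes   print"hello"
--   f([[block]])     becomes   f[[block]]
-- Only plain f("<string>") call statements are rewritten; calls with
-- additional arguments are left untouched.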
local function optimize_func1()
local function is_strcall(j) -- find f("string") pattern
local t1 = tokpar[j + 1] or ""
local t2 = tokpar[j + 2] or ""
local t3 = tokpar[j + 3] or ""
if t1 == "(" and t2 == "<string>" and t3 == ")" then
return true
end
end
local del_list = {} -- scan for function pattern,
local i = 1 -- tokens to be deleted are marked
while i <= #tokpar do
local id = statinfo[i]
if id == "call" and is_strcall(i) then -- found & mark ()
del_list[i + 1] = true -- '('
del_list[i + 3] = true -- ')'
i = i + 3
end
i = i + 1
end
-- Delete a token and adjust all relevant tables.
-- * Currently invalidates globalinfo and localinfo (not updated),
-- so any other optimization is done after processing locals
-- (of course, we can also lex the source data again...).
-- * Faster one-pass token deletion.
local del_list2 = {}
do
local i, dst, idend = 1, 1, #tokpar
while dst <= idend do -- process parser tables
if del_list[i] then -- found a token to delete?
del_list2[xrefpar[i]] = true
i = i + 1
end
if i > dst then
if i <= idend then -- shift table items lower
tokpar[dst] = tokpar[i]
seminfopar[dst] = seminfopar[i]
xrefpar[dst] = xrefpar[i] - (i - dst)
statinfo[dst] = statinfo[i]
else -- nil out excess entries
tokpar[dst] = nil
seminfopar[dst] = nil
xrefpar[dst] = nil
statinfo[dst] = nil
end
end
i = i + 1
dst = dst + 1
end
end
do
local i, dst, idend = 1, 1, #toklist
while dst <= idend do -- process lexer tables
if del_list2[i] then -- found a token to delete?
i = i + 1
end
if i > dst then
if i <= idend then -- shift table items lower
toklist[dst] = toklist[i]
seminfolist[dst] = seminfolist[i]
else -- nil out excess entries
toklist[dst] = nil
seminfolist[dst] = nil
end
end
i = i + 1
dst = dst + 1
end
end
end
--- Does local variable optimization.
--
-- @tparam {[string]=bool,...} option
local function optimize_locals(option)
var_new = 0 -- reset variable name allocator
varlist = {}
-- Preprocess global/local tables, handle entropy reduction.
globaluniq = preprocess(globalinfo)
localuniq = preprocess(localinfo)
if option["opt-entropy"] then -- for entropy improvement
recalc_for_entropy(option)
end
-- Build initial declared object table, then sort according to
-- token count, this might help assign more tokens to more common
-- variable names such as 'e' thus possibly reducing entropy.
-- * An object knows its localinfo index via its 'id' field.
-- * Special handling for "self" and "_ENV" special local (parameter) here.
local object = {}
for i = 1, #localinfo do
object[i] = localinfo[i]
end
sort(object, function(v1, v2) -- sort largest first
return v1.xcount > v2.xcount
end)
-- The special "self" and "_ENV" function parameters must be preserved.
-- * The allocator below will never use "self", so it is safe to
-- keep those implicit declarations as-is.
local temp, j, used_specials = {}, 1, {}
for i = 1, #object do
local obj = object[i]
if not obj.is_special then
temp[j] = obj
j = j + 1
else
used_specials[#used_specials + 1] = obj.name
end
end
object = temp
-- A simple first-come first-served heuristic name allocator,
-- note that this is in no way optimal...
-- * Each object is a local variable declaration plus existence.
-- * The aim is to assign short names to as many tokens as possible,
-- so the following tries to maximize name reuse.
-- * Note that we preserve sort order.
local nobject = #object
while nobject > 0 do
local varname, gcollide
repeat
varname, gcollide = new_var_name() -- collect a variable name
until not SKIP_NAME[varname] -- skip all special names
varlist[#varlist + 1] = varname -- keep a list
local oleft = nobject
-- If the variable name collides with an existing global, the name
-- cannot be used by a local while the name is accessed as a global
-- during the local's live range (between 'act' and 'rem'), so
-- we drop objects that collide with the corresponding global.
if gcollide then
-- find the xref table of the global
local gref = globalinfo[globaluniq[varname].id].xref
local ngref = #gref
-- enumerate for all current objects; all are valid at this point
for i = 1, nobject do
local obj = object[i]
local act, rem = obj.act, obj.rem -- 'live' range of local
-- if rem < 0, it is a -id to a local that had the same name
-- so follow rem to extend it; does this make sense?
while rem < 0 do
rem = localinfo[-rem].rem
end
local drop
for j = 1, ngref do
local p = gref[j]
if p >= act and p <= rem then drop = true end -- in range?
end
if drop then
obj.skip = true
oleft = oleft - 1
end
end--for
end--if gcollide
-- Now the first unassigned local (since it's sorted) will be the
-- one with the most tokens to rename, so we assign this one first
-- and eliminate all others that collide; any locals left over can
-- then reuse the same variable name; this is repeated until every
-- local declaration that can use this name has been assigned.
--
-- The criteria for local-local reuse/collision is:
-- A is the local with a name already assigned
-- B is the unassigned local under consideration
-- => anytime A is accessed, it cannot be when B is 'live'
-- => to speed up things, we have first/last accesses noted
while oleft > 0 do
local i = 1
while object[i].skip do -- scan for first object
i = i + 1
end
-- First object is free for assignment of the variable name
-- [first,last] gives the access range for collision checking.
oleft = oleft - 1
local obja = object[i]
i = i + 1
obja.newname = varname
obja.skip = true
obja.done = true
local first, last = obja.first, obja.last
local xref = obja.xref
-- Then, scan all the rest and drop those colliding.
-- If A was never accessed then it'll never collide with anything
-- otherwise trivial skip if:
-- * B was activated after A's last access (last < act),
-- * B was removed before A's first access (first > rem),
-- if not, see detailed skip below...
if first and oleft > 0 then -- must have at least 1 access
local scanleft = oleft
while scanleft > 0 do
while object[i].skip do -- next valid object
i = i + 1
end
scanleft = scanleft - 1
local objb = object[i]
i = i + 1
local act, rem = objb.act, objb.rem -- live range of B
-- if rem < 0, extend range of rem thru' following local
while rem < 0 do
rem = localinfo[-rem].rem
end
if not(last < act or first > rem) then -- possible collision
-- B is activated later than A or at the same statement,
-- this means for no collision, A cannot be accessed when B
-- is alive, since B overrides A (or is a peer).
if act >= obja.act then
for j = 1, obja.xcount do -- ... then check every access
local p = xref[j]
if p >= act and p <= rem then -- A accessed when B live!
oleft = oleft - 1
objb.skip = true
break
end
end--for
-- A is activated later than B, this means for no collision,
-- A's access is okay since it overrides B, but B's last
-- access needs to be earlier than A's activation time.
else
if objb.last and objb.last >= obja.act then
oleft = oleft - 1
objb.skip = true
end
end
end
if oleft == 0 then break end
end
end--if first
end--while
-- After assigning all possible locals to one variable name, the
-- unassigned locals/objects have the skip field reset and the table
-- is compacted, to hopefully reduce iteration time.
local temp, j = {}, 1
for i = 1, nobject do
local obj = object[i]
if not obj.done then
obj.skip = false
temp[j] = obj
j = j + 1
end
end
object = temp -- new compacted object table
nobject = #object -- objects left to process
end--while
-- After assigning all locals with new variable names, we can
-- patch in the new names, and reprocess to get 'after' stats.
for i = 1, #localinfo do -- enumerate all locals
local obj = localinfo[i]
local xref = obj.xref
if obj.newname then -- if got new name, patch it in
for j = 1, obj.xcount do
local p = xref[j] -- xrefs indexes the token list
seminfolist[p] = obj.newname
end
obj.name, obj.oldname -- adjust names
= obj.newname, obj.name
else
obj.oldname = obj.name -- for cases like 'self'
end
end
-- Deal with statistics output.
for _, name in ipairs(used_specials) do
varlist[#varlist + 1] = name
end
local afteruniq = preprocess(localinfo)
stats_summary(globaluniq, localuniq, afteruniq, option)
end
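-- Illustrative sketch (not part of the original source): after
-- optimize_locals, a fragment such as
--   local counter = 0
--   local function bump() counter = counter + 1 return counter end
-- would typically be emitted with the most-referenced local renamed first,
-- e.g. "counter" -> "e" and "bump" -> "t".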
--- The main entry point.
--
-- @tparam table option
-- @tparam {string,...} _toklist
-- @tparam {string,...} _seminfolist
-- @tparam table xinfo
function M.optimize(option, _toklist, _seminfolist, xinfo)
-- set tables
toklist, seminfolist -- from lexer
= _toklist, _seminfolist
tokpar, seminfopar, xrefpar -- from parser
= xinfo.toklist, xinfo.seminfolist, xinfo.xreflist
globalinfo, localinfo, statinfo -- from parser
= xinfo.globalinfo, xinfo.localinfo, xinfo.statinfo
-- Optimize locals.
if option["opt-locals"] then
optimize_locals(option)
end
-- Other optimizations.
if option["opt-experimental"] then -- experimental
optimize_func1()
-- WARNING globalinfo and localinfo now invalidated!
end
end
return M

View File

@@ -0,0 +1,90 @@
---------
-- Example of a plugin for LuaSrcDiet.
--
-- WARNING: highly experimental! interface liable to change
--
-- **Notes:**
--
-- * Any function can be omitted and LuaSrcDiet won't call it.
-- * The functions are:
-- (1) init(_option, _srcfl, _destfl)
-- (2) post_load(z) can return z
-- (3) post_lex(toklist, seminfolist, toklnlist)
-- (4) post_parse(globalinfo, localinfo)
-- (5) post_optparse()
-- (6) post_optlex(toklist, seminfolist, toklnlist)
-- * Older tables can be copied and kept in the plugin and used later.
-- * If you modify 'option', remember that LuaSrcDiet might be
-- processing more than one file.
-- * Arrangement of the functions is not final!
-- * TODO: can't process additional options from command line yet
----
local M = {}
local option -- local reference to list of options
local srcfl, destfl -- filenames
local old_quiet
local function print(...) -- handle quiet option
if option.QUIET then return end
_G.print(...)
end
--- Initialization.
--
-- @tparam {[string]=bool,...} _option
-- @tparam string _srcfl Path of the source file.
-- @tparam string _destfl Path of the destination file.
function M.init(_option, _srcfl, _destfl)
option = _option
srcfl, destfl = _srcfl, _destfl
-- plugin can impose its own option starting from here
end
--- Message display, post-load processing, can return z.
function M.post_load(z)
-- this message will print after the LuaSrcDiet title message
print([[
Example plugin module for LuaSrcDiet
]])
print("Example: source file name is '"..srcfl.."'")
print("Example: destination file name is '"..destfl.."'")
print("Example: the size of the source file is "..#z.." bytes")
-- returning z is optional; this allows optional replacement of
-- the source data prior to lexing
return z
end
--- Post-lexing processing, can work on lexer table output.
function M.post_lex(toklist, seminfolist, toklnlist) --luacheck: ignore
print("Example: the number of lexed elements is "..#toklist)
end
--- Post-parsing processing, gives globalinfo, localinfo.
function M.post_parse(globalinfo, localinfo)
print("Example: size of globalinfo is "..#globalinfo)
print("Example: size of localinfo is "..#localinfo)
old_quiet = option.QUIET
option.QUIET = true
end
--- Post-parser optimization processing, can get tables from elsewhere.
function M.post_optparse()
option.QUIET = old_quiet
print("Example: pretend to do post-optparse")
end
--- Post-lexer optimization processing, can get tables from elsewhere.
function M.post_optlex(toklist, seminfolist, toklnlist) --luacheck: ignore
print("Example: pretend to do post-optlex")
-- restore old settings, other file might need original settings
option.QUIET = old_quiet
-- option.EXIT can be set at the end of any post_* function to stop
-- further processing and exit for the current file being worked on
-- in this case, final stats printout is disabled and the output will
-- not be written to the destination file
option.EXIT = true
end
return M

View File

@@ -0,0 +1,177 @@
---------
-- Turns Lua 5.1 source code into HTML files.
--
-- WARNING: highly experimental! interface liable to change
--
-- **Notes:**
--
-- * This HTML highlighter marks globals brightly so that their usage
-- can be manually optimized.
-- * Either uses a .html extension for output files or it follows the
-- -o <filespec> option.
-- * The HTML style tries to follow that of the Lua wiki.
----
local fs = require "luasrcdiet.fs"
local concat = table.concat
local find = string.find
local fmt = string.format
local sub = string.sub
local M = {}
local HTML_EXT = ".html"
local ENTITIES = {
["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;",
["'"] = "&apos;", ["\""] = "&quot;",
}
-- simple headers and footers
local HEADER = [[
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>%s</title>
<meta name="Generator" content="LuaSrcDiet">
<style type="text/css">
%s</style>
</head>
<body>
<pre class="code">
]]
local FOOTER = [[
</pre>
</body>
</html>
]]
-- for more, please see wikimain.css from the Lua wiki site
local STYLESHEET = [[
BODY {
background: white;
color: navy;
}
pre.code { color: black; }
span.comment { color: #00a000; }
span.string { color: #009090; }
span.keyword { color: black; font-weight: bold; }
span.number { color: #993399; }
span.operator { }
span.name { }
span.global { color: #ff0000; font-weight: bold; }
span.local { color: #0000ff; font-weight: bold; }
]]
local option -- local reference to list of options
local srcfl, destfl -- filenames
local toklist, seminfolist -- token data
local function print(...) -- handle quiet option
if option.QUIET then return end
_G.print(...)
end
--- Initialization.
function M.init(_option, _srcfl)
option = _option
srcfl = _srcfl
local extb, _ = find(srcfl, "%.[^%.%\\%/]*$")
local basename = srcfl
if extb and extb > 1 then
basename = sub(srcfl, 1, extb - 1)
end
destfl = basename..HTML_EXT
if option.OUTPUT_FILE then
destfl = option.OUTPUT_FILE
end
if srcfl == destfl then
error("output filename identical to input filename")
end
end
--- Message display, post-load processing.
function M.post_load()
print([[
HTML plugin module for LuaSrcDiet
]])
print("Exporting: "..srcfl.." -> "..destfl.."\n")
end
--- Post-lexing processing, can work on lexer table output.
function M.post_lex(_toklist, _seminfolist)
toklist, seminfolist = _toklist, _seminfolist
end
--- Escapes the usual suspects for HTML/XML.
local function do_entities(z)
local i = 1
while i <= #z do
local c = sub(z, i, i)
local d = ENTITIES[c]
if d then
c = d
z = sub(z, 1, i - 1)..c..sub(z, i + 1)
end
i = i + #c
end--while
return z
end
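-- Illustrative example (not part of the original source):
--   do_entities('if a < b then s = "x" end')
--   --> 'if a &lt; b then s = &quot;x&quot; end'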
--- Post-parsing processing, gives globalinfo, localinfo.
function M.post_parse(globalinfo, localinfo)
local html = {}
local function add(s) -- html helpers
html[#html + 1] = s
end
local function span(class, s)
add('<span class="'..class..'">'..s..'</span>')
end
for i = 1, #globalinfo do -- mark global identifiers as TK_GLOBAL
local obj = globalinfo[i]
local xref = obj.xref
for j = 1, #xref do
local p = xref[j]
toklist[p] = "TK_GLOBAL"
end
end--for
for i = 1, #localinfo do -- mark local identifiers as TK_LOCAL
local obj = localinfo[i]
local xref = obj.xref
for j = 1, #xref do
local p = xref[j]
toklist[p] = "TK_LOCAL"
end
end--for
add(fmt(HEADER, -- header and leading stuff
do_entities(srcfl),
STYLESHEET))
for i = 1, #toklist do -- enumerate token list
local tok, info = toklist[i], seminfolist[i]
if tok == "TK_KEYWORD" then
span("keyword", info)
elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
span("string", do_entities(info))
elseif tok == "TK_COMMENT" or tok == "TK_LCOMMENT" then
span("comment", do_entities(info))
elseif tok == "TK_GLOBAL" then
span("global", info)
elseif tok == "TK_LOCAL" then
span("local", info)
elseif tok == "TK_NAME" then
span("name", info)
elseif tok == "TK_NUMBER" then
span("number", info)
elseif tok == "TK_OP" then
span("operator", do_entities(info))
elseif tok ~= "TK_EOS" then -- TK_EOL, TK_SPACE
add(info)
end
end--for
add(FOOTER)
assert(fs.write_file(destfl, concat(html), "wb"))
option.EXIT = true
end
return M

View File

@@ -0,0 +1,89 @@
---------
-- Calculates SLOC for Lua 5.1 scripts
--
-- WARNING: highly experimental! interface liable to change
--
-- **Notes:**
--
-- * SLOC's behaviour is based on David Wheeler's SLOCCount.
-- * Empty lines and comments don't count as significant.
-- * Empty lines in long strings are also insignificant. This is
-- debatable. In SLOCCount, this allows counting of invalid multi-
-- line strings for C. But an empty line is still an empty line.
-- * Ignores the --quiet option and prints its own result line.
----
local M = {}
local option -- local reference to list of options
local srcfl -- source file name
function M.init(_option, _srcfl)
option = _option
option.QUIET = true
srcfl = _srcfl
end
--- Splits a block into a table of lines (minus EOLs).
--
-- @tparam string blk
-- @treturn {string,...} lines
local function split(blk)
local lines = {}
local i, nblk = 1, #blk
while i <= nblk do
local p, q, r, s = blk:find("([\r\n])([\r\n]?)", i)
if not p then
p = nblk + 1
end
lines[#lines + 1] = blk:sub(i, p - 1)
i = p + 1
if p < nblk and q > p and r ~= s then -- handle Lua-style CRLF, LFCR
i = i + 1
end
end
return lines
end
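-- Illustrative example (not part of the original source):
--   split("a\r\nb\n\nc")  --> { "a", "b", "", "c" }
-- i.e. a CRLF pair counts as one line break, while the bare empty line
-- produces an empty string entry.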
--- Post-lexing processing, can work on lexer table output.
function M.post_lex(toklist, seminfolist, toklnlist)
local lnow, sloc = 0, 0
local function chk(ln) -- if a new line, count it as an SLOC
if ln > lnow then -- new line # must be > old line #
sloc = sloc + 1; lnow = ln
end
end
for i = 1, #toklist do -- enumerate over all tokens
local tok, info, ln
= toklist[i], seminfolist[i], toklnlist[i]
if tok == "TK_KEYWORD" or tok == "TK_NAME" or -- significant
tok == "TK_NUMBER" or tok == "TK_OP" then
chk(ln)
-- Both TK_STRING and TK_LSTRING may be multi-line, hence, a loop
-- is needed in order to mark off lines one-by-one. Since llex.lua
-- currently returns the line number of the last part of the string,
-- we must subtract in order to get the starting line number.
elseif tok == "TK_STRING" then -- possible multi-line
local t = split(info)
ln = ln - #t + 1
for _ = 1, #t do
chk(ln); ln = ln + 1
end
elseif tok == "TK_LSTRING" then -- possible multi-line
local t = split(info)
ln = ln - #t + 1
for j = 1, #t do
if t[j] ~= "" then chk(ln) end
ln = ln + 1
end
-- Other tokens are comments or whitespace and are ignored.
end
end--for
print(srcfl..": "..sloc) -- display result
option.EXIT = true
end
return M

View File

@@ -0,0 +1,30 @@
---------
-- General utility functions.
--
-- **Note: This module is not part of public API!**
----
local ipairs = ipairs
local pairs = pairs
local M = {}
--- Returns a new table containing the contents of all the given tables.
-- Tables are iterated using @{pairs}, so this function is intended for tables
-- that represent *associative arrays*. Entries with duplicate keys are
-- overwritten with the values from a later table.
--
-- @tparam {table,...} ... The tables to merge.
-- @treturn table A new table.
function M.merge (...)
local result = {}
for _, tab in ipairs{...} do
for key, val in pairs(tab) do
result[key] = val
end
end
return result
end
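-- Illustrative usage (not part of the original source; the require path is
-- assumed):
--   local utils = require "luasrcdiet.utils"
--   utils.merge({ a = 1 }, { a = 2, b = 3 })  --> { a = 2, b = 3 }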
return M

File diff suppressed because it is too large

View File

@@ -0,0 +1,181 @@
---------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Convert between various code representation formats. Atomic
-- converters are written in extenso, others are composed automatically
-- by chaining the atomic ones together in a closure.
--
-- Supported formats are:
--
-- * srcfile: the name of a file containing sources.
-- * src: these sources as a single string.
-- * lexstream: a stream of lexemes.
-- * ast: an abstract syntax tree.
-- * proto: a (Yueliang) structure containing a high level
-- representation of bytecode. Largely based on the
-- Proto structure in Lua's VM
-- * bytecode: a string dump of the function, as taken by
-- loadstring() and produced by string.dump().
-- * function: an executable lua function in RAM.
--
--------------------------------------------------------------------------------
local checks = require 'checks'
local M = { }
--------------------------------------------------------------------------------
-- Order of the transformations. If 'a' is on the left of 'b', then an 'a' can
-- be transformed into a 'b' (but not the other way around).
-- M.sequence goes from numbers to format names, M.order goes from format
-- names to numbers.
--------------------------------------------------------------------------------
M.sequence = {
'srcfile', 'src', 'lexstream', 'ast', 'proto', 'bytecode', 'function' }
local arg_types = {
srcfile = { 'string', '?string' },
src = { 'string', '?string' },
lexstream = { 'lexer.stream', '?string' },
ast = { 'table', '?string' },
proto = { 'table', '?string' },
bytecode = { 'string', '?string' },
}
if false then
-- if defined, runs on every newly-generated AST
function M.check_ast(ast)
local function rec(x, n, parent)
if not x.lineinfo and parent.lineinfo then
local pp = require 'metalua.pprint'
pp.printf("WARNING: Missing lineinfo in child #%s `%s{...} of node at %s",
n, x.tag or '', tostring(parent.lineinfo))
end
for i, child in ipairs(x) do
if type(child)=='table' then rec(child, i, x) end
end
end
rec(ast, -1, { })
end
end
M.order= { }; for a,b in pairs(M.sequence) do M.order[b]=a end
local CONV = { } -- conversion metatable __index
function CONV :srcfile_to_src(x, name)
checks('metalua.compiler', 'string', '?string')
name = name or '@'..x
local f, msg = io.open (x, 'rb')
if not f then error(msg) end
local r, msg = f :read '*a'
if not r then error("Cannot read file '"..x.."': "..msg) end
f :close()
return r, name
end
function CONV :src_to_lexstream(src, name)
checks('metalua.compiler', 'string', '?string')
local r = self.parser.lexer :newstream (src, name)
return r, name
end
function CONV :lexstream_to_ast(lx, name)
checks('metalua.compiler', 'lexer.stream', '?string')
local r = self.parser.chunk(lx)
r.source = name
if M.check_ast then M.check_ast (r) end
return r, name
end
local bytecode_compiler = nil -- cache to avoid repeated `pcall(require(...))`
local function get_bytecode_compiler()
if bytecode_compiler then return bytecode_compiler else
local status, result = pcall(require, 'metalua.compiler.bytecode')
if status then
bytecode_compiler = result
return result
elseif string.match(result, "not found") then
error "Compilation only available with full Metalua"
else error (result) end
end
end
function CONV :ast_to_proto(ast, name)
checks('metalua.compiler', 'table', '?string')
return get_bytecode_compiler().ast_to_proto(ast, name), name
end
function CONV :proto_to_bytecode(proto, name)
return get_bytecode_compiler().proto_to_bytecode(proto), name
end
function CONV :bytecode_to_function(bc, name)
checks('metalua.compiler', 'string', '?string')
return loadstring(bc, name)
end
-- Create all sensible combinations
for i=1,#M.sequence do
local src = M.sequence[i]
for j=i+2, #M.sequence do
local dst = M.sequence[j]
local dst_name = src.."_to_"..dst
local my_arg_types = arg_types[src]
local functions = { }
for k=i, j-1 do
local name = M.sequence[k].."_to_"..M.sequence[k+1]
local f = assert(CONV[name], name)
table.insert (functions, f)
end
CONV[dst_name] = function(self, a, b)
checks('metalua.compiler', unpack(my_arg_types))
for _, f in ipairs(functions) do
a, b = f(self, a, b)
end
return a, b
end
--printf("Created M.%s out of %s", dst_name, table.concat(n, ', '))
end
end
--------------------------------------------------------------------------------
-- This one goes in the "wrong" direction, cannot be composed.
--------------------------------------------------------------------------------
function CONV :function_to_bytecode(...) return string.dump(...) end
function CONV :ast_to_src(...)
require 'metalua.loader' -- ast_to_string isn't written in plain lua
return require 'metalua.compiler.ast_to_src' (...)
end
local MT = { __index=CONV, __type='metalua.compiler' }
function M.new()
local parser = require 'metalua.compiler.parser' .new()
local self = { parser = parser }
setmetatable(self, MT)
return self
end
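-- Illustrative usage (not part of the original source):
--   local mlc = require 'metalua.compiler'.new()
--   local ast = mlc :src_to_ast ("return 1 + 1")
--   local f   = mlc :ast_to_function (ast)   -- needs the full Metalua
--   print(f())                               -- bytecode back-end installed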
return M

View File

@@ -0,0 +1,682 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-{ extension ('match', ...) }
local M = { }
M.__index = M
M.__call = |self, ...| self:run(...)
local pp=require 'metalua.pprint'
--------------------------------------------------------------------------------
-- Instantiate a new AST->source synthesizer
--------------------------------------------------------------------------------
function M.new ()
local self = {
_acc = { }, -- Accumulates pieces of source as strings
current_indent = 0, -- Current level of line indentation
indent_step = " " -- Indentation symbol, normally spaces or '\t'
}
return setmetatable (self, M)
end
--------------------------------------------------------------------------------
-- Run a synthesizer on the `ast' arg and return the source as a string.
-- Can also be used as a static method `M.run (ast)'; in this case,
-- a temporary synthesizer is instantiated on the fly.
--------------------------------------------------------------------------------
function M:run (ast)
if not ast then
self, ast = M.new(), self
end
self._acc = { }
self:node (ast)
return table.concat (self._acc)
end
--------------------------------------------------------------------------------
-- Accumulate a piece of source file in the synthesizer.
--------------------------------------------------------------------------------
function M:acc (x)
if x then table.insert (self._acc, x) end
end
--------------------------------------------------------------------------------
-- Accumulate an indented newline.
-- Jumps an extra line if indentation is 0, so that
-- toplevel definitions are separated by an extra empty line.
--------------------------------------------------------------------------------
function M:nl ()
if self.current_indent == 0 then self:acc "\n" end
self:acc ("\n" .. self.indent_step:rep (self.current_indent))
end
--------------------------------------------------------------------------------
-- Increase indentation and accumulate a new line.
--------------------------------------------------------------------------------
function M:nlindent ()
self.current_indent = self.current_indent + 1
self:nl ()
end
--------------------------------------------------------------------------------
-- Decrease indentation and accumulate a new line.
--------------------------------------------------------------------------------
function M:nldedent ()
self.current_indent = self.current_indent - 1
self:acc ("\n" .. self.indent_step:rep (self.current_indent))
end
--------------------------------------------------------------------------------
-- Keywords, which are illegal as identifiers.
--------------------------------------------------------------------------------
local keywords_list = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function", "if",
"in", "local", "nil", "not", "or",
"repeat", "return", "then", "true", "until",
"while" }
local keywords = { }
for _, kw in pairs(keywords_list) do keywords[kw]=true end
--------------------------------------------------------------------------------
-- Return true iff string `id' is a legal identifier name.
--------------------------------------------------------------------------------
local function is_ident (id)
return string['match'](id, "^[%a_][%w_]*$") and not keywords[id]
end
--------------------------------------------------------------------------------
-- Return true iff ast represents a legal function name for
-- syntax sugar ``function foo.bar.gnat() ... end'':
-- a series of nested string indexes, with an identifier as
-- the innermost node.
--------------------------------------------------------------------------------
local function is_idx_stack (ast)
match ast with
| `Id{ _ } -> return true
| `Index{ left, `String{ _ } } -> return is_idx_stack (left)
| _ -> return false
end
end
--------------------------------------------------------------------------------
-- Operator precedences, in increasing order.
-- This is not directly used, it's used to generate op_prec below.
--------------------------------------------------------------------------------
local op_preprec = {
{ "or", "and" },
{ "lt", "le", "eq", "ne" },
{ "concat" },
{ "add", "sub" },
{ "mul", "div", "mod" },
{ "unary", "not", "len" },
{ "pow" },
{ "index" } }
--------------------------------------------------------------------------------
-- operator --> precedence table, generated from op_preprec.
--------------------------------------------------------------------------------
local op_prec = { }
for prec, ops in ipairs (op_preprec) do
for _, op in ipairs (ops) do
op_prec[op] = prec
end
end
--------------------------------------------------------------------------------
-- operator --> source representation.
--------------------------------------------------------------------------------
local op_symbol = {
add = " + ", sub = " - ", mul = " * ",
div = " / ", mod = " % ", pow = " ^ ",
concat = " .. ", eq = " == ", ne = " ~= ",
lt = " < ", le = " <= ", ["and"] = " and ",
["or"] = " or ", ["not"] = "not ", len = "# " }
--------------------------------------------------------------------------------
-- Accumulate the source representation of AST `node' in
-- the synthesizer. Most of the work is done by delegating to
-- the method having the name of the AST tag.
-- If something can't be converted to normal sources, it's
-- instead dumped as a `-{ ... }' splice in the source accumulator.
--------------------------------------------------------------------------------
function M:node (node)
assert (self~=M and self._acc)
if node==nil then self:acc'<<error>>'
elseif not self.custom_printer or not self.custom_printer (self, node) then
if not node.tag then -- tagless (hence unindented) block.
self:list (node, self.nl)
else
local f = M[node.tag]
if type (f) == "function" then -- Delegate to tag method.
f (self, node, unpack (node))
elseif type (f) == "string" then -- tag string.
self:acc (f)
else -- No appropriate method, fall back to splice dumping.
-- This cannot happen in a plain Lua AST.
self:acc " -{ "
self:acc (pp.tostring (node, {metalua_tag=1, hide_hash=1}), 80)
self:acc " }"
end
end
end
end
function M:block(body)
if not self.custom_printer or not self.custom_printer (self, body) then
self:nlindent ()
self:list (body, self.nl)
self:nldedent ()
end
end
--------------------------------------------------------------------------------
-- Convert every node in the AST list `list' passed as 1st arg.
-- `sep' is an optional separator to be accumulated between each list element,
-- it can be a string or a synth method.
-- `start' is an optional number (default == 1), indicating which is the
-- first element of list to be converted, so that we can skip the beginning
-- of a list.
--------------------------------------------------------------------------------
function M:list (list, sep, start)
for i = start or 1, # list do
self:node (list[i])
if list[i + 1] then
if not sep then
elseif type (sep) == "function" then sep (self)
elseif type (sep) == "string" then self:acc (sep)
else error "Invalid list separator" end
end
end
end
--------------------------------------------------------------------------------
--
-- Tag methods.
-- ------------
--
-- Specific AST node dumping methods, associated to their node kinds
-- by their name, which is the corresponding AST tag.
-- synth:node() is in charge of delegating a node's treatment to the
-- appropriate tag method.
--
-- Such tag methods are called with the AST node as 1st arg.
-- As a convenience, the n node's children are passed as args #2 ... n+1.
--
-- There are several things that could be refactored into common subroutines
-- here: statement blocks dumping, function dumping...
-- However, given their small size and linear execution
-- (they basically perform series of :acc(), :node(), :list(),
-- :nl(), :nlindent() and :nldedent() calls), it seems more readable
-- to avoid multiplication of such tiny functions.
--
-- To make sense out of these, you need to know metalua's AST syntax, as
-- found in the reference manual or in metalua/doc/ast.txt.
--
--------------------------------------------------------------------------------
function M:Do (node)
self:acc "do"
self:block (node)
self:acc "end"
end
function M:Set (node)
match node with
| `Set{ { `Index{ lhs, `String{ method } } },
{ `Function{ { `Id "self", ... } == params, body } } }
if is_idx_stack (lhs) and is_ident (method) ->
-- ``function foo:bar(...) ... end'' --
self:acc "function "
self:node (lhs)
self:acc ":"
self:acc (method)
self:acc " ("
self:list (params, ", ", 2)
self:acc ")"
self:block (body)
self:acc "end"
| `Set{ { lhs }, { `Function{ params, body } } } if is_idx_stack (lhs) ->
-- ``function foo(...) ... end'' --
self:acc "function "
self:node (lhs)
self:acc " ("
self:list (params, ", ")
self:acc ")"
self:block (body)
self:acc "end"
| `Set{ { `Id{ lhs1name } == lhs1, ... } == lhs, rhs }
if not is_ident (lhs1name) ->
-- ``foo, ... = ...'' when foo is *not* a valid identifier.
-- In that case, the spliced 1st variable must get parentheses,
-- to be distinguished from a statement splice.
-- This cannot happen in a plain Lua AST.
self:acc "("
self:node (lhs1)
self:acc ")"
if lhs[2] then -- more than one lhs variable
self:acc ", "
self:list (lhs, ", ", 2)
end
self:acc " = "
self:list (rhs, ", ")
| `Set{ lhs, rhs } ->
-- ``... = ...'', no syntax sugar --
self:list (lhs, ", ")
self:acc " = "
self:list (rhs, ", ")
| `Set{ lhs, rhs, annot } ->
-- ``... = ...'', no syntax sugar, annotation --
local n = #lhs
for i=1,n do
local ell, a = lhs[i], annot[i]
self:node (ell)
if a then
self:acc ' #'
self:node(a)
end
if i~=n then self:acc ', ' end
end
self:acc " = "
self:list (rhs, ", ")
end
end
function M:While (node, cond, body)
self:acc "while "
self:node (cond)
self:acc " do"
self:block (body)
self:acc "end"
end
function M:Repeat (node, body, cond)
self:acc "repeat"
self:block (body)
self:acc "until "
self:node (cond)
end
function M:If (node)
for i = 1, #node-1, 2 do
-- for each ``if/then'' and ``elseif/then'' pair --
local cond, body = node[i], node[i+1]
self:acc (i==1 and "if " or "elseif ")
self:node (cond)
self:acc " then"
self:block (body)
end
-- odd number of children --> last one is an `else' clause --
if #node%2 == 1 then
self:acc "else"
self:block (node[#node])
end
self:acc "end"
end
function M:Fornum (node, var, first, last)
local body = node[#node]
self:acc "for "
self:node (var)
self:acc " = "
self:node (first)
self:acc ", "
self:node (last)
if #node==5 then -- 5 children --> child #4 is a step increment.
self:acc ", "
self:node (node[4])
end
self:acc " do"
self:block (body)
self:acc "end"
end
function M:Forin (node, vars, generators, body)
self:acc "for "
self:list (vars, ", ")
self:acc " in "
self:list (generators, ", ")
self:acc " do"
self:block (body)
self:acc "end"
end
function M:Local (node, lhs, rhs, annots)
if next (lhs) then
self:acc "local "
if annots then
local n = #lhs
for i=1, n do
self:node (lhs[i])
local a = annots[i]
if a then
self:acc ' #'
self:node (a)
end
if i~=n then self:acc ', ' end
end
else
self:list (lhs, ", ")
end
if rhs[1] then
self:acc " = "
self:list (rhs, ", ")
end
else -- Can't create a local statement with 0 variables in plain Lua
self:acc (pp.tostring (node, {metalua_tag=1, hide_hash=1, fix_indent=2}))
end
end
function M:Localrec (node, lhs, rhs)
match node with
| `Localrec{ { `Id{name} }, { `Function{ params, body } } }
if is_ident (name) ->
-- ``local function name() ... end'' --
self:acc "local function "
self:acc (name)
self:acc " ("
self:list (params, ", ")
self:acc ")"
self:block (body)
self:acc "end"
| _ ->
-- Other localrec are unprintable ==> splice them --
-- This cannot happen in a plain Lua AST. --
self:acc "-{ "
self:acc (pp.tostring (node, {metalua_tag=1, hide_hash=1, fix_indent=2}))
self:acc " }"
end
end
function M:Call (node, f)
-- single string or table literal arg ==> no need for parentheses. --
local parens
match node with
| `Call{ _, `String{_} }
| `Call{ _, `Table{...}} -> parens = false
| _ -> parens = true
end
self:node (f)
self:acc (parens and " (" or " ")
self:list (node, ", ", 2) -- skip `f'.
self:acc (parens and ")")
end
function M:Invoke (node, f, method)
-- single string or table literal arg ==> no need for parentheses. --
local parens
match node with
| `Invoke{ _, _, `String{_} }
| `Invoke{ _, _, `Table{...}} -> parens = false
| _ -> parens = true
end
self:node (f)
self:acc ":"
self:acc (method[1])
self:acc (parens and " (" or " ")
self:list (node, ", ", 3) -- Skip args #1 and #2, object and method name.
self:acc (parens and ")")
end
function M:Return (node)
self:acc "return "
self:list (node, ", ")
end
M.Break = "break"
M.Nil = "nil"
M.False = "false"
M.True = "true"
M.Dots = "..."
function M:Number (node, n)
self:acc (tostring (n))
end
function M:String (node, str)
-- format "%q" prints '\n' in an umpractical way IMO,
-- so this is fixed with the :gsub( ) call.
self:acc (string.format ("%q", str):gsub ("\\\n", "\\n"))
end
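-- Illustrative sketch of the fix above (hypothetical helper, defined but
-- never called): plain "%q" renders the newline in "a\nb" as a backslash
-- followed by a real line break; the gsub rewrites that into the two
-- characters "\n".
local function _quote_string_example ()
    local q = string.format ("%q", "a\nb") :gsub ("\\\n", "\\n")
    assert (q == [["a\nb"]])
end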
function M:Function (node, params, body, annots)
self:acc "function ("
if annots then
local n = #params
for i=1,n do
local p, a = params[i], annots[i]
self:node(p)
if a then
self:acc " #"
self:node(a)
end
if i~=n then self:acc ', ' end
end
else
self:list (params, ", ")
end
self:acc ")"
self:block (body)
self:acc "end"
end
function M:Table (node)
if not node[1] then self:acc "{ }" else
self:acc "{"
if #node > 1 then self:nlindent () else self:acc " " end
for i, elem in ipairs (node) do
match elem with
| `Pair{ `String{ key }, value } if is_ident (key) ->
-- ``key = value''. --
self:acc (key)
self:acc " = "
self:node (value)
| `Pair{ key, value } ->
-- ``[key] = value''. --
self:acc "["
self:node (key)
self:acc "] = "
self:node (value)
| _ ->
-- ``value''. --
self:node (elem)
end
if node [i+1] then
self:acc ","
self:nl ()
end
end
if #node > 1 then self:nldedent () else self:acc " " end
self:acc "}"
end
end
function M:Op (node, op, a, b)
-- Transform ``not (a == b)'' into ``a ~= b''. --
match node with
| `Op{ "not", `Op{ "eq", _a, _b } }
| `Op{ "not", `Paren{ `Op{ "eq", _a, _b } } } ->
op, a, b = "ne", _a, _b
| _ ->
end
if b then -- binary operator.
local left_paren, right_paren
match a with
| `Op{ op_a, ...} if op_prec[op] >= op_prec[op_a] -> left_paren = true
| _ -> left_paren = false
end
match b with -- FIXME: might not work with right assoc operators ^ and ..
| `Op{ op_b, ...} if op_prec[op] >= op_prec[op_b] -> right_paren = true
| _ -> right_paren = false
end
self:acc (left_paren and "(")
self:node (a)
self:acc (left_paren and ")")
self:acc (op_symbol [op])
self:acc (right_paren and "(")
self:node (b)
self:acc (right_paren and ")")
else -- unary operator.
local paren
match a with
| `Op{ op_a, ... } if op_prec[op] >= op_prec[op_a] -> paren = true
| _ -> paren = false
end
self:acc (op_symbol[op])
self:acc (paren and "(")
self:node (a)
self:acc (paren and ")")
end
end
function M:Paren (node, content)
self:acc "("
self:node (content)
self:acc ")"
end
function M:Index (node, table, key)
local paren_table
-- Check precedence, see if parens are needed around the table --
match table with
| `Op{ op, ... } if op_prec[op] < op_prec.index -> paren_table = true
| _ -> paren_table = false
end
self:acc (paren_table and "(")
self:node (table)
self:acc (paren_table and ")")
match key with
| `String{ field } if is_ident (field) ->
-- ``table.key''. --
self:acc "."
self:acc (field)
| _ ->
-- ``table [key]''. --
self:acc "["
self:node (key)
self:acc "]"
end
end
function M:Id (node, name)
if is_ident (name) then
self:acc (name)
else -- Unprintable identifier, fall back to splice representation.
-- This cannot happen in a plain Lua AST.
self:acc "-{`Id "
self:String (node, name)
self:acc "}"
end
end
M.TDyn = '*'
M.TDynbar = '**'
M.TPass = 'pass'
M.TField = 'field'
M.TIdbar = M.TId
M.TReturn = M.Return
function M:TId (node, name) self:acc(name) end
function M:TCatbar(node, te, tebar)
self:acc'('
self:node(te)
self:acc'|'
self:tebar(tebar)
self:acc')'
end
function M:TFunction(node, p, r)
self:tebar(p)
self:acc '->'
self:tebar(r)
end
function M:TTable (node, default, pairs)
self:acc '['
self:list (pairs, ', ')
if default.tag~='TField' then
self:acc '|'
self:node (default)
end
self:acc ']'
end
function M:TPair (node, k, v)
self:node (k)
self:acc '='
self:node (v)
end
function M:TIdbar (node, name)
self :acc (name)
end
function M:TCatbar (node, a, b)
self:node(a)
self:acc ' ++ '
self:node(b)
end
function M:tebar(node)
if node.tag then self:node(node) else
self:acc '('
self:list(node, ', ')
self:acc ')'
end
end
function M:TUnkbar(node, name)
self:acc '~~'
self:acc (name)
end
function M:TUnk(node, name)
self:acc '~'
self:acc (name)
end
for name, tag in pairs{ const='TConst', var='TVar', currently='TCurrently', just='TJust' } do
M[tag] = function(self, node, te)
self:acc (name..' ')
self:node(te)
end
end
return M

View File

@ -0,0 +1,29 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local compile = require 'metalua.compiler.bytecode.compile'
local ldump = require 'metalua.compiler.bytecode.ldump'
local M = { }
M.ast_to_proto = compile.ast_to_proto
M.proto_to_bytecode = ldump.dump_string
M.proto_to_file = ldump.dump_file
return M

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,448 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2005-2013 Kein-Hong Man, Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Kein-Hong Man - Initial implementation for Lua 5.0, part of Yueliang
-- Fabien Fleutot - Port to Lua 5.1, integration with Metalua
--
-------------------------------------------------------------------------------
--[[--------------------------------------------------------------------
ldump.lua
Save bytecodes in Lua
This file is part of Yueliang.
Copyright (c) 2005 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
------------------------------------------------------------------------
[FF] Slightly modified, mainly to produce Lua 5.1 bytecode.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- Notes:
-- * LUA_NUMBER (double), byte order (little endian) and some other
-- header values hard-coded; see other notes below...
-- * One significant difference is that instructions are still in table
-- form (with OP/A/B/C/Bx fields) and luaP:Instruction() is needed to
-- convert them into 4-char strings
-- * Deleted:
-- luaU:DumpVector: folded into DumpLines, DumpCode
-- * Added:
-- luaU:endianness() (from lundump.c)
-- luaU:make_setS: create a chunk writer that writes to a string
-- luaU:make_setF: create a chunk writer that writes to a file
-- (lua.h contains a typedef for a Chunkwriter pointer, and
-- a Lua-based implementation exists, writer() in lstrlib.c)
-- luaU:from_double(x): encode double value for writing
-- luaU:from_int(x): encode integer value for writing
-- (error checking is limited for these conversion functions)
-- (double conversion does not support denormals or NaNs)
-- luaU:ttype(o) (from lobject.h)
----------------------------------------------------------------------]]
local luaP = require 'metalua.compiler.bytecode.lopcodes'
local M = { }
local format = { }
format.header = string.dump(function()end):sub(1, 12)
format.little_endian, format.int_size,
format.size_t_size, format.instr_size,
format.number_size, format.integral = format.header:byte(7, 12)
format.little_endian = format.little_endian~=0
format.integral = format.integral ~=0
assert(format.integral or format.number_size==8, "Number format not supported by dumper")
assert(format.little_endian, "Big endian architectures not supported by dumper")
--requires luaP
local luaU = { }
M.luaU = luaU
luaU.format = format
-- constants used by dumper
luaU.LUA_TNIL = 0
luaU.LUA_TBOOLEAN = 1
luaU.LUA_TNUMBER = 3 -- (all in lua.h)
luaU.LUA_TSTRING = 4
luaU.LUA_TNONE = -1
-- definitions for headers of binary files
--luaU.LUA_SIGNATURE = "\27Lua" -- binary files start with "<esc>Lua"
--luaU.VERSION = 81 -- 0x50; last format change was in 5.0
--luaU.FORMAT_VERSION = 0 -- 0 is official version. yeah I know I'm a liar.
-- a multiple of PI for testing native format
-- multiplying by 1E7 gives non-trivial integer values
--luaU.TEST_NUMBER = 3.14159265358979323846E7
--[[--------------------------------------------------------------------
-- Additional functions to handle chunk writing
-- * to use make_setS and make_setF, see test_ldump.lua elsewhere
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- works like the lobject.h version except that TObject used in these
-- scripts only has a 'value' field, no 'tt' field (native types used)
------------------------------------------------------------------------
function luaU:ttype(o)
local tt = type(o.value)
if tt == "number" then return self.LUA_TNUMBER
elseif tt == "string" then return self.LUA_TSTRING
elseif tt == "nil" then return self.LUA_TNIL
elseif tt == "boolean" then return self.LUA_TBOOLEAN
else
return self.LUA_TNONE -- the rest should not appear
end
end
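-- Illustrative sketch (hypothetical helper, defined but never called):
-- ttype looks at the Lua type of the wrapped 'value' field, so a numeric
-- constant maps to LUA_TNUMBER, a string to LUA_TSTRING, nil to LUA_TNIL.
local function _ttype_example()
  assert(luaU:ttype{ value = 42 }  == luaU.LUA_TNUMBER)
  assert(luaU:ttype{ value = "x" } == luaU.LUA_TSTRING)
  assert(luaU:ttype{ value = nil } == luaU.LUA_TNIL)
end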
------------------------------------------------------------------------
-- create a chunk writer that writes to a string
-- * returns the writer function and a table containing the string
-- * to get the final result, look in buff.data
------------------------------------------------------------------------
function luaU:make_setS()
local buff = {}
buff.data = ""
local writer =
function(s, buff) -- chunk writer
if not s then return end
buff.data = buff.data..s
end
return writer, buff
end
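-- Usage sketch (hypothetical helper, defined but never called): each call
-- to the writer appends its chunk to buff.data, so the final bytecode is
-- simply the concatenation of everything written.
local function _make_setS_example()
  local writer, buff = luaU:make_setS()
  writer("foo", buff)
  writer("bar", buff)
  assert(buff.data == "foobar")
end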
------------------------------------------------------------------------
-- create a chunk writer that writes to a file
-- * returns the writer function and a table containing the file handle
-- * if a nil is passed, then writer should close the open file
------------------------------------------------------------------------
function luaU:make_setF(filename)
local buff = {}
buff.h = io.open(filename, "wb")
if not buff.h then return nil end
local writer =
function(s, buff) -- chunk writer
if not buff.h then return end
if not s then buff.h:close(); return end
buff.h:write(s)
end
return writer, buff
end
-----------------------------------------------------------------------
-- converts an IEEE754 double number to an 8-byte little-endian string
-- * luaU:from_double() and luaU:from_int() are from ChunkBake project
-- * supports +/- Infinity, but not denormals or NaNs
-----------------------------------------------------------------------
function luaU:from_double(x)
local function grab_byte(v)
return math.floor(v / 256),
string.char(math.mod(math.floor(v), 256))
end
local sign = 0
if x < 0 then sign = 1; x = -x end
local mantissa, exponent = math.frexp(x)
if x == 0 then -- zero
mantissa, exponent = 0, 0
elseif x == 1/0 then
mantissa, exponent = 0, 2047
else
mantissa = (mantissa * 2 - 1) * math.ldexp(0.5, 53)
exponent = exponent + 1022
end
local v, byte = "" -- convert to bytes
x = mantissa
for i = 1,6 do
x, byte = grab_byte(x); v = v..byte -- 47:0
end
x, byte = grab_byte(exponent * 16 + x); v = v..byte -- 55:48
x, byte = grab_byte(sign * 128 + x); v = v..byte -- 63:56
return v
end
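-- Worked example (hypothetical helper, defined but never called): 1.0 is
-- 0x3FF0000000000000 in IEEE754, so its little-endian encoding is the byte
-- sequence 00 00 00 00 00 00 F0 3F.
local function _from_double_example()
  assert(luaU:from_double(1.0) == string.char(0, 0, 0, 0, 0, 0, 0xF0, 0x3F))
end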
-----------------------------------------------------------------------
-- converts a number to a little-endian 32-bit integer string
-- * input value assumed to not overflow, can be signed/unsigned
-----------------------------------------------------------------------
function luaU:from_int(x, size)
local v = ""
x = math.floor(x)
if x >= 0 then
for i = 1, size do
v = v..string.char(math.mod(x, 256)); x = math.floor(x / 256)
end
else -- x < 0
x = -x
local carry = 1
for i = 1, size do
local c = 255 - math.mod(x, 256) + carry
if c == 256 then c = 0; carry = 1 else carry = 0 end
v = v..string.char(c); x = math.floor(x / 256)
end
end
return v
end
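-- Worked example (hypothetical helper, defined but never called): 4-byte
-- little-endian encodings of 1 and of -1 (two's complement).
local function _from_int_example()
  assert(luaU:from_int(1, 4)  == string.char(1, 0, 0, 0))
  assert(luaU:from_int(-1, 4) == string.char(255, 255, 255, 255))
end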
--[[--------------------------------------------------------------------
-- Functions to make a binary chunk
-- * many functions have the size parameter removed, since output is
-- in the form of a string and some sizes are implicit or hard-coded
-- * luaU:DumpVector has been deleted (used in DumpCode & DumpLines)
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- dump a block of literal bytes
------------------------------------------------------------------------
function luaU:DumpLiteral(s, D) self:DumpBlock(s, D) end
--[[--------------------------------------------------------------------
-- struct DumpState:
-- L -- lua_State (not used in this script)
-- write -- lua_Chunkwriter (chunk writer function)
-- data -- void* (chunk writer context or data already written)
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- dumps a block of bytes
-- * lua_unlock(D.L), lua_lock(D.L) deleted
------------------------------------------------------------------------
function luaU:DumpBlock(b, D) D.write(b, D.data) end
------------------------------------------------------------------------
-- dumps a single byte
------------------------------------------------------------------------
function luaU:DumpByte(y, D)
self:DumpBlock(string.char(y), D)
end
------------------------------------------------------------------------
-- dumps a signed integer of size `format.int_size` (for int)
------------------------------------------------------------------------
function luaU:DumpInt(x, D)
self:DumpBlock(self:from_int(x, format.int_size), D)
end
------------------------------------------------------------------------
-- dumps an unsigned integer of size `format.size_t_size` (for size_t)
------------------------------------------------------------------------
function luaU:DumpSize(x, D)
self:DumpBlock(self:from_int(x, format.size_t_size), D)
end
------------------------------------------------------------------------
-- dumps a LUA_NUMBER; can be an int or double depending on the VM.
------------------------------------------------------------------------
function luaU:DumpNumber(x, D)
if format.integral then
self:DumpBlock(self:from_int(x, format.number_size), D)
else
self:DumpBlock(self:from_double(x), D)
end
end
------------------------------------------------------------------------
-- dumps a Lua string
------------------------------------------------------------------------
function luaU:DumpString(s, D)
if s == nil then
self:DumpSize(0, D)
else
s = s.."\0" -- include trailing '\0'
self:DumpSize(string.len(s), D)
self:DumpBlock(s, D)
end
end
------------------------------------------------------------------------
-- dumps instruction block from function prototype
------------------------------------------------------------------------
function luaU:DumpCode(f, D)
local n = f.sizecode
self:DumpInt(n, D)
--was DumpVector
for i = 0, n - 1 do
self:DumpBlock(luaP:Instruction(f.code[i]), D)
end
end
------------------------------------------------------------------------
-- dumps local variable names from function prototype
------------------------------------------------------------------------
function luaU:DumpLocals(f, D)
local n = f.sizelocvars
self:DumpInt(n, D)
for i = 0, n - 1 do
-- Dirty temporary fix:
-- `Stat{ } keeps properly count of the number of local vars,
-- but fails to keep score of their debug info (names).
-- It therefore might happen that #f.localvars < f.sizelocvars, or
-- that a variable's startpc and endpc fields are left unset.
-- FIXME: This might not be needed anymore, check the bug report
-- by J. Belmonte.
local var = f.locvars[i]
if not var then break end
-- printf("[DUMPLOCALS] dumping local var #%i = %s", i, table.tostring(var))
self:DumpString(var.varname, D)
self:DumpInt(var.startpc or 0, D)
self:DumpInt(var.endpc or 0, D)
end
end
------------------------------------------------------------------------
-- dumps line information from function prototype
------------------------------------------------------------------------
function luaU:DumpLines(f, D)
local n = f.sizelineinfo
self:DumpInt(n, D)
--was DumpVector
for i = 0, n - 1 do
self:DumpInt(f.lineinfo[i], D) -- was DumpBlock
--print(i, f.lineinfo[i])
end
end
------------------------------------------------------------------------
-- dump upvalue names from function prototype
------------------------------------------------------------------------
function luaU:DumpUpvalues(f, D)
local n = f.sizeupvalues
self:DumpInt(n, D)
for i = 0, n - 1 do
self:DumpString(f.upvalues[i], D)
end
end
------------------------------------------------------------------------
-- dump constant pool from function prototype
-- * nvalue(o) and tsvalue(o) macros removed
------------------------------------------------------------------------
function luaU:DumpConstants(f, D)
local n = f.sizek
self:DumpInt(n, D)
for i = 0, n - 1 do
local o = f.k[i] -- TObject
local tt = self:ttype(o)
assert (tt >= 0)
self:DumpByte(tt, D)
if tt == self.LUA_TNUMBER then
self:DumpNumber(o.value, D)
elseif tt == self.LUA_TSTRING then
self:DumpString(o.value, D)
elseif tt == self.LUA_TBOOLEAN then
self:DumpByte (o.value and 1 or 0, D)
elseif tt == self.LUA_TNIL then
else
assert(false) -- cannot happen
end
end
end
function luaU:DumpProtos (f, D)
local n = f.sizep
assert (n)
self:DumpInt(n, D)
for i = 0, n - 1 do
self:DumpFunction(f.p[i], f.source, D)
end
end
function luaU:DumpDebug(f, D)
self:DumpLines(f, D)
self:DumpLocals(f, D)
self:DumpUpvalues(f, D)
end
------------------------------------------------------------------------
-- dump child function prototypes from function prototype
--FF completely reworked for 5.1 format
------------------------------------------------------------------------
function luaU:DumpFunction(f, p, D)
-- print "Dumping function:"
-- table.print(f, 60)
local source = f.source
if source == p then source = nil end
self:DumpString(source, D)
self:DumpInt(f.lineDefined, D)
self:DumpInt(f.lastLineDefined or 42, D)
self:DumpByte(f.nups, D)
self:DumpByte(f.numparams, D)
self:DumpByte(f.is_vararg, D)
self:DumpByte(f.maxstacksize, D)
self:DumpCode(f, D)
self:DumpConstants(f, D)
self:DumpProtos( f, D)
self:DumpDebug(f, D)
end
------------------------------------------------------------------------
-- dump Lua header section (some sizes hard-coded)
--FF: updated for version 5.1
------------------------------------------------------------------------
function luaU:DumpHeader(D)
self:DumpLiteral(format.header, D)
end
------------------------------------------------------------------------
-- dump function as precompiled chunk
-- * w, data are created from make_setS, make_setF
--FF: suppressed extraneous [L] param
------------------------------------------------------------------------
function luaU:dump (Main, w, data)
local D = {} -- DumpState
D.write = w
D.data = data
self:DumpHeader(D)
self:DumpFunction(Main, nil, D)
-- added: for a chunk writer writing to a file, this final call with
-- nil data is to indicate to the writer to close the file
D.write(nil, D.data)
end
------------------------------------------------------------------------
-- find byte order (from lundump.c)
-- * hard-coded to little-endian
------------------------------------------------------------------------
function luaU:endianness()
return 1
end
-- FIXME: ugly concat-base generation in [make_setS], bufferize properly!
function M.dump_string (proto)
local writer, buff = luaU:make_setS()
luaU:dump (proto, writer, buff)
return buff.data
end
-- FIXME: [make_setS] sucks, perform synchronous file writing
-- Now unused
function M.dump_file (proto, filename)
local writer, buff = luaU:make_setS()
luaU:dump (proto, writer, buff)
local file = io.open (filename, "wb")
file:write (buff.data)
io.close(file)
--if UNIX_SHARPBANG then os.execute ("chmod a+x "..filename) end
end
return M

View File

@ -0,0 +1,442 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2005-2013 Kein-Hong Man, Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Kein-Hong Man - Initial implementation for Lua 5.0, part of Yueliang
-- Fabien Fleutot - Port to Lua 5.1, integration with Metalua
--
-------------------------------------------------------------------------------
--[[--------------------------------------------------------------------
$Id$
lopcodes.lua
Lua 5 virtual machine opcodes in Lua
This file is part of Yueliang.
Copyright (c) 2005 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
------------------------------------------------------------------------
[FF] Slightly modified, mainly to produce Lua 5.1 bytecode.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- Notes:
-- * an Instruction is a table with OP, A, B, C, Bx elements; this
-- should allow instruction handling to work with doubles and ints
-- * Added:
-- luaP:Instruction(i): convert field elements to a 4-char string
-- luaP:DecodeInst(x): convert 4-char string into field elements
-- * WARNING luaP:Instruction outputs instructions encoded in little-
-- endian form and field size and positions are hard-coded
----------------------------------------------------------------------]]
local function debugf() end
local luaP = { }
--[[
===========================================================================
We assume that instructions are unsigned numbers.
All instructions have an opcode in the first 6 bits.
Instructions can have the following fields:
'A' : 8 bits
'B' : 9 bits
'C' : 9 bits
'Bx' : 18 bits ('B' and 'C' together)
'sBx' : signed Bx
A signed argument is represented in excess K; that is, the number
value is the unsigned value minus K. K is exactly the maximum value
for that argument (so that -max is represented by 0, and +max is
represented by 2*max), which is half the maximum for the corresponding
unsigned argument.
===========================================================================
--]]
luaP.OpMode = {"iABC", "iABx", "iAsBx"} -- basic instruction format
------------------------------------------------------------------------
-- size and position of opcode arguments.
-- * WARNING size and position is hard-coded elsewhere in this script
------------------------------------------------------------------------
luaP.SIZE_C = 9
luaP.SIZE_B = 9
luaP.SIZE_Bx = luaP.SIZE_C + luaP.SIZE_B
luaP.SIZE_A = 8
luaP.SIZE_OP = 6
luaP.POS_C = luaP.SIZE_OP
luaP.POS_B = luaP.POS_C + luaP.SIZE_C
luaP.POS_Bx = luaP.POS_C
luaP.POS_A = luaP.POS_B + luaP.SIZE_B
--FF from 5.1
luaP.BITRK = 2^(luaP.SIZE_B - 1)
function luaP:ISK(x) return x >= self.BITRK end
luaP.MAXINDEXRK = luaP.BITRK - 1
function luaP:RKASK(x)
if x < self.BITRK then return x+self.BITRK else return x end
end
------------------------------------------------------------------------
-- limits for opcode arguments.
-- we use (signed) int to manipulate most arguments,
-- so they must fit in BITS_INT-1 bits (-1 for sign)
------------------------------------------------------------------------
-- removed "#if SIZE_Bx < BITS_INT-1" test, assume this script is
-- running on a Lua VM with double or int as LUA_NUMBER
luaP.MAXARG_Bx = math.ldexp(1, luaP.SIZE_Bx) - 1
luaP.MAXARG_sBx = math.floor(luaP.MAXARG_Bx / 2) -- 'sBx' is signed
luaP.MAXARG_A = math.ldexp(1, luaP.SIZE_A) - 1
luaP.MAXARG_B = math.ldexp(1, luaP.SIZE_B) - 1
luaP.MAXARG_C = math.ldexp(1, luaP.SIZE_C) - 1
-- creates a mask with 'n' 1 bits at position 'p'
-- MASK1(n,p) deleted
-- creates a mask with 'n' 0 bits at position 'p'
-- MASK0(n,p) deleted
--[[--------------------------------------------------------------------
Visual representation for reference:
 31                          0    bit position
+-----+-----+-----+----------+
|  B  |  C  |  A  |  Opcode  |    iABC format
+-----+-----+-----+----------+
-  9  -  9  -  8  -    6     -    field sizes
+-----------+-----+----------+
|   [s]Bx   |  A  |  Opcode  |    iABx | iAsBx format
+-----------+-----+----------+
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- the following macros help to manipulate instructions
-- * changed to a table object representation, very clean compared to
-- the [nightmare] alternatives of using a number or a string
------------------------------------------------------------------------
-- these accept or return opcodes in the form of string names
function luaP:GET_OPCODE(i) return self.ROpCode[i.OP] end
function luaP:SET_OPCODE(i, o) i.OP = self.OpCode[o] end
function luaP:GETARG_A(i) return i.A end
function luaP:SETARG_A(i, u) i.A = u end
function luaP:GETARG_B(i) return i.B end
function luaP:SETARG_B(i, b) i.B = b end
function luaP:GETARG_C(i) return i.C end
function luaP:SETARG_C(i, b) i.C = b end
function luaP:GETARG_Bx(i) return i.Bx end
function luaP:SETARG_Bx(i, b) i.Bx = b end
function luaP:GETARG_sBx(i) return i.Bx - self.MAXARG_sBx end
function luaP:SETARG_sBx(i, b) i.Bx = b + self.MAXARG_sBx end
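-- Worked example (hypothetical helper, defined but never called): with an
-- 18-bit Bx field, K = MAXARG_sBx = 131071, so a jump offset of -1 is
-- stored as 131070 and recovered unchanged by GETARG_sBx.
local function _sBx_example()
  local i = { OP = 0, A = 0, Bx = 0 }
  luaP:SETARG_sBx(i, -1)
  assert(i.Bx == 131070)
  assert(luaP:GETARG_sBx(i) == -1)
end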
function luaP:CREATE_ABC(o,a,b,c)
return {OP = self.OpCode[o], A = a, B = b, C = c}
end
function luaP:CREATE_ABx(o,a,bc)
return {OP = self.OpCode[o], A = a, Bx = bc}
end
------------------------------------------------------------------------
-- Bit shuffling stuffs
------------------------------------------------------------------------
if false and pcall (require, 'bit') then
------------------------------------------------------------------------
-- Return a 4-char string little-endian encoded form of an instruction
------------------------------------------------------------------------
function luaP:Instruction(i)
--FIXME
end
else
------------------------------------------------------------------------
-- Version without bit manipulation library.
------------------------------------------------------------------------
local p2 = {1,2,4,8,16,32,64,128,256, 512, 1024, 2048, 4096}
-- keeps [n] bits from [x]
local function keep (x, n) return x % p2[n+1] end
-- shifts bits of [x] [n] places to the right
local function srb (x,n) return math.floor (x / p2[n+1]) end
-- shifts bits of [x] [n] places to the left
local function slb (x,n) return x * p2[n+1] end
------------------------------------------------------------------------
-- Return a 4-char string little-endian encoded form of an instruction
------------------------------------------------------------------------
function luaP:Instruction(i)
-- printf("Instr->string: %s %s", self.opnames[i.OP], table.tostring(i))
local c0, c1, c2, c3
-- change to OP/A/B/C format if needed
if i.Bx then i.C = keep (i.Bx, 9); i.B = srb (i.Bx, 9) end
-- c0 = 6B from opcode + 2LSB from A (flushed to MSB)
c0 = i.OP + slb (keep (i.A, 2), 6)
-- c1 = 6MSB from A + 2LSB from C (flushed to MSB)
c1 = srb (i.A, 2) + slb (keep (i.C, 2), 6)
-- c2 = 7MSB from C + 1LSB from B (flushed to MSB)
c2 = srb (i.C, 2) + slb (keep (i.B, 1), 7)
-- c3 = 8MSB from B
c3 = srb (i.B, 1)
--printf ("Instruction: %s %s", self.opnames[i.OP], tostringv (i))
--printf ("Bin encoding: %x %x %x %x", c0, c1, c2, c3)
return string.char(c0, c1, c2, c3)
end
end
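-- Worked example (hypothetical helper, defined but never called, and only
-- valid once luaP.OpCode has been filled in further below): MOVE with A=1,
-- B=0, C=0 packs into the 32-bit word 0x00000040 (opcode in bits 0-5, A in
-- bits 6-13), which is emitted little-endian as the bytes 40 00 00 00.
local function _instruction_example()
  local i = luaP:CREATE_ABC("OP_MOVE", 1, 0, 0)
  assert(luaP:Instruction(i) == string.char(0x40, 0x00, 0x00, 0x00))
end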
------------------------------------------------------------------------
-- decodes a 4-char little-endian string into an instruction struct
------------------------------------------------------------------------
function luaP:DecodeInst(x)
error "Not implemented"
end
------------------------------------------------------------------------
-- invalid register that fits in 8 bits
------------------------------------------------------------------------
luaP.NO_REG = luaP.MAXARG_A
------------------------------------------------------------------------
-- R(x) - register
-- Kst(x) - constant (in constant table)
-- RK(x) == if x < MAXSTACK then R(x) else Kst(x-MAXSTACK)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- grep "ORDER OP" if you change these enums
------------------------------------------------------------------------
--[[--------------------------------------------------------------------
Lua virtual machine opcodes (enum OpCode):
------------------------------------------------------------------------
name          args    description
------------------------------------------------------------------------
OP_MOVE       A B     R(A) := R(B)
OP_LOADK      A Bx    R(A) := Kst(Bx)
OP_LOADBOOL   A B C   R(A) := (Bool)B; if (C) PC++
OP_LOADNIL    A B     R(A) := ... := R(B) := nil
OP_GETUPVAL   A B     R(A) := UpValue[B]
OP_GETGLOBAL  A Bx    R(A) := Gbl[Kst(Bx)]
OP_GETTABLE   A B C   R(A) := R(B)[RK(C)]
OP_SETGLOBAL  A Bx    Gbl[Kst(Bx)] := R(A)
OP_SETUPVAL   A B     UpValue[B] := R(A)
OP_SETTABLE   A B C   R(A)[RK(B)] := RK(C)
OP_NEWTABLE   A B C   R(A) := {} (size = B,C)
OP_SELF       A B C   R(A+1) := R(B); R(A) := R(B)[RK(C)]
OP_ADD        A B C   R(A) := RK(B) + RK(C)
OP_SUB        A B C   R(A) := RK(B) - RK(C)
OP_MUL        A B C   R(A) := RK(B) * RK(C)
OP_DIV        A B C   R(A) := RK(B) / RK(C)
OP_POW        A B C   R(A) := RK(B) ^ RK(C)
OP_UNM        A B     R(A) := -R(B)
OP_NOT        A B     R(A) := not R(B)
OP_CONCAT     A B C   R(A) := R(B).. ... ..R(C)
OP_JMP        sBx     PC += sBx
OP_EQ         A B C   if ((RK(B) == RK(C)) ~= A) then pc++
OP_LT         A B C   if ((RK(B) < RK(C)) ~= A) then pc++
OP_LE         A B C   if ((RK(B) <= RK(C)) ~= A) then pc++
OP_TEST       A B C   if (R(B) <=> C) then R(A) := R(B) else pc++
OP_CALL       A B C   R(A), ... ,R(A+C-2) := R(A)(R(A+1), ... ,R(A+B-1))
OP_TAILCALL   A B C   return R(A)(R(A+1), ... ,R(A+B-1))
OP_RETURN     A B     return R(A), ... ,R(A+B-2) (see note)
OP_FORLOOP    A sBx   R(A)+=R(A+2); if R(A) <?= R(A+1) then PC+= sBx
OP_TFORLOOP   A C     R(A+2), ... ,R(A+2+C) := R(A)(R(A+1), R(A+2));
                      if R(A+2) ~= nil then pc++
OP_TFORPREP   A sBx   if type(R(A)) == table then R(A+1):=R(A), R(A):=next;
                      PC += sBx
OP_SETLIST    A Bx    R(A)[Bx-Bx%FPF+i] := R(A+i), 1 <= i <= Bx%FPF+1
OP_SETLISTO   A Bx    (see note)
OP_CLOSE      A       close all variables in the stack up to (>=) R(A)
OP_CLOSURE    A Bx    R(A) := closure(KPROTO[Bx], R(A), ... ,R(A+n))
----------------------------------------------------------------------]]
luaP.opnames = {} -- opcode names
luaP.OpCode = {} -- lookup name -> number
luaP.ROpCode = {} -- lookup number -> name
local i = 0
for v in string.gfind([[
MOVE -- 0
LOADK
LOADBOOL
LOADNIL
GETUPVAL
GETGLOBAL -- 5
GETTABLE
SETGLOBAL
SETUPVAL
SETTABLE
NEWTABLE -- 10
SELF
ADD
SUB
MUL
DIV -- 15
MOD
POW
UNM
NOT
LEN -- 20
CONCAT
JMP
EQ
LT
LE -- 25
TEST
TESTSET
CALL
TAILCALL
RETURN -- 30
FORLOOP
FORPREP
TFORLOOP
SETLIST
CLOSE -- 35
CLOSURE
VARARG
]], "[%a]+") do
local n = "OP_"..v
luaP.opnames[i] = v
luaP.OpCode[n] = i
luaP.ROpCode[i] = n
i = i + 1
end
luaP.NUM_OPCODES = i
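-- Sanity sketch (hypothetical helper, defined but never called): the loop
-- above numbers opcodes in declaration order, so MOVE is 0, VARARG is 37
-- and NUM_OPCODES is 38.
local function _opcode_table_example()
  assert(luaP.OpCode["OP_MOVE"] == 0)
  assert(luaP.ROpCode[37] == "OP_VARARG")
  assert(luaP.NUM_OPCODES == 38)
end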
--[[
===========================================================================
Notes:
(1) In OP_CALL, if (B == 0) then B = top. C is the number of returns - 1,
and can be 0: OP_CALL then sets 'top' to last_result+1, so
next open instruction (OP_CALL, OP_RETURN, OP_SETLIST) may use 'top'.
(2) In OP_RETURN, if (B == 0) then return up to 'top'
(3) For comparisons, B specifies what conditions the test should accept.
(4) All 'skips' (pc++) assume that next instruction is a jump
(5) OP_SETLISTO is used when the last item in a table constructor is a
function, so the number of elements set is up to top of stack
===========================================================================
--]]
------------------------------------------------------------------------
-- masks for instruction properties
------------------------------------------------------------------------
-- was enum OpModeMask:
luaP.OpModeBreg = 2 -- B is a register
luaP.OpModeBrk = 3 -- B is a register/constant
luaP.OpModeCrk = 4 -- C is a register/constant
luaP.OpModesetA = 5 -- instruction sets register A
luaP.OpModeK = 6 -- Bx is a constant
luaP.OpModeT = 1 -- operator is a test
------------------------------------------------------------------------
-- get opcode mode, e.g. "iABC"
------------------------------------------------------------------------
function luaP:getOpMode(m)
--printv(m)
--printv(self.OpCode[m])
--printv(self.opmodes [self.OpCode[m]+1])
return self.OpMode[tonumber(string.sub(self.opmodes[self.OpCode[m] + 1], 7, 7))]
end
------------------------------------------------------------------------
-- test an instruction property flag
-- * b is a string, e.g. "OpModeBreg"
------------------------------------------------------------------------
function luaP:testOpMode(m, b)
return (string.sub(self.opmodes[self.OpCode[m] + 1], self[b], self[b]) == "1")
end
-- number of list items to accumulate before a SETLIST instruction
-- (must be a power of 2)
-- * used in lparser, lvm, ldebug, ltests
luaP.LFIELDS_PER_FLUSH = 50 --FF updated to match 5.1
-- luaP_opnames[] is set above, as the luaP.opnames table
-- opmode(t,b,bk,ck,sa,k,m) deleted
--[[--------------------------------------------------------------------
Legend for luaP:opmodes:
1 T -> T (is a test?)
2 B -> B is a register
3 b -> B is an RK register/constant combination
4 C -> C is an RK register/constant combination
5 A -> register A is set by the opcode
6 K -> Bx is a constant
7 m -> 1 if iABC layout,
2 if iABx layout,
3 if iAsBx layout
----------------------------------------------------------------------]]
luaP.opmodes = {
-- TBbCAKm opcode
"0100101", -- OP_MOVE 0
"0000112", -- OP_LOADK
"0000101", -- OP_LOADBOOL
"0100101", -- OP_LOADNIL
"0000101", -- OP_GETUPVAL
"0000112", -- OP_GETGLOBAL 5
"0101101", -- OP_GETTABLE
"0000012", -- OP_SETGLOBAL
"0000001", -- OP_SETUPVAL
"0011001", -- OP_SETTABLE
"0000101", -- OP_NEWTABLE 10
"0101101", -- OP_SELF
"0011101", -- OP_ADD
"0011101", -- OP_SUB
"0011101", -- OP_MUL
"0011101", -- OP_DIV 15
"0011101", -- OP_MOD
"0011101", -- OP_POW
"0100101", -- OP_UNM
"0100101", -- OP_NOT
"0100101", -- OP_LEN 20
"0101101", -- OP_CONCAT
"0000003", -- OP_JMP
"1011001", -- OP_EQ
"1011001", -- OP_LT
"1011001", -- OP_LE 25
"1000101", -- OP_TEST
"1100101", -- OP_TESTSET
"0000001", -- OP_CALL
"0000001", -- OP_TAILCALL
"0000001", -- OP_RETURN 30
"0000003", -- OP_FORLOOP
"0000103", -- OP_FORPREP
"1000101", -- OP_TFORLOOP
"0000001", -- OP_SETLIST
"0000001", -- OP_CLOSE 35
"0000102", -- OP_CLOSURE
"0000101" -- OP_VARARG
}
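-- Sanity sketch (hypothetical helper, defined but never called): reading
-- the flag strings above, OP_ADD is an iABC instruction whose B and C are
-- RK operands and which sets register A, while OP_EQ is a test.
local function _opmodes_example()
  assert(luaP:getOpMode("OP_ADD") == "iABC")
  assert(luaP:testOpMode("OP_ADD", "OpModeBrk"))
  assert(luaP:testOpMode("OP_ADD", "OpModesetA"))
  assert(luaP:testOpMode("OP_EQ", "OpModeT"))
end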
return luaP

View File

@ -0,0 +1,86 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--*-lua-*-----------------------------------------------------------------------
-- Override Lua's default compilation functions, so that they support Metalua
-- rather than only plain Lua
--------------------------------------------------------------------------------
local mlc = require 'metalua.compiler'
local M = { }
-- Original versions
local original_lua_versions = {
load = load,
loadfile = loadfile,
loadstring = loadstring,
dofile = dofile }
local lua_loadstring = loadstring
local lua_loadfile = loadfile
function M.loadstring(str, name)
if type(str) ~= 'string' then error 'string expected' end
if str:match '^\027LuaQ' then return lua_loadstring(str) end
local n = str:match '^#![^\n]*\n()'
if n then str=str:sub(n, -1) end
-- FIXME: handle erroneous returns (return nil + error msg)
return mlc.new():src_to_function(str, name)
end
function M.loadfile(filename)
local f, err_msg = io.open(filename, 'rb')
if not f then return nil, err_msg end
local success, src = pcall( f.read, f, '*a')
pcall(f.close, f)
if success then return M.loadstring (src, '@'..filename)
else return nil, src end
end
function M.load(f, name)
local acc = { }
while true do
local x = f()
if not x then break end
assert(type(x)=='string', "function passed to load() must return strings")
table.insert(acc, x)
end
return M.loadstring(table.concat(acc))
end
function M.dostring(src)
local f, msg = M.loadstring(src)
if not f then error(msg) end
return f()
end
function M.dofile(name)
local f, msg = M.loadfile(name)
if not f then error(msg) end
return f()
end
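-- Usage sketch (hypothetical helper, defined but never called): after this
-- module is required, the replacement loadstring() compiles Metalua as well
-- as plain Lua source, and the stock functions remain reachable via M.lua.
local function _loader_example()
  local f = assert(M.loadstring("return 1 + 1", "=example"))
  assert(f() == 2)
  assert(type(M.lua.loadstring) == "function")
end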
-- Export replacement functions as globals
for name, f in pairs(M) do _G[name] = f end
-- To be done *after* exportation
M.lua = original_lua_versions
return M

View File

@ -0,0 +1,42 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
-- Export all public APIs from sub-modules, squashed into a flat spacename
local MT = { __type='metalua.compiler.parser' }
local MODULE_REL_NAMES = { "annot.grammar", "expr", "meta", "misc",
"stat", "table", "ext" }
local function new()
local M = {
lexer = require "metalua.compiler.parser.lexer" ();
extensions = { } }
for _, rel_name in ipairs(MODULE_REL_NAMES) do
local abs_name = "metalua.compiler.parser."..rel_name
local extender = require (abs_name)
if not M.extensions[abs_name] then
if type (extender) == 'function' then extender(M) end
M.extensions[abs_name] = extender
end
end
return setmetatable(M, MT)
end
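-- Usage sketch (hypothetical helper, defined but never called): each call
-- to new() assembles an independent parser whose sub-grammars have been
-- installed by the extender modules listed above.
local function _parser_example()
  local mlp = new()
  assert(mlp.lexer and mlp.expr)
end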
return { new = new }

View File

@ -0,0 +1,48 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local checks = require 'checks'
local gg = require 'metalua.grammar.generator'
local M = { }
function M.opt(mlc, primary, a_type)
checks('table', 'table|function', 'string')
return gg.sequence{
primary,
gg.onkeyword{ "#", function() return assert(mlc.annot[a_type]) end },
builder = function(x)
local t, annot = unpack(x)
return annot and { tag='Annot', t, annot } or t
end }
end
-- split a list of "foo" and "`Annot{foo, annot}" into a list of "foo"
-- and a list of "annot".
-- No annot list is returned if none of the elements were annotated.
function M.split(lst)
local x, a, some = { }, { }, false
for i, p in ipairs(lst) do
if p.tag=='Annot' then
some, x[i], a[i] = true, unpack(p)
else x[i] = p end
end
if some then return x, a else return lst end
end
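-- Illustrative sketch (hypothetical helper, defined but never called):
-- splitting a mixed list separates bare items from their annotations while
-- keeping positions aligned.
local function _split_example()
  local id_x = { tag='Id', 'x' }
  local id_y = { tag='Id', 'y' }
  local annotated = { tag='Annot', id_y, { tag='TId', 'number' } }
  local xs, as = M.split{ id_x, annotated }
  assert(xs[1] == id_x and xs[2] == id_y)
  assert(as[1] == nil and as[2].tag == 'TId')
end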
return M

View File

@ -0,0 +1,112 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local gg = require 'metalua.grammar.generator'
return function(M)
local _M = gg.future(M)
M.lexer :add '->'
local A = { }
local _A = gg.future(A)
M.annot = A
-- Type identifier: Lua keywords such as `"nil"` allowed.
function M.annot.tid(lx)
local w = lx :next()
local t = w.tag
if t=='Keyword' and w[1] :match '^[%a_][%w_]*$' or w.tag=='Id'
then return {tag='TId'; lineinfo=w.lineinfo; w[1]}
else return gg.parse_error (lx, 'tid expected') end
end
local field_types = { var='TVar'; const='TConst';
currently='TCurrently'; field='TField' }
-- TODO check lineinfo
function M.annot.tf(lx)
local tk = lx:next()
local w = tk[1]
local tag = field_types[w]
if not tag then error ('Invalid field type '..w)
elseif tag=='TField' then return {tag='TField'} else
local te = M.te(lx)
return {tag=tag; te}
end
end
M.annot.tebar_content = gg.list{
name = 'tebar content',
primary = _A.te,
separators = { ",", ";" },
terminators = ")" }
M.annot.tebar = gg.multisequence{
name = 'annot.tebar',
--{ '*', builder = 'TDynbar' }, -- maybe not user-available
{ '(', _A.tebar_content, ')',
builder = function(x) return x[1] end },
{ _A.te }
}
M.annot.te = gg.multisequence{
name = 'annot.te',
{ _A.tid, builder=function(x) return x[1] end },
{ '*', builder = 'TDyn' },
{ "[",
gg.list{
primary = gg.sequence{
_M.expr, "=", _A.tf,
builder = 'TPair'
},
separators = { ",", ";" },
terminators = { "]", "|" } },
gg.onkeyword{ "|", _A.tf },
"]",
builder = function(x)
local fields, other = unpack(x)
return { tag='TTable', other or {tag='TField'}, fields }
end }, -- "[ ... ]"
{ '(', _A.tebar_content, ')', '->', '(', _A.tebar_content, ')',
builder = function(x)
local p, r = unpack(x)
return {tag='TFunction', p, r }
end } }
M.annot.ts = gg.multisequence{
name = 'annot.ts',
{ 'return', _A.tebar_content, builder='TReturn' },
{ _A.tid, builder = function(x)
if x[1][1]=='pass' then return {tag='TPass'}
else error "Bad statement type" end
end } }
-- TODO: add parsers for statements:
-- #return tebar
-- #alias = te
-- #ell = tf
--[[
M.annot.stat_annot = gg.sequence{
gg.list{ primary=_A.tid, separators='.' },
'=',
XXX??,
builder = 'Annot' }
--]]
return M.annot
end

View File

@ -0,0 +1,206 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Exported API:
-- * [mlp.expr()]
-- * [mlp.expr_list()]
-- * [mlp.func_val()]
--
-------------------------------------------------------------------------------
local pp = require 'metalua.pprint'
local gg = require 'metalua.grammar.generator'
local annot = require 'metalua.compiler.parser.annot.generator'
return function(M)
local _M = gg.future(M)
local _table = gg.future(M, 'table')
local _meta = gg.future(M, 'meta') -- TODO move to ext?
local _annot = gg.future(M, 'annot') -- TODO move to annot
--------------------------------------------------------------------------------
-- Non-empty expression list. Actually, this isn't used here, but that's
-- handy to give to users.
--------------------------------------------------------------------------------
M.expr_list = gg.list{ primary=_M.expr, separators="," }
--------------------------------------------------------------------------------
-- Helpers for function applications / method applications
--------------------------------------------------------------------------------
M.func_args_content = gg.list{
name = "function arguments",
primary = _M.expr,
separators = ",",
terminators = ")" }
-- Used to parse methods
M.method_args = gg.multisequence{
name = "function argument(s)",
{ "{", _table.content, "}" },
{ "(", _M.func_args_content, ")", builder = unpack },
{ "+{", _meta.quote_content, "}" },
-- TODO lineinfo?
function(lx) local r = M.opt_string(lx); return r and {r} or { } end }
--------------------------------------------------------------------------------
-- [func_val] parses a function, from the opening parameter parenthesis to
-- the "end" keyword included. Used for anonymous functions as well as
-- function declaration statements (both local and global).
--
-- It's wrapped in a [_func_val] eta expansion, so that when the expr
-- parser uses the latter, it will notice later updates of the [func_val]
-- definition.
--------------------------------------------------------------------------------
M.func_params_content = gg.list{
name="function parameters",
gg.multisequence{ { "...", builder = "Dots" }, annot.opt(M, _M.id, 'te') },
separators = ",", terminators = {")", "|"} }
-- TODO move to annot
M.func_val = gg.sequence{
name = "function body",
"(", _M.func_params_content, ")", _M.block, "end",
builder = function(x)
local params, body = unpack(x)
local annots, some = { }, false
for i, p in ipairs(params) do
if p.tag=='Annot' then
params[i], annots[i], some = p[1], p[2], true
else annots[i] = false end
end
if some then return { tag='Function', params, body, annots }
else return { tag='Function', params, body } end
end }
local func_val = function(lx) return M.func_val(lx) end
--------------------------------------------------------------------------------
-- Default parser for primary expressions
--------------------------------------------------------------------------------
function M.id_or_literal (lx)
local a = lx:next()
if a.tag~="Id" and a.tag~="String" and a.tag~="Number" then
local msg
if a.tag=='Eof' then
msg = "End of file reached when an expression was expected"
elseif a.tag=='Keyword' then
msg = "An expression was expected, and `"..a[1]..
"' can't start an expression"
else
msg = "Unexpected expr token " .. pp.tostring (a)
end
gg.parse_error (lx, msg)
end
return a
end
--------------------------------------------------------------------------------
-- Builder generator for operators. Wouldn't be worth it if "|x|" notation
-- were allowed, but then lua 5.1 wouldn't compile it
--------------------------------------------------------------------------------
-- opf1 = |op| |_,a| `Op{ op, a }
local function opf1 (op) return
function (_,a) return { tag="Op", op, a } end end
-- opf2 = |op| |a,_,b| `Op{ op, a, b }
local function opf2 (op) return
function (a,_,b) return { tag="Op", op, a, b } end end
-- opf2r = |op| |a,_,b| `Op{ op, b, a } -- (args reversed)
local function opf2r (op) return
function (a,_,b) return { tag="Op", op, b, a } end end
local function op_ne(a, _, b)
-- This version allows the "ne" operator to be removed from the AST definition.
-- However, it doesn't always produce the exact same bytecode as Lua 5.1.
return { tag="Op", "not",
{ tag="Op", "eq", a, b, lineinfo= {
first = a.lineinfo.first, last = b.lineinfo.last } } }
end
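-- Illustrative sketch (hypothetical helper, defined but never called; the
-- lineinfo tables are simplified stand-ins): "x ~= 2" is built as
-- not(eq(x, 2)) rather than as a dedicated "ne" node.
local function _op_ne_example()
  local a = { tag = "Id", "x",   lineinfo = { first = 1, last = 1 } }
  local b = { tag = "Number", 2, lineinfo = { first = 1, last = 1 } }
  local ast = op_ne(a, "~=", b)
  assert(ast.tag == "Op" and ast[1] == "not")
  assert(ast[2][1] == "eq" and ast[2][2] == a and ast[2][3] == b)
end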
--------------------------------------------------------------------------------
--
-- complete expression
--
--------------------------------------------------------------------------------
-- FIXME: set line number. In [expr] transformers probably
M.expr = gg.expr {
name = "expression",
primary = gg.multisequence{
name = "expr primary",
{ "(", _M.expr, ")", builder = "Paren" },
{ "function", _M.func_val, builder = unpack },
{ "-{", _meta.splice_content, "}", builder = unpack },
{ "+{", _meta.quote_content, "}", builder = unpack },
{ "nil", builder = "Nil" },
{ "true", builder = "True" },
{ "false", builder = "False" },
{ "...", builder = "Dots" },
{ "{", _table.content, "}", builder = unpack },
_M.id_or_literal },
infix = {
name = "expr infix op",
{ "+", prec = 60, builder = opf2 "add" },
{ "-", prec = 60, builder = opf2 "sub" },
{ "*", prec = 70, builder = opf2 "mul" },
{ "/", prec = 70, builder = opf2 "div" },
{ "%", prec = 70, builder = opf2 "mod" },
{ "^", prec = 90, builder = opf2 "pow", assoc = "right" },
{ "..", prec = 40, builder = opf2 "concat", assoc = "right" },
{ "==", prec = 30, builder = opf2 "eq" },
{ "~=", prec = 30, builder = op_ne },
{ "<", prec = 30, builder = opf2 "lt" },
{ "<=", prec = 30, builder = opf2 "le" },
{ ">", prec = 30, builder = opf2r "lt" },
{ ">=", prec = 30, builder = opf2r "le" },
{ "and",prec = 20, builder = opf2 "and" },
{ "or", prec = 10, builder = opf2 "or" } },
prefix = {
name = "expr prefix op",
{ "not", prec = 80, builder = opf1 "not" },
{ "#", prec = 80, builder = opf1 "len" },
{ "-", prec = 80, builder = opf1 "unm" } },
suffix = {
name = "expr suffix op",
{ "[", _M.expr, "]", builder = function (tab, idx)
return {tag="Index", tab, idx[1]} end},
{ ".", _M.id, builder = function (tab, field)
return {tag="Index", tab, _M.id2string(field[1])} end },
{ "(", _M.func_args_content, ")", builder = function(f, args)
return {tag="Call", f, unpack(args[1])} end },
{ "{", _table.content, "}", builder = function (f, arg)
return {tag="Call", f, arg[1]} end},
{ ":", _M.id, _M.method_args, builder = function (obj, post)
local m_name, args = unpack(post)
return {tag="Invoke", obj, _M.id2string(m_name), unpack(args)} end},
{ "+{", _meta.quote_content, "}", builder = function (f, arg)
return {tag="Call", f, arg[1] } end },
default = { name="opt_string_arg", parse = _M.opt_string, builder = function(f, arg)
return {tag="Call", f, arg } end } } }
return M
end

View File

@ -0,0 +1,96 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Non-Lua syntax extensions
--
--------------------------------------------------------------------------------
local gg = require 'metalua.grammar.generator'
return function(M)
local _M = gg.future(M)
---------------------------------------------------------------------------
-- Algebraic Datatypes
----------------------------------------------------------------------------
local function adt (lx)
local node = _M.id (lx)
local tagval = node[1]
-- tagkey = `Pair{ `String "key", `String{ -{tagval} } }
local tagkey = { tag="Pair", {tag="String", "tag"}, {tag="String", tagval} }
if lx:peek().tag == "String" or lx:peek().tag == "Number" then
-- TODO support boolean literals
return { tag="Table", tagkey, lx:next() }
elseif lx:is_keyword (lx:peek(), "{") then
local x = M.table.table (lx)
table.insert (x, 1, tagkey)
return x
else return { tag="Table", tagkey } end
end
M.adt = gg.sequence{ "`", adt, builder = unpack }
M.expr.primary :add(M.adt)
----------------------------------------------------------------------------
-- Anonymous lambda
----------------------------------------------------------------------------
M.lambda_expr = gg.sequence{
"|", _M.func_params_content, "|", _M.expr,
builder = function (x)
local li = x[2].lineinfo
return { tag="Function", x[1],
{ {tag="Return", x[2], lineinfo=li }, lineinfo=li } }
end }
M.expr.primary :add (M.lambda_expr)
--------------------------------------------------------------------------------
-- Allows writing "a `f` b" instead of "f(a, b)". Taken from Haskell.
--------------------------------------------------------------------------------
function M.expr_in_backquotes (lx) return M.expr(lx, 35) end -- 35=limited precedence
M.expr.infix :add{ name = "infix function",
"`", _M.expr_in_backquotes, "`", prec = 35, assoc="left",
builder = function(a, op, b) return {tag="Call", op[1], a, b} end }
--------------------------------------------------------------------------------
-- C-style op+assignments
-- TODO: no protection against side-effects in LHS vars.
--------------------------------------------------------------------------------
local function op_assign(kw, op)
local function rhs(a, b) return { tag="Op", op, a, b } end
local function f(a,b)
if #a ~= #b then gg.parse_error "asymmetric operator+assignment" end
local right = { }
local r = { tag="Set", a, right }
for i=1, #a do right[i] = { tag="Op", op, a[i], b[i] } end
return r
end
M.lexer :add (kw)
M.assignments[kw] = f
end
local ops = { add='+='; sub='-='; mul='*='; div='/=' }
for ast_op_name, keyword in pairs(ops) do op_assign(keyword, ast_op_name) end
return M
end

View File

@ -0,0 +1,43 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2014 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
----------------------------------------------------------------------
-- Generate a new lua-specific lexer, derived from the generic lexer.
----------------------------------------------------------------------
local generic_lexer = require 'metalua.grammar.lexer'
return function()
local lexer = generic_lexer.lexer :clone()
local keywords = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function",
"goto", -- Lua5.2
"if",
"in", "local", "nil", "not", "or", "repeat",
"return", "then", "true", "until", "while",
"...", "..", "==", ">=", "<=", "~=",
"::", -- Lua5,2
"+{", "-{" } -- Metalua
for _, w in ipairs(keywords) do lexer :add (w) end
return lexer
end

View File

@ -0,0 +1,138 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2014 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-- Compile-time metaprogramming features: splicing ASTs generated during compilation,
-- AST quasi-quoting helpers.
local gg = require 'metalua.grammar.generator'
return function(M)
local _M = gg.future(M)
M.meta={ }
local _MM = gg.future(M.meta)
--------------------------------------------------------------------------------
-- External splicing: compile an AST into a chunk, load and evaluate
-- that chunk, and replace the chunk by its result (which must also be
-- an AST).
--------------------------------------------------------------------------------
-- TODO: that's not part of the parser
function M.meta.eval (ast)
-- TODO: should there be one mlc per splice, or per parser instance?
local mlc = require 'metalua.compiler'.new()
local f = mlc :ast_to_function (ast, '=splice')
local result=f(M) -- splices act on the current parser
return result
end
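--------------------------------------------------------------------------------
-- Illustrative sketch (values are arbitrary): in a compiled source file,
--     local x = -{ { tag='Number', 6*7 } }
-- evaluates the splice body at compile time; the resulting AST replaces the
-- splice, so the line compiles as if it had been written "local x = 42".
--------------------------------------------------------------------------------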
----------------------------------------------------------------------------
-- Going from an AST to an AST representing that AST
-- the only hash-part key being lifted is `"tag"`.
-- Doesn't lift subtrees protected inside a `Splice{ ... }.
-- e.g. change `Foo{ 123 } into
-- `Table{ `Pair{ `String "tag", `String "Foo" }, `Number 123 }
----------------------------------------------------------------------------
local function lift (t)
--print("QUOTING:", table.tostring(t, 60,'nohash'))
local cases = { }
function cases.table (t)
local mt = { tag = "Table" }
--table.insert (mt, { tag = "Pair", quote "quote", { tag = "True" } })
if t.tag == "Splice" then
assert (#t==1, "Invalid splice")
local sp = t[1]
return sp
elseif t.tag then
table.insert (mt, { tag="Pair", lift "tag", lift(t.tag) })
end
for _, v in ipairs (t) do
table.insert (mt, lift(v))
end
return mt
end
function cases.number (t) return { tag = "Number", t, quote = true } end
function cases.string (t) return { tag = "String", t, quote = true } end
function cases.boolean (t) return { tag = t and "True" or "False", t, quote = true } end
local f = cases [type(t)]
if f then return f(t) else error ("Cannot quote an AST containing "..tostring(t)) end
end
M.meta.lift = lift
--------------------------------------------------------------------------------
-- when this variable is false, code inside [-{...}] is compiled and
-- evaluated immediately. When it's true (supposedly when we're
-- parsing data inside a quasiquote), [-{foo}] is replaced by
-- [`Splice{foo}], which will be unpacked by [quote()].
--------------------------------------------------------------------------------
local in_a_quote = false
--------------------------------------------------------------------------------
-- Parse the inside of a "-{ ... }"
--------------------------------------------------------------------------------
function M.meta.splice_content (lx)
local parser_name = "expr"
if lx:is_keyword (lx:peek(2), ":") then
local a = lx:next()
lx:next() -- skip ":"
assert (a.tag=="Id", "Invalid splice parser name")
parser_name = a[1]
end
-- TODO FIXME running a new parser with the old lexer?!
local parser = require 'metalua.compiler.parser'.new()
local ast = parser [parser_name](lx)
if in_a_quote then -- only prevent quotation in this subtree
--printf("SPLICE_IN_QUOTE:\n%s", _G.table.tostring(ast, "nohash", 60))
return { tag="Splice", ast }
else -- convert in a block, eval, replace with result
if parser_name == "expr" then ast = { { tag="Return", ast } }
elseif parser_name == "stat" then ast = { ast }
elseif parser_name ~= "block" then
error ("splice content must be an expr, stat or block") end
--printf("EXEC THIS SPLICE:\n%s", _G.table.tostring(ast, "nohash", 60))
return M.meta.eval (ast)
end
end
M.meta.splice = gg.sequence{ "-{", _MM.splice_content, "}", builder=unpack }
--------------------------------------------------------------------------------
-- Parse the inside of a "+{ ... }"
--------------------------------------------------------------------------------
function M.meta.quote_content (lx)
local parser
if lx:is_keyword (lx:peek(2), ":") then -- +{parser: content }
local parser_name = M.id(lx)[1]
parser = M[parser_name]
lx:next() -- skip ":"
else -- +{ content }
parser = M.expr
end
local prev_iq = in_a_quote
in_a_quote = true
--print("IN_A_QUOTE")
local content = parser (lx)
local q_content = M.meta.lift (content)
in_a_quote = prev_iq
return q_content
end
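--------------------------------------------------------------------------------
-- Illustrative sketch: "+{ 1 + 1 }" is replaced at parse time by the AST of
-- that expression, roughly
--     { tag='Op', 'add', { tag='Number', 1 }, { tag='Number', 1 } }
-- "+{stat: print 'hi' }" quotes a statement instead of an expression, and a
-- nested "-{ ... }" splices a computed sub-tree back into the quoted code.
--------------------------------------------------------------------------------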
return M
end

View File

@ -0,0 +1,176 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Summary: metalua parser, miscellaneous utility functions.
--
-------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
-- * [mlp.fget()]
-- * [mlp.id()]
-- * [mlp.opt_id()]
-- * [mlp.id_list()]
-- * [mlp.string()]
-- * [mlp.opt_string()]
-- * [mlp.id2string()]
--
--------------------------------------------------------------------------------
local pp = require 'metalua.pprint'
local gg = require 'metalua.grammar.generator'
-- TODO: replace splice-aware versions with naive ones, move extensions into ./meta
return function(M)
local _M = gg.future(M)
--[[ metaprog-free versions:
function M.id(lx)
if lx:peek().tag~='Id' then gg.parse_error(lx, "Identifier expected")
else return lx:next() end
end
function M.opt_id(lx)
if lx:peek().tag=='Id' then return lx:next() else return false end
end
function M.string(lx)
if lx:peek().tag~='String' then gg.parse_error(lx, "String expected")
else return lx:next() end
end
function M.opt_string(lx)
if lx:peek().tag=='String' then return lx:next() else return false end
end
--------------------------------------------------------------------------------
-- Converts an identifier into a string. Hopefully one day it'll handle
-- splices gracefully, but that proves quite tricky.
--------------------------------------------------------------------------------
function M.id2string (id)
if id.tag == "Id" then id.tag = "String"; return id
else error ("Identifier expected: "..table.tostring(id, 'nohash')) end
end
--]]
--------------------------------------------------------------------------------
-- Try to read an identifier (possibly as a splice), or return [false] if no
-- id is found.
--------------------------------------------------------------------------------
function M.opt_id (lx)
local a = lx:peek();
if lx:is_keyword (a, "-{") then
local v = M.meta.splice(lx)
if v.tag ~= "Id" and v.tag ~= "Splice" then
gg.parse_error(lx, "Bad id splice")
end
return v
elseif a.tag == "Id" then return lx:next()
else return false end
end
--------------------------------------------------------------------------------
-- Mandatory reading of an id: causes an error if it can't read one.
--------------------------------------------------------------------------------
function M.id (lx)
return M.opt_id (lx) or gg.parse_error(lx,"Identifier expected")
end
--------------------------------------------------------------------------------
-- Common helper function
--------------------------------------------------------------------------------
M.id_list = gg.list { primary = _M.id, separators = "," }
--------------------------------------------------------------------------------
-- Converts an identifier into a string. Hopefully one day it'll handle
-- splices gracefully, but that proves quite tricky.
--------------------------------------------------------------------------------
function M.id2string (id)
--print("id2string:", disp.ast(id))
if id.tag == "Id" then id.tag = "String"; return id
elseif id.tag == "Splice" then
error ("id2string on splice not implemented")
-- Evaluating id[1] will produce `Id{ xxx },
-- and we want it to produce `String{ xxx }.
-- The following is the plain notation of:
-- +{ `String{ `Index{ `Splice{ -{id[1]} }, `Number 1 } } }
return { tag="String", { tag="Index", { tag="Splice", id[1] },
{ tag="Number", 1 } } }
else error ("Identifier expected: "..pp.tostring (id, {metalua_tag=1, hide_hash=1})) end
end
--------------------------------------------------------------------------------
-- Read a string, possibly spliced, or return an error if it can't
--------------------------------------------------------------------------------
function M.string (lx)
local a = lx:peek()
if lx:is_keyword (a, "-{") then
local v = M.meta.splice(lx)
if v.tag ~= "String" and v.tag ~= "Splice" then
gg.parse_error(lx,"Bad string splice")
end
return v
elseif a.tag == "String" then return lx:next()
else error "String expected" end
end
--------------------------------------------------------------------------------
-- Try to read a string, or return false if it can't. No splice allowed.
--------------------------------------------------------------------------------
function M.opt_string (lx)
return lx:peek().tag == "String" and lx:next()
end
--------------------------------------------------------------------------------
-- Chunk reader: block + Eof
--------------------------------------------------------------------------------
function M.skip_initial_sharp_comment (lx)
-- Dirty hack: I'm happily fondling lexer's private parts
-- FIXME: redundant with lexer:newstream()
lx :sync()
local i = lx.src:match ("^#.-\n()", lx.i)
if i then
lx.i = i
lx.column_offset = i
lx.line = lx.line and lx.line + 1 or 1
end
end
local function chunk (lx)
if lx:peek().tag == 'Eof' then
return { } -- handle empty files
else
M.skip_initial_sharp_comment (lx)
local chunk = M.block (lx)
if lx:peek().tag ~= "Eof" then
gg.parse_error(lx, "End-of-file expected")
end
return chunk
end
end
-- chunk is wrapped in a sequence so that it has a "transformer" field.
M.chunk = gg.sequence { chunk, builder = unpack }
return M
end

View File

@ -0,0 +1,279 @@
------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Summary: metalua parser, statement/block parser. This is part of the
-- definition of module [mlp].
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Exports API:
-- * [mlp.stat()]
-- * [mlp.block()]
-- * [mlp.for_header()]
--
-------------------------------------------------------------------------------
local lexer = require 'metalua.grammar.lexer'
local gg = require 'metalua.grammar.generator'
local annot = require 'metalua.compiler.parser.annot.generator'
--------------------------------------------------------------------------------
-- List of all keywords that indicate the end of a statement block. Users are
-- likely to extend this list when designing extensions.
--------------------------------------------------------------------------------
return function(M)
local _M = gg.future(M)
M.block_terminators = { "else", "elseif", "end", "until", ")", "}", "]" }
-- FIXME: this must be handled from within GG!!!
-- FIXME: there's no :add method in the list anyway. Added by gg.list?!
function M.block_terminators :add(x)
if type (x) == "table" then for _, y in ipairs(x) do self :add (y) end
else table.insert (self, x) end
end
----------------------------------------------------------------------------
-- list of statements, possibly followed by semicolons
----------------------------------------------------------------------------
M.block = gg.list {
name = "statements block",
terminators = M.block_terminators,
primary = function (lx)
-- FIXME use gg.optkeyword()
local x = M.stat (lx)
if lx:is_keyword (lx:peek(), ";") then lx:next() end
return x
end }
----------------------------------------------------------------------------
-- Helper function for "return <expr_list>" parsing.
-- Called when parsing return statements.
-- The specific test for initial ";" is because it's not a block terminator,
-- so without it gg.list would choke on "return ;" statements.
-- We don't make a modified copy of block_terminators because this list
-- is sometimes modified at runtime, and the return parser would get out of
-- sync if it was relying on a copy.
----------------------------------------------------------------------------
local return_expr_list_parser = gg.multisequence{
{ ";" , builder = function() return { } end },
default = gg.list {
_M.expr, separators = ",", terminators = M.block_terminators } }
local for_vars_list = gg.list{
name = "for variables list",
primary = _M.id,
separators = ",",
terminators = "in" }
----------------------------------------------------------------------------
-- for header, between [for] and [do] (exclusive).
-- Return the `Forxxx{...} AST, without the body element (the last one).
----------------------------------------------------------------------------
function M.for_header (lx)
local vars = M.id_list(lx)
if lx :is_keyword (lx:peek(), "=") then
if #vars ~= 1 then
gg.parse_error (lx, "numeric for only accepts one variable")
end
lx:next() -- skip "="
local exprs = M.expr_list (lx)
if #exprs < 2 or #exprs > 3 then
gg.parse_error (lx, "numeric for requires 2 or 3 boundaries")
end
return { tag="Fornum", vars[1], unpack (exprs) }
else
if not lx :is_keyword (lx :next(), "in") then
gg.parse_error (lx, '"=" or "in" expected in for loop')
end
local exprs = M.expr_list (lx)
return { tag="Forin", vars, exprs }
end
end
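----------------------------------------------------------------------------
-- Illustrative sketch (identifiers are arbitrary):
--     "for i = 1, 10"        parses to `Fornum{ `Id 'i', `Number 1, `Number 10 }
--     "for k, v in pairs(t)" parses to `Forin{ { `Id 'k', `Id 'v' },
--                                              { `Call{ `Id 'pairs', `Id 't' } } }
-- In both cases the loop body is appended later, by the "for" statement builder.
----------------------------------------------------------------------------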
----------------------------------------------------------------------------
-- Function def parser helper: id ( . id ) *
----------------------------------------------------------------------------
local function fn_builder (list)
local acc = list[1]
local first = acc.lineinfo.first
for i = 2, #list do
local index = M.id2string(list[i])
local li = lexer.new_lineinfo(first, index.lineinfo.last)
acc = { tag="Index", acc, index, lineinfo=li }
end
return acc
end
local func_name = gg.list{ _M.id, separators = ".", builder = fn_builder }
----------------------------------------------------------------------------
-- Function def parser helper: ( : id )?
----------------------------------------------------------------------------
local method_name = gg.onkeyword{ name = "method invocation", ":", _M.id,
transformers = { function(x) return x and x.tag=='Id' and M.id2string(x) end } }
----------------------------------------------------------------------------
-- Function def builder
----------------------------------------------------------------------------
local function funcdef_builder(x)
local name, method, func = unpack(x)
if method then
name = { tag="Index", name, method,
lineinfo = {
first = name.lineinfo.first,
last = method.lineinfo.last } }
table.insert (func[1], 1, {tag="Id", "self"})
end
local r = { tag="Set", {name}, {func} }
r[1].lineinfo = name.lineinfo
r[2].lineinfo = func.lineinfo
return r
end
----------------------------------------------------------------------------
-- if statement builder
----------------------------------------------------------------------------
local function if_builder (x)
local cond_block_pairs, else_block, r = x[1], x[2], {tag="If"}
local n_pairs = #cond_block_pairs
for i = 1, n_pairs do
local cond, block = unpack(cond_block_pairs[i])
r[2*i-1], r[2*i] = cond, block
end
if else_block then table.insert(r, #r+1, else_block) end
return r
end
--------------------------------------------------------------------------------
-- produce a list of (expr,block) pairs
--------------------------------------------------------------------------------
local elseifs_parser = gg.list {
gg.sequence { _M.expr, "then", _M.block , name='elseif parser' },
separators = "elseif",
terminators = { "else", "end" }
}
local annot_expr = gg.sequence {
_M.expr,
gg.onkeyword{ "#", gg.future(M, 'annot').tf },
builder = function(x)
local e, a = unpack(x)
if a then return { tag='Annot', e, a }
else return e end
end }
local annot_expr_list = gg.list {
primary = annot.opt(M, _M.expr, 'tf'), separators = ',' }
------------------------------------------------------------------------
-- assignments and calls: statements that don't start with a keyword
------------------------------------------------------------------------
local function assign_or_call_stat_parser (lx)
local e = annot_expr_list (lx)
local a = lx:is_keyword(lx:peek())
local op = a and M.assignments[a]
-- TODO: refactor annotations
if op then
--FIXME: check that [e] is a LHS
lx :next()
local annots
e, annots = annot.split(e)
local v = M.expr_list (lx)
if type(op)=="string" then return { tag=op, e, v, annots }
else return op (e, v) end
else
assert (#e > 0)
if #e > 1 then
gg.parse_error (lx,
"comma is not a valid statement separator; statement can be "..
"separated by semicolons, or not separated at all")
elseif e[1].tag ~= "Call" and e[1].tag ~= "Invoke" then
local typename
if e[1].tag == 'Id' then
typename = '("'..e[1][1]..'") is an identifier'
elseif e[1].tag == 'Op' then
typename = "is an arithmetic operation"
else typename = "is of type '"..(e[1].tag or "<list>").."'" end
gg.parse_error (lx,
"This expression %s; "..
"a statement was expected, and only function and method call "..
"expressions can be used as statements", typename);
end
return e[1]
end
end
M.local_stat_parser = gg.multisequence{
-- local function <name> <func_val>
{ "function", _M.id, _M.func_val, builder =
function(x)
local vars = { x[1], lineinfo = x[1].lineinfo }
local vals = { x[2], lineinfo = x[2].lineinfo }
return { tag="Localrec", vars, vals }
end },
-- local <id_list> ( = <expr_list> )?
default = gg.sequence{
gg.list{
primary = annot.opt(M, _M.id, 'tf'),
separators = ',' },
gg.onkeyword{ "=", _M.expr_list },
builder = function(x)
local annotated_left, right = unpack(x)
local left, annotations = annot.split(annotated_left)
return {tag="Local", left, right or { }, annotations }
end } }
------------------------------------------------------------------------
-- statement
------------------------------------------------------------------------
M.stat = gg.multisequence {
name = "statement",
{ "do", _M.block, "end", builder =
function (x) return { tag="Do", unpack (x[1]) } end },
{ "for", _M.for_header, "do", _M.block, "end", builder =
function (x) x[1][#x[1]+1] = x[2]; return x[1] end },
{ "function", func_name, method_name, _M.func_val, builder=funcdef_builder },
{ "while", _M.expr, "do", _M.block, "end", builder = "While" },
{ "repeat", _M.block, "until", _M.expr, builder = "Repeat" },
{ "local", _M.local_stat_parser, builder = unpack },
{ "return", return_expr_list_parser, builder =
function(x) x[1].tag='Return'; return x[1] end },
{ "break", builder = function() return { tag="Break" } end },
{ "-{", gg.future(M, 'meta').splice_content, "}", builder = unpack },
{ "if", gg.nonempty(elseifs_parser), gg.onkeyword{ "else", M.block }, "end",
builder = if_builder },
default = assign_or_call_stat_parser }
M.assignments = {
["="] = "Set"
}
function M.assignments:add(k, v) self[k] = v end
return M
end

View File

@ -0,0 +1,77 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
-- * [M.table_bracket_field()]
-- * [M.table_field()]
-- * [M.table_content()]
-- * [M.table()]
--
-- KNOWN BUG: doesn't handle final ";" or "," before final "}"
--
--------------------------------------------------------------------------------
local gg = require 'metalua.grammar.generator'
return function(M)
M.table = { }
local _table = gg.future(M.table)
local _expr = gg.future(M).expr
--------------------------------------------------------------------------------
-- `[key] = value` table field definition
--------------------------------------------------------------------------------
M.table.bracket_pair = gg.sequence{ "[", _expr, "]", "=", _expr, builder = "Pair" }
--------------------------------------------------------------------------------
-- table element parser: list value, `id = value` pair or `[value] = value` pair.
--------------------------------------------------------------------------------
function M.table.element (lx)
if lx :is_keyword (lx :peek(), "[") then return M.table.bracket_pair(lx) end
local e = M.expr (lx)
if not lx :is_keyword (lx :peek(), "=") then return e end
lx :next(); -- skip the "="
local key = M.id2string(e) -- will fail on non-identifiers
local val = M.expr(lx)
local r = { tag="Pair", key, val }
r.lineinfo = { first = key.lineinfo.first, last = val.lineinfo.last }
return r
end
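--------------------------------------------------------------------------------
-- Illustrative sketch (identifiers are arbitrary): in "{ x, y = 1, [k] = v }"
-- the three elements parse to
--     `Id 'x',  `Pair{ `String 'y', `Number 1 },  `Pair{ `Id 'k', `Id 'v' }
-- and table.content below wraps them into a single `Table node.
--------------------------------------------------------------------------------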
-----------------------------------------------------------------------------
-- table constructor, without enclosing braces; returns a full table object
-----------------------------------------------------------------------------
M.table.content = gg.list {
-- eta expansion to allow patching the element definition
primary = _table.element,
separators = { ",", ";" },
terminators = "}",
builder = "Table" }
--------------------------------------------------------------------------------
-- complete table constructor including [{...}]
--------------------------------------------------------------------------------
-- TODO beware, stat and expr use only table.content, this can't be patched.
M.table.table = gg.sequence{ "{", _table.content, "}", builder = unpack }
return M
end

View File

@ -0,0 +1,282 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
--
-- This extension implements list comprehensions, similar to Haskell and
-- Python syntax, to easily describe lists.
--
-- * x[a ... b] is the list { x[a], x[a+1], ..., x[b] }
-- * { f()..., b } contains all the elements returned by f(), then b
-- (allows expanding list fields other than the last one)
-- * list comprehensions a la python, with "for" and "if" suffixes:
-- {i+10*j for i=1,3 for j=1,3 if i~=j} is { 21, 31, 12, 32, 13, 23 }
--
-------------------------------------------------------------------------------
-{ extension ("match", ...) }
local SUPPORT_IMPROVED_LOOPS = true
local SUPPORT_IMPROVED_INDEXES = false -- depends on deprecated table.isub
local SUPPORT_CONTINUE = true
local SUPPORT_COMP_LISTS = true
assert (SUPPORT_IMPROVED_LOOPS or not SUPPORT_CONTINUE,
"Can't support 'continue' without improved loop headers")
local gg = require 'metalua.grammar.generator'
local Q = require 'metalua.treequery'
local function dots_list_suffix_builder (x) return `DotsSuffix{ x } end
local function for_list_suffix_builder (list_element, suffix)
local new_header = suffix[1]
match list_element with
| `Comp{ _, acc } -> table.insert (acc, new_header); return list_element
| _ -> return `Comp{ list_element, { new_header } }
end
end
local function if_list_suffix_builder (list_element, suffix)
local new_header = `If{ suffix[1] }
match list_element with
| `Comp{ _, acc } -> table.insert (acc, new_header); return list_element
| _ -> return `Comp{ list_element, { new_header } }
end
end
-- Builds a statement from a table element, which adds this element to
-- a table `t`, potentially thanks to an alias `tinsert` to
-- `table.insert`.
-- @param core the part around which the loops are built.
-- either `DotsSuffix{expr}, `Pair{ expr } or a plain expression
-- @param list comprehension suffixes, in the order in which they appear
-- either `Forin{ ... } or `Fornum{ ...} or `If{ ... }. In each case,
-- it misses a last child node as its body.
-- @param t a variable containing the table to fill
-- @param tinsert a variable containing `table.insert`.
--
-- @return fill a statement which fills empty table `t` with the denoted element
local function comp_list_builder(core, list, t, tinsert)
local filler
-- 1 - Build the loop's core: if it has suffix "...", every element of the
-- multi-return must be inserted, hence the extra [for] loop.
match core with
| `DotsSuffix{ element } ->
local x = gg.gensym()
filler = +{stat: for _, -{x} in pairs{ -{element} } do (-{tinsert})(-{t}, -{x}) end }
| `Pair{ key, value } ->
--filler = +{ -{t}[-{key}] = -{value} }
filler = `Set{ { `Index{ t, key } }, { value } }
| _ -> filler = +{ (-{tinsert})(-{t}, -{core}) }
end
-- 2 - Stack the `if` and `for` control structures, from outside to inside.
-- This is done in a destructive way for the elements of [list].
for i = #list, 1, -1 do
table.insert (list[i], {filler})
filler = list[i]
end
return filler
end
local function table_content_builder (list)
local special = false -- Does the table need a special builder?
for _, element in ipairs(list) do
local etag = element.tag
if etag=='Comp' or etag=='DotsSuffix' then special=true; break end
end
if not special then list.tag='Table'; return list end
local t, tinsert = gg.gensym 'table', gg.gensym 'table_insert'
local filler_block = { +{stat: local -{t}, -{tinsert} = { }, table.insert } }
for _, element in ipairs(list) do
local filler
match element with
| `Comp{ core, comp } -> filler = comp_list_builder(core, comp, t, tinsert)
| _ -> filler = comp_list_builder(element, { }, t, tinsert)
end
table.insert(filler_block, filler)
end
return `Stat{ filler_block, t }
end
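-- Illustrative sketch of the generated code (gensym'd variable names shown
-- informally as t/insert): "{ i*i for i=1,3 }" expands to roughly
--     local t, insert = { }, table.insert
--     for i = 1, 3 do insert(t, i*i) end
-- wrapped in a `Stat node whose resulting value is t.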
--------------------------------------------------------------------------------
-- Back-end for improved index operator.
local function index_builder(a, suffix)
match suffix[1] with
-- Single index, no range: keep the native semantics
| { { e, false } } -> return `Index{ a, e }
-- Either a range, or multiple indexes, or both
| ranges ->
local r = `Call{ +{table.isub}, a }
local function acc (x,y) table.insert (r,x); table.insert (r,y) end
for _, seq in ipairs (ranges) do
match seq with
| { e, false } -> acc(e,e)
| { e, f } -> acc(e,f)
end
end
return r
end
end
-------------------------------------------------------------------
-- Find continue statements in a loop body, change them into goto
-- end-of-body.
local function transform_continue_statements(body)
local continue_statements = Q(body)
:if_unknown() -- tolerate unknown 'Continue' statements
:not_under ('Forin', 'Fornum', 'While', 'Repeat')
:filter ('Continue')
:list()
if next(continue_statements) then
local continue_label = gg.gensym 'continue' [1]
table.insert(body, `Label{ continue_label })
for _, statement in ipairs(continue_statements) do
statement.tag = 'Goto'
statement[1] = continue_label
end
return true
else return false end
end
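-- Illustrative sketch: "while c do if p then continue end f() end" is rewritten
-- at the AST level into the equivalent of
--     while c do if p then goto <label> end f() ::<label>:: end
-- where <label> stands for a gensym'd name user code cannot collide with.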
-------------------------------------------------------------------------------
-- Back-end for loops with a multi-element header
local function loop_builder(x)
local first, elements, body = unpack(x)
-- Change continue statements into gotos.
if SUPPORT_CONTINUE then transform_continue_statements(body) end
-------------------------------------------------------------------
-- If it's a regular loop, don't bloat the code
if not next(elements) then
table.insert(first, body)
return first
end
-------------------------------------------------------------------
-- There's no reason to treat the first element in a special way
table.insert(elements, 1, first)
-------------------------------------------------------------------
-- Change breaks into gotos that escape all loops at once.
local exit_label = nil
local function break_to_goto(break_node)
if not exit_label then exit_label = gg.gensym 'break' [1] end
break_node = break_node or { }
break_node.tag = 'Goto'
break_node[1] = exit_label
return break_node
end
Q(body)
:not_under('Function', 'Forin', 'Fornum', 'While', 'Repeat')
:filter('Break')
:foreach (break_to_goto)
-------------------------------------------------------------------
-- Compile all headers elements, from last to first.
-- invariant: `body` is a block (not a statement)
local result = body
for i = #elements, 1, -1 do
local e = elements[i]
match e with
| `If{ cond } ->
result = { `If{ cond, result } }
| `Until{ cond } ->
result = +{block: if -{cond} then -{break_to_goto()} else -{result} end }
| `While{ cond } ->
if i==1 then result = { `While{ cond, result } } -- top-level while
else result = +{block: if -{cond} then -{result} else -{break_to_goto()} end } end
| `Forin{ ... } | `Fornum{ ... } ->
table.insert (e, result); result={e}
| _-> require'metalua.pprint'.printf("Bad loop header element %s", e)
end
end
-------------------------------------------------------------------
-- If some breaks had to be changed into gotos, insert the label
if exit_label then result = { result, `Label{ exit_label } } end
return result
end
--------------------------------------------------------------------------------
-- Improved "[...]" index operator:
-- * support for multi-indexes ("foo[bar, gnat]")
-- * support for ranges ("foo[bar ... gnat]")
--------------------------------------------------------------------------------
local function extend(M)
local _M = gg.future(M)
if SUPPORT_COMP_LISTS then
-- support for "for" / "if" comprehension suffixes in literal tables
local original_table_element = M.table.element
M.table.element = gg.expr{ name="table cell",
primary = original_table_element,
suffix = { name="table cell suffix",
{ "...", builder = dots_list_suffix_builder },
{ "for", _M.for_header, builder = for_list_suffix_builder },
{ "if", _M.expr, builder = if_list_suffix_builder } } }
M.table.content.builder = table_content_builder
end
if SUPPORT_IMPROVED_INDEXES then
-- Support for ranges and multiple indices in bracket suffixes
M.expr.suffix:del '['
M.expr.suffix:add{ name="table index/range",
"[", gg.list{
gg.sequence { _M.expr, gg.onkeyword{ "...", _M.expr } } ,
separators = { ",", ";" } },
"]", builder = index_builder }
end
if SUPPORT_IMPROVED_LOOPS then
local original_for_header = M.for_header
M.stat :del 'for'
M.stat :del 'while'
M.loop_suffix = gg.multisequence{
{ 'while', _M.expr, builder = |x| `Until{ `Op{ 'not', x[1] } } },
{ 'until', _M.expr, builder = |x| `Until{ x[1] } },
{ 'if', _M.expr, builder = |x| `If{ x[1] } },
{ 'for', original_for_header, builder = |x| x[1] } }
M.loop_suffix_list = gg.list{ _M.loop_suffix, terminators='do' }
M.stat :add{
'for', original_for_header, _M.loop_suffix_list, 'do', _M.block, 'end',
builder = loop_builder }
M.stat :add{
'while', _M.expr, _M.loop_suffix_list, 'do', _M.block, 'end',
builder = |x| loop_builder{ `While{x[1]}, x[2], x[3] } }
end
if SUPPORT_CONTINUE then
M.lexer :add 'continue'
M.stat :add{ 'continue', builder='Continue' }
end
end
return extend

View File

@ -0,0 +1,400 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Glossary:
--
-- * term_seq: the tested stuff, a sequence of terms
-- * pattern_element: might match one term of a term seq. Represented
-- as expression ASTs.
-- * pattern_seq: might match a term_seq
-- * pattern_group: several pattern seqs, one of them might match
-- the term seq.
-- * case: pattern_group * guard option * block
-- * match_statement: tested term_seq * case list
--
-- Hence a complete match statement is a:
--
-- { list(expr), list{ list(list(expr)), expr or false, block } }
--
-- Implementation hints
-- ====================
--
-- The implementation is made as modular as possible, so that parts
-- can be reused in other extensions. The privileged way to share
-- contextual information across functions is through the 'cfg' table
-- argument. Its fields include:
--
-- * code: code generated from pattern. A pattern_(element|seq|group)
-- is compiled as a sequence of instructions which will jump to
-- label [cfg.on_failure] if the tested term doesn't match.
--
-- * on_failure: name of the label where the code will jump if the
-- pattern doesn't match
--
-- * locals: names of local variables used by the pattern. This
-- includes bound variables, and temporary variables used to
-- destructure tables. Names are stored as keys of the table,
-- values are meaningless.
--
-- * after_success: label where the code must jump after a pattern
-- succeeded in capturing a term, and the guard succeeded if there is
-- any, and the conditional block has run.
--
-- * ntmp: number of temporary variables used to destructure tables
-- in the current case.
--
-- Code generation is performed by acc_xxx() functions, which accumulate
-- code in cfg.code:
--
-- * acc_test(test, cfg) will generate a jump to cfg.on_failure
-- *when the test returns TRUE*
--
-- * acc_stat accumulates a statement
--
-- * acc_assign accumulates an assignment statement, and makes sure that
-- the LHS variable is registered as a local in cfg.locals.
--
-------------------------------------------------------------------------------
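-------------------------------------------------------------------------------
-- Illustrative usage sketch (names and values are arbitrary):
--
--     match get_point() with
--     | { x=0, y=0 }          -> print "origin"
--     | { x=x, y=y } if x > 0 -> print ("x=" .. x .. ", y=" .. y)
--     | _                     -> print "elsewhere"
--     end
--
-------------------------------------------------------------------------------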
-- TODO: hygiene wrt type()
-- TODO: cfg.ntmp isn't reset as often as it could. I'm not even sure
-- the corresponding locals are declared.
local checks = require 'checks'
local gg = require 'metalua.grammar.generator'
local pp = require 'metalua.pprint'
----------------------------------------------------------------------
-- This would have been best done through library 'metalua.walk',
-- but walk depends on match, so we have to break the dependency.
-- It replaces all instances of `...' in `ast' with `term', unless
-- it appears in a function.
----------------------------------------------------------------------
local function replace_dots (ast, term)
local function rec (node)
for i, child in ipairs(node) do
if type(child)~="table" then -- pass
elseif child.tag=='Dots' then
if term=='ambiguous' then
error ("You can't use `...' on the right of a match case when it appears "..
"more than once on the left")
else node[i] = term end
elseif child.tag=='Function' then return nil
else rec(child) end
end
end
return rec(ast)
end
local tmpvar_base = gg.gensym 'submatch.' [1]
local function next_tmpvar(cfg)
assert (cfg.ntmp, "No cfg.ntmp imbrication level in the match compiler")
cfg.ntmp = cfg.ntmp+1
return `Id{ tmpvar_base .. cfg.ntmp }
end
-- Code accumulators
local acc_stat = |x,cfg| table.insert (cfg.code, x)
local acc_test = |x,cfg| acc_stat(+{stat: if -{x} then -{`Goto{cfg.on_failure}} end}, cfg)
-- lhs :: `Id{ string }
-- rhs :: expr
local function acc_assign (lhs, rhs, cfg)
assert(lhs.tag=='Id')
cfg.locals[lhs[1]] = true
acc_stat (`Set{ {lhs}, {rhs} }, cfg)
end
local literal_tags = { String=1, Number=1, True=1, False=1, Nil=1 }
-- pattern :: `Id{ string }
-- term :: expr
local function id_pattern_element_builder (pattern, term, cfg)
assert (pattern.tag == "Id")
if pattern[1] == "_" then
-- "_" is used as a dummy var ==> no assignment, no == checking
cfg.locals._ = true
elseif cfg.locals[pattern[1]] then
-- This var is already bound ==> test for equality
acc_test (+{ -{term} ~= -{pattern} }, cfg)
else
-- Free var ==> bind it, and remember it for latter linearity checking
acc_assign (pattern, term, cfg)
cfg.locals[pattern[1]] = true
end
end
-- mutually recursive with table_pattern_element_builder
local pattern_element_builder
-- pattern :: pattern and `Table{ }
-- term :: expr
local function table_pattern_element_builder (pattern, term, cfg)
local seen_dots, len = false, 0
acc_test (+{ type( -{term} ) ~= "table" }, cfg)
for i = 1, #pattern do
local key, sub_pattern
if pattern[i].tag=="Pair" then -- Explicit key/value pair
key, sub_pattern = unpack (pattern[i])
assert (literal_tags[key.tag], "Invalid key")
else -- Implicit key
len, key, sub_pattern = len+1, `Number{ len+1 }, pattern[i]
end
-- '...' can only appear in final position
-- Could be fixed actually...
assert (not seen_dots, "Wrongly placed `...' ")
if sub_pattern.tag == "Id" then
-- Optimization: save a useless [ v(n+1)=v(n).key ]
id_pattern_element_builder (sub_pattern, `Index{ term, key }, cfg)
if sub_pattern[1] ~= "_" then
acc_test (+{ -{sub_pattern} == nil }, cfg)
end
elseif sub_pattern.tag == "Dots" then
-- Remember where the capture is, and that arity checking shouldn't occur
seen_dots = true
else
-- Business as usual:
local v2 = next_tmpvar(cfg)
acc_assign (v2, `Index{ term, key }, cfg)
pattern_element_builder (sub_pattern, v2, cfg)
-- TODO: restore ntmp?
end
end
if seen_dots then -- remember how to retrieve `...'
-- FIXME: check, but there might be cases where the variable -{term}
-- will be overridden in contrived tables.
-- ==> save it now, and clean the setting statement if unused
if cfg.dots_replacement then cfg.dots_replacement = 'ambiguous'
else cfg.dots_replacement = +{ select (-{`Number{len}}, unpack(-{term})) } end
else -- Check arity
acc_test (+{ #-{term} ~= -{`Number{len}} }, cfg)
end
end
-- mutually recursive with pattern_element_builder
local eq_pattern_element_builder, regexp_pattern_element_builder
-- Concatenate code in [cfg.code], that will jump to label
-- [cfg.on_failure] if [pattern] doesn't match [term]. [pattern]
-- should be an identifier, or at least cheap to compute and
-- side-effects free.
--
-- pattern :: pattern_element
-- term :: expr
function pattern_element_builder (pattern, term, cfg)
if literal_tags[pattern.tag] then
acc_test (+{ -{term} ~= -{pattern} }, cfg)
elseif "Id" == pattern.tag then
id_pattern_element_builder (pattern, term, cfg)
elseif "Op" == pattern.tag and "div" == pattern[1] then
regexp_pattern_element_builder (pattern, term, cfg)
elseif "Op" == pattern.tag and "eq" == pattern[1] then
eq_pattern_element_builder (pattern, term, cfg)
elseif "Table" == pattern.tag then
table_pattern_element_builder (pattern, term, cfg)
else
error ("Invalid pattern at "..
tostring(pattern.lineinfo)..
": "..pp.tostring(pattern, {hide_hash=true}))
end
end
function eq_pattern_element_builder (pattern, term, cfg)
local _, pat1, pat2 = unpack (pattern)
local ntmp_save = cfg.ntmp
pattern_element_builder (pat1, term, cfg)
cfg.ntmp = ntmp_save
pattern_element_builder (pat2, term, cfg)
end
-- pattern :: `Op{ 'div', string, list{`Id string} or `Id{ string }}
-- term :: expr
function regexp_pattern_element_builder (pattern, term, cfg)
local op, regexp, sub_pattern = unpack(pattern)
-- Sanity checks --
assert (op=='div', "Don't know what to do with that op in a pattern")
assert (regexp.tag=="String",
"Left hand side operand for '/' in a pattern must be "..
"a literal string representing a regular expression")
if sub_pattern.tag=="Table" then
for _, x in ipairs(sub_pattern) do
assert (x.tag=="Id" or x.tag=='Dots',
"Right hand side operand for '/' in a pattern must be "..
"a list of identifiers")
end
else
assert (sub_pattern.tag=="Id",
"Right hand side operand for '/' in a pattern must be "..
"an identifier or a list of identifiers")
end
-- Regexp patterns can only match strings
acc_test (+{ type(-{term}) ~= 'string' }, cfg)
-- put all captures in a list
local capt_list = +{ { string.match(-{term}, -{regexp}) } }
-- save them in a var_n for recursive decomposition
local v2 = next_tmpvar(cfg)
acc_stat (+{stat: local -{v2} = -{capt_list} }, cfg)
-- was capture successful?
acc_test (+{ not next (-{v2}) }, cfg)
pattern_element_builder (sub_pattern, v2, cfg)
end
-- Jumps to [cfg.on_failure] if pattern_seq doesn't match
-- term_seq.
local function pattern_seq_builder (pattern_seq, term_seq, cfg)
if #pattern_seq ~= #term_seq then error ("Bad seq arity") end
cfg.locals = { } -- reset bound variables between alternatives
for i=1, #pattern_seq do
cfg.ntmp = 1 -- reset the tmp var generator
pattern_element_builder(pattern_seq[i], term_seq[i], cfg)
end
end
--------------------------------------------------
-- for each case i:
-- pattern_seq_builder_i:
-- * on failure, go to on_failure_i
-- * on success, go to on_success
-- label on_success:
-- block
-- goto after_success
-- label on_failure_i
--------------------------------------------------
local function case_builder (case, term_seq, cfg)
local patterns_group, guard, block = unpack(case)
local on_success = gg.gensym 'on_success' [1]
for i = 1, #patterns_group do
local pattern_seq = patterns_group[i]
cfg.on_failure = gg.gensym 'match_fail' [1]
cfg.dots_replacement = false
pattern_seq_builder (pattern_seq, term_seq, cfg)
if i<#patterns_group then
acc_stat (`Goto{on_success}, cfg)
acc_stat (`Label{cfg.on_failure}, cfg)
end
end
acc_stat (`Label{on_success}, cfg)
if guard then acc_test (+{not -{guard}}, cfg) end
if cfg.dots_replacement then
replace_dots (block, cfg.dots_replacement)
end
block.tag = 'Do'
acc_stat (block, cfg)
acc_stat (`Goto{cfg.after_success}, cfg)
acc_stat (`Label{cfg.on_failure}, cfg)
end
local function match_builder (x)
local term_seq, cases = unpack(x)
local cfg = {
code = `Do{ },
after_success = gg.gensym "_after_success" }
-- Some sharing issues occur when modifying term_seq,
-- so it's replaced by a copy new_term_seq.
-- TODO: clean that up, and re-suppress the useless copies
-- (cf. remarks about capture bug below).
local new_term_seq = { }
local match_locals
-- Make sure that all tested terms are variables or literals
for i=1, #term_seq do
local t = term_seq[i]
-- Capture problem: the following would compile wrongly:
-- `match x with x -> end'
-- Temporary workaround: suppress the condition, so that
-- all external variables are copied into unique names.
--if t.tag ~= 'Id' and not literal_tags[t.tag] then
local v = gg.gensym 'v'
if not match_locals then match_locals = `Local{ {v}, {t} } else
table.insert(match_locals[1], v)
table.insert(match_locals[2], t)
end
new_term_seq[i] = v
--end
end
term_seq = new_term_seq
if match_locals then acc_stat(match_locals, cfg) end
for i=1, #cases do
local case_cfg = {
after_success = cfg.after_success,
code = `Do{ }
-- locals = { } -- unnecessary, done by pattern_seq_builder
}
case_builder (cases[i], term_seq, case_cfg)
if next (case_cfg.locals) then
local case_locals = { }
table.insert (case_cfg.code, 1, `Local{ case_locals, { } })
for v, _ in pairs (case_cfg.locals) do
table.insert (case_locals, `Id{ v })
end
end
acc_stat(case_cfg.code, cfg)
end
local li = `String{tostring(cases.lineinfo)}
acc_stat(+{error('mismatch at '..-{li})}, cfg)
acc_stat(`Label{cfg.after_success}, cfg)
return cfg.code
end
----------------------------------------------------------------------
-- Syntactical front-end
----------------------------------------------------------------------
local function extend(M)
local _M = gg.future(M)
checks('metalua.compiler.parser')
M.lexer:add{ "match", "with", "->" }
M.block.terminators:add "|"
local match_cases_list_parser = gg.list{ name = "match cases list",
gg.sequence{ name = "match case",
gg.list{ name = "match case patterns list",
primary = _M.expr_list,
separators = "|",
terminators = { "->", "if" } },
gg.onkeyword{ "if", _M.expr, consume = true },
"->",
_M.block },
separators = "|",
terminators = "end" }
M.stat:add{ name = "match statement",
"match",
_M.expr_list,
"with", gg.optkeyword "|",
match_cases_list_parser,
"end",
builder = |x| match_builder{ x[1], x[3] } }
end
return extend

View File

@ -0,0 +1,834 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Summary: parser generator. Collection of higher order functors,
-- which allow building and combining parsers. Relies on a lexer
-- that supports the same API as the one exposed in mll.lua.
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
--
-- Parser generators:
-- * [gg.sequence()]
-- * [gg.multisequence()]
-- * [gg.expr()]
-- * [gg.list()]
-- * [gg.onkeyword()]
-- * [gg.optkeyword()]
--
-- Other functions:
-- * [gg.parse_error()]
-- * [gg.make_parser()]
-- * [gg.is_parser()]
--
--------------------------------------------------------------------------------
local M = { }
local checks = require 'checks'
local lexer = require 'metalua.grammar.lexer'
local pp = require 'metalua.pprint'
--------------------------------------------------------------------------------
-- Symbol generator: [gensym()] returns a guaranteed-to-be-unique identifier.
-- The main purpose is to avoid variable capture in macros.
--
-- If a string is passed as an argument, this string will be part of the
-- id name (helpful for macro debugging)
--------------------------------------------------------------------------------
local gensymidx = 0
function M.gensym (arg)
gensymidx = gensymidx + 1
return { tag="Id", string.format(".%i.%s", gensymidx, arg or "")}
end
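--------------------------------------------------------------------------------
-- Illustrative sketch: the first call to gensym('break') returns
-- { tag="Id", ".1.break" }; the leading dot makes the generated name impossible
-- to collide with any identifier written in user source.
--------------------------------------------------------------------------------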
-------------------------------------------------------------------------------
-- parser metatable, which maps __call to method parse, and adds some
-- error tracing boilerplate.
-------------------------------------------------------------------------------
local parser_metatable = { }
function parser_metatable :__call (lx, ...)
return self :parse (lx, ...)
end
-------------------------------------------------------------------------------
-- Turn a table into a parser, mainly by setting the metatable.
-------------------------------------------------------------------------------
function M.make_parser(kind, p)
p.kind = kind
if not p.transformers then p.transformers = { } end
function p.transformers:add (x)
table.insert (self, x)
end
setmetatable (p, parser_metatable)
return p
end
-------------------------------------------------------------------------------
-- Return true iff [x] is a parser.
-- If it's a gg-generated parser, return the name of its kind.
-------------------------------------------------------------------------------
function M.is_parser (x)
return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind
end
-------------------------------------------------------------------------------
-- Parse a sequence, without applying builder nor transformers.
-------------------------------------------------------------------------------
local function raw_parse_sequence (lx, p)
local r = { }
for i=1, #p do
local e=p[i]
if type(e) == "string" then
local kw = lx :next()
if not lx :is_keyword (kw, e) then
M.parse_error(
lx, "A keyword was expected, probably `%s'.", e)
end
elseif M.is_parser (e) then
table.insert (r, e(lx))
else -- Invalid parser definition, this is *not* a parsing error
error(string.format(
"Sequence `%s': element #%i is neither a string nor a parser: %s",
p.name, i, pp.tostring(e)))
end
end
return r
end
-------------------------------------------------------------------------------
-- Parse a multisequence, without applying multisequence transformers.
-- The sequences are completely parsed.
-------------------------------------------------------------------------------
local function raw_parse_multisequence (lx, sequence_table, default)
local seq_parser = sequence_table[lx:is_keyword(lx:peek())]
if seq_parser then return seq_parser (lx)
elseif default then return default (lx)
else return false end
end
-------------------------------------------------------------------------------
-- Applies all transformers listed in parser on ast.
-------------------------------------------------------------------------------
local function transform (ast, parser, fli, lli)
if parser.transformers then
for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end
end
if type(ast) == 'table' then
local ali = ast.lineinfo
if not ali or ali.first~=fli or ali.last~=lli then
ast.lineinfo = lexer.new_lineinfo(fli, lli)
end
end
return ast
end
-------------------------------------------------------------------------------
-- Generate a traceable parsing error (tracing not implemented yet)
-------------------------------------------------------------------------------
function M.parse_error(lx, fmt, ...)
local li = lx:lineinfo_left()
local file, line, column, offset, positions
if li then
file, line, column, offset = li.source, li.line, li.column, li.offset
positions = { first = li, last = li }
else
line, column, offset = -1, -1, -1
end
local msg = string.format("line %i, char %i: "..fmt, line, column, ...)
if file and file~='?' then msg = "file "..file..", "..msg end
local src = lx.src
if offset>0 and src then
local i, j = offset, offset
while src:sub(i,i) ~= '\n' and i>=0 do i=i-1 end
while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end
local srcline = src:sub (i+1, j-1)
local idx = string.rep (" ", column).."^"
msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx)
end
--lx :kill()
error(msg)
end
-------------------------------------------------------------------------------
--
-- Sequence parser generator
--
-------------------------------------------------------------------------------
-- Input fields:
--
-- * [builder]: how to build an AST out of sequence parts. let [x] be the list
-- of subparser results (keywords are simply omitted). [builder] can be:
-- - [nil], in which case the result of parsing is simply [x]
-- - a string, which is then put as a tag on [x]
-- - a function, which takes [x] as a parameter and returns an AST.
--
-- * [name]: the name of the parser. Used for debug messages
--
-- * [transformers]: a list of AST->AST functions, applied in order on ASTs
-- returned by the parser.
--
-- * Table-part entries correspond to keywords (strings) and subparsers
-- (function and callable objects).
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "sequence"
-- * [name] is set, if it wasn't in the input.
--
-------------------------------------------------------------------------------
function M.sequence (p)
M.make_parser ("sequence", p)
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p:parse (lx)
-- Raw parsing:
local fli = lx:lineinfo_right()
local seq = raw_parse_sequence (lx, self)
local lli = lx:lineinfo_left()
-- Builder application:
local builder, tb = self.builder, type (self.builder)
if tb == "string" then seq.tag = builder
elseif tb == "function" or builder and builder.__call then seq = builder(seq)
elseif builder == nil then -- nothing
else error ("Invalid builder of type "..tb.." in sequence") end
seq = transform (seq, self, fli, lli)
assert (not seq or seq.lineinfo)
return seq
end
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
-- Try to build a proper name
if p.name then
-- don't touch existing name
elseif type(p[1])=="string" then -- find name based on 1st keyword
if #p==1 then p.name=p[1]
elseif type(p[#p])=="string" then
p.name = p[1] .. " ... " .. p[#p]
else p.name = p[1] .. " ..." end
else -- can't find a decent name
p.name = "unnamed_sequence"
end
return p
end --</sequence>
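-------------------------------------------------------------------------------
-- Illustrative usage sketch (expr and block stand for previously built
-- sub-parsers): a parser for "while <expr> do <block> end" tagging its result
-- `While can be written as
--     gg.sequence{ "while", expr, "do", block, "end", builder = "While" }
-- Keywords are consumed but omitted from the result; the two sub-parser
-- results become the children of the `While node.
-------------------------------------------------------------------------------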
-------------------------------------------------------------------------------
--
-- Multiple, keyword-driven, sequence parser generator
--
-------------------------------------------------------------------------------
-- in [p], useful fields are:
--
-- * [transformers]: as usual
--
-- * [name]: as usual
--
-- * Table-part entries must be sequence parsers, or tables which can
-- be turned into a sequence parser by [gg.sequence]. These
-- sequences must start with a keyword, and this initial keyword
-- must be different for each sequence. The table-part entries will
-- be removed after [gg.multisequence] returns.
--
-- * [default]: the parser to run if the next keyword in the lexer is
-- none of the registered initial keywords. If there's no default
-- parser and no suitable initial keyword, the multisequence parser
-- simply returns [false].
--
-- After creation, the following fields are added:
--
-- * [parse] the parsing function lexer->AST
--
-- * [sequences] the table of sequences, indexed by initial keywords.
--
-- * [add] method takes a sequence parser or a config table for
-- [gg.sequence], and adds/replaces the corresponding sequence
-- parser. If the keyword was already used, the former sequence is
-- removed and a warning is issued.
--
-- * [get] method returns a sequence by its initial keyword
--
-- * [kind] == "multisequence"
--
-------------------------------------------------------------------------------
function M.multisequence (p)
M.make_parser ("multisequence", p)
-------------------------------------------------------------------
-- Add a sequence (might be just a config table for [gg.sequence])
-------------------------------------------------------------------
function p :add (s)
-- compile if necessary:
local keyword = type(s)=='table' and s[1]
if type(s)=='table' and not M.is_parser(s) then M.sequence(s) end
if M.is_parser(s)~='sequence' or type(keyword)~='string' then
if self.default then -- two defaults
error ("In a multisequence parser, all but one sequences "..
"must start with a keyword")
else self.default = s end -- first default
else
if self.sequences[keyword] then -- duplicate keyword
-- TODO: warn that initial keyword `keyword` is overloaded in multiseq
end
self.sequences[keyword] = s
end
end -- </multisequence.add>
-------------------------------------------------------------------
-- Get the sequence starting with this keyword. [kw :: string]
-------------------------------------------------------------------
function p :get (kw) return self.sequences [kw] end
-------------------------------------------------------------------
-- Remove the sequence starting with keyword [kw :: string]
-------------------------------------------------------------------
function p :del (kw)
if not self.sequences[kw] then
-- TODO: warn that we try to delete a non-existent entry
end
local removed = self.sequences[kw]
self.sequences[kw] = nil
return removed
end
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p :parse (lx)
local fli = lx:lineinfo_right()
local x = raw_parse_multisequence (lx, self.sequences, self.default)
local lli = lx:lineinfo_left()
return transform (x, self, fli, lli)
end
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
-- Register the sequences passed to the constructor. They're going
-- from the array part of the parser to the hash part of field
-- [sequences]
p.sequences = { }
for i=1, #p do p :add (p[i]); p[i] = nil end
-- FIXME: why is this commented out?
--if p.default and not is_parser(p.default) then sequence(p.default) end
return p
end --</multisequence>
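-------------------------------------------------------------------------------
-- Illustrative sketch (not part of the original source): a keyword-driven
-- statement parser built with the multisequence generator above. Assumes
-- this module is loaded as `gg`, that `expr_parser` is some expression
-- parser, and that the keywords below are registered in the lexer.
--
--   local stat = gg.multisequence{
--     name = "toy statement",
--     { "return", expr_parser, builder = "Return" },
--     { "break",               builder = "Break"  },
--     default = expr_parser }
--
-- stat :parse (lx) dispatches on the next keyword ("return" or "break")
-- and falls back to the default expression parser otherwise.
-------------------------------------------------------------------------------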
-------------------------------------------------------------------------------
--
-- Expression parser generator
--
-------------------------------------------------------------------------------
--
-- Expression configuration relies on three tables: [prefix], [infix]
-- and [suffix]. Moreover, the primary parser can be replaced by a
-- table: in this case the [primary] table will be passed to
-- [gg.multisequence] to create a parser.
--
-- Each of these tables is a modified multisequence parser: the
-- differences with respect to regular multisequence config tables are:
--
-- * the builder takes specific parameters:
-- - for [prefix], it takes the result of the prefix sequence parser,
-- and the prefixed expression
-- - for [infix], it takes the left-hand-side expression, the results
-- of the infix sequence parser, and the right-hand-side expression.
-- - for [suffix], it takes the suffixed expression, and the result
-- of the suffix sequence parser.
--
-- * the default field is a list, with parameters:
-- - [parser] the raw parsing function
-- - [transformers], as usual
-- - [prec], the operator's precedence
-- - [assoc] for [infix] table, the operator's associativity, which
-- can be "left", "right" or "flat" (default to left)
--
-- In [p], useful fields are:
-- * [transformers]: as usual
-- * [name]: as usual
-- * [primary]: the atomic expression parser, or a multisequence config
-- table (mandatory)
-- * [prefix]: prefix operators config table, see above.
-- * [infix]: infix operators config table, see above.
-- * [suffix]: suffix operators config table, see above.
--
-- After creation, these fields are added:
-- * [kind] == "expr"
-- * [parse] as usual
-- * each table is turned into a multisequence, and therefore has an
-- [add] method
--
-------------------------------------------------------------------------------
function M.expr (p)
M.make_parser ("expr", p)
-------------------------------------------------------------------
-- parser method.
-- In addition to the lexer, it takes an optional precedence:
-- it won't read expressions whose precedence is lower or equal
-- to [prec].
-------------------------------------------------------------------
function p :parse (lx, prec)
prec = prec or 0
------------------------------------------------------
-- Extract the right parser and the corresponding
-- options table, for (pre|in|suff)fix operators.
-- Options include prec, assoc, transformers.
------------------------------------------------------
local function get_parser_info (tab)
local p2 = tab :get (lx :is_keyword (lx :peek()))
if p2 then -- keyword-based sequence found
local function parser(lx) return raw_parse_sequence(lx, p2) end
return parser, p2
else -- Got to use the default parser
local d = tab.default
if d then return d.parse or d.parser, d
else return false, false end
end
end
------------------------------------------------------
-- Look for a prefix sequence. Multiple prefixes are
-- handled through the recursive [p.parse] call.
-- Notice the double-transform: one for the primary
-- expr, and one for the one with the prefix op.
------------------------------------------------------
local function handle_prefix ()
local fli = lx :lineinfo_right()
local p2_func, p2 = get_parser_info (self.prefix)
local op = p2_func and p2_func (lx)
if op then -- Keyword-based sequence found
local ili = lx :lineinfo_right() -- Intermediate LineInfo
local e = p2.builder (op, self :parse (lx, p2.prec))
local lli = lx :lineinfo_left()
return transform (transform (e, p2, ili, lli), self, fli, lli)
else -- No prefix found, get a primary expression
local e = self.primary(lx)
local lli = lx :lineinfo_left()
return transform (e, self, fli, lli)
end
end --</expr.parse.handle_prefix>
------------------------------------------------------
-- Look for an infix sequence+right-hand-side operand.
-- Return the whole binary expression result,
-- or false if no operator was found.
------------------------------------------------------
local function handle_infix (e)
local p2_func, p2 = get_parser_info (self.infix)
if not p2 then return false end
-----------------------------------------
-- Handle flattening operators: gather all operands
-- of the series in [list]; when a different operator
-- is found, stop, build from [list], [transform] and
-- return.
-----------------------------------------
if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then
local fli = lx:lineinfo_right()
local pflat, list = p2, { e }
repeat
local op = p2_func(lx)
if not op then break end
table.insert (list, self:parse (lx, p2.prec))
local _ -- We only care about checking that p2==pflat
_, p2 = get_parser_info (self.infix)
until p2 ~= pflat
local e2 = pflat.builder (list)
local lli = lx:lineinfo_left()
return transform (transform (e2, pflat, fli, lli), self, fli, lli)
-----------------------------------------
-- Handle regular infix operators: [e] the LHS is known,
-- just gather the operator and [e2] the RHS.
-- Result goes in [e3].
-----------------------------------------
elseif p2.prec and p2.prec>prec or
p2.prec==prec and p2.assoc=="right" then
local fli = e.lineinfo.first -- lx:lineinfo_right()
local op = p2_func(lx)
if not op then return false end
local e2 = self:parse (lx, p2.prec)
local e3 = p2.builder (e, op, e2)
local lli = lx:lineinfo_left()
return transform (transform (e3, p2, fli, lli), self, fli, lli)
-----------------------------------------
-- Check for non-associative operators, and complain if applicable.
-----------------------------------------
elseif p2.assoc=="none" and p2.prec==prec then
M.parse_error (lx, "non-associative operator!")
-----------------------------------------
-- No infix operator suitable at that precedence
-----------------------------------------
else return false end
end --</expr.parse.handle_infix>
------------------------------------------------------
-- Look for a suffix sequence.
-- Return the result of suffix operator on [e],
-- or false if no operator was found.
------------------------------------------------------
local function handle_suffix (e)
-- FIXME bad fli, must take e.lineinfo.first
local p2_func, p2 = get_parser_info (self.suffix)
if not p2 then return false end
if not p2.prec or p2.prec>=prec then
--local fli = lx:lineinfo_right()
local fli = e.lineinfo.first
local op = p2_func(lx)
if not op then return false end
local lli = lx:lineinfo_left()
e = p2.builder (e, op)
e = transform (transform (e, p2, fli, lli), self, fli, lli)
return e
end
return false
end --</expr.parse.handle_suffix>
------------------------------------------------------
-- Parser body: read suffix and (infix+operand)
-- extensions as long as we're able to fetch more at
-- this precedence level.
------------------------------------------------------
local e = handle_prefix()
repeat
local x = handle_suffix (e); e = x or e
local y = handle_infix (e); e = y or e
until not (x or y)
-- No transform: it already happened in operators handling
return e
end --</expr.parse>
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
if not p.primary then p.primary=p[1]; p[1]=nil end
for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do
if not p[t] then p[t] = { } end
if not M.is_parser(p[t]) then M.multisequence(p[t]) end
end
function p:add(...) return self.primary:add(...) end
return p
end --</expr>
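-------------------------------------------------------------------------------
-- Illustrative sketch (not part of the original source): a tiny arithmetic
-- expression parser built with gg.expr. Assumes this module is loaded as
-- `gg`, that `number_literal` parses a numeric literal into an AST node,
-- and that "-", "+" and "*" are registered as lexer keywords.
--
--   local arith = gg.expr{
--     name    = "toy arithmetic",
--     primary = number_literal,
--     prefix  = { { "-", prec = 80,
--                   builder = function (op, e) return { tag="Neg", e } end } },
--     infix   = {
--       { "+", prec = 60, assoc = "left",
--         builder = function (lhs, op, rhs) return { tag="Add", lhs, rhs } end },
--       { "*", prec = 70, assoc = "left",
--         builder = function (lhs, op, rhs) return { tag="Mul", lhs, rhs } end } } }
--
-- Precedence and associativity are taken from the [prec] / [assoc] fields,
-- as described in the configuration comment above.
-------------------------------------------------------------------------------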
-------------------------------------------------------------------------------
--
-- List parser generator
--
-------------------------------------------------------------------------------
-- In [p], the following fields can be provided in input:
--
-- * [builder]: takes list of subparser results, returns AST
-- * [transformers]: as usual
-- * [name]: as usual
--
-- * [terminators]: list of strings representing the keywords which
-- might mark the end of the list. When non-empty, the list is
-- allowed to be empty. A string is treated as a single-element
-- table, whose element is that string, e.g. ["do"] is the same as
-- [{"do"}].
--
-- * [separators]: list of strings representing the keywords which can
-- separate elements of the list. When non-empty, one of these
-- keywords has to be found between each element. Lack of a separator
-- indicates the end of the list. A string is treated as a
-- single-element table, whose element is that string, e.g. ["do"]
-- is the same as [{"do"}]. If [terminators] is empty/nil, then
-- [separators] has to be non-empty.
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "list"
--
-------------------------------------------------------------------------------
function M.list (p)
M.make_parser ("list", p)
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p :parse (lx)
------------------------------------------------------
-- Used to quickly check whether there's a terminator
-- or a separator immediately ahead
------------------------------------------------------
local function peek_is_in (keywords)
return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end
local x = { }
local fli = lx :lineinfo_right()
-- if there's a terminator to start with, don't bother trying
local is_empty_list = self.terminators and (peek_is_in (self.terminators) or lx:peek().tag=="Eof")
if not is_empty_list then
repeat
local item = self.primary(lx)
table.insert (x, item) -- read one element
until
-- There's a separator list specified, and next token isn't in it.
-- Otherwise, consume it with [lx:next()]
self.separators and not(peek_is_in (self.separators) and lx:next()) or
-- Terminator token ahead
peek_is_in (self.terminators) or
-- Last reason: end of file reached
lx:peek().tag=="Eof"
end
local lli = lx:lineinfo_left()
-- Apply the builder. It can be a string, or a callable value,
-- or simply nothing.
local b = self.builder
if b then
if type(b)=="string" then x.tag = b -- b is a string, use it as a tag
elseif type(b)=="function" then x=b(x)
else
local bmt = getmetatable(b)
if bmt and bmt.__call then x=b(x) end
end
end
return transform (x, self, fli, lli)
end --</list.parse>
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
if not p.primary then p.primary = p[1]; p[1] = nil end
if type(p.terminators) == "string" then p.terminators = { p.terminators }
elseif p.terminators and #p.terminators == 0 then p.terminators = nil end
if type(p.separators) == "string" then p.separators = { p.separators }
elseif p.separators and #p.separators == 0 then p.separators = nil end
return p
end --</list>
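-------------------------------------------------------------------------------
-- Illustrative sketch (not part of the original source): a comma-separated
-- argument list, terminated by ")". Assumes this module is loaded as `gg`
-- and that `expr_parser` is an expression parser.
--
--   local arg_list = gg.list{
--     name        = "toy argument list",
--     primary     = expr_parser,
--     separators  = ",",
--     terminators = ")",
--     builder     = "ArgList" }  -- tag the resulting node `ArgList{ ... }
--
-- Because [terminators] is non-empty, an empty list (")" immediately
-- ahead) is accepted.
-------------------------------------------------------------------------------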
-------------------------------------------------------------------------------
--
-- Keyword-conditioned parser generator
--
-------------------------------------------------------------------------------
--
-- Only apply a parser if a given keyword is found. The result of
-- [gg.onkeyword] parser is the result of the subparser (modulo
-- [transformers] applications).
--
-- lineinfo: the keyword is *not* included in the boundaries of the
-- resulting lineinfo. A review of all usages of gg.onkeyword() in the
-- implementation of metalua has shown that it was the appropriate choice
-- in every case.
--
-- Input fields:
--
-- * [name]: as usual
--
-- * [transformers]: as usual
--
-- * [peek]: if non-nil, the conditioning keyword is left in the lexeme
-- stream instead of being consumed.
--
-- * [primary]: the subparser.
--
-- * [keywords]: list of strings representing triggering keywords.
--
-- * Table-part entries can contain strings, and/or exactly one parser.
-- Strings are put in [keywords], and the parser is put in [primary].
--
-- After the call, the following fields will be set:
--
-- * [parse] the parsing method
-- * [kind] == "onkeyword"
-- * [primary]
-- * [keywords]
--
-------------------------------------------------------------------------------
function M.onkeyword (p)
M.make_parser ("onkeyword", p)
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p :parse (lx)
if lx :is_keyword (lx:peek(), unpack(self.keywords)) then
local fli = lx:lineinfo_right()
if not self.peek then lx:next() end
local content = self.primary (lx)
local lli = lx:lineinfo_left()
local li = content.lineinfo or { }
fli, lli = li.first or fli, li.last or lli
return transform (content, p, fli, lli)
else return false end
end
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
if not p.keywords then p.keywords = { } end
for _, x in ipairs(p) do
if type(x)=="string" then table.insert (p.keywords, x)
else assert (not p.primary and M.is_parser (x)); p.primary = x end
end
assert (next (p.keywords), "Missing trigger keyword in gg.onkeyword")
assert (p.primary, 'no primary parser in gg.onkeyword')
return p
end --</onkeyword>
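-------------------------------------------------------------------------------
-- Illustrative sketch (not part of the original source): parse an optional
-- "else" branch. Assumes this module is loaded as `gg` and that `block`
-- is a statement-block parser.
--
--   local opt_else = gg.onkeyword{ "else", block, name = "optional else" }
--
-- opt_else :parse (lx) consumes the "else" keyword and returns the block's
-- AST when the keyword is present, and returns false otherwise.
-------------------------------------------------------------------------------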
-------------------------------------------------------------------------------
--
-- Optional keyword consumer pseudo-parser generator
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function. That function parses
-- one of the keywords passed as parameters, and returns it. It returns
-- [false] if no matching keyword is found.
--
-- Notice that tokens returned by the lexer already carry lineinfo, so
-- there's no need to add it, as is usually done through transform() calls.
-------------------------------------------------------------------------------
function M.optkeyword (...)
local args = {...}
if type (args[1]) == "table" then
assert (#args == 1)
args = args[1]
end
for _, v in ipairs(args) do assert (type(v)=="string") end
return function (lx)
local x = lx:is_keyword (lx:peek(), unpack (args))
if x then lx:next(); return x
else return false end
end
end
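-------------------------------------------------------------------------------
-- Illustrative sketch (not part of the original source), assuming this
-- module is loaded as `gg`:
--
--   local sep = gg.optkeyword (",", ";")
--
-- sep(lx) consumes and returns "," or ";" when one of them is the next
-- token, and returns false (consuming nothing) otherwise.
-------------------------------------------------------------------------------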
-------------------------------------------------------------------------------
--
-- Run a parser with a special lexer
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function.
-- First argument is the lexer class to be used with the parser,
-- 2nd is the parser itself.
-- The resulting parser returns whatever the argument parser does.
--
-------------------------------------------------------------------------------
function M.with_lexer(new_lexer, parser)
-------------------------------------------------------------------
-- Most gg functions take their parameters in a table, so it's
-- better to silently accept when with_lexer{ } is called with
-- its arguments in a list:
-------------------------------------------------------------------
if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then
return M.with_lexer(unpack(new_lexer))
end
-------------------------------------------------------------------
-- Save the current lexer, switch it for the new one, run the parser,
-- restore the previous lexer, even if the parser caused an error.
-------------------------------------------------------------------
return function (lx)
local old_lexer = getmetatable(lx)
lx:sync()
setmetatable(lx, new_lexer)
local status, result = pcall(parser, lx)
lx:sync()
setmetatable(lx, old_lexer)
if status then return result else error(result) end
end
end
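-------------------------------------------------------------------------------
-- Illustrative sketch (not part of the original source): temporarily parse
-- with a different lexer class. Assumes this module is loaded as `gg`, and
-- that `quote_lexer` (a lexer class) and `my_parser` already exist.
--
--   local quoted = gg.with_lexer (quote_lexer, my_parser)
--
-- quoted(lx) re-synchronizes lx, swaps its metatable for quote_lexer, runs
-- my_parser, then restores the original lexer class even on error.
-------------------------------------------------------------------------------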
--------------------------------------------------------------------------------
--
-- Make sure a parser is used and returns successfully.
--
--------------------------------------------------------------------------------
function M.nonempty(primary)
local p = M.make_parser('non-empty list', { primary = primary, name=primary.name })
function p :parse (lx)
local fli = lx:lineinfo_right()
local content = self.primary (lx)
local lli = lx:lineinfo_left()
local li = content.lineinfo or { }
fli, lli = li.first or fli, li.last or lli
if #content == 0 then
M.parse_error (lx, "`%s' must not be empty.", self.name or "list")
else
return transform (content, self, fli, lli)
end
end
return p
end
local FUTURE_MT = { }
function FUTURE_MT:__tostring() return "<Proxy parser module>" end
function FUTURE_MT:__newindex(key, value) error "don't write in futures" end
function FUTURE_MT :__index (parser_name)
return function(...)
local p, m = rawget(self, '__path'), self.__module
if p then for _, name in ipairs(p) do
m=rawget(m, name)
if not m then error ("Submodule '"..name.."' undefined") end
end end
local f = rawget(m, parser_name)
if not f then error ("Parser '"..parser_name.."' undefined") end
return f(...)
end
end
function M.future(module, ...)
checks('table')
local path = ... and {...}
if path then for _, x in ipairs(path) do
assert(type(x)=='string', "Bad future arg")
end end
local self = { __module = module,
__path = path }
return setmetatable(self, FUTURE_MT)
end
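-------------------------------------------------------------------------------
-- Illustrative sketch (not part of the original source): breaking a mutual
-- dependency between parsers with a proxy. Assumes this module is loaded
-- as `gg` and that `mlp` is a parser module table which will eventually
-- define an `expr` parser.
--
--   local future_expr = gg.future(mlp).expr
--
-- future_expr(lx) looks up mlp.expr at call time, so it can be referenced
-- from other parsers before mlp.expr has actually been defined.
-------------------------------------------------------------------------------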
return M

View File

@ -0,0 +1,672 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
local checks = require 'checks'
local M = { }
local lexer = { alpha={ }, sym={ } }
lexer.__index=lexer
lexer.__type='lexer.stream'
M.lexer = lexer
local debugf = function() end
-- local debugf=printf
----------------------------------------------------------------------
-- Some locale settings produce bad results, e.g. the French locale
-- expects float numbers to use commas instead of periods.
-- TODO: change the number parser into something locale-independent;
-- locales are nasty.
----------------------------------------------------------------------
os.setlocale('C')
local MT = { }
M.metatables=MT
----------------------------------------------------------------------
-- Create a new metatable, for a new class of objects.
----------------------------------------------------------------------
local function new_metatable(name)
local mt = { __type = 'lexer.'..name };
mt.__index = mt
MT[name] = mt
end
----------------------------------------------------------------------
-- Position: represent a point in a source file.
----------------------------------------------------------------------
new_metatable 'position'
local position_idx=1
function M.new_position(line, column, offset, source)
checks('number', 'number', 'number', 'string')
local id = position_idx; position_idx = position_idx+1
return setmetatable({line=line, column=column, offset=offset,
source=source, id=id}, MT.position)
end
function MT.position :__tostring()
return string.format("<%s%s|L%d|C%d|K%d>",
self.comments and "C|" or "",
self.source, self.line, self.column, self.offset)
end
----------------------------------------------------------------------
-- Position factory: convert offsets into line/column/offset positions.
----------------------------------------------------------------------
new_metatable 'position_factory'
function M.new_position_factory(src, src_name)
-- assert(type(src)=='string')
-- assert(type(src_name)=='string')
local lines = { 1 }
for offset in src :gmatch '\n()' do table.insert(lines, offset) end
local max = #src+1
table.insert(lines, max+1) -- +1 includes Eof
return setmetatable({ src_name=src_name, line2offset=lines, max=max },
MT.position_factory)
end
function MT.position_factory :get_position (offset)
-- assert(type(offset)=='number')
assert(offset<=self.max)
local line2offset = self.line2offset
local left = self.last_left or 1
if offset<line2offset[left] then left=1 end
local right = left+1
if line2offset[right]<=offset then right = right+1 end
if line2offset[right]<=offset then right = #line2offset end
while true do
-- print (" trying lines "..left.."/"..right..", offsets "..line2offset[left]..
-- "/"..line2offset[right].." for offset "..offset)
-- assert(line2offset[left]<=offset)
-- assert(offset<line2offset[right])
-- assert(left<right)
if left+1==right then break end
local middle = math.floor((left+right)/2)
if line2offset[middle]<=offset then left=middle else right=middle end
end
-- assert(left+1==right)
-- printf("found that offset %d is between %d and %d, hence on line %d",
-- offset, line2offset[left], line2offset[right], left)
local line = left
local column = offset - line2offset[line] + 1
self.last_left = left
return M.new_position(line, column, offset, self.src_name)
end
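----------------------------------------------------------------------
-- Illustrative example (not part of the original source): converting a
-- character offset into a line/column position.
--
--   local pf  = M.new_position_factory ("first line\nsecond line\n", "example")
--   local pos = pf :get_position (12)   -- offset 12 is the 's' of "second"
--   -- pos.line == 2, pos.column == 1
----------------------------------------------------------------------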
----------------------------------------------------------------------
-- Lineinfo: represent a node's range in a source file;
-- embed information about prefix and suffix comments.
----------------------------------------------------------------------
new_metatable 'lineinfo'
function M.new_lineinfo(first, last)
checks('lexer.position', 'lexer.position')
return setmetatable({first=first, last=last}, MT.lineinfo)
end
function MT.lineinfo :__tostring()
local fli, lli = self.first, self.last
local line = fli.line; if line~=lli.line then line =line ..'-'..lli.line end
local column = fli.column; if column~=lli.column then column=column..'-'..lli.column end
local offset = fli.offset; if offset~=lli.offset then offset=offset..'-'..lli.offset end
return string.format("<%s%s|L%s|C%s|K%s%s>",
fli.comments and "C|" or "",
fli.source, line, column, offset,
lli.comments and "|C" or "")
end
----------------------------------------------------------------------
-- Token: atomic Lua language element, with a category, a content,
-- and some lineinfo relating it to its original source.
----------------------------------------------------------------------
new_metatable 'token'
function M.new_token(tag, content, lineinfo)
--printf("TOKEN `%s{ %q, lineinfo = %s} boundaries %d, %d",
-- tag, content, tostring(lineinfo), lineinfo.first.id, lineinfo.last.id)
return setmetatable({tag=tag, lineinfo=lineinfo, content}, MT.token)
end
function MT.token :__tostring()
--return string.format("`%s{ %q, %s }", self.tag, self[1], tostring(self.lineinfo))
return string.format("`%s %q", self.tag, self[1])
end
----------------------------------------------------------------------
-- Comment: series of comment blocks with associated lineinfo.
-- To be attached to the tokens just before and just after them.
----------------------------------------------------------------------
new_metatable 'comment'
function M.new_comment(lines)
local first = lines[1].lineinfo.first
local last = lines[#lines].lineinfo.last
local lineinfo = M.new_lineinfo(first, last)
return setmetatable({lineinfo=lineinfo, unpack(lines)}, MT.comment)
end
function MT.comment :text()
local last_line = self[1].lineinfo.last.line
local acc = { }
for i, line in ipairs(self) do
local nreturns = line.lineinfo.first.line - last_line
table.insert(acc, ("\n"):rep(nreturns))
table.insert(acc, line[1])
end
return table.concat(acc)
end
function M.new_comment_line(text, lineinfo, nequals)
checks('string', 'lexer.lineinfo', '?number')
return { lineinfo = lineinfo, text, nequals }
end
----------------------------------------------------------------------
-- Patterns used by [lexer :extract] to decompose the raw string into
-- correctly tagged tokens.
----------------------------------------------------------------------
lexer.patterns = {
spaces = "^[ \r\n\t]*()",
short_comment = "^%-%-([^\n]*)\n?()",
--final_short_comment = "^%-%-([^\n]*)()$",
long_comment = "^%-%-%[(=*)%[\n?(.-)%]%1%]()",
long_string = "^%[(=*)%[\n?(.-)%]%1%]()",
number_mantissa = { "^%d+%.?%d*()", "^%d*%.%d+()" },
number_mantissa_hex = { "^%x+%.?%x*()", "^%x*%.%x+()" }, --Lua5.1 and Lua5.2
number_exponant = "^[eE][%+%-]?%d+()",
number_exponant_hex = "^[pP][%+%-]?%d+()", --Lua5.2
number_hex = "^0[xX]()",
word = "^([%a_][%w_]*)()"
}
----------------------------------------------------------------------
-- unescape a whole string, applying [unesc_digits] and
-- [unesc_letter] as many times as required.
----------------------------------------------------------------------
local function unescape_string (s)
-- Turn the digits of an escape sequence into the corresponding
-- character, e.g. [unesc_digits("123") == string.char(123)].
local function unesc_digits (backslashes, digits)
if #backslashes%2==0 then
-- Even number of backslashes, they escape each other, not the digits.
-- Return them so that unesc_letter() can treat them
return backslashes..digits
else
-- Remove the odd backslash, which escapes the number sequence.
-- The rest will be returned and parsed by unesc_letter()
backslashes = backslashes :sub (1,-2)
end
local k, j, i = digits :reverse() :byte(1, 3)
local z = string.byte "0"
local code = (k or z) + 10*(j or z) + 100*(i or z) - 111*z
if code > 255 then
error ("Illegal escape sequence '\\"..digits..
"' in string: ASCII codes must be in [0..255]")
end
local c = string.char (code)
if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\092b" --> "\\b")
return backslashes..c
end
-- Turn hex digits of escape sequence into char.
local function unesc_hex(backslashes, digits)
if #backslashes%2==0 then
return backslashes..'x'..digits
else
backslashes = backslashes :sub (1,-2)
end
local c = string.char(tonumber(digits,16))
if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\x5cb" --> "\\b")
return backslashes..c
end
-- Handle Lua 5.2 \z sequences
local function unesc_z(backslashes, more)
if #backslashes%2==0 then
return backslashes..more
else
return backslashes :sub (1,-2)
end
end
-- Take a letter [x], and returns the character represented by the
-- sequence ['\\'..x], e.g. [unesc_letter "n" == "\n"].
local function unesc_letter(x)
local t = {
a = "\a", b = "\b", f = "\f",
n = "\n", r = "\r", t = "\t", v = "\v",
["\\"] = "\\", ["'"] = "'", ['"'] = '"', ["\n"] = "\n" }
return t[x] or x
end
s = s: gsub ("(\\+)(z%s*)", unesc_z) -- Lua 5.2
s = s: gsub ("(\\+)([0-9][0-9]?[0-9]?)", unesc_digits)
s = s: gsub ("(\\+)x([0-9a-fA-F][0-9a-fA-F])", unesc_hex) -- Lua 5.2
s = s: gsub ("\\(%D)",unesc_letter)
return s
end
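----------------------------------------------------------------------
-- Illustrative example of the unescaping above (not part of the original
-- source): given the raw source characters  \110\x41\n  (a decimal escape,
-- a hex escape and a letter escape, as they appear between quotes in Lua
-- source), unescape_string returns the three characters "n", "A" and a
-- newline.
----------------------------------------------------------------------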
lexer.extractors = {
"extract_long_comment", "extract_short_comment",
"extract_short_string", "extract_word", "extract_number",
"extract_long_string", "extract_symbol" }
----------------------------------------------------------------------
-- Really extract next token from the raw string
-- (and update the index).
-- loc: offset of the position just after spaces and comments
-- previous_i: offset in src before extraction began
----------------------------------------------------------------------
function lexer :extract ()
local attached_comments = { }
local function gen_token(...)
local token = M.new_token(...)
if #attached_comments>0 then -- attach previous comments to token
local comments = M.new_comment(attached_comments)
token.lineinfo.first.comments = comments
if self.lineinfo_last_extracted then
self.lineinfo_last_extracted.comments = comments
end
attached_comments = { }
end
token.lineinfo.first.facing = self.lineinfo_last_extracted
self.lineinfo_last_extracted.facing = assert(token.lineinfo.first)
self.lineinfo_last_extracted = assert(token.lineinfo.last)
return token
end
while true do -- loop until a non-comment token is found
-- skip whitespaces
self.i = self.src:match (self.patterns.spaces, self.i)
if self.i>#self.src then
local fli = self.posfact :get_position (#self.src+1)
local lli = self.posfact :get_position (#self.src+1) -- ok?
local tok = gen_token("Eof", "eof", M.new_lineinfo(fli, lli))
tok.lineinfo.last.facing = lli
return tok
end
local i_first = self.i -- loc = position after whitespaces
-- try every extractor until a token is found
for _, extractor in ipairs(self.extractors) do
local tag, content, xtra = self [extractor] (self)
if tag then
local fli = self.posfact :get_position (i_first)
local lli = self.posfact :get_position (self.i-1)
local lineinfo = M.new_lineinfo(fli, lli)
if tag=='Comment' then
local prev_comment = attached_comments[#attached_comments]
if not xtra -- new comment is short
and prev_comment and not prev_comment[2] -- prev comment is short
and prev_comment.lineinfo.last.line+1==fli.line then -- adjacent lines
-- concat with previous comment
prev_comment[1] = prev_comment[1].."\n"..content -- TODO quadratic, BAD!
prev_comment.lineinfo.last = lli
else -- accumulate comment
local comment = M.new_comment_line(content, lineinfo, xtra)
table.insert(attached_comments, comment)
end
break -- back to skipping spaces
else -- not a comment: real token, then
return gen_token(tag, content, lineinfo)
end -- if token is a comment
end -- if token found
end -- for each extractor
end -- while token is a comment
end -- :extract()
----------------------------------------------------------------------
-- Extract a short comment.
----------------------------------------------------------------------
function lexer :extract_short_comment()
-- TODO: handle final_short_comment
local content, j = self.src :match (self.patterns.short_comment, self.i)
if content then self.i=j; return 'Comment', content, nil end
end
----------------------------------------------------------------------
-- Extract a long comment.
----------------------------------------------------------------------
function lexer :extract_long_comment()
local equals, content, j = self.src:match (self.patterns.long_comment, self.i)
if j then self.i = j; return "Comment", content, #equals end
end
----------------------------------------------------------------------
-- Extract a '...' or "..." short string.
----------------------------------------------------------------------
function lexer :extract_short_string()
local k = self.src :sub (self.i,self.i) -- first char
if k~=[[']] and k~=[["]] then return end -- no match
local i = self.i + 1
local j = i
while true do
local x,y; x, j, y = self.src :match ("([\\\r\n"..k.."])()(.?)", j) -- next interesting char
if x == '\\' then
if y == 'z' then -- Lua 5.2 \z
j = self.src :match ("^%s*()", j+1)
else
j=j+1 -- escaped char
end
elseif x == k then break -- end of string
else
assert (not x or x=='\r' or x=='\n')
return nil, 'Unterminated string'
end
end
self.i = j
return 'String', unescape_string (self.src :sub (i,j-2))
end
----------------------------------------------------------------------
-- Extract Id or Keyword.
----------------------------------------------------------------------
function lexer :extract_word()
local word, j = self.src:match (self.patterns.word, self.i)
if word then
self.i = j
return (self.alpha [word] and 'Keyword' or 'Id'), word
end
end
----------------------------------------------------------------------
-- Extract Number.
----------------------------------------------------------------------
function lexer :extract_number()
local j = self.src:match(self.patterns.number_hex, self.i)
if j then
j = self.src:match (self.patterns.number_mantissa_hex[1], j) or
self.src:match (self.patterns.number_mantissa_hex[2], j)
if j then
j = self.src:match (self.patterns.number_exponant_hex, j) or j
end
else
j = self.src:match (self.patterns.number_mantissa[1], self.i) or
self.src:match (self.patterns.number_mantissa[2], self.i)
if j then
j = self.src:match (self.patterns.number_exponant, j) or j
end
end
if not j then return end
-- Number found, interpret with tonumber() and return it
local str = self.src:sub (self.i, j-1)
-- :TODO: tonumber on Lua5.2 floating hex may or may not work on Lua5.1
local n = tonumber (str)
if not n then error(str.." is not a valid number according to tonumber()") end
self.i = j
return 'Number', n
end
----------------------------------------------------------------------
-- Extract long string.
----------------------------------------------------------------------
function lexer :extract_long_string()
local _, content, j = self.src :match (self.patterns.long_string, self.i)
if j then self.i = j; return 'String', content end
end
----------------------------------------------------------------------
-- Extract symbol.
----------------------------------------------------------------------
function lexer :extract_symbol()
local k = self.src:sub (self.i,self.i)
local symk = self.sym [k] -- symbols starting with `k`
if not symk then
self.i = self.i + 1
return 'Keyword', k
end
for _, sym in pairs (symk) do
if sym == self.src:sub (self.i, self.i + #sym - 1) then
self.i = self.i + #sym
return 'Keyword', sym
end
end
self.i = self.i+1
return 'Keyword', k
end
----------------------------------------------------------------------
-- Add a keyword to the list of keywords recognized by the lexer.
----------------------------------------------------------------------
function lexer :add (w, ...)
assert(not ..., "lexer :add() takes only one arg, although possibly a table")
if type (w) == "table" then
for _, x in ipairs (w) do self :add (x) end
else
if w:match (self.patterns.word .. "$") then self.alpha [w] = true
elseif w:match "^%p%p+$" then
local k = w:sub(1,1)
local list = self.sym [k]
if not list then list = { }; self.sym [k] = list end
table.insert (list, w)
elseif w:match "^%p$" then return
else error "Invalid keyword" end
end
end
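----------------------------------------------------------------------
-- Illustrative sketch (not part of the original source): registering the
-- tokens of a toy language on a fresh lexer class (single-character
-- symbols need no registration).
--
--   local my_lexer = lexer :clone()                -- :clone() is defined below
--   my_lexer :add{ "if", "then", "else", "end" }   -- alphanumeric keywords
--   my_lexer :add "=="                             -- multi-character symbol
----------------------------------------------------------------------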
----------------------------------------------------------------------
-- Return the [n]th next token, without consuming it.
-- [n] defaults to 1. If it goes past the end of the stream, an EOF
-- token is returned.
----------------------------------------------------------------------
function lexer :peek (n)
if not n then n=1 end
if n > #self.peeked then
for i = #self.peeked+1, n do
self.peeked [i] = self :extract()
end
end
return self.peeked [n]
end
----------------------------------------------------------------------
-- Return the [n]th next token, removing it as well as the 0..n-1
-- previous tokens. [n] defaults to 1. If it goes past the end of the
-- stream, an EOF token is returned.
----------------------------------------------------------------------
function lexer :next (n)
n = n or 1
self :peek (n)
local a
for i=1,n do
a = table.remove (self.peeked, 1)
-- TODO: is this used anywhere? I think not. a.lineinfo.last may be nil.
--self.lastline = a.lineinfo.last.line
end
self.lineinfo_last_consumed = a.lineinfo.last
return a
end
----------------------------------------------------------------------
-- Returns an object which saves the stream's current state.
----------------------------------------------------------------------
-- FIXME there are more fields than that to save
function lexer :save () return { self.i; {unpack(self.peeked) } } end
----------------------------------------------------------------------
-- Restore the stream's state, as saved by method [save].
----------------------------------------------------------------------
-- FIXME there are more fields than that to restore
function lexer :restore (s) self.i=s[1]; self.peeked=s[2] end
----------------------------------------------------------------------
-- Resynchronize: cancel any token in self.peeked, by emptying the
-- list and resetting the indexes
----------------------------------------------------------------------
function lexer :sync()
local p1 = self.peeked[1]
if p1 then
local li_first = p1.lineinfo.first
if li_first.comments then li_first=li_first.comments.lineinfo.first end
self.i = li_first.offset
self.column_offset = self.i - li_first.column
self.peeked = { }
self.attached_comments = p1.lineinfo.first.comments or { }
end
end
----------------------------------------------------------------------
-- Take the source and offset of an old lexer.
----------------------------------------------------------------------
function lexer :takeover(old)
self :sync(); old :sync()
for _, field in ipairs{ 'i', 'src', 'attached_comments', 'posfact' } do
self[field] = old[field]
end
return self
end
----------------------------------------------------------------------
-- Return the current position in the sources. This position is between
-- two tokens, and can be within a space / comment area, and therefore
-- have a non-null width. :lineinfo_left() returns the beginning of the
-- separation area, :lineinfo_right() returns the end of that area.
--
-- ____ last consumed token ____ first unconsumed token
-- / /
-- XXXXX <spaces and comments> YYYYY
-- \____ \____
-- :lineinfo_left() :lineinfo_right()
----------------------------------------------------------------------
function lexer :lineinfo_right()
return self :peek(1).lineinfo.first
end
function lexer :lineinfo_left()
return self.lineinfo_last_consumed
end
----------------------------------------------------------------------
-- Create a new lexstream.
----------------------------------------------------------------------
function lexer :newstream (src_or_stream, name)
name = name or "?"
if type(src_or_stream)=='table' then -- it's a stream
return setmetatable ({ }, self) :takeover (src_or_stream)
elseif type(src_or_stream)=='string' then -- it's a source string
local src = src_or_stream
local pos1 = M.new_position(1, 1, 1, name)
local stream = {
src_name = name; -- Name of the file
src = src; -- The source, as a single string
peeked = { }; -- Tokens already peeked at, but not consumed yet
i = 1; -- Character offset in src
attached_comments = { },-- comments accumulator
lineinfo_last_extracted = pos1,
lineinfo_last_consumed = pos1,
posfact = M.new_position_factory (src_or_stream, name)
}
setmetatable (stream, self)
-- Skip initial sharp-bang for Unix scripts
-- FIXME: redundant with mlp.chunk()
if src and src :match "^#!" then
local endofline = src :find "\n"
stream.i = endofline and (endofline + 1) or #src
end
return stream
else
assert(false, ":newstream() takes a source string or a stream, not a "..
type(src_or_stream))
end
end
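----------------------------------------------------------------------
-- Illustrative sketch (not part of the original source): tokenizing a
-- small source string, assuming the language's keywords have already been
-- registered on `lexer` (or on a clone of it) through :add().
--
--   local lx = lexer :newstream ("local x = 1 + 2", "example")
--   while lx :peek().tag ~= "Eof" do
--     local t = lx :next()        -- consume one token
--     print (t.tag, t[1])         -- e.g. Keyword "local", Id "x", Number 1
--   end
----------------------------------------------------------------------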
----------------------------------------------------------------------
-- If there are no ... args, return the token a (whose truth value is
-- true) if it's a `Keyword{ }, or false otherwise. If there are ... args,
-- they have to be strings; if the token a is a keyword and its content
-- is one of the ... args, return that content (whose truth value is
-- true); otherwise return false.
----------------------------------------------------------------------
function lexer :is_keyword (a, ...)
if not a or a.tag ~= "Keyword" then return false end
local words = {...}
if #words == 0 then return a[1] end
for _, w in ipairs (words) do
if w == a[1] then return w end
end
return false
end
----------------------------------------------------------------------
-- Cause an error if the next token isn't a keyword whose content
-- is listed among ... args (which have to be strings).
----------------------------------------------------------------------
function lexer :check (...)
local words = {...}
local a = self :next()
local function err ()
error ("Got " .. tostring (a) ..
", expected one of these keywords : '" ..
table.concat (words,"', '") .. "'") end
if not a or a.tag ~= "Keyword" then err () end
if #words == 0 then return a[1] end
for _, w in ipairs (words) do
if w == a[1] then return w end
end
err ()
end
----------------------------------------------------------------------
--
----------------------------------------------------------------------
function lexer :clone()
local alpha_clone, sym_clone = { }, { }
for word in pairs(self.alpha) do alpha_clone[word]=true end
for letter, list in pairs(self.sym) do sym_clone[letter] = { unpack(list) } end
local clone = { alpha=alpha_clone, sym=sym_clone }
setmetatable(clone, self)
clone.__index = clone
return clone
end
----------------------------------------------------------------------
-- Cancel everything left in a lexer: all subsequent attempts at
-- `:peek()` or `:next()` will return `Eof`.
----------------------------------------------------------------------
function lexer :kill()
self.i = #self.src+1
self.peeked = { }
self.attached_comments = { }
self.lineinfo_last = self.posfact :get_position (#self.src+1)
end
return M

View File

@ -0,0 +1,133 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local M = require "package" -- extend Lua's basic "package" module
local checks = require 'checks'
M.metalua_extension_prefix = 'metalua.extension.'
-- Initialize package.mpath from package.path
M.mpath = M.mpath or os.getenv 'LUA_MPATH' or
(M.path..";") :gsub("%.(lua[:;])", ".m%1") :sub(1, -2)
M.mcache = M.mcache or os.getenv 'LUA_MCACHE'
----------------------------------------------------------------------
-- resc(k) returns "%"..k if it's a special regular expression char,
-- or just k if it's normal.
----------------------------------------------------------------------
local regexp_magic = { }
for k in ("^$()%.[]*+-?") :gmatch "." do regexp_magic[k]="%"..k end
local function resc(k) return regexp_magic[k] or k end
----------------------------------------------------------------------
-- Take a Lua module name, return the open file and its name,
-- or <false> and an error message.
----------------------------------------------------------------------
function M.findfile(name, path_string)
local config_regexp = ("([^\n])\n"):rep(5):sub(1, -2)
local dir_sep, path_sep, path_mark, execdir, igmark =
M.config :match (config_regexp)
name = name:gsub ('%.', dir_sep)
local errors = { }
local path_pattern = string.format('[^%s]+', resc(path_sep))
for path in path_string:gmatch (path_pattern) do
--printf('path = %s, rpath_mark=%s, name=%s', path, resc(path_mark), name)
local filename = path:gsub (resc (path_mark), name)
--printf('filename = %s', filename)
local file = io.open (filename, 'rb')
if file then return file, filename end
table.insert(errors, string.format("\tno file %q", filename))
end
return false, '\n'..table.concat(errors, "\n")..'\n'
end
----------------------------------------------------------------------
-- Before compiling a metalua source module, try to find and load
-- a more recent bytecode dump. Requires lfs
----------------------------------------------------------------------
local function metalua_cache_loader(name, src_filename, src)
if not M.mcache:find('%?') then
-- This is highly suspicious...
print("WARNING: no '?' character in $LUA_MCACHE/package.mcache")
end
local mlc = require 'metalua.compiler'.new()
local lfs = require 'lfs'
local dir_sep = M.config:sub(1,1)
local dst_filename = M.mcache :gsub ('%?', (name:gsub('%.', dir_sep)))
local src_a = lfs.attributes(src_filename)
local src_date = src_a and src_a.modification or 0
local dst_a = lfs.attributes(dst_filename)
local dst_date = dst_a and dst_a.modification or 0
local delta = dst_date - src_date
local bytecode, file, msg
if delta <= 0 then
--print ("(need to recompile "..src_filename.." into "..dst_filename..")")
bytecode = mlc :src_to_bytecode (src, '@'..src_filename)
for x in dst_filename :gmatch('()'..dir_sep) do
lfs.mkdir(dst_filename:sub(1,x))
end
file, msg = io.open(dst_filename, 'wb')
if not file then error(msg) end
file :write (bytecode)
file :close()
else
file, msg = io.open(dst_filename, 'rb')
if not file then error(msg) end
bytecode = file :read '*a'
file :close()
end
return mlc :bytecode_to_function (bytecode, '@'..src_filename)
end
----------------------------------------------------------------------
-- Load a metalua source file.
----------------------------------------------------------------------
function M.metalua_loader (name)
local file, filename_or_msg = M.findfile (name, M.mpath)
if not file then return filename_or_msg end
local luastring = file:read '*a'
file:close()
if M.mcache and pcall(require, 'lfs') then
return metalua_cache_loader(name, filename_or_msg, luastring)
else return require 'metalua.compiler'.new() :src_to_function (luastring, '@'..filename_or_msg) end
end
----------------------------------------------------------------------
-- Placed after lua/luac loader, so precompiled files have
-- higher precedence.
----------------------------------------------------------------------
table.insert(M.loaders, M.metalua_loader)
----------------------------------------------------------------------
-- Load an extension.
----------------------------------------------------------------------
function extension (name, mlp)
local complete_name = M.metalua_extension_prefix..name
local extend_func = require (complete_name)
if not mlp.extensions[complete_name] then
local ast =extend_func(mlp)
mlp.extensions[complete_name] =extend_func
return ast
end
end
return M

View File

@ -0,0 +1,295 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
----------------------------------------------------------------------
----------------------------------------------------------------------
----------------------------------------------------------------------
--
-- Lua objects pretty-printer
--
----------------------------------------------------------------------
----------------------------------------------------------------------
local M = { }
M.DEFAULT_CFG = {
hide_hash = false; -- Print the non-array part of tables?
metalua_tag = true; -- Use Metalua's backtick syntax sugar?
fix_indent = nil; -- If a number, number of indentation spaces;
-- If false, indent to the previous brace.
line_max = nil; -- If a number, tries to avoid making lines with
-- more than this number of chars.
initial_indent = 0; -- If a number, starts at this level of indentation
keywords = { }; -- Set of keywords which must not use Lua's field
-- shortcuts {["foo"]=...} -> {foo=...}
}
local function valid_id(cfg, x)
if type(x) ~= "string" then return false end
if not x:match "^[a-zA-Z_][a-zA-Z0-9_]*$" then return false end
if cfg.keywords and cfg.keywords[x] then return false end
return true
end
local __tostring_cache = setmetatable({ }, {__mode='k'})
-- Retrieve the string produced by `__tostring` metamethod if present,
-- return `false` otherwise. Cached in `__tostring_cache`.
local function __tostring(x)
local the_string = __tostring_cache[x]
if the_string~=nil then return the_string end
local mt = getmetatable(x)
if mt then
local __tostring = mt.__tostring
if __tostring then
the_string = __tostring(x)
__tostring_cache[x] = the_string
return the_string
end
end
if x~=nil then __tostring_cache[x] = false end -- nil is an illegal key
return false
end
local xlen -- mutually recursive with `xlen_type`
local xlen_cache = setmetatable({ }, {__mode='k'})
-- Helpers for the `xlen` function
local xlen_type = {
["nil"] = function ( ) return 3 end;
number = function (x) return #tostring(x) end;
boolean = function (x) return x and 4 or 5 end;
string = function (x) return #string.format("%q",x) end;
}
function xlen_type.table (adt, cfg, nested)
local custom_string = __tostring(adt)
if custom_string then return #custom_string end
-- Circular referenced objects are printed with the plain
-- `tostring` function in nested positions.
if nested [adt] then return #tostring(adt) end
nested [adt] = true
local has_tag = cfg.metalua_tag and valid_id(cfg, adt.tag)
local alen = #adt
local has_arr = alen>0
local has_hash = false
local x = 0
if not cfg.hide_hash then
-- first pass: count hash-part
for k, v in pairs(adt) do
if k=="tag" and has_tag then
-- this is the tag -> do nothing!
elseif type(k)=="number" and k<=alen and math.fmod(k,1)==0 and k>0 then
-- array-part pair -> do nothing!
else
has_hash = true
if valid_id(cfg, k) then x=x+#k
else x = x + xlen (k, cfg, nested) + 2 end -- count surrounding brackets
x = x + xlen (v, cfg, nested) + 5 -- count " = " and ", "
end
end
end
for i = 1, alen do x = x + xlen (adt[i], cfg, nested) + 2 end -- count ", "
nested[adt] = false -- No more nested calls
if not (has_tag or has_arr or has_hash) then return 3 end
if has_tag then x=x+#adt.tag+1 end
if not (has_arr or has_hash) then return x end
if not has_hash and alen==1 and type(adt[1])~="table" then
return x-2 -- subtract extraneous ", "
end
return x+2 -- count "{ " and " }", subtract extraneous ", "
end
-- Compute the number of chars it would require to display the table
-- on a single line. Helps to decide whether some carriage returns are
-- required. Since the size of each sub-table is required many times,
-- it's cached in [xlen_cache].
xlen = function (x, cfg, nested)
-- no need to compute length for 1-line prints
if not cfg.line_max then return 0 end
nested = nested or { }
if x==nil then return #"nil" end
local len = xlen_cache[x]
if len then return len end
local f = xlen_type[type(x)]
if not f then return #tostring(x) end
len = f (x, cfg, nested)
xlen_cache[x] = len
return len
end
local function consider_newline(p, len)
if not p.cfg.line_max then return end
if p.current_offset + len <= p.cfg.line_max then return end
if p.indent < p.current_offset then
p:acc "\n"; p:acc ((" "):rep(p.indent))
p.current_offset = p.indent
end
end
local acc_value
local acc_type = {
["nil"] = function(p) p:acc("nil") end;
number = function(p, adt) p:acc (tostring (adt)) end;
string = function(p, adt) p:acc ((string.format ("%q", adt):gsub("\\\n", "\\n"))) end;
boolean = function(p, adt) p:acc (adt and "true" or "false") end }
-- Indentation:
-- * if `cfg.fix_indent` is set to a number:
--   * add this number of spaces for each level of depth
-- * return to the line as soon as it flushes things further left
-- * if not, tabulate to one space after the opening brace.
-- * as a result, it never saves right-space to return before first element
function acc_type.table(p, adt)
if p.nested[adt] then p:acc(tostring(adt)); return end
p.nested[adt] = true
local has_tag = p.cfg.metalua_tag and valid_id(p.cfg, adt.tag)
local alen = #adt
local has_arr = alen>0
local has_hash = false
local previous_indent = p.indent
if has_tag then p:acc("`"); p:acc(adt.tag) end
local function indent(p)
if not p.cfg.fix_indent then p.indent = p.current_offset
else p.indent = p.indent + p.cfg.fix_indent end
end
-- First pass: handle hash-part
if not p.cfg.hide_hash then
for k, v in pairs(adt) do
if has_tag and k=='tag' then -- pass the 'tag' field
elseif type(k)=="number" and k<=alen and k>0 and math.fmod(k,1)==0 then
-- pass array-part keys (consecutive ints less than `#adt`)
else -- hash-part keys
if has_hash then p:acc ", " else -- 1st hash-part pair ever found
p:acc "{ "; indent(p)
end
-- Determine whether a newline is required
local is_id, expected_len=valid_id(p.cfg, k)
if is_id then expected_len=#k+xlen(v, p.cfg, p.nested)+#" = , "
else expected_len = xlen(k, p.cfg, p.nested)+xlen(v, p.cfg, p.nested)+#"[] = , " end
consider_newline(p, expected_len)
-- Print the key
if is_id then p:acc(k); p:acc " = " else
p:acc "["; acc_value (p, k); p:acc "] = "
end
acc_value (p, v) -- Print the value
has_hash = true
end
end
end
-- Now we know whether there's a hash-part, an array-part, and a tag.
-- Tag and hash-part are already printed if they're present.
if not has_tag and not has_hash and not has_arr then p:acc "{ }";
elseif has_tag and not has_hash and not has_arr then -- nothing, tag already in acc
else
assert (has_hash or has_arr) -- special case { } already handled
local no_brace = false
if has_hash and has_arr then p:acc ", "
elseif has_tag and not has_hash and alen==1 and type(adt[1])~="table" then
-- No brace required; don't print "{", remember not to print "}"
p:acc (" "); acc_value (p, adt[1]) -- indent= indent+(cfg.fix_indent or 0))
no_brace = true
elseif not has_hash then
-- Braces required, but not opened by hash-part handler yet
p:acc "{ "; indent(p)
end
-- 2nd pass: array-part
if not no_brace and has_arr then
local expected_len = xlen(adt[1], p.cfg, p.nested)
consider_newline(p, expected_len)
acc_value(p, adt[1]) -- indent+(cfg.fix_indent or 0)
for i=2, alen do
p:acc ", ";
consider_newline(p, xlen(adt[i], p.cfg, p.nested))
acc_value (p, adt[i]) --indent+(cfg.fix_indent or 0)
end
end
if not no_brace then p:acc " }" end
end
p.nested[adt] = false -- No more nested calls
p.indent = previous_indent
end
function acc_value(p, v)
local custom_string = __tostring(v)
if custom_string then p:acc(custom_string) else
local f = acc_type[type(v)]
if f then f(p, v) else p:acc(tostring(v)) end
end
end
-- FIXME: new_indent seems to be always nil?!
-- FIXME: accumulator function should be configurable,
-- so that print() doesn't need to bufferize the whole string
-- before starting to print.
function M.tostring(t, cfg)
cfg = cfg or M.DEFAULT_CFG or { }
local p = {
cfg = cfg;
indent = 0;
current_offset = cfg.initial_indent or 0;
buffer = { };
nested = { };
acc = function(self, str)
table.insert(self.buffer, str)
self.current_offset = self.current_offset + #str
end;
}
acc_value(p, t)
return table.concat(p.buffer)
end
function M.print(...) return print(M.tostring(...)) end
function M.sprintf(fmt, ...)
local args={...}
for i, v in pairs(args) do
local t=type(v)
if t=='table' then args[i]=M.tostring(v)
elseif t=='nil' then args[i]='nil' end
end
return string.format(fmt, unpack(args))
end
function M.printf(...) print(M.sprintf(...)) end
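-- Illustrative example (not part of the original source): pretty-printing
-- a Metalua-style AST node, assuming this module is loaded as `pp`.
--
--   local ast = { tag = "Set", { tag = "Id", "x" }, { tag = "Number", 1 } }
--   pp.print (ast, { metalua_tag = true, line_max = 60 })
--   --> `Set{ `Id "x", `Number 1 }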
return M

View File

@ -0,0 +1,108 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-- Keep these global:
PRINT_AST = true
LINE_WIDTH = 60
PROMPT = "M> "
PROMPT2 = ">> "
local pp=require 'metalua.pprint'
local M = { }
mlc = require 'metalua.compiler'.new()
local readline
do -- set readline() to a line reader: editline if available, else readline, else a plain io.read default
local status, editline = pcall(require, 'editline')
if status then
local rl_handle = editline.init 'metalua'
readline = |p| rl_handle:read(p)
else
local status, rl = pcall(require, 'readline')
if status then
rl.set_options{histfile='~/.metalua_history', keeplines=100, completion=false }
readline = rl.readline
else -- neither editline nor readline available
function readline (p)
io.write (p)
io.flush ()
return io.read '*l'
end
end
end
end
local function reached_eof(lx, msg)
return lx:peek().tag=='Eof' or msg:find "token `Eof"
end
function M.run()
pp.printf ("Metalua, interactive REPLoop.\n"..
"(c) 2006-2013 <metalua@gmail.com>")
local lines = { }
while true do
local src, lx, ast, f, results, success
repeat
local line = readline(next(lines) and PROMPT2 or PROMPT)
if not line then print(); os.exit(0) end -- line==nil iff eof on stdin
if not next(lines) then
line = line:gsub('^%s*=', 'return ')
end
table.insert(lines, line)
src = table.concat (lines, "\n")
until #line>0
lx = mlc :src_to_lexstream(src)
success, ast = pcall(mlc.lexstream_to_ast, mlc, lx)
if success then
success, f = pcall(mlc.ast_to_function, mlc, ast, '=stdin')
if success then
results = { xpcall(f, debug.traceback) }
success = table.remove (results, 1)
if success then
-- Success!
for _, x in ipairs(results) do
pp.print(x, {line_max=LINE_WIDTH, metalua_tag=true})
end
lines = { }
else
print "Evaluation error:"
print (results[1])
lines = { }
end
else
print "Can't compile into bytecode:"
print (f)
lines = { }
end
else
-- If lx has been read entirely, try to read
-- another line before failing.
if not reached_eof(lx, ast) then
print "Can't compile source into AST:"
print (ast)
lines = { }
end
end
end
end
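-- Illustrative usage sketch (not part of the original file); the module path
-- below is an assumption and is not confirmed by this file:
--
--   require 'metalua.repl'.run()   -- starts the interactive read-eval-print loop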
return M

View File

@ -0,0 +1,488 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
local walk = require 'metalua.treequery.walk'
local M = { }
-- support for old-style modules
treequery = M
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- multimap helper mmap: associate a key to a set of values
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
local function mmap_add (mmap, node, x)
if node==nil then return false end
local set = mmap[node]
if set then set[x] = true
else mmap[node] = {[x]=true} end
end
-- currently unused, I throw the whole set away
local function mmap_remove (mmap, node, x)
local set = mmap[node]
if not set then return false
elseif not set[x] then return false
elseif next(set) then set[x]=nil
else mmap[node] = nil end
return true
end
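-- Illustrative sketch (not part of the original file) of the multimap contract,
-- with hypothetical keys `node`, `p1`, `p2`:
--
--   local m = { }
--   mmap_add(m, node, p1); mmap_add(m, node, p2)   -- m[node] == { [p1]=true, [p2]=true }
--   mmap_remove(m, node, p1)                       -- removes p1, keeps p2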
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- TreeQuery object.
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
local ACTIVE_SCOPE = setmetatable({ }, {__mode="k"})
-- treequery metatable
local Q = { }; Q.__index = Q
--- treequery constructor
-- the resulting object allows filtering and operating on the AST
-- @param root the AST to visit
-- @return a treequery visitor instance
function M.treequery(root)
return setmetatable({
root = root,
unsatisfied = 0,
predicates = { },
until_up = { },
from_up = { },
up_f = false,
down_f = false,
filters = { },
}, Q)
end
-- helper to share the implementations of positional filters
local function add_pos_filter(self, position, inverted, inclusive, f, ...)
if type(f)=='string' then f = M.has_tag(f, ...) end
if not inverted then self.unsatisfied += 1 end
local x = {
pred = f,
position = position,
satisfied = false,
inverted = inverted or false,
inclusive = inclusive or false }
table.insert(self.predicates, x)
return self
end
function Q :if_unknown(f)
self.unknown_handler = f or (||nil)
return self
end
-- TODO: offer an API for inclusive pos_filters
--- select nodes which are after one which satisfies predicate f
Q.after = |self, f, ...| add_pos_filter(self, 'after', false, false, f, ...)
--- select nodes which are not after one which satisfies predicate f
Q.not_after = |self, f, ...| add_pos_filter(self, 'after', true, false, f, ...)
--- select nodes which are under one which satisfies predicate f
Q.under = |self, f, ...| add_pos_filter(self, 'under', false, false, f, ...)
--- select nodes which are not under one which satisfies predicate f
Q.not_under = |self, f, ...| add_pos_filter(self, 'under', true, false, f, ...)
--- select nodes which satisfy predicate f
function Q :filter(f, ...)
if type(f)=='string' then f = M.has_tag(f, ...) end
table.insert(self.filters, f);
return self
end
--- select nodes which satisfy predicate f
function Q :filter_not(f, ...)
if type(f)=='string' then f = M.has_tag(f, ...) end
table.insert(self.filters, |...| not f(...))
return self
end
-- private helper: apply filters and execute up/down callbacks when applicable
function Q :execute()
local cfg = { }
-- TODO: optimize away not_under & not_after by pruning the tree
function cfg.down(...)
--printf ("[down]\t%s\t%s", self.unsatisfied, table.tostring((...)))
ACTIVE_SCOPE[...] = cfg.scope
local satisfied = self.unsatisfied==0
for _, x in ipairs(self.predicates) do
if not x.satisfied and x.pred(...) then
x.satisfied = true
local node, parent = ...
local inc = x.inverted and 1 or -1
if x.position=='under' then
-- satisfied from after we get down this node...
self.unsatisfied += inc
-- ...until before we get up this node
mmap_add(self.until_up, node, x)
elseif x.position=='after' then
-- satisfied from after we get up this node...
mmap_add(self.from_up, node, x)
-- ...until before we get up this node's parent
mmap_add(self.until_up, parent, x)
elseif x.position=='under_or_after' then
-- satisfied from after we get down this node...
self.satisfied += inc
-- ...until before we get up this node's parent...
mmap_add(self.until_up, parent, x)
else
error "position not understood"
end -- position
if x.inclusive then satisfied = self.unsatisfied==0 end
end -- predicate passed
end -- for predicates
if satisfied then
for _, f in ipairs(self.filters) do
if not f(...) then satisfied=false; break end
end
if satisfied and self.down_f then self.down_f(...) end
end
end
function cfg.up(...)
--printf ("[up]\t%s", table.tostring((...)))
-- Remove predicates which are due before we go up this node
local preds = self.until_up[...]
if preds then
for x, _ in pairs(preds) do
local inc = x.inverted and -1 or 1
self.unsatisfied += inc
x.satisfied = false
end
self.until_up[...] = nil
end
-- Execute the up callback
-- TODO: cache the filter passing result from the down callback
-- TODO: skip if there's no callback
local satisfied = self.unsatisfied==0
if satisfied then
for _, f in ipairs(self.filters) do
if not f(self, ...) then satisfied=false; break end
end
if satisfied and self.up_f then self.up_f(...) end
end
-- Set predicate which are due after we go up this node
local preds = self.from_up[...]
if preds then
for p, _ in pairs(preds) do
local inc = p.inverted and 1 or -1
self.unsatisfied += inc
end
self.from_up[...] = nil
end
ACTIVE_SCOPE[...] = nil
end
function cfg.binder(id_node, ...)
--printf(" >>> Binder called on %s, %s", table.tostring(id_node),
-- table.tostring{...}:sub(2,-2))
cfg.down(id_node, ...)
cfg.up(id_node, ...)
--printf("down/up on binder done")
end
cfg.unknown = self.unknown_handler
--function cfg.occurrence (binder, occ)
-- if binder then OCC2BIND[occ] = binder[1] end
--printf(" >>> %s is an occurrence of %s", occ[1], table.tostring(binder and binder[2]))
--end
--function cfg.binder(...) cfg.down(...); cfg.up(...) end
return walk.guess(cfg, self.root)
end
--- Execute a function on each selected node
-- @down: function executed when we go down a node, i.e. before its children
-- have been examined.
-- @up: function executed when we go up a node, i.e. after its children
-- have been examined.
function Q :foreach(down, up)
if not up and not down then
error "iterator missing"
end
self.up_f = up
self.down_f = down
return self :execute()
end
--- Return the list of nodes selected by a given treequery.
function Q :list()
local acc = { }
self :foreach(|x| table.insert(acc, x))
return acc
end
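-- Illustrative usage sketch (not part of the original file): collect and visit
-- the `Id nodes of an AST `ast` produced by the metalua compiler:
--
--   local Q = require 'metalua.treequery'
--   local ids = Q(ast) :filter('Id') :list()
--   Q(ast) :filter('Id') :not_under('Function') :foreach(function(node, parent)
--       -- called going down on each matching node; `parent` is its direct ancestor
--   end)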
--- Return the first matching element
-- TODO: dirty hack, to implement properly with a 'break' return.
-- Also, it won't behave correctly if a predicate causes an error,
-- or if coroutines are involved.
function Q :first()
local result = { }
local function f(...) result = {...}; error() end
pcall(|| self :foreach(f))
return unpack(result)
end
--- Pretty printer for queries
function Q :__tostring() return "<treequery>" end
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- Predicates.
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--- Return a predicate which is true if the tested node's tag is among the
-- one listed as arguments
-- @param ... a sequence of tag names
function M.has_tag(...)
local args = {...}
if #args==1 then
local tag = ...
return (|node| node.tag==tag)
--return function(self, node) printf("node %s has_tag %s?", table.tostring(node), tag); return node.tag==tag end
else
local tags = { }
for _, tag in ipairs(args) do tags[tag]=true end
return function(node)
local node_tag = node.tag
return node_tag and tags[node_tag]
end
end
end
--- Predicate to test whether a node represents an expression.
M.is_expr = M.has_tag('Nil', 'Dots', 'True', 'False', 'Number','String',
'Function', 'Table', 'Op', 'Paren', 'Call', 'Invoke',
'Id', 'Index')
-- helper for is_stat
local STAT_TAGS = { Do=1, Set=1, While=1, Repeat=1, If=1, Fornum=1,
Forin=1, Local=1, Localrec=1, Return=1, Break=1 }
--- Predicate to test whether a node represents a statement.
-- It is context-aware, i.e. it recognizes `Call and `Invoke nodes
-- used in a statement context as such.
function M.is_stat(node, parent)
local tag = node.tag
if not tag then return false
elseif STAT_TAGS[tag] then return true
elseif tag=='Call' or tag=='Invoke' then return parent and parent.tag==nil
else return false end
end
--- Predicate to test whether a node represents a statements block.
function M.is_block(node) return node.tag==nil end
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- Variables and scopes.
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
local BINDER_PARENT_TAG = {
Local=true, Localrec=true, Forin=true, Function=true }
--- Test whether a node is a binder. This is a local predicate, although it
-- might need to inspect the parent node.
function M.is_binder(node, parent)
--printf('is_binder(%s, %s)', table.tostring(node), table.tostring(parent))
if node.tag ~= 'Id' or not parent then return false end
if parent.tag=='Fornum' then return parent[1]==node end
if not BINDER_PARENT_TAG[parent.tag] then return false end
for _, binder in ipairs(parent[1]) do
if binder==node then return true end
end
return false
end
--- Retrieve the binder associated to an occurrence within root.
-- @param occurrence an Id node representing an occurrence in `root`.
-- @param root the tree in which `node` and its binder occur.
-- @return the binder node, and its ancestors up to root if found.
-- @return nil if node is global (or not an occurrence) in `root`.
function M.binder(occurrence, root)
local cfg, id_name, result = { }, occurrence[1], { }
function cfg.occurrence(id)
if id == occurrence then result = cfg.scope :get(id_name) end
-- TODO: break the walker
end
walk.guess(cfg, root)
return unpack(result)
end
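-- Illustrative usage sketch (not part of the original file), with `occ` an
-- `Id occurrence node found inside `root`:
--
--   local binder_node = M.binder(occ, root)   -- nil when the variable is global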
--- Predicate to filter occurrences of a given binder.
-- Warning: it relies on internal scope book-keeping,
-- and for this reason, it only works as query method argument.
-- It won't work outside of a query.
-- @param binder the binder whose occurrences must be kept by predicate
-- @return a predicate
-- function M.is_occurrence_of(binder)
-- return function(node, ...)
-- if node.tag ~= 'Id' then return nil end
-- if M.is_binder(node, ...) then return nil end
-- local scope = ACTIVE_SCOPE[node]
-- if not scope then return nil end
-- local result = scope :get (node[1]) or { }
-- if result[1] ~= binder then return nil end
-- return unpack(result)
-- end
-- end
function M.is_occurrence_of(binder)
return function(node, ...)
local b = M.get_binder(node)
return b and b==binder
end
end
function M.get_binder(occurrence, ...)
if occurrence.tag ~= 'Id' then return nil end
if M.is_binder(occurrence, ...) then return nil end
local scope = ACTIVE_SCOPE[occurrence]
local binder_hierarchy = scope :get(occurrence[1])
return unpack (binder_hierarchy or { })
end
--- Transform a predicate on a node into a predicate on this node's
-- parent. For instance if p tests whether a node has property P,
-- then parent(p) tests whether this node's parent has property P.
-- The ancestor level is specified with n, with 1 being the node itself,
-- 2 its parent, 3 its grand-parent etc.
-- @param[optional] n the parent to examine, default=2
-- @param pred the predicate to transform
-- @return a predicate
function M.parent(n, pred, ...)
if type(n)~='number' then n, pred = 2, n end
if type(pred)=='string' then pred = M.has_tag(pred, ...) end
return function(self, ...)
return select(n, ...) and pred(self, select(n, ...))
end
end
--- Transform a predicate on a node into a predicate on this node's
-- n-th child.
-- @param n the child's index number
-- @param pred the predicate to transform
-- @return a predicate
function M.child(n, pred)
return function(node, ...)
local child = node[n]
return child and pred(child, node, ...)
end
end
--- Predicate to test the position of a node in its parent.
-- The predicate succeeds if the node is the n-th child of its parent,
-- and a <= n <= b.
-- nth(a) is equivalent to nth(a, a).
-- Negative indices are admitted, and count from the last child,
-- as done for instance by string.sub().
--
-- TODO: This is wrong, this tests the table relationship rather than the
-- AST node relationship.
-- Must build a getindex helper, based on pattern matching, then build
-- the predicate around it.
--
-- @param a lower bound
-- @param b upper bound
-- @return a predicate
function M.is_nth(a, b)
b = b or a
return function(self, node, parent)
if not parent then return false end
local nchildren = #parent
local a = a<=0 and nchildren+a+1 or a
if a>nchildren then return false end
local b = b<=0 and nchildren+b+1 or b>nchildren and nchildren or b
for i=a,b do if parent[i]==node then return true end end
return false
end
end
--- Returns a list of the direct children of AST node `ast`.
-- Children are only expressions, statements and blocks,
-- not intermediates such as `Pair` nodes or internal lists
-- in `Local` or `Set` nodes.
-- Children are returned in parsing order, which isn't necessarily
-- the same as source code order. For instance, the right-hand-side
-- of a `Local` node is listed before the left-hand-side, because
-- semantically the right is evaluated before the variables on the
-- left enter scope.
--
-- @param ast the node whose children are needed
-- @return a list of the direct children of `ast`
function M.children(ast)
local acc = { }
local cfg = { }
function cfg.down(x)
if x~=ast then table.insert(acc, x); return 'break' end
end
walk.guess(cfg, ast)
return acc
end
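-- Illustrative usage sketch (not part of the original file):
--
--   for _, child in ipairs(M.children(node)) do
--       -- direct sub-expressions, statements and blocks of `node`
--   end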
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
--
-- Comments parsing.
--
-- -----------------------------------------------------------------------------
-- -----------------------------------------------------------------------------
local comment_extractor = |which_side| function (node)
local x = node.lineinfo
x = x and x[which_side]
x = x and x.comments
if not x then return nil end
local lines = { }
for _, record in ipairs(x) do
table.insert(lines, record[1])
end
return table.concat(lines, '\n')
end
M.comment_prefix = comment_extractor 'first'
M.comment_suffix = comment_extractor 'last'
--- Shortcut for the query constructor
function M :__call(...) return self.treequery(...) end
setmetatable(M, M)
return M

View File

@ -0,0 +1,266 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-- Low level AST traversal library.
--
-- This library is a helper for the higher-level `treequery` library.
-- It walks through every node of an AST, depth-first, and executes
-- some callbacks contained in its `cfg` config table:
--
-- * `cfg.down(...)` is called when it walks down a node, and receive as
-- parameters the node just entered, followed by its parent, grand-parent
-- etc. until the root node.
--
-- * `cfg.up(...)` is called when it walks back up a node, and receive as
-- parameters the node just entered, followed by its parent, grand-parent
-- etc. until the root node.
--
-- * `cfg.occurrence(binder, id_node, ...)` is called when it visits
-- an `` `Id{ }`` node which isn't a local variable creator. binder
-- is a reference to its binder with its context. The binder is the
-- `` `Id{ }`` node which created this local variable. By "binder
-- and its context", we mean a list starting with the `` `Id{ }``,
-- and followed by every ancestor of the binder node, up until the
-- common root node. `binder` is nil if the variable is global.
-- `id_node` is followed by its ancestor, up until the root node.
--
-- `cfg.scope` is maintained during the traversal, associating a
-- variable name to the binder which creates it in the context of the
-- node currently visited.
--
-- `walk.traverse.xxx` functions are in charge of the recursive
-- descent into children nodes. They're private helpers. They are also
-- in charge of calling appropriate `cfg.xxx` callbacks.
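-- Illustrative usage sketch (not part of the original file): count every node
-- visited by the walker, assuming `ast` comes from the metalua compiler:
--
--   local walk = require 'metalua.treequery.walk'
--   local count, cfg = 0, { }
--   function cfg.down(node, ...) count = count + 1 end
--   walk.guess(cfg, ast)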
-{ extension ("match", ...) }
local pp = require 'metalua.pprint'
local M = { traverse = { }; tags = { }; debug = false }
local function table_transpose(t)
local tt = { }; for a, b in pairs(t) do tt[b]=a end; return tt
end
--------------------------------------------------------------------------------
-- Standard tags: can be used to guess the type of an AST, or to check
-- that the type of an AST is respected.
--------------------------------------------------------------------------------
M.tags.stat = table_transpose{
'Do', 'Set', 'While', 'Repeat', 'Local', 'Localrec', 'Return',
'Fornum', 'Forin', 'If', 'Break', 'Goto', 'Label',
'Call', 'Invoke' }
M.tags.expr = table_transpose{
'Paren', 'Call', 'Invoke', 'Index', 'Op', 'Function', 'Stat',
'Table', 'Nil', 'Dots', 'True', 'False', 'Number', 'String', 'Id' }
--------------------------------------------------------------------------------
-- These [M.traverse.xxx()] functions are in charge of actually going through
-- ASTs. At each node, they make sure to call the appropriate walker.
--------------------------------------------------------------------------------
function M.traverse.stat (cfg, x, ...)
if M.debug then pp.printf("traverse stat %s", x) end
local ancestors = {...}
local B = |y| M.block (cfg, y, x, unpack(ancestors)) -- Block
local S = |y| M.stat (cfg, y, x, unpack(ancestors)) -- Statement
local E = |y| M.expr (cfg, y, x, unpack(ancestors)) -- Expression
local EL = |y| M.expr_list (cfg, y, x, unpack(ancestors)) -- Expression List
local IL = |y| M.binder_list (cfg, y, x, unpack(ancestors)) -- Id binders List
local OS = || cfg.scope :save() -- Open scope
local CS = || cfg.scope :restore() -- Close scope
match x with
| {...} if x.tag == nil -> for _, y in ipairs(x) do M.stat(cfg, y, ...) end
-- no tag --> node not inserted in the history ancestors
| `Do{...} -> OS(x); for _, y in ipairs(x) do S(y) end; CS(x)
| `Set{ lhs, rhs } -> EL(lhs); EL(rhs)
| `While{ cond, body } -> E(cond); OS(); B(body); CS()
| `Repeat{ body, cond } -> OS(body); B(body); E(cond); CS(body)
| `Local{ lhs } -> IL(lhs)
| `Local{ lhs, rhs } -> EL(rhs); IL(lhs)
| `Localrec{ lhs, rhs } -> IL(lhs); EL(rhs)
| `Fornum{ i, a, b, body } -> E(a); E(b); OS(); IL{i}; B(body); CS()
| `Fornum{ i, a, b, c, body } -> E(a); E(b); E(c); OS(); IL{i}; B(body); CS()
| `Forin{ i, rhs, body } -> EL(rhs); OS(); IL(i); B(body); CS()
| `If{...} ->
for i=1, #x-1, 2 do
E(x[i]); OS(); B(x[i+1]); CS()
end
if #x%2 == 1 then
OS(); B(x[#x]); CS()
end
| `Call{...}|`Invoke{...}|`Return{...} -> EL(x)
| `Break | `Goto{ _ } | `Label{ _ } -> -- nothing
| { tag=tag, ...} if M.tags.stat[tag]->
M.malformed (cfg, x, unpack (ancestors))
| _ ->
M.unknown (cfg, x, unpack (ancestors))
end
end
function M.traverse.expr (cfg, x, ...)
if M.debug then pp.printf("traverse expr %s", x) end
local ancestors = {...}
local B = |y| M.block (cfg, y, x, unpack(ancestors)) -- Block
local S = |y| M.stat (cfg, y, x, unpack(ancestors)) -- Statement
local E = |y| M.expr (cfg, y, x, unpack(ancestors)) -- Expression
local EL = |y| M.expr_list (cfg, y, x, unpack(ancestors)) -- Expression List
local IL = |y| M.binder_list (cfg, y, x, unpack(ancestors)) -- Id binders list
local OS = || cfg.scope :save() -- Open scope
local CS = || cfg.scope :restore() -- Close scope
match x with
| `Paren{ e } -> E(e)
| `Call{...} | `Invoke{...} -> EL(x)
| `Index{ a, b } -> E(a); E(b)
| `Op{ opid, ... } -> E(x[2]); if #x==3 then E(x[3]) end
| `Function{ params, body } -> OS(body); IL(params); B(body); CS(body)
| `Stat{ b, e } -> OS(b); B(b); E(e); CS(b)
| `Id{ name } -> M.occurrence(cfg, x, unpack(ancestors))
| `Table{ ... } ->
for i = 1, #x do match x[i] with
| `Pair{ k, v } -> E(k); E(v)
| v -> E(v)
end end
| `Nil|`Dots|`True|`False|`Number{_}|`String{_} -> -- terminal node
| { tag=tag, ...} if M.tags.expr[tag]-> M.malformed (cfg, x, unpack (ancestors))
| _ -> M.unknown (cfg, x, unpack (ancestors))
end
end
function M.traverse.block (cfg, x, ...)
assert(type(x)=='table', "traverse.block() expects a table")
if x.tag then M.malformed(cfg, x, ...)
else for _, y in ipairs(x) do M.stat(cfg, y, x, ...) end
end
end
function M.traverse.expr_list (cfg, x, ...)
assert(type(x)=='table', "traverse.expr_list() expects a table")
-- x doesn't appear in the ancestors
for _, y in ipairs(x) do M.expr(cfg, y, ...) end
end
function M.malformed(cfg, x, ...)
local f = cfg.malformed or cfg.error
if f then f(x, ...) else
error ("Malformed node of tag "..(x.tag or '(nil)'))
end
end
function M.unknown(cfg, x, ...)
local f = cfg.unknown or cfg.error
if f then f(x, ...) else
error ("Unknown node tag "..(x.tag or '(nil)'))
end
end
function M.occurrence(cfg, x, ...)
if cfg.occurrence then cfg.occurrence(cfg.scope :get(x[1]), x, ...) end
end
-- TODO: Is it useful to call each error handling function?
function M.binder_list (cfg, id_list, ...)
local f = cfg.binder
local ferror = cfg.error or cfg.malformed or cfg.unknown
for i, id_node in ipairs(id_list) do
local down, up = cfg.down, cfg.up
if id_node.tag == 'Id' then
cfg.scope :set (id_node[1], { id_node, ... })
if down then down(id_node, ...) end
if f then f(id_node, ...) end
if up then up(id_node, ...) end
elseif i==#id_list and id_node.tag=='Dots' then
if down then down(id_node, ...) end
if up then up(id_node, ...) end
-- Do nothing, those are valid `Dots
elseif ferror then
-- Traverse error handling function
ferror(id_node, ...)
else
error("Invalid binders list")
end
end
end
----------------------------------------------------------------------
-- Generic walker generator.
-- * if `cfg' has an entry matching the tree name, use this entry
-- * if not, try to use the entry whose name matched the ast kind
-- * if an entry is a table, look for 'up' and 'down' entries
-- * if it is a function, consider it as a `down' traverser.
----------------------------------------------------------------------
local walker_builder = function(traverse)
assert(traverse)
return function (cfg, ...)
if not cfg.scope then cfg.scope = M.newscope() end
local down, up = cfg.down, cfg.up
local broken = down and down(...)
if broken ~= 'break' then M.traverse[traverse] (cfg, ...) end
if up then up(...) end
end
end
----------------------------------------------------------------------
-- Declare [M.stat], [M.expr], [M.block].
-- `M.binder_list` is not here, because `cfg.up` and `cfg.down` must
-- be called on individual binders, not on the list itself.
-- It's therefore handled in `traverse.binder_list()`
----------------------------------------------------------------------
for _, w in ipairs{ "stat", "expr", "block" } do --, "malformed", "unknown" } do
M[w] = walker_builder (w, M.traverse[w])
end
-- Don't call up/down callbacks on expr lists
M.expr_list = M.traverse.expr_list
----------------------------------------------------------------------
-- Try to guess the type of the AST then choose the right walker.
----------------------------------------------------------------------
function M.guess (cfg, x, ...)
assert(type(x)=='table', "arg #2 in a walker must be an AST")
if M.tags.expr[x.tag] then return M.expr(cfg, x, ...) end
if M.tags.stat[x.tag] then return M.stat(cfg, x, ...) end
if not x.tag then return M.block(cfg, x, ...) end
error ("Can't guess the AST type from tag "..(x.tag or '<none>'))
end
local S = { }; S.__index = S
function M.newscope()
local instance = { current = { } }
instance.stack = { instance.current }
setmetatable (instance, S)
return instance
end
function S :save(...)
local current_copy = { }
for a, b in pairs(self.current) do current_copy[a]=b end
table.insert (self.stack, current_copy)
if ... then return self :add(...) end
end
function S :restore() self.current = table.remove (self.stack) end
function S :get (var_name) return self.current[var_name] end
function S :set (key, val) self.current[key] = val end
return M

View File

@ -0,0 +1,241 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2011-2012 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Simon BERNARD <sbernard@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
local M = {}
--------------------------------------------------------------------------------
-- API MODEL
--------------------------------------------------------------------------------
function M._file()
local file = {
-- FIELDS
tag = "file",
name = nil, -- string
shortdescription = "", -- string
description = "", -- string
types = {}, -- map from typename to type
globalvars = {}, -- map from varname to item
returns = {}, -- list of return
-- FUNCTIONS
addtype = function (self,type)
self.types[type.name] = type
type.parent = self
end,
mergetype = function (self,newtype,erase,erasesourcerangefield)
local currenttype = self.types[newtype.name]
if currenttype then
-- merge recordtypedef
if currenttype.tag =="recordtypedef" and newtype.tag == "recordtypedef" then
-- merge fields
for fieldname ,field in pairs( newtype.fields) do
local currentfield = currenttype.fields[fieldname]
if erase or not currentfield then
currenttype:addfield(field)
elseif erasesourcerangefield then
if field.sourcerange.min and field.sourcerange.max then
currentfield.sourcerange.min = field.sourcerange.min
currentfield.sourcerange.max = field.sourcerange.max
end
end
end
-- merge descriptions and source ranges
if erase then
if newtype.description or newtype.description == "" then currenttype.description = newtype.description end
if newtype.shortdescription or newtype.shortdescription == "" then currenttype.shortdescription = newtype.shortdescription end
if newtype.sourcerange.min and newtype.sourcerange.max then
currenttype.sourcerange.min = newtype.sourcerange.min
currenttype.sourcerange.max = newtype.sourcerange.max
end
end
-- merge functiontypedef
elseif currenttype.tag == "functiontypedef" and newtype.tag == "functiontypedef" then
-- merge params
for i, param1 in ipairs(newtype.params) do
local missing = true
for j, param2 in ipairs(currenttype.params) do
if param1.name == param2.name then
missing = false
break
end
end
if missing then
table.insert(currenttype.params,param1)
end
end
-- merge descriptions and source ranges
if erase then
if newtype.description or newtype.description == "" then currenttype.description = newtype.description end
if newtype.shortdescription or newtype.shortdescription == "" then currenttype.shortdescription = newtype.shortdescription end
if newtype.sourcerange.min and newtype.sourcerange.max then
currenttype.sourcerange.min = newtype.sourcerange.min
currenttype.sourcerange.max = newtype.sourcerange.max
end
end
end
else
self:addtype(newtype)
end
end,
addglobalvar = function (self,item)
self.globalvars[item.name] = item
item.parent = self
end,
moduletyperef = function (self)
if self and self.returns[1] and self.returns[1].types[1] then
local typeref = self.returns[1].types[1]
return typeref
end
end
}
return file
end
function M._recordtypedef(name)
local recordtype = {
-- FIELDS
tag = "recordtypedef",
name = name, -- string (mandatory)
shortdescription = "", -- string
description = "", -- string
fields = {}, -- map from fieldname to field
sourcerange = {min=0,max=0},
-- FUNCTIONS
addfield = function (self,field)
self.fields[field.name] = field
field.parent = self
end
}
return recordtype
end
function M._functiontypedef(name)
return {
tag = "functiontypedef",
name = name, -- string (mandatory)
shortdescription = "", -- string
description = "", -- string
params = {}, -- list of parameter
returns = {} -- list of return
}
end
function M._parameter(name)
return {
tag = "parameter",
name = name, -- string (mandatory)
description = "", -- string
type = nil -- typeref (external or internal or primitive typeref)
}
end
function M._item(name)
return {
-- FIELDS
tag = "item",
name = name, -- string (mandatory)
shortdescription = "", -- string
description = "", -- string
type = nil, -- typeref (external or internal or primitive typeref)
occurrences = {}, -- list of identifier (see internalmodel)
sourcerange = {min=0, max=0},
-- This is A TRICK
-- This value is ALWAYS nil, except for internal purposes (short references).
external = nil,
-- FUNCTIONS
addoccurence = function (self,occ)
table.insert(self.occurrences,occ)
occ.definition = self
end,
resolvetype = function (self,file)
if self and self.type then
if self.type.tag =="internaltyperef" then
-- if file is not given try to retrieve it.
if not file then
if self.parent and self.parent.tag == 'recordtypedef' then
file = self.parent.parent
elseif self.parent.tag == 'file' then
file = self.parent
end
end
if file then return file.types[self.type.typename] end
elseif self.type.tag =="inlinetyperef" then
return self.type.def
end
end
end
}
end
function M._externaltypref(modulename, typename)
return {
tag = "externaltyperef",
modulename = modulename, -- string
typename = typename -- string
}
end
function M._internaltyperef(typename)
return {
tag = "internaltyperef",
typename = typename -- string
}
end
function M._primitivetyperef(typename)
return {
tag = "primitivetyperef",
typename = typename -- string
}
end
function M._moduletyperef(modulename,returnposition)
return {
tag = "moduletyperef",
modulename = modulename, -- string
returnposition = returnposition -- number
}
end
function M._exprtyperef(expression,returnposition)
return {
tag = "exprtyperef",
expression = expression, -- expression (see internal model)
returnposition = returnposition -- number
}
end
function M._inlinetyperef(definition)
return {
tag = "inlinetyperef",
def = definition, -- expression (see internal model)
}
end
function M._return(description)
return {
tag = "return",
description = description or "", -- string
types = {} -- list of typref (external or internal or primitive typeref)
}
end
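-- Illustrative usage sketch (not part of the original file): assembling a tiny
-- API model by hand with the constructors above:
--
--   local apimodel = require "models.apimodel"
--   local file = apimodel._file()
--   local rec  = apimodel._recordtypedef("mytype")
--   local item = apimodel._item("myfield")
--   item.type  = apimodel._primitivetyperef("string")
--   rec:addfield(item)
--   file:addtype(rec)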
return M

View File

@ -0,0 +1,459 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2011-2012 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Simon BERNARD <sbernard@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
local apimodel = require "models.apimodel"
local ldp = require "models.ldparser"
local Q = require "metalua.treequery"
local M = {}
local handledcomments={} -- cache to know the comment already handled
----
-- UTILITY METHODS
local primitivetypes = {
['boolean'] = true,
['function'] = true,
['nil'] = true,
['number'] = true,
['string'] = true,
['table'] = true,
['thread'] = true,
['userdata'] = true
}
-- get or create the typedef with the name "name"
local function gettypedef(_file,name,kind,sourcerangemin,sourcerangemax)
local kind = kind or "recordtypedef"
local _typedef = _file.types[name]
if _typedef then
if _typedef.tag == kind then return _typedef end
else
if kind == "recordtypedef" and name ~= "global" then
local _recordtypedef = apimodel._recordtypedef(name)
-- define sourcerange
_recordtypedef.sourcerange.min = sourcerangemin
_recordtypedef.sourcerange.max = sourcerangemax
-- add to file if a name is defined
if _recordtypedef.name then _file:addtype(_recordtypedef) end
return _recordtypedef
elseif kind == "functiontypedef" then
-- TODO support function
return nil
else
return nil
end
end
return nil
end
-- create a typeref from the typeref doc_tag
local function createtyperef(dt_typeref,_file,sourcerangemin,sourcerangemax)
local _typeref
if dt_typeref.tag == "typeref" then
if dt_typeref.module then
-- manage external type
_typeref = apimodel._externaltypref()
_typeref.modulename = dt_typeref.module
_typeref.typename = dt_typeref.type
else
if primitivetypes[dt_typeref.type] then
-- manage primitive type
_typeref = apimodel._primitivetyperef()
_typeref.typename = dt_typeref.type
else
-- manage internal type
_typeref = apimodel._internaltyperef()
_typeref.typename = dt_typeref.type
if _file then
gettypedef(_file, _typeref.typename, "recordtypedef", sourcerangemin,sourcerangemax)
end
end
end
end
return _typeref
end
-- create a return from the return doc_tag
local function createreturn(dt_return,_file,sourcerangemin,sourcerangemax)
local _return = apimodel._return()
_return.description = dt_return.description
-- manage typeref
if dt_return.types then
for _, dt_typeref in ipairs(dt_return.types) do
local _typeref = createtyperef(dt_typeref,_file,sourcerangemin,sourcerangemax)
if _typeref then
table.insert(_return.types,_typeref)
end
end
end
return _return
end
-- create an item from the field doc_tag
local function createfield(dt_field,_file,sourcerangemin,sourcerangemax)
local _item = apimodel._item(dt_field.name)
if dt_field.shortdescription then
_item.shortdescription = dt_field.shortdescription
_item.description = dt_field.description
else
_item.shortdescription = dt_field.description
end
-- manage typeref
local dt_typeref = dt_field.type
if dt_typeref then
_item.type = createtyperef(dt_typeref,_file,sourcerangemin,sourcerangemax)
end
return _item
end
-- create a param from the param doc_tag
local function createparam(dt_param,_file,sourcerangemin,sourcerangemax)
if not dt_param.name then return nil end
local _parameter = apimodel._parameter(dt_param.name)
_parameter.description = dt_param.description
-- manage typeref
local dt_typeref = dt_param.type
if dt_typeref then
_parameter.type = createtyperef(dt_typeref,_file,sourcerangemin,sourcerangemax)
end
return _parameter
end
-- add the item to its parent: the global scope or the record type designated by its scope
function M.additemtoparent(_file,_item,scope,sourcerangemin,sourcerangemax)
if scope and not scope.module then
if _item.name then
if scope.type == "global" then
_file:addglobalvar(_item)
else
local _recordtypedef = gettypedef (_file, scope.type ,"recordtypedef",sourcerangemin,sourcerangemax)
_recordtypedef:addfield(_item)
end
else
      -- if no item name is given, we store the scope in the item to be able to add it to the right parent later
_item.scope = scope
end
end
end
-- Function type counter
local i = 0
-- Reset function type counter
local function resetfunctiontypeidgenerator()
i = 0
end
-- Provides an unique index for a function type
local function generatefunctiontypeid()
i = i + 1
return i
end
-- generate a function type name
local function generatefunctiontypename(_functiontypedef)
local name = {"__"}
if _functiontypedef.returns and _functiontypedef.returns[1] then
local ret = _functiontypedef.returns[1]
for _, type in ipairs(ret.types) do
if type.typename then
if type.modulename then
table.insert(name,type.modulename)
end
table.insert(name,"#")
table.insert(name,type.typename)
end
end
end
table.insert(name,"=")
if _functiontypedef.params then
for _, param in ipairs(_functiontypedef.params) do
local type = param.type
if type then
if type.typename then
if type.modulename then
table.insert(name,type.modulename)
end
table.insert(name,"#")
table.insert(name,type.typename)
else
table.insert(name,"#unknown")
end
end
table.insert(name,"[")
table.insert(name,param.name)
table.insert(name,"]")
end
end
table.insert(name,"__")
table.insert(name, generatefunctiontypeid())
return table.concat(name)
end
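-- Illustrative note (not part of the original file): for a documented function
-- returning #number and taking one #string parameter `s`, the helper above
-- generates a name shaped like "__#number=#string[s]__1", the trailing index
-- coming from generatefunctiontypeid().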
------------------------------------------------------
-- create the module api
function M.createmoduleapi(ast,modulename)
-- Initialise function type naming
resetfunctiontypeidgenerator()
local _file = apimodel._file()
local _comment2apiobj = {}
local function handlecomment(comment)
-- Extract information from tagged comments
local parsedcomment = ldp.parse(comment[1])
if not parsedcomment then return nil end
-- Get tags from the languages
local regulartags = parsedcomment.tags
-- Will contain last API object generated from comments
local _lastapiobject
-- if comment is an ld comment
if regulartags then
-- manage "module" comment
if regulartags["module"] then
-- get name
_file.name = regulartags["module"][1].name or modulename
_lastapiobject = _file
-- manage descriptions
_file.shortdescription = parsedcomment.shortdescription
_file.description = parsedcomment.description
local sourcerangemin = comment.lineinfo.first.offset
local sourcerangemax = comment.lineinfo.last.offset
-- manage returns
if regulartags ["return"] then
for _, dt_return in ipairs(regulartags ["return"]) do
local _return = createreturn(dt_return,_file,sourcerangemin,sourcerangemax)
table.insert(_file.returns,_return)
end
end
-- if no returns on module create a defaultreturn of type #modulename
if #_file.returns == 0 and _file.name then
-- create internal type ref
local _typeref = apimodel._internaltyperef()
_typeref.typename = _file.name
-- create return
local _return = apimodel._return()
table.insert(_return.types,_typeref)
-- add return
table.insert(_file.returns,_return)
        -- create the recordtypedef if it is not defined yet
gettypedef(_file,_typeref.typename,"recordtypedef",sourcerangemin,sourcerangemax)
end
-- manage "type" comment
elseif regulartags["type"] and regulartags["type"][1].name ~= "global" then
local dt_type = regulartags["type"][1];
-- create record type if it doesn't exist
local sourcerangemin = comment.lineinfo.first.offset
local sourcerangemax = comment.lineinfo.last.offset
local _recordtypedef = gettypedef (_file, dt_type.name ,"recordtypedef",sourcerangemin,sourcerangemax)
_lastapiobject = _recordtypedef
-- re-set sourcerange in case the type was created before the type tag
_recordtypedef.sourcerange.min = sourcerangemin
_recordtypedef.sourcerange.max = sourcerangemax
-- manage description
_recordtypedef.shortdescription = parsedcomment.shortdescription
_recordtypedef.description = parsedcomment.description
-- manage fields
if regulartags["field"] then
for _, dt_field in ipairs(regulartags["field"]) do
local _item = createfield(dt_field,_file,sourcerangemin,sourcerangemax)
-- define sourcerange only if we create it
_item.sourcerange.min = sourcerangemin
_item.sourcerange.max = sourcerangemax
if _item then _recordtypedef:addfield(_item) end
end
end
elseif regulartags["field"] then
local dt_field = regulartags["field"][1]
-- create item
local sourcerangemin = comment.lineinfo.first.offset
local sourcerangemax = comment.lineinfo.last.offset
local _item = createfield(dt_field,_file,sourcerangemin,sourcerangemax)
_item.shortdescription = parsedcomment.shortdescription
_item.description = parsedcomment.description
_lastapiobject = _item
-- define sourcerange
_item.sourcerange.min = sourcerangemin
_item.sourcerange.max = sourcerangemax
-- add item to its parent
local scope = regulartags["field"][1].parent
M.additemtoparent(_file,_item,scope,sourcerangemin,sourcerangemax)
elseif regulartags["function"] or regulartags["param"] or regulartags["return"] then
-- create item
local _item = apimodel._item()
_item.shortdescription = parsedcomment.shortdescription
_item.description = parsedcomment.description
_lastapiobject = _item
-- set name
if regulartags["function"] then _item.name = regulartags["function"][1].name end
-- define sourcerange
local sourcerangemin = comment.lineinfo.first.offset
local sourcerangemax = comment.lineinfo.last.offset
_item.sourcerange.min = sourcerangemin
_item.sourcerange.max = sourcerangemax
-- create function type
local _functiontypedef = apimodel._functiontypedef()
_functiontypedef.shortdescription = parsedcomment.shortdescription
_functiontypedef.description = parsedcomment.description
-- manage params
if regulartags["param"] then
for _, dt_param in ipairs(regulartags["param"]) do
local _param = createparam(dt_param,_file,sourcerangemin,sourcerangemax)
table.insert(_functiontypedef.params,_param)
end
end
-- manage returns
if regulartags["return"] then
for _, dt_return in ipairs(regulartags["return"]) do
local _return = createreturn(dt_return,_file,sourcerangemin,sourcerangemax)
table.insert(_functiontypedef.returns,_return)
end
end
-- add type name
_functiontypedef.name = generatefunctiontypename(_functiontypedef)
_file:addtype(_functiontypedef)
-- create ref to this type
local _internaltyperef = apimodel._internaltyperef()
_internaltyperef.typename = _functiontypedef.name
_item.type=_internaltyperef
-- add item to its parent
local sourcerangemin = comment.lineinfo.first.offset
local sourcerangemax = comment.lineinfo.last.offset
local scope = (regulartags["function"] and regulartags["function"][1].parent) or nil
M.additemtoparent(_file,_item,scope,sourcerangemin,sourcerangemax)
end
end
    -- when we cannot determine which type of api object it is, we assume it is an item
if not _lastapiobject then
_lastapiobject = apimodel._item()
_lastapiobject.shortdescription = parsedcomment.shortdescription
_lastapiobject.description = parsedcomment.description
_lastapiobject.sourcerange.min = comment.lineinfo.first.offset
_lastapiobject.sourcerange.max = comment.lineinfo.last.offset
end
--
-- Store user defined tags
--
local thirdtags = parsedcomment and parsedcomment.unknowntags
if thirdtags then
-- Define a storage index for user defined tags on current API element
if not _lastapiobject.metadata then _lastapiobject.metadata = {} end
-- Loop over user defined tags
for usertag, taglist in pairs(thirdtags) do
if not _lastapiobject.metadata[ usertag ] then
_lastapiobject.metadata[ usertag ] = {
tag = usertag
}
end
for _, tag in ipairs( taglist ) do
table.insert(_lastapiobject.metadata[usertag], tag)
end
end
end
    -- if we created an api object, link it to the comment
_comment2apiobj[comment] =_lastapiobject
end
local function parsecomment(node, parent, ...)
-- check for comments before this node
if node.lineinfo and node.lineinfo.first.comments then
local comments = node.lineinfo.first.comments
-- check all comments
for _,comment in ipairs(comments) do
-- if not already handled
if not handledcomments[comment] then
handlecomment(comment)
handledcomments[comment]=true
end
end
end
-- check for comments after this node
if node.lineinfo and node.lineinfo.last.comments then
local comments = node.lineinfo.last.comments
-- check all comments
for _,comment in ipairs(comments) do
-- if not already handled
if not handledcomments[comment] then
handlecomment(comment)
handledcomments[comment]=true
end
end
end
end
Q(ast):filter(function(x) return x.tag~=nil end):foreach(parsecomment)
return _file, _comment2apiobj
end
function M.extractlocaltype ( commentblock,_file)
if not commentblock then return nil end
local stringcomment = commentblock[1]
local parsedtag = ldp.parseinlinecomment(stringcomment)
if parsedtag then
local sourcerangemin = commentblock.lineinfo.first.offset
local sourcerangemax = commentblock.lineinfo.last.offset
return createtyperef(parsedtag,_file,sourcerangemin,sourcerangemax), parsedtag.description
end
return nil, stringcomment
end
M.generatefunctiontypename = generatefunctiontypename
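-- Illustrative usage sketch (not part of the original file): build the API model
-- of a module from an AST that carries lineinfo and comments:
--
--   local apimodelbuilder = require "models.apimodelbuilder"
--   local file, comment2apiobj = apimodelbuilder.createmoduleapi(ast, "mymodule")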
return M

View File

@ -0,0 +1,65 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2012 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
local M = {}
function M._internalcontent()
return {
content = nil, -- block
unknownglobalvars = {}, -- list of item
tag = "MInternalContent"
}
end
function M._block()
return {
content = {}, -- list of expr (identifier, index, call, invoke, block)
localvars = {}, -- list of {var=item, scope ={min,max}}
sourcerange = {min=0,max=0},
tag = "MBlock"
}
end
function M._identifier()
return {
definition = nil, -- item
sourcerange = {min=0,max=0},
tag = "MIdentifier"
}
end
function M._index(key, value)
return {
left= key, -- expr (identifier, index, call, invoke, block)
right= value, -- string
sourcerange = {min=0,max=0},
tag = "MIndex"
}
end
function M._call(funct)
return {
func = funct, -- expr (identifier, index, call, invoke, block)
sourcerange = {min=0,max=0},
tag = "MCall"
}
end
function M._invoke(name, expr)
return {
functionname = name, -- string
record = expr, -- expr (identifier, index, call, invoke, block)
sourcerange = {min=0,max=0},
tag = "MInvoke"
}
end
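-- Illustrative usage sketch (not part of the original file): an index expression
-- such as `foo.bar` is modelled with the constructors above as:
--
--   local internalmodel = require "models.internalmodel"
--   local id  = internalmodel._identifier()        -- stands for `foo`
--   local idx = internalmodel._index(id, "bar")    -- stands for `foo.bar`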
return M

View File

@ -0,0 +1,861 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2011-2012 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Simon BERNARD <sbernard@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
-{ extension ('match', ...) }
local Q = require 'metalua.treequery'
local internalmodel = require 'models.internalmodel'
local apimodel = require 'models.apimodel'
local apimodelbuilder = require 'models.apimodelbuilder'
local M = {}
-- Analyzes an AST and returns two tables
-- * `locals`, which associates `Id{ } nodes which create a local variable
-- to a list of the `Id{ } occurrence nodes of that variable;
-- * `globals` which associates variable names to occurrences of
-- global variables having that name.
function bindings(ast)
local locals, globals = { }, { }
local function f(id, ...)
local name = id[1]
if Q.is_binder(id, ...) then
local binder = ... -- parent is the binder
locals[binder] = locals[binder] or { }
locals[binder][name]={ }
else
local _, binder = Q.get_binder(id, ...)
if binder then -- this is a local
table.insert(locals[binder][name], id)
else
local g = globals[name]
if g then table.insert(g, id) else globals[name]={id} end
end
end
end
Q(ast) :filter('Id') :foreach(f)
return locals, globals
end
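-- Illustrative usage sketch (not part of the original file):
--
--   local locals, globals = bindings(ast)
--   for name, occurrences in pairs(globals) do
--       -- `occurrences` is the list of `Id nodes using the global `name`
--   end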
-- --------------------------------------
-- ----------------------------------------------------------
-- return the comment linked before to this node
-- ----------------------------------------------------------
local function getlinkedcommentbefore(node)
local function _getlinkedcomment(node,line)
if node and node.lineinfo and node.lineinfo.first.line == line then
-- get the last comment before (the nearest of code)
local comments = node.lineinfo.first.comments
local comment = comments and comments[#comments]
if comment and comment.lineinfo.last.line == line-1 then
        -- ignore the comment if there is code before it on the same line
if node.lineinfo.first.facing and (node.lineinfo.first.facing.line ~= comment.lineinfo.first.line) then
return comment
end
else
return _getlinkedcomment(node.parent,line)
end
end
return nil
end
if node.lineinfo and node.lineinfo.first.line then
return _getlinkedcomment(node,node.lineinfo.first.line)
else
return nil
end
end
-- ----------------------------------------------------------
-- return the comment linked after to this node
-- ----------------------------------------------------------
local function getlinkedcommentafter(node)
local function _getlinkedcomment(node,line)
if node and node.lineinfo and node.lineinfo.last.line == line then
-- get the first comment after (the nearest of code)
local comments = node.lineinfo.last.comments
local comment = comments and comments[1]
if comment and comment.lineinfo.first.line == line then
return comment
else
return _getlinkedcomment(node.parent,line)
end
end
return nil
end
if node.lineinfo and node.lineinfo.last.line then
return _getlinkedcomment(node,node.lineinfo.last.line)
else
return nil
end
end
-- ----------------------------------------------------------
-- return true if this node is a block for the internal representation
-- ----------------------------------------------------------
local supported_b = {
Function = true,
Do = true,
While = true,
Fornum = true,
Forin = true,
Repeat = true,
}
local function supportedblock(node, parent)
return supported_b[ node.tag ] or
(parent and parent.tag == "If" and node.tag == nil)
end
-- ----------------------------------------------------------
-- create a block from the metalua node
-- ----------------------------------------------------------
local function createblock(block, parent)
local _block = internalmodel._block()
match block with
| `Function{param, body}
| `Do{...}
| `Fornum {identifier, min, max, body}
| `Forin {identifiers, exprs, body}
| `Repeat {body, expr} ->
_block.sourcerange.min = block.lineinfo.first.offset
_block.sourcerange.max = block.lineinfo.last.offset
| `While {expr, body} ->
_block.sourcerange.min = body.lineinfo.first.facing.offset
_block.sourcerange.max = body.lineinfo.last.facing.offset
| _ ->
if parent and parent.tag == "If" and block.tag == nil then
_block.sourcerange.min = block.lineinfo.first.facing.offset
_block.sourcerange.max = block.lineinfo.last.facing.offset
end
end
return _block
end
-- ----------------------------------------------------------
-- return true if this node is a expression in the internal representation
-- ----------------------------------------------------------
local supported_e = {
Index = true,
Id = true,
Call = true,
Invoke = true
}
local function supportedexpr(node)
return supported_e[ node.tag ]
end
local idto_block = {} -- cache from metalua id to internal model block
local idto_identifier = {} -- cache from metalua id to internal model identifier
local expreto_expression = {} -- cache from metalua expression to internal model expression
-- ----------------------------------------------------------
-- create an expression from a metalua node
-- ----------------------------------------------------------
local function createexpr(expr,_block)
local _expr = nil
match expr with
| `Id { name } ->
-- we store the block which hold this node
-- to be able to define
idto_block[expr]= _block
      -- if expr has no line info, it means expr has no representation in the code
-- so we don't need it.
if not expr.lineinfo then return nil end
-- create identifier
local _identifier = internalmodel._identifier()
idto_identifier[expr]= _identifier
_expr = _identifier
| `Index { innerexpr, `String{fieldname} } ->
if not expr.lineinfo then return nil end
-- create index
local _expression = createexpr(innerexpr,_block)
if _expression then _expr = internalmodel._index(_expression,fieldname) end
| `Call{innerexpr, ...} ->
if not expr.lineinfo then return nil end
-- create call
local _expression = createexpr(innerexpr,_block)
if _expression then _expr = internalmodel._call(_expression) end
| `Invoke{innerexpr,`String{functionname},...} ->
if not expr.lineinfo then return nil end
-- create invoke
local _expression = createexpr(innerexpr,_block)
if _expression then _expr = internalmodel._invoke(functionname,_expression) end
| _ ->
end
if _expr then
_expr.sourcerange.min = expr.lineinfo.first.offset
_expr.sourcerange.max = expr.lineinfo.last.offset
expreto_expression[expr] = _expr
end
return _expr
end
-- ----------------------------------------------------------
-- create block and expression node
-- ----------------------------------------------------------
local function createtreestructure(ast)
-- create internal content
local _internalcontent = internalmodel._internalcontent()
-- create root block
local _block = internalmodel._block()
local _blocks = { _block }
_block.sourcerange.min = ast.lineinfo.first.facing.offset
-- TODO remove the math.max when we support partial AST
_block.sourcerange.max = math.max(ast.lineinfo.last.facing.offset, 10000)
_internalcontent.content = _block
-- visitor function (down)
local function down (node,parent)
if supportedblock(node,parent) then
-- create the block
local _block = createblock(node,parent)
-- add it to parent block
table.insert(_blocks[#_blocks].content, _block)
-- enqueue the last block to know the "current" block
table.insert(_blocks,_block)
elseif supportedexpr(node) then
      -- we handle the expression only if it was not already done
if not expreto_expression[node] then
-- create expr
local _expression = createexpr(node,_blocks[#_blocks])
-- add it to parent block
if _expression then
table.insert(_blocks[#_blocks].content, _expression)
end
end
end
end
-- visitor function (up)
local function up (node, parent)
if supportedblock(node,parent) then
-- dequeue the last block to know the "current" block
table.remove(_blocks,#_blocks)
end
end
-- visit ast and build internal model
Q(ast):foreach(down,up)
return _internalcontent
end
local getitem
-- ----------------------------------------------------------
-- create the type from the node and position
-- ----------------------------------------------------------
local function createtype(node,position,comment2apiobj,file)
-- create module type ref
match node with
| `Call{ `Id "require", `String {modulename}} ->
return apimodel._moduletyperef(modulename,position)
| `Function {params, body} ->
-- create the functiontypedef from code
local _functiontypedef = apimodel._functiontypedef()
for _, p in ipairs(params) do
-- create parameters
local paramname
if p.tag=="Dots" then
paramname = "..."
else
paramname = p[1]
end
local _param = apimodel._parameter(paramname)
table.insert(_functiontypedef.params,_param)
end
_functiontypedef.name = "___" -- no name for inline type
return apimodel._inlinetyperef(_functiontypedef)
| `String {value} ->
local typeref = apimodel._primitivetyperef("string")
return typeref
| `Number {value} ->
local typeref = apimodel._primitivetyperef("number")
return typeref
| `True | `False ->
local typeref = apimodel._primitivetyperef("boolean")
return typeref
| `Table {...} ->
-- create recordtypedef from code
local _recordtypedef = apimodel._recordtypedef("___") -- no name for inline type
-- for each element of the table
for i=1,select("#", ...) do
local pair = select(i, ...)
-- if this is a pair we create a new item in the type
if pair.tag == "Pair" then
-- create an item
local _item = getitem(pair,nil, comment2apiobj,file)
if _item then
_recordtypedef:addfield(_item)
end
end
end
return apimodel._inlinetyperef(_recordtypedef)
| _ ->
end
-- if node is an expression supported
local supportedexpr = expreto_expression[node]
if supportedexpr then
-- create expression type ref
return apimodel._exprtyperef(supportedexpr,position)
end
end
local function completeapidoctype(apidoctype,itemname,init,file,comment2apiobj)
if not apidoctype.name then
apidoctype.name = itemname
file:mergetype(apidoctype)
end
-- create type from code
local typeref = createtype(init,1,comment2apiobj,file)
if typeref and typeref.tag == "inlinetyperef"
and typeref.def.tag == "recordtypedef" then
-- set the name
typeref.def.name = apidoctype.name
-- merge the type with priority to documentation except for source range
file:mergetype(typeref.def,false,true)
end
end
local function completeapidocitem (apidocitem, itemname, init, file, binder, comment2apiobj)
-- manage the case item has no name
if not apidocitem.name then
apidocitem.name = itemname
-- if item has no name this means it could not be attach to a parent
if apidocitem.scope then
apimodelbuilder.additemtoparent(file,apidocitem,apidocitem.scope,apidocitem.sourcerange.min,apidocitem.sourcerange.max)
apidocitem.scope = nil
end
end
-- for function try to merge definition
local apitype = apidocitem:resolvetype(file)
if apitype and apitype.tag == "functiontypedef" then
local codetype = createtype(init,1,comment2apiobj,file)
if codetype and codetype.tag =="inlinetyperef" then
codetype.def.name = apitype.name
file:mergetype(codetype.def)
end
end
-- manage the case item has no type
if not apidocitem.type then
-- extract typing from comment
local type, desc = apimodelbuilder.extractlocaltype(getlinkedcommentafter(binder),file)
if type then
apidocitem.type = type
else
-- if not found extracttype from code
apidocitem.type = createtype(init,1,comment2apiobj,file)
end
end
end
-- ----------------------------------------------------------
-- create or get the item finding in the binder with the given itemname
-- return also the ast node corresponding to this item
-- ----------------------------------------------------------
getitem = function (binder, itemname, comment2apiobj, file)
-- local function to create item
local function createitem(itemname, astnode, itemtype, description)
local _item = apimodel._item(itemname)
if description then _item.description = description end
_item.type = itemtype
if astnode and astnode.lineinfo then
_item.sourcerange.min = astnode.lineinfo.first.offset
_item.sourcerange.max = astnode.lineinfo.last.offset
end
return _item, astnode
end
-- try to match the binder with known patterns of item declaration
match binder with
| `Pair {string, init}
| `Set { {`Index { right , string}}, {init,...}} if string and string.tag =="String" ->
-- Pair and Set are used to search for a field of a type.
-- If an itemname is given, it means we search for a local or a global, not a type field.
if not itemname then
local itemname = string[1]
-- check for luadoc typing
local commentbefore = getlinkedcommentbefore(binder)
local apiobj = comment2apiobj[commentbefore] -- find apiobj linked to this comment
if apiobj then
if apiobj.tag=="item" then
if not apiobj.name or apiobj.name == itemname then
-- use code to complete api information if it's necessary
completeapidocitem(apiobj, itemname, init,file,binder,comment2apiobj)
-- for item use code source range rather than doc source range
if string and string.lineinfo then
apiobj.sourcerange.min = string.lineinfo.first.offset
apiobj.sourcerange.max = string.lineinfo.last.offset
end
return apiobj, string
end
elseif apiobj.tag=="recordtypedef" then
-- use code to complete api information if it's necessary
completeapidoctype(apiobj, itemname, init,file,comment2apiobj)
return createitem(itemname, string, apimodel._internaltyperef(apiobj.name), nil)
end
-- if the apiobj could not be associated to the current obj,
-- we do not use the documentation either
commentbefore = nil
end
-- else we use code to extract the type and description
-- check for "local" typing
local type, desc = apimodelbuilder.extractlocaltype(getlinkedcommentafter(binder),file)
local desc = desc or (commentbefore and commentbefore[1])
if type then
return createitem(itemname, string, type, desc )
else
-- if no "local typing" extract type from code
return createitem(itemname, string, createtype(init,1,comment2apiobj,file), desc)
end
end
| `Set {ids, inits}
| `Local {ids, inits} ->
-- if this is a single local var declaration
-- we check if there are a comment block linked and try to extract the type
if #ids == 1 then
local currentid, currentinit = ids[1],inits[1]
-- ignore non-Id nodes
if currentid.tag ~= 'Id' or currentid[1] ~= itemname then return nil end
-- check for luadoc typing
local commentbefore = getlinkedcommentbefore(binder)
local apiobj = comment2apiobj[commentbefore] -- find apiobj linked to this comment
if apiobj then
if apiobj.tag=="item" then
-- use code to complete api information if it's necessary
if not apiobj.name or apiobj.name == itemname then
completeapidocitem(apiobj, itemname, currentinit,file,binder,comment2apiobj)
-- if this is a global var or if it has no parent
-- we do not create a new item
if not apiobj.parent or apiobj.parent == file then
-- for item use code source range rather than doc source range
if currentid and currentid.lineinfo then
apiobj.sourcerange.min = currentid.lineinfo.first.offset
apiobj.sourcerange.max = currentid.lineinfo.last.offset
end
return apiobj, currentid
else
return createitem(itemname, currentid, apiobj.type, nil)
end
end
elseif apiobj.tag=="recordtypedef" then
-- use code to complete api information if it's necessary
completeapidoctype(apiobj, itemname, currentinit,file,comment2apiobj)
return createitem(itemname, currentid, apimodel._internaltyperef(apiobj.name), nil)
end
-- if the apiobj could not be associated to the current obj,
-- we do not use the documentation either
commentbefore = nil
end
-- else we use code to extract the type and description
-- check for "local" typing
local type,desc = apimodelbuilder.extractlocaltype(getlinkedcommentafter(binder),file)
desc = desc or (commentbefore and commentbefore[1])
if type then
return createitem(itemname, currentid, type, desc)
else
-- if no "local typing" extract type from code
return createitem(itemname, currentid, createtype(currentinit,1,comment2apiobj,file), desc)
end
end
-- else we use code to extract the type
local init,returnposition = nil,1
for i,id in ipairs(ids) do
-- calculate the current return position
if init and (init.tag == "Call" or init.tag == "Invoke") then
-- if previous init was a call or an invoke
-- we increment the returnposition
returnposition= returnposition+1
else
-- if init is not a function call
-- we change the init used to determine the type
init = inits[i]
end
-- get the name of the current id
local idname = id[1]
-- if this is the right id
if itemname == idname then
-- create type from init node and return position
return createitem (itemname, id, createtype(init,returnposition,comment2apiobj,file),nil)
end
end
| `Function {params, body} ->
for i,id in ipairs(params) do
-- get the name of the current id
local idname = id[1]
-- if this is the right id
if itemname == idname then
-- extract param's type from luadocumentation
local obj = comment2apiobj[getlinkedcommentbefore(binder)]
if obj and obj.tag=="item" then
local typedef = obj:resolvetype(file)
if typedef and typedef.tag =="functiontypedef" then
for j, param in ipairs(typedef.params) do
if i==j then
if i ==1 and itemname == "self" and param.type == nil
and obj.parent and obj.parent.tag == "recordtypedef" and obj.parent.name then
param.type = apimodel._internaltyperef(obj.parent.name)
end
-- TODO perhaps we must clone the typeref
return createitem(itemname,id, param.type,param.description)
end
end
end
end
return createitem(itemname,id)
end
end
| `Forin {ids, expr, body} ->
for i,id in ipairs(ids) do
-- get the name of the current id
local idname = id[1]
-- if this is the right id
if itemname == idname then
-- return data: we cannot guess the type for now
return createitem(itemname,id)
end
end
| `Fornum {id, ...} ->
-- get the name of the current id
local idname = id[1]
-- if this is the right id
if itemname == idname then
-- return data: we cannot guess the type for now
return createitem(itemname,id)
end
| `Localrec {{id}, {func}} ->
-- get the name of the current id
local idname = id[1]
-- if this is the right id
if itemname == idname then
-- check for luadoc typing
local commentbefore = getlinkedcommentbefore(binder)
local apiobj = comment2apiobj[commentbefore] -- find apiobj linked to this comment
if apiobj then
if apiobj.tag=="item" then
if not apiobj.name or apiobj.name == itemname then
-- use code to complete api information if it's necessary
completeapidocitem(apiobj, itemname, func,file,binder,comment2apiobj)
return createitem(itemname,id,apiobj.type,nil)
end
end
-- if the apiobj could not be associated to the current obj,
-- we do not use the documentation either
commentbefore = nil
end
-- else we use code to extract the type and description
-- check for "local" typing
local type,desc = apimodelbuilder.extractlocaltype(getlinkedcommentafter(binder),file)
desc = desc or (commentbefore and commentbefore[1])
if type then
return createitem(itemname, id, type, desc)
else
-- if no "local typing" extract type from code
return createitem(itemname, id, createtype(func,1,comment2apiobj,file), desc)
end
end
| _ ->
end
end
-- ----------------------------------------------------------
-- Search from Id node to Set node to find field of type.
--
-- Lua code : table.field1.field2 = 12
-- looks like that in metalua :
-- `Set{
-- `Index { `Index { `Id "table", `String "field1" },
-- `String "field2"},
-- `Number "12"}
-- ----------------------------------------------------------
local function searchtypefield(node,_currentitem,comment2apiobj,file)
-- we are only interested in:
-- items which are fields of a recordtypedef
-- ast nodes which are Index
if _currentitem then
local type = _currentitem:resolvetype(file)
if type and type.tag == "recordtypedef" then
if node and node.tag == "Index" then
local rightpart = node[2]
local _newcurrentitem = type.fields[rightpart[1]]
if _newcurrentitem then
-- if this index represents a known field of the type we continue searching
searchtypefield (node.parent,_newcurrentitem,comment2apiobj,file)
else
-- if not, this is perhaps a new field, but
-- to be a new field this index must be included in a Set
if node.parent and node.parent.tag =="Set" then
-- in this case we create the new item and add it to the type
local set = node.parent
local item, string = getitem(set,nil, comment2apiobj,file)
-- add this item to the type, only if it has no parent and if this type does not already contain this field
if item and not item.parent and string and not type.fields[string[1]] then
type:addfield(item)
end
end
end
end
end
end
end
-- ----------------------------------------------------------
-- create local vars and global vars and link them with their occurrences
-- ----------------------------------------------------------
local function createvardefinitions(_internalcontent,ast,file,comment2apiobj)
-- use bindings to get locals and globals definition
local locals, globals = bindings( ast )
-- create locals var
for binder, namesAndOccurrences in pairs(locals) do
for name, occurrences in pairs(namesAndOccurrences) do
-- get item, id
local _item, id = getitem(binder, name,comment2apiobj,file)
if id then
-- add definition as occurrence
-- we consider the identifier in the binder as an occurrence
local _identifierdef = idto_identifier[id]
if _identifierdef then
table.insert(_item.occurrences, _identifierdef)
_identifierdef.definition = _item
end
-- add occurrences
for _,occurrence in ipairs(occurrences) do
searchtypefield(occurrence.parent, _item,comment2apiobj,file)
local _identifier = idto_identifier[occurrence]
if _identifier then
table.insert(_item.occurrences, _identifier)
_identifier.definition = _item
end
end
-- add item to block
local _block = idto_block[id]
table.insert(_block.localvars,{item=_item,scope = {min=0,max=0}})
end
end
end
-- create globals var
for name, occurrences in pairs( globals ) do
-- get or create definition
local _item = file.globalvars[name]
local binder = occurrences[1].parent
if not _item then
-- a global declaration exists only if the first occurrence is in the left part of a 'Set'
if binder and binder.tag == "Set" then
_item = getitem(binder, name,comment2apiobj,file)
end
-- if we find an item, this is a global var declaration
if _item then
file:addglobalvar(_item)
else
-- else it is an unknown global var
_item = apimodel._item(name)
local _firstoccurrence = idto_identifier[occurrences[1]]
if _firstoccurrence then
_item.sourcerange.min = _firstoccurrence.sourcerange.min
_item.sourcerange.max = _firstoccurrence.sourcerange.max
end
table.insert(_internalcontent.unknownglobalvars,_item)
end
else
-- if the global var definition already exists, we just try to complete it
if binder then
match binder with
| `Set {ids, inits} ->
-- handle the case only if there is a single element in the Set
if #ids == 1 then
local currentid, currentinit = ids[1],inits[1]
-- ignore non-Id nodes and mismatching names
if currentid.tag == 'Id' and currentid[1] == name then
completeapidocitem(_item, name, currentinit,file,binder,comment2apiobj)
if currentid and currentid.lineinfo then
_item.sourcerange.min = currentid.lineinfo.first.offset
_item.sourcerange.max = currentid.lineinfo.last.offset
end
end
end
| _ ->
end
end
end
-- add occurrences
for _,occurence in ipairs(occurrences) do
local _identifier = idto_identifier[occurence]
searchtypefield(occurence.parent, _item,comment2apiobj,file)
if _identifier then
table.insert(_item.occurrences, _identifier)
_identifier.definition = _item
end
end
end
end
-- ----------------------------------------------------------
-- add a parent link to all ast nodes
-- ----------------------------------------------------------
local function addparents(ast)
-- visitor function (down)
local function down (node,parent)
node.parent = parent
end
-- visit ast and set parent links
Q(ast):foreach(down,up)
end
-- ----------------------------------------------------------
-- try to detect a module declaration from code
-- ----------------------------------------------------------
local function searchmodule(ast,file,comment2apiobj,modulename)
-- if the last statement is a return
if ast then
local laststatement = ast[#ast]
if laststatement and laststatement.tag == "Return" then
-- and if the first expression returned is an identifier.
local firstexpr = laststatement[1]
if firstexpr and firstexpr.tag == "Id" then
-- get identifier in internal model
local _identifier = idto_identifier [firstexpr]
-- the definition should be an inline type
if _identifier
and _identifier.definition
and _identifier.definition.type
and _identifier.definition.type.tag == "inlinetyperef"
and _identifier.definition.type.def.tag == "recordtypedef" then
--set modulename if needed
if not file.name then file.name = modulename end
-- create or merge type
local _type = _identifier.definition.type.def
_type.name = modulename
-- if file (module) has no documentation add item documentation to it
-- else add it to the type.
if not file.description or file.description == "" then
file.description = _identifier.definition.description
else
_type.description = _identifier.definition.description
end
_identifier.definition.description = ""
if not file.shortdescription or file.shortdescription == "" then
file.shortdescription = _identifier.definition.shortdescription
else
_type.shortdescription = _identifier.definition.shortdescription
end
_identifier.definition.shortdescription = ""
-- WORKAROUND FOR BUG 421622: [outline]module selection in outline does not select it in texteditor
--_type.sourcerange.min = _identifier.definition.sourcerange.min
--_type.sourcerange.max = _identifier.definition.sourcerange.max
-- merge the type with priority to documentation except for source range
file:mergetype(_type,false,true)
-- create return if needed
if not file.returns[1] then
file.returns[1] = apimodel._return()
file.returns[1].types = { apimodel._internaltyperef(modulename) }
end
-- change the type of the identifier
_identifier.definition.type = apimodel._internaltyperef(modulename)
end
end
end
end
end
-- ----------------------------------------------------------
-- create the internalcontent from an ast metalua
-- ----------------------------------------------------------
function M.createinternalcontent (ast,file,comment2apiobj,modulename)
-- init cache
idto_block = {}
idto_identifier = {}
expreto_expression = {}
comment2apiobj = comment2apiobj or {}
file = file or apimodel._file()
-- execute code safely to be sure to clean cache correctly
local internalcontent
local ok, errmsg = pcall(function ()
-- add parent to all node
addparents(ast)
-- create block and expression node
internalcontent = createtreestructure(ast)
-- create Local vars, global vars and linked occurences (Items)
createvardefinitions(internalcontent,ast,file,comment2apiobj)
-- try to detect module information from code
local moduletyperef = file:moduletyperef()
if moduletyperef and moduletyperef.tag == "internaltyperef" then
modulename = moduletyperef.typename or modulename
end
if modulename then
searchmodule(ast,file,comment2apiobj,modulename)
end
end)
-- clean cache
idto_block = {}
idto_identifier = {}
expreto_expression = {}
-- if not ok raise an error
if not ok then error (errmsg) end
return internalcontent
end
return M

View File

@ -0,0 +1,656 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2011-2013 Sierra Wireless and others.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Sierra Wireless - initial API and implementation
-------------------------------------------------------------------------------
local mlc = require ('metalua.compiler').new()
local gg = require 'metalua.grammar.generator'
local lexer = require 'metalua.grammar.lexer'
local mlp = mlc.parser
local M = {} -- module
local lx -- lexer used to parse tag
local registeredparsers -- table {tagname => {list of parsers}}
-- ----------------------------------------------------
-- raise an error if result contains a node error
-- ----------------------------------------------------
local function raiserror(result)
for i, node in ipairs(result) do
assert(not node or node.tag ~= "Error")
end
end
-- ----------------------------------------------------
-- copy key and value from one table to another
-- ----------------------------------------------------
local function copykey(tablefrom, tableto)
for key, value in pairs(tablefrom) do
if key ~= "lineinfos" then
tableto[key] = value
end
end
end
-- ----------------------------------------------------
-- Handle keywords and identifiers as words
-- ----------------------------------------------------
local function parseword(lx)
local word = lx :peek()
local tag = word.tag
if tag=='Keyword' or tag=='Id' then
lx:next()
return {tag='Word', lineinfo=word.lineinfo, word[1]}
else
return gg.parse_error(lx,'Id or Keyword expected')
end
end
-- ----------------------------------------------------
-- parse an id
-- return a table {name, lineinfo}
-- ----------------------------------------------------
local idparser = gg.sequence({
builder = function (result)
raiserror(result)
return { name = result[1][1] }
end,
parseword
})
-- ----------------------------------------------------
-- parse a modulename (id.)?id
-- return a table {name, lineinfo}
-- ----------------------------------------------------
local modulenameparser = gg.list({
builder = function (result)
raiserror(result)
local ids = {}
for i, id in ipairs(result) do
table.insert(ids,id.name)
end
return {name = table.concat(ids,".")}
end,
primary = idparser,
separators = '.'
})
-- ----------------------------------------------------
-- parse a typename (id.)?id
-- return a table {name, lineinfo}
-- ----------------------------------------------------
local typenameparser= modulenameparser
-- ----------------------------------------------------
-- parse an internaltype ref
-- return a table {name, lineinfo}
-- ----------------------------------------------------
local internaltyperefparser = gg.sequence({
builder = function(result)
raiserror(result)
return {tag = "typeref",type=result[1].name}
end,
"#", typenameparser
})
-- ----------------------------------------------------
-- parse an external type ref
-- return a table {name, lineinfo}
-- ----------------------------------------------------
local externaltyperefparser = gg.sequence({
builder = function(result)
raiserror(result)
return {tag = "typeref",module=result[1].name,type=result[2].name}
end,
modulenameparser,"#", typenameparser
})
-- ----------------------------------------------------
-- parse a typeref
-- return a table {name, lineinfo}
-- ----------------------------------------------------
local typerefparser = gg.multisequence{
internaltyperefparser,
externaltyperefparser}
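-- For illustration only (not part of the original source), the kind of strings
-- these type-reference parsers accept:
--   #string              internal type reference (#typename)
--   socket#client        external type reference (modulename#typename)
--   pl.tablex#Map        module names may be dotted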
-- ----------------------------------------------------
-- parse a list of typeref
-- return a list of tables {name, lineinfo}
-- ----------------------------------------------------
local typereflistparser = gg.list({
primary = typerefparser,
separators = ','
})
-- ----------------------------------------------------
-- TODO use a more generic way to parse (modifier is not always a typeref)
-- TODO support more than one modifier
-- ----------------------------------------------------
local modifiersparser = gg.sequence({
builder = function(result)
raiserror(result)
return {[result[1].name]=result[2]}
end,
"[", idparser , "=" , internaltyperefparser , "]"
})
-- ----------------------------------------------------
-- parse a return tag
-- ----------------------------------------------------
local returnparsers = {
-- full parser
gg.sequence({
builder = function (result)
raiserror(result)
return { types= result[1]}
end,
'@','return', typereflistparser
}),
-- parser without typerefs
gg.sequence({
builder = function (result)
raiserror(result)
return { types = {}}
end,
'@','return'
})
}
-- ----------------------------------------------------
-- parse a param tag
-- ----------------------------------------------------
local paramparsers = {
-- full parser
gg.sequence({
builder = function (result)
raiserror(result)
return { name = result[2].name, type = result[1]}
end,
'@','param', typerefparser, idparser
}),
-- full parser without type
gg.sequence({
builder = function (result)
raiserror(result)
return { name = result[1].name}
end,
'@','param', idparser
}),
-- Parser for `Dots
gg.sequence({
builder = function (result)
raiserror(result)
return { name = '...' }
end,
'@','param', '...'
}),
}
-- ----------------------------------------------------
-- parse a field tag
-- ----------------------------------------------------
local fieldparsers = {
-- full parser
gg.sequence({
builder = function (result)
raiserror(result)
local tag = {}
copykey(result[1],tag)
tag.type = result[2]
tag.name = result[3].name
return tag
end,
'@','field', modifiersparser, typerefparser, idparser
}),
-- parser without name
gg.sequence({
builder = function (result)
raiserror(result)
local tag = {}
copykey(result[1],tag)
tag.type = result[2]
return tag
end,
'@','field', modifiersparser, typerefparser
}),
-- parser without type
gg.sequence({
builder = function (result)
raiserror(result)
local tag = {}
copykey(result[1],tag)
tag.name = result[2].name
return tag
end,
'@','field', modifiersparser, idparser
}),
-- parser without type and name
gg.sequence({
builder = function (result)
raiserror(result)
local tag = {}
copykey(result[1],tag)
return tag
end,
'@','field', modifiersparser
}),
-- parser without modifiers
gg.sequence({
builder = function (result)
raiserror(result)
return { name = result[2].name, type = result[1]}
end,
'@','field', typerefparser, idparser
}),
-- parser without modifiers and name
gg.sequence({
builder = function (result)
raiserror(result)
return {type = result[1]}
end,
'@','field', typerefparser
}),
-- parser without type and modifiers
gg.sequence({
builder = function (result)
raiserror(result)
return { name = result[1].name}
end,
'@','field', idparser
}),
-- parser with nothing
gg.sequence({
builder = function (result)
raiserror(result)
return {}
end,
'@','field'
})
}
-- ----------------------------------------------------
-- parse a function tag
-- TODO use a more generic way to parse modifier !
-- ----------------------------------------------------
local functionparsers = {
-- full parser
gg.sequence({
builder = function (result)
raiserror(result)
local tag = {}
copykey(result[1],tag)
tag.name = result[2].name
return tag
end,
'@','function', modifiersparser, idparser
}),
-- parser without name
gg.sequence({
builder = function (result)
raiserror(result)
local tag = {}
copykey(result[1],tag)
return tag
end,
'@','function', modifiersparser
}),
-- parser without modifier
gg.sequence({
builder = function (result)
raiserror(result)
local tag = {}
tag.name = result[1].name
return tag
end,
'@','function', idparser
}),
-- empty parser
gg.sequence({
builder = function (result)
raiserror(result)
return {}
end,
'@','function'
})
}
-- ----------------------------------------------------
-- parse a type tag
-- ----------------------------------------------------
local typeparsers = {
-- full parser
gg.sequence({
builder = function (result)
raiserror(result)
return { name = result[1].name}
end,
'@','type',typenameparser
}),
-- parser without name
gg.sequence({
builder = function (result)
raiserror(result)
return {}
end,
'@','type'
})
}
-- ----------------------------------------------------
-- parse a module tag
-- ----------------------------------------------------
local moduleparsers = {
-- full parser
gg.sequence({
builder = function (result)
raiserror(result)
return { name = result[1].name }
end,
'@','module', modulenameparser
}),
-- parser without name
gg.sequence({
builder = function (result)
raiserror(result)
return {}
end,
'@','module'
})
}
-- ----------------------------------------------------
-- parse a third-party tag
-- ----------------------------------------------------
local thirdtagsparser = gg.sequence({
builder = function (result)
raiserror(result)
return { name = result[1][1] }
end,
'@', mlp.id
})
-- ----------------------------------------------------
-- init parser
-- ----------------------------------------------------
local function initparser()
-- register parsers
-- each tag name has several parsers
registeredparsers = {
["module"] = moduleparsers,
["return"] = returnparsers,
["type"] = typeparsers,
["field"] = fieldparsers,
["function"] = functionparsers,
["param"] = paramparsers
}
-- create lexer used for parsing
lx = lexer.lexer:clone()
lx.extractors = {
-- "extract_long_comment",
-- "extract_short_comment",
-- "extract_long_string",
"extract_short_string",
"extract_word",
"extract_number",
"extract_symbol"
}
-- Add dots as a keyword
local tagnames = { '...' }
-- Add tag names as keywords
for tagname, _ in pairs(registeredparsers) do
table.insert(tagnames,tagname)
end
lx:add(tagnames)
return lx, registeredparsers
end
initparser()
-- ----------------------------------------------------
-- get the string pattern to remove for each line of description
-- the goal is to fix the indentation problems
-- ----------------------------------------------------
local function getstringtoremove (stringcomment,commentstart)
local _,_,capture = string.find(stringcomment,"\n?([ \t]*)@[^{]+",commentstart)
if not capture then
_,_,capture = string.find(stringcomment,"^([ \t]*)",commentstart)
end
capture = string.gsub(capture,"(.)","%1?")
return capture
end
-- ----------------------------------------------------
-- parse a comment part containing a tag and return a table structure
-- ----------------------------------------------------
local function parsetag(part)
if part.comment:find("^@") then
-- check if the part starts with a supported tag
for tagname,parsers in pairs(registeredparsers) do
if (part.comment:find("^@"..tagname)) then
-- try the registered parsers for this tag
local result
for i, parser in ipairs(parsers) do
local valid, tag = pcall(parser, lx:newstream(part.comment, tagname .. 'tag lexer'))
if valid then
-- add tagname
tag.tagname = tagname
-- add description
local endoffset = tag.lineinfo.last.offset
tag.description = part.comment:sub(endoffset+2,-1)
return tag
end
end
end
end
end
return nil
end
-- ----------------------------------------------------
-- Parse third party tags.
--
-- Enables parsing a tag not defined in the language.
-- So far, the accepted format is: @sometagname description
-- ----------------------------------------------------
local function parsethirdtag( part )
-- Check if there is something to process
if not part.comment:find("^@") then
return nil, 'No tag to parse'
end
-- Apply parser
local status, parsedtag = pcall(thirdtagsparser, lx:newstream(part.comment, 'Third party tag lexer'))
if not status then
return nil, "Unable to parse given string."
end
-- Retrieve description
local endoffset = parsedtag.lineinfo.last.offset
local tag = {
description = part.comment:sub(endoffset+2,-1)
}
return parsedtag.name, tag
end
-- ---------------------------------------------------------
-- split string comment in several part
-- return list of {comment = string, offset = number}
-- the first part is the part before the first tag
-- the others are the part from a tag to the next one
-- ----------------------------------------------------
local function split(stringcomment,commentstart)
local partstart = commentstart
local result = {}
-- handle the case where the comment starts with @
-- (we must ignore the inline see tag @{..})
local at_startoffset, at_endoffset = stringcomment:find("^[ \t]*@[^{]",partstart)
if at_endoffset then
partstart = at_endoffset-1 -- we start before the @ and the non '{' character
end
-- split comment
-- (we must ignore the inline see tag @{..})
repeat
at_startoffset, at_endoffset = stringcomment:find("\n[ \t]*@[^{]",partstart)
local partend
if at_startoffset then
partend= at_startoffset-1 -- the end is before the separator pattern (just before the \n)
else
partend = #stringcomment -- no pattern found, so the end is the end of the string
end
table.insert(result, { comment = stringcomment:sub (partstart,partend) ,
offset = partstart})
if at_endoffset then
partstart = at_endoffset-1 -- the new start is before the @ and the non '{' char
end
until not at_endoffset
return result
end
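-- Illustrative sketch of the splitting behaviour: for a comment string like
--   "Short description.\n@param #number x the value\n@return #number"
-- split() returns three parts (the free-text description, the "@param ..." part
-- and the "@return ..." part), each with its start offset in the original string.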
-- ----------------------------------------------------
-- parse a comment block and return a table
-- ----------------------------------------------------
function M.parse(stringcomment)
local _comment = {description="", shortdescription=""}
-- clean windows carriage return
stringcomment = string.gsub(stringcomment,"\r\n","\n")
-- check if it's a ld comment
-- get the begin of the comment
-- ============================
if not stringcomment:find("^-") then
-- if this comment doesn't start with '-', we will not handle it.
return nil
end
-- retrieve the real start
local commentstart = 2 --after the first hyphen
-- if the first line is an empty comment line with at least 3 hyphens we ignore it
local _ , endoffset = stringcomment:find("^-+[ \t]*\n")
if endoffset then
commentstart = endoffset+1
end
-- clean comments
-- ===================
-- remove line of "-"
stringcomment = string.sub(stringcomment,commentstart)
-- clean indentation
local pattern = getstringtoremove (stringcomment,1)
stringcomment = string.gsub(stringcomment,"^"..pattern,"")
stringcomment = string.gsub(stringcomment,"\n"..pattern,"\n")
-- split comment part
-- ====================
local commentparts = split(stringcomment, 1)
-- Extract descriptions
-- ====================
local firstpart = commentparts[1].comment
if firstpart:find("^[^@]") or firstpart:find("^@{") then
-- if the comment part doesn't start with @
-- it's the part which contains the descriptions
-- (there is an exception for the in-line see tag @{..})
local shortdescription, description = string.match(firstpart,'^(.-[.?])(%s.+)')
-- store description
if shortdescription then
_comment.shortdescription = shortdescription
-- clean description
-- remove always the first space character
-- (this handles the case where the short and long descriptions are on the same line)
description = string.gsub(description, "^[ \t]","")
-- if first line is only an empty string remove it
description = string.gsub(description, "^[ \t]*\n","")
_comment.description = description
else
_comment.shortdescription = firstpart
_comment.description = ""
end
end
-- Extract tags
-- ===================
-- Parse regular tags
local tag
for i, part in ipairs(commentparts) do
tag = parsetag(part)
--if it's a supported tag (so tag is not nil, it's a table)
if tag then
if not _comment.tags then _comment.tags = {} end
if not _comment.tags[tag.tagname] then
_comment.tags[tag.tagname] = {}
end
table.insert(_comment.tags[tag.tagname], tag)
else
-- Try user defined tags, so far they will look like
-- @identifier description
local tagname, thirdtag = parsethirdtag( part )
if tagname then
--
-- Append found tag
--
local reservedname = 'unknowntags'
if not _comment.unknowntags then
_comment.unknowntags = {}
end
-- Create specific section for parsed tag
if not _comment.unknowntags[tagname] then
_comment.unknowntags[tagname] = {}
end
-- Append to specific section
table.insert(_comment.unknowntags[tagname], thirdtag)
end
end
end
return _comment
end
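-- Rough usage sketch (illustrative; it assumes the caller passes the comment
-- content with the leading '--' of each line already stripped):
--   local c = M.parse("- Sum two values.\n @param #number a first value\n @return #number")
--   -- c.shortdescription                --> "Sum two values."
--   -- c.tags["param"][1].name           --> "a"
--   -- c.tags["return"][1].types[1].type --> "number"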
function M.parseinlinecomment(stringcomment)
--TODO this code is used to activate typing only on '---' comments (deactivated for now)
-- if not stringcomment or not stringcomment:find("^-") then
-- -- if this comment don't start by -, we will not handle it.
-- return nil
-- end
-- -- remove the first '-'
-- stringcomment = string.sub(stringcomment,2)
-- print (stringcomment)
-- io.flush()
local valid, parsedtag = pcall(typerefparser, lx:newstream(stringcomment, 'typeref parser'))
if valid then
local endoffset = parsedtag.lineinfo.last.offset
parsedtag.description = stringcomment:sub(endoffset+2,-1)
return parsedtag
end
end
return M

View File

@ -0,0 +1,546 @@
--- Date and Date Format classes.
-- See <a href="../../index.html#date">the Guide</a>.
-- @class module
-- @name pl.Date
-- @pragma nostrip
--[[
module("pl.Date")
]]
local class = require 'pl.class'
local os_time, os_date = os.time, os.date
local stringx = require 'pl.stringx'
local Date = class()
Date.Format = class()
--- Date constructor.
-- @param t this can be either <ul>
-- <li>nil - use current date and time</li>
-- <li>number - seconds since epoch (as returned by @{os.time})</li>
-- <li>Date - copy constructor</li>
-- <li>table - table containing year, month, etc as for os.time()
-- You may leave out year, month or day, in which case current values will be used.
-- </li>
-- <li> two to six numbers: year, month, day, hour, min, sec</li>
-- </ul>
-- @function Date
function Date:_init(t,...)
local time
if select('#',...) > 0 then
local extra = {...}
local year = t
t = {
year = year,
month = extra[1],
day = extra[2],
hour = extra[3],
min = extra[4],
sec = extra[5]
}
end
if t == nil then
time = os_time()
elseif type(t) == 'number' then
time = t
elseif type(t) == 'table' then
if getmetatable(t) == Date then -- copy ctor
time = t.time
else
if not (t.year and t.month and t.day) then
local lt = os.date('*t')
if not t.year and not t.month and not t.day then
t.year = lt.year
t.month = lt.month
t.day = lt.day
else
t.year = t.year or lt.year
t.month = t.month or (t.day and lt.month or 1)
t.day = t.day or 1
end
end
time = os_time(t)
end
end
self:set(time)
end
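-- Illustrative ways of constructing a Date (sketch; variable names are arbitrary):
--   local d1 = Date()                                  -- current date and time
--   local d2 = Date(os.time())                         -- from seconds since epoch
--   local d3 = Date { year = 2011, month = 3, day = 2 }
--   local d4 = Date(2011,3,2,10,30,0)                  -- year,month,day,hour,min,sec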
local thour,tmin
--- get the time zone offset from UTC.
-- @return hours ahead of UTC
-- @return minutes ahead of UTC
function Date.tzone ()
if not thour then
local t = os.time()
local ut = os.date('!*t',t)
local lt = os.date('*t',t)
thour = lt.hour - ut.hour
tmin = lt.min - ut.min
end
return thour, tmin
end
--- convert this date to UTC.
function Date:toUTC ()
local th, tm = Date.tzone()
self:add { hour = -th }
if tm > 0 then self:add {min = -tm} end
end
--- convert this UTC date to local.
function Date:toLocal ()
local th, tm = Date.tzone()
self:add { hour = th }
if tm > 0 then self:add {min = tm} end
end
--- set the current time of this Date object.
-- @param t seconds since epoch
function Date:set(t)
self.time = t
self.tab = os_date('*t',self.time)
end
--- set the year.
-- @param y Four-digit year
-- @class function
-- @name Date:year
--- set the month.
-- @param m month
-- @class function
-- @name Date:month
--- set the day.
-- @param d day
-- @class function
-- @name Date:day
--- set the hour.
-- @param h hour
-- @class function
-- @name Date:hour
--- set the minutes.
-- @param min minutes
-- @class function
-- @name Date:min
--- set the seconds.
-- @param sec seconds
-- @class function
-- @name Date:sec
--- set the day of year.
-- @class function
-- @param yday day of year
-- @name Date:yday
--- get the year.
-- @param y Four-digit year
-- @class function
-- @name Date:year
--- get the month.
-- @class function
-- @name Date:month
--- get the day.
-- @class function
-- @name Date:day
--- get the hour.
-- @class function
-- @name Date:hour
--- get the minutes.
-- @class function
-- @name Date:min
--- get the seconds.
-- @class function
-- @name Date:sec
--- get the day of year.
-- @class function
-- @name Date:yday
for _,c in ipairs{'year','month','day','hour','min','sec','yday'} do
Date[c] = function(self,val)
if val then
self.tab[c] = val
self:set(os_time(self.tab))
return self
else
return self.tab[c]
end
end
end
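-- The loop above generates combined getter/setters (illustrative):
--   local d = Date()
--   print(d:year(), d:month(), d:day())   -- called without a value: getters
--   d:year(2000):month(12):day(31)        -- called with a value: setters, each returns the Date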
--- name of day of week.
-- @param full full name if true, abbreviated otherwise.
-- @return string name
function Date:weekday_name(full)
return os_date(full and '%A' or '%a',self.time)
end
--- name of month.
-- @param full full name if true, abbreviated otherwise.
-- @return string name
function Date:month_name(full)
return os_date(full and '%B' or '%b',self.time)
end
--- is this day on a weekend?.
function Date:is_weekend()
return self.tab.wday == 0 or self.tab.wday == 6
end
--- add to a date object.
-- @param t a table containing one of the following keys and a value:<br>
-- year,month,day,hour,min,sec
-- @return this date
function Date:add(t)
local key,val = next(t)
self.tab[key] = self.tab[key] + val
self:set(os_time(self.tab))
return self
end
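-- Example (sketch): add() takes a table with a single key/value pair.
--   local d = Date { year = 2011, month = 1, day = 31 }
--   d:add { day = 1 }   -- os.time normalisation rolls this over to 2011-02-01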
--- last day of the month.
-- @return int day
function Date:last_day()
local d = 28
local m = self.tab.month
while self.tab.month == m do
d = d + 1
self:add{day=1}
end
self:add{day=-1}
return self
end
--- difference between two Date objects.
-- Note: currently the result is a regular @{Date} object,
-- but also has `interval` field set, which means a more
-- appropriate string rep is used.
-- @param other Date object
-- @return a Date object
function Date:diff(other)
local dt = self.time - other.time
if dt < 0 then error("date difference is negative!",2) end
local date = Date(dt)
date.interval = true
return date
end
--- long numerical ISO data format version of this date.
function Date:__tostring()
if not self.interval then
return os_date('%Y-%m-%d %H:%M:%S',self.time)
else
local t, res = self.tab, ''
local y,m,d = t.year - 1970, t.month - 1, t.day - 1
if y > 0 then res = res .. y .. ' years ' end
if m > 0 then res = res .. m .. ' months ' end
if d > 0 then res = res .. d .. ' days ' end
if y == 0 and m == 0 then
local h = t.hour - Date.tzone() -- not accounting for UTC mins!
if h > 0 then res = res .. h .. ' hours ' end
if t.min > 0 then res = res .. t.min .. ' min ' end
if t.sec > 0 then res = res .. t.sec .. ' sec ' end
end
return res
end
end
--- equality between Date objects.
function Date:__eq(other)
return self.time == other.time
end
--- less-than comparison between Date objects.
function Date:__lt(other)
return self.time < other.time
end
------------ Date.Format class: parsing and rendering dates ------------
-- short field names, explicit os.date names, and a mask for allowed field repeats
local formats = {
d = {'day',{true,true}},
y = {'year',{false,true,false,true}},
m = {'month',{true,true}},
H = {'hour',{true,true}},
M = {'min',{true,true}},
S = {'sec',{true,true}},
}
--
--- Date.Format constructor.
-- @param fmt A string where the following fields are significant: <ul>
-- <li>d day (either d or dd)</li>
-- <li>y year (either yy or yyy)</li>
-- <li>m month (either m or mm)</li>
-- <li>H hour (either H or HH)</li>
-- <li>M minute (either M or MM)</li>
-- <li>S second (either S or SS)</li>
-- </ul>
-- Alternatively, if fmt is nil then this returns a flexible date parser
-- that tries various date/time schemes in turn:
-- <ol>
-- <li> <a href="http://en.wikipedia.org/wiki/ISO_8601">ISO 8601</a>,
-- like 2010-05-10 12:35:23Z or 2008-10-03T14:30+02</li>
-- <li> times like 15:30 or 8.05pm (assumed to be today's date)</li>
-- <li> dates like 28/10/02 (European order!) or 5 Feb 2012 </li>
-- <li> month name like march or Mar (case-insensitive, first 3 letters);
-- here the day will be 1 and the year this current year </li>
-- </ol>
-- A date in format 3 can be optionally followed by a time in format 2.
-- Please see test-date.lua in the tests folder for more examples.
-- @usage df = Date.Format("yyyy-mm-dd HH:MM:SS")
-- @class function
-- @name Date.Format
function Date.Format:_init(fmt)
if not fmt then return end
local append = table.insert
local D,PLUS,OPENP,CLOSEP = '\001','\002','\003','\004'
local vars,used = {},{}
local patt,outf = {},{}
local i = 1
while i < #fmt do
local ch = fmt:sub(i,i)
local df = formats[ch]
if df then
if used[ch] then error("field appeared twice: "..ch,2) end
used[ch] = true
-- this field may be repeated
local _,inext = fmt:find(ch..'+',i+1)
local cnt = not _ and 1 or inext-i+1
if not df[2][cnt] then error("wrong number of fields: "..ch,2) end
-- single chars mean 'accept more than one digit'
local p = cnt==1 and (D..PLUS) or (D):rep(cnt)
append(patt,OPENP..p..CLOSEP)
append(vars,ch)
if ch == 'y' then
append(outf,cnt==2 and '%y' or '%Y')
else
append(outf,'%'..ch)
end
i = i + cnt
else
append(patt,ch)
append(outf,ch)
i = i + 1
end
end
-- escape any magic characters
fmt = table.concat(patt):gsub('[%-%.%+%[%]%(%)%$%^%%%?%*]','%%%1')
-- replace markers with their magic equivalents
fmt = fmt:gsub(D,'%%d'):gsub(PLUS,'+'):gsub(OPENP,'('):gsub(CLOSEP,')')
self.fmt = fmt
self.outf = table.concat(outf)
self.vars = vars
end
local parse_date
--- parse a string into a Date object.
-- @param str a date string
-- @return date object
function Date.Format:parse(str)
if not self.fmt then
return parse_date(str,self.us)
end
local res = {str:match(self.fmt)}
if #res==0 then return nil, 'cannot parse '..str end
local tab = {}
for i,v in ipairs(self.vars) do
local name = formats[v][1] -- e.g. 'y' becomes 'year'
tab[name] = tonumber(res[i])
end
-- os.date() requires these fields; if not present, we assume
-- that the time set is for the current day.
if not (tab.year and tab.month and tab.day) then
local today = Date()
tab.year = tab.year or today:year()
tab.month = tab.month or today:month()
tab.day = tab.day or today:day()
end
local Y = tab.year
if Y < 100 then -- classic Y2K pivot
tab.year = Y + (Y < 35 and 2000 or 1999)
elseif not Y then
tab.year = 1970
end
--dump(tab)
return Date(tab)
end
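-- Illustrative round-trip with an explicit format (names are arbitrary):
--   local df = Date.Format 'yyyy-mm-dd'
--   local d  = df:parse '2017-09-26'   -- a Date object
--   df:tostring(d)                     -- '2017-09-26'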
--- convert a Date object into a string.
-- @param d a date object, or a time value as returned by @{os.time}
-- @return string
function Date.Format:tostring(d)
local tm = type(d) == 'number' and d or d.time
if self.outf then
return os.date(self.outf,tm)
else
return tostring(Date(d))
end
end
function Date.Format:US_order(yesno)
self.us = yesno
end
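-- With no format string, Date.Format() acts as the flexible parser described
-- above (illustrative):
--   local flex = Date.Format()
--   flex:parse '2010-05-10 12:35:23Z'   -- ISO 8601
--   flex:US_order(true)                 -- subsequent parses read 10/05/02 as Oct 5th, 2002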
local months = {jan=1,feb=2,mar=3,apr=4,may=5,jun=6,jul=7,aug=8,sep=9,oct=10,nov=11,dec=12}
--[[
Allowed patterns:
- [day] [monthname] [year] [time]
- [day]/[month][/year] [time]
]]
local is_word = stringx.isalpha
local is_number = stringx.isdigit
local function tonum(s,l1,l2,kind)
kind = kind or ''
local n = tonumber(s)
if not n then error(("%snot a number: '%s'"):format(kind,s)) end
if n < l1 or n > l2 then
error(("%s out of range: %s is not between %d and %d"):format(kind,s,l1,l2))
end
return n
end
local function parse_iso_end(p,ns,sec)
-- may be fractional part of seconds
local _,nfrac,secfrac = p:find('^%.%d+',ns+1)
if secfrac then
sec = sec .. secfrac
p = p:sub(nfrac+1)
else
p = p:sub(ns+1)
end
-- ISO 8601 dates may end in Z (for UTC) or [+-][isotime]
if p:match 'z$' then return sec, {h=0,m=0} end -- we're UTC!
p = p:gsub(':','') -- turn 00:30 to 0030
local _,_,sign,offs = p:find('^([%+%-])(%d+)')
if not sign then return sec, nil end -- not UTC
if #offs == 2 then offs = offs .. '00' end -- 01 to 0100
local tz = { h = tonumber(offs:sub(1,2)), m = tonumber(offs:sub(3,4)) }
if sign == '-' then tz.h = -tz.h; tz.m = -tz.m end
return sec, tz
end
local function parse_date_unsafe (s,US)
s = s:gsub('T',' ') -- ISO 8601
local parts = stringx.split(s:lower())
local i,p = 1,parts[1]
local function nextp() i = i + 1; p = parts[i] end
local year,min,hour,sec,apm
local tz
local _,nxt,day, month = p:find '^(%d+)/(%d+)'
if day then
-- swap for US case
if US then
day, month = month, day
end
_,_,year = p:find('^/(%d+)',nxt+1)
nextp()
else -- ISO
year,month,day = p:match('^(%d+)%-(%d+)%-(%d+)')
if year then
nextp()
end
end
if p and not year and is_number(p) then -- has to be date
day = p
nextp()
end
if p and is_word(p) then
p = p:sub(1,3)
local mon = months[p]
if mon then
month = mon
else error("not a month: " .. p) end
nextp()
end
if p and not year and is_number(p) then
year = p
nextp()
end
if p then -- time is hh:mm[:ss], hhmm[ss] or H.M[am|pm]
_,nxt,hour,min = p:find '^(%d+):(%d+)'
local ns
if nxt then -- are there seconds?
_,ns,sec = p:find ('^:(%d+)',nxt+1)
--if ns then
sec,tz = parse_iso_end(p,ns or nxt,sec)
--end
else -- might be h.m
_,ns,hour,min = p:find '^(%d+)%.(%d+)'
if ns then
apm = p:match '[ap]m$'
else -- or hhmm[ss]
local hourmin
_,nxt,hourmin = p:find ('^(%d+)')
if nxt then
hour = hourmin:sub(1,2)
min = hourmin:sub(3,4)
sec = hourmin:sub(5,6)
if #sec == 0 then sec = nil end
sec,tz = parse_iso_end(p,nxt,sec)
end
end
end
end
local today
if not (year and month and day) then
today = Date()
end
day = day and tonum(day,1,31,'day') or (month and 1 or today:day())
month = month and tonum(month,1,12,'month') or today:month()
year = year and tonumber(year) or today:year()
if year < 100 then -- two-digit year pivot
year = year + (year < 35 and 2000 or 1900)
end
hour = hour and tonum(hour,1,apm and 12 or 24,'hour') or 12
if apm == 'pm' then
hour = hour + 12
end
min = min and tonum(min,1,60) or 0
sec = sec and tonum(sec,1,60) or 0
local res = Date {year = year, month = month, day = day, hour = hour, min = min, sec = sec}
if tz then -- ISO 8601 UTC time
res:toUTC()
res:add {hour = tz.h}
if tz.m ~= 0 then res:add {min = tz.m} end
end
return res
end
function parse_date (s,US)
local ok, d = pcall(parse_date_unsafe,s,US)
if not ok then -- error
d = d:gsub('.-:%d+: ','')
return nil, d
else
return d
end
end
return Date

View File

@ -0,0 +1,553 @@
--- Python-style list class. <p>
-- Based on original code by Nick Trout.
-- <p>
-- <b>Please Note</b>: methods that change the list will return the list.
-- This is to allow for method chaining, but please note that <tt>ls = ls:sort()</tt>
-- does not mean that a new copy of the list is made. In-place (mutable) methods
-- are marked as returning 'the list' in this documentation.
-- <p>
-- See the Guide for further <a href="../../index.html#list">discussion</a>
-- <p>
-- See <a href="http://www.python.org/doc/current/tut/tut.html">http://www.python.org/doc/current/tut/tut.html</a>, section 5.1
-- <p>
-- <b>Note</b>: The comments before some of the functions are from the Python docs
-- and contain Python code.
-- <p>
-- Written for Lua version 4.0 <br />
-- Redone for Lua 5.1, Steve Donovan.
-- @class module
-- @name pl.List
-- @pragma nostrip
local tinsert,tremove,concat,tsort = table.insert,table.remove,table.concat,table.sort
local setmetatable, getmetatable,type,tostring,assert,string,next = setmetatable,getmetatable,type,tostring,assert,string,next
local write = io.write
local tablex = require 'pl.tablex'
local filter,imap,imap2,reduce,transform,tremovevalues = tablex.filter,tablex.imap,tablex.imap2,tablex.reduce,tablex.transform,tablex.removevalues
local tablex = tablex
local tsub = tablex.sub
local utils = require 'pl.utils'
local function_arg = utils.function_arg
local is_type = utils.is_type
local split = utils.split
local assert_arg = utils.assert_arg
local normalize_slice = tablex._normalize_slice
--[[
module ('pl.List',utils._module)
]]
local Multimap = utils.stdmt.MultiMap
-- metatable for our list objects
local List = utils.stdmt.List
List.__index = List
List._class = List
local iter
-- we give the metatable its own metatable so that we can call it like a function!
setmetatable(List,{
__call = function (tbl,arg)
return List.new(arg)
end,
})
local function makelist (t,obj)
local klass = List
if obj then
klass = getmetatable(obj)
end
return setmetatable(t,klass)
end
local function is_list(t)
return getmetatable(t) == List
end
local function simple_table(t)
return type(t) == 'table' and not is_list(t) and #t > 0
end
function List:_init (src)
if src then
for v in iter(src) do
tinsert(self,v)
end
end
end
--- Create a new list. Can optionally pass a table;
-- passing another instance of List will cause a copy to be created
-- we pass anything which isn't a simple table to iterate() to work out
-- an appropriate iterator @see List.iterate
-- @param t An optional list-like table
-- @return a new List
-- @usage ls = List(); ls = List {1,2,3,4}
function List.new(t)
local ls
if not simple_table(t) then
ls = {}
List._init(ls,t)
else
ls = t
end
makelist(ls)
return ls
end
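-- Method chaining as described in the module header (illustrative):
--   local ls = List {3,1,2}
--   ls:append(4):sort()   -- in-place; ls is now List {1,2,3,4}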
function List:clone()
local ls = makelist({},self)
List._init(ls,self)
return ls
end
function List.default_map_with(T)
return function(self,name)
local f = T[name]
if f then
return function(self,...)
return self:map(f,...)
end
else
error("method not found: "..name,2)
end
end
end
---Add an item to the end of the list.
-- @param i An item
-- @return the list
function List:append(i)
tinsert(self,i)
return self
end
List.push = tinsert
--- Extend the list by appending all the items in the given list.
-- equivalent to 'a[len(a):] = L'.
-- @param L Another List
-- @return the list
function List:extend(L)
assert_arg(1,L,'table')
for i = 1,#L do tinsert(self,L[i]) end
return self
end
--- Insert an item at a given position. i is the index of the
-- element before which to insert.
-- @param i index of element before which to insert
-- @param x A data item
-- @return the list
function List:insert(i, x)
assert_arg(1,i,'number')
tinsert(self,i,x)
return self
end
--- Insert an item at the beginning of the list.
-- @param x a data item
-- @return the list
function List:put (x)
return self:insert(1,x)
end
--- Remove an element given its index.
-- (equivalent of Python's del s[i])
-- @param i the index
-- @return the list
function List:remove (i)
assert_arg(1,i,'number')
tremove(self,i)
return self
end
--- Remove the first item from the list whose value is given.
-- (This is called 'remove' in Python; renamed to avoid confusion
-- with table.remove)
-- Return nil if there is no such item.
-- @param x A data value
-- @return the list
function List:remove_value(x)
for i=1,#self do
if self[i]==x then tremove(self,i) return self end
end
return self
end
--- Remove the item at the given position in the list, and return it.
-- If no index is specified, a:pop() returns the last item in the list.
-- The item is also removed from the list.
-- @param i An index
-- @return the item
function List:pop(i)
if not i then i = #self end
assert_arg(1,i,'number')
return tremove(self,i)
end
List.get = List.pop
--- Return the index in the list of the first item whose value is given.
-- Return nil if there is no such item.
-- @class function
-- @name List:index
-- @param x A data value
-- @param idx where to start search (default 1)
-- @return the index, or nil if not found.
local tfind = tablex.find
List.index = tfind
--- does this list contain the value?.
-- @param x A data value
-- @return true or false
function List:contains(x)
return tfind(self,x) and true or false
end
--- Return the number of times value appears in the list.
-- @param x A data value
-- @return number of times x appears
function List:count(x)
local cnt=0
for i=1,#self do
if self[i]==x then cnt=cnt+1 end
end
return cnt
end
--- Sort the items of the list, in place.
-- @param cmp an optional comparison function; '<' is used if not given.
-- @return the list
function List:sort(cmp)
tsort(self,cmp)
return self
end
--- Reverse the elements of the list, in place.
-- @return the list
function List:reverse()
local t = self
local n = #t
local n2 = n/2
for i = 1,n2 do
local k = n-i+1
t[i],t[k] = t[k],t[i]
end
return self
end
--- Emulate list slicing, like 'list[first:last]' in Python.
-- If first or last are negative then they are relative to the end of the list
-- eg. slice(-2) gives last 2 entries in a list, and
-- slice(-4,-2) gives from -4th to -2nd
-- @param first An index
-- @param last An index
-- @return a new List
function List:slice(first,last)
return tsub(self,first,last)
end
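-- Slicing examples (sketch, per the negative-index rules above):
--   local ls = List {10,20,30,40,50}
--   ls:slice(2,4)   -- List {20,30,40}
--   ls:slice(-2)    -- List {40,50}, the last two entries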
--- empty the list.
-- @return the list
function List:clear()
for i=1,#self do tremove(self) end
return self
end
local eps = 1.0e-10
--- Emulate Python's range(x) function.
-- Include it in List table for tidiness
-- @param start A number
-- @param finish A number greater than start; if omitted, the range is 0..start-1
-- @param incr an optional increment (may be less than 1)
-- @usage List.range(0,3) == List {0,1,2,3}
function List.range(start,finish,incr)
if not finish then
-- one-argument form: emulate Python's range(x), i.e. 0 .. x-1
finish = start - 1
start = 0
end
if incr then
if not utils.is_integer(incr) then finish = finish + eps end
else
incr = 1
end
assert_arg(1,start,'number')
assert_arg(2,finish,'number')
local t = List.new()
for i=start,finish,incr do tinsert(t,i) end
return t
end
--- list:len() is the same as #list.
function List:len()
return #self
end
-- Extended operations --
--- Remove a subrange of elements.
-- equivalent to 'del s[i1:i2]' in Python.
-- @param i1 start of range
-- @param i2 end of range
-- @return the list
function List:chop(i1,i2)
return tremovevalues(self,i1,i2)
end
--- Insert a sublist into a list
-- equivalent to 's[idx:idx] = list' in Python
-- @param idx index
-- @param list list to insert
-- @return the list
-- @usage l = List{10,20}; l:splice(2,{21,22}); assert(l == List{10,21,22,20})
function List:splice(idx,list)
assert_arg(1,idx,'number')
idx = idx - 1
local i = 1
for v in iter(list) do
tinsert(self,i+idx,v)
i = i + 1
end
return self
end
--- general slice assignment s[i1:i2] = seq.
-- @param i1 start index
-- @param i2 end index
-- @param seq a list
-- @return the list
function List:slice_assign(i1,i2,seq)
assert_arg(1,i1,'number')
assert_arg(1,i2,'number')
i1,i2 = normalize_slice(self,i1,i2)
if i2 >= i1 then self:chop(i1,i2) end
self:splice(i1,seq)
return self
end
--- concatenation operator.
-- @param L another List
-- @return a new list consisting of the list with the elements of the new list appended
function List:__concat(L)
assert_arg(1,L,'table')
local ls = self:clone()
ls:extend(L)
return ls
end
--- equality operator ==. True iff all elements of two lists are equal.
-- @param L another List
-- @return true or false
function List:__eq(L)
if #self ~= #L then return false end
for i = 1,#self do
if self[i] ~= L[i] then return false end
end
return true
end
--- join the elements of a list using a delimiter. <br>
-- This method uses tostring on all elements.
-- @param delim a delimiter string, can be empty.
-- @return a string
function List:join (delim)
delim = delim or ''
assert_arg(1,delim,'string')
return concat(imap(tostring,self),delim)
end
--- join a list of strings. <br>
-- Uses table.concat directly.
-- @class function
-- @name List:concat
-- @param delim a delimiter
-- @return a string
List.concat = concat
local function tostring_q(val)
local s = tostring(val)
if type(val) == 'string' then
s = '"'..s..'"'
end
return s
end
--- how our list should be rendered as a string. Uses join().
-- @see List:join
function List:__tostring()
return '{'..self:join(',',tostring_q)..'}'
end
--[[
-- NOTE: this works, but is unreliable. If you leave the loop before finishing,
-- then the iterator is not reset.
--- can iterate over a list directly.
-- @usage for v in ls do print(v) end
function List:__call()
if not self.key then self.key = 1 end
local value = self[self.key]
self.key = self.key + 1
if not value then self.key = nil end
return value
end
--]]
--[[
function List.__call(t,v,i)
i = (i or 0) + 1
v = t[i]
if v then return i, v end
end
--]]
--- call the function for each element of the list.
-- @param fun a function or callable object
-- @param ... optional values to pass to function
function List:foreach (fun,...)
local t = self
fun = function_arg(1,fun)
for i = 1,#t do
fun(t[i],...)
end
end
--- create a list of all elements which match a function.
-- @param fun a boolean function
-- @param arg optional argument to be passed as second argument of the predicate
-- @return a new filtered list.
function List:filter (fun,arg)
return makelist(filter(self,fun,arg),self)
end
--- split a string using a delimiter.
-- @param s the string
-- @param delim the delimiter (default spaces)
-- @return a List of strings
-- @see pl.utils.split
function List.split (s,delim)
assert_arg(1,s,'string')
return makelist(split(s,delim))
end
--- apply a function to all elements.
-- Any extra arguments will be passed to the function
-- @param fun a function of at least one argument
-- @param ... arbitrary extra arguments.
-- @return a new list: {f(x) for x in self}
-- @see pl.tablex.imap
function List:map (fun,...)
return makelist(imap(fun,self,...),self)
end
--- apply a function to all elements, in-place.
-- Any extra arguments are passed to the function.
-- @param fun A function that takes at least one argument
-- @param ... arbitrary extra arguments.
function List:transform (fun,...)
transform(fun,self,...)
end
--- apply a function to elements of two lists.
-- Any extra arguments will be passed to the function
-- @param fun a function of at least two arguments
-- @param ls another list
-- @param ... arbitrary extra arguments.
-- @return a new list: {f(x,y) for x in self, for y in ls}
-- @see pl.tablex.imap2
function List:map2 (fun,ls,...)
return makelist(imap2(fun,self,ls,...),self)
end
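-- Illustrative usage (a sketch, assuming two sequences of equal length):
--   List{1,2,3}:map2(function(x,y) return x+y end, {10,20,30})  --> List{11,22,33}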
--- apply a named method to all elements.
-- Any extra arguments will be passed to the method.
-- @param name name of method
-- @param ... extra arguments
-- @return a new list of the results
-- @see pl.seq.mapmethod
function List:mapm (name,...)
local res = {}
local t = self
for i = 1,#t do
local val = t[i]
local fn = val[name]
if not fn then error(type(val).." does not have method "..name,2) end
res[i] = fn(val,...)
end
return makelist(res,self)
end
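-- Illustrative usage (a sketch; string values get their methods from the string metatable):
--   List{'one','two'}:mapm('upper')  --> List{'ONE','TWO'}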
--- 'reduce' a list using a binary function.
-- @param fun a function of two arguments
-- @return result of the function
-- @see pl.tablex.reduce
function List:reduce (fun)
return reduce(fun,self)
end
--- partition a list using a classifier function.
-- The function may return nil, but this will be converted to the string key '<nil>'.
-- @param fun a function of at least one argument
-- @param ... will also be passed to the function
-- @return a table where the keys are the returned values, and the values are Lists
-- of values where the function returned that key. It is given the type of Multimap.
-- @see pl.MultiMap
function List:partition (fun,...)
fun = function_arg(1,fun)
local res = {}
for i = 1,#self do
local val = self[i]
local klass = fun(val,...)
if klass == nil then klass = '<nil>' end
if not res[klass] then res[klass] = List() end
res[klass]:append(val)
end
return setmetatable(res,Multimap)
end
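-- Illustrative usage (a sketch; the keys are whatever the classifier returns):
--   local parts = List{1,2,3,4,5}:partition(function(x) return x % 2 == 0 end)
--   parts[true]   --> List{2,4}
--   parts[false]  --> List{1,3,5}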
--- return an iterator over all values.
function List:iter ()
return iter(self)
end
--- Create an iterator over a sequence.
-- This captures the Python concept of 'sequence'.
-- For tables, iterates over all values with integer indices.
-- @param seq a sequence; a string (over characters), a table, a file object (over lines) or an iterator function
-- @usage for x in iterate {1,10,22,55} do io.write(x,',') end ==> 1,10,22,55
-- @usage for ch in iterate 'help' do io.write(ch,' ') end ==> h e l p
function List.iterate(seq)
if type(seq) == 'string' then
local idx = 0
local n = #seq
local sub = string.sub
return function ()
idx = idx + 1
if idx > n then return nil
else
return sub(seq,idx,idx)
end
end
elseif type(seq) == 'table' then
local idx = 0
local n = #seq
return function()
idx = idx + 1
if idx > n then return nil
else
return seq[idx]
end
end
elseif type(seq) == 'function' then
return seq
elseif type(seq) == 'userdata' and io.type(seq) == 'file' then
return seq:lines()
end
end
iter = List.iterate
return List

View File

@ -0,0 +1,108 @@
--- A Map class.
-- @class module
-- @name pl.Map
--[[
module ('pl.Map')
]]
local tablex = require 'pl.tablex'
local utils = require 'pl.utils'
local stdmt = utils.stdmt
local is_callable = utils.is_callable
local tmakeset,deepcompare,merge,keys,difference,tupdate = tablex.makeset,tablex.deepcompare,tablex.merge,tablex.keys,tablex.difference,tablex.update
local pretty_write = require 'pl.pretty' . write
local Map = stdmt.Map
local Set = stdmt.Set
local List = stdmt.List
local class = require 'pl.class'
-- the Map class ---------------------
class(nil,nil,Map)
local function makemap (m)
return setmetatable(m,Map)
end
function Map:_init (t)
local mt = getmetatable(t)
if mt == Set or mt == Map then
self:update(t)
else
return t -- otherwise assumed to be a map-like table
end
end
local function makelist(t)
return setmetatable(t,List)
end
--- list of keys.
Map.keys = tablex.keys
--- list of values.
Map.values = tablex.values
--- return an iterator over all key-value pairs.
function Map:iter ()
return pairs(self)
end
--- return a List of all key-value pairs, sorted by the keys.
function Map:items()
local ls = makelist(tablex.pairmap (function (k,v) return makelist {k,v} end, self))
ls:sort(function(t1,t2) return t1[1] < t2[1] end)
return ls
end
--- return the existing value for a key, or set and return a default value if the key is absent.
-- @param key the key
-- @param defaultval the default value to set and return when the key is missing
function Map:setdefault(key, defaultval)
return self[key] or self:set(key,defaultval) or defaultval
end
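-- Illustrative usage (a sketch, assuming a Map constructed elsewhere, e.g. local m = Map{alpha=1}):
--   m:setdefault('alpha',0)  --> 1  (existing value is kept)
--   m:setdefault('beta',0)   --> 0  (key is created with the default)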
--- size of map.
-- note: this is a relatively expensive operation!
-- @class function
-- @name Map:len
Map.len = tablex.size
--- put a value into the map.
-- @param key the key
-- @param val the value
function Map:set (key,val)
self[key] = val
end
--- get a value from the map.
-- @param key the key
-- @return the value, or nil if not found.
function Map:get (key)
return rawget(self,key)
end
local index_by = tablex.index_by
--- get a list of values indexed by a list of keys.
-- @param keys a list-like table of keys
-- @return a new List of the corresponding values
function Map:getvalues (keys)
return makelist(index_by(self,keys))
end
Map.iter = pairs
Map.update = tablex.update
function Map:__eq (m)
-- note we explicitly ask deepcompare _not_ to use __eq!
return deepcompare(self,m,true)
end
function Map:__tostring ()
return pretty_write(self,'')
end
return Map

View File

@ -0,0 +1,65 @@
--- MultiMap, a Map which has multiple values per key. <br>
-- @class module
-- @name pl.MultiMap
--[[
module ('pl.MultiMap')
]]
local classes = require 'pl.class'
local tablex = require 'pl.tablex'
local utils = require 'pl.utils'
local List = require 'pl.List'
local index_by,tsort,concat = tablex.index_by,table.sort,table.concat
local append,extend,slice = List.append,List.extend,List.slice
local append = table.insert
local is_type = utils.is_type
local class = require 'pl.class'
local Map = require 'pl.Map'
-- MultiMap is a standard MT
local MultiMap = utils.stdmt.MultiMap
class(Map,nil,MultiMap)
MultiMap._name = 'MultiMap'
function MultiMap:_init (t)
if not t then return end
self:update(t)
end
--- update a MultiMap using a table.
-- @param t either a MultiMap or a map-like table.
-- @return the map
function MultiMap:update (t)
utils.assert_arg(1,t,'table')
if Map:class_of(t) then
for k,v in pairs(t) do
self[k] = List()
self[k]:append(v)
end
else
for k,v in pairs(t) do
self[k] = List(v)
end
end
end
--- add a new value to a key. Setting a nil value removes the key.
-- @param key the key
-- @param val the value
-- @return the map
function MultiMap:set (key,val)
if val == nil then
self[key] = nil
else
if not self[key] then
self[key] = List()
end
self[key]:append(val)
end
end
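-- Illustrative usage (a sketch based on the set() doc above):
--   local mm = MultiMap()
--   mm:set('red',1); mm:set('red',2)  --> mm['red'] == List{1,2}
--   mm:set('red',nil)                 --> removes the 'red' key entirely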
return MultiMap

Some files were not shown because too many files have changed in this diff.