Merge branch 'master' into funkyfranky

This commit is contained in:
funkyfranky 2017-10-04 18:14:23 +02:00
commit 2d6b74ee9e
185 changed files with 36261 additions and 888 deletions

View File

@ -3,7 +3,7 @@
<listAttribute key="org.eclipse.debug.ui.favoriteGroups">
<listEntry value="org.eclipse.ui.externaltools.launchGroup"/>
</listAttribute>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/Moose_Framework/Utils/luarocks/lua5.1.exe}"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/Moose_Framework/Utils/Generate_Moose.bat}"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_TOOL_ARGUMENTS" value="&quot;Moose_Create.lua&quot; &#13;&#10;&quot;D&quot;&#13;&#10;&quot;${current_date}&quot; &#13;&#10;&quot;${workspace_loc:/Moose_Framework//Moose Development/Moose}&quot; &#13;&#10;&quot;${workspace_loc:/Moose_Framework/Moose Mission Setup}&quot;"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="${workspace_loc:/Moose_Framework/Moose Mission Setup}"/>
</launchConfiguration>

View File

@ -3,7 +3,7 @@
<listAttribute key="org.eclipse.debug.ui.favoriteGroups">
<listEntry value="org.eclipse.ui.externaltools.launchGroup"/>
</listAttribute>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/Moose_Framework/Utils/luarocks/lua5.1.exe}"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_LOCATION" value="${workspace_loc:/Moose_Framework/Utils/Generate_Moose.bat}"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_TOOL_ARGUMENTS" value="&quot;Moose_Create.lua&quot; &#13;&#10;&quot;S&quot;&#13;&#10;&quot;${current_date}&quot; &#13;&#10;&quot;${workspace_loc:/Moose_Framework//Moose Development/Moose}&quot; &#13;&#10;&quot;${workspace_loc:/Moose_Framework/Moose Mission Setup}&quot;"/>
<stringAttribute key="org.eclipse.ui.externaltools.ATTR_WORKING_DIRECTORY" value="${workspace_loc:/Moose_Framework/Moose Mission Setup}"/>
</launchConfiguration>

View File

@ -466,7 +466,6 @@ function AI_A2A:onafterStatus()
else
self:E( self.Controllable:GetName() .. " is out of fuel: " .. Fuel .. " ... RTB!" )
local OldAIControllable = self.Controllable
local AIControllableTemplate = self.Controllable:GetTemplate()
local OrbitTask = OldAIControllable:TaskOrbitCircle( math.random( self.PatrolFloorAltitude, self.PatrolCeilingAltitude ), self.PatrolMinSpeed )
local TimedOrbitTask = OldAIControllable:TaskControlled( OrbitTask, OldAIControllable:TaskCondition(nil,nil,nil,nil,self.PatrolOutOfFuelOrbitTime,nil ) )

View File

@ -2774,7 +2774,9 @@ do -- AI_A2A_DISPATCHER
self:F( { Grouping = DefenderGrouping, SquadronGrouping = DefenderSquadron.Grouping, DefaultGrouping = self.DefenderDefault.Grouping } )
self:F( { DefendersCount = DefenderCount, DefendersNeeded = DefendersNeeded } )
if DefendersNeeded > DefenderSquadron.Resources then
-- DefenderSquadron.Resources can have the value nil, which expresses unlimited resources.
-- DefendersNeeded cannot exceed DefenderSquadron.Resources!
if DefenderSquadron.Resources and DefendersNeeded > DefenderSquadron.Resources then
DefendersNeeded = DefenderSquadron.Resources
BreakLoop = true
end

View File

@ -834,7 +834,6 @@ function AI_PATROL_ZONE:onafterStatus()
if Fuel < self.PatrolFuelThresholdPercentage then
self:E( self.Controllable:GetName() .. " is out of fuel:" .. Fuel .. ", RTB!" )
local OldAIControllable = self.Controllable
local AIControllableTemplate = self.Controllable:GetTemplate()
local OrbitTask = OldAIControllable:TaskOrbitCircle( math.random( self.PatrolFloorAltitude, self.PatrolCeilingAltitude ), self.PatrolMinSpeed )
local TimedOrbitTask = OldAIControllable:TaskControlled( OrbitTask, OldAIControllable:TaskCondition(nil,nil,nil,nil,self.PatrolOutOfFuelOrbitTime,nil ) )

View File

@ -198,13 +198,17 @@ BASE = {
ClassID = 0,
Events = {},
States = {},
_ = {},
}
--- @field #BASE.__
BASE.__ = {}
--- @field #BASE._
BASE._ = {
Schedules = {} --- Contains the active Schedules
}
--- The Formation Class
-- @type FORMATION
-- @field Cone A cone formation.
@ -654,6 +658,86 @@ function BASE:onEvent(event)
end
end
do -- Scheduling
--- Schedule a new time event. Note that the schedule will only take place if the scheduler is *started*; even for a single scheduled event, the scheduler needs to be started.
-- @param #BASE self
-- @param #number Start Specifies the number of seconds to wait before the scheduling starts and the event function is called.
-- @param #function SchedulerFunction The event function to be called when a timer event occurs. The event function needs to accept the parameters specified in SchedulerArguments.
-- @param #table ... Optional arguments that can be given as part of the schedule. The arguments need to be given as a table { param1, param2, ... }.
-- @return #number The ScheduleID of the planned schedule.
function BASE:ScheduleOnce( Start, SchedulerFunction, ... )
self:F2( { Start } )
self:T3( { ... } )
local ObjectName = self.ClassName .. self.ClassID
self:F3( { "ScheduleOnce: ", ObjectName, Start } )
self.SchedulerObject = self
local ScheduleID = _SCHEDULEDISPATCHER:AddSchedule(
self,
SchedulerFunction,
{ ... },
Start,
nil,
nil,
nil
)
self._.Schedules[#self._.Schedules+1] = ScheduleID
return ScheduleID
end
--- Schedule a new repeating time event. Note that the schedule will only take place if the scheduler is *started*; even for a single scheduled event, the scheduler needs to be started.
-- @param #BASE self
-- @param #number Start Specifies the number of seconds to wait before the scheduling starts and the event function is called.
-- @param #number Repeat Specifies the interval in seconds at which the scheduler will call the event function.
-- @param #number RandomizeFactor Specifies a randomization factor between 0 and 1 to randomize the Repeat interval.
-- @param #number Stop Specifies the number of seconds after which the scheduler will be stopped.
-- @param #function SchedulerFunction The event function to be called when a timer event occurs. The event function needs to accept the parameters specified in SchedulerArguments.
-- @param #table ... Optional arguments that can be given as part of the schedule. The arguments need to be given as a table { param1, param2, ... }.
-- @return #number The ScheduleID of the planned schedule.
function BASE:ScheduleRepeat( Start, Repeat, RandomizeFactor, Stop, SchedulerFunction, ... )
self:F2( { Start } )
self:T3( { ... } )
local ObjectName = self.ClassName .. self.ClassID
self:F3( { "ScheduleRepeat: ", ObjectName, Start, Repeat, RandomizeFactor, Stop } )
self.SchedulerObject = self
local ScheduleID = _SCHEDULEDISPATCHER:AddSchedule(
self,
SchedulerFunction,
{ ... },
Start,
Repeat,
RandomizeFactor,
Stop
)
self._.Schedules[SchedulerFunction] = ScheduleID
return ScheduleID
end
--- Stops the Schedule.
-- @param #BASE self
-- @param #function SchedulerFunction The event function whose schedule needs to be stopped.
function BASE:ScheduleStop( SchedulerFunction )
self:F3( { "ScheduleStop:" } )
_SCHEDULEDISPATCHER:Stop( self, self._.Schedules[SchedulerFunction] )
end
end
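-- A minimal usage sketch of the scheduling methods above; MyObject stands for
-- any object derived from BASE and StatusCheck is an illustrative function,
-- neither is part of the MOOSE API.
local function StatusCheck( Text )
  env.info( "StatusCheck: " .. Text )
end
MyObject:ScheduleOnce( 10, StatusCheck, "one-shot after 10 seconds" )
MyObject:ScheduleRepeat( 10, 5, 0.1, 60, StatusCheck, "every 5 seconds for 60 seconds" )
MyObject:ScheduleStop( StatusCheck ) -- stops the repeating schedule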
--- Set a state or property of the Object given a Key and a Value.
-- Note that if the Object is destroyed, set to nil, or garbage collected, then the Values and Keys will also be gone.
-- @param #BASE self

View File

@ -834,6 +834,39 @@ do -- COORDINATE
end
--- Returns whether a Coordinate is within a certain Radius of this Coordinate in the 2D plane, using the X and Z axes.
-- @param #COORDINATE self
-- @param #COORDINATE Coordinate The coordinate that will be tested for being within the radius of this coordinate.
-- @param #number Radius The radius of the circle on the 2D plane around this coordinate.
-- @return #boolean true if in the Radius.
function COORDINATE:IsInRadius( Coordinate, Radius )
local InVec2 = self:GetVec2()
local Vec2 = Coordinate:GetVec2()
local InRadius = UTILS.IsInRadius( InVec2, Vec2, Radius)
return InRadius
end
--- Returns whether a Coordinate is within a certain radius of this Coordinate in 3D space, using the X, Y and Z axes.
-- The Radius thus defines a sphere in 3D space around this coordinate.
-- @param #COORDINATE self
-- @param #COORDINATE Coordinate The coordinate that will be tested for being within the radius of this coordinate.
-- @param #number Radius The radius of the sphere in the 3D space around this coordinate.
-- @return #boolean true if in the Sphere.
function COORDINATE:IsInSphere( Coordinate, Radius )
local InVec3 = self:GetVec3()
local Vec3 = Coordinate:GetVec3()
local InSphere = UTILS.IsInSphere( InVec3, Vec3, Radius)
return InSphere
end
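-- A minimal usage sketch of the two tests above; the coordinates and the
-- radius are illustrative.
local CoordA = COORDINATE:NewFromVec2( { x = 1000, y = 2000 } )
local CoordB = COORDINATE:NewFromVec2( { x = 1300, y = 2400 } )
if CoordA:IsInRadius( CoordB, 600 ) then -- 2D test on the X and Z axes
  env.info( "CoordB is within 600 m of CoordA in the 2D plane" )
end
if CoordA:IsInSphere( CoordB, 600 ) then -- 3D test, altitude included
  env.info( "CoordB is within a 600 m sphere around CoordA" )
end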
--- Return a BR string from a COORDINATE to the COORDINATE.
-- @param #COORDINATE self
-- @param #COORDINATE TargetCoordinate The target COORDINATE.

View File

@ -181,8 +181,21 @@ function AIRBASEPOLICE_BASE:_AirbaseMonitor()
Client:Message( "You are speeding on the taxiway! Slow down or you will be removed from this airbase! Your current velocity is " .. string.format( "%2.0f km/h", Velocity ), 5, "Warning " .. SpeedingWarnings .. " / 3" )
Client:SetState( self, "Warnings", SpeedingWarnings + 1 )
else
MESSAGE:New( "Player " .. Client:GetPlayerName() .. " has been removed from the airbase, due to a speeding violation ...", 10, "Airbase Police" ):ToAll()
Client:Destroy()
MESSAGE:New( "Player " .. Client:GetPlayerName() .. " is being damaged at the airbase, due to a speeding violation ...", 10, "Airbase Police" ):ToAll()
--- @param Wrapper.Client#CLIENT Client
local function DestroyUntilHeavilyDamaged( Client )
local ClientCoord = Client:GetCoordinate()
ClientCoord:Explosion( 100 )
local Damage = Client:GetLife()
local InitialLife = Client:GetLife0()
MESSAGE:New( "Player " .. Client:GetPlayerName() .. " Damage ... " .. Damage, 5, "Airbase Police" ):ToAll()
if ( Damage / InitialLife ) * 100 < 80 then
Client:ScheduleStop( DestroyUntilHeavilyDamaged )
end
end
Client:ScheduleOnce( 1, DestroyUntilHeavilyDamaged, Client )
--Client:ScheduleRepeat( 1, 1, 0, nil, DestroyUntilHeavilyDamaged, Client )
--Client:Destroy()
trigger.action.setUserFlag( "AIRCRAFT_"..Client:GetID(), 100)
Client:SetState( self, "Speeding", false )
Client:SetState( self, "Warnings", 0 )

View File

@ -442,7 +442,7 @@ function MISSILETRAINER._MenuMessages( MenuParameters )
if MenuParameters.Distance ~= nil then
self.Distance = MenuParameters.Distance
MESSAGE:New( "Hit detection distance set to " .. self.Distance * 1000 .. " meters", 15, "Menu" ):ToAll()
MESSAGE:New( "Hit detection distance set to " .. ( self.Distance * 1000 ) .. " meters", 15, "Menu" ):ToAll()
end
end

View File

@ -405,3 +405,20 @@ function UTILS.GetMarkID()
return UTILS._MarkID
end
-- Test if a Vec2 is in a radius of another Vec2
function UTILS.IsInRadius( InVec2, Vec2, Radius )
local InRadius = ( ( InVec2.x - Vec2.x ) ^2 + ( InVec2.y - Vec2.y ) ^2 ) ^ 0.5 <= Radius
return InRadius
end
-- Test if a Vec3 is in the sphere of another Vec3
function UTILS.IsInSphere( InVec3, Vec3, Radius )
local InSphere = ( ( InVec3.x - Vec3.x ) ^2 + ( InVec3.y - Vec3.y ) ^2 + ( InVec3.z - Vec3.z ) ^2 ) ^ 0.5 <= Radius
return InSphere
end
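-- A small worked example of the tests above: the point (3,4) lies exactly
-- 5 units from the origin, so it is inside a radius of 5 but not of 4.
local Origin = { x = 0, y = 0 }
local Point = { x = 3, y = 4 }
env.info( tostring( UTILS.IsInRadius( Origin, Point, 5 ) ) ) -- true, distance == 5
env.info( tostring( UTILS.IsInRadius( Origin, Point, 4 ) ) ) -- false, distance > 4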

View File

@ -384,7 +384,7 @@ function CONTROLLABLE:SetTask( DCSTask, WaitTime )
end
if not WaitTime or WaitTime == 0 then
SetTask( DCSTask )
SetTask( self, DCSTask )
else
self.TaskScheduler:Schedule( self, SetTask, { DCSTask }, WaitTime )
end
@ -1649,7 +1649,7 @@ do -- Patrol methods
self:E( { PatrolGroup = PatrolGroup:GetName() } )
if PatrolGroup:IsGround() then
if PatrolGroup:IsGround() or PatrolGroup:IsShip() then
local Waypoints = PatrolGroup:GetTemplateRoutePoints()
@ -1673,7 +1673,7 @@ do -- Patrol methods
-- A random waypoint will be picked and the group will move towards that point.
-- @param #CONTROLLABLE self
-- @return #CONTROLLABLE
function CONTROLLABLE:PatrolRouteRandom( Speed, Formation )
function CONTROLLABLE:PatrolRouteRandom( Speed, Formation, ToWaypoint )
local PatrolGroup = self -- Wrapper.Group#GROUP
@ -1683,30 +1683,40 @@ do -- Patrol methods
self:E( { PatrolGroup = PatrolGroup:GetName() } )
if PatrolGroup:IsGround() then
if PatrolGroup:IsGround() or PatrolGroup:IsShip() then
local Waypoints = PatrolGroup:GetTemplateRoutePoints()
local WaypointNumber = math.random( 1, #Waypoints )
self:E( { WaypointNumber = WaypointNumber } )
local Waypoint = Waypoints[WaypointNumber] -- Select random waypoint.
-- Calculate the new Route.
local FromCoord = PatrolGroup:GetCoordinate()
local FromWaypoint = 1
if ToWaypoint then
FromWaypoint = ToWaypoint
end
-- Select a random Zone and get the Coordinate of the new Zone.
local ToCoord = COORDINATE:NewFromVec2( { x = Waypoint.x + 10, y = Waypoint.y + 10 } )
-- Loop until a waypoint has been found that is not the same as the current waypoint.
-- Otherwise the object won't move or will drive in circles, and the algorithm would not do
-- what it is supposed to do, which is to make groups drive around.
local ToWaypoint
repeat
-- Select a random waypoint and check that it is not the same as the waypoint where the object currently is.
ToWaypoint = math.random( 1, #Waypoints )
until( ToWaypoint ~= FromWaypoint )
self:E( { FromWaypoint = FromWaypoint, ToWaypoint = ToWaypoint } )
local Waypoint = Waypoints[ToWaypoint] -- Select random waypoint.
local ToCoord = COORDINATE:NewFromVec2( { x = Waypoint.x, y = Waypoint.y } )
-- Create a "ground route point", which is a "point" structure that can be given as a parameter to a Task
local Route = {}
Route[#Route+1] = FromCoord:WaypointGround( 0 )
Route[#Route+1] = ToCoord:WaypointGround( Speed, Formation )
local TaskRouteToZone = PatrolGroup:TaskFunction( "CONTROLLABLE.PatrolRouteRandom", Speed, Formation )
local TaskRouteToZone = PatrolGroup:TaskFunction( "CONTROLLABLE.PatrolRouteRandom", Speed, Formation, ToWaypoint )
PatrolGroup:SetTaskWaypoint( Route[#Route], TaskRouteToZone ) -- Set the TaskRouteToZone at Waypoint 2 of the given Route.
PatrolGroup:Route( Route, 2 ) -- Move after a random number of seconds to the Route. See the Route method for details.
PatrolGroup:Route( Route, 1 ) -- Move after a random number of seconds to the Route. See the Route method for details.
end
end
@ -1728,7 +1738,7 @@ do -- Patrol methods
self:E( { PatrolGroup = PatrolGroup:GetName() } )
if PatrolGroup:IsGround() then
if PatrolGroup:IsGround() or PatrolGroup:IsShip() then
local Waypoints = PatrolGroup:GetTemplateRoutePoints()
local Waypoint = Waypoints[math.random( 1, #Waypoints )] -- Select random waypoint.
@ -1750,7 +1760,7 @@ do -- Patrol methods
PatrolGroup:SetTaskWaypoint( Route[#Route], TaskRouteToZone ) -- Set the TaskRouteToZone at Waypoint 2 of the given Route.
PatrolGroup:Route( Route, 2 ) -- Move after a random number of seconds to the Route. See the Route method for details.
PatrolGroup:Route( Route, 1 ) -- Move after a random number of seconds to the Route. See the Route method for details.
end
end

View File

@ -938,7 +938,7 @@ end
-- @return #table
function GROUP:GetTemplate()
local GroupName = self:GetName()
return _DATABASE:GetGroupTemplate( GroupName )
return UTILS.DeepCopy( _DATABASE:GetGroupTemplate( GroupName ) )
end
--- Returns the group template route.points[] (the waypoints) from the @{DATABASE} (_DATABASE object).
@ -946,7 +946,7 @@ end
-- @return #table
function GROUP:GetTemplateRoutePoints()
local GroupName = self:GetName()
return _DATABASE:GetGroupTemplate( GroupName ).route.points
return UTILS.DeepCopy( _DATABASE:GetGroupTemplate( GroupName ).route.points )
end
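-- A minimal sketch of why the deep copies above matter; "Tanker Group" is an
-- illustrative group name.
local Template = GROUP:FindByName( "Tanker Group" ):GetTemplate()
Template.route.points = {} -- mutating the returned table ...
-- ... no longer corrupts the master template held in _DATABASE, because
-- GetTemplate() now returns a deep copy instead of a reference.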

View File

@ -1,5 +1,5 @@
env.info( '*** MOOSE DYNAMIC INCLUDE START *** ' )
env.info( 'Moose Generation Timestamp: 20170924_2152' )
env.info( 'Moose Generation Timestamp: 20171003_1348' )
local base = _G

View File

@ -0,0 +1,85 @@
env.info('*** MOOSE DYNAMIC INCLUDE START *** ')
env.info('Moose Generation Timestamp: 20171003_1348')
local base=_G
__Moose={}
__Moose.Include=function(IncludeFile)
if not __Moose.Includes[IncludeFile]then
__Moose.Includes[IncludeFile]=IncludeFile
local f=assert(base.loadfile(__Moose.ProgramPath..IncludeFile))
if f==nil then
error("Moose: Could not load Moose file "..IncludeFile)
else
env.info("Moose: "..IncludeFile.." dynamically loaded from "..__Moose.ProgramPath)
return f()
end
end
end
__Moose.ProgramPath="Scripts/Moose/"
__Moose.Includes={}
__Moose.Include('Utilities/Routines.lua')
__Moose.Include('Utilities/Utils.lua')
__Moose.Include('Core/Base.lua')
__Moose.Include('Core/Report.lua')
__Moose.Include('Core/Scheduler.lua')
__Moose.Include('Core/ScheduleDispatcher.lua')
__Moose.Include('Core/Event.lua')
__Moose.Include('Core/Settings.lua')
__Moose.Include('Core/Menu.lua')
__Moose.Include('Core/Zone.lua')
__Moose.Include('Core/Database.lua')
__Moose.Include('Core/Set.lua')
__Moose.Include('Core/Point.lua')
__Moose.Include('Core/Message.lua')
__Moose.Include('Core/Fsm.lua')
__Moose.Include('Core/Radio.lua')
__Moose.Include('Core/SpawnStatic.lua')
__Moose.Include('Core/Cargo.lua')
__Moose.Include('Core/Spot.lua')
__Moose.Include('Wrapper/Object.lua')
__Moose.Include('Wrapper/Identifiable.lua')
__Moose.Include('Wrapper/Positionable.lua')
__Moose.Include('Wrapper/Controllable.lua')
__Moose.Include('Wrapper/Group.lua')
__Moose.Include('Wrapper/Unit.lua')
__Moose.Include('Wrapper/Client.lua')
__Moose.Include('Wrapper/Static.lua')
__Moose.Include('Wrapper/Airbase.lua')
__Moose.Include('Wrapper/Scenery.lua')
__Moose.Include('Functional/Scoring.lua')
__Moose.Include('Functional/CleanUp.lua')
__Moose.Include('Functional/Spawn.lua')
__Moose.Include('Functional/Movement.lua')
__Moose.Include('Functional/Sead.lua')
__Moose.Include('Functional/Escort.lua')
__Moose.Include('Functional/MissileTrainer.lua')
__Moose.Include('Functional/AirbasePolice.lua')
__Moose.Include('Functional/Detection.lua')
__Moose.Include('Functional/Designate.lua')
__Moose.Include('Functional/RAT.lua')
__Moose.Include('AI/AI_Balancer.lua')
__Moose.Include('AI/AI_A2A.lua')
__Moose.Include('AI/AI_A2A_Patrol.lua')
__Moose.Include('AI/AI_A2A_Cap.lua')
__Moose.Include('AI/AI_A2A_Gci.lua')
__Moose.Include('AI/AI_A2A_Dispatcher.lua')
__Moose.Include('AI/AI_Patrol.lua')
__Moose.Include('AI/AI_Cap.lua')
__Moose.Include('AI/AI_Cas.lua')
__Moose.Include('AI/AI_Bai.lua')
__Moose.Include('AI/AI_Formation.lua')
__Moose.Include('Actions/Act_Assign.lua')
__Moose.Include('Actions/Act_Route.lua')
__Moose.Include('Actions/Act_Account.lua')
__Moose.Include('Actions/Act_Assist.lua')
__Moose.Include('Tasking/CommandCenter.lua')
__Moose.Include('Tasking/Mission.lua')
__Moose.Include('Tasking/Task.lua')
__Moose.Include('Tasking/DetectionManager.lua')
__Moose.Include('Tasking/Task_A2G_Dispatcher.lua')
__Moose.Include('Tasking/Task_A2G.lua')
__Moose.Include('Tasking/Task_A2A_Dispatcher.lua')
__Moose.Include('Tasking/Task_A2A.lua')
__Moose.Include('Tasking/Task_Cargo.lua')
__Moose.Include('Moose.lua')
BASE:TraceOnOff(true)
env.info('*** MOOSE INCLUDE END *** ')

View File

@ -1,362 +0,0 @@
[SIZE=7]MOOSE Release 2.1.0[/SIZE]
Finally it is here, release 2.1.0 of MOOSE!
It took some time to prepare this release, as it was a lot of work to get the building blocks of the framework developed and tested. You'll find in this release a lot of new features as well as a couple of important bug fixes.
Release 2.1.0 is now published into the [B]master-release-2.1[/B] branch of this repository on github.
You can download the file moose.lua below to use MOOSE in your missions.
The moose.lua file is also located [URL="https://github.com/FlightControl-Master/MOOSE/blob/master-release-2.1/Moose%20Mission%20Setup/Moose.lua"]here[/URL] in the [B]master-release-2.1[/B] branch.
Those who are using the [B]master[/B] branch can continue to beta test, as new bleeding edge features will be added soon in preparation for release 2.2.0! There are many topics on the agenda to be added.
[B]This release would not have been possible without the help and contribution of many members of this community. THANK YOU![/B]
[SIZE=6]In summary:[/SIZE]
This release brings you [B]an improved tasking mechanism[/B].
Tasking is the system in MOOSE that allows you to:
* Execute [B]co-op[/B] missions and tasks
* [B]Detect[/B] targets dynamically
* Define new tasks [B]dynamically[/B]
* Execute the tasks
* Complete the mission [B]goals[/B]
* Extensive menu system and briefings/reports for [B]player interaction[/B]
* Improved Scoring of mission goal achievements, and task achievements.
On top, this release brings you new functionality through the introduction of new classes to:
* [B]Designate targets[/B] (lase, smoke or illuminate targets) by AI, assisting your attack. Allows laser-guided bombs to be dropped.
* A new [B]tasking[/B] system to [B]transport cargo[/B] of various types
* Dynamically [B]spawn static objects[/B]
* Improved [B]coordinate system[/B]
* Build [B]large formations[/B], like bombers flying to a target area
[SIZE=6]1. TASKING SYSTEM![/SIZE]
A lot of work has been done in improving the tasking framework within MOOSE.
**The tasking system comes with TASK DISPATCHING mechanisms, that DYNAMICALLY
allocate new tasks based on the tactical or strategical situation in the mission!!!
These tasks can then be engaged upon by the players!!!**
The [URL="http://flightcontrol-master.github.io/MOOSE/Documentation/Task_A2G_Dispatcher.html"]TASK_A2G_DISPATCHER[/URL] class implements the dynamic dispatching of tasks upon groups of detected units determined by a Set of FAC (groups). The FAC will detect units, will group them, and will dispatch Tasks to groups of players. Depending on the type of target detected, different tasks will be dispatched. Find a summary below describing for which situation a task type is created:
* [B]CAS Task[/B]: Is created when there are enemy ground units within range of the FAC, while there are friendly units in the FAC perimeter.
* [B]BAI Task[/B]: Is created when there are enemy ground units within range of the FAC, while there are NO other friendly units within the FAC perimeter.
* [B]SEAD Task[/B]: Is created when there are enemy ground units within range of the FAC, with air search radars.
More TASK_... dispatcher classes are to come in the future, like A2A, G2G, etc...
Improvements on the TASKING are in summary:
* A COMMANDCENTER has a dedicated menu.
* A MISSION has a dedicated menu system.
* A MISSION has a briefing report.
* A MISSION has dedicated status reports.
* A MISSION has for each TASK TYPE a menu.
* A MISSION has for each TASK TYPE a dedicated menu system for each TASK defined.
* A MISSION has an "assigned" task menu that contains menu actions relevant to the assigned task.
* A TASK (of various types) has a dedicated menu system.
* A TASK has a briefing report.
* A TASK has dedicated status reports.
* Player reports can be retrieved that explain which player is at which task.
* ...
TASKING is vast, and at the moment there is too much to explain.
[B]The best way to explore the TASKING is to TRY it...[/B]
I suggest you have a look at the [URL="https://www.youtube.com/watch?v=v2Us8SS1-44&t=1070s"]GORI Valley Mission - Iteration 3[/URL].
Many people have contributed to the testing of the mechanism, especially:
@baluballa, @doom, @whiplash
[SIZE=6]2. New MOOSE classes have been added.[/SIZE]
MOOSE 2.1.0 comes with new classes that extend the functionality of the MOOSE framework and allow you to do new things in your missions:
[SIZE=5]2.1. Target designation by laser, smoke or illumination.[/SIZE]
[URL="http://flightcontrol-master.github.io/MOOSE/Documentation/Designate.html"]DESIGNATE[/URL] orchestrates the designation of potential targets executed by a Recce group,
and communicates these to a dedicated attacking group of players,
so that following a dynamically generated menu system,
each detected set of potential targets can be lased or smoked...
Targets can be:
* [B]Lased[/B] for a period of time.
* [B]Smoked[/B]. Artillery or airplanes with illumination ordnance need to be present. (WIP, but early demo ready.)
* [B]Illuminated[/B] through an illumination bomb. Artillery or airplanes with illumination ordnance need to be present. (WIP, but early demo ready.)
This class was made with the help of @EasyEB and many others.
[URL="https://www.youtube.com/playlist?list=PL7ZUrU4zZUl0dQ9UKQMb7YL8z2sKSqemH"]DESIGNATE is demonstrated on youtube[/URL]
DESIGNATE demonstration missions:
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/DES%20-%20Designation"]DES - Designation[/URL]
[SIZE=5]2.2. Transport cargo of different types to various locations as a human task within a mission.[/SIZE]
The MOOSE framework provides various CARGO classes that allow DCS physical or logical objects to be transported or sling loaded by Carriers.
The CARGO_ classes, as part of the MOOSE core, are able to Board, Load, UnBoard and UnLoad cargo between Carrier units.
The collection of classes in this module defines tasks for human players to handle these cargo objects.
Cargo can be transported, picked-up, deployed and sling-loaded from and to other places.
[URL="http://flightcontrol-master.github.io/MOOSE/Documentation/Task_Cargo.html#TASK_CARGO_TRANSPORT"]TASK_CARGO_TRANSPORT[/URL] defines a task for a human player to transport a set of cargo between various zones.
It is the first class that forms part of the TASK_CARGO classes suite.
The TASK_CARGO classes provide you with a flexible tasking system,
that allows you to transport cargo of various types between various locations
and various dedicated deployment zones.
A human player can join the battlefield in a client airborne slot or a ground vehicle within the CA module (ALT-J).
The player needs to accept the task from the task overview list within the mission, using the radio menus.
Once the TASK_CARGO_TRANSPORT is assigned to the player and accepted by the player, the player will obtain
an extra [B]Cargo Handling Radio Menu[/B] that contains the CARGO objects that need to be transported.
Cargo can be transported towards different [B]Deployment Zones[/B], but can also be deployed anywhere within the battlefield.
The Cargo Handling Radio Menu system allows you to execute [B]various actions[/B] to handle the cargo.
In the menu, you'll find, for each CARGO that is part of the scope of the task, various actions that can be completed.
Depending on the location of your Carrier unit, the menu options will vary.
The [URL="http://flightcontrol-master.github.io/MOOSE/Documentation/Cargo.html#CARGO_GROUP"]CARGO_GROUP[/URL] class defines a
cargo that is represented by a GROUP object within the simulator, and can be transported by a carrier.
The [URL="http://flightcontrol-master.github.io/MOOSE/Documentation/Cargo.html#CARGO_UNIT"]CARGO_UNIT[/URL] class defines a
cargo that is represented by a UNIT object within the simulator, and can be transported by a carrier.
Mission designers can use the [URL="http://flightcontrol-master.github.io/MOOSE/Documentation/Set.html#SET_CARGO"]SET_CARGO[/URL]
class to build sets of cargos.
Note 1: [B]Various other CARGO classes are defined and are WIP[/B].
Now that the foundation for Cargo handling is taking shape, future releases will bring other types of CARGO handling
classes to the MOOSE framework quickly. Sling-loading, package, beacon and other types of CARGO will be released soon.
Note 2: [B]AI_CARGO has been renamed to CARGO and now forms part of the Core of MOOSE[/B].
If you were using AI_CARGO in your missions, please rename AI_CARGO to CARGO...
TASK_TRANSPORT_CARGO is demonstrated at the [URL="https://www.youtube.com/watch?v=v2Us8SS1-44&t=1070s"]GORI Valley Mission - Iteration 4[/URL]
TASK_TRANSPORT_CARGO demonstration missions:
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-110%20-%20Ground%20-%20Transport%20Cargo%20Group"]TSK-110 - Ground - Transport Cargo Group[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-210%20-%20Helicopter%20-%20Transport%20Cargo%20Group"]TSK-210 - Helicopter - Transport Cargo Group[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-211%20-%20Helicopter%20-%20Transport%20Multiple%20Cargo%20Groups"]TSK-211 - Helicopter - Transport Multiple Cargo Groups[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-212%20-%20Helicopter%20-%20Cargo%20handle%20PickedUp%20and%20Deployed%20events"]TSK-212 - Helicopter - Cargo handle PickedUp and Deployed events[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-213%20-%20Helicopter%20-%20Cargo%20Group%20Destroyed"]TSK-213 - Helicopter - Cargo Group Destroyed[/URL]
[SIZE=5]2.3. Dynamically spawn STATIC objects into your mission.[/SIZE]
The [URL="http://flightcontrol-master.github.io/MOOSE/Documentation/SpawnStatic.html#SPAWNSTATIC"]SPAWNSTATIC[/URL] class allows new Statics to be spawned dynamically.
By creating a copy of an existing static object template as defined in the Mission Editor (ME), SPAWNSTATIC can retrieve the properties of the defined static object template (like type, category, etc.), and "copy" these properties to create a new static object and place it at the desired coordinate.
Newly spawned Statics get the same name as the template Static, or get the given name when a new name is provided to the Spawn method.
SPAWNSTATIC demonstration missions:
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/SPS%20-%20Spawning%20Statics/SPS-100%20-%20Simple%20Spawning"]SPS-100 - Simple Spawning[/URL]
[SIZE=5]2.4. Better coordinate management in MGRS, LL or LL Decimal.[/SIZE]
The [URL="http://flightcontrol-master.github.io/MOOSE/Documentation/Point.html#COORDINATE"]COORDINATE[/URL] class
defines a 2D coordinate in the simulator. A COORDINATE can be expressed in LL or in MGRS.
[SIZE=5]2.5. Improved scoring system[/SIZE]
Scoring is implemented through the [URL="http://flightcontrol-master.github.io/MOOSE/Documentation/Scoring.html"]SCORING[/URL] class.
The scoring system has been improved a lot! Now, the scoring correctly counts scores on normal units, statics and scenery objects.
Specific scores can be registered for specific targets. The scoring works together with the tasking system, so players can achieve
additional scores when they achieve goals!
SCORING demonstration missions:
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/SCO%20-%20Scoring/SCO-100%20-%20Scoring%20of%20Statics"]SCO-100 - Scoring of Statics[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/SCO%20-%20Scoring/SCO-101%20-%20Scoring%20Client%20to%20Client"]SCO-101 - Scoring Client to Client[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/SCO%20-%20Scoring/SCO-500%20-%20Scoring%20Multi%20Player%20Demo%20Mission%201"]SCO-500 - Scoring Multi Player Demo Mission 1[/URL]
[SIZE=5]2.6. Beacons and Radio[/SIZE]
The Radio module contains 2 classes: RADIO and BEACON.
What are radio communications in DCS?
* Radio transmissions consist of [B]sound files[/B] that are broadcast on a specific [B]frequency[/B] (e.g. 115 MHz) and [B]modulation[/B] (e.g. AM),
* They can be [B]subtitled[/B] for a specific [B]duration[/B], the [B]power[/B] in Watts of the transmitter's antenna can be set, and the transmission can be [B]looped[/B].
These classes are the work of @Grey-Echo.
RADIO and BEACON demonstration missions:
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/RAD%20-%20Radio/RAD-000%20-%20Transmission%20from%20Static"]RAD-000 - Transmission from Static[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/RAD%20-%20Radio/RAD-001%20-%20Transmission%20from%20UNIT%20or%20GROUP"]RAD-001 - Transmission from UNIT or GROUP[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/RAD%20-%20Radio/RAD-002%20-%20Transmission%20Tips%20and%20Tricks"]RAD-002 - Transmission Tips and Tricks[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/RAD%20-%20Radio/RAD-010%20-%20Beacons"] RAD-010 - Beacons[/URL]
[SIZE=5]2.7. Build large formations of AI.[/SIZE]
[URL="http://flightcontrol-master.github.io/MOOSE/Documentation/AI_Formation.html"]AI_FORMATION[/URL] makes AI @{GROUP}s fly in formations of various compositions.
The AI_FORMATION class models formations in a different manner than the internal DCS formation logic!!!
The purpose of the class is to:
* Make formation building a process that can be managed while in flight, rather than a task.
* Let human players guide formations consisting of large planes.
* Build large formations (like a large bomber field).
* Form formations that DCS does not support off the shelf.
AI_FORMATION Demo Missions: [URL=""]FOR - AI Group Formation[/URL]
AI_FORMATION demonstration missions:
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-100%20-%20Bomber%20Left%20Line%20Formation"]FOR-100 - Bomber Left Line Formation[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-101%20-%20Bomber%20Right%20Line%20Formation"]FOR-101 - Bomber Right Line Formation[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-102%20-%20Bomber%20Left%20Wing%20Formation"]FOR-102 - Bomber Left Wing Formation[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-103%20-%20Bomber%20Right%20Wing%20Formation"]FOR-103 - Bomber Right Wing Formation[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-104%20-%20Bomber%20Center%20Wing%20Formation"]FOR-104 - Bomber Center Wing Formation[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-105%20-%20Bomber%20Trail%20Formation"]FOR-105 - Bomber Trail Formation[/URL]
* [URL="https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-106%20-%20Bomber%20Box%20Formation"]FOR-106 - Bomber Box Formation[/URL]
Note: The AI_FORMATION is currently a first version showing the potential, a "building block". From this class, further classes will be derived and the class will be fine-tuned.
[SIZE=6]3. A lot of components have been reworked and bugs have been fixed.[/SIZE]
[SIZE=5]3.1. Better event handling and event dispatching.[/SIZE]
The underlying mechanisms to handle DCS events have been improved. Bugs have been fixed.
The MISSION_END event is now also supported.
[SIZE=5]3.2. Cargo handling has been made much better now.[/SIZE]
As a result of this rework, some of the WIP cargo classes that were defined earlier are still WIP.
But as mentioned earlier, new CARGO classes can be published faster now.
The framework is now more consistent internally.
[SIZE=6]4. A lot of new methods have been defined in several existing or new classes.[/SIZE]
AI_FORMATION:New( FollowUnit, FollowGroupSet, FollowName, FollowBriefing ) --R2.1
AI_FORMATION:TestSmokeDirectionVector( SmokeDirection ) --R2.1
AI_FORMATION:onafterFormationLine( FollowGroupSet, From , Event , To, XStart, XSpace, YStart, YSpace, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationTrail( FollowGroupSet, From , Event , To, XStart, XSpace, YStart ) --R2.1
AI_FORMATION:onafterFormationStack( FollowGroupSet, From , Event , To, XStart, XSpace, YStart, YSpace ) --R2.1
AI_FORMATION:onafterFormationLeftLine( FollowGroupSet, From , Event , To, XStart, YStart, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationRightLine( FollowGroupSet, From , Event , To, XStart, YStart, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationLeftWing( FollowGroupSet, From , Event , To, XStart, XSpace, YStart, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationRightWing( FollowGroupSet, From , Event , To, XStart, XSpace, YStart, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationCenterWing( FollowGroupSet, From , Event , To, XStart, XSpace, YStart, YSpace, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationVic( FollowGroupSet, From , Event , To, XStart, XSpace, YStart, YSpace, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationBox( FollowGroupSet, From , Event , To, XStart, XSpace, YStart, YSpace, ZStart, ZSpace, ZLevels ) --R2.1
AI_FORMATION:SetFlightRandomization( FlightRandomization ) --R2.1
AI_FORMATION:onenterFollowing( FollowGroupSet ) --R2.1
CARGO:GetName()
CARGO:GetObjectName()
DATABASE:ForEachStatic( IteratorFunction, FinalizeFunction, ... )
EVENT:Reset( EventObject ) --R2.1
POINT_VEC3:IsLOS( ToPointVec3 ) --R2.1
COORDINATE:New( x, y, LandHeightAdd ) --R2.1 Fixes issue #424.
COORDINATE:NewFromVec2( Vec2, LandHeightAdd ) --R2.1 Fixes issue #424.
COORDINATE:NewFromVec3( Vec3 ) --R2.1 Fixes issue #424.
COORDINATE:ToStringLL( LL_Accuracy, LL_DMS ) --R2.1 Fixes issue #424.
COORDINATE:ToStringMGRS( MGRS_Accuracy ) --R2.1 Fixes issue #424.
COORDINATE:ToString() --R2.1 Fixes issue #424.
COORDINATE:CoordinateMenu( RootMenu ) --R2.1 Fixes issue #424.
COORDINATE:MenuSystem( System ) --R2.1 Fixes issue #424.
COORDINATE:MenuLL_Accuracy( LL_Accuracy ) --R2.1 Fixes issue #424.
COORDINATE:MenuLL_DMS( LL_DMS ) --R2.1 Fixes issue #424.
COORDINATE:MenuMGRS_Accuracy( MGRS_Accuracy ) --R2.1 Fixes issue #424.
SET_BASE:FilterDeads() --R2.1 allow deads to be filtered to automatically handle deads in the collection.
SET_BASE:FilterCrashes() --R2.1 allow crashes to be filtered to automatically handle crashes in the collection.
SET_UNIT:ForEachUnitPerThreatLevel( FromThreatLevel, ToThreatLevel, IteratorFunction, ... ) --R2.1 Threat Level implementation
SET_CARGO:New() --R2.1
SET_CARGO:AddCargosByName( AddCargoNames ) --R2.1
SET_CARGO:RemoveCargosByName( RemoveCargoNames ) --R2.1
SET_CARGO:FindCargo( CargoName ) --R2.1
SET_CARGO:FilterCoalitions( Coalitions ) --R2.1
SET_CARGO:FilterTypes( Types ) --R2.1
SET_CARGO:FilterCountries( Countries ) --R2.1
SET_CARGO:FilterPrefixes( Prefixes ) --R2.1
SET_CARGO:FilterStart() --R2.1
SET_CARGO:AddInDatabase( Event ) --R2.1
SET_CARGO:FindInDatabase( Event ) --R2.1
SET_CARGO:ForEachCargo( IteratorFunction, ... ) --R2.1
SET_CARGO:FindNearestCargoFromPointVec2( PointVec2 ) --R2.1
SET_CARGO:IsIncludeObject( MCargo ) --R2.1
SET_CARGO:OnEventNewCargo( EventData ) --R2.1
SET_CARGO:OnEventDeleteCargo( EventData ) --R2.1
SPAWNSTATIC:NewFromStatic( SpawnTemplatePrefix, CountryID ) --R2.1
SPAWNSTATIC:NewFromType( SpawnTypeName, SpawnShapeName, SpawnCategory, CountryID ) --R2.1
SPAWNSTATIC:SpawnFromPointVec2( PointVec2, Heading, NewName ) --R2.1
SPAWNSTATIC:SpawnFromZone( Zone, Heading, NewName ) --R2.1
ZONE_BASE:GetCoordinate( Height ) --R2.1
DESIGNATE:SetFlashStatusMenu( FlashMenu ) --R2.1
DESIGNATE:SetLaserCodes( LaserCodes ) --R2.1
DESIGNATE:GenerateLaserCodes() --R2.1
DESIGNATE:SetAutoLase( AutoLase ) --R2.1
DESIGNATE:SetThreatLevelPrioritization( Prioritize ) --R2.1
DETECTION_BASE:CleanDetectionItems() --R2.1 Clean the DetectionItems list
DETECTION_BASE:GetDetectedItemID( Index ) --R2.1
DETECTION_BASE:GetDetectedID( Index ) --R2.1
DETECTION_AREAS:DetectedReportDetailed() --R2.1 Fixed missing report
REPORT:HasText() --R2.1
REPORT:SetIndent( Indent ) --R2.1
REPORT:AddIndent( Text ) --R2.1
MISSION:GetMenu( TaskGroup ) -- R2.1 -- Changed Menu Structure
TASK:SetMenu( MenuTime ) --R2.1 Mission Reports and Task Reports added. Fixes issue #424.
TASK:ReportSummary() --R2.1 fixed report. Now nicely formatted and contains the info required.
TASK:ReportOverview() --R2.1 fixed report. Now nicely formatted and contains the info required.
TASK:GetPlayerCount() --R2.1 Get a count of the players.
TASK:GetPlayerNames() --R2.1 Get a map of the players.
TASK:ReportDetails() --R2.1 fixed report. Now nicely formatted and contains the info required.
UTILS.tostringMGRS = function(MGRS, acc) --R2.1
POSITIONABLE:GetBoundingBox() --R2.1
POSITIONABLE:GetHeight() --R2.1
POSITIONABLE:GetMessageText( Message, Name ) --R2.1 added
POSITIONABLE:GetMessage( Message, Duration, Name ) --R2.1 changed callsign and name and using GetMessageText
POSITIONABLE:MessageToSetGroup( Message, Duration, MessageSetGroup, Name ) --R2.1
POSITIONABLE:GetRadio() --R2.1
POSITIONABLE:GetBeacon() --R2.1
POSITIONABLE:LaseUnit( Target, LaserCode, Duration ) --R2.1
POSITIONABLE:LaseOff() --R2.1
POSITIONABLE:IsLasing() --R2.1
POSITIONABLE:GetSpot() --R2.1
POSITIONABLE:GetLaserCode() --R2.1
UNIT:IsDetected( TargetUnit ) --R2.1
UNIT:IsLOS( TargetUnit ) --R2.1

View File

@ -1,363 +0,0 @@
# MOOSE Release 2.1.0
Finally it is here, release 2.1.0 of MOOSE!
It took some time to prepare this release, as it was a lot of work to get the building blocks of the framework developed and tested. You'll find in this release a lot of new features as well as a couple of important bug fixes.
Release 2.1.0 is now published into the **master-release-2.1** branch of this repository on github.
You can download the file moose.lua below to use MOOSE in your missions.
The moose.lua file is also located [here](https://github.com/FlightControl-Master/MOOSE/blob/master-release-2.1/Moose%20Mission%20Setup/Moose.lua) in the **master-release-2.1** branch.
Those who are using the **master** branch can continue to beta test, as new bleeding edge features will be added soon in preparation for release 2.2.0! There are many topics on the agenda to be added.
**This release would not have been possible without the help and contribution of many
members of this community. THANK YOU!**
## In summary:
This release brings you **an improved tasking mechanism**.
Tasking is the system in MOOSE that allows you to:
* Execute **co-op** missions and tasks
* **Detect** targets dynamically
* Define new tasks **dynamically**
* Execute the tasks
* Complete the mission **goals**
* Extensive menu system and briefings/reports for **player interaction**
* Improved Scoring of mission goal achievements, and task achievements.
On top, this release brings you new functionality through the introduction of new classes to:
* **Designate targets** (lase, smoke or illuminate targets) by AI, assisting your attack. Allows laser-guided bombs to be dropped.
* A new **tasking** system to **transport cargo** of various types
* Dynamically **spawn static objects**
* Improved **coordinate system**
* Build **large formations**, like bombers flying to a target area
## 1. TASKING SYSTEM!
A lot of work has been done in improving the tasking framework within MOOSE.
**The tasking system comes with TASK DISPATCHING mechanisms, that DYNAMICALLY
allocate new tasks based on the tactical or strategical situation in the mission!!!
These tasks can then be engaged upon by the players!!!**
The [TASK\_A2G\_DISPATCHER](http://flightcontrol-master.github.io/MOOSE/Documentation/Task_A2G_Dispatcher.html) class implements the dynamic dispatching of tasks upon groups of detected units determined by a Set of FAC (groups). The FAC will detect units, will group them, and will dispatch Tasks to groups of players. Depending on the type of target detected, different tasks will be dispatched. Find a summary below describing for which situation a task type is created:
* **CAS Task**: Is created when there are enemy ground units within range of the FAC, while there are friendly units in the FAC perimeter.
* **BAI Task**: Is created when there are enemy ground units within range of the FAC, while there are NO other friendly units within the FAC perimeter.
* **SEAD Task**: Is created when there are enemy ground units within range of the FAC, with air search radars.
More TASK_... dispatcher classes are to come in the future, like A2A, G2G, etc...
Improvements on the TASKING are in summary:
* A COMMANDCENTER has a dedicated menu.
* A MISSION has a dedicated menu system.
* A MISSION has a briefing report.
* A MISSION has dedicated status reports.
* A MISSION has for each TASK TYPE a menu.
* A MISSION has for each TASK TYPE a dedicated menu system for each TASK defined.
* A MISSION has an "assigned" task menu that contains menu actions relevant to the assigned task.
* A TASK (of various types) has a dedicated menu system.
* A TASK has a briefing report.
* A TASK has dedicated status reports.
* Player reports can be retrieved that explain which player is at which task.
* ...
TASKING is vast, and at the moment there is too much to explain.
**The best way to explore the TASKING is to TRY it...**
I suggest you have a look at the [GORI Valley Mission - Iteration 3](https://www.youtube.com/watch?v=v2Us8SS1-44&t=1070s).
Many people have contributed to the testing of the mechanism, especially:
@baluballa, @doom, @whiplash
## 2. New MOOSE classes have been added.
MOOSE 2.1.0 comes with new classes that extend the functionality of the MOOSE framework and allow you to do new things in your missions:
### 2.1. Target designation by laser, smoke or illumination.
[DESIGNATE](http://flightcontrol-master.github.io/MOOSE/Documentation/Designate.html) orchestrates the designation of potential targets executed by a Recce group,
and communicates these to a dedicated attacking group of players,
so that following a dynamically generated menu system,
each detected set of potential targets can be lased or smoked...
Targets can be:
* **Lased** for a period of time.
* **Smoked**. Artillery or airplanes with illumination ordnance need to be present. (WIP, but early demo ready.)
* **Illuminated** through an illumination bomb. Artillery or airplanes with illumination ordnance need to be present. (WIP, but early demo ready.)
This class was made with the help of @EasyEB and many others.
[DESIGNATE is demonstrated on youtube](https://www.youtube.com/playlist?list=PL7ZUrU4zZUl0dQ9UKQMb7YL8z2sKSqemH)
DESIGNATE demonstration missions:
* [DES - Designation](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/DES%20-%20Designation)
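As an illustration only, a minimal designation setup could look like the sketch below. The `DESIGNATE:New` argument list (a COMMANDCENTER, a detection object, and the attacking group set) is an assumption here; the setter methods are the ones listed in section 4 of these notes, and all names are illustrative.
RecceSetGroup = SET_GROUP:New():FilterPrefixes( "Recce" ):FilterStart()
RecceDetection = DETECTION_AREAS:New( RecceSetGroup, 5000 )
AttackSetGroup = SET_GROUP:New():FilterPrefixes( "Attack" ):FilterStart()
Designate = DESIGNATE:New( HQ, RecceDetection, AttackSetGroup ) -- HQ: a COMMANDCENTER; signature assumed
Designate:SetLaserCodes( { 1688, 1130 } )
Designate:SetAutoLase( true )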
### 2.2. Transport cargo of different types to various locations as a human task within a mission.
The MOOSE framework provides various CARGO classes that allow DCS physical or logical objects to be transported or sling loaded by Carriers.
The CARGO_ classes, as part of the MOOSE core, are able to Board, Load, UnBoard and UnLoad cargo between Carrier units.
The collection of classes in this module defines tasks for human players to handle these cargo objects.
Cargo can be transported, picked-up, deployed and sling-loaded from and to other places.
[TASK\_CARGO\_TRANSPORT](http://flightcontrol-master.github.io/MOOSE/Documentation/Task_Cargo.html#TASK_CARGO_TRANSPORT) defines a task for a human player to transport a set of cargo between various zones.
It is the first class that forms part of the TASK_CARGO classes suite.
The TASK_CARGO classes provide you with a flexible tasking system,
that allows you to transport cargo of various types between various locations
and various dedicated deployment zones.
A human player can join the battlefield in a client airborne slot or a ground vehicle within the CA module (ALT-J).
The player needs to accept the task from the task overview list within the mission, using the radio menus.
Once the TASK\_CARGO\_TRANSPORT is assigned to the player and accepted by the player, the player will obtain
an extra **Cargo Handling Radio Menu** that contains the CARGO objects that need to be transported.
Cargo can be transported towards different **Deployment Zones**, but can also be deployed anywhere within the battlefield.
The Cargo Handling Radio Menu system allows you to execute **various actions** to handle the cargo.
In the menu, you'll find, for each CARGO that is part of the scope of the task, various actions that can be completed.
Depending on the location of your Carrier unit, the menu options will vary.
The [CARGO_GROUP](http://flightcontrol-master.github.io/MOOSE/Documentation/Cargo.html#CARGO_GROUP) class defines a
cargo that is represented by a GROUP object within the simulator, and can be transported by a carrier.
The [CARGO_UNIT](http://flightcontrol-master.github.io/MOOSE/Documentation/Cargo.html#CARGO_UNIT) class defines a
cargo that is represented by a UNIT object within the simulator, and can be transported by a carrier.
Mission designers can use the [SET_CARGO](http://flightcontrol-master.github.io/MOOSE/Documentation/Set.html#SET_CARGO)
class to build sets of cargos.
Note 1: **Various other CARGO classes are defined and are WIP**.
Now that the foundation for Cargo handling is taking shape, future releases will bring other types of CARGO handling
classes to the MOOSE framework quickly. Sling-loading, package, beacon and other types of CARGO will be released soon.
Note 2: **AI_CARGO has been renamed to CARGO and now forms part of the Core of MOOSE**.
If you were using AI_CARGO in your missions, please rename AI_CARGO to CARGO...
TASK\_TRANSPORT\_CARGO is demonstrated at the [GORI Valley Mission - Iteration 4](https://www.youtube.com/watch?v=v2Us8SS1-44&t=1070s)
TASK_TRANSPORT_CARGO demonstration missions:
* [TSK-110 - Ground - Transport Cargo Group](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-110%20-%20Ground%20-%20Transport%20Cargo%20Group)
* [TSK-210 - Helicopter - Transport Cargo Group](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-210%20-%20Helicopter%20-%20Transport%20Cargo%20Group)
* [TSK-211 - Helicopter - Transport Multiple Cargo Groups](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-211%20-%20Helicopter%20-%20Transport%20Multiple%20Cargo%20Groups)
* [TSK-212 - Helicopter - Cargo handle PickedUp and Deployed events](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-212%20-%20Helicopter%20-%20Cargo%20handle%20PickedUp%20and%20Deployed%20events)
* [TSK-213 - Helicopter - Cargo Group Destroyed](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/TSK%20-%20Task%20Modelling/TSK-213%20-%20Helicopter%20-%20Cargo%20Group%20Destroyed)
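The SET_CARGO filter methods listed in section 4 below can be used to declare the cargo scope of such a task; a small sketch, where the type and prefix strings are illustrative:
WorkmaterialsCargoSet = SET_CARGO:New():FilterTypes( "Workmaterials" ):FilterPrefixes( "Engineers" ):FilterStart()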
### 2.3. Dynamically spawn STATIC objects into your mission.
The [SPAWNSTATIC](http://flightcontrol-master.github.io/MOOSE/Documentation/SpawnStatic.html#SPAWNSTATIC) class allows new Statics to be spawned dynamically.
By creating a copy of an existing static object template as defined in the Mission Editor (ME), SPAWNSTATIC can retrieve the properties of the defined static object template (like type, category, etc.), and "copy" these properties to create a new static object and place it at the desired coordinate.
Newly spawned Statics get the same name as the template Static, or get the given name when a new name is provided to the Spawn method.
SPAWNSTATIC demonstration missions:
* [SPS-100 - Simple Spawning](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master-release-2.1/SPS%20-%20Spawning%20Statics/SPS-100%20-%20Simple%20Spawning)
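A short sketch using the SPAWNSTATIC methods listed in section 4 below; the template name, country and zone name are illustrative:
SpawnWarehouse = SPAWNSTATIC:NewFromStatic( "Static Warehouse", country.id.RUSSIA )
SpawnWarehouse:SpawnFromZone( ZONE:New( "Depot Zone" ), 90, "Warehouse Copy" )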
### 2.4. Better coordinate management in MGRS, LL or LL Decimal.
The [COORDINATE](http://flightcontrol-master.github.io/MOOSE/Documentation/Point.html#COORDINATE) class
defines a 2D coordinate in the simulator. A COORDINATE can be expressed in LL or in MGRS.
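A short sketch using the COORDINATE constructors and conversion methods listed in section 4 below; the values are illustrative:
TargetCoord = COORDINATE:NewFromVec2( { x = 10000, y = 20000 } )
env.info( TargetCoord:ToStringLL( 2, true ) ) -- LL with 2-digit accuracy, in DMS
env.info( TargetCoord:ToStringMGRS( 5 ) ) -- MGRS with 5-digit accuracy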
### 2.5. Improved scoring system
Scoring is implemented through the [SCORING](http://flightcontrol-master.github.io/MOOSE/Documentation/Scoring.html) class.
The scoring system has been improved a lot! Now, the scoring correctly counts scores on normal units, statics and scenery objects.
Specific scores can be registered for specific targets. The scoring works together with the tasking system, so players can achieve
additional scores when they achieve goals!
SCORING demonstration missions:
* [SCO-100 - Scoring of Statics](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/SCO%20-%20Scoring/SCO-100%20-%20Scoring%20of%20Statics)
* [SCO-101 - Scoring Client to Client](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/SCO%20-%20Scoring/SCO-101%20-%20Scoring%20Client%20to%20Client)
* [SCO-500 - Scoring Multi Player Demo Mission 1](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/SCO%20-%20Scoring/SCO-500%20-%20Scoring%20Multi%20Player%20Demo%20Mission%201)
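A minimal sketch; the `SCORING:New` argument shown here is an assumption, with one SCORING object per mission as the usual pattern:
Scoring = SCORING:New( "Gori Valley" ) -- scores are then counted automatically for kills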
### 2.6. Beacons and Radio
The Radio module contains 2 classes: RADIO and BEACON.
What are radio communications in DCS?
* Radio transmissions consist of **sound files** that are broadcast on a specific **frequency** (e.g. 115 MHz) and **modulation** (e.g. AM),
* They can be **subtitled** for a specific **duration**, the **power** in Watts of the transmitter's antenna can be set, and the transmission can be **looped**.
These classes are the work of @Grey-Echo.
RADIO and BEACON demonstration missions:
* [RAD-000 - Transmission from Static](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/RAD%20-%20Radio/RAD-000%20-%20Transmission%20from%20Static)
* [RAD-001 - Transmission from UNIT or GROUP](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/RAD%20-%20Radio/RAD-001%20-%20Transmission%20from%20UNIT%20or%20GROUP)
* [RAD-002 - Transmission Tips and Tricks](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/RAD%20-%20Radio/RAD-002%20-%20Transmission%20Tips%20and%20Tricks)
* [ RAD-010 - Beacons](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/RAD%20-%20Radio/RAD-010%20-%20Beacons)
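A transmission sketch using the POSITIONABLE:GetRadio() accessor listed in section 4 below; the RADIO setter names used here (SetFileName, SetFrequency, SetModulation, Broadcast) are assumptions, and the group and file names are illustrative:
TowerRadio = GROUP:FindByName( "Tower" ):GetRadio()
TowerRadio:SetFileName( "Noise.ogg" ) -- sound file placed inside the miz
TowerRadio:SetFrequency( 115 ) -- MHz
TowerRadio:SetModulation( radio.modulation.AM )
TowerRadio:Broadcast()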
### 2.7. Build large formations of AI.
[AI_FORMATION](http://flightcontrol-master.github.io/MOOSE/Documentation/AI_Formation.html) makes AI @{GROUP}s fly in formations of various compositions.
The AI_FORMATION class models formations in a different manner than the internal DCS formation logic!!!
The purpose of the class is to:
* Make formation building a process that can be managed while in flight, rather than a task.
* Let human players guide formations consisting of large planes.
* Build large formations (like a large bomber field).
* Form formations that DCS does not support off the shelf.
AI_FORMATION Demo Missions: [FOR - AI Group Formation]()
AI\_FORMATION demonstration missions:
* [FOR-100 - Bomber Left Line Formation](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-100%20-%20Bomber%20Left%20Line%20Formation)
* [FOR-101 - Bomber Right Line Formation](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-101%20-%20Bomber%20Right%20Line%20Formation)
* [FOR-102 - Bomber Left Wing Formation](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-102%20-%20Bomber%20Left%20Wing%20Formation)
* [FOR-103 - Bomber Right Wing Formation](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-103%20-%20Bomber%20Right%20Wing%20Formation)
* [FOR-104 - Bomber Center Wing Formation](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-104%20-%20Bomber%20Center%20Wing%20Formation)
* [FOR-105 - Bomber Trail Formation](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-105%20-%20Bomber%20Trail%20Formation)
* [FOR-106 - Bomber Box Formation](https://github.com/FlightControl-Master/MOOSE_MISSIONS/tree/master/FOR%20-%20AI%20Group%20Formation/FOR-106%20-%20Bomber%20Box%20Formation)
Note: The AI_FORMATION is currently a first version showing the potential, a "building block". From this class, further classes will be derived and the class will be fine-tuned.
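A formation sketch using the AI_FORMATION:New constructor listed in section 4 below. The FormationTrail call is inferred from the onafterFormationTrail handler listed there, the __Start call follows the MOOSE FSM convention, and all names are illustrative:
LeadUnit = UNIT:FindByName( "Lead Bomber" )
BomberSet = SET_GROUP:New():FilterPrefixes( "Bomber Wing" ):FilterStart()
Formation = AI_FORMATION:New( LeadUnit, BomberSet, "Bomber Trail", "Hold a trail formation on the lead." )
Formation:FormationTrail( 500, 100, 0 ) -- XStart, XSpace, YStart
Formation:__Start( 1 )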
## 3. A lot of components have been reworked and bugs have been fixed.
### 3.1. Better event handling and event dispatching.
The underlying mechanisms to handle DCS events have been improved. Bugs have been fixed.
The MISSION_END event is now also supported.
### 3.2. Cargo handling has been made much better now.
As a result of this rework, some of the WIP cargo classes that were defined earlier are still WIP.
But as mentioned earlier, new CARGO classes can be published faster now.
The framework is now more consistent internally.
## 4. A lot of new methods have been defined in several existing or new classes.
AI_FORMATION:New( FollowUnit, FollowGroupSet, FollowName, FollowBriefing ) --R2.1
AI_FORMATION:TestSmokeDirectionVector( SmokeDirection ) --R2.1
AI_FORMATION:onafterFormationLine( FollowGroupSet, From, Event, To, XStart, XSpace, YStart, YSpace, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationTrail( FollowGroupSet, From, Event, To, XStart, XSpace, YStart ) --R2.1
AI_FORMATION:onafterFormationStack( FollowGroupSet, From, Event, To, XStart, XSpace, YStart, YSpace ) --R2.1
AI_FORMATION:onafterFormationLeftLine( FollowGroupSet, From, Event, To, XStart, YStart, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationRightLine( FollowGroupSet, From, Event, To, XStart, YStart, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationLeftWing( FollowGroupSet, From, Event, To, XStart, XSpace, YStart, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationRightWing( FollowGroupSet, From, Event, To, XStart, XSpace, YStart, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationCenterWing( FollowGroupSet, From, Event, To, XStart, XSpace, YStart, YSpace, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationVic( FollowGroupSet, From, Event, To, XStart, XSpace, YStart, YSpace, ZStart, ZSpace ) --R2.1
AI_FORMATION:onafterFormationBox( FollowGroupSet, From, Event, To, XStart, XSpace, YStart, YSpace, ZStart, ZSpace, ZLevels ) --R2.1
AI_FORMATION:SetFlightRandomization( FlightRandomization ) --R2.1
AI_FORMATION:onenterFollowing( FollowGroupSet ) --R2.1
CARGO:GetName()
CARGO:GetObjectName()
DATABASE:ForEachStatic( IteratorFunction, FinalizeFunction, ... )
EVENT:Reset( EventObject ) --R2.1
POINT_VEC3:IsLOS( ToPointVec3 ) --R2.1
COORDINATE:New( x, y, LandHeightAdd ) --R2.1 Fixes issue #424.
COORDINATE:NewFromVec2( Vec2, LandHeightAdd ) --R2.1 Fixes issue #424.
COORDINATE:NewFromVec3( Vec3 ) --R2.1 Fixes issue #424.
COORDINATE:ToStringLL( LL_Accuracy, LL_DMS ) --R2.1 Fixes issue #424.
COORDINATE:ToStringMGRS( MGRS_Accuracy ) --R2.1 Fixes issue #424.
COORDINATE:ToString() --R2.1 Fixes issue #424.
COORDINATE:CoordinateMenu( RootMenu ) --R2.1 Fixes issue #424.
COORDINATE:MenuSystem( System ) --R2.1 Fixes issue #424.
COORDINATE:MenuLL_Accuracy( LL_Accuracy ) --R2.1 Fixes issue #424.
COORDINATE:MenuLL_DMS( LL_DMS ) --R2.1 Fixes issue #424.
COORDINATE:MenuMGRS_Accuracy( MGRS_Accuracy ) --R2.1 Fixes issue #424.
SET_BASE:FilterDeads() --R2.1 allow deads to be filtered to automatically handle deads in the collection.
SET_BASE:FilterCrashes() --R2.1 allow crashes to be filtered to automatically handle crashes in the collection.
SET_UNIT:ForEachUnitPerThreatLevel( FromThreatLevel, ToThreatLevel, IteratorFunction, ... ) --R2.1 Threat Level implementation
SET_CARGO:New() --R2.1
SET_CARGO:AddCargosByName( AddCargoNames ) --R2.1
SET_CARGO:RemoveCargosByName( RemoveCargoNames ) --R2.1
SET_CARGO:FindCargo( CargoName ) --R2.1
SET_CARGO:FilterCoalitions( Coalitions ) --R2.1
SET_CARGO:FilterTypes( Types ) --R2.1
SET_CARGO:FilterCountries( Countries ) --R2.1
SET_CARGO:FilterPrefixes( Prefixes ) --R2.1
SET_CARGO:FilterStart() --R2.1
SET_CARGO:AddInDatabase( Event ) --R2.1
SET_CARGO:FindInDatabase( Event ) --R2.1
SET_CARGO:ForEachCargo( IteratorFunction, ... ) --R2.1
SET_CARGO:FindNearestCargoFromPointVec2( PointVec2 ) --R2.1
SET_CARGO:IsIncludeObject( MCargo ) --R2.1
SET_CARGO:OnEventNewCargo( EventData ) --R2.1
SET_CARGO:OnEventDeleteCargo( EventData ) --R2.1
SPAWNSTATIC:NewFromStatic( SpawnTemplatePrefix, CountryID ) --R2.1
SPAWNSTATIC:NewFromType( SpawnTypeName, SpawnShapeName, SpawnCategory, CountryID ) --R2.1
SPAWNSTATIC:SpawnFromPointVec2( PointVec2, Heading, NewName ) --R2.1
SPAWNSTATIC:SpawnFromZone( Zone, Heading, NewName ) --R2.1
ZONE_BASE:GetCoordinate( Height ) --R2.1
DESIGNATE:SetFlashStatusMenu( FlashMenu ) --R2.1
DESIGNATE:SetLaserCodes( LaserCodes ) --R2.1
DESIGNATE:GenerateLaserCodes() --R2.1
DESIGNATE:SetAutoLase( AutoLase ) --R2.1
DESIGNATE:SetThreatLevelPrioritization( Prioritize ) --R2.1
DETECTION_BASE:CleanDetectionItems() --R2.1 Clean the DetectionItems list
DETECTION_BASE:GetDetectedItemID( Index ) --R2.1
DETECTION_BASE:GetDetectedID( Index ) --R2.1
DETECTION_AREAS:DetectedReportDetailed() --R2.1 Fixed missing report
REPORT:HasText() --R2.1
REPORT:SetIndent( Indent ) --R2.1
REPORT:AddIndent( Text ) --R2.1
MISSION:GetMenu( TaskGroup ) --R2.1 Changed menu structure.
TASK:SetMenu( MenuTime ) --R2.1 Mission Reports and Task Reports added. Fixes issue #424.
TASK:ReportSummary() --R2.1 fixed report. Now nicely formatted and contains the info required.
TASK:ReportOverview() --R2.1 fixed report. Now nicely formatted and contains the info required.
TASK:GetPlayerCount() --R2.1 Get a count of the players.
TASK:GetPlayerNames() --R2.1 Get a map of the players.
TASK:ReportDetails() --R2.1 fixed report. Now nicely formatted and contains the info required.
UTILS.tostringMGRS = function(MGRS, acc) --R2.1
POSITIONABLE:GetBoundingBox() --R2.1
POSITIONABLE:GetHeight() --R2.1
POSITIONABLE:GetMessageText( Message, Name ) --R2.1 added
POSITIONABLE:GetMessage( Message, Duration, Name ) --R2.1 changed callsign and name and using GetMessageText
POSITIONABLE:MessageToSetGroup( Message, Duration, MessageSetGroup, Name ) --R2.1
POSITIONABLE:GetRadio() --R2.1
POSITIONABLE:GetBeacon() --R2.1
POSITIONABLE:LaseUnit( Target, LaserCode, Duration ) --R2.1
POSITIONABLE:LaseOff() --R2.1
POSITIONABLE:IsLasing() --R2.1
POSITIONABLE:GetSpot() --R2.1
POSITIONABLE:GetLaserCode() --R2.1
UNIT:IsDetected( TargetUnit ) --R2.1
UNIT:IsLOS( TargetUnit ) --R2.1
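To give a feel for the new API, a short sketch combining a few of the methods listed above (a hedged example; the unit names, laser code and duration are illustrative):

JTAC = UNIT:FindByName( "JTAC" ) -- illustrative unit names
Target = UNIT:FindByName( "Target" )
TargetCoord = COORDINATE:NewFromVec3( Target:GetVec3() )
JTAC:E( "Target at " .. TargetCoord:ToStringLL( 2, true ) ) -- LL_Accuracy, LL_DMS
JTAC:LaseUnit( Target, 1688, 300 ) -- LaserCode, Duration
if JTAC:IsLasing() then
  JTAC:E( "Lasing with code " .. JTAC:GetLaserCode() )
end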

BIN
Release Notes 2.2.0.docx Normal file

Binary file not shown.

BIN
Release Notes 2.2.0.pdf Normal file

Binary file not shown.

5
Utils/Generate_Moose.bat Normal file
View File

@ -0,0 +1,5 @@
%~dp0luarocks\lua5.1.exe %1 %2 %3 %4 %5
call %~dp0LuaSrcDiet.bat --basic --opt-emptylines %5\Moose.lua
rem del %5\Moose.lua
rem copy %5\Moose_.lua %5\Moose.lua
rem del Moose_.lua

Binary file not shown.

View File

@ -0,0 +1,178 @@
#!/usr/bin/lua
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
-- Check interpreter version
if _VERSION ~= "Lua 5.1" then
print("Luadocumentor is only compatible with Lua 5.1")
return
end
--
-- Defining help message.
--
-- This message is compliant to 'lapp', which will match options and arguments
-- from command line.
local help = [[luadocumentor v0.1.4: tool for Lua Documentation Language
-f, --format (default doc) Define output format :
* doc: Will produce HTML documentation from specified file(s) or directories.
* api: Will produce API file(s) from specified file(s) or directories.
-d, --dir (default docs) Define an output directory. If the given directory doesn't exist, it will be created.
-h, --help Display the help.
-n, --noheuristic Do not use code analysis, use only comments to generate documentation.
-s, --style (default !) The path of your own css file, if you don't want to use the default one. (useful only for the doc format)
[directories|files] Define the paths or the directories of inputs files. Only Lua or C files containing a @module tag will be considered.
]]
local docgenerator = require 'docgenerator'
local lddextractor = require 'lddextractor'
local lapp = require 'pl.lapp'
local args = lapp( help )
if not args or #args < 1 then
print('No directory provided')
return
elseif args.help then
-- Just print help
print( help )
return
end
--
-- define css file name
--
local cssfilename = "stylesheet.css"
--
-- Parse files from given folders
--
-- Check if all folders exist
local fs = require 'fs.lfs'
local allpresent, missing = fs.checkdirectory(args)
-- Some of given directories are absent
if missing then
-- List missing directories
print 'Unable to open'
for _, file in ipairs( missing ) do
print('\t'.. file)
end
return
end
-- Get files from given directories
local filestoparse, error = fs.filelist( args )
if not filestoparse then
print ( error )
return
end
--
-- Generate documentation only files
--
if args.format == 'api' then
for _, filename in ipairs( filestoparse ) do
-- Loading file content
print('Dealing with "'..filename..'".')
local file, error = io.open(filename, 'r')
if not file then
print ('Unable to open "'..filename..'".\n'..error)
else
local code = file:read('*all')
file:close()
--
-- Creating comment file
--
local commentfile, error = lddextractor.generatecommentfile(filename, code)
-- Getting module name
-- Optimize me
local module, moduleerror = lddextractor.generateapimodule(filename, code)
if not commentfile then
print('Unable to create documentation file for "'..filename..'"\n'..error)
elseif not module or not module.name then
local error = moduleerror and '\n'..moduleerror or ''
print('Unable to compute module name for "'..filename..'".'..error)
else
--
-- Flush documentation file on disk
--
local path = args.dir..fs.separator..module.name..'.lua'
local status, err = fs.fill(path, commentfile)
if not status then
print(err)
end
end
end
end
print('Done')
return
end
-- Deal only with supported output types
if args.format ~= 'doc' then
print ('"'..args.format..'" format is not handled.')
return
end
-- Generate html from files
local parsedfiles, unparsed = docgenerator.generatedocforfiles(filestoparse, cssfilename,args.noheuristic)
-- Show warnings on unparsed files
if #unparsed > 0 then
for _, faultyfile in ipairs( unparsed ) do
print( faultyfile )
end
end
-- This loop is just for counting parsed files
-- TODO: Find a more elegant way to do it
local parsedfilescount = 0
for _, p in pairs(parsedfiles) do
parsedfilescount = parsedfilescount + 1
end
print (parsedfilescount .. ' file(s) parsed.')
-- Create html files
local generated = 0
for _, apifile in pairs ( parsedfiles ) do
local status, err = fs.fill(args.dir..fs.separator..apifile.name..'.html', apifile.body)
if status then
generated = generated + 1
else
print( 'Unable to create '..apifile.name..'.html on disk.')
end
end
print (generated .. ' file(s) generated.')
-- Copying css
local csscontent
if args.style == '!' then
csscontent = require 'defaultcss'
else
local css, error = io.open(args.style, 'r')
if not css then
print('Unable to open "'..args.style .. '".\n'..error)
return
end
csscontent = css:read("*all")
css:close()
end
local status, error = fs.fill(args.dir..fs.separator..cssfilename, csscontent)
if not status then
print(error)
return
end
print('Adding css')
print('Done')

View File

@ -0,0 +1,198 @@
Eclipse Public License - v 1.0
THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC
LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM
CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
1. DEFINITIONS
"Contribution" means:
a) in the case of the initial Contributor, the initial code and documentation
distributed under this Agreement, and
b) in the case of each subsequent Contributor:
i) changes to the Program, and
ii) additions to the Program;
where such changes and/or additions to the Program originate from and are
distributed by that particular Contributor. A Contribution 'originates' from
a Contributor if it was added to the Program by such Contributor itself or
anyone acting on such Contributor's behalf. Contributions do not include
additions to the Program which: (i) are separate modules of software
distributed in conjunction with the Program under their own license
agreement, and (ii) are not derivative works of the Program.
"Contributor" means any person or entity that distributes the Program.
"Licensed Patents" mean patent claims licensable by a Contributor which are
necessarily infringed by the use or sale of its Contribution alone or when
combined with the Program.
"Program" means the Contributions distributed in accordance with this Agreement.
"Recipient" means anyone who receives the Program under this Agreement,
including all Contributors.
2. GRANT OF RIGHTS
a) Subject to the terms of this Agreement, each Contributor hereby grants
Recipient a non-exclusive, worldwide, royalty-free copyright license to
reproduce, prepare derivative works of, publicly display, publicly perform,
distribute and sublicense the Contribution of such Contributor, if any, and
such derivative works, in source code and object code form.
b) Subject to the terms of this Agreement, each Contributor hereby grants
Recipient a non-exclusive, worldwide, royalty-free patent license under
Licensed Patents to make, use, sell, offer to sell, import and otherwise
transfer the Contribution of such Contributor, if any, in source code and
object code form. This patent license shall apply to the combination of the
Contribution and the Program if, at the time the Contribution is added by
the Contributor, such addition of the Contribution causes such combination
to be covered by the Licensed Patents. The patent license shall not apply
to any other combinations which include the Contribution. No hardware per
se is licensed hereunder.
c) Recipient understands that although each Contributor grants the licenses to
its Contributions set forth herein, no assurances are provided by any
Contributor that the Program does not infringe the patent or other
intellectual property rights of any other entity. Each Contributor
disclaims any liability to Recipient for claims brought by any other entity
based on infringement of intellectual property rights or otherwise. As a
condition to exercising the rights and licenses granted hereunder, each
Recipient hereby assumes sole responsibility to secure any other
intellectual property rights needed, if any. For example, if a third party
patent license is required to allow Recipient to distribute the Program, it
is Recipient's responsibility to acquire that license before distributing
the Program.
d) Each Contributor represents that to its knowledge it has sufficient
copyright rights in its Contribution, if any, to grant the copyright
license set forth in this Agreement.
3. REQUIREMENTS
A Contributor may choose to distribute the Program in object code form under its
own license agreement, provided that:
a) it complies with the terms and conditions of this Agreement; and
b) its license agreement:
i) effectively disclaims on behalf of all Contributors all warranties and
conditions, express and implied, including warranties or conditions of
title and non-infringement, and implied warranties or conditions of
merchantability and fitness for a particular purpose;
ii) effectively excludes on behalf of all Contributors all liability for
damages, including direct, indirect, special, incidental and
consequential damages, such as lost profits;
iii) states that any provisions which differ from this Agreement are offered
by that Contributor alone and not by any other party; and
iv) states that source code for the Program is available from such
Contributor, and informs licensees how to obtain it in a reasonable
manner on or through a medium customarily used for software exchange.
When the Program is made available in source code form:
a) it must be made available under this Agreement; and
b) a copy of this Agreement must be included with each copy of the Program.
Contributors may not remove or alter any copyright notices contained within
the Program.
Each Contributor must identify itself as the originator of its Contribution, if
any, in a manner that reasonably allows subsequent Recipients to identify the
originator of the Contribution.
4. COMMERCIAL DISTRIBUTION
Commercial distributors of software may accept certain responsibilities with
respect to end users, business partners and the like. While this license is
intended to facilitate the commercial use of the Program, the Contributor who
includes the Program in a commercial product offering should do so in a manner
which does not create potential liability for other Contributors. Therefore, if
a Contributor includes the Program in a commercial product offering, such
Contributor ("Commercial Contributor") hereby agrees to defend and indemnify
every other Contributor ("Indemnified Contributor") against any losses, damages
and costs (collectively "Losses") arising from claims, lawsuits and other legal
actions brought by a third party against the Indemnified Contributor to the
extent caused by the acts or omissions of such Commercial Contributor in
connection with its distribution of the Program in a commercial product
offering. The obligations in this section do not apply to any claims or Losses
relating to any actual or alleged intellectual property infringement. In order
to qualify, an Indemnified Contributor must: a) promptly notify the Commercial
Contributor in writing of such claim, and b) allow the Commercial Contributor to
control, and cooperate with the Commercial Contributor in, the defense and any
related settlement negotiations. The Indemnified Contributor may participate in
any such claim at its own expense.
For example, a Contributor might include the Program in a commercial product
offering, Product X. That Contributor is then a Commercial Contributor. If that
Commercial Contributor then makes performance claims, or offers warranties
related to Product X, those performance claims and warranties are such
Commercial Contributor's responsibility alone. Under this section, the
Commercial Contributor would have to defend claims against the other
Contributors related to those performance claims and warranties, and if a court
requires any other Contributor to pay any damages as a result, the Commercial
Contributor must pay those damages.
5. NO WARRANTY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON AN
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR
IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE,
NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each
Recipient is solely responsible for determining the appropriateness of using and
distributing the Program and assumes all risks associated with its exercise of
rights under this Agreement , including but not limited to the risks and costs
of program errors, compliance with applicable laws, damage to or loss of data,
programs or equipment, and unavailability or interruption of operations.
6. DISCLAIMER OF LIABILITY
EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY
CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST
PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS
GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
7. GENERAL
If any provision of this Agreement is invalid or unenforceable under applicable
law, it shall not affect the validity or enforceability of the remainder of the
terms of this Agreement, and without further action by the parties hereto, such
provision shall be reformed to the minimum extent necessary to make such
provision valid and enforceable.
If Recipient institutes patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Program itself
(excluding combinations of the Program with other software or hardware)
infringes such Recipient's patent(s), then such Recipient's rights granted under
Section 2(b) shall terminate as of the date such litigation is filed.
All Recipient's rights under this Agreement shall terminate if it fails to
comply with any of the material terms or conditions of this Agreement and does
not cure such failure in a reasonable period of time after becoming aware of
such noncompliance. If all Recipient's rights under this Agreement terminate,
Recipient agrees to cease use and distribution of the Program as soon as
reasonably practicable. However, Recipient's obligations under this Agreement
and any licenses granted by Recipient relating to the Program shall continue and
survive.
Everyone is permitted to copy and distribute copies of this Agreement, but in
order to avoid inconsistency the Agreement is copyrighted and may only be
modified in the following manner. The Agreement Steward reserves the right to
publish new versions (including revisions) of this Agreement from time to time.
No one other than the Agreement Steward has the right to modify this Agreement.
The Eclipse Foundation is the initial Agreement Steward. The Eclipse Foundation
may assign the responsibility to serve as the Agreement Steward to a suitable
separate entity. Each new version of the Agreement will be given a
distinguishing version number. The Program (including Contributions) may always
be distributed subject to the version of the Agreement under which it was
received. In addition, after a new version of the Agreement is published,
Contributor may elect to distribute the Program (including its Contributions)
under the new version. Except as expressly stated in Sections 2(a) and 2(b)
above, Recipient receives no rights or licenses to the intellectual property of
any Contributor under this Agreement, whether expressly, by implication,
estoppel or otherwise. All rights in the Program not expressly granted under
this Agreement are reserved.
This Agreement is governed by the laws of the State of New York and the
intellectual property laws of the United States of America. No party to this
Agreement will bring a legal action under this Agreement more than one year
after the cause of action arose. Each party waives its rights to a jury trial in
any resulting litigation.

View File

@ -0,0 +1,7 @@
# Lua Documentor
LuaDocumentor allow users to generate HTML and API files from code documented
using Lua documentation language.
Documentation is
[available here](http://wiki.eclipse.org/Koneki/LDT/User_Area/LuaDocumentor).

View File

@ -0,0 +1,57 @@
package = 'LuaDocumentor'
version = '0.1.5-1'
description = {
summary = 'LuaDocumentor allow users to generate HTML and API files from code documented using Lua documentation language.',
detailed = [[
This is an example for the LuaRocks tutorial.
Here we would put a detailed, typically
paragraph-long description.
]],
homepage = 'http://wiki.eclipse.org/Koneki/LDT/User_Area/LuaDocumentor',
license = 'EPL'
}
source = {
url = 'git://github.com/LuaDevelopmentTools/luadocumentor.git',
tag = 'v0.1.5-1'
}
dependencies = {
'lua ~> 5.1',
'luafilesystem ~> 1.6',
'markdown ~> 0.32',
'metalua-compiler ~> 0.7',
'penlight ~> 0.9'
}
build = {
type = 'builtin',
install = {
bin = {
luadocumentor = 'luadocumentor.lua'
},
lua = {
['models.internalmodelbuilder'] = 'models/internalmodelbuilder.mlua'
}
},
modules = {
defaultcss = 'defaultcss.lua',
docgenerator = 'docgenerator.lua',
extractors = 'extractors.lua',
lddextractor = 'lddextractor.lua',
templateengine = 'templateengine.lua',
['fs.lfs'] = 'fs/lfs.lua',
['models.apimodel'] = 'models/apimodel.lua',
['models.apimodelbuilder'] = 'models/apimodelbuilder.lua',
['models.internalmodel'] = 'models/internalmodel.lua',
['models.ldparser'] = 'models/ldparser.lua',
['template.file'] = 'template/file.lua',
['template.index'] = 'template/index.lua',
['template.index.recordtypedef'] = 'template/index/recordtypedef.lua',
['template.item'] = 'template/item.lua',
['template.page'] = 'template/page.lua',
['template.recordtypedef'] = 'template/recordtypedef.lua',
['template.usage'] = 'template/usage.lua',
['template.utils'] = 'template/utils.lua',
}
}

View File

@ -0,0 +1,39 @@
rock_manifest = {
bin = {
luadocumentor = "bc5cc07f56db2cf1dbe80f0827332873"
},
doc = {
LICENSE = "52a21f73ac77fd790dc40dc5acda0fc2",
["README.md"] = "fcef1f43c69f3559b347d854b2626deb"
},
lua = {
["defaultcss.lua"] = "dd9b2b89e5080972bbb52056247c0c65",
["docgenerator.lua"] = "92d0a3947d88226340014d2f033be37f",
["extractors.lua"] = "74191695e5217706ee355925e5ca40fa",
fs = {
["lfs.lua"] = "4d00f9bc942b02a86ccea16544d3e85d"
},
["lddextractor.lua"] = "56edde775a5d57818aa0a07b4f723536",
models = {
["apimodel.lua"] = "3c401de18691b1222b0ad253958260ee",
["apimodelbuilder.lua"] = "4c4a3c0b48b404973542dd99f994eb2c",
["internalmodel.lua"] = "a1a21e50af8db0f0a0b9d164ccc08853",
["internalmodelbuilder.mlua"] = "ff95dfca573ccc1c19a79434e96a492d",
["ldparser.lua"] = "538904a3adbfff4ff83deda029847323"
},
template = {
["file.lua"] = "41f095bc049ef161060d8e3b4ac9de63",
index = {
["recordtypedef.lua"] = "0977ff0048a837389c2ac10285eb1ce1"
},
["index.lua"] = "5a3b3cface3b1fd9cb2d56f1edd5487b",
["item.lua"] = "5d5a6d9bffd8935c4ed283105ede331b",
["page.lua"] = "351f4a7215272f7e448faeece4945bc0",
["recordtypedef.lua"] = "69938e1d60e94eed7f95b0999f1386ca",
["usage.lua"] = "979503deb84877cb221130a5be7c1535",
["utils.lua"] = "ad97fb4e3de9fb6480b25cdd877b50d9"
},
["templateengine.lua"] = "09bfc6350e14f4ab509d14fb0fb295c0"
},
["luadocumentor-0.1.5-1.rockspec"] = "4ba1b88898dce89e7fd8fb6a700496a4"
}

View File

@ -0,0 +1,212 @@
body {
margin-left: 1em;
margin-right: 1em;
font-family: arial, helvetica, geneva, sans-serif;
background-color:#ffffff; margin:0px;
}
code {
font-family: "Andale Mono", monospace;
}
tt {
font-family: "Andale Mono", monospace;
}
body, td, th { font-size: 11pt; }
h1, h2, h3, h4 { margin-left: 0em; }
textarea, pre, tt { font-size:10pt; }
body, td, th { color:#000000; }
small { font-size:0.85em; }
h1 { font-size:1.5em; }
h2 { font-size:1.25em; }
h3 { font-size:1.15em; }
h4 { font-size:1.06em; }
a:link { font-weight:bold; color: #004080; text-decoration: none; }
a:visited { font-weight:bold; color: #006699; text-decoration: none; }
a:link:hover { text-decoration:underline; }
hr { color:#cccccc }
img { border-width: 0px; }
h3 { padding-top: 1em; }
p { margin-left: 1em; }
p.name {
font-family: "Andale Mono", monospace;
padding-top: 1em;
margin-left: 0em;
}
blockquote { margin-left: 3em; }
.example {
background-color: rgb(245, 245, 245);
border-top-width: 1px;
border-right-width: 1px;
border-bottom-width: 1px;
border-left-width: 1px;
border-top-style: solid;
border-right-style: solid;
border-bottom-style: solid;
border-left-style: solid;
border-top-color: silver;
border-right-color: silver;
border-bottom-color: silver;
border-left-color: silver;
padding: 1em;
margin-left: 1em;
margin-right: 1em;
font-family: "Andale Mono", monospace;
font-size: smaller;
}
hr {
margin-left: 0em;
background: #00007f;
border: 0px;
height: 1px;
}
ul { list-style-type: disc; }
table.index { border: 1px #00007f; }
table.index td { text-align: left; vertical-align: top; }
table.index ul { padding-top: 0em; margin-top: 0em; }
table {
border: 1px solid black;
border-collapse: collapse;
margin-left: auto;
margin-right: auto;
}
th {
border: 1px solid black;
padding: 0.5em;
}
td {
border: 1px solid black;
padding: 0.5em;
}
div.header, div.footer { margin-left: 0em; }
#container {
margin-left: 1em;
margin-right: 1em;
background-color: #f0f0f0;
}
#product {
text-align: center;
border-bottom: 1px solid #cccccc;
background-color: #ffffff;
}
#product big {
font-size: 2em;
}
#product_logo {
}
#product_name {
}
#product_description {
}
#main {
background-color: #f0f0f0;
border-left: 2px solid #cccccc;
}
#navigation {
float: left;
width: 12em;
margin: 0;
vertical-align: top;
background-color: #f0f0f0;
overflow:visible;
}
#navigation h1 {
background-color:#e7e7e7;
font-size:1.1em;
color:#000000;
text-align:left;
margin:0px;
padding:0.2em;
border-top:1px solid #dddddd;
border-bottom:1px solid #dddddd;
}
#navigation ul {
font-size:1em;
list-style-type: none;
padding: 0;
margin: 1px;
}
#navigation li {
text-indent: -1em;
margin: 0em 0em 0em 0.5em;
display: block;
padding: 3px 0px 0px 12px;
}
#navigation li li a {
padding: 0px 3px 0px -1em;
}
#content {
margin-left: 12em;
padding: 1em;
border-left: 2px solid #cccccc;
border-right: 2px solid #cccccc;
background-color: #ffffff;
}
#about {
clear: both;
margin: 0;
padding: 5px;
border-top: 2px solid #cccccc;
background-color: #ffffff;
}
@media print {
body {
font: 10pt "Times New Roman", "TimeNR", Times, serif;
}
a {
font-weight:bold; color: #004080; text-decoration: underline;
}
#main {
background-color: #ffffff; border-left: 0px;
}
#container {
margin-left: 2%; margin-right: 2%; background-color: #ffffff;
}
#content {
margin-left: 0px; padding: 1em; border-left: 0px; border-right: 0px; background-color: #ffffff;
}
#navigation {
display: none;
}
#product_logo {
display: none;
}
#about img {
display: none;
}
.example {
font-family: "Andale Mono", monospace;
font-size: 8pt;
page-break-inside: avoid;
}
}

View File

@ -0,0 +1,103 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>LuaFileSystem</title>
<link rel="stylesheet" href="doc.css" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<div id="container">
<div id="product">
<div id="product_logo">
<a href="http://www.keplerproject.org">
<img alt="LuaFileSystem" src="luafilesystem.png"/>
</a>
</div>
<div id="product_name"><big><strong>LuaFileSystem</strong></big></div>
<div id="product_description">File System Library for the Lua Programming Language</div>
</div> <!-- id="product" -->
<div id="main">
<div id="navigation">
<h1>LuaFileSystem</h1>
<ul>
<li><a href="index.html">Home</a>
<ul>
<li><a href="index.html#overview">Overview</a></li>
<li><a href="index.html#status">Status</a></li>
<li><a href="index.html#download">Download</a></li>
<li><a href="index.html#history">History</a></li>
<li><a href="index.html#credits">Credits</a></li>
<li><a href="index.html#contact">Contact us</a></li>
</ul>
</li>
<li><a href="manual.html">Manual</a>
<ul>
<li><a href="manual.html#introduction">Introduction</a></li>
<li><a href="manual.html#building">Building</a></li>
<li><a href="manual.html#installation">Installation</a></li>
<li><a href="manual.html#reference">Reference</a></li>
</ul>
</li>
<li><strong>Examples</strong></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Project</a>
<ul>
<li><a href="https://github.com/keplerproject/luafilesystem/issues">Bug Tracker</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Git</a></li>
</ul>
</li>
<li><a href="license.html">License</a></li>
</ul>
</div> <!-- id="navigation" -->
<div id="content">
<h2><a name="example"></a>Examples</h2>
<h3>Directory iterator</h3>
<p>The following example iterates over a directory and recursively lists the
attributes for each file inside it.</p>
<pre class="example">
local lfs = require"lfs"
function attrdir (path)
for file in lfs.dir(path) do
if file ~= "." and file ~= ".." then
local f = path..'/'..file
print ("\t "..f)
local attr = lfs.attributes (f)
assert (type(attr) == "table")
if attr.mode == "directory" then
attrdir (f)
else
for name, value in pairs(attr) do
print (name, value)
end
end
end
end
end
attrdir (".")
</pre>
</div> <!-- id="content" -->
</div> <!-- id="main" -->
<div id="about">
<p><a href="http://validator.w3.org/check?uri=referer">Valid XHTML 1.0!</a></p>
<p><small>$Id: examples.html,v 1.8 2007/12/14 15:28:04 carregal Exp $</small></p>
</div> <!-- id="about" -->
</div> <!-- id="container" -->
</body>
</html>

View File

@ -0,0 +1,218 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>LuaFileSystem</title>
<link rel="stylesheet" href="doc.css" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<div id="container">
<div id="product">
<div id="product_logo">
<a href="http://www.keplerproject.org">
<img alt="LuaFileSystem" src="luafilesystem.png"/>
</a>
</div>
<div id="product_name"><big><strong>LuaFileSystem</strong></big></div>
<div id="product_description">File System Library for the Lua Programming Language</div>
</div> <!-- id="product" -->
<div id="main">
<div id="navigation">
<h1>LuaFileSystem</h1>
<ul>
<li><strong>Home</strong>
<ul>
<li><a href="index.html#overview">Overview</a></li>
<li><a href="index.html#status">Status</a></li>
<li><a href="index.html#download">Download</a></li>
<li><a href="index.html#history">History</a></li>
<li><a href="index.html#credits">Credits</a></li>
<li><a href="index.html#contact">Contact us</a></li>
</ul>
</li>
<li><a href="manual.html">Manual</a>
<ul>
<li><a href="manual.html#introduction">Introduction</a></li>
<li><a href="manual.html#building">Building</a></li>
<li><a href="manual.html#installation">Installation</a></li>
<li><a href="manual.html#reference">Reference</a></li>
</ul>
</li>
<li><a href="examples.html">Examples</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Project</a>
<ul>
<li><a href="https://github.com/keplerproject/luafilesystem/issues">Bug Tracker</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Git</a></li>
</ul>
</li>
<li><a href="license.html">License</a></li>
</ul>
</div> <!-- id="navigation" -->
<div id="content">
<h2><a name="overview"></a>Overview</h2>
<p>LuaFileSystem is a <a href="http://www.lua.org">Lua</a> library
developed to complement the set of functions related to file
systems offered by the standard Lua distribution.</p>
<p>LuaFileSystem offers a portable way to access
the underlying directory structure and file attributes.</p>
<p>LuaFileSystem is free software and uses the same
<a href="license.html">license</a> as Lua 5.1.</p>
<h2><a name="status"></a>Status</h2>
<p>Current version is 1.6.3. It works with Lua 5.1, 5.2 and 5.3.</p>
<h2><a name="download"></a>Download</h2>
<p>LuaFileSystem source can be downloaded from its
<a href="http://github.com/keplerproject/luafilesystem">Github</a>
page.</p>
<h2><a name="history"></a>History</h2>
<dl class="history">
<dt><strong>Version 1.6.3</strong> [15/Jan/2015]</dt>
<dd><ul>
<li>Lua 5.3 support.</li>
<li>Assorted bugfixes.</li>
</ul></dd>
<dt><strong>Version 1.6.2</strong> [??/Oct/2012]</dt>
<dd><ul>
<li>Full Lua 5.2 compatibility (with Lua 5.1 fallbacks)</li>
</ul></dd>
<dt><strong>Version 1.6.1</strong> [01/Oct/2012]</dt>
<dd><ul>
<li>fix build for Lua 5.2</li>
</ul></dd>
<dt><strong>Version 1.6.0</strong> [26/Sep/2012]</dt>
<dd><ul>
<li>getcwd fix for Android</li>
<li>support for Lua 5.2</li>
<li>add lfs.link</li>
<li>other bug fixes</li>
</ul></dd>
<dt><strong>Version 1.5.0</strong> [20/Oct/2009]</dt>
<dd><ul>
<li>Added explicit next and close methods to second return value of lfs.dir
(the directory object), for explicit iteration or explicit closing.</li>
<li>Added directory locking via lfs.lock_dir function (see the <a href="manual.html">manual</a>).</li>
</ul></dd>
<dt><strong>Version 1.4.2</strong> [03/Feb/2009]</dt>
<dd>
<ul>
<li>fixed bug [<a href="http://luaforge.net/tracker/?func=detail&amp;group_id=66&amp;aid=13198&amp;atid=356">#13198</a>]
lfs.attributes(filename, 'size') overflow on files > 2 Gb again (bug report and patch by KUBO Takehiro).</li>
<li>fixed bug [<a href="http://luaforge.net/tracker/?group_id=66&amp;atid=356&amp;func=detail&amp;aid=39794">#39794</a>]
Compile error on Solaris 10 (bug report and patch by Aaron B).</li>
<li>fixed compilation problems with Borland C.</li>
</ul>
</dd>
<dt><strong>Version 1.4.1</strong> [07/May/2008]</dt>
<dd>
<ul>
<li>documentation review</li>
<li>fixed Windows compilation issues</li>
<li>fixed bug in the Windows tests (patch by Shmuel Zeigerman)</li>
<li>fixed bug [<a href="http://luaforge.net/tracker/?func=detail&amp;group_id=66&amp;aid=2185&amp;atid=356">#2185</a>]
<code>lfs.attributes(filename, 'size')</code> overflow on files > 2 Gb
</li>
</ul>
</dd>
<dt><strong>Version 1.4.0</strong> [13/Feb/2008]</dt>
<dd>
<ul>
<li>added function
<a href="manual.html#setmode"><code>lfs.setmode</code></a>
(works only in Windows systems).</li>
<li><a href="manual.html#attributes"><code>lfs.attributes</code></a>
raises an error if attribute does not exist</li>
</ul>
</dd>
<dt><strong>Version 1.3.0</strong> [26/Oct/2007]</dt>
<dd>
<ul>
<li>added function
<a href="manual.html#symlinkattributes"><code>lfs.symlinkattributes</code></a>
(works only in non Windows systems).</li>
</ul>
</dd>
<dt><strong>Version 1.2.1</strong> [08/May/2007]</dt>
<dd>
<ul>
<li>compatible only with Lua 5.1 (Lua 5.0 support was dropped)</li>
</ul>
</dd>
<dt><strong>Version 1.2</strong> [15/Mar/2006]</dt>
<dd>
<ul>
<li>added optional argument to
<a href="manual.html#attributes"><code>lfs.attributes</code></a></li>
<li>added function
<a href="manual.html#rmdir"><code>lfs.rmdir</code></a></li>
<li>bug correction on <a href="manual.html#dir"><code>lfs.dir</code></a></li>
</ul>
</dd>
<dt><strong>Version 1.1</strong> [30/May/2005]</dt>
<dd>
<ul>
<li>added function <a href="manual.html#touch"><code>lfs.touch</code></a>.</li>
</ul>
</dd>
<dt><strong>Version 1.0</strong> [21/Jan/2005]</dt>
<dd />
<dt><strong>Version 1.0 Beta</strong> [10/Nov/2004]</dt>
<dd />
</dl>
<h2><a name="credits"></a>Credits</h2>
<p>LuaFileSystem was designed by Roberto Ierusalimschy,
Andr&eacute; Carregal and Tom&aacute;s Guisasola as part of the
<a href="http://www.keplerproject.org">Kepler Project</a>,
which holds its copyright. LuaFileSystem is currently maintained by F&aacute;bio Mascarenhas.</p>
<h2><a name="contact"></a>Contact us</h2>
<p>For more information please
<a href="mailto:info-NO-SPAM-THANKS@keplerproject.org">contact us</a>.
Comments are welcome!</p>
<p>You can also reach other Kepler developers and users on the Kepler Project
<a href="http://luaforge.net/mail/?group_id=104">mailing list</a>.</p>
</div> <!-- id="content" -->
</div> <!-- id="main" -->
<div id="about">
<p><a href="http://validator.w3.org/check?uri=referer">Valid XHTML 1.0!</a></p>
<p><small>$Id: index.html,v 1.44 2009/02/04 21:21:33 carregal Exp $</small></p>
</div> <!-- id="about" -->
</div> <!-- id="container" -->
</body>
</html>

View File

@ -0,0 +1,122 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>LuaFileSystem</title>
<link rel="stylesheet" href="doc.css" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<div id="container">
<div id="product">
<div id="product_logo">
<a href="http://www.keplerproject.org">
<img alt="LuaFileSystem" src="luafilesystem.png"/>
</a>
</div>
<div id="product_name"><big><strong>LuaFileSystem</strong></big></div>
<div id="product_description">File System Library for the Lua Programming Language</div>
</div> <!-- id="product" -->
<div id="main">
<div id="navigation">
<h1>LuaFileSystem</h1>
<ul>
<li><a href="index.html">Home</a>
<ul>
<li><a href="index.html#overview">Overview</a></li>
<li><a href="index.html#status">Status</a></li>
<li><a href="index.html#download">Download</a></li>
<li><a href="index.html#history">History</a></li>
<li><a href="index.html#credits">Credits</a></li>
<li><a href="index.html#contact">Contact us</a></li>
</ul>
</li>
<li><a href="manual.html">Manual</a>
<ul>
<li><a href="manual.html#introduction">Introduction</a></li>
<li><a href="manual.html#building">Building</a></li>
<li><a href="manual.html#installation">Installation</a></li>
<li><a href="manual.html#reference">Reference</a></li>
</ul>
</li>
<li><a href="examples.html">Examples</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Project</a>
<ul>
<li><a href="https://github.com/keplerproject/luafilesystem/issues/">Bug Tracker</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Git</a></li>
</ul>
</li>
<li><strong>License</strong></li>
</ul>
</div> <!-- id="navigation" -->
<div id="content">
<h1>License</h1>
<p>
LuaFileSystem is free software: it can be used for both academic
and commercial purposes at absolutely no cost. There are no
royalties or GNU-like "copyleft" restrictions. LuaFileSystem
qualifies as
<a href="http://www.opensource.org/docs/definition.html">Open Source</a>
software.
Its licenses are compatible with
<a href="http://www.gnu.org/licenses/gpl.html">GPL</a>.
LuaFileSystem is not in the public domain and the
<a href="http://www.keplerproject.org">Kepler Project</a>
keep its copyright.
The legal details are below.
</p>
<p>The spirit of the license is that you are free to use
LuaFileSystem for any purpose at no cost without having to ask us.
The only requirement is that if you do use LuaFileSystem, then you
should give us credit by including the appropriate copyright notice
somewhere in your product or its documentation.</p>
<p>The LuaFileSystem library is designed and implemented by Roberto
Ierusalimschy, Andr&eacute; Carregal and Tom&aacute;s Guisasola.
The implementation is not derived from licensed software.</p>
<hr/>
<p>Copyright &copy; 2003 Kepler Project.</p>
<p>Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use, copy,
modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:</p>
<p>The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.</p>
<p>THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.</p>
</div> <!-- id="content" -->
</div> <!-- id="main" -->
<div id="about">
<p><a href="http://validator.w3.org/check?uri=referer">Valid XHTML 1.0!</a></p>
<p><small>$Id: license.html,v 1.13 2008/02/11 22:42:21 carregal Exp $</small></p>
</div><!-- id="about" -->
</div><!-- id="container" -->
</body>
</html>

Binary file not shown.


View File

@ -0,0 +1,280 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<title>LuaFileSystem</title>
<link rel="stylesheet" href="doc.css" type="text/css"/>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
</head>
<body>
<div id="container">
<div id="product">
<div id="product_logo">
<a href="http://www.keplerproject.org"><img alt="LuaFileSystem" src="luafilesystem.png"/></a>
</div>
<div id="product_name"><big><strong>LuaFileSystem</strong></big></div>
<div id="product_description">File System Library for the Lua Programming Language</div>
</div> <!-- id="product" -->
<div id="main">
<div id="navigation">
<h1>LuaFileSystem</h1>
<ul>
<li><a href="index.html">Home</a>
<ul>
<li><a href="index.html#overview">Overview</a></li>
<li><a href="index.html#status">Status</a></li>
<li><a href="index.html#download">Download</a></li>
<li><a href="index.html#history">History</a></li>
<li><a href="index.html#credits">Credits</a></li>
<li><a href="index.html#contact">Contact us</a></li>
</ul>
</li>
<li><strong>Manual</strong>
<ul>
<li><a href="manual.html#introduction">Introduction</a></li>
<li><a href="manual.html#building">Building</a></li>
<li><a href="manual.html#installation">Installation</a></li>
<li><a href="manual.html#reference">Reference</a></li>
</ul>
</li>
<li><a href="examples.html">Examples</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Project</a>
<ul>
<li><a href="https://github.com/keplerproject/luafilesystem/issues">Bug Tracker</a></li>
<li><a href="https://github.com/keplerproject/luafilesystem">Git</a></li>
</ul>
</li>
<li><a href="license.html">License</a></li>
</ul>
</div> <!-- id="navigation" -->
<div id="content">
<h2><a name="introduction"></a>Introduction</h2>
<p>LuaFileSystem is a <a href="http://www.lua.org">Lua</a> library
developed to complement the set of functions related to file
systems offered by the standard Lua distribution.</p>
<p>LuaFileSystem offers a portable way to access
the underlying directory structure and file attributes.</p>
<h2><a name="building"></a>Building</h2>
<p>
LuaFileSystem should be built with Lua 5.1 so the language library
and header files for the target version must be installed properly.
</p>
<p>
LuaFileSystem offers a Makefile and a separate configuration file,
<code>config</code>,
which should be edited to suit your installation before running
<code>make</code>.
The file has some definitions like paths to the external libraries,
compiler options and the like.
</p>
<p>On Windows, the C runtime used to compile LuaFileSystem must be the same
runtime that Lua uses, or some LuaFileSystem functions will not work.</p>
<h2><a name="installation"></a>Installation</h2>
<p>The easiest way to install LuaFileSystem is to use LuaRocks:</p>
<pre class="example">
luarocks install luafilesystem
</pre>
<p>If you prefer to install LuaFileSystem manually, the compiled binary should be copied to a directory in your
<a href="http://www.lua.org/manual/5.1/manual.html#pdf-package.cpath">C path</a>.</p>
<h2><a name="reference"></a>Reference</h2>
<p>
LuaFileSystem offers the following functions:
</p>
<dl class="reference">
<dt><a name="attributes"></a><strong><code>lfs.attributes (filepath [, aname])</code></strong></dt>
<dd>Returns a table with the file attributes corresponding to
<code>filepath</code> (or <code>nil</code> followed by an error message
in case of error).
If the second optional argument is given, then only the value of the
named attribute is returned (this use is equivalent to
<code>lfs.attributes(filepath).aname</code>, but the table is not created
and only one attribute is retrieved from the O.S.).
The attributes are described as follows:
attribute <code>mode</code> is a string, all the others are numbers,
and the time related attributes use the same time reference of
<a href="http://www.lua.org/manual/5.1/manual.html#pdf-os.time"><code>os.time</code></a>:
<dl>
<dt><strong><code>dev</code></strong></dt>
<dd>on Unix systems, this represents the device that the inode resides on. On Windows systems,
represents the drive number of the disk containing the file</dd>
<dt><strong><code>ino</code></strong></dt>
<dd>on Unix systems, this represents the inode number. On Windows systems this has no meaning</dd>
<dt><strong><code>mode</code></strong></dt>
<dd>string representing the associated protection mode (the values could be
<code>file</code>, <code>directory</code>, <code>link</code>, <code>socket</code>,
<code>named pipe</code>, <code>char device</code>, <code>block device</code> or
<code>other</code>)</dd>
<dt><strong><code>nlink</code></strong></dt>
<dd>number of hard links to the file</dd>
<dt><strong><code>uid</code></strong></dt>
<dd>user-id of owner (Unix only, always 0 on Windows)</dd>
<dt><strong><code>gid</code></strong></dt>
<dd>group-id of owner (Unix only, always 0 on Windows)</dd>
<dt><strong><code>rdev</code></strong></dt>
<dd>on Unix systems, represents the device type, for special file inodes.
On Windows systems represents the same as <code>dev</code></dd>
<dt><strong><code>access</code></strong></dt>
<dd>time of last access</dd>
<dt><strong><code>modification</code></strong></dt>
<dd>time of last data modification</dd>
<dt><strong><code>change</code></strong></dt>
<dd>time of last file status change</dd>
<dt><strong><code>size</code></strong></dt>
<dd>file size, in bytes</dd>
<dt><strong><code>blocks</code></strong></dt>
<dd>blocks allocated for the file (Unix only)</dd>
<dt><strong><code>blksize</code></strong></dt>
<dd>optimal file system I/O block size (Unix only)</dd>
</dl>
This function uses <code>stat</code> internally thus if the given
<code>filepath</code> is a symbolic link, it is followed (if it points to
another link the chain is followed recursively) and the information
is about the file it refers to.
To obtain information about the link itself, see function
<a href="#symlinkattributes">lfs.symlinkattributes</a>.
</dd>
<dt><a name="chdir"></a><strong><code>lfs.chdir (path)</code></strong></dt>
<dd>Changes the current working directory to the given
<code>path</code>.<br />
Returns <code>true</code> in case of success or <code>nil</code> plus an
error string.</dd>
<dt><a name="chdir"></a><strong><code>lfs.lock_dir(path, [seconds_stale])</code></strong></dt>
<dd>Creates a lockfile (called lockfile.lfs) in <code>path</code> if it does not
exist and returns the lock. If the lock already exists checks if
it's stale, using the second parameter (default for the second
parameter is <code>INT_MAX</code>, which in practice means the lock will never
be stale. To free the the lock call <code>lock:free()</code>. <br/>
In case of any errors it returns nil and the error message. In
particular, if the lock exists and is not stale it returns the
"File exists" message.</dd>
<dt><a name="getcwd"></a><strong><code>lfs.currentdir ()</code></strong></dt>
<dd>Returns a string with the current working directory or <code>nil</code>
plus an error string.</dd>
<dt><a name="dir"></a><strong><code>iter, dir_obj = lfs.dir (path)</code></strong></dt>
<dd>
Lua iterator over the entries of a given directory.
Each time the iterator is called with <code>dir_obj</code> it returns a directory entry's name as a string, or
<code>nil</code> if there are no more entries. You can also iterate by calling <code>dir_obj:next()</code>, and
explicitly close the directory before the iteration finished with <code>dir_obj:close()</code>.
Raises an error if <code>path</code> is not a directory.
</dd>
<dt><a name="lock"></a><strong><code>lfs.lock (filehandle, mode[, start[, length]])</code></strong></dt>
<dd>Locks a file or a part of it. This function works on <em>open files</em>; the
file handle should be specified as the first argument.
The string <code>mode</code> could be either
<code>r</code> (for a read/shared lock) or <code>w</code> (for a
write/exclusive lock). The optional arguments <code>start</code>
and <code>length</code> can be used to specify a starting point and
its length; both should be numbers.<br />
Returns <code>true</code> if the operation was successful; in
case of error, it returns <code>nil</code> plus an error string.
</dd>
<dt><a name="link"></a><strong><code>lfs.link (old, new[, symlink])</code></strong></dt>
<dd>Creates a link. The first argument is the object to link to
and the second is the name of the link. If the optional third
argument is true, the link will be a symbolic link (by default, a
hard link is created).
</dd>
<dt><a name="mkdir"></a><strong><code>lfs.mkdir (dirname)</code></strong></dt>
<dd>Creates a new directory. The argument is the name of the new
directory.<br />
Returns <code>true</code> if the operation was successful;
in case of error, it returns <code>nil</code> plus an error string.
</dd>
<dt><a name="rmdir"></a><strong><code>lfs.rmdir (dirname)</code></strong></dt>
<dd>Removes an existing directory. The argument is the name of the directory.<br />
Returns <code>true</code> if the operation was successful;
in case of error, it returns <code>nil</code> plus an error string.</dd>
<dt><a name="setmode"></a><strong><code>lfs.setmode (file, mode)</code></strong></dt>
<dd>Sets the writing mode for a file. The mode string can be either <code>"binary"</code> or <code>"text"</code>.
Returns <code>true</code> followed by the previous mode string for the file, or
<code>nil</code> followed by an error string in case of errors.
On non-Windows platforms, where the two modes are identical,
setting the mode has no effect, and the mode is always returned as <code>binary</code>.
</dd>
<dt><a name="symlinkattributes"></a><strong><code>lfs.symlinkattributes (filepath [, aname])</code></strong></dt>
<dd>Identical to <a href="#attributes">lfs.attributes</a> except that
it obtains information about the link itself (not the file it refers to).
On Windows this function does not yet support links, and is identical to
<code>lfs.attributes</code>.
</dd>
<dt><a name="touch"></a><strong><code>lfs.touch (filepath [, atime [, mtime]])</code></strong></dt>
<dd>Set access and modification times of a file. This function is
a bind to <code>utime</code> function. The first argument is the
filename, the second argument (<code>atime</code>) is the access time,
and the third argument (<code>mtime</code>) is the modification time.
Both times are provided in seconds (which should be generated with
Lua standard function <code>os.time</code>).
If the modification time is omitted, the access time provided is used;
if both times are omitted, the current time is used.<br />
Returns <code>true</code> if the operation was successful;
in case of error, it returns <code>nil</code> plus an error string.
</dd>
<dt><a name="unlock"></a><strong><code>lfs.unlock (filehandle[, start[, length]])</code></strong></dt>
<dd>Unlocks a file or a part of it. This function works on
<em>open files</em>; the file handle should be specified as the first
argument. The optional arguments <code>start</code> and
<code>length</code> can be used to specify a starting point and its
length; both should be numbers.<br />
Returns <code>true</code> if the operation was successful;
in case of error, it returns <code>nil</code> plus an error string.
</dd>
</dl>
</div> <!-- id="content" -->
</div> <!-- id="main" -->
<div id="about">
<p><a href="http://validator.w3.org/check?uri=referer">Valid XHTML 1.0!</a></p>
<p><small>$Id: manual.html,v 1.45 2009/06/03 20:53:55 mascarenhas Exp $</small></p>
</div> <!-- id="about" -->
</div> <!-- id="container" -->
</body>
</html>

View File

@ -0,0 +1,29 @@
package = "LuaFileSystem"
version = "1.6.3-2"
source = {
url = "git://github.com/keplerproject/luafilesystem",
tag = "v_1_6_3"
}
description = {
summary = "File System Library for the Lua Programming Language",
detailed = [[
LuaFileSystem is a Lua library developed to complement the set of
functions related to file systems offered by the standard Lua
distribution. LuaFileSystem offers a portable way to access the
underlying directory structure and file attributes.
]],
homepage = "http://keplerproject.github.io/luafilesystem",
license = "MIT/X11"
}
dependencies = {
"lua >= 5.1"
}
build = {
type = "builtin",
modules = {
lfs = "src/lfs.c"
},
copy_directories = {
"doc", "tests"
}
}

View File

@ -0,0 +1,19 @@
rock_manifest = {
doc = {
us = {
["doc.css"] = "d0a913514fb190240b3b4033d105cbc0",
["examples.html"] = "5832f72021728374cf57b621d62ce0ff",
["index.html"] = "96885bdda963939f0a363b5fa6b16b59",
["license.html"] = "e3a756835cb7c8ae277d5e513c8e32ee",
["luafilesystem.png"] = "81e923e976e99f894ea0aa8b52baff29",
["manual.html"] = "d6473799b73ce486c3ea436586cb3b34"
}
},
lib = {
["lfs.dll"] = "c0e2145e1ef2815ae5fae01454291b66"
},
["luafilesystem-1.6.3-2.rockspec"] = "eb0ef7c190516892eb8357af799eea5f",
tests = {
["test.lua"] = "7b4ddb5bdb7e0b1b1ed0150d473535c9"
}
}

View File

@ -0,0 +1,175 @@
#!/usr/bin/env lua5.1
local tmp = "/tmp"
local sep = string.match (package.config, "[^\n]+")
local upper = ".."
local lfs = require"lfs"
print (lfs._VERSION)
io.write(".")
io.flush()
function attrdir (path)
for file in lfs.dir(path) do
if file ~= "." and file ~= ".." then
local f = path..sep..file
print ("\t=> "..f.." <=")
local attr = lfs.attributes (f)
assert (type(attr) == "table")
if attr.mode == "directory" then
attrdir (f)
else
for name, value in pairs(attr) do
print (name, value)
end
end
end
end
end
-- Checking changing directories
local current = assert (lfs.currentdir())
local reldir = string.gsub (current, "^.*%"..sep.."([^"..sep.."])$", "%1")
assert (lfs.chdir (upper), "could not change to upper directory")
assert (lfs.chdir (reldir), "could not change back to current directory")
assert (lfs.currentdir() == current, "error trying to change directories")
assert (lfs.chdir ("this couldn't be an actual directory") == nil, "could change to a non-existent directory")
io.write(".")
io.flush()
-- Changing creating and removing directories
local tmpdir = current..sep.."lfs_tmp_dir"
local tmpfile = tmpdir..sep.."tmp_file"
-- Test for existence of a previous lfs_tmp_dir
-- that may have resulted from an interrupted test execution and remove it
if lfs.chdir (tmpdir) then
assert (lfs.chdir (upper), "could not change to upper directory")
assert (os.remove (tmpfile), "could not remove file from previous test")
assert (lfs.rmdir (tmpdir), "could not remove directory from previous test")
end
io.write(".")
io.flush()
-- tries to create a directory
assert (lfs.mkdir (tmpdir), "could not make a new directory")
local attrib, errmsg = lfs.attributes (tmpdir)
if not attrib then
error ("could not get attributes of file `"..tmpdir.."':\n"..errmsg)
end
local f = io.open(tmpfile, "w")
f:close()
io.write(".")
io.flush()
-- Change access time
local testdate = os.time({ year = 2007, day = 10, month = 2, hour=0})
assert (lfs.touch (tmpfile, testdate))
local new_att = assert (lfs.attributes (tmpfile))
assert (new_att.access == testdate, "could not set access time")
assert (new_att.modification == testdate, "could not set modification time")
io.write(".")
io.flush()
-- Change access and modification time
local testdate1 = os.time({ year = 2007, day = 10, month = 2, hour=0})
local testdate2 = os.time({ year = 2007, day = 11, month = 2, hour=0})
assert (lfs.touch (tmpfile, testdate2, testdate1))
local new_att = assert (lfs.attributes (tmpfile))
assert (new_att.access == testdate2, "could not set access time")
assert (new_att.modification == testdate1, "could not set modification time")
io.write(".")
io.flush()
-- Checking link (does not work on Windows)
if lfs.link (tmpfile, "_a_link_for_test_", true) then
assert (lfs.attributes"_a_link_for_test_".mode == "file")
assert (lfs.symlinkattributes"_a_link_for_test_".mode == "link")
assert (lfs.link (tmpfile, "_a_hard_link_for_test_"))
assert (lfs.attributes (tmpfile, "nlink") == 2)
assert (os.remove"_a_link_for_test_")
assert (os.remove"_a_hard_link_for_test_")
end
io.write(".")
io.flush()
-- Checking text/binary modes (only has an effect in Windows)
local f = io.open(tmpfile, "w")
local result, mode = lfs.setmode(f, "binary")
assert(result) -- on non-Windows platforms, mode is always returned as "binary"
result, mode = lfs.setmode(f, "text")
assert(result and mode == "binary")
f:close()
io.write(".")
io.flush()
-- Restore access time to current value
assert (lfs.touch (tmpfile, attrib.access, attrib.modification))
new_att = assert (lfs.attributes (tmpfile))
assert (new_att.access == attrib.access)
assert (new_att.modification == attrib.modification)
io.write(".")
io.flush()
-- Check consistency of lfs.attributes values
local attr = lfs.attributes (tmpfile)
for key, value in pairs(attr) do
assert (value == lfs.attributes (tmpfile, key),
"lfs.attributes values not consistent")
end
-- Remove new file and directory
assert (os.remove (tmpfile), "could not remove new file")
assert (lfs.rmdir (tmpdir), "could not remove new directory")
assert (lfs.mkdir (tmpdir..sep.."lfs_tmp_dir") == nil, "could create a directory inside a non-existent one")
io.write(".")
io.flush()
-- Trying to get attributes of a non-existent file
assert (lfs.attributes ("this couldn't be an actual file") == nil, "could get attributes of a non-existent file")
assert (type(lfs.attributes (upper)) == "table", "couldn't get attributes of upper directory")
io.write(".")
io.flush()
-- Stressing directory iterator
local count = 0
for i = 1, 4000 do
for file in lfs.dir (tmp) do
count = count + 1
end
end
io.write(".")
io.flush()
-- Stressing directory iterator, explicit version
count = 0
for i = 1, 4000 do
local iter, dir = lfs.dir(tmp)
local file = dir:next()
while file do
count = count + 1
file = dir:next()
end
assert(not pcall(dir.next, dir))
end
io.write(".")
io.flush()
-- directory explicit close
local iter, dir = lfs.dir(tmp)
dir:close()
assert(not pcall(dir.next, dir))
print"Ok!"

View File

@ -0,0 +1,653 @@
#!/usr/bin/env lua
---------
-- LuaSrcDiet
--
-- Compresses Lua source code by removing unnecessary characters.
-- For Lua 5.1+ source code.
--
-- **Notes:**
--
-- * Remember to update version and date information below (MSG_TITLE).
-- * TODO: passing data tables around is a horrific mess.
-- * TODO: to implement pcall() to properly handle lexer etc. errors.
-- * TODO: need some automatic testing for a semblance of sanity.
-- * TODO: the plugin module is highly experimental and unstable.
----
local equiv = require "luasrcdiet.equiv"
local fs = require "luasrcdiet.fs"
local llex = require "luasrcdiet.llex"
local lparser = require "luasrcdiet.lparser"
local luasrcdiet = require "luasrcdiet.init"
local optlex = require "luasrcdiet.optlex"
local optparser = require "luasrcdiet.optparser"
local byte = string.byte
local concat = table.concat
local find = string.find
local fmt = string.format
local gmatch = string.gmatch
local match = string.match
local print = print
local rep = string.rep
local sub = string.sub
local plugin
local LUA_VERSION = match(_VERSION, " (5%.[123])$") or "5.1"
-- Is --opt-binequiv available for this Lua version?
local BIN_EQUIV_AVAIL = LUA_VERSION == "5.1" and not package.loaded.jit
---------------------- Messages and textual data ----------------------
local MSG_TITLE = fmt([[
LuaSrcDiet: Puts your Lua 5.1+ source code on a diet
Version %s <%s>
]], luasrcdiet._VERSION, luasrcdiet._HOMEPAGE)
local MSG_USAGE = [[
usage: luasrcdiet [options] [filenames]
example:
>luasrcdiet myscript.lua -o myscript_.lua
options:
-v, --version prints version information
-h, --help prints usage information
-o <file> specify file name to write output
-s <suffix> suffix for output files (default '_')
--keep <msg> keep block comment with <msg> inside
--plugin <module> run <module> in plugin/ directory
- stop handling arguments
(optimization levels)
--none all optimizations off (normalizes EOLs only)
--basic lexer-based optimizations only
--maximum maximize reduction of source
(informational)
--quiet process files quietly
--read-only read file and print token stats only
--dump-lexer dump raw tokens from lexer to stdout
--dump-parser dump variable tracking tables from parser
--details extra info (strings, numbers, locals)
features (to disable, insert 'no' prefix like --noopt-comments):
%s
default settings:
%s]]
-- Optimization options, for ease of switching on and off.
--
-- * Positive to enable optimization, negative (no) to disable.
-- * These options should follow --opt-* and --noopt-* style for now.
local OPTION = [[
--opt-comments,'remove comments and block comments'
--opt-whitespace,'remove whitespace excluding EOLs'
--opt-emptylines,'remove empty lines'
--opt-eols,'all above, plus remove unnecessary EOLs'
--opt-strings,'optimize strings and long strings'
--opt-numbers,'optimize numbers'
--opt-locals,'optimize local variable names'
--opt-entropy,'tries to reduce symbol entropy of locals'
--opt-srcequiv,'insist on source (lexer stream) equivalence'
--opt-binequiv,'insist on binary chunk equivalence (only for PUC Lua 5.1)'
--opt-experimental,'apply experimental optimizations'
]]
-- Preset configuration.
local DEFAULT_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-numbers --opt-locals
--opt-srcequiv --noopt-binequiv
]]
-- Override configurations: MUST explicitly enable/disable everything.
local BASIC_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals --noopt-entropy
--opt-srcequiv --noopt-binequiv
]]
local MAXIMUM_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-eols --opt-strings --opt-numbers
--opt-locals --opt-entropy
--opt-srcequiv
]] .. (BIN_EQUIV_AVAIL and ' --opt-binequiv' or ' --noopt-binequiv')
local NONE_CONFIG = [[
--noopt-comments --noopt-whitespace --noopt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals --noopt-entropy
--opt-srcequiv --noopt-binequiv
]]
local DEFAULT_SUFFIX = "_" -- default suffix for file renaming
local PLUGIN_SUFFIX = "luasrcdiet.plugin." -- relative location of plugins
------------- Startup and initialize option list handling -------------
--- Simple error message handler; change to error if traceback wanted.
--
-- @tparam string msg The message to print.
local function die(msg)
print("LuaSrcDiet (error): "..msg); os.exit(1)
end
--die = error--DEBUG
-- Prepare text for list of optimizations, prepare lookup table.
local MSG_OPTIONS = ""
do
local WIDTH = 24
local o = {}
for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
local msg = " "..op
msg = msg..rep(" ", WIDTH - #msg)..desc.."\n"
MSG_OPTIONS = MSG_OPTIONS..msg
o[op] = true
o["--no"..sub(op, 3)] = true
end
OPTION = o -- replace OPTION with lookup table
end
MSG_USAGE = fmt(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)
--------- Global variable initialization, option set handling ---------
local suffix = DEFAULT_SUFFIX -- file suffix
local option = {} -- program options
local stat_c, stat_l -- statistics tables
--- Sets option lookup table based on a text list of options.
--
-- Note: additional forced settings for --opt-eols are done in optlex.lua.
--
-- @tparam string CONFIG
local function set_options(CONFIG)
for op in gmatch(CONFIG, "(%-%-%S+)") do
if sub(op, 3, 4) == "no" and -- handle negative options
OPTION["--"..sub(op, 5)] then
option[sub(op, 5)] = false
else
option[sub(op, 3)] = true
end
end
end
-------------------------- Support functions --------------------------
-- List of token types, parser-significant types are up to TTYPE_GRAMMAR
-- while the rest are not used by parsers; arranged for stats display.
local TTYPES = {
"TK_KEYWORD", "TK_NAME", "TK_NUMBER", -- grammar
"TK_STRING", "TK_LSTRING", "TK_OP",
"TK_EOS",
"TK_COMMENT", "TK_LCOMMENT", -- non-grammar
"TK_EOL", "TK_SPACE",
}
local TTYPE_GRAMMAR = 7
local EOLTYPES = { -- EOL names for token dump
["\n"] = "LF", ["\r"] = "CR",
["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
}
--- Reads source code from the file.
--
-- @tparam string fname Path of the file to read.
-- @treturn string Content of the file.
local function load_file(fname)
local data, err = fs.read_file(fname, "rb")
if not data then die(err) end
return data
end
--- Saves source code to the file.
--
-- @tparam string fname Path of the destination file.
-- @tparam string dat The data to write into the file.
local function save_file(fname, dat)
local ok, err = fs.write_file(fname, dat, "wb")
if not ok then die(err) end
end
------------------ Functions to deal with statistics ------------------
--- Initializes the statistics table.
local function stat_init()
stat_c, stat_l = {}, {}
for i = 1, #TTYPES do
local ttype = TTYPES[i]
stat_c[ttype], stat_l[ttype] = 0, 0
end
end
--- Adds a token to the statistics table.
--
-- @tparam string tok The token.
-- @param seminfo
local function stat_add(tok, seminfo)
stat_c[tok] = stat_c[tok] + 1
stat_l[tok] = stat_l[tok] + #seminfo
end
--- Computes totals for the statistics table, returns average table.
--
-- @treturn table
local function stat_calc()
local function avg(c, l) -- safe average function
if c == 0 then return 0 end
return l / c
end
local stat_a = {}
local c, l = 0, 0
for i = 1, TTYPE_GRAMMAR do -- total grammar tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
end
stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l
stat_a.TOTAL_TOK = avg(c, l)
c, l = 0, 0
for i = 1, #TTYPES do -- total all tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype])
end
stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l
stat_a.TOTAL_ALL = avg(c, l)
return stat_a
end
----------------------------- Main tasks -----------------------------
--- A simple token dumper, minimal translation of seminfo data.
--
-- @tparam string srcfl Path of the source file.
local function dump_tokens(srcfl)
-- Load file and process source input into tokens.
local z = load_file(srcfl)
local toklist, seminfolist = llex.lex(z)
-- Display output.
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
if tok == "TK_OP" and byte(seminfo) < 32 then
seminfo = "("..byte(seminfo)..")"
elseif tok == "TK_EOL" then
seminfo = EOLTYPES[seminfo]
else
seminfo = "'"..seminfo.."'"
end
print(tok.." "..seminfo)
end--for
end
--- Dumps globalinfo and localinfo tables.
--
-- @tparam string srcfl Path of the source file.
local function dump_parser(srcfl)
-- Load file and process source input into tokens,
local z = load_file(srcfl)
local toklist, seminfolist, toklnlist = llex.lex(z)
-- Do parser optimization here.
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
local globalinfo, localinfo = xinfo.globalinfo, xinfo.localinfo
-- Display output.
local hl = rep("-", 72)
print("*** Local/Global Variable Tracker Tables ***")
print(hl.."\n GLOBALS\n"..hl)
-- global tables have a list of xref numbers only
for i = 1, #globalinfo do
local obj = globalinfo[i]
local msg = "("..i..") '"..obj.name.."' -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
-- Local tables have xref numbers and a few other special
-- numbers that are specially named: decl (declaration xref),
-- act (activation xref), rem (removal xref).
print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
for i = 1, #localinfo do
local obj = localinfo[i]
local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl..
" act:"..obj.act.." rem:"..obj.rem
if obj.is_special then
msg = msg.." is_special"
end
msg = msg.." -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
print(hl.."\n")
end
--- Reads source file(s) and reports some statistics.
--
-- @tparam string srcfl Path of the source file.
local function read_only(srcfl)
-- Load file and process source input into tokens.
local z = load_file(srcfl)
local toklist, seminfolist = llex.lex(z)
print(MSG_TITLE)
print("Statistics for: "..srcfl.."\n")
-- Collect statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
-- Display output.
local function figures(tt)
return stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f"
local hl = rep("-", 42)
print(fmt(tabf1, "Lexical", "Input", "Input", "Input"))
print(fmt(tabf1, "Elements", "Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl.."\n")
end
--- Processes source file(s), writes output and reports some statistics.
--
-- @tparam string srcfl Path of the source file.
-- @tparam string destfl Path of the destination file where to write optimized source.
local function process_file(srcfl, destfl)
-- handle quiet option
local function print(...) --luacheck: ignore 431
if option.QUIET then return end
_G.print(...)
end
if plugin and plugin.init then -- plugin init
option.EXIT = false
plugin.init(option, srcfl, destfl)
if option.EXIT then return end
end
print(MSG_TITLE) -- title message
-- Load file and process source input into tokens.
local z = load_file(srcfl)
if plugin and plugin.post_load then -- plugin post-load
z = plugin.post_load(z) or z
if option.EXIT then return end
end
local toklist, seminfolist, toklnlist = llex.lex(z)
if plugin and plugin.post_lex then -- plugin post-lex
plugin.post_lex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
-- Collect 'before' statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat1_a = stat_calc()
local stat1_c, stat1_l = stat_c, stat_l
-- Do parser optimization here.
optparser.print = print -- hack
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
if plugin and plugin.post_parse then -- plugin post-parse
plugin.post_parse(xinfo.globalinfo, xinfo.localinfo)
if option.EXIT then return end
end
optparser.optimize(option, toklist, seminfolist, xinfo)
if plugin and plugin.post_optparse then -- plugin post-optparse
plugin.post_optparse()
if option.EXIT then return end
end
-- Do lexer optimization here, save output file.
local warn = optlex.warn -- use this as a general warning lookup
optlex.print = print -- hack
toklist, seminfolist, toklnlist
= optlex.optimize(option, toklist, seminfolist, toklnlist)
if plugin and plugin.post_optlex then -- plugin post-optlex
plugin.post_optlex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
local dat = concat(seminfolist)
-- Depending on options selected, embedded EOLs in long strings and
-- long comments may not have been translated to \n, tack a warning.
if find(dat, "\r\n", 1, 1) or
find(dat, "\n\r", 1, 1) then
warn.MIXEDEOL = true
end
-- Test source and binary chunk equivalence.
equiv.init(option, llex, warn)
equiv.source(z, dat)
if BIN_EQUIV_AVAIL then
equiv.binary(z, dat)
end
local smsg = "before and after lexer streams are NOT equivalent!"
local bmsg = "before and after binary chunks are NOT equivalent!"
-- for reporting, die if option was selected, else just warn
if warn.SRC_EQUIV then
if option["opt-srcequiv"] then die(smsg) end
else
print("*** SRCEQUIV: token streams are sort of equivalent")
if option["opt-locals"] then
print("(but no identifier comparisons since --opt-locals enabled)")
end
print()
end
if warn.BIN_EQUIV then
if option["opt-binequiv"] then die(bmsg) end
elseif BIN_EQUIV_AVAIL then
print("*** BINEQUIV: binary chunks are sort of equivalent")
print()
end
-- Save optimized source stream to output file.
save_file(destfl, dat)
-- Collect 'after' statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
-- Display output.
print("Statistics for: "..srcfl.." -> "..destfl.."\n")
local function figures(tt)
return stat1_c[tt], stat1_l[tt], stat1_a[tt],
stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s",
"%-16s%8d%8d%10.2f%8d%8d%10.2f"
local hl = rep("-", 68)
print("*** lexer-based optimizations summary ***\n"..hl)
print(fmt(tabf1, "Lexical",
"Input", "Input", "Input",
"Output", "Output", "Output"))
print(fmt(tabf1, "Elements",
"Count", "Bytes", "Average",
"Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl)
-- Report warning flags from optimizing process.
if warn.LSTRING then
print("* WARNING: "..warn.LSTRING)
elseif warn.MIXEDEOL then
print("* WARNING: ".."output still contains some CRLF or LFCR line endings")
elseif warn.SRC_EQUIV then
print("* WARNING: "..smsg)
elseif warn.BIN_EQUIV then
print("* WARNING: "..bmsg)
end
print()
end
---------------------------- Main functions ---------------------------
local arg = {...} -- program arguments
set_options(DEFAULT_CONFIG) -- set to default options at beginning
--- Does per-file handling, ships off to tasks.
--
-- @tparam {string,...} fspec List of source files.
local function do_files(fspec)
for i = 1, #fspec do
local srcfl = fspec[i]
local destfl
-- Find and replace extension for filenames.
local extb, exte = find(srcfl, "%.[^%.%\\%/]*$")
local basename, extension = srcfl, ""
if extb and extb > 1 then
basename = sub(srcfl, 1, extb - 1)
extension = sub(srcfl, extb, exte)
end
destfl = basename..suffix..extension
if #fspec == 1 and option.OUTPUT_FILE then
destfl = option.OUTPUT_FILE
end
if srcfl == destfl then
die("output filename identical to input filename")
end
-- Perform requested operations.
if option.DUMP_LEXER then
dump_tokens(srcfl)
elseif option.DUMP_PARSER then
dump_parser(srcfl)
elseif option.READ_ONLY then
read_only(srcfl)
else
process_file(srcfl, destfl)
end
end--for
end
--- The main function.
local function main()
local fspec = {}
local argn, i = #arg, 1
if argn == 0 then
option.HELP = true
end
-- Handle arguments.
while i <= argn do
local o, p = arg[i], arg[i + 1]
local dash = match(o, "^%-%-?")
if dash == "-" then -- single-dash options
if o == "-h" then
option.HELP = true; break
elseif o == "-v" then
option.VERSION = true; break
elseif o == "-s" then
if not p then die("-s option needs suffix specification") end
suffix = p
i = i + 1
elseif o == "-o" then
if not p then die("-o option needs a file name") end
option.OUTPUT_FILE = p
i = i + 1
elseif o == "-" then
break -- ignore rest of args
else
die("unrecognized option "..o)
end
elseif dash == "--" then -- double-dash options
if o == "--help" then
option.HELP = true; break
elseif o == "--version" then
option.VERSION = true; break
elseif o == "--keep" then
if not p then die("--keep option needs a string to match for") end
option.KEEP = p
i = i + 1
elseif o == "--plugin" then
if not p then die("--plugin option needs a module name") end
if option.PLUGIN then die("only one plugin can be specified") end
option.PLUGIN = p
plugin = require(PLUGIN_SUFFIX..p)
i = i + 1
elseif o == "--quiet" then
option.QUIET = true
elseif o == "--read-only" then
option.READ_ONLY = true
elseif o == "--basic" then
set_options(BASIC_CONFIG)
elseif o == "--maximum" then
set_options(MAXIMUM_CONFIG)
elseif o == "--none" then
set_options(NONE_CONFIG)
elseif o == "--dump-lexer" then
option.DUMP_LEXER = true
elseif o == "--dump-parser" then
option.DUMP_PARSER = true
elseif o == "--details" then
option.DETAILS = true
elseif OPTION[o] then -- lookup optimization options
set_options(o)
else
die("unrecognized option "..o)
end
else
fspec[#fspec + 1] = o -- potential filename
end
i = i + 1
end--while
if option.HELP then
print(MSG_TITLE..MSG_USAGE); return true
elseif option.VERSION then
print(MSG_TITLE); return true
end
if option["opt-binequiv"] and not BIN_EQUIV_AVAIL then
die("--opt-binequiv is available only for PUC Lua 5.1!")
end
if #fspec > 0 then
if #fspec > 1 and option.OUTPUT_FILE then
die("with -o, only one source file can be specified")
end
do_files(fspec)
return true
else
die("nothing to do!")
end
end
-- entry point -> main() -> do_files()
if not main() then
die("Please run with option -h or --help for usage information")
end

View File

@ -0,0 +1,300 @@
= Features and Usage
Kein-Hong Man
2011-09-13
== Features
LuaSrcDiet features include the following:
* Predefined default, _--basic_ (token-only) and _--maximum_ settings.
* Avoid deleting a block comment containing a given message using _--keep_; this is meant for copyright or license texts.
* Special handling for `#!` (shbang) lines and for implicit `self` parameters in functions.
* Dumping of raw information using _--dump-lexer_ and _--dump-parser_.
See the `samples` directory.
* An HTML plugin: outputs files that highlight globals and locals, useful for eliminating globals. See the `samples` directory.
* An SLOC plugin: counts significant lines of Lua code, like SLOCCount.
* Source and binary equivalence testing with _--opt-srcequiv_ and _--opt-binequiv_.
List of optimizations:
* Line endings are always normalized to LF, except those embedded in comments or strings.
* _--opt-comments_: Removal of comments and comment blocks.
* _--opt-whitespace_: Removal of whitespace, excluding end-of-line characters.
* _--opt-emptylines_: Removal of empty lines.
* _--opt-eols_: Removal of unnecessary end-of-line characters.
* _--opt-strings_: Rewrite strings and long strings. See the `samples` directory.
* _--opt-numbers_: Rewrite numbers. See the `samples` directory.
* _--opt-locals_: Rename local variable names. Does not rename field or method names.
* _--opt-entropy_: Tries to improve symbol entropy when renaming locals by calculating actual letter frequencies.
* _--opt-experimental_: Apply experimental optimizations.
LuaSrcDiet tries to allow each option to be enabled or disabled separately, but they are not completely orthogonal.
If comment removal is disabled, LuaSrcDiet only removes trailing whitespace.
Trailing whitespace is not removed in long strings; a warning is generated instead.
If empty line removal is disabled, LuaSrcDiet keeps all significant code on the same lines.
Thus, a user is able to debug using the original sources as a reference since the line numbering is unchanged.
String optimization deals mainly with optimizing escape sequences, but delimiters can be switched between single quotes and double quotes if the source size of the string can be reduced.
For long strings and long comments, LuaSrcDiet also tries to reduce the `=` separators in the
delimiters if possible.
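For instance (a hand-made illustration, not actual tool output):

[source, lua]
----
-- escape simplification, and delimiter switching when it saves bytes
local a = "don\'t"       -- can become: local a = "don't"
local b = "say \"hi\""   -- can become: local b = 'say "hi"'
-- long-string separators are trimmed when no closing bracket conflicts
local c = [==[chunk]==]  -- can become: local c = [[chunk]]
----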
For number optimization, LuaSrcDiet saves space by trying to generate the shortest possible sequence, and in the process it does not produce “proper” scientific notation (e.g. 1.23e5) but does away with the decimal point (e.g. 123e3) instead.
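Two hand-made examples of the idea:

[source, lua]
----
local big  = 123000   -- can be written as: 123e3
local tiny = 0.0005   -- can be written as: 5e-4
----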
The local variable name optimizer uses a full parser of Lua 5.1 source code, thus it can rename all local variables, including upvalues and function parameters.
It should handle the implicit `self` parameter gracefully.
In addition, local variable names are either renamed into the shortest possible names following English frequent letter usage or are arranged by calculating entropy with the _--opt-entropy_ option.
Variable names are reused whenever possible, reducing the number of unique variable names.
For example, for `LuaSrcDiet.lua` (version 0.11.0), 683 local identifiers representing 88 unique names were optimized into 32 unique names, all of which are one character in length, saving over 2600 bytes.
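A hand-made before/after illustration (the actual names chosen by the generator will vary):

[source, lua]
----
-- before
local input_value, scale_factor = io.read("*n"), 2
local scaled_result = input_value * scale_factor
print(scaled_result)
-- after --opt-locals (whitespace removal shown as well)
local e,t=io.read("*n"),2 local a=e*t print(a)
----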
If you need some kind of reassurance that your app will still work at reduced size, see the section on verification below.
== Usage
LuaSrcDiet needs a Lua 5.1.x (preferably Lua 5.1.4) binary to run.
On Unix machines, one can use the following command line:
[source, sh]
LuaSrcDiet myscript.lua -o myscript_.lua
On Windows machines, the above command line can be used on Cygwin, or you can run Lua with the LuaSrcDiet script like this:
[source, sh]
lua LuaSrcDiet.lua myscript.lua -o myscript_.lua
When run without arguments, LuaSrcDiet prints a list of options.
Also, you can check the `Makefile` for some examples of command lines to use.
For example, for maximum code size reduction and maximum verbosity, use:
[source, sh]
LuaSrcDiet --maximum --details myscript.lua -o myscript_.lua
=== Output Example
A sample output of LuaSrcDiet 0.11.0 for processing `LuaSrcDiet.lua` at _--maximum_ settings is as follows:
----
Statistics for: LuaSrcDiet.lua -> sample/LuaSrcDiet.lua
*** local variable optimization summary ***
----------------------------------------------------------
Variable Unique Decl. Token Size Average
Types Names Count Count Bytes Bytes
----------------------------------------------------------
Global 10 0 19 95 5.00
----------------------------------------------------------
Local (in) 88 153 683 3340 4.89
TOTAL (in) 98 153 702 3435 4.89
----------------------------------------------------------
Local (out) 32 153 683 683 1.00
TOTAL (out) 42 153 702 778 1.11
----------------------------------------------------------
*** lexer-based optimizations summary ***
--------------------------------------------------------------------
Lexical Input Input Input Output Output Output
Elements Count Bytes Average Count Bytes Average
--------------------------------------------------------------------
TK_KEYWORD 374 1531 4.09 374 1531 4.09
TK_NAME 795 3963 4.98 795 1306 1.64
TK_NUMBER 54 59 1.09 54 59 1.09
TK_STRING 152 1725 11.35 152 1717 11.30
TK_LSTRING 7 1976 282.29 7 1976 282.29
TK_OP 997 1092 1.10 997 1092 1.10
TK_EOS 1 0 0.00 1 0 0.00
--------------------------------------------------------------------
TK_COMMENT 140 6884 49.17 1 18 18.00
TK_LCOMMENT 7 1723 246.14 0 0 0.00
TK_EOL 543 543 1.00 197 197 1.00
TK_SPACE 1270 2465 1.94 263 263 1.00
--------------------------------------------------------------------
Total Elements 4340 21961 5.06 2841 8159 2.87
--------------------------------------------------------------------
Total Tokens 2380 10346 4.35 2380 7681 3.23
--------------------------------------------------------------------
----
Overall, the file size is reduced by more than 13 kiB (21,961 down to 8,159 bytes).
Tokens in the above report can be classified into “real” or actual tokens, and “fake” or whitespace tokens.
The number of “real” tokens remained the same.
Long comments were completely eliminated, and every short comment except the kept first (shbang) line was removed.
The number of line endings was reduced by 346, while all but 263 whitespace characters were optimized away.
So, token separators (whitespace, including line endings) now take up under 6 % of the total file size.
No optimization of number tokens was possible, while 8 bytes were saved on string tokens.
For local variable name optimization, the report shows that 88 unique local variable names were reduced to 32 unique names.
The number of identifier tokens stays the same (there is currently no optimization option to optimize away non-essential or unused “real” tokens).
Since there can be at most 53 single-character identifiers, all local variables are now one character in length.
Over 2600 bytes were saved on local identifiers alone.
_--details_ will give a longer report and much more information.
A sample output of LuaSrcDiet 0.12.0 for processing the one-file `LuaSrcDiet.lua` program itself at _--maximum_ and _--opt-experimental_ settings is as follows:
----
*** local variable optimization summary ***
----------------------------------------------------------
Variable Unique Decl. Token Size Average
Types Names Count Count Bytes Bytes
----------------------------------------------------------
Global 27 0 51 280 5.49
----------------------------------------------------------
Local (in) 482 1063 4889 21466 4.39
TOTAL (in) 509 1063 4940 21746 4.40
----------------------------------------------------------
Local (out) 55 1063 4889 4897 1.00
TOTAL (out) 82 1063 4940 5177 1.05
----------------------------------------------------------
*** BINEQUIV: binary chunks are sort of equivalent
Statistics for: LuaSrcDiet.lua -> app_experimental.lua
*** lexer-based optimizations summary ***
--------------------------------------------------------------------
Lexical Input Input Input Output Output Output
Elements Count Bytes Average Count Bytes Average
--------------------------------------------------------------------
TK_KEYWORD 3083 12247 3.97 3083 12247 3.97
TK_NAME 5401 24121 4.47 5401 7552 1.40
TK_NUMBER 467 494 1.06 467 494 1.06
TK_STRING 787 7983 10.14 787 7974 10.13
TK_LSTRING 14 3453 246.64 14 3453 246.64
TK_OP 6381 6861 1.08 6171 6651 1.08
TK_EOS 1 0 0.00 1 0 0.00
--------------------------------------------------------------------
TK_COMMENT 1611 72339 44.90 1 18 18.00
TK_LCOMMENT 18 4404 244.67 0 0 0.00
TK_EOL 4419 4419 1.00 1778 1778 1.00
TK_SPACE 10439 24475 2.34 2081 2081 1.00
--------------------------------------------------------------------
Total Elements 32621 160796 4.93 19784 42248 2.14
--------------------------------------------------------------------
Total Tokens 16134 55159 3.42 15924 38371 2.41
--------------------------------------------------------------------
* WARNING: before and after lexer streams are NOT equivalent!
----
The command line was:
[source, sh]
lua LuaSrcDiet.lua LuaSrcDiet.lua -o app_experimental.lua --maximum --opt-experimental --noopt-srcequiv
The important thing to note is that while the binary chunks are equivalent, the source lexer streams are not equivalent.
Hence, the _--noopt-srcequiv_ makes LuaSrcDiet report a warning for failing the source equivalence test.
`LuaSrcDiet.lua` was reduced from 157 kiB to about 41.3 kiB.
The _--opt-experimental_ option saves an extra 205 bytes over standard _--maximum_.
Note the reduction in `TK_OP` count due to a reduction in semicolons and parentheses.
`TK_SPACE` has actually increased a bit due to semicolons that are changed into single spaces; some of these spaces could not be removed.
For more performance numbers, see the <<performance-stats#, Performance Statistics>> page.
== Verification
Code size reduction can be quite a hairy thing (even I peer at the results in suspicion), so some kind of verification is desirable for users who expect processed files to _not_ blow up.
Since LuaSrcDiet has been talked about as a tool to reduce code size in projects such as WoW add-ons, `eLua` and `nspire`, adding a verification step will reduce risk for all users of LuaSrcDiet.
LuaSrcDiet performs two kinds of equivalence testing as of version 0.12.0.
The two tests can be very, very loosely termed as _source equivalence testing_ and _binary equivalence testing_.
They are controlled by the _--opt-srcequiv_ and _--opt-binequiv_ options and are enabled by default.
Testing behaviour can be summarized as follows:
* Both tests are always executed.
The options control the resulting actions taken.
* Both options are normally enabled.
This will cause any failing test to throw an error.
* When an option is disabled, LuaSrcDiet will at most print a warning.
* For passing results, see the following subsections that describe what the tests actually do.
You only need to disable a testing option for experimental optimizations (see the following section for more information on this).
For anything up to and including _--maximum_, both tests should pass.
If any test fails under these conditions, then something has gone wrong with LuaSrcDiet, and I would be interested to know what has blown up.
=== _--opt-srcequiv_ Source Equivalence
The source equivalence test uses LuaSrcDiet's lexer to read and compare the _before_ and _after_ lexer token streams.
Numbers and strings are dumped as binary chunks using `loadstring()` and `string.dump()` and the results compared.
If your file passes this test, it means that a Lua 5.1.x binary should see the exact same token streams for both _before_ and _after_ files.
That is, the parser in Lua will see the same lexer sequence coming from the source for both files and thus they _should_ be equivalent.
Touch wood.
Heh.
However, if you are _cross-compiling_, it may be possible for this test to fail.
Experienced Lua developers can modify `equiv.lua` to handle such cases.
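A minimal sketch of the dump-and-compare idea for individual constants, assuming the Lua 5.1 `loadstring()`/`string.dump()` API (the real `equiv.lua` does considerably more):

[source, lua]
----
-- two spellings of the same constant compile to identical bytecode;
-- a fixed chunk name keeps the embedded source names from differing
local function dumped(expr)
  return string.dump(assert(loadstring("return "..expr, "=equiv")))
end
assert(dumped("1.23e5") == dumped("123e3"))
----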
=== _--opt-binequiv_ Binary Equivalence
The binary equivalence test uses `loadstring()` and `string.dump()` to generate binary chunks of the entire _before_ and _after_ files.
Also, any shbang (`#!`) lines are removed prior to generation of the binary chunks.
The binary chunks are then run through a fake `undump` routine to verify the integrity of the binary chunks and to compare all parts that ought to be identical.
On a per-function prototype basis (where _ignored_ means that any difference between the two binary chunks is ignored):
* All debug information is ignored.
* The source name is ignored.
* Any line number data is ignored.
For example, `linedefined` and `lastlinedefined`.
The rest of the two binary chunks must be identical.
So, while the two are not binary-exact, they can be loosely termed as “equivalent” and should run in exactly the same manner.
Sort of.
You get the idea.
This test may also cause problems if you are _cross-compiling_.
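The dump step itself can be pictured like this (a sketch under Lua 5.1; `original_source` and `optimized_source` are hypothetical variables, and the fake undump comparison is where the real work happens):

[source, lua]
----
local function whole_chunk(src)
  src = src:gsub("^#![^\n]*", "")  -- shbang removed before dumping
  return string.dump(assert(loadstring(src)))
end
-- both chunks are then walked by the fake undump routine, which checks
-- integrity and compares everything except debug info, source names
-- and line numbers
local before = whole_chunk(original_source)
local after  = whole_chunk(optimized_source)
----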
== Experimental Stuff
The _--opt-experimental_ option applies experimental optimizations that generally make changes to “real” tokens.
Such changes may or may not lead to the result failing binary chunk equivalence testing.
They would likely fail source lexer stream equivalence testing, so the _--noopt-srcequiv_ option needs to be applied so that LuaSrcDiet just gives a warning instead of an error.
For sample files, see the `samples` directory.
Currently implemented experimental optimizations are as follows:
=== Semicolon Operator Removal
The semicolon (`;`) operator is an optional operator that is used to separate statements.
The optimization turns all of these operators into single spaces, which are then run through whitespace removal.
At worst, there will be no change to file size.
* _Fails_ source lexer stream equivalence.
* _Passes_ binary chunk equivalence.
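A hand-made before/after illustration:

[source, lua]
a = 1; b = 2;  -- becomes: a = 1 b = 2  and, after whitespace removal: a=1 b=2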
=== Function Call Syntax Sugar Optimization
This optimization turns function calls that take a single string or long string parameter into the syntax-sugar representation, which leaves out the parentheses.
Since strings can abut anything, each instance saves 2 bytes.
For example, the following:
[source, lua]
fish("cow")fish('cow')fish([[cow]])
is turned into:
[source, lua]
fish"cow"fish'cow'fish[[cow]]
* _Fails_ source lexer stream equivalence.
* _Passes_ binary chunk equivalence.
=== Other Experimental Optimizations
There are two more of these optimizations planned, before focus is turned to the Lua 5.2.x series:
* Simple `local` keyword removal.
Planned to work for a few kinds of patterns only.
* User directed name replacement, which will need user input to modify names or identifiers used in table keys and function methods or fields.

View File

@ -0,0 +1,128 @@
= Performance Statistics
Kein-Hong Man
2011-09-13
== Size Comparisons
The following is the result of processing `llex.lua` from LuaSrcDiet 0.11.0 using various optimization options:
|===
| LuaSrcDiet Option | Size (bytes)
| Original | 12,421
| Empty lines only | 12,395
| Whitespace only | 9,372
| Local rename only | 11,794
| _--basic_ setting | 3,835
| Program default | 3,208
| _--maximum_ setting | 3,130
|===
The program's default settings do not remove all unnecessary EOLs.
The _--basic_ setting is more conservative than the default settings; it disables optimization of strings and numbers and renaming of locals.
For version 0.12.0, the following is the result of processing `LuaSrcDiet.lua` using various optimization options:
|===
| LuaSrcDiet Option | Size (bytes)
| Original | 160,796
| _--basic_ setting | 60,219
| Program default | 43,650
| _--maximum_ setting | 42,453
| max + experimental | 42,248
|===
The above best size can go a lot lower with simple `local` keyword removal and user directed name replacement, which will be the subject of the next release of LuaSrcDiet.
== Compression and luac
File sizes of LuaSrcDiet 0.11.0 main files in various forms:
[cols="m,5*d", options="header,footer"]
|===
| Source File | Original Size (bytes) | `luac` normal (bytes) | `luac` stripped (bytes) | LuaSrcDiet _--basic_ (bytes) | LuaSrcDiet _--maximum_ (bytes)
| LuaSrcDiet.lua | 21,961 | 20,952 | 11,000 | 11,005 | 8,159
| llex.lua | 12,421 | 8,613 | 4,247 | 3,835 | 3,130
| lparser.lua | 41,757 | 27,215 | 12,506 | 11,755 | 7,666
| optlex.lua | 31,009 | 16,992 | 8,021 | 9,129 | 6,858
| optparser.lua | 16,511 | 9,021 | 3,520 | 5,087 | 2,999
| Total | 123,659 | 82,793 | 39,294 | 40,811 | 28,812
|===
* “LuaSrcDiet --maximum” has the smallest total file size.
* The ratio of “Original Size” to “LuaSrcDiet --maximum” is *4.3*.
* The ratio of “Original Size” to “luac stripped” is *3.1*.
* The ratio of “luac stripped” to “LuaSrcDiet --maximum” is *1.4*.
Compressibility of LuaSrcDiet 0.11.0 main files in various forms:
|===
| Compression Method | Original Size | `luac` normal | `luac` stripped | LuaSrcDiet _--basic_ | LuaSrcDiet _--maximum_
| Uncompressed originals | 123,659 | 82,793 | 39,294 | 40,811 | 28,812
| gzip -9 | 28,288 | 29,210 | 17,732 | 12,041 | 10,451
| bzip2 -9 | 24,407 | 27,232 | 16,856 | 11,480 | 9,815
| lzma (7-zip max) | 25,530 | 23,908 | 15,741 | 11,241 | 9,685
|===
* “LuaSrcDiet --maximum” has the smallest total file size (but a binary chunk loads faster and works with a smaller Lua executable).
* The ratio of “Original size” to “Original size + bzip2” is *5.1*.
* The ratio of “Original size” to “LuaSrcDiet --maximum + bzip2” is *12.6*.
* The ratio of “LuaSrcDiet --maximum” to “LuaSrcDiet --maximum + bzip2” is *2.9*.
* The ratio of “Original size” to “luac stripped + bzip2” is *7.3*.
* The ratio of “luac stripped” to “luac stripped + bzip2” is *2.3*.
* The ratio of “luac stripped + bzip2” to “LuaSrcDiet --maximum + bzip2” is *1.7*.
So, squeezed source code is smaller than stripped binary chunks and compresses better than stripped binary chunks, at a ratio of 2.9 for squeezed source code versus 2.3 for stripped binary chunks.
Compressed binary chunks are still a very efficient way of storing Lua scripts, because using only binary chunks allows the parts of Lua needed to compile from sources (`llex.o`, `lparser.o`, `lcode.o`, `ldump.o`) to be omitted, saving over 24KB in the process.
Note that LuaSrcDiet _does not_ answer the question of whether embedding source code is better or embedding binary chunks is better.
It is simply a utility for producing smaller source code files and an exercise in processing Lua source code using a Lua-based lexer and parser skeleton.
== Compile Speed
The following is a primitive attempt to analyze in-memory Lua script loading performance (using the `loadstring` function in Lua).
The LuaSrcDiet 0.11.0 files (original, squeezed with _--maximum_ and stripped binary chunks versions) are loaded into memory first before a loop runs to repeatedly load the script files for 10 seconds.
A null loop is also performed (processing empty strings) and the time taken per null iteration is subtracted as a form of null adjustment.
Then, various performance parameters are calculated.
Note that `LuaSrcDiet.lua` was slightly modified (`#!` line removed) to let the `loadstring` function run.
The results below were obtained with a Lua 5.1.3 executable compiled using `make generic` on Cygwin/Windows XP SP2 on a Sempron 3000+ (1.8GHz).
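A sketch of the timing loop under the stated assumptions (the original benchmark script is not reproduced here):

[source, lua]
----
local f = assert(io.open("LuaSrcDiet.lua", "rb"))
local src = f:read("*a"); f:close()
local iterations, start = 0, os.clock()
while os.clock() - start < 10 do  -- 10-second budget per form
  assert(loadstring(src))         -- compile only, never execute
  iterations = iterations + 1
end
print(("%.3f ms/iteration"):format(10000 / iterations))
----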
The LuaSrcDiet 0.11.0 source files have 11,180 “real” tokens in total.
[cols="<h,4*d", options="header"]
|===
| | Null loop | Stripped binary chunk | Original Sources | Squeezed Sources
| Total Size (bytes) | 0 | 39,294 | 123,640 | 28,793
| Iterations | 312,155 | 9,680 | 1,306 | 1,592
| Duration (sec) | 10 | 10 | 10 | 10
| Time/iteration (msec) | 0.032 | 1.033 | 7.657 | 6.281
| _Time/iteration, null adjusted (msec)_ | | 1.001 | 7.625 | 6.249
| _Load rate (MiB/sec)_ | | 37.44 | 15.46 | 4.39
| Load time per byte (ns) | | 25.5 | 61.7 | 217.0
| Load time per token (ns) | | | 682 | 559
| Source time vs binary chunk time ratio | | 1.00 | 7.62 | 6.24
| Binary chunk rate vs. source rate ratio | | 1.00 | 2.42 | 8.53
|===
The above shows that stripped binary chunks are still, in many ways, the highest-performance form of fixed Lua scripts.
On a very average machine, scripts load at over 37 MiB/sec (in memory).
This is very comparable to the burst speeds of common desktop hard disks of 2008.
If instant response is paramount, stripped binary chunks have little competition.
By contrast, source code that is squeezed to the maximum using LuaSrcDiet can only muster an in-memory load rate of 4.4 MiB/sec.
The original sources load at about 15.5 MiB/sec, but most of the speed is from the lexer scanning over comments and whitespace.
A quick calculation indicates that the speed of the lexer over comments and whitespace can be as much as 65 MiB/sec, but note that the speed is all for naught.
What really matters are the real tokens, and the squeezed source code manages to load faster than the original sources by 18 %.
So, the loading of stripped binary chunks is faster than squeezed source code by a bit over 6×.
The 4.4 MiB/sec speed for squeezed source code is still quite respectable.
When an application considers the time taken to load data from the disk and perhaps the time taken to decompress, loading source code may be perfectly fine in terms of performance.
For programs that already embed source code, using LuaSrcDiet to squeeze the source code probably speeds loading up by a tiny bit in addition to making programs smaller.

View File

@ -0,0 +1,386 @@
= Technical Notes
Kein-Hong Man
2011-09-13
== Lexer Notes
The lexer (`llex.lua`) is a version of the native 5.1.x lexer from Yueliang 0.4.0, with significant modifications.
It does have several limitations:
* The decimal point must be `.` (period).
There is no localized decimal point replacement magic.
* There is no support for nested `[[`...`]]` long strings (no `LUA_COMPAT_LSTR`).
* The lexer may not properly lex source code with characters beyond the normal ASCII character set.
Identifiers with accented characters (or any character beyond a byte value of 127) cannot be recognized.
Instead of returning one token on each call, `llex.lua` processes an entire string (typically the contents of a whole file) in one go.
Parallel lists (token types and their semantic information items) are built for the whole input and handed back to the caller.
For maximum flexibility during processing, the lexer returns non-grammar lexical elements as tokens too.
Non-grammar elements, such as comments, whitespace, line endings, are classified along with “normal” tokens.
The lexer classifies 7 kinds of grammar tokens and 4 kinds of non-grammar tokens, as follows:
[cols="m,d"]
|===
| Grammar Token | Description
| TK_KEYWORD | keywords
| TK_NAME | identifiers
| TK_NUMBER | numbers (unconverted, kept in original form)
| TK_STRING | strings (no translation is done, includes delimiters)
| TK_LSTRING | long strings (no translation is done, includes delimiters)
| TK_OP | operators and punctuation (most single-char, some double)
| TK_EOS | end-of-stream (there is only one for each file/stream)
|===
[cols="m,d"]
|===
| Whitespace Token | Description
| TK_SPACE | whitespace (generally, spaces, \t, \v and \f)
| TK_COMMENT | comments (includes delimiters, also includes special first line shbang, which is handled specially in the optimizer)
| TK_LCOMMENT | block comments (includes delimiters)
| TK_EOL | end-of-lines (excludes those embedded in strings)
|===
A list of tokens can be generated by using the _--dump-lexer_ option, like this:
[source, sh]
lua LuaSrcDiet.lua --dump-lexer llex.lua > dump_llex.dat
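Programmatically, the same token stream is available from the lexer module; a sketch based on how the bundled driver script calls it:

[source, lua]
----
local llex = require "luasrcdiet.llex"
local toklist, seminfolist = llex.lex('local x = 1 -- a comment\n')
for i = 1, #toklist do
  print(toklist[i], seminfolist[i])  -- e.g. TK_KEYWORD   local
end
----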
== Lexer Optimizations
We aim to keep lexer-based optimizations free of parser considerations, i.e. we allow for generalized optimization of token sequences.
The table below considers the requirements for all combinations of significant tokens (except `TK_EOS`).
Other tokens are whitespace-like.
Comments can be considered to be a special kind of whitespace, e.g. a short comment needs to have a following EOL token, if we do not want to optimize away short comments.
[cols="h,6*m", options="header"]
|===
| _1st → 2nd Token_ | Keyword | Name | Number | String | LString | Oper
| Keyword | [S] | [S] | [S] | - | - | -
| Name | [S] | [S] | [S] | - | - | -
| Number | [S] | [S] | [S] | - | - | [1]
| String | - | - | - | - | - | -
| LString | - | - | - | - | - | -
| Oper | - | - | [1] | - | - | [2]
|===
A dash (`-`) in the above means that the first token can abut the second token.
`*[S]*`:: Need at least one whitespace, set as either a space or kept as an EOL.
`*[1]*`::
Need a space if operator is a `.`, all others okay.
A `+` or `-` is used as part of a floating-point spec, but there does not appear to be any way of creating a float by joining a number with a `+` or `-` plus another number.
Since an `e` has to be somewhere in the first token, this can't be done.
`*[2]*`::
Normally there cannot be consecutive operators, but we plan to allow for generalized optimization of token sequences, i.e. even sequences that are grammatically illegal; so disallow adjacent operators if:
* the first is in `[=<>]` and the second is `=`
* both are dot sequences, though `...` first is okay
* the first is `[` and the second is `=` or `[` (not optimal)
Also, a minus `-` cannot precede a Comment or LComment, because comments start with a `--` prefix.
Apart from that, any Comment or LComment token can abut a real token.
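Rule *[1]* in practice:

[source, lua]
print(1 .. 2)  -- the space before `..` is required: `1..` lexes as a malformed number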
== Local Variable Renaming
The following discusses the problem of local variable optimization, specifically _local variable renaming_ in order to reduce source code size.
=== TK_NAME Token Considerations
A `TK_NAME` token means a number of things, and some of these cannot be renamed without analyzing the source code.
We are interested in the use of `TK_NAME` in the following:
[loweralpha]
. global variable access,
. local variable declaration, including `local` statements, `local` functions, function parameters, implicit `self` locals,
. local variable access, including upvalue access.
`TK_NAME` is also used in parts of the grammar as constant strings; these tokens cannot be optimized without user assistance.
These include usage as:
[loweralpha, start=4]
. keys in `key=value` pairs in table construction,
. field or method names in `a:b` or `a.b` syntax forms.
For the local variable name optimization scheme used, we do not consider (d) and (e), and while global variables cannot be renamed without some kind of user assistance, they need to be considered or tracked as part of Lua's variable access scheme.
=== Lifetime of a Local Variable
Consider the following example:
[source, lua]
local string, table = string, table
In the example, the two locals are assigned the values of the globals with the same names.
When Lua encounters the declaration portion:
[source, lua]
local string, table
the parser cannot immediately make the two local variables available to following code.
In the parser and code generator, locals are inactive when entries are created.
They are activated only when the function `adjustlocalvars()` is called to activate the appropriate local variables.
NOTE: The terminology used here may not be identical to that used in the Dragon Book; it merely follows the LuaSrcDiet code as it was written before I had read the Dragon Book.
In the example, the two local variables are activated only after the whole statement has been parsed, that is, after the last `table` token.
Hence, the statement works as expected.
Also, once the two local variables go out of scope, `removevars()` is called to deactivate them, allowing other variables of the same name to become visible again.
Another example worth mentioning is:
[source, lua]
local a, a, a = 1, 2, 3
The above will assign 3 to `a`.
Thus, when optimizing local variable names, (1) we need to consider accesses of global variable names affecting the namespace, (2) for the local variable names themselves, we need to consider when they are declared, activated and removed, and (3) within the “live” time of locals, we need to know when they are accessed (since locals that are never accessed don't really matter).
=== Local Variable Tracking
Every local variable declaration is considered an object to be renamed.
From the parser, we have the original name of the local variable, the token positions for declaration, activation and removal, and the token positions of all the `TK_NAME` tokens which reference this local.
All instances of the implicit `self` local variable are also flagged as such.
In addition to local variable information, all global variable accesses are tabled, one object entry for one name, and each object has a corresponding list of token positions for the `TK_NAME` tokens, which is where the global variables were accessed.
The key criterion is: *Our act of renaming cannot change the visibility of any of these locals and globals at the time they are accessed*.
However, _their scope of visibility may change during periods in which they are not accessed_, so someone who tries to insert a variable reference somewhere into a program that has its locals renamed may find that it now refers to a different variable.
Of course, if every variable has a unique name, then there is no need for a name allocation algorithm, as there will be no conflict.
But, in order to maximize utilization of short identifier names to reduce the final code size, we want to reuse the names as much as possible.
In addition, fewer names will likely reduce symbol entropy and may slightly improve compressibility of the source code.
LuaSrcDiet avoids the use of non-ASCII letters, so there are only 53 single-character variable names.
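The per-local record implied above looks roughly like this (field names follow the _--dump-parser_ output; the values are illustrative):

[source, lua]
----
local entry = {          -- one element of the parser's localinfo table
  name = "foo",
  decl = 12,             -- token position of the declaration
  act  = 15,             -- token position where it becomes visible
  rem  = 40,             -- token position where it is removed
  xref = { 12, 18, 22 }, -- positions of every TK_NAME reference
  is_special = false,    -- true for implicit `self` parameters
}
----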
=== Name Allocation Theory
To understand the renaming algorithm, first we need to establish how different local and global variables can operate happily without interfering with each other.
Consider three objects, local object A, local object B and global object G.
A and B involve declaration, activation and removal, and within the period a local is active, there may be zero or more accesses of it.
For G, there are only global variable accesses to look into.
Assume that we have assigned a new name to A and we wish to consider its effects on other locals and globals, for which we choose B and G as examples.
We assume local B has not been assigned a new name as we expect our algorithm to take care of collisions.
A's lifetime is something like this:
----
 Decl    Act                           Rem
   +      +-------------------------------+
-------------------------------------------------
----
where “Decl” is the time of declaration, “Act” is the time of activation, and “Rem” is the time of removal.
Between “Act” and “Rem”, the local is alive or “live” and Lua can see it if its corresponding `TK_NAME` identifier comes up.
----
 Decl    Act                           Rem
   +      +-------------------------------+
-------------------------------------------------
 *     *                *                      *
(1)   (2)              (3)                    (4)
----
Recall that the key criterion is not to change the visibility of globals and locals at the times they are accessed.
Consider local and global accesses at (1), (2), (3) and (4).
A global G of the same name as A will only collide at (3), where Lua will see A and not G.
Since G must be accessed at (3) according to what the parser says, and we cannot modify the positions of “Decl”, “Act” and “Rem”, it follows that A cannot have the same name as G.
----
 Decl    Act                   Rem
   +      +-----------------------+
-----------------------------------------
(1)+ +---+   (2)+ +---+   (3)+ +---+   (4)+ +---+
   ---------    ---------    ---------    ---------
----
For the case of A and B having the same names and colliding, consider the cases for which B is at (1), (2), (3) or (4) in the above.
(1) and (4) mean that A and B are completely isolated from each other, hence in the two cases, A and B can safely use the same variable names.
To be specific, since we have assigned A, B is considered completely isolated from A if B's activation-to-removal period is isolated from the span between A's first access and its last access, meaning B's active time will never affect any of A's accesses.
For (2) and (3), we have two cases where we need to consider which one has been activated first.
For (2), B is active before A, so A cannot impose on B.
But As accesses are valid while B is active, since A can override B.
For no collision in the case of (2), we simply need to ensure that the last access of B occurs before A is activated.
For (3), B is activated before A, hence B can override As accesses.
For no collision, all of As accesses cannot happen while B is active.
Thus position (3) follows the “A is never accessed when B is active” rule in a general way.
Local variables of a child function are in the position of (3).
To illustrate, the local B can use the same name as local A and live in a child function or block scope if each time A is accessed, Lua sees A and not B.
So we have to check all accesses of A and see whether they collide with the active period of B.
If A is not accessed during that period, then B can be active with the same name.
The above appears to resolve all sorts of cases where the active times of A and B overlap.
Note that in the above, the allocator does not need to know how locals are separated according to function prototypes.
Perhaps the allocator can be simplified if knowledge of function structure is utilized.
This scheme was implemented in a hurry in 2008 — it could probably be simpler if Lua grammar is considered, but LuaSrcDiet mainly processes various index values in tables.
=== Name Allocation Algorithm
To begin with, the name generator is mostly separate from the name allocation algorithm.
The name generator returns the next shortest name for the algorithm to apply to local variables.
To attempt to reduce symbol entropy (which benefits compression algorithms), the name generator follows English frequent letter usage.
There is also an option to calculate an actual symbol entropy table from the input data.
Since there are 53 one-character identifiers and (53 * 63 - 4) two-character identifiers (minus a few keywords), there isn't a pressing need to optimally maximize name reuse.
The single-file version of LuaSrcDiet 0.12.0, at just over 3000 SLOC and 156 kiB in size, currently allocates around 55 unique local variable names.
In theory, we should need no more than 260 local identifiers by default.
Why?
Since `LUAI_MAXVARS` is 200 and `LUAI_MAXUPVALUES` is 60, at any block scope, there can be at most `(LUAI_MAXVARS + LUAI_MAXUPVALUES)` locals referenced, or 260.
Also, those from outer scopes not referenced in inner scopes can reuse identifiers.
The net effect of this is that a local variable name allocation method should not allocate more than 260 identifier names for locals.
The current algorithm is a simple first-come first-served scheme:
[loweralpha]
. The local object that uses the most tokens is named first.
. Any other non-conflicting locals with respect to the first object are assigned the same name.
. Assigned locals are removed from consideration and the procedure is repeated for objects that have not been assigned new names.
. Steps (a) to (c) repeat until no local objects are left, as sketched below.
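A compact sketch of steps (a) to (d); `next_name` and `collides` are assumed helpers standing in for the real name generator and lifetime checks:

[source, lua]
----
local pending = localinfo  -- the parser's list of local objects
table.sort(pending, function(x, y) return #x.xref > #y.xref end)
while #pending > 0 do
  local name, rest = next_name(), {}
  for i = 1, #pending do
    local obj = pending[i]
    if i == 1 or not collides(obj, name) then
      obj.newname = name     -- (a), (b): busiest object plus all compatible ones
    else
      rest[#rest + 1] = obj  -- conflicting objects wait for another round
    end
  end
  pending = rest             -- (c), (d): repeat until everything is named
end
----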
In addition, there are a few extra issues to take care of:
[loweralpha, start=5]
. Implicit `self` locals that have been flagged as such are already “assigned to” and so they are left unmodified.
. The name generator skips `self` to avoid conflicts.
This is not optimal but it is unlikely a script will use so many local variables as to reach `self`.
. Keywords are also skipped for the name generator.
. Global name conflict resolution.
For (h), global name conflict resolution is handled just after the new name is generated.
The name can still be used for some locals even if it conflicts with a global.
To remove conflicts, all global variable accesses for that particular identifier name are checked.
Any local variable that is active when such a global access is made is marked to be skipped.
The rest of the local objects can then use that name.
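As a hypothetical illustration, suppose the generator proposes the name `n` while the source also calls a global `n`:

[source, lua]
----
function n(x) print(x) end -- a global that happens to share a generated name

local count = 0 -- active across the global access below: must skip the name 'n'
n(count)        -- access to the global 'n'
local later = 1 -- activated after the last global access: may safely become 'n'
print(later)
----

Renaming `later` to `n` leaves the earlier call still resolving to the global, whereas renaming `count` to `n` would capture that call.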
The algorithm has additional code for handling locals that use the same name in the same scope.
This extends the basic algorithm that was discussed earlier.
For example:
[source, lua]
----
local foo = 10 -- <1>
...
local foo = 20 -- <2>
...
print(e)
----
Since we are considering name visibility, the first `foo` does not really cease to exist when the second `foo` is declared. Suppose we assumed otherwise, and the first `foo` were removed just before (2): then `e` could be used as the name for the first `foo`, and after (2) it should not conflict with variables in the outer scope that have the same name.
To illustrate:
[source, lua]
----
local e = 10 -- 'foo' renamed to 'e'
...
local t = 20 -- error if we assumed 'e' removed here
...
print(e)
----
Since `e` is a global in the example, we now have an error, as the name has been taken over by a local.
Thus, the first `foo` local must have its active time extend to the end of the current scope.
If there is no conflict between the first and second `foo`, the algorithm may still assign them the same name.
The current fix for the above chains local objects together in order to find the removal position.
It may be possible to handle this in a cleaner manner; for now, LuaSrcDiet handles it as a fix to the basic algorithm.
== Ideas
The following is a list of optimization ideas that do not require heavy-duty source code parsing and comprehension.
=== Lexer-Based Optimization Ideas
* Convert long strings to normal strings, and vice versa. +
_A little desperate for a few bytes, can be done, but not real keen on implementing it._
* Special number forms to take advantage of constant number folding. +
_For example, 65536 can be represented using 2^16^, and so on.
The replacement expression must evaluate to exactly the same value, otherwise this is unsafe; see the sketch at the end of this list._
* Warn if a number has too many digits. +
_Should we warn or “test and truncate”?
Not really an optimization that will see much use._
* Warn of opportunity for using a `local` to zap a bunch of globals. +
_Current recommendation is to use the HTML plugin to display globals in red.
The developer can then visually analyze the source code and make the appropriate fixes.
I think this is better than having the program guess the intentions of the developer._
* Spaces to tabs in comments, long comments, or long strings. +
_For long strings, need to know users intention.
Would rather not implement._
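A sketch of the special number forms idea above, assuming a Lua 5.1 compiler that constant-folds `^` on literal operands:

[source, lua]
----
print(65536) -- 5 characters for the literal
print(2^16)  -- 4 characters; folded to the same constant at compile time
----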
=== Parser-Based Optimization Ideas
Heavy-duty optimizations will need more data to be generated by the parser.
A full AST may eventually be needed.
The most attractive idea that can be quickly implemented with a significant code size “win” is to reduce the number of `local` keywords.
* Remove unused ``local``s that can be removed in the source. +
_Need to consider unused ``local``s in multiple assignments._
* Simplify declaration of ``local``s that can be merged. +
_From:_
+
[source, lua]
----
-- separate locals
local foo
local bar
-- separate locals with assignments
local foo = 123
local bar = "pqr"
----
+
_To:_
+
[source, lua]
----
-- merged locals
local foo,bar
-- merged locals with assignments
local foo,bar=123,"pqr"
----
* Simplify declarations using `nil`. +
_From:_
[source, lua]
local foo, bar = nil, nil
+
_To:_
[source, lua]
local foo,bar
* Simplify ``return``s using `nil`. +
_How desirable is this? From Lua list discussions, it seems to be potentially unsafe unless all return locations are known and checked._
* Removal of optional semicolons in statements and removal of commas or semicolons in table constructors. +
_Yeah, this might save a few bytes._
* Remove table constructor elements using `nil`. +
_Not sure if this is safe to do._
* Simplify logical or relational operator expressions. +
_This is more suitable for an optimizing compiler project._
View File
@ -0,0 +1,41 @@
-- vim: set ft=lua:
package = 'LuaSrcDiet'
version = '0.3.0-2'
source = { url = 'https://github.com/jirutka/luasrcdiet/archive/v0.3.0/luasrcdiet-0.3.0.tar.gz', md5 = 'c0ff36ef66cd0568c96bc54e9253a8fa' }
description = {
summary = 'Compresses Lua source code by removing unnecessary characters',
detailed = [[
This is a revival of LuaSrcDiet, originally written by Kein-Hong Man.]],
homepage = 'https://github.com/jirutka/luasrcdiet',
maintainer = 'Jakub Jirutka <jakub@jirutka.cz>',
license = 'MIT',
}
dependencies = {
'lua >= 5.1',
}
build = {
type = 'builtin',
modules = {
['luasrcdiet'] = 'luasrcdiet/init.lua',
['luasrcdiet.equiv'] = 'luasrcdiet/equiv.lua',
['luasrcdiet.fs'] = 'luasrcdiet/fs.lua',
['luasrcdiet.llex'] = 'luasrcdiet/llex.lua',
['luasrcdiet.lparser'] = 'luasrcdiet/lparser.lua',
['luasrcdiet.optlex'] = 'luasrcdiet/optlex.lua',
['luasrcdiet.optparser'] = 'luasrcdiet/optparser.lua',
['luasrcdiet.plugin.example'] = 'luasrcdiet/plugin/example.lua',
['luasrcdiet.plugin.html'] = 'luasrcdiet/plugin/html.lua',
['luasrcdiet.plugin.sloc'] = 'luasrcdiet/plugin/sloc.lua',
['luasrcdiet.utils'] = 'luasrcdiet/utils.lua',
},
install = {
bin = {
luasrcdiet = 'bin/luasrcdiet',
}
}
}
View File
@ -0,0 +1,28 @@
rock_manifest = {
bin = {
luasrcdiet = "6c318685d57f827cf5baf7037a5d6072"
},
doc = {
["features-and-usage.adoc"] = "157587c27a0c340d9d1dd06af9b339b5",
["performance-stats.adoc"] = "cf5f96a86e021a3a584089fafcabd056",
["tech-notes.adoc"] = "075bc34e667a0055e659e656baa2365a"
},
lua = {
luasrcdiet = {
["equiv.lua"] = "967a6b17573d229e326dbb740ad7fe8c",
["fs.lua"] = "53db7dfc50d026b683fad68ed70ead0f",
["init.lua"] = "c6f368e6cf311f3257067fed0fbcd06a",
["llex.lua"] = "ede897af261fc362a82d87fbad91ea2b",
["lparser.lua"] = "c1e1f04d412b79a040fd1c2b74112953",
["optlex.lua"] = "7c986da991a338494c36770b4a30fa9f",
["optparser.lua"] = "b125a271ac1c691dec68b63019b1b5da",
plugin = {
["example.lua"] = "86b5c1e9dc7959db6b221d6d5a0db3d1",
["html.lua"] = "c0d3336a133f0c8663f395ee98d54f6a",
["sloc.lua"] = "fb1a91b18b701ab83f21c87733be470a"
},
["utils.lua"] = "bd6c1e85c6a9bf3383d336a4797fb292"
}
},
["luasrcdiet-0.3.0-2.rockspec"] = "da70047e1b0cbdc1ff08d060327fa110"
}
View File
@ -0,0 +1,650 @@
commands = {
luadocumentor = {
"luadocumentor/0.1.5-1"
},
luasrcdiet = {
"luasrcdiet/0.3.0-2"
}
}
dependencies = {
luadocumentor = {
["0.1.5-1"] = {
{
constraints = {
{
op = "~>",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
},
{
constraints = {
{
op = "~>",
version = {
1, 6, string = "1.6"
}
}
},
name = "luafilesystem"
},
{
constraints = {
{
op = "~>",
version = {
0, 32, string = "0.32"
}
}
},
name = "markdown"
},
{
constraints = {
{
op = "~>",
version = {
0, 7, string = "0.7"
}
}
},
name = "metalua-compiler"
},
{
constraints = {
{
op = "~>",
version = {
0, 9, string = "0.9"
}
}
},
name = "penlight"
}
}
},
luafilesystem = {
["1.6.3-2"] = {
{
constraints = {
{
op = ">=",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
}
}
},
luasrcdiet = {
["0.3.0-2"] = {
{
constraints = {
{
op = ">=",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
}
}
},
markdown = {
["0.32-2"] = {
{
constraints = {
{
op = ">=",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
}
}
},
["metalua-compiler"] = {
["0.7.3-1"] = {
{
constraints = {
{
op = "~>",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
},
{
constraints = {
{
op = "~>",
version = {
1, 6, string = "1.6"
}
}
},
name = "luafilesystem"
},
{
constraints = {
{
op = ">=",
version = {
0, 7, 3, string = "0.7.3"
}
}
},
name = "metalua-parser"
}
}
},
["metalua-parser"] = {
["0.7.3-2"] = {
{
constraints = {
{
op = ">=",
version = {
5, 1, string = "5.1"
}
}
},
name = "lua"
}
}
},
penlight = {
["0.9.8-1"] = {
{
constraints = {},
name = "luafilesystem"
}
}
}
}
modules = {
defaultcss = {
"luadocumentor/0.1.5-1"
},
docgenerator = {
"luadocumentor/0.1.5-1"
},
extractors = {
"luadocumentor/0.1.5-1"
},
["fs.lfs"] = {
"luadocumentor/0.1.5-1"
},
lddextractor = {
"luadocumentor/0.1.5-1"
},
lfs = {
"luafilesystem/1.6.3-2"
},
luasrcdiet = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.equiv"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.fs"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.llex"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.lparser"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.optlex"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.optparser"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.plugin.example"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.plugin.html"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.plugin.sloc"] = {
"luasrcdiet/0.3.0-2"
},
["luasrcdiet.utils"] = {
"luasrcdiet/0.3.0-2"
},
markdown = {
"markdown/0.32-2"
},
["metalua.compiler"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.bytecode"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.bytecode.compile"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.bytecode.lcode"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.bytecode.ldump"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.bytecode.lopcodes"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.globals"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.compiler.parser"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.annot.generator"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.annot.grammar"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.expr"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.ext"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.lexer"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.meta"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.misc"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.stat"] = {
"metalua-parser/0.7.3-2"
},
["metalua.compiler.parser.table"] = {
"metalua-parser/0.7.3-2"
},
["metalua.grammar.generator"] = {
"metalua-parser/0.7.3-2"
},
["metalua.grammar.lexer"] = {
"metalua-parser/0.7.3-2"
},
["metalua.loader"] = {
"metalua-compiler/0.7.3-1"
},
["metalua.pprint"] = {
"metalua-parser/0.7.3-2"
},
["metalua/compiler/ast_to_src.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/extension/comprehension.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/extension/match.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/repl.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/treequery.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["metalua/treequery/walk.mlua"] = {
"metalua-compiler/0.7.3-1"
},
["models.apimodel"] = {
"luadocumentor/0.1.5-1"
},
["models.apimodelbuilder"] = {
"luadocumentor/0.1.5-1"
},
["models.internalmodel"] = {
"luadocumentor/0.1.5-1"
},
["models.ldparser"] = {
"luadocumentor/0.1.5-1"
},
["models/internalmodelbuilder.mlua"] = {
"luadocumentor/0.1.5-1"
},
pl = {
"penlight/0.9.8-1"
},
["pl.Date"] = {
"penlight/0.9.8-1"
},
["pl.List"] = {
"penlight/0.9.8-1"
},
["pl.Map"] = {
"penlight/0.9.8-1"
},
["pl.MultiMap"] = {
"penlight/0.9.8-1"
},
["pl.OrderedMap"] = {
"penlight/0.9.8-1"
},
["pl.Set"] = {
"penlight/0.9.8-1"
},
["pl.app"] = {
"penlight/0.9.8-1"
},
["pl.array2d"] = {
"penlight/0.9.8-1"
},
["pl.class"] = {
"penlight/0.9.8-1"
},
["pl.comprehension"] = {
"penlight/0.9.8-1"
},
["pl.config"] = {
"penlight/0.9.8-1"
},
["pl.data"] = {
"penlight/0.9.8-1"
},
["pl.dir"] = {
"penlight/0.9.8-1"
},
["pl.file"] = {
"penlight/0.9.8-1"
},
["pl.func"] = {
"penlight/0.9.8-1"
},
["pl.input"] = {
"penlight/0.9.8-1"
},
["pl.lapp"] = {
"penlight/0.9.8-1"
},
["pl.lexer"] = {
"penlight/0.9.8-1"
},
["pl.luabalanced"] = {
"penlight/0.9.8-1"
},
["pl.operator"] = {
"penlight/0.9.8-1"
},
["pl.path"] = {
"penlight/0.9.8-1"
},
["pl.permute"] = {
"penlight/0.9.8-1"
},
["pl.platf.luajava"] = {
"penlight/0.9.8-1"
},
["pl.pretty"] = {
"penlight/0.9.8-1"
},
["pl.seq"] = {
"penlight/0.9.8-1"
},
["pl.sip"] = {
"penlight/0.9.8-1"
},
["pl.strict"] = {
"penlight/0.9.8-1"
},
["pl.stringio"] = {
"penlight/0.9.8-1"
},
["pl.stringx"] = {
"penlight/0.9.8-1"
},
["pl.tablex"] = {
"penlight/0.9.8-1"
},
["pl.template"] = {
"penlight/0.9.8-1"
},
["pl.test"] = {
"penlight/0.9.8-1"
},
["pl.text"] = {
"penlight/0.9.8-1"
},
["pl.utils"] = {
"penlight/0.9.8-1"
},
["pl.xml"] = {
"penlight/0.9.8-1"
},
["template.file"] = {
"luadocumentor/0.1.5-1"
},
["template.index"] = {
"luadocumentor/0.1.5-1"
},
["template.index.recordtypedef"] = {
"luadocumentor/0.1.5-1"
},
["template.item"] = {
"luadocumentor/0.1.5-1"
},
["template.page"] = {
"luadocumentor/0.1.5-1"
},
["template.recordtypedef"] = {
"luadocumentor/0.1.5-1"
},
["template.usage"] = {
"luadocumentor/0.1.5-1"
},
["template.utils"] = {
"luadocumentor/0.1.5-1"
},
templateengine = {
"luadocumentor/0.1.5-1"
}
}
repository = {
luadocumentor = {
["0.1.5-1"] = {
{
arch = "installed",
commands = {
luadocumentor = "luadocumentor"
},
dependencies = {
luafilesystem = "1.6.3-2",
markdown = "0.32-2",
["metalua-compiler"] = "0.7.3-1",
["metalua-parser"] = "0.7.3-2",
penlight = "0.9.8-1"
},
modules = {
defaultcss = "defaultcss.lua",
docgenerator = "docgenerator.lua",
extractors = "extractors.lua",
["fs.lfs"] = "fs/lfs.lua",
lddextractor = "lddextractor.lua",
["models.apimodel"] = "models/apimodel.lua",
["models.apimodelbuilder"] = "models/apimodelbuilder.lua",
["models.internalmodel"] = "models/internalmodel.lua",
["models.ldparser"] = "models/ldparser.lua",
["models/internalmodelbuilder.mlua"] = "models/internalmodelbuilder.mlua",
["template.file"] = "template/file.lua",
["template.index"] = "template/index.lua",
["template.index.recordtypedef"] = "template/index/recordtypedef.lua",
["template.item"] = "template/item.lua",
["template.page"] = "template/page.lua",
["template.recordtypedef"] = "template/recordtypedef.lua",
["template.usage"] = "template/usage.lua",
["template.utils"] = "template/utils.lua",
templateengine = "templateengine.lua"
}
}
}
},
luafilesystem = {
["1.6.3-2"] = {
{
arch = "installed",
commands = {},
dependencies = {},
modules = {
lfs = "lfs.dll"
}
}
}
},
luasrcdiet = {
["0.3.0-2"] = {
{
arch = "installed",
commands = {
luasrcdiet = "luasrcdiet"
},
dependencies = {},
modules = {
luasrcdiet = "luasrcdiet/init.lua",
["luasrcdiet.equiv"] = "luasrcdiet/equiv.lua",
["luasrcdiet.fs"] = "luasrcdiet/fs.lua",
["luasrcdiet.llex"] = "luasrcdiet/llex.lua",
["luasrcdiet.lparser"] = "luasrcdiet/lparser.lua",
["luasrcdiet.optlex"] = "luasrcdiet/optlex.lua",
["luasrcdiet.optparser"] = "luasrcdiet/optparser.lua",
["luasrcdiet.plugin.example"] = "luasrcdiet/plugin/example.lua",
["luasrcdiet.plugin.html"] = "luasrcdiet/plugin/html.lua",
["luasrcdiet.plugin.sloc"] = "luasrcdiet/plugin/sloc.lua",
["luasrcdiet.utils"] = "luasrcdiet/utils.lua"
}
}
}
},
markdown = {
["0.32-2"] = {
{
arch = "installed",
commands = {},
dependencies = {},
modules = {
markdown = "markdown.lua"
}
}
}
},
["metalua-compiler"] = {
["0.7.3-1"] = {
{
arch = "installed",
commands = {},
dependencies = {
luafilesystem = "1.6.3-2",
["metalua-parser"] = "0.7.3-2"
},
modules = {
["metalua.compiler.bytecode"] = "metalua/compiler/bytecode.lua",
["metalua.compiler.bytecode.compile"] = "metalua/compiler/bytecode/compile.lua",
["metalua.compiler.bytecode.lcode"] = "metalua/compiler/bytecode/lcode.lua",
["metalua.compiler.bytecode.ldump"] = "metalua/compiler/bytecode/ldump.lua",
["metalua.compiler.bytecode.lopcodes"] = "metalua/compiler/bytecode/lopcodes.lua",
["metalua.compiler.globals"] = "metalua/compiler/globals.lua",
["metalua.loader"] = "metalua/loader.lua",
["metalua/compiler/ast_to_src.mlua"] = "metalua/compiler/ast_to_src.mlua",
["metalua/extension/comprehension.mlua"] = "metalua/extension/comprehension.mlua",
["metalua/extension/match.mlua"] = "metalua/extension/match.mlua",
["metalua/repl.mlua"] = "metalua/repl.mlua",
["metalua/treequery.mlua"] = "metalua/treequery.mlua",
["metalua/treequery/walk.mlua"] = "metalua/treequery/walk.mlua"
}
}
}
},
["metalua-parser"] = {
["0.7.3-2"] = {
{
arch = "installed",
commands = {},
dependencies = {},
modules = {
["metalua.compiler"] = "metalua/compiler.lua",
["metalua.compiler.parser"] = "metalua/compiler/parser.lua",
["metalua.compiler.parser.annot.generator"] = "metalua/compiler/parser/annot/generator.lua",
["metalua.compiler.parser.annot.grammar"] = "metalua/compiler/parser/annot/grammar.lua",
["metalua.compiler.parser.expr"] = "metalua/compiler/parser/expr.lua",
["metalua.compiler.parser.ext"] = "metalua/compiler/parser/ext.lua",
["metalua.compiler.parser.lexer"] = "metalua/compiler/parser/lexer.lua",
["metalua.compiler.parser.meta"] = "metalua/compiler/parser/meta.lua",
["metalua.compiler.parser.misc"] = "metalua/compiler/parser/misc.lua",
["metalua.compiler.parser.stat"] = "metalua/compiler/parser/stat.lua",
["metalua.compiler.parser.table"] = "metalua/compiler/parser/table.lua",
["metalua.grammar.generator"] = "metalua/grammar/generator.lua",
["metalua.grammar.lexer"] = "metalua/grammar/lexer.lua",
["metalua.pprint"] = "metalua/pprint.lua"
}
}
}
},
penlight = {
["0.9.8-1"] = {
{
arch = "installed",
commands = {},
dependencies = {
luafilesystem = "1.6.3-2"
},
modules = {
pl = "pl/init.lua",
["pl.Date"] = "pl/Date.lua",
["pl.List"] = "pl/List.lua",
["pl.Map"] = "pl/Map.lua",
["pl.MultiMap"] = "pl/MultiMap.lua",
["pl.OrderedMap"] = "pl/OrderedMap.lua",
["pl.Set"] = "pl/Set.lua",
["pl.app"] = "pl/app.lua",
["pl.array2d"] = "pl/array2d.lua",
["pl.class"] = "pl/class.lua",
["pl.comprehension"] = "pl/comprehension.lua",
["pl.config"] = "pl/config.lua",
["pl.data"] = "pl/data.lua",
["pl.dir"] = "pl/dir.lua",
["pl.file"] = "pl/file.lua",
["pl.func"] = "pl/func.lua",
["pl.input"] = "pl/input.lua",
["pl.lapp"] = "pl/lapp.lua",
["pl.lexer"] = "pl/lexer.lua",
["pl.luabalanced"] = "pl/luabalanced.lua",
["pl.operator"] = "pl/operator.lua",
["pl.path"] = "pl/path.lua",
["pl.permute"] = "pl/permute.lua",
["pl.platf.luajava"] = "pl/platf/luajava.lua",
["pl.pretty"] = "pl/pretty.lua",
["pl.seq"] = "pl/seq.lua",
["pl.sip"] = "pl/sip.lua",
["pl.strict"] = "pl/strict.lua",
["pl.stringio"] = "pl/stringio.lua",
["pl.stringx"] = "pl/stringx.lua",
["pl.tablex"] = "pl/tablex.lua",
["pl.template"] = "pl/template.lua",
["pl.test"] = "pl/test.lua",
["pl.text"] = "pl/text.lua",
["pl.utils"] = "pl/utils.lua",
["pl.xml"] = "pl/xml.lua"
}
}
}
}
}
View File
@ -0,0 +1,23 @@
package = "Markdown"
version = "0.32-2"
source = {
url = "http://www.frykholm.se/files/markdown-0.32.tar.gz",
dir = "."
}
description = {
summary = "Markdown text-to-html markup system.",
detailed = [[
A pure-lua implementation of the Markdown text-to-html markup system.
]],
license = "MIT",
homepage = "http://www.frykholm.se/files/markdown.lua"
}
dependencies = {
"lua >= 5.1",
}
build = {
type = "none",
install = {
lua = { "markdown.lua" },
}
}
View File
@ -0,0 +1,6 @@
rock_manifest = {
lua = {
["markdown.lua"] = "0ea5f9d6d22a6c9aa4fdf63cf1d7d066"
},
["markdown-0.32-2.rockspec"] = "83f0335058d8fbd078d4f2c1ce941df0"
}
View File
@ -0,0 +1,104 @@
Metalua Compiler
================
## Metalua compiler
This module `metalua-compiler` depends on `metalua-parser`. Its main
feature is to compile ASTs into Lua 5.1 bytecode, allowing you to convert
them into bytecode files and executable functions. This opens the
following possibilities:
* compiler objects generated with `require 'metalua.compiler'.new()`
support methods `:xxx_to_function()` and `:xxx_to_bytecode()`;
* Compile-time meta-programming: use of `-{...}` splices in source
code, to generate code during compilation;
* Some syntax extensions, such as structural pattern matching and
lists by comprehension;
* Some AST manipulation facilities such as `treequery`, which are
implemented with Metalua syntax extensions.
## What's new in Metalua 0.7
This is a major overhaul of the compiler's architecture. Some of the
most noteworthy changes are:
* No more installation or bootstrap script. Some Metalua source files
have been rewritten in plain Lua, and module sources have been
refactored, so that if you just drop the `metalua` folder somewhere
in your `LUA_PATH`, it works.
* The compiler can be split into two parts:
* a parser which generates ASTs out of Lua sources, and should be
either portable or easily ported to Lua 5.2;
* a compiler, which can turn sources and AST into executable
Lua 5.1 bytecode and run it. It also supports compile-time
meta-programming, i.e. code included between `-{ ... }` is
executed during compilation, and the ASTs it produces are
included in the resulting bytecode.
* Both parts are packaged as separate LuaRocks, `metalua-parser` and
`metalua-compiler` respectively, so that you can install the former
without the latter.
* The parser is not a unique object anymore. Instead,
`require "metalua.compiler".new()` returns a different compiler
instance every time it's called. Compiler instances can be reused on
as many source files as wanted, but extending one instance's grammar
doesn't affect other compiler instances.
* The included standard library has been shed. There are too many standard
libs in Lua, and none of them is standard enough; offering
yet another one, coupled to a specific compiler, can only add to
confusion.
* Many syntax extensions, which either were arguably more code samples
than actual production-ready tools, or relied too heavily on the
removed runtime standard libraries, have been removed.
* The remaining libraries and samples are:
* `metalua.compiler` converts sources into ASTs, bytecode,
functions, and ASTs back into sources.
* `metalua` compiles and/or executes files from the command line,
and can start an interactive REPL session.
* `metalua.loader` adds a package loader which makes it possible to use modules
written in Metalua, even from a plain Lua program.
* `metalua.treequery` is an advanced DSL for searching ASTs in
a smart way, e.g. "_search `return` statements which return a
`local` variable but aren't in a nested `function`_".
* `metalua.extension.comprehension` is a language extension which
supports lists by comprehension
(`even = { i for i=1, 100 if i%2==0 }`) and improved loops
(`for i=1, 10 for j=1,10 if i~=j do print(i,j) end`).
* `metalua.extension.match` is a language extension which offers
Haskell/ML structural pattern matching
(``match AST with `Function{ args, body } -> ... | `Number{ 0 } -> ...end``)
* **TODO Move basic extensions in a separate module.**
* To remove the compilation speed penalty associated with
metaprogramming, when environment variable `LUA_MCACHE` or Lua
variable `package.mcache` is defined and LuaFileSystem is available,
the results of Metalua source compilations are cached. Unless the
source file is more recent than the latest cached bytecode file, the
latter is loaded instead of the former.
* The LuaRocks install for the full compiler lists dependencies on
Readline, LuaFileSystem, and Alt-Getopts. Those projects are
optional, but having them automatically installed by LuaRocks offers
a better user experience.
* The license has changed from MIT to double license MIT + EPL. This
has been done in order to provide the IP guarantees expected by the
Eclipse Foundation, so that Metalua can be included in Eclipse's
[Lua Development Tools](http://www.eclipse.org/koneki/ldt/).
View File
@ -0,0 +1,177 @@
Metalua Parser
==============
`metalua-parser` is a subset of the Metalua compiler, which turns
valid Lua source files and strings into abstract syntax trees
(AST). This README includes a description of this AST format. People
interested in Lua code analysis and generation are encouraged to
produce and/or consume this format to represent ASTs.
It has been designed for Lua 5.1. It hasn't been tested against
Lua 5.2, but should be easily ported.
## Usage
Module `metalua.compiler` has a `new()` function, which returns a
compiler instance. This instance has a set of methods of the form
`:xxx_to_yyy(input)`, where `xxx` and `yyy` must be one of the
following:
* `srcfile` the name of a Lua source file;
* `src` a string containing the Lua sources of a list of statements;
* `lexstream` a lexical tokens stream;
* `ast` an abstract syntax tree;
* `bytecode` a chunk of Lua bytecode that can be loaded in a Lua 5.1
VM (not available if you only installed the parser);
* `function` an executable Lua function.
Compiling into bytecode or executable functions requires the whole
Metalua compiler, not only the parser. The most frequently used
functions are `:src_to_ast(source_string)` and
`:srcfile_to_ast("path/to/source/file.lua")`.
mlc = require 'metalua.compiler'.new()
ast = mlc :src_to_ast[[ return 123 ]]
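The same instance can also produce an executable function; a sketch, assuming
the full compiler rather than just the parser is installed:

f = mlc :src_to_function [[ return 123 ]]
print(f()) --> 123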
A compiler instance can be reused as much as you want; it's only
interesting to work with more than one compiler instance when you
start extending their grammars.
## Abstract Syntax Trees definition
### Notation
Trees are written below with some Metalua syntax sugar, which
increases their readability. The backquote symbol introduces a `tag`,
i.e. a string stored in the `"tag"` field of a table:
* `` `Foo{ 1, 2, 3 }`` is a shortcut for `{tag="Foo", 1, 2, 3}`;
* `` `Foo`` is a shortcut for `{tag="Foo"}`;
* `` `Foo 123`` is a shortcut for `` `Foo{ 123 }``, and therefore
`{tag="Foo", 123 }`; the expression after the tag must be a literal
number or string.
When using a Metalua interpreter or compiler, the backtick syntax is
supported and can be used directly. Metalua's pretty-printing helpers
also try to use backtick syntax whenever applicable.
### Tree elements
Tree elements are mainly categorized into statements `stat`,
expressions `expr` and lists of statements `block`. Auxiliary
definitions include function applications/method invocations `apply`,
which are both valid statements and expressions, and expressions
admissible on the left-hand side of an assignment statement `lhs`.
block: { stat* }
stat:
`Do{ stat* }
| `Set{ {lhs+} {expr+} } -- lhs1, lhs2... = e1, e2...
| `While{ expr block } -- while e do b end
| `Repeat{ block expr } -- repeat b until e
| `If{ (expr block)+ block? } -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
| `Fornum{ ident expr expr expr? block } -- for ident = e, e[, e] do b end
| `Forin{ {ident+} {expr+} block } -- for i1, i2... in e1, e2... do b end
| `Local{ {ident+} {expr+}? } -- local i1, i2... = e1, e2...
| `Localrec{ ident expr } -- only used for 'local function'
| `Goto{ <string> } -- goto str
| `Label{ <string> } -- ::str::
| `Return{ <expr*> } -- return e1, e2...
| `Break -- break
| apply
expr:
`Nil | `Dots | `True | `False
| `Number{ <number> }
| `String{ <string> }
| `Function{ { ident* `Dots? } block }
| `Table{ ( `Pair{ expr expr } | expr )* }
| `Op{ opid expr expr? }
| `Paren{ expr } -- significant to cut multiple values returns
| apply
| lhs
apply:
`Call{ expr expr* }
| `Invoke{ expr `String{ <string> } expr* }
ident: `Id{ <string> }
lhs: ident | `Index{ expr expr }
opid: 'add' | 'sub' | 'mul' | 'div'
| 'mod' | 'pow' | 'concat'| 'eq'
| 'lt' | 'le' | 'and' | 'or'
| 'not' | 'len'
### Meta-data (lineinfo)
ASTs also embed some metadata, allowing them to be mapped to their source
representation. This information is stored in a `"lineinfo"` field
in each tree node, which points to the range of characters in the
source string which represents it, and to the content of any comment
that would appear immediately before or after that node.
Lineinfo objects have two fields, `"first"` and `"last"`, describing
respectively the beginning and the end of the subtree in the
sources. For instance, the sub-node `` `Number{123}`` produced by parsing
`[[return 123]]` will have `lineinfo.first` describing offset 8, and
`lineinfo.last` describing offset 10:
> mlc = require 'metalua.compiler'.new()
> ast = mlc :src_to_ast "return 123 -- comment"
> print(ast[1][1].lineinfo)
<?|L1|C8-10|K8-10|C>
>
A lineinfo keeps track of character offsets relative to the beginning
of the source string/file ("K8-10" above), line numbers (L1 above; a
lineinfo spanning on several lines would read something like "L1-10"),
columns i.e. offset within the line ("C8-10" above), and a filename if
available (the "?" mark above indicating that we have no file name, as
the AST comes from a string). The final "|C>" indicates that there's a
comment immediately after the node; an initial "<C|" would have meant
that there was a comment immediately before the node.
Positions represent either the end of a token and the beginning of an
inter-token space (`"last"` fields) or the beginning of a token, and
the end of an inter-token space (`"first"` fields). Inter-token spaces
might be empty. They can also contain comments, which might be useful
to link with surrounding tokens and AST subtrees.
Positions are chained with their "dual" one: a position at the
beginning of an inter-token space keeps a reference to the position at
the end of that inter-token space in its `"facing"` field, and
conversely, end-of-inter-token positions keep track of the inter-token
space beginning, also in `"facing"`. An inter-token space can be
empty, e.g. in `"2+2"`, in which case `lineinfo==lineinfo.facing`.
Comments are also kept in the `"comments"` field. If present, this
field contains a list of comments, with a `"lineinfo"` field
describing the span between the first and last comment. Each comment
is represented by a list of one string, with a `"lineinfo"` describing
the span of this comment only. Consecutive lines of `--` comments are
considered as one comment: `"-- foo\n-- bar\n"` parses as one comment
whose text is `"foo\nbar"`, whereas `"-- foo\n\n-- bar\n"` parses as
two comments `"foo"` and `"bar"`.
So for instance, if `f` is the AST of a function and I want to
retrieve the comment before the function, I'd do:
f_comment = f.lineinfo.first.comments[1][1]
The information in lineinfo positions, i.e. in each `"first"` and
`"last"` field, is held in the following fields (a short example follows the list):
* `"source"` the filename (optional);
* `"offset"` the 1-based offset relative to the beginning of the string/file;
* `"line"` the 1-based line number;
* `"column"` the 1-based offset within the line;
* `"facing"` the position at the opposite end of the inter-token space.
* `"comments"` the comments in the associated inter-token space (optional).
* `"id"` an arbitrary number, which uniquely identifies an inter-token
space within a given tokens stream.
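For example, continuing the REPL session above, the starting position of the
`Number` node can be inspected directly (a sketch; the values match the
`<?|L1|C8-10|K8-10|C>` lineinfo shown earlier):

> first = ast[1][1].lineinfo.first
> print(first.line, first.column, first.offset)
1       8       8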
View File
@ -0,0 +1,13 @@
Metalua
=======
Metalua is a Lua code analysis tool, as well as a compiler for a
superset of Lua 5.1 supporting Compile-Time Meta-Programming. It's
separated into two LuaRocks, `metalua-parser` and
`metalua-compiler`. The documentation of each rock can be found in
`README-parser.md` and `README-compiler.md`.
All the code in Metalua is released under dual licenses:
* MIT public license (same as Lua);
* EPL public license (same as Eclipse).
View File
@ -0,0 +1,47 @@
--*-lua-*--
package = "metalua-compiler"
version = "0.7.3-1"
source = {
url = "http://git.eclipse.org/c/koneki/org.eclipse.koneki.metalua.git/snapshot/org.eclipse.koneki.metalua-v0.7.3.tar.gz"
}
description = {
summary = "Metalua's compiler: converting (Meta)lua source strings and files into executable Lua 5.1 bytecode",
detailed = [[
This is the Metalua compiler, packaged as a rock, depending
on the separate metalua-parser AST-generating library. It
compiles a superset of Lua 5.1 into bytecode, which can
then be loaded and executed by a Lua 5.1 VM. It also allows
dumping ASTs back into Lua source files.
]],
homepage = "http://git.eclipse.org/c/koneki/org.eclipse.koneki.metalua.git",
license = "EPL + MIT"
}
dependencies = {
"lua ~> 5.1", -- Lua 5.2 bytecode not supported
"luafilesystem ~> 1.6", -- Cached compilation based on file timestamps
"metalua-parser >= 0.7.3", -- AST production
}
build = {
type="builtin",
modules={
["metalua.compiler.bytecode"] = "metalua/compiler/bytecode.lua",
["metalua.compiler.globals"] = "metalua/compiler/globals.lua",
["metalua.compiler.bytecode.compile"] = "metalua/compiler/bytecode/compile.lua",
["metalua.compiler.bytecode.lcode"] = "metalua/compiler/bytecode/lcode.lua",
["metalua.compiler.bytecode.lopcodes"] = "metalua/compiler/bytecode/lopcodes.lua",
["metalua.compiler.bytecode.ldump"] = "metalua/compiler/bytecode/ldump.lua",
["metalua.loader"] = "metalua/loader.lua",
},
install={
lua={
["metalua.treequery"] = "metalua/treequery.mlua",
["metalua.compiler.ast_to_src"] = "metalua/compiler/ast_to_src.mlua",
["metalua.treequery.walk"] = "metalua/treequery/walk.mlua",
["metalua.extension.match"] = "metalua/extension/match.mlua",
["metalua.extension.comprehension"] = "metalua/extension/comprehension.mlua",
["metalua.repl"] = "metalua/repl.mlua",
}
}
}
View File
@ -0,0 +1,33 @@
rock_manifest = {
doc = {
["README-compiler.md"] = "292523d759247d210d32fb2f6153e0f4",
["README-parser.md"] = "b44e3673d96dd296f2c0e92a6c87ee18",
["README.md"] = "20bfb490cddef9e101e44688791abcda"
},
lua = {
metalua = {
compiler = {
["ast_to_src.mlua"] = "1309f76df37585ef8e1f67f748b07b22",
bytecode = {
["compile.lua"] = "430e4a6fac8b64b5ebb3ae585ebae75a",
["lcode.lua"] = "3ad8755ebe8ea8eca6b1d2846eec92c4",
["ldump.lua"] = "295e1d9657fb0126ce3471b3366da694",
["lopcodes.lua"] = "a0f15cfc93b026b0a868466d066f1d21"
},
["bytecode.lua"] = "1032e5233455fd4e504daf5d2893527b",
["globals.lua"] = "80ae19c6e640de0746348c91633c4c55"
},
extension = {
["comprehension.mlua"] = "426f5856896bda4c3763bd5f61410685",
["match.mlua"] = "79960265331e8b2f46199c2411a103de"
},
["loader.lua"] = "1cdbf6cdf6ca97c55540d068474f1d8a",
["repl.mlua"] = "729456f3a8cc073788acee564a0495f0",
treequery = {
["walk.mlua"] = "5159aaddbec55936f91ea4236f6451d3"
},
["treequery.mlua"] = "97ffcee0825ac3bc776d01566767b2e8"
}
},
["metalua-compiler-0.7.3-1.rockspec"] = "b3883b25641d862db6828300bb755d51"
}
View File
@ -0,0 +1,104 @@
Metalua Compiler
================
## Metalua compiler
This module `metalua-compiler` depends on `metalua-parser`. Its main
feature is to compile ASTs into Lua 5.1 bytecode, allowing you to convert
them into bytecode files and executable functions. This opens the
following possibilities:
* compiler objects generated with `require 'metalua.compiler'.new()`
support methods `:xxx_to_function()` and `:xxx_to_bytecode()`;
* Compile-time meta-programming: use of `-{...}` splices in source
code, to generate code during compilation;
* Some syntax extensions, such as structural pattern matching and
lists by comprehension;
* Some AST manipulation facilities such as `treequery`, which are
implemented with Metalua syntax extensions.
## What's new in Metalua 0.7
This is a major overhaul of the compiler's architecture. Some of the
most noteworthy changes are:
* No more installation or bootstrap script. Some Metalua source files
have been rewritten in plain Lua, and module sources have been
refactored, so that if you just drop the `metalua` folder somewhere
in your `LUA_PATH`, it works.
* The compiler can be split into two parts:
* a parser which generates ASTs out of Lua sources, and should be
either portable or easily ported to Lua 5.2;
* a compiler, which can turn sources and AST into executable
Lua 5.1 bytecode and run it. It also supports compile-time
meta-programming, i.e. code included between `-{ ... }` is
executed during compilation, and the ASTs it produces are
included in the resulting bytecode.
* Both parts are packaged as separate LuaRocks, `metalua-parser` and
`metalua-compiler` respectively, so that you can install the former
without the latter.
* The parser is not a unique object anymore. Instead,
`require "metalua.compiler".new()` returns a different compiler
instance every time it's called. Compiler instances can be reused on
as many source files as wanted, but extending one instance's grammar
doesn't affect other compiler instances.
* The included standard library has been shed. There are too many standard
libs in Lua, and none of them is standard enough; offering
yet another one, coupled to a specific compiler, can only add to
confusion.
* Many syntax extensions, which either were arguably more code samples
than actual production-ready tools, or relied too heavily on the
removed runtime standard libraries, have been removed.
* The remaining libraries and samples are:
* `metalua.compiler` converts sources into ASTs, bytecode,
functions, and ASTs back into sources.
* `metalua` compiles and/or executes files from the command line,
and can start an interactive REPL session.
* `metalua.loader` adds a package loader which makes it possible to use modules
written in Metalua, even from a plain Lua program.
* `metalua.treequery` is an advanced DSL for searching ASTs in
a smart way, e.g. "_search `return` statements which return a
`local` variable but aren't in a nested `function`_".
* `metalua.extension.comprehension` is a language extension which
supports lists by comprehension
(`even = { i for i=1, 100 if i%2==0 }`) and improved loops
(`for i=1, 10 for j=1,10 if i~=j do print(i,j) end`).
* `metalua.extension.match` is a language extension which offers
Haskell/ML structural pattern matching
(``match AST with `Function{ args, body } -> ... | `Number{ 0 } -> ...end``)
* **TODO Move basic extensions in a separate module.**
* To remove the compilation speed penalty associated with
metaprogramming, when environment variable `LUA_MCACHE` or Lua
variable `package.mcache` is defined and LuaFileSystem is available,
the results of Metalua source compilations are cached. Unless the
source file is more recent than the latest cached bytecode file, the
latter is loaded instead of the former.
* The LuaRocks install for the full compiler lists dependencies on
Readline, LuaFileSystem, and Alt-Getopts. Those projects are
optional, but having them automatically installed by LuaRocks offers
a better user experience.
* The license has changed from MIT to double license MIT + EPL. This
has been done in order to provide the IP guarantees expected by the
Eclipse Foundation, so that Metalua can be included in Eclipse's
[Lua Development Tools](http://www.eclipse.org/koneki/ldt/).
View File
@ -0,0 +1,177 @@
Metalua Parser
==============
`metalua-parser` is a subset of the Metalua compiler, which turns
valid Lua source files and strings into abstract syntax trees
(AST). This README includes a description of this AST format. People
interested in Lua code analysis and generation are encouraged to
produce and/or consume this format to represent ASTs.
It has been designed for Lua 5.1. It hasn't been tested against
Lua 5.2, but should be easily ported.
## Usage
Module `metalua.compiler` has a `new()` function, which returns a
compiler instance. This instance has a set of methods of the form
`:xxx_to_yyy(input)`, where `xxx` and `yyy` must be one of the
following:
* `srcfile` the name of a Lua source file;
* `src` a string containing the Lua sources of a list of statements;
* `lexstream` a lexical tokens stream;
* `ast` an abstract syntax tree;
* `bytecode` a chunk of Lua bytecode that can be loaded in a Lua 5.1
VM (not available if you only installed the parser);
* `function` an executable Lua function.
Compiling into bytecode or executable functions requires the whole
Metalua compiler, not only the parser. The most frequently used
functions are `:src_to_ast(source_string)` and
`:srcfile_to_ast("path/to/source/file.lua")`.
mlc = require 'metalua.compiler'.new()
ast = mlc :src_to_ast[[ return 123 ]]
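The same instance can also produce an executable function; a sketch, assuming
the full compiler rather than just the parser is installed:

f = mlc :src_to_function [[ return 123 ]]
print(f()) --> 123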
A compiler instance can be reused as much as you want; it's only
interesting to work with more than one compiler instance when you
start extending their grammars.
## Abstract Syntax Trees definition
### Notation
Trees are written below with some Metalua syntax sugar, which
increases their readability. The backquote symbol introduces a `tag`,
i.e. a string stored in the `"tag"` field of a table:
* `` `Foo{ 1, 2, 3 }`` is a shortcut for `{tag="Foo", 1, 2, 3}`;
* `` `Foo`` is a shortcut for `{tag="Foo"}`;
* `` `Foo 123`` is a shortcut for `` `Foo{ 123 }``, and therefore
`{tag="Foo", 123 }`; the expression after the tag must be a literal
number or string.
When using a Metalua interpreter or compiler, the backtick syntax is
supported and can be used directly. Metalua's pretty-printing helpers
also try to use backtick syntax whenever applicable.
### Tree elements
Tree elements are mainly categorized into statements `stat`,
expressions `expr` and lists of statements `block`. Auxiliary
definitions include function applications/method invocations `apply`,
which are both valid statements and expressions, and expressions
admissible on the left-hand side of an assignment statement `lhs`.
block: { stat* }
stat:
`Do{ stat* }
| `Set{ {lhs+} {expr+} } -- lhs1, lhs2... = e1, e2...
| `While{ expr block } -- while e do b end
| `Repeat{ block expr } -- repeat b until e
| `If{ (expr block)+ block? } -- if e1 then b1 [elseif e2 then b2] ... [else bn] end
| `Fornum{ ident expr expr expr? block } -- for ident = e, e[, e] do b end
| `Forin{ {ident+} {expr+} block } -- for i1, i2... in e1, e2... do b end
| `Local{ {ident+} {expr+}? } -- local i1, i2... = e1, e2...
| `Localrec{ ident expr } -- only used for 'local function'
| `Goto{ <string> } -- goto str
| `Label{ <string> } -- ::str::
| `Return{ <expr*> } -- return e1, e2...
| `Break -- break
| apply
expr:
`Nil | `Dots | `True | `False
| `Number{ <number> }
| `String{ <string> }
| `Function{ { ident* `Dots? } block }
| `Table{ ( `Pair{ expr expr } | expr )* }
| `Op{ opid expr expr? }
| `Paren{ expr } -- significant to cut multiple values returns
| apply
| lhs
apply:
`Call{ expr expr* }
| `Invoke{ expr `String{ <string> } expr* }
ident: `Id{ <string> }
lhs: ident | `Index{ expr expr }
opid: 'add' | 'sub' | 'mul' | 'div'
| 'mod' | 'pow' | 'concat'| 'eq'
| 'lt' | 'le' | 'and' | 'or'
| 'not' | 'len'
### Meta-data (lineinfo)
ASTs also embed some metadata, allowing them to be mapped to their source
representation. This information is stored in a `"lineinfo"` field
in each tree node, which points to the range of characters in the
source string which represents it, and to the content of any comment
that would appear immediately before or after that node.
Lineinfo objects have two fields, `"first"` and `"last"`, describing
respectively the beginning and the end of the subtree in the
sources. For instance, the sub-node `` `Number{123}`` produced by parsing
`[[return 123]]` will have `lineinfo.first` describing offset 8, and
`lineinfo.last` describing offset 10:
> mlc = require 'metalua.compiler'.new()
> ast = mlc :src_to_ast "return 123 -- comment"
> print(ast[1][1].lineinfo)
<?|L1|C8-10|K8-10|C>
>
A lineinfo keeps track of character offsets relative to the beginning
of the source string/file ("K8-10" above), line numbers (L1 above; a
lineinfo spanning on several lines would read something like "L1-10"),
columns i.e. offset within the line ("C8-10" above), and a filename if
available (the "?" mark above indicating that we have no file name, as
the AST comes from a string). The final "|C>" indicates that there's a
comment immediately after the node; an initial "<C|" would have meant
that there was a comment immediately before the node.
Positions represent either the end of a token and the beginning of an
inter-token space (`"last"` fields) or the beginning of a token, and
the end of an inter-token space (`"first"` fields). Inter-token spaces
might be empty. They can also contain comments, which might be useful
to link with surrounding tokens and AST subtrees.
Positions are chained with their "dual" one: a position at the
beginning of an inter-token space keeps a reference to the position at
the end of that inter-token space in its `"facing"` field, and
conversely, end-of-inter-token positions keep track of the inter-token
space beginning, also in `"facing"`. An inter-token space can be
empty, e.g. in `"2+2"`, in which case `lineinfo==lineinfo.facing`.
Comments are also kept in the `"comments"` field. If present, this
field contains a list of comments, with a `"lineinfo"` field
describing the span between the first and last comment. Each comment
is represented by a list of one string, with a `"lineinfo"` describing
the span of this comment only. Consecutive lines of `--` comments are
considered as one comment: `"-- foo\n-- bar\n"` parses as one comment
whose text is `"foo\nbar"`, whereas `"-- foo\n\n-- bar\n"` parses as
two comments `"foo"` and `"bar"`.
So for instance, if `f` is the AST of a function and I want to
retrieve the comment before the function, I'd do:
f_comment = f.lineinfo.first.comments[1][1]
The information in lineinfo positions, i.e. in each `"first"` and
`"last"` field, is held in the following fields (a short example follows the list):
* `"source"` the filename (optional);
* `"offset"` the 1-based offset relative to the beginning of the string/file;
* `"line"` the 1-based line number;
* `"column"` the 1-based offset within the line;
* `"facing"` the position at the opposite end of the inter-token space.
* `"comments"` the comments in the associated inter-token space (optional).
* `"id"` an arbitrary number, which uniquely identifies an inter-token
space within a given tokens stream.
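For example, continuing the REPL session above, the starting position of the
`Number` node can be inspected directly (a sketch; the values match the
`<?|L1|C8-10|K8-10|C>` lineinfo shown earlier):

> first = ast[1][1].lineinfo.first
> print(first.line, first.column, first.offset)
1       8       8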
View File
@ -0,0 +1,13 @@
Metalua
=======
Metalua is a Lua code analysis tool, as well as a compiler for a
superset of Lua 5.1 supporting Compile-Time Meta-Programming. It's
separated into two LuaRocks, `metalua-parser` and
`metalua-compiler`. The documentation of each rock can be found in
`README-parser.md` and `README-compiler.md`.
All the code in Metalua is released under dual licenses:
* MIT public license (same as Lua);
* EPL public license (same as Eclipse).
View File
@ -0,0 +1,38 @@
--*-lua-*--
package = "metalua-parser"
version = "0.7.3-2"
source = {
url = "http://git.eclipse.org/c/koneki/org.eclipse.koneki.metalua.git/snapshot/org.eclipse.koneki.metalua-v0.7.3.tar.gz"
}
description = {
summary = "Metalua's parser: converting Lua source strings and files into AST",
detailed = [[
This is a subset of the full Metalua compiler. It defines and generates an AST
format for Lua programs, which offers a nice level of abstraction to reason about
and manipulate Lua programs.
]],
homepage = "http://git.eclipse.org/c/koneki/org.eclipse.koneki.metalua.git",
license = "EPL + MIT"
}
dependencies = {
"lua >= 5.1"
}
build = {
type="builtin",
modules={
["metalua.grammar.generator"] = "metalua/grammar/generator.lua",
["metalua.grammar.lexer"] = "metalua/grammar/lexer.lua",
["metalua.compiler.parser"] = "metalua/compiler/parser.lua",
["metalua.compiler.parser.table"] = "metalua/compiler/parser/table.lua",
["metalua.compiler.parser.ext"] = "metalua/compiler/parser/ext.lua",
["metalua.compiler.parser.annot.generator"] = "metalua/compiler/parser/annot/generator.lua",
["metalua.compiler.parser.annot.grammar"] = "metalua/compiler/parser/annot/grammar.lua",
["metalua.compiler.parser.stat"] = "metalua/compiler/parser/stat.lua",
["metalua.compiler.parser.misc"] = "metalua/compiler/parser/misc.lua",
["metalua.compiler.parser.lexer"] = "metalua/compiler/parser/lexer.lua",
["metalua.compiler.parser.meta"] = "metalua/compiler/parser/meta.lua",
["metalua.compiler.parser.expr"] = "metalua/compiler/parser/expr.lua",
["metalua.compiler"] = "metalua/compiler.lua",
["metalua.pprint"] = "metalua/pprint.lua",
}
}
View File
@ -0,0 +1,34 @@
rock_manifest = {
doc = {
["README-compiler.md"] = "292523d759247d210d32fb2f6153e0f4",
["README-parser.md"] = "b44e3673d96dd296f2c0e92a6c87ee18",
["README.md"] = "20bfb490cddef9e101e44688791abcda"
},
lua = {
metalua = {
compiler = {
parser = {
annot = {
["generator.lua"] = "d86f7507d66ba6a3692a6f8611e9939b",
["grammar.lua"] = "7d195bde7992efd9923771751b67b18f"
},
["expr.lua"] = "3a0b1984a6f92280e2e63b074fdcec10",
["ext.lua"] = "a99e31a07bc390b826f6653bcc47d89b",
["lexer.lua"] = "eac0f9d475d9dae4ea5a2724014cebec",
["meta.lua"] = "12870bceda6395695020b739196e2a92",
["misc.lua"] = "49d59f4fc1bfb77b36f78d4f87ae258f",
["stat.lua"] = "83f10ac899be12ca4df58bbe8645299f",
["table.lua"] = "5d2389e89603b7f78c731e6918aa1a9b"
},
["parser.lua"] = "e6ae68ce200de8071bb0fefad97f9b79"
},
["compiler.lua"] = "ca65ee9a3053581f4315821a31d0c1fd",
grammar = {
["generator.lua"] = "b8a29e817d6798c12f40a230a0f6d0af",
["lexer.lua"] = "7cb7c835479a9be884130eaacb9be60a"
},
["pprint.lua"] = "0b9bd8757b45c2d4be30106abcbd45b2"
}
},
["metalua-parser-0.7.3-2.rockspec"] = "a56680900b0b51701db7cd7abf49af92"
}
View File
@ -0,0 +1,66 @@
package = "penlight"
version = "0.9.8-1"
source = {
dir = "penlight-0.9.8",
url = "http://stevedonovan.github.com/files/penlight-0.9.8-core.zip",
}
description = {
summary = "Lua utility libraries loosely based on the Python standard libraries",
homepage = "http://stevedonovan.github.com/Penlight",
license = "MIT/X11",
maintainer = "steve.j.donovan@gmail.com",
detailed = [[
Penlight is a set of pure Lua libraries for making it easier to work with common tasks like
iterating over directories, reading configuration files and the like. Provides functional operations
on tables and sequences.
]]
}
dependencies = {
"luafilesystem",
}
build = {
type = "builtin",
modules = {
["pl.strict"] = "lua/pl/strict.lua",
["pl.dir"] = "lua/pl/dir.lua",
["pl.operator"] = "lua/pl/operator.lua",
["pl.input"] = "lua/pl/input.lua",
["pl.config"] = "lua/pl/config.lua",
["pl.seq"] = "lua/pl/seq.lua",
["pl.stringio"] = "lua/pl/stringio.lua",
["pl.text"] = "lua/pl/text.lua",
["pl.test"] = "lua/pl/test.lua",
["pl.tablex"] = "lua/pl/tablex.lua",
["pl.app"] = "lua/pl/app.lua",
["pl.stringx"] = "lua/pl/stringx.lua",
["pl.lexer"] = "lua/pl/lexer.lua",
["pl.utils"] = "lua/pl/utils.lua",
["pl.sip"] = "lua/pl/sip.lua",
["pl.permute"] = "lua/pl/permute.lua",
["pl.pretty"] = "lua/pl/pretty.lua",
["pl.class"] = "lua/pl/class.lua",
["pl.List"] = "lua/pl/List.lua",
["pl.data"] = "lua/pl/data.lua",
["pl.Date"] = "lua/pl/Date.lua",
["pl"] = "lua/pl/init.lua",
["pl.luabalanced"] = "lua/pl/luabalanced.lua",
["pl.comprehension"] = "lua/pl/comprehension.lua",
["pl.path"] = "lua/pl/path.lua",
["pl.array2d"] = "lua/pl/array2d.lua",
["pl.func"] = "lua/pl/func.lua",
["pl.lapp"] = "lua/pl/lapp.lua",
["pl.file"] = "lua/pl/file.lua",
['pl.template'] = "lua/pl/template.lua",
["pl.Map"] = "lua/pl/Map.lua",
["pl.MultiMap"] = "lua/pl/MultiMap.lua",
["pl.OrderedMap"] = "lua/pl/OrderedMap.lua",
["pl.Set"] = "lua/pl/Set.lua",
["pl.xml"] = "lua/pl/xml.lua",
["pl.platf.luajava"] = "lua/pl/platf/luajava.lua"
},
}
View File
@ -0,0 +1,45 @@
rock_manifest = {
lua = {
pl = {
["Date.lua"] = "d2131d59151ce978c4db6a648fcd275a",
["List.lua"] = "1236c5eb08956619daacd25a462a9682",
["Map.lua"] = "0297a536ac0595ac59e8828f8c867f53",
["MultiMap.lua"] = "e5f898fe2443e51c38825e9bc3d1aee5",
["OrderedMap.lua"] = "bd8e39c59e22c582a33e2f025d3ae914",
["Set.lua"] = "346ff7392fd4aeda418fb832e8da7a7f",
["app.lua"] = "23ffb79e69a3fd679013cf82d95ed792",
["array2d.lua"] = "77618ec2e2de4d6d237484dfd742cd73",
["class.lua"] = "6f58bf39e7f90711b6840ad6955d258e",
["comprehension.lua"] = "f8600ba945dde5d959194500a687c69f",
["config.lua"] = "9ea3ce0ac3cdf2ce0e17f1353f32abb6",
["data.lua"] = "be446ff813b5bcf30b4063601165df6a",
["dir.lua"] = "3d60d4c1caeaabe199fe361e4e9b14a4",
["file.lua"] = "f5c9527ea14b511d2cb9af80b219c562",
["func.lua"] = "cc50d73512b6d0518f6587b82844de8c",
["init.lua"] = "9232be7d8790d4f907972a00dec7949d",
["input.lua"] = "bab7c64ca9a740df5e9fb9909610bbc4",
["lapp.lua"] = "1cc81f048bc3fcd775c40cd9a2d601a7",
["lexer.lua"] = "da0db5e323a2d37545ccb02592d0d3c8",
["luabalanced.lua"] = "00b94a997a9ea4d73f54c10893f3b35f",
["operator.lua"] = "e606629c738966cf497bb938457adebd",
["path.lua"] = "b0714bc337c068b7252f64250fe59604",
["permute.lua"] = "b0ed9ba2787119ef99468329a54ea16a",
platf = {
["luajava.lua"] = "9c2898667281ad9501cc05a8e31a6f53"
},
["pretty.lua"] = "3ece64317ce05916eaba91fa96d9e7c0",
["seq.lua"] = "e99e420345ab11120a7b741d8184920a",
["sip.lua"] = "bde74f65e7246017d3ef034d178100ea",
["strict.lua"] = "720e939931dbbe42fad8fd4e7736435e",
["stringio.lua"] = "a8f4c786ea1b62f16ed05e6b09840044",
["stringx.lua"] = "43f57755969c6b4001316226506a3744",
["tablex.lua"] = "dec027cc3a3901766bd933c5fc0f3e93",
["template.lua"] = "f358175bbb84c401c6213c953ce295a4",
["test.lua"] = "1c45f7b1c438673f1eb668e2ca592f1c",
["text.lua"] = "c30f90cab2d00186a6432e408ba1fe14",
["utils.lua"] = "68cd38638a29b4ab5f1cc0eae38dce77",
["xml.lua"] = "e13ed468c450fccb9a8e858a0f787eef"
}
},
["penlight-0.9.8-1.rockspec"] = "96edac3ff1d0ac57cb45d6551a56a775"
}
View File
@ -0,0 +1,653 @@
#!/usr/bin/env lua
---------
-- LuaSrcDiet
--
-- Compresses Lua source code by removing unnecessary characters.
-- For Lua 5.1+ source code.
--
-- **Notes:**
--
-- * Remember to update version and date information below (MSG_TITLE).
-- * TODO: passing data tables around is a horrific mess.
-- * TODO: implement pcall() to properly handle lexer etc. errors.
-- * TODO: need some automatic testing for a semblance of sanity.
-- * TODO: the plugin module is highly experimental and unstable.
----
local equiv = require "luasrcdiet.equiv"
local fs = require "luasrcdiet.fs"
local llex = require "luasrcdiet.llex"
local lparser = require "luasrcdiet.lparser"
local luasrcdiet = require "luasrcdiet.init"
local optlex = require "luasrcdiet.optlex"
local optparser = require "luasrcdiet.optparser"
local byte = string.byte
local concat = table.concat
local find = string.find
local fmt = string.format
local gmatch = string.gmatch
local match = string.match
local print = print
local rep = string.rep
local sub = string.sub
local plugin
local LUA_VERSION = match(_VERSION, " (5%.[123])$") or "5.1"
-- Is --opt-binequiv available for this Lua version?
local BIN_EQUIV_AVAIL = LUA_VERSION == "5.1" and not package.loaded.jit
---------------------- Messages and textual data ----------------------
local MSG_TITLE = fmt([[
LuaSrcDiet: Puts your Lua 5.1+ source code on a diet
Version %s <%s>
]], luasrcdiet._VERSION, luasrcdiet._HOMEPAGE)
local MSG_USAGE = [[
usage: luasrcdiet [options] [filenames]
example:
>luasrcdiet myscript.lua -o myscript_.lua
options:
-v, --version prints version information
-h, --help prints usage information
-o <file> specify file name to write output
-s <suffix> suffix for output files (default '_')
--keep <msg> keep block comment with <msg> inside
--plugin <module> run <module> in plugin/ directory
- stop handling arguments
(optimization levels)
--none all optimizations off (normalizes EOLs only)
--basic lexer-based optimizations only
--maximum maximize reduction of source
(informational)
--quiet process files quietly
--read-only read file and print token stats only
--dump-lexer dump raw tokens from lexer to stdout
--dump-parser dump variable tracking tables from parser
--details extra info (strings, numbers, locals)
features (to disable, insert 'no' prefix like --noopt-comments):
%s
default settings:
%s]]
-- Optimization options, for ease of switching on and off.
--
-- * Positive to enable optimization, negative (no) to disable.
-- * These options should follow --opt-* and --noopt-* style for now.
local OPTION = [[
--opt-comments,'remove comments and block comments'
--opt-whitespace,'remove whitespace excluding EOLs'
--opt-emptylines,'remove empty lines'
--opt-eols,'all above, plus remove unnecessary EOLs'
--opt-strings,'optimize strings and long strings'
--opt-numbers,'optimize numbers'
--opt-locals,'optimize local variable names'
--opt-entropy,'tries to reduce symbol entropy of locals'
--opt-srcequiv,'insist on source (lexer stream) equivalence'
--opt-binequiv,'insist on binary chunk equivalence (only for PUC Lua 5.1)'
--opt-experimental,'apply experimental optimizations'
]]
-- Preset configuration.
local DEFAULT_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-numbers --opt-locals
--opt-srcequiv --noopt-binequiv
]]
-- Override configurations: MUST explicitly enable/disable everything.
local BASIC_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals --noopt-entropy
--opt-srcequiv --noopt-binequiv
]]
local MAXIMUM_CONFIG = [[
--opt-comments --opt-whitespace --opt-emptylines
--opt-eols --opt-strings --opt-numbers
--opt-locals --opt-entropy
--opt-srcequiv
]] .. (BIN_EQUIV_AVAIL and ' --opt-binequiv' or ' --noopt-binequiv')
local NONE_CONFIG = [[
--noopt-comments --noopt-whitespace --noopt-emptylines
--noopt-eols --noopt-strings --noopt-numbers
--noopt-locals --noopt-entropy
--opt-srcequiv --noopt-binequiv
]]
local DEFAULT_SUFFIX = "_" -- default suffix for file renaming
local PLUGIN_SUFFIX = "luasrcdiet.plugin." -- relative location of plugins
------------- Startup and initialize option list handling -------------
--- Simple error message handler; change to error if traceback wanted.
--
-- @tparam string msg The message to print.
local function die(msg)
print("LuaSrcDiet (error): "..msg); os.exit(1)
end
--die = error--DEBUG
-- Prepare text for list of optimizations, prepare lookup table.
local MSG_OPTIONS = ""
do
local WIDTH = 24
local o = {}
for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
local msg = " "..op
msg = msg..rep(" ", WIDTH - #msg)..desc.."\n"
MSG_OPTIONS = MSG_OPTIONS..msg
o[op] = true
o["--no"..sub(op, 3)] = true
end
OPTION = o -- replace OPTION with lookup table
end
MSG_USAGE = fmt(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)
--------- Global variable initialization, option set handling ---------
local suffix = DEFAULT_SUFFIX -- file suffix
local option = {} -- program options
local stat_c, stat_l -- statistics tables
--- Sets option lookup table based on a text list of options.
--
-- Note: additional forced settings for --opt-eols are done in optlex.lua.
--
-- @tparam string CONFIG
local function set_options(CONFIG)
for op in gmatch(CONFIG, "(%-%-%S+)") do
if sub(op, 3, 4) == "no" and -- handle negative options
OPTION["--"..sub(op, 5)] then
option[sub(op, 5)] = false
else
option[sub(op, 3)] = true
end
end
end
-------------------------- Support functions --------------------------
-- List of token types; parser-significant types are up to TTYPE_GRAMMAR,
-- while the rest are not used by parsers. Arranged for stats display.
local TTYPES = {
"TK_KEYWORD", "TK_NAME", "TK_NUMBER", -- grammar
"TK_STRING", "TK_LSTRING", "TK_OP",
"TK_EOS",
"TK_COMMENT", "TK_LCOMMENT", -- non-grammar
"TK_EOL", "TK_SPACE",
}
local TTYPE_GRAMMAR = 7
local EOLTYPES = { -- EOL names for token dump
["\n"] = "LF", ["\r"] = "CR",
["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
}
--- Reads source code from the file.
--
-- @tparam string fname Path of the file to read.
-- @treturn string Content of the file.
local function load_file(fname)
local data, err = fs.read_file(fname, "rb")
if not data then die(err) end
return data
end
--- Saves source code to the file.
--
-- @tparam string fname Path of the destination file.
-- @tparam string dat The data to write into the file.
local function save_file(fname, dat)
local ok, err = fs.write_file(fname, dat, "wb")
if not ok then die(err) end
end
------------------ Functions to deal with statistics ------------------
--- Initializes the statistics table.
local function stat_init()
stat_c, stat_l = {}, {}
for i = 1, #TTYPES do
local ttype = TTYPES[i]
stat_c[ttype], stat_l[ttype] = 0, 0
end
end
--- Adds a token to the statistics table.
--
-- @tparam string tok The token.
-- @param seminfo
local function stat_add(tok, seminfo)
stat_c[tok] = stat_c[tok] + 1
stat_l[tok] = stat_l[tok] + #seminfo
end
--- Computes totals for the statistics table, returns average table.
--
-- @treturn table
local function stat_calc()
local function avg(c, l) -- safe average function
if c == 0 then return 0 end
return l / c
end
local stat_a = {}
local c, l = 0, 0
for i = 1, TTYPE_GRAMMAR do -- total grammar tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
end
stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l
stat_a.TOTAL_TOK = avg(c, l)
c, l = 0, 0
for i = 1, #TTYPES do -- total all tokens
local ttype = TTYPES[i]
c = c + stat_c[ttype]; l = l + stat_l[ttype]
stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype])
end
stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l
stat_a.TOTAL_ALL = avg(c, l)
return stat_a
end
----------------------------- Main tasks -----------------------------
--- A simple token dumper, minimal translation of seminfo data.
--
-- @tparam string srcfl Path of the source file.
local function dump_tokens(srcfl)
-- Load file and process source input into tokens.
local z = load_file(srcfl)
local toklist, seminfolist = llex.lex(z)
-- Display output.
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
if tok == "TK_OP" and byte(seminfo) < 32 then
seminfo = "("..byte(seminfo)..")"
elseif tok == "TK_EOL" then
seminfo = EOLTYPES[seminfo]
else
seminfo = "'"..seminfo.."'"
end
print(tok.." "..seminfo)
end--for
end
--- Dumps globalinfo and localinfo tables.
--
-- @tparam string srcfl Path of the source file.
local function dump_parser(srcfl)
-- Load file and process source input into tokens,
local z = load_file(srcfl)
local toklist, seminfolist, toklnlist = llex.lex(z)
-- Do parser optimization here.
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
local globalinfo, localinfo = xinfo.globalinfo, xinfo.localinfo
-- Display output.
local hl = rep("-", 72)
print("*** Local/Global Variable Tracker Tables ***")
print(hl.."\n GLOBALS\n"..hl)
-- global tables have a list of xref numbers only
for i = 1, #globalinfo do
local obj = globalinfo[i]
local msg = "("..i..") '"..obj.name.."' -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
-- Local tables have xref numbers and a few other special
-- numbers that are specially named: decl (declaration xref),
-- act (activation xref), rem (removal xref).
print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
for i = 1, #localinfo do
local obj = localinfo[i]
local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl..
" act:"..obj.act.." rem:"..obj.rem
if obj.is_special then
msg = msg.." is_special"
end
msg = msg.." -> "
local xref = obj.xref
for j = 1, #xref do msg = msg..xref[j].." " end
print(msg)
end
print(hl.."\n")
end
--- Reads source file(s) and reports some statistics.
--
-- @tparam string srcfl Path of the source file.
local function read_only(srcfl)
-- Load file and process source input into tokens.
local z = load_file(srcfl)
local toklist, seminfolist = llex.lex(z)
print(MSG_TITLE)
print("Statistics for: "..srcfl.."\n")
-- Collect statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
-- Display output.
local function figures(tt)
return stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f"
local hl = rep("-", 42)
print(fmt(tabf1, "Lexical", "Input", "Input", "Input"))
print(fmt(tabf1, "Elements", "Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl.."\n")
end
--- Processes source file(s), writes output and reports some statistics.
--
-- @tparam string srcfl Path of the source file.
-- @tparam string destfl Path of the destination file where to write optimized source.
local function process_file(srcfl, destfl)
-- handle quiet option
local function print(...) --luacheck: ignore 431
if option.QUIET then return end
_G.print(...)
end
if plugin and plugin.init then -- plugin init
option.EXIT = false
plugin.init(option, srcfl, destfl)
if option.EXIT then return end
end
print(MSG_TITLE) -- title message
-- Load file and process source input into tokens.
local z = load_file(srcfl)
if plugin and plugin.post_load then -- plugin post-load
z = plugin.post_load(z) or z
if option.EXIT then return end
end
local toklist, seminfolist, toklnlist = llex.lex(z)
if plugin and plugin.post_lex then -- plugin post-lex
plugin.post_lex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
-- Collect 'before' statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat1_a = stat_calc()
local stat1_c, stat1_l = stat_c, stat_l
-- Do parser optimization here.
optparser.print = print -- hack
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
if plugin and plugin.post_parse then -- plugin post-parse
plugin.post_parse(xinfo.globalinfo, xinfo.localinfo)
if option.EXIT then return end
end
optparser.optimize(option, toklist, seminfolist, xinfo)
if plugin and plugin.post_optparse then -- plugin post-optparse
plugin.post_optparse()
if option.EXIT then return end
end
-- Do lexer optimization here, save output file.
local warn = optlex.warn -- use this as a general warning lookup
optlex.print = print -- hack
toklist, seminfolist, toklnlist
= optlex.optimize(option, toklist, seminfolist, toklnlist)
if plugin and plugin.post_optlex then -- plugin post-optlex
plugin.post_optlex(toklist, seminfolist, toklnlist)
if option.EXIT then return end
end
local dat = concat(seminfolist)
  -- Depending on options selected, embedded EOLs in long strings and
  -- long comments may not have been translated to \n, so tack on a warning.
if find(dat, "\r\n", 1, 1) or
find(dat, "\n\r", 1, 1) then
warn.MIXEDEOL = true
end
-- Test source and binary chunk equivalence.
equiv.init(option, llex, warn)
equiv.source(z, dat)
if BIN_EQUIV_AVAIL then
equiv.binary(z, dat)
end
local smsg = "before and after lexer streams are NOT equivalent!"
local bmsg = "before and after binary chunks are NOT equivalent!"
-- for reporting, die if option was selected, else just warn
if warn.SRC_EQUIV then
if option["opt-srcequiv"] then die(smsg) end
else
print("*** SRCEQUIV: token streams are sort of equivalent")
if option["opt-locals"] then
print("(but no identifier comparisons since --opt-locals enabled)")
end
print()
end
if warn.BIN_EQUIV then
if option["opt-binequiv"] then die(bmsg) end
elseif BIN_EQUIV_AVAIL then
print("*** BINEQUIV: binary chunks are sort of equivalent")
print()
end
-- Save optimized source stream to output file.
save_file(destfl, dat)
-- Collect 'after' statistics.
stat_init()
for i = 1, #toklist do
local tok, seminfo = toklist[i], seminfolist[i]
stat_add(tok, seminfo)
end--for
local stat_a = stat_calc()
-- Display output.
print("Statistics for: "..srcfl.." -> "..destfl.."\n")
local function figures(tt)
return stat1_c[tt], stat1_l[tt], stat1_a[tt],
stat_c[tt], stat_l[tt], stat_a[tt]
end
local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s",
"%-16s%8d%8d%10.2f%8d%8d%10.2f"
local hl = rep("-", 68)
print("*** lexer-based optimizations summary ***\n"..hl)
print(fmt(tabf1, "Lexical",
"Input", "Input", "Input",
"Output", "Output", "Output"))
print(fmt(tabf1, "Elements",
"Count", "Bytes", "Average",
"Count", "Bytes", "Average"))
print(hl)
for i = 1, #TTYPES do
local ttype = TTYPES[i]
print(fmt(tabf2, ttype, figures(ttype)))
if ttype == "TK_EOS" then print(hl) end
end
print(hl)
print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
print(hl)
-- Report warning flags from optimizing process.
if warn.LSTRING then
print("* WARNING: "..warn.LSTRING)
elseif warn.MIXEDEOL then
print("* WARNING: ".."output still contains some CRLF or LFCR line endings")
elseif warn.SRC_EQUIV then
print("* WARNING: "..smsg)
elseif warn.BIN_EQUIV then
print("* WARNING: "..bmsg)
end
print()
end
---------------------------- Main functions ---------------------------
local arg = {...} -- program arguments
set_options(DEFAULT_CONFIG) -- set to default options at beginning
--- Does per-file handling, ships off to tasks.
--
-- @tparam {string,...} fspec List of source files.
local function do_files(fspec)
for i = 1, #fspec do
local srcfl = fspec[i]
local destfl
-- Find and replace extension for filenames.
local extb, exte = find(srcfl, "%.[^%.%\\%/]*$")
local basename, extension = srcfl, ""
if extb and extb > 1 then
basename = sub(srcfl, 1, extb - 1)
extension = sub(srcfl, extb, exte)
end
destfl = basename..suffix..extension
if #fspec == 1 and option.OUTPUT_FILE then
destfl = option.OUTPUT_FILE
end
if srcfl == destfl then
die("output filename identical to input filename")
end
-- Perform requested operations.
if option.DUMP_LEXER then
dump_tokens(srcfl)
elseif option.DUMP_PARSER then
dump_parser(srcfl)
elseif option.READ_ONLY then
read_only(srcfl)
else
process_file(srcfl, destfl)
end
end--for
end
--- The main function.
local function main()
local fspec = {}
local argn, i = #arg, 1
if argn == 0 then
option.HELP = true
end
-- Handle arguments.
while i <= argn do
local o, p = arg[i], arg[i + 1]
local dash = match(o, "^%-%-?")
if dash == "-" then -- single-dash options
if o == "-h" then
option.HELP = true; break
elseif o == "-v" then
option.VERSION = true; break
elseif o == "-s" then
if not p then die("-s option needs suffix specification") end
suffix = p
i = i + 1
elseif o == "-o" then
if not p then die("-o option needs a file name") end
option.OUTPUT_FILE = p
i = i + 1
elseif o == "-" then
break -- ignore rest of args
else
die("unrecognized option "..o)
end
elseif dash == "--" then -- double-dash options
if o == "--help" then
option.HELP = true; break
elseif o == "--version" then
option.VERSION = true; break
elseif o == "--keep" then
if not p then die("--keep option needs a string to match for") end
option.KEEP = p
i = i + 1
elseif o == "--plugin" then
if not p then die("--plugin option needs a module name") end
if option.PLUGIN then die("only one plugin can be specified") end
option.PLUGIN = p
plugin = require(PLUGIN_SUFFIX..p)
i = i + 1
elseif o == "--quiet" then
option.QUIET = true
elseif o == "--read-only" then
option.READ_ONLY = true
elseif o == "--basic" then
set_options(BASIC_CONFIG)
elseif o == "--maximum" then
set_options(MAXIMUM_CONFIG)
elseif o == "--none" then
set_options(NONE_CONFIG)
elseif o == "--dump-lexer" then
option.DUMP_LEXER = true
elseif o == "--dump-parser" then
option.DUMP_PARSER = true
elseif o == "--details" then
option.DETAILS = true
elseif OPTION[o] then -- lookup optimization options
set_options(o)
else
die("unrecognized option "..o)
end
else
fspec[#fspec + 1] = o -- potential filename
end
i = i + 1
end--while
if option.HELP then
print(MSG_TITLE..MSG_USAGE); return true
elseif option.VERSION then
print(MSG_TITLE); return true
end
if option["opt-binequiv"] and not BIN_EQUIV_AVAIL then
die("--opt-binequiv is available only for PUC Lua 5.1!")
end
if #fspec > 0 then
if #fspec > 1 and option.OUTPUT_FILE then
die("with -o, only one source file can be specified")
end
do_files(fspec)
return true
else
die("nothing to do!")
end
end
-- entry point -> main() -> do_files()
if not main() then
die("Please run with option -h or --help for usage information")
end

View File

@ -0,0 +1,300 @@
= Features and Usage
Kein-Hong Man
2011-09-13
== Features
LuaSrcDiet features include the following:
* Predefined default, _--basic_ (token-only) and _--maximum_ settings.
* Avoid deleting a block comment that contains a certain message with _--keep_; this is for copyright or license texts (see the example after this list).
* Special handling for `#!` (shbang) lines and, in functions, the implicit `self` parameter.
* Dumping of raw information using _--dump-lexer_ and _--dump-parser_.
See the `samples` directory.
* A HTML plugin: outputs files that highlights globals and locals, useful for eliminating globals. See the `samples` directory.
* An SLOC plugin: counts significant lines of Lua code, like SLOCCount.
* Source and binary equivalence testing with _--opt-srcequiv_ and _--opt-binequiv_.
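For instance, a hypothetical invocation that keeps any block comment containing the word "Copyright" while applying maximum compression:
[source, sh]
lua LuaSrcDiet.lua --maximum --keep "Copyright" myscript.lua -o myscript_.lua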
List of optimizations:
* Line endings are always normalized to LF, except those embedded in comments or strings.
* _--opt-comments_: Removal of comments and comment blocks.
* _--opt-whitespace_: Removal of whitespace, excluding end-of-line characters.
* _--opt-emptylines_: Removal of empty lines.
* _--opt-eols_: Removal of unnecessary end-of-line characters.
* _--opt-strings_: Rewrite strings and long strings. See the `samples` directory.
* _--opt-numbers_: Rewrite numbers. See the `samples` directory.
* _--opt-locals_: Rename local variable names. Does not rename field or method names.
* _--opt-entropy_: Tries to improve symbol entropy when renaming locals by calculating actual letter frequencies.
* _--opt-experimental_: Apply experimental optimizations.
LuaSrcDiet tries to allow each option to be enabled or disabled separately, but they are not completely orthogonal.
If comment removal is disabled, LuaSrcDiet only removes trailing whitespace from comments.
Trailing whitespace is not removed in long strings; a warning is generated instead.
If empty line removal is disabled, LuaSrcDiet keeps all significant code on the same lines.
Thus, a user is able to debug using the original sources as a reference since the line numbering is unchanged.
String optimization deals mainly with optimizing escape sequences, but delimiters can be switched between single quotes and double quotes if the source size of the string can be reduced.
For long strings and long comments, LuaSrcDiet also tries to reduce the `=` separators in the
delimiters if possible.
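As an illustration (hypothetical input, plausible output; the exact rewrites depend on the source):
[source, lua]
----
local a = "he said \"hi\""        -- may become: local a='he said "hi"'
local b = "alpha\098eta"          -- may become: local b="alphabeta"
local c = [==[has ']]' in it]==]  -- may become: local c=[=[has ']]' in it]=]
----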
For number optimization, LuaSrcDiet saves space by trying to generate the shortest possible sequence, and in the process it does not produce “proper” scientific notation (e.g. 1.23e5) but does away with the decimal point (e.g. 123e3) instead.
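A hypothetical sketch of such rewrites:
[source, lua]
----
local a = 123000    -- may become: local a=123e3
local b = 0.0001    -- may become: local b=1e-4
----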
The local variable name optimizer uses a full parser of Lua 5.1 source code, thus it can rename all local variables, including upvalues and function parameters.
It should handle the implicit `self` parameter gracefully.
In addition, local variable names are either renamed into the shortest possible names following English frequent letter usage or are arranged by calculating entropy with the _--opt-entropy_ option.
Variable names are reused whenever possible, reducing the number of unique variable names.
For example, for `LuaSrcDiet.lua` (version 0.11.0), 683 local identifiers representing 88 unique names were optimized into 32 unique names, all of which are one character in length, saving over 2600 bytes.
If you need some kind of reassurance that your app will still work at reduced size, see the section on verification below.
== Usage
LuaSrcDiet needs a Lua 5.1.x (preferably Lua 5.1.4) binary to run.
On Unix machines, one can use the following command line:
[source, sh]
LuaSrcDiet myscript.lua -o myscript_.lua
On Windows machines, the above command line can be used on Cygwin, or you can run Lua with the LuaSrcDiet script like this:
[source, sh]
lua LuaSrcDiet.lua myscript.lua -o myscript_.lua
When run without arguments, LuaSrcDiet prints a list of options.
Also, you can check the `Makefile` for some examples of command lines to use.
For example, for maximum code size reduction and maximum verbosity, use:
[source, sh]
LuaSrcDiet --maximum --details myscript.lua -o myscript_.lua
=== Output Example
A sample output of LuaSrcDiet 0.11.0 for processing `llex.lua` at _--maximum_ settings is as follows:
----
Statistics for: LuaSrcDiet.lua -> sample/LuaSrcDiet.lua
*** local variable optimization summary ***
----------------------------------------------------------
Variable Unique Decl. Token Size Average
Types Names Count Count Bytes Bytes
----------------------------------------------------------
Global 10 0 19 95 5.00
----------------------------------------------------------
Local (in) 88 153 683 3340 4.89
TOTAL (in) 98 153 702 3435 4.89
----------------------------------------------------------
Local (out) 32 153 683 683 1.00
TOTAL (out) 42 153 702 778 1.11
----------------------------------------------------------
*** lexer-based optimizations summary ***
--------------------------------------------------------------------
Lexical Input Input Input Output Output Output
Elements Count Bytes Average Count Bytes Average
--------------------------------------------------------------------
TK_KEYWORD 374 1531 4.09 374 1531 4.09
TK_NAME 795 3963 4.98 795 1306 1.64
TK_NUMBER 54 59 1.09 54 59 1.09
TK_STRING 152 1725 11.35 152 1717 11.30
TK_LSTRING 7 1976 282.29 7 1976 282.29
TK_OP 997 1092 1.10 997 1092 1.10
TK_EOS 1 0 0.00 1 0 0.00
--------------------------------------------------------------------
TK_COMMENT 140 6884 49.17 1 18 18.00
TK_LCOMMENT 7 1723 246.14 0 0 0.00
TK_EOL 543 543 1.00 197 197 1.00
TK_SPACE 1270 2465 1.94 263 263 1.00
--------------------------------------------------------------------
Total Elements 4340 21961 5.06 2841 8159 2.87
--------------------------------------------------------------------
Total Tokens 2380 10346 4.35 2380 7681 3.23
--------------------------------------------------------------------
----
Overall, the file size is reduced by more than 9 kiB.
Tokens in the above report can be classified into “real” or actual tokens, and “fake” or whitespace tokens.
The number of “real” tokens remained the same.
Short comments and long comments were completely eliminated.
The number of line endings was reduced by 59, while all but 152 whitespace characters were optimized away.
So, token separators (whitespace, including line endings) now take up just 10 % of the total file size.
No optimization of number tokens was possible, while 2 bytes were saved for string tokens.
For local variable name optimization, the report shows that 38 unique local variable names were reduced to 20 unique names.
The number of identifier tokens should stay the same (there is currently no optimization option to optimize away non-essential or unused “real” tokens).
Since there can be at most 53 single-character identifiers, all local variables are now one character in length.
Over 600 bytes were saved.
_--details_ will give a longer report and much more information.
A sample output of LuaSrcDiet 0.12.0 for processing the one-file `LuaSrcDiet.lua` program itself at _--maximum_ and _--opt-experimental_ settings is as follows:
----
*** local variable optimization summary ***
----------------------------------------------------------
Variable Unique Decl. Token Size Average
Types Names Count Count Bytes Bytes
----------------------------------------------------------
Global 27 0 51 280 5.49
----------------------------------------------------------
Local (in) 482 1063 4889 21466 4.39
TOTAL (in) 509 1063 4940 21746 4.40
----------------------------------------------------------
Local (out) 55 1063 4889 4897 1.00
TOTAL (out) 82 1063 4940 5177 1.05
----------------------------------------------------------
*** BINEQUIV: binary chunks are sort of equivalent
Statistics for: LuaSrcDiet.lua -> app_experimental.lua
*** lexer-based optimizations summary ***
--------------------------------------------------------------------
Lexical Input Input Input Output Output Output
Elements Count Bytes Average Count Bytes Average
--------------------------------------------------------------------
TK_KEYWORD 3083 12247 3.97 3083 12247 3.97
TK_NAME 5401 24121 4.47 5401 7552 1.40
TK_NUMBER 467 494 1.06 467 494 1.06
TK_STRING 787 7983 10.14 787 7974 10.13
TK_LSTRING 14 3453 246.64 14 3453 246.64
TK_OP 6381 6861 1.08 6171 6651 1.08
TK_EOS 1 0 0.00 1 0 0.00
--------------------------------------------------------------------
TK_COMMENT 1611 72339 44.90 1 18 18.00
TK_LCOMMENT 18 4404 244.67 0 0 0.00
TK_EOL 4419 4419 1.00 1778 1778 1.00
TK_SPACE 10439 24475 2.34 2081 2081 1.00
--------------------------------------------------------------------
Total Elements 32621 160796 4.93 19784 42248 2.14
--------------------------------------------------------------------
Total Tokens 16134 55159 3.42 15924 38371 2.41
--------------------------------------------------------------------
* WARNING: before and after lexer streams are NOT equivalent!
----
The command line was:
[source, sh]
lua LuaSrcDiet.lua LuaSrcDiet.lua -o app_experimental.lua --maximum --opt-experimental --noopt-srcequiv
The important thing to note is that while the binary chunks are equivalent, the source lexer streams are not equivalent.
Hence, the _--noopt-srcequiv_ option makes LuaSrcDiet report a warning for failing the source equivalence test.
`LuaSrcDiet.lua` was reduced from 157 kiB to about 41.3 kiB.
The _--opt-experimental_ option saves an extra 205 bytes over standard _--maximum_.
Note the reduction in `TK_OP` count due to a reduction in semicolons and parentheses.
`TK_SPACE` has actually increased a bit due to semicolons that are changed into single spaces; some of these spaces could not be removed.
For more performance numbers, see the <<performance-stats#, Performance Statistics>> page.
== Verification
Code size reduction can be quite a hairy thing (even I peer at the results with suspicion), so some kind of verification is desirable for users who expect processed files to _not_ blow up.
Since LuaSrcDiet has been talked about as a tool to reduce code size in projects such as WoW add-ons, `eLua` and `nspire`, adding a verification step will reduce risk for all users of LuaSrcDiet.
LuaSrcDiet performs two kinds of equivalence testing as of version 0.12.0.
The two tests can be very, very loosely termed as _source equivalence testing_ and _binary equivalence testing_.
They are controlled by the _--opt-srcequiv_ and _--opt-binequiv_ options and are enabled by default.
Testing behaviour can be summarized as follows:
* Both tests are always executed.
The options control the resulting actions taken.
* Both options are normally enabled.
This makes any failing test throw an error.
* When an option is disabled, LuaSrcDiet will at most print a warning.
* For passing results, see the following subsections that describe what the tests actually do.
You only need to disable a testing option for experimental optimizations (see the following section for more information on this).
For anything up to and including _--maximum_, both tests should pass.
If any test fails under these conditions, then something has gone wrong with LuaSrcDiet, and I would be interested to know what has blown up.
=== _--opt-srcequiv_ Source Equivalence
The source equivalence test uses LuaSrcDiet's lexer to read and compare the _before_ and _after_ lexer token streams.
Numbers and strings are dumped as binary chunks using `loadstring()` and `string.dump()` and the results compared.
If your file passes this test, it means that a Lua 5.1.x binary should see the exact same token streams for both _before_ and _after_ files.
That is, the parser in Lua will see the same lexer sequence coming from the source for both files and thus they _should_ be equivalent.
Touch wood.
Heh.
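A minimal sketch of the underlying idea, assuming PUC Lua 5.1 (an illustration, not the actual `equiv.lua` code):
[source, lua]
----
-- two literals are treated as equivalent if chunks returning them
-- dump to identical binary chunks
local function same_literal(a, b)
  local fa = assert(loadstring("return "..a, "=c"))  -- fixed chunk name
  local fb = assert(loadstring("return "..b, "=c"))  -- so sources match
  return string.dump(fa) == string.dump(fb)
end
print(same_literal("1.23e5", "123e3"))  --> true
----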
However, if you are _cross-compiling_, it may be possible for this test to fail.
Experienced Lua developers can modify `equiv.lua` to handle such cases.
=== _--opt-binequiv_ Binary Equivalence
The binary equivalence test uses `loadstring()` and `string.dump()` to generate binary chunks of the entire _before_ and _after_ files.
Also, any shbang (`#!`) lines are removed prior to generation of the binary chunks.
The binary chunks are then run through a fake `undump` routine to verify the integrity of the binary chunks and to compare all parts that ought to be identical.
On a per-function prototype basis (where _ignored_ means that any difference between the two binary chunks is ignored):
* All debug information is ignored.
* The source name is ignored.
* Any line number data is ignored.
For example, `linedefined` and `lastlinedefined`.
The rest of the two binary chunks must be identical.
So, while the two are not binary-exact, they can be loosely termed as “equivalent” and should run in exactly the same manner.
Sort of.
You get the idea.
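A crude sketch of the first half of that process, assuming PUC Lua 5.1 (`original_src` and `squeezed_src` are hypothetical names for the two files' contents; the fake undump walk is omitted):
[source, lua]
----
local function chunk_of(src)
  src = src:gsub("^#[^\n]*", "")  -- strip any shbang line first
  return string.dump(assert(loadstring(src, "=chunk")))
end
local before = chunk_of(original_src)
local after  = chunk_of(squeezed_src)
-- the real test then walks both dumps with a fake undump routine,
-- ignoring debug info, source names and line number data
----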
This test may also cause problems if you are _cross-compiling_.
== Experimental Stuff
The _--opt-experimental_ option applies experimental optimizations that, generally, make changes to “real” tokens.
Such changes may or may not lead to the result failing binary chunk equivalence testing.
They would likely fail source lexer stream equivalence testing, so the _--noopt-srcequiv_ option needs to be applied so that LuaSrcDiet just gives a warning instead of an error.
For sample files, see the `samples` directory.
Currently implemented experimental optimizations are as follows:
=== Semicolon Operator Removal
The semicolon (`;`) operator is an optional operator that is used to separate statements.
The optimization turns all of these operators into single spaces, which are then run through whitespace removal.
At worst, there will be no change to file size.
* _Fails_ source lexer stream equivalence.
* _Passes_ binary chunk equivalence.
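For example, the following hypothetical input:
[source, lua]
local a=1;local b=2;print(a+b);
could be turned into:
[source, lua]
local a=1 local b=2 print(a+b)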
=== Function Call Syntax Sugar Optimization
This optimization turns function calls that take a single string or long string parameter into their syntax-sugar representation, which leaves out the parentheses.
Since strings can abut anything, each instance saves 2 bytes.
For example, the following:
[source, lua]
fish("cow")fish('cow')fish([[cow]])
is turned into:
[source, lua]
fish"cow"fish'cow'fish[[cow]]
* _Fails_ source lexer stream equivalence.
* _Passes_ binary chunk equivalence.
=== Other Experimental Optimizations
There are two more of these optimizations planned, before focus is turned to the Lua 5.2.x series:
* Simple `local` keyword removal.
Planned to work for a few kinds of patterns only.
* User directed name replacement, which will need user input to modify names or identifiers used in table keys and function methods or fields.

View File

@ -0,0 +1,128 @@
= Performance Statistics
Kein-Hong Man
2011-09-13
== Size Comparisons
The following is the result of processing `llex.lua` from LuaSrcDiet 0.11.0 using various optimization options:
|===
| LuaSrcDiet Option | Size (bytes)
| Original | 12,421
| Empty lines only | 12,395
| Whitespace only | 9,372
| Local rename only | 11,794
| _--basic_ setting | 3,835
| Program default | 3,208
| _--maximum_ setting | 3,130
|===
The program's default settings do not remove all unnecessary EOLs.
The _--basic_ setting is more conservative than the default settings; it disables optimization of strings and numbers and renaming of locals.
For version 0.12.0, the following is the result of processing `LuaSrcDiet.lua` using various optimization options:
|===
| LuaSrcDiet Option | Size (bytes)
| Original | 160,796
| _--basic_ setting | 60,219
| Program default | 43,650
| _--maximum_ setting | 42,453
| max + experimental | 42,248
|===
The above best size can go a lot lower with simple `local` keyword removal and user directed name replacement, which will be the subject of the next release of LuaSrcDiet.
== Compression and luac
File sizes of LuaSrcDiet 0.11.0 main files in various forms:
[cols="m,5*d", options="header,footer"]
|===
| Source File | Original Size (bytes) | `luac` normal (bytes) | `luac` stripped (bytes) | LuaSrcDiet _--basic_ (bytes) | LuaSrcDiet _--maximum_ (bytes)
| LuaSrcDiet.lua | 21,961 | 20,952 | 11,000 | 11,005 | 8,159
| llex.lua | 12,421 | 8,613 | 4,247 | 3,835 | 3,130
| lparser.lua | 41,757 | 27,215 | 12,506 | 11,755 | 7,666
| optlex.lua | 31,009 | 16,992 | 8,021 | 9,129 | 6,858
| optparser.lua | 16,511 | 9,021 | 3,520 | 5,087 | 2,999
| Total | 123,659 | 82,793 | 39,294 | 40,811 | 28,812
|===
* “LuaSrcDiet --maximum” has the smallest total file size.
* The ratio of “Original Size” to “LuaSrcDiet --maximum” is *4.3*.
* The ratio of “Original Size” to “luac stripped” is *3.1*.
* The ratio of “luac stripped” to “LuaSrcDiet --maximum” is *1.4*.
Compressibility of LuaSrcDiet 0.11.0 main files in various forms:
|===
| Compression Method | Original Size | `luac` normal | `luac` stripped | LuaSrcDiet _--basic_ | LuaSrcDiet _--maximum_
| Uncompressed originals | 123,659 | 82,793 | 39,294 | 40,811 | 28,812
| gzip -9 | 28,288 | 29,210 | 17,732 | 12,041 | 10,451
| bzip2 -9 | 24,407 | 27,232 | 16,856 | 11,480 | 9,815
| lzma (7-zip max) | 25,530 | 23,908 | 15,741 | 11,241 | 9,685
|===
* “LuaSrcDiet --maximum” has the smallest total file size (but a binary chunk loads faster and works with a smaller Lua executable).
* The ratio of “Original size” to “Original size + bzip2” is *5.1*.
* The ratio of “Original size” to “LuaSrcDiet --maximum + bzip2” is *12.6*.
* The ratio of “LuaSrcDiet --maximum” to “LuaSrcDiet --maximum + bzip2” is *2.9*.
* The ratio of “Original size” to “luac stripped + bzip2” is *7.3*.
* The ratio of “luac stripped” to “luac stripped + bzip2” is *2.3*.
* The ratio of “luac stripped + bzip2” to “LuaSrcDiet --maximum + bzip2” is *1.7*.
So, squeezed source code is smaller than stripped binary chunks and compresses better, at a compression ratio of 2.9 for squeezed source code versus 2.3 for stripped binary chunks.
Compressed binary chunks are still a very efficient way of storing Lua scripts, because using only binary chunks allows the parts of Lua needed to compile from sources (`llex.o`, `lparser.o`, `lcode.o`, `ldump.o`) to be omitted, saving over 24KB in the process.
Note that LuaSrcDiet _does not_ answer the question of whether embedding source code is better or embedding binary chunks is better.
It is simply a utility for producing smaller source code files and an exercise in processing Lua source code using a Lua-based lexer and parser skeleton.
== Compile Speed
The following is a primitive attempt to analyze in-memory Lua script loading performance (using the `loadstring` function in Lua).
The LuaSrcDiet 0.11.0 files (original, squeezed with _--maximum_ and stripped binary chunks versions) are loaded into memory first before a loop runs to repeatedly load the script files for 10 seconds.
A null loop is also performed (processing empty strings) and the time taken per null iteration is subtracted as a form of null adjustment.
Then, various performance parameters are calculated.
Note that `LuaSrcDiet.lua` was slightly modified (`#!` line removed) to let the `loadstring` function run.
The results below were obtained with a Lua 5.1.3 executable compiled using `make generic` on Cygwin/Windows XP SP2 on a Sempron 3000+ (1.8GHz).
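A rough sketch of such a harness, assuming Lua 5.1 (hypothetical, not the original benchmark code):
[source, lua]
----
local function bench(src, seconds)
  local t0, n = os.clock(), 0
  while os.clock() - t0 < seconds do
    assert(loadstring(src))  -- compile only; the chunk is never run
    n = n + 1
  end
  return n  -- iterations completed within the time budget
end
-- null adjustment: subtract the per-iteration cost of bench("", 10)
----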
The LuaSrcDiet 0.11.0 source files have 11,180 “real” tokens in total.
[cols="<h,4*d", options="header"]
|===
| | Null loop | Stripped binary chunk | Original Sources | Squeezed Sources
| Total Size (bytes) | 0 | 39,294 | 123,640 | 28,793
| Iterations | 312,155 | 9,680 | 1,306 | 1,592
| Duration (sec) | 10 | 10 | 10 | 10
| Time/iteration (msec) | 0.032 | 1.033 | 7.657 | 6.281
| _Time/iteration, null adjusted (msec)_ | | 1.001 | 7.625 | 6.249
| _Load rate (MiB/sec)_ | | 37.44 | 15.46 | 4.39
| Load time per byte (ns) | | 25.5 | 61.7 | 217.0
| Load time per token (ns) | | | 682 | 559
| Source time vs binary chunk time ratio | | 1.00 | 7.62 | 6.24
| Binary chunk rate vs. source rate ratio | | 1.00 | 2.42 | 8.53
|===
The above shows that stripped binary chunks are still, in many ways, the highest-performance form of fixed Lua scripts.
On a very average machine, scripts load at over 37 MiB/sec (in memory).
This is very comparable to the burst speeds of common desktop hard disks of 2008.
If instant response is paramount, stripped binary chunks have little competition.
By contrast, source code that is squeezed to the maximum using LuaSrcDiet can only muster an in-memory load rate of 4.4 MiB/sec.
The original sources load at about 15.5 MiB/sec, but most of the speed is from the lexer scanning over comments and whitespace.
A quick calculation indicates that the speed of the lexer over comments and whitespace can be as much as 65 MiB/sec, but note that the speed is all for naught.
What really matters are the real tokens, and the squeezed source code manages to load faster than the original sources by 18 %.
So, the loading of stripped binary chunks is faster than squeezed source code by a bit over 6×.
The 4.4 MiB/sec speed for squeezed source code is still quite respectable.
When an application considers the time taken to load data from the disk and perhaps the time taken to decompress, loading source code may be perfectly fine in terms of performance.
For programs that already embed source code, using LuaSrcDiet to squeeze the source code probably speeds loading up by a tiny bit in addition to making programs smaller.

View File

@ -0,0 +1,386 @@
= Technical Notes
Kein-Hong Man
2011-09-13
== Lexer Notes
The lexer (`llex.lua`) is a version of the native 5.1.x lexer from Yueliang 0.4.0, with significant modifications.
It does have several limitations:
* The decimal point must be `.` (period).
There is no localized decimal point replacement magic.
* There is no support for nested `[[`...`]]` long strings (no `LUA_COMPAT_LSTR`).
* The lexer may not properly lex source code with characters beyond the normal ASCII character set.
Identifiers with accented characters (or any character beyond a byte value of 127) cannot be recognized.
Instead of returning one token on each call, `llex.lua` processes an entire string (all data from an entire file) and returns.
Two lists (tokens and semantic information items) are set up in the module for use by the caller.
For maximum flexibility during processing, the lexer returns non-grammar lexical elements as tokens too.
Non-grammar elements, such as comments, whitespace, and line endings, are classified along with “normal” tokens.
The lexer classifies 7 kinds of grammar tokens and 4 kinds of non-grammar tokens, as follows:
[cols="m,d"]
|===
| Grammar Token | Description
| TK_KEYWORD | keywords
| TK_NAME | identifiers
| TK_NUMBER | numbers (unconverted, kept in original form)
| TK_STRING | strings (no translation is done, includes delimiters)
| TK_LSTRING | long strings (no translation is done, includes delimiters)
| TK_OP | operators and punctuation (most single-char, some double)
| TK_EOS | end-of-stream (there is only one for each file/stream)
|===
[cols="m,d"]
|===
| Whitespace Token | Description
| TK_SPACE | whitespace (generally, spaces, \t, \v and \f)
| TK_COMMENT | comments (includes delimiters; also includes the special first-line shbang, which is handled specially in the optimizer)
| TK_LCOMMENT | block comments (includes delimiters)
| TK_EOL | end-of-lines (excludes those embedded in strings)
|===
A list of tokens can be generated by using the _--dump-lexer_ option, like this:
[source, sh]
lua LuaSrcDiet.lua --dump-lexer llex.lua > dump_llex.dat
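For a one-line input such as `local x = 1`, the dump looks roughly like this (a sketch based on the dumper's output format):
----
TK_KEYWORD 'local'
TK_SPACE ' '
TK_NAME 'x'
TK_SPACE ' '
TK_OP '='
TK_SPACE ' '
TK_NUMBER '1'
TK_EOL LF
TK_EOS ''
----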
== Lexer Optimizations
We aim to keep lexer-based optimizations free of parser considerations, i.e. we allow for generalized optimization of token sequences.
The table below considers the requirements for all combinations of significant tokens (except `TK_EOS`).
Other tokens are whitespace-like.
Comments can be considered to be a special kind of whitespace, e.g. a short comment needs to have a following EOL token, if we do not want to optimize away short comments.
[cols="h,6*m", options="header"]
|===
| _1st → 2nd Token_ | Keyword | Name | Number | String | LString | Oper
| Keyword | [S] | [S] | [S] | - | - | -
| Name | [S] | [S] | [S] | - | - | -
| Number | [S] | [S] | [S] | - | - | [1]
| String | - | - | - | - | - | -
| LString | - | - | - | - | - | -
| Oper | - | - | [1] | - | - | [2]
|===
A dash (`-`) in the above means that the first token can abut the second token.
`*[S]*`:: Need at least one whitespace, set as either a space or kept as an EOL.
`*[1]*`::
Need a space if operator is a `.`, all others okay.
A `+` or `-` is used as part of a floating-point spec, but there does not appear to be any way of creating a float by joining a number with a `+` or `-` plus another number.
Since an `e` has to be somewhere in the first token, this can't be done.
`*[2]*`::
Normally there cannot be consecutive operators, but we plan to allow for generalized optimization of token sequences, i.e. even sequences that are grammatically illegal; so disallow adjacent operators if:
* the first is in `[=<>]` and the second is `=`
* disallow dot sequences to be adjacent, but `...` first okay
* disallow `[` followed by `=` or `[` (not optimal)
Also, a minus `-` cannot precede a Comment or LComment, because comments start with a `--` prefix.
Apart from that, any Comment or LComment token can abut a real token.
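A concrete instance of rule [1]:
[source, lua]
local s = 1 .. 2  -- '1..2' is a malformed number; '1 ..2' is fine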
== Local Variable Renaming
The following discusses the problem of local variable optimization, specifically _local variable renaming_ in order to reduce source code size.
=== TK_NAME Token Considerations
A `TK_NAME` token means a number of things, and some of these cannot be renamed without analyzing the source code.
We are interested in the use of `TK_NAME` in the following:
[loweralpha]
. global variable access,
. local variable declaration, including `local` statements, `local` functions, function parameters, implicit `self` locals,
. local variable access, including upvalue access.
`TK_NAME` is also used in parts of the grammar as constant strings; these tokens cannot be optimized without user assistance.
These include usage as:
[loweralpha, start=4]
. keys in `key=value` pairs in table construction,
. field or method names in `a:b` or `a.b` syntax forms.
For the local variable name optimization scheme used, we do not consider (d) and (e), and while global variables cannot be renamed without some kind of user assistance, they need to be considered or tracked as part of Lua's variable access scheme.
=== Lifetime of a Local Variable
Consider the following example:
[source, lua]
local string, table = string, table
In the example, the two locals are assigned the values of the globals with the same names.
When Lua encounters the declaration portion:
[source, lua]
local string, table
the parser cannot immediately make the two local variables available to following code.
In the parser and code generator, locals are inactive when entries are created.
They are activated only when the function `adjustlocalvars()` is called to activate the appropriate local variables.
NOTE: The terminology used here may not be identical to that used in the Dragon Book; it merely follows the LuaSrcDiet code, which was written before I had read the Dragon Book.
In the example, the two local variables are activated only after the whole statement has been parsed, that is, after the last `table` token.
Hence, the statement works as expected.
Also, once the two local variables go out of scope, `removevars()` is called to deactivate them, allowing other variables of the same name to become visible again.
Another example worth mentioning is:
[source, lua]
local a, a, a = 1, 2, 3
The above will assign 3 to `a`.
Thus, when optimizing local variable names, (1) we need to consider accesses of global variable names affecting the namespace, (2) for the local variable names themselves, we need to consider when they are declared, activated and removed, and (3) within the “live” time of locals, we need to know when they are accessed (since locals that are never accessed don't really matter).
=== Local Variable Tracking
Every local variable declaration is considered an object to be renamed.
From the parser, we have the original name of the local variable, the token positions for declaration, activation and removal, and the token positions of all the `TK_NAME` tokens that reference this local.
All instances of the implicit `self` local variable are also flagged as such.
In addition to local variable information, all global variable accesses are tabled, one object entry per name, and each object has a corresponding list of token positions for the `TK_NAME` tokens where the global variable was accessed.
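The shape of one tracked local, following the field names shown by _--dump-parser_ (illustrative values only):
[source, lua]
----
local entry = {
  name = "foo",
  decl = 12,            -- token position of the declaration
  act  = 15,            -- token position where it becomes visible
  rem  = 40,            -- token position where it goes out of scope
  xref = {12, 18, 22},  -- positions of all TK_NAME references to it
  is_special = false,   -- true for the implicit 'self' parameter
}
----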
The key criteria is: *Our act of renaming cannot change the visibility of any of these locals and globals at the time they are accessed*.
However, _their scope of visibility may change during periods in which they are not accessed_, so someone who tries to insert a variable reference somewhere into a program that has had its locals renamed may find that it now refers to a different variable.
Of course, if every variable has a unique name, then there is no need for a name allocation algorithm, as there will be no conflict.
But, in order to maximize utilization of short identifier names to reduce the final code size, we want to reuse the names as much as possible.
In addition, fewer names will likely reduce symbol entropy and may slightly improve compressibility of the source code.
LuaSrcDiet avoids the use of non-ASCII letters, so there are only 53 single-character variable names.
=== Name Allocation Theory
To understand the renaming algorithm, first we need to establish how different local and global variables can operate happily without interfering with each other.
Consider three objects: local object A, local object B and global object G.
A and B involve declaration, activation and removal, and within the period each is active, there may be zero or more accesses of the local.
For G, there are only global variable accesses to look into.
Assume that we have assigned a new name to A and we wish to consider its effects on other locals and globals, for which we choose B and G as examples.
We assume local B has not been assigned a new name as we expect our algorithm to take care of collisions.
A's lifetime is something like this:
----
Decl Act Rem
+ +-------------------------------+
-------------------------------------------------
----
where “Decl” is the time of declaration, “Act” is the time of activation, and “Rem” is the time of removal.
Between “Act” and “Rem”, the local is alive or “live” and Lua can see it if its corresponding `TK_NAME` identifier comes up.
----
Decl Act Rem
+ +-------------------------------+
-------------------------------------------------
* * * *
(1) (2) (3) (4)
----
Recall that the key criteria is to not change the visibility of globals and locals during when they are accessed.
Consider local and global accesses at (1), (2), (3) and (4).
A global G of the same name as A will only collide at (3), where Lua will see A and not G.
Since G must be accessed at (3) according to what the parser says, and we cannot modify the positions of “Decl”, “Act” and “Rem”, it follows that A cannot have the same name as G.
----
Decl Act Rem
+ +-----------------------+
---------------------------------
(1)+ +---+ (2)+ +---+ (3)+ +---+ (4)+ +---+
--------- --------- --------- ---------
----
For the case of A and B having the same names and colliding, consider the cases for which B is at (1), (2), (3) or (4) in the above.
(1) and (4) means that A and B are completely isolated from each other, hence in the two cases, A and B can safely use the same variable names.
To be specific, since we have assigned A, B is considered completely isolated from A if B's activation-to-removal period is isolated from the time of A's first access to last access, meaning B's active time will never affect any of A's accesses.
For (2) and (3), we have two cases where we need to consider which one has been activated first.
For (2), B is active before A, so A cannot impose on B.
But A's accesses are valid while B is active, since A can override B.
For no collision in the case of (2), we simply need to ensure that the last access of B occurs before A is activated.
For (3), B is activated before A, hence B can override A's accesses.
For no collision, none of A's accesses may happen while B is active.
Thus position (3) follows the “A is never accessed when B is active” rule in a general way.
Local variables of a child function are in the position of (3).
To illustrate, the local B can use the same name as local A and live in a child function or block scope if each time A is accessed, Lua sees A and not B.
So we have to check all accesses of A and see whether they collide with the active period of B.
If A is not accessed during that period, then B can be active with the same name.
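Case (3) is the familiar shadowing situation; a small example:
[source, lua]
----
local a = 1    -- A: activated here
do
  local a = 2  -- B: child scope, same name
  print(a)     --> 2 (Lua sees B; A must not be accessed here)
end
print(a)       --> 1 (A is accessed only after B is removed)
----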
The above appears to resolve all sorts of cases where the active times of A and B overlap.
Note that in the above, the allocator does not need to know how locals are separated according to function prototypes.
Perhaps the allocator can be simplified if knowledge of function structure is utilized.
This scheme was implemented in a hurry in 2008; it could probably be simpler if the Lua grammar were taken into account, but LuaSrcDiet mainly processes various index values in tables.
=== Name Allocation Algorithm
To begin with, the name generator is mostly separate from the name allocation algorithm.
The name generator returns the next shortest name for the algorithm to apply to local variables.
To attempt to reduce symbol entropy (which benefits compression algorithms), the name generator follows English frequent letter usage.
There is also an option to calculate an actual symbol entropy table from the input data.
Since there are 53 one-character identifiers and (53 * 63 - 4) two-character identifiers (minus a few keywords), there isn't a pressing need to optimally maximize name reuse.
The single-file version of LuaSrcDiet 0.12.0, at just over 3000 SLOC and 156 kiB in size, currently allocates around 55 unique local variable names.
In theory, we should need no more than 260 local identifiers by default.
Why?
Since `LUAI_MAXVARS` is 200 and `LUAI_MAXUPVALUES` is 60, at any block scope, there can be at most `(LUAI_MAXVARS + LUAI_MAXUPVALUES)` locals referenced, or 260.
Also, those from outer scopes not referenced in inner scopes can reuse identifiers.
The net effect of this is that a local variable name allocation method should not allocate more than 260 identifier names for locals.
The current algorithm is a simple first-come, first-served scheme (a code sketch follows the list):
[loweralpha]
. The local object that uses the most tokens is named first.
. Any other non-conflicting locals with respect to the first object are assigned the same name.
. Assigned locals are removed from consideration and the procedure is repeated for objects that have not been assigned new names.
. Steps (a) to (c) repeats until no local objects are left.
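A minimal sketch of steps (a) to (d); the helpers are hypothetical: `locals` entries carry a token count, `next_name()` yields the next shortest identifier, and `conflicts()` implements the visibility test from the previous section.
[source, lua]
----
local function allocate_names(locals, next_name, conflicts)
  -- (a) the object using the most tokens is considered first
  table.sort(locals, function(x, y) return x.tokens > y.tokens end)
  local pending = locals
  while #pending > 0 do  -- (d) repeat until no objects are left
    local name = next_name()
    local holders, rest = {}, {}
    for _, obj in ipairs(pending) do
      local clash = false
      for _, h in ipairs(holders) do
        if conflicts(obj, h) then clash = true; break end
      end
      if clash then
        rest[#rest + 1] = obj        -- retry with a later name
      else
        obj.newname = name           -- (b) non-conflicting: share the name
        holders[#holders + 1] = obj  -- (c) remove from consideration
      end
    end
    pending = rest
  end
end
----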
In addition, there are a few extra issues to take care of:
[loweralpha, start=5]
. Implicit `self` locals that have been flagged as such are already “assigned to” and so they are left unmodified.
. The name generator skips `self` to avoid conflicts.
This is not optimal but it is unlikely a script will use so many local variables as to reach `self`.
. Keywords are also skipped for the name generator.
. Global name conflict resolution.
For (h), global name conflict resolution is handled just after the new name is generated.
The name can still be used for some locals even if it collides with a global of the same name.
To remove conflicts, global variable accesses for the particular identifier name are checked.
Any local variables that are active when a global access is made are marked to be skipped.
The rest of the local objects can then use that name.
The algorithm has additional code for handling locals that use the same name in the same scope.
This extends the basic algorithm that was discussed earlier.
For example:
[source, lua]
----
local foo = 10 -- <1>
...
local foo = 20 -- <2>
...
print(e)
----
Since we are considering name visibility, the first `foo` does not really cease to exist when the second `foo` is declared, because if we were to make that assumption, and the first `foo` is removed before (2), then I should be able to use `e` as the name for the first `foo` and after (2), it should not conflict with variables in the outer scope with the same name.
To illustrate:
[source, lua]
----
local e = 10 -- 'foo' renamed to 'e'
...
local t = 20 -- error if we assumed 'e' removed here
...
print(e)
----
Since `e` is a global in the example, we now have an error, as the name has been taken over by a local.
Thus, the first `foo` local must have its active time extended to the end of the current scope.
If there is no conflict between the first and second `foo`, the algorithm may still assign the same names to them.
The current fix to deal with the above chains local objects in order to find the removal position.
It may be possible to handle this in a cleaner manner; LuaSrcDiet handles it as a fix to the basic algorithm.
== Ideas
The following is a list of optimization ideas that do not require heavy-duty source code parsing and comprehension.
=== Lexer-Based Optimization Ideas
* Convert long strings to normal strings, vice versa. +
_A little desperate for a few bytes, can be done, but not really keen on implementing it._
* Special number forms to take advantage of constant number folding. +
_For example, 65536 can be represented using 2^16^, and so on.
An expression must be evaluated in the same way, otherwise this seems unsafe._
* Warn if a number has too many digits. +
_Should we warn or “test and truncate”?
Not really an optimization that will see much use._
* Warn of opportunity for using a `local` to zap a bunch of globals. +
_Current recommendation is to use the HTML plugin to display globals in red.
The developer can then visually analyze the source code and make the appropriate fixes.
I think this is better than having the program guess the intentions of the developer._
* Spaces to tabs in comments, long comments, or long strings. +
_For long strings, need to know the user's intention.
Would rather not implement._
=== Parser-Based Optimization Ideas
Heavy-duty optimizations will need more data to be generated by the parser.
A full AST may eventually be needed.
The most attractive idea that can be quickly implemented with a significant code size “win” is to reduce the number of `local` keywords.
* Remove unused ``local``s that can be removed in the source. +
_Need to consider unused ``local``s in multiple assignments._
* Simplify declaration of ``local``s that can be merged. +
_From:_
+
[source, lua]
----
-- separate locals
local foo
local bar
-- separate locals with assignments
local foo = 123
local bar = "pqr"
----
+
_To:_
+
[source, lua]
----
-- merged locals
local foo,bar
-- merged locals with assignments
local foo,bar=123,"pqr"
----
* Simplify declarations using `nil`. +
_From:_
[source, lua]
local foo, bar = nil, nil
+
_To:_
[source, lua]
local foo,bar
* Simplify ``return``s using `nil`. +
_How desirable is this? From Lua list discussions, it seems to be potentially unsafe unless all return locations are known and checked._
* Removal of optional semicolons in statements and removal of commas or semicolons in table constructors. +
_Yeah, this might save a few bytes._
* Remove table constructor elements using `nil`. +
_Not sure if this is safe to do._
* Simplify logical or relational operator expressions. +
_This is more suitable for an optimizing compiler project._

View File

@ -0,0 +1,41 @@
-- vim: set ft=lua:
package = 'LuaSrcDiet'
version = '0.3.0-2'
source = { url = 'https://github.com/jirutka/luasrcdiet/archive/v0.3.0/luasrcdiet-0.3.0.tar.gz', md5 = 'c0ff36ef66cd0568c96bc54e9253a8fa' }
description = {
summary = 'Compresses Lua source code by removing unnecessary characters',
detailed = [[
This is a revival of LuaSrcDiet originally written by Kein-Hong Man.]],
homepage = 'https://github.com/jirutka/luasrcdiet',
maintainer = 'Jakub Jirutka <jakub@jirutka.cz>',
license = 'MIT',
}
dependencies = {
'lua >= 5.1',
}
build = {
type = 'builtin',
modules = {
['luasrcdiet'] = 'luasrcdiet/init.lua',
['luasrcdiet.equiv'] = 'luasrcdiet/equiv.lua',
['luasrcdiet.fs'] = 'luasrcdiet/fs.lua',
['luasrcdiet.llex'] = 'luasrcdiet/llex.lua',
['luasrcdiet.lparser'] = 'luasrcdiet/lparser.lua',
['luasrcdiet.optlex'] = 'luasrcdiet/optlex.lua',
['luasrcdiet.optparser'] = 'luasrcdiet/optparser.lua',
['luasrcdiet.plugin.example'] = 'luasrcdiet/plugin/example.lua',
['luasrcdiet.plugin.html'] = 'luasrcdiet/plugin/html.lua',
['luasrcdiet.plugin.sloc'] = 'luasrcdiet/plugin/sloc.lua',
['luasrcdiet.utils'] = 'luasrcdiet/utils.lua',
},
install = {
bin = {
luasrcdiet = 'bin/luasrcdiet',
}
}
}

View File

@ -0,0 +1,28 @@
rock_manifest = {
bin = {
luasrcdiet = "6c318685d57f827cf5baf7037a5d6072"
},
doc = {
["features-and-usage.adoc"] = "157587c27a0c340d9d1dd06af9b339b5",
["performance-stats.adoc"] = "cf5f96a86e021a3a584089fafcabd056",
["tech-notes.adoc"] = "075bc34e667a0055e659e656baa2365a"
},
lua = {
luasrcdiet = {
["equiv.lua"] = "967a6b17573d229e326dbb740ad7fe8c",
["fs.lua"] = "53db7dfc50d026b683fad68ed70ead0f",
["init.lua"] = "c6f368e6cf311f3257067fed0fbcd06a",
["llex.lua"] = "ede897af261fc362a82d87fbad91ea2b",
["lparser.lua"] = "c1e1f04d412b79a040fd1c2b74112953",
["optlex.lua"] = "7c986da991a338494c36770b4a30fa9f",
["optparser.lua"] = "b125a271ac1c691dec68b63019b1b5da",
plugin = {
["example.lua"] = "86b5c1e9dc7959db6b221d6d5a0db3d1",
["html.lua"] = "c0d3336a133f0c8663f395ee98d54f6a",
["sloc.lua"] = "fb1a91b18b701ab83f21c87733be470a"
},
["utils.lua"] = "bd6c1e85c6a9bf3383d336a4797fb292"
}
},
["luasrcdiet-0.3.0-2.rockspec"] = "da70047e1b0cbdc1ff08d060327fa110"
}

View File

@ -0,0 +1,270 @@
return [[html {
color: #000;
background: #FFF;
}
body,div,dl,dt,dd,ul,ol,li,h1,h2,h3,h4,h5,h6,pre,code,form,fieldset,legend,input,button,textarea,p,blockquote,th,td {
margin: 0;
padding: 0;
}
table {
border-collapse: collapse;
border-spacing: 0;
}
fieldset,img {
border: 0;
}
address,caption,cite,code,dfn,em,strong,th,var,optgroup {
font-style: inherit;
font-weight: inherit;
}
del,ins {
text-decoration: none;
}
li {
list-style: bullet;
margin-left: 20px;
}
caption,th {
text-align: left;
}
h1,h2,h3,h4,h5,h6 {
font-size: 100%;
font-weight: bold;
}
q:before,q:after {
content: '';
}
abbr,acronym {
border: 0;
font-variant: normal;
}
sup {
vertical-align: baseline;
}
sub {
vertical-align: baseline;
}
legend {
color: #000;
}
input,button,textarea,select,optgroup,option {
font-family: inherit;
font-size: inherit;
font-style: inherit;
font-weight: inherit;
}
input,button,textarea,select {*font-size:100%;
}
/* END RESET */
body {
margin-left: 1em;
margin-right: 1em;
font-family: arial, helvetica, geneva, sans-serif;
background-color: #ffffff; margin: 0px;
}
code, tt { font-family: monospace; }
body, p, td, th { font-size: .95em; line-height: 1.2em;}
p, ul { margin: 10px 0 0 10px;}
strong { font-weight: bold;}
em { font-style: italic;}
h1 {
font-size: 1.5em;
margin: 25px 0 20px 0;
}
h2, h3, h4 { margin: 15px 0 10px 0; }
h2 { font-size: 1.25em; }
h3 { font-size: 1.15em; }
h4 { font-size: 1.06em; }
a:link { font-weight: bold; color: #004080; text-decoration: none; }
a:visited { font-weight: bold; color: #006699; text-decoration: none; }
a:link:hover { text-decoration: underline; }
hr {
color:#cccccc;
background: #00007f;
height: 1px;
}
blockquote { margin-left: 3em; }
ul { list-style-type: disc; }
p.name {
font-family: "Andale Mono", monospace;
padding-top: 1em;
}
p:first-child {
margin-top: 0px;
}
pre.example {
background-color: rgb(245, 245, 245);
border: 1px solid silver;
padding: 10px;
margin: 10px 0 10px 0;
font-family: "Andale Mono", monospace;
font-size: .85em;
}
pre {
background-color: rgb(245, 245, 245);
border: 1px solid silver;
padding: 10px;
margin: 10px 0 10px 0;
font-family: "Andale Mono", monospace;
}
table.index { border: 1px #00007f; }
table.index td { text-align: left; vertical-align: top; }
#container {
margin-left: 1em;
margin-right: 1em;
background-color: #f0f0f0;
}
#product {
text-align: center;
border-bottom: 1px solid #cccccc;
background-color: #ffffff;
}
#product big {
font-size: 2em;
}
#main {
background-color: #f0f0f0;
border-left: 2px solid #cccccc;
}
#navigation {
float: left;
width: 18em;
vertical-align: top;
background-color: #f0f0f0;
overflow: scroll;
position: fixed;
height:100%;
}
#navigation h2 {
background-color:#e7e7e7;
font-size:1.1em;
color:#000000;
text-align: left;
padding:0.2em;
border-top:1px solid #dddddd;
border-bottom:1px solid #dddddd;
}
#navigation ul
{
font-size:1em;
list-style-type: none;
margin: 1px 1px 10px 1px;
}
#navigation li {
text-indent: -1em;
display: block;
margin: 3px 0px 0px 22px;
}
#navigation li li a {
margin: 0px 3px 0px -1em;
}
#content {
margin-left: 18em;
padding: 1em;
border-left: 2px solid #cccccc;
border-right: 2px solid #cccccc;
background-color: #ffffff;
}
#about {
clear: both;
padding: 5px;
border-top: 2px solid #cccccc;
background-color: #ffffff;
}
@media print {
body {
font: 12pt "Times New Roman", "TimeNR", Times, serif;
}
a { font-weight: bold; color: #004080; text-decoration: underline; }
#main {
background-color: #ffffff;
border-left: 0px;
}
#container {
margin-left: 2%;
margin-right: 2%;
background-color: #ffffff;
}
#content {
padding: 1em;
background-color: #ffffff;
}
#navigation {
display: none;
}
pre.example {
font-family: "Andale Mono", monospace;
font-size: 10pt;
page-break-inside: avoid;
}
}
table.module_list {
border-width: 1px;
border-style: solid;
border-color: #cccccc;
border-collapse: collapse;
}
table.module_list td {
border-width: 1px;
padding: 3px;
border-style: solid;
border-color: #cccccc;
}
table.module_list td.name { background-color: #f0f0f0; }
table.module_list td.summary { width: 100%; }
table.function_list {
border-width: 1px;
border-style: solid;
border-color: #cccccc;
border-collapse: collapse;
}
table.function_list td {
border-width: 1px;
padding: 3px;
border-style: solid;
border-color: #cccccc;
}
table.function_list td.name { background-color: #f0f0f0; }
table.function_list td.summary { width: 100%; }
dl.table dt, dl.function dt {border-top: 1px solid #ccc; padding-top: 1em;}
dl.table dd, dl.function dd {padding-bottom: 1em; margin: 10px 0 0 20px;}
dl.table h3, dl.function h3 {font-size: .95em;}
]]

View File

@ -0,0 +1,87 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
--
-- Load documentation generator and update its path
--
local templateengine = require 'templateengine'
for name, def in pairs( require 'template.utils' ) do
templateengine.env [ name ] = def
end
-- Load documentation extractor and set handled languages
local lddextractor = require 'lddextractor'
local M = {}
M.defaultsitemainpagename = 'index'
function M.generatedocforfiles(filenames, cssname, noheuristic)
if not filenames then return nil, 'No files provided.' end
--
-- Generate API model elements for all files
--
local generatedfiles = {}
local wrongfiles = {}
for _, filename in pairs( filenames ) do
-- Load file content
local file, err = io.open(filename, 'r')
if not file then return nil, 'Unable to read "'..filename..'"\n'..err end
local code = file:read('*all')
file:close()
-- Get module for current file
local apimodule, err = lddextractor.generateapimodule(filename, code, noheuristic)
-- Handle modules with module name
if apimodule and apimodule.name then
generatedfiles[ apimodule.name ] = apimodule
elseif not apimodule then
-- Track faulty files
table.insert(wrongfiles, 'Unable to extract comments from "'..filename..'".\n'..err)
elseif not apimodule.name then
-- Do not generate documentation for unnamed modules
table.insert(wrongfiles, 'Unable to create documentation for "'..filename..'", no module name provided.')
end
end
--
-- Defining index, which will summarize all modules
--
local index = {
modules = generatedfiles,
name = M.defaultsitemainpagename,
tag='index'
}
generatedfiles[ M.defaultsitemainpagename ] = index
--
-- Define page cursor
--
local page = {
currentmodule = nil,
headers = { [[<link rel="stylesheet" href="]].. cssname ..[[" type="text/css"/>]] },
modules = generatedfiles,
tag = 'page'
}
--
-- Iterate over modules, generating complete doc pages
--
for _, module in pairs( generatedfiles ) do
-- Update current cursor page
page.currentmodule = module
-- Generate page
local content, err = templateengine.applytemplate(page)
if not content then return nil, err end
module.body = content
end
return generatedfiles, wrongfiles
end
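-- A minimal usage sketch (file and CSS names below are illustrative):
--
--   local docgenerator = require 'docgenerator'
--   local pages, faulty = docgenerator.generatedocforfiles({ 'src/mymodule.lua' }, 'stylesheet.css')
--   if pages then
--     for name, module in pairs(pages) do print(name, module.body and #module.body) end
--   end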
return M

View File

@ -0,0 +1,102 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
local M = {}
require 'metalua.loader'
local compiler = require 'metalua.compiler'
local mlc = compiler.new()
local Q = require 'metalua.treequery'
-- Retrieves all Javadoc-like comments from C code
function M.c(code)
if not code then return nil, 'No code provided' end
local comments = {}
-- Loop over comments stripping cosmetic '*'
for comment in code:gmatch('%s*/%*%*+(.-)%*+/') do
-- All Lua special comments are prefixed with '-', so we prefix
-- C comments the same way to make them compliant
table.insert(comments, '-'..comment)
end
return comments
end
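-- For illustration, the C extractor turns a Javadoc-style block comment into
-- the '-'-prefixed form the Lua tooling expects (the input is hypothetical):
--
--   M.c('/** Frees the given buffer. */')
--   --> { '- Frees the given buffer. ' }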
-- Retrieves "---" comments from Lua code
function M.lua( code )
if not code then return nil, 'No code provided' end
-- manage shebang
if code then code = code:gsub("^(#.-\n)", function (s) return string.rep(' ',string.len(s)) end) end
-- check for errors
local f, err = loadstring(code,'source_to_check')
if not f then
return nil, 'Syntax error.\n' .. err
end
-- Get ast from file
local status, ast = pcall(mlc.src_to_ast, mlc, code)
--
-- Detect parsing errors
--
if not status then
return nil, 'There might be a syntax error.\n' .. ast
end
--
-- Extract commented nodes from AST
--
-- Function enabling commented node selection
local function acceptcommentednode(node)
return node.lineinfo and ( node.lineinfo.last.comments or node.lineinfo.first.comments )
end
-- Fetch commented node from AST
local commentednodes = Q(ast):filter( acceptcommentednode ):list()
-- Comment cache to avoid selecting same comment twice
local commentcache = {}
-- Will contain selected comments
local comments = {}
-- Loop over commented nodes
for _, node in ipairs( commentednodes ) do
-- A node can be related to comments both before and after itself;
-- the following gathers them.
local commentlists = {}
if node.lineinfo and node.lineinfo.first.comments then
table.insert(commentlists, node.lineinfo.first.comments)
end
if node.lineinfo and node.lineinfo.last.comments then
table.insert(commentlists, node.lineinfo.last.comments)
end
-- Now that we have the comments before and after the node,
-- collect them in a single table
for _, list in ipairs( commentlists ) do
for _, commenttable in ipairs(list) do
-- Only select special comments
local firstcomment = #commenttable > 0 and #commenttable[1] > 0 and commenttable[1]
if firstcomment and firstcomment:sub(1, 1) == '-' then
for _, comment in ipairs( commenttable ) do
-- Only comments which were not already collected
if not commentcache[comment] then
commentcache[comment] = true
table.insert(comments, comment)
end
end
end
end
end
end
return comments
end
return M

View File

@ -0,0 +1,130 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
local lfs = require 'lfs'
local M = {}
local function iswindows()
local p = io.popen("echo %os%")
if not p then
return false
end
local result = p:read("*l")
p:close()
return result == "Windows_NT"
end
M.separator = iswindows() and [[\]] or [[/]]
---
-- Will recursively browse given directories and list files encountered
-- @param tab Table, list where files will be added
-- @param dirorfile List of paths to browse in order to build the list.
-- Files from this list will be added to the <code>tab</code> list.
-- @return <code>tab</code> list, table containing all files from directories
-- and files contained in <code>dirorfile</code>
local function appendfiles(tab, dirorfile)
-- Nothing to process
if #dirorfile < 1 then return tab end
-- Append all files to list
local dirs = {}
for _, path in ipairs( dirorfile ) do
-- Determine element nature
local elementnature = lfs.attributes (path, "mode")
-- Handle files
if elementnature == 'file' then
table.insert(tab, path)
else if elementnature == 'directory' then
-- Check if folder is accessible
local status, err = pcall(lfs.dir, path)
if not status then return nil, err end
--
-- Handle folders
--
for diskelement in lfs.dir(path) do
-- Format current file name
local currentfilename
if path:sub(#path) == M.separator then
currentfilename = path .. diskelement
else
currentfilename = path .. M.separator .. diskelement
end
-- Handle folder elements
local nature, err = lfs.attributes (currentfilename, "mode")
-- Append file to current list
if nature == 'file' then
table.insert(tab, currentfilename)
elseif nature == 'directory' then
-- Avoid current and parent directory in order to avoid
-- endless recursion
if diskelement ~= '.' and diskelement ~= '..' then
-- Handle subfolders
table.insert(dirs, currentfilename)
end
end
end
end
end
end
-- If we only encountered files, going deeper is useless
if #dirs == 0 then return tab end
-- Append files from encountered directories
return appendfiles(tab, dirs)
end
---
-- Provide a list of files from a directory
-- @param list Table of directories to browse
-- @return table of string, path to files contained in given directories
function M.filelist(list)
if not list then return nil, 'No directory list provided' end
return appendfiles({}, list)
end
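-- A small usage sketch (the directory name is illustrative):
--
--   local files = M.filelist({ 'src' })
--   -- 'files' now lists every file found under 'src', including files in
--   -- nested subdirectories; '.' and '..' are never recursed into.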
function M.checkdirectory( dirlist )
if not dirlist then return false end
local missingdirs = {}
for _, filename in ipairs( dirlist ) do
if not lfs.attributes(filename, 'mode') then
table.insert(missingdirs, filename)
end
end
if #missingdirs > 0 then
return false, missingdirs
end
return true
end
function M.fill(filename, content)
--
-- Ensure parent directory exists
--
local parent = filename:gmatch([[(.*)]] .. M.separator ..[[(.+)]])()
local parentnature = lfs.attributes(parent, 'mode')
-- Create parent directory while absent
if not parentnature then
lfs.mkdir( parent )
elseif parentnature ~= 'directory' then
-- Notify that disk element already exists
return nil, parent..' is a '..parentnature..'.'
end
-- Create actual file
local file, err = io.open(filename, 'w')
if not file then
return nil, err
end
file:write( content )
file:close()
return true
end
return M

View File

@ -0,0 +1,113 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2012-2014 Sierra Wireless.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
-- Kevin KIN-FOO <kkinfoo@sierrawireless.com>
-- - initial API and implementation and initial documentation
--------------------------------------------------------------------------------
require 'metalua.loader'
local compiler = require 'metalua.compiler'
local mlc = compiler.new()
local M = {}
--
-- Define default supported languages
--
M.supportedlanguages = {}
local extractors = require 'extractors'
-- Support Lua comment extracting
M.supportedlanguages['lua'] = extractors.lua
-- Support C comment extracting
for _,c in ipairs({'c', 'cpp', 'c++'}) do
M.supportedlanguages[c] = extractors.c
end
-- Extract comment from code,
-- type of code is deduced from filename extension
function M.extract(filename, code)
-- Check parameters
if not code then return nil, 'No code provided' end
if type(filename) ~= "string" then
return nil, 'No string for file name provided'
end
-- Extract file extension
local fileextension = filename:gmatch('.*%.(.*)')()
if not fileextension then
return nil, 'File '..filename..' has no extension, could not determine how to extract documentation.'
end
-- Check if it is possible to extract documentation from these files
local extractor = M.supportedlanguages[ fileextension ]
if not extractor then
return nil, 'Unable to extract documentation from '.. fileextension .. ' file.'
end
return extractor( code )
end
-- Generate a file gathering only comments from given code
function M.generatecommentfile(filename, code)
local comments, error = M.extract(filename, code)
if not comments then
return nil, 'Unable to generate comment file.\n'..error
end
local filecontent = {}
for _, comment in ipairs( comments ) do
table.insert(filecontent, "--[[")
table.insert(filecontent, comment)
table.insert(filecontent, "\n]]\n\n")
end
return table.concat(filecontent)..'return nil\n'
end
-- Create API Model module from a 'comment only' lua file
function M.generateapimodule(filename, code, noheuristic)
if not filename then return nil, 'No file name given.' end
if not code then return nil, 'No code provided.' end
if type(filename) ~= "string" then return nil, 'No string for file name provided' end
-- for non-Lua files, generate a comment-only file first
if filename:gmatch('.*%.(.*)')() ~= 'lua' then
local err
code, err = M.generatecommentfile(filename, code)
if not code then
return nil, 'Unable to create api module for "'..filename..'".\n'..err
end
else
-- manage shebang
if code then code = code:gsub("^(#.-\n)", function (s) return string.rep(' ',string.len(s)) end) end
-- check for errors
local f, err = loadstring(code,'source_to_check')
if not f then
return nil, 'File '..filename..' contains syntax error.\n' .. err
end
end
local status, ast = pcall(mlc.src_to_ast, mlc, code)
if not status then
return nil, 'Unable to compute ast for "'..filename..'".\n'..ast
end
-- Extract module name as the filename without extension
local modulename
local matcher = string.gmatch(filename,'.*/(.*)%..*$')
if matcher then modulename = matcher() end
-- Create api model
local apimodelbuilder = require 'models.apimodelbuilder'
local _file, comment2apiobj = apimodelbuilder.createmoduleapi(ast, modulename)
-- Create internal model
if not noheuristic then
local internalmodelbuilder = require "models.internalmodelbuilder"
local _internalcontent = internalmodelbuilder.createinternalcontent(ast,_file,comment2apiobj, modulename)
end
return _file
end
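-- A minimal usage sketch (path and content are illustrative); the module name
-- is derived from the part of the path after the last '/':
--
--   local apimodule, err = M.generateapimodule('src/mymodule.lua',
--     '--- Adds two numbers.\nlocal M = {}\nfunction M.add(a, b) return a + b end\nreturn M\n')
--   -- on success, 'apimodule' is the API model of the module 'mymodule'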
return M

View File

@ -0,0 +1,465 @@
---------
-- Source and binary equivalency comparisons
--
-- **Notes:**
--
-- * Intended as an extra safety check for mission-critical code,
-- should give affirmative results if everything works.
-- * Heavy on load() and string.dump(), which may be slowish,
-- and may cause problems for cross-compiled applications.
-- * Optional detailed information dump is mainly for debugging,
-- reason being, if the two are not equivalent when they should be,
-- then some form of optimization has failed.
-- * source: IMPORTANT: TK_NAME not compared if opt-locals enabled.
-- * binary: IMPORTANT: Some shortcuts are taken with int and size_t
-- value reading -- if the functions break, then the binary chunk
-- is very large indeed.
-- * binary: There is a lack of diagnostic information when a compare
-- fails; you can use ChunkSpy and compare using visual diff.
----
local byte = string.byte
local dump = string.dump
local load = loadstring or load --luacheck: ignore 113
local sub = string.sub
local M = {}
local is_realtoken = { -- significant (grammar) tokens
TK_KEYWORD = true,
TK_NAME = true,
TK_NUMBER = true,
TK_STRING = true,
TK_LSTRING = true,
TK_OP = true,
TK_EOS = true,
}
local option, llex, warn
--- The initialization function.
--
-- @tparam {[string]=bool,...} _option
-- @tparam luasrcdiet.llex _llex
-- @tparam table _warn
function M.init(_option, _llex, _warn)
option = _option
llex = _llex
warn = _warn
end
--- Builds lists containing a 'normal' lexer stream.
--
-- @tparam string s The source code.
-- @treturn table
-- @treturn table
local function build_stream(s)
local stok, sseminfo = llex.lex(s) -- source list (with whitespace elements)
local tok, seminfo -- processed list (real elements only)
= {}, {}
for i = 1, #stok do
local t = stok[i]
if is_realtoken[t] then
tok[#tok + 1] = t
seminfo[#seminfo + 1] = sseminfo[i]
end
end--for
return tok, seminfo
end
-- Tests source (lexer stream) equivalence.
--
-- @tparam string z
-- @tparam string dat
function M.source(z, dat)
-- Returns a dumped string for seminfo compares.
local function dumpsem(s)
local sf = load("return "..s, "z")
if sf then
return dump(sf)
end
end
-- Marks and optionally reports non-equivalence.
local function bork(msg)
if option.DETAILS then print("SRCEQUIV: "..msg) end
warn.SRC_EQUIV = true
end
-- Get lexer streams for both source strings, compare.
local tok1, seminfo1 = build_stream(z) -- original
local tok2, seminfo2 = build_stream(dat) -- compressed
-- Compare shbang lines ignoring EOL.
local sh1 = z:match("^(#[^\r\n]*)")
local sh2 = dat:match("^(#[^\r\n]*)")
if sh1 or sh2 then
if not sh1 or not sh2 or sh1 ~= sh2 then
bork("shbang lines different")
end
end
-- Compare by simple count.
if #tok1 ~= #tok2 then
bork("count "..#tok1.." "..#tok2)
return
end
-- Compare each element the best we can.
for i = 1, #tok1 do
local t1, t2 = tok1[i], tok2[i]
local s1, s2 = seminfo1[i], seminfo2[i]
if t1 ~= t2 then -- by type
bork("type ["..i.."] "..t1.." "..t2)
break
end
if t1 == "TK_KEYWORD" or t1 == "TK_NAME" or t1 == "TK_OP" then
if t1 == "TK_NAME" and option["opt-locals"] then
-- can't compare identifiers of locals that are optimized
elseif s1 ~= s2 then -- by semantic info (simple)
bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
break
end
elseif t1 == "TK_EOS" then
-- no seminfo to compare
else-- "TK_NUMBER" or "TK_STRING" or "TK_LSTRING"
-- compare 'binary' form, so dump a function
local s1b,s2b = dumpsem(s1), dumpsem(s2)
if not s1b or not s2b or s1b ~= s2b then
bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
break
end
end
end--for
-- Successful comparison if end is reached with no borks.
end
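-- Typical use, sketched after the way luasrcdiet's init module drives this:
-- a failed comparison does not raise here, it sets a flag on the shared
-- warn table instead.
--
--   equiv.init(option, llex, warn)
--   equiv.source(original_source, optimized_source)
--   if warn.SRC_EQUIV then error('Source equivalence test failed!') end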
--- Tests binary chunk equivalence (only for PUC Lua 5.1).
--
-- @tparam string z
-- @tparam string dat
function M.binary(z, dat)
local TNIL = 0 --luacheck: ignore 211
local TBOOLEAN = 1
local TNUMBER = 3
local TSTRING = 4
-- sizes of data types
local endian
local sz_int
local sz_sizet
local sz_inst
local sz_number
local getint
local getsizet
-- Marks and optionally reports non-equivalence.
local function bork(msg)
if option.DETAILS then print("BINEQUIV: "..msg) end
warn.BIN_EQUIV = true
end
-- Checks if bytes exist.
local function ensure(c, sz)
if c.i + sz - 1 > c.len then return end
return true
end
-- Skips some bytes.
local function skip(c, sz)
if not sz then sz = 1 end
c.i = c.i + sz
end
-- Returns a byte value.
local function getbyte(c)
local i = c.i
if i > c.len then return end
local d = sub(c.dat, i, i)
c.i = i + 1
return byte(d)
end
-- Return an int value (little-endian).
local function getint_l(c)
local n, scale = 0, 1
if not ensure(c, sz_int) then return end
for _ = 1, sz_int do
n = n + scale * getbyte(c)
scale = scale * 256
end
return n
end
-- Returns an int value (big-endian).
local function getint_b(c)
local n = 0
if not ensure(c, sz_int) then return end
for _ = 1, sz_int do
n = n * 256 + getbyte(c)
end
return n
end
-- Returns a size_t value (little-endian).
local function getsizet_l(c)
local n, scale = 0, 1
if not ensure(c, sz_sizet) then return end
for _ = 1, sz_sizet do
n = n + scale * getbyte(c)
scale = scale * 256
end
return n
end
-- Returns a size_t value (big-endian).
local function getsizet_b(c)
local n = 0
if not ensure(c, sz_sizet) then return end
for _ = 1, sz_sizet do
n = n * 256 + getbyte(c)
end
return n
end
-- Returns a block (as a string).
local function getblock(c, sz)
local i = c.i
local j = i + sz - 1
if j > c.len then return end
local d = sub(c.dat, i, j)
c.i = i + sz
return d
end
-- Returns a string.
local function getstring(c)
local n = getsizet(c)
if not n then return end
if n == 0 then return "" end
return getblock(c, n)
end
-- Compares byte value.
local function goodbyte(c1, c2)
local b1, b2 = getbyte(c1), getbyte(c2)
if not b1 or not b2 or b1 ~= b2 then
return
end
return b1
end
-- Compares byte value.
local function badbyte(c1, c2)
local b = goodbyte(c1, c2)
if not b then return true end
end
-- Compares int value.
local function goodint(c1, c2)
local i1, i2 = getint(c1), getint(c2)
if not i1 or not i2 or i1 ~= i2 then
return
end
return i1
end
-- Recursively-called function to compare function prototypes.
local function getfunc(c1, c2)
-- source name (ignored)
if not getstring(c1) or not getstring(c2) then
bork("bad source name"); return
end
-- linedefined (ignored)
if not getint(c1) or not getint(c2) then
bork("bad linedefined"); return
end
-- lastlinedefined (ignored)
if not getint(c1) or not getint(c2) then
bork("bad lastlinedefined"); return
end
if not (ensure(c1, 4) and ensure(c2, 4)) then
bork("prototype header broken")
end
-- nups (compared)
if badbyte(c1, c2) then
bork("bad nups"); return
end
-- numparams (compared)
if badbyte(c1, c2) then
bork("bad numparams"); return
end
-- is_vararg (compared)
if badbyte(c1, c2) then
bork("bad is_vararg"); return
end
-- maxstacksize (compared)
if badbyte(c1, c2) then
bork("bad maxstacksize"); return
end
-- code (compared)
local ncode = goodint(c1, c2)
if not ncode then
bork("bad ncode"); return
end
local code1 = getblock(c1, ncode * sz_inst)
local code2 = getblock(c2, ncode * sz_inst)
if not code1 or not code2 or code1 ~= code2 then
bork("bad code block"); return
end
-- constants (compared)
local nconst = goodint(c1, c2)
if not nconst then
bork("bad nconst"); return
end
for _ = 1, nconst do
local ctype = goodbyte(c1, c2)
if not ctype then
bork("bad const type"); return
end
if ctype == TBOOLEAN then
if badbyte(c1, c2) then
bork("bad boolean value"); return
end
elseif ctype == TNUMBER then
local num1 = getblock(c1, sz_number)
local num2 = getblock(c2, sz_number)
if not num1 or not num2 or num1 ~= num2 then
bork("bad number value"); return
end
elseif ctype == TSTRING then
local str1 = getstring(c1)
local str2 = getstring(c2)
if not str1 or not str2 or str1 ~= str2 then
bork("bad string value"); return
end
end
end
-- prototypes (compared recursively)
local nproto = goodint(c1, c2)
if not nproto then
bork("bad nproto"); return
end
for _ = 1, nproto do
if not getfunc(c1, c2) then
bork("bad function prototype"); return
end
end
-- debug information (ignored)
-- lineinfo (ignored)
local sizelineinfo1 = getint(c1)
if not sizelineinfo1 then
bork("bad sizelineinfo1"); return
end
local sizelineinfo2 = getint(c2)
if not sizelineinfo2 then
bork("bad sizelineinfo2"); return
end
if not getblock(c1, sizelineinfo1 * sz_int) then
bork("bad lineinfo1"); return
end
if not getblock(c2, sizelineinfo2 * sz_int) then
bork("bad lineinfo2"); return
end
-- locvars (ignored)
local sizelocvars1 = getint(c1)
if not sizelocvars1 then
bork("bad sizelocvars1"); return
end
local sizelocvars2 = getint(c2)
if not sizelocvars2 then
bork("bad sizelocvars2"); return
end
for _ = 1, sizelocvars1 do
if not getstring(c1) or not getint(c1) or not getint(c1) then
bork("bad locvars1"); return
end
end
for _ = 1, sizelocvars2 do
if not getstring(c2) or not getint(c2) or not getint(c2) then
bork("bad locvars2"); return
end
end
-- upvalues (ignored)
local sizeupvalues1 = getint(c1)
if not sizeupvalues1 then
bork("bad sizeupvalues1"); return
end
local sizeupvalues2 = getint(c2)
if not sizeupvalues2 then
bork("bad sizeupvalues2"); return
end
for _ = 1, sizeupvalues1 do
if not getstring(c1) then bork("bad upvalues1"); return end
end
for _ = 1, sizeupvalues2 do
if not getstring(c2) then bork("bad upvalues2"); return end
end
return true
end
-- Removes shbang line so that load runs.
local function zap_shbang(s)
local shbang = s:match("^(#[^\r\n]*\r?\n?)")
if shbang then -- cut out shbang
s = sub(s, #shbang + 1)
end
return s
end
-- Attempt to compile, then dump to get binary chunk string.
local cz = load(zap_shbang(z), "z")
if not cz then
bork("failed to compile original sources for binary chunk comparison")
return
end
local cdat = load(zap_shbang(dat), "z")
if not cdat then
bork("failed to compile compressed result for binary chunk comparison")
end
-- if load() works, dump assuming string.dump() is error-free
local c1 = { i = 1, dat = dump(cz) }
c1.len = #c1.dat
local c2 = { i = 1, dat = dump(cdat) }
c2.len = #c2.dat
-- Parse binary chunks to verify equivalence.
-- * For headers, handle sizes to allow a degree of flexibility.
-- * Assume a valid binary chunk is generated, since it was not
-- generated via external means.
if not (ensure(c1, 12) and ensure(c2, 12)) then
bork("header broken")
end
skip(c1, 6) -- skip signature(4), version, format
endian = getbyte(c1) -- 1 = little endian
sz_int = getbyte(c1) -- get data type sizes
sz_sizet = getbyte(c1)
sz_inst = getbyte(c1)
sz_number = getbyte(c1)
skip(c1) -- skip integral flag
skip(c2, 12) -- skip other header (assume similar)
if endian == 1 then -- set for endian sensitive data we need
getint = getint_l
getsizet = getsizet_l
else
getint = getint_b
getsizet = getsizet_b
end
getfunc(c1, c2) -- get prototype at root
if c1.i ~= c1.len + 1 then
bork("inconsistent binary chunk1"); return
elseif c2.i ~= c2.len + 1 then
bork("inconsistent binary chunk2"); return
end
-- Successful comparison if end is reached with no borks.
end
return M

View File

@ -0,0 +1,74 @@
---------
-- Utility functions for operations on a file system.
--
-- **Note: This module is not part of public API!**
----
local fmt = string.format
local open = io.open
local UTF8_BOM = '\239\187\191'
local function normalize_io_error (name, err)
if err:sub(1, #name + 2) == name..': ' then
err = err:sub(#name + 3)
end
return err
end
local M = {}
--- Reads the specified file and returns its content as string.
--
-- @tparam string filename Path of the file to read.
-- @tparam string mode The mode in which to open the file, see @{io.open} (default: "r").
-- @treturn[1] string A content of the file.
-- @treturn[2] nil
-- @treturn[2] string An error message.
function M.read_file (filename, mode)
local handler, err = open(filename, mode or 'r')
if not handler then
return nil, fmt('Could not open %s for reading: %s',
filename, normalize_io_error(filename, err))
end
local content, err = handler:read('*a') --luacheck: ignore 411
if not content then
return nil, fmt('Could not read %s: %s', filename, normalize_io_error(filename, err))
end
handler:close()
if content:sub(1, #UTF8_BOM) == UTF8_BOM then
content = content:sub(#UTF8_BOM + 1)
end
return content
end
--- Writes the given data to the specified file.
--
-- @tparam string filename Path of the file to write.
-- @tparam string data The data to write.
-- @tparam ?string mode The mode in which to open the file, see @{io.open} (default: "w").
-- @treturn[1] true
-- @treturn[2] nil
-- @treturn[2] string An error message.
function M.write_file (filename, data, mode)
local handler, err = open(filename, mode or 'w')
if not handler then
return nil, fmt('Could not open %s for writing: %s',
filename, normalize_io_error(filename, err))
end
local _, err = handler:write(data) --luacheck: ignore 411
if err then
return nil, fmt('Could not write %s: %s', filename, normalize_io_error(filename, err))
end
handler:flush()
handler:close()
return true
end
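-- A round-trip sketch (the path is illustrative):
--
--   assert(M.write_file('/tmp/example.txt', 'hello\n'))
--   local content = assert(M.read_file('/tmp/example.txt'))
--   assert(content == 'hello\n')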
return M

View File

@ -0,0 +1,117 @@
---------
-- LuaSrcDiet API
----
local equiv = require 'luasrcdiet.equiv'
local llex = require 'luasrcdiet.llex'
local lparser = require 'luasrcdiet.lparser'
local optlex = require 'luasrcdiet.optlex'
local optparser = require 'luasrcdiet.optparser'
local utils = require 'luasrcdiet.utils'
local concat = table.concat
local merge = utils.merge
local _ -- placeholder
local function noop ()
return
end
local function opts_to_legacy (opts)
local res = {}
for key, val in pairs(opts) do
res['opt-'..key] = val
end
return res
end
local M = {}
--- The module's name.
M._NAME = 'luasrcdiet'
--- The module's version number.
M._VERSION = '0.3.0'
--- The module's homepage.
M._HOMEPAGE = 'https://github.com/jirutka/luasrcdiet'
--- All optimizations disabled.
M.NONE_OPTS = {
binequiv = false,
comments = false,
emptylines = false,
entropy = false,
eols = false,
experimental = false,
locals = false,
numbers = false,
srcequiv = false,
strings = false,
whitespace = false,
}
--- Basic optimizations enabled.
-- @table BASIC_OPTS
M.BASIC_OPTS = merge(M.NONE_OPTS, {
comments = true,
emptylines = true,
srcequiv = true,
whitespace = true,
})
--- Defaults.
-- @table DEFAULT_OPTS
M.DEFAULT_OPTS = merge(M.BASIC_OPTS, {
locals = true,
numbers = true,
})
--- Maximum optimizations enabled (all except experimental).
-- @table MAXIMUM_OPTS
M.MAXIMUM_OPTS = merge(M.DEFAULT_OPTS, {
entropy = true,
eols = true,
strings = true,
})
--- Optimizes the given Lua source code.
--
-- @tparam ?{[string]=bool,...} opts Optimizations to do (default is @{DEFAULT_OPTS}).
-- @tparam string source The Lua source code to optimize.
-- @treturn string Optimized source.
-- @raise if the source is malformed, source equivalence test failed, or some
-- other error occurred.
function M.optimize (opts, source)
assert(source and type(source) == 'string',
'bad argument #2: expected string, got a '..type(source))
opts = opts and merge(M.NONE_OPTS, opts) or M.DEFAULT_OPTS
local legacy_opts = opts_to_legacy(opts)
local toklist, seminfolist, toklnlist = llex.lex(source)
local xinfo = lparser.parse(toklist, seminfolist, toklnlist)
optparser.print = noop
optparser.optimize(legacy_opts, toklist, seminfolist, xinfo)
local warn = optlex.warn -- use this as a general warning lookup
optlex.print = noop
_, seminfolist = optlex.optimize(legacy_opts, toklist, seminfolist, toklnlist)
local optim_source = concat(seminfolist)
if opts.srcequiv and not opts.experimental then
equiv.init(legacy_opts, llex, warn)
equiv.source(source, optim_source)
if warn.SRC_EQUIV then
error('Source equivalence test failed!')
end
end
return optim_source
end
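-- A minimal usage sketch; since the 'locals' optimization may rename local
-- variables, the output shown is only indicative:
--
--   local luasrcdiet = require 'luasrcdiet'
--   local slim = luasrcdiet.optimize(luasrcdiet.DEFAULT_OPTS,
--     'local answer = 42  -- the answer')
--   print(slim)  --> something like: local a=42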
return M

View File

@ -0,0 +1,350 @@
---------
-- Lua 5.1+ lexical analyzer written in Lua.
--
-- This file is part of LuaSrcDiet, based on Yueliang material.
--
-- **Notes:**
--
-- * This is a version of the native 5.1.x lexer from Yueliang 0.4.0,
-- with significant modifications to handle LuaSrcDiet's needs:
-- (1) llex.error is an optional error function handler,
-- (2) seminfo for strings include their delimiters and no
-- translation operations are performed on them.
-- * Shbang handling has been added to support executable scripts.
-- * NO localized decimal point replacement magic.
-- * NO limit to number of lines.
-- * NO support for compatible long strings (LUA\_COMPAT_LSTR).
-- * Added goto keyword and double-colon operator (Lua 5.2+).
----
local find = string.find
local fmt = string.format
local match = string.match
local sub = string.sub
local tonumber = tonumber
local M = {}
local kw = {}
for v in ([[
and break do else elseif end false for function goto if in
local nil not or repeat return then true until while]]):gmatch("%S+") do
kw[v] = true
end
local z, -- source stream
sourceid, -- name of source
I, -- position of lexer
buff, -- buffer for strings
ln, -- line number
tok, -- lexed token list
seminfo, -- lexed semantic information list
tokln -- line numbers for messages
--- Adds information to token listing.
--
-- @tparam string token
-- @tparam string info
local function addtoken(token, info)
local i = #tok + 1
tok[i] = token
seminfo[i] = info
tokln[i] = ln
end
--- Handles line number incrementation and end-of-line characters.
--
-- @tparam int i Position of lexer in the source stream.
-- @tparam bool is_tok
-- @treturn int
local function inclinenumber(i, is_tok)
local old = sub(z, i, i)
i = i + 1 -- skip '\n' or '\r'
local c = sub(z, i, i)
if (c == "\n" or c == "\r") and (c ~= old) then
i = i + 1 -- skip '\n\r' or '\r\n'
old = old..c
end
if is_tok then addtoken("TK_EOL", old) end
ln = ln + 1
I = i
return i
end
--- Returns a chunk name or id, no truncation for long names.
--
-- @treturn string
local function chunkid()
if sourceid and match(sourceid, "^[=@]") then
return sub(sourceid, 2) -- remove first char
end
return "[string]"
end
--- Formats error message and throws error.
--
-- A simplified version, does not report what token was responsible.
--
-- @tparam string s
-- @tparam int line The line number.
-- @raise
local function errorline(s, line)
local e = M.error or error
e(fmt("%s:%d: %s", chunkid(), line or ln, s))
end
--- Counts separators (`=`) in a long string delimiter.
--
-- @tparam int i Position of lexer in the source stream.
-- @treturn int
local function skip_sep(i)
local s = sub(z, i, i)
i = i + 1
local count = #match(z, "=*", i)
i = i + count
I = i
return (sub(z, i, i) == s) and count or (-count) - 1
end
--- Reads a long string or long comment.
--
-- @tparam bool is_str
-- @tparam string sep
-- @treturn string
-- @raise if unfinished long string or comment.
local function read_long_string(is_str, sep)
local i = I + 1 -- skip 2nd '['
local c = sub(z, i, i)
if c == "\r" or c == "\n" then -- string starts with a newline?
i = inclinenumber(i) -- skip it
end
while true do
local p, _, r = find(z, "([\r\n%]])", i) -- (long range match)
if not p then
errorline(is_str and "unfinished long string" or
"unfinished long comment")
end
i = p
if r == "]" then -- delimiter test
if skip_sep(i) == sep then
buff = sub(z, buff, I)
I = I + 1 -- skip 2nd ']'
return buff
end
i = I
else -- newline
buff = buff.."\n"
i = inclinenumber(i)
end
end--while
end
--- Reads a string.
--
-- @tparam string del The delimiter.
-- @treturn string
-- @raise if unfinished string or too large escape sequence.
local function read_string(del)
local i = I
while true do
local p, _, r = find(z, "([\n\r\\\"\'])", i) -- (long range match)
if p then
if r == "\n" or r == "\r" then
errorline("unfinished string")
end
i = p
if r == "\\" then -- handle escapes
i = i + 1
r = sub(z, i, i)
if r == "" then break end -- (EOZ error)
p = find("abfnrtv\n\r", r, 1, true)
if p then -- special escapes
if p > 7 then
i = inclinenumber(i)
else
i = i + 1
end
elseif find(r, "%D") then -- other non-digits
i = i + 1
else -- \xxx sequence
local _, q, s = find(z, "^(%d%d?%d?)", i)
i = q + 1
if s + 1 > 256 then -- UCHAR_MAX
errorline("escape sequence too large")
end
end--if p
else
i = i + 1
if r == del then -- ending delimiter
I = i
return sub(z, buff, i - 1) -- return string
end
end--if r
else
break -- (error)
end--if p
end--while
errorline("unfinished string")
end
--- Initializes lexer for given source _z and source name _sourceid.
--
-- @tparam string _z The source code.
-- @tparam string _sourceid Name of the source.
local function init(_z, _sourceid)
z = _z -- source
sourceid = _sourceid -- name of source
I = 1 -- lexer's position in source
ln = 1 -- line number
tok = {} -- lexed token list*
seminfo = {} -- lexed semantic information list*
tokln = {} -- line numbers for messages*
-- Initial processing (shbang handling).
local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
if p then -- skip first line
I = I + #q
addtoken("TK_COMMENT", q)
if #r > 0 then inclinenumber(I, true) end
end
end
--- Runs lexer on the given source code.
--
-- @tparam string source The Lua source to scan.
-- @tparam ?string source_name Name of the source (optional).
-- @treturn {string,...} A list of lexed tokens.
-- @treturn {string,...} A list of semantic information (lexed strings).
-- @treturn {int,...} A list of line numbers.
function M.lex(source, source_name)
init(source, source_name)
while true do--outer
local i = I
-- inner loop allows break to be used to nicely section tests
while true do --luacheck: ignore 512
local p, _, r = find(z, "^([_%a][_%w]*)", i)
if p then
I = i + #r
if kw[r] then
addtoken("TK_KEYWORD", r) -- reserved word (keyword)
else
addtoken("TK_NAME", r) -- identifier
end
break -- (continue)
end
local p, _, r = find(z, "^(%.?)%d", i)
if p then -- numeral
if r == "." then i = i + 1 end
local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i) --luacheck: ignore 421
i = q + 1
if #r == 1 then -- optional exponent
if match(z, "^[%+%-]", i) then -- optional sign
i = i + 1
end
end
local _, q = find(z, "^[_%w]*", i)
I = q + 1
local v = sub(z, p, q) -- string equivalent
if not tonumber(v) then -- handles hex test also
errorline("malformed number")
end
addtoken("TK_NUMBER", v)
break -- (continue)
end
local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
if p then
if t == "\n" or t == "\r" then -- newline
inclinenumber(i, true)
else
I = q + 1 -- whitespace
addtoken("TK_SPACE", r)
end
break -- (continue)
end
local _, q = find(z, "^::", i)
if q then
I = q + 1
addtoken("TK_OP", "::")
break -- (continue)
end
local r = match(z, "^%p", i)
if r then
buff = i
local p = find("-[\"\'.=<>~", r, 1, true) --luacheck: ignore 421
if p then
-- two-level if block for punctuation/symbols
if p <= 2 then
if p == 1 then -- minus
local c = match(z, "^%-%-(%[?)", i)
if c then
i = i + 2
local sep = -1
if c == "[" then
sep = skip_sep(i)
end
if sep >= 0 then -- long comment
addtoken("TK_LCOMMENT", read_long_string(false, sep))
else -- short comment
I = find(z, "[\n\r]", i) or (#z + 1)
addtoken("TK_COMMENT", sub(z, buff, I - 1))
end
break -- (continue)
end
-- (fall through for "-")
else -- [ or long string
local sep = skip_sep(i)
if sep >= 0 then
addtoken("TK_LSTRING", read_long_string(true, sep))
elseif sep == -1 then
addtoken("TK_OP", "[")
else
errorline("invalid long string delimiter")
end
break -- (continue)
end
elseif p <= 5 then
if p < 5 then -- strings
I = i + 1
addtoken("TK_STRING", read_string(r))
break -- (continue)
end
r = match(z, "^%.%.?%.?", i) -- .|..|... dots
-- (fall through)
else -- relational
r = match(z, "^%p=?", i)
-- (fall through)
end
end
I = i + #r
addtoken("TK_OP", r) -- for other symbols, fall through
break -- (continue)
end
local r = sub(z, i, i)
if r ~= "" then
I = i + 1
addtoken("TK_OP", r) -- other single-char tokens
break
end
addtoken("TK_EOS", "") -- end of stream,
return tok, seminfo, tokln -- exit here
end--while inner
end--while outer
end
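-- A short sketch of the lexer output for a trivial chunk:
--
--   local toks, infos = M.lex('local x = 1')
--   -- toks  --> TK_KEYWORD TK_SPACE TK_NAME TK_SPACE TK_OP TK_SPACE TK_NUMBER TK_EOS
--   -- infos --> 'local'    ' '      'x'     ' '      '='   ' '      '1'       ''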
return M

File diff suppressed because it is too large

View File

@ -0,0 +1,852 @@
---------
-- This module does lexer-based optimizations.
--
-- **Notes:**
--
-- * TODO: General string delimiter conversion optimizer.
-- * TODO: (numbers) warn if overly significant digit.
----
local char = string.char
local find = string.find
local match = string.match
local rep = string.rep
local sub = string.sub
local tonumber = tonumber
local tostring = tostring
local print -- set in optimize()
local M = {}
-- error function, can override by setting own function into module
M.error = error
M.warn = {} -- table for warning flags
local stoks, sinfos, stoklns -- source lists
local is_realtoken = { -- significant (grammar) tokens
TK_KEYWORD = true,
TK_NAME = true,
TK_NUMBER = true,
TK_STRING = true,
TK_LSTRING = true,
TK_OP = true,
TK_EOS = true,
}
local is_faketoken = { -- whitespace (non-grammar) tokens
TK_COMMENT = true,
TK_LCOMMENT = true,
TK_EOL = true,
TK_SPACE = true,
}
local opt_details -- for extra information
--- Returns true if current token is at the start of a line.
--
-- It skips over deleted tokens via recursion.
--
-- @tparam int i
-- @treturn bool
local function atlinestart(i)
local tok = stoks[i - 1]
if i <= 1 or tok == "TK_EOL" then
return true
elseif tok == "" then
return atlinestart(i - 1)
end
return false
end
--- Returns true if current token is at the end of a line.
--
-- It skips over deleted tokens via recursion.
--
-- @tparam int i
-- @treturn bool
local function atlineend(i)
local tok = stoks[i + 1]
if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
return true
elseif tok == "" then
return atlineend(i + 1)
end
return false
end
--- Counts comment EOLs inside a long comment.
--
-- In order to keep line numbering, EOLs need to be reinserted.
--
-- @tparam string lcomment
-- @treturn int
local function commenteols(lcomment)
local sep = #match(lcomment, "^%-%-%[=*%[")
local z = sub(lcomment, sep + 1, -(sep - 1)) -- remove delims
local i, c = 1, 0
while true do
local p, _, r, s = find(z, "([\r\n])([\r\n]?)", i)
if not p then break end -- if no matches, done
i = p + 1
c = c + 1
if #s > 0 and r ~= s then -- skip CRLF or LFCR
i = i + 1
end
end
return c
end
--- Compares two tokens (i, j) and returns the whitespace required.
--
-- See documentation for a reference table of interactions.
--
-- Only two grammar/real tokens are being considered:
--
-- * if `""`, no separation is needed,
-- * if `" "`, then at least one whitespace (or EOL) is required.
--
-- Note: This doesn't work at the start or the end or for EOS!
--
-- @tparam int i
-- @tparam int j
-- @treturn string
local function checkpair(i, j)
local t1, t2 = stoks[i], stoks[j]
if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
t2 == "TK_STRING" or t2 == "TK_LSTRING" then
return ""
elseif t1 == "TK_OP" or t2 == "TK_OP" then
if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
(t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
return ""
end
if t1 == "TK_OP" and t2 == "TK_OP" then
-- for TK_OP/TK_OP pairs, see notes in technotes.txt
local op, op2 = sinfos[i], sinfos[j]
if (match(op, "^%.%.?$") and match(op2, "^%.")) or
(match(op, "^[~=<>]$") and op2 == "=") or
(op == "[" and (op2 == "[" or op2 == "=")) then
return " "
end
return ""
end
-- "TK_OP" + "TK_NUMBER" case
local op = sinfos[i]
if t2 == "TK_OP" then op = sinfos[j] end
if match(op, "^%.%.?%.?$") then
return " "
end
return ""
else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then
return " "
end
end
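-- A few concrete cases derived from the rules above:
--
--   TK_KEYWORD 'local' / TK_NAME 'x'  --> " "  (must keep 'local x' apart)
--   TK_NAME 'x' / TK_OP '='           --> ""   ('x=' needs no separation)
--   TK_OP '..' / TK_OP '.'            --> " "  (avoid forming '...')
--   TK_NUMBER '1' / TK_OP '..'        --> " "  (avoid the '1..' ambiguity)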
--- Repack tokens, removing deletions caused by optimization process.
local function repack_tokens()
local dtoks, dinfos, dtoklns = {}, {}, {}
local j = 1
for i = 1, #stoks do
local tok = stoks[i]
if tok ~= "" then
dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
j = j + 1
end
end
stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
end
--- Does number optimization.
--
-- Optimization using string formatting functions is one way of doing this,
-- but here, we consider all cases and handle them separately (possibly an
-- idiotic approach...).
--
-- The scientific notation generated is not in canonical form; this may or
-- may not be a bad thing.
--
-- Note: Intermediate portions need to fit into a normal number range.
--
-- Optimizations can be divided based on number patterns:
--
-- * hexadecimal:
-- (1) no need to remove leading zeros, just skip to (2)
-- (2) convert to integer if size equal or smaller
-- * change if equal size -> lose the 'x' to reduce entropy
-- (3) number is then processed as an integer
-- (4) note: does not make 0[xX] consistent
-- * integer:
-- (1) reduce useless fractional part, if present, e.g. 123.000 -> 123.
-- (2) remove leading zeros, e.g. 000123
-- * float:
-- (1) split into digits dot digits
-- (2) if no integer portion, take as zero (can omit later)
-- (3) handle degenerate .000 case, after which the fractional part
-- must be non-zero (if zero, it's matched as float .0)
-- (4) remove trailing zeros for fractional portion
-- (5) p.q where p > 0 and q > 0 cannot be shortened any more
-- (6) otherwise p == 0 and the form is .q, e.g. .000123
-- (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
-- * scientific:
-- (1) split into (digits dot digits) [eE] ([+-] digits)
-- (2) if significand is zero, just use .0
-- (3) remove leading zeros for significand
-- (4) shift out trailing zeros for significand
-- (5) examine exponent and determine which format is best:
-- number with fraction, or scientific
--
-- Note: Number with fraction and scientific number is never converted
-- to integer, because Lua 5.3 distinguishes between integers and floats.
--
--
-- @tparam int i
local function do_number(i)
local before = sinfos[i] -- 'before'
local z = before -- working representation
local y -- 'after', if better
--------------------------------------------------------------------
if match(z, "^0[xX]") then -- hexadecimal number
local v = tostring(tonumber(z))
if #v <= #z then
z = v -- change to integer, AND continue
else
return -- no change; stick to hex
end
end
if match(z, "^%d+$") then -- integer
if tonumber(z) > 0 then
y = match(z, "^0*([1-9]%d*)$") -- remove leading zeros
else
y = "0" -- basic zero
end
elseif not match(z, "[eE]") then -- float
local p, q = match(z, "^(%d*)%.(%d*)$") -- split
if p == "" then p = 0 end -- int part zero
if q == "" then q = "0" end -- fraction part zero
if tonumber(q) == 0 and p == 0 then
y = ".0" -- degenerate .000 to .0
else
-- now, q > 0 holds and p is a number
local zeros_cnt = #match(q, "0*$") -- remove trailing zeros
if zeros_cnt > 0 then
q = sub(q, 1, #q - zeros_cnt)
end
-- if p > 0, nothing else we can do to simplify p.q case
if tonumber(p) > 0 then
y = p.."."..q
else
y = "."..q -- tentative, e.g. .000123
local v = #match(q, "^0*") -- # leading spaces
local w = #q - v -- # significant digits
local nv = tostring(#q)
-- e.g. compare 123e-6 versus .000123
if w + 2 + #nv < 1 + #q then
y = sub(q, -w).."e-"..nv
end
end
end
else -- scientific number
local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
ex = tonumber(ex)
-- if got ".", shift out fractional portion of significand
local p, q = match(sig, "^(%d*)%.(%d*)$")
if p then
ex = ex - #q
sig = p..q
end
if tonumber(sig) == 0 then
y = ".0" -- basic float zero
else
local v = #match(sig, "^0*") -- remove leading zeros
sig = sub(sig, v + 1)
v = #match(sig, "0*$") -- shift out trailing zeros
if v > 0 then
sig = sub(sig, 1, #sig - v)
ex = ex + v
end
-- examine exponent and determine which format is best
local nex = tostring(ex)
if ex >= 0 and (ex <= 1 + #nex) then -- a float
y = sig..rep("0", ex).."."
elseif ex < 0 and (ex >= -#sig) then -- fraction, e.g. .123
v = #sig + ex
y = sub(sig, 1, v).."."..sub(sig, v + 1)
elseif ex < 0 and (#nex >= -ex - #sig) then
-- e.g. compare 1234e-5 versus .01234
-- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
-- -> #nex >= -ex - #sig
v = -ex - #sig
y = "."..rep("0", v)..sig
else -- non-canonical scientific representation
y = sig.."e"..ex
end
end--if sig
end
if y and y ~= sinfos[i] then
if opt_details then
print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
opt_details = opt_details + 1
end
sinfos[i] = y
end
end
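-- A few before/after pairs this routine produces, following the rules above:
--
--   000123   -> 123      (leading zeros removed)
--   123.000  -> 123.     (useless fractional part reduced)
--   .000123  -> 123e-6   (scientific form is shorter here)
--   0x0F     -> 15       (hex converted since the integer form is not longer)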
--- Does string optimization.
--
-- Note: It works on well-formed strings only!
--
-- Optimizations on characters can be summarized as follows:
--
-- \a\b\f\n\r\t\v -- no change
-- \\ -- no change
-- \"\' -- depends on delim, other can remove \
-- \[\] -- remove \
-- \<char> -- general escape, remove \ (Lua 5.1 only)
-- \<eol> -- normalize the EOL only
-- \ddd -- if \a\b\f\n\r\t\v, change to latter
-- if other < ascii 32, keep ddd but zap leading zeros
-- but cannot have following digits
-- if >= ascii 32, translate it into the literal, then also
-- do escapes for \\,\",\' cases
-- <other> -- no change
--
-- Switch delimiters if string becomes shorter.
--
-- @tparam int I
local function do_string(I)
local info = sinfos[I]
local delim = sub(info, 1, 1) -- delimiter used
local ndelim = (delim == "'") and '"' or "'" -- opposite " <-> '
local z = sub(info, 2, -2) -- actual string
local i = 1
local c_delim, c_ndelim = 0, 0 -- "/' counts
while i <= #z do
local c = sub(z, i, i)
if c == "\\" then -- escaped stuff
local j = i + 1
local d = sub(z, j, j)
local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
if not p then -- \<char> -- remove \ (Lua 5.1 only)
z = sub(z, 1, i - 1)..sub(z, j)
i = i + 1
elseif p <= 8 then -- \a\b\f\n\r\t\v\\
i = i + 2 -- no change
elseif p <= 10 then -- \<eol> -- normalize EOL
local eol = sub(z, j, j + 1)
if eol == "\r\n" or eol == "\n\r" then
z = sub(z, 1, i).."\n"..sub(z, j + 2)
elseif p == 10 then -- \r case
z = sub(z, 1, i).."\n"..sub(z, j + 1)
end
i = i + 2
elseif p <= 12 then -- \"\' -- remove \ for ndelim
if d == delim then
c_delim = c_delim + 1
i = i + 2
else
c_ndelim = c_ndelim + 1
z = sub(z, 1, i - 1)..sub(z, j)
i = i + 1
end
else -- \ddd -- various steps
local s = match(z, "^(%d%d?%d?)", j)
j = i + 1 + #s -- skip to location
local cv = tonumber(s)
local cc = char(cv)
p = find("\a\b\f\n\r\t\v", cc, 1, true)
if p then -- special escapes
s = "\\"..sub("abfnrtv", p, p)
elseif cv < 32 then -- normalized \ddd
if match(sub(z, j, j), "%d") then
-- if a digit follows, \ddd cannot be shortened
s = "\\"..s
else
s = "\\"..cv
end
elseif cc == delim then -- \<delim>
s = "\\"..cc
c_delim = c_delim + 1
elseif cc == "\\" then -- \\
s = "\\\\"
else -- literal character
s = cc
if cc == ndelim then
c_ndelim = c_ndelim + 1
end
end
z = sub(z, 1, i - 1)..s..sub(z, j)
i = i + #s
end--if p
else-- c ~= "\\" -- <other> -- no change
i = i + 1
if c == ndelim then -- count ndelim, for switching delimiters
c_ndelim = c_ndelim + 1
end
end--if c
end--while
-- Switching delimiters, a long-winded derivation:
-- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
-- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
-- simplifying the condition (1)>(2) --> c_delim > c_ndelim
if c_delim > c_ndelim then
i = 1
while i <= #z do
local p, _, r = find(z, "([\'\"])", i)
if not p then break end
if r == delim then -- \<delim> -> <delim>
z = sub(z, 1, p - 2)..sub(z, p)
i = p
else-- r == ndelim -- <ndelim> -> \<ndelim>
z = sub(z, 1, p - 1).."\\"..sub(z, p)
i = p + 2
end
end--while
delim = ndelim -- actually change delimiters
end
z = delim..z..delim
if z ~= sinfos[I] then
if opt_details then
print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
opt_details = opt_details + 1
end
sinfos[I] = z
end
end
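-- Two illustrative rewrites this routine can perform:
--
--   '\65\66\67'  -> 'ABC'    (printable \ddd escapes become literals)
--   'it\'s'      -> "it's"   (delimiters switched when that saves bytes)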
--- Does long string optimization.
--
-- * remove first optional newline
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
--
-- Note: warning flagged if trailing whitespace found, not trimmed.
--
-- @tparam int I
local function do_lstring(I)
local info = sinfos[I]
local delim1 = match(info, "^%[=*%[") -- cut out delimiters
local sep = #delim1
local delim2 = sub(info, -sep, -1)
local z = sub(info, sep + 1, -(sep + 1)) -- lstring without delims
local y = ""
local i = 1
while true do
local p, _, r, s = find(z, "([\r\n])([\r\n]?)", i)
-- deal with a single line
local ln
if not p then
ln = sub(z, i)
elseif p >= i then
ln = sub(z, i, p - 1)
end
if ln ~= "" then
-- flag a warning if there are trailing spaces, won't optimize!
if match(ln, "%s+$") then
M.warn.LSTRING = "trailing whitespace in long string near line "..stoklns[I]
end
y = y..ln
end
if not p then -- done if no more EOLs
break
end
-- deal with line endings, normalize them
i = p + 1
if p then
if #s > 0 and r ~= s then -- skip CRLF or LFCR
i = i + 1
end
-- skip first newline, which can be safely deleted
if not(i == 1 and i == p) then
y = y.."\n"
end
end
end--while
-- handle possible deletion of one or more '=' separators
if sep >= 3 then
local chk, okay = sep - 1
-- loop to test ending delimiter with less of '=' down to zero
while chk >= 2 do
local delim = "%]"..rep("=", chk - 2).."%]"
if not match(y, delim) then okay = chk end
chk = chk - 1
end
if okay then -- change delimiters
sep = rep("=", okay - 2)
delim1, delim2 = "["..sep.."[", "]"..sep.."]"
end
end
sinfos[I] = delim1..y..delim2
end
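-- For example, a long string written as [===[ ... ]===] whose body never
-- contains ']]', ']=]' or ']==]' gets the shortest safe delimiters: [[ ... ]].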
--- Does long comment optimization.
--
-- * trim trailing whitespace
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
--
-- Note: It does not remove first optional newline.
--
-- @tparam int I
local function do_lcomment(I)
local info = sinfos[I]
local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
local sep = #delim1
local delim2 = sub(info, -(sep - 2), -1)
local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
local y = ""
local i = 1
while true do
local p, _, r, s = find(z, "([\r\n])([\r\n]?)", i)
-- deal with a single line, extract and check trailing whitespace
local ln
if not p then
ln = sub(z, i)
elseif p >= i then
ln = sub(z, i, p - 1)
end
if ln ~= "" then
-- trim trailing whitespace if non-empty line
local ws = match(ln, "%s*$")
if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
y = y..ln
end
if not p then -- done if no more EOLs
break
end
-- deal with line endings, normalize them
i = p + 1
if p then
if #s > 0 and r ~= s then -- skip CRLF or LFCR
i = i + 1
end
y = y.."\n"
end
end--while
-- handle possible deletion of one or more '=' separators
sep = sep - 2
if sep >= 3 then
local chk, okay = sep - 1
-- loop to test ending delimiter with less of '=' down to zero
while chk >= 2 do
local delim = "%]"..rep("=", chk - 2).."%]"
if not match(y, delim) then okay = chk end
chk = chk - 1
end
if okay then -- change delimiters
sep = rep("=", okay - 2)
delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
end
end
sinfos[I] = delim1..y..delim2
end
--- Does short comment optimization.
--
-- * trim trailing whitespace
--
-- @tparam int i
local function do_comment(i)
local info = sinfos[i]
local ws = match(info, "%s*$") -- just look from end of string
if #ws > 0 then
info = sub(info, 1, -(#ws + 1)) -- trim trailing whitespace
end
sinfos[i] = info
end
--- Returns true if string found in long comment.
--
-- This is a feature to keep copyright or license texts.
--
-- @tparam bool opt_keep
-- @tparam string info
-- @treturn bool
local function keep_lcomment(opt_keep, info)
if not opt_keep then return false end -- option not set
local delim1 = match(info, "^%-%-%[=*%[") -- cut out delimiters
local sep = #delim1
local z = sub(info, sep + 1, -(sep - 1)) -- comment without delims
if find(z, opt_keep, 1, true) then -- try to match
return true
end
end
--- The main entry point.
--
-- * currently, lexer processing has 2 passes
-- * processing is done on a line-oriented basis, which is easier to
-- grok due to the next point...
-- * since there are various options that can be enabled or disabled,
-- processing is a little messy or convoluted
--
-- @tparam {[string]=bool,...} option
-- @tparam {string,...} toklist
-- @tparam {string,...} semlist
-- @tparam {int,...} toklnlist
-- @treturn {string,...} toklist
-- @treturn {string,...} semlist
-- @treturn {int,...} toklnlist
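-- A typical call, sketched after the way luasrcdiet's init module drives it:
--
--   optlex.print = function () end  -- silence per-optimization details
--   local toks, infos, lns = optlex.optimize(option, toklist, semlist, toklnlist)
--   local optimized_source = table.concat(infos)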
function M.optimize(option, toklist, semlist, toklnlist)
-- Set option flags.
local opt_comments = option["opt-comments"]
local opt_whitespace = option["opt-whitespace"]
local opt_emptylines = option["opt-emptylines"]
local opt_eols = option["opt-eols"]
local opt_strings = option["opt-strings"]
local opt_numbers = option["opt-numbers"]
local opt_x = option["opt-experimental"]
local opt_keep = option.KEEP
opt_details = option.DETAILS and 0 -- upvalues for details display
print = M.print or _G.print
if opt_eols then -- forced settings, otherwise won't work properly
opt_comments = true
opt_whitespace = true
opt_emptylines = true
elseif opt_x then
opt_whitespace = true
end
-- Variable initialization.
stoks, sinfos, stoklns -- set source lists
= toklist, semlist, toklnlist
local i = 1 -- token position
local tok, info -- current token
local prev -- position of last grammar token
-- on same line (for TK_SPACE stuff)
-- Changes a token, info pair.
local function settoken(tok, info, I) --luacheck: ignore 431
I = I or i
stoks[I] = tok or ""
sinfos[I] = info or ""
end
-- Experimental optimization for ';' operator.
if opt_x then
while true do
tok, info = stoks[i], sinfos[i]
if tok == "TK_EOS" then -- end of stream/pass
break
elseif tok == "TK_OP" and info == ";" then
-- ';' operator found, since it is entirely optional, set it
-- as a space to let whitespace optimization do the rest
settoken("TK_SPACE", " ")
end
i = i + 1
end
repack_tokens()
end
-- Processing loop (PASS 1)
i = 1
while true do
tok, info = stoks[i], sinfos[i]
local atstart = atlinestart(i) -- set line begin flag
if atstart then prev = nil end
if tok == "TK_EOS" then -- end of stream/pass
break
elseif tok == "TK_KEYWORD" or -- keywords, identifiers,
tok == "TK_NAME" or -- operators
tok == "TK_OP" then
-- TK_KEYWORD and TK_OP can't be optimized without a big
-- optimization framework; it would be more of an optimizing
-- compiler, not a source code compressor
-- TK_NAMEs that are locals need a parser to analyze/optimize
prev = i
elseif tok == "TK_NUMBER" then -- numbers
if opt_numbers then
do_number(i) -- optimize
end
prev = i
elseif tok == "TK_STRING" or -- strings, long strings
tok == "TK_LSTRING" then
if opt_strings then
if tok == "TK_STRING" then
do_string(i) -- optimize
else
do_lstring(i) -- optimize
end
end
prev = i
elseif tok == "TK_COMMENT" then -- short comments
if opt_comments then
if i == 1 and sub(info, 1, 1) == "#" then
-- keep shebang comment, trim whitespace
do_comment(i)
else
-- safe to delete, as a TK_EOL (or TK_EOS) always follows
settoken() -- remove entirely
end
elseif opt_whitespace then -- trim whitespace only
do_comment(i)
end
elseif tok == "TK_LCOMMENT" then -- long comments
if keep_lcomment(opt_keep, info) then
-- if --keep, we keep a long comment if <msg> is found;
-- this is a feature to keep copyright or license texts
if opt_whitespace then -- trim whitespace only
do_lcomment(i)
end
prev = i
elseif opt_comments then
local eols = commenteols(info)
-- prepare opt_emptylines case first, if a disposable token
-- follows, current one is safe to dump, else keep a space;
-- it is implied that the operation is safe for '-', because
-- current is a TK_LCOMMENT, and must be separate from a '-'
if is_faketoken[stoks[i + 1]] then
settoken() -- remove entirely
tok = ""
else
settoken("TK_SPACE", " ")
end
-- if there are embedded EOLs to keep and opt_emptylines is
-- disabled, then switch the token into one or more EOLs
if not opt_emptylines and eols > 0 then
settoken("TK_EOL", rep("\n", eols))
end
-- if optimizing whitespaces, force reinterpretation of the
-- token to give a chance for the space to be optimized away
if opt_whitespace and tok ~= "" then
i = i - 1 -- to reinterpret
end
else -- disabled case
if opt_whitespace then -- trim whitespace only
do_lcomment(i)
end
prev = i
end
elseif tok == "TK_EOL" then -- line endings
if atstart and opt_emptylines then
settoken() -- remove entirely
elseif info == "\r\n" or info == "\n\r" then
-- normalize the rest of the EOLs for CRLF/LFCR only
-- (note that TK_LCOMMENT can change into several EOLs)
settoken("TK_EOL", "\n")
end
elseif tok == "TK_SPACE" then -- whitespace
if opt_whitespace then
if atstart or atlineend(i) then
-- delete leading and trailing whitespace
settoken() -- remove entirely
else
-- at this point, since leading whitespace has been removed,
-- there should be either a real token or a TK_LCOMMENT
-- prior to hitting this whitespace; the TK_LCOMMENT case
-- only happens if opt_comments is disabled; so prev ~= nil
local ptok = stoks[prev]
if ptok == "TK_LCOMMENT" then
-- previous TK_LCOMMENT can abut with anything
settoken() -- remove entirely
else
-- prev must be a grammar token; consecutive TK_SPACE
-- tokens are impossible when optimizing whitespace
local ntok = stoks[i + 1]
if is_faketoken[ntok] then
-- handle special case where a '-' cannot abut with
-- either a short comment or a long comment
if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
ptok == "TK_OP" and sinfos[prev] == "-" then
-- keep token
else
settoken() -- remove entirely
end
else--is_realtoken
-- check a pair of grammar tokens, if can abut, then
-- delete space token entirely, otherwise keep one space
local s = checkpair(prev, i + 1)
if s == "" then
settoken() -- remove entirely
else
settoken("TK_SPACE", " ")
end
end
end
end
end
else
error("unidentified token encountered")
end
i = i + 1
end--while
repack_tokens()
-- Processing loop (PASS 2)
if opt_eols then
i = 1
-- Aggressive EOL removal only works with most non-grammar tokens
-- optimized away because it is a rather simple scheme -- basically
-- it just checks 'real' token pairs around EOLs.
if stoks[1] == "TK_COMMENT" then
-- first comment still existing must be the shebang, skip whole line
i = 3
end
while true do
tok = stoks[i]
if tok == "TK_EOS" then -- end of stream/pass
break
elseif tok == "TK_EOL" then -- consider each TK_EOL
local t1, t2 = stoks[i - 1], stoks[i + 1]
if is_realtoken[t1] and is_realtoken[t2] then -- sanity check
local s = checkpair(i - 1, i + 1)
if s == "" or t2 == "TK_EOS" then
settoken() -- remove entirely
end
end
end--if tok
i = i + 1
end--while
repack_tokens()
end
if opt_details and opt_details > 0 then print() end -- spacing
return stoks, sinfos, stoklns
end
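-- A minimal usage sketch (illustrative; the require paths and the lexer's
-- lex() signature are assumptions, not guaranteed by this file):
--
--   local llex = require "luasrcdiet.llex"
--   local optlex = require "luasrcdiet.optlex"
--   local toks, infos, lns = llex.lex("local x  =  1  -- note\n")
--   toks, infos, lns = optlex.optimize(
--     { ["opt-comments"] = true, ["opt-whitespace"] = true,
--       ["opt-emptylines"] = true },
--     toks, infos, lns)
--   print(table.concat(infos))  --> roughly: local x=1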
return M

View File

@ -0,0 +1,644 @@
---------
-- This module does parser-based optimizations.
--
-- **Notes:**
--
-- * The processing load is quite significant, but since this is an
-- off-line text processor, I believe we can wait a few seconds.
-- * TODO: Might process "local a,a,a" wrongly... need tests!
-- * TODO: Remove position handling if overlapped locals (rem < 0)
-- needs more study, to check behaviour.
-- * TODO: There are probably better ways to do allocation, e.g. by
-- choosing better methods to sort and pick locals...
-- * TODO: We don't need 53*63 two-letter identifiers; we can make
-- do with significantly fewer, depending on how many are really
-- needed, and improve entropy; e.g. 13 needed -> choose 4*4 instead.
----
local byte = string.byte
local char = string.char
local concat = table.concat
local fmt = string.format
local pairs = pairs
local rep = string.rep
local sort = table.sort
local sub = string.sub
local M = {}
-- Letter frequencies for reducing symbol entropy (fixed version)
-- * Might help a wee bit when the output file is compressed
-- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies
-- * We use letter frequencies according to a Linotype keyboard, plus
-- the underscore, and both lower case and upper case letters.
-- * The arrangement below (LC, underscore, %d, UC) is arbitrary.
-- * This is certainly not optimal, but is quick-and-dirty and the
-- process has no significant overhead
local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ"
local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ"
-- Names or identifiers that must be skipped.
-- (The first two lines are for keywords.)
local SKIP_NAME = {}
for v in ([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while
self _ENV]]):gmatch("%S+") do
SKIP_NAME[v] = true
end
local toklist, seminfolist, -- token lists (lexer output)
tokpar, seminfopar, xrefpar, -- token lists (parser output)
globalinfo, localinfo, -- variable information tables
statinfo, -- statement type table
globaluniq, localuniq, -- unique name tables
var_new, -- index of new variable names
varlist -- list of output variables
--- Preprocesses information table to get lists of unique names.
--
-- @tparam {table,...} infotable
-- @treturn table
local function preprocess(infotable)
local uniqtable = {}
for i = 1, #infotable do -- enumerate info table
local obj = infotable[i]
local name = obj.name
if not uniqtable[name] then -- not found, start an entry
uniqtable[name] = {
decl = 0, token = 0, size = 0,
}
end
local uniq = uniqtable[name] -- count declarations, tokens, size
uniq.decl = uniq.decl + 1
local xref = obj.xref
local xcount = #xref
uniq.token = uniq.token + xcount
uniq.size = uniq.size + xcount * #name
if obj.decl then -- if local table, create first,last pairs
obj.id = i
obj.xcount = xcount
if xcount > 1 then -- if ==1, means local never accessed
obj.first = xref[2]
obj.last = xref[xcount]
end
else -- if global table, add a back ref
uniq.id = i
end
end--for
return uniqtable
end
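-- Shape of the result (illustrative): for a source with two declarations
-- of a local `x`, each carrying two xref entries, the entry is
--   uniqtable["x"] = { decl = 2, token = 4, size = 4 }
-- i.e. 2 declarations, 4 referencing tokens, 4 bytes (#"x" per token).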
--- Calculates actual symbol frequencies, in order to reduce entropy.
--
-- * This may help further reduce the size of compressed sources.
-- * Note that since parsing optimizations are done before lexing
-- optimizations, the frequency table is not exact!
-- * Yes, this will miss --keep block comments too...
--
-- @tparam table option
local function recalc_for_entropy(option)
-- table of token classes to accept in calculating symbol frequency
local ACCEPT = {
TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true,
TK_STRING = true, TK_LSTRING = true,
}
if not option["opt-comments"] then
ACCEPT.TK_COMMENT = true
ACCEPT.TK_LCOMMENT = true
end
-- Create a new table and remove any original locals by filtering.
local filtered = {}
for i = 1, #toklist do
filtered[i] = seminfolist[i]
end
for i = 1, #localinfo do -- enumerate local info table
local obj = localinfo[i]
local xref = obj.xref
for j = 1, obj.xcount do
local p = xref[j]
filtered[p] = "" -- remove locals
end
end
local freq = {} -- reset symbol frequency table
for i = 0, 255 do freq[i] = 0 end
for i = 1, #toklist do -- gather symbol frequency
local tok, info = toklist[i], filtered[i]
if ACCEPT[tok] then
for j = 1, #info do
local c = byte(info, j)
freq[c] = freq[c] + 1
end
end--if
end--for
-- Re-sorts symbols according to actual frequencies.
--
-- @tparam string symbols
-- @treturn string
local function resort(symbols)
local symlist = {}
for i = 1, #symbols do -- prepare table to sort
local c = byte(symbols, i)
symlist[i] = { c = c, freq = freq[c], }
end
sort(symlist, function(v1, v2) -- sort selected symbols
return v1.freq > v2.freq
end)
local charlist = {} -- reconstitute the string
for i = 1, #symlist do
charlist[i] = char(symlist[i].c)
end
return concat(charlist)
end
LETTERS = resort(LETTERS) -- change letter arrangement
ALPHANUM = resort(ALPHANUM)
end
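-- Illustrative effect: if, after filtering out locals, the byte 'a' turns
-- out to be the most frequent symbol among the kept tokens, resort() moves
-- 'a' to the front of LETTERS, so new_var_name() below hands out names
-- whose bytes mirror the file's existing distribution, which can help the
-- compressed output size a little.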
--- Returns a string containing a new local variable name to use, and
-- a flag indicating whether it collides with a global variable.
--
-- Trapping keywords and other names like 'self' is done elsewhere.
--
-- @treturn string A new local variable name.
-- @treturn bool Whether the name collides with a global variable.
local function new_var_name()
local var
local cletters, calphanum = #LETTERS, #ALPHANUM
local v = var_new
if v < cletters then -- single char
v = v + 1
var = sub(LETTERS, v, v)
else -- longer names
local range, sz = cletters, 1 -- calculate # chars fit
repeat
v = v - range
range = range * calphanum
sz = sz + 1
until range > v
local n = v % cletters -- left side cycles faster
v = (v - n) / cletters -- do first char first
n = n + 1
var = sub(LETTERS, n, n)
while sz > 1 do
local m = v % calphanum
v = (v - m) / calphanum
m = m + 1
var = var..sub(ALPHANUM, m, m)
sz = sz - 1
end
end
var_new = var_new + 1
return var, globaluniq[var] ~= nil
end
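-- Allocation order (illustrative, assuming the default LETTERS order
-- above, i.e. no entropy re-sort): successive calls yield the 53 single
-- letters "e", "t", "a", ..., "Z" first, then two-character names whose
-- *first* character cycles fastest:
--   54th -> "ee"   55th -> "te"   56th -> "ae"   ...
-- The second return value flags a collision with a known global name.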
--- Calculates and prints some statistics.
--
-- Note: probably better in main source, put here for now.
--
-- @tparam table globaluniq
-- @tparam table localuniq
-- @tparam table afteruniq
-- @tparam table option
local function stats_summary(globaluniq, localuniq, afteruniq, option) --luacheck: ignore 431
local print = M.print or print
local opt_details = option.DETAILS
if option.QUIET then return end
local uniq_g, uniq_li, uniq_lo = 0, 0, 0
local decl_g, decl_li, decl_lo = 0, 0, 0
local token_g, token_li, token_lo = 0, 0, 0
local size_g, size_li, size_lo = 0, 0, 0
local function avg(c, l) -- safe average function
if c == 0 then return 0 end
return l / c
end
-- Collect statistics (Note: globals do not have declarations!)
for _, uniq in pairs(globaluniq) do
uniq_g = uniq_g + 1
token_g = token_g + uniq.token
size_g = size_g + uniq.size
end
for _, uniq in pairs(localuniq) do
uniq_li = uniq_li + 1
decl_li = decl_li + uniq.decl
token_li = token_li + uniq.token
size_li = size_li + uniq.size
end
for _, uniq in pairs(afteruniq) do
uniq_lo = uniq_lo + 1
decl_lo = decl_lo + uniq.decl
token_lo = token_lo + uniq.token
size_lo = size_lo + uniq.size
end
local uniq_ti = uniq_g + uniq_li
local decl_ti = decl_g + decl_li
local token_ti = token_g + token_li
local size_ti = size_g + size_li
local uniq_to = uniq_g + uniq_lo
local decl_to = decl_g + decl_lo
local token_to = token_g + token_lo
local size_to = size_g + size_lo
-- Detailed stats: global list
if opt_details then
local sorted = {} -- sort table of unique global names by size
for name, uniq in pairs(globaluniq) do
uniq.name = name
sorted[#sorted + 1] = uniq
end
sort(sorted, function(v1, v2)
return v1.size > v2.size
end)
do
local tabf1, tabf2 = "%8s%8s%10s %s", "%8d%8d%10.2f %s"
local hl = rep("-", 44)
print("*** global variable list (sorted by size) ***\n"..hl)
print(fmt(tabf1, "Token", "Input", "Input", "Global"))
print(fmt(tabf1, "Count", "Bytes", "Average", "Name"))
print(hl)
for i = 1, #sorted do
local uniq = sorted[i]
print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name))
end
print(hl)
print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL"))
print(hl.."\n")
end
-- Detailed stats: local list
do
local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s %s", "%8d%8d%8d%10.2f%8d%10.2f %s"
local hl = rep("-", 70)
print("*** local variable list (sorted by allocation order) ***\n"..hl)
print(fmt(tabf1, "Decl.", "Token", "Input", "Input", "Output", "Output", "Global"))
print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name"))
print(hl)
for i = 1, #varlist do -- iterate according to order assigned
local name = varlist[i]
local uniq = afteruniq[name]
local old_t, old_s = 0, 0
for j = 1, #localinfo do -- find corresponding old names and calculate
local obj = localinfo[j]
if obj.name == name then
old_t = old_t + obj.xcount
old_s = old_s + obj.xcount * #obj.oldname
end
end
print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s),
uniq.size, avg(uniq.token, uniq.size), name))
end
print(hl)
print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li),
size_lo, avg(token_lo, size_lo), "TOTAL"))
print(hl.."\n")
end
end--if opt_details
-- Display output
do
local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f"
local hl = rep("-", 58)
print("*** local variable optimization summary ***\n"..hl)
print(fmt(tabf1, "Variable", "Unique", "Decl.", "Token", "Size", "Average"))
print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes"))
print(hl)
print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g)))
print(hl)
print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li)))
print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti)))
print(hl)
print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo)))
print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to)))
print(hl.."\n")
end
end
--- Does experimental optimization for f("string") statements.
--
-- It's safe to delete parentheses without adding whitespace, as both
-- kinds of strings can abut with anything else.
local function optimize_func1()
local function is_strcall(j) -- find f("string") pattern
local t1 = tokpar[j + 1] or ""
local t2 = tokpar[j + 2] or ""
local t3 = tokpar[j + 3] or ""
if t1 == "(" and t2 == "<string>" and t3 == ")" then
return true
end
end
local del_list = {} -- scan for function pattern,
local i = 1 -- tokens to be deleted are marked
while i <= #tokpar do
local id = statinfo[i]
if id == "call" and is_strcall(i) then -- found & mark ()
del_list[i + 1] = true -- '('
del_list[i + 3] = true -- ')'
i = i + 3
end
i = i + 1
end
-- Delete a token and adjust all relevant tables.
-- * Currently invalidates globalinfo and localinfo (not updated),
-- so any other optimization is done after processing locals
-- (of course, we can also lex the source data again...).
-- * Faster one-pass token deletion.
local del_list2 = {}
do
local i, dst, idend = 1, 1, #tokpar
while dst <= idend do -- process parser tables
if del_list[i] then -- found a token to delete?
del_list2[xrefpar[i]] = true
i = i + 1
end
if i > dst then
if i <= idend then -- shift table items lower
tokpar[dst] = tokpar[i]
seminfopar[dst] = seminfopar[i]
xrefpar[dst] = xrefpar[i] - (i - dst)
statinfo[dst] = statinfo[i]
else -- nil out excess entries
tokpar[dst] = nil
seminfopar[dst] = nil
xrefpar[dst] = nil
statinfo[dst] = nil
end
end
i = i + 1
dst = dst + 1
end
end
do
local i, dst, idend = 1, 1, #toklist
while dst <= idend do -- process lexer tables
if del_list2[i] then -- found a token to delete?
i = i + 1
end
if i > dst then
if i <= idend then -- shift table items lower
toklist[dst] = toklist[i]
seminfolist[dst] = seminfolist[i]
else -- nil out excess entries
toklist[dst] = nil
seminfolist[dst] = nil
end
end
i = i + 1
dst = dst + 1
end
end
end
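-- Illustrative effect: for statements recognized as f("string") calls,
-- the marked '(' and ')' tokens are removed from both the parser-level
-- and lexer-level streams, e.g.
--   print("hello")   -->   print"hello"
--   require("foo")   -->   require"foo"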
--- Does local variable optimization.
--
-- @tparam {[string]=bool,...} option
local function optimize_locals(option)
var_new = 0 -- reset variable name allocator
varlist = {}
-- Preprocess global/local tables, handle entropy reduction.
globaluniq = preprocess(globalinfo)
localuniq = preprocess(localinfo)
if option["opt-entropy"] then -- for entropy improvement
recalc_for_entropy(option)
end
-- Build initial declared object table, then sort according to
-- token count; this might help assign more tokens to more common
-- variable names such as 'e', thus possibly reducing entropy.
-- * An object knows its localinfo index via its 'id' field.
-- * The special locals (parameters) "self" and "_ENV" get special handling here.
local object = {}
for i = 1, #localinfo do
object[i] = localinfo[i]
end
sort(object, function(v1, v2) -- sort largest first
return v1.xcount > v2.xcount
end)
-- The special "self" and "_ENV" function parameters must be preserved.
-- * The allocator below will never use "self", so it is safe to
-- keep those implicit declarations as-is.
local temp, j, used_specials = {}, 1, {}
for i = 1, #object do
local obj = object[i]
if not obj.is_special then
temp[j] = obj
j = j + 1
else
used_specials[#used_specials + 1] = obj.name
end
end
object = temp
-- A simple first-come first-served heuristic name allocator,
-- note that this is in no way optimal...
-- * Each object is a local variable declaration plus existence.
-- * The aim is to assign short names to as many tokens as possible,
-- so the following tries to maximize name reuse.
-- * Note that we preserve sort order.
local nobject = #object
while nobject > 0 do
local varname, gcollide
repeat
varname, gcollide = new_var_name() -- collect a variable name
until not SKIP_NAME[varname] -- skip all special names
varlist[#varlist + 1] = varname -- keep a list
local oleft = nobject
-- If the variable name collides with an existing global, the name
-- cannot be used by a local while the name is accessed as a global
-- during the local's lifetime (between 'act' and 'rem'), so
-- we drop objects that collide with the corresponding global.
if gcollide then
-- find the xref table of the global
local gref = globalinfo[globaluniq[varname].id].xref
local ngref = #gref
-- enumerate for all current objects; all are valid at this point
for i = 1, nobject do
local obj = object[i]
local act, rem = obj.act, obj.rem -- 'live' range of local
-- if rem < 0, it is a -id to a local that had the same name
-- so follow rem to extend it; does this make sense?
while rem < 0 do
rem = localinfo[-rem].rem
end
local drop
for j = 1, ngref do
local p = gref[j]
if p >= act and p <= rem then drop = true end -- in range?
end
if drop then
obj.skip = true
oleft = oleft - 1
end
end--for
end--if gcollide
-- Now the first unassigned local (since it's sorted) will be the
-- one with the most tokens to rename, so we set this one and then
-- eliminate all others that collide; any locals left over
-- can then reuse the same variable name. This is repeated until
-- every local declaration that can use this name is assigned.
--
-- The criterion for local-local reuse/collision is:
-- A is the local with a name already assigned
-- B is the unassigned local under consideration
-- => anytime A is accessed, it cannot be when B is 'live'
-- => to speed up things, we have first/last accesses noted
while oleft > 0 do
local i = 1
while object[i].skip do -- scan for first object
i = i + 1
end
-- First object is free for assignment of the variable name
-- [first,last] gives the access range for collision checking.
oleft = oleft - 1
local obja = object[i]
i = i + 1
obja.newname = varname
obja.skip = true
obja.done = true
local first, last = obja.first, obja.last
local xref = obja.xref
-- Then, scan all the rest and drop those colliding.
-- If A was never accessed then it'll never collide with anything
-- otherwise trivial skip if:
-- * B was activated after A's last access (last < act),
-- * B was removed before A's first access (first > rem),
-- if not, see detailed skip below...
if first and oleft > 0 then -- must have at least 1 access
local scanleft = oleft
while scanleft > 0 do
while object[i].skip do -- next valid object
i = i + 1
end
scanleft = scanleft - 1
local objb = object[i]
i = i + 1
local act, rem = objb.act, objb.rem -- live range of B
-- if rem < 0, extend range of rem thru' following local
while rem < 0 do
rem = localinfo[-rem].rem
end
if not(last < act or first > rem) then -- possible collision
-- B is activated later than A or at the same statement;
-- this means that, for no collision, A cannot be accessed while B
-- is alive, since B overrides A (or is a peer).
if act >= obja.act then
for j = 1, obja.xcount do -- ... then check every access
local p = xref[j]
if p >= act and p <= rem then -- A accessed when B live!
oleft = oleft - 1
objb.skip = true
break
end
end--for
-- A is activated later than B; this means that, for no collision,
-- A's access is okay since it overrides B, but B's last
-- access needs to be earlier than A's activation time.
else
if objb.last and objb.last >= obja.act then
oleft = oleft - 1
objb.skip = true
end
end
end
if oleft == 0 then break end
end
end--if first
end--while
-- After assigning all possible locals to one variable name, the
-- unassigned locals/objects have the skip field reset and the table
-- is compacted, to hopefully reduce iteration time.
local temp, j = {}, 1
for i = 1, nobject do
local obj = object[i]
if not obj.done then
obj.skip = false
temp[j] = obj
j = j + 1
end
end
object = temp -- new compacted object table
nobject = #object -- objects left to process
end--while
-- After assigning all locals with new variable names, we can
-- patch in the new names, and reprocess to get 'after' stats.
for i = 1, #localinfo do -- enumerate all locals
local obj = localinfo[i]
local xref = obj.xref
if obj.newname then -- if got new name, patch it in
for j = 1, obj.xcount do
local p = xref[j] -- xrefs indexes the token list
seminfolist[p] = obj.newname
end
obj.name, obj.oldname -- adjust names
= obj.newname, obj.name
else
obj.oldname = obj.name -- for cases like 'self'
end
end
-- Deal with statistics output.
for _, name in ipairs(used_specials) do
varlist[#varlist + 1] = name
end
local afteruniq = preprocess(localinfo)
stats_summary(globaluniq, localuniq, afteruniq, option)
end
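-- Net effect (illustrative, assuming the default LETTERS order): a chunk
-- such as
--   local counter = 0
--   counter = counter + 1
--   return counter
-- has its most-referenced local renamed to the first allocator name:
--   local e = 0
--   e = e + 1
--   return e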
--- The main entry point.
--
-- @tparam table option
-- @tparam {string,...} _toklist
-- @tparam {string,...} _seminfolist
-- @tparam table xinfo
function M.optimize(option, _toklist, _seminfolist, xinfo)
-- set tables
toklist, seminfolist -- from lexer
= _toklist, _seminfolist
tokpar, seminfopar, xrefpar -- from parser
= xinfo.toklist, xinfo.seminfolist, xinfo.xreflist
globalinfo, localinfo, statinfo -- from parser
= xinfo.globalinfo, xinfo.localinfo, xinfo.statinfo
-- Optimize locals.
if option["opt-locals"] then
optimize_locals(option)
end
-- Other optimizations.
if option["opt-experimental"] then -- experimental
optimize_func1()
-- WARNING globalinfo and localinfo now invalidated!
end
end
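-- Usage sketch (illustrative; the require path and the exact shape of
-- xinfo are assumptions based on the field names read above):
--
--   local optparser = require "luasrcdiet.optparser"
--   optparser.optimize({ ["opt-locals"] = true },
--                      toklist, seminfolist, xinfo)
--   -- toklist/seminfolist are patched in place with the new local names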
return M

View File

@ -0,0 +1,90 @@
---------
-- Example of a plugin for LuaSrcDiet.
--
-- WARNING: highly experimental! interface liable to change
--
-- **Notes:**
--
-- * Any function can be omitted and LuaSrcDiet won't call it.
-- * The functions are:
-- (1) init(_option, _srcfl, _destfl)
-- (2) post_load(z) can return z
-- (3) post_lex(toklist, seminfolist, toklnlist)
-- (4) post_parse(globalinfo, localinfo)
-- (5) post_optparse()
-- (6) post_optlex(toklist, seminfolist, toklnlist)
-- * Older tables can be copied and kept in the plugin and used later.
-- * If you modify 'option', remember that LuaSrcDiet might be
-- processing more than one file.
-- * Arrangement of the functions is not final!
-- * TODO: can't process additional options from command line yet
----
local M = {}
local option -- local reference to list of options
local srcfl, destfl -- filenames
local old_quiet
local function print(...) -- handle quiet option
if option.QUIET then return end
_G.print(...)
end
--- Initialization.
--
-- @tparam {[string]=bool,...} _option
-- @tparam string _srcfl Path of the source file.
-- @tparam string _destfl Path of the destination file.
function M.init(_option, _srcfl, _destfl)
option = _option
srcfl, destfl = _srcfl, _destfl
-- plugin can impose its own option starting from here
end
--- Message display, post-load processing, can return z.
function M.post_load(z)
-- this message will print after the LuaSrcDiet title message
print([[
Example plugin module for LuaSrcDiet
]])
print("Example: source file name is '"..srcfl.."'")
print("Example: destination file name is '"..destfl.."'")
print("Example: the size of the source file is "..#z.." bytes")
-- returning z is optional; this allows optional replacement of
-- the source data prior to lexing
return z
end
--- Post-lexing processing, can work on lexer table output.
function M.post_lex(toklist, seminfolist, toklnlist) --luacheck: ignore
print("Example: the number of lexed elements is "..#toklist)
end
--- Post-parsing processing, gives globalinfo, localinfo.
function M.post_parse(globalinfo, localinfo)
print("Example: size of globalinfo is "..#globalinfo)
print("Example: size of localinfo is "..#localinfo)
old_quiet = option.QUIET
option.QUIET = true
end
--- Post-parser optimization processing, can get tables from elsewhere.
function M.post_optparse()
option.QUIET = old_quiet
print("Example: pretend to do post-optparse")
end
--- Post-lexer optimization processing, can get tables from elsewhere.
function M.post_optlex(toklist, seminfolist, toklnlist) --luacheck: ignore
print("Example: pretend to do post-optlex")
-- restore old settings, other file might need original settings
option.QUIET = old_quiet
-- option.EXIT can be set at the end of any post_* function to stop
-- further processing and exit for the current file being worked on
-- in this case, final stats printout is disabled and the output will
-- not be written to the destination file
option.EXIT = true
end
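-- Note (illustrative): this module is meant to be loaded by LuaSrcDiet
-- itself, presumably via its --plugin command-line option, e.g.
--   luasrcdiet --plugin example input.lua
-- (the exact invocation is an assumption; see the main program's docs).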
return M

View File

@ -0,0 +1,177 @@
---------
-- Turns Lua 5.1 source code into HTML files.
--
-- WARNING: highly experimental! interface liable to change
--
-- **Notes:**
--
-- * This HTML highlighter marks globals brightly so that their usage
-- can be manually optimized.
-- * Either uses a .html extension for output files or follows the
-- -o <filespec> option.
-- * The HTML style tries to follow that of the Lua wiki.
----
local fs = require "luasrcdiet.fs"
local concat = table.concat
local find = string.find
local fmt = string.format
local sub = string.sub
local M = {}
local HTML_EXT = ".html"
local ENTITIES = {
["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;",
["'"] = "&apos;", ["\""] = "&quot;",
}
-- simple headers and footers
local HEADER = [[
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>%s</title>
<meta name="Generator" content="LuaSrcDiet">
<style type="text/css">
%s</style>
</head>
<body>
<pre class="code">
]]
local FOOTER = [[
</pre>
</body>
</html>
]]
-- for more, please see wikimain.css from the Lua wiki site
local STYLESHEET = [[
BODY {
background: white;
color: navy;
}
pre.code { color: black; }
span.comment { color: #00a000; }
span.string { color: #009090; }
span.keyword { color: black; font-weight: bold; }
span.number { color: #993399; }
span.operator { }
span.name { }
span.global { color: #ff0000; font-weight: bold; }
span.local { color: #0000ff; font-weight: bold; }
]]
local option -- local reference to list of options
local srcfl, destfl -- filenames
local toklist, seminfolist -- token data
local function print(...) -- handle quiet option
if option.QUIET then return end
_G.print(...)
end
--- Initialization.
function M.init(_option, _srcfl)
option = _option
srcfl = _srcfl
local extb, _ = find(srcfl, "%.[^%.%\\%/]*$")
local basename = srcfl
if extb and extb > 1 then
basename = sub(srcfl, 1, extb - 1)
end
destfl = basename..HTML_EXT
if option.OUTPUT_FILE then
destfl = option.OUTPUT_FILE
end
if srcfl == destfl then
error("output filename identical to input filename")
end
end
--- Message display, post-load processing.
function M.post_load()
print([[
HTML plugin module for LuaSrcDiet
]])
print("Exporting: "..srcfl.." -> "..destfl.."\n")
end
--- Post-lexing processing, can work on lexer table output.
function M.post_lex(_toklist, _seminfolist)
toklist, seminfolist = _toklist, _seminfolist
end
--- Escapes the usual suspects for HTML/XML.
local function do_entities(z)
local i = 1
while i <= #z do
local c = sub(z, i, i)
local d = ENTITIES[c]
if d then
c = d
z = sub(z, 1, i - 1)..c..sub(z, i + 1)
end
i = i + #c
end--while
return z
end
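-- Example (illustrative):
--   do_entities('a < b & "c"')  --> a &lt; b &amp; &quot;c&quot;
-- Advancing i by the replacement's length ensures the inserted entity
-- text is never rescanned.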
--- Post-parsing processing, gives globalinfo, localinfo.
function M.post_parse(globalinfo, localinfo)
local html = {}
local function add(s) -- html helpers
html[#html + 1] = s
end
local function span(class, s)
add('<span class="'..class..'">'..s..'</span>')
end
for i = 1, #globalinfo do -- mark global identifiers as TK_GLOBAL
local obj = globalinfo[i]
local xref = obj.xref
for j = 1, #xref do
local p = xref[j]
toklist[p] = "TK_GLOBAL"
end
end--for
for i = 1, #localinfo do -- mark local identifiers as TK_LOCAL
local obj = localinfo[i]
local xref = obj.xref
for j = 1, #xref do
local p = xref[j]
toklist[p] = "TK_LOCAL"
end
end--for
add(fmt(HEADER, -- header and leading stuff
do_entities(srcfl),
STYLESHEET))
for i = 1, #toklist do -- enumerate token list
local tok, info = toklist[i], seminfolist[i]
if tok == "TK_KEYWORD" then
span("keyword", info)
elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
span("string", do_entities(info))
elseif tok == "TK_COMMENT" or tok == "TK_LCOMMENT" then
span("comment", do_entities(info))
elseif tok == "TK_GLOBAL" then
span("global", info)
elseif tok == "TK_LOCAL" then
span("local", info)
elseif tok == "TK_NAME" then
span("name", info)
elseif tok == "TK_NUMBER" then
span("number", info)
elseif tok == "TK_OP" then
span("operator", do_entities(info))
elseif tok ~= "TK_EOS" then -- TK_EOL, TK_SPACE
add(info)
end
end--for
add(FOOTER)
assert(fs.write_file(destfl, concat(html), "wb"))
option.EXIT = true
end
return M

View File

@ -0,0 +1,89 @@
---------
-- Calculates SLOC for Lua 5.1 scripts
--
-- WARNING: highly experimental! interface liable to change
--
-- **Notes:**
--
-- * SLOC's behaviour is based on David Wheeler's SLOCCount.
-- * Empty lines and comments don't count as significant.
-- * Empty lines in long strings are also insignificant. This is
-- debatable. In SLOCCount, this allows counting of invalid multi-
-- line strings for C. But an empty line is still an empty line.
-- * Ignores the --quiet option and prints its own result line.
----
local M = {}
local option -- local reference to list of options
local srcfl -- source file name
function M.init(_option, _srcfl)
option = _option
option.QUIET = true
srcfl = _srcfl
end
--- Splits a block into a table of lines (minus EOLs).
--
-- @tparam string blk
-- @treturn {string,...} lines
local function split(blk)
local lines = {}
local i, nblk = 1, #blk
while i <= nblk do
local p, q, r, s = blk:find("([\r\n])([\r\n]?)", i)
if not p then
p = nblk + 1
end
lines[#lines + 1] = blk:sub(i, p - 1)
i = p + 1
if p < nblk and q > p and r ~= s then -- handle Lua-style CRLF, LFCR
i = i + 1
end
end
return lines
end
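-- Example (illustrative): mixed EOL styles are handled line by line:
--   split("one\r\ntwo\nthree")  --> { "one", "two", "three" }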
--- Post-lexing processing, can work on lexer table output.
function M.post_lex(toklist, seminfolist, toklnlist)
local lnow, sloc = 0, 0
local function chk(ln) -- if a new line, count it as an SLOC
if ln > lnow then -- new line # must be > old line #
sloc = sloc + 1; lnow = ln
end
end
for i = 1, #toklist do -- enumerate over all tokens
local tok, info, ln
= toklist[i], seminfolist[i], toklnlist[i]
if tok == "TK_KEYWORD" or tok == "TK_NAME" or -- significant
tok == "TK_NUMBER" or tok == "TK_OP" then
chk(ln)
-- Both TK_STRING and TK_LSTRING may be multi-line, hence, a loop
-- is needed in order to mark off lines one-by-one. Since llex.lua
-- currently returns the line number of the last part of the string,
-- we must subtract in order to get the starting line number.
elseif tok == "TK_STRING" then -- possible multi-line
local t = split(info)
ln = ln - #t + 1
for _ = 1, #t do
chk(ln); ln = ln + 1
end
elseif tok == "TK_LSTRING" then -- possible multi-line
local t = split(info)
ln = ln - #t + 1
for j = 1, #t do
if t[j] ~= "" then chk(ln) end
ln = ln + 1
end
-- Other tokens are comments or whitespace and are ignored.
end
end--for
print(srcfl..": "..sloc) -- display result
option.EXIT = true
end
return M

View File

@ -0,0 +1,30 @@
---------
-- General utility functions.
--
-- **Note: This module is not part of public API!**
----
local ipairs = ipairs
local pairs = pairs
local M = {}
--- Returns a new table containing the contents of all the given tables.
-- Tables are iterated using @{pairs}, so this function is intended for tables
-- that represent *associative arrays*. Entries with duplicate keys are
-- overwritten with the values from a later table.
--
-- @tparam {table,...} ... The tables to merge.
-- @treturn table A new table.
function M.merge (...)
local result = {}
for _, tab in ipairs{...} do
for key, val in pairs(tab) do
result[key] = val
end
end
return result
end
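-- Example (illustrative): later tables win on duplicate keys:
--   M.merge({ a = 1, b = 1 }, { b = 2 }, { c = 3 })
--   --> { a = 1, b = 2, c = 3 }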
return M

File diff suppressed because it is too large

View File

@ -0,0 +1,181 @@
---------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Convert between various code representation formats. Atomic
-- converters are written in extenso, others are composed automatically
-- by chaining the atomic ones together in a closure.
--
-- Supported formats are:
--
-- * srcfile: the name of a file containing sources.
-- * src: these sources as a single string.
-- * lexstream: a stream of lexemes.
-- * ast: an abstract syntax tree.
-- * proto: a (Yueliang) structure containing a high-level
-- representation of bytecode. Largely based on the
-- Proto structure in Lua's VM
-- * bytecode: a string dump of the function, as taken by
-- loadstring() and produced by string.dump().
-- * function: an executable lua function in RAM.
--
--------------------------------------------------------------------------------
local checks = require 'checks'
local M = { }
--------------------------------------------------------------------------------
-- Order of the transformations. If 'a' is on the left of 'b', then an 'a' can
-- be transformed into a 'b' (but not the other way around).
-- M.sequence goes from numbers to format names, M.order goes from format
-- names to numbers.
--------------------------------------------------------------------------------
M.sequence = {
'srcfile', 'src', 'lexstream', 'ast', 'proto', 'bytecode', 'function' }
local arg_types = {
srcfile = { 'string', '?string' },
src = { 'string', '?string' },
lexstream = { 'lexer.stream', '?string' },
ast = { 'table', '?string' },
proto = { 'table', '?string' },
bytecode = { 'string', '?string' },
}
if false then
-- if defined, runs on every newly-generated AST
function M.check_ast(ast)
local function rec(x, n, parent)
if not x.lineinfo and parent.lineinfo then
local pp = require 'metalua.pprint'
pp.printf("WARNING: Missing lineinfo in child #%s `%s{...} of node at %s",
n, x.tag or '', tostring(parent.lineinfo))
end
for i, child in ipairs(x) do
if type(child)=='table' then rec(child, i, x) end
end
end
rec(ast, -1, { })
end
end
M.order= { }; for a,b in pairs(M.sequence) do M.order[b]=a end
local CONV = { } -- conversion metatable __index
function CONV :srcfile_to_src(x, name)
checks('metalua.compiler', 'string', '?string')
name = name or '@'..x
local f, msg = io.open (x, 'rb')
if not f then error(msg) end
local r, msg = f :read '*a'
if not r then error("Cannot read file '"..x.."': "..msg) end
f :close()
return r, name
end
function CONV :src_to_lexstream(src, name)
checks('metalua.compiler', 'string', '?string')
local r = self.parser.lexer :newstream (src, name)
return r, name
end
function CONV :lexstream_to_ast(lx, name)
checks('metalua.compiler', 'lexer.stream', '?string')
local r = self.parser.chunk(lx)
r.source = name
if M.check_ast then M.check_ast (r) end
return r, name
end
local bytecode_compiler = nil -- cache to avoid repeated `pcall(require(...))`
local function get_bytecode_compiler()
if bytecode_compiler then return bytecode_compiler else
local status, result = pcall(require, 'metalua.compiler.bytecode')
if status then
bytecode_compiler = result
return result
elseif string.match(result, "not found") then
error "Compilation only available with full Metalua"
else error (result) end
end
end
function CONV :ast_to_proto(ast, name)
checks('metalua.compiler', 'table', '?string')
return get_bytecode_compiler().ast_to_proto(ast, name), name
end
function CONV :proto_to_bytecode(proto, name)
return get_bytecode_compiler().proto_to_bytecode(proto), name
end
function CONV :bytecode_to_function(bc, name)
checks('metalua.compiler', 'string', '?string')
return loadstring(bc, name)
end
-- Create all sensible combinations
for i=1,#M.sequence do
local src = M.sequence[i]
for j=i+2, #M.sequence do
local dst = M.sequence[j]
local dst_name = src.."_to_"..dst
local my_arg_types = arg_types[src]
local functions = { }
for k=i, j-1 do
local name = M.sequence[k].."_to_"..M.sequence[k+1]
local f = assert(CONV[name], name)
table.insert (functions, f)
end
CONV[dst_name] = function(self, a, b)
checks('metalua.compiler', unpack(my_arg_types))
for _, f in ipairs(functions) do
a, b = f(self, a, b)
end
return a, b
end
--printf("Created M.%s out of %s", dst_name, table.concat(n, ', '))
end
end
--------------------------------------------------------------------------------
-- This one goes in the "wrong" direction, cannot be composed.
--------------------------------------------------------------------------------
function CONV :function_to_bytecode(...) return string.dump(...) end
function CONV :ast_to_src(...)
require 'metalua.loader' -- ast_to_string isn't written in plain lua
return require 'metalua.compiler.ast_to_src' (...)
end
local MT = { __index=CONV, __type='metalua.compiler' }
function M.new()
local parser = require 'metalua.compiler.parser' .new()
local self = { parser = parser }
setmetatable(self, MT)
return self
end
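-- Usage sketch (illustrative): the composed converters follow the
-- M.sequence naming scheme, e.g.
--
--   local mlc = require 'metalua.compiler'.new()
--   local ast = mlc :src_to_ast 'return 1 + 1'
--   local f   = mlc :ast_to_function (ast)
--   print(f())  --> 2
--
-- Note that ast_to_function goes through the bytecode backend, so it
-- needs the full Metalua distribution (see get_bytecode_compiler above).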
return M

View File

@ -0,0 +1,682 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-{ extension ('match', ...) }
local M = { }
M.__index = M
M.__call = |self, ...| self:run(...)
local pp=require 'metalua.pprint'
--------------------------------------------------------------------------------
-- Instantiate a new AST->source synthesizer
--------------------------------------------------------------------------------
function M.new ()
local self = {
_acc = { }, -- Accumulates pieces of source as strings
current_indent = 0, -- Current level of line indentation
indent_step = " " -- Indentation symbol, normally spaces or '\t'
}
return setmetatable (self, M)
end
--------------------------------------------------------------------------------
-- Run a synthesizer on the `ast' arg and return the source as a string.
-- Can also be used as a static method `M.run (ast)'; in this case,
-- a temporary synthesizer is instantiated on the fly.
--------------------------------------------------------------------------------
function M:run (ast)
if not ast then
self, ast = M.new(), self
end
self._acc = { }
self:node (ast)
return table.concat (self._acc)
end
--------------------------------------------------------------------------------
-- Accumulate a piece of source file in the synthesizer.
--------------------------------------------------------------------------------
function M:acc (x)
if x then table.insert (self._acc, x) end
end
--------------------------------------------------------------------------------
-- Accumulate an indented newline.
-- Jumps an extra line if indentation is 0, so that
-- toplevel definitions are separated by an extra empty line.
--------------------------------------------------------------------------------
function M:nl ()
if self.current_indent == 0 then self:acc "\n" end
self:acc ("\n" .. self.indent_step:rep (self.current_indent))
end
--------------------------------------------------------------------------------
-- Increase indentation and accumulate a new line.
--------------------------------------------------------------------------------
function M:nlindent ()
self.current_indent = self.current_indent + 1
self:nl ()
end
--------------------------------------------------------------------------------
-- Decrease indentation and accumulate a new line.
--------------------------------------------------------------------------------
function M:nldedent ()
self.current_indent = self.current_indent - 1
self:acc ("\n" .. self.indent_step:rep (self.current_indent))
end
--------------------------------------------------------------------------------
-- Keywords, which are illegal as identifiers.
--------------------------------------------------------------------------------
local keywords_list = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function", "if",
"in", "local", "nil", "not", "or",
"repeat", "return", "then", "true", "until",
"while" }
local keywords = { }
for _, kw in pairs(keywords_list) do keywords[kw]=true end
--------------------------------------------------------------------------------
-- Return true iff string `id' is a legal identifier name.
--------------------------------------------------------------------------------
local function is_ident (id)
return string['match'](id, "^[%a_][%w_]*$") and not keywords[id]
end
--------------------------------------------------------------------------------
-- Return true iff ast represents a legal function name for
-- syntax sugar ``function foo.bar.gnat() ... end'':
-- a series of nested string indexes, with an identifier as
-- the innermost node.
--------------------------------------------------------------------------------
local function is_idx_stack (ast)
match ast with
| `Id{ _ } -> return true
| `Index{ left, `String{ _ } } -> return is_idx_stack (left)
| _ -> return false
end
end
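-- Example (illustrative): the AST of `foo.bar.gnat',
--   `Index{ `Index{ `Id "foo", `String "bar" }, `String "gnat" }
-- returns true, so it may be printed with the
-- ``function foo.bar.gnat() ... end'' syntax sugar.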
--------------------------------------------------------------------------------
-- Operator precedences, in increasing order.
-- This is not directly used, it's used to generate op_prec below.
--------------------------------------------------------------------------------
local op_preprec = {
{ "or", "and" },
{ "lt", "le", "eq", "ne" },
{ "concat" },
{ "add", "sub" },
{ "mul", "div", "mod" },
{ "unary", "not", "len" },
{ "pow" },
{ "index" } }
--------------------------------------------------------------------------------
-- operator --> precedence table, generated from op_preprec.
--------------------------------------------------------------------------------
local op_prec = { }
for prec, ops in ipairs (op_preprec) do
for _, op in ipairs (ops) do
op_prec[op] = prec
end
end
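-- Example (illustrative): op_prec.add == 4 and op_prec.mul == 5, so when
-- dumping `Op{ "mul", `Op{ "add", a, b }, c } (i.e. (a + b) * c), M:Op
-- below parenthesizes the left operand because op_prec.mul >= op_prec.add.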
--------------------------------------------------------------------------------
-- operator --> source representation.
--------------------------------------------------------------------------------
local op_symbol = {
add = " + ", sub = " - ", mul = " * ",
div = " / ", mod = " % ", pow = " ^ ",
concat = " .. ", eq = " == ", ne = " ~= ",
lt = " < ", le = " <= ", ["and"] = " and ",
["or"] = " or ", ["not"] = "not ", len = "# " }
--------------------------------------------------------------------------------
-- Accumulate the source representation of AST `node' in
-- the synthesizer. Most of the work is done by delegating to
-- the method having the name of the AST tag.
-- If something can't be converted to normal sources, it's
-- instead dumped as a `-{ ... }' splice in the source accumulator.
--------------------------------------------------------------------------------
function M:node (node)
assert (self~=M and self._acc)
if node==nil then self:acc'<<error>>'
elseif not self.custom_printer or not self.custom_printer (self, node) then
if not node.tag then -- tagless (hence unindented) block.
self:list (node, self.nl)
else
local f = M[node.tag]
if type (f) == "function" then -- Delegate to tag method.
f (self, node, unpack (node))
elseif type (f) == "string" then -- tag string.
self:acc (f)
else -- No appropriate method, fall back to splice dumping.
-- This cannot happen in a plain Lua AST.
self:acc " -{ "
self:acc (pp.tostring (node, {metalua_tag=1, hide_hash=1}), 80)
self:acc " }"
end
end
end
end
function M:block(body)
if not self.custom_printer or not self.custom_printer (self, body) then
self:nlindent ()
self:list (body, self.nl)
self:nldedent ()
end
end
--------------------------------------------------------------------------------
-- Convert every node in the AST list `list' passed as 1st arg.
-- `sep' is an optional separator to be accumulated between each list element,
-- it can be a string or a synth method.
-- `start' is an optional number (default == 1), indicating which is the
-- first element of list to be converted, so that we can skip the beginning
-- of a list.
--------------------------------------------------------------------------------
function M:list (list, sep, start)
for i = start or 1, # list do
self:node (list[i])
if list[i + 1] then
if not sep then
elseif type (sep) == "function" then sep (self)
elseif type (sep) == "string" then self:acc (sep)
else error "Invalid list separator" end
end
end
end
--------------------------------------------------------------------------------
--
-- Tag methods.
-- ------------
--
-- Specific AST node dumping methods, associated to their node kinds
-- by their name, which is the corresponding AST tag.
-- synth:node() is in charge of delegating a node's treatment to the
-- appropriate tag method.
--
-- Such tag methods are called with the AST node as 1st arg.
-- As a convenience, the node's n children are passed as args #2 ... n+1.
--
-- There are several things that could be refactored into common subroutines
-- here: statement blocks dumping, function dumping...
-- However, given their small size and linear execution
-- (they basically perform series of :acc(), :node(), :list(),
-- :nl(), :nlindent() and :nldedent() calls), it seems more readable
-- to avoid multiplication of such tiny functions.
--
-- To make sense out of these, you need to know metalua's AST syntax, as
-- found in the reference manual or in metalua/doc/ast.txt.
--
--------------------------------------------------------------------------------
function M:Do (node)
self:acc "do"
self:block (node)
self:acc "end"
end
function M:Set (node)
match node with
| `Set{ { `Index{ lhs, `String{ method } } },
{ `Function{ { `Id "self", ... } == params, body } } }
if is_idx_stack (lhs) and is_ident (method) ->
-- ``function foo:bar(...) ... end'' --
self:acc "function "
self:node (lhs)
self:acc ":"
self:acc (method)
self:acc " ("
self:list (params, ", ", 2)
self:acc ")"
self:block (body)
self:acc "end"
| `Set{ { lhs }, { `Function{ params, body } } } if is_idx_stack (lhs) ->
-- ``function foo(...) ... end'' --
self:acc "function "
self:node (lhs)
self:acc " ("
self:list (params, ", ")
self:acc ")"
self:block (body)
self:acc "end"
| `Set{ { `Id{ lhs1name } == lhs1, ... } == lhs, rhs }
if not is_ident (lhs1name) ->
-- ``foo, ... = ...'' when foo is *not* a valid identifier.
-- In that case, the spliced 1st variable must get parentheses,
-- to be distinguished from a statement splice.
-- This cannot happen in a plain Lua AST.
self:acc "("
self:node (lhs1)
self:acc ")"
if lhs[2] then -- more than one lhs variable
self:acc ", "
self:list (lhs, ", ", 2)
end
self:acc " = "
self:list (rhs, ", ")
| `Set{ lhs, rhs } ->
-- ``... = ...'', no syntax sugar --
self:list (lhs, ", ")
self:acc " = "
self:list (rhs, ", ")
| `Set{ lhs, rhs, annot } ->
-- ``... = ...'', no syntax sugar, annotation --
local n = #lhs
for i=1,n do
local ell, a = lhs[i], annot[i]
self:node (ell)
if a then
self:acc ' #'
self:node(a)
end
if i~=n then self:acc ', ' end
end
self:acc " = "
self:list (rhs, ", ")
end
end
function M:While (node, cond, body)
self:acc "while "
self:node (cond)
self:acc " do"
self:block (body)
self:acc "end"
end
function M:Repeat (node, body, cond)
self:acc "repeat"
self:block (body)
self:acc "until "
self:node (cond)
end
function M:If (node)
for i = 1, #node-1, 2 do
-- for each ``if/then'' and ``elseif/then'' pair --
local cond, body = node[i], node[i+1]
self:acc (i==1 and "if " or "elseif ")
self:node (cond)
self:acc " then"
self:block (body)
end
-- odd number of children --> last one is an `else' clause --
if #node%2 == 1 then
self:acc "else"
self:block (node[#node])
end
self:acc "end"
end
function M:Fornum (node, var, first, last)
local body = node[#node]
self:acc "for "
self:node (var)
self:acc " = "
self:node (first)
self:acc ", "
self:node (last)
if #node==5 then -- 5 children --> child #4 is a step increment.
self:acc ", "
self:node (node[4])
end
self:acc " do"
self:block (body)
self:acc "end"
end
function M:Forin (node, vars, generators, body)
self:acc "for "
self:list (vars, ", ")
self:acc " in "
self:list (generators, ", ")
self:acc " do"
self:block (body)
self:acc "end"
end
function M:Local (node, lhs, rhs, annots)
if next (lhs) then
self:acc "local "
if annots then
local n = #lhs
for i=1, n do
self:node (lhs[i])
local a = annots[i]
if a then
self:acc ' #'
self:node (a)
end
if i~=n then self:acc ', ' end
end
else
self:list (lhs, ", ")
end
if rhs[1] then
self:acc " = "
self:list (rhs, ", ")
end
else -- Can't create a local statement with 0 variables in plain Lua
self:acc (pp.tostring (node, {metalua_tag=1, hide_hash=1, fix_indent=2}))
end
end
function M:Localrec (node, lhs, rhs)
match node with
| `Localrec{ { `Id{name} }, { `Function{ params, body } } }
if is_ident (name) ->
-- ``local function name() ... end'' --
self:acc "local function "
self:acc (name)
self:acc " ("
self:list (params, ", ")
self:acc ")"
self:block (body)
self:acc "end"
| _ ->
-- Other localrec are unprintable ==> splice them --
-- This cannot happen in a plain Lua AST. --
self:acc "-{ "
self:acc (pp.tostring (node, {metalua_tag=1, hide_hash=1, fix_indent=2}))
self:acc " }"
end
end
function M:Call (node, f)
-- single string or table literal arg ==> no need for parentheses. --
local parens
match node with
| `Call{ _, `String{_} }
| `Call{ _, `Table{...}} -> parens = false
| _ -> parens = true
end
self:node (f)
self:acc (parens and " (" or " ")
self:list (node, ", ", 2) -- skip `f'.
self:acc (parens and ")")
end
function M:Invoke (node, f, method)
-- single string or table literal arg ==> no need for parentheses. --
local parens
match node with
| `Invoke{ _, _, `String{_} }
| `Invoke{ _, _, `Table{...}} -> parens = false
| _ -> parens = true
end
self:node (f)
self:acc ":"
self:acc (method[1])
self:acc (parens and " (" or " ")
self:list (node, ", ", 3) -- Skip args #1 and #2, object and method name.
self:acc (parens and ")")
end
function M:Return (node)
self:acc "return "
self:list (node, ", ")
end
M.Break = "break"
M.Nil = "nil"
M.False = "false"
M.True = "true"
M.Dots = "..."
function M:Number (node, n)
self:acc (tostring (n))
end
function M:String (node, str)
-- format "%q" prints '\n' in an umpractical way IMO,
-- so this is fixed with the :gsub( ) call.
self:acc (string.format ("%q", str):gsub ("\\\n", "\\n"))
end
function M:Function (node, params, body, annots)
self:acc "function ("
if annots then
local n = #params
for i=1,n do
local p, a = params[i], annots[i]
self:node(p)
if a then
self:acc " #"
self:node(a)
end
if i~=n then self:acc ', ' end
end
else
self:list (params, ", ")
end
self:acc ")"
self:block (body)
self:acc "end"
end
function M:Table (node)
if not node[1] then self:acc "{ }" else
self:acc "{"
if #node > 1 then self:nlindent () else self:acc " " end
for i, elem in ipairs (node) do
match elem with
| `Pair{ `String{ key }, value } if is_ident (key) ->
-- ``key = value''. --
self:acc (key)
self:acc " = "
self:node (value)
| `Pair{ key, value } ->
-- ``[key] = value''. --
self:acc "["
self:node (key)
self:acc "] = "
self:node (value)
| _ ->
-- ``value''. --
self:node (elem)
end
if node [i+1] then
self:acc ","
self:nl ()
end
end
if #node > 1 then self:nldedent () else self:acc " " end
self:acc "}"
end
end
function M:Op (node, op, a, b)
-- Transform ``not (a == b)'' into ``a ~= b''. --
match node with
| `Op{ "not", `Op{ "eq", _a, _b } }
| `Op{ "not", `Paren{ `Op{ "eq", _a, _b } } } ->
op, a, b = "ne", _a, _b
| _ ->
end
if b then -- binary operator.
local left_paren, right_paren
match a with
| `Op{ op_a, ...} if op_prec[op] >= op_prec[op_a] -> left_paren = true
| _ -> left_paren = false
end
match b with -- FIXME: might not work with right assoc operators ^ and ..
| `Op{ op_b, ...} if op_prec[op] >= op_prec[op_b] -> right_paren = true
| _ -> right_paren = false
end
self:acc (left_paren and "(")
self:node (a)
self:acc (left_paren and ")")
self:acc (op_symbol [op])
self:acc (right_paren and "(")
self:node (b)
self:acc (right_paren and ")")
else -- unary operator.
local paren
match a with
| `Op{ op_a, ... } if op_prec[op] >= op_prec[op_a] -> paren = true
| _ -> paren = false
end
self:acc (op_symbol[op])
self:acc (paren and "(")
self:node (a)
self:acc (paren and ")")
end
end
function M:Paren (node, content)
self:acc "("
self:node (content)
self:acc ")"
end
function M:Index (node, table, key)
local paren_table
-- Check precedence, see if parens are needed around the table --
match table with
| `Op{ op, ... } if op_prec[op] < op_prec.index -> paren_table = true
| _ -> paren_table = false
end
self:acc (paren_table and "(")
self:node (table)
self:acc (paren_table and ")")
match key with
| `String{ field } if is_ident (field) ->
-- ``table.key''. --
self:acc "."
self:acc (field)
| _ ->
-- ``table [key]''. --
self:acc "["
self:node (key)
self:acc "]"
end
end
function M:Id (node, name)
if is_ident (name) then
self:acc (name)
else -- Unprintable identifier, fall back to splice representation.
-- This cannot happen in a plain Lua AST.
self:acc "-{`Id "
self:String (node, name)
self:acc "}"
end
end
M.TDyn = '*'
M.TDynbar = '**'
M.TPass = 'pass'
M.TField = 'field'
M.TIdbar = M.TId
M.TReturn = M.Return
function M:TId (node, name) self:acc(name) end
function M:TCatbar(node, te, tebar)
self:acc'('
self:node(te)
self:acc'|'
self:tebar(tebar)
self:acc')'
end
function M:TFunction(node, p, r)
self:tebar(p)
self:acc '->'
self:tebar(r)
end
function M:TTable (node, default, pairs)
self:acc '['
self:list (pairs, ', ')
if default.tag~='TField' then
self:acc '|'
self:node (default)
end
self:acc ']'
end
function M:TPair (node, k, v)
self:node (k)
self:acc '='
self:node (v)
end
function M:TIdbar (node, name)
self :acc (name)
end
function M:TCatbar (node, a, b)
self:node(a)
self:acc ' ++ '
self:node(b)
end
function M:tebar(node)
if node.tag then self:node(node) else
self:acc '('
self:list(node, ', ')
self:acc ')'
end
end
function M:TUnkbar(node, name)
self:acc '~~'
self:acc (name)
end
function M:TUnk(node, name)
self:acc '~'
self:acc (name)
end
for name, tag in pairs{ const='TConst', var='TVar', currently='TCurrently', just='TJust' } do
M[tag] = function(self, node, te)
self:acc (name..' ')
self:node(te)
end
end
return M


@ -0,0 +1,29 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local compile = require 'metalua.compiler.bytecode.compile'
local ldump = require 'metalua.compiler.bytecode.ldump'
local M = { }
M.ast_to_proto = compile.ast_to_proto
M.proto_to_bytecode = ldump.dump_string
M.proto_to_file = ldump.dump_file
return M

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,448 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2005-2013 Kein-Hong Man, Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Kein-Hong Man - Initial implementation for Lua 5.0, part of Yueliang
-- Fabien Fleutot - Port to Lua 5.1, integration with Metalua
--
-------------------------------------------------------------------------------
--[[--------------------------------------------------------------------
ldump.lua
Save bytecodes in Lua
This file is part of Yueliang.
Copyright (c) 2005 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
------------------------------------------------------------------------
[FF] Slightly modified, mainly to produce Lua 5.1 bytecode.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- Notes:
-- * LUA_NUMBER (double), byte order (little endian) and some other
-- header values hard-coded; see other notes below...
-- * One significant difference is that instructions are still in table
-- form (with OP/A/B/C/Bx fields) and luaP:Instruction() is needed to
-- convert them into 4-char strings
-- * Deleted:
-- luaU:DumpVector: folded into DumpLines, DumpCode
-- * Added:
-- luaU:endianness() (from lundump.c)
-- luaU:make_setS: create a chunk writer that writes to a string
-- luaU:make_setF: create a chunk writer that writes to a file
-- (lua.h contains a typedef for a Chunkwriter pointer, and
-- a Lua-based implementation exists, writer() in lstrlib.c)
-- luaU:from_double(x): encode double value for writing
-- luaU:from_int(x): encode integer value for writing
-- (error checking is limited for these conversion functions)
-- (double conversion does not support denormals or NaNs)
-- luaU:ttype(o) (from lobject.h)
----------------------------------------------------------------------]]
local luaP = require 'metalua.compiler.bytecode.lopcodes'
local M = { }
local format = { }
format.header = string.dump(function()end):sub(1, 12)
format.little_endian, format.int_size,
format.size_t_size, format.instr_size,
format.number_size, format.integral = format.header:byte(7, 12)
format.little_endian = format.little_endian~=0
format.integral = format.integral ~=0
assert(format.integral or format.number_size==8, "Number format not supported by dumper")
assert(format.little_endian, "Big endian architectures not supported by dumper")
--requires luaP
local luaU = { }
M.luaU = luaU
luaU.format = format
-- constants used by dumper
luaU.LUA_TNIL = 0
luaU.LUA_TBOOLEAN = 1
luaU.LUA_TNUMBER = 3 -- (all in lua.h)
luaU.LUA_TSTRING = 4
luaU.LUA_TNONE = -1
-- definitions for headers of binary files
--luaU.LUA_SIGNATURE = "\27Lua" -- binary files start with "<esc>Lua"
--luaU.VERSION = 81 -- 0x51; last format change was in 5.1
--luaU.FORMAT_VERSION = 0 -- 0 is official version. yeah I know I'm a liar.
-- a multiple of PI for testing native format
-- multiplying by 1E7 gives non-trivial integer values
--luaU.TEST_NUMBER = 3.14159265358979323846E7
--[[--------------------------------------------------------------------
-- Additional functions to handle chunk writing
-- * to use make_setS and make_setF, see test_ldump.lua elsewhere
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- works like the lobject.h version except that TObject used in these
-- scripts only has a 'value' field, no 'tt' field (native types used)
------------------------------------------------------------------------
function luaU:ttype(o)
local tt = type(o.value)
if tt == "number" then return self.LUA_TNUMBER
elseif tt == "string" then return self.LUA_TSTRING
elseif tt == "nil" then return self.LUA_TNIL
elseif tt == "boolean" then return self.LUA_TBOOLEAN
else
return self.LUA_TNONE -- the rest should not appear
end
end
------------------------------------------------------------------------
-- create a chunk writer that writes to a string
-- * returns the writer function and a table containing the string
-- * to get the final result, look in buff.data
------------------------------------------------------------------------
function luaU:make_setS()
local buff = {}
buff.data = ""
local writer =
function(s, buff) -- chunk writer
if not s then return end
buff.data = buff.data..s
end
return writer, buff
end
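-- Usage sketch: accumulate a dumped chunk into a string.
--   local writer, buff = luaU:make_setS()
--   writer("\27Lua", buff)      -- append a block
--   writer(nil, buff)           -- final call is a no-op for string buffers
--   assert(buff.data == "\27Lua")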
------------------------------------------------------------------------
-- create a chunk writer that writes to a file
-- * returns the writer function and a table containing the file handle
-- * if a nil is passed, then writer should close the open file
------------------------------------------------------------------------
function luaU:make_setF(filename)
local buff = {}
buff.h = io.open(filename, "wb")
if not buff.h then return nil end
local writer =
function(s, buff) -- chunk writer
if not buff.h then return end
if not s then buff.h:close(); return end
buff.h:write(s)
end
return writer, buff
end
-----------------------------------------------------------------------
-- converts an IEEE754 double number to an 8-byte little-endian string
-- * luaU:from_double() and luaU:from_int() are from ChunkBake project
-- * supports +/- Infinity, but not denormals or NaNs
-----------------------------------------------------------------------
function luaU:from_double(x)
local function grab_byte(v)
return math.floor(v / 256),
string.char(math.mod(math.floor(v), 256))
end
local sign = 0
if x < 0 then sign = 1; x = -x end
local mantissa, exponent = math.frexp(x)
if x == 0 then -- zero
mantissa, exponent = 0, 0
elseif x == 1/0 then
mantissa, exponent = 0, 2047
else
mantissa = (mantissa * 2 - 1) * math.ldexp(0.5, 53)
exponent = exponent + 1022
end
local v, byte = "" -- convert to bytes
x = mantissa
for i = 1,6 do
x, byte = grab_byte(x); v = v..byte -- 47:0
end
x, byte = grab_byte(exponent * 16 + x); v = v..byte -- 55:48
x, byte = grab_byte(sign * 128 + x); v = v..byte -- 63:56
return v
end
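-- Sanity-check sketch: IEEE754 encodes 1.0 as 00 00 00 00 00 00 F0 3F
-- in little-endian byte order, which is what this routine reproduces:
--   assert(luaU:from_double(1) ==
--          string.char(0, 0, 0, 0, 0, 0, 0xF0, 0x3F))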
-----------------------------------------------------------------------
-- converts a number to a little-endian 32-bit integer string
-- * input value assumed to not overflow, can be signed/unsigned
-----------------------------------------------------------------------
function luaU:from_int(x, size)
local v = ""
x = math.floor(x)
if x >= 0 then
for i = 1, size do
v = v..string.char(math.mod(x, 256)); x = math.floor(x / 256)
end
else -- x < 0
x = -x
local carry = 1
for i = 1, size do
local c = 255 - math.mod(x, 256) + carry
if c == 256 then c = 0; carry = 1 else carry = 0 end
v = v..string.char(c); x = math.floor(x / 256)
end
end
return v
end
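-- Sanity-check sketch: two's complement, little-endian; -1 over 4 bytes
-- is FF FF FF FF, and 258 is 02 01 00 00.
--   assert(luaU:from_int(-1, 4) == string.char(255, 255, 255, 255))
--   assert(luaU:from_int(258, 4) == string.char(2, 1, 0, 0))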
--[[--------------------------------------------------------------------
-- Functions to make a binary chunk
-- * many functions have the size parameter removed, since output is
-- in the form of a string and some sizes are implicit or hard-coded
-- * luaU:DumpVector has been deleted (used in DumpCode & DumpLines)
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- dump a block of literal bytes
------------------------------------------------------------------------
function luaU:DumpLiteral(s, D) self:DumpBlock(s, D) end
--[[--------------------------------------------------------------------
-- struct DumpState:
-- L -- lua_State (not used in this script)
-- write -- lua_Chunkwriter (chunk writer function)
-- data -- void* (chunk writer context or data already written)
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- dumps a block of bytes
-- * lua_unlock(D.L), lua_lock(D.L) deleted
------------------------------------------------------------------------
function luaU:DumpBlock(b, D) D.write(b, D.data) end
------------------------------------------------------------------------
-- dumps a single byte
------------------------------------------------------------------------
function luaU:DumpByte(y, D)
self:DumpBlock(string.char(y), D)
end
------------------------------------------------------------------------
-- dumps a signed integer of size `format.int_size` (for int)
------------------------------------------------------------------------
function luaU:DumpInt(x, D)
self:DumpBlock(self:from_int(x, format.int_size), D)
end
------------------------------------------------------------------------
-- dumps an unsigned integer of size `format.size_t_size` (for size_t)
------------------------------------------------------------------------
function luaU:DumpSize(x, D)
self:DumpBlock(self:from_int(x, format.size_t_size), D)
end
------------------------------------------------------------------------
-- dumps a LUA_NUMBER; can be an int or double depending on the VM.
------------------------------------------------------------------------
function luaU:DumpNumber(x, D)
if format.integral then
self:DumpBlock(self:from_int(x, format.number_size), D)
else
self:DumpBlock(self:from_double(x), D)
end
end
------------------------------------------------------------------------
-- dumps a Lua string
------------------------------------------------------------------------
function luaU:DumpString(s, D)
if s == nil then
self:DumpSize(0, D)
else
s = s.."\0" -- include trailing '\0'
self:DumpSize(string.len(s), D)
self:DumpBlock(s, D)
end
end
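-- Sketch: dumping "hi" writes the size 3 (the trailing NUL is counted in
-- the size, as in PUC Lua) followed by the bytes 'h', 'i', '\0'.
--   local w, b = luaU:make_setS()
--   luaU:DumpString("hi", { write = w, data = b })
--   assert(b.data:sub(-3) == "hi\0")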
------------------------------------------------------------------------
-- dumps instruction block from function prototype
------------------------------------------------------------------------
function luaU:DumpCode(f, D)
local n = f.sizecode
self:DumpInt(n, D)
--was DumpVector
for i = 0, n - 1 do
self:DumpBlock(luaP:Instruction(f.code[i]), D)
end
end
------------------------------------------------------------------------
-- dumps local variable names from function prototype
------------------------------------------------------------------------
function luaU:DumpLocals(f, D)
local n = f.sizelocvars
self:DumpInt(n, D)
for i = 0, n - 1 do
-- Dirty temporary fix:
-- `Stat{ } properly keeps count of the number of local vars,
-- but fails to keep track of their debug info (names).
-- It therefore might happen that #f.localvars < f.sizelocvars, or
-- that a variable's startpc and endpc fields are left unset.
-- FIXME: This might not be needed anymore, check the bug report
-- by J. Belmonte.
local var = f.locvars[i]
if not var then break end
-- printf("[DUMPLOCALS] dumping local var #%i = %s", i, table.tostring(var))
self:DumpString(var.varname, D)
self:DumpInt(var.startpc or 0, D)
self:DumpInt(var.endpc or 0, D)
end
end
------------------------------------------------------------------------
-- dumps line information from function prototype
------------------------------------------------------------------------
function luaU:DumpLines(f, D)
local n = f.sizelineinfo
self:DumpInt(n, D)
--was DumpVector
for i = 0, n - 1 do
self:DumpInt(f.lineinfo[i], D) -- was DumpBlock
--print(i, f.lineinfo[i])
end
end
------------------------------------------------------------------------
-- dump upvalue names from function prototype
------------------------------------------------------------------------
function luaU:DumpUpvalues(f, D)
local n = f.sizeupvalues
self:DumpInt(n, D)
for i = 0, n - 1 do
self:DumpString(f.upvalues[i], D)
end
end
------------------------------------------------------------------------
-- dump constant pool from function prototype
-- * nvalue(o) and tsvalue(o) macros removed
------------------------------------------------------------------------
function luaU:DumpConstants(f, D)
local n = f.sizek
self:DumpInt(n, D)
for i = 0, n - 1 do
local o = f.k[i] -- TObject
local tt = self:ttype(o)
assert (tt >= 0)
self:DumpByte(tt, D)
if tt == self.LUA_TNUMBER then
self:DumpNumber(o.value, D)
elseif tt == self.LUA_TSTRING then
self:DumpString(o.value, D)
elseif tt == self.LUA_TBOOLEAN then
self:DumpByte (o.value and 1 or 0, D)
elseif tt == self.LUA_TNIL then
else
assert(false) -- cannot happen
end
end
end
function luaU:DumpProtos (f, D)
local n = f.sizep
assert (n)
self:DumpInt(n, D)
for i = 0, n - 1 do
self:DumpFunction(f.p[i], f.source, D)
end
end
function luaU:DumpDebug(f, D)
self:DumpLines(f, D)
self:DumpLocals(f, D)
self:DumpUpvalues(f, D)
end
------------------------------------------------------------------------
-- dump child function prototypes from function prototype
--FF completely reworked for 5.1 format
------------------------------------------------------------------------
function luaU:DumpFunction(f, p, D)
-- print "Dumping function:"
-- table.print(f, 60)
local source = f.source
if source == p then source = nil end
self:DumpString(source, D)
self:DumpInt(f.lineDefined, D)
self:DumpInt(f.lastLineDefined or 42, D)
self:DumpByte(f.nups, D)
self:DumpByte(f.numparams, D)
self:DumpByte(f.is_vararg, D)
self:DumpByte(f.maxstacksize, D)
self:DumpCode(f, D)
self:DumpConstants(f, D)
self:DumpProtos( f, D)
self:DumpDebug(f, D)
end
------------------------------------------------------------------------
-- dump Lua header section (some sizes hard-coded)
--FF: updated for version 5.1
------------------------------------------------------------------------
function luaU:DumpHeader(D)
self:DumpLiteral(format.header, D)
end
------------------------------------------------------------------------
-- dump function as precompiled chunk
-- * w, data are created from make_setS, make_setF
--FF: suppressed extraneous [L] param
------------------------------------------------------------------------
function luaU:dump (Main, w, data)
local D = {} -- DumpState
D.write = w
D.data = data
self:DumpHeader(D)
self:DumpFunction(Main, nil, D)
-- added: for a chunk writer writing to a file, this final call with
-- nil data is to indicate to the writer to close the file
D.write(nil, D.data)
end
------------------------------------------------------------------------
-- find byte order (from lundump.c)
-- * hard-coded to little-endian
------------------------------------------------------------------------
function luaU:endianness()
return 1
end
-- FIXME: ugly concat-based generation in [make_setS], bufferize properly!
function M.dump_string (proto)
local writer, buff = luaU:make_setS()
luaU:dump (proto, writer, buff)
return buff.data
end
-- FIXME: [make_setS] sucks, perform synchronous file writing
-- Now unused
function M.dump_file (proto, filename)
local writer, buff = luaU:make_setS()
luaU:dump (proto, writer, buff)
local file = io.open (filename, "wb")
file:write (buff.data)
io.close(file)
--if UNIX_SHARPBANG then os.execute ("chmod a+x "..filename) end
end
return M


@ -0,0 +1,442 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2005-2013 Kein-Hong Man, Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Kein-Hong Man - Initial implementation for Lua 5.0, part of Yueliang
-- Fabien Fleutot - Port to Lua 5.1, integration with Metalua
--
-------------------------------------------------------------------------------
--[[--------------------------------------------------------------------
$Id$
lopcodes.lua
Lua 5 virtual machine opcodes in Lua
This file is part of Yueliang.
Copyright (c) 2005 Kein-Hong Man <khman@users.sf.net>
The COPYRIGHT file describes the conditions
under which this software may be distributed.
See the ChangeLog for more information.
------------------------------------------------------------------------
[FF] Slightly modified, mainly to produce Lua 5.1 bytecode.
----------------------------------------------------------------------]]
--[[--------------------------------------------------------------------
-- Notes:
-- * an Instruction is a table with OP, A, B, C, Bx elements; this
-- should allow instruction handling to work with doubles and ints
-- * Added:
-- luaP:Instruction(i): convert field elements to a 4-char string
-- luaP:DecodeInst(x): convert 4-char string into field elements
-- * WARNING luaP:Instruction outputs instructions encoded in little-
-- endian form, and field sizes and positions are hard-coded
----------------------------------------------------------------------]]
local function debugf() end
local luaP = { }
--[[
===========================================================================
We assume that instructions are unsigned numbers.
All instructions have an opcode in the first 6 bits.
Instructions can have the following fields:
'A' : 8 bits
'B' : 9 bits
'C' : 9 bits
'Bx' : 18 bits ('B' and 'C' together)
'sBx' : signed Bx
A signed argument is represented in excess K; that is, the number
value is the unsigned value minus K. K is exactly the maximum value
for that argument (so that -max is represented by 0, and +max is
represented by 2*max), which is half the maximum for the corresponding
unsigned argument.
===========================================================================
--]]
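-- Excess-K sketch: for sBx, K = MAXARG_sBx = 131071 (defined below); a
-- jump offset of -3 is stored as the unsigned value 131068 and decoded
-- back by GETARG_sBx as 131068 - 131071 = -3.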
luaP.OpMode = {"iABC", "iABx", "iAsBx"} -- basic instruction format
------------------------------------------------------------------------
-- size and position of opcode arguments.
-- * WARNING sizes and positions are hard-coded elsewhere in this script
------------------------------------------------------------------------
luaP.SIZE_C = 9
luaP.SIZE_B = 9
luaP.SIZE_Bx = luaP.SIZE_C + luaP.SIZE_B
luaP.SIZE_A = 8
luaP.SIZE_OP = 6
luaP.POS_C = luaP.SIZE_OP
luaP.POS_B = luaP.POS_C + luaP.SIZE_C
luaP.POS_Bx = luaP.POS_C
luaP.POS_A = luaP.POS_B + luaP.SIZE_B
--FF from 5.1
luaP.BITRK = 2^(luaP.SIZE_B - 1)
function luaP:ISK(x) return x >= self.BITRK end
luaP.MAXINDEXRK = luaP.BITRK - 1
function luaP:RKASK(x)
if x < self.BITRK then return x+self.BITRK else return x end
end
------------------------------------------------------------------------
-- limits for opcode arguments.
-- we use (signed) int to manipulate most arguments,
-- so they must fit in BITS_INT-1 bits (-1 for sign)
------------------------------------------------------------------------
-- removed "#if SIZE_Bx < BITS_INT-1" test, assume this script is
-- running on a Lua VM with double or int as LUA_NUMBER
luaP.MAXARG_Bx = math.ldexp(1, luaP.SIZE_Bx) - 1
luaP.MAXARG_sBx = math.floor(luaP.MAXARG_Bx / 2) -- 'sBx' is signed
luaP.MAXARG_A = math.ldexp(1, luaP.SIZE_A) - 1
luaP.MAXARG_B = math.ldexp(1, luaP.SIZE_B) - 1
luaP.MAXARG_C = math.ldexp(1, luaP.SIZE_C) - 1
-- creates a mask with 'n' 1 bits at position 'p'
-- MASK1(n,p) deleted
-- creates a mask with 'n' 0 bits at position 'p'
-- MASK0(n,p) deleted
--[[--------------------------------------------------------------------
Visual representation for reference:
31 | | | 0 bit position
+-----+-----+-----+----------+
| B | C | A | Opcode | iABC format
+-----+-----+-----+----------+
- 9 - 9 - 8 - 6 - field sizes
+-----+-----+-----+----------+
| [s]Bx | A | Opcode | iABx | iAsBx format
+-----+-----+-----+----------+
----------------------------------------------------------------------]]
------------------------------------------------------------------------
-- the following macros help to manipulate instructions
-- * changed to a table object representation, very clean compared to
-- the [nightmare] alternatives of using a number or a string
------------------------------------------------------------------------
-- these accept or return opcodes in the form of string names
function luaP:GET_OPCODE(i) return self.ROpCode[i.OP] end
function luaP:SET_OPCODE(i, o) i.OP = self.OpCode[o] end
function luaP:GETARG_A(i) return i.A end
function luaP:SETARG_A(i, u) i.A = u end
function luaP:GETARG_B(i) return i.B end
function luaP:SETARG_B(i, b) i.B = b end
function luaP:GETARG_C(i) return i.C end
function luaP:SETARG_C(i, b) i.C = b end
function luaP:GETARG_Bx(i) return i.Bx end
function luaP:SETARG_Bx(i, b) i.Bx = b end
function luaP:GETARG_sBx(i) return i.Bx - self.MAXARG_sBx end
function luaP:SETARG_sBx(i, b) i.Bx = b + self.MAXARG_sBx end
function luaP:CREATE_ABC(o,a,b,c)
return {OP = self.OpCode[o], A = a, B = b, C = c}
end
function luaP:CREATE_ABx(o,a,bc)
return {OP = self.OpCode[o], A = a, Bx = bc}
end
------------------------------------------------------------------------
-- Bit shuffling stuff
------------------------------------------------------------------------
if false and pcall (require, 'bit') then
------------------------------------------------------------------------
-- Return a 4-char string little-endian encoded form of an instruction
------------------------------------------------------------------------
function luaP:Instruction(i)
--FIXME
end
else
------------------------------------------------------------------------
-- Version without bit manipulation library.
------------------------------------------------------------------------
local p2 = {1,2,4,8,16,32,64,128,256, 512, 1024, 2048, 4096}
-- keeps [n] bits from [x]
local function keep (x, n) return x % p2[n+1] end
-- shifts bits of [x] [n] places to the right
local function srb (x,n) return math.floor (x / p2[n+1]) end
-- shifts bits of [x] [n] places to the left
local function slb (x,n) return x * p2[n+1] end
------------------------------------------------------------------------
-- Return a 4-char string little-endian encoded form of an instruction
------------------------------------------------------------------------
function luaP:Instruction(i)
-- printf("Instr->string: %s %s", self.opnames[i.OP], table.tostring(i))
local c0, c1, c2, c3
-- change to OP/A/B/C format if needed
if i.Bx then i.C = keep (i.Bx, 9); i.B = srb (i.Bx, 9) end
-- c0 = 6B from opcode + 2LSB from A (flushed to MSB)
c0 = i.OP + slb (keep (i.A, 2), 6)
-- c1 = 6MSB from A + 2LSB from C (flushed to MSB)
c1 = srb (i.A, 2) + slb (keep (i.C, 2), 6)
-- c2 = 7MSB from C + 1LSB from B (flushed to MSB)
c2 = srb (i.C, 2) + slb (keep (i.B, 1), 7)
-- c3 = 8MSB from B
c3 = srb (i.B, 1)
--printf ("Instruction: %s %s", self.opnames[i.OP], tostringv (i))
--printf ("Bin encoding: %x %x %x %x", c0, c1, c2, c3)
return string.char(c0, c1, c2, c3)
end
end
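-- Worked example (sketch): MOVE A=1 B=0 C=0 is opcode 0 in the low six
-- bits plus A=1 starting at bit 6, i.e. the little-endian bytes 40 00 00 00.
--   assert(luaP:Instruction(luaP:CREATE_ABC("OP_MOVE", 1, 0, 0))
--          == string.char(0x40, 0, 0, 0))
-- (OpCode["OP_MOVE"] is filled in by the opcode name table further below.)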
------------------------------------------------------------------------
-- decodes a 4-char little-endian string into an instruction struct
------------------------------------------------------------------------
function luaP:DecodeInst(x)
error "Not implemented"
end
------------------------------------------------------------------------
-- invalid register that fits in 8 bits
------------------------------------------------------------------------
luaP.NO_REG = luaP.MAXARG_A
------------------------------------------------------------------------
-- R(x) - register
-- Kst(x) - constant (in constant table)
-- RK(x) == if x < MAXSTACK then R(x) else Kst(x-MAXSTACK)
------------------------------------------------------------------------
------------------------------------------------------------------------
-- grep "ORDER OP" if you change these enums
------------------------------------------------------------------------
--[[--------------------------------------------------------------------
Lua virtual machine opcodes (enum OpCode):
------------------------------------------------------------------------
name args description
------------------------------------------------------------------------
OP_MOVE A B R(A) := R(B)
OP_LOADK A Bx R(A) := Kst(Bx)
OP_LOADBOOL A B C R(A) := (Bool)B; if (C) PC++
OP_LOADNIL A B R(A) := ... := R(B) := nil
OP_GETUPVAL A B R(A) := UpValue[B]
OP_GETGLOBAL A Bx R(A) := Gbl[Kst(Bx)]
OP_GETTABLE A B C R(A) := R(B)[RK(C)]
OP_SETGLOBAL A Bx Gbl[Kst(Bx)] := R(A)
OP_SETUPVAL A B UpValue[B] := R(A)
OP_SETTABLE A B C R(A)[RK(B)] := RK(C)
OP_NEWTABLE A B C R(A) := {} (size = B,C)
OP_SELF A B C R(A+1) := R(B); R(A) := R(B)[RK(C)]
OP_ADD A B C R(A) := RK(B) + RK(C)
OP_SUB A B C R(A) := RK(B) - RK(C)
OP_MUL A B C R(A) := RK(B) * RK(C)
OP_DIV A B C R(A) := RK(B) / RK(C)
OP_POW A B C R(A) := RK(B) ^ RK(C)
OP_UNM A B R(A) := -R(B)
OP_NOT A B R(A) := not R(B)
OP_CONCAT A B C R(A) := R(B).. ... ..R(C)
OP_JMP sBx PC += sBx
OP_EQ A B C if ((RK(B) == RK(C)) ~= A) then pc++
OP_LT A B C if ((RK(B) < RK(C)) ~= A) then pc++
OP_LE A B C if ((RK(B) <= RK(C)) ~= A) then pc++
OP_TEST A B C if (R(B) <=> C) then R(A) := R(B) else pc++
OP_CALL A B C R(A), ... ,R(A+C-2) := R(A)(R(A+1), ... ,R(A+B-1))
OP_TAILCALL A B C return R(A)(R(A+1), ... ,R(A+B-1))
OP_RETURN A B return R(A), ... ,R(A+B-2) (see note)
OP_FORLOOP A sBx R(A)+=R(A+2); if R(A) <?= R(A+1) then PC+= sBx
OP_TFORLOOP A C R(A+2), ... ,R(A+2+C) := R(A)(R(A+1), R(A+2));
if R(A+2) ~= nil then pc++
OP_TFORPREP A sBx if type(R(A)) == table then R(A+1):=R(A), R(A):=next;
PC += sBx
OP_SETLIST A Bx R(A)[Bx-Bx%FPF+i] := R(A+i), 1 <= i <= Bx%FPF+1
OP_SETLISTO A Bx (see note)
OP_CLOSE A close all variables in the stack up to (>=) R(A)
OP_CLOSURE A Bx R(A) := closure(KPROTO[Bx], R(A), ... ,R(A+n))
----------------------------------------------------------------------]]
luaP.opnames = {} -- opcode names
luaP.OpCode = {} -- lookup name -> number
luaP.ROpCode = {} -- lookup number -> name
local i = 0
for v in string.gfind([[
MOVE -- 0
LOADK
LOADBOOL
LOADNIL
GETUPVAL
GETGLOBAL -- 5
GETTABLE
SETGLOBAL
SETUPVAL
SETTABLE
NEWTABLE -- 10
SELF
ADD
SUB
MUL
DIV -- 15
MOD
POW
UNM
NOT
LEN -- 20
CONCAT
JMP
EQ
LT
LE -- 25
TEST
TESTSET
CALL
TAILCALL
RETURN -- 30
FORLOOP
FORPREP
TFORLOOP
SETLIST
CLOSE -- 35
CLOSURE
VARARG
]], "[%a]+") do
local n = "OP_"..v
luaP.opnames[i] = v
luaP.OpCode[n] = i
luaP.ROpCode[i] = n
i = i + 1
end
luaP.NUM_OPCODES = i
--[[
===========================================================================
Notes:
(1) In OP_CALL, if (B == 0) then B = top. C is the number of returns - 1,
and can be 0: OP_CALL then sets 'top' to last_result+1, so
next open instruction (OP_CALL, OP_RETURN, OP_SETLIST) may use 'top'.
(2) In OP_RETURN, if (B == 0) then return up to 'top'
(3) For comparisons, B specifies what conditions the test should accept.
(4) All 'skips' (pc++) assume that next instruction is a jump
(5) OP_SETLISTO is used when the last item in a table constructor is a
function, so the number of elements set is up to top of stack
===========================================================================
--]]
------------------------------------------------------------------------
-- masks for instruction properties
------------------------------------------------------------------------
-- was enum OpModeMask:
luaP.OpModeBreg = 2 -- B is a register
luaP.OpModeBrk = 3 -- B is a register/constant
luaP.OpModeCrk = 4 -- C is a register/constant
luaP.OpModesetA = 5 -- instruction sets register A
luaP.OpModeK = 6 -- Bx is a constant
luaP.OpModeT = 1 -- operator is a test
------------------------------------------------------------------------
-- get opcode mode, e.g. "iABC"
------------------------------------------------------------------------
function luaP:getOpMode(m)
--printv(m)
--printv(self.OpCode[m])
--printv(self.opmodes [self.OpCode[m]+1])
return self.OpMode[tonumber(string.sub(self.opmodes[self.OpCode[m] + 1], 7, 7))]
end
------------------------------------------------------------------------
-- test an instruction property flag
-- * b is a string, e.g. "OpModeBreg"
------------------------------------------------------------------------
function luaP:testOpMode(m, b)
return (string.sub(self.opmodes[self.OpCode[m] + 1], self[b], self[b]) == "1")
end
-- number of list items to accumulate before a SETLIST instruction
-- (must be a power of 2)
-- * used in lparser, lvm, ldebug, ltests
luaP.LFIELDS_PER_FLUSH = 50 --FF updated to match 5.1
-- luaP_opnames[] is set above, as the luaP.opnames table
-- opmode(t,b,bk,ck,sa,k,m) deleted
--[[--------------------------------------------------------------------
Legend for luaP:opmodes:
1 T -> T (is a test?)
2 B -> B is a register
3 b -> B is an RK register/constant combination
4 C -> C is an RK register/constant combination
5 A -> register A is set by the opcode
6 K -> Bx is a constant
7 m -> 1 if iABC layout,
2 if iABx layout,
3 if iAsBx layout
----------------------------------------------------------------------]]
luaP.opmodes = {
-- TBbCAKm opcode
"0100101", -- OP_MOVE 0
"0000112", -- OP_LOADK
"0000101", -- OP_LOADBOOL
"0100101", -- OP_LOADNIL
"0000101", -- OP_GETUPVAL
"0000112", -- OP_GETGLOBAL 5
"0101101", -- OP_GETTABLE
"0000012", -- OP_SETGLOBAL
"0000001", -- OP_SETUPVAL
"0011001", -- OP_SETTABLE
"0000101", -- OP_NEWTABLE 10
"0101101", -- OP_SELF
"0011101", -- OP_ADD
"0011101", -- OP_SUB
"0011101", -- OP_MUL
"0011101", -- OP_DIV 15
"0011101", -- OP_MOD
"0011101", -- OP_POW
"0100101", -- OP_UNM
"0100101", -- OP_NOT
"0100101", -- OP_LEN 20
"0101101", -- OP_CONCAT
"0000003", -- OP_JMP
"1011001", -- OP_EQ
"1011001", -- OP_LT
"1011001", -- OP_LE 25
"1000101", -- OP_TEST
"1100101", -- OP_TESTSET
"0000001", -- OP_CALL
"0000001", -- OP_TAILCALL
"0000001", -- OP_RETURN 30
"0000003", -- OP_FORLOOP
"0000103", -- OP_FORPREP
"1000101", -- OP_TFORLOOP
"0000001", -- OP_SETLIST
"0000001", -- OP_CLOSE 35
"0000102", -- OP_CLOSURE
"0000101" -- OP_VARARG
}
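-- Reading sketch: opmodes[1] = "0100101" describes OP_MOVE: not a test,
-- B is a register, neither B nor C is an RK operand, register A is set,
-- Bx is not a constant, and layout digit 1 selects "iABC".
--   assert(luaP:getOpMode("OP_MOVE") == "iABC")
--   assert(luaP:testOpMode("OP_MOVE", "OpModeBreg"))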
return luaP


@ -0,0 +1,86 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--*-lua-*-----------------------------------------------------------------------
-- Override Lua's default compilation functions, so that they support Metalua
-- rather than only plain Lua
--------------------------------------------------------------------------------
local mlc = require 'metalua.compiler'
local M = { }
-- Original versions
local original_lua_versions = {
load = load,
loadfile = loadfile,
loadstring = loadstring,
dofile = dofile }
local lua_loadstring = loadstring
local lua_loadfile = loadfile
function M.loadstring(str, name)
if type(str) ~= 'string' then error 'string expected' end
if str:match '^\027LuaQ' then return lua_loadstring(str) end
local n = str:match '^#![^\n]*\n()'
if n then str=str:sub(n, -1) end
-- FIXME: handle erroneous returns (return nil + error msg)
return mlc.new():src_to_function(str, name)
end
function M.loadfile(filename)
local f, err_msg = io.open(filename, 'rb')
if not f then return nil, err_msg end
local success, src = pcall( f.read, f, '*a')
pcall(f.close, f)
if success then return M.loadstring (src, '@'..filename)
else return nil, src end
end
function M.load(f, name)
local acc = { }
while true do
local x = f()
if not x then break end
assert(type(x)=='string', "function passed to load() must return strings")
table.insert(acc, x)
end
return M.loadstring(table.concat(acc))
end
function M.dostring(src)
local f, msg = M.loadstring(src)
if not f then error(msg) end
return f()
end
function M.dofile(name)
local f, msg = M.loadfile(name)
if not f then error(msg) end
return f()
end
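-- Usage sketch: once these overrides are installed below, the stock
-- entry points accept Metalua source transparently, e.g.
--   local f = assert(loadstring("local x = 1 + 1 return x"))
--   assert(f() == 2)
-- while precompiled chunks (prefix "\27LuaQ") still take the original
-- loadstring path.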
-- Export replacement functions as globals
for name, f in pairs(M) do _G[name] = f end
-- To be done *after* exportation
M.lua = original_lua_versions
return M


@ -0,0 +1,42 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
-- Export all public APIs from sub-modules, squashed into a flat namespace
local MT = { __type='metalua.compiler.parser' }
local MODULE_REL_NAMES = { "annot.grammar", "expr", "meta", "misc",
"stat", "table", "ext" }
local function new()
local M = {
lexer = require "metalua.compiler.parser.lexer" ();
extensions = { } }
for _, rel_name in ipairs(MODULE_REL_NAMES) do
local abs_name = "metalua.compiler.parser."..rel_name
local extender = require (abs_name)
if not M.extensions[abs_name] then
if type (extender) == 'function' then extender(M) end
M.extensions[abs_name] = extender
end
end
return setmetatable(M, MT)
end
return { new = new }


@ -0,0 +1,48 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local checks = require 'checks'
local gg = require 'metalua.grammar.generator'
local M = { }
function M.opt(mlc, primary, a_type)
checks('table', 'table|function', 'string')
return gg.sequence{
primary,
gg.onkeyword{ "#", function() return assert(mlc.annot[a_type]) end },
builder = function(x)
local t, annot = unpack(x)
return annot and { tag='Annot', t, annot } or t
end }
end
-- split a list of "foo" and "`Annot{foo, annot}" into a list of "foo"
-- and a list of "annot".
-- No annot list is returned if none of the elements were annotated.
function M.split(lst)
local x, a, some = { }, { }, false
for i, p in ipairs(lst) do
if p.tag=='Annot' then
some, x[i], a[i] = true, unpack(p)
else x[i] = p end
end
if some then return x, a else return lst end
end
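-- Example (sketch): given { `Id "x", `Annot{ `Id "y", TY } }, split
-- returns { `Id "x", `Id "y" } plus the sparse list { [2] = TY }; an
-- unannotated input list is returned unchanged, with no second result.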
return M


@ -0,0 +1,112 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
local gg = require 'metalua.grammar.generator'
return function(M)
local _M = gg.future(M)
M.lexer :add '->'
local A = { }
local _A = gg.future(A)
M.annot = A
-- Type identifier: Lua keywords such as `"nil"` are allowed.
function M.annot.tid(lx)
local w = lx :next()
local t = w.tag
if t=='Keyword' and w[1] :match '^[%a_][%w_]*$' or w.tag=='Id'
then return {tag='TId'; lineinfo=w.lineinfo; w[1]}
else return gg.parse_error (lx, 'tid expected') end
end
local field_types = { var='TVar'; const='TConst';
currently='TCurrently'; field='TField' }
-- TODO check lineinfo
function M.annot.tf(lx)
local tk = lx:next()
local w = tk[1]
local tag = field_types[w]
if not tag then error ('Invalid field type '..w)
elseif tag=='TField' then return {tag='TField'} else
local te = M.te(lx)
return {tag=tag; te}
end
end
M.annot.tebar_content = gg.list{
name = 'tebar content',
primary = _A.te,
separators = { ",", ";" },
terminators = ")" }
M.annot.tebar = gg.multisequence{
name = 'annot.tebar',
--{ '*', builder = 'TDynbar' }, -- maybe not user-available
{ '(', _A.tebar_content, ')',
builder = function(x) return x[1] end },
{ _A.te }
}
M.annot.te = gg.multisequence{
name = 'annot.te',
{ _A.tid, builder=function(x) return x[1] end },
{ '*', builder = 'TDyn' },
{ "[",
gg.list{
primary = gg.sequence{
_M.expr, "=", _A.tf,
builder = 'TPair'
},
separators = { ",", ";" },
terminators = { "]", "|" } },
gg.onkeyword{ "|", _A.tf },
"]",
builder = function(x)
local fields, other = unpack(x)
return { tag='TTable', other or {tag='TField'}, fields }
end }, -- "[ ... ]"
{ '(', _A.tebar_content, ')', '->', '(', _A.tebar_content, ')',
builder = function(x)
local p, r = unpack(x)
return {tag='TFunction', p, r }
end } }
M.annot.ts = gg.multisequence{
name = 'annot.ts',
{ 'return', _A.tebar_content, builder='TReturn' },
{ _A.tid, builder = function(x)
if x[1][1]=='pass' then return {tag='TPass'}
else error "Bad statement type" end
end } }
-- TODO: add parsers for statements:
-- #return tebar
-- #alias = te
-- #ell = tf
--[[
M.annot.stat_annot = gg.sequence{
gg.list{ primary=_A.tid, separators='.' },
'=',
XXX??,
builder = 'Annot' }
--]]
return M.annot
end


@ -0,0 +1,206 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Exported API:
-- * [mlp.expr()]
-- * [mlp.expr_list()]
-- * [mlp.func_val()]
--
-------------------------------------------------------------------------------
local pp = require 'metalua.pprint'
local gg = require 'metalua.grammar.generator'
local annot = require 'metalua.compiler.parser.annot.generator'
return function(M)
local _M = gg.future(M)
local _table = gg.future(M, 'table')
local _meta = gg.future(M, 'meta') -- TODO move to ext?
local _annot = gg.future(M, 'annot') -- TODO move to annot
--------------------------------------------------------------------------------
-- Non-empty expression list. Actually, this isn't used here, but that's
-- handy to give to users.
--------------------------------------------------------------------------------
M.expr_list = gg.list{ primary=_M.expr, separators="," }
--------------------------------------------------------------------------------
-- Helpers for function applications / method applications
--------------------------------------------------------------------------------
M.func_args_content = gg.list{
name = "function arguments",
primary = _M.expr,
separators = ",",
terminators = ")" }
-- Used to parse methods
M.method_args = gg.multisequence{
name = "function argument(s)",
{ "{", _table.content, "}" },
{ "(", _M.func_args_content, ")", builder = unpack },
{ "+{", _meta.quote_content, "}" },
-- TODO lineinfo?
function(lx) local r = M.opt_string(lx); return r and {r} or { } end }
--------------------------------------------------------------------------------
-- [func_val] parses a function, from the opening parameter parenthesis to the
-- "end" keyword included. Used for anonymous functions as well as
-- function declaration statements (both local and global).
--
-- It's wrapped in a [_func_val] eta expansion, so that when expr
-- parsers use the latter, they will notice updates of [func_val]
-- definitions.
--------------------------------------------------------------------------------
M.func_params_content = gg.list{
name="function parameters",
gg.multisequence{ { "...", builder = "Dots" }, annot.opt(M, _M.id, 'te') },
separators = ",", terminators = {")", "|"} }
-- TODO move to annot
M.func_val = gg.sequence{
name = "function body",
"(", _M.func_params_content, ")", _M.block, "end",
builder = function(x)
local params, body = unpack(x)
local annots, some = { }, false
for i, p in ipairs(params) do
if p.tag=='Annot' then
params[i], annots[i], some = p[1], p[2], true
else annots[i] = false end
end
if some then return { tag='Function', params, body, annots }
else return { tag='Function', params, body } end
end }
local func_val = function(lx) return M.func_val(lx) end
--------------------------------------------------------------------------------
-- Default parser for primary expressions
--------------------------------------------------------------------------------
function M.id_or_literal (lx)
local a = lx:next()
if a.tag~="Id" and a.tag~="String" and a.tag~="Number" then
local msg
if a.tag=='Eof' then
msg = "End of file reached when an expression was expected"
elseif a.tag=='Keyword' then
msg = "An expression was expected, and `"..a[1]..
"' can't start an expression"
else
msg = "Unexpected expr token " .. pp.tostring (a)
end
gg.parse_error (lx, msg)
end
return a
end
--------------------------------------------------------------------------------
-- Builder generator for operators. Wouldn't be worth it if "|x|" notation
-- were allowed, but then lua 5.1 wouldn't compile it
--------------------------------------------------------------------------------
-- opf1 = |op| |_,a| `Op{ op, a }
local function opf1 (op) return
function (_,a) return { tag="Op", op, a } end end
-- opf2 = |op| |a,_,b| `Op{ op, a, b }
local function opf2 (op) return
function (a,_,b) return { tag="Op", op, a, b } end end
-- opf2r = |op| |a,_,b| `Op{ op, b, a } -- (args reversed)
local function opf2r (op) return
function (a,_,b) return { tag="Op", op, b, a } end end
local function op_ne(a, _, b)
-- This version makes it possible to remove the "ne" operator from the AST definition.
-- However, it doesn't always produce the exact same bytecode as Lua 5.1.
return { tag="Op", "not",
{ tag="Op", "eq", a, b, lineinfo= {
first = a.lineinfo.first, last = b.lineinfo.last } } }
end
--------------------------------------------------------------------------------
--
-- complete expression
--
--------------------------------------------------------------------------------
-- FIXME: set line number. In [expr] transformers probably
M.expr = gg.expr {
name = "expression",
primary = gg.multisequence{
name = "expr primary",
{ "(", _M.expr, ")", builder = "Paren" },
{ "function", _M.func_val, builder = unpack },
{ "-{", _meta.splice_content, "}", builder = unpack },
{ "+{", _meta.quote_content, "}", builder = unpack },
{ "nil", builder = "Nil" },
{ "true", builder = "True" },
{ "false", builder = "False" },
{ "...", builder = "Dots" },
{ "{", _table.content, "}", builder = unpack },
_M.id_or_literal },
infix = {
name = "expr infix op",
{ "+", prec = 60, builder = opf2 "add" },
{ "-", prec = 60, builder = opf2 "sub" },
{ "*", prec = 70, builder = opf2 "mul" },
{ "/", prec = 70, builder = opf2 "div" },
{ "%", prec = 70, builder = opf2 "mod" },
{ "^", prec = 90, builder = opf2 "pow", assoc = "right" },
{ "..", prec = 40, builder = opf2 "concat", assoc = "right" },
{ "==", prec = 30, builder = opf2 "eq" },
{ "~=", prec = 30, builder = op_ne },
{ "<", prec = 30, builder = opf2 "lt" },
{ "<=", prec = 30, builder = opf2 "le" },
{ ">", prec = 30, builder = opf2r "lt" },
{ ">=", prec = 30, builder = opf2r "le" },
{ "and",prec = 20, builder = opf2 "and" },
{ "or", prec = 10, builder = opf2 "or" } },
prefix = {
name = "expr prefix op",
{ "not", prec = 80, builder = opf1 "not" },
{ "#", prec = 80, builder = opf1 "len" },
{ "-", prec = 80, builder = opf1 "unm" } },
suffix = {
name = "expr suffix op",
{ "[", _M.expr, "]", builder = function (tab, idx)
return {tag="Index", tab, idx[1]} end},
{ ".", _M.id, builder = function (tab, field)
return {tag="Index", tab, _M.id2string(field[1])} end },
{ "(", _M.func_args_content, ")", builder = function(f, args)
return {tag="Call", f, unpack(args[1])} end },
{ "{", _table.content, "}", builder = function (f, arg)
return {tag="Call", f, arg[1]} end},
{ ":", _M.id, _M.method_args, builder = function (obj, post)
local m_name, args = unpack(post)
return {tag="Invoke", obj, _M.id2string(m_name), unpack(args)} end},
{ "+{", _meta.quote_content, "}", builder = function (f, arg)
return {tag="Call", f, arg[1] } end },
default = { name="opt_string_arg", parse = _M.opt_string, builder = function(f, arg)
return {tag="Call", f, arg } end } } }
return M
end


@ -0,0 +1,96 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Non-Lua syntax extensions
--
--------------------------------------------------------------------------------
local gg = require 'metalua.grammar.generator'
return function(M)
local _M = gg.future(M)
---------------------------------------------------------------------------
-- Algebraic Datatypes
----------------------------------------------------------------------------
local function adt (lx)
local node = _M.id (lx)
local tagval = node[1]
-- tagkey = `Pair{ `String "key", `String{ -{tagval} } }
local tagkey = { tag="Pair", {tag="String", "tag"}, {tag="String", tagval} }
if lx:peek().tag == "String" or lx:peek().tag == "Number" then
-- TODO support boolean literals
return { tag="Table", tagkey, lx:next() }
elseif lx:is_keyword (lx:peek(), "{") then
local x = M.table.table (lx)
table.insert (x, 1, tagkey)
return x
else return { tag="Table", tagkey } end
end
M.adt = gg.sequence{ "`", adt, builder = unpack }
M.expr.primary :add(M.adt)
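-- Example (sketch): `Foo{ 1, 2 } parses into
--   `Table{ `Pair{ `String "tag", `String "Foo" }, `Number 1, `Number 2 }
-- i.e. an expression that evaluates to the table { tag="Foo", 1, 2 }.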
----------------------------------------------------------------------------
-- Anonymous lambda
----------------------------------------------------------------------------
M.lambda_expr = gg.sequence{
"|", _M.func_params_content, "|", _M.expr,
builder = function (x)
local li = x[2].lineinfo
return { tag="Function", x[1],
{ {tag="Return", x[2], lineinfo=li }, lineinfo=li } }
end }
M.expr.primary :add (M.lambda_expr)
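-- Example (sketch): "|x| x + 1" desugars to
--   `Function{ { `Id "x" }, { `Return{ `Op{ "add", `Id "x", `Number 1 } } } }
-- i.e. function(x) return x + 1 end.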
--------------------------------------------------------------------------------
-- Allows writing "a `f` b" instead of "f(a, b)". Taken from Haskell.
--------------------------------------------------------------------------------
function M.expr_in_backquotes (lx) return M.expr(lx, 35) end -- 35=limited precedence
M.expr.infix :add{ name = "infix function",
"`", _M.expr_in_backquotes, "`", prec = 35, assoc="left",
builder = function(a, op, b) return {tag="Call", op[1], a, b} end }
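-- Example (sketch): "a `f` b" parses as `Call{ `Id "f", `Id "a", `Id "b" };
-- with precedence 35, "a `f` b + 1" reads as "f(a, b + 1)" since "+"
-- binds tighter.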
--------------------------------------------------------------------------------
-- C-style op+assignments
-- TODO: no protection against side-effects in LHS vars.
--------------------------------------------------------------------------------
local function op_assign(kw, op)
local function rhs(a, b) return { tag="Op", op, a, b } end
local function f(a,b)
if #a ~= #b then gg.parse_error "asymmetric operator+assignment" end
local right = { }
local r = { tag="Set", a, right }
for i=1, #a do right[i] = { tag="Op", op, a[i], b[i] } end
return r
end
M.lexer :add (kw)
M.assignments[kw] = f
end
local ops = { add='+='; sub='-='; mul='*='; div='/=' }
for ast_op_name, keyword in pairs(ops) do op_assign(keyword, ast_op_name) end
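-- Example (sketch): with these registrations, "x, y += 1, 2" parses as
--   `Set{ { `Id "x", `Id "y" },
--         { `Op{ "add", `Id "x", `Number 1 }, `Op{ "add", `Id "y", `Number 2 } } }
-- i.e. the statement "x, y = x + 1, y + 2".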
return M
end


@ -0,0 +1,43 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2014 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
----------------------------------------------------------------------
-- Generate a new lua-specific lexer, derived from the generic lexer.
----------------------------------------------------------------------
local generic_lexer = require 'metalua.grammar.lexer'
return function()
local lexer = generic_lexer.lexer :clone()
local keywords = {
"and", "break", "do", "else", "elseif",
"end", "false", "for", "function",
"goto", -- Lua5.2
"if",
"in", "local", "nil", "not", "or", "repeat",
"return", "then", "true", "until", "while",
"...", "..", "==", ">=", "<=", "~=",
"::", -- Lua5,2
"+{", "-{" } -- Metalua
for _, w in ipairs(keywords) do lexer :add (w) end
return lexer
end


@ -0,0 +1,138 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2014 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-- Compile-time metaprogramming features: splicing ASTs generated during compilation,
-- AST quasi-quoting helpers.
local gg = require 'metalua.grammar.generator'
return function(M)
local _M = gg.future(M)
M.meta={ }
local _MM = gg.future(M.meta)
--------------------------------------------------------------------------------
-- External splicing: compile an AST into a chunk, load and evaluate
-- that chunk, and replace the chunk by its result (which must also be
-- an AST).
--------------------------------------------------------------------------------
-- TODO: that's not part of the parser
function M.meta.eval (ast)
-- TODO: should there be one mlc per splice, or per parser instance?
local mlc = require 'metalua.compiler'.new()
local f = mlc :ast_to_function (ast, '=splice')
local result=f(M) -- splices act on the current parser
return result
end
----------------------------------------------------------------------------
-- Going from an AST to an AST representing that AST
-- the only hash-part key being lifted is `"tag"`.
-- Doesn't lift subtrees protected inside a `Splice{ ... }.
-- e.g. change `Foo{ 123 } into
-- `Table{ `Pair{ `String "tag", `String "Foo" }, `Number 123 }
----------------------------------------------------------------------------
local function lift (t)
--print("QUOTING:", table.tostring(t, 60,'nohash'))
local cases = { }
function cases.table (t)
local mt = { tag = "Table" }
--table.insert (mt, { tag = "Pair", quote "quote", { tag = "True" } })
if t.tag == "Splice" then
assert (#t==1, "Invalid splice")
local sp = t[1]
return sp
elseif t.tag then
table.insert (mt, { tag="Pair", lift "tag", lift(t.tag) })
end
for _, v in ipairs (t) do
table.insert (mt, lift(v))
end
return mt
end
function cases.number (t) return { tag = "Number", t, quote = true } end
function cases.string (t) return { tag = "String", t, quote = true } end
function cases.boolean (t) return { tag = t and "True" or "False", t, quote = true } end
local f = cases [type(t)]
if f then return f(t) else error ("Cannot quote an AST containing "..tostring(t)) end
end
M.meta.lift = lift
--------------------------------------------------------------------------------
-- when this variable is false, code inside [-{...}] is compiled and
-- evaluated immediately. When it's true (supposedly when we're
-- parsing data inside a quasiquote), [-{foo}] is replaced by
-- [`Splice{foo}], which will be unpacked by [quote()].
--------------------------------------------------------------------------------
local in_a_quote = false
--------------------------------------------------------------------------------
-- Parse the inside of a "-{ ... }"
--------------------------------------------------------------------------------
function M.meta.splice_content (lx)
local parser_name = "expr"
if lx:is_keyword (lx:peek(2), ":") then
local a = lx:next()
lx:next() -- skip ":"
assert (a.tag=="Id", "Invalid splice parser name")
parser_name = a[1]
end
-- TODO FIXME running a new parser with the old lexer?!
local parser = require 'metalua.compiler.parser'.new()
local ast = parser [parser_name](lx)
if in_a_quote then -- only prevent quotation in this subtree
--printf("SPLICE_IN_QUOTE:\n%s", _G.table.tostring(ast, "nohash", 60))
return { tag="Splice", ast }
else -- convert into a block, eval, replace with result
if parser_name == "expr" then ast = { { tag="Return", ast } }
elseif parser_name == "stat" then ast = { ast }
elseif parser_name ~= "block" then
error ("splice content must be an expr, stat or block") end
--printf("EXEC THIS SPLICE:\n%s", _G.table.tostring(ast, "nohash", 60))
return M.meta.eval (ast)
end
end
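-- Example (sketch): in "local x = -{ `Number{ 2 } }", the splice body is
-- compiled and run at parse time, and the `Number 2 AST it returns is
-- inserted in place of the "-{ ... }" construct, yielding "local x = 2".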
M.meta.splice = gg.sequence{ "-{", _MM.splice_content, "}", builder=unpack }
--------------------------------------------------------------------------------
-- Parse the inside of a "+{ ... }"
--------------------------------------------------------------------------------
function M.meta.quote_content (lx)
local parser
if lx:is_keyword (lx:peek(2), ":") then -- +{parser: content }
local parser_name = M.id(lx)[1]
parser = M[parser_name]
lx:next() -- skip ":"
else -- +{ content }
parser = M.expr
end
local prev_iq = in_a_quote
in_a_quote = true
--print("IN_A_QUOTE")
local content = parser (lx)
local q_content = M.meta.lift (content)
in_a_quote = prev_iq
return q_content
end
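-- Usage sketch (illustrative, not from the original source): in metalua
-- code, +{ 1 + 1 } does not evaluate to 2 but to the AST of that
-- expression, roughly { tag="Op", "add", { tag="Number", 1 },
-- { tag="Number", 1 } }; and +{ 1 + -{ e } } grafts the AST held in
-- variable [e] at the splice point, since [in_a_quote] turns the splice
-- into a `Splice node that [lift] passes through unlifted.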
return M
end

View File

@ -0,0 +1,176 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Summary: metalua parser, miscellaneous utility functions.
--
-------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
-- * [mlp.fget()]
-- * [mlp.id()]
-- * [mlp.opt_id()]
-- * [mlp.id_list()]
-- * [mlp.string()]
-- * [mlp.opt_string()]
-- * [mlp.id2string()]
--
--------------------------------------------------------------------------------
local pp = require 'metalua.pprint'
local gg = require 'metalua.grammar.generator'
-- TODO: replace splice-aware versions with naive ones, move extensions into ./meta
return function(M)
local _M = gg.future(M)
--[[ metaprog-free versions:
function M.id(lx)
if lx:peek().tag~='Id' then gg.parse_error(lx, "Identifier expected")
else return lx:next() end
end
function M.opt_id(lx)
if lx:peek().tag=='Id' then return lx:next() else return false end
end
function M.string(lx)
if lx:peek().tag~='String' then gg.parse_error(lx, "String expected")
else return lx:next() end
end
function M.opt_string(lx)
if lx:peek().tag=='String' then return lx:next() else return false end
end
--------------------------------------------------------------------------------
-- Converts an identifier into a string. Hopefully one day it'll handle
-- splices gracefully, but that proves quite tricky.
--------------------------------------------------------------------------------
function M.id2string (id)
if id.tag == "Id" then id.tag = "String"; return id
else error ("Identifier expected: "..table.tostring(id, 'nohash')) end
end
--]]
--------------------------------------------------------------------------------
-- Try to read an identifier (possibly as a splice), or return [false] if no
-- id is found.
--------------------------------------------------------------------------------
function M.opt_id (lx)
local a = lx:peek();
if lx:is_keyword (a, "-{") then
local v = M.meta.splice(lx)
if v.tag ~= "Id" and v.tag ~= "Splice" then
gg.parse_error(lx, "Bad id splice")
end
return v
elseif a.tag == "Id" then return lx:next()
else return false end
end
--------------------------------------------------------------------------------
-- Mandatory reading of an id: causes an error if it can't read one.
--------------------------------------------------------------------------------
function M.id (lx)
return M.opt_id (lx) or gg.parse_error(lx,"Identifier expected")
end
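-- Behaviour sketch: on input [foo], M.id returns `Id{ "foo" }; on input
-- [-{ e }] it returns whatever `Id or `Splice node the splice produces;
-- anything else triggers "Identifier expected" via gg.parse_error.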
--------------------------------------------------------------------------------
-- Common helper: a comma-separated list of identifiers
--------------------------------------------------------------------------------
M.id_list = gg.list { primary = _M.id, separators = "," }
--------------------------------------------------------------------------------
-- Converts an identifier into a string. Hopefully one day it'll handle
-- splices gracefully, but that proves quite tricky.
--------------------------------------------------------------------------------
function M.id2string (id)
--print("id2string:", disp.ast(id))
if id.tag == "Id" then id.tag = "String"; return id
elseif id.tag == "Splice" then
error ("id2string on splice not implemented")
-- Evaluating id[1] will produce `Id{ xxx },
-- and we want it to produce `String{ xxx }.
-- The following is the plain notation of:
-- +{ `String{ `Index{ `Splice{ -{id[1]} }, `Number 1 } } }
return { tag="String", { tag="Index", { tag="Splice", id[1] },
{ tag="Number", 1 } } }
else error ("Identifier expected: "..pp.tostring (id, {metalua_tag=1, hide_hash=1})) end
end
--------------------------------------------------------------------------------
-- Read a string, possibly spliced, or return an error if it can't
--------------------------------------------------------------------------------
function M.string (lx)
local a = lx:peek()
if lx:is_keyword (a, "-{") then
local v = M.meta.splice(lx)
if v.tag ~= "String" and v.tag ~= "Splice" then
gg.parse_error(lx,"Bad string splice")
end
return v
elseif a.tag == "String" then return lx:next()
else error "String expected" end
end
--------------------------------------------------------------------------------
-- Try to read a string, or return false if it can't. No splice allowed.
--------------------------------------------------------------------------------
function M.opt_string (lx)
return lx:peek().tag == "String" and lx:next()
end
--------------------------------------------------------------------------------
-- Chunk reader: block + Eof
--------------------------------------------------------------------------------
function M.skip_initial_sharp_comment (lx)
-- Dirty hack: I'm happily fondling lexer's private parts
-- FIXME: redundant with lexer:newstream()
lx :sync()
local i = lx.src:match ("^#.-\n()", lx.i)
if i then
lx.i = i
lx.column_offset = i
lx.line = lx.line and lx.line + 1 or 1
end
end
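-- E.g. (sketch) a script beginning with
--   #!/usr/bin/env lua
--   print "hello"
-- is parsed as if the "#!" line were absent, mirroring the shebang
-- skipping performed by the stock lua interpreter.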
local function chunk (lx)
if lx:peek().tag == 'Eof' then
return { } -- handle empty files
else
M.skip_initial_sharp_comment (lx)
local chunk = M.block (lx)
if lx:peek().tag ~= "Eof" then
gg.parse_error(lx, "End-of-file expected")
end
return chunk
end
end
-- chunk is wrapped in a sequence so that it has a "transformers" field.
M.chunk = gg.sequence { chunk, builder = unpack }
return M
end

View File

@ -0,0 +1,279 @@
------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Summary: metalua parser, statement/block parser. This is part of the
-- definition of module [mlp].
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Exports API:
-- * [mlp.stat()]
-- * [mlp.block()]
-- * [mlp.for_header()]
--
-------------------------------------------------------------------------------
local lexer = require 'metalua.grammar.lexer'
local gg = require 'metalua.grammar.generator'
local annot = require 'metalua.compiler.parser.annot.generator'
--------------------------------------------------------------------------------
-- List of all keywords that indicate the end of a statement block. Users are
-- likely to extend this list when designing extensions.
--------------------------------------------------------------------------------
return function(M)
local _M = gg.future(M)
M.block_terminators = { "else", "elseif", "end", "until", ")", "}", "]" }
-- FIXME: this must be handled from within GG!!!
-- FIXME: there's no :add method in the list anyway. Added by gg.list?!
function M.block_terminators :add(x)
if type (x) == "table" then for _, y in ipairs(x) do self :add (y) end
else table.insert (self, x) end
end
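-- E.g. an extension introducing a new block-closing keyword registers it
-- here so that gg.list-based block parsers stop before it; the match
-- extension does exactly this for its case separator:
--   M.block_terminators :add "|"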
----------------------------------------------------------------------------
-- list of statements, possibly followed by semicolons
----------------------------------------------------------------------------
M.block = gg.list {
name = "statements block",
terminators = M.block_terminators,
primary = function (lx)
-- FIXME use gg.optkeyword()
local x = M.stat (lx)
if lx:is_keyword (lx:peek(), ";") then lx:next() end
return x
end }
----------------------------------------------------------------------------
-- Helper function for "return <expr_list>" parsing.
-- Called when parsing return statements.
-- The specific test for initial ";" is because it's not a block terminator,
-- so without it gg.list would choke on "return ;" statements.
-- We don't make a modified copy of block_terminators because this list
-- is sometimes modified at runtime, and the return parser would get out of
-- sync if it was relying on a copy.
----------------------------------------------------------------------------
local return_expr_list_parser = gg.multisequence{
{ ";" , builder = function() return { } end },
default = gg.list {
_M.expr, separators = ",", terminators = M.block_terminators } }
local for_vars_list = gg.list{
name = "for variables list",
primary = _M.id,
separators = ",",
terminators = "in" }
----------------------------------------------------------------------------
-- for header, between [for] and [do] (exclusive).
-- Return the `Forxxx{...} AST, without the body element (the last one).
----------------------------------------------------------------------------
function M.for_header (lx)
local vars = M.id_list(lx)
if lx :is_keyword (lx:peek(), "=") then
if #vars ~= 1 then
gg.parse_error (lx, "numeric for only accepts one variable")
end
lx:next() -- skip "="
local exprs = M.expr_list (lx)
if #exprs < 2 or #exprs > 3 then
gg.parse_error (lx, "numeric for requires 2 or 3 boundaries")
end
return { tag="Fornum", vars[1], unpack (exprs) }
else
if not lx :is_keyword (lx :next(), "in") then
gg.parse_error (lx, '"=" or "in" expected in for loop')
end
local exprs = M.expr_list (lx)
return { tag="Forin", vars, exprs }
end
end
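-- Sketch of the returned ASTs (loop body appended later by the "for"
-- statement parser):
--   for i = 1, 10, 2 do  --> `Fornum{ `Id "i", `Number 1, `Number 10, `Number 2 }
--   for k, v in pairs(t) do --> `Forin{ { `Id "k", `Id "v" },
--                                       { `Call{ `Id "pairs", `Id "t" } } }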
----------------------------------------------------------------------------
-- Function def parser helper: id ( . id ) *
----------------------------------------------------------------------------
local function fn_builder (list)
local acc = list[1]
local first = acc.lineinfo.first
for i = 2, #list do
local index = M.id2string(list[i])
local li = lexer.new_lineinfo(first, index.lineinfo.last)
acc = { tag="Index", acc, index, lineinfo=li }
end
return acc
end
local func_name = gg.list{ _M.id, separators = ".", builder = fn_builder }
----------------------------------------------------------------------------
-- Function def parser helper: ( : id )?
----------------------------------------------------------------------------
local method_name = gg.onkeyword{ name = "method invocation", ":", _M.id,
transformers = { function(x) return x and x.tag=='Id' and M.id2string(x) end } }
----------------------------------------------------------------------------
-- Function def builder
----------------------------------------------------------------------------
local function funcdef_builder(x)
local name, method, func = unpack(x)
if method then
name = { tag="Index", name, method,
lineinfo = {
first = name.lineinfo.first,
last = method.lineinfo.last } }
table.insert (func[1], 1, {tag="Id", "self"})
end
local r = { tag="Set", {name}, {func} }
r[1].lineinfo = name.lineinfo
r[2].lineinfo = func.lineinfo
return r
end
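-- E.g. (sketch) [function obj:frob(x) ... end] is built as
--   `Set{ { `Index{ `Id "obj", `String "frob" } },
--         { `Function{ { `Id "self", `Id "x" }, <body> } } }
-- i.e. method sugar is desugared into an indexed assignment with an
-- implicit [self] parameter prepended.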
----------------------------------------------------------------------------
-- if statement builder
----------------------------------------------------------------------------
local function if_builder (x)
local cond_block_pairs, else_block, r = x[1], x[2], {tag="If"}
local n_pairs = #cond_block_pairs
for i = 1, n_pairs do
local cond, block = unpack(cond_block_pairs[i])
r[2*i-1], r[2*i] = cond, block
end
if else_block then table.insert(r, #r+1, else_block) end
return r
end
--------------------------------------------------------------------------------
-- produce a list of (expr,block) pairs
--------------------------------------------------------------------------------
local elseifs_parser = gg.list {
gg.sequence { _M.expr, "then", _M.block , name='elseif parser' },
separators = "elseif",
terminators = { "else", "end" }
}
local annot_expr = gg.sequence {
_M.expr,
gg.onkeyword{ "#", gg.future(M, 'annot').tf },
builder = function(x)
local e, a = unpack(x)
if a then return { tag='Annot', e, a }
else return e end
end }
local annot_expr_list = gg.list {
primary = annot.opt(M, _M.expr, 'tf'), separators = ',' }
------------------------------------------------------------------------
-- assignments and calls: statements that don't start with a keyword
------------------------------------------------------------------------
local function assign_or_call_stat_parser (lx)
local e = annot_expr_list (lx)
local a = lx:is_keyword(lx:peek())
local op = a and M.assignments[a]
-- TODO: refactor annotations
if op then
--FIXME: check that [e] is a LHS
lx :next()
local annots
e, annots = annot.split(e)
local v = M.expr_list (lx)
if type(op)=="string" then return { tag=op, e, v, annots }
else return op (e, v) end
else
assert (#e > 0)
if #e > 1 then
gg.parse_error (lx,
"comma is not a valid statement separator; statement can be "..
"separated by semicolons, or not separated at all")
elseif e[1].tag ~= "Call" and e[1].tag ~= "Invoke" then
local typename
if e[1].tag == 'Id' then
typename = '("'..e[1][1]..'") is an identifier'
elseif e[1].tag == 'Op' then
typename = "is an arithmetic operation"
else typename = "is of type '"..(e[1].tag or "<list>").."'" end
gg.parse_error (lx,
"This expression %s; "..
"a statement was expected, and only function and method call "..
"expressions can be used as statements", typename);
end
return e[1]
end
end
M.local_stat_parser = gg.multisequence{
-- local function <name> <func_val>
{ "function", _M.id, _M.func_val, builder =
function(x)
local vars = { x[1], lineinfo = x[1].lineinfo }
local vals = { x[2], lineinfo = x[2].lineinfo }
return { tag="Localrec", vars, vals }
end },
-- local <id_list> ( = <expr_list> )?
default = gg.sequence{
gg.list{
primary = annot.opt(M, _M.id, 'tf'),
separators = ',' },
gg.onkeyword{ "=", _M.expr_list },
builder = function(x)
local annotated_left, right = unpack(x)
local left, annotations = annot.split(annotated_left)
return {tag="Local", left, right or { }, annotations }
end } }
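-- E.g. (sketch) [local x, y = 1] parses to
--   `Local{ { `Id "x", `Id "y" }, { `Number 1 }, <annotations> }
-- the third child carrying the optional "#" type annotations split off
-- by annot.split.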
------------------------------------------------------------------------
-- statement
------------------------------------------------------------------------
M.stat = gg.multisequence {
name = "statement",
{ "do", _M.block, "end", builder =
function (x) return { tag="Do", unpack (x[1]) } end },
{ "for", _M.for_header, "do", _M.block, "end", builder =
function (x) x[1][#x[1]+1] = x[2]; return x[1] end },
{ "function", func_name, method_name, _M.func_val, builder=funcdef_builder },
{ "while", _M.expr, "do", _M.block, "end", builder = "While" },
{ "repeat", _M.block, "until", _M.expr, builder = "Repeat" },
{ "local", _M.local_stat_parser, builder = unpack },
{ "return", return_expr_list_parser, builder =
function(x) x[1].tag='Return'; return x[1] end },
{ "break", builder = function() return { tag="Break" } end },
{ "-{", gg.future(M, 'meta').splice_content, "}", builder = unpack },
{ "if", gg.nonempty(elseifs_parser), gg.onkeyword{ "else", M.block }, "end",
builder = if_builder },
default = assign_or_call_stat_parser }
M.assignments = {
["="] = "Set"
}
function M.assignments:add(k, v) self[k] = v end
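-- E.g. (hypothetical sketch) an extension could register a compound
-- assignment; "+=" is an assumption, not part of the stock grammar, and
-- the token must also be declared to the lexer:
--   M.lexer :add "+="
--   M.assignments :add ("+=", function (lhs, rhs)
--       -- rewrite [a += b] as [a = a + b]; single-variable case only
--       return { tag="Set", lhs, { { tag="Op", "add", lhs[1], rhs[1] } } }
--   end)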
return M
end

View File

@ -0,0 +1,77 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
-- * [M.table_bracket_field()]
-- * [M.table_field()]
-- * [M.table_content()]
-- * [M.table()]
--
-- KNOWN BUG: doesn't handle final ";" or "," before final "}"
--
--------------------------------------------------------------------------------
local gg = require 'metalua.grammar.generator'
return function(M)
M.table = { }
local _table = gg.future(M.table)
local _expr = gg.future(M).expr
--------------------------------------------------------------------------------
-- `[key] = value` table field definition
--------------------------------------------------------------------------------
M.table.bracket_pair = gg.sequence{ "[", _expr, "]", "=", _expr, builder = "Pair" }
--------------------------------------------------------------------------------
-- table element parser: list value, `id = value` pair or `[value] = value` pair.
--------------------------------------------------------------------------------
function M.table.element (lx)
if lx :is_keyword (lx :peek(), "[") then return M.table.bracket_pair(lx) end
local e = M.expr (lx)
if not lx :is_keyword (lx :peek(), "=") then return e end
lx :next(); -- skip the "="
local key = M.id2string(e) -- will fail on non-identifiers
local val = M.expr(lx)
local r = { tag="Pair", key, val }
r.lineinfo = { first = key.lineinfo.first, last = val.lineinfo.last }
return r
end
-----------------------------------------------------------------------------
-- table constructor, without enclosing braces; returns a full table object
-----------------------------------------------------------------------------
M.table.content = gg.list {
-- eta expansion to allow patching the element definition
primary = _table.element,
separators = { ",", ";" },
terminators = "}",
builder = "Table" }
--------------------------------------------------------------------------------
-- complete table constructor including [{...}]
--------------------------------------------------------------------------------
-- TODO beware, stat and expr use only table.content, this can't be patched.
M.table.table = gg.sequence{ "{", _table.content, "}", builder = unpack }
return M
end

View File

@ -0,0 +1,282 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
--
-- This extension implements list comprehensions, similar to Haskell and
-- Python syntax, to easily describe lists.
--
-- * x[a ... b] is the list { x[a], x[a+1], ..., x[b] }
-- * { f()..., b } contains all the elements returned by f(), then b
-- (allows expanding list fields other than the last one)
-- * list comprehensions a la python, with "for" and "if" suffixes:
-- {i+10*j for i=1,3 for j=1,3 if i~=j} is { 21, 31, 12, 32, 13, 23 }
--
-------------------------------------------------------------------------------
-{ extension ("match", ...) }
local SUPPORT_IMPROVED_LOOPS = true
local SUPPORT_IMPROVED_INDEXES = false -- depends on deprecated table.isub
local SUPPORT_CONTINUE = true
local SUPPORT_COMP_LISTS = true
assert (SUPPORT_IMPROVED_LOOPS or not SUPPORT_CONTINUE,
"Can't support 'continue' without improved loop headers")
local gg = require 'metalua.grammar.generator'
local Q = require 'metalua.treequery'
local function dots_list_suffix_builder (x) return `DotsSuffix{ x } end
local function for_list_suffix_builder (list_element, suffix)
local new_header = suffix[1]
match list_element with
| `Comp{ _, acc } -> table.insert (acc, new_header); return list_element
| _ -> return `Comp{ list_element, { new_header } }
end
end
local function if_list_suffix_builder (list_element, suffix)
local new_header = `If{ suffix[1] }
match list_element with
| `Comp{ _, acc } -> table.insert (acc, new_header); return list_element
| _ -> return `Comp{ list_element, { new_header } }
end
end
-- Builds a statement from a table element: the statement adds this
-- element to a table `t`, going through an alias `tinsert` of
-- `table.insert` when insertion is needed.
-- @param core the part around which the loops are built.
-- either `DotsSuffix{expr}, `Pair{ expr } or a plain expression
-- @param list comprehension suffixes, in the order in which they appear
-- either `Forin{ ... } or `Fornum{ ...} or `If{ ... }. In each case,
-- it misses a last child node as its body.
-- @param t a variable containing the table to fill
-- @param tinsert a variable containing `table.insert`.
--
-- @return fill a statement which fills empty table `t` with the denoted element
local function comp_list_builder(core, list, t, tinsert)
local filler
-- 1 - Build the loop's core: if it has suffix "...", every element of the
-- multi-return must be inserted, hence the extra [for] loop.
match core with
| `DotsSuffix{ element } ->
local x = gg.gensym()
filler = +{stat: for _, -{x} in pairs{ -{element} } do (-{tinsert})(-{t}, -{x}) end }
| `Pair{ key, value } ->
--filler = +{ -{t}[-{key}] = -{value} }
filler = `Set{ { `Index{ t, key } }, { value } }
| _ -> filler = +{ (-{tinsert})(-{t}, -{core}) }
end
-- 2 - Stack the `if` and `for` control structures, from outside to inside.
-- This is done in a destructive way for the elements of [list].
for i = #list, 1, -1 do
table.insert (list[i], {filler})
filler = list[i]
end
return filler
end
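-- E.g. (sketch) the table entry [i*i for i=1,3] yields a filler roughly
-- equivalent to
--   for i = 1, 3 do tinsert(t, i*i) end
-- with [t] and [tinsert] the gensym'd table and table.insert aliases
-- supplied by the caller.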
local function table_content_builder (list)
local special = false -- Does the table need a special builder?
for _, element in ipairs(list) do
local etag = element.tag
if etag=='Comp' or etag=='DotsSuffix' then special=true; break end
end
if not special then list.tag='Table'; return list end
local t, tinsert = gg.gensym 'table', gg.gensym 'table_insert'
local filler_block = { +{stat: local -{t}, -{tinsert} = { }, table.insert } }
for _, element in ipairs(list) do
local filler
match element with
| `Comp{ core, comp } -> filler = comp_list_builder(core, comp, t, tinsert)
| _ -> filler = comp_list_builder(element, { }, t, tinsert)
end
table.insert(filler_block, filler)
end
return `Stat{ filler_block, t }
end
--------------------------------------------------------------------------------
-- Back-end for improved index operator.
local function index_builder(a, suffix)
match suffix[1] with
-- Single index, no range: keep the native semantics
| { { e, false } } -> return `Index{ a, e }
-- Either a range, or multiple indexes, or both
| ranges ->
local r = `Call{ +{table.isub}, a }
local function acc (x,y) table.insert (r,x); table.insert (r,y) end
for _, seq in ipairs (ranges) do
match seq with
| { e, false } -> acc(e,e)
| { e, f } -> acc(e,f)
end
end
return r
end
end
-------------------------------------------------------------------
-- Find continue statements in a loop body, change them into goto
-- end-of-body.
local function transform_continue_statements(body)
local continue_statements = Q(body)
:if_unknown() -- tolerate unknown 'Continue' statements
:not_under ('Forin', 'Fornum', 'While', 'Repeat')
:filter ('Continue')
:list()
if next(continue_statements) then
local continue_label = gg.gensym 'continue' [1]
table.insert(body, `Label{ continue_label })
for _, statement in ipairs(continue_statements) do
statement.tag = 'Goto'
statement[1] = continue_label
end
return true
else return false end
end
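-- E.g. (sketch) with SUPPORT_CONTINUE, a loop body such as
--   if skip(x) then continue end; work(x)
-- is rewritten in place into
--   if skip(x) then goto <label> end; work(x); ::<label>::
-- with <label> a gensym'd name whose `Label node is appended at the end
-- of the body.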
-------------------------------------------------------------------------------
-- Back-end for loops with a multi-element header
local function loop_builder(x)
local first, elements, body = unpack(x)
-- Change continue statements into gotos.
if SUPPORT_CONTINUE then transform_continue_statements(body) end
-------------------------------------------------------------------
-- If it's a regular loop, don't bloat the code
if not next(elements) then
table.insert(first, body)
return first
end
-------------------------------------------------------------------
-- There's no reason to treat the first element in a special way
table.insert(elements, 1, first)
-------------------------------------------------------------------
-- Change breaks into gotos that escape all loops at once.
local exit_label = nil
local function break_to_goto(break_node)
if not exit_label then exit_label = gg.gensym 'break' [1] end
break_node = break_node or { }
break_node.tag = 'Goto'
break_node[1] = exit_label
return break_node
end
Q(body)
:not_under('Function', 'Forin', 'Fornum', 'While', 'Repeat')
:filter('Break')
:foreach (break_to_goto)
-------------------------------------------------------------------
-- Compile all headers elements, from last to first.
-- invariant: `body` is a block (not a statement)
local result = body
for i = #elements, 1, -1 do
local e = elements[i]
match e with
| `If{ cond } ->
result = { `If{ cond, result } }
| `Until{ cond } ->
result = +{block: if -{cond} then -{break_to_goto()} else -{result} end }
| `While{ cond } ->
if i==1 then result = { `While{ cond, result } } -- top-level while
else result = +{block: if -{cond} then -{result} else -{break_to_goto()} end } end
| `Forin{ ... } | `Fornum{ ... } ->
table.insert (e, result); result={e}
| _ -> require 'metalua.pprint'.printf("Bad loop header element %s", e)
end
end
-------------------------------------------------------------------
-- If some breaks had to be changed into gotos, insert the label
if exit_label then result = { result, `Label{ exit_label } } end
return result
end
--------------------------------------------------------------------------------
-- Improved "[...]" index operator:
-- * support for multi-indexes ("foo[bar, gnat]")
-- * support for ranges ("foo[bar ... gnat]")
--------------------------------------------------------------------------------
local function extend(M)
local _M = gg.future(M)
if SUPPORT_COMP_LISTS then
-- support for "for" / "if" comprehension suffixes in literal tables
local original_table_element = M.table.element
M.table.element = gg.expr{ name="table cell",
primary = original_table_element,
suffix = { name="table cell suffix",
{ "...", builder = dots_list_suffix_builder },
{ "for", _M.for_header, builder = for_list_suffix_builder },
{ "if", _M.expr, builder = if_list_suffix_builder } } }
M.table.content.builder = table_content_builder
end
if SUPPORT_IMPROVED_INDEXES then
-- Support for ranges and multiple indices in bracket suffixes
M.expr.suffix:del '['
M.expr.suffix:add{ name="table index/range",
"[", gg.list{
gg.sequence { _M.expr, gg.onkeyword{ "...", _M.expr } } ,
separators = { ",", ";" } },
"]", builder = index_builder }
end
if SUPPORT_IMPROVED_LOOPS then
local original_for_header = M.for_header
M.stat :del 'for'
M.stat :del 'while'
M.loop_suffix = gg.multisequence{
{ 'while', _M.expr, builder = |x| `Until{ `Op{ 'not', x[1] } } },
{ 'until', _M.expr, builder = |x| `Until{ x[1] } },
{ 'if', _M.expr, builder = |x| `If{ x[1] } },
{ 'for', original_for_header, builder = |x| x[1] } }
M.loop_suffix_list = gg.list{ _M.loop_suffix, terminators='do' }
M.stat :add{
'for', original_for_header, _M.loop_suffix_list, 'do', _M.block, 'end',
builder = loop_builder }
M.stat :add{
'while', _M.expr, _M.loop_suffix_list, 'do', _M.block, 'end',
builder = |x| loop_builder{ `While{x[1]}, x[2], x[3] } }
end
if SUPPORT_CONTINUE then
M.lexer :add 'continue'
M.stat :add{ 'continue', builder='Continue' }
end
end
return extend

View File

@ -0,0 +1,400 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--
-- Glossary:
--
-- * term_seq: the tested stuff, a sequence of terms
-- * pattern_element: might match one term of a term seq. Represented
-- as expression ASTs.
-- * pattern_seq: might match a term_seq
-- * pattern_group: several pattern seqs, one of them might match
-- the term seq.
-- * case: pattern_group * guard option * block
-- * match_statement: tested term_seq * case list
--
-- Hence a complete match statement is a:
--
-- { list(expr), list{ list(list(expr)), expr or false, block } }
--
-- Implementation hints
-- ====================
--
-- The implementation is made as modular as possible, so that parts
-- can be reused in other extensions. The privileged way to share
-- contextual information across functions is through the 'cfg' table
-- argument. Its fields include:
--
-- * code: code generated from pattern. A pattern_(element|seq|group)
-- is compiled as a sequence of instructions which will jump to
-- label [cfg.on_failure] if the tested term doesn't match.
--
-- * on_failure: name of the label where the code will jump if the
-- pattern doesn't match
--
-- * locals: names of local variables used by the pattern. This
-- includes bound variables, and temporary variables used to
-- destructure tables. Names are stored as keys of the table,
-- values are meaningless.
--
-- * after_success: label where the code must jump after a pattern
-- has successfully captured a term, the guard (if any) has succeeded,
-- and the conditional block has run.
--
-- * ntmp: number of temporary variables used to destructure tables
-- in the current case.
--
-- Code generation is performed by acc_xxx() functions, which accumulate
-- code in cfg.code:
--
-- * acc_test(test, cfg) will generate a jump to cfg.on_failure
-- *when the test returns TRUE*
--
-- * acc_stat accumulates a statement
--
-- * acc_assign accumulates an assignment statement, and makes sure that
-- the LHS variable is registered as a local in cfg.locals.
--
-------------------------------------------------------------------------------
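-- Usage sketch (assuming the extension is loaded via
-- -{ extension ("match", ...) }, as this very file does for itself):
--   match x, y with
--   | `Point{ a, b }, _ if a > 0 -> print ("positive x", a, b)
--   | _, nil                     -> print "no y"
--   | _                          -> print "fallthrough"
--   end
-- `Point is a hypothetical user tag; a term matched by no case raises a
-- "mismatch" error at runtime.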
-- TODO: hygiene wrt type()
-- TODO: cfg.ntmp isn't reset as often as it could. I'm not even sure
-- the corresponding locals are declared.
local checks = require 'checks'
local gg = require 'metalua.grammar.generator'
local pp = require 'metalua.pprint'
----------------------------------------------------------------------
-- This would have been best done through library 'metalua.walk',
-- but walk depends on match, so we have to break the dependency.
-- It replaces all instances of `...' in `ast' with `term', unless
-- it appears in a function.
----------------------------------------------------------------------
local function replace_dots (ast, term)
local function rec (node)
for i, child in ipairs(node) do
if type(child)~="table" then -- pass
elseif child.tag=='Dots' then
if term=='ambiguous' then
error ("You can't use `...' on the right of a match case when it appears "..
"more than once on the left")
else node[i] = term end
elseif child.tag=='Function' then return nil
else rec(child) end
end
end
return rec(ast)
end
local tmpvar_base = gg.gensym 'submatch.' [1]
local function next_tmpvar(cfg)
assert (cfg.ntmp, "No cfg.ntmp imbrication level in the match compiler")
cfg.ntmp = cfg.ntmp+1
return `Id{ tmpvar_base .. cfg.ntmp }
end
-- Code accumulators
local acc_stat = |x,cfg| table.insert (cfg.code, x)
local acc_test = |x,cfg| acc_stat(+{stat: if -{x} then -{`Goto{cfg.on_failure}} end}, cfg)
-- lhs :: `Id{ string }
-- rhs :: expr
local function acc_assign (lhs, rhs, cfg)
assert(lhs.tag=='Id')
cfg.locals[lhs[1]] = true
acc_stat (`Set{ {lhs}, {rhs} }, cfg)
end
local literal_tags = { String=1, Number=1, True=1, False=1, Nil=1 }
-- pattern :: `Id{ string }
-- term :: expr
local function id_pattern_element_builder (pattern, term, cfg)
assert (pattern.tag == "Id")
if pattern[1] == "_" then
-- "_" is used as a dummy var ==> no assignment, no == checking
cfg.locals._ = true
elseif cfg.locals[pattern[1]] then
-- This var is already bound ==> test for equality
acc_test (+{ -{term} ~= -{pattern} }, cfg)
else
-- Free var ==> bind it, and remember it for later linearity checking
acc_assign (pattern, term, cfg)
cfg.locals[pattern[1]] = true
end
end
-- mutually recursive with table_pattern_element_builder
local pattern_element_builder
-- pattern :: pattern and `Table{ }
-- term :: expr
local function table_pattern_element_builder (pattern, term, cfg)
local seen_dots, len = false, 0
acc_test (+{ type( -{term} ) ~= "table" }, cfg)
for i = 1, #pattern do
local key, sub_pattern
if pattern[i].tag=="Pair" then -- Explicit key/value pair
key, sub_pattern = unpack (pattern[i])
assert (literal_tags[key.tag], "Invalid key")
else -- Implicit key
len, key, sub_pattern = len+1, `Number{ len+1 }, pattern[i]
end
-- '...' can only appear in final position
-- Could be fixed actually...
assert (not seen_dots, "Wrongly placed `...' ")
if sub_pattern.tag == "Id" then
-- Optimization: save a useless [ v(n+1)=v(n).key ]
id_pattern_element_builder (sub_pattern, `Index{ term, key }, cfg)
if sub_pattern[1] ~= "_" then
acc_test (+{ -{sub_pattern} == nil }, cfg)
end
elseif sub_pattern.tag == "Dots" then
-- Remember where the capture is, and that arity checking shouldn't occur
seen_dots = true
else
-- Business as usual:
local v2 = next_tmpvar(cfg)
acc_assign (v2, `Index{ term, key }, cfg)
pattern_element_builder (sub_pattern, v2, cfg)
-- TODO: restore ntmp?
end
end
if seen_dots then -- remember how to retrieve `...'
-- FIXME: check, but there might be cases where the variable -{term}
-- will be overridden in contrived tables.
-- ==> save it now, and clean the setting statement if unused
if cfg.dots_replacement then cfg.dots_replacement = 'ambiguous'
else cfg.dots_replacement = +{ select (-{`Number{len}}, unpack(-{term})) } end
else -- Check arity
acc_test (+{ #-{term} ~= -{`Number{len}} }, cfg)
end
end
-- mutually recursive with pattern_element_builder
local eq_pattern_element_builder, regexp_pattern_element_builder
-- Concatenate code in [cfg.code], that will jump to label
-- [cfg.on_failure] if [pattern] doesn't match [term]. [pattern]
-- should be an identifier, or at least cheap to compute and
-- side-effects free.
--
-- pattern :: pattern_element
-- term :: expr
function pattern_element_builder (pattern, term, cfg)
if literal_tags[pattern.tag] then
acc_test (+{ -{term} ~= -{pattern} }, cfg)
elseif "Id" == pattern.tag then
id_pattern_element_builder (pattern, term, cfg)
elseif "Op" == pattern.tag and "div" == pattern[1] then
regexp_pattern_element_builder (pattern, term, cfg)
elseif "Op" == pattern.tag and "eq" == pattern[1] then
eq_pattern_element_builder (pattern, term, cfg)
elseif "Table" == pattern.tag then
table_pattern_element_builder (pattern, term, cfg)
else
error ("Invalid pattern at "..
tostring(pattern.lineinfo)..
": "..pp.tostring(pattern, {hide_hash=true}))
end
end
function eq_pattern_element_builder (pattern, term, cfg)
local _, pat1, pat2 = unpack (pattern)
local ntmp_save = cfg.ntmp
pattern_element_builder (pat1, term, cfg)
cfg.ntmp = ntmp_save
pattern_element_builder (pat2, term, cfg)
end
-- pattern :: `Op{ 'div', string, list{`Id string} or `Id{ string }}
-- term :: expr
function regexp_pattern_element_builder (pattern, term, cfg) -- fills the forward declaration above
local op, regexp, sub_pattern = unpack(pattern)
-- Sanity checks --
assert (op=='div', "Don't know what to do with that op in a pattern")
assert (regexp.tag=="String",
"Left hand side operand for '/' in a pattern must be "..
"a literal string representing a regular expression")
if sub_pattern.tag=="Table" then
for _, x in ipairs(sub_pattern) do
assert (x.tag=="Id" or x.tag=='Dots',
"Right hand side operand for '/' in a pattern must be "..
"a list of identifiers")
end
else
assert (sub_pattern.tag=="Id",
"Right hand side operand for '/' in a pattern must be "..
"an identifier or a list of identifiers")
end
-- Regexp patterns can only match strings
acc_test (+{ type(-{term}) ~= 'string' }, cfg)
-- put all captures in a list
local capt_list = +{ { string.strmatch(-{term}, -{regexp}) } }
-- save them in a var_n for recursive decomposition
local v2 = next_tmpvar(cfg)
acc_stat (+{stat: local -{v2} = -{capt_list} }, cfg)
-- was capture successful?
acc_test (+{ not next (-{v2}) }, cfg)
pattern_element_builder (sub_pattern, v2, cfg)
end
-- Jumps to [cfg.on_failure] if pattern_seq doesn't match
-- term_seq.
local function pattern_seq_builder (pattern_seq, term_seq, cfg)
if #pattern_seq ~= #term_seq then error ("Bad seq arity") end
cfg.locals = { } -- reset bound variables between alternatives
for i=1, #pattern_seq do
cfg.ntmp = 1 -- reset the tmp var generator
pattern_element_builder(pattern_seq[i], term_seq[i], cfg)
end
end
--------------------------------------------------
-- for each case i:
-- pattern_seq_builder_i:
-- * on failure, go to on_failure_i
-- * on success, go to on_success
-- label on_success:
-- block
-- goto after_success
-- label on_failure_i
--------------------------------------------------
local function case_builder (case, term_seq, cfg)
local patterns_group, guard, block = unpack(case)
local on_success = gg.gensym 'on_success' [1]
for i = 1, #patterns_group do
local pattern_seq = patterns_group[i]
cfg.on_failure = gg.gensym 'match_fail' [1]
cfg.dots_replacement = false
pattern_seq_builder (pattern_seq, term_seq, cfg)
if i<#patterns_group then
acc_stat (`Goto{on_success}, cfg)
acc_stat (`Label{cfg.on_failure}, cfg)
end
end
acc_stat (`Label{on_success}, cfg)
if guard then acc_test (+{not -{guard}}, cfg) end
if cfg.dots_replacement then
replace_dots (block, cfg.dots_replacement)
end
block.tag = 'Do'
acc_stat (block, cfg)
acc_stat (`Goto{cfg.after_success}, cfg)
acc_stat (`Label{cfg.on_failure}, cfg)
end
local function match_builder (x)
local term_seq, cases = unpack(x)
local cfg = {
code = `Do{ },
after_success = gg.gensym "_after_success" }
-- Some sharing issues occur when modifying term_seq,
-- so it's replaced by a copy new_term_seq.
-- TODO: clean that up, and re-suppress the useless copies
-- (cf. remarks about capture bug below).
local new_term_seq = { }
local match_locals
-- Make sure that all tested terms are variables or literals
for i=1, #term_seq do
local t = term_seq[i]
-- Capture problem: the following would compile wrongly:
-- `match x with x -> end'
-- Temporary workaround: suppress the condition, so that
-- all external variables are copied into unique names.
--if t.tag ~= 'Id' and not literal_tags[t.tag] then
local v = gg.gensym 'v'
if not match_locals then match_locals = `Local{ {v}, {t} } else
table.insert(match_locals[1], v)
table.insert(match_locals[2], t)
end
new_term_seq[i] = v
--end
end
term_seq = new_term_seq
if match_locals then acc_stat(match_locals, cfg) end
for i=1, #cases do
local case_cfg = {
after_success = cfg.after_success,
code = `Do{ }
-- locals = { } -- unnecessary, done by pattern_seq_builder
}
case_builder (cases[i], term_seq, case_cfg)
if next (case_cfg.locals) then
local case_locals = { }
table.insert (case_cfg.code, 1, `Local{ case_locals, { } })
for v, _ in pairs (case_cfg.locals) do
table.insert (case_locals, `Id{ v })
end
end
acc_stat(case_cfg.code, cfg)
end
local li = `String{tostring(cases.lineinfo)}
acc_stat(+{error('mismatch at '..-{li})}, cfg)
acc_stat(`Label{cfg.after_success}, cfg)
return cfg.code
end
----------------------------------------------------------------------
-- Syntactical front-end
----------------------------------------------------------------------
local function extend(M)
local _M = gg.future(M)
checks('metalua.compiler.parser')
M.lexer:add{ "match", "with", "->" }
M.block.terminators:add "|"
local match_cases_list_parser = gg.list{ name = "match cases list",
gg.sequence{ name = "match case",
gg.list{ name = "match case patterns list",
primary = _M.expr_list,
separators = "|",
terminators = { "->", "if" } },
gg.onkeyword{ "if", _M.expr, consume = true },
"->",
_M.block },
separators = "|",
terminators = "end" }
M.stat:add{ name = "match statement",
"match",
_M.expr_list,
"with", gg.optkeyword "|",
match_cases_list_parser,
"end",
builder = |x| match_builder{ x[1], x[3] } }
end
return extend

View File

@ -0,0 +1,834 @@
--------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Summary: parser generator. Collection of higher-order functors
-- used to build and combine parsers. Relies on a lexer that
-- supports the same API as the one exposed in mll.lua.
--
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--
-- Exported API:
--
-- Parser generators:
-- * [gg.sequence()]
-- * [gg.multisequence()]
-- * [gg.expr()]
-- * [gg.list()]
-- * [gg.onkeyword()]
-- * [gg.optkeyword()]
--
-- Other functions:
-- * [gg.parse_error()]
-- * [gg.make_parser()]
-- * [gg.is_parser()]
--
--------------------------------------------------------------------------------
local M = { }
local checks = require 'checks'
local lexer = require 'metalua.grammar.lexer'
local pp = require 'metalua.pprint'
--------------------------------------------------------------------------------
-- Symbol generator: [gensym()] returns a guaranteed-to-be-unique identifier.
-- The main purpose is to avoid variable capture in macros.
--
-- If a string is passed as an argument, this string will be part of the
-- id name (helpful for macro debugging)
--------------------------------------------------------------------------------
local gensymidx = 0
function M.gensym (arg)
gensymidx = gensymidx + 1
return { tag="Id", string.format(".%i.%s", gensymidx, arg or "")}
end
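-- E.g. (sketch) two successive calls yield distinct identifiers:
--   M.gensym "x" --> `Id{ ".1.x" }
--   M.gensym "x" --> `Id{ ".2.x" }
-- the embedded dots guarantee the names cannot clash with identifiers
-- spelled by the user.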
-------------------------------------------------------------------------------
-- parser metatable, which maps __call to method parse, and adds some
-- error tracing boilerplate.
-------------------------------------------------------------------------------
local parser_metatable = { }
function parser_metatable :__call (lx, ...)
return self :parse (lx, ...)
end
-------------------------------------------------------------------------------
-- Turn a table into a parser, mainly by setting the metatable.
-------------------------------------------------------------------------------
function M.make_parser(kind, p)
p.kind = kind
if not p.transformers then p.transformers = { } end
function p.transformers:add (x)
table.insert (self, x)
end
setmetatable (p, parser_metatable)
return p
end
-------------------------------------------------------------------------------
-- Return true iff [x] is a parser.
-- If it's a gg-generated parser, return the name of its kind.
-------------------------------------------------------------------------------
function M.is_parser (x)
return type(x)=="function" or getmetatable(x)==parser_metatable and x.kind
end
-------------------------------------------------------------------------------
-- Parse a sequence, without applying builder nor transformers.
-------------------------------------------------------------------------------
local function raw_parse_sequence (lx, p)
local r = { }
for i=1, #p do
local e=p[i]
if type(e) == "string" then
local kw = lx :next()
if not lx :is_keyword (kw, e) then
M.parse_error(
lx, "A keyword was expected, probably `%s'.", e)
end
elseif M.is_parser (e) then
table.insert (r, e(lx))
else -- Invalid parser definition, this is *not* a parsing error
error(string.format(
"Sequence `%s': element #%i is neither a string nor a parser: %s",
p.name, i, pp.tostring(e)))
end
end
return r
end
-------------------------------------------------------------------------------
-- Parse a multisequence, without applying multisequence transformers.
-- The sequences are completely parsed.
-------------------------------------------------------------------------------
local function raw_parse_multisequence (lx, sequence_table, default)
local seq_parser = sequence_table[lx:is_keyword(lx:peek())]
if seq_parser then return seq_parser (lx)
elseif default then return default (lx)
else return false end
end
-------------------------------------------------------------------------------
-- Applies all transformers listed in parser on ast.
-------------------------------------------------------------------------------
local function transform (ast, parser, fli, lli)
if parser.transformers then
for _, t in ipairs (parser.transformers) do ast = t(ast) or ast end
end
if type(ast) == 'table' then
local ali = ast.lineinfo
if not ali or ali.first~=fli or ali.last~=lli then
ast.lineinfo = lexer.new_lineinfo(fli, lli)
end
end
return ast
end
-------------------------------------------------------------------------------
-- Generate a traceable parsing error (not implemented yet)
-------------------------------------------------------------------------------
function M.parse_error(lx, fmt, ...)
local li = lx:lineinfo_left()
local file, line, column, offset, positions
if li then
file, line, column, offset = li.source, li.line, li.column, li.offset
positions = { first = li, last = li }
else
line, column, offset = -1, -1, -1
end
local msg = string.format("line %i, char %i: "..fmt, line, column, ...)
if file and file~='?' then msg = "file "..file..", "..msg end
local src = lx.src
if offset>0 and src then
local i, j = offset, offset
while src:sub(i,i) ~= '\n' and i>=0 do i=i-1 end
while src:sub(j,j) ~= '\n' and j<=#src do j=j+1 end
local srcline = src:sub (i+1, j-1)
local idx = string.rep (" ", column).."^"
msg = string.format("%s\n>>> %s\n>>> %s", msg, srcline, idx)
end
--lx :kill()
error(msg)
end
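-- E.g. (sketch) a failure at line 3, char 7 of file "foo.lua", with the
-- source available, produces a message shaped like:
--   file foo.lua, line 3, char 7: A keyword was expected, probably `then'.
--   >>> if x 1 then
--   >>>        ^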
-------------------------------------------------------------------------------
--
-- Sequence parser generator
--
-------------------------------------------------------------------------------
-- Input fields:
--
-- * [builder]: how to build an AST out of sequence parts. let [x] be the list
-- of subparser results (keywords are simply omitted). [builder] can be:
-- - [nil], in which case the result of parsing is simply [x]
-- - a string, which is then put as a tag on [x]
-- - a function, which takes [x] as a parameter and returns an AST.
--
-- * [name]: the name of the parser. Used for debug messages
--
-- * [transformers]: a list of AST->AST functions, applied in order on ASTs
-- returned by the parser.
--
-- * Table-part entries correspond to keywords (strings) and subparsers
-- (functions and callable objects).
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "sequence"
-- * [name] is set, if it wasn't in the input.
--
-------------------------------------------------------------------------------
function M.sequence (p)
M.make_parser ("sequence", p)
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p:parse (lx)
-- Raw parsing:
local fli = lx:lineinfo_right()
local seq = raw_parse_sequence (lx, self)
local lli = lx:lineinfo_left()
-- Builder application:
local builder, tb = self.builder, type (self.builder)
if tb == "string" then seq.tag = builder
elseif tb == "function" or builder and builder.__call then seq = builder(seq)
elseif builder == nil then -- nothing
else error ("Invalid builder of type "..tb.." in sequence") end
seq = transform (seq, self, fli, lli)
assert (not seq or seq.lineinfo)
return seq
end
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
-- Try to build a proper name
if p.name then
-- don't touch existing name
elseif type(p[1])=="string" then -- find name based on 1st keyword
if #p==1 then p.name=p[1]
elseif type(p[#p])=="string" then
p.name = p[1] .. " ... " .. p[#p]
else p.name = p[1] .. " ..." end
else -- can't find a decent name
p.name = "unnamed_sequence"
end
return p
end --</sequence>
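-- Usage sketch, mirroring how the statement parser declares Lua's
-- [while] loop (cf. the stat module):
--   gg.sequence{ "while", M.expr, "do", M.block, "end", builder = "While" }
-- Keywords are consumed and dropped, the two sub-parser results are
-- collected in a list, and the string builder tags it `While{ cond, body }.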
-------------------------------------------------------------------------------
--
-- Multiple, keyword-driven, sequence parser generator
--
-------------------------------------------------------------------------------
-- in [p], useful fields are:
--
-- * [transformers]: as usual
--
-- * [name]: as usual
--
-- * Table-part entries must be sequence parsers, or tables which can
-- be turned into a sequence parser by [gg.sequence]. These
-- sequences must start with a keyword, and this initial keyword
-- must be different for each sequence. The table-part entries will
-- be removed after [gg.multisequence] returns.
--
-- * [default]: the parser to run if the next keyword in the lexer is
-- none of the registered initial keywords. If there's no default
-- parser and no suitable initial keyword, the multisequence parser
-- simply returns [false].
--
-- After creation, the following fields are added:
--
-- * [parse] the parsing function lexer->AST
--
-- * [sequences] the table of sequences, indexed by initial keywords.
--
-- * [add] method takes a sequence parser or a config table for
-- [gg.sequence], and adds/replaces the corresponding sequence
-- parser. If the keyword was already used, the former sequence is
-- removed and a warning is issued.
--
-- * [get] method returns a sequence by its initial keyword
--
-- * [kind] == "multisequence"
--
-------------------------------------------------------------------------------
function M.multisequence (p)
M.make_parser ("multisequence", p)
-------------------------------------------------------------------
-- Add a sequence (might be just a config table for [gg.sequence])
-------------------------------------------------------------------
function p :add (s)
-- compile if necessary:
local keyword = type(s)=='table' and s[1]
if type(s)=='table' and not M.is_parser(s) then M.sequence(s) end
if M.is_parser(s)~='sequence' or type(keyword)~='string' then
if self.default then -- two defaults
error ("In a multisequence parser, all but one sequences "..
"must start with a keyword")
else self.default = s end -- first default
else
if self.sequences[keyword] then -- duplicate keyword
-- TODO: warn that initial keyword `keyword` is overloaded in multiseq
end
self.sequences[keyword] = s
end
end -- </multisequence.add>
-------------------------------------------------------------------
-- Get the sequence starting with this keyword. [kw :: string]
-------------------------------------------------------------------
function p :get (kw) return self.sequences [kw] end
-------------------------------------------------------------------
-- Remove the sequence starting with keyword [kw :: string]
-------------------------------------------------------------------
function p :del (kw)
if not self.sequences[kw] then
-- TODO: warn that we try to delete a non-existent entry
end
local removed = self.sequences[kw]
self.sequences[kw] = nil
return removed
end
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p :parse (lx)
local fli = lx:lineinfo_right()
local x = raw_parse_multisequence (lx, self.sequences, self.default)
local lli = lx:lineinfo_left()
return transform (x, self, fli, lli)
end
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
-- Register the sequences passed to the constructor. They're moved
-- from the array part of the parser to the hash part of field
-- [sequences].
p.sequences = { }
for i=1, #p do p :add (p[i]); p[i] = nil end
-- FIXME: why is this commented out?
--if p.default and not is_parser(p.default) then sequence(p.default) end
return p
end --</multisequence>
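-- Usage sketch: the statement parser M.stat is such a multisequence,
-- and extensions mutate it at runtime, e.g. (cf. the loops extension;
-- argument names are stand-ins):
--   M.stat :del 'for'  -- drop the stock sequence
--   M.stat :add{ 'for', for_header, loop_suffix_list, 'do', block, 'end',
--                builder = loop_builder }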
-------------------------------------------------------------------------------
--
-- Expression parser generator
--
-------------------------------------------------------------------------------
--
-- Expression configuration relies on three tables: [prefix], [infix]
-- and [suffix]. Moreover, the primary parser can be replaced by a
-- table: in this case the [primary] table will be passed to
-- [gg.multisequence] to create a parser.
--
-- Each of these tables is a modified multisequence parser: the
-- differences with respect to regular multisequence config tables are:
--
-- * the builder takes specific parameters:
-- - for [prefix], it takes the result of the prefix sequence parser,
-- and the prefixed expression
-- - for [infix], it takes the left-hand-side expression, the results
-- of the infix sequence parser, and the right-hand-side expression.
-- - for [suffix], it takes the suffixed expression, and the result
-- of the suffix sequence parser.
--
-- * the default field is a list, with parameters:
-- - [parser] the raw parsing function
-- - [transformers], as usual
-- - [prec], the operator's precedence
-- - [assoc] for [infix] table, the operator's associativity, which
-- can be "left", "right" or "flat" (default to left)
--
-- In [p], useful fields are:
-- * [transformers]: as usual
-- * [name]: as usual
-- * [primary]: the atomic expression parser, or a multisequence config
-- table (mandatory)
-- * [prefix]: prefix operators config table, see above.
-- * [infix]: infix operators config table, see above.
-- * [suffix]: suffix operators config table, see above.
--
-- After creation, these fields are added:
-- * [kind] == "expr"
-- * [parse] as usual
-- * each table is turned into a multisequence, and therefore has an
-- [add] method
--
-------------------------------------------------------------------------------
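-- Configuration sketch (the [atom_parser] name and the precedence
-- numbers are assumptions, not taken from this file):
--   local expr = gg.expr{ name = "expression",
--       primary = atom_parser,
--       infix = {
--           { "+", prec = 60, assoc = "left",
--             builder = function (a, _, b) return { tag="Op", "add", a, b } end },
--           { "^", prec = 90, assoc = "right",
--             builder = function (a, _, b) return { tag="Op", "pow", a, b } end } } }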
function M.expr (p)
M.make_parser ("expr", p)
-------------------------------------------------------------------
-- parser method.
-- In addition to the lexer, it takes an optional precedence:
-- it won't read expressions whose precedence is lower or equal
-- to [prec].
-------------------------------------------------------------------
function p :parse (lx, prec)
prec = prec or 0
------------------------------------------------------
-- Extract the right parser and the corresponding
-- options table, for (pre|in|suff)fix operators.
-- Options include prec, assoc, transformers.
------------------------------------------------------
local function get_parser_info (tab)
local p2 = tab :get (lx :is_keyword (lx :peek()))
if p2 then -- keyword-based sequence found
local function parser(lx) return raw_parse_sequence(lx, p2) end
return parser, p2
else -- Got to use the default parser
local d = tab.default
if d then return d.parse or d.parser, d
else return false, false end
end
end
------------------------------------------------------
-- Look for a prefix sequence. Multiple prefixes are
-- handled through the recursive [p.parse] call.
-- Notice the double-transform: one for the primary
-- expr, and one for the one with the prefix op.
------------------------------------------------------
local function handle_prefix ()
local fli = lx :lineinfo_right()
local p2_func, p2 = get_parser_info (self.prefix)
local op = p2_func and p2_func (lx)
if op then -- Keyword-based sequence found
local ili = lx :lineinfo_right() -- Intermediate LineInfo
local e = p2.builder (op, self :parse (lx, p2.prec))
local lli = lx :lineinfo_left()
return transform (transform (e, p2, ili, lli), self, fli, lli)
else -- No prefix found, get a primary expression
local e = self.primary(lx)
local lli = lx :lineinfo_left()
return transform (e, self, fli, lli)
end
end --</expr.parse.handle_prefix>
------------------------------------------------------
-- Look for an infix sequence+right-hand-side operand.
-- Return the whole binary expression result,
-- or false if no operator was found.
------------------------------------------------------
local function handle_infix (e)
local p2_func, p2 = get_parser_info (self.infix)
if not p2 then return false end
-----------------------------------------
-- Handle flattening operators: gather all operands
-- of the series in [list]; when a different operator
-- is found, stop, build from [list], [transform] and
-- return.
-----------------------------------------
if (not p2.prec or p2.prec>prec) and p2.assoc=="flat" then
local fli = lx:lineinfo_right()
local pflat, list = p2, { e }
repeat
local op = p2_func(lx)
if not op then break end
table.insert (list, self:parse (lx, p2.prec))
local _ -- We only care about checking that p2==pflat
_, p2 = get_parser_info (self.infix)
until p2 ~= pflat
local e2 = pflat.builder (list)
local lli = lx:lineinfo_left()
return transform (transform (e2, pflat, fli, lli), self, fli, lli)
-----------------------------------------
-- Handle regular infix operators: [e] the LHS is known,
-- just gather the operator and [e2] the RHS.
-- Result goes in [e3].
-----------------------------------------
elseif p2.prec and p2.prec>prec or
p2.prec==prec and p2.assoc=="right" then
local fli = e.lineinfo.first -- lx:lineinfo_right()
local op = p2_func(lx)
if not op then return false end
local e2 = self:parse (lx, p2.prec)
local e3 = p2.builder (e, op, e2)
local lli = lx:lineinfo_left()
return transform (transform (e3, p2, fli, lli), self, fli, lli)
-----------------------------------------
-- Check for non-associative operators, and complain if applicable.
-----------------------------------------
elseif p2.assoc=="none" and p2.prec==prec then
M.parse_error (lx, "non-associative operator!")
-----------------------------------------
-- No infix operator suitable at that precedence
-----------------------------------------
else return false end
end --</expr.parse.handle_infix>
------------------------------------------------------
-- Look for a suffix sequence.
-- Return the result of suffix operator on [e],
-- or false if no operator was found.
------------------------------------------------------
local function handle_suffix (e)
local p2_func, p2 = get_parser_info (self.suffix)
if not p2 then return false end
if not p2.prec or p2.prec>=prec then
local fli = e.lineinfo.first
local op = p2_func(lx)
if not op then return false end
local lli = lx:lineinfo_left()
e = p2.builder (e, op)
e = transform (transform (e, p2, fli, lli), self, fli, lli)
return e
end
return false
end --</expr.parse.handle_suffix>
------------------------------------------------------
-- Parser body: read suffix and (infix+operand)
-- extensions as long as we're able to fetch more at
-- this precedence level.
------------------------------------------------------
local e = handle_prefix()
repeat
local x = handle_suffix (e); e = x or e
local y = handle_infix (e); e = y or e
until not (x or y)
-- No transform: it already happened in operators handling
return e
end --</expr.parse>
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
if not p.primary then p.primary=p[1]; p[1]=nil end
for _, t in ipairs{ "primary", "prefix", "infix", "suffix" } do
if not p[t] then p[t] = { } end
if not M.is_parser(p[t]) then M.multisequence(p[t]) end
end
function p:add(...) return self.primary:add(...) end
return p
end --</expr>
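-------------------------------------------------------------------
-- Usage sketch (illustrative only; assumes the module is required
-- as [gg] and that [number] is an atomic parser producing
-- `Number{ } nodes -- both names are placeholders):
--
-- local arith = gg.expr{
--    primary = number,
--    prefix  = { { "-", prec = 80,
--                  builder = function(op, e) return { tag="Op", "unm", e } end } },
--    infix   = { { "+", prec = 60, assoc = "left",
--                  builder = function(a, op, b) return { tag="Op", "add", a, b } end },
--              { "^", prec = 90, assoc = "right",
--                  builder = function(a, op, b) return { tag="Op", "pow", a, b } end } } }
--
-- arith :parse (lx) then reads e.g. "-1+2^3^4" honoring the
-- declared precedences (80/60/90) and associativities.
-------------------------------------------------------------------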
-------------------------------------------------------------------------------
--
-- List parser generator
--
-------------------------------------------------------------------------------
-- In [p], the following fields can be provided in input:
--
-- * [builder]: takes list of subparser results, returns AST
-- * [transformers]: as usual
-- * [name]: as usual
--
-- * [terminators]: list of strings representing the keywords which
-- might mark the end of the list. When [terminators] is non-empty,
-- the parsed list is allowed to be empty. A string is treated as a
-- single-element table, whose element is that string, e.g. ["do"]
-- is the same as [{"do"}].
--
-- * [separators]: list of strings representing the keywords which can
-- separate elements of the list. When non-empty, one of these
-- keywords has to be found between each pair of elements. Lack of a
-- separator indicates the end of the list. A string is treated as a
-- single-element table, whose element is that string, e.g. ["do"]
-- is the same as [{"do"}]. If [terminators] is empty/nil, then
-- [separators] has to be non-empty.
--
-- After creation, the following fields are added:
-- * [parse] the parsing function lexer->AST
-- * [kind] == "list"
--
-------------------------------------------------------------------------------
function M.list (p)
M.make_parser ("list", p)
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p :parse (lx)
------------------------------------------------------
-- Used to quickly check whether there's a terminator
-- or a separator immediately ahead
------------------------------------------------------
local function peek_is_in (keywords)
return keywords and lx:is_keyword(lx:peek(), unpack(keywords)) end
local x = { }
local fli = lx :lineinfo_right()
-- if there's a terminator to start with, don't bother trying
local is_empty_list = self.terminators and (peek_is_in (self.terminators) or lx:peek().tag=="Eof")
if not is_empty_list then
repeat
local item = self.primary(lx)
table.insert (x, item) -- read one element
until
-- There's a separator list specified, and next token isn't in it.
-- Otherwise, consume it with [lx:next()]
self.separators and not(peek_is_in (self.separators) and lx:next()) or
-- Terminator token ahead
peek_is_in (self.terminators) or
-- Last reason: end of file reached
lx:peek().tag=="Eof"
end
local lli = lx:lineinfo_left()
-- Apply the builder. It can be a string, or a callable value,
-- or simply nothing.
local b = self.builder
if b then
if type(b)=="string" then x.tag = b -- b is a string, use it as a tag
elseif type(b)=="function" then x=b(x)
else
local bmt = getmetatable(b)
if bmt and bmt.__call then x=b(x) end
end
end
return transform (x, self, fli, lli)
end --</list.parse>
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
if not p.primary then p.primary = p[1]; p[1] = nil end
if type(p.terminators) == "string" then p.terminators = { p.terminators }
elseif p.terminators and #p.terminators == 0 then p.terminators = nil end
if type(p.separators) == "string" then p.separators = { p.separators }
elseif p.separators and #p.separators == 0 then p.separators = nil end
return p
end --</list>
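-------------------------------------------------------------------
-- Usage sketch (illustrative only; [expr] stands for an expression
-- parser and is not defined in this file): a comma-separated
-- argument list, terminated by ")":
--
-- local args = gg.list{ expr,
--    separators = ",", terminators = ")", builder = "ArgList" }
--
-- Since [terminators] is non-empty, an empty argument list parses
-- successfully; the string builder simply tags the result list.
-------------------------------------------------------------------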
-------------------------------------------------------------------------------
--
-- Keyword-conditioned parser generator
--
-------------------------------------------------------------------------------
--
-- Only apply a parser if a given keyword is found. The result of
-- [gg.onkeyword] parser is the result of the subparser (modulo
-- [transformers] applications).
--
-- lineinfo: the keyword is *not* included in the boundaries of the
-- resulting lineinfo. A review of all usages of gg.onkeyword() in the
-- implementation of metalua has shown that it was the appropriate choice
-- in every case.
--
-- Input fields:
--
-- * [name]: as usual
--
-- * [transformers]: as usual
--
-- * [peek]: if non-nil, the conditioning keyword is left in the lexeme
-- stream instead of being consumed.
--
-- * [primary]: the subparser.
--
-- * [keywords]: list of strings representing triggering keywords.
--
-- * Table-part entries can contain strings, and/or exactly one parser.
-- Strings are put in [keywords], and the parser is put in [primary].
--
-- After the call, the following fields will be set:
--
-- * [parse] the parsing method
-- * [kind] == "onkeyword"
-- * [primary]
-- * [keywords]
--
-------------------------------------------------------------------------------
function M.onkeyword (p)
M.make_parser ("onkeyword", p)
-------------------------------------------------------------------
-- Parsing method
-------------------------------------------------------------------
function p :parse (lx)
if lx :is_keyword (lx:peek(), unpack(self.keywords)) then
local fli = lx:lineinfo_right()
if not self.peek then lx:next() end
local content = self.primary (lx)
local lli = lx:lineinfo_left()
local li = content.lineinfo or { }
fli, lli = li.first or fli, li.last or lli
return transform (content, p, fli, lli)
else return false end
end
-------------------------------------------------------------------
-- Construction
-------------------------------------------------------------------
if not p.keywords then p.keywords = { } end
for _, x in ipairs(p) do
if type(x)=="string" then table.insert (p.keywords, x)
else assert (not p.primary and M.is_parser (x)); p.primary = x end
end
assert (next (p.keywords), "Missing trigger keyword in gg.onkeyword")
assert (p.primary, 'no primary parser in gg.onkeyword')
return p
end --</onkeyword>
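-------------------------------------------------------------------
-- Usage sketch (illustrative only; [block] stands for a
-- statement-block parser, not defined here):
--
-- local opt_else = gg.onkeyword{ "else", block }
--
-- opt_else(lx) consumes "else" and returns [block]'s AST when
-- "else" is the next token, and returns false otherwise.
-------------------------------------------------------------------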
-------------------------------------------------------------------------------
--
-- Optional keyword consumer pseudo-parser generator
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function. That function parses
-- one of the keywords passed as parameters, and returns it. It returns
-- [false] if no matching keyword is found.
--
-- Notice that tokens returned by the lexer already carry lineinfo,
-- so there's no need to add it here, as is usually done through
-- transform() calls.
-------------------------------------------------------------------------------
function M.optkeyword (...)
local args = {...}
if type (args[1]) == "table" then
assert (#args == 1)
args = args[1]
end
for _, v in ipairs(args) do assert (type(v)=="string") end
return function (lx)
local x = lx:is_keyword (lx:peek(), unpack (args))
if x then lx:next(); return x
else return false end
end
end
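-------------------------------------------------------------------
-- Usage sketch (illustrative only):
--
-- local semicolon = gg.optkeyword ";"
-- semicolon(lx) --> ";" if a ";" keyword is ahead (consuming it),
--               --> false otherwise.
-------------------------------------------------------------------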
-------------------------------------------------------------------------------
--
-- Run a parser with a special lexer
--
-------------------------------------------------------------------------------
--
-- This doesn't return a real parser, just a function.
-- First argument is the lexer class to be used with the parser,
-- 2nd is the parser itself.
-- The resulting parser returns whatever the argument parser does.
--
-------------------------------------------------------------------------------
function M.with_lexer(new_lexer, parser)
-------------------------------------------------------------------
-- Most gg functions take their parameters in a table, so it's
-- better to silently accept when with_lexer{ } is called with
-- its arguments in a list:
-------------------------------------------------------------------
if not parser and #new_lexer==2 and type(new_lexer[1])=='table' then
return M.with_lexer(unpack(new_lexer))
end
-------------------------------------------------------------------
-- Save the current lexer, switch it for the new one, run the parser,
-- restore the previous lexer, even if the parser caused an error.
-------------------------------------------------------------------
return function (lx)
local old_lexer = getmetatable(lx)
lx:sync()
setmetatable(lx, new_lexer)
local status, result = pcall(parser, lx)
lx:sync()
setmetatable(lx, old_lexer)
if status then return result else error(result) end
end
end
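-------------------------------------------------------------------
-- Usage sketch (illustrative only; [q_lexer] and [q_parser] are
-- placeholders for an alternative lexer class and a parser meant
-- for it):
--
-- local p = gg.with_lexer(q_lexer, q_parser)
-- p(lx) -- runs q_parser on lx retyped as a q_lexer stream, then
--       -- restores lx's original metatable, even on error.
-------------------------------------------------------------------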
--------------------------------------------------------------------------------
--
-- Wrap a parser so that it fails when its result is an empty list.
--
--------------------------------------------------------------------------------
function M.nonempty(primary)
local p = M.make_parser('non-empty list', { primary = primary, name=primary.name })
function p :parse (lx)
local fli = lx:lineinfo_right()
local content = self.primary (lx)
local lli = lx:lineinfo_left()
local li = content.lineinfo or { }
fli, lli = li.first or fli, li.last or lli
if #content == 0 then
M.parse_error (lx, "`%s' must not be empty.", self.name or "list")
else
return transform (content, self, fli, lli)
end
end
return p
end
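-------------------------------------------------------------------
-- Usage sketch (illustrative only; [args] stands for a gg.list
-- parser defined elsewhere):
--
-- local args1 = gg.nonempty(args)
-- args1 :parse (lx) -- returns the list AST, or raises a parse
--                   -- error when the list comes back empty.
-------------------------------------------------------------------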
local FUTURE_MT = { }
function FUTURE_MT:__tostring() return "<Proxy parser module>" end
function FUTURE_MT:__newindex(key, value) error "don't write in futures" end
function FUTURE_MT :__index (parser_name)
return function(...)
local p, m = rawget(self, '__path'), self.__module
if p then for _, name in ipairs(p) do
m=rawget(m, name)
if not m then error ("Submodule '"..name.."' undefined") end
end end
local f = rawget(m, parser_name)
if not f then error ("Parser '"..parser_name.."' undefined") end
return f(...)
end
end
function M.future(module, ...)
checks('table')
local path = ... and {...}
if path then for _, x in ipairs(path) do
assert(type(x)=='string', "Bad future arg")
end end
local self = { __module = module,
__path = path }
return setmetatable(self, FUTURE_MT)
end
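-------------------------------------------------------------------
-- Usage sketch (illustrative only; [mlp] stands for a parser
-- module still under construction):
--
-- local expr = gg.future(mlp).expr
-- expr(lx) -- looks up mlp.expr at call time, so [mlp.expr] may be
--          -- defined after this reference is taken.
-------------------------------------------------------------------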
return M

View File

@ -0,0 +1,672 @@
-------------------------------------------------------------------------------
-- Copyright (c) 2006-2013 Fabien Fleutot and others.
--
-- All rights reserved.
--
-- This program and the accompanying materials are made available
-- under the terms of the Eclipse Public License v1.0 which
-- accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- This program and the accompanying materials are also made available
-- under the terms of the MIT public license which accompanies this
-- distribution, and is available at http://www.lua.org/license.html
--
-- Contributors:
-- Fabien Fleutot - API and implementation
--
-------------------------------------------------------------------------------
local checks = require 'checks'
local M = { }
local lexer = { alpha={ }, sym={ } }
lexer.__index=lexer
lexer.__type='lexer.stream'
M.lexer = lexer
local debugf = function() end
-- local debugf=printf
----------------------------------------------------------------------
-- Some locale settings produce bad results, e.g. the French locale
-- expects float numbers to use commas instead of periods.
-- TODO: change the number parser into something locale-independent;
-- locales are nasty.
----------------------------------------------------------------------
os.setlocale('C')
local MT = { }
M.metatables=MT
----------------------------------------------------------------------
-- Create a new metatable, for a new class of objects.
----------------------------------------------------------------------
local function new_metatable(name)
local mt = { __type = 'lexer.'..name };
mt.__index = mt
MT[name] = mt
end
----------------------------------------------------------------------
-- Position: represent a point in a source file.
----------------------------------------------------------------------
new_metatable 'position'
local position_idx=1
function M.new_position(line, column, offset, source)
checks('number', 'number', 'number', 'string')
local id = position_idx; position_idx = position_idx+1
return setmetatable({line=line, column=column, offset=offset,
source=source, id=id}, MT.position)
end
function MT.position :__tostring()
return string.format("<%s%s|L%d|C%d|K%d>",
self.comments and "C|" or "",
self.source, self.line, self.column, self.offset)
end
----------------------------------------------------------------------
-- Position factory: convert offsets into line/column/offset positions.
----------------------------------------------------------------------
new_metatable 'position_factory'
function M.new_position_factory(src, src_name)
-- assert(type(src)=='string')
-- assert(type(src_name)=='string')
local lines = { 1 }
for offset in src :gmatch '\n()' do table.insert(lines, offset) end
local max = #src+1
table.insert(lines, max+1) -- +1 includes Eof
return setmetatable({ src_name=src_name, line2offset=lines, max=max },
MT.position_factory)
end
function MT.position_factory :get_position (offset)
-- assert(type(offset)=='number')
assert(offset<=self.max)
local line2offset = self.line2offset
local left = self.last_left or 1
if offset<line2offset[left] then left=1 end
local right = left+1
if line2offset[right]<=offset then right = right+1 end
if line2offset[right]<=offset then right = #line2offset end
while true do
-- print (" trying lines "..left.."/"..right..", offsets "..line2offset[left]..
-- "/"..line2offset[right].." for offset "..offset)
-- assert(line2offset[left]<=offset)
-- assert(offset<line2offset[right])
-- assert(left<right)
if left+1==right then break end
local middle = math.floor((left+right)/2)
if line2offset[middle]<=offset then left=middle else right=middle end
end
-- assert(left+1==right)
-- printf("found that offset %d is between %d and %d, hence on line %d",
-- offset, line2offset[left], line2offset[right], left)
local line = left
local column = offset - line2offset[line] + 1
self.last_left = left
return M.new_position(line, column, offset, self.src_name)
end
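----------------------------------------------------------------------
-- Worked example (illustrative only): for src = "ab\ncd",
-- [line2offset] is {1, 4, 7}, so
--
-- local pf = M.new_position_factory("ab\ncd", "example")
-- pf :get_position (5) --> <example|L2|C2|K5>
--
-- i.e. offset 5 bisects to line 2, column 5 - 4 + 1 = 2 (the 'd').
----------------------------------------------------------------------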
----------------------------------------------------------------------
-- Lineinfo: represent a node's range in a source file;
-- embed information about prefix and suffix comments.
----------------------------------------------------------------------
new_metatable 'lineinfo'
function M.new_lineinfo(first, last)
checks('lexer.position', 'lexer.position')
return setmetatable({first=first, last=last}, MT.lineinfo)
end
function MT.lineinfo :__tostring()
local fli, lli = self.first, self.last
local line = fli.line; if line~=lli.line then line =line ..'-'..lli.line end
local column = fli.column; if column~=lli.column then column=column..'-'..lli.column end
local offset = fli.offset; if offset~=lli.offset then offset=offset..'-'..lli.offset end
return string.format("<%s%s|L%s|C%s|K%s%s>",
fli.comments and "C|" or "",
fli.source, line, column, offset,
lli.comments and "|C" or "")
end
----------------------------------------------------------------------
-- Token: atomic Lua language element, with a category, a content,
-- and some lineinfo relating it to its original source.
----------------------------------------------------------------------
new_metatable 'token'
function M.new_token(tag, content, lineinfo)
--printf("TOKEN `%s{ %q, lineinfo = %s} boundaries %d, %d",
-- tag, content, tostring(lineinfo), lineinfo.first.id, lineinfo.last.id)
return setmetatable({tag=tag, lineinfo=lineinfo, content}, MT.token)
end
function MT.token :__tostring()
--return string.format("`%s{ %q, %s }", self.tag, self[1], tostring(self.lineinfo))
return string.format("`%s %q", self.tag, self[1])
end
----------------------------------------------------------------------
-- Comment: series of comment blocks with associated lineinfo.
-- To be attached to the tokens just before and just after them.
----------------------------------------------------------------------
new_metatable 'comment'
function M.new_comment(lines)
local first = lines[1].lineinfo.first
local last = lines[#lines].lineinfo.last
local lineinfo = M.new_lineinfo(first, last)
return setmetatable({lineinfo=lineinfo, unpack(lines)}, MT.comment)
end
function MT.comment :text()
local last_line = self[1].lineinfo.last.line
local acc = { }
for i, line in ipairs(self) do
local nreturns = line.lineinfo.first.line - last_line
table.insert(acc, ("\n"):rep(nreturns))
table.insert(acc, line[1])
last_line = line.lineinfo.last.line -- remember where this line ended, for the next iteration
end
return table.concat(acc)
end
function M.new_comment_line(text, lineinfo, nequals)
checks('string', 'lexer.lineinfo', '?number')
return { lineinfo = lineinfo, text, nequals }
end
----------------------------------------------------------------------
-- Patterns used by [lexer :extract] to decompose the raw string into
-- correctly tagged tokens.
----------------------------------------------------------------------
lexer.patterns = {
spaces = "^[ \r\n\t]*()",
short_comment = "^%-%-([^\n]*)\n?()",
--final_short_comment = "^%-%-([^\n]*)()$",
long_comment = "^%-%-%[(=*)%[\n?(.-)%]%1%]()",
long_string = "^%[(=*)%[\n?(.-)%]%1%]()",
number_mantissa = { "^%d+%.?%d*()", "^%d*%.%d+()" },
number_mantissa_hex = { "^%x+%.?%x*()", "^%x*%.%x+()" }, --Lua5.1 and Lua5.2
number_exponant = "^[eE][%+%-]?%d+()",
number_exponant_hex = "^[pP][%+%-]?%d+()", --Lua5.2
number_hex = "^0[xX]()",
word = "^([%a_][%w_]*)()"
}
----------------------------------------------------------------------
-- unescape a whole string, applying [unesc_digits] and
-- [unesc_letter] as many times as required.
----------------------------------------------------------------------
local function unescape_string (s)
-- Turn the digits of an escape sequence into the corresponding
-- character, e.g. [unesc_digits("123") == string.char(123)].
local function unesc_digits (backslashes, digits)
if #backslashes%2==0 then
-- Even number of backslashes, they escape each other, not the digits.
-- Return them so that unesc_letter() can treat them
return backslashes..digits
else
-- Remove the odd backslash, which escapes the number sequence.
-- The rest will be returned and parsed by unesc_letter()
backslashes = backslashes :sub (1,-2)
end
local k, j, i = digits :reverse() :byte(1, 3)
local z = string.byte "0"
local code = (k or z) + 10*(j or z) + 100*(i or z) - 111*z
if code > 255 then
error ("Illegal escape sequence '\\"..digits..
"' in string: ASCII codes must be in [0..255]")
end
local c = string.char (code)
if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\092b" --> "\\b")
return backslashes..c
end
-- Turn hex digits of escape sequence into char.
local function unesc_hex(backslashes, digits)
if #backslashes%2==0 then
return backslashes..'x'..digits
else
backslashes = backslashes :sub (1,-2)
end
local c = string.char(tonumber(digits,16))
if c == '\\' then c = '\\\\' end -- parsed by unesc_letter (test: "\x5cb" --> "\\b")
return backslashes..c
end
-- Handle Lua 5.2 \z sequences
local function unesc_z(backslashes, more)
if #backslashes%2==0 then
return backslashes..more
else
return backslashes :sub (1,-2)
end
end
-- Take a letter [x], and returns the character represented by the
-- sequence ['\\'..x], e.g. [unesc_letter "n" == "\n"].
local function unesc_letter(x)
local t = {
a = "\a", b = "\b", f = "\f",
n = "\n", r = "\r", t = "\t", v = "\v",
["\\"] = "\\", ["'"] = "'", ['"'] = '"', ["\n"] = "\n" }
return t[x] or x
end
s = s: gsub ("(\\+)(z%s*)", unesc_z) -- Lua 5.2
s = s: gsub ("(\\+)([0-9][0-9]?[0-9]?)", unesc_digits)
s = s: gsub ("(\\+)x([0-9a-fA-F][0-9a-fA-F])", unesc_hex) -- Lua 5.2
s = s: gsub ("\\(%D)",unesc_letter)
return s
end
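----------------------------------------------------------------------
-- Worked example (illustrative only):
--
-- unescape_string([[\104i\n]]) == "hi\n"
--
-- "\104" decodes through unesc_digits to 'h' (ASCII 104), then "\n"
-- goes through unesc_letter.
----------------------------------------------------------------------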
lexer.extractors = {
"extract_long_comment", "extract_short_comment",
"extract_short_string", "extract_word", "extract_number",
"extract_long_string", "extract_symbol" }
----------------------------------------------------------------------
-- Really extract next token from the raw string
-- (and update the index).
-- loc: offset of the position just after spaces and comments
-- previous_i: offset in src before extraction began
----------------------------------------------------------------------
function lexer :extract ()
local attached_comments = { }
local function gen_token(...)
local token = M.new_token(...)
if #attached_comments>0 then -- attach previous comments to token
local comments = M.new_comment(attached_comments)
token.lineinfo.first.comments = comments
if self.lineinfo_last_extracted then
self.lineinfo_last_extracted.comments = comments
end
attached_comments = { }
end
token.lineinfo.first.facing = self.lineinfo_last_extracted
self.lineinfo_last_extracted.facing = assert(token.lineinfo.first)
self.lineinfo_last_extracted = assert(token.lineinfo.last)
return token
end
while true do -- loop until a non-comment token is found
-- skip whitespaces
self.i = self.src:match (self.patterns.spaces, self.i)
if self.i>#self.src then
local fli = self.posfact :get_position (#self.src+1)
local lli = self.posfact :get_position (#self.src+1) -- ok?
local tok = gen_token("Eof", "eof", M.new_lineinfo(fli, lli))
tok.lineinfo.last.facing = lli
return tok
end
local i_first = self.i -- loc = position after whitespaces
-- try every extractor until a token is found
for _, extractor in ipairs(self.extractors) do
local tag, content, xtra = self [extractor] (self)
if tag then
local fli = self.posfact :get_position (i_first)
local lli = self.posfact :get_position (self.i-1)
local lineinfo = M.new_lineinfo(fli, lli)
if tag=='Comment' then
local prev_comment = attached_comments[#attached_comments]
if not xtra -- new comment is short
and prev_comment and not prev_comment[2] -- prev comment is short
and prev_comment.lineinfo.last.line+1==fli.line then -- adjacent lines
-- concat with previous comment
prev_comment[1] = prev_comment[1].."\n"..content -- TODO quadratic, BAD!
prev_comment.lineinfo.last = lli
else -- accumulate comment
local comment = M.new_comment_line(content, lineinfo, xtra)
table.insert(attached_comments, comment)
end
break -- back to skipping spaces
else -- not a comment: real token, then
return gen_token(tag, content, lineinfo)
end -- if token is a comment
end -- if token found
end -- for each extractor
end -- while token is a comment
end -- :extract()
----------------------------------------------------------------------
-- Extract a short comment.
----------------------------------------------------------------------
function lexer :extract_short_comment()
-- TODO: handle final_short_comment
local content, j = self.src :match (self.patterns.short_comment, self.i)
if content then self.i=j; return 'Comment', content, nil end
end
----------------------------------------------------------------------
-- Extract a long comment.
----------------------------------------------------------------------
function lexer :extract_long_comment()
local equals, content, j = self.src:match (self.patterns.long_comment, self.i)
if j then self.i = j; return "Comment", content, #equals end
end
----------------------------------------------------------------------
-- Extract a '...' or "..." short string.
----------------------------------------------------------------------
function lexer :extract_short_string()
local k = self.src :sub (self.i,self.i) -- first char
if k~=[[']] and k~=[["]] then return end -- no match
local i = self.i + 1
local j = i
while true do
local x,y; x, j, y = self.src :match ("([\\\r\n"..k.."])()(.?)", j) -- next interesting char
if x == '\\' then
if y == 'z' then -- Lua 5.2 \z
j = self.src :match ("^%s*()", j+1)
else
j=j+1 -- escaped char
end
elseif x == k then break -- end of string
else
assert (not x or x=='\r' or x=='\n')
return nil, 'Unterminated string'
end
end
self.i = j
return 'String', unescape_string (self.src :sub (i,j-2))
end
----------------------------------------------------------------------
-- Extract Id or Keyword.
----------------------------------------------------------------------
function lexer :extract_word()
local word, j = self.src:match (self.patterns.word, self.i)
if word then
self.i = j
return (self.alpha [word] and 'Keyword' or 'Id'), word
end
end
----------------------------------------------------------------------
-- Extract Number.
----------------------------------------------------------------------
function lexer :extract_number()
local j = self.src:match(self.patterns.number_hex, self.i)
if j then
j = self.src:match (self.patterns.number_mantissa_hex[1], j) or
self.src:match (self.patterns.number_mantissa_hex[2], j)
if j then
j = self.src:match (self.patterns.number_exponant_hex, j) or j
end
else
j = self.src:match (self.patterns.number_mantissa[1], self.i) or
self.src:match (self.patterns.number_mantissa[2], self.i)
if j then
j = self.src:match (self.patterns.number_exponant, j) or j
end
end
if not j then return end
-- Number found, interpret with tonumber() and return it
local str = self.src:sub (self.i, j-1)
-- :TODO: tonumber on Lua5.2 floating hex may or may not work on Lua5.1
local n = tonumber (str)
if not n then error(str.." is not a valid number according to tonumber()") end
self.i = j
return 'Number', n
end
----------------------------------------------------------------------
-- Extract long string.
----------------------------------------------------------------------
function lexer :extract_long_string()
local _, content, j = self.src :match (self.patterns.long_string, self.i)
if j then self.i = j; return 'String', content end
end
----------------------------------------------------------------------
-- Extract symbol.
----------------------------------------------------------------------
function lexer :extract_symbol()
local k = self.src:sub (self.i,self.i)
local symk = self.sym [k] -- symbols starting with `k`
if not symk then
self.i = self.i + 1
return 'Keyword', k
end
for _, sym in pairs (symk) do
if sym == self.src:sub (self.i, self.i + #sym - 1) then
self.i = self.i + #sym
return 'Keyword', sym
end
end
self.i = self.i+1
return 'Keyword', k
end
----------------------------------------------------------------------
-- Add a keyword to the list of keywords recognized by the lexer.
----------------------------------------------------------------------
function lexer :add (w, ...)
assert(not ..., "lexer :add() takes only one arg, although possibly a table")
if type (w) == "table" then
for _, x in ipairs (w) do self :add (x) end
else
if w:match (self.patterns.word .. "$") then self.alpha [w] = true
elseif w:match "^%p%p+$" then
local k = w:sub(1,1)
local list = self.sym [k]
if not list then list = { }; self.sym [k] = list end
table.insert (list, w)
elseif w:match "^%p$" then return
else error "Invalid keyword" end
end
end
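----------------------------------------------------------------------
-- Usage sketch (illustrative only; [lx_class] stands for a lexer
-- class such as a :clone() of this module's [lexer]):
--
-- lx_class :add{ "while", "end", "==", "<=" }
--
-- "while" and "end" land in [alpha]; "==" and "<=" are stored under
-- sym['='] and sym['<'] respectively; single-char punctuation needs
-- no registration, :extract_symbol() accepts it as-is.
----------------------------------------------------------------------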
----------------------------------------------------------------------
-- Return the [n]th next token, without consuming it.
-- [n] defaults to 1. If it goes past the end of the stream, an EOF
-- token is returned.
----------------------------------------------------------------------
function lexer :peek (n)
if not n then n=1 end
if n > #self.peeked then
for i = #self.peeked+1, n do
self.peeked [i] = self :extract()
end
end
return self.peeked [n]
end
----------------------------------------------------------------------
-- Return the [n]th next token, removing it as well as the 0..n-1
-- previous tokens. [n] defaults to 1. If it goes past the end of the
-- stream, an EOF token is returned.
----------------------------------------------------------------------
function lexer :next (n)
n = n or 1
self :peek (n)
local a
for i=1,n do
a = table.remove (self.peeked, 1)
-- TODO: is this used anywhere? I think not. a.lineinfo.last may be nil.
--self.lastline = a.lineinfo.last.line
end
self.lineinfo_last_consumed = a.lineinfo.last
return a
end
----------------------------------------------------------------------
-- Returns an object which saves the stream's current state.
----------------------------------------------------------------------
-- FIXME there are more fields than that to save
function lexer :save () return { self.i; {unpack(self.peeked) } } end
----------------------------------------------------------------------
-- Restore the stream's state, as saved by method [save].
----------------------------------------------------------------------
-- FIXME there are more fields than that to restore
function lexer :restore (s) self.i=s[1]; self.peeked=s[2] end
----------------------------------------------------------------------
-- Resynchronize: cancel any token in self.peeked, by emptying the
-- list and resetting the indexes
----------------------------------------------------------------------
function lexer :sync()
local p1 = self.peeked[1]
if p1 then
local li_first = p1.lineinfo.first
if li_first.comments then li_first=li_first.comments.lineinfo.first end
self.i = li_first.offset
self.column_offset = self.i - li_first.column
self.peeked = { }
self.attached_comments = p1.lineinfo.first.comments or { }
end
end
----------------------------------------------------------------------
-- Take the source and offset of an old lexer.
----------------------------------------------------------------------
function lexer :takeover(old)
self :sync(); old :sync()
for _, field in ipairs{ 'i', 'src', 'attached_comments', 'posfact' } do
self[field] = old[field]
end
return self
end
----------------------------------------------------------------------
-- Return the current position in the sources. This position is between
-- two tokens, and can be within a space / comment area, and therefore
-- have a non-zero width. :lineinfo_left() returns the beginning of the
-- separation area, :lineinfo_right() returns the end of that area.
--
--      last consumed token        first unconsumed token
--         |                          |
--       XXXXX <spaces and comments> YYYYY
--            ^                      ^
--            :lineinfo_left()       :lineinfo_right()
----------------------------------------------------------------------
function lexer :lineinfo_right()
return self :peek(1).lineinfo.first
end
function lexer :lineinfo_left()
return self.lineinfo_last_consumed
end
----------------------------------------------------------------------
-- Create a new lexstream.
----------------------------------------------------------------------
function lexer :newstream (src_or_stream, name)
name = name or "?"
if type(src_or_stream)=='table' then -- it's a stream
return setmetatable ({ }, self) :takeover (src_or_stream)
elseif type(src_or_stream)=='string' then -- it's a source string
local src = src_or_stream
local pos1 = M.new_position(1, 1, 1, name)
local stream = {
src_name = name; -- Name of the file
src = src; -- The source, as a single string
peeked = { }; -- Already peeked, but not discarded yet, tokens
i = 1; -- Character offset in src
attached_comments = { },-- comments accumulator
lineinfo_last_extracted = pos1,
lineinfo_last_consumed = pos1,
posfact = M.new_position_factory (src_or_stream, name)
}
setmetatable (stream, self)
-- Skip initial sharp-bang for Unix scripts
-- FIXME: redundant with mlp.chunk()
if src and src :match "^#!" then
local endofline = src :find "\n"
stream.i = endofline and (endofline + 1) or #src
end
return stream
else
assert(false, ":newstream() takes a source string or a stream, not a "..
type(src_or_stream))
end
end
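----------------------------------------------------------------------
-- Usage sketch (illustrative only; [my_lexer] stands for a clone of
-- [lexer] that has been fed the Lua keywords):
--
-- local lx = my_lexer :newstream ("local x = 1", "chunk")
-- lx :peek()      --> `Keyword "local" (not consumed)
-- lx :next()      --> `Keyword "local" (consumed)
-- lx :next()[1]   --> "x", the content of an `Id token
-- lx :peek(3).tag --> "Eof"
----------------------------------------------------------------------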
----------------------------------------------------------------------
-- If there are no ... args, return the keyword's content [a[1]]
-- (whose truth value is true) if token [a] is a `Keyword{ }, and
-- false otherwise. If there are ... args, they have to be strings;
-- if [a] is a keyword and its content is one of the ... args, return
-- that content (whose truth value is true). If [a] is not a keyword,
-- or its content is not among the ... args, return false.
----------------------------------------------------------------------
function lexer :is_keyword (a, ...)
if not a or a.tag ~= "Keyword" then return false end
local words = {...}
if #words == 0 then return a[1] end
for _, w in ipairs (words) do
if w == a[1] then return w end
end
return false
end
----------------------------------------------------------------------
-- Cause an error if the next token isn't a keyword whose content
-- is listed among ... args (which have to be strings).
----------------------------------------------------------------------
function lexer :check (...)
local words = {...}
local a = self :next()
local function err ()
error ("Got " .. tostring (a) ..
", expected one of these keywords: '" ..
table.concat (words,"', '") .. "'") end
if not a or a.tag ~= "Keyword" then err () end
if #words == 0 then return a[1] end
for _, w in ipairs (words) do
if w == a[1] then return w end
end
err ()
end
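----------------------------------------------------------------------
-- Usage sketch (illustrative only):
--
-- lx :check ("then") -- consumes the next token and returns "then",
--                    -- or raises an error naming the expected keywords.
----------------------------------------------------------------------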
----------------------------------------------------------------------
-- Duplicate the lexer class, so that the clone can be extended with
-- new keywords and symbols without affecting the original.
----------------------------------------------------------------------
function lexer :clone()
local alpha_clone, sym_clone = { }, { }
for word in pairs(self.alpha) do alpha_clone[word]=true end
for letter, list in pairs(self.sym) do sym_clone[letter] = { unpack(list) } end
local clone = { alpha=alpha_clone, sym=sym_clone }
setmetatable(clone, self)
clone.__index = clone
return clone
end
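----------------------------------------------------------------------
-- Usage sketch (illustrative only): extend a dialect without
-- touching this module's base [lexer] class:
--
-- local l2 = lexer :clone()
-- l2 :add "goto" -- registered on the clone's [alpha] table only
----------------------------------------------------------------------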
----------------------------------------------------------------------
-- Cancel everything left in a lexer; all subsequent attempts at
-- `:peek()` or `:next()` will return `Eof`.
----------------------------------------------------------------------
function lexer :kill()
self.i = #self.src+1
self.peeked = { }
self.attached_comments = { }
self.lineinfo_last = self.posfact :get_position (#self.src+1)
end
return M

Some files were not shown because too many files have changed in this diff.