This is the fourth part of the YAUL series. For your convenience you can find other parts in the table of contents in Part 1 — Introduction
Today we are going to take a look at the basics of compilation process.
AST
In order to execute code we are going to parse source code using PLY and generate AST. Next, we are going to parse this AST and generate lambdas. Let’s take the following code:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
function isPrime(number){ temp = 2; while(temp < number){ if(number % temp == 0){ return 0; } temp = temp + 1; if(temp >= number){ break; } } return 1; } function checkPrimes(low, up){ temp = low; while(temp <= up){ if(isPrime(temp) == 1){ print temp; } temp = temp + 1; continue; } } function doWork(){ checkPrimes(2,100); jump target!; target! return; } doWork(); |
This code does nothing important, however, it uses almost all features of our language. The AST for this application is:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
FunctionStatementBlock = { "Statements" : [ ProcedureCall = { "FunctionCall" : FunctionCall = { "Name" : "doWork", "Arguments" : [] } } ], "Functions" : [ FunctionDefinition = { "Body" : StatementBlock = { "Statements" : [ Assignment = { "VariableName" : "temp", "ValueExpression" : ConstantExpression = { "Value" : 2 } }, WhileLoop = { "Condition" : BinaryOperation = { "Left" : VariableDereference = { "Name" : "temp" }, "Right" : VariableDereference = { "Name" : "number" }, "Sign" : "<" }, "Body" : StatementBlock = { "Statements" : [ If = { "Condition" : BinaryOperation = { "Left" : BinaryOperation = { "Left" : VariableDereference = { "Name" : "number" }, "Right" : VariableDereference = { "Name" : "temp" }, "Sign" : "%" }, "Right" : ConstantExpression = { "Value" : 0 }, "Sign" : "==" }, "TrueBlock" : StatementBlock = { "Statements" : [ Return = { "ReturnedValue" : { "Value" : 0 } } ] }, "FalseBlock" : StatementBlock = { "Statements" : null } }, Assignment = { "VariableName" : "temp", "ValueExpression" : BinaryOperation = { "Left" : VariableDereference = { "Name" : "temp" }, "Right" : ConstantExpression = { "Value" : 1 }, "Sign" : "+" } }, If = { "Condition" : BinaryOperation = { "Left" : VariableDereference = { "Name" : "temp" }, "Right" : VariableDereference = { "Name" : "number" }, "Sign" : ">=" }, "TrueBlock" : StatementBlock = { "Statements" : [ CompilerBreak = { } ] }, "FalseBlock" : StatementBlock = { "Statements" : null } } ] } }, Return = { "ReturnedValue" : { "Value" : 1 } } ] }, "Parameters" : [ Parameter = { "Name" : "number" } ], "Name" : "isPrime" }, FunctionDefinition = { "Body" : StatementBlock = { "Statements" : [ Assignment = { "VariableName" : "temp", "ValueExpression" : VariableDereference = { "Name" : "low" } }, WhileLoop = { "Condition" : BinaryOperation = { "Left" : VariableDereference = { "Name" : "temp" }, "Right" : VariableDereference = { "Name" : "up" }, "Sign" : "<=" }, "Body" : StatementBlock = { "Statements" : [ If = { "Condition" : BinaryOperation = { "Left" : FunctionCall = { "Name" : "isPrime", "Arguments" : [ VariableDereference = { "Name" : "temp" } ] }, "Right" : ConstantExpression = { "Value" : 1 }, "Sign" : "==" }, "TrueBlock" : StatementBlock = { "Statements" : [ ProcedureCall = { "FunctionCall" : FunctionCall = { "Name" : "_print", "Arguments" : [ VariableDereference = { "Name" : "temp" } ] } } ] }, "FalseBlock" : StatementBlock = { "Statements" : null } }, Assignment = { "VariableName" : "temp", "ValueExpression" : BinaryOperation { "Left" : VariableDereference = { "Name" : "temp" }, "Right" : ConstantExpression = { "Value" : 1 }, "Sign" : "+" } }, Continue = {} ] } } ] }, "Parameters" : [ Parameter = { "Name" : "low" }, Parameter = { "Name" : "up" } ], "Name" : "checkPrimes" }, FunctionDefinition = { "Body" : StatementBlock = { "Statements" : [ ProcedureCall = { "FunctionCall" : FunctionCall = { "Name" : "checkPrimes", "Arguments" : [ ConstantExpression = { "Value" : 2 }, ConstantExpression = { "Value" : 100 } ] } }, Jump = { "Target" : Label = { "Name" : "target", "Target" : null } }, Label = { "Name" : "target", "Target" : null }, Return = { "ReturnedValue" : null } ] }, "Parameters" : [], "Name" : "doWork" } ] } |
Lots of code for such a simple application. However, if you try to decipher this structure you will see that it indeed represents whole application.
We will generate this AST using PLY. Having this AST we could easily try to execute it by writing custom processor — we would need to track instruction pointer, variables’ states, call stacks and all that stuff. But in order to generate lambdas we will use visitor pattern to traverse the tree and generate the code.
Compiler application
Let’s write some C# code. We start with the following:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
using Compiler; using System.IO; namespace YetAnotherUselessLanguage { class Program { static void Main(string[] args) { var parser = new Parser(); var tree = parser.Parse(File.ReadAllText(args[0])); var scriptExpression = YaulCompiler.CreateExpressionFromAST(tree); YaulCompiler.ExecuteScript(scriptExpression); } } } |
This simple application accepts one command line argument — the path to the source code file. After reading the filwe, we call Parser
‘s method to parse the code (we will implement Parser
later in this series), next we call Compiler
‘s methods to create lambdas and execute the script. Here is the Compiler
class:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
using System; using System.Linq.Expressions; using System.Reflection; using System.Reflection.Emit; namespace Compiler { public static class YaulCompiler { public static Expression CreateExpressionFromAST(FunctionStatementBlock astTree) { return astTree.Accept(new ExpressionGenerator()); } public static void ExecuteScript(Expression expression) { Expression.Lambda< Action>(expression).Compile()(); } public static SimpleObject ConstructSimpleObject(int value) { return new SimpleObject(value); } public static SimpleObject ConstructSimpleObject(string value) { return new SimpleObject(value); } } } |
Pretty simple. We create ExpressionGenerator
and execute Accept
method. ExpressionGenerator
visitor is pretty simple:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
using System.Collections.Generic; using System.Linq.Expressions; using LExpression = System.Linq.Expressions.Expression; namespace Compiler { public interface IVisitor { IList< Variable> LocalVariables { get; set; } IList< PreparedFunction> Functions { get; set; } IList< LExpression> BreakExpressions { get; } IList< LExpression> ContinueExpressions { get; } IList< LabelTarget> Labels { get; set; } LabelTarget ReturnLabel { get; set; } } } |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
using System.Collections.Generic; using System.Linq.Expressions; using LExpression = System.Linq.Expressions.Expression; namespace Compiler { class ExpressionGenerator : IVisitor { public IList< Variable> LocalVariables { get; set; } public IList< PreparedFunction> Functions { get; set; } public IList< LExpression> BreakExpressions { get; private set; } public IList< LExpression> ContinueExpressions { get; private set; } public IList< LabelTarget> Labels { get; set; } public LabelTarget ReturnLabel { get; set; } public ExpressionGenerator() { LocalVariables = new List< Variable>(); LocalVariables = new List< Variable>(); BreakExpressions = new List< LExpression>(); ContinueExpressions = new List< LExpression>(); Labels = new List< LabelTarget>(); } } } |
Our visitor will be used to hold some of parsed instructions. We will use it to maintain local variables, nested blocks, labels etc.
Next time we are going to handle variables declarations and assignments.