This is the fourth part of YAUL series. For your convenience you can find other parts in the table of contents in Part 1 — Introduction

Today we are going to take a look at the basics of compilation process.

AST

In order to execute code we are going to parse source code using PLY and generate AST. Next, we are going to parse this AST and generate lambdas. Let’s take the following code:

function isPrime(number){
	temp = 2;
	while(temp < number){
		if(number % temp == 0){
			return 0;
		}
		temp = temp + 1;
		if(temp >= number){
			break;
		}
	}

	return 1;
}

function checkPrimes(low, up){
	temp = low;
	while(temp <= up){
		if(isPrime(temp) == 1){
			print temp;
		}
		temp = temp + 1;
		continue;
	}
}



function doWork(){
	checkPrimes(2,100);
	jump target!;
target!
	return;
}


doWork();

This code does nothing important, however, it uses almost all features of our language. The AST for this application is:

FunctionStatementBlock = {
	"Statements" : [
		ProcedureCall = {
			"FunctionCall" : FunctionCall = {
				"Name" : "doWork",
				"Arguments" : []
			}
		}
	],
	"Functions" : [
		FunctionDefinition = {
			"Body" : StatementBlock = {
				"Statements" : [
					Assignment = {
						"VariableName" : "temp",
						"ValueExpression" : ConstantExpression = {
							"Value" : 2
						}
					}, 
					WhileLoop = {
						"Condition" : BinaryOperation = {
							"Left" : VariableDereference = {
								"Name" : "temp"
							},
							"Right" : VariableDereference = {
								"Name" : "number"
							},
							"Sign" : "<"
						},
						"Body" : StatementBlock = {
							"Statements" : [
								If = {
									"Condition" : BinaryOperation = {
										"Left" : BinaryOperation = {
											"Left" : VariableDereference = {
												"Name" : "number"
											},
											"Right" : VariableDereference = {
												"Name" : "temp"
											},
											"Sign" : "%"
										},
										"Right" : ConstantExpression = {
											"Value" : 0
										},
										"Sign" : "=="
									},
									"TrueBlock" : StatementBlock = {
										"Statements" : [
											Return = {
												"ReturnedValue" : {
													"Value" : 0
												}
											}
										]
									},
									"FalseBlock" : StatementBlock = {
										"Statements" : null
									}
								}, 
								Assignment = {
									"VariableName" : "temp",
									"ValueExpression" : BinaryOperation = {
										"Left" : VariableDereference = {
											"Name" : "temp"
										},
										"Right" : ConstantExpression = {
											"Value" : 1
										},
										"Sign" : "+"
									}
								}, 
								If = {
									"Condition" : BinaryOperation = {
										"Left" : VariableDereference = {
											"Name" : "temp"
										},
										"Right" : VariableDereference = {
											"Name" : "number"
										},
										"Sign" : ">="
									},
									"TrueBlock" : StatementBlock = {
										"Statements" : [
											CompilerBreak = {
											}
										]
									},
									"FalseBlock" : StatementBlock = {
										"Statements" : null
									}
								}
							]
						}
					}, 
					Return = {
						"ReturnedValue" : {
							"Value" : 1
						}
					}
				]
			},
			"Parameters" : [
				Parameter = {
					"Name" : "number"
				}
			],
			"Name" : "isPrime"
		}, 
		FunctionDefinition = {
			"Body" : StatementBlock = {
				"Statements" : [
					Assignment = {
						"VariableName" : "temp",
						"ValueExpression" : VariableDereference = {
							"Name" : "low"
						}
					}, 
					WhileLoop = {
						"Condition" : BinaryOperation = {
							"Left" : VariableDereference = {
								"Name" : "temp"
							},
							"Right" : VariableDereference = {
								"Name" : "up"
							},
							"Sign" : "<="
						},
						"Body" : StatementBlock = {
							"Statements" : [
								If = {
									"Condition" : BinaryOperation = {
										"Left" : FunctionCall = {
											"Name" : "isPrime",
											"Arguments" : [
												VariableDereference = {
													"Name" : "temp"
												}
											]
										},
										"Right" : ConstantExpression = {
											"Value" : 1
										},
										"Sign" : "=="
									},
									"TrueBlock" : StatementBlock = {
										"Statements" : [
											ProcedureCall = {
												"FunctionCall" : FunctionCall = {
													"Name" : "_print",
													"Arguments" : [
														VariableDereference = {
															"Name" : "temp"
														}
													]
												}
											}
										]
									},
									"FalseBlock" : StatementBlock = {
										"Statements" : null
									}
								}, 
								Assignment = {
									"VariableName" : "temp",
									"ValueExpression" : BinaryOperation {
										"Left" : VariableDereference = {
											"Name" : "temp"
										},
										"Right" : ConstantExpression = {
											"Value" : 1
										},
										"Sign" : "+"
									}
								}, 
								Continue = {}
							]
						}
					}
				]
			},
			"Parameters" : [
				Parameter = {
					"Name" : "low"
				}, 
				Parameter = {
					"Name" : "up"
				}
			],
			"Name" : "checkPrimes"
		}, 
		FunctionDefinition = {
			"Body" : StatementBlock = {
				"Statements" : [
					ProcedureCall = {
						"FunctionCall" : FunctionCall = {
							"Name" : "checkPrimes",
							"Arguments" : [
								ConstantExpression = {
									"Value" : 2
								}, 
								ConstantExpression = {
									"Value" : 100
								}
							]
						}
					}, 
					Jump = {
						"Target" : Label = {
							"Name" : "target",
							"Target" : null
						}
					}, 
					Label = {
						"Name" : "target",
						"Target" : null
					}, 
					Return = {
						"ReturnedValue" : null
					}
				]
			},
			"Parameters" : [],
			"Name" : "doWork"
		}
	]
}

Lots of code for such a simple application. However, if you try to decipher this structure you will see that it indeed represents whole application.

We will generate this AST using PLY. Having this AST we could easily try to execute it by writing custom processor — we would need to track instruction pointer, variables' states, call stacks and all that stuff. But in order to generate lambdas we will use visitor pattern to traverse the tree and generate the code.

Compiler application

Let's write some C# code. We start with the following:

using Compiler;
using System.IO;

namespace YetAnotherUselessLanguage
{
    class Program
    {
        static void Main(string[] args)
        {
            var parser = new Parser();
            var tree = parser.Parse(File.ReadAllText(args[0]));
            var scriptExpression = YaulCompiler.CreateExpressionFromAST(tree);
            YaulCompiler.ExecuteScript(scriptExpression);
        }
    }
}

This simple application accepts one command line argument — the path to the source code file. After reading the filwe, we call Parser's method to parse the code (we will implement Parser later in this series), next we call Compiler's methods to create lambdas and execute the script. Here is the Compiler class:

using System;
using System.Linq.Expressions;
using System.Reflection;
using System.Reflection.Emit;

namespace Compiler
{
    public static class YaulCompiler
    {
        public static Expression CreateExpressionFromAST(FunctionStatementBlock astTree)
        {
            return astTree.Accept(new ExpressionGenerator());
        }

        public static void ExecuteScript(Expression expression)
        {
            Expression.Lambda< Action>(expression).Compile()();
        }

        public static SimpleObject ConstructSimpleObject(int value)
        {
            return new SimpleObject(value);
        }

        public static SimpleObject ConstructSimpleObject(string value)
        {
            return new SimpleObject(value);
        }
    }
}

Pretty simple. We create ExpressionGenerator and execute Accept method. ExpressionGenerator visitor is pretty simple:

using System.Collections.Generic;
using System.Linq.Expressions;
using LExpression = System.Linq.Expressions.Expression;

namespace Compiler
{
    public interface IVisitor
    {
        IList< Variable> LocalVariables { get; set; }
        
        IList< PreparedFunction> Functions { get; set; }

        IList< LExpression> BreakExpressions { get; }
        IList< LExpression> ContinueExpressions { get; }
        IList< LabelTarget> Labels { get; set; }

        LabelTarget ReturnLabel { get; set; }
    }
}
using System.Collections.Generic;
using System.Linq.Expressions;
using LExpression = System.Linq.Expressions.Expression;

namespace Compiler
{

    class ExpressionGenerator : IVisitor
    {
        public IList< Variable> LocalVariables { get; set; }
        public IList< PreparedFunction> Functions { get; set; }
        public IList< LExpression> BreakExpressions { get; private set; }
        public IList< LExpression> ContinueExpressions { get; private set; }
        public IList< LabelTarget> Labels { get; set; }
        public LabelTarget ReturnLabel { get; set; }

        public ExpressionGenerator()
        {
            LocalVariables = new List< Variable>();
            LocalVariables = new List< Variable>();
            BreakExpressions = new List< LExpression>();
            ContinueExpressions = new List< LExpression>();
            Labels = new List< LabelTarget>();
        }
    }
}

Our visitor will be used to hold some of parsed instructions. We will use it to maintain local variables, nested blocks, labels etc.

Next time we are going to handle variables declarations and assignments.