compilers:erlang_to_llvm
Return to Home page
If you found any error, or if you want to partecipate to the editing of this wiki, please contact: admin [at] skenz.it
You can reuse, distribute or modify the content of this page, but you must cite in any document (or webpage) this url: https://www.skenz.it/compilers/erlang_to_llvm?do=diff&rev2%5B0%5D=1614772946&rev2%5B1%5D=1614773173&difftype=sidebyside
Differences
This shows you the differences between two versions of the page.
— | compilers:erlang_to_llvm [2024/04/08 22:34] (current) – created - external edit 127.0.0.1 | ||
---|---|---|---|
Line 1: | Line 1: | ||
+ | ====== From Erlang to LLVM ====== | ||
+ | ===== Abstract ===== | ||
+ | The project goal was to develop a compiler for a subset of Erlang (miniErlang) that generates LLVM IR in human-readable format, built using JFlex and CUP. | ||
+ | ===== Results achieved ===== | ||
+ | * Implementation of a mini virtual machine (Erlang is based on the [[https:// | ||
+ | * BIFs (built-in functions): | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * Other functions: | ||
+ | * '' | ||
+ | * '' | ||
+ | * '' | ||
+ | * Language | ||
+ | * Terms: Number, List, Atom, Boolean | ||
+ | * Expressions: | ||
+ | * Arithmetic operations | ||
+ | * Sum (e.g. '' | ||
+ | * Subtraction (e.g. '' | ||
+ | * Sign inversion (e.g. '' | ||
+ | * Multiplication (e.g. '' | ||
+ | * Division (e.g. '' | ||
+ | * Integer division (e.g. '' | ||
+ | * Modulo (e.g. '' | ||
+ | * Comparison operations | ||
+ | * Exactly equal (e.g. '' | ||
+ | * Equal (e.g. '' | ||
+ | * Exactly not equal (e.g. '' | ||
+ | * Not equal (e.g. '' | ||
+ | * Less than (e.g. '' | ||
+ | * Less than or equal (e.g. '' | ||
+ | * Greater than (e.g. '' | ||
+ | * Greater than or equal (e.g. '' | ||
+ | * Boolean operations | ||
+ | * And (e.g. '' | ||
+ | * Or (e.g. '' | ||
+ | * Xor (e.g. '' | ||
+ | * Not (e.g. '' | ||
+ | * List append (e.g. '' | ||
+ | * Pattern Matching: | ||
+ | * Single variable | ||
+ | * List based (Only available using the match operator, not in functions) | ||
+ | * Function structures (one parameter): | ||
+ | * Function clauses | ||
+ | * Function clauses with guards | ||
+ | |||
+ | ===== miniErlang VM ===== | ||
+ | The miniErlang VM is written in C++, it implements the basic data item construct used in every operation, BIFs (Built-in Functions) and functions needed to support Erlang language features, such as the fact that is dynamically typed. | ||
+ | The miniErlang VM is stored in the project folder in two formats: C++ for compiler development, | ||
+ | The code that implements it is very verbose (results in 1k LOC) due to the many error handling conditions, for this reason only the most interesting aspects will be reported. | ||
+ | |||
+ | ==== Literal Struct ==== | ||
+ | It is the basic data item object, it handles typing and any operation that can be performed with basic data item. | ||
+ | |||
+ | Following, a small part of the implementation that should give an idea of how it works: | ||
+ | <code C++[enable_line_numbers=" | ||
+ | typedef enum | ||
+ | { | ||
+ | Integer, | ||
+ | Float, | ||
+ | List, | ||
+ | Atom, | ||
+ | Undefined, | ||
+ | Boolean | ||
+ | } LiteralType; | ||
+ | |||
+ | typedef struct Literal | ||
+ | { | ||
+ | LiteralType type = Undefined; | ||
+ | void *ptr = nullptr; | ||
+ | |||
+ | Literal(int value) : type(Integer), | ||
+ | { | ||
+ | } | ||
+ | Literal(double value) : type(Float), | ||
+ | Literal(size_t value) : type(Atom), ptr(new size_t(value)) {} | ||
+ | Literal(bool value) : type(Boolean), | ||
+ | Literal(list< | ||
+ | { | ||
+ | if (value.size() == 0) | ||
+ | { | ||
+ | ptr = nullptr; | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | if (value.begin()-> | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | |||
+ | ptr = new pair< | ||
+ | } | ||
+ | } | ||
+ | Literal(Literal head, Literal tail) : type(List) | ||
+ | { | ||
+ | if (head.type == Undefined || tail.type == Undefined) | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | ptr = new pair< | ||
+ | } | ||
+ | Literal() : type(Undefined) {} | ||
+ | |||
+ | /* Copy constructor */ | ||
+ | Literal(const Literal &a) | ||
+ | { | ||
+ | this-> | ||
+ | switch (this-> | ||
+ | { | ||
+ | case Integer: | ||
+ | this-> | ||
+ | break; | ||
+ | case Float: | ||
+ | this-> | ||
+ | break; | ||
+ | case List: | ||
+ | this-> | ||
+ | break; | ||
+ | case Atom: | ||
+ | this-> | ||
+ | break; | ||
+ | case Boolean: | ||
+ | this-> | ||
+ | break; | ||
+ | case Undefined: | ||
+ | error(" | ||
+ | } | ||
+ | } | ||
+ | |||
+ | void match(const Literal & | ||
+ | { | ||
+ | if (this-> | ||
+ | { | ||
+ | this-> | ||
+ | switch (this-> | ||
+ | { | ||
+ | case Integer: | ||
+ | this-> | ||
+ | break; | ||
+ | case Float: | ||
+ | this-> | ||
+ | break; | ||
+ | case List: | ||
+ | this-> | ||
+ | |||
+ | break; | ||
+ | case Atom: | ||
+ | this-> | ||
+ | break; | ||
+ | case Boolean: | ||
+ | this-> | ||
+ | break; | ||
+ | case Undefined: | ||
+ | error(" | ||
+ | } | ||
+ | } | ||
+ | |||
+ | if (*this != match_var) | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | } | ||
+ | |||
+ | bool try_match(const Literal & | ||
+ | { | ||
+ | try | ||
+ | { | ||
+ | match(match_var); | ||
+ | } | ||
+ | catch (invalid_argument e) | ||
+ | { | ||
+ | return false; | ||
+ | } | ||
+ | return true; | ||
+ | } | ||
+ | int getInt() const | ||
+ | { | ||
+ | if (type != Integer) | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | |||
+ | int result = *(int *)this-> | ||
+ | return result; | ||
+ | } | ||
+ | |||
+ | double getFloat() const | ||
+ | { | ||
+ | if (type != Float) | ||
+ | error(" | ||
+ | double result = *(double *)this-> | ||
+ | return result; | ||
+ | } | ||
+ | |||
+ | list< | ||
+ | { | ||
+ | if (type != List) | ||
+ | error(" | ||
+ | list< | ||
+ | if (ptr != nullptr) | ||
+ | { | ||
+ | pair< | ||
+ | result.push_back(element.first); | ||
+ | void *iterator = element.second.ptr; | ||
+ | while (iterator != nullptr && element.second.type == List) | ||
+ | { | ||
+ | element = *(pair< | ||
+ | result.push_back(element.first); | ||
+ | iterator = element.second.ptr; | ||
+ | } | ||
+ | } | ||
+ | return result; | ||
+ | } | ||
+ | |||
+ | size_t getAtom() const | ||
+ | { | ||
+ | if (type != Atom) | ||
+ | error(" | ||
+ | size_t result = *(size_t *)this-> | ||
+ | return result; | ||
+ | } | ||
+ | |||
+ | bool getBoolean() const | ||
+ | { | ||
+ | if (type != Boolean) | ||
+ | error(" | ||
+ | bool result = *(bool *)this-> | ||
+ | return result; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | ==== Erlang standard functions ==== | ||
+ | |||
+ | === float/1 impelementation == | ||
+ | '' | ||
+ | |||
+ | <code C++[enable_line_numbers=" | ||
+ | Literal BIF_float(const Literal &l) | ||
+ | { | ||
+ | if (!l.isNumber()) | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | if (l.type == Integer) | ||
+ | { | ||
+ | return (double)l.getInt(); | ||
+ | } | ||
+ | return l; | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | === io:format/2 implementation === | ||
+ | It equivalent in some ways to C's printf, supports in a limited way ~s and ~w control sequences (https:// | ||
+ | |||
+ | io:format/2 example: | ||
+ | <code Erlang[enable_line_numbers=" | ||
+ | io: | ||
+ | % outputs ' | ||
+ | |||
+ | Implementation: | ||
+ | <code C++[enable_line_numbers=" | ||
+ | Literal ioformat(const Literal & | ||
+ | { | ||
+ | if (format.type != List || data.type != List) | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | |||
+ | list< | ||
+ | |||
+ | regex n(" | ||
+ | regex ee(" | ||
+ | |||
+ | string ss = format.getString(' | ||
+ | ss = regex_replace(ss, | ||
+ | |||
+ | smatch mm; | ||
+ | |||
+ | string to_print = ""; | ||
+ | |||
+ | auto llist_it = llist.begin(); | ||
+ | int i = 0; | ||
+ | while (regex_search(ss, | ||
+ | { | ||
+ | if (i++ >= llist.size()) | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | to_print += mm.prefix().str() + mm.format(" | ||
+ | if (mm.format(" | ||
+ | { | ||
+ | if (llist_it-> | ||
+ | { | ||
+ | to_print += " | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | to_print += llist_it-> | ||
+ | } | ||
+ | } | ||
+ | else | ||
+ | { | ||
+ | to_print += llist_it-> | ||
+ | } | ||
+ | |||
+ | llist_it = next(llist_it); | ||
+ | ss = mm.suffix().str(); | ||
+ | } | ||
+ | |||
+ | if (i < llist.size()) | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | |||
+ | cout << to_print << ss; | ||
+ | |||
+ | return Literal(true); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | === ++ operator and lists: | ||
+ | '' | ||
+ | |||
+ | Example usage of the '' | ||
+ | |||
+ | Usage of '' | ||
+ | |||
+ | Implementation: | ||
+ | <code C++[enable_line_numbers=" | ||
+ | Literal listsappend(const Literal &a, const Literal &b) | ||
+ | { | ||
+ | if (a.type != List || b.type != List) | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | if (!a.isProperList() || !b.isProperList()) | ||
+ | { | ||
+ | error(" | ||
+ | } | ||
+ | |||
+ | list< | ||
+ | list< | ||
+ | concatenation.insert(concatenation.end(), | ||
+ | |||
+ | return Literal(concatenation); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | ===== Compiler ===== | ||
+ | |||
+ | The compiler is composed of a scanner (generated by JFlex) and a parser (generated by CUP), every time the scanner recognizes a symbol it passes it to the parser. The Symbol class holds information about each recognized symbol. | ||
+ | ==== Scanner | ||
+ | The scanner is produced using jFlex (a lexical analyzer generator written in Java). JFlex is designed to work together with the LALR parser generator CUP. | ||
+ | <code none[enable_line_numbers=" | ||
+ | nl = \r|\n|\r\n | ||
+ | ws = [ \t] | ||
+ | |||
+ | %% | ||
+ | |||
+ | /* Symbols */ | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | "/" | ||
+ | " | ||
+ | "," | ||
+ | ":" | ||
+ | ";" | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | "< | ||
+ | " | ||
+ | " | ||
+ | "/ | ||
+ | " | ||
+ | "<" | ||
+ | " | ||
+ | ">" | ||
+ | "> | ||
+ | |||
+ | |||
+ | /* Keywords */ | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | " | ||
+ | |||
+ | /* Boolean terms */ | ||
+ | " | ||
+ | " | ||
+ | |||
+ | /* String term */ | ||
+ | [\" | ||
+ | |||
+ | /* Variable names */ | ||
+ | [A-Z_][0-9a-zA-Z_@]* { return new Symbol(sym.VARIABLE, | ||
+ | |||
+ | /* Atom terms */ | ||
+ | [a-z][0-9a-zA-Z_@]* { return new Symbol(sym.ATOM, | ||
+ | [\' | ||
+ | |||
+ | /* Float terms */ | ||
+ | [0-9]*\.[0-9]+ { return new Symbol(sym.FLOAT, | ||
+ | |||
+ | /* Integer terms */ | ||
+ | [1-9][0-9]*|0 { return new Symbol(sym.INT, | ||
+ | |||
+ | /* Comments */ | ||
+ | " | ||
+ | |||
+ | {ws}|{nl} | ||
+ | |||
+ | . { System.out.println(" | ||
+ | </ | ||
+ | ==== Parser | ||
+ | The parser generator is written in CUP, the code is generated by Java classes, one for each language item. | ||
+ | |||
+ | |||
+ | === Grammar start === | ||
+ | Erlang does not support global variables, nor expressions that are not part of a function, thus the program must start with a function definition. | ||
+ | |||
+ | <code Java[enable_line_numbers=" | ||
+ | start with program; | ||
+ | |||
+ | program ::= function_seq: | ||
+ | funs.generateCode(manager, | ||
+ | manager.checkUndefinedFunctionsCalls(); | ||
+ | if (sem() && parser.semErrors == 0) { | ||
+ | System.out.println(parser.outputBuffer); | ||
+ | } else { | ||
+ | System.err.println(" | ||
+ | } | ||
+ | System.err.println(parser.errorBuffer); | ||
+ | |||
+ | System.err.println("######################" | ||
+ | System.err.println(" | ||
+ | System.err.println(" | ||
+ | System.err.println(" | ||
+ | :}; | ||
+ | |||
+ | function_seq ::= | ||
+ | function_seq: | ||
+ | | function: | ||
+ | ; | ||
+ | </ | ||
+ | |||
+ | |||
+ | The Manager class is a wrapper to access the parser, it is used by every class corresponding to a non terminal. | ||
+ | On line 4 of the above code, the Manager is used to check whether there where calls to functions that are have not been defined, either in the file being compiled or in the miniErlang VM. | ||
+ | |||
+ | Every non terminal used in the parser has an equivalent class defined with these common methods: | ||
+ | * generateCode(): | ||
+ | * destructDependencies(): | ||
+ | * destruct(): that calls destructDependencies() and deallocates every object instatiated in by the non terminal; | ||
+ | and common attributes: | ||
+ | * subgraphSize: | ||
+ | * label: the LLVM IR label used to store the object resulting from the operations performed in generateCode(). | ||
+ | |||
+ | FunctionSequence Class: | ||
+ | <code Java[enable_line_numbers=" | ||
+ | public class FunctionSequence extends Node { | ||
+ | FunctionSequence seqHead; | ||
+ | Function tail; | ||
+ | |||
+ | public FunctionSequence(Function tail) { | ||
+ | this.tail = tail; | ||
+ | seqHead = null; | ||
+ | } | ||
+ | public FunctionSequence(FunctionSequence head, Function tail) { | ||
+ | seqHead = head; | ||
+ | this.tail = tail; | ||
+ | } | ||
+ | |||
+ | public void generateCode(Manager manager, Node parent) { | ||
+ | super.generateCode(manager, | ||
+ | if(seqHead != null) { | ||
+ | seqHead.generateCode(manager, | ||
+ | } | ||
+ | tail.generateCode(manager, | ||
+ | } | ||
+ | |||
+ | public long destruct(Manager manager, Node caller) { | ||
+ | return 0; | ||
+ | |||
+ | } | ||
+ | public long destructDependencies(Manager manager, Node caller) { | ||
+ | return 0; | ||
+ | } | ||
+ | |||
+ | } | ||
+ | </ | ||
+ | |||
+ | === Functions | ||
+ | Due to the complexity derived from implementation details of the miniErlang VM, functions clauses can have at most 1 argument. | ||
+ | |||
+ | <code Java[enable_line_numbers=" | ||
+ | function ::= | ||
+ | function_clause: | ||
+ | | function_clause: | ||
+ | ; | ||
+ | |||
+ | function_clause_seq ::= | ||
+ | SEMICOLON function_clause: | ||
+ | | DOT {: | ||
+ | |||
+ | ; | ||
+ | |||
+ | function_clause ::= | ||
+ | ATOM:name argument: | ||
+ | | ATOM: | ||
+ | | error argument RIGHT_ARROW expression_seq {: | ||
+ | | error argument K_WHEN guard RIGHT_ARROW expression_seq {: | ||
+ | | ATOM error RIGHT_ARROW expression_seq {: | ||
+ | | ATOM error K_WHEN guard RIGHT_ARROW expression_seq {: | ||
+ | | ATOM argument RIGHT_ARROW error {: | ||
+ | | ATOM argument K_WHEN guard RIGHT_ARROW error {: | ||
+ | | ATOM argument K_WHEN error RIGHT_ARROW expression_seq {: | ||
+ | ; | ||
+ | |||
+ | argument ::= | ||
+ | ROUND_OPEN expression: | ||
+ | | ROUND_OPEN ROUND_CLOSE {: | ||
+ | ; | ||
+ | |||
+ | guard ::= | ||
+ | expression: | ||
+ | ; | ||
+ | guard_tail ::= | ||
+ | COMMA expression: | ||
+ | | {: | ||
+ | ; | ||
+ | </ | ||
+ | |||
+ | The Function class handles generating the function firm and the code that throws an error if no matched was found between the function parameter and the function clauses. | ||
+ | Function.generateCode(): | ||
+ | <code Java[enable_line_numbers=" | ||
+ | public void generateCode(Manager manager, Node parent) { | ||
+ | super.generateCode(manager, | ||
+ | |||
+ | manager.checkTailMatch(head, | ||
+ | manager.setFunctionName(manager.getFunctionName(head.name, | ||
+ | manager.openClause(); | ||
+ | |||
+ | long returnLabel = manager.genLabel(); | ||
+ | manager.setReturnLabel(returnLabel); | ||
+ | |||
+ | if (head.argument != null) { | ||
+ | long parameterLabel = manager.genLabel(); | ||
+ | manager.setParameterLabel(parameterLabel); | ||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | + " bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {", | ||
+ | manager.getFunctionName(), | ||
+ | Const.LITERAL_STRUCT, | ||
+ | returnLabel, | ||
+ | Const.LITERAL_STRUCT, | ||
+ | parameterLabel); | ||
+ | } else { | ||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | + " (...)* @__gxx_personality_v0 to i8*) {", | ||
+ | manager.getFunctionName(), | ||
+ | } | ||
+ | manager.genLabel(); | ||
+ | long returnPointerLabel = manager.genLabel(); | ||
+ | manager.dumpFormatln(" | ||
+ | long bitcastReturnPointerLabel = manager.genLabel(); | ||
+ | |||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | bitcastReturnPointerLabel, | ||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | |||
+ | long resumePointerLabel = manager.genLabel(); | ||
+ | manager.setResumePointer(resumePointerLabel); | ||
+ | long resumeIntegerLabel = manager.genLabel(); | ||
+ | manager.setResumeInteger(resumeIntegerLabel); | ||
+ | manager.dumpFormatln(" | ||
+ | manager.dumpFormatln(" | ||
+ | |||
+ | head.generateCode(manager, | ||
+ | if (tail != null) { | ||
+ | manager.dumpCodeLabel(); | ||
+ | tail.generateCode(manager, | ||
+ | } | ||
+ | |||
+ | if (head.argument != null) { | ||
+ | manager.dumpCodeLabel(); | ||
+ | manager.dumpFormatln(" | ||
+ | manager.dumpln(" | ||
+ | } | ||
+ | manager.dumpln(" | ||
+ | manager.popFunctionSymbols(); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | FunctionClauseSequence is very similar in nature to FunctionSequence as it just generates the list of FunctionClauses. | ||
+ | |||
+ | FunctionClause handles function clauses with: no parameter, 1 parameter and with guards. | ||
+ | If the parameter is a constant term, the generator will match the run-time parameter with the term declared in the function clause. If on the other hand the parameter is a variable, its name will be bound to the LLVM IR label previously set by the Function class. | ||
+ | |||
+ | The subgraphSize attribute of the child non terminals is used to compute the label the next clause will be at, that will need to be evaluated if the current function clause doesn' | ||
+ | |||
+ | FunctionClause.generateCode(): | ||
+ | <code Java[enable_line_numbers=" | ||
+ | public void generateCode(Manager manager, Node parent) { | ||
+ | this.parent = parent; | ||
+ | |||
+ | if (argument != null) { | ||
+ | if (argument instanceof Variable) { | ||
+ | | ||
+ | variableArgument.generateCode(manager, | ||
+ | } else { | ||
+ | // Check that runtime argument is equal to function-def argument. | ||
+ | argument.generateCode(manager, | ||
+ | |||
+ | manager.dumpCodeLabel(); | ||
+ | long matchingLabel = manager.genLabel(); | ||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | + " dereferenceable(16) %%%d)", | ||
+ | matchingLabel, | ||
+ | Const.LITERAL_CLAUSE_MATCH, | ||
+ | Const.LITERAL_STRUCT, | ||
+ | manager.getParameterLabel(), | ||
+ | Const.LITERAL_STRUCT, | ||
+ | argument.label); | ||
+ | |||
+ | long unwindLabel = matchingLabel + 1; | ||
+ | long branchLabel = unwindLabel + CLEANUP_LABEL_SIZE + RESUME_LABEL_SIZE; | ||
+ | manager.dumpFormatln(" | ||
+ | |||
+ | manager.cleanupError(); | ||
+ | argument.destruct(manager, | ||
+ | manager.resumeError(); | ||
+ | long rbl = manager.genLabel(); | ||
+ | manager.dumpln(branchLabel + ":" | ||
+ | argument.destruct(manager, | ||
+ | long clauseExpressions = branchLabel + 1; | ||
+ | long nextClause = branchLabel + 2 + expressions.subgraphSize; | ||
+ | manager.dumpFormatln( | ||
+ | "\tbr i1 %%%d, label %%%d, label %%%d", matchingLabel, | ||
+ | manager.dumpCodeLabel(); | ||
+ | } | ||
+ | } | ||
+ | if (guard != null) { | ||
+ | guard.generateCode(manager, | ||
+ | manager.dumpCodeLabel(); | ||
+ | |||
+ | long clauseExpressions = manager.getCurrentLabel(); | ||
+ | long nextClause = manager.getCurrentLabel() + 1 + expressions.subgraphSize; | ||
+ | manager.dumpFormatln( | ||
+ | "\tbr i1 %%%d, label %%%d, label %%%d", guard.label, | ||
+ | manager.dumpCodeLabel(); | ||
+ | } | ||
+ | expressions.generateCode(manager, | ||
+ | expressions.generateReturn(manager); | ||
+ | manager.closeClause(); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | == Guards == | ||
+ | A guard is a series of guard expressions separated by a comma. | ||
+ | Guards can be used to perform simple tests and comparisons on the variables in a pattern; they can be used in function defintions to expand the power of pattern matching. | ||
+ | |||
+ | |||
+ | The optional guard of the function clause is implemented by evaluating every condition, storing each into a list, and passing the list to the eval_guard() function, implemented in the miniErlang VM. | ||
+ | |||
+ | Guard.generateCode(): | ||
+ | <code Java[enable_line_numbers=" | ||
+ | public void generateCode(Manager manager, Node parent) { | ||
+ | super.generateCode(manager, | ||
+ | |||
+ | this.checkGuardSemantic(manager); | ||
+ | guard_expression.generateCode(manager, | ||
+ | manager.dumpCodeLabel(); | ||
+ | label = manager.genLabel(); | ||
+ | manager.dumpFormatln(" | ||
+ | label, Const.EVAL_GUARD, | ||
+ | long afterUnwind = manager.getCurrentLabel() + CLEANUP_LABEL_SIZE + RESUME_LABEL_SIZE; | ||
+ | manager.dumpFormatln(" | ||
+ | manager.cleanupError(); | ||
+ | destructDependencies(manager, | ||
+ | manager.resumeError(); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | |||
+ | === Terms === | ||
+ | Terms are the basic data object usable in Erlang, their implementation is trivial: it requires calling the Literal struct constructor relative to the type of object that is needed. | ||
+ | |||
+ | <code Java[enable_line_numbers=" | ||
+ | term ::= | ||
+ | ATOM: | ||
+ | | FLOAT: | ||
+ | | INT: | ||
+ | | BOOLEAN: | ||
+ | | STRING: | ||
+ | | non_empty_list: | ||
+ | | empty_list: | ||
+ | ; | ||
+ | |||
+ | empty_list ::= | ||
+ | SQUARE_OPEN SQUARE_CLOSE {: | ||
+ | ; | ||
+ | |||
+ | non_empty_list ::= | ||
+ | normal_list: | ||
+ | | alt_list: | ||
+ | ; | ||
+ | normal_list ::= | ||
+ | SQUARE_OPEN expression: | ||
+ | ; | ||
+ | normal_list_tail ::= | ||
+ | COMMA expression: | ||
+ | | SQUARE_CLOSE {: | ||
+ | ; | ||
+ | |||
+ | alt_list ::= | ||
+ | SQUARE_OPEN expression: | ||
+ | | SQUARE_OPEN expression: | ||
+ | ; | ||
+ | alt_list_tail ::= | ||
+ | COMMA expression: | ||
+ | | VERTICAL_BAR expression: | ||
+ | ; | ||
+ | </ | ||
+ | |||
+ | == Atoms == | ||
+ | Atoms are used to represent different non-numerical constant values; they start with lowercase letters, followed by a sequence of alphanumeric characters or '' | ||
+ | |||
+ | Atoms in the miniErlang VM are represented as long unsigned integers, since two atoms can be either equal equal or not (i.e. one can't be greater than the other). | ||
+ | |||
+ | Atom class: | ||
+ | <code Java[enable_line_numbers=" | ||
+ | public class Atom extends Term { | ||
+ | private String stringForm; | ||
+ | private long atomId; | ||
+ | public Atom(String atom) { | ||
+ | stringForm = atom; | ||
+ | this.subgraphSize = 1 + Node.CLEANUP_LABEL_SIZE + Node.RESUME_LABEL_SIZE; | ||
+ | } | ||
+ | |||
+ | public void generateCode(Manager manager, Node parent) { | ||
+ | super.generateCode(manager, | ||
+ | manager.dumpln(" | ||
+ | | ||
+ | atomId = manager.getAtom(stringForm); | ||
+ | label = allocate(manager); | ||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | | ||
+ | |||
+ | long unwindLabel = label + 1; | ||
+ | long branchLabel = unwindLabel + Node.CLEANUP_LABEL_SIZE + Node.RESUME_LABEL_SIZE; | ||
+ | manager.dumpFormatln(" | ||
+ | |||
+ | manager.cleanupError(); | ||
+ | destructDependencies(manager, | ||
+ | manager.resumeError(); | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | Boolean, Integer and Float terms are generated similarly to Atoms, but with a different constructor. | ||
+ | |||
+ | == Lists == | ||
+ | Lists are created by enclosing the list elements in square brackets; they can contain values of any type. | ||
+ | |||
+ | Lists can be represented in two ways in Erlang: | ||
+ | * comma separated (e.g. '' | ||
+ | * head tail format (e.g. '' | ||
+ | The two representation are equivalent in terms of accessory data structures. | ||
+ | |||
+ | |||
+ | Two classes generate code for lists: | ||
+ | * List: handles conversion from string to List Literal. | ||
+ | * AltList: handles recursive list instantiation. | ||
+ | Every AltList must be terminated by an empty List object to maintain representations equivalent. | ||
+ | |||
+ | |||
+ | AltList class: | ||
+ | <code Java[enable_line_numbers=" | ||
+ | public class AltList extends Term { | ||
+ | public Expression head; | ||
+ | public Expression tail; | ||
+ | |||
+ | public AltList(Expression head, Expression tail) { | ||
+ | this.head = head; | ||
+ | this.tail = (tail == null ? new List("" | ||
+ | this.subgraphSize = | ||
+ | 3 | ||
+ | + CLEANUP_LABEL_SIZE | ||
+ | + RESUME_LABEL_SIZE | ||
+ | + this.head.subgraphSize | ||
+ | + this.tail.subgraphSize; | ||
+ | ; | ||
+ | } | ||
+ | |||
+ | public void generateCode(Manager manager, Node parent) { | ||
+ | super.generateCode(manager, | ||
+ | manager.dumpln(" | ||
+ | |||
+ | head.generateCode(manager, | ||
+ | manager.dumpCodeLabel(); | ||
+ | tail.generateCode(manager, | ||
+ | manager.dumpCodeLabel(); | ||
+ | |||
+ | label = allocate(manager); | ||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | Const.LITERAL_CONSTRUCT_LIST_ELEMENT, | ||
+ | Const.LITERAL_STRUCT, | ||
+ | label, | ||
+ | Const.LITERAL_STRUCT, | ||
+ | head.label, | ||
+ | Const.LITERAL_STRUCT, | ||
+ | tail.label); | ||
+ | long afterUnwind = manager.getCurrentLabel() + CLEANUP_LABEL_SIZE + RESUME_LABEL_SIZE; | ||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | manager.cleanupError(); | ||
+ | destructDependencies(manager, | ||
+ | manager.resumeError(); | ||
+ | } | ||
+ | |||
+ | public long destructDependencies(Manager manager, Node caller) { | ||
+ | long maxParentDep = super.destructDependencies(manager, | ||
+ | if (head != caller) { | ||
+ | if (head.label > maxParentDep) { | ||
+ | maxParentDep = Math.max(head.destruct(manager, | ||
+ | } | ||
+ | if (tail != caller) { | ||
+ | maxParentDep = Math.max(tail.destructDependencies(manager, | ||
+ | } | ||
+ | } | ||
+ | |||
+ | return maxParentDep; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | |||
+ | === Expressions === | ||
+ | |||
+ | In Erlang every instruction (except function definitions) is an expression, and every expression holds an immutable value. | ||
+ | The Expression class is inherited by the great majority of all classes in the project. | ||
+ | |||
+ | |||
+ | Grammar rules regarding expressions: | ||
+ | <code Java[enable_line_numbers=" | ||
+ | expression ::= | ||
+ | pattern_matching: | ||
+ | | term: | ||
+ | | VARIABLE: | ||
+ | | term_comparison: | ||
+ | | arithmetic_expression: | ||
+ | | boolean_expression: | ||
+ | | function_call: | ||
+ | | ROUND_OPEN expression: | ||
+ | | lists_append: | ||
+ | ; | ||
+ | |||
+ | term_comparison ::= | ||
+ | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | ; | ||
+ | |||
+ | arithmetic_expression ::= | ||
+ | PLUS expression: | ||
+ | | HYPHEN expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | ; | ||
+ | |||
+ | boolean_expression ::= | ||
+ | NOT expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | | expression: | ||
+ | ; | ||
+ | </ | ||
+ | |||
+ | == Variables == | ||
+ | |||
+ | Variables are a special kind of expression, in that they can hold no value (i.e. can be unbounded), but once a value is assigned to them, it cannot be changed; variables in Erlang are bound to values through pattern matching. | ||
+ | |||
+ | Example: | ||
+ | <code Erlang> | ||
+ | 1> X. ** 1: variable ' | ||
+ | 2> X = 2. | ||
+ | 2 | ||
+ | 3> X + 1. | ||
+ | 3 | ||
+ | 4> X = 3. ** exception error: no match of right hand side value 3 | ||
+ | </ | ||
+ | |||
+ | == Binary Expressions == | ||
+ | |||
+ | Most expression are binary (they operate on 2 expressions), | ||
+ | right associative, | ||
+ | The precedence of the operators is defined as following: | ||
+ | |||
+ | <code Java[enable_line_numbers=" | ||
+ | // lowest priority | ||
+ | precedence right MATCH; | ||
+ | precedence nonassoc EQ, NOT_EQ, LESS_EQ, LESS, GREATER_EQ, GREATER, EXACT_EQ, EXACT_NOT_EQ; | ||
+ | precedence right PLUS_PLUS; | ||
+ | precedence left PLUS, HYPHEN, K_OR, K_XOR; | ||
+ | precedence left SLASH, STAR, K_DIV, K_REM, K_AND; | ||
+ | precedence nonassoc K_NOT; | ||
+ | precedence nonassoc SHARP; | ||
+ | precedence nonassoc COLON; | ||
+ | // highest priority | ||
+ | </ | ||
+ | |||
+ | Two general classes have been implemented to handle binary expression (as in operations between two expressions): | ||
+ | |||
+ | LeftAssocBinaryExpression: | ||
+ | <code Java[enable_line_numbers=" | ||
+ | protected Expression lhs, rhs; | ||
+ | |||
+ | public LeftAssocBinaryExpression(Expression lhs, Expression rhs) { | ||
+ | this.lhs = lhs; | ||
+ | this.rhs = rhs; | ||
+ | subgraphSize = 3 + rhs.subgraphSize + lhs.subgraphSize + CLEANUP_LABEL_SIZE + RESUME_LABEL_SIZE; | ||
+ | | ||
+ | } | ||
+ | | ||
+ | public void genericGenerateCode(String function, Manager manager, Node parent) { | ||
+ | super.generateCode(manager, | ||
+ | |||
+ | rhs.generateCode(manager, | ||
+ | manager.dumpCodeLabel(); | ||
+ | lhs.generateCode(manager, | ||
+ | |||
+ | |||
+ | manager.dumpCodeLabel(); | ||
+ | label = allocate(manager); | ||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | + " dereferenceable(16) %%%d)", | ||
+ | function, | ||
+ | Const.LITERAL_STRUCT, | ||
+ | label, | ||
+ | Const.LITERAL_STRUCT, | ||
+ | lhs.label, | ||
+ | Const.LITERAL_STRUCT, | ||
+ | rhs.label); | ||
+ | |||
+ | long unwindLabel = label + 1; | ||
+ | long branchLabel = unwindLabel + Node.CLEANUP_LABEL_SIZE + Node.RESUME_LABEL_SIZE; | ||
+ | manager.dumpFormatln(" | ||
+ | |||
+ | manager.cleanupError(); | ||
+ | destructDependencies(manager, | ||
+ | manager.resumeError(); | ||
+ | } | ||
+ | |||
+ | |||
+ | public long destructDependencies(Manager manager, Node caller) { | ||
+ | long maxParentDep = super.destructDependencies(manager, | ||
+ | if (rhs != caller) { | ||
+ | if (rhs.label >= maxParentDep) { | ||
+ | manager.dumpln(" | ||
+ | maxParentDep = Math.max(rhs.destruct(manager, | ||
+ | } | ||
+ | if (lhs != caller && lhs.label >= maxParentDep) { | ||
+ | manager.dumpln(" | ||
+ | maxParentDep = Math.max(lhs.destruct(manager, | ||
+ | } | ||
+ | } | ||
+ | |||
+ | return maxParentDep; | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | This class is inherited by every left associative operation. | ||
+ | The class for right associative operations is very similar, but inverts the order of the code generation of its dependencies. | ||
+ | |||
+ | Example of LeftAssociative class (Add): | ||
+ | <code Java[enable_line_numbers=" | ||
+ | public class Add extends LeftAssocBinaryExpression { | ||
+ | public Add(Expression lhs, Expression rhs) { | ||
+ | super(lhs, rhs); | ||
+ | } | ||
+ | |||
+ | public void generateCode(Manager manager, Node parent) { | ||
+ | manager.dumpln(" | ||
+ | super.genericGenerateCode(Const.LITERAL_ADD, | ||
+ | } | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | == Function Calls == | ||
+ | |||
+ | Function calls are very simple expressions, | ||
+ | |||
+ | <code Java[enable_line_numbers=" | ||
+ | function_call ::= | ||
+ | ATOM: | ||
+ | | ATOM: | ||
+ | | ATOM: | ||
+ | | ATOM: | ||
+ | ; | ||
+ | </ | ||
+ | |||
+ | The functionCall class, translates the function name given as input into LLVM IR equivalent, and prints the relative parameters. | ||
+ | FunctionCall.generateCode() | ||
+ | <code Java[enable_line_numbers=" | ||
+ | public void generateCode(Manager manager, Node parent) { | ||
+ | super.generateCode(manager, | ||
+ | |||
+ | manager.recordFunctionCall(name, | ||
+ | | ||
+ | |||
+ | if (parameters != null) { | ||
+ | parameters.generateCode(manager, | ||
+ | manager.dumpCodeLabel(); | ||
+ | label = allocate(manager); | ||
+ | manager.dump( | ||
+ | String.format( | ||
+ | " | ||
+ | manager.getFunctionName(name, | ||
+ | |||
+ | ExpressionSequence dfs_node = parameters; | ||
+ | while (dfs_node != null) { | ||
+ | manager.dump(String.format(", | ||
+ | dfs_node = dfs_node.tail; | ||
+ | } | ||
+ | manager.dumpln(" | ||
+ | |||
+ | } else { | ||
+ | label = allocate(manager); | ||
+ | manager.dumpFormatln( | ||
+ | " | ||
+ | manager.getFunctionName(name, | ||
+ | } | ||
+ | |||
+ | long unwindLabel = manager.getCurrentLabel(), | ||
+ | branchLabel = unwindLabel + Node.CLEANUP_LABEL_SIZE + Node.RESUME_LABEL_SIZE; | ||
+ | |||
+ | manager.dumpFormatln(" | ||
+ | |||
+ | manager.cleanupError(); | ||
+ | destructDependencies(manager, | ||
+ | manager.resumeError(); | ||
+ | } | ||
+ | </ | ||
+ | |||
+ | The '' | ||
+ | <code Java[enable_line_numbers=" | ||
+ | lists_append ::= | ||
+ | expression: | ||
+ | ; | ||
+ | </ | ||
+ | |||
+ | === Pattern Matching | ||
+ | |||
+ | This is the most complex feature of Erlang: In a pattern matching expression, a left-hand side pattern is matched against a right-hand side term. If the matching succeeds, any unbound variables in the pattern become bound. If the matching fails, a run-time error occurs. | ||
+ | |||
+ | In this project, pattern matching was implemented for the match operator ('' | ||
+ | |||
+ | The following action code distinguishes between expressions with left-hand side pattern that is List and those that aren' | ||
+ | <code Java[enable_line_numbers=" | ||
+ | pattern_matching ::= | ||
+ | expression: | ||
+ | | expression MATCH error {: | ||
+ | | error MATCH expression {: | ||
+ | ; | ||
+ | </ | ||
+ | |||
+ | The Match class is similar to other binary expressions such as the Add illustrated previously. | ||
+ | |||
+ | ListMatching is the most complex class of the entire compiler, because it has to handle complex pattern matching (e.g. '' | ||
+ | |||
+ | ListMatching Class: | ||
+ | <code Java[enable_line_numbers=" | ||
+ | public class ListMatching extends Expression { | ||
+ | static long temporaryLabel = 0; | ||
+ | AltList lhs; | ||
+ | Expression rhs; | ||
+ | Expression headMatch, tailMatch; | ||
+ | |||
+ | public ListMatching(AltList lhs, Expression rhs) { | ||
+ | // User code can't declare variables starting with a lowercase letter, so there can't be any | ||
+ | // collision. | ||
+ | long currentTemporaryLabel = temporaryLabel++; | ||
+ | this.rhs = new Match(new Variable(" | ||
+ | headMatch = | ||
+ | lhs.head instanceof AltList | ||
+ | ? new ListMatching( | ||
+ | (AltList) lhs.head, | ||
+ | new FunctionCall( | ||
+ | " | ||
+ | new ExpressionSequence(new Variable(" | ||
+ | : new Match( | ||
+ | lhs.head, | ||
+ | new FunctionCall( | ||
+ | " | ||
+ | new ExpressionSequence(new Variable(" | ||
+ | if (lhs.tail != null && lhs.tail instanceof AltList) { | ||
+ | tailMatch = | ||
+ | new ListMatching( | ||
+ | (AltList) lhs.tail, | ||
+ | new FunctionCall( | ||
+ | " | ||
+ | |||
+ | } else { | ||
+ | tailMatch = | ||
+ | new Match( | ||
+ | lhs.tail != null ? lhs.tail : new List("" | ||
+ | new FunctionCall( | ||
+ | " | ||
+ | } | ||
+ | |||
+ | this.subgraphSize = 2 + this.rhs.subgraphSize + headMatch.subgraphSize + tailMatch.subgraphSize; | ||
+ | } | ||
+ | |||
+ | public void generateCode(Manager manager, Node parent) { | ||
+ | manager.dumpln(" | ||
+ | super.generateCode(manager, | ||
+ | |||
+ | rhs.generateCode(manager, | ||
+ | label = rhs.label; | ||
+ | manager.dumpCodeLabel(); | ||
+ | |||
+ | headMatch.generateCode(manager, | ||
+ | manager.dumpCodeLabel(); | ||
+ | |||
+ | tailMatch.generateCode(manager, | ||
+ | } | ||
+ | |||
+ | public long destruct(Manager manager, Node caller) { | ||
+ | return destructDependencies(manager, | ||
+ | } | ||
+ | |||
+ | public long destructDependencies(Manager manager, Node caller) { | ||
+ | |||
+ | long maxParentDep = 0; | ||
+ | if (caller != parent) { | ||
+ | maxParentDep = parent.destruct(manager, | ||
+ | } | ||
+ | |||
+ | if (rhs != caller) { | ||
+ | if (rhs.label >= maxParentDep) { | ||
+ | maxParentDep = Math.max(rhs.destruct(manager, | ||
+ | } | ||
+ | if (headMatch != caller) { | ||
+ | if (headMatch.label > maxParentDep) { | ||
+ | maxParentDep = Math.max(headMatch.destruct(manager, | ||
+ | } | ||
+ | if (tailMatch != caller && tailMatch.label > maxParentDep) { | ||
+ | maxParentDep = Math.max(tailMatch.destruct(manager, | ||
+ | } else { | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | return maxParentDep; | ||
+ | } | ||
+ | } | ||
+ | |||
+ | </ | ||
+ | |||
+ | ===== Example code ===== | ||
+ | |||
+ | Fibonacci: | ||
+ | <code Erlang[enable_line_numbers=" | ||
+ | fib(0) -> 0; | ||
+ | fib(1) -> 1; | ||
+ | fib(N) -> Res = fib(N-1) + fib(N-2). | ||
+ | </ | ||
+ | |||
+ | Pattern Matching: | ||
+ | <code Erlang[enable_line_numbers=" | ||
+ | patternMatching() -> | ||
+ | Five = 5, | ||
+ | io: | ||
+ | % outputs: "Five: 5\n" | ||
+ | [H | T] = [1, 2, 3, 4], | ||
+ | io: | ||
+ | % outputs: "H: 1 T: [2, | ||
+ | ComplexList = [1, 2, [3, 4, 5]], | ||
+ | [First, Second, [HeadOfThird | TailOfThird]] = ComplexList, | ||
+ | io: | ||
+ | % outputs: " | ||
+ | </ | ||
+ | |||
+ | ===== Usage guide ===== | ||
+ | |||
+ | Code written to '' | ||
+ | To compile and run the code run '' | ||
+ | |||
+ | ===== Compiler repository ===== | ||
+ | |||
+ | Code is accessible at: https:// | ||
+ | |||
+ | |||
+ | |||
If you found any error, or if you want to partecipate to the editing of this wiki, please contact: admin [at] skenz.it
You can reuse, distribute or modify the content of this page, but you must cite in any document (or webpage) this url: https://www.skenz.it/compilers/erlang_to_llvm?do=diff&rev2%5B0%5D=1614772946&rev2%5B1%5D=1614773173&difftype=sidebyside
/web/htdocs/www.skenz.it/home/data/pages/compilers/erlang_to_llvm.txt · Last modified: 2024/04/08 22:34 by 127.0.0.1