/* * This is a Yacc-style attribute grammar for the code generation semantics of * a language very similar to SIL. The meta-notation used here is slightly * less formal that the pure ML notation used in the interpretive SIL * definition. The point of this example is to give you a general feel for * what code generation semantics look like for a language of other than the * very simple Turingol defined by Knuth. * * The semantic rules define both type checking and code generation in the same * definition. These are the attributes that are used: * * NAME DESCRIPTION * =================================================================== * symtab A global reference-valued attribute representing an abstract * symbol table. A symtab is a reference to a 2-tuple of the * form: * (parenttab, entries) * where parenttab is a reference to the parent symbol table, and * entries is a list of 5-tuples of the form: * [ (name, class, type, level, offset), ... ]. * For symbols with class = "proc", a symtab entry is a nine-tuple * of the form * (name, "proc", type, level, offset, parms, symtab, label, size) * where the first 5 items are as for a variable entry, and the * last 4 items are, respectively, the list of formal parm names, * the local procedure symbol table, the object code label for the * proc, and the size in bytes of the proc act. record. * * type A string-valued attribute representing the names of types. * Successful type checking is represented by the value of * program.type = "OK". Note that a string-valued type attribute * is used for this simple language since there are no structured * types. For a language like Modula-2, the type attribute would * be tuple-valued, in order to represented structured types * effectively. * * class A string-valued attribute representing the names of symbol * classes, specifically "var", "parm", or "proc". * * text A string-valued attribute representing the lexical text of * declared identifiers. * * types A sequence-valued attribute that holds a list of types for * formal and actual procedure parameters. * * code A synthesized sequence-valued attribute that holds obj code. * * addr A synthesized string-valued attribute for a machine address. * * parms A sequence-valued attribute that holds the names of formal * procedure parameters. * * actuals A sequence-valued attribute that holds the types of actual * procedure parameters from a proc call. * * codes Sequence of actual parm code values. * * addrs Sequence of actual parm addr values. * * label A global integer attribute used to generate unique labels. * Initial value is 0. * * reg A global integer attribute used to generate an available * register. Initial value is 1. * * curoffset An inherited integer attr that records the next available * storage offset, in bytes. * * level An inherited integer attribute that records the lexical * nesting level. * * size A synthesized integer attr that records total size in bytes * of allocated storage. * * depth Synthesized integer attribute used to record max nesting depth * of proc decls. * * WORDSIZE A global constant integer attribute that holds the size of a * word in bytes. * * * There are three auxiliary functions used to enter and lookup symbols: * * Enter(symtab, symbol, class, type, level, offset) = * let (!symtab)#2 = * (!symtab)#2 U (symbol, class, type, level, offset) * * Enter(symtab, symbol, class, type, level, offset, parms, symtab) * let (!symtab)#2 = (!symtab)#2 U * (symbol, class, type, level, offset, parms, symtab) * * Lookup(symtab, symbol) = the first element S of (!symtab)#2 * such that S#1 = symbol, * "ERROR" if no such element * * * There are three auxiliary functions used to enter and exit procedure scopes * while symtab values are being computed: * * EnterProc(symtab, name, level) = * let newsymtab = (symtab, {}) * Enter(symtab, name, "proc", "", level, 0, (), newsymtab) * let Lookup(name)#8 = NextLab() * let symtab = newsymtab * * EnterParms(symtab, name, type, parmtypes, offset) = * let Lookup(symtab, name)#3 = type * let Lookup(symtab, name)#4 = parmtypes * let Lookup(symtab, name)#5 = offset; * let Lookup(symtab, name)#9 = offset + 2 * WORDSIZE * * ExitProc(symtab) = * let symtab = symtab#1 * * * There is an aux function to compute the size in bytes of a data type. With * the simple types of the Translator 5 language, this is a simple function. * For Modula-2, it's more complicated, as has been noted before. * * typesize(t) = * if t = "integer" then WORDSIZE * else if t = "real" then WORDSIZE * else if t = "boolean" then 1 * else if t = "char" then 1 * * * There are aux functions used to generate unique labels and available * registers, assuming that initially label has value 0 and reg has value 1. * * NextLab() = "L" || label; let label = label + 1 * NextReg() = "R" || reg; let reg = reg + 1 * ClearRegs() = let reg = 1 * * There are two aux functions to generate the largely canned pieces of code * that go at the top of the object code, and at the top of the main body: * * topcode(size,depth) = * ["GOTO\tMAIN", * "STATIC\\tDATA\\t" || strify(size), * "STACK\\tDATA\\t25000", * "DISPLAY\\tDATA\\t" || strify(depth*WORDSIZE)] * * maincode() = * ["MAIN\\tDATA\\t0", * "\\tMOV\\tSTATIC, R0", * "\\tMOV\\tSTACK, SP", * "\\tADD\\t#25000, SP", * "\\tMOV\\tDISPLAY, R1"] * * * There is an aux function to generate a local or global machine address: * * genaddr(sym) = * if sym#4 = 0 * then strify(sym#5) || "(R0)" * else strify(sym#5) || "(SP)"); * * * There is a simple aux function to generate an assignment stmt: * * genassmnt(src, dest) = "\\tMOV\\t" || src || ", " || dest * * * There are two simple aux functions to gen code to push and pop act records: * * pushactrec(size) = * "\\tSUB\\t" || strify(size) || ", SP" * popactrec(size) = * "\\tADD\\t" || strify(size) || ", SP" * * * Finally, there is an aux function to generate code for the details of a proc * call: * * gencall(proclab, returnlab, offset) = * ["\\tMOV\\t" || returnlab || ", " || strify(offset) || "(SP)", * "\\tGOTO\\t" || proclab, * returnlab || "\\tDATA\\t0"] * * */ program : YPROGRAM decls YBEGIN stmts YEND {symtab = [ ref (), [("integer", "type", "integer"), ...] ]; $$.type = $4.type; $2.level = 0; $2.curoffset = 0; if ($$.type = "OK" then $$.code = topcode($2.size, $2.depth) U $2.code U maincode() U $4.code;} ; decls : /* empty */ {$$.size = 0; $$.depth = 0; $$.code = [];} | decl ';' decls {$1.level = $$.level; $3.level = $$.level; $1.curoffset = $$.curoffset; $3.curoffset = $$.curoffset + $1.size; $$.size = $1.size + $3.size; $$.depth = max($1.depth, $3.depth); $$.code = $1.code U $3.code; } ; decl : vardecl {$1.level = $$.level; $1.curoffset = $$.curoffset; $$.size = $1.size; $$.depth = $1.depth; $$.code = []; } | procdecl {$1.level = $$.level; $$.size = 0; $$.depth = $1.depth; $$.code = $1.code; } ; vardecl : YVAR vars ':' type {$2.type = $4.type; /* notice inherited attribute */ $2.class = "var"; $2.level = $$.level; $2.curoffset = $$.curoffset; $$.size = $2.size; $$.depth = 0; } ; type : Yidentifier {$$.type = Lookup(symtab, $1.text)#3;} ; vars : var {Enter(symtab, $1.text, $$.class, $$.type, $$.level, $$.curoffset); $$.size = typesize($$.type)} | var ',' vars {$3.type = $$.type; $3.class = $$.class; $3.level = $$.level; Enter(symtab, $1.text, $$.class, $$.type, $$.level, $$.curoffset); $3.curoffset = $$.curoffset + typesize($$.type); $$.size = typesize($$.type) + $3.size; } ; var : Yidentifier {$$.text = $1.text; /* The lexer provides Yidentifier as a string */ let sym = Lookup(symtab, $1.text); } ; procdecl : YPROCEDURE prochdr ';' procbody {$2.level = $$.level; $4.level = $$.level + 1; $4.curoffset = $2.size; $$.depth = $4.depth; $$.size = $4.size; $$.code = $2.code || $4.code; ExitProc($2.text);} } ; prochdr : Yidentifier '(' formals ')' {EnterProc(symtab, $1.text, $$.level); $3.level = $$.level + 1; $3.curoffset = 0; EnterParms(symtab, $1.text, "", $3.parms, $3.size); $$.text = $1.text; $$.code = Lookup(symtab, $1.text)#7 || "\\tDATA\\t0";} } | Yidentifier '(' formals ')' ':' type {EnterProc(symtab, $1.text, $$.level); $3.level = $$.level + 1; $3.curoffset = 0; EnterParms(symtab, $1.text, $6.type, $3.parms, $3.size); $$.text = $1.text; $$.code = Lookup(symtab, $1.text)#7 || "\\tDATA\\t0";} } ; formals : /* empty */ {$$.types = null; $$.parms = null;} | formal {$$.types = $1.type; $1.level = $$.level; $1.curoffset = $$.curoffset; $$.parms = $1.text; $$.size = $1.size; } | formal ',' formals {$$.types = $1.type U $3.types;} $$.types = null; $1.level = $$.level; $1.curoffset = $$.curoffset; $3.curoffset = $$.curoffset + $1.size; $$.parms = $1.text U $3.parms; $$.size = $1.size + $3.size; } ; formal : Yidentifier ':' type {Enter(symtab, $1.text, "parm", $3.type, $$.level, $$.curoffset); $$.text = $1.text $$.type = $3; $$.size = typesize($3.type); } ; procbody : decls YBEGIN stmts YEND {$$.type = $3.type; $1.level = $$.level; $1.curoffset = $$.curoffset; $$.size = $1.size; $$.depth = $1.depth + 1; $$.code = $3.code; } ; stmts : stmt ';' {$$.type = $1.type; $$.code = $1.code;} | stmt ';' stmts {$$.type = if ($1.type = "OK" and $3.type = "OK") then "OK" else "ERROR"; $$.code = $1.code U $4.code; } ; stmt : /* empty */ | Yidentifier YASSMNT expr {$$.type = (if Lookup(symtab, $1.text)#3 = $3.type then "OK" else "ERROR"); $$.code = $3.code U genassmnt($3.addr, $1.addr) ClearRegs(); } | Yidentifier '(' actuals ')' {let ftypes = Lookup(symtab, $1.text)#4; let proctype = Lookup(symtab, $1.text)#3; $$.type = if (foreach (fp in ftypes, ap in $3.types) (fp = ap) and (proctype = null)) then "OK" else "ERROR"; $$.code = /* Gen code for proc call stmt, ... */ ClearRegs(); } | YIF expr YTHEN stmts YEND {$$.type = if ($2.type = "boolean") and ($4.type = "OK") then "OK" else "ERROR"; $$.code = /* filled-in code template for if */ ClearRegs(); } | YIF expr YTHEN stmts YELSE stmts YEND {$$.type = if ($2.type = "boolean") and ($4.type = "OK") and ($6.type = "OK") then "OK" else "ERROR"; $$.code = /* filled-in template for if-then-else */ ClearRegs(); } ; expr : number {$$.type = $1.type; $$.code = []; $$.addr = $1.addr;} | char {$$.type = $1.type; $$.code = []; $$.addr = $1.addr;} | bool {$$.type = $1.type; $$.code = []; $$.addr = $1.addr;} | Yidentifier {$$.type = Lookup(symtab, $1.text)#3; $$.code = []; $$.addr = genaddr(Lookup(symtab, $1.text)); } | Yidentifier '(' actuals ')' {let ftypes = Lookup(symtab, $1.text)#4; let proctype = Lookup(symtab, $1.text)#3; $$.type = if (foreach (fp in ftypes, ap in $3.types) (fp = ap) and (proctype != null)) then proctype else "ERROR"; /* Gen code as for proc call stmt, plus set * $$.addr = machine addr of return value. */ } | expr relop expr %prec '<' {$$.type = if ($1.type = $3.type) and (($1.type = "real") or ($1.type = "integer") or ($1.type = "char")) then $1.type else "ERROR"; } /* NOTE: relop code gen not done here. */ | expr addop expr %prec '+' {$$.type = /* For simplicity, this is Mod-2 rule */ if ($1.type = $3.type) and (($1.type = "real") or ($1.type = "integer")) then $1.type else "ERROR"; $$.addr = if isreg($1.addr) then $1.addr else NextReg(); $$.code = if not isreg($1.addr) then "\\tMOV\\t" || $1.addr || ", " || $$.addr U "\\t" || $2.code || "\\t" || $3.addr || "," || $$.addr else "\\t" || $2.code || "\\t" || $3.addr || "," || $$.addr; } | expr multop expr %prec '*' {$$.type = if ($1.type = $3.type) and (($1.type = "real") or ($1.type = "integer")) then $1.type else "ERROR"; } {$$.code = /* ... as for addop */;} | '(' expr ')' {$$.type = $2.type; $$.addr = $2.addr; $$.code = $2.code;} ; addop : '+' {$$.code = "\\tADD\\t";} /* etc. for the rest */ /* NOTE: this does not handle FADD */ | '-' | YOR ; multop : '*' | '/' | YAND ; relop : '<' | '>' | '=' | YLEOP | YGEOP | YNEOP ; actuals : /* empty */ | actual {$$.types = $1.type; $$.addrs = $1.addr; $$.codes = $1.code;} | actual ',' actuals {$$.types = $1.type U $3.types; $$.addrs = $1.addr U $3.addrs; $$.codes = $1.code U $3.codes; } ; actual : expr {$$.type = $1.type; $$.addr = $1.addr; $$.code = $1.code; } ; number : real {$$.type = $1.type; $$.addr = $1.addr; } | integer {$$.type = $1.type; $$.addr = $1.addr; } ; real : Yreal {$$.type = "real"; $$.addr = "#" || strify($1.val); } ; integer : Yinteger {$$.type = "integer"; $$.addr = "#" || strify($1.val); } ; char : Ychar {$$.type = "char"; $$.addr = "'" || strify($1.val) "'"; } ; bool : Ybool {$$.type = "boolean"; $$.addr = "#" || strify($1.val); } ;