/*
 * This is a Yacc-style attribute grammar for the code generation semantics of
 * a language very similar to SIL.  The meta-notation used here is slightly
 * less formal that the pure ML notation used in the interpretive SIL
 * definition.  The point of this example is to give you a general feel for
 * what code generation semantics look like for a language of other than the
 * very simple Turingol defined by Knuth.
 *
 * The semantic rules define both type checking and code generation in the same
 * definition.  These are the attributes that are used:
 *
 *      NAME    DESCRIPTION
 *      ===================================================================
 *      symtab  A global reference-valued attribute representing an abstract
 *              symbol table.  A symtab is a reference to a 2-tuple of the
 *              form:
 *                  (parenttab, entries)
 *              where parenttab is a reference to the parent symbol table, and
 *              entries is a list of 5-tuples of the form:
 *                  [ (name, class, type, level, offset), ... ]. 
 *              For symbols with class = "proc", a symtab entry is a nine-tuple
 *              of the form
 *                (name, "proc", type, level, offset, parms, symtab, label, size)
 *              where the first 5 items are as for a variable entry, and the
 *              last 4 items are, respectively, the list of formal parm names,
 *              the local procedure symbol table, the object code label for the
 *              proc, and the size in bytes of the proc act. record.
 *
 *      type    A string-valued attribute representing the names of types.
 *              Successful type checking is represented by the value of
 *              program.type = "OK".  Note that a string-valued type attribute
 *              is used for this simple language since there are no structured
 *              types.  For a language like Modula-2, the type attribute would
 *              be tuple-valued, in order to represented structured types
 *              effectively.
 *
 *      class   A string-valued attribute representing the names of symbol
 *              classes, specifically "var", "parm", or "proc".
 *
 *      text    A string-valued attribute representing the lexical text of
 *              declared identifiers.
 *
 *      types   A sequence-valued attribute that holds a list of types for
 *              formal and actual procedure parameters.
 *
 *      code    A synthesized sequence-valued attribute that holds obj code.
 *
 *      addr    A synthesized string-valued attribute for a machine address.
 *
 *      parms   A sequence-valued attribute that holds the names of formal
 *              procedure parameters.
 *
 *      actuals  A sequence-valued attribute that holds the types of actual
 *              procedure parameters from a proc call.
 *
 *      codes   Sequence of actual parm code values.
 *
 *      addrs   Sequence of actual parm addr values.
 *
 *      label   A global integer attribute used to generate unique labels.
 *              Initial value is 0.
 *
 *      reg     A global integer attribute used to generate an available
 *              register.  Initial value is 1.
 *
 *      curoffset  An inherited integer attr that records the next available
 *              storage offset, in bytes.
 *
 *      level   An inherited integer attribute that records the lexical
 *              nesting level.
 *
 *      size    A synthesized integer attr that records total size in bytes
 *              of allocated storage.
 *
 *      depth   Synthesized integer attribute used to record max nesting depth
 *              of proc decls.
 *      
 *      WORDSIZE  A global constant integer attribute that holds the size of a
 *              word in bytes.
 *
 *
 * There are three auxiliary functions used to enter and lookup symbols:
 *
 *      Enter(symtab, symbol, class, type, level, offset) =
 *             let (!symtab)#2 =
 *                   (!symtab)#2 U (symbol, class, type, level, offset)
 *
 *      Enter(symtab, symbol, class, type, level, offset, parms, symtab)
 *             let (!symtab)#2 = (!symtab)#2 U
 *                   (symbol, class, type, level, offset, parms, symtab)
 *
 *      Lookup(symtab, symbol) = the first element S of (!symtab)#2
 *                                      such that S#1 = symbol,
 *                               "ERROR" if no such element
 *
 *
 * There are three auxiliary functions used to enter and exit procedure scopes
 * while symtab values are being computed:
 *
 *      EnterProc(symtab, name, level) =
 *              let newsymtab = (symtab, {})
 *              Enter(symtab, name, "proc", "", level, 0, (), newsymtab)
 *              let Lookup(name)#8 = NextLab()
 *              let symtab = newsymtab
 *
 *      EnterParms(symtab, name, type, parmtypes, offset) =
 *              let Lookup(symtab, name)#3 = type
 *              let Lookup(symtab, name)#4 = parmtypes
 *              let Lookup(symtab, name)#5 = offset;
 *              let Lookup(symtab, name)#9 = offset + 2 * WORDSIZE
 *
 *      ExitProc(symtab) =
 *              let symtab = symtab#1
 *
 *
 * There is an aux function to compute the size in bytes of a data type.  With
 * the simple types of the Translator 5 language, this is a simple function.
 * For Modula-2, it's more complicated, as has been noted before.
 *
 *      typesize(t) =
 *              if t = "integer" then WORDSIZE
 *              else if t = "real" then WORDSIZE
 *              else if t = "boolean" then 1
 *              else if t = "char" then 1
 *
 *
 * There are aux functions used to generate unique labels and available
 * registers, assuming that initially label has value 0 and reg has value 1.
 *
 *      NextLab() = "L" || label; let label = label + 1
 *      NextReg() = "R" || reg; let reg = reg + 1
 *      ClearRegs() = let reg = 1
 *
 * There are two aux functions to generate the largely canned pieces of code
 * that go at the top of the object code, and at the top of the main body:
 *
 *      topcode(size,depth) =
 *              ["GOTO\tMAIN",
 *               "STATIC\\tDATA\\t" || strify(size),
 *               "STACK\\tDATA\\t25000",
 *               "DISPLAY\\tDATA\\t" || strify(depth*WORDSIZE)]
 *
 *      maincode() =
 *              ["MAIN\\tDATA\\t0",
 *               "\\tMOV\\tSTATIC, R0",
 *               "\\tMOV\\tSTACK, SP",
 *               "\\tADD\\t#25000, SP",
 *               "\\tMOV\\tDISPLAY, R1"]
 *
 *
 * There is an aux function to generate a local or global machine address:
 *
 *      genaddr(sym) =
 *              if sym#4 = 0
 *              then strify(sym#5) || "(R0)"
 *              else strify(sym#5) || "(SP)");
 *
 *
 * There is a simple aux function to generate an assignment stmt:
 *
 *      genassmnt(src, dest) = "\\tMOV\\t" || src || ", " || dest
 *
 *
 * There are two simple aux functions to gen code to push and pop act records:
 *
 *      pushactrec(size) =
 *              "\\tSUB\\t" || strify(size) || ", SP"
 *      popactrec(size) =
 *              "\\tADD\\t" || strify(size) || ", SP"
 *
 *
 * Finally, there is an aux function to generate code for the details of a proc
 * call:
 *
 *      gencall(proclab, returnlab, offset) =
 *              ["\\tMOV\\t" || returnlab || ", " || strify(offset) || "(SP)",
 *               "\\tGOTO\\t" || proclab,
 *               returnlab || "\\tDATA\\t0"]
 *
 *
 */

program          : YPROGRAM decls YBEGIN stmts YEND
                        {symtab =
                            [ ref (), [("integer", "type", "integer"), ...] ];
                         $$.type = $4.type;
                         $2.level = 0;
                         $2.curoffset = 0;
                         if ($$.type = "OK" then
                            $$.code = topcode($2.size, $2.depth) U
                                $2.code U maincode() U $4.code;}
                ;

decls           : /* empty */
                        {$$.size = 0;
                         $$.depth = 0;
                         $$.code = [];}
                | decl ';' decls
                        {$1.level = $$.level;
                         $3.level = $$.level;
                         $1.curoffset = $$.curoffset;
                         $3.curoffset = $$.curoffset + $1.size;
                         $$.size = $1.size + $3.size;
                         $$.depth = max($1.depth, $3.depth);
                         $$.code = $1.code U $3.code;
                        }
                ;

decl            : vardecl
                        {$1.level = $$.level;
                         $1.curoffset = $$.curoffset;
                         $$.size = $1.size;
                         $$.depth = $1.depth;
                         $$.code = [];
                        }
                | procdecl
                        {$1.level = $$.level;
                         $$.size = 0;
                         $$.depth = $1.depth;
                         $$.code = $1.code;
                        }
                ;       

vardecl         : YVAR vars ':' type
                        {$2.type = $4.type; /* notice inherited attribute */
                         $2.class = "var";
                         $2.level = $$.level;
                         $2.curoffset = $$.curoffset;
                         $$.size = $2.size;
                         $$.depth = 0;
                        }
                ;

type            : Yidentifier
                        {$$.type = Lookup(symtab, $1.text)#3;}
                ;

vars            : var
                        {Enter(symtab, $1.text, $$.class, $$.type,
                                $$.level, $$.curoffset);
                         $$.size = typesize($$.type)}
                | var ',' vars
                        {$3.type = $$.type;
                         $3.class = $$.class;
                         $3.level = $$.level;
                         Enter(symtab, $1.text, $$.class, $$.type,
                                $$.level, $$.curoffset);
                         $3.curoffset = $$.curoffset + typesize($$.type);
                         $$.size = typesize($$.type) + $3.size;
                        }
                ;

var             : Yidentifier
                        {$$.text = $1.text;
                           /* The lexer provides Yidentifier as a string */
                         let sym = Lookup(symtab, $1.text);
                        }
                ;

procdecl        : YPROCEDURE prochdr ';' procbody
                        {$2.level = $$.level;
                         $4.level = $$.level + 1;
                         $4.curoffset = $2.size;
                         $$.depth = $4.depth;
                         $$.size = $4.size;
                         $$.code = $2.code || $4.code;
                         ExitProc($2.text);}
                        }
                ;

prochdr         : Yidentifier '(' formals ')'
                        {EnterProc(symtab, $1.text, $$.level);
                         $3.level = $$.level + 1;
                         $3.curoffset = 0;
                         EnterParms(symtab, $1.text, "",
                                $3.parms, $3.size);
                         $$.text = $1.text;
                         $$.code = Lookup(symtab, $1.text)#7 || "\\tDATA\\t0";}
                        }
                | Yidentifier '(' formals ')' ':' type
                        {EnterProc(symtab, $1.text, $$.level);
                         $3.level = $$.level + 1;
                         $3.curoffset = 0;
                         EnterParms(symtab, $1.text, $6.type,
                                $3.parms, $3.size);
                         $$.text = $1.text;
                         $$.code = Lookup(symtab, $1.text)#7 || "\\tDATA\\t0";}
                        }
                ;

formals         : /* empty */
                        {$$.types = null;
                         $$.parms = null;}
                | formal
                        {$$.types = $1.type;
                         $1.level = $$.level;
                         $1.curoffset = $$.curoffset;
                         $$.parms = $1.text;
                         $$.size = $1.size;
                        }
                | formal ',' formals
                        {$$.types = $1.type U $3.types;}
                         $$.types = null;
                         $1.level = $$.level;
                         $1.curoffset = $$.curoffset;
                         $3.curoffset = $$.curoffset + $1.size;
                         $$.parms = $1.text U $3.parms;
                         $$.size = $1.size + $3.size;
                        }
                ;

formal          : Yidentifier ':' type
                        {Enter(symtab, $1.text, "parm", $3.type,
                                $$.level, $$.curoffset);
                         $$.text = $1.text
                         $$.type = $3;
                         $$.size = typesize($3.type);
                        }
                ;

procbody        : decls YBEGIN stmts YEND
                        {$$.type = $3.type;
                         $1.level = $$.level;
                         $1.curoffset = $$.curoffset;
                         $$.size = $1.size;
                         $$.depth = $1.depth + 1;
                         $$.code = $3.code;
                        }
                ;

stmts           : stmt ';'
                        {$$.type = $1.type;
                         $$.code = $1.code;}
                | stmt ';' stmts
                        {$$.type = if ($1.type = "OK" and $3.type = "OK")
                                        then "OK" else "ERROR";
                         $$.code = $1.code U $4.code;
                        }
                ;

stmt            : /* empty */
                | Yidentifier YASSMNT expr
                        {$$.type = 
                             (if Lookup(symtab, $1.text)#3 = $3.type
                              then "OK" else "ERROR");
                         $$.code =
                              $3.code U genassmnt($3.addr, $1.addr)
                         ClearRegs();
                        }
                | Yidentifier '(' actuals ')'
                        {let ftypes = Lookup(symtab, $1.text)#4;
                         let proctype = Lookup(symtab, $1.text)#3;
                         $$.type = 
                             if (foreach (fp in ftypes, ap in $3.types)
                                 (fp = ap) and (proctype = null))
                             then
                                 "OK"
                             else
                                "ERROR";
                         $$.code = 
                             /* Gen code for proc call stmt, ... */
                         ClearRegs();
                        }
                | YIF expr YTHEN stmts YEND
                        {$$.type =
                             if ($2.type = "boolean") and
                                ($4.type = "OK")
                             then "OK"
                             else "ERROR";
                         $$.code  = /* filled-in code template for if */
                         ClearRegs();
                        }
                | YIF expr YTHEN stmts YELSE stmts YEND
                        {$$.type =
                             if ($2.type = "boolean") and
                                ($4.type = "OK") and
                                ($6.type = "OK")
                             then "OK"
                             else "ERROR";
                         $$.code  = /* filled-in template for if-then-else */
                         ClearRegs();
                        }
                ;

expr            : number
                        {$$.type = $1.type;
                         $$.code = [];
                         $$.addr = $1.addr;}
                | char
                        {$$.type = $1.type;
                         $$.code = [];
                         $$.addr = $1.addr;}
                | bool
                        {$$.type = $1.type;
                         $$.code = [];
                         $$.addr = $1.addr;}
                | Yidentifier
                        {$$.type = Lookup(symtab, $1.text)#3;
                         $$.code = [];
                         $$.addr = genaddr(Lookup(symtab, $1.text));
                        }
                | Yidentifier '(' actuals ')'
                        {let ftypes = Lookup(symtab, $1.text)#4;
                         let proctype = Lookup(symtab, $1.text)#3;
                         $$.type = 
                             if (foreach (fp in ftypes, ap in $3.types)
                                 (fp = ap) and (proctype != null))
                             then
                                 proctype
                             else
                                "ERROR";
                          /* Gen code as for proc call stmt, plus set
                           * $$.addr = machine addr of return value. */
                        }
                | expr relop expr      %prec '<'
                        {$$.type =
                            if ($1.type = $3.type)
                              and (($1.type = "real") or ($1.type = "integer")
                                   or ($1.type = "char"))
                            then $1.type
                            else "ERROR";
                        }
                        /* NOTE: relop code gen not done here. */
                | expr addop expr      %prec '+'
                        {$$.type =    /* For simplicity, this is Mod-2 rule */
                            if ($1.type = $3.type)
                              and (($1.type = "real") or ($1.type = "integer"))
                            then $1.type
                            else "ERROR";
                         $$.addr =
                             if isreg($1.addr) then $1.addr else NextReg();
                         $$.code =
                             if not isreg($1.addr)
                             then
                                  "\\tMOV\\t" || $1.addr || ", " || $$.addr  U
                                  "\\t" || $2.code || "\\t" ||
                                        $3.addr || "," || $$.addr
                             else
                                  "\\t" || $2.code || "\\t" ||
                                        $3.addr || "," || $$.addr;
                        }
                | expr multop expr     %prec '*'
                        {$$.type = 
                            if ($1.type = $3.type)
                              and (($1.type = "real") or ($1.type = "integer"))
                            then $1.type
                            else "ERROR";
                        }
                        {$$.code = /* ... as for addop */;}
                | '(' expr ')'
                        {$$.type = $2.type;
                         $$.addr = $2.addr;
                         $$.code = $2.code;}
                ;

addop           : '+'
                        {$$.code = "\\tADD\\t";}        /* etc. for the rest */
                                /* NOTE: this does not handle FADD */
                | '-'
                | YOR
                ;

multop          : '*'
                | '/'
                | YAND
                ;

relop           : '<'
                | '>'
                | '='
                | YLEOP
                | YGEOP
                | YNEOP
                ;

actuals         : /* empty */
                | actual
                        {$$.types = $1.type;
                         $$.addrs = $1.addr;
                         $$.codes = $1.code;}
                | actual ',' actuals
                        {$$.types = $1.type U $3.types;
                         $$.addrs = $1.addr U $3.addrs;
                         $$.codes = $1.code U $3.codes;
                        }
                ;

actual          : expr
                        {$$.type = $1.type;
                         $$.addr = $1.addr;
                         $$.code = $1.code;
                        }
                ;

number          : real
                        {$$.type = $1.type;
                         $$.addr = $1.addr;
                        }
                | integer
                        {$$.type = $1.type;
                         $$.addr = $1.addr;
                        }
                ;

real            : Yreal
                        {$$.type = "real";
                         $$.addr = "#" || strify($1.val);
                        }
                ;
                
integer         : Yinteger
                        {$$.type = "integer";
                         $$.addr = "#" || strify($1.val);
                        }
                ;

char            : Ychar
                        {$$.type = "char";
                         $$.addr = "'" || strify($1.val) "'";
                        }
                ;

bool            : Ybool
                        {$$.type = "boolean";
                         $$.addr = "#" || strify($1.val);
                        }
                ;