/**** **** Defintion of the symbol table data abstraction, suitable for C++. **** Cf. sym.h. **** ***** THE MAIN SYMTAB STRUCTURE ***** The overall structure of a symtab is a tree. Each scoping block (i.e, unit of lexical declaration) has its own symbol table. E.g., the symbol table structure is shown below for the following program: program P; var p1,p2,p3: integer; procedure A(a1,a2); var a3:integer; begin ... end {A}; procedure B(b1,b2: integer); var b3:integer; procedure C(c1, c2: integer); var c3: integer; begin ... end; {C}; begin ... end; {B} begin ... end. {P} P's symtab |---------------| | null |<--| |-------|-------| | B's symtab | p3 | | | |---------------| |-------|-------| |-----------o |<--| | | | |-------|-------| | |-------|-------| | b2 | | | C's symtab | B | o---------->|-------|-------| | |---------------| |-------|-------| | | | |-----------o | | p1 | | |-------|-------| |-------|-------| |-------|-------| | C | o---------->| c3 | | | p2 | | |-------|-------| |-------|-------| |-------|-------| . . . | | | |-------|-------| |-------|-------| A's symtab | A | o---------->|---------------| |-------|-------| | o------------> to P's symtab |-------|-------| | a1 | | |-------|-------| . . . |-------|-------| Note that some details are left out of this diagram; the declaration of the Symtab type in sym.h has complete details. The key idea of this tree structure is that each symtab entry for a block includes a pointer to a new symtab for that block. Also, each child symtab in turn has a back link to its parent table. These back links form the static chain for identifier context resolution. As the parse goes on, a global pointer (CurSymtab) points to the table that belongs to the block currently being parsed. Thus, all entries to the symtab will go through the global pointer. For lookups, we start where CurSymtab points, and move back parent links, doing successive lookups until the item is found or we've run out of tables. The Cursymtab pointer continues to be used at runtime to maintain proper context for incremental parsing. Cursymtab is bumped at each block entry, and moved back to the father at block exit. Global variables considered harmful?!? Bah! ***** THE PRE-HASH TABLE STRUCTURE ***** Lex enters all alphanumeric symbols, including keywords, in a one-level allocation table called the string hash table. This is *below* the main structure or the symbol table. It is the structure that stores print names and allows symbols throughout the remainder of the parser to be represented as one-word pointers rather than arbitrary char strings. Hence, once we're past lex, a symbol is just a pointer not a string -- a rather efficient arrangement. It is important to note the one-level nature of the string hash table. That is, all symbols, at all lexical levels are entered directly. The string hash table represents no lexical scoping; this is handled in the second level of symbol table structure described above. Specifically, the hash tab entries point to symbol print names maintained in their character representation in the string table or keyword table as shown below. The first sample entry shown is for an alphanumeric token that represents a non-keyword item (e.g., varname, proc name, etc). This entry points to a char string in the string table. The lower sample entry represents a keyword token. It points to a special table holding only keyword strings. For more info about dealing with keywords, see the IsKeyword predicate. HASH TAB |-----------------------| STRING TAB | | /------------------\ |-----------------------| / \ | o------------------------> char strings | |-----------------------| \ / | | \------------------/ |-----------------------| | | KWTAB |-----------------------| |------------------| NOTE: at present | o--------------------> | | this keyword table |-----------------------| |------------------| is not used, since | | | | keywords are recog- |-----------------------| |------------------| nized directly in | | . . . the lexer. Hence, . . . this is a hook for |------------------| future expansion. | | |-----------------------| Here is summary of of the hashing functions. See the comments at each function definition for further info: FUNCTION DESCRIPTION ==================================================== char **hash1(S,Save) The string hash function. S is the string and Save is a flag. Save == false, means S is already in a safe place. If Save = true, space will be malloc'd for S if it's being hashed for the first time. int hash2(S,Symtab) The symtab entries function (it's actually a macro). hash2 is a Symtab->Mask-way hash on the low bits of S as a character pointer. char *hashsave(S) A macro that calls hash1(S,true). char *hash(S) A macro that calls hash1(S,false). * * */ #ifndef symppIncluded #define symppIncluded #include "std-macros.h" #include "parse-tree++.h" #include "type++.h" #include "interp++.h" #include "entity.h" #include "cpplist++.h" /* * Some assorted constants used below and in sym.c */ #ifndef NIL #define NIL 0 #endif #define NILSYM ((SymtabEntry *)NIL) #define STRINC 1024 /* string space increment */ #define HASHINC 509 /* hash table size in words, each increment */ #define TABLE_MULTIPLIER 8 #define MAXHASH (4 * TABLE_MULTIPLIER) #define LEVEL0SIZE 1024 class EntityStructList; /* * Symbol classes. These values are the union tag for the Info field of a * SymtabEntry. */ typedef enum { C_Const, /* A constant */ /* 0 */ C_Type, /* A type */ /* 1 */ C_Var, /* A regular variable */ /* 2 */ C_Enum, /* An enumeration literal */ /* 3 */ C_Parm, /* A procedure parameter */ /* 4 */ C_Record, /* A record */ /* 5 */ C_Field, /* A record field */ /* 6 */ C_Variant, /* A variant record part */ /* 7 */ C_Variant_Field, /* A variant record field */ /* 8 */ C_Proc, /* A procedure */ /* 9 */ C_Module, /* A def, imple, prog or spec module */ /* 10 */ C_Macro, /* A #define macro (non-standard) */ /* 11 */ C_Obj, /* An object def */ /* 12 */ C_Op, /* An operation def */ /* 13 */ C_Attr, /* A defined attribute name */ /* 14 */ C_Name, /* A subobject or suboperation name */ /* 15 */ C_Value, /* A subobject or suboperation value */ /* 16 */ C_Quant, /* A quantifier or eqn scope */ /* 17 */ C_ExprSeq, /* An anon expr seq scope */ /* 18 */ C_Let_Var /* A var declared with let */ /* 19 */ #ifdef DEMO , C_SROp /* A DISL S/R Operation def */ /* 20 */ #endif } SymClasses; /* * Flag bit definitions. Note that different classes of symbols use the same * bit positions to mean different things. E.g., a C_Parm uses bit 0x10 to * indicate that it's a var parm, whereas a C_Type uses bit 0x10 to indicate * that it's an opaque type. */ #define symUsed 0x01 /* On if a symbol ever gets used */ #define symSet 0x02 /* On if a symbol ever gets set */ #define isStatic 0x04 /* Symbol is static storage class */ #define isBuiltIn 0x04 /* Type is built in (type analog of static) */ #define isOpaque 0x10 /* An opaque type def */ #define isExport 0x20 /* An export */ #define isImport 0x80 /* An import */ #define varParm 0x10 /* A var parm */ #define nameParm 0x40 /* A name parm */ #define arrayParm 0x20 /* An open array parm */ #define listParm 0x100 /* An RSL list parm */ #define returnSeen 0x40 /* On if return stmt seen */ #define specialProc 0x10 /* On if a special proc, like new */ #define compiledProc 0x20 /* On if a pre-compiled proc, like i/o */ #define listPart 0x20 /* A list object */ #define unionPart 0x40 /* An ORed component */ #define isPreComp 0x40 /* A predefined function */ #define isClass 0x08 /* An entity class, i.e., extended from */ #define isInstance 0x02 /* An entity instance */ #define isDef 0x100 /* An entity def (i.e., with '=' */ #define isSimpleDef 0x200 /* A simple entity def (i.e., YIS */ #define isUndef 0x400 /* An undefined symbol; used by browser */ #define beenChecked 0x800 /* The object or operation has been checked */ #define sigsChecked 0x1000 /* Overloaded sigs of an op have beee chk'd */ #define isInherited 0x2000 /* Inherited tuple field */ #define hasName 0x4000 /* True if field has non-default name */ #define isAmbig 0x8000 /* True if symbol is globally ambiguous */ #define isWhereRHS 0x1000 /* True if ident type is RHS of where clause */ #define isRule 0x2000 /* True is a rule */ #define isFunc 0x4000 /* True is a function */ #define isAx 0x1000 /* True if formal decl is ax, false if thm */ /* * Handy macros to set and check SymtabEntry bit flags. */ #define SetSymFlag(sym, bit) (sym->Flags |= bit) #define ChkSymFlag(sym, bit) (sym->Flags & bit) /* * SymtabEntry is the principal decl in this file. It is the structure * of the symbol table entry for each symbol appearing in the program. */ typedef struct Sym { char *Symbol; /* pointer to symbol name */ SymClasses Class; /* class of symbol */ unsigned char Level; /* lexical level */ unsigned int Flags; /* assorted flags */ TypeStruct Type; /* type */ struct Sym *Next; /* hash bucket link */ struct Sym *DeclNext; /* link in decl-order thread */ struct Sym *Chain; /* utility chain field */ char *comment; /* comment assoc`d with symbol */ SrcLoc Loc; /* src code location of symbol */ CppList errors; /* list of Pass 2 error msgs */ union { /* Class-specific info */ /* C_Const */ struct { nodep valTree; ValueStruct val; } Consta; /* C_Var, C_Let_Var */ struct { int Offset; nodep initValue; } Var; /* C_Enum */ struct { int Value; /* Ordinal value. */ struct Sym *Prev; /* Link to younger enum sibling. */ struct Sym *Next; /* Link to older enum sibling. */ } Enum; /* C_Parm */ struct { struct Sym *Link; /* Pointer to next parm in list. */ int Offset; nodep initValue; char* TypeName; /* String value of type, for pp etc. */ } Parm; #ifdef DEMO /* C_SROp */ struct { struct Sym *Parms; /* Pointer to parm chain. */ struct Symt *Symtab; /* Symtab for SROperation's scope. */ TypeStruct FType; /* Formal signature for type chking */ union { /* Code location; compiledProc is tag*/ nodep Tree; /* Tree code for user-defined procs */ ValueStruct (*Func)(); /* Compiled function for built-ins */ } Code; TypeStruct (*ChkFunc)(); /* Special-purpose type chking func */ int Offset; /* Return val offset. */ } SROp; #endif /* C_Proc */ struct { struct Sym *Parms; /* Pointer to parm chain. */ struct Symt *Symtab; /* Symtab for proc's scope. */ TypeStruct FType; /* Formal signature for type chking */ union { /* Code location; compiledProc is tag*/ nodep Tree; /* Tree code for user-defined procs */ ValueStruct (*Func)(); /* Compiled function for built-ins */ } Code; TypeStruct (*ChkFunc)(); /* Special-purpose type chking func */ int Offset; /* Return val offset. */ } Proc; /* C_Module */ struct { int Priority; /* Declared priority value */ union { /* Code location; specialProc is tag */ nodep Tree; /* Tree code for user-defined procs */ ValueStruct (*Func)(); /* Compiled function for built-ins */ } Code; struct Sym *Parms; /* Pointer to parm chain. */ struct Symt *Symtab; /* Symtab for module's scope. */ } Module; /* * The next four fields (C_Record, C_Field, C_Variant, and * C_Variant_Field) are presently unused. They are hooks for more * efficient handling of records. */ /* C_Record */ struct { struct Sym *Parent; /* For nested records */ struct Sym *Fileds; /* The field list */ } Record; /* C_Field */ struct { struct Sym *Next; /* Next field */ struct Sym *Prev; /* Previous field */ struct Sym *Parent; /* Containing record */ } Field; /* C_Variant */ struct { struct Sym *Tag; /* Tag field */ struct Sym *Cases; /* Variant parts */ struct Sym *Els; /* Else part */ } VariantRecord; /* C_Variant_Field */ struct { struct Sym *Labels; /* Case label list */ struct Sym *Fields; /* Fields */ struct Sym *Next; /* Next variant field */ struct Sym *Prev; /* Previous variant field */ struct Sym *Parent; /* Containing variant record */ } VariantField; /* C_Macro */ struct { nodep Parms; /* Formal parms */ char *Body; /* Macro body */ } Macro; #ifdef RSL /* C_Object */ struct { nodep inheritsfrom; /* Zero or more parent classes */ nodep speclreq; /* Chain of parts that require specialization*/ nodep ops; /* Operations */ nodep partslist; /* Flattend list of values of parts */ struct Sym *InitList; /* Flattend list of part entries to be type checked later for initial values */ nodep parts; /* parts parse tree */ nodep namelist; /* Flattend list of names of parts */ nodep eqns; /* equations parse tree */ nodep attrs; /* Attributes */ struct Symt *Symtab;/* Symtab for Object's scope */ struct Sym *Parts; /* Symbol table entries of parts */ ValueStruct val; /* Evaluated expr for 'isDef' values */ /* New goodies for the browser. */ EntityStructList* brComponents; /* List class component types */ EntityStructList* brComponentsU;/* List class component types, unsorted */ EntityStructList* brInstances; /* List class instances */ EntityStructList* brPartof; /* List of hierarchical parents*/ EntityStructList* brInheritsFrom; /* List of class parents */ EntityStructList* brOps; /* List of operations */ EntityStructList* brPictures; /* List of picture names */ EntityStructList* brLinks; /* List of list of link info */ int brSize; /* Number of components, non-transitive. */ int brTotalSize; /* Number of components, transitive. */ nodep theTree; /* Parse tree for full obj decl */ struct Sym *parent; /* Used by browser; see browser-hooks.c */ } Obj; /* C_Operation */ struct { nodep inheritsfrom; /* Zero or more parent classes */ nodep speclreq; /* Chain of parts that require specialization*/ nodep partslist; /* Flattend list of names of parts */ struct Sym *InitList; /* Flattend list of part entries to be type checked later for initial values */ nodep parts; /* parts parse tree */ nodep partstree; /* parts parse tree */ nodep namelist; /* Flattend list of names of parts */ struct Sym *InsInitList; /* Inputs */ nodep ins; /* Inputs */ nodep instree; /* Inputs */ struct Sym *OutsInitList; /* Outputs */ nodep outs; /* Outputs */ nodep outstree; /* Outputs */ nodep precond; /* preconditions */ nodep postcond; /* postconditions */ nodep attrs; /* Attributes */ struct Symt *Symtab;/* Symtab for Op's scope */ struct Sym *InParms;/* Pointer to input parm chain. */ struct Sym *OutParms;/* Pointer to output parm chain. */ nodep OpType; /* Type of this op as an op type */ /* New goodies for the browser. */ EntityStructList* brComponents; /* List class component types */ EntityStructList* brInstances; /* List class instances */ EntityStructList* brPartof; /* List of hierarchical parents*/ EntityStructList* brInheritsFrom; /* List of class parents */ EntityStructList* brIns; /* List of inputs */ EntityStructList* brOuts; /* List of outputs */ EntityStructList* brPictures; /* List of picture names */ EntityStructList* brLinks; /* List of list of link info */ nodep theTree; /* Parse tree for full obj decl */ struct Sym *parent; /* Used by browser; see browser-hooks.c */ /* And last but least, stuff to acutally run the puppy. */ union { /* Code location; compiledProc is tag*/ nodep Tree; /* Tree code for user-defined procs */ ValueStruct (*Func)(); /* Compiled function for built-ins */ } Code; int Offset; /* Return val offset. */ } Op; #endif } Info; } SymtabEntry; /* * Import and export list structures. */ typedef struct Imp { char *From; /* Single from name; may be empty. */ char **Names; /* Import names; list is null terminated. */ struct Imp *Next; /* Next list entry */ } ImportListEntry; typedef struct Exp { char **Names; /* Export names; list is null terminated. */ } ExportList; /* * This is the representation of the overall table structure. */ typedef struct Symt { struct Symt *ParentTab; /* Static table link */ SymtabEntry *ParentEntry; /* Static entry link */ int Level; /* Lexical level */ int Size; /* Number of entries */ int Type; /* Scope type (e.g., YDEF, etc) */ int Mask; /* Info for hashing */ SymtabEntry **Entries; /* Ptrs to entries */ nodep Imports; /* Imports list */ nodep Exports; /* Exports list */ SymtabEntry *UnresList; /* List of unresolved forw refs */ int Errors; /* No. of type chking errors. */ int Offset; /* Storage offset counter. */ nodep Tree; /* Full src parse tree */ SymtabEntry *DeclThread; /* Thread of all syms in decl * order, used when hash order * enum is inappropriate */ SymtabEntry *DeclLast; /* Tail of DeclThread, for efficient addition */ CppList DeclOrderList; /* Decl-order list of objects, */ /* used in browser */ /* * 26jun92 glf: Browser hook. */ struct spec_list *BrowserSpec; } Symtab; #define TabName(st) (st->ParentEntry == NILSYM ? "Level 0 Table" :\ st->ParentEntry->Symbol) /* * Push/PopSymtab routines used to stack symtab contexts during type checking * (i.e., in Milestone 4) and later at runtime (i.e., in 451). */ #ifdef USEWITHCPP extern "C" #endif #define STKINC 4096 #ifdef USEWITHCPP extern "C" #endif Symtab *SymtabStack[STKINC]; #ifdef USEWITHCPP extern "C" #endif Symtab **symtos; #ifdef USEWITHCPP extern "C" #endif int SymtabStackSize; #define PushSymtab() \ if (++SymtabStackSize == STKINC) \ AllocSymtabStack(); \ *(symtos++) = CurSymtab #define PopSymtab() \ if (symtos == SymtabStack) error("Panic: empty symtab stack"); \ CurSymtab = *(--symtos); \ SymtabStackSize-- #define TopSymtab() \ *(symtos-1) #define AllocSymtabStack() \ error("Panic: full symtab stack") #ifdef USEWITHCPP extern "C" #endif Symtab *CurSymtab; /* THE global symtab. */ #ifdef USEWITHCPP extern "C" #endif Symtab *Level0Symtab; /* The topmost symtab -- everyone's parent. */ #ifdef USEWITHCPP extern "C" #endif Symtab *CurProgSymtab; /* The progarm module symtab -- start of execution. */ #ifdef USEWITHCPP extern "C" #endif int MaxNest; /* Max proc nesting, for computing display size. */ #ifdef USEWITHCPP extern "C" #endif int CurNest; /* Current nesting depth, used to compute MaxNest. */ #ifdef USEWITHCPP extern "C" #endif Symtab *MainSymtab; /* The main (program module) symtab */ /* * Visible Functions */ extern "C" void InitSymtab(); extern "C" Symtab *AllocSymtab(int); extern "C" void NewLevel(SymtabEntry*, int, nodep, TokenType); extern "C" SymtabEntry *AllocSymtabEntry(); extern "C" SymtabEntry *HashAllocSymtabEntry(char*, SymClasses, TypeStruct, int); extern "C" bool Enter(); extern "C" bool EnterIn(SymtabEntry* s, Symtab* st); extern "C" SymtabEntry *Lookup(); extern "C" SymtabEntry *LookupIn(); extern "C" SymtabEntry *LookupString(char* s); extern "C" SymtabEntry *LookupStringIn(char* s, Symtab* st); extern "C" SymtabEntry *NextEntry(); extern "C" void ResetNext(); extern "C" void Delete(); extern "C" void DeleteIn(); extern "C" char **hash1(); #define hash2(s,st) (((int) s & st->Mask) % st->Size) #define hash(s) (*hash1(s,false)) #define hashsave(s) (*hash1(s,true)) #ifdef DEMO /* * Some variables used in EnterSRProc() and ExitSRProc(); * The following structure is not put in the interp++.h, because it may * create a circular #include def with this "sym++.h". */ typedef struct SRValue { ValueStruct v1; char *saveftos; int so; SymtabEntry *p; } SRValueStruct; SRValueStruct *sr; #ifdef USEWITHCPP extern "C" #endif ValueStruct EnterSRProc(nodep t, SRValueStruct* sr); #ifdef USEWITHCPP extern "C" #endif ValueStruct ExitSRProc(SRValueStruct* sr); #endif #endif