#include #include #include #include #include "std-macros.h" #include "parse-tree.h" #include "sym.h" #include "parser.h" #include "tokens.h" #include "translator.h" #include "error.h" #include "lex.h" #include "str.h" extern YYSTYPE yylval; extern int yycharno; extern char* yyfilename; extern char yytext[]; extern int yylineno; extern FILE* yyin; extern char* yysptr, yysbuf[]; extern int yytchar; # define U(x) x # define input() \ (((yytchar=yysptr>yysbuf?U(*--yysptr):getic(yyin))==10 \ ?(yylineno++,yycharno=1,yytchar):yytchar)==EOF \ ?0:(yycharno++,yytchar)) # define unput(c) \ {yytchar= (c);if(yytchar=='\n') \ yylineno--;yycharno;yycharno--;*yysptr++=yytchar;} #define COMMENT_LEN 2500 /* SetKeyword() { yylval.keyword.text = NewString(yytext); yylval.keyword.loc.line = yylineno; yylval.keyword.loc.col = yycharno-(strlen(yytext)+1); yylval.keyword.loc.file = yyfilename ? stralloc(yyfilename) : null; } */ InitLex() { /* yycharno = 0; InFileStack = NewList(); LineBuf = (char *) malloc(MAXLINELEN+1); */ } bool SetLexInFile(const char* filename) { yycharno = 0; yylineno = 1; if (streq(filename, "stdin")) { yyin = stdin; yyfilename = null; } else { yyin = fopen(filename, "r"); if (not yyin) { lexerror("No such file %s.\n", filename); return false; } yyfilename = stralloc(filename); } return true; } void PushLexInFile() { SrcFileLoc *floc = (SrcFileLoc *) malloc(sizeof(SrcFileLoc)); floc->loc.line = yylineno; floc->loc.col = yycharno; floc->loc.file = yyfilename ? stralloc(yyfilename) : yyfilename; floc->File = yyin; PushList(InFileStack, (char *) floc); } /* * Reset infile after we're done with an #include. */ SrcFileLoc *PopLexInFile() { SrcFileLoc *floc = (SrcFileLoc *) PopList(InFileStack); fclose(yyin); if (floc) { yylineno = floc->loc.line + 1; yycharno = 0; yyfilename = floc->loc.file; yyin = floc->File; return floc; } return null; } int NumLexInFiles() { return ListLen(InFileStack); } FILE *CurInFile() { return yyin; } /* * Get the next token manually. */ char *NextToken(); /* * Handle any error messages from the lexer. This function cannot use varargs * because ANSI varargs macros do not allow forwarding. This sucks. */ void lexerror(char* msg, const char* optarg) { SrcLoc loc; loc.line = yylineno; loc.col = yycharno; loc.file = yyfilename; if (loc.file) { lerror(NewNode(null, null, loc), msg, optarg); } else { fprintf(stderr, msg, optarg); } } /* * Function to replace the lexer's getc. getic reads from our own buffer * rather than straight from stdin. */ getic(yyin) FILE *yyin; { char *line; SrcFileLoc *floc; if (!LineBuf[LineInd]) { if (Interactive) prompt(); line = fgets(LineBuf, MAXLINELEN, yyin); /* while ((not line) and (floc = PopLexInFile())) { line = fgets(LineBuf, MAXLINELEN, yyin); } */ if (line == null) return null; LineInd = 0; } return LineBuf[LineInd++]; } /* * Role the line index back one char. */ ungetic() { LineInd--; } /* * Give us a peek at the previous char in the input buf. */ char getprevc() { return LineInd ? LineBuf[LineInd-1] : '\0'; } /* * Output an appropriate interative prompt. I.e., if at top-level, then ">", * else if still parsing then ">>". If in a breakpoint, output n>, where n = * current breakpoint level. */ prompt(yyin) FILE *yyin; { /* * Don't prompt if we're in the midsts of an include, which condition is * recognized by 1 or more pending lex in files, or if we aren't * conversing. */ if ((NumLexInFiles() > 0) or (not Conversing)) return; if (BreakLev) printf("%d", BreakLev); if (!Parsing) printf("> "); else printf(">> "); } /* * Routine to get the first line so that a single '>' prompt is printed out * initially. */ GetLine(yyin) FILE *yyin; { char *line; if ((LineInd == 0) or (LineBuf[LineInd] == 0) or (LineBuf[LineInd] == '\n')) { prompt(); line = fgets(LineBuf, MAXLINELEN, yyin); LineInd = 0; return !(line == null); } else return true; } /* * Check if we're at the end of a GetLine buffer. This is used in order to * allow multiple stmts/decls on a single interactive line. */ EOL() { return ((LineBuf[LineInd] == '\n') or (LineBuf[LineInd] == '\0')); } /* * Force an end-of-line to be detected by the next call to EOL. Since ForceEOL * is called after a top-level error is detected, we also want to clear the * lexer's input buffer here. This is necessary in order to avoid excess * syntax errors, and other odd behavior. */ ForceEOL() { LineBuf[LineInd] = '\n'; yysptr = yysbuf; } /* * The SetDefineStateN functions set a state variable that's detected at * various places in the lexer. The idea is the following: * * DefineState1: Seen #define, looking for name [macroformals] macrobody * DefineState2: Seen #define, looking for name macrobody * DefineState3: Seen #define, looking for name macroformals macrobody * * The important point is that the parser needs help from the lexer in passing * the macrobody on through as a plain string, whereas the lexer needs help in * parsing the entire construct, in particular the formals. Thus, we hack it * up quite a bit in the ')' and {identifier} rules above. */ SetDefineState0() { DefineState = 0; } SetDefineState1() { DefineState = 1; } SetDefineState2() { DefineState = 2; } SetDefineState3() { DefineState = 3; } /* * MacroNameAndBody returns the name and body of a macro, both as strings, in * yylval. It returns the token YMacroNameAndBody. */ /* NodeSubkind MacroNameAndBody(name) String* name; { yylval.YYSmacro.name = name; yylval.YYSmacro.body = GetMacroBody(); return YMacroNameAndBody; } */ /* * MacroBody returns the body of a macro, as a string, in yylval. It returns * the token YMacroBody. */ /* NodeSubkind MacroBody() { yylval.YYSmacro.name = null; yylval.YYSmacro.body = GetMacroBody(); return YMacroBody; } */ /* * GetMacroBody consumes everything in its path up to the next non-slashified * newline, putting all the chars in a string buffer. */ /* String* GetMacroBody() { char c, buf[COMMENT_LEN]; int i; bool mlenmsg; while (true) { for (i=0, mlenmsg=false; (c = input()) != '\n'; i++) { if (i < COMMENT_LEN) buf[i] = c; else if (!mlenmsg) { printf("Length limit (%d) of #define exceeded\n", COMMENT_LEN); mlenmsg = true; } } if (getprevc() != '\\') break; unput(c); } return mlenmsg ? null : NewString(buf); } */ /* * Clear everything up in response to a ^C from the user. Buffer, include * files, etc., all need to be attended to. */ ResetLex() { /* * Reset the char counter back to 0. */ yycharno = 0; /* * Pop off any pending include files (unlikely, but certainly possible). */ while (NumLexInFiles()) PopLexInFile(); /* * Reset the break lev back to 0. */ BreakLev = 0; /* * Turn the Parsing flag off so that the next prompt wont be double. */ Parsing = false; } /* * Process a string lexeme. This function is called for single-quoted strings * length 1 and all double-quoted strings. */ ProcessString() { char *s1,*s2,*s3; s3 = (char *)malloc(strlen(yytext)-1); for (s1=&yytext[1],s2=s3; *s1; ) { if (*s1 != '\\') { *s2++ = *s1++; } else if (isodigit(s1[1])) { char obuf[4]; int l = 1; if (isodigit(s1[2])) l++; if (isodigit(s1[3])) l++; strncpy(obuf, s1+1, l); obuf[l] = '\0'; s1 += l+1; *s2++ = (char) strtol(obuf, NULL, 8); } else if ((s1[1] == 'x') and (isxdigit(s1[2]))) { char* hbuf; int l = 1; s1 += 2; while (isxdigit(s1[l])) l++; hbuf = (char*) malloc(l+1); strncpy(hbuf, s1, l); hbuf[l] = '\0'; s1 += l; *s2++ = (char) strtol(hbuf, NULL, 16); free(hbuf); } else { s1++; switch (*s1) { case 'a': *s2++ = '\a'; break; case 'b': *s2++ = '\b'; break; case 'f': *s2++ = '\f'; break; case 'n': *s2++ = '\n'; break; case 'r': *s2++ = '\r'; break; case 't': *s2++ = '\t'; break; case 'v': *s2++ = '\v'; break; case '\\': *s2++ = '\\'; break; case '\?': *s2++ = '\?'; break; case '\'': *s2++ = '\''; break; case '\"': *s2++ = '\"'; break; case '0': *s2++ = '\0'; break; default: *s2++ = *s1; } s1++; } } *(s2-1) = '\0'; yylval.YYSstring.val = NewString(s3); } /* * FillLine is called from eval and friends (q.q.v., in eval.{h,c}) to fill the * lexer's line buf with a programmatically-obtained string. */ FillLine(char* s) { LineInd = 0; /* * Note well -- it is assumed that the caller has put s in a safe place. */ strcpy(LineBuf, s); /* * Not quite sure why, but the following synchronization is necessary. The * low-level reason is that getic won't be called if yysptr>yysbuf (see the * input macro). At a slightly higher level, it appears to have to do with * the last char of the line (the ';') being left over in the yylex's * look-ahead buffer. The end result is that every other line (sometimes) * the input is ignored because there's a left-over single ';' from the * previous line. * * Anyway, the following line appears to fix things. See also ForceEOL, * which is doing a similar twiddle. */ yysptr = yysbuf; }