/* * Flex version of ./lex.l, q.v. The main diff is that input() in flex is no * longer a macro. The changeable input macro in flex is YY_INPUT. */ %option noyywrap %p 4000 %a 4000 %o 10000 /** * Definitions */ /* * Externals */ %{ #include #include "std-macros.h" #include "parse-tree.h" #include "parser.h" #include "tokens.h" #include "sym.h" #include "translator.h" #include "options.h" #include "lex.h" extern YYSTYPE yylval; int yytchar; /* remnant from lex */ int yycharno; /* char count relative to beginning of yylineno */ int yycharpos; /* total char count from beginning of file */ int yycharposnnl;/* total char count from beginning of file, excluding \n's */ int yylinestartpos; int yyprevlinestartpos; char *yyfilename; extern int inAttribute; extern int inParts; #define lparen '(' #define rparen ')' #define COMMENTSIZE 25000 static char CommentBuf[COMMENTSIZE]; /* Browser hooks */ extern int bm_position, bm_startpos, bm_endpos; /* * This def of YY_INPUT is a mutation of the standard one. The changes are * to count indiviual characters so character position can be used in the * srcloc structure during parsing and type checking. */ #define YY_INPUT(buf,result,max_size) \ yytchar = getic(yyin); \ if (yytchar == '\n') { \ yycharno = 0; \ yyprevlinestartpos = yylinestartpos; \ yylinestartpos = yycharpos + 1; \ } \ if (yytchar == 0) { \ result = YY_NULL; \ } \ else { \ yycharno++; \ yycharpos++; \ yycharposnnl++; \ bm_position++; \ buf[0] = yytchar; \ result = 1; \ } /* * Here is the old def of the lex input() macro on which the above mods to * YY_INPUT are based (there's a more comprehensible version of this in * ./dissection-of-input-macro, q.v.): * (((yytchar=yysptr>yysbuf?U(*--yysptr):getic(yyin))==10 \ ?(yylineno++,yycharno=1, \ yyprevlinestartpos=yylinestartpos, \ yylinestartpos=yycharpos+2,yytchar):yytchar)==EOF \ ?0:(yycharno++,bm_position++,yycharpos++,yycharposnnl++,yytchar)) * */ /* * According to the not-entirely-accruate flex man page, in the section titled * "INCOMPATIBILITIES WITH LEX AND POSIX": * * The unput() routine is not redefinable. This restriction is in * accordance with POSIX. * * What apparently they mean is that the "yyunput" routine is not redefinable, * since unput is still a macro, in particular a one-liner that calls yyunput. * so, luckily, we can in fact redefine unput. */ #undef unput #define unput(c) \ if (yytchar=='\n') { \ yycharno--; \ yycharpos--; \ yycharposnnl--; \ bm_position--; \ } \ yyunput( c, yytext_ptr ); /* * Here is the old lex def of unput(c): * {yytchar= (c);if(yytchar=='\n') \ yylineno--;yycharno--;bm_position--;yycharpos--; \ yycharposnnl--;*yysptr++=yytchar;} * */ %} letter [A-Za-z] letter_or_underscore [_A-Za-z] digit [0-9] alphanumeric {letter}|{digit} other_id_char [_] prime [\'] quesmrk [\?] identifier {quesmrk}?({letter_or_underscore}({alphanumeric}|{other_id_char})*{prime}*) non_blank_or_newline [^.^\n] integer {digit}+ number {integer}\.{integer} string (\"[^\n\"]*\") symlit (\'[^\n\']*\') whitespace [ \n\t\r] /** * Rules */ %% actions {SetKeyword(); return(YACTIONS);} all {SetKeyword(); return(YALL);} and {SetKeyword(); return(YAND);} attribute {SetKeyword(); return(YATTRIBUTE);} ax {SetKeyword(); return(YAX);} axiom {SetKeyword(); return(YAXIOM);} begin {SetKeyword(); return(YBEGIN);} components {SetKeyword(); return(YCOMPONENTS);} dataflow {SetKeyword(); return(YDATAFLOW);} define {SetKeyword(); return(YDEFINE);} div {SetKeyword(); return(YDIV);} else {SetKeyword(); return(YELSE);} end {SetKeyword(); return(YEND);} eq {SetKeyword(); return(YEQ);} equations {SetKeyword(); return(YEQUATIONS);} except {SetKeyword(); return(YEXCEPT);} exists {SetKeyword(); return(YEXISTS);} export {SetKeyword(); return(YEXPORT);} extends {SetKeyword(); return(YEXTENDS);} forall {SetKeyword(); return(YFORALL);} foreach {SetKeyword(); return(YFOREACH);} from {SetKeyword(); return(YFROM);} func {SetKeyword(); return(YFUNC);} function {SetKeyword(); return(YFUNCTION);} if {SetKeyword(); return(YIF);} iff {SetKeyword(); return(YIFF);} implies {SetKeyword(); return(YIMPLIES);} import {SetKeyword(); return(YIMPORT);} in {SetKeyword(); return(YIN);} inputs {SetKeyword(); return(YINPUTS);} is {SetKeyword(); return(YIS);} lambda {SetKeyword(); return(YLAMBDA);} let {SetKeyword(); return(YLET);} mod {SetKeyword(); return(YMOD);} module {SetKeyword(); return(YMODULE);} not {SetKeyword(); return(YNOT);} obj {SetKeyword(); return(YOBJ);} object {SetKeyword(); return(YOBJECT);} op {SetKeyword(); return(YOP);} operation {SetKeyword(); return(YOPERATION);} operations {SetKeyword(); return(YOPERATIONS);} ops {SetKeyword(); return(YOPS);} or {SetKeyword(); return(YOR);} out {SetKeyword(); return(YOUT);} outputs {SetKeyword(); return(YOUTPUTS);} parts {SetKeyword(); return(YPARTS);} post {SetKeyword(); return(YPOST);} postcondition {SetKeyword(); return(YPOSTCONDITION);} pre {SetKeyword(); return(YPRE);} precondition {SetKeyword(); return(YPRECONDITION);} return {SetKeyword(); return(YRETURN);} set {SetKeyword(); return(YSET);} then {SetKeyword(); return(YTHEN);} theorem {SetKeyword(); return(YTHEOREM);} thm {SetKeyword(); return(YTHM);} val {SetKeyword(); return(YVAL);} value {SetKeyword(); return(YVALUE);} var {SetKeyword(); return(YVAR);} variable {SetKeyword(); return(YVARIABLE);} where {SetKeyword(); return(YWHERE);} xor {SetKeyword(); return(YXOR);} "!=" {SetKeyword(); return(YNEQ);} "~=" {SetKeyword(); return(YAEQ);} "#" {SetKeyword(); return('#');} "&" {SetKeyword(); return(YAND);} "(" {SetKeyword(); return('(');} ")" {SetKeyword(); return(')');} "*" {SetKeyword(); return('*');} "+" {SetKeyword(); return('+');} "," {SetKeyword(); return(',');} "-" {SetKeyword(); return('-');} "->" {SetKeyword(); return(YRTARROW);} "." {SetKeyword(); return('.');} ".-" {SetKeyword(); return(YATTRSELECT);} ".:" {SetKeyword(); return(YATTRSELECT);} ".<" {SetKeyword(); return(YGETINSTANCE);} ".>" {SetKeyword(); return(YRETRACT);} "/" {SetKeyword(); return('/');} ":" {SetKeyword(); return(':');} "::=" {SetKeyword(); return(YRTARROW);} ";" {SetKeyword(); return(';');} "<" {SetKeyword(); return('<');} "<=" {SetKeyword(); return(YLEQ);} "<=>" {SetKeyword(); return(YIFF);} "<>" {SetKeyword(); return(YNEQ);} "=" {SetKeyword(); return('=');} "==" {SetKeyword(); return(YEQEQ);} "=>" {SetKeyword(); return(YIMPLIES);} ">" {SetKeyword(); return('>');} ">=" {SetKeyword(); return(YGEQ);} "[" {SetKeyword(); return('[');} "]" {SetKeyword(); return(']');} "^" {SetKeyword(); return('^');} "{" {SetKeyword(); return('{');} "|" {SetKeyword(); return('|');} "}" {SetKeyword(); return('}');} ".." {SetKeyword(); return(YDOTDOT);} "?" {SetKeyword(); return('?');} "?." {SetKeyword(); return(YISA);} "?<" {SetKeyword(); return(YCHKINSTANCE);} "?->" {SetKeyword(); return(YQRTARROW);} "..." {return SetIdent(); /* OTHER THOTS ABANDONED FOR NOW: * pick up a commment and return it as a YText token. * Note that the diff between this case and the case of * a comment elsewhere is that here a YText token is * explicitely returned whereas elsewhere the comment * is ignored for now, and later will be make an attach- * ment. */ /* MAYBE AT SOME POINT: * Cruise over all chars upto the next ':'. Then back * up over the preceding token and see if its a defined * attribute name. If so, terminate the scan, else * continue in this way. */ } {identifier} {return SetIdent();} {symlit} {yylval.YYSsymlit.val = stralloc(&(yytext[1])); return(Ysymlit);} {integer} {yylval.YYSinteger.val = atoi(yytext); return(Yinteger);} {number} {yylval.YYSreal.val = atof(yytext); return(Yreal);} {string} {ProcessString(); return(Ystring);} {whitespace} {if (inParts and GrammarFlagOn()) return YAND;} "(*" {if (MoveOverMod2Comment() == YEOF) return YEOF; if (inAttribute) { yylval.YYSstring.val = stralloc(CommentBuf); inAttribute = false; /* only one comment will be kept as the value */ return Ytext; } } --.* ; . {fprintf(stderr, "line %d: illegal character %c\n", yylineno, yytext[0]);} %% /** * Functions */ int MoveOverMod2Comment() { char c = YEOF; int i,l; bool CommentTruncated; l = yylineno; /* Not exactly sure when c to -1, but it has and caused infinite loop when * so. So there's a guard now. */ for (i=0, CommentTruncated=false; c != -1; ) { for (; ((c = input()) != '*') and (c != YEOF) and (c != -1); i++) { if (i < COMMENTSIZE) CommentBuf[i] = c; else if (!CommentTruncated) { printf("Comment truncated to %d chars\n", COMMENTSIZE); CommentTruncated = true; } } if ((c=input()) == rparen) break; if (c == lparen) { error("Nested comment starting on line %d\n", l); break; } else if ((c == YEOF) or (c == -1)) { error("Unterminated comment starting on line %d\n", l); return YEOF; } else unput(c); } CommentBuf[i] = '\0'; return Ytext; /* Just something other than YEOF */ } char* GetLastComment() { return stralloc(CommentBuf); } void ClearComment() { CommentBuf[0] = '\0'; } int MoveOverCurlyComment() { char c; int i,l; bool CommentTruncated; l = yylineno; for (i=0, CommentTruncated=false; (c = input()) != '}'; i++) { if (i < COMMENTSIZE) CommentBuf[i] = c; else if (!CommentTruncated) { printf("Comment truncated to %d chars\n", COMMENTSIZE); CommentTruncated = true; } if (c == YEOF) { error("Unterminated comment starting on line %d\n", l); return YEOF; } } } SetKeyword(symbol) int symbol; { yylval.keyword.text = hashsave(yytext); yylval.keyword.loc.line = yylineno; /* * Next crap is to get col number right when token ends the line. */ if (yytchar == 10) yylval.keyword.loc.col = (yycharpos - yyprevlinestartpos + 1) - (strlen(yytext) - 1); else yylval.keyword.loc.col = yycharno - (strlen(yytext)+1); yylval.keyword.loc.abschar = yycharpos-(strlen(yytext)+1); yylval.keyword.loc.abscharnnl = yycharposnnl-(strlen(yytext)+1); yylval.keyword.loc.file = yyfilename; /* NOTE: yyfilename already stralloc'd in translator.c:GetXXXInFile */ } InitLex() { yycharno = 0; yycharpos = 0; yycharposnnl = 0; yylinestartpos = 0; yyprevlinestartpos = 0; /* InFileStack = NewList(); */ if (not LineBuf) LineBuf = (char *) malloc(MAXLINELEN+1); LineInd = 0; LineBuf[0] = '\0'; } bool SetLexInFile(filename) char *filename; { yycharpos = 0; yycharposnnl = 0; yycharno = 0; yylineno = 1; if (streq(filename, "stdin")) { yyin = stdin; yyfilename = null; } else { yyin = fopen(filename, "r"); if (not yyin) { lexerror("No such file %s.\n", filename); return false; } yyfilename = stralloc(filename); } return true; } void PushLexInFile() { SrcFileLoc *floc = (SrcFileLoc *) malloc(sizeof(SrcFileLoc)); floc->loc.line = yylineno; floc->loc.col = yycharno; floc->loc.file = yyfilename; floc->File = yyin; PushList(InFileStack, (char *) floc); } /* * Reset infile after we're done with an #include. */ SrcFileLoc *PopLexInFile() { SrcFileLoc *floc = (SrcFileLoc *) PopList(InFileStack); fclose(yyin); if (floc) { yylineno = floc->loc.line + 1; yycharno = 0; yycharpos = floc->loc.abschar + 1; yycharposnnl = floc->loc.abscharnnl + 1; yyfilename = floc->loc.file; yyin = floc->File; return floc; } return null; } int NumLexInFiles() { return ListLen(InFileStack); } FILE *CurInFile() { return yyin; } /* * Get the next token manually. */ char *NextToken(); /* * Handle any error messages from the lexer. */ void lexerror(msg) char *msg; { SrcLoc loc; loc.line = yylineno; loc.col = yycharno; loc.file = yyfilename; lserror(loc, msg); } /* * Return a SrcLoc with the current token's info. */ SrcLoc CurSrcLoc() { SrcLoc loc; loc.line = yylineno; loc.col = yycharno; loc.file = yyfilename ? stralloc(yyfilename) : stralloc("stdin"); return loc; } /* * Role the line index back one char. */ ungetic() { LineInd--; } /* * Give us a peek at the previous char in the input buf. */ char getprevc() { return LineInd ? LineBuf[LineInd-1] : '\0'; } /* * Check if we're at the end of a GetLine buffer. This is used in order to * allow multiple stmts/decls on a single interactive line. */ EOL() { return ((LineBuf[LineInd] == '\n') or (LineBuf[LineInd] == '\0')); } /* * Force an end-of-line to be detected by the next call to EOL. Since ForceEOL * is called after a top-level error is detected, we also want to clear the * lexer's input buffer here. This is necessary in order to avoid excess * syntax errors, and other odd behavior. */ ForceEOL() { LineBuf[LineInd] = '\n'; YY_FLUSH_BUFFER; } /* * To fix the off-by-1-line multi-file input problem, this gets called from the * top-level translator at a place you can go see. */ ForceEOF() { LineInd = 0; LineBuf[LineInd] = '\0'; } /* * Function to replace the lexer's getc. getic reads from our own buffer * rather than straight from stdin. */ getic(yyin) FILE *yyin; { char *line; if (!LineBuf[LineInd]) { if (interactive) prompt(); line = fgets(LineBuf, MAXLINELEN, yyin); if (line == null) return null; LineInd = 0; } return LineBuf[LineInd++]; } /* * Output an appropriate interative prompt. I.e., if at top-level, then ">", * else if still parsing then ">>". */ prompt(yyin) FILE *yyin; { if (!Parsing) printf("> "); else printf(">>"); } /* * Routine to get the first line so that a single '>' prompt is printed out * initially. */ GetLine(yyin) FILE *yyin; { char *line; if ((LineInd == 0) or (LineBuf[LineInd] == 0) or (LineBuf[LineInd] == '\n')) { printf("> "); line = fgets(LineBuf, MAXLINELEN, yyin); LineInd = 0; return !(line == null); } else return true; } SetIdent() { yylval.YYSident.text = hashsave(yytext); yylval.YYSident.loc.line = yylineno; /* * Next crap is to get col number right when token ends the line. */ if (yytchar == 10) yylval.YYSident.loc.col = (yycharpos - yyprevlinestartpos + 1) - (strlen(yytext) - 1); else yylval.YYSident.loc.col = yycharno - (strlen(yytext)+1); yylval.YYSident.loc.abschar = yycharpos-(strlen(yytext)+1); yylval.YYSident.loc.abscharnnl = yycharposnnl-(strlen(yytext)+1); yylval.YYSident.loc.file = yyfilename; /* NOTE: yyfilename already stralloc'd in translator.c:GetXXXInFile */ /* 20jun92: Browser hooks. */ bm_startpos = bm_position - strlen(yytext) + 1; bm_endpos = bm_position + 1; /* * WARNING! * * Calling IsAttrName with yytext as the arg rather than * yylval.YYSident.text causes an excruciatingly subtle error. * */ if (IsAttrName(yylval.YYSident.text)) { if (streq(yylval.YYSident.text, "description")) { ClearComment(); } return(Yattr); } else return(Yident); } /* * Process a string lexeme. This function is called for single-quoted strings * length 1 and all double-quoted strings. */ ProcessString() { char *s1,*s2,*s3; s3 = (char *)malloc(strlen(yytext)+1); for (s1=&yytext[0],s2=s3; *s1; ) { if (*s1 != '\\') { *s2++ = *s1++; } else if (isodigit(s1[1])) { char obuf[4]; int l = 1; if (isodigit(s1[2])) l++; if (isodigit(s1[3])) l++; strncpy(obuf, s1+1, l); obuf[l] = '\0'; s1 += l+1; *s2++ = (char) strtol(obuf, NULL, 8); } else if ((s1[1] == 'x') and (isxdigit(s1[2]))) { char* hbuf; int l = 1; s1 += 2; while (isxdigit(s1[l])) l++; hbuf = (char*) malloc(l+1); strncpy(hbuf, s1, l); hbuf[l] = '\0'; s1 += l; *s2++ = (char) strtol(hbuf, NULL, 16); free(hbuf); } else { s1++; switch (*s1) { case 'a': *s2++ = '\a'; break; case 'b': *s2++ = '\b'; break; case 'f': *s2++ = '\f'; break; case 'n': *s2++ = '\n'; break; case 'r': *s2++ = '\r'; break; case 't': *s2++ = '\t'; break; case 'v': *s2++ = '\v'; break; case '\\': *s2++ = '\\'; break; case '\?': *s2++ = '\?'; break; case '\'': *s2++ = '\''; break; case '\"': *s2++ = '\"'; break; case '0': *s2++ = '\0'; break; default: *s2++ = *s1; } s1++; } } *(s2) = '\0'; yylval.YYSstring.val = stralloc(s3); }