/*
  * Flex version of ./lex.l, q.v.  The main diff is that input() in flex is no
  * longer a macro.  The changeable input macro in flex is YY_INPUT.
  */
%option noyywrap
%p 4000
%a 4000
%o 10000

 /**
  * Definitions
  */

 /*
  * Externals
  */
%{
#include <math.h>
#include "std-macros.h"
#include "parse-tree.h"
#include "parser.h"
#include "tokens.h"
#include "sym.h"
#include "translator.h"
#include "options.h"
#include "lex.h"

extern YYSTYPE yylval;
int yytchar;	 /* remnant from lex */
int yycharno;	 /* char count relative to beginning of yylineno */
int yycharpos;	 /* total char count from beginning of file */
int yycharposnnl;/* total char count from beginning of file, excluding \n's */
int yylinestartpos;
int yyprevlinestartpos;
char *yyfilename;
extern int inAttribute;
extern int inParts;
#define lparen '('
#define rparen ')'

#define COMMENTSIZE 25000
static char CommentBuf[COMMENTSIZE];

/* Browser hooks */
extern int bm_position, bm_startpos, bm_endpos;

/*
 * This def of YY_INPUT is a mutation of the standard one.  The changes are
 * to count indiviual characters so character position can be used in the 
 * srcloc structure during parsing and type checking.
 */
#define YY_INPUT(buf,result,max_size) \
    yytchar = getic(yyin); \
    if (yytchar == '\n') { \
	yycharno = 0; \
	yyprevlinestartpos = yylinestartpos; \
	yylinestartpos = yycharpos + 1; \
    } \
    if (yytchar == 0) { \
	result = YY_NULL; \
    } \
    else { \
	yycharno++; \
	yycharpos++; \
	yycharposnnl++; \
	bm_position++; \
	buf[0] = yytchar; \
	result = 1; \
    }

/*
 * Here is the old def of the lex input() macro on which the above mods to
 * YY_INPUT are based (there's a more comprehensible version of this in
 * ./dissection-of-input-macro, q.v.):
 *
 (((yytchar=yysptr>yysbuf?U(*--yysptr):getic(yyin))==10 \
      ?(yylineno++,yycharno=1, \
	yyprevlinestartpos=yylinestartpos, \
	  yylinestartpos=yycharpos+2,yytchar):yytchar)==EOF \
           ?0:(yycharno++,bm_position++,yycharpos++,yycharposnnl++,yytchar))
 *
 */

/*
 * According to the not-entirely-accruate flex man page, in the section titled
 * "INCOMPATIBILITIES WITH LEX AND POSIX":
 *
 *    The unput() routine is not redefinable.  This restriction is in
 *    accordance with POSIX.
 *
 * What apparently they mean is that the "yyunput" routine is not redefinable,
 * since unput is still a macro, in particular a one-liner that calls yyunput.
 * so, luckily, we can in fact redefine unput.
 */
#undef unput
#define unput(c) \
    if (yytchar=='\n') { \
	yycharno--; \
	yycharpos--; \
	yycharposnnl--; \
	bm_position--; \
    } \
    yyunput( c, yytext_ptr );

/*
 * Here is the old lex def of unput(c):
 *
  {yytchar= (c);if(yytchar=='\n') \
      yylineno--;yycharno--;bm_position--;yycharpos--; \
          yycharposnnl--;*yysptr++=yytchar;}
 *
 */

%}


letter		[A-Za-z]
letter_or_underscore	[_A-Za-z]
digit		[0-9]
alphanumeric	{letter}|{digit}
other_id_char	[_]
prime		[\']
quesmrk		[\?]
identifier	{quesmrk}?({letter_or_underscore}({alphanumeric}|{other_id_char})*{prime}*)
non_blank_or_newline	[^.^\n]
integer		{digit}+
number		{integer}\.{integer}
string		(\"[^\n\"]*\")
symlit		(\'[^\n\']*\')
whitespace	[ \n\t\r]

 /**
  * Rules
  */
%%
actions		{SetKeyword(); return(YACTIONS);}
all		{SetKeyword(); return(YALL);}
and		{SetKeyword(); return(YAND);}
attribute	{SetKeyword(); return(YATTRIBUTE);}
ax		{SetKeyword(); return(YAX);}
axiom		{SetKeyword(); return(YAXIOM);}
begin		{SetKeyword(); return(YBEGIN);}
components	{SetKeyword(); return(YCOMPONENTS);}
dataflow	{SetKeyword(); return(YDATAFLOW);}
define		{SetKeyword(); return(YDEFINE);}
div		{SetKeyword(); return(YDIV);}
else		{SetKeyword(); return(YELSE);}
end		{SetKeyword(); return(YEND);}
eq		{SetKeyword(); return(YEQ);}
equations	{SetKeyword(); return(YEQUATIONS);}
except		{SetKeyword(); return(YEXCEPT);}
exists		{SetKeyword(); return(YEXISTS);}
export		{SetKeyword(); return(YEXPORT);}
extends 	{SetKeyword(); return(YEXTENDS);}
forall		{SetKeyword(); return(YFORALL);}
foreach		{SetKeyword(); return(YFOREACH);}
from		{SetKeyword(); return(YFROM);}
func		{SetKeyword(); return(YFUNC);}
function	{SetKeyword(); return(YFUNCTION);}
if		{SetKeyword(); return(YIF);}
iff		{SetKeyword(); return(YIFF);}
implies		{SetKeyword(); return(YIMPLIES);}
import		{SetKeyword(); return(YIMPORT);}
in		{SetKeyword(); return(YIN);}
inputs		{SetKeyword(); return(YINPUTS);}
is		{SetKeyword(); return(YIS);}
lambda		{SetKeyword(); return(YLAMBDA);}
let		{SetKeyword(); return(YLET);}
mod		{SetKeyword(); return(YMOD);}
module		{SetKeyword(); return(YMODULE);}
not		{SetKeyword(); return(YNOT);}
obj		{SetKeyword(); return(YOBJ);}
object		{SetKeyword(); return(YOBJECT);}
op		{SetKeyword(); return(YOP);}
operation	{SetKeyword(); return(YOPERATION);}
operations	{SetKeyword(); return(YOPERATIONS);}
ops		{SetKeyword(); return(YOPS);}
or		{SetKeyword(); return(YOR);}
out		{SetKeyword(); return(YOUT);}
outputs		{SetKeyword(); return(YOUTPUTS);}
parts		{SetKeyword(); return(YPARTS);}
post		{SetKeyword(); return(YPOST);}
postcondition	{SetKeyword(); return(YPOSTCONDITION);}
pre		{SetKeyword(); return(YPRE);}
precondition	{SetKeyword(); return(YPRECONDITION);}
return		{SetKeyword(); return(YRETURN);}
set		{SetKeyword(); return(YSET);}
then		{SetKeyword(); return(YTHEN);}
theorem		{SetKeyword(); return(YTHEOREM);}
thm		{SetKeyword(); return(YTHM);}
val		{SetKeyword(); return(YVAL);}
value		{SetKeyword(); return(YVALUE);}
var		{SetKeyword(); return(YVAR);}
variable	{SetKeyword(); return(YVARIABLE);}
where		{SetKeyword(); return(YWHERE);}
xor		{SetKeyword(); return(YXOR);}

"!="		{SetKeyword(); return(YNEQ);}
"~="		{SetKeyword(); return(YAEQ);}
"#"		{SetKeyword(); return('#');}
"&"		{SetKeyword(); return(YAND);}
"("		{SetKeyword(); return('(');}
")"		{SetKeyword(); return(')');}
"*"		{SetKeyword(); return('*');}
"+"		{SetKeyword(); return('+');}
","		{SetKeyword(); return(',');}
"-"		{SetKeyword(); return('-');}
"->"		{SetKeyword(); return(YRTARROW);}
"."		{SetKeyword(); return('.');}
".-"		{SetKeyword(); return(YATTRSELECT);}
".:"		{SetKeyword(); return(YATTRSELECT);}
".<"		{SetKeyword(); return(YGETINSTANCE);}
".>"		{SetKeyword(); return(YRETRACT);}
"/"		{SetKeyword(); return('/');}
":"		{SetKeyword(); return(':');}
"::="		{SetKeyword(); return(YRTARROW);}
";"		{SetKeyword(); return(';');}
"<"		{SetKeyword(); return('<');}
"<="		{SetKeyword(); return(YLEQ);}
"<=>"		{SetKeyword(); return(YIFF);}
"<>"		{SetKeyword(); return(YNEQ);}
"="		{SetKeyword(); return('=');}
"=="		{SetKeyword(); return(YEQEQ);}
"=>"		{SetKeyword(); return(YIMPLIES);}
">"		{SetKeyword(); return('>');}
">="		{SetKeyword(); return(YGEQ);}
"["		{SetKeyword(); return('[');}
"]"		{SetKeyword(); return(']');}
"^"		{SetKeyword(); return('^');}
"{"		{SetKeyword(); return('{');}
"|"		{SetKeyword(); return('|');}
"}"		{SetKeyword(); return('}');}
".."		{SetKeyword(); return(YDOTDOT);}
"?"		{SetKeyword(); return('?');}
"?."		{SetKeyword(); return(YISA);}
"?<"		{SetKeyword(); return(YCHKINSTANCE);}
"?->"		{SetKeyword(); return(YQRTARROW);}
"..."		{return SetIdent();
		     /* OTHER THOTS ABANDONED FOR NOW:
		      * pick up a commment and return it as a YText token.
		      * Note that the diff between this case and the case of
		      * a comment elsewhere is that here a YText token is
		      * explicitely returned whereas elsewhere the comment
		      * is ignored for now, and later will be make an attach-
		      * ment.
		      */

		     /* MAYBE AT SOME POINT:
		      * Cruise over all chars upto the next ':'.  Then back
		      * up over the preceding token and see if its a defined
		      * attribute name.  If so, terminate the scan, else
		      * continue in this way.
		      */
		}
{identifier}	{return SetIdent();}
{symlit}	{yylval.YYSsymlit.val = stralloc(&(yytext[1]));
		 return(Ysymlit);}
{integer}	{yylval.YYSinteger.val = atoi(yytext);
		 return(Yinteger);}
{number}	{yylval.YYSreal.val = atof(yytext);
		 return(Yreal);}
{string}	{ProcessString();
		 return(Ystring);}
{whitespace}    {if (inParts and GrammarFlagOn()) return YAND;}
"(*"		{if (MoveOverMod2Comment() == YEOF) return YEOF;
		 if (inAttribute) {
			yylval.YYSstring.val = stralloc(CommentBuf);
			inAttribute = false; /* only one comment will be
						kept as the value */
			return Ytext;
		 }
		}
--.*		;
.		{fprintf(stderr, "line %d: illegal character %c\n",
			yylineno, yytext[0]);}
%%

/**
 * Functions
 */
int MoveOverMod2Comment()
{
    char c = YEOF;
    int i,l;
    bool CommentTruncated;

    l = yylineno;

    /* Not exactly sure when c to -1, but it has and caused infinite loop when
     * so.  So there's a guard now. */
    for (i=0, CommentTruncated=false; c != -1; ) {
	for (; ((c = input()) != '*') and (c != YEOF) and (c != -1); i++) {
	    if (i < COMMENTSIZE)
		CommentBuf[i] = c;
	    else if (!CommentTruncated) {
		printf("Comment truncated to %d chars\n", COMMENTSIZE);
		CommentTruncated = true;
	    }
	}
	if ((c=input()) == rparen)
	    break;
	if (c == lparen) {
	    error("Nested comment starting on line %d\n", l);
	    break;
	}
	else if ((c == YEOF) or (c == -1)) {
	    error("Unterminated comment starting on line %d\n", l);
	    return YEOF;
	}
	else
	    unput(c);
    }
    CommentBuf[i] = '\0';
    return Ytext;          /* Just something other than YEOF */
}

char* GetLastComment() {
    return stralloc(CommentBuf);
}

void ClearComment() {
    CommentBuf[0] = '\0';
}

int MoveOverCurlyComment()
{
    char c;
    int i,l;
    bool CommentTruncated;

    l = yylineno;

    for (i=0, CommentTruncated=false; (c = input()) != '}'; i++) {
	if (i < COMMENTSIZE)
	    CommentBuf[i] = c;
	else if (!CommentTruncated) {
	    printf("Comment truncated to %d chars\n", COMMENTSIZE);
	    CommentTruncated = true;
	}
	if (c == YEOF) {
	    error("Unterminated comment starting on line %d\n", l);
	    return YEOF;
	}
    }
}


SetKeyword(symbol)
    int symbol;
{
    yylval.keyword.text = hashsave(yytext);

    yylval.keyword.loc.line = yylineno;

    /*
     * Next crap is to get col number right when token ends the line.
     */
    if (yytchar == 10)
	yylval.keyword.loc.col = (yycharpos - yyprevlinestartpos + 1) -
	    (strlen(yytext) - 1);
    else
	yylval.keyword.loc.col = yycharno - (strlen(yytext)+1);

    yylval.keyword.loc.abschar = yycharpos-(strlen(yytext)+1);
    yylval.keyword.loc.abscharnnl = yycharposnnl-(strlen(yytext)+1);
    yylval.keyword.loc.file = yyfilename;
	/* NOTE: yyfilename already stralloc'd in translator.c:GetXXXInFile */
}

InitLex()
{
    yycharno = 0;
    yycharpos = 0;
    yycharposnnl = 0;
    yylinestartpos = 0;
    yyprevlinestartpos = 0;
 /*   InFileStack = NewList();
  */
    if (not LineBuf)
	LineBuf = (char *) malloc(MAXLINELEN+1);
    LineInd = 0;
    LineBuf[0] = '\0';
}

bool SetLexInFile(filename)
    char *filename;
{
    yycharpos = 0;
    yycharposnnl = 0;
    yycharno = 0;
    yylineno = 1;
    if (streq(filename, "stdin")) {
	yyin = stdin;
	yyfilename = null;
    }
    else {
	yyin = fopen(filename, "r");
	if (not yyin) {
	    lexerror("No such file %s.\n", filename);
	    return false;
	}
	yyfilename = stralloc(filename);
    }
    return true;
}


void PushLexInFile()
{
    SrcFileLoc *floc = (SrcFileLoc *) malloc(sizeof(SrcFileLoc));

    floc->loc.line = yylineno;
    floc->loc.col = yycharno;
    floc->loc.file = yyfilename;
    floc->File = yyin;
    PushList(InFileStack, (char *) floc);
}

/*
 * Reset infile after we're done with an #include.
 */
SrcFileLoc *PopLexInFile()
{
    SrcFileLoc *floc = (SrcFileLoc *) PopList(InFileStack);

    fclose(yyin);
    if (floc) {
	yylineno = floc->loc.line + 1;
	yycharno = 0;
	yycharpos = floc->loc.abschar + 1;
	yycharposnnl = floc->loc.abscharnnl + 1;
	yyfilename = floc->loc.file;
	yyin = floc->File;
	return floc;
    }	
    return null;
}    

int NumLexInFiles()
{
    return ListLen(InFileStack);
}

FILE *CurInFile()
{
    return yyin;
}


/*
 * Get the next token manually.
 */
char *NextToken();

/*
 * Handle any error messages from the lexer.
 */
void lexerror(msg)
    char *msg;
{
     SrcLoc loc;

     loc.line = yylineno;
     loc.col = yycharno;
     loc.file = yyfilename;
     lserror(loc, msg);
}

/*
 * Return a SrcLoc with the current token's info.
 */
SrcLoc CurSrcLoc() {
    SrcLoc loc;

    loc.line = yylineno;
    loc.col = yycharno;
    loc.file = yyfilename ? stralloc(yyfilename) : stralloc("stdin");

    return loc;
}

/*
 * Role the line index back one char.
 */
ungetic()
{
    LineInd--;
}

/*
 * Give us a peek at the previous char in the input buf.
 */
char getprevc()
{
    return LineInd ? LineBuf[LineInd-1] : '\0';
}

/*
 * Check if we're at the end of a GetLine buffer.  This is used in order to
 * allow multiple stmts/decls on a single interactive line.
 */
EOL()
{
    return ((LineBuf[LineInd] == '\n') or (LineBuf[LineInd] == '\0'));
}


/*
 * Force an end-of-line to be detected by the next call to EOL.  Since ForceEOL
 * is called after a top-level error is detected, we also want to clear the
 * lexer's input buffer here.  This is necessary in order to avoid excess
 * syntax errors, and other odd behavior.
 */
ForceEOL()
{
    LineBuf[LineInd] = '\n';
    YY_FLUSH_BUFFER;
}


/*
 * To fix the off-by-1-line multi-file input problem, this gets called from the
 * top-level translator at a place you can go see.
 */
ForceEOF()
{
    LineInd = 0;
    LineBuf[LineInd] = '\0';
}

/*
 * Function to replace the lexer's getc.  getic reads from our own buffer
 * rather than straight from stdin.
 */
getic(yyin)
    FILE *yyin;
{
    char *line;
    if (!LineBuf[LineInd]) {
      if (interactive)
	prompt();
      line = fgets(LineBuf, MAXLINELEN, yyin);
      if (line == null)
        return null;
      LineInd = 0;
      }
    return LineBuf[LineInd++];
}

/*
 * Output an appropriate interative prompt.  I.e., if at top-level, then ">",
 * else if still parsing then ">>".
 */
prompt(yyin)
    FILE *yyin;
{
    if (!Parsing)
	printf("> ");
    else
	printf(">>");
}

/*
 * Routine to get the first line so that a single '>' prompt is printed out
 * initially.
 */
GetLine(yyin)
    FILE *yyin;
{
    char *line;

    if ((LineInd == 0) or (LineBuf[LineInd] == 0) or 
		(LineBuf[LineInd] == '\n')) {
	printf("> ");
	line = fgets(LineBuf, MAXLINELEN, yyin);
	LineInd = 0;
	return !(line == null);
    }
    else
	return true;
}

SetIdent() {
    yylval.YYSident.text = hashsave(yytext);
	yylval.YYSident.loc.line = yylineno;

    /*
     * Next crap is to get col number right when token ends the line.
     */
    if (yytchar == 10)
	yylval.YYSident.loc.col = (yycharpos - yyprevlinestartpos + 1) -
	    (strlen(yytext) - 1);
    else
	yylval.YYSident.loc.col = yycharno - (strlen(yytext)+1);

    yylval.YYSident.loc.abschar = yycharpos-(strlen(yytext)+1);
    yylval.YYSident.loc.abscharnnl =
	yycharposnnl-(strlen(yytext)+1);
    yylval.YYSident.loc.file = yyfilename;
    /* NOTE: yyfilename already stralloc'd in translator.c:GetXXXInFile */

    /* 20jun92: Browser hooks. */
    bm_startpos = bm_position - strlen(yytext) + 1;
    bm_endpos = bm_position + 1;

    /*
     * WARNING!
     *
     * Calling IsAttrName with yytext as the arg rather than
     * yylval.YYSident.text causes an excruciatingly subtle error.
     *
     */
    if (IsAttrName(yylval.YYSident.text)) {
	if (streq(yylval.YYSident.text, "description")) {
	    ClearComment();
	}
	return(Yattr);
    }
    else
	return(Yident);
}

/*
 * Process a string lexeme.  This function is called for single-quoted strings
 * length 1 and all double-quoted strings.
 */
ProcessString() {
    char *s1,*s2,*s3;
    s3 = (char *)malloc(strlen(yytext)+1);
    for (s1=&yytext[0],s2=s3; *s1; ) {
	if (*s1 != '\\') {
	    *s2++ = *s1++;
	}
	else if (isodigit(s1[1])) {
	    char obuf[4];
	    int l = 1;
	    if (isodigit(s1[2]))
		l++;
	    if (isodigit(s1[3]))
		l++;
	    strncpy(obuf, s1+1, l); 
	    obuf[l] = '\0';
	    s1 += l+1;
	    *s2++ = (char) strtol(obuf, NULL, 8);
	}
	else if ((s1[1] == 'x') and (isxdigit(s1[2]))) {
	    char* hbuf;
	    int l = 1;
	    s1 += 2;
	    while (isxdigit(s1[l]))
		l++;
	    hbuf = (char*) malloc(l+1);
	    strncpy(hbuf, s1, l);
	    hbuf[l] = '\0';
	    s1 += l;
	    *s2++ = (char) strtol(hbuf, NULL, 16);
	    free(hbuf);
	}
	else {
	    s1++;
	    switch (*s1) {
		case 'a': *s2++ = '\a'; break;
		case 'b': *s2++ = '\b'; break;
		case 'f': *s2++ = '\f'; break;
		case 'n': *s2++ = '\n'; break;
		case 'r': *s2++ = '\r'; break;
		case 't': *s2++ = '\t'; break;
		case 'v': *s2++ = '\v'; break;
		case '\\': *s2++ = '\\'; break;
		case '\?': *s2++ = '\?'; break;
		case '\'': *s2++ = '\''; break;
		case '\"': *s2++ = '\"'; break;
		case '0': *s2++ = '\0'; break;
		default: *s2++ = *s1;
	    }
	    s1++;
	}
    }
    *(s2) = '\0';
    yylval.YYSstring.val = stralloc(s3);
}