# C/C++ BNF language specification
#
# Copyright (C) 1999, 2000, 2001 Eric M. Ludlam
#
# Author: Eric M. Ludlam
# X-RCS: $Id: c.bnf,v 1.55 2001/12/18 02:39:12 zappo Exp $
#
# c.bnf is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU Emacs; see the file COPYING.  If not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#

# Generator directives.  `declaration' is named as the start rule and
# `codeblock' as the scope-start rule; the remaining directives name the
# output file, the generated tables, the major modes and the setup function.
%start         declaration
%scopestart    codeblock
%outputfile    semantic-c.el
%parsetable    semantic-toplevel-c-bovine-table
%keywordtable  semantic-c-keyword-table
%languagemode  (c-mode c++-mode)
%setupfunction semantic-default-c-setup

# Embedded Emacs Lisp: a single `setq' over parser/tool variables.
# NOTE(review): presumably spliced into the setup function named above --
# confirm against the grammar generator.
%(setq semantic-expand-nonterminal 'semantic-expand-c-nonterminal
       semantic-flex-extensions semantic-flex-c-extensions
       semantic-dependency-include-path semantic-default-c-path
       imenu-create-index-function 'semantic-create-imenu-index
       semantic-type-relation-separator-character '("." "->")
       semantic-command-separation-character ";"
       document-comment-start "/*"
       document-comment-line-prefix " *"
       document-comment-end " */"
       ;; Semantic navigation inside 'type children
       senator-step-at-token-ids '(function variable)
       )%

# Preprocessor keywords (used by the `macro' and `define' rules).
%token INCLUDE "include"
%token DEFINE "define"

# Punctuation tokens.
%token HASH punctuation "#"
%token PERIOD punctuation "."
%token COLON punctuation ":"
%token SEMICOLON punctuation ";"
%token STAR punctuation "*"
%token AMPERSAND punctuation "&"
%token DIVIDE punctuation "/"
%token PLUS punctuation "+"
%token MINUS punctuation "-"
%token BANG punctuation "!"
# Remaining punctuation tokens.
%token EQUAL punctuation "="
%token LESS punctuation "<"
%token GREATER punctuation ">"
%token COMA punctuation ","
%token TILDE punctuation "~"

# Keyword tokens.  Each %put attaches a one-line `summary' string to the
# keyword; the <...> parts of a summary are placeholders for user text.

# Declaration modifiers.
%token EXTERN "extern"
%put EXTERN summary "Declaration Modifier: extern ..."
%token STATIC "static"
%put STATIC summary "Declaration Modifier: static ..."
%token CONST "const"
%put CONST summary "Declaration Modifier: const ..."
%token VOLATILE "volatile"
%put VOLATILE summary "Declaration Modifier: volatile ..."
%token REGISTER "register"
%put REGISTER summary "Declaration Modifier: register ..."
%token SIGNED "signed"
%put SIGNED summary "Numeric Type Modifier: signed ..."
%token UNSIGNED "unsigned"
%put UNSIGNED summary "Numeric Type Modifier: unsigned ..."

# Function and method modifiers.
%token INLINE "inline"
%put INLINE summary "Function Modifier: inline <return type> <name>(...) {...};"
%token VIRTUAL "virtual"
%put VIRTUAL summary "Method Modifier: virtual <type> <name>(...) ..."

# Type-introducing keywords.
%token STRUCT "struct"
%put STRUCT summary "Structure Type Declaration: struct [name] { ... };"
%token UNION "union"
%put UNION summary "Union Type Declaration: union [name] { ... };"
%token ENUM "enum"
%put ENUM summary "Enumeration Type Declaration: enum [name] { ... };"
%token TYPEDEF "typedef"
%put TYPEDEF summary "Arbitrary Type Declaration: typedef <typedeclaration> <name>;"
%token CLASS "class"
%put CLASS summary "Class Declaration: class <name>[:parents] { ... };"
%token NAMESPACE "namespace"
%put NAMESPACE summary "Namespace Declaration: namespace <name> { ... };"

# The parser finds templates by skipping the TEMPLATE keyword and its
# specifier, then parsing the class/method being templatized.
# TEMPLATE is declared exactly once; a second, identical %token line with a
# competing summary has been folded into this one (the later summary, which
# is the one that took effect, is kept).
%token TEMPLATE "template"
%put TEMPLATE summary "template <class TYPE ...> TYPE_OR_FUNCTION"

# Trailing function qualifiers.
%token THROW "throw"
%put THROW summary "<type> <methoddef> (<method args>) throw (<exception>) ..."
%token REENTRANT "reentrant"
%put REENTRANT summary "<type> <methoddef> (<method args>) reentrant ..."

# Leave these alone for now.
%token OPERATOR "operator"
%token PUBLIC "public"
%token PRIVATE "private"
%token PROTECTED "protected"

# These aren't used for parsing, but this is a useful place to describe
# the keywords.
%token IF "if"
%token ELSE "else"
%put {IF ELSE} summary "if (<condition>) { code } [ else { code } ]"
%token DO "do"
%token WHILE "while"
%put DO summary " do { code } while (<condition>);"
%put WHILE summary "do { code } while (<condition>); or while (<condition>) { code };"
%token FOR "for"
%put FOR summary "for(<init>; <condition>; <increment>) { code }"
%token SWITCH "switch"
%token CASE "case"
%token DEFAULT "default"
%put {SWITCH CASE DEFAULT} summary "switch (<variable>) { case <constvalue>: code; ... default: code; }"
%token RETURN "return"
%put RETURN summary "return <value>;"
%token BREAK "break"
%put BREAK summary "Non-local exit within a loop or switch (for, do/while, switch): break;"
%token CONTINUE "continue"
%put CONTINUE summary "Non-local continue within a loop (for, do/while): continue;"
%token SIZEOF "sizeof"
%put SIZEOF summary "Compile time macro: sizeof(<type or variable>) // size in bytes"

# Types
%token VOID "void"
%put VOID summary "Built in typeless type: void"
%token CHAR "char"
%put CHAR summary "Integral Character Type: (0 to 255)"
%token SHORT "short"
%put SHORT summary "Integral Primitive Type: (-32768 to 32767)"
%token INT "int"
%put INT summary "Integral Primitive Type: (-2147483648 to 2147483647)"
%token LONG "long"
%put LONG summary "Integral primitive type (-9223372036854775808 to 9223372036854775807)"
%token FLOAT "float"
%put FLOAT summary "Primitive floating-point type (single-precision 32-bit IEEE 754)"
%token DOUBLE "double"
%put DOUBLE summary "Primitive floating-point type (double-precision 64-bit IEEE 754)"

%token UNDERP "_P"
%token UNDERUNDERP "__P"
%put UNDERP summary "Common macro to eliminate prototype compatibility on some compilers"
%put UNDERUNDERP summary "Common macro to eliminate prototype compatibility on some compilers"

# Top-level constructs.
declaration : macro
            | type
            | var-or-fun
            | define
            | extern-c
            | template
            ;

# Constructs recognised inside a code block.
codeblock : define
          | codeblock-var-or-fun
          | type # type is less likely to be used here.
          ;

extern-c-contents: open-paren
                   ( nil )
                 | bovine-toplevel
                 | close-paren
                   ( nil )
                 ;

extern-c: EXTERN string "\"C\"" semantic-list
        # Extern C commands which contain a list need to have the
        # entries of the list extracted, and spliced into the main
        # list of entries.  This must be done via the function
        # that expands singular nonterminals, such as int x,y;
          ( extern (EXPANDFULL $3 extern-c-contents) )
        | EXTERN string "\"C\""
        # A plain extern "C" call should add something to the token,
        # but just strip it from the buffer here for now.
          ( nil )
        ;

macro : HASH macro-or-include
        ( ,$2 )
      ;

# NOTE(review): in the DEFINE alternative $3 is opt-define-arglist, whose
# only action is ( nil ), so the value slot never carries the macro body.
# Left unchanged here; confirm whether $4 (opt-expression) was intended.
macro-or-include : DEFINE symbol opt-define-arglist opt-expression
                   ( $2 variable nil $3 (ASSOC const t) nil )
                 | INCLUDE LESS filename GREATER
                   ( ,$3 include t nil )
                 | INCLUDE string
                   ( (read $2) include nil nil )
                 ;

opt-define-arglist : semantic-list
                     ( nil )
                   | EMPTY
                   ;

# This is used in struct parts.
# Positions: $1 = HASH, $2 = DEFINE, $3 = symbol, $4 = opt-expression.
# The token is named after the symbol ($3); the keyword in $2 is not a name.
define : HASH DEFINE symbol opt-expression
         ( $3 variable nil $4 (ASSOC const t) nil )
       ;

# Some keyword tokens are also header file names (e.g. FLOAT, as in
# <float.h>).  Add them here.
filename-prefix : symbol
                | FLOAT
                  ( "float" )
                ;

filename : filename-prefix PERIOD symbol
           ( (concat (car $1) $2 $3) )
         | filename-prefix DIVIDE filename
           ( (concat (car $1) $2 (car $3)) )
         ;

# In C++, structures can have the same things as classes.
# So delete this someday in the future.
#
#structparts : semantic-list
#              (EXPANDFULL $1 structsubparts)
#            ;
#
#structsubparts : open-paren "{"
#                 ( nil )
#               | close-paren "}"
#                 ( nil )
#               | var-or-fun
#               | define
#               # sometimes there are defines in structs.
#               ;

unionparts : semantic-list
             (EXPANDFULL $1 classsubparts)
           ;

classsubparts : open-paren "{"
                ( nil )
              | close-paren "}"
                ( nil )
              # In C++, this label in a classsubpart represents
              # PUBLIC or PRIVATE bits.  Ignore them for now.
              # NOTE(review): this rule tags the label `label' while
              # namespacesubparts tags it `protection' -- confirm which
              # class the token consumers expect.
              | opt-class-protection COLON
                ( ,$1 label )
              | var-or-fun
              | type
              # `define' already yields a complete token, so it is passed
              # through unchanged, as in namespacesubparts and codeblock.
              | define
              | EMPTY
              ;

opt-class-parents : COLON class-parents
                    ( $2 )
                  | EMPTY
                    ( )
                  ;

class-parents : opt-class-protection opt-class-declmods
                symbol COMA class-parents
                ( ,(cons $3 $5 ) )
              | opt-class-protection opt-class-declmods symbol
                ( $3 )
              ;

opt-class-declmods : class-declmods opt-class-declmods
                     ( nil )
                   | EMPTY
                   ;

class-declmods : VIRTUAL
               ;

opt-class-protection : PUBLIC
                     | PRIVATE
                     | PROTECTED
                     | EMPTY
                     ;

namespaceparts : semantic-list
                 (EXPANDFULL $1 namespacesubparts)
               ;

namespacesubparts : open-paren "{"
                    ( nil )
                  | close-paren "}"
                    ( nil )
                  | type
                  | var-or-fun
                  | define
                  # In C++, this label in a classsubpart represents
                  # PUBLIC or PRIVATE bits.  Ignore them for now.
                  # $1 is spliced (,$1) so the token name is the keyword
                  # string itself rather than a one-element list.
                  | opt-class-protection COLON
                    ( ,$1 protection )
                  | EMPTY
                  ;

enumparts : semantic-list
            (EXPANDFULL $1 enumsubparts)
          ;

enumsubparts : symbol opt-assign
               ( $1 variable "int" ,$2 (ASSOC const t) nil)
             | open-paren "{"
               ( nil )
             | close-paren "}"
               ( nil )
             | COMA
               ( nil )
             ;

opt-name : symbol
         | EMPTY
           ( "" )
         ;

typesimple : struct-or-class opt-name opt-class-parents semantic-list
             ( ,$2 type ,$1
                   (let ((semantic-c-classname (cons (car ,$2) (car ,$1))))
                     (EXPANDFULL $4 classsubparts))
                   ,$3 nil nil )
           | UNION opt-name unionparts
             ( ,$2 type $1 $3 nil nil nil )
           | ENUM opt-name enumparts
             ( ,$2 type $1 $3 nil nil nil )
           | TYPEDEF typeformbase opt-stars symbol
             ( $4 type $1 nil $2 nil nil )
           ;

struct-or-class: STRUCT
               | CLASS
               ;

type : typesimple SEMICOLON
       ( ,$1 )
     | NAMESPACE symbol namespaceparts
       ( $2 type $1 $3 nil nil nil )
     ;

template : TEMPLATE template-specifier template-definition
         # We should add a 'template extra specifier for the definition found.
           ( ,$3 )
         ;

template-specifier : LESS template-specifier-type-list GREATER
                     ( ,$2 )
                   ;

template-specifier-type-list : template-var COMA template-specifier-type-list
                               ( ,(cons (,car ,$1) $3 ) )
                             | template-var
                               ( $1 )
                             ;

template-var : template-type opt-template-equal
               ( ,$1 )
             ;

opt-template-equal : EQUAL symbol LESS template-specifier-type-list GREATER
                   | EMPTY
                   ;

template-type : CLASS symbol
              | STRUCT symbol
              | builtintype
              | symbol
              ;

template-definition : type
                      ( ,$1 )
                    | var-or-fun
                      ( ,$1 )
                    ;

# Counts the leading `*' characters of a declarator.
opt-stars : STAR opt-starmod opt-stars
            ( (1+ (car $3)) )
          | EMPTY
            ( 0 )
          ;

opt-starmod : STARMOD opt-starmod
              ( ,(cons (,car ,$1) $2) )
            | EMPTY
              ()
            ;

STARMOD : CONST
        ;

declmods : DECLMOD declmods
           ( ,(cons ,(car ,$1) $2 ) )
         | DECLMOD
           ( ,$1 )
         | EMPTY
           ()
         ;

DECLMOD : EXTERN
        | STATIC
        | CONST
        | VOLATILE
        | SIGNED
        | UNSIGNED
        | INLINE
        | REGISTER
        | METADECLMOD
        ;

metadeclmod : METADECLMOD
              ()
            | EMPTY
              ()
            ;

METADECLMOD : VIRTUAL
            ;

# C++: A type can be modified into a reference by "&"
opt-ref : AMPERSAND
          ( 1 )
        | EMPTY
          ( 0 )
        ;

typeformbase : typesimple
               ( ,$1 )
             | STRUCT symbol
               ( $2 type $1 )
             | UNION symbol
               ( $2 type $1 )
             | ENUM symbol
               ( $2 type $1 )
             | builtintype
               ( ,$1 )
             | symbol template-specifier
               ( $1 type "class" )
             | symbol COLON COLON typeformclassbase
               ( (concat $1 "::" (car $4) ) )
             | symbol
               ( $1 )
             ;

typeformclassbase : symbol COLON COLON typeformclassbase
                    ( (concat $1 "::" (car $4)) )
                  | symbol
                    ( $1 )
                  ;

builtintype : VOID
            | CHAR
            | SHORT
            | INT
            | LONG
            | FLOAT
            | DOUBLE
            ;

codeblock-var-or-fun : declmods typeformbase metadeclmod
                       opt-ref var-or-func-decl
                       ( ,(semantic-c-reconstitute-token ,$5 $1 $2 ) )
                     ;

var-or-fun : codeblock-var-or-fun
             ( ,$1 )
           # it is possible for a function to not have a type, and
           # it is then assumed to be an int.  How annoying.
           # In C++, this could be a constructor or a destructor.
           # Even more annoying.  Only ever do this for regular
           # top-level items.  Ignore this problem in code blocks
           # so that we don't have to deal with regular code
           # being erroneously converted into types.
           | declmods var-or-func-decl
             ( ,(semantic-c-reconstitute-token ,$2 $1 nil ) )
           ;

var-or-func-decl : func-decl
                   ( ,$1 )
                 | var-decl
                   ( ,$1 )
                 ;

func-decl : opt-stars opt-class opt-destructor functionname
            opt-under-p arg-list
            opt-post-fcn-modifiers
            opt-throw
            opt-initializers
            fun-or-proto-end
            ( ,$4 'function
                  ;; Extra stuff goes in here.
                  ;; Continue with the stuff we found in
                  ;; this definition
                  $2 $3 $6 $8 $7 ,$1 ,$10)
          ;

var-decl : varnamelist SEMICOLON
           ( $1 'variable )
         ;

opt-under-p : UNDERP
              (nil)
            | UNDERUNDERP
              (nil)
            | EMPTY
            ;

opt-initializers: COLON symbol semantic-list opt-initializers
                | COMA symbol semantic-list opt-initializers
                | EMPTY
                ;

opt-post-fcn-modifiers : post-fcn-modifiers opt-post-fcn-modifiers
                         ( ,(cons ,$1 $2) )
                       | EMPTY
                         ( nil )
                       ;

post-fcn-modifiers : REENTRANT
                   | CONST
                   ;

opt-throw : THROW semantic-list
            ( EXPAND $2 throw-exception-list )
          | EMPTY
          ;

# Is this true?  I don't actually know.
throw-exception-list : symbol COMA throw-exception-list
                       ( ,(cons $1 $3) )
                     | symbol close-paren ")"
                       ( $1 )
                     | open-paren "(" throw-exception-list
                       ( ,$2 )
                     ;

# NOTE(review): only a symbol is accepted after the colon, so a numeric
# bit-field width (`int x : 3;') is not matched -- confirm whether that is
# intended.
opt-bits : COLON symbol
           ( $2 )
         | EMPTY
           ( nil )
         ;

opt-array : semantic-list "\\[.*\\]$" opt-array
          # Eventually we want to replace the 1 below with a size
          # (if available)
            ( (cons 1 (car ,$2) ) )
          | EMPTY
            ( nil )
          ;

opt-assign : EQUAL expression
             ( $2 )
           | EMPTY
             ( nil )
           ;

opt-restrict : symbol "\\<\\(__\\)?restrict\\>"
             | EMPTY
             ;

varname : opt-stars opt-restrict symbol opt-bits opt-array opt-assign
          ( $3 ,$1 ,$4 ,$5 ,$6 )
        ;

# I should store more in this def, but leave it simple for now.
variablearg
  : declmods typeformbase opt-ref varname
      ( (list $4) variable $2 nil
        (ASSOC const (if (member "const" $1) t nil)
               typemodifiers (delete "const" $1)
               reference (car ,$3)
               )
        nil
        )
  ;

# Comma-separated declarators, collected into one list.
varnamelist
  : varname COMA varnamelist
      ( ,(cons $1 $3) )
  | varname
      ( $1 )
  ;

# Optional `Class::' qualifier in front of a function name.
opt-class
  : symbol COLON COLON
      ( $1 )
  | EMPTY
      ( nil )
  ;

# t when the function name is preceded by `~'.
opt-destructor
  : TILDE
      ( t )
  | EMPTY
      ( nil )
  ;

# Parenthesised argument list, optionally followed by K&R-style
# argument declarations.
# NOTE(review): the "^(" alternatives are listed ahead of "^(void)$" and also
# match that text, so the last alternative looks unreachable -- confirm.
arg-list
  : semantic-list "^(" knr-arguments
      ( ,$2 )
  | semantic-list "^("
      (EXPANDFULL $1 arg-sub-list)
  | semantic-list "^(void)$"
      ( )
  ;

knr-arguments
  : variablearg SEMICOLON knr-arguments
      ( ,(cons $1 $3) )
  | variablearg SEMICOLON
      ( $1 )
  ;

# One element found while expanding a parenthesised argument list.
arg-sub-list
  : variablearg
      ( ,$1 )
  | PERIOD PERIOD PERIOD close-paren ")"
      ( "..." )
  | COMA
      ( nil )
  | open-paren "("
      ( nil )
  | close-paren ")"
      ( nil )
  ;

# Operator spellings accepted after the OPERATOR keyword; the two-character
# operators are joined back into a single string.
operatorsym
  : LESS LESS
      ( "<<" )
  | GREATER GREATER
      ( ">>" )
  | EQUAL EQUAL
      ( "==" )
  | LESS EQUAL
      ( "<=" )
  | GREATER EQUAL
      ( ">=" )
  | BANG EQUAL
      ( "!=" )
  | LESS
  | GREATER
  | STAR
  | PLUS
  | MINUS
  | DIVIDE
  | EQUAL
  ;

functionname
  : OPERATOR operatorsym
      ( ,$2 )
  | symbol
      ( $1 )
  ;

# What ends a function declaration: `;' yields t, a body (semantic-list)
# yields nil, and `= 0 ;' yields 'pure-virtual.
fun-or-proto-end
  : SEMICOLON
      ( t )
  | semantic-list
      ( nil )
  # Here is an annoying feature of C++ pure virtual methods
  | EQUAL number "^0$" SEMICOLON
      ( 'pure-virtual )
  ;

opt-expression
  : expression
  | EMPTY
      ( nil )
  ;

type-cast
  : semantic-list
      ( EXPAND $1 type-cast-list )
  ;

type-cast-list
  : open-paren typeformbase close-paren
  ;

# Use expression for parsing only.  Don't actually return anything
# for now.  Hopefully we can fix this later.
# Every alternative yields only the start and end positions of the match.
expression
  : number
      ( (identity start) (identity end) )
  | symbol
      ( (identity start) (identity end) )
  | string
      ( (identity start) (identity end))
  | type-cast expression  # A cast to some other type
      ( (identity start) (identity end) )
  | semantic-list
      ( (identity start) (identity end) )
  | punctuation "[-+*/%^|&]" expression
      ( (identity start) (identity end) )
  ;