@q Copyright 2012-2024, Alexander Shibakov@>
@q This file is part of SPLinT@>
@q SPLinT is free software: you can redistribute it and/or modify@>
@q it under the terms of the GNU General Public License as published by@>
@q the Free Software Foundation, either version 3 of the License, or@>
@q (at your option) any later version.@>
@q SPLinT is distributed in the hope that it will be useful,@>
@q but WITHOUT ANY WARRANTY; without even the implied warranty of@>
@q MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the@>
@q GNU General Public License for more details.@>
@q You should have received a copy of the GNU General Public License@>
@q along with SPLinT. If not, see <https://www.gnu.org/licenses/>.@>
@** The name parser. What follows is an example parser for the term
name processing. This approach (i.e. using a `full blown' parser/scanner
combination) is probably not the best way to implement such machinery
but its main purpose is to demonstrate a way to create a separate
parser for local purposes. The name parser is what allows one to
automatically typeset term names such as \.{example1}
and \.{\%option\_name} as \prodstyle{example1} and \prodstyle{\%option_name}.
@q The reference to \prodstyle{example1} above serves a dual role of correcting@>
@q an overfull \vbox in the index.@>
% We include the macros here since this file is intended to be
% included by the documentation `aggregator' so putting bare \TeX\
% at the beginning of the file runs the risk of producing an error
% of having \TeX\ material inside a \Cee\ section.
\let\currentparsernamespace\parsernamespace
\let\parsernamespace\smallnamespace
\let\hostparsernamespace\smallnamespace
\input stokenset.sty
\let\parsernamespace\currentparsernamespace
@(small_parser.yy@>=
@G Switch to generic mode.
%{@> @ @=%}
@> @ @=
%union {@> @ @=}
%{@> @ @=%}
@> @ @=
%%
@> @ @=
%%
@g
@ @=
@G
%token-table
%debug
%start full_name
@g
@ @=
@G
%token PERCENT_IDENTIFIER
%token IDENTIFIER
%token OPTIONAL NO_ATTR EXTENDED LT RT
%token INTEGER
%token WILDCARD C_ESCCHAR
%token META_IDENTIFIER
@g
@ @=
@G
full_name:
identifier_string suffixes.opt {@> @ @=}
| META_IDENTIFIER {@> @ @=}
| quoted_name suffixes.opt {@> @ @=}
;
identifier_string:
PERCENT_IDENTIFIER {@> @ @=}
| IDENTIFIER {@> @ @=}
| '<' IDENTIFIER '>' {@> @ @=}
| '\'' WILDCARD '\'' {@> @ @=}
| '\'' C_ESCCHAR '\'' {@> @ @=}
| '\'' '>' '\'' {@> @'} string@> @=}
| '\'' '<' '\'' {@> @ @=}
| '\'' '.' '\'' {@> @ @=}
| '\'' '_' '\'' {@> @ @=}
| '\'' '-' '\'' {@> @ @=}
| '\'' '$' '\'' {@> @ @=}
| '$' {@> @ @=}
| qualifier {@> @ @=}
| identifier_string IDENTIFIER {@> @ @=}
| identifier_string qualifier {@> @ @=}
| identifier_string INTEGER {@> @ @=}
;
quoted_name:
'\"' PERCENT_IDENTIFIER '\"' {@> @ @=}
| '\"' IDENTIFIER '\"' {@> @ @=}
;
suffixes.opt:
{@> TeX_( "/yy0{}" ); @=}
| '.' {@> TeX_( "/yy0{/nx/dotsp/nx/sfxnone}" ); @=}
| '.' suffixes {@> @ @=}
| '.' qualified_suffixes {@> @ @=}
;
suffixes:
IDENTIFIER {@> @ @=}
| INTEGER {@> @ @=}
| suffixes '.' {@> @ @=}
| suffixes IDENTIFIER {@> @ @=}
| suffixes INTEGER {@> @ @=}
| qualifier '.' {@> TeX_( "/yy0{/nx/sfxn/the/yy(1)/nx/dotsp}" ); @=}
| suffixes qualifier '.' {@> TeX_( "/yy0{/the/yy(1)/nx/sfxn/the/yy(2)/nx/dotsp}" ); @=}
;
qualified_suffixes:
suffixes qualifier {@> @ @=}
| qualifier {@> @ @=}
;
@t}\vb{\inline\flatten}{@>
qualifier:
OPTIONAL {@> TeX_( "/yy0{/the/yy(1)}" ); @=}
| NO_ATTR {@> TeX_( "/yy0{/the/yy(1)}" ); @=}
| EXTENDED {@> TeX_( "/yy0{/the/yy(1)}" ); @=}
| LT {@> TeX_( "/yy0{/the/yy(1)}" ); @=}
| RT {@> TeX_( "/yy0{/the/yy(1)}" ); @=}
;
@g
@ @=
@[TeX_( "/yy0{/the/yy(1)/the/yy(2)}/namechars/yyval" );@]@;
@ @=
@[TeX_( "/getfirst{/yy(1)}/to/toksa" );@]@;
@[TeX_( "/getsecond{/yy(1)}/to/toksb" );@]@;
@[TeX_( "/yy0{/nx/idstr{/the/toksa}{/the/toksb}}/namechars/yyval" );@]@;
@ @=
@[TeX_( "/getfirst{/yy(1)}/to/toksa" );@]@;
@[TeX_( "/getsecond{/yy(1)}/to/toksb" );@]@;
@[TeX_( "/yy0{/nx/optstr{/the/toksa}{/the/toksb}}" );@]@;
@ @=
@[TeX_( "/getfirst{/yy(1)}/to/toksa" );@]@;
@[TeX_( "/getsecond{/yy(1)}/to/toksb" );@]@;
@[TeX_( "/yy0{/nx/idstr{/the/toksa}{/the/toksb}}" );@]@;
@ Tags are recognized as a separate syntax element although no special
processing is performed by the name parser or the associated macros.
@=
@[TeX_( "/getfirst{/yy(2)}/to/toksa" );@]@;
@[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@;
@[TeX_( "/yy0{/nx/idstr{}{}}" );@]@;
@ @=
@[TeX_( "/getfirst{/yy(2)}/to/toksa" );@]@;
@[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@;
@[TeX_( "/sansfirst/toksb" );@]@;
@[TeX_( "/yy0{/nx/chstr{/the/toksb}{/the/toksb}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @=
@[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@;
@[TeX_( "/yy0{/nx/chstr{/the/toksb}{/the/toksb}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @=
@[TeX_( "/yy0{/nx/chstr{<}{<}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @'} string@>=
@[TeX_( "/yy0{/nx/chstr{/greaterthan}{/greaterthan}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @=
@[TeX_( "/yy0{/nx/chstr{/uscoreletter}{/uscoreletter}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @=
@[TeX_( "/yy0{/nx/chstr{-}{-}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @=
@[TeX_( "/yy0{/nx/chstr{/safemath}{/safemath}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @=
@[TeX_( "/yy0{/nx/chstr{.}{.}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @=
@[TeX_( "/yy0{/nx/bidstr{/nx/$}{/safemath}}" );@]@;
@ @=
@@;
@ @=
@[TeX_( "/getsecond{/yy(1)}/to/toksa" );@]@;
@[TeX_( "/appendr/toksa{/space}" );@]@;
@[TeX_( "/getfirst{/yy(2)}/to/toksb" );@]@;
@[TeX_( "/concat/toksa/toksb" );@]@;
@[TeX_( "/getthird{/yy(1)}/to/toksb" );@]@;
@[TeX_( "/appendr/toksb{/space}" );@]@;
@[TeX_( "/getsecond{/yy(2)}/to/toksc" );@]@;
@[TeX_( "/concat/toksb/toksc" );@]@;
@[TeX_( "/yy0{/nx/idstr{/the/toksa}{/the/toksb}}" );@]@;
@ @=
@
@ An integer at the end of an identifier (such as |id1|) is
interpreted as a suffix (similar to the way \MF\ treats identifiers,
and \mft\ typesets them,\footnote{This allows, for example, names like
|$[term0]| while leaving |$[char2int]| in its `natural' form.} as \prodstyle{id1}) to
mitigate a well-intentioned but surprisingly inconvenient feature of
\CTANGLE, namely outputting something like \.{id.1} as \.{id\ .1} in an
attempt to make sure that integers do not interfere with structure
dereferences. For this to produce meaningful results, a stricter
interpretation of \prodstyle{IDENTIFIER} syntax is required,
represented by the \flexrenstyle{id\_strict} syntax
\locallink{id_strict definition}below\endlink.
@=
@[TeX_( "/yy0{/the/yy(1)/nx/dotsp/nx/sfxi/the/yy(2)}" );@]@;
@ @=
@[TeX_( "/getfirst{/yy(2)}/to/toksa" );@]@;
@[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@;
@[TeX_( "/yy0{/nx/idstr{/the/toksa}{/the/toksb}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @=
@[TeX_( "/getfirst{/yy(2)}/to/toksa" );@]@;
@[TeX_( "/getsecond{/yy(2)}/to/toksb" );@]@;
@[TeX_( "/yy0{/nx/optstr{/the/toksa}{/the/toksb}/nx/visflag{/nx/termvstring}{}}" );@]@;
@ @=
@[TeX_( "/yy0{/nx/dotsp/the/yy(2)}" );@]@;
@ @=
@@;
@ @=
@[TeX_( "/yy0{/nx/sfxn/the/yy(1)}" );@]@;
@ @=
@[TeX_( "/yy0{/nx/sfxi/the/yy(1)}" );@]@;
@ @=
@[TeX_( "/yy0{/the/yy(1)/nx/dotsp}" );@]@;
@ @=
@[TeX_( "/yy0{/the/yy(1)/nx/sfxi/the/yy(2)}" );@]@;
@ @=
@[TeX_( "/yy0{/the/yy(1)/nx/sfxn/the/yy(2)}" );@]@;
@ @=
@[TeX_( "/yy0{/the/yy(1)/nx/qual/the/yy(2)}" );@]@;
@ @=
@[TeX_( "/yy0{/nx/qual/the/yy(1)}" );@]@;
@ \Cee\ preamble. In this case, there are no `real' actions that our
grammar performs, only \TeX\ output, so this section is empty.
@=
@ \Cee\ postamble. It is tricky to insert function definitions that use \bison's internal types,
as they have to be inserted in a place that is aware of the internal definitions but before said
definitions are used.
@=
@ Union of types.
@=
@** The name scanner.
The scanner for lexing term names is admittedly {\em ad hoc\/} and
rather redundant. A minor reason for this is to provide some
flexibility for name typesetting. Another reason is to let the
existing code serve as a template for similar procedures in other
projects. At the same time, it must be pointed out that this scanner
is executed multiple times for every \bison\ section, so its
efficiency directly affects the speed at which the parser operates.
@(small_lexer.ll@>=
@G
@> @ @=
%{@> @ @=%}
@> @ @=
%%
@> @ @=
%%
@O
/* takes no arguments and returns nothing; NOTE(review): the body looks like
   a single named section reference whose name is not visible here, so the
   section that supplies the state definitions should be confirmed */
void define_all_states( void ) {
@@;
}
@o
@g
@ \namedspot{id_strict definition}The tokens consumed by the name parser must
represent a relatively fine classification of various identifier substrings to be able to
detect various suffixes.
@=
@@;
@G(fs1)
letter [_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ]
c-escchar \\[fnrtv]
wc ([^\\\'\"$.<>]{-}[_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0-9]|\\.)
id {letter}({letter}|[-0-9])*
id_strict {letter}(({letter}|[-0-9])*{letter})?
meta_id "*"{id_strict}"*"?
int [0-9]+
@g
@ @=
/* a local shorthand that hands a state name to the state definition macro,
   once as a string and once as a plain token; it is removed again at the
   end of this section so that it does not leak into the rest of the file */
#define _register_name( name ) @[Define_State( #name, name )@]
/* nothing for now */
#undef _register_name
@ Strings and characters in directives/rules.
@=
@G(fs1)
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
@g
@ @=
#include
#include
@ @=
@G(fs1)
%option bison-bridge
%option noyywrap nounput noinput reentrant
%option noyy_top_state
%option debug
%option stack
%option outfile="small_lexer.c"
@g
@ @=
@@;
@@;
@ White space skipping.
@=
@G(fs2)
[ \f\n\t\v] {@> @[TeX_( "/yylexnext" );@]@=}
@g
@ This collection of regular expressions might seem redundant, and in
its present state, it certainly is. However, if later on the
typesetting style for some of the keywords needs to be adjusted,
such changes would be easy to implement, since the template is already
here.
@=
@G(fs2)
"%binary" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%code" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%debug" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%default-prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%define" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%defines" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%destructor" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%dprec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%empty" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%error-verbose" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%expect" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%expect-rr" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%file-prefix" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%fixed-output-files" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%initial-action" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%glr-parser" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%language" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%left" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%lex-param" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%locations" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%merge" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%name-prefix" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%no-default-prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%no-lines" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%nonassoc" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%nondeterministic-parser" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%nterm" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%output" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%param" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%parse-param" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%precedence" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%printer" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%pure-parser" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%require" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%right" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%skeleton" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%start" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%term" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%token" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%token-table" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%type" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%union" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%verbose" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%yacc" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%default"[-_]"prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%error"[-_]"verbose" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%expect"[-_]"rr" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%fixed"[-_]"output"[-_]"files" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%name"[-_]"prefix" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%no"[-_]"default"[-_]"prec" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%no"[-_]"lines" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%pure"[-_]"parser" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%token"[-_]"table" {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
"%"({letter}|[0-9]|[-_]|"%"|[<>])+ {@> @[TeX_( "/yylexreturnval{PERCENT_IDENTIFIER}" );@]@=}
@t}\vb{\insertraw{\inscomment{\it suffixes}}}{@>
"opt" {@> @[TeX_( "/yylexreturnval{OPTIONAL}" );@]@=}
"na" {@> @[TeX_( "/yylexreturnval{NO_ATTR}" );@]@=}
"ext" {@> @[TeX_( "/yylexreturnval{EXTENDED}" );@]@=}
"l" {@> @[TeX_( "/yylexreturnval{LT}" );@]@=}
"r" {@> @[TeX_( "/yylexreturnval{RT}" );@]@=}
@t}\vb{\insertraw{\inscomment{\it delimiters}}}{@>
[<>$._\'\"] {@> @[TeX_( "/yylexreturnchar" );@]@=}
{c-escchar} {@> @[TeX_( "/yylexreturnval{C_ESCCHAR}" );@]@=}
{wc} {@> @[TeX_( "/yylexreturnval{WILDCARD}" );@]@=}
@t}\vb{\insertraw{\inscomment{\it identifiers and other names}}}{@>
{id_strict} {@> @[@@]@=}
{meta_id} {@> @[@@]@=}
{int} {@> @[TeX_( "/yylexreturnval{INTEGER}" );@]@=}
@t}\vb{\insertraw{\inscomment{\it everything else}}}{@>
. {@> @[@@]@=}
@g
@ @=
@[TeX_( "/yylexreturnval{IDENTIFIER}" );@]@;
@ @=
@[TeX_( "/yylexreturnval{META_IDENTIFIER}" );@]@;
@ @=
@[TeX_( "/iftracebadchars" );@]@;
@[TeX_( " /yyfatal{invalid character(s): /the/yytext}" );@]@;
@[TeX_( "/fi" );@]@;