* <<HELP with recursive descent parser>>
@ 1994-11-12 17:27 HACKER
1994-11-13 0:43 ` Joe Halpin
1994-11-13 2:02 ` S M Ryan
0 siblings, 2 replies; 3+ messages in thread
From: HACKER @ 1994-11-12 17:27 UTC (permalink / raw)
Hi,
The Recursive Descent procedure I have, takes care of all cases
except one, the number + (expression) case. Ex 2 + (3 * 4).
The grammar I am following is...
E ==> E + T
E ==> T
T ==> T * F
T ==> F
F ==> (E)
F ==> NUMBER
i.e Only taking care of addition and multiplication
Here are the pseudo-codes for my procedures...
Procedure F
Begin
Symbol := lex();
If symbol == number then return TRUE
If symbol == Lpar then do begin /* lpar is Left
paranthesis */
If E() == TRUE then do begin
symbol := lex();
If symbol == Rpar then return TRUE
end;
else return FALSE
Procedure T
Begin
If F() == TRUE then return TRUE
else begin
If T()==TRUE the do begin
Symbol := lex();
If symbol == '*' then
If F() == TRUE return TRUE
else return FALSE
Procedure E
Begin
If T() == TRUE then return TRUE
else begin
If E()== TRUE then
symbol = lex();
If symbol == '+' then
If T() == TRUE then return TRUE
else return FALSE;
Any help/suggestion will be appreciated
Thanks
Percy
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: <<HELP with recursive descent parser>>
1994-11-12 17:27 <<HELP with recursive descent parser>> HACKER
@ 1994-11-13 0:43 ` Joe Halpin
1994-11-13 2:02 ` S M Ryan
1 sibling, 0 replies; 3+ messages in thread
From: Joe Halpin @ 1994-11-13 0:43 UTC (permalink / raw)
In article <1994Nov12.132726.23824@wmichgw> x91nikorawal@wmich.edu (HACKER) writes:
>Hi,
> The Recursive Descent procedure I have, takes care of all cases
>except one, the number + (expression) case. Ex 2 + (3 * 4).
>
Here's some code by Alan Holub, from the (now deceased) C Gazette. He
does it better than I do, so I'll just refer you to him :-).
============================================================================
/* Listing 1. LEX.H -- Lexeme Definitions */
typedef enum token /* Token definitions. */
{
NO_TOKEN = -1, /* Number not used as a token value. */
EOI = 0, /* Reserve zero for end of input. */
EQUAL, /* = */
ID, /* string of alphanumerics, starting with alpha. */
LP, /* ( */
MINUS, /* - */
NUMBER, /* string of numeric characters */
PLUS, /* + */
RP, /* ) */
SEMI, /* ; */
SLASH, /* / */
STAR, /* * */
WHILE /* while */
}
token ;
extern int match( token t ); /* scanner functions in lex.c */
extern void advance( void );
extern char *yytext; /* current lexeme (unterminated) */
extern int yyleng; /* number of chars in lexeme */
-------
/* Listing 2. LEX.C -- A Brute-Force Lexical Analyzer */
#include <stdio.h>
#include "lex.h"
char *yytext = ""; /* holds the lexeme (not \0 terminated) */
int yyleng = 0; /* number of valid characters in lexeme */
FILE *yyinp = stdin; /* Input FILE. */
#define LINELEN 128 /* Maximum input-line length. */
#if ( defined(DEBUG) || defined(PTRACE) )
char *tokstr( token t )
{
switch( t )
{
case NO_TOKEN: return "NO_TOKEN";
case EOI: return "EOI";
case EQUAL: return "EQUAL";
case ID: return "ID";
case LP: return "LP";
case MINUS: return "MINUS";
case NUMBER: return "NUMBER";
case PLUS: return "PLUS";
case RP: return "RP";
case SEMI: return "SEMI";
case SLASH: return "SLASH";
case STAR: return "STAR";
case WHILE: return "WHILE";
}
return "UNKNOWN";
}
#endif
/*----------------------------------------------------------------------*/
static token yylex()
{
/* A brute-force, line oriented lexical analyzer */
static char input_buf[ LINELEN ]; /* Initialized to 0's by default. */
static char *p = input_buf; /* *p is initially '\0'. */
int rval = NO_TOKEN; /* Returned token */
do
{
if( !*p ) /* If buffer empty, */
{
if( !fgets(input_buf, LINELEN, yyinp) ) /* get a new line. */
return EOI; /* End of input. */
p = input_buf;
}
while( isspace( *p ) ) /* Skip leading white space */
++p;
} while( !*p ); /* Until you find a nonempty line. */
/* At this juncture, p is pointing at a nonspace character; either
* at the first character on the line or at the first nonspace
* character following the last lexeme we recognized.
*/
yytext = p; /* pointer to current lexeme */
yyleng = 1; /* default lexeme length */
switch( *p++ )
{
case '=': rval = EQUAL; break;
case '+': rval = PLUS; break;
case '-': rval = MINUS; break;
case '*': rval = STAR; break;
case '/': rval = SLASH; break;
case ';': rval = SEMI; break;
case '(': rval = LP; break;
case ')': rval = RP; break;
default:
if( isdigit(*yytext) )
{
for(; isdigit(*p); ++p, ++yyleng )
;
rval = NUMBER;
}
else if( isalpha(*yytext) )
{
for(; isalnum(*p); ++p, ++yyleng )
;
rval = !strncmp("while", yytext, yyleng) ? WHILE
: ID;
}
else
{
fprintf(stderr,"Ignoring bad input char. (%c)\n", *yytext);
exit(1);
}
}
# ifdef DEBUG
printf("yylex returning %s (%1.*s)\n", tokstr(rval), yyleng, yytext);
# endif
return rval;
}
/*------------------------------------------------------------------*/
static token Lookahead = NO_TOKEN;
int match( token t )
{
if( Lookahead == NO_TOKEN )
Lookahead = yylex();
return Lookahead == t;
}
/*------------------------------------------------------------------*/
void advance()
{
#ifdef PTRACE /* Debugging diagnostic, print */
/* current lexeme before advancing */
extern int rdepth(void); /* declared in topdown.c */
printf("%*s%s (%1.*s)\n", rdepth(), "", tokstr(Lookahead),
yyleng, yytext );
#endif;
Lookahead = yylex();
}
-------
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "lex.h"
int statement(void);
int expression_and_predicate(void);
int mult_expr_and_predicate(void);
int factor(void);
void error(char *fmt, ...);
extern FILE *yyinp;
#ifdef PTRACE
static int Rdepth = 0;
int rdepth(void) { return( Rdepth * 8 ); }
#define trace(name) printf("%*s%s\n", Rdepth++ *8, "", name)
#define untrace(name) (--Rdepth)
#else
#define trace(name)
#define untrace(name)
#endif
/* -------------------------------------------------------------------------- */
int statement( void )
{
/* statement -> expression SEMI LP SEMI NUMBER */
int value1;
trace("statement");
if( match(LP) || match(SEMI) || match(NUMBER) )
{
value1 = expression_and_predicate();
if( match(SEMI) )
{
advance();
}
else
error( "Inserting missing semicolon." );
untrace( "statement" );
return value1;
}
else if( match( EOI ) )
{
return EOI;
}
else
{
error("Illegal expression\n");
untrace( "statement" );
return 0;
}
}
/* -------------------------------------------------------------------------- */
int expression_and_predicate( void )
{
/*
* expression -> mult_expr predicate LP NUMBER
* expression -> (epsilon) RP SEMI
*/
int value2; /* accumulated value of subexpression */
trace("expression_and_predicate");
if( match(RP) || match(SEMI) )
;
else if( !(match(LP) || match(NUMBER)) )
error( "expression expected\n" );
else
{
value2 = mult_expr_and_predicate();
/*
* predicate -> PLUS mult_expr predicate PLUS
* predicate -> MINUS mult_expr predicate MINUS
* predicate -> (epsilon) RP SEMI
*/
while( 1 )
{
if( match(RP) || match(SEMI) )
break; /* epsilon */
else if( match(PLUS) )
{
advance();
value2 += mult_expr_and_predicate();
}
else if( match(MINUS) )
{
advance();
value2 -= mult_expr_and_predicate();
}
else
{
error("operator or statement - terminator expected\n");
break;
}
}
}
untrace("expression_and_predicate");
return value2;
}
/* -------------------------------------------------------------------------- */
int mult_expr_and_predicate( void )
{
/*
* mult_expr -> factor mult_predicate LP NUMBER
*/
int value3; /* value of subexpression */
trace("mult_expr_and_predicate");
if( !(match(LP) || match(NUMBER)) )
error("Expected number identifier or open paranthesis\n");
else
{
value3 = factor();
/*
* mult_predicate -> STAR factor mult_predicate STAR
* mult_predicate -> SLASH factor mult_predicate SLASH
* mult_predicate -> (epsilon) RP PLUS MINUS SEMI
*/
while( 1 )
{
if( match(RP) || match(PLUS) || match(MINUS) || match(SEMI) )
break; /* epsilon */
else if( match(STAR) )
{
advance();
value3 *= factor();
}
else if( match(SLASH) )
{
advance();
value3 /= factor();
}
else
{
error("operator expected\n");
break;
}
}
}
untrace("mult_expr_and_predicate");
return value3;
}
/* -------------------------------------------------------------------------- */
int factor( void )
{
/*
* factor -> NUMBER NUMBER
* factor -> LP expression RP LP
*/
int value4;
/* level += 3; */
trace("factor");
if( match(NUMBER) )
{
value4 = atoi(yytext);
advance();
}
else if( match(LP) )
{
advance();
value4 = expression_and_predicate();
if( match(RP) )
advance();
else
error("Inserting missing right parenthesis.");
}
else
error("Number or parenthesized subexpression expected\n");
untrace( "factor" );
return value4;
}
/* -------------------------------------------------------------------------- */
void error( char *fmt, ... )
{
va_list args;
va_start( args, fmt );
vfprintf( stderr, fmt, args );
va_end( args );
}
/* -------------------------------------------------------------------------- */
int main( int argc, char *argv[] )
{
int val;
if( argc < 2 )
{
puts( "Syntax: parse <filename>" );
exit( 1 );
}
if( ( yyinp = fopen( argv[1], "r" ) ) == NULL )
{
puts( "Can't open file" );
exit( 1 );
}
while( ( val = statement() ) != EOI )
printf( "%d\n", val );
return 0;
}
--
Joe Halpin
jhalpin@netcom.com
---------------------------------------------------------------------------
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: <<HELP with recursive descent parser>>
1994-11-12 17:27 <<HELP with recursive descent parser>> HACKER
1994-11-13 0:43 ` Joe Halpin
@ 1994-11-13 2:02 ` S M Ryan
1 sibling, 0 replies; 3+ messages in thread
From: S M Ryan @ 1994-11-13 2:02 UTC (permalink / raw)
: The grammar I am following is...
: E ==> E + T
: E ==> T
: T ==> T * F
: T ==> F
: F ==> (E)
: F ==> NUMBER
: Procedure T
: Begin
: If F() == TRUE then return TRUE
: else begin
: If T()==TRUE the do begin
this will cause infinite recursion.
: Symbol := lex();
check the value of the next symbol before you consume it.
It's possible the next symbol is '+', in which case you
have to push it back on lex or keep it in a global.
: If symbol == '*' then
: If F() == TRUE return TRUE
- don't forget to embed your semantics or translation unless
all you want is a recogniser.
- if the interior F() fails, you have a parse error and
do some kind of error and/or recovery.
: else return FALSE
You can't use this grammar. For recursive descent, you need right
recursion, and you must be able to choose a production based on
the next k symbols:
E ==> t E'
E' ==> <empty> | + T E'
T ==> F T'
T' ==> <empty> | * F T'
F ==> ( E ) | <number>
boolean function F;
begin
if lookahead=lpar then // F ==> ( E )
if not E then error;
if lookahead=rpar
then consume input
else error;
return true
else if lookahead=number then // F ==> <number>
stash number
consume input
return true
else
return false
end
boolean function T;
boolean function T';
begin
if lookahead=star then // T' ==> * F T'
consume input;
if not F then error
multiply stashed values
if not T' then error
return true
else // T ==> <empty>
return true
end;
begin
if F then // T ==> F T'
if not T' then error;
return true
else
return false
end;
boolean function E;
basically the same as T.
T' and E' are tail recursive and can be transformed into
boolean function T';
begin
while lookahead=star do
consume input
if not F then error
multiply stashed values
return true
end
And because T' and E' always return true
boolean function T;
procedure T';
begin
while lookahead=star do
consume input
if not F then error
multiply stashed values
end
begin
if F then
T';
return true
else
return false
And finally inlining gives
boolean function T;
begin
if F then
while lookahead=star do
consume input;
if not F then error;
multiply stashed values
return true
else
return false
In an extended grammar this language is
E ==> T [ '+' T ] *
T ==> F [ '*' F ] *
F ==> ( E ) | <number>
in which the final form of function T is read off directly from
production T.
--
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __________________________________
| | | | | | | | | | | | | | | | | | | | | smryan@netcom.com PO Box 1563
| | | | | | |G|A|T|E| |1| | | | | | | | | Cupertino, California
|Electricians use gate 2| | | | | | | | |_(xxx)xxx-xxxx_______________95013
|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|____(coming soon: a new signature)
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~1994-11-13 2:02 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
1994-11-12 17:27 <<HELP with recursive descent parser>> HACKER
1994-11-13 0:43 ` Joe Halpin
1994-11-13 2:02 ` S M Ryan
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox