From: jhalpin@netcom.com (Joe Halpin)
Subject: Re: <<HELP with recursive descent parser>>
Date: Sun, 13 Nov 1994 00:43:28 GMT
Date: 1994-11-13T00:43:28+00:00 [thread overview]
Message-ID: <jhalpinCz6KoG.Br3@netcom.com> (raw)
In-Reply-To: 1994Nov12.132726.23824@wmichgw
In article <1994Nov12.132726.23824@wmichgw> x91nikorawal@wmich.edu (HACKER) writes:
>Hi,
> The Recursive Descent procedure I have, takes care of all cases
>except one, the number + (expression) case. Ex 2 + (3 * 4).
>
Here's some code by Alan Holub, from the (now deceased) C Gazette. He
does it better than I do, so I'll just refer you to him :-).
============================================================================
/* Listing 1. LEX.H -- Lexeme Definitions */
typedef enum token /* Token definitions. */
{
NO_TOKEN = -1, /* Number not used as a token value. */
EOI = 0, /* Reserve zero for end of input. */
EQUAL, /* = */
ID, /* string of alphanumerics, starting with alpha. */
LP, /* ( */
MINUS, /* - */
NUMBER, /* string of numeric characters */
PLUS, /* + */
RP, /* ) */
SEMI, /* ; */
SLASH, /* / */
STAR, /* * */
WHILE /* while */
}
token ;
extern int match( token t ); /* scanner functions in lex.c */
extern void advance( void );
extern char *yytext; /* current lexeme (unterminated) */
extern int yyleng; /* number of chars in lexeme */
-------
/* Listing 2. LEX.C -- A Brute-Force Lexical Analyzer */
#include <stdio.h>
#include "lex.h"
char *yytext = ""; /* holds the lexeme (not \0 terminated) */
int yyleng = 0; /* number of valid characters in lexeme */
FILE *yyinp = stdin; /* Input FILE. */
#define LINELEN 128 /* Maximum input-line length. */
#if ( defined(DEBUG) || defined(PTRACE) )
char *tokstr( token t )
{
switch( t )
{
case NO_TOKEN: return "NO_TOKEN";
case EOI: return "EOI";
case EQUAL: return "EQUAL";
case ID: return "ID";
case LP: return "LP";
case MINUS: return "MINUS";
case NUMBER: return "NUMBER";
case PLUS: return "PLUS";
case RP: return "RP";
case SEMI: return "SEMI";
case SLASH: return "SLASH";
case STAR: return "STAR";
case WHILE: return "WHILE";
}
return "UNKNOWN";
}
#endif
/*----------------------------------------------------------------------*/
static token yylex()
{
/* A brute-force, line oriented lexical analyzer */
static char input_buf[ LINELEN ]; /* Initialized to 0's by default. */
static char *p = input_buf; /* *p is initially '\0'. */
int rval = NO_TOKEN; /* Returned token */
do
{
if( !*p ) /* If buffer empty, */
{
if( !fgets(input_buf, LINELEN, yyinp) ) /* get a new line. */
return EOI; /* End of input. */
p = input_buf;
}
while( isspace( *p ) ) /* Skip leading white space */
++p;
} while( !*p ); /* Until you find a nonempty line. */
/* At this juncture, p is pointing at a nonspace character; either
* at the first character on the line or at the first nonspace
* character following the last lexeme we recognized.
*/
yytext = p; /* pointer to current lexeme */
yyleng = 1; /* default lexeme length */
switch( *p++ )
{
case '=': rval = EQUAL; break;
case '+': rval = PLUS; break;
case '-': rval = MINUS; break;
case '*': rval = STAR; break;
case '/': rval = SLASH; break;
case ';': rval = SEMI; break;
case '(': rval = LP; break;
case ')': rval = RP; break;
default:
if( isdigit(*yytext) )
{
for(; isdigit(*p); ++p, ++yyleng )
;
rval = NUMBER;
}
else if( isalpha(*yytext) )
{
for(; isalnum(*p); ++p, ++yyleng )
;
rval = !strncmp("while", yytext, yyleng) ? WHILE
: ID;
}
else
{
fprintf(stderr,"Ignoring bad input char. (%c)\n", *yytext);
exit(1);
}
}
# ifdef DEBUG
printf("yylex returning %s (%1.*s)\n", tokstr(rval), yyleng, yytext);
# endif
return rval;
}
/*------------------------------------------------------------------*/
static token Lookahead = NO_TOKEN;
int match( token t )
{
if( Lookahead == NO_TOKEN )
Lookahead = yylex();
return Lookahead == t;
}
/*------------------------------------------------------------------*/
void advance()
{
#ifdef PTRACE /* Debugging diagnostic, print */
/* current lexeme before advancing */
extern int rdepth(void); /* declared in topdown.c */
printf("%*s%s (%1.*s)\n", rdepth(), "", tokstr(Lookahead),
yyleng, yytext );
#endif;
Lookahead = yylex();
}
-------
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "lex.h"
int statement(void);
int expression_and_predicate(void);
int mult_expr_and_predicate(void);
int factor(void);
void error(char *fmt, ...);
extern FILE *yyinp;
#ifdef PTRACE
static int Rdepth = 0;
int rdepth(void) { return( Rdepth * 8 ); }
#define trace(name) printf("%*s%s\n", Rdepth++ *8, "", name)
#define untrace(name) (--Rdepth)
#else
#define trace(name)
#define untrace(name)
#endif
/* -------------------------------------------------------------------------- */
int statement( void )
{
/* statement -> expression SEMI LP SEMI NUMBER */
int value1;
trace("statement");
if( match(LP) || match(SEMI) || match(NUMBER) )
{
value1 = expression_and_predicate();
if( match(SEMI) )
{
advance();
}
else
error( "Inserting missing semicolon." );
untrace( "statement" );
return value1;
}
else if( match( EOI ) )
{
return EOI;
}
else
{
error("Illegal expression\n");
untrace( "statement" );
return 0;
}
}
/* -------------------------------------------------------------------------- */
int expression_and_predicate( void )
{
/*
* expression -> mult_expr predicate LP NUMBER
* expression -> (epsilon) RP SEMI
*/
int value2; /* accumulated value of subexpression */
trace("expression_and_predicate");
if( match(RP) || match(SEMI) )
;
else if( !(match(LP) || match(NUMBER)) )
error( "expression expected\n" );
else
{
value2 = mult_expr_and_predicate();
/*
* predicate -> PLUS mult_expr predicate PLUS
* predicate -> MINUS mult_expr predicate MINUS
* predicate -> (epsilon) RP SEMI
*/
while( 1 )
{
if( match(RP) || match(SEMI) )
break; /* epsilon */
else if( match(PLUS) )
{
advance();
value2 += mult_expr_and_predicate();
}
else if( match(MINUS) )
{
advance();
value2 -= mult_expr_and_predicate();
}
else
{
error("operator or statement - terminator expected\n");
break;
}
}
}
untrace("expression_and_predicate");
return value2;
}
/* -------------------------------------------------------------------------- */
int mult_expr_and_predicate( void )
{
/*
* mult_expr -> factor mult_predicate LP NUMBER
*/
int value3; /* value of subexpression */
trace("mult_expr_and_predicate");
if( !(match(LP) || match(NUMBER)) )
error("Expected number identifier or open paranthesis\n");
else
{
value3 = factor();
/*
* mult_predicate -> STAR factor mult_predicate STAR
* mult_predicate -> SLASH factor mult_predicate SLASH
* mult_predicate -> (epsilon) RP PLUS MINUS SEMI
*/
while( 1 )
{
if( match(RP) || match(PLUS) || match(MINUS) || match(SEMI) )
break; /* epsilon */
else if( match(STAR) )
{
advance();
value3 *= factor();
}
else if( match(SLASH) )
{
advance();
value3 /= factor();
}
else
{
error("operator expected\n");
break;
}
}
}
untrace("mult_expr_and_predicate");
return value3;
}
/* -------------------------------------------------------------------------- */
int factor( void )
{
/*
* factor -> NUMBER NUMBER
* factor -> LP expression RP LP
*/
int value4;
/* level += 3; */
trace("factor");
if( match(NUMBER) )
{
value4 = atoi(yytext);
advance();
}
else if( match(LP) )
{
advance();
value4 = expression_and_predicate();
if( match(RP) )
advance();
else
error("Inserting missing right parenthesis.");
}
else
error("Number or parenthesized subexpression expected\n");
untrace( "factor" );
return value4;
}
/* -------------------------------------------------------------------------- */
void error( char *fmt, ... )
{
va_list args;
va_start( args, fmt );
vfprintf( stderr, fmt, args );
va_end( args );
}
/* -------------------------------------------------------------------------- */
int main( int argc, char *argv[] )
{
int val;
if( argc < 2 )
{
puts( "Syntax: parse <filename>" );
exit( 1 );
}
if( ( yyinp = fopen( argv[1], "r" ) ) == NULL )
{
puts( "Can't open file" );
exit( 1 );
}
while( ( val = statement() ) != EOI )
printf( "%d\n", val );
return 0;
}
--
Joe Halpin
jhalpin@netcom.com
---------------------------------------------------------------------------
next prev parent reply other threads:[~1994-11-13 0:43 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
1994-11-12 17:27 <<HELP with recursive descent parser>> HACKER
1994-11-13 0:43 ` Joe Halpin [this message]
1994-11-13 2:02 ` S M Ryan
replies disabled
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox