comp.lang.ada
 help / color / mirror / Atom feed
From: jhalpin@netcom.com (Joe Halpin)
Subject: Re: <<HELP with recursive descent parser>>
Date: Sun, 13 Nov 1994 00:43:28 GMT
Date: 1994-11-13T00:43:28+00:00	[thread overview]
Message-ID: <jhalpinCz6KoG.Br3@netcom.com> (raw)
In-Reply-To: 1994Nov12.132726.23824@wmichgw

In article <1994Nov12.132726.23824@wmichgw> x91nikorawal@wmich.edu (HACKER) writes:
>Hi,
> The Recursive Descent procedure I have, takes care of all cases
>except one, the number + (expression) case. Ex 2 + (3 * 4).
>

Here's some code by Alan Holub, from the (now deceased) C Gazette.  He 
does it better than I do, so I'll just refer you to him :-).

============================================================================

/* Listing 1. LEX.H -- Lexeme Definitions */

 typedef enum token      /* Token definitions.                        */
 {
     NO_TOKEN = -1, /*  Number not used as a token value.             */
     EOI      = 0,  /*  Reserve zero for end of input.                */
     EQUAL,         /*  =                                             */
     ID,            /*  string of alphanumerics, starting with alpha. */
     LP,            /*  (                                             */
     MINUS,         /*  -                                             */
     NUMBER,        /*  string of numeric characters                  */
     PLUS,          /*  +                                             */
     RP,            /*  )                                             */
     SEMI,          /*  ;                                             */
     SLASH,         /*  /                                             */
     STAR,          /*  *                                             */
     WHILE          /*  while                                         */
 }
 token ;

 extern int match( token t );    /* scanner functions in lex.c */
 extern void advance( void );
 extern char *yytext;            /* current lexeme (unterminated) */
 extern int  yyleng;             /* number of chars in lexeme     */

-------

/* Listing 2. LEX.C -- A Brute-Force Lexical Analyzer */

#include <stdio.h>
#include "lex.h"

char    *yytext = "";           /* holds the lexeme (not \0 terminated) */
int     yyleng  = 0;            /* number of valid characters in lexeme */
FILE    *yyinp  = stdin;        /* Input FILE.                          */
#define LINELEN 128             /* Maximum input-line length.           */

#if ( defined(DEBUG) || defined(PTRACE) )

char *tokstr( token t )
{
    switch( t )
    {
    case NO_TOKEN:      return "NO_TOKEN";
    case EOI:           return "EOI";
    case EQUAL:         return "EQUAL";
    case ID:            return "ID";
    case LP:            return "LP";
    case MINUS:         return "MINUS";
    case NUMBER:        return "NUMBER";
    case PLUS:          return "PLUS";
    case RP:            return "RP";
    case SEMI:          return "SEMI";
    case SLASH:         return "SLASH";
    case STAR:          return "STAR";
    case WHILE:         return "WHILE";
    }

    return "UNKNOWN";
}
#endif

/*----------------------------------------------------------------------*/

static token yylex()
{
    /* A brute-force, line oriented lexical analyzer */

    static char input_buf[ LINELEN ];   /* Initialized to 0's by default. */
    static char *p = input_buf;         /* *p is initially '\0'.         */

    int rval = NO_TOKEN;                /* Returned token                 */

    do
    {
        if( !*p )                                    /* If buffer empty, */
        {
            if( !fgets(input_buf, LINELEN, yyinp) )  /* get a new line.  */
                return EOI;                          /* End of input.    */
            p = input_buf;
        }

        while( isspace( *p ) )          /* Skip leading white space */
            ++p;

    } while( !*p );             /* Until you find a nonempty line.  */

    /* At this juncture, p is pointing at a nonspace character; either
     * at the first character on the line or at the first nonspace
     * character following the last lexeme we recognized.
     */

    yytext = p;         /* pointer to current lexeme    */
    yyleng = 1;         /* default lexeme length        */

    switch( *p++ )
    {
    case '=':   rval = EQUAL;   break;
    case '+':   rval = PLUS;    break;
    case '-':   rval = MINUS;   break;
    case '*':   rval = STAR;    break;
    case '/':   rval = SLASH;   break;
    case ';':   rval = SEMI;    break;
    case '(':   rval = LP;      break;
    case ')':   rval = RP;      break;
    default:
        if( isdigit(*yytext) )
        {
            for(; isdigit(*p); ++p, ++yyleng )
                ;
            rval = NUMBER;
        }
        else if( isalpha(*yytext) )
        {
            for(; isalnum(*p); ++p, ++yyleng )
                ;

            rval = !strncmp("while", yytext, yyleng) ? WHILE
                                                     : ID;
        }
        else
        {
            fprintf(stderr,"Ignoring bad input char. (%c)\n", *yytext);
            exit(1);
        }
    }
#   ifdef DEBUG
        printf("yylex returning %s (%1.*s)\n", tokstr(rval), yyleng, yytext);
#   endif

    return rval;
}
/*------------------------------------------------------------------*/
static token Lookahead = NO_TOKEN;

int match( token t )
{
    if( Lookahead == NO_TOKEN )
        Lookahead = yylex();

    return Lookahead == t;
}
/*------------------------------------------------------------------*/
void advance()
{
    #ifdef PTRACE               /* Debugging diagnostic, print     */
                                /* current lexeme before advancing */
        extern int rdepth(void);        /* declared in topdown.c */

        printf("%*s%s (%1.*s)\n", rdepth(), "", tokstr(Lookahead),
                                                yyleng, yytext );
    #endif;

    Lookahead = yylex();
}

-------

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "lex.h"

int statement(void);
int expression_and_predicate(void);
int mult_expr_and_predicate(void);
int factor(void);
void error(char *fmt, ...);
extern FILE *yyinp;

#ifdef PTRACE
    static int  Rdepth = 0;
    int         rdepth(void)    { return( Rdepth * 8 ); }
    #define     trace(name)     printf("%*s%s\n", Rdepth++ *8, "", name)
    #define     untrace(name)   (--Rdepth)
#else
    #define trace(name)
    #define untrace(name)
#endif


/* -------------------------------------------------------------------------- */
int statement( void )
{
    /* statement -> expression SEMI                 LP SEMI NUMBER */

    int value1;


    trace("statement");

    if( match(LP) || match(SEMI) || match(NUMBER) )
    {
        value1 = expression_and_predicate();

        if( match(SEMI) )
        {
            advance();
        }
        else
            error( "Inserting missing semicolon." );

        untrace( "statement" );
        return value1;
    }

    else if( match( EOI ) )
    {
        return EOI;
    }

    else
    {
        error("Illegal expression\n");
        untrace( "statement" );
        return 0;
    }
}

/* -------------------------------------------------------------------------- */
int expression_and_predicate( void )
{
    /*
     * expression -> mult_expr predicate            LP NUMBER
     * expression -> (epsilon)                      RP SEMI
     */

    int value2;                 /* accumulated value of subexpression */


    trace("expression_and_predicate");

    if( match(RP) || match(SEMI) )
        ;
    else if( !(match(LP) || match(NUMBER)) )
        error( "expression expected\n" );
    else
    {
        value2 = mult_expr_and_predicate();

        /*
         * predicate -> PLUS mult_expr predicate    PLUS
         * predicate -> MINUS mult_expr predicate   MINUS
         * predicate -> (epsilon)                   RP SEMI
         */

        while( 1 )
        {
            if( match(RP) || match(SEMI) )
                break;   /* epsilon */
            else if( match(PLUS) )
            {
                advance();
                value2 += mult_expr_and_predicate();
            }
            else if( match(MINUS) )
            {
                advance();
                value2 -= mult_expr_and_predicate();
            }
            else
            {
                error("operator or statement - terminator expected\n");
                break;
            }
        }
    }
    untrace("expression_and_predicate");
    return value2;
}

/* -------------------------------------------------------------------------- */
int mult_expr_and_predicate( void )
{
    /*
     * mult_expr -> factor mult_predicate           LP NUMBER
     */

    int value3;                   /* value of subexpression */


    trace("mult_expr_and_predicate");

    if( !(match(LP) || match(NUMBER)) )
        error("Expected number identifier or open paranthesis\n");
    else
    {
        value3 = factor();

        /*
         * mult_predicate -> STAR factor mult_predicate     STAR
         * mult_predicate -> SLASH factor mult_predicate    SLASH
         * mult_predicate -> (epsilon)                      RP PLUS MINUS SEMI
         */

        while( 1 )
        {
            if( match(RP) || match(PLUS) || match(MINUS) || match(SEMI) )
                break;                  /* epsilon */

            else if( match(STAR) )
            {
                advance();
                value3 *= factor();
            }
            else if( match(SLASH) )
            {
                advance();
                value3 /= factor();
            }
            else
            {
                error("operator expected\n");
                break;
            }
        }
    }
    untrace("mult_expr_and_predicate");
    return value3;
}

/* -------------------------------------------------------------------------- */
int factor( void )
{
    /*
     * factor -> NUMBER                             NUMBER
     * factor -> LP expression RP                   LP
     */

    int value4;

/*    level += 3; */

    trace("factor");

    if( match(NUMBER) )
    {
        value4 = atoi(yytext);
        advance();
    }
    else if( match(LP) )
    {
        advance();
        value4 = expression_and_predicate();

        if( match(RP) )
            advance();
        else
            error("Inserting missing right parenthesis.");
    }
    else
        error("Number or parenthesized subexpression expected\n");

    untrace( "factor" );
    return value4;
}

/* -------------------------------------------------------------------------- */
void error( char *fmt, ... )
{
    va_list args;
    va_start( args, fmt );
    vfprintf( stderr, fmt, args );
    va_end( args );
}

/* -------------------------------------------------------------------------- */
int main( int argc, char *argv[] )
{
    int val;

    if( argc < 2 )
    {
        puts( "Syntax: parse <filename>" );
        exit( 1 );
    }

    if( ( yyinp = fopen( argv[1], "r" ) ) == NULL )
    {
        puts( "Can't open file" );
        exit( 1 );
    }

    while( ( val = statement() ) != EOI )
        printf( "%d\n", val );

    return 0;
}

-- 
Joe Halpin
jhalpin@netcom.com
---------------------------------------------------------------------------



  reply	other threads:[~1994-11-13  0:43 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
1994-11-12 17:27 <<HELP with recursive descent parser>> HACKER
1994-11-13  0:43 ` Joe Halpin [this message]
1994-11-13  2:02 ` S M Ryan
replies disabled

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox