comp.lang.ada
 help / color / mirror / Atom feed
* <<HELP with recursive descent parser>>
@ 1994-11-12 17:27 HACKER
  1994-11-13  0:43 ` Joe Halpin
  1994-11-13  2:02 ` S M Ryan
  0 siblings, 2 replies; 3+ messages in thread
From: HACKER @ 1994-11-12 17:27 UTC (permalink / raw)


Hi,
 The Recursive Descent procedure I have, takes care of all cases
except one, the number + (expression) case. Ex 2 + (3 * 4).

 The grammar I am following is...

        E  ==>	E + T
	E  ==>  T
	T  ==> 	T * F
	T  ==>	F
	F  ==>	(E)
	F  ==>	NUMBER

i.e Only taking care of addition and multiplication




Here are the pseudo-codes for my procedures...


          Procedure F
	  Begin
		Symbol := lex();
		If symbol == number then return TRUE
		If symbol == Lpar then do begin  /* lpar is Left 
						  paranthesis */
		  If E() == TRUE then do begin
			symbol := lex();
			If symbol == Rpar then return TRUE
		end;
		else return FALSE



		Procedure T
		Begin
		  If F() == TRUE then return TRUE
		else begin
		If T()==TRUE the do begin
		Symbol := lex();
		If symbol == '*' then
		   If F() == TRUE return TRUE
		else return FALSE


		Procedure E
		Begin
		If T() == TRUE then return TRUE
		else begin
		If E()== TRUE then 
		symbol = lex();
		If symbol == '+' then
		If T() == TRUE then return TRUE
		else return FALSE;

		


	Any help/suggestion will be appreciated

Thanks
Percy






^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: <<HELP with recursive descent parser>>
  1994-11-12 17:27 <<HELP with recursive descent parser>> HACKER
@ 1994-11-13  0:43 ` Joe Halpin
  1994-11-13  2:02 ` S M Ryan
  1 sibling, 0 replies; 3+ messages in thread
From: Joe Halpin @ 1994-11-13  0:43 UTC (permalink / raw)


In article <1994Nov12.132726.23824@wmichgw> x91nikorawal@wmich.edu (HACKER) writes:
>Hi,
> The Recursive Descent procedure I have, takes care of all cases
>except one, the number + (expression) case. Ex 2 + (3 * 4).
>

Here's some code by Alan Holub, from the (now deceased) C Gazette.  He 
does it better than I do, so I'll just refer you to him :-).

============================================================================

/* Listing 1. LEX.H -- Lexeme Definitions */

 typedef enum token      /* Token definitions.                        */
 {
     NO_TOKEN = -1, /*  Number not used as a token value.             */
     EOI      = 0,  /*  Reserve zero for end of input.                */
     EQUAL,         /*  =                                             */
     ID,            /*  string of alphanumerics, starting with alpha. */
     LP,            /*  (                                             */
     MINUS,         /*  -                                             */
     NUMBER,        /*  string of numeric characters                  */
     PLUS,          /*  +                                             */
     RP,            /*  )                                             */
     SEMI,          /*  ;                                             */
     SLASH,         /*  /                                             */
     STAR,          /*  *                                             */
     WHILE          /*  while                                         */
 }
 token ;

 extern int match( token t );    /* scanner functions in lex.c */
 extern void advance( void );
 extern char *yytext;            /* current lexeme (unterminated) */
 extern int  yyleng;             /* number of chars in lexeme     */

-------

/* Listing 2. LEX.C -- A Brute-Force Lexical Analyzer */

#include <stdio.h>
#include "lex.h"

char    *yytext = "";           /* holds the lexeme (not \0 terminated) */
int     yyleng  = 0;            /* number of valid characters in lexeme */
FILE    *yyinp  = stdin;        /* Input FILE.                          */
#define LINELEN 128             /* Maximum input-line length.           */

#if ( defined(DEBUG) || defined(PTRACE) )

char *tokstr( token t )
{
    switch( t )
    {
    case NO_TOKEN:      return "NO_TOKEN";
    case EOI:           return "EOI";
    case EQUAL:         return "EQUAL";
    case ID:            return "ID";
    case LP:            return "LP";
    case MINUS:         return "MINUS";
    case NUMBER:        return "NUMBER";
    case PLUS:          return "PLUS";
    case RP:            return "RP";
    case SEMI:          return "SEMI";
    case SLASH:         return "SLASH";
    case STAR:          return "STAR";
    case WHILE:         return "WHILE";
    }

    return "UNKNOWN";
}
#endif

/*----------------------------------------------------------------------*/

static token yylex()
{
    /* A brute-force, line oriented lexical analyzer */

    static char input_buf[ LINELEN ];   /* Initialized to 0's by default. */
    static char *p = input_buf;         /* *p is initially '\0'.         */

    int rval = NO_TOKEN;                /* Returned token                 */

    do
    {
        if( !*p )                                    /* If buffer empty, */
        {
            if( !fgets(input_buf, LINELEN, yyinp) )  /* get a new line.  */
                return EOI;                          /* End of input.    */
            p = input_buf;
        }

        while( isspace( *p ) )          /* Skip leading white space */
            ++p;

    } while( !*p );             /* Until you find a nonempty line.  */

    /* At this juncture, p is pointing at a nonspace character; either
     * at the first character on the line or at the first nonspace
     * character following the last lexeme we recognized.
     */

    yytext = p;         /* pointer to current lexeme    */
    yyleng = 1;         /* default lexeme length        */

    switch( *p++ )
    {
    case '=':   rval = EQUAL;   break;
    case '+':   rval = PLUS;    break;
    case '-':   rval = MINUS;   break;
    case '*':   rval = STAR;    break;
    case '/':   rval = SLASH;   break;
    case ';':   rval = SEMI;    break;
    case '(':   rval = LP;      break;
    case ')':   rval = RP;      break;
    default:
        if( isdigit(*yytext) )
        {
            for(; isdigit(*p); ++p, ++yyleng )
                ;
            rval = NUMBER;
        }
        else if( isalpha(*yytext) )
        {
            for(; isalnum(*p); ++p, ++yyleng )
                ;

            rval = !strncmp("while", yytext, yyleng) ? WHILE
                                                     : ID;
        }
        else
        {
            fprintf(stderr,"Ignoring bad input char. (%c)\n", *yytext);
            exit(1);
        }
    }
#   ifdef DEBUG
        printf("yylex returning %s (%1.*s)\n", tokstr(rval), yyleng, yytext);
#   endif

    return rval;
}
/*------------------------------------------------------------------*/
static token Lookahead = NO_TOKEN;

int match( token t )
{
    if( Lookahead == NO_TOKEN )
        Lookahead = yylex();

    return Lookahead == t;
}
/*------------------------------------------------------------------*/
void advance()
{
    #ifdef PTRACE               /* Debugging diagnostic, print     */
                                /* current lexeme before advancing */
        extern int rdepth(void);        /* declared in topdown.c */

        printf("%*s%s (%1.*s)\n", rdepth(), "", tokstr(Lookahead),
                                                yyleng, yytext );
    #endif;

    Lookahead = yylex();
}

-------

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "lex.h"

int statement(void);
int expression_and_predicate(void);
int mult_expr_and_predicate(void);
int factor(void);
void error(char *fmt, ...);
extern FILE *yyinp;

#ifdef PTRACE
    static int  Rdepth = 0;
    int         rdepth(void)    { return( Rdepth * 8 ); }
    #define     trace(name)     printf("%*s%s\n", Rdepth++ *8, "", name)
    #define     untrace(name)   (--Rdepth)
#else
    #define trace(name)
    #define untrace(name)
#endif


/* -------------------------------------------------------------------------- */
int statement( void )
{
    /* statement -> expression SEMI                 LP SEMI NUMBER */

    int value1;


    trace("statement");

    if( match(LP) || match(SEMI) || match(NUMBER) )
    {
        value1 = expression_and_predicate();

        if( match(SEMI) )
        {
            advance();
        }
        else
            error( "Inserting missing semicolon." );

        untrace( "statement" );
        return value1;
    }

    else if( match( EOI ) )
    {
        return EOI;
    }

    else
    {
        error("Illegal expression\n");
        untrace( "statement" );
        return 0;
    }
}

/* -------------------------------------------------------------------------- */
int expression_and_predicate( void )
{
    /*
     * expression -> mult_expr predicate            LP NUMBER
     * expression -> (epsilon)                      RP SEMI
     */

    int value2;                 /* accumulated value of subexpression */


    trace("expression_and_predicate");

    if( match(RP) || match(SEMI) )
        ;
    else if( !(match(LP) || match(NUMBER)) )
        error( "expression expected\n" );
    else
    {
        value2 = mult_expr_and_predicate();

        /*
         * predicate -> PLUS mult_expr predicate    PLUS
         * predicate -> MINUS mult_expr predicate   MINUS
         * predicate -> (epsilon)                   RP SEMI
         */

        while( 1 )
        {
            if( match(RP) || match(SEMI) )
                break;   /* epsilon */
            else if( match(PLUS) )
            {
                advance();
                value2 += mult_expr_and_predicate();
            }
            else if( match(MINUS) )
            {
                advance();
                value2 -= mult_expr_and_predicate();
            }
            else
            {
                error("operator or statement - terminator expected\n");
                break;
            }
        }
    }
    untrace("expression_and_predicate");
    return value2;
}

/* -------------------------------------------------------------------------- */
int mult_expr_and_predicate( void )
{
    /*
     * mult_expr -> factor mult_predicate           LP NUMBER
     */

    int value3;                   /* value of subexpression */


    trace("mult_expr_and_predicate");

    if( !(match(LP) || match(NUMBER)) )
        error("Expected number identifier or open paranthesis\n");
    else
    {
        value3 = factor();

        /*
         * mult_predicate -> STAR factor mult_predicate     STAR
         * mult_predicate -> SLASH factor mult_predicate    SLASH
         * mult_predicate -> (epsilon)                      RP PLUS MINUS SEMI
         */

        while( 1 )
        {
            if( match(RP) || match(PLUS) || match(MINUS) || match(SEMI) )
                break;                  /* epsilon */

            else if( match(STAR) )
            {
                advance();
                value3 *= factor();
            }
            else if( match(SLASH) )
            {
                advance();
                value3 /= factor();
            }
            else
            {
                error("operator expected\n");
                break;
            }
        }
    }
    untrace("mult_expr_and_predicate");
    return value3;
}

/* -------------------------------------------------------------------------- */
int factor( void )
{
    /*
     * factor -> NUMBER                             NUMBER
     * factor -> LP expression RP                   LP
     */

    int value4;

/*    level += 3; */

    trace("factor");

    if( match(NUMBER) )
    {
        value4 = atoi(yytext);
        advance();
    }
    else if( match(LP) )
    {
        advance();
        value4 = expression_and_predicate();

        if( match(RP) )
            advance();
        else
            error("Inserting missing right parenthesis.");
    }
    else
        error("Number or parenthesized subexpression expected\n");

    untrace( "factor" );
    return value4;
}

/* -------------------------------------------------------------------------- */
void error( char *fmt, ... )
{
    va_list args;
    va_start( args, fmt );
    vfprintf( stderr, fmt, args );
    va_end( args );
}

/* -------------------------------------------------------------------------- */
int main( int argc, char *argv[] )
{
    int val;

    if( argc < 2 )
    {
        puts( "Syntax: parse <filename>" );
        exit( 1 );
    }

    if( ( yyinp = fopen( argv[1], "r" ) ) == NULL )
    {
        puts( "Can't open file" );
        exit( 1 );
    }

    while( ( val = statement() ) != EOI )
        printf( "%d\n", val );

    return 0;
}

-- 
Joe Halpin
jhalpin@netcom.com
---------------------------------------------------------------------------



^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: <<HELP with recursive descent parser>>
  1994-11-12 17:27 <<HELP with recursive descent parser>> HACKER
  1994-11-13  0:43 ` Joe Halpin
@ 1994-11-13  2:02 ` S M Ryan
  1 sibling, 0 replies; 3+ messages in thread
From: S M Ryan @ 1994-11-13  2:02 UTC (permalink / raw)


:  The grammar I am following is...
:       E  ==>	E + T
: 	E  ==>  T
: 	T  ==> 	T * F
: 	T  ==>	F
: 	F  ==>	(E)
: 	F  ==>	NUMBER
: 		Procedure T
: 		Begin
: 		  If F() == TRUE then return TRUE
: 		else begin
: 		If T()==TRUE the do begin
this will cause infinite recursion.
: 		Symbol := lex();
check the value of the next symbol before you consume it.
It's possible the next symbol is '+', in which case you
have to push it back on lex or keep it in a global. 
: 		If symbol == '*' then
: 		   If F() == TRUE return TRUE
- don't forget to embed your semantics or translation unless
  all you want is a recogniser.
- if the interior F() fails, you have a parse error and
  do some kind of error and/or recovery.
: 		else return FALSE

You can't use this grammar. For recursive descent, you need right
recursion, and you must be able to choose a production based on 
the next k symbols:

	E  ==>  t E'
	E' ==>  <empty> | + T E'
	T  ==>  F T'
	T' ==>  <empty> | * F T'
	F  ==>  ( E )   | <number>

boolean function F;
    begin
	if lookahead=lpar then			// F ==> ( E )
	    if not E then error;
	    if lookahead=rpar
		then consume input
		else error;
	    return true
	else if lookahead=number then		// F ==> <number>
	    stash number
	    consume input
	    return true
	else
	    return false
    end

boolean function T;
    boolean function T';
	begin
	    if lookahead=star then		// T' ==> * F T'
		consume input;
		if not F then error
		multiply stashed values
		if not T' then error
		return true
	   else					// T  ==> <empty>
		return true
	end;
    begin
	if F then				// T ==> F T'
	    if not T' then error;
	    return true
	else
	    return false
    end;

boolean function E;
	basically the same as T.

T' and E' are tail recursive and can be transformed into
	boolean function T';
	    begin
		while lookahead=star do
		    consume input
		    if not F then error
		    multiply stashed values
		return true
	    end

And because T' and E' always return true
boolean function T;
    procedure T';
	begin
	    while lookahead=star do
		consume input
		if not F then error
		multiply stashed values
	end
    begin
	if F then
	    T';
	    return true
	else
	    return false

And finally inlining gives
boolean function T;
    begin
	if F then
	    while lookahead=star do
		consume input;
		if not F then error;
		multiply stashed values
	    return true
	else
	    return false


In an extended grammar this language is
	E  ==>  T [ '+' T ] *
	T  ==>  F [ '*' F ] *
	F  ==>  ( E )   | <number>
in which the final form of function T is read off directly from
production T.

-- 
 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __________________________________
| | | | | | | | | | | | | | | | | | | | | smryan@netcom.com	PO Box 1563
| | | | | | |G|A|T|E| |1| | | | | | | | |             Cupertino, California
|Electricians use gate 2| | | | | | | | |_(xxx)xxx-xxxx_______________95013
|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|_|____(coming soon: a new signature)



^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~1994-11-13  2:02 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
1994-11-12 17:27 <<HELP with recursive descent parser>> HACKER
1994-11-13  0:43 ` Joe Halpin
1994-11-13  2:02 ` S M Ryan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox