From mboxrd@z Thu Jan  1 00:00:00 1970
X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on polar.synack.me
X-Spam-Level: 
X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00 autolearn=ham
	autolearn_force=no version=3.4.4
X-Google-Thread: 103376,13b4e394fcd91d4
X-Google-NewGroupId: yes
X-Google-Attributes: gida07f3367d7,domainid0,public,usenet
X-Google-Language: ENGLISH,ASCII-7-bit
Received: by 10.68.213.68 with SMTP id nq4mr8690794pbc.2.1327859170612;
        Sun, 29 Jan 2012 09:46:10 -0800 (PST)
Path: 
 lh20ni238159pbb.0!nntp.google.com!news1.google.com!npeer02.iad.highwinds-media.com!news.highwinds-media.com!feed-me.highwinds-media.com!post02.iad.highwinds-media.com!news.flashnewsgroups.com-b7.4zTQh5tI3A!not-for-mail
From: Stephen Leake <stephen_leake@stephe-leake.org>
Newsgroups: comp.lang.ada
Subject: Re: OpenToken: Handling the empty word token
References: 
 <62121d9d-f208-4e78-a109-749742da14a6@h12g2000yqg.googlegroups.com>
 	<1jvlv7i0tn14u.b5d2cwsqhl2h$.dlg@40tude.net>
 	<jfvqqu$83e$1@munin.nbi.dk>
Date: Sun, 29 Jan 2012 12:45:54 -0500
Message-ID: <82ehuibdwt.fsf@stephe-leake.org>
User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/23.2 (windows-nt)
Cancel-Lock: sha1:jkcIyDi+Fs3N0On0+knLOSNqcOo=
MIME-Version: 1.0
X-Complaints-To: abuse@flashnewsgroups.com
Organization: FlashNewsgroups.com
X-Trace: 5d1c24f2585e2e029e66106660
Content-Type: text/plain; charset=us-ascii
Date: 2012-01-29T12:45:54-05:00
List-Id: <comp.lang.ada>

"Randy Brukardt" <randy@rrsoftware.com> writes:

> "Dmitry A. Kazakov" <mailbox@dmitry-kazakov.de> wrote in message 
> news:1jvlv7i0tn14u.b5d2cwsqhl2h$.dlg@40tude.net...
>> On Fri, 27 Jan 2012 08:22:12 -0800 (PST), mtrenkmann wrote:
>>
>>> Is there a way to instrument the parser to silently accept the epsilon
>>> token whenever it expects it without consuming a token from the lexer,
>>> or is it a common convention to translate each grammar into an epsilon-
>>> free representation?
>>
>> I use neither explicit grammars nor OpenToken, so it is possible that I
>> didn't really understand the problem you have.
>
> Like Dmitry, I don't use OpenToken, but I do use a LALR(1) parser generator 
> (ours originates in a University of Wisconsin research project from the late 
> 1970s).
>
> In all of the grammars I've seen, you don't write anything for an epsilon 
> production; that's because you are matching nothing. But there is no problem 
> in matching nothing, so long as your grammar generator is powerful enough 
> (uses at least LALR(1) parsing, or perhaps LR(1) parsing). In that case, 
> matching nothing works so long as the follow sets are disjoint (something 
> that fails to be true periodically in our Ada grammar).
>
> For instance, here's the grammar for parameter modes from the Janus/Ada 
> compiler grammar:
>
> mode ::= IN ## 93
>     | OUT ## 94
>     | IN OUT ## 95
>     |   ## 198
>
> Note that the last production is an epsilon production. The ## part gives an 
> action number associated with the matching of that particular alternative of 
> this production. The ## part also marks the end of the production (it's 
> optional, and | also ends a production -- but it's required on the last 
> alternative as the grammar of our grammar uses insignificant line endings 
> like Ada does).
>
> I'd be surprised if OpenToken didn't have something similar; 

Not quite. Because OpenToken uses Ada types to build the grammar, we
need an explicit Epsilon token (full code below):

   Grammar : constant Production_List.Instance :=
     Tokens.Parse_Sequence <= Tokens.Paren_Left & Tokens.Mode & Tokens.Paren_Right + Arg_Action'Access and
     Tokens.Mode <= Tokens.In_Tok + Mode_Action'Access and
     Tokens.Mode <= Tokens.Out_Tok + Mode_Action'Access and
     Tokens.Mode <= Tokens.In_Tok & Tokens.Out_Tok + Mode_Action'Access and
     Tokens.Mode <= Tokens.Epsilon + Mode_Action'Access;

> and if it doesn't, you probably need to upgrade to a better grammar
> generator.

One way to do that is to improve OpenToken :).

In this case, we might be able to provide a unary "+" operator (taking
only the action, with no tokens on its left) that would do the right
thing, but I didn't try that.

pragma License (GPL);

with Ada.Text_IO;
with OpenToken.Production.List;
with OpenToken.Production.Parser.LALR;
with OpenToken.Production.Parser;
with OpenToken.Recognizer.Character_Set;
with OpenToken.Recognizer.End_Of_File;
with OpenToken.Recognizer.Keyword;
with OpenToken.Recognizer.Nothing;
with OpenToken.Text_Feeder.String;
with OpenToken.Token.Enumerated.Analyzer;
with OpenToken.Token.Enumerated.List;
with OpenToken.Token.Enumerated.Nonterminal;
procedure Debug is

   --  Small self-contained example: an OpenToken LALR grammar for an
   --  optional parameter mode ("in", "out", "in out", or nothing) written
   --  between parentheses. The "nothing" alternative is expressed with an
   --  explicit Epsilon token.

   --  All token identifiers. Everything up to and including Whitespace_ID
   --  is handed to the analyzer as a terminal (see Tokenizer below); the
   --  remaining literals are the nonterminals of the grammar.
   type Token_ID_Type is
     (EOF_ID,
      Epsilon_ID,
      In_ID,
      Out_ID,
      Paren_Left_ID,
      Paren_Right_ID,
      Whitespace_ID,

      --  non-terminals
      Mode_ID,
      Parse_Sequence_ID);

   --  Instantiate the OpenToken generics for this token enumeration.
   package Master_Token is new OpenToken.Token.Enumerated (Token_ID_Type);
   package Token_List is new Master_Token.List;
   package Nonterminal is new Master_Token.Nonterminal (Token_List);

   package Production is new OpenToken.Production (Master_Token, Token_List, Nonterminal);
   package Production_List is new Production.List;

   --  Make the grammar-building operators directly visible.
   use type Production.Instance;        --  "<="
   use type Production_List.Instance;   --  "and"
   use type Production.Right_Hand_Side; --  "+"
   use type Token_List.Instance;        --  "&"

   --  Token objects referenced by the syntax table and by the grammar.
   package Tokens is
      --  Terminals
      EOF         : constant Master_Token.Class := Master_Token.Get (EOF_ID);
      Epsilon     : constant Master_Token.Class := Master_Token.Get (Epsilon_ID);
      In_Tok      : constant Master_Token.Class := Master_Token.Get (In_ID);
      Out_Tok     : constant Master_Token.Class := Master_Token.Get (Out_ID);
      Paren_Left  : constant Master_Token.Class := Master_Token.Get (Paren_Left_ID);
      Paren_Right : constant Master_Token.Class := Master_Token.Get (Paren_Right_ID);

      --  Nonterminals
      Mode           : constant Nonterminal.Class := Nonterminal.Get (Mode_ID);
      Parse_Sequence : constant Nonterminal.Class := Nonterminal.Get (Parse_Sequence_ID);
   end Tokens;

   package Tokenizer is new Master_Token.Analyzer (Last_Terminal => Whitespace_ID);

   --  One recognizer per terminal.
   --  NOTE(review): Epsilon_ID uses the "Nothing" recognizer, which
   --  presumably never matches any input text - confirm against OpenToken.
   Syntax : constant Tokenizer.Syntax :=
     (EOF_ID         => Tokenizer.Get (OpenToken.Recognizer.End_Of_File.Get, Tokens.EOF),
      Epsilon_ID     => Tokenizer.Get (OpenToken.Recognizer.Nothing.Get),
      In_ID          => Tokenizer.Get (OpenToken.Recognizer.Keyword.Get ("in")),
      Out_ID         => Tokenizer.Get (OpenToken.Recognizer.Keyword.Get ("out")),
      Paren_Left_ID  => Tokenizer.Get (OpenToken.Recognizer.Keyword.Get ("(")),
      Paren_Right_ID => Tokenizer.Get (OpenToken.Recognizer.Keyword.Get (")")),

      Whitespace_ID => Tokenizer.Get
        (OpenToken.Recognizer.Character_Set.Get (OpenToken.Recognizer.Character_Set.Standard_Whitespace))
     );

   --  Action attached to the Parse_Sequence production: synthesize the
   --  nonterminal from Source, then report that the action ran.
   procedure Arg_Action
     (New_Token : out Nonterminal.Class;
      Source    : in  Token_List.Instance'Class;
      To_ID     : in  Token_ID_Type)
   is
   begin
      Nonterminal.Synthesize_Self (New_Token, Source, To_ID);
      Ada.Text_IO.Put_Line ("arg action");
   end Arg_Action;

   --  Action shared by every Mode production: synthesize the nonterminal
   --  from Source, then report that the action ran.
   procedure Mode_Action
     (New_Token : out Nonterminal.Class;
      Source    : in  Token_List.Instance'Class;
      To_ID     : in  Token_ID_Type)
   is
   begin
      Nonterminal.Synthesize_Self (New_Token, Source, To_ID);
      Ada.Text_IO.Put_Line ("mode action");
   end Mode_Action;

   --  Parse_Sequence ::= "(" Mode ")"
   --  Mode           ::= "in" | "out" | "in" "out" | Epsilon
   Grammar : constant Production_List.Instance :=
     Tokens.Parse_Sequence <= Tokens.Paren_Left & Tokens.Mode & Tokens.Paren_Right + Arg_Action'Access and
     Tokens.Mode <= Tokens.In_Tok + Mode_Action'Access and
     Tokens.Mode <= Tokens.Out_Tok + Mode_Action'Access and
     Tokens.Mode <= Tokens.In_Tok & Tokens.Out_Tok + Mode_Action'Access and
     Tokens.Mode <= Tokens.Epsilon + Mode_Action'Access;

   package OpenToken_Parser is new Production.Parser (Production_List, Tokenizer);
   package LALR_Parser is new OpenToken_Parser.LALR;

   --  Text handed to the parser.
   Input_Text : constant String := "( in out )";

   Input_Feeder : aliased OpenToken.Text_Feeder.String.Instance;
   Lexer        : constant Tokenizer.Instance := Tokenizer.Initialize (Syntax);

   --  The parser is generated from Grammar at elaboration time;
   --  OpenToken.Trace_Parse is forwarded unchanged as the third argument.
   The_Parser : LALR_Parser.Instance := LALR_Parser.Generate (Grammar, Lexer, OpenToken.Trace_Parse);

   use LALR_Parser;
begin
   --  Load the input and attach the feeder to the parser.
   OpenToken.Text_Feeder.String.Set (Input_Feeder, Input_Text);
   Set_Text_Feeder (The_Parser, Input_Feeder'Unchecked_Access);

   --  Keep parsing statements until the input text is exhausted.
   while not End_Of_Text (The_Parser) loop
      Parse (The_Parser);
   end loop;

end Debug;

-- 
-- Stephe