From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on polar.synack.me X-Spam-Level: X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00 autolearn=ham autolearn_force=no version=3.4.4 X-Google-Thread: 103376,13b4e394fcd91d4 X-Google-NewGroupId: yes X-Google-Attributes: gida07f3367d7,domainid0,public,usenet X-Google-Language: ENGLISH,ASCII-7-bit Received: by 10.68.213.68 with SMTP id nq4mr8690794pbc.2.1327859170612; Sun, 29 Jan 2012 09:46:10 -0800 (PST) Path: lh20ni238159pbb.0!nntp.google.com!news1.google.com!npeer02.iad.highwinds-media.com!news.highwinds-media.com!feed-me.highwinds-media.com!post02.iad.highwinds-media.com!news.flashnewsgroups.com-b7.4zTQh5tI3A!not-for-mail From: Stephen Leake Newsgroups: comp.lang.ada Subject: Re: OpenToken: Handling the empty word token References: <62121d9d-f208-4e78-a109-749742da14a6@h12g2000yqg.googlegroups.com> <1jvlv7i0tn14u.b5d2cwsqhl2h$.dlg@40tude.net> Date: Sun, 29 Jan 2012 12:45:54 -0500 Message-ID: <82ehuibdwt.fsf@stephe-leake.org> User-Agent: Gnus/5.13 (Gnus v5.13) Emacs/23.2 (windows-nt) Cancel-Lock: sha1:jkcIyDi+Fs3N0On0+knLOSNqcOo= MIME-Version: 1.0 X-Complaints-To: abuse@flashnewsgroups.com Organization: FlashNewsgroups.com X-Trace: 5d1c24f2585e2e029e66106660 Content-Type: text/plain; charset=us-ascii Date: 2012-01-29T12:45:54-05:00 List-Id: "Randy Brukardt" writes: > "Dmitry A. Kazakov" wrote in message > news:1jvlv7i0tn14u.b5d2cwsqhl2h$.dlg@40tude.net... >> On Fri, 27 Jan 2012 08:22:12 -0800 (PST), mtrenkmann wrote: >> >>> Is there a way to instrument the parser to silently accept the epsilon >>> token whenever it expects it without consuming a token from the lexer, >>> or is it a common convention to translate each grammar into a epsilon- >>> free representation? >> >> I use neither explicit grammars nor OpenToken, so it is possible that I >> didn't really understand the problem you have. 
> > Like Dmitry, I don't use OpenToken, but I do use a LALR(1) parser generator > (ours originates in a University of Wisconsin research project from the late > 1970s). > > In all of the grammars I've seen, you don't write anything for an epsilon > production; that's because you are matching nothing. But there is no problem > in matching nothing, so long as your grammar generator is powerful enough > (uses at least LALR(1) parsing, or perhaps LR(1) parsing). In that case, > matching nothing works so long as the follow sets are disjoint (something > that fails to be true periodically in our Ada grammar). > > For instance, here's the grammar for parameter modes from the Janus/Ada > compiler grammar: > > mode ::= IN ## 93 > | OUT ## 94 > | IN OUT ## 95 > | ## 198 > > Note that the last production is an epsilon production. The ## part gives an > action number associated with the matching of that particular alternative of > this production. The ## part also marks the end of the production (it's > optional, and | also ends a production -- but it's required on the last > alternative as the grammar of our grammar uses insignificant line endings > like Ada does). > > I'd be surprised if OpenToken didn't have something similar; Not quite. Because OpenToken uses Ada types to build the grammar, we need an explicit Epsilon token (full code below): Grammar : constant Production_List.Instance := Tokens.Parse_Sequence <= Tokens.Paren_Left & Tokens.Mode & Tokens.Paren_Right + Arg_Action'Access and Tokens.Mode <= Tokens.In_Tok + Mode_Action'Access and Tokens.Mode <= Tokens.Out_Tok + Mode_Action'Access and Tokens.Mode <= Tokens.In_Tok & Tokens.Out_Tok + Mode_Action'Access and Tokens.Mode <= Tokens.Epsilon + Mode_Action'Access; > and if it doesn't, you probably need to upgrade to a better grammar > generator. One way to do that is to improve OpenToken :). In this case, we might be able to provide a monadic "+" that would do the right thing, but I didn't try that. 
pragma License (GPL);

with Ada.Text_IO;
with OpenToken.Production.List;
with OpenToken.Production.Parser.LALR;
with OpenToken.Production.Parser;
with OpenToken.Recognizer.Character_Set;
with OpenToken.Recognizer.End_Of_File;
with OpenToken.Recognizer.Keyword;
with OpenToken.Recognizer.Nothing;
with OpenToken.Text_Feeder.String;
with OpenToken.Token.Enumerated.Analyzer;
with OpenToken.Token.Enumerated.List;
with OpenToken.Token.Enumerated.Nonterminal;

--  Demonstration of an explicit Epsilon token in an OpenToken grammar.
--
--  The grammar accepts a parenthesised parameter mode, where the mode is
--  one of "in", "out", "in out", or nothing at all (the Epsilon
--  alternative).  The program feeds the fixed text "( in out )" to a
--  generated LALR parser; each reduction prints a line on standard output.
procedure Debug is

   --  Every token the grammar knows about.  Terminals come first and run
   --  up to Whitespace_ID (see the Tokenizer instantiation below); the
   --  remaining literals are nonterminals.
   type Token_ID_Type is
     (EOF_ID,
      Epsilon_ID,
      In_ID,
      Out_ID,
      Paren_Left_ID,
      Paren_Right_ID,
      Whitespace_ID,
      --  non-terminals
      Mode_ID,
      Parse_Sequence_ID);

   package Master_Token is new OpenToken.Token.Enumerated (Token_ID_Type);
   package Token_List   is new Master_Token.List;
   package Nonterminal  is new Master_Token.Nonterminal (Token_List);
   package Production   is new OpenToken.Production
     (Master_Token, Token_List, Nonterminal);
   package Production_List is new Production.List;

   --  Operators used to spell out the grammar below.
   use type Production.Instance;         --  "<="
   use type Production_List.Instance;    --  "and"
   use type Production.Right_Hand_Side;  --  "+"
   use type Token_List.Instance;         --  "&"

   --  Token objects referenced by the grammar and by the lexer syntax.
   package Tokens is
      EOF         : constant Master_Token.Class :=
        Master_Token.Get (EOF_ID);
      Epsilon     : constant Master_Token.Class :=
        Master_Token.Get (Epsilon_ID);
      In_Tok      : constant Master_Token.Class :=
        Master_Token.Get (In_ID);
      Out_Tok     : constant Master_Token.Class :=
        Master_Token.Get (Out_ID);
      Paren_Left  : constant Master_Token.Class :=
        Master_Token.Get (Paren_Left_ID);
      Paren_Right : constant Master_Token.Class :=
        Master_Token.Get (Paren_Right_ID);

      --  Nonterminals
      Mode           : constant Nonterminal.Class :=
        Nonterminal.Get (Mode_ID);
      Parse_Sequence : constant Nonterminal.Class :=
        Nonterminal.Get (Parse_Sequence_ID);
   end Tokens;

   --  Lexer over the terminal range EOF_ID .. Whitespace_ID.
   package Tokenizer is new Master_Token.Analyzer
     (Last_Terminal => Whitespace_ID);

   --  One recognizer per terminal.  Epsilon is bound to the "Nothing"
   --  recognizer; the parentheses are recognised with the Keyword
   --  recognizer, exactly like "in" and "out".
   Syntax : constant Tokenizer.Syntax :=
     (EOF_ID         =>
        Tokenizer.Get (OpenToken.Recognizer.End_Of_File.Get, Tokens.EOF),
      Epsilon_ID     =>
        Tokenizer.Get (OpenToken.Recognizer.Nothing.Get),
      In_ID          =>
        Tokenizer.Get (OpenToken.Recognizer.Keyword.Get ("in")),
      Out_ID         =>
        Tokenizer.Get (OpenToken.Recognizer.Keyword.Get ("out")),
      Paren_Left_ID  =>
        Tokenizer.Get (OpenToken.Recognizer.Keyword.Get ("(")),
      Paren_Right_ID =>
        Tokenizer.Get (OpenToken.Recognizer.Keyword.Get (")")),
      Whitespace_ID  =>
        Tokenizer.Get
          (OpenToken.Recognizer.Character_Set.Get
             (OpenToken.Recognizer.Character_Set.Standard_Whitespace)));

   --  Shared body of the two reduction actions: synthesize the
   --  nonterminal first, then print Message on its own line.
   procedure Synthesize_And_Report
     (New_Token : out Nonterminal.Class;
      Source    : in  Token_List.Instance'Class;
      To_ID     : in  Token_ID_Type;
      Message   : in  String)
   is
   begin
      Nonterminal.Synthesize_Self (New_Token, Source, To_ID);
      Ada.Text_IO.Put_Line (Message);
   end Synthesize_And_Report;

   --  Action attached to the Parse_Sequence production.
   procedure Arg_Action
     (New_Token : out Nonterminal.Class;
      Source    : in  Token_List.Instance'Class;
      To_ID     : in  Token_ID_Type)
   is
   begin
      Synthesize_And_Report (New_Token, Source, To_ID, "arg action");
   end Arg_Action;

   --  Action attached to every Mode production, the Epsilon one included.
   procedure Mode_Action
     (New_Token : out Nonterminal.Class;
      Source    : in  Token_List.Instance'Class;
      To_ID     : in  Token_ID_Type)
   is
   begin
      Synthesize_And_Report (New_Token, Source, To_ID, "mode action");
   end Mode_Action;

   --  Parse_Sequence ::= "(" Mode ")"
   --  Mode           ::= "in" | "out" | "in" "out" | Epsilon
   Grammar : constant Production_List.Instance :=
     Tokens.Parse_Sequence <=
       Tokens.Paren_Left & Tokens.Mode & Tokens.Paren_Right
       + Arg_Action'Access
     and
     Tokens.Mode <= Tokens.In_Tok + Mode_Action'Access
     and
     Tokens.Mode <= Tokens.Out_Tok + Mode_Action'Access
     and
     Tokens.Mode <= Tokens.In_Tok & Tokens.Out_Tok + Mode_Action'Access
     and
     Tokens.Mode <= Tokens.Epsilon + Mode_Action'Access;

   package OpenToken_Parser is new Production.Parser
     (Production_List, Tokenizer);
   package LALR_Parser is new OpenToken_Parser.LALR;

   --  Text handed to the parser.
   Input_Text : constant String := "( in out )";

   String_Feeder  : aliased OpenToken.Text_Feeder.String.Instance;
   Analyzer       : constant Tokenizer.Instance :=
     Tokenizer.Initialize (Syntax);
   Command_Parser : LALR_Parser.Instance :=
     LALR_Parser.Generate (Grammar, Analyzer, OpenToken.Trace_Parse);

   use LALR_Parser;

begin
   OpenToken.Text_Feeder.String.Set (String_Feeder, Input_Text);
   Set_Text_Feeder (Command_Parser, String_Feeder'Unchecked_Access);

   --  Read and parse statements from the string until end of string
   while not End_Of_Text (Command_Parser) loop
      Parse (Command_Parser);
   end loop;
end Debug;

--
-- Stephe