comp.lang.ada
 help / color / mirror / Atom feed
From: Paul Gregory <pg16@bton.ac.uk>
Subject: Re: Thanks guys..my project and my many problems
Date: Wed, 26 Feb 2003 14:06:53 +0000
Date: 2003-02-26T14:01:38+00:00	[thread overview]
Message-ID: <3E5CC9FC.F1D134FC@bton.ac.uk> (raw)
In-Reply-To: 1ec946d1.0302250931.2fe67884@posting.google.com

Thanks a million Matthew and everybody else...however, I went to see my tutor today and he
refused me permission to use your package. Apparently I MUST use a package that the
University supplies called "string_tokenizer" to read the tokens in my array when
translating...so it's back to the drawing board for me, unfortunately :-(

---->
----------------------------------------------------------------------------------------
package body String_tokenizer is

-- Uses recursive descent.
-- Each syntactic token has a recognizer, which returns its token iff
--   it is recognized at the current position in the input string.
-- If a recognizer recognizes its token, it consumes the corresponding
--   portion of the input string; if not, it leaves the string for others
--   to attempt to recognize.
-- Higher level syntactic recognizers call lower level recognizers,
--   possibly recursively.

-- Token string syntax
----------------------

--  NO_MORE_TOKENS ::= end of input string
--  WORD_CHAR      ::= A..Z | a..z
--  WORD           ::= WORD_CHAR | WORD_CHAR WORD
--  PUNCTUATION    ::= . | , | : | ; | ' | " | ` | ! | ?
--  NUMBER_CHAR    ::= 0..9
--  NUMBER         ::= NUMBER_CHAR | NUMBER_CHAR NUMBER
--  OPERATOR       ::= + | - | * | / | ** | < | = | >
--  BRACKET        ::= ( | ) | { | } | [ | ]
--  WHITE_SPACE    ::= space_character | space_character WHITE_SPACE
--  OTHER          ::= characters other than those mentioned above
--  TOKEN          ::= NO_MORE_TOKENS | WORD | PUNCTUATION | NUMBER |
--                       OPERATOR | BRACKET | OTHER
--  STRING_TOKEN   ::= WORD | PUNCTUATION | NUMBER | OPERATOR | BRACKET |
--                       OTHER
--  NON_WHITE_SPACE_TOKEN_STRING ::= NO_MORE_TOKENS | STRING_TOKEN TOKEN_STRING
--  TOKEN_STRING   ::= NON_WHITE_SPACE_TOKEN_STRING |
--                     WHITE_SPACE NON_WHITE_SPACE_TOKEN_STRING


  -- Token-type codes used only inside this package body:
  --   WHITE_SPACE marks a run of blanks that is skipped between tokens;
  --   FAIL marks "recognizer did not match at the current position".
  -- NOTE(review): presumably 7 and 8 are chosen so as not to collide with the
  --   client-visible codes declared in the package spec - confirm there.
  WHITE_SPACE : constant Integer := 7;
  FAIL        : constant Integer := 8;

  function char_type(c : Character) return Integer is

  -- Classifies a single character.
  --
  -- pre:  none
  -- post: returns the type code for character c:
  --         WHITE_SPACE for the space character,
  --         PUNCTUATION, BRACKET, OPERATOR, NUMBER or WORD as listed in the
  --         table below, and OTHER for everything else, including every
  --         character outside the range ' '..'~' (control characters, tab,
  --         and characters above '~').

  -- test:
  --   space
  --   all punctuation characters
  --   brackets
  --   a,z,A,Z
  --   0,9
  --   all operators
  --   other character

    -- The characters covered by the lookup table.
    subtype Table_Char is Character range ' ' .. '~';

    -- One entry per character, eight characters per row (seven in the last).
    -- Declared constant so that it is never modified after elaboration.
    char_type_table : constant array (Table_Char) of Integer :=
    (WHITE_SPACE,PUNCTUATION,PUNCTUATION,OTHER,OTHER,OTHER,OTHER,PUNCTUATION,      -- ' '..'''
     BRACKET,BRACKET,OPERATOR,OPERATOR,PUNCTUATION,OPERATOR,PUNCTUATION,OPERATOR,  -- '('..'/'
     NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,NUMBER,                      -- '0'..'7'
     NUMBER,NUMBER,PUNCTUATION,PUNCTUATION,OPERATOR,OPERATOR,OPERATOR,PUNCTUATION, -- '8'..'?'
     OTHER,WORD,WORD,WORD,WORD,WORD,WORD,WORD,                                     -- '@'..'G'
     WORD,WORD,WORD,WORD,WORD,WORD,WORD,WORD,                                      -- 'H'..'O'
     WORD,WORD,WORD,WORD,WORD,WORD,WORD,WORD,                                      -- 'P'..'W'
     WORD,WORD,WORD,BRACKET,OTHER,BRACKET,OTHER,OTHER,                             -- 'X'..'_'
     PUNCTUATION,WORD,WORD,WORD,WORD,WORD,WORD,WORD,                               -- '`'..'g'
     WORD,WORD,WORD,WORD,WORD,WORD,WORD,WORD,                                      -- 'h'..'o'
     WORD,WORD,WORD,WORD,WORD,WORD,WORD,WORD,                                      -- 'p'..'w'
     WORD,WORD,WORD,BRACKET,OTHER,BRACKET,OTHER);                                  -- 'x'..'~'

  begin
    -- Anything the table does not cover is classified as OTHER.
    if c not in Table_Char then
      return OTHER;
    end if;
    return char_type_table(c);
  end char_type;


  -- Capacity of the internal input buffer, in characters.
  IN_STRING_SIZE : constant integer := 1000;

  -- Input buffer; set_string copies the client string here and pads it with
  -- spaces.  Blank-filled at elaboration so it never holds undefined
  -- characters, even if a client calls next_token before set_string.
  in_string : String(1..IN_STRING_SIZE) := (others => ' ');

  -- Index of the current character in in_string.  A value greater than
  -- IN_STRING_SIZE means "end of input" (see has_current_char), which is the
  -- correct state before any string has been supplied.
  in_string_pos : integer := IN_STRING_SIZE + 1;

  -- Token returned when the end of the input has been reached.
  blank_token : constant Token := ("                    ",0,0,NO_MORE_TOKENS);
  -- Token returned by a recognizer that did not match.
  fail_token : constant Token  := ("                    ",0,0,FAIL);

  -- One-token lookahead: the token that the next call of next_token returns.
  current_token : Token := blank_token;


  -- methods to access current character and move to next character
  --   in input string

  function has_current_char return Boolean is

  -- post: returns True iff the read position is still inside the input
  --       buffer, i.e. the end of the input has not been reached

  --test:
  --  just before end of string
  --  end of string

  begin
    return not (in_string_pos > IN_STRING_SIZE);
  end has_current_char;


  function current_char return Character is

  -- pre:  has_current_char
  -- post: returns the character at the read position; the position itself
  --       is not changed

  -- test: once thru

    here : constant Character := in_string(in_string_pos);
  begin
    return here;
  end current_char;


  procedure inc is

  -- post: the read position has been advanced by one character

  -- test : once thru

  begin
    in_string_pos := Integer'Succ(in_string_pos);
  end inc;


  -- recognizer methods
  ---------------------

  function recognize_char(i : Integer) return Token is

  -- Recognizer for a single character of a given type.
  --
  -- pre:  i = character type to be recognized
  -- post: if there is a current character and its type is i, returns a
  --       token holding that character (length 1, value 0, type i) and
  --       advances the read position by one; otherwise returns the fail
  --       token and leaves the read position unchanged

  -- test:
  --   end of string
  --   current char is required char
  --   current char is not required char

  begin
    if has_current_char and then char_type (current_char) = i then
      declare
        -- Build the token before consuming the character.
        matched : constant Token := (current_char &  "                   ",1,0,i);
      begin
        inc;
        return matched;
      end;
    end if;
    return fail_token;
  end recognize_char;


  function recognize_sequence(i : Integer) return Token is

  -- Recognizer for a run of one or more characters that all have type i.
  --
  -- pre:  i = char type of sequence to be recognized
  -- post: returns token for recognized sequence,
  --         or fail token if the current character is not of type i.
  --       The whole run is consumed from the input, but only the first
  --       TOKEN_STRING_SIZE characters are stored in the token; the rest
  --       are dropped and not counted in token_string_length.

  -- test:
  --   not a recognized sequence
  --   sequence of 1 char
  --   sequence of 3 chars

    result   : Token;
    piece    : Token;
    fill_pos : Integer := 2;  -- where the next stored character goes

  begin
    result := recognize_char(i);
    if result.token_type /= FAIL then
      loop
        piece := recognize_char(i);
        exit when piece.token_type = FAIL;
        if fill_pos <= TOKEN_STRING_SIZE then
          result.token_string(fill_pos) := piece.token_string(1);
          fill_pos := fill_pos + 1;
          result.token_string_length := result.token_string_length + 1;
        end if;
      end loop;
    end if;
    return result;
  end recognize_sequence;


  function recognize_no_more_tokens return Token is

  -- post: returns the blank (NO_MORE_TOKENS) token if the end of the input
  --       has been reached, the fail token otherwise; consumes nothing

  -- test:
  --   not end of string
  --   end of string

  begin
    if has_current_char then
      return fail_token;
    end if;
    return blank_token;
  end recognize_no_more_tokens;


  function recognize_word_char return Token is
  -- post: returns a one-character WORD token if the current character is a
  --       word character (and consumes it), the fail token otherwise
  begin
    return recognize_char(i => WORD);
  end recognize_word_char;


  function recognize_word return Token is
  -- post: returns a WORD token for the run of word characters starting at
  --       the current position (and consumes it), the fail token if none
  begin
    return recognize_sequence(i => WORD);
  end recognize_word;


  function recognize_punctuation return Token is
  -- post: returns a one-character PUNCTUATION token if the current character
  --       is punctuation (and consumes it), the fail token otherwise
  begin
    return recognize_char(i => PUNCTUATION);
  end recognize_punctuation;


  function recognize_number_char return Token is
  -- post: returns a one-character NUMBER token if the current character is
  --       a digit (and consumes it), the fail token otherwise
  begin
    return recognize_char(i => NUMBER);
  end recognize_number_char;


  function recognize_number return Token is

  -- Recognizer for a NUMBER (run of digit characters).
  --
  -- post: returns the fail token if the current character is not a digit;
  --       otherwise consumes the run of digits and returns its token with
  --       token_value set to the decimal value of the digits stored in
  --       token_string.
  -- NOTE(review): digits beyond what recognize_sequence stores do not
  --   contribute to token_value, and a value too large for the type of
  --   token_value is expected to raise Constraint_Error - confirm against
  --   the declaration of Token in the package spec.

  -- test:
  --   not a number
  --   2 char number

  my_token : Token;

  begin
    my_token := recognize_sequence(NUMBER);
    if my_token.token_type = FAIL then
      return my_token;
    end if;
    for i in 1..my_token.token_string_length loop
      -- Convert the digit to 0..9 BEFORE adding it.  Adding the raw
      -- character code first pushes the intermediate sum up to 57 above the
      -- final result, which overflows for values near the top of the range
      -- even though the result itself is representable.
      my_token.token_value := my_token.token_value * 10 +
        (character'pos(my_token.token_string(i)) - character'pos('0'));
    end loop;
    return my_token;
  end recognize_number;


  function recognize_operator return Token is

  -- Recognizer for an OPERATOR, including the two-character operator "**".
  --
  -- post: returns the fail token if the current character is not an
  --       operator character; otherwise consumes it and returns its token.
  --       If that character is '*' and the next character is also '*',
  --       the second '*' is consumed too and the token is "**" (length 2).

  -- test:
  --   /
  --   */
  --   **/
  --   * at end of string

    op_token : Token := recognize_char(OPERATOR);
  begin
    -- The fail token's string is blank, so it also leaves through this guard.
    if op_token.token_string(1) /= '*' then
      return op_token;
    end if;
    if not has_current_char then
      return op_token;
    end if;
    if current_char = '*' then
      op_token.token_string(2) := '*';
      op_token.token_string_length := 2;
      inc;
    end if;
    return op_token;
  end recognize_operator;


  function recognize_bracket return Token is
  -- post: returns a one-character BRACKET token if the current character is
  --       a bracket (and consumes it), the fail token otherwise
  begin
    return recognize_char(i => BRACKET);
  end recognize_bracket;


  function recognize_white_space return Token is
  -- post: consumes the run of space characters at the current position and
  --       returns its WHITE_SPACE token, or the fail token if there is none
  begin
    return recognize_sequence(i => WHITE_SPACE);
  end recognize_white_space;


  function recognize_other return Token is
  -- post: returns a one-character OTHER token if the current character is
  --       classified as OTHER (and consumes it), the fail token otherwise
  begin
    return recognize_char(i => OTHER);
  end recognize_other;


  function recognize_string_token return Token is

  -- Recognizer for STRING_TOKEN: tries each token recognizer in the fixed
  -- order word, punctuation, number, operator, bracket, other.
  --
  -- post: returns the token of the first recognizer that matches (that
  --       recognizer has consumed its input); returns the fail token if
  --       none of them matches

  -- test:
  --   word
  --   punctuation
  --   number
  --   operator
  --   bracket
  --   other
  --   none of these

    candidate : Token;
  begin
    -- Each step runs only while every earlier recognizer has failed.
    candidate := recognize_word;

    if candidate.token_type = FAIL then
      candidate := recognize_punctuation;
    end if;

    if candidate.token_type = FAIL then
      candidate := recognize_number;
    end if;

    if candidate.token_type = FAIL then
      candidate := recognize_operator;
    end if;

    if candidate.token_type = FAIL then
      candidate := recognize_bracket;
    end if;

    if candidate.token_type = FAIL then
      -- Last resort; on failure this yields the fail token itself.
      candidate := recognize_other;
    end if;

    return candidate;
  end recognize_string_token;


  function recognize_token_string_token return Token is

  -- Recognizer for the next token of a TOKEN_STRING: skips any leading
  -- white space, then recognizes either end of input or a string token.
  --
  -- post: returns the NO_MORE_TOKENS token at end of input, otherwise the
  --       next string token
  -- raises: RECOGNIZE_FAILURE if neither is recognized.  The exception is
  --       declared locally, so a caller cannot name it in a handler.

  -- test:
  --   NO_MORE_TOKENS preceded by whitespace
  --   string token preceded by whitespace

    RECOGNIZE_FAILURE : exception;
    found : Token;

  begin
    -- Leading white space is optional; the result is deliberately discarded.
    found := recognize_white_space;

    found := recognize_no_more_tokens;
    if found.token_type = FAIL then
      found := recognize_string_token;
      if found.token_type = FAIL then
        raise RECOGNIZE_FAILURE;
      end if;
    end if;
    return found;
  end recognize_token_string_token;


  procedure set_string(s : String) is

  -- Installs a new string to be tokenized and primes the first token.
  --
  -- pre:  s = string to be tokenized; s'length <= IN_STRING_SIZE
  -- post: in_string has been set up (space-padded)
  --       current_token is set to first token of string
  -- raises: Constraint_Error if s is longer than IN_STRING_SIZE.  The check
  --       is made before anything is modified, so the previously installed
  --       string and lookahead token stay intact in that case.

  -- test:
  --   once thru with non-null s
  --   null s
  --   s of exactly IN_STRING_SIZE characters
  --   s longer than IN_STRING_SIZE

  begin
    if s'length > IN_STRING_SIZE then
      raise Constraint_Error;
    end if;

    -- Slice assignment slides the bounds, so s need not start at index 1.
    in_string(1 .. s'length) := s;
    -- Pad the remainder with spaces (a null slice when s fills the buffer).
    in_string(s'length + 1 .. IN_STRING_SIZE) := (others => ' ');

    in_string_pos := in_string'first;
    current_token := recognize_token_string_token;
  end set_string;


  function has_more_tokens return boolean is

  -- post: returns true iff the lookahead token is not the NO_MORE_TOKENS
  --       token, i.e. next_token still has a real token to hand out

  -- test: once thru

  begin
    return not (current_token.token_type = NO_MORE_TOKENS);
  end has_more_tokens;


  function next_token return Token is

  -- post: returns next token of string, or NO_MORE_TOKENS token if none;
  --       the lookahead token has been advanced to the token after it

  -- test: twice thru

    -- Capture the lookahead before it is replaced below.
    pending : constant Token := current_token;

  begin
    current_token := recognize_token_string_token;
    return pending;
  end next_token;

end;




  parent reply	other threads:[~2003-02-26 14:06 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-02-24 13:52 Thanks guys..my project and my many problems Paul Gregory
2003-02-24 15:12 ` Preben Randhol
2003-02-24 15:38 ` Hyman Rosen
2003-02-24 18:08   ` Preben Randhol
2003-02-25  2:01     ` Hyman Rosen
2003-02-25  9:46       ` Preben Randhol
2003-02-25 16:07         ` Hyman Rosen
2003-02-24 18:37   ` Simon Wright
2003-02-24 22:55     ` Jano
2003-02-25 17:36       ` Matthew Heaney
2003-02-25 21:56     ` Simon Wright
2003-02-25  8:45   ` Rodrigo García
2003-02-25 17:34   ` Matthew Heaney
2003-02-25 18:03     ` Hyman Rosen
2003-02-26  8:14       ` Preben Randhol
2003-02-25 17:31 ` Matthew Heaney
2003-02-25 19:57   ` chris.danx
2003-02-25 21:17     ` Chad R. Meiners
2003-03-05  9:22       ` chris.danx
2003-02-26 14:06   ` Paul Gregory [this message]
2003-02-26 18:09     ` tmoran
2003-02-27 17:12     ` Update - PLEASE SOMEBODY HELP!!!! Paul Gregory
     [not found]       ` <bot2j-ei3.ln1@beastie.ix.netcom.com>
2003-02-28 10:16         ` Georg Bauhaus
2003-02-28 10:53           ` Paul Gregory
2003-02-28 16:47             ` Simon Wright
replies disabled

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox