From: Paul Gregory <pg16@bton.ac.uk>
Subject: Re: Thanks guys..my project and my many problems
Date: Wed, 26 Feb 2003 14:06:53 +0000
Date: 2003-02-26T14:01:38+00:00 [thread overview]
Message-ID: <3E5CC9FC.F1D134FC@bton.ac.uk> (raw)
In-Reply-To: 1ec946d1.0302250931.2fe67884@posting.google.com
Thanks a million, Matthew and everybody else... however, I went to see my tutor today and he
refused me permission to use your package. Apparently I MUST use a package that the
University supplies called "string_tokenizer" to read the tokens in my array when
translating... so it's back to the drawing board for me, unfortunately :-(
---->
----------------------------------------------------------------------------------------
package body String_tokenizer is
-- Uses recursive descent.
-- Each syntactic token has a recognizer, which returns its token iff
-- it is recognized at the current position in the input string.
-- If a recognizer recognizes its token, it consumes the corresponding
-- portion of the input string; if not, it leaves the string for others
-- to attempt to recognize.
-- Higher level syntactic recognizers call lower level recognizers,
-- possibly recursively.
-- Token string syntax
----------------------
-- NO_MORE_TOKENS ::= end of input string
-- WORD_CHAR ::= A..Z | a..z
-- WORD ::= WORD_CHAR | WORD_CHAR WORD
-- PUNCTUATION ::= . | , | : | ; | ' | " | ` | ! | ?
-- NUMBER_CHAR ::= 0..9
-- NUMBER ::= NUMBER_CHAR | NUMBER_CHAR NUMBER
-- OPERATOR ::= + | - | * | / | ** | < | = | >
-- BRACKET ::= ( | ) | { | } | [ | ]
-- WHITE_SPACE ::= space_character | space_character WHITE_SPACE
-- OTHER ::= characters other than those mentioned above
-- TOKEN ::= NO_MORE_TOKENS | WORD | PUNCTUATION | NUMBER |
-- OPERATOR | BRACKET | OTHER
-- STRING_TOKEN ::= WORD | PUNCTUATION | NUMBER | OPERATOR | BRACKET |
-- OTHER
-- NON_WHITE_SPACE_TOKEN_STRING ::= NO_MORE_TOKENS | STRING_TOKEN TOKEN_STRING
-- TOKEN_STRING ::= NON_WHITE_SPACE_TOKEN_STRING |
-- WHITE_SPACE NON_WHITE_SPACE_TOKEN_STRING
-- Type codes used only inside this package body.
-- WHITE_SPACE is the code char_type returns for the space character;
-- FAIL is the token_type carried by fail_token, which recognizers return
-- when they do not match.
-- NOTE(review): 7 and 8 are presumably chosen so as not to collide with the
-- client-visible codes (WORD, NUMBER, ...) declared elsewhere -- confirm
-- against the package specification.
WHITE_SPACE : constant integer := 7; -- needed internally, not for client
FAIL : constant integer := 8; -- needed internally, not for client
function char_type(c : Character) return Integer is
   -- pre:  none
   -- post: returns the type code for character c: one of WHITE_SPACE,
   --       PUNCTUATION, BRACKET, OPERATOR, NUMBER, WORD or OTHER.
   --       Any character outside ' '..'~' is classified as OTHER.
   -- test:
   --   space
   --   all punctuation characters
   --   brackets
   --   a,z,A,Z
   --   0,9
   --   all operators
   --   other character

   -- One entry per character in ' '..'~', eight characters per row.
   -- Declared constant: the table is never modified, so there is no reason
   -- for it to be a variable.
   -- The range comments are kept on the same line as the row they describe;
   -- if they wrap onto a line of their own they stop being comments and
   -- break the aggregate.
   char_type_table : constant array (Character range ' '..'~') of Integer :=
     (WHITE_SPACE, PUNCTUATION, PUNCTUATION, OTHER,
      OTHER, OTHER, OTHER, PUNCTUATION,                   -- ' '..'''
      BRACKET, BRACKET, OPERATOR, OPERATOR,
      PUNCTUATION, OPERATOR, PUNCTUATION, OPERATOR,       -- '('..'/'
      NUMBER, NUMBER, NUMBER, NUMBER,
      NUMBER, NUMBER, NUMBER, NUMBER,                     -- '0'..'7'
      NUMBER, NUMBER, PUNCTUATION, PUNCTUATION,
      OPERATOR, OPERATOR, OPERATOR, PUNCTUATION,          -- '8'..'?'
      OTHER, WORD, WORD, WORD,
      WORD, WORD, WORD, WORD,                             -- '@'..'G'
      WORD, WORD, WORD, WORD,
      WORD, WORD, WORD, WORD,                             -- 'H'..'O'
      WORD, WORD, WORD, WORD,
      WORD, WORD, WORD, WORD,                             -- 'P'..'W'
      WORD, WORD, WORD, BRACKET,
      OTHER, BRACKET, OTHER, OTHER,                       -- 'X'..'_'
      PUNCTUATION, WORD, WORD, WORD,
      WORD, WORD, WORD, WORD,                             -- '`'..'g'
      WORD, WORD, WORD, WORD,
      WORD, WORD, WORD, WORD,                             -- 'h'..'o'
      WORD, WORD, WORD, WORD,
      WORD, WORD, WORD, WORD,                             -- 'p'..'w'
      WORD, WORD, WORD, BRACKET,
      OTHER, BRACKET, OTHER);                             -- 'x'..'~'
begin
   -- Characters the table does not cover (control characters, codes
   -- above '~') are all treated as OTHER.
   if c not in ' '..'~' then
      return OTHER;
   end if;
   return char_type_table(c);
end char_type;
-- Tokenizer state.
-- in_string holds the text being tokenized, space-padded to IN_STRING_SIZE;
-- in_string_pos is the index of the next unread character.
IN_STRING_SIZE : constant integer := 1000;
-- Start with a fully blank buffer so no recognizer can ever read an
-- uninitialised character, even if a client asks for tokens before
-- calling set_string.
in_string : String(1..IN_STRING_SIZE) := (others => ' ');
-- One past the end means "no current character" (see has_current_char).
-- Starting at IN_STRING_SIZE itself would report a current character
-- before any string had been set.
in_string_pos : integer := IN_STRING_SIZE + 1;
-- The token string is filled with an others-aggregate rather than a string
-- literal, so it always matches the length of Token.token_string.
-- A literal of any fixed width raises Constraint_Error at elaboration
-- unless it is exactly that long.
-- NOTE(review): assumes Token.token_string is a constrained String
-- component, as the indexing in the recognizers suggests -- confirm
-- against the package specification.
blank_token : constant Token := ((others => ' '),0,0,NO_MORE_TOKENS);
fail_token : constant Token := ((others => ' '),0,0,FAIL);
-- Lookahead token: the token that the next call to next_token will return.
current_token : Token := blank_token;
-- methods to access current character and move to next character
-- in input string
function has_current_char return Boolean is
   -- post: returns True iff the read position has not moved past the
   --       last slot of the input buffer
   -- test:
   --   just before end of string
   --   end of string
   past_end : constant Boolean := in_string_pos > IN_STRING_SIZE;
begin
   return not past_end;
end has_current_char;
function current_char return Character is
   -- pre:  has_current_char
   -- post: returns the character at the read position; the position
   --       itself is not changed
   -- test: once thru
   here : constant Character := in_string(in_string_pos);
begin
   return here;
end current_char;
procedure inc is
   -- post: the read position has advanced by one character
   -- test: once thru
begin
   in_string_pos := Integer'Succ(in_string_pos);
end inc;
-- recognizer methods
---------------------
function recognize_char(i : Integer) return Token is
   -- pre:  i = character type to be recognized
   -- post: if the current character has type i, consumes it and returns a
   --       token holding that character (space-padded), with length 1,
   --       value 0 and type i; otherwise returns fail_token and leaves
   --       the input position unchanged
   -- test:
   --   end of string
   --   current char is required char
   --   current char is not required char
   c : Character;
begin
   if not has_current_char then
      return fail_token;
   end if;
   c := current_char;
   if char_type(c) /= i then
      return fail_token;
   end if;
   inc;
   -- The token string is built with an others-aggregate so it always
   -- matches the declared length of Token.token_string. Concatenating a
   -- fixed-width literal raises Constraint_Error unless the widths
   -- happen to agree.
   -- NOTE(review): assumes token_string is a constrained String indexed
   -- from 1 -- confirm against the package specification.
   return ((1 => c, others => ' '), 1, 0, i);
end recognize_char;
function recognize_sequence(i : Integer) return Token is
   -- pre:  i = char type of sequence to be recognized
   -- post: returns a token for the longest run of type-i characters at
   --       the current position, or the fail token if the current
   --       character is not of type i
   -- test:
   --   not a recognized sequence
   --   sequence of 1 char
   --   sequence of 3 chars
   run  : Token := recognize_char(i);
   more : Token;
begin
   if run.token_type /= FAIL then
      loop
         more := recognize_char(i);
         exit when more.token_type = FAIL;
         -- Characters beyond the token string's capacity are still
         -- consumed from the input, but are not stored or counted.
         if run.token_string_length < TOKEN_STRING_SIZE then
            run.token_string_length := run.token_string_length + 1;
            run.token_string(run.token_string_length) := more.token_string(1);
         end if;
      end loop;
   end if;
   return run;
end recognize_sequence;
function recognize_no_more_tokens return Token is
   -- post: returns blank_token (type NO_MORE_TOKENS) when the input is
   --       exhausted, fail_token otherwise; consumes nothing
   -- test:
   --   not end of string
   --   end of string
begin
   if has_current_char then
      return fail_token;
   end if;
   return blank_token;
end recognize_no_more_tokens;
function recognize_word_char return Token is
   -- post: returns the result of recognizing a single WORD character
   --       at the current position
   result : constant Token := recognize_char(WORD);
begin
   return result;
end recognize_word_char;
function recognize_word return Token is
   -- post: returns the result of recognizing a run of WORD characters
   --       at the current position
   result : constant Token := recognize_sequence(WORD);
begin
   return result;
end recognize_word;
function recognize_punctuation return Token is
   -- post: returns the result of recognizing a single PUNCTUATION
   --       character at the current position
   result : constant Token := recognize_char(PUNCTUATION);
begin
   return result;
end recognize_punctuation;
function recognize_number_char return Token is
   -- post: returns the result of recognizing a single NUMBER character
   --       at the current position
   result : constant Token := recognize_char(NUMBER);
begin
   return result;
end recognize_number_char;
function recognize_number return Token is
   -- post: returns a NUMBER token whose token_value is the decimal value
   --       of the stored digits, or the fail token if the current
   --       character is not a digit
   -- test:
   --   not a number
   --   2 char number
   -- NOTE(review): recognize_sequence stores at most TOKEN_STRING_SIZE
   -- characters, so the value of a longer digit run is computed from the
   -- stored prefix only. A value too large for Integer still raises
   -- Constraint_Error.
   my_token : Token;
begin
   my_token := recognize_sequence(NUMBER);
   if my_token.token_type = FAIL then
      return my_token;
   end if;
   for i in 1..my_token.token_string_length loop
      -- The digit value is parenthesised so it is computed first.
      -- Written as v*10 + pos(c) - pos('0'), the expression evaluates
      -- left to right as (v*10 + pos(c)) - pos('0'), and the
      -- intermediate sum can overflow for values near Integer'Last
      -- that are themselves representable.
      my_token.token_value := my_token.token_value * 10 +
        (character'pos(my_token.token_string(i)) - character'pos('0'));
   end loop;
   return my_token;
end recognize_number;
function recognize_operator return Token is
   -- post: returns an OPERATOR token for the operator character at the
   --       current position, extended to the two-character "**" when a
   --       '*' is immediately followed by another '*'; returns the fail
   --       token if the current character is not an operator
   -- test:
   --   /
   --   */
   --   **/
   --   * at end of string
   op : Token := recognize_char(OPERATOR);
begin
   -- No explicit failure check is needed here: the fail token's string
   -- does not begin with '*', so it falls straight through to the return.
   if op.token_string(1) = '*'
     and then has_current_char
     and then current_char = '*'
   then
      op.token_string(2) := '*';
      op.token_string_length := 2;
      inc;
   end if;
   return op;
end recognize_operator;
function recognize_bracket return Token is
   -- post: returns the result of recognizing a single BRACKET character
   --       at the current position
   result : constant Token := recognize_char(BRACKET);
begin
   return result;
end recognize_bracket;
function recognize_white_space return Token is
   -- post: returns the result of recognizing a run of WHITE_SPACE
   --       characters at the current position
   result : constant Token := recognize_sequence(WHITE_SPACE);
begin
   return result;
end recognize_white_space;
function recognize_other return Token is
   -- post: returns the result of recognizing a single OTHER character
   --       at the current position
   result : constant Token := recognize_char(OTHER);
begin
   return result;
end recognize_other;
function recognize_string_token return Token is
   -- post: returns the first token recognized at the current position,
   --       trying word, punctuation, number, operator, bracket and other
   --       in that order; returns the fail token if none of them match
   -- test:
   --   word
   --   punctuation
   --   number
   --   operator
   --   bracket
   --   other
   --   none of these
   candidate : Token := recognize_word;
begin
   -- Each recognizer runs only if every earlier one failed. A failed
   -- recognizer consumes no input, so the next attempt starts at the
   -- same position.
   if candidate.token_type = FAIL then
      candidate := recognize_punctuation;
   end if;
   if candidate.token_type = FAIL then
      candidate := recognize_number;
   end if;
   if candidate.token_type = FAIL then
      candidate := recognize_operator;
   end if;
   if candidate.token_type = FAIL then
      candidate := recognize_bracket;
   end if;
   if candidate.token_type = FAIL then
      -- Last attempt: on failure this itself yields the fail token,
      -- which is exactly what must be returned when nothing matches.
      candidate := recognize_other;
   end if;
   return candidate;
end recognize_string_token;
function recognize_token_string_token return Token is
   -- post: skips any leading white space, then returns the
   --       NO_MORE_TOKENS token if the input is exhausted, or else the
   --       next string token
   -- raises: RECOGNIZE_FAILURE (declared locally) if neither is recognized
   -- test:
   --   NO_MORE_TOKENS preceded by whitespace
   --   string token preceded by whitespace
   RECOGNIZE_FAILURE : exception;
   found : Token;
begin
   -- Leading white space is optional: it is consumed if present and the
   -- result is discarded either way.
   found := recognize_white_space;

   found := recognize_no_more_tokens;
   if found.token_type = FAIL then
      found := recognize_string_token;
      if found.token_type = FAIL then
         raise RECOGNIZE_FAILURE;
      end if;
   end if;
   return found;
end recognize_token_string_token;
procedure set_string(s : String) is
   -- pre:  s = string to be tokenized, at most IN_STRING_SIZE characters
   -- post: in_string holds s, space-padded to IN_STRING_SIZE;
   --       the read position is at the start of in_string;
   --       current_token is set to first token of string
   -- raises: Constraint_Error if s is longer than IN_STRING_SIZE; in that
   --         case the tokenizer state is left untouched
   -- test:
   --   once thru with non-null s
   --   null s
   --   s longer than IN_STRING_SIZE
begin
   -- Reject over-long input before touching any state. A
   -- character-by-character copy would fail part-way through, leaving
   -- the buffer overwritten and the read position out of range.
   if s'Length > IN_STRING_SIZE then
      raise Constraint_Error;
   end if;

   -- Blank the whole buffer, then overlay s at the front.
   -- Slice assignment slides the bounds, so s may have any index range.
   in_string := (others => ' ');
   in_string(in_string'First .. in_string'First + s'Length - 1) := s;

   in_string_pos := in_string'First;
   current_token := recognize_token_string_token;
end set_string;
function has_more_tokens return boolean is
   -- post: returns true iff the lookahead token is something other than
   --       the NO_MORE_TOKENS token
   -- test: once thru
   exhausted : constant Boolean :=
     current_token.token_type = NO_MORE_TOKENS;
begin
   return not exhausted;
end has_more_tokens;
function next_token return Token is
   -- post: returns next token of string, or NO_MORE_TOKENS token if none;
   --       the lookahead token has been advanced to the token after it
   -- test: twice thru
   result : constant Token := current_token;
begin
   -- The token handed back is the one read on the previous call (or by
   -- set_string); this call reads the one after it.
   current_token := recognize_token_string_token;
   return result;
end next_token;
end;
next prev parent reply other threads:[~2003-02-26 14:06 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-02-24 13:52 Thanks guys..my project and my many problems Paul Gregory
2003-02-24 15:12 ` Preben Randhol
2003-02-24 15:38 ` Hyman Rosen
2003-02-24 18:08 ` Preben Randhol
2003-02-25 2:01 ` Hyman Rosen
2003-02-25 9:46 ` Preben Randhol
2003-02-25 16:07 ` Hyman Rosen
2003-02-24 18:37 ` Simon Wright
2003-02-24 22:55 ` Jano
2003-02-25 17:36 ` Matthew Heaney
2003-02-25 21:56 ` Simon Wright
2003-02-25 8:45 ` Rodrigo García
2003-02-25 17:34 ` Matthew Heaney
2003-02-25 18:03 ` Hyman Rosen
2003-02-26 8:14 ` Preben Randhol
2003-02-25 17:31 ` Matthew Heaney
2003-02-25 19:57 ` chris.danx
2003-02-25 21:17 ` Chad R. Meiners
2003-03-05 9:22 ` chris.danx
2003-02-26 14:06 ` Paul Gregory [this message]
2003-02-26 18:09 ` tmoran
2003-02-27 17:12 ` Update - PLEASE SOMEBODY HELP!!!! Paul Gregory
[not found] ` <bot2j-ei3.ln1@beastie.ix.netcom.com>
2003-02-28 10:16 ` Georg Bauhaus
2003-02-28 10:53 ` Paul Gregory
2003-02-28 16:47 ` Simon Wright
replies disabled
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox