comp.lang.ada
 help / color / mirror / Atom feed
From: Martin Krischik <krischik@users.sourceforge.net>
Subject: Re: variable lenght strings
Date: Fri, 22 Oct 2004 09:38:19 +0200
Date: 2004-10-22T09:38:19+02:00	[thread overview]
Message-ID: <1240712.80bnFHAYxs@linux1.krischik.com> (raw)
In-Reply-To: 417842cd$0$74191$39cecf19@news.twtelecom.net

Matthew Heaney wrote:

> 
> "Marius Amado Alves" <amado.alves@netcabo.pt> wrote in message
> news:mailman.46.1098398641.10401.comp.lang.ada@ada-france.org...
>>> 1) Is it possible to use Get_Line with Unbounded and/or Bounded
>>> Strings?
>>
>> Not in the standard, but subprograms like those are usually around, e.g.
>> in the GNAT Library, or end up being written in house.
>>
>>> 2) If not, how should user input be managed when the line length isn't
>>> known a priori?
>>
>> There's a way using the standard Get_Line, explained in AdaPower.
> 
> Mario is probably referring to an article I posted to CLA a few years'
> ago, and which is now archived at the adapower website.
> 
> The basic idea is this:  algorithms that consume input from a stream need
> a way to identify when all of the input has been consumed.  Typically this is
> done using a special value that you know is outside the range of normal
> values, e.g.

Well, you do not check for End_Of_File, and that means your solution will
fail if the last line is not terminated with CR/LF. And if you want to
process files which have been edited by human beings, then you have to
handle that case.

The following version does work. It has been tested on hundreds of files,
all edited by human beings:

package body
   --
   --  String IO routines. These are used because
   --  Ada.Strings.Unbounded.Text_IO and GNAT.IO_Aux both have a subtle
   --  little bug.
   --
   AdaCL.Strings.IO
is
   --
   --  Shorten some names.
   --
   package S_U     renames Ada.Strings.Unbounded;
   package S_Maps  renames Ada.Strings.Maps;
   package Latin_1 renames Ada.Characters.Latin_1;
   package IO      renames Ada.Text_IO;

   --  Buffer length. Works for any non-zero value, larger values take
   --  more stack space, smaller values require more recursion.
   BufferSize : constant := 2000;

   --
   --  Well, there are a lot of Get_Line routines around, and GNAT
   --  certainly has its own, but all those I have seen have a subtle bug:
   --  when the last line is not terminated with CR/LF and is a multiple
   --  of the buffer size long, they will raise an end-of-file exception.
   --
   --  This version uses recursion!
   --
   --
   function Get_Line (
      --  File to be read.
      File : in IO.File_Type)
   return
      String
   is
      --  Reads the remainder of the current line of File and returns it as
      --  a String of exactly the right length.
      --
      --  The line is fetched in chunks of BufferSize characters.  When a
      --  chunk fills the buffer completely and the file is not exhausted,
      --  the function calls itself to fetch the rest and concatenates the
      --  pieces.
      --
      --  Exceptions raised by IO.Get_Line or IO.End_Of_File are not handled
      --  here and propagate to the caller.

      --  Trace : AdaCL.Trace.Object :=
      --     AdaCL.Trace.Function_Trace
      --       (AdaCL.Trace.Entity & ':' & AdaCL.Trace.Source);
      --  pragma Unreferenced (Trace);

      Buffer : String (1 .. BufferSize);
      Last   : Natural;
   begin
      IO.Get_Line (
         File => File,
         Item => Buffer,
         Last => Last);

      if Last < Buffer'Last then
         --  The buffer was not filled: the whole (rest of the) line is in
         --  Buffer (1 .. Last).
         return Buffer (1 .. Last);
      elsif IO.End_Of_File (File) then
         --  The buffer is full and nothing is left in the file.  Reading
         --  on would raise End_Error, so the full buffer is the result.
         return Buffer;
      else
         --  The buffer is full and there is more input: fetch the rest.
         return Buffer & Get_Line (File);
      end if;
   end Get_Line;

   --
   --  Well, there are a lot of Get_Line routines around, and GNAT
   --  certainly has its own, but all those I have seen have a subtle bug:
   --  when the last line is not terminated with CR/LF and is a multiple
   --  of the buffer size long, they will raise an end-of-file exception.
   --
   --  This version uses a loop.
   --
   function Get_Line (
      --  File to be read.
      File : in IO.File_Type)
   return
      S_U.Unbounded_String
   is
      --  Reads the remainder of the current line of File and returns it as
      --  an Unbounded_String.
      --
      --  The line is fetched in chunks of BufferSize characters which are
      --  appended to the result until a chunk does not fill the buffer or
      --  the file is exhausted.
      --
      --  Exceptions raised by IO.Get_Line or IO.End_Of_File are not handled
      --  here and propagate to the caller.

      --  Trace : AdaCL.Trace.Object :=
      --     AdaCL.Trace.Function_Trace
      --       (AdaCL.Trace.Entity & ':' & AdaCL.Trace.Source);
      --  pragma Unreferenced (Trace);

      Retval : S_U.Unbounded_String := S_U.Null_Unbounded_String;
      Item   : String (1 .. BufferSize);
      Last   : Natural;
   begin
      GetWholeLine :
      loop
         IO.Get_Line (
            File => File,
            Item => Item,
            Last => Last);

         S_U.Append (
            Source   => Retval,
            New_Item => Item (1 .. Last));

         --  Short-circuit on purpose: End_Of_File has to look ahead in the
         --  file, so it is only consulted when the buffer was filled
         --  completely and it is unknown whether the line continues.  This
         --  matches the String returning overload of Get_Line.
         exit GetWholeLine when Last < Item'Last
                      or else IO.End_Of_File (File);

      end loop GetWholeLine;

      return Retval;
   end Get_Line;

   --
   --  Get Next Word.
   --
   procedure Get_Word (
      --  File to be read.
      File : in Ada.Text_IO.File_Type;
      --  String into which the word is to be read
      Item : out String;
      --  Index of the last character stored in Item.
      Last : out Natural;
      --  Word Delimiters
      Delimiters : in Ada.Strings.Maps.Character_Set := Word_Delimiters)
   is
      --  Skips leading delimiters (and line terminators), then stores the
      --  characters of the next word in Item (Item'First .. Last).
      --
      --  Reading of the word stops when
      --    * a delimiter has been read (the delimiter is consumed),
      --    * the end of the line or of the file is reached, or
      --    * Item is full, in which case Last = Item'Last and the word may
      --      continue in the file.
      --
      --  If Item is a null string nothing is read and Last is set to
      --  Item'First - 1.
      --
      --  End_Error propagates from IO.Get when the file holds nothing but
      --  delimiters, i.e. when there is no further word to read.

      --  Trace : AdaCL.Trace.Object :=
      --     AdaCL.Trace.Function_Trace
      --       (AdaCL.Trace.Entity & ':' & AdaCL.Trace.Source);
      --  pragma Unreferenced (Trace);

      Next_Char : Character := Latin_1.NUL;
   begin
      --  A null string cannot hold a single character: report "nothing
      --  stored" the way Ada.Text_IO.Get_Line does and leave the input
      --  untouched.
      if Item'Length = 0 then
         Last := Item'First - 1;
         return;
      end if;

      Last := Item'First;

      Skip_Blanks :
      loop
         IO.Get (File => File,
               Item => Next_Char);

         --  AdaCL.Trace.Write
         --    (Integer'Image (Character'Pos (Next_Char)) &
         --     "'" & String'(1 => Next_Char) & "'");

         exit Skip_Blanks when not S_Maps.Is_In (
                               Element => Next_Char,
                               Set     => Delimiters);
      end loop Skip_Blanks;

      --  Invariant: Next_Char is a character of the word which has not been
      --  stored yet and Last is the index where it belongs.
      Read_Char :
      loop
         Item (Last) := Next_Char;

         --  AdaCL.Trace.Write (Item (Item'First .. Last));

         --  Stop when Item is full.  Otherwise stop at the end of the line:
         --  IO.Get would silently skip the line terminator and glue the
         --  first word of the next line to this one, or raise End_Error at
         --  the end of the file and lose the word read so far.
         exit Read_Char when Last = Item'Last
                     or else IO.End_Of_Line (File);

         IO.Get (File => File,
               Item => Next_Char);

         --  AdaCL.Trace.Write
         --    (Integer'Image (Character'Pos (Next_Char)) &
         --     "'" & String'(1 => Next_Char) & "'");

         --  A delimiter ends the word; Last still designates the last
         --  character stored.
         exit Read_Char when S_Maps.Is_In (
                             Element => Next_Char,
                             Set     => Delimiters);

         Last := Natural'Succ (Last);
      end loop Read_Char;
   end Get_Word;

   --
   --  Get Next Word.
   --
   --  This version uses recursion! The current version is guaranteed to
   --  work for words of up to 2000 characters.
   --
   function Get_Word (
      --  File to be read.
      File : in IO.File_Type;
      --  Word Delimiters
      Delimiters : in S_Maps.Character_Set := Word_Delimiters)
   return
      String
   is
      --  Returns the next word of File as a String.
      --
      --  The word is fetched with the Get_Word procedure in chunks of
      --  BufferSize characters.  When a chunk fills the buffer completely,
      --  the next character is inspected without being consumed: only if it
      --  belongs to the word does the function call itself to fetch the
      --  rest.
      --
      --  Exceptions raised by the Get_Word procedure or by IO.Look_Ahead
      --  are not handled here and propagate to the caller.

      --  Trace : AdaCL.Trace.Object :=
      --     AdaCL.Trace.Function_Trace
      --       (AdaCL.Trace.Entity & ':' & AdaCL.Trace.Source);
      --  pragma Unreferenced (Trace);

      Buffer      : String (1 .. BufferSize);
      Last        : Natural;
      Next_Char   : Character;
      At_Line_End : Boolean;
   begin
      Get_Word (File       => File,
             Item       => Buffer,
             Last       => Last,
             Delimiters => Delimiters);

      if Last < Buffer'Last then
         --  The word ended inside the buffer.
         return Buffer (1 .. Last);
      end if;

      --  The buffer is full, so the word may or may not continue.  Calling
      --  Get_Word again unconditionally would skip a delimiter that follows
      --  directly and append the *next* word, so peek first.
      IO.Look_Ahead (File        => File,
                     Item        => Next_Char,
                     End_Of_Line => At_Line_End);

      if At_Line_End
        or else S_Maps.Is_In (Element => Next_Char,
                              Set     => Delimiters)
      then
         --  End of line, end of file or a delimiter: the word is complete.
         return Buffer;
      else
         return Buffer & Get_Word (File, Delimiters);
      end if;
   end Get_Word;

   --
   --  Get Next Word.
   --
   --  This version uses a loop. The current version is guaranteed to
   --  work for words of up to 2000 characters.
   --
   function Get_Word (
      --  File to be read.
      File : in IO.File_Type;
      --  Word Delimiters
      Delimiters : in Ada.Strings.Maps.Character_Set := Word_Delimiters)
   return
      S_U.Unbounded_String
   is
      --  Returns the next word of File as an Unbounded_String.
      --
      --  The word is fetched with the Get_Word procedure in chunks of
      --  BufferSize characters which are appended to the result.  After a
      --  chunk that fills the buffer completely, the next character is
      --  inspected without being consumed; reading continues only if that
      --  character belongs to the word.
      --
      --  Exceptions raised by the Get_Word procedure or by IO.Look_Ahead
      --  are not handled here and propagate to the caller.

      --  Trace : AdaCL.Trace.Object :=
      --     AdaCL.Trace.Function_Trace
      --       (AdaCL.Trace.Entity & ':' & AdaCL.Trace.Source);
      --  pragma Unreferenced (Trace);

      Retval      : S_U.Unbounded_String := S_U.Null_Unbounded_String;
      Item        : String (1 .. BufferSize);
      Last        : Natural;
      Next_Char   : Character;
      At_Line_End : Boolean;
   begin
      Get_Whole_Word : loop
         Get_Word (File       => File,
                Item       => Item,
                Last       => Last,
                Delimiters => Delimiters);

         S_U.Append (Source   => Retval,
                     New_Item => Item (1 .. Last));

         --  The word ended inside the buffer.
         exit Get_Whole_Word when Last < Item'Last;

         --  The buffer is full, so the word may or may not continue.
         --  Calling Get_Word again unconditionally would skip a delimiter
         --  that follows directly and append the *next* word, so peek
         --  first.
         IO.Look_Ahead (File        => File,
                        Item        => Next_Char,
                        End_Of_Line => At_Line_End);

         exit Get_Whole_Word when At_Line_End
                          or else S_Maps.Is_In (Element => Next_Char,
                                                Set     => Delimiters);

      end loop Get_Whole_Word;

      return Retval;
   end Get_Word;

end AdaCL.Strings.IO;

With Regards

Martin
-- 
mailto://krischik@users.sourceforge.net
http://www.ada.krischik.com




  reply	other threads:[~2004-10-22  7:38 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-10-21 17:52 variable lenght strings fabio de francesco
2004-10-21 21:22 ` Martin Dowie
2004-10-21 22:42 ` Marius Amado Alves
2004-10-21 23:14   ` Matthew Heaney
2004-10-22  7:38     ` Martin Krischik [this message]
2004-10-22 12:30     ` fabio de francesco
2004-10-22  7:29   ` Martin Krischik
2004-10-22 13:01     ` Matthew Heaney
2004-10-24 15:46       ` Martin Krischik
2004-10-24 19:54         ` Jeffrey Carter
2004-10-24 21:30           ` Larry Kilgallen
2004-10-25  4:02             ` Jeffrey Carter
2004-10-21 23:01 ` Marius Amado Alves
2004-10-21 23:05 ` Stephen Leake
2004-10-22  7:25 ` Martin Krischik
2004-10-22 11:11   ` Martin Dowie
2004-10-24 15:43     ` Martin Krischik
2004-10-24 17:39       ` Martin Dowie
2004-10-24 18:37       ` Björn Persson
2004-10-25  7:30         ` Martin Krischik
2004-10-26  0:06           ` Randy Brukardt
2004-10-26  1:53             ` Larry Kilgallen
2004-10-26  8:49               ` Martin Krischik
2004-10-26 11:18               ` Marius Amado Alves
2004-10-26 12:48                 ` variable length strings Larry Kilgallen
2004-10-26 16:11                   ` Warren W. Gay VE3WWG
2004-10-26 18:50                     ` Björn Persson
2004-10-26 19:46                       ` Larry Kilgallen
2004-10-26  8:43             ` variable lenght strings Jean-Pierre Rosen
2004-10-26 13:15               ` Martin Krischik
2004-10-26 17:37                 ` Pascal Obry
2004-10-26 18:07                   ` Hyman Rosen
2004-10-26 20:10                     ` Pascal Obry
2004-10-27 10:26                       ` variable length strings Jacob Sparre Andersen
2004-10-27 10:39                         ` Pascal Obry
2004-10-27 11:47                         ` Larry Kilgallen
2004-10-28  7:18                           ` Jacob Sparre Andersen
2004-10-27 11:50                         ` Larry Kilgallen
     [not found]                         ` <uwtxcla1n.fsf@obry.Organization: LJK Software <PTzuwe3GsIg6@eisner.encompasserve.org>
2004-10-27 12:12                           ` Samuel Tardieu
2004-10-27 12:58                             ` Pascal Obry
2004-10-27 13:04                           ` Pascal Obry
2004-10-27 14:54                             ` Dmitry A. Kazakov
2004-10-27 16:38                               ` Pascal Obry
2004-10-28  8:14                                 ` Dmitry A. Kazakov
2004-10-28  8:49                                   ` Pascal Obry
2004-10-28  9:06                                     ` Dmitry A. Kazakov
2004-10-28 16:07                                       ` Pascal Obry
2004-10-28 22:05                                         ` Jeffrey Carter
2004-10-28 22:41                                           ` Randy Brukardt
2004-10-29  1:11                                             ` Jeffrey Carter
2004-10-29  7:42                                         ` Dmitry A. Kazakov
2004-10-28 10:31                         ` Larry Kilgallen
2004-10-26 18:13                   ` variable lenght strings Martin Krischik
2004-10-27 12:01                 ` Jean-Pierre Rosen
2004-10-26 17:46             ` Jeffrey Carter
2004-10-28 22:50               ` Randy Brukardt
2004-10-28 23:01                 ` Larry Kilgallen
2004-10-29 21:52                   ` Randy Brukardt
2004-10-29  8:48                 ` Dale Stanbrough
2004-10-29  9:11                   ` Larry Kilgallen
2004-10-24 18:38     ` Why these "Drop" parameters? (was: variable lenght strings) Björn Persson
2004-10-26  0:13       ` Randy Brukardt
2004-11-01  1:02         ` Why these "Drop" parameters? Björn Persson
2004-11-01 19:59           ` Randy Brukardt
2004-10-24 18:57 ` variable lenght strings Jeffrey Carter
replies disabled

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox