From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on polar.synack.me X-Spam-Level: X-Spam-Status: No, score=-2.9 required=5.0 tests=BAYES_00,MAILING_LIST_MULTI, WEIRD_QUOTING autolearn=unavailable autolearn_force=no version=3.4.4 X-Google-Language: ENGLISH,ASCII-7-bit X-Google-Thread: 103376,97643ec695b9bf73 X-Google-Attributes: gid103376,public X-Google-ArrivalTime: 2003-12-11 14:48:25 PST Path: archiver1.google.com!news2.google.com!newsfeed2.dallas1.level3.net!news.level3.com!priapus.visi.com!orange.octanews.net!news.octanews.net!news-out.visi.com!petbe.visi.com!skynet.be!freenix!enst.fr!melchior!cuivre.fr.eu.org!melchior.frmug.org!not-for-mail From: "David C. Hoos" Newsgroups: comp.lang.ada Subject: Re: Word counting Date: Thu, 11 Dec 2003 16:45:05 -0600 Organization: Cuivre, Argent, Or Message-ID: References: <4d01ad29.0312111401.32ec5297@posting.google.com> NNTP-Posting-Host: lovelace.ada-france.org Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: 7bit X-Trace: melchior.cuivre.fr.eu.org 1071182728 3771 80.67.180.195 (11 Dec 2003 22:45:28 GMT) X-Complaints-To: usenet@melchior.cuivre.fr.eu.org NNTP-Posting-Date: Thu, 11 Dec 2003 22:45:28 +0000 (UTC) To: "wave" , "comp.lang.ada@ada.eu.org" Return-Path: X-Priority: 3 X-MSMail-Priority: Normal X-Mailer: Microsoft Outlook Express 6.00.2800.1158 X-MimeOLE: Produced By Microsoft MimeOLE V6.00.2800.1165 X-Virus-Scanned: by amavisd-new-20030616-p5 (Debian) at ada-france.org X-BeenThere: comp.lang.ada@ada-france.org X-Mailman-Version: 2.1.3 Precedence: list List-Id: Gateway to the comp.lang.ada Usenet newsgroup List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Xref: archiver1.google.com comp.lang.ada:3399 Date: 2003-12-11T16:45:05-06:00 Here is some code I originally posted on March 8, 2003 which does the word parsing using the facilities of the Ada language standard libraries. 
--  The function "Words" returns an array with an element for each word in
--  the line.  Each array element contains the first and last indices of each
--  word.  This would make determination of the length of each word very easy.

package Word_Parser is

   --  Index bounds of a single word within the string passed to Words.
   --  The word's text is Text (First .. Last) and its length is
   --  Last - First + 1.
   type Word_Boundaries is record
      First : Positive;
      Last  : Natural;
   end record;

   type Word_Boundaries_Array is
     array (Positive range <>) of Word_Boundaries;

   --  Returns one element per word in Text, in left-to-right order.  A word
   --  is a maximal run of characters that are neither whitespace nor ASCII
   --  punctuation.  The indices in each element refer to Text itself, so
   --  they honour Text'First when a slice is passed in.
   --
   --  The result always has lower bound 1 and is empty when Text contains
   --  no words.  There is no limit on the number of words.
   function Words (Text : String) return Word_Boundaries_Array;

end Word_Parser;

with Ada.Strings.Fixed;
with Ada.Strings.Maps;

package body Word_Parser is

   Whitespace : constant String :=
     ' ' & ASCII.HT & ASCII.CR & ASCII.LF;

   --  Every ASCII punctuation character.  The literal previously ended in
   --  "~~", which listed the tilde twice and omitted the backquote.
   Punctuation : constant String :=
     ",./?<>:;'""[]{}!@#$%^&*()_+|-=\~`";

   Delimiters : constant Ada.Strings.Maps.Character_Set :=
     Ada.Strings.Maps.To_Set (Whitespace & Punctuation);

   -----------
   -- Words --
   -----------

   function Words (Text : String) return Word_Boundaries_Array is

      --  Words must be separated by at least one delimiter, so a string of
      --  N characters holds at most ceiling (N / 2) of them.  Sizing the
      --  work buffer this way removes the former fixed 1024-word limit.
      --  Written without "+ 1" so the expression cannot overflow.
      Max_Words : constant Natural :=
        Text'Length / 2 + Text'Length mod 2;

      Found       : Word_Boundaries_Array (1 .. Max_Words);
      Count       : Natural := 0;

      --  Integer rather than Positive: the bounds of a null string are not
      --  required to lie in Positive.
      Scan_From   : Integer := Text'First;

      Token_First : Positive;
      Token_Last  : Natural;

   begin
      --  The loop is never entered for an empty Text, so Find_Token is only
      --  ever handed a non-empty slice.
      while Scan_From <= Text'Last loop
         Ada.Strings.Fixed.Find_Token
           (Source => Text (Scan_From .. Text'Last),
            Set    => Delimiters,
            Test   => Ada.Strings.Outside,
            First  => Token_First,
            Last   => Token_Last);

         --  Find_Token reports "no token" by setting Last to zero.
         exit when Token_Last = 0;

         Count         := Count + 1;
         Found (Count) := (First => Token_First, Last => Token_Last);

         --  Stop once the word reaches the end of Text; this also avoids
         --  computing Token_Last + 1 when Text'Last = Integer'Last.
         exit when Token_Last = Text'Last;

         Scan_From := Token_Last + 1;
      end loop;

      return Found (1 .. Count);
   end Words;

end Word_Parser;

with Ada.Command_Line;
with Ada.Text_IO;
with Word_Parser;

--  Prints every word of the text file named by the single command-line
--  argument, one quoted word per output line, grouped under a
--  "Words in line N" heading for each input line.
procedure Test_Word_Parser is

   File        : Ada.Text_IO.File_Type;

   --  Fixed-size input buffer.  An input line longer than the buffer is
   --  delivered by Get_Line in several pieces, so a word that straddles a
   --  piece boundary is reported as two words.
   Line        : String (1 .. 10240);
   Last        : Natural;
   Line_Number : Ada.Text_IO.Positive_Count;

begin
   if Ada.Command_Line.Argument_Count /= 1 then
      Ada.Text_IO.Put_Line
        (Ada.Text_IO.Standard_Error,
         "USAGE: " & Ada.Command_Line.Command_Name & " <file_name>");

      --  A usage error is a failure; the exit status was previously 0.
      Ada.Command_Line.Set_Exit_Status (Ada.Command_Line.Failure);
      return;
   end if;

   Ada.Text_IO.Open
     (File => File,
      Name => Ada.Command_Line.Argument (1),
      Mode => Ada.Text_IO.In_File);

   while not Ada.Text_IO.End_Of_File (File) loop

      --  Capture the line number before reading.  Deriving it afterwards
      --  as Line (File) - 1 is wrong whenever Get_Line stops because the
      --  buffer is full, or skips a page terminator.
      Line_Number := Ada.Text_IO.Line (File);

      Ada.Text_IO.Get_Line
        (File => File,
         Item => Line,
         Last => Last);

      declare
         Word_Boundary_List : constant Word_Parser.Word_Boundaries_Array :=
           Word_Parser.Words (Line (Line'First .. Last));
      begin
         Ada.Text_IO.Put_Line
           ("Words in line" & Ada.Text_IO.Count'Image (Line_Number));

         for W in Word_Boundary_List'Range loop
            Ada.Text_IO.Put_Line
              (""""
               & Line (Word_Boundary_List (W).First ..
                       Word_Boundary_List (W).Last)
               & """");
         end loop;
      end;
   end loop;

   --  The file was previously left open.
   Ada.Text_IO.Close (File);
end Test_Word_Parser;

--  ----- Original Message -----
--  From: "wave"
--  Newsgroups: comp.lang.ada
--  To:
--  Sent: Thursday, December 11, 2003 4:01 PM
--  Subject: Word counting