From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on polar.synack.me X-Spam-Level: X-Spam-Status: No, score=-2.9 required=5.0 tests=BAYES_00,FREEMAIL_FROM, MAILING_LIST_MULTI autolearn=unavailable autolearn_force=no version=3.4.4 X-Google-Thread: 103376,8de7eedad50552f1 X-Google-Attributes: gid103376,public X-Google-Language: ENGLISH,ASCII-7-bit Path: g2news1.google.com!news1.google.com!proxad.net!usenet-fr.net!enst.fr!melchior!cuivre.fr.eu.org!melchior.frmug.org!not-for-mail From: Marius Amado Alves Newsgroups: comp.lang.ada Subject: Re: Ada bench : word frequency Date: Thu, 24 Mar 2005 01:24:55 +0000 Organization: Cuivre, Argent, Or Message-ID: References: <1111613194.164413.310450@l41g2000cwc.googlegroups.com> NNTP-Posting-Host: lovelace.ada-france.org Mime-Version: 1.0 (Apple Message framework v619.2) Content-Type: text/plain; charset=US-ASCII; format=flowed Content-Transfer-Encoding: 7bit X-Trace: melchior.cuivre.fr.eu.org 1111627530 6363 212.85.156.195 (24 Mar 2005 01:25:30 GMT) X-Complaints-To: usenet@melchior.cuivre.fr.eu.org NNTP-Posting-Date: Thu, 24 Mar 2005 01:25:30 +0000 (UTC) To: comp.lang.ada@ada-france.org Return-Path: In-Reply-To: <1111613194.164413.310450@l41g2000cwc.googlegroups.com> X-Mailer: Apple Mail (2.619.2) X-OriginalArrivalTime: 24 Mar 2005 01:24:57.0175 (UTC) FILETIME=[4A292E70:01C53010] X-Virus-Scanned: by amavisd-new-20030616-p10 (Debian) at ada-france.org X-BeenThere: comp.lang.ada@ada-france.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "Gateway to the comp.lang.ada Usenet newsgroup" List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Xref: g2news1.google.com comp.lang.ada:9858 Date: 2005-03-24T01:24:55+00:00 Program fixed. Zero differences with the reference result. It's two times slower than the GCC C benchmark :-( (CPU times (user+sys) on my iBook for n=25 repetitions of the input file: C => 0.75, Ada => 1.45) However that's already enough to put Ada on the 7th place, after OCaml and before Eiffel :-) Program follows. with Ada.Characters.Handling; with Ada.Characters.Latin_1; with Ada.Command_Line; with Ada.Streams; with Ada.Streams.Stream_IO; with Ada.Strings.Fixed; with Ada.Text_IO; with Ada.Text_IO.Text_Streams; procedure Word_Frequency is use Ada.Characters.Handling; use Ada.Characters.Latin_1; use Ada.Command_Line; use Ada.Streams; use Ada.Streams.Stream_IO; use Ada.Text_IO; use Ada.Text_IO.Text_Streams; Buffer : Stream_Element_Array (1 .. 4096); Input_Stream : Ada.Text_IO.Text_Streams.Stream_Access := Ada.Text_IO.Text_Streams.Stream (Current_Input); N : Stream_Element_Offset; Is_Separator : array (Stream_Element) of Boolean := (Character'Pos ('A') .. Character'Pos ('Z') | Character'Pos ('a') .. Character'Pos ('z') => False, others => True); -- N-ary tree of word counts -- used to increment the counts in one pass of the input file -- branches on the letter -- carries the count -- very fast -- but very space consuming subtype Letter is Stream_Element range Character'Pos ('a') .. Character'Pos ('z'); type Word is array (Positive range <>) of Letter; type Tree; type Tree_Ptr is access Tree; type Node is record Count : Natural := 0; Subtree : Tree_Ptr := null; end record; type Tree is array (Letter) of Node; procedure Inc (X : in out Integer) is begin X := X + 1; end; procedure Dec (X : in out Integer) is begin X := X - 1; end; procedure Inc_Word (Parent : Tree_Ptr; Descendents : Word) is begin if Descendents'Length > 0 then declare Child_Index : Positive := Descendents'First; Child : Letter renames Descendents (Child_Index); begin if Descendents'Length = 1 then Inc (Parent (Child).Count); else if Parent (Child).Subtree = null then Parent (Child).Subtree := new Tree; end if; Inc_Word (Parent (Child).Subtree, Descendents (Child_Index + 1 .. Descendents'Last)); end if; end; end if; end; -- Binary tree of word counts -- used for sorting the result by the count (frequency) -- branches on the word count -- carries the word form type Form_Ptr is access Word; type Binary_Tree; type Binary_Tree_Ptr is access Binary_Tree; type Binary_Tree is record Form : Form_Ptr; Count : Natural; Left, Right : Binary_Tree_Ptr; end record; procedure Add_Node (Parent : in out Binary_Tree_Ptr; Form : Form_Ptr; Count : Natural) is begin if Parent = null then Parent := new Binary_Tree; Parent.Form := Form; Parent.Count := Count; else if Count < Parent.Count then Add_Node (Parent.Left, Form, Count); else Add_Node (Parent.Right, Form, Count); end if; end if; end; -- end of binary tree primitives Root : Tree_Ptr := new Tree; Btree : Binary_Tree_Ptr := null; Current_Word : Word (1 .. 1000); Current_Word_Length : Natural range 0 .. Current_Word'Last := 0; In_Word : Boolean := False; procedure Append_To_Word (E : Letter) is begin Inc (Current_Word_Length); Current_Word (Current_Word_Length) := E; In_Word := True; end; procedure End_Word is begin if Current_Word_Length > 0 then Inc_Word (Root, Current_Word (1 .. Current_Word_Length)); end if; Current_Word_Length := 0; In_Word := False; end; To_Lower : array (Stream_Element) of Letter; procedure Initialise_To_Lower_Map is D : Integer := Character'Pos ('a') - Character'Pos ('A'); begin for I in Character'Pos ('a') .. Character'Pos ('z') loop To_Lower (Stream_Element (I)) := Letter (I); To_Lower (Stream_Element (I - D)) := Letter (I); end loop; end; procedure Process (S : Stream_Element_Array) is begin for I in S'Range loop if Is_Separator (S (I)) then if In_Word then End_Word; end if; else Append_To_Word (To_Lower (S (I))); end if; end loop; end; procedure Populate_Btree (Ntree : Tree_Ptr) is begin Inc (Current_Word_Length); for I in Letter'Range loop Current_Word (Current_Word_Length) := I; if Ntree (I).Count > 0 then Add_Node (Btree, Form => new Word'(Current_Word (1 .. Current_Word_Length)), Count => Ntree (I).Count); end if; if Ntree (I).Subtree /= null then Populate_Btree (Ntree (I).Subtree); end if; end loop; Dec (Current_Word_Length); end; procedure Populate_Btree is begin Current_Word_Length := 0; Populate_Btree (Root); end; function To_String (X : Form_Ptr) return String is S : String (X'Range); begin for I in X'Range loop S (I) := Character'Val (X (I)); end loop; return S; end; subtype String7 is String (1 .. 7); function Img7 (X : Natural) return String7 is S : String := Natural'Image (X); begin return String' (1 .. 8 - S'Length => ' ') & S (2 .. S'Last); end; procedure Dump_Btree (X : Binary_Tree_Ptr := Btree) is begin if X /= null then Dump_Btree (X.Right); Ada.Text_IO.Put_Line (Img7 (X.Count) & " " & To_String (X.Form)); Dump_Btree (X.Left); end if; end; begin Initialise_To_Lower_Map; loop Read (Root_Stream_Type'Class (Input_Stream.all), Buffer, N); Process (Buffer (1 .. N)); exit when N < Buffer'Length; end loop; if In_Word then End_Word; end if; Populate_Btree; Dump_Btree; end;