From mboxrd@z Thu Jan 1 00:00:00 1970 X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on polar.synack.me X-Spam-Level: X-Spam-Status: No, score=-2.9 required=5.0 tests=BAYES_00,FREEMAIL_FROM, MAILING_LIST_MULTI autolearn=unavailable autolearn_force=no version=3.4.4 X-Google-Thread: 103376,8de7eedad50552f1 X-Google-Attributes: gid103376,public X-Google-Language: ENGLISH,ASCII-7-bit Path: g2news1.google.com!news4.google.com!news.glorb.com!easynet-quince!easynet.net!oleane.net!oleane!freenix!enst.fr!melchior!cuivre.fr.eu.org!melchior.frmug.org!not-for-mail From: Marius Amado Alves Newsgroups: comp.lang.ada Subject: Ada bench : word frequency Date: Wed, 23 Mar 2005 20:39:16 +0000 Organization: Cuivre, Argent, Or Message-ID: References: NNTP-Posting-Host: lovelace.ada-france.org Mime-Version: 1.0 (Apple Message framework v619.2) Content-Type: text/plain; charset=US-ASCII; format=flowed Content-Transfer-Encoding: 7bit X-Trace: melchior.cuivre.fr.eu.org 1111610392 61533 212.85.156.195 (23 Mar 2005 20:39:52 GMT) X-Complaints-To: usenet@melchior.cuivre.fr.eu.org NNTP-Posting-Date: Wed, 23 Mar 2005 20:39:52 +0000 (UTC) To: comp.lang.ada@ada-france.org Return-Path: In-Reply-To: X-Mailer: Apple Mail (2.619.2) X-Virus-Scanned: by amavisd-new-20030616-p10 (Debian) at ada-france.org X-BeenThere: comp.lang.ada@ada-france.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: "Gateway to the comp.lang.ada Usenet newsgroup" List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Xref: g2news1.google.com comp.lang.ada:9828 Date: 2005-03-23T20:39:16+00:00 Here's a shot at the word frequency benchmark. By my calculations, it is as fast as the GCC C benchmark. I did not compare the output with the reference output. A line-by-line comparison requires a criterion for words with the same frequency, which AFAICT is not documented. Also, my current concept of word separator does not include punctuation marks. Again, I could not find a reference definition. The program does not use any external data structures library. It includes its own specific structures, an n-ary tree keyed by word form, and a binary tree keyed by word frequency. It increments the counts in the n-ary tree, then traverses the n-ary tree to populate the binary tree (to order by count), then dumps the latter. with Ada.Characters.Handling; with Ada.Characters.Latin_1; with Ada.Command_Line; with Ada.Streams; with Ada.Streams.Stream_IO; with Ada.Strings.Fixed; with Ada.Text_IO; with Ada.Text_IO.Text_Streams; procedure Word_Frequency is use Ada.Characters.Handling; use Ada.Characters.Latin_1; use Ada.Command_Line; use Ada.Streams; use Ada.Streams.Stream_IO; use Ada.Text_IO; use Ada.Text_IO.Text_Streams; Buffer : Stream_Element_Array (1 .. 4096); Input_Stream : Ada.Text_IO.Text_Streams.Stream_Access := Ada.Text_IO.Text_Streams.Stream (Current_Input); N : Stream_Element_Offset; Is_Separator : array (Stream_Element) of Boolean := (0 .. 32 | 127 .. 159 => True, others => False); -- N-ary tree of word counts -- used to increment the counts in one pass of the input file -- branches on the letter -- carries the count -- very fast -- but very space consuming subtype Letter is Stream_Element range 0 .. 255; type Word is array (Positive range <>) of Letter; type Tree; type Tree_Ptr is access Tree; type Node is record Count : Natural := 0; Subtree : Tree_Ptr := null; end record; type Tree is array (Letter) of Node; procedure Inc (X : in out Integer) is begin X := X + 1; end; procedure Dec (X : in out Integer) is begin X := X - 1; end; procedure Inc_Word (Parent : Tree_Ptr; Descendents : Word) is begin if Descendents'Length > 0 then declare Child_Index : Positive := Descendents'First; Child : Letter renames Descendents (Child_Index); begin if Descendents'Length = 1 then Inc (Parent (Child).Count); else if Parent (Child).Subtree = null then Parent (Child).Subtree := new Tree; end if; Inc_Word (Parent (Child).Subtree, Descendents (Child_Index + 1 .. Descendents'Last)); end if; end; end if; end; -- Binary tree of word counts -- used for sorting the result by the count (frequency) -- branches on the word count -- carries the word form type Form_Ptr is access Word; type Binary_Tree; type Binary_Tree_Ptr is access Binary_Tree; type Binary_Tree is record Form : Form_Ptr; Count : Natural; Left, Right : Binary_Tree_Ptr; end record; procedure Add_Node (Parent : in out Binary_Tree_Ptr; Form : Form_Ptr; Count : Natural) is begin if Parent = null then Parent := new Binary_Tree; Parent.Form := Form; Parent.Count := Count; else if Count < Parent.Count then Add_Node (Parent.Left, Form, Count); else Add_Node (Parent.Right, Form, Count); end if; end if; end; -- end of binary tree primitives Root : Tree_Ptr := new Tree; Btree : Binary_Tree_Ptr := null; Current_Word : Word (1 .. 1000); Current_Word_Length : Natural range 0 .. Current_Word'Last := 0; In_Word : Boolean := False; procedure Append_To_Word (E : Stream_Element) is begin Inc (Current_Word_Length); Current_Word (Current_Word_Length) := E; In_Word := True; end; procedure End_Word is begin if Current_Word_Length > 0 then Inc_Word (Root, Current_Word (1 .. Current_Word_Length)); end if; Current_Word_Length := 0; In_Word := False; end; procedure Process (S : Stream_Element_Array) is begin for I in S'Range loop if Is_Separator (S (I)) then if In_Word then End_Word; end if; else Append_To_Word (S (I)); end if; end loop; end; procedure Populate_Btree (Ntree : Tree_Ptr) is begin Inc (Current_Word_Length); for I in Letter'Range loop Current_Word (Current_Word_Length) := I; if Ntree (I).Count > 0 then Add_Node (Btree, Form => new Word'(Current_Word (1 .. Current_Word_Length)), Count => Ntree (I).Count); end if; if Ntree (I).Subtree /= null then Populate_Btree (Ntree (I).Subtree); end if; end loop; Dec (Current_Word_Length); end; procedure Populate_Btree is begin Current_Word_Length := 0; Populate_Btree (Root); end; function To_String (X : Form_Ptr) return String is S : String (X'Range); begin for I in X'Range loop S (I) := Character'Val (X (I)); end loop; return S; end; procedure Dump_Btree (X : Binary_Tree_Ptr := Btree) is begin if X /= null then Dump_Btree (X.Right); Ada.Text_IO.Put_Line (To_String (X.Form) & Natural'Image (X.Count)); Dump_Btree (X.Left); end if; end; begin loop Read (Root_Stream_Type'Class (Input_Stream.all), Buffer, N); Process (Buffer (1 .. N)); exit when N < Buffer'Length; end loop; if In_Word then End_Word; end if; Populate_Btree; Dump_Btree; end;