comp.lang.ada
 help / color / mirror / Atom feed
From: Marius Amado Alves <amado.alves@netcabo.pt>
To: comp.lang.ada@ada-france.org
Subject: Re: Ada bench : word frequency
Date: Thu, 24 Mar 2005 01:24:55 +0000
Date: 2005-03-24T01:24:55+00:00	[thread overview]
Message-ID: <mailman.60.1111627528.23655.comp.lang.ada@ada-france.org> (raw)
In-Reply-To: <1111613194.164413.310450@l41g2000cwc.googlegroups.com>

Program fixed. Zero differences with the reference result.

It's two times slower than the GCC C benchmark :-(

(CPU times (user+sys) on my iBook for n=25 repetitions of the input 
file: C => 0.75, Ada => 1.45)

However that's already enough to put Ada on the 7th place, after OCaml 
and before Eiffel :-)

Program follows.

with Ada.Characters.Handling;
with Ada.Characters.Latin_1;
with Ada.Command_Line;
with Ada.Streams;
with Ada.Streams.Stream_IO;
with Ada.Strings.Fixed;
with Ada.Text_IO;
with Ada.Text_IO.Text_Streams;

procedure Word_Frequency is

    use Ada.Characters.Handling;
    use Ada.Characters.Latin_1;
    use Ada.Command_Line;
    use Ada.Streams;
    use Ada.Streams.Stream_IO;
    use Ada.Text_IO;
    use Ada.Text_IO.Text_Streams;

    Buffer : Stream_Element_Array (1 .. 4096);
    Input_Stream : Ada.Text_IO.Text_Streams.Stream_Access
      := Ada.Text_IO.Text_Streams.Stream (Current_Input);
    N : Stream_Element_Offset;
    Is_Separator : array (Stream_Element) of Boolean :=
      (Character'Pos ('A') .. Character'Pos ('Z') |
       Character'Pos ('a') .. Character'Pos ('z') => False,
       others => True);

    -- N-ary tree of word counts
    -- used to increment the counts in one pass of the input file
    -- branches on the letter
    -- carries the count
    -- very fast
    -- but very space consuming

    subtype Letter is Stream_Element range
      Character'Pos ('a') .. Character'Pos ('z');
    type Word is array (Positive range <>) of Letter;
    type Tree;
    type Tree_Ptr is access Tree;
    type Node is
       record
          Count : Natural := 0;
          Subtree : Tree_Ptr := null;
       end record;
    type Tree is array (Letter) of Node;

    procedure Inc (X : in out Integer) is begin X := X + 1; end;
    procedure Dec (X : in out Integer) is begin X := X - 1; end;

    procedure Inc_Word (Parent : Tree_Ptr; Descendents : Word) is
    begin
       if Descendents'Length > 0 then
          declare
             Child_Index : Positive := Descendents'First;
             Child : Letter renames Descendents (Child_Index);
          begin
             if Descendents'Length = 1 then
                Inc (Parent (Child).Count);
             else
                if Parent (Child).Subtree = null then
                   Parent (Child).Subtree := new Tree;
                end if;
                Inc_Word
                  (Parent (Child).Subtree,
                   Descendents (Child_Index + 1 .. Descendents'Last));
             end if;
          end;
       end if;
    end;

    -- Binary tree of word counts
    -- used for sorting the result by the count (frequency)
    -- branches on the word count
    -- carries the word form

    type Form_Ptr is access Word;
    type Binary_Tree;
    type Binary_Tree_Ptr is access Binary_Tree;
    type Binary_Tree is
       record
          Form : Form_Ptr;
          Count : Natural;
          Left, Right : Binary_Tree_Ptr;
       end record;

    procedure Add_Node (Parent : in out Binary_Tree_Ptr; Form : 
Form_Ptr; Count : Natural) is
    begin
       if Parent = null then
          Parent := new Binary_Tree;
          Parent.Form := Form;
          Parent.Count := Count;
       else
          if Count < Parent.Count then
             Add_Node (Parent.Left, Form, Count);
          else
             Add_Node (Parent.Right, Form, Count);
          end if;
       end if;
    end;

    -- end of binary tree primitives

    Root : Tree_Ptr := new Tree;
    Btree : Binary_Tree_Ptr := null;
    Current_Word : Word (1 .. 1000);
    Current_Word_Length : Natural range 0 .. Current_Word'Last := 0;
    In_Word : Boolean := False;

    procedure Append_To_Word (E : Letter) is
    begin
       Inc (Current_Word_Length);
       Current_Word (Current_Word_Length) := E;
       In_Word := True;
    end;

    procedure End_Word is
    begin
       if Current_Word_Length > 0 then
          Inc_Word (Root, Current_Word (1 .. Current_Word_Length));
       end if;
       Current_Word_Length := 0;
       In_Word := False;
    end;

    To_Lower : array (Stream_Element) of Letter;

    procedure Initialise_To_Lower_Map is
       D : Integer := Character'Pos ('a') - Character'Pos ('A');
    begin
       for I in Character'Pos ('a') .. Character'Pos ('z') loop
          To_Lower (Stream_Element (I)) := Letter (I);
          To_Lower (Stream_Element (I - D)) := Letter (I);
       end loop;
    end;

    procedure Process (S : Stream_Element_Array) is
    begin
       for I in S'Range loop
          if Is_Separator (S (I)) then
             if In_Word then End_Word; end if;
          else
             Append_To_Word (To_Lower (S (I)));
          end if;
       end loop;
    end;

    procedure Populate_Btree (Ntree : Tree_Ptr) is
    begin
       Inc (Current_Word_Length);
       for I in Letter'Range loop
          Current_Word (Current_Word_Length) := I;
          if Ntree (I).Count > 0 then
             Add_Node
                (Btree,
                 Form => new Word'(Current_Word (1 .. 
Current_Word_Length)),
                 Count => Ntree (I).Count);
          end if;
          if Ntree (I).Subtree /= null then
             Populate_Btree (Ntree (I).Subtree);
          end if;
       end loop;
       Dec (Current_Word_Length);
    end;

    procedure Populate_Btree is
    begin
       Current_Word_Length := 0;
       Populate_Btree (Root);
    end;

    function To_String (X : Form_Ptr) return String is
       S : String (X'Range);
    begin
       for I in X'Range loop
          S (I) := Character'Val (X (I));
       end loop;
       return S;
    end;

    subtype String7 is String (1 .. 7);

    function Img7 (X : Natural) return String7 is
       S : String := Natural'Image (X);
    begin
       return String' (1 .. 8 - S'Length => ' ') & S (2 .. S'Last);
    end;

    procedure Dump_Btree (X : Binary_Tree_Ptr := Btree) is
    begin
       if X /= null then
          Dump_Btree (X.Right);
          Ada.Text_IO.Put_Line
            (Img7 (X.Count) & " " & To_String (X.Form));
          Dump_Btree (X.Left);
       end if;
    end;

begin
    Initialise_To_Lower_Map;
    loop
       Read (Root_Stream_Type'Class (Input_Stream.all), Buffer, N);
       Process (Buffer (1 .. N));
       exit when N < Buffer'Length;
    end loop;
    if In_Word then End_Word; end if;
    Populate_Btree;
    Dump_Btree;
end;




  reply	other threads:[~2005-03-24  1:24 UTC|newest]

Thread overview: 122+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-03-19 16:22 Ada bench Pascal Obry
2005-03-19 16:55 ` Dr. Adrian Wrigley
2005-03-19 21:32   ` Michael Bode
2005-03-20  9:20     ` Pascal Obry
2005-03-20  9:39       ` Michael Bode
2005-03-20 11:16         ` Pascal Obry
2005-03-20 12:20           ` Dmitry A. Kazakov
2005-03-20 12:32             ` Pascal Obry
2005-03-21  1:42             ` (see below)
2005-03-21  2:24               ` (see below)
2005-03-21 15:00                 ` (see below)
2005-03-21  3:54               ` Ed Falis
2005-03-21 12:20                 ` Jeff C
2005-03-21 15:18                 ` (see below)
2005-03-21 15:24                   ` (see below)
2005-03-21 18:56                   ` Isaac Gouy
2005-03-21 21:31                 ` Randy Brukardt
2005-03-21 22:14                   ` Ed Falis
2005-03-21 18:07               ` Pascal Obry
2005-03-20 10:11       ` Adrian Knoth
2005-03-20 10:30         ` Michael Bode
2005-03-21 23:27       ` Georg Bauhaus
2005-03-22  1:16         ` Ada bench : count words Marius Amado Alves
2005-03-22 10:59           ` Dmitry A. Kazakov
2005-03-22 11:57             ` Marius Amado Alves
2005-03-22 12:17               ` Dmitry A. Kazakov
2005-03-22 12:47                 ` Marius Amado Alves
2005-03-22 13:08                   ` Dmitry A. Kazakov
2005-03-22 13:28                     ` Marius Amado Alves
2005-03-22 16:48                     ` Marius Amado Alves
2005-03-22 17:34                       ` Dmitry A. Kazakov
2005-03-27 20:14                         ` jtg
2005-03-27 21:22                           ` Dmitry A. Kazakov
2005-03-28 19:54                             ` jtg
2005-03-28 20:56                               ` Dmitry A. Kazakov
2005-03-29 12:40                                 ` jtg
2005-03-29 13:00                                   ` [OT] " Tapio Kelloniemi
2005-03-29 13:47                                     ` Dmitry A. Kazakov
2005-03-29 15:53                                       ` Tapio Kelloniemi
2005-03-29 16:17                                         ` Dmitry A. Kazakov
     [not found]                                         ` <k33j419lgei1ui89s26o1dlr9ccf1qe1hd@4ax.com>
2005-04-11 23:04                                           ` Marius Amado Alves
2005-03-29 13:47                                   ` Dmitry A. Kazakov
2005-04-01 20:58                                   ` Georg Bauhaus
2005-04-01 20:18                                     ` Pascal Obry
2005-03-22 12:53                 ` Marius Amado Alves
     [not found]                 ` <f205219321dd18dba878fab16b7cb50d@netcabo.pt>
2005-03-22 13:12                   ` Marius Amado Alves
2005-03-23 16:58                     ` Isaac Gouy
2005-03-22 13:58                 ` Robert A Duff
2005-03-22 16:30                   ` Marius Amado Alves
2005-03-22 16:41                     ` Tapio Kelloniemi
2005-03-22 17:39                       ` Marius Amado Alves
2005-03-22 18:59                         ` Dmitry A. Kazakov
2005-03-22 19:08                         ` Tapio Kelloniemi
2005-03-22 18:34                     ` Georg Bauhaus
2005-03-22 19:32                     ` Robert A Duff
2005-03-22 20:04                       ` tmoran
2005-03-23 16:55                       ` Isaac Gouy
     [not found]                   ` <1820eab50b57f2fe1c4e8e50bb0f4fe5@netcabo.pt>
2005-03-22 22:49                     ` Stephen Leake
2005-03-22 22:58                       ` Robert A Duff
2005-03-22 23:27                   ` Larry Kilgallen
2005-03-23 22:33                     ` Robert A Duff
2005-03-24  5:02                     ` Larry Kilgallen
     [not found]                     ` <wccpsxqro0c.fsfOrganization: LJK Software <bM40pHW6P2KW@eisner.encompasserve.org>
2005-03-25  1:34                       ` Robert A Duff
2005-03-22 12:22             ` Jeff C
2005-03-23 16:48               ` Isaac Gouy
2005-03-23 17:06             ` Isaac Gouy
2005-03-22 19:49           ` tmoran
2005-03-22 21:51             ` Dmitry A. Kazakov
2005-03-23  0:16               ` tmoran
2005-03-23  7:25                 ` Dmitry A. Kazakov
2005-03-22 22:33             ` Marius Amado Alves
     [not found]             ` <00b362390273e6c04844dd4ff1885ee0@netcabo.pt>
2005-03-23 15:09               ` Marius Amado Alves
2005-03-23 19:00                 ` tmoran
2005-03-23 19:30                   ` tmoran
2005-03-23 21:38                     ` tmoran
2005-03-25  7:30                       ` Simon Wright
2005-03-25  9:38                         ` tmoran
2005-03-25 16:20                           ` Tapio Kelloniemi
2005-03-25 22:18                             ` Ada:The High Performance Language for Hyperthreaded and Multicore CPUs; was " tmoran
2005-03-23 19:54                   ` Tapio Kelloniemi
2005-03-23 20:39                     ` Ada bench : word frequency Marius Amado Alves
2005-03-23 21:26                       ` Isaac Gouy
2005-03-24  1:24                         ` Marius Amado Alves [this message]
2005-03-24 17:23                           ` Isaac Gouy
2005-03-24 19:52                           ` Martin Dowie
2005-04-11 15:11                             ` Marius Amado Alves
2005-03-24 20:16                           ` Pascal Obry
2005-03-24 22:54                             ` tmoran
2005-04-11 15:38                               ` Marius Amado Alves
2005-03-24 21:18                           ` Gautier Write-only
2005-04-11 15:32                             ` Marius Amado Alves
2005-04-11 23:56                               ` Robert A Duff
2005-04-28  4:04                       ` Matthew Heaney
2005-04-28 20:40                         ` Matthew Heaney
2005-03-23 21:38                     ` Ada bench : count words tmoran
2005-03-24 20:19                   ` tmoran
2005-03-24 21:00                     ` Pascal Obry
2005-03-24 22:54                       ` tmoran
2005-03-30 16:08                 ` Andre
2005-03-30 16:36                   ` Pascal Obry
2005-03-22 22:27           ` Dmitry A. Kazakov
2005-03-23  7:46             ` Pascal Obry
2005-03-23  7:56               ` Dmitry A. Kazakov
2005-03-23 13:38                 ` Robert A Duff
2005-03-22  7:05         ` Ada bench Pascal Obry
2005-04-07 20:59   ` David Sauvage
2005-04-07 23:40     ` David Sauvage
2005-04-08 17:11     ` Pascal Obry
2005-04-08 17:39       ` tmoran
2005-04-08 18:49       ` David Sauvage
2005-04-18 19:14       ` David Sauvage
2005-04-19 16:43         ` Matthew Heaney
2005-04-19 23:22           ` David Sauvage
2005-04-20  0:49             ` Matthew Heaney
2005-04-20  4:22               ` Georg Bauhaus
2005-04-20 21:24               ` David Sauvage
2005-04-20 23:06                 ` Georg Bauhaus
2005-04-20  9:41             ` Pascal Obry
2005-04-20 11:44               ` Matthew Heaney
2005-04-20 14:47                 ` Pascal Obry
2005-04-20 19:26                   ` Georg Bauhaus
2005-04-20 19:34                     ` Pascal Obry
replies disabled

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox