From: Marius Amado Alves <amado.alves@netcabo.pt>
To: comp.lang.ada@ada-france.org
Subject: Re: Ada bench : word frequency
Date: Thu, 24 Mar 2005 01:24:55 +0000
Date: 2005-03-24T01:24:55+00:00 [thread overview]
Message-ID: <mailman.60.1111627528.23655.comp.lang.ada@ada-france.org> (raw)
In-Reply-To: <1111613194.164413.310450@l41g2000cwc.googlegroups.com>
Program fixed. Zero differences with the reference result.
It's two times slower than the GCC C benchmark :-(
(CPU times (user+sys) on my iBook for n=25 repetitions of the input
file: C => 0.75, Ada => 1.45)
However that's already enough to put Ada on the 7th place, after OCaml
and before Eiffel :-)
Program follows.
with Ada.Characters.Handling;
with Ada.Characters.Latin_1;
with Ada.Command_Line;
with Ada.Streams;
with Ada.Streams.Stream_IO;
with Ada.Strings.Fixed;
with Ada.Text_IO;
with Ada.Text_IO.Text_Streams;
procedure Word_Frequency is
use Ada.Characters.Handling;
use Ada.Characters.Latin_1;
use Ada.Command_Line;
use Ada.Streams;
use Ada.Streams.Stream_IO;
use Ada.Text_IO;
use Ada.Text_IO.Text_Streams;
Buffer : Stream_Element_Array (1 .. 4096);
Input_Stream : Ada.Text_IO.Text_Streams.Stream_Access
:= Ada.Text_IO.Text_Streams.Stream (Current_Input);
N : Stream_Element_Offset;
Is_Separator : array (Stream_Element) of Boolean :=
(Character'Pos ('A') .. Character'Pos ('Z') |
Character'Pos ('a') .. Character'Pos ('z') => False,
others => True);
-- N-ary tree of word counts
-- used to increment the counts in one pass of the input file
-- branches on the letter
-- carries the count
-- very fast
-- but very space consuming
subtype Letter is Stream_Element range
Character'Pos ('a') .. Character'Pos ('z');
type Word is array (Positive range <>) of Letter;
type Tree;
type Tree_Ptr is access Tree;
type Node is
record
Count : Natural := 0;
Subtree : Tree_Ptr := null;
end record;
type Tree is array (Letter) of Node;
procedure Inc (X : in out Integer) is begin X := X + 1; end;
procedure Dec (X : in out Integer) is begin X := X - 1; end;
procedure Inc_Word (Parent : Tree_Ptr; Descendents : Word) is
begin
if Descendents'Length > 0 then
declare
Child_Index : Positive := Descendents'First;
Child : Letter renames Descendents (Child_Index);
begin
if Descendents'Length = 1 then
Inc (Parent (Child).Count);
else
if Parent (Child).Subtree = null then
Parent (Child).Subtree := new Tree;
end if;
Inc_Word
(Parent (Child).Subtree,
Descendents (Child_Index + 1 .. Descendents'Last));
end if;
end;
end if;
end;
-- Binary tree of word counts
-- used for sorting the result by the count (frequency)
-- branches on the word count
-- carries the word form
type Form_Ptr is access Word;
type Binary_Tree;
type Binary_Tree_Ptr is access Binary_Tree;
type Binary_Tree is
record
Form : Form_Ptr;
Count : Natural;
Left, Right : Binary_Tree_Ptr;
end record;
procedure Add_Node (Parent : in out Binary_Tree_Ptr; Form :
Form_Ptr; Count : Natural) is
begin
if Parent = null then
Parent := new Binary_Tree;
Parent.Form := Form;
Parent.Count := Count;
else
if Count < Parent.Count then
Add_Node (Parent.Left, Form, Count);
else
Add_Node (Parent.Right, Form, Count);
end if;
end if;
end;
-- end of binary tree primitives
Root : Tree_Ptr := new Tree;
Btree : Binary_Tree_Ptr := null;
Current_Word : Word (1 .. 1000);
Current_Word_Length : Natural range 0 .. Current_Word'Last := 0;
In_Word : Boolean := False;
procedure Append_To_Word (E : Letter) is
begin
Inc (Current_Word_Length);
Current_Word (Current_Word_Length) := E;
In_Word := True;
end;
procedure End_Word is
begin
if Current_Word_Length > 0 then
Inc_Word (Root, Current_Word (1 .. Current_Word_Length));
end if;
Current_Word_Length := 0;
In_Word := False;
end;
To_Lower : array (Stream_Element) of Letter;
procedure Initialise_To_Lower_Map is
D : Integer := Character'Pos ('a') - Character'Pos ('A');
begin
for I in Character'Pos ('a') .. Character'Pos ('z') loop
To_Lower (Stream_Element (I)) := Letter (I);
To_Lower (Stream_Element (I - D)) := Letter (I);
end loop;
end;
procedure Process (S : Stream_Element_Array) is
begin
for I in S'Range loop
if Is_Separator (S (I)) then
if In_Word then End_Word; end if;
else
Append_To_Word (To_Lower (S (I)));
end if;
end loop;
end;
procedure Populate_Btree (Ntree : Tree_Ptr) is
begin
Inc (Current_Word_Length);
for I in Letter'Range loop
Current_Word (Current_Word_Length) := I;
if Ntree (I).Count > 0 then
Add_Node
(Btree,
Form => new Word'(Current_Word (1 ..
Current_Word_Length)),
Count => Ntree (I).Count);
end if;
if Ntree (I).Subtree /= null then
Populate_Btree (Ntree (I).Subtree);
end if;
end loop;
Dec (Current_Word_Length);
end;
procedure Populate_Btree is
begin
Current_Word_Length := 0;
Populate_Btree (Root);
end;
function To_String (X : Form_Ptr) return String is
S : String (X'Range);
begin
for I in X'Range loop
S (I) := Character'Val (X (I));
end loop;
return S;
end;
subtype String7 is String (1 .. 7);
function Img7 (X : Natural) return String7 is
S : String := Natural'Image (X);
begin
return String' (1 .. 8 - S'Length => ' ') & S (2 .. S'Last);
end;
procedure Dump_Btree (X : Binary_Tree_Ptr := Btree) is
begin
if X /= null then
Dump_Btree (X.Right);
Ada.Text_IO.Put_Line
(Img7 (X.Count) & " " & To_String (X.Form));
Dump_Btree (X.Left);
end if;
end;
begin
Initialise_To_Lower_Map;
loop
Read (Root_Stream_Type'Class (Input_Stream.all), Buffer, N);
Process (Buffer (1 .. N));
exit when N < Buffer'Length;
end loop;
if In_Word then End_Word; end if;
Populate_Btree;
Dump_Btree;
end;
next prev parent reply other threads:[~2005-03-24 1:24 UTC|newest]
Thread overview: 122+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-03-19 16:22 Ada bench Pascal Obry
2005-03-19 16:55 ` Dr. Adrian Wrigley
2005-03-19 21:32 ` Michael Bode
2005-03-20 9:20 ` Pascal Obry
2005-03-20 9:39 ` Michael Bode
2005-03-20 11:16 ` Pascal Obry
2005-03-20 12:20 ` Dmitry A. Kazakov
2005-03-20 12:32 ` Pascal Obry
2005-03-21 1:42 ` (see below)
2005-03-21 2:24 ` (see below)
2005-03-21 15:00 ` (see below)
2005-03-21 3:54 ` Ed Falis
2005-03-21 12:20 ` Jeff C
2005-03-21 15:18 ` (see below)
2005-03-21 15:24 ` (see below)
2005-03-21 18:56 ` Isaac Gouy
2005-03-21 21:31 ` Randy Brukardt
2005-03-21 22:14 ` Ed Falis
2005-03-21 18:07 ` Pascal Obry
2005-03-20 10:11 ` Adrian Knoth
2005-03-20 10:30 ` Michael Bode
2005-03-21 23:27 ` Georg Bauhaus
2005-03-22 1:16 ` Ada bench : count words Marius Amado Alves
2005-03-22 10:59 ` Dmitry A. Kazakov
2005-03-22 11:57 ` Marius Amado Alves
2005-03-22 12:17 ` Dmitry A. Kazakov
2005-03-22 12:47 ` Marius Amado Alves
2005-03-22 13:08 ` Dmitry A. Kazakov
2005-03-22 13:28 ` Marius Amado Alves
2005-03-22 16:48 ` Marius Amado Alves
2005-03-22 17:34 ` Dmitry A. Kazakov
2005-03-27 20:14 ` jtg
2005-03-27 21:22 ` Dmitry A. Kazakov
2005-03-28 19:54 ` jtg
2005-03-28 20:56 ` Dmitry A. Kazakov
2005-03-29 12:40 ` jtg
2005-03-29 13:00 ` [OT] " Tapio Kelloniemi
2005-03-29 13:47 ` Dmitry A. Kazakov
2005-03-29 15:53 ` Tapio Kelloniemi
2005-03-29 16:17 ` Dmitry A. Kazakov
[not found] ` <k33j419lgei1ui89s26o1dlr9ccf1qe1hd@4ax.com>
2005-04-11 23:04 ` Marius Amado Alves
2005-03-29 13:47 ` Dmitry A. Kazakov
2005-04-01 20:58 ` Georg Bauhaus
2005-04-01 20:18 ` Pascal Obry
2005-03-22 12:53 ` Marius Amado Alves
[not found] ` <f205219321dd18dba878fab16b7cb50d@netcabo.pt>
2005-03-22 13:12 ` Marius Amado Alves
2005-03-23 16:58 ` Isaac Gouy
2005-03-22 13:58 ` Robert A Duff
2005-03-22 16:30 ` Marius Amado Alves
2005-03-22 16:41 ` Tapio Kelloniemi
2005-03-22 17:39 ` Marius Amado Alves
2005-03-22 18:59 ` Dmitry A. Kazakov
2005-03-22 19:08 ` Tapio Kelloniemi
2005-03-22 18:34 ` Georg Bauhaus
2005-03-22 19:32 ` Robert A Duff
2005-03-22 20:04 ` tmoran
2005-03-23 16:55 ` Isaac Gouy
[not found] ` <1820eab50b57f2fe1c4e8e50bb0f4fe5@netcabo.pt>
2005-03-22 22:49 ` Stephen Leake
2005-03-22 22:58 ` Robert A Duff
2005-03-22 23:27 ` Larry Kilgallen
2005-03-23 22:33 ` Robert A Duff
2005-03-24 5:02 ` Larry Kilgallen
[not found] ` <wccpsxqro0c.fsfOrganization: LJK Software <bM40pHW6P2KW@eisner.encompasserve.org>
2005-03-25 1:34 ` Robert A Duff
2005-03-22 12:22 ` Jeff C
2005-03-23 16:48 ` Isaac Gouy
2005-03-23 17:06 ` Isaac Gouy
2005-03-22 19:49 ` tmoran
2005-03-22 21:51 ` Dmitry A. Kazakov
2005-03-23 0:16 ` tmoran
2005-03-23 7:25 ` Dmitry A. Kazakov
2005-03-22 22:33 ` Marius Amado Alves
[not found] ` <00b362390273e6c04844dd4ff1885ee0@netcabo.pt>
2005-03-23 15:09 ` Marius Amado Alves
2005-03-23 19:00 ` tmoran
2005-03-23 19:30 ` tmoran
2005-03-23 21:38 ` tmoran
2005-03-25 7:30 ` Simon Wright
2005-03-25 9:38 ` tmoran
2005-03-25 16:20 ` Tapio Kelloniemi
2005-03-25 22:18 ` Ada:The High Performance Language for Hyperthreaded and Multicore CPUs; was " tmoran
2005-03-23 19:54 ` Tapio Kelloniemi
2005-03-23 20:39 ` Ada bench : word frequency Marius Amado Alves
2005-03-23 21:26 ` Isaac Gouy
2005-03-24 1:24 ` Marius Amado Alves [this message]
2005-03-24 17:23 ` Isaac Gouy
2005-03-24 19:52 ` Martin Dowie
2005-04-11 15:11 ` Marius Amado Alves
2005-03-24 20:16 ` Pascal Obry
2005-03-24 22:54 ` tmoran
2005-04-11 15:38 ` Marius Amado Alves
2005-03-24 21:18 ` Gautier Write-only
2005-04-11 15:32 ` Marius Amado Alves
2005-04-11 23:56 ` Robert A Duff
2005-04-28 4:04 ` Matthew Heaney
2005-04-28 20:40 ` Matthew Heaney
2005-03-23 21:38 ` Ada bench : count words tmoran
2005-03-24 20:19 ` tmoran
2005-03-24 21:00 ` Pascal Obry
2005-03-24 22:54 ` tmoran
2005-03-30 16:08 ` Andre
2005-03-30 16:36 ` Pascal Obry
2005-03-22 22:27 ` Dmitry A. Kazakov
2005-03-23 7:46 ` Pascal Obry
2005-03-23 7:56 ` Dmitry A. Kazakov
2005-03-23 13:38 ` Robert A Duff
2005-03-22 7:05 ` Ada bench Pascal Obry
2005-04-07 20:59 ` David Sauvage
2005-04-07 23:40 ` David Sauvage
2005-04-08 17:11 ` Pascal Obry
2005-04-08 17:39 ` tmoran
2005-04-08 18:49 ` David Sauvage
2005-04-18 19:14 ` David Sauvage
2005-04-19 16:43 ` Matthew Heaney
2005-04-19 23:22 ` David Sauvage
2005-04-20 0:49 ` Matthew Heaney
2005-04-20 4:22 ` Georg Bauhaus
2005-04-20 21:24 ` David Sauvage
2005-04-20 23:06 ` Georg Bauhaus
2005-04-20 9:41 ` Pascal Obry
2005-04-20 11:44 ` Matthew Heaney
2005-04-20 14:47 ` Pascal Obry
2005-04-20 19:26 ` Georg Bauhaus
2005-04-20 19:34 ` Pascal Obry
replies disabled
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox