From mboxrd@z Thu Jan  1 00:00:00 1970
X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on polar.synack.me
X-Spam-Level: 
X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00 autolearn=ham
	autolearn_force=no version=3.4.4
X-Google-Language: ENGLISH,ASCII-7-bit
X-Google-Thread: 103376,cda33fc7f63c2885
X-Google-Attributes: gid103376,public
X-Google-ArrivalTime: 2002-01-13 01:15:57 PST
Path: 
 archiver1.google.com!news1.google.com!sn-xit-02!supernews.com!isdnet!freenix!proxad.net!feeder2-1.proxad.net!news2-2.free.fr!not-for-mail
Newsgroups: comp.lang.ada
Subject: Re: list strawman
References: <mailman.1010451062.16465.comp.lang.ada@ada.eu.org>
 <7iE_7.8661$cD4.15714@www.newsranger.com>
 <3c3b13ba$0$212$626a54ce@news.free.fr>
 <sHF_7.8807$cD4.16831@www.newsranger.com>
 <3c3b2aa0$0$212$626a54ce@news.free.fr>
 <zwH_7.9026$cD4.17983@www.newsranger.com>
 <3c3bfabc$0$3190$626a54ce@news.free.fr>
 <4519e058.0201091037.325fbdbb@posting.google.com>
 <3c3eb272$0$282$626a54ce@news.free.fr> <3C3F1A0F.2000706@telepath.com>
From: Jean-Marc Bourguet <jm@bourguet.org>
Date: 12 Jan 2002 16:10:52 +0100
MIME-Version: 1.0
Content-Type: text/plain; charset=us-ascii
Message-ID: <7uir1a.s11.ln@192.168.0.2>
Organization: Guest of ProXad - France
NNTP-Posting-Date: 13 Jan 2002 10:15:55 MET
NNTP-Posting-Host: 62.147.76.142
X-Trace: 1010913355 news2-2.free.fr 201 62.147.76.142
X-Complaints-To: abuse@proxad.net
Xref: archiver1.google.com comp.lang.ada:18850
Date: 2002-01-13T10:15:55+01:00
List-Id: <comp.lang.ada>

Ted Dennison <dennison@telepath.com> writes:

[...]

> That's right, each call to those routines is O(n). However, for a mergesort
> you don't do just one split or merge. At the top level you do 1, and the
> next level you do 2, at the next 4, and so on.
> 
> It doesn't look like the algorithm you presented does this, but then it
> doesn't look like a sort either. It only calls split once, and then
> merge. Its actually a O(n) algorithm. Too bad it doesn't actually sort, or
> you'd be up for some kind of prize. :-)

A merge sort can be splitting once into several sorted lists and then
merging them.  That's what I did.

What I presented is algorithm L in section 5.2.4 of The Art Of
Computer Programming (in the third volume) modified according exercise
12 of the same section.  All the ideas are also presented in the
section 5.4 of the same book. I don't think I'll get a prize for
reading Knuth and his solutions to his exercises :-)

There is a sligth difference with was is presented in Knuth. He use an
additionnal flag in the record to mark the end of the sublists.  I
didn't do that and used the fact that a sublist end when there is a
drop in value.  This has a consequence that sublists may be merged
inexpectedly and so some care must be taken not to go in O(N^2).

More precisely, what I wrote was supposed to be that. Reading Knuth
anew I see one bug which modify the number of operations needed to be
O(N^2) on average.

> Now lets assume it *did* sort, 

It do sort, try it.

Appended is a fixed version with a test harness.

Yours,

-- Jean-Marc

with Ada.Text_IO; use Ada.Text_IO;
with Ada.Command_Line; use Ada.Command_Line;
with Ada.Numerics.Discrete_Random;
with Ada.Unchecked_Deallocation;

procedure Test_Sort is

   ---------------------------------------------------------------------------
   -- Data definition

   type Node;
   type Node_Ptr is access Node;
   type Node is record
      Next: Node_Ptr;
      Value: Positive;
   end record;

   function Ordered (A, B : Positive) return Boolean;

   ---------------------------------------------------------------------------
   -- Natural List Merge Sort
   -- See D.E. Knuth, The Art Of Computer Programming, Volume 3, Second Edition
   -- Algorithm L section 5.2.4 modified according exercise 12 in the same
   -- section, modified to mark the end of the sorted chunks by a drop in value
   -- instead of an additionnal flag.

   procedure Sort (Head : in out Node_Ptr);
   -- Sort the linked list Head.

   procedure Sort (Head : in out Node_Ptr) is

      procedure Split (Head : Node_Ptr; L1, L2 : out Node_Ptr);
      -- Split the sorted chunks of the list Head into L1 and L2

      procedure Merge(L1, L2 : in out Node_Ptr);
      -- Merge the sorted chunks of the list L1 and L2.  If the result has
      -- several chunks, they are splitted between L1 and L2; if the result
      -- has only one chunk, it is in L1

      procedure Split (Head : Node_Ptr; L1, L2 : out Node_Ptr) is
         Last    : Node_Ptr := Head;
         Current : Node_Ptr := Last.Next;
         Other   : Node_Ptr := null;
      begin -- Split
         L1 := Head;
         L2 := null;
         while Current /= null loop
            pragma Assert (Current = Last.Next);
            if not Ordered (Last.Value, Current.Value) then
               -- change the destination list
	       if Other = null then
		  L2 := Current;
	       else
		  Other.Next := Current;
	       end if;
	       Other := Last;
	    end if;
            Last    := Current;
            Current := Last.Next;
         end loop;
         pragma Assert (Last.Next = null);
         if Other /= null then
            Other.Next := null;
         end if;
      end Split;

      procedure Merge(L1, L2 : in out Node_Ptr) is

         procedure Append (C1, C2, Last, Other : in out Node_Ptr);
         -- Append the head of C1 to Last (removing it from C1).  If it is
         -- the last element of its chunks, the rest of the current chunk
         -- in C2 is also appended (removing it from C2) and Last and Other
         -- are swapped.

         procedure Append (C1, C2, Last, Other : in out Node_Ptr) is
         begin -- Append
            if Last = null then
               -- start the list
               if L1 = null then
                  L1 := C1;
               else
                  L2 := C1;
               end if;
            else
               -- Do the append
               Last.Next := C1;
            end if;
            -- Consume
            Last := C1;
            C1 := C1.Next;
            if C1 = null or else not Ordered (Last.Value, C1.Value) then
               -- end of chunk reached, append the rest of the peer chunk
               -- this is the missing part in my previous post, step L5/L7
               -- in Knuth
               loop
                  Last.Next := C2;
                  Last := C2;
                  C2 := C2.Next;
                  exit when C2 = null
                       or else not Ordered (Last.Value, C2.Value);
               end loop;
               -- end of missing part
               -- change the destination list
	       declare
		  Tmp : Node_Ptr := Last;
	       begin
		  Last  := Other;
		  Other := Tmp;
	       end;
            end if;
         end Append;

         Current1 : Node_Ptr := L1;
         Current2 : Node_Ptr := L2;
         Last     : Node_Ptr := null;
         Other    : Node_Ptr := null;
         Tmp      : Node_Ptr := null;

      begin -- Merge
         L1 := null;
         L2 := null;
         pragma Assert (Current1 /= null and Current2 /= null);
         while Current1 /= null and Current2 /= null loop
            if Ordered (Current1.Value, Current2.Value) then
               Append (Current1, Current2, Last, Other);
            else
               Append (Current2, Current1, Last, Other);
            end if;
         end loop;
         pragma Assert (L1 /= null and Other /= null);
         if Current1 /= null then
            Tmp := Current1;
         else
            Tmp := Current2;
         end if;
         if Tmp /= null then
            if Last = null then
               L2   := Tmp;
               Last := Tmp;
               Tmp  := Last.Next;
            else
               pragma Assert (L2 /= null);
               -- set up the loop invariant
               Last.Next := Tmp;
            end if;
	    -- Needed while it is not needed in Knuth because we detect end
	    -- of chunks by drop of value and so may merge some chunks
	    -- inexpectitly.  Ensure here that the number of chunks is more
	    -- or less balanced between L1 and L2.  Doing it more precisely
	    -- is not worthwhile in number of comparisons. This is very
	    -- similar to Split and in fact by changing precondition, Merge
	    -- and Split could be merged or Split called here.
            while Tmp /= null loop
               pragma Assert (Last.Next = Tmp);
               if not Ordered (Last.Value, Tmp.Value) then
                  -- change destination list
                  Other.Next := Tmp;
                  Other := Last;
               end if;
               Last := Tmp;
               Tmp  := Last.Next;
            end loop;
         else
            if Last /= null then
               Last.Next := null;
            end if;
         end if;
         if Other /= null then
            Other.Next := null;
         end if;
      end Merge;

      Aux : Node_Ptr := null;

   begin -- Sort
      if Head = null then
         return;
      end if;
      Split (Head, Head, Aux);
      while Aux /= null loop
         Merge (Head, Aux);
      end loop;
   end Sort;

   ---------------------------------------------------------------------------
   -- Test bench

   procedure Free is
      new Ada.Unchecked_Deallocation(Node, Node_Ptr);

   package Random_Positives is
      new Ada.Numerics.Discrete_Random(Positive);

   Comparison_Count : Long_Integer;

   function Ordered (A, B : Positive) return Boolean is
   begin
      Comparison_Count := Comparison_Count + 1;
      return A <= B;
   end Ordered;

   Generator : Random_Positives.Generator;

   Failed_Test_Count : Natural := 0;

   procedure Check (N : Positive; Sum : in out Long_Integer) is

      function Allocate_List (N : Positive) return Node_Ptr is
         Result : Node_Ptr;
         Last   : Node_Ptr;
         State  : Random_Positives.State;
      begin
         Random_Positives.Save (Generator, State);
         Put ("Sorting " & Positive'Image(N)
              &" numbers (seed="&Random_Positives.Image(State)&")... ");
         Flush;
         Result := new Node'(null, Random_Positives.Random(Generator));
         Last := Result;
         for I in 2 .. N loop
            Last.Next := new Node'(null, Random_Positives.Random(Generator));
            Last := Last.Next;
         end loop;
         return Result;
      end Allocate_List;

      procedure Check_And_Free_List (N : Positive; Head : Node_Ptr) is
         Current : Node_Ptr := Head;
         Next : Node_Ptr := Head;
         Count : Positive := 1;
      begin
         while Current.Next /= null loop
            Count := Count + 1;
            Next := Current.Next;
            if not (Current.Value <= Next.Value) or Count > N then
               Put_Line (" ERROR");
               Failed_Test_Count := Failed_Test_Count + 1;
               return;
            end if;
            Free (Current);
            Current := Next;
         end loop;
         Free (Current);
         if Count = N then
            Put_Line (" OK");
         else
            Put_Line (" ERROR");
            Failed_Test_Count := Failed_Test_Count + 1;
         end if;
      end Check_And_Free_List;

      Head : Node_Ptr := Allocate_List (N);

   begin
      Comparison_Count := 0;
      Sort (Head);
      Sum := Sum + Long_Integer(Comparison_Count);
      Put (Long_Integer'Image(Comparison_Count) & " cmp ");
      Flush;
      Check_And_Free_List (N, Head);
   end Check;

   Number_Of_Tests : Integer := 10;

   procedure Summary (N : Positive; Count : Long_Integer) is
   begin
      Put_Line ("Sorting " & Positive'Image(N) & " elements took "
                & Long_Integer'Image(Count) & " comparisons, "
                & Float'Image(Float(Count)/Float(Number_Of_Tests)/Float(N))
                & " per element per run");
   end Summary;

   Accu10,
   Accu100,
   Accu1000,
   Accu10000,
   Accu100000,
   Accu1000000 : Long_Integer := 0;

   Reset_Generator : Boolean := False;

begin -- Test_Sort
   if Argument_Count > 2 then
      Put_Line ("test_sort [number_of_run [reset_generator]]");
      return;
   end if;
   if Argument_Count > 0 then
      begin
         Number_Of_Tests := Integer'Value(Argument(1));
      exception
         when Constraint_Error =>
            Put_Line (Argument(1)&" is not a number");
            return;
      end;
   end if;
   if Argument_Count > 1 then
      begin
         Reset_Generator := Boolean'Value(Argument(2));
      exception
         when Constraint_Error =>
            Put_Line (Argument(1)&" is not a boolean");
            return;
      end;
   end if;
   if Reset_Generator then
      Random_Positives.Reset(Generator);
   end if;
   for I in 1 .. Number_Of_Tests loop
      Put_Line ("Run" & Integer'Image(I));
      Check (10, Accu10);
      Check (100, Accu100);
      Check (1000, Accu1000);
      Check (10000, Accu10000);
      Check (100000, Accu100000);
      Check (1000000, Accu1000000);
   end loop;
   Put_Line ("For " & Positive'Image(Number_Of_Tests) & " runs");
   Put_Line (Natural'Image(Failed_Test_Count)&" errors");
   Summary (10, Accu10);
   Summary (100, Accu100);
   Summary (1000, Accu1000);
   Summary (10000, Accu10000);
   Summary (100000, Accu100000);
   Summary (1000000, Accu1000000);
end Test_Sort;