From mboxrd@z Thu Jan  1 00:00:00 1970
X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on polar.synack.me
X-Spam-Level: 
X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00 autolearn=ham
	autolearn_force=no version=3.4.4
X-Google-Language: ENGLISH,ASCII-7-bit
X-Google-Thread: 109fba,4873305131bf4d94
X-Google-Attributes: gid109fba,public
X-Google-Thread: 1014db,4873305131bf4d94
X-Google-Attributes: gid1014db,public
X-Google-Thread: 103376,4873305131bf4d94
X-Google-Attributes: gid103376,public
From: kaz@helios.crest.nt.com (Kaz Kylheku)
Subject: Re: Porting Experiences (was Ada and Pascal etc )
Date: 1997/11/06
Message-ID: <63t0mc$ltl$1@helios.crest.nt.com>
X-Deja-AN: 288179018
References: <34557f2b.1934172@news.mindspring.com>
 <3460A7BB.3CCD27DC@hso.link.com> <63qq9k$njt$1@helios.crest.nt.com>
 <34623DA4.7EE5@dynamite.com.au>
Organization: A poorly-installed InterNetNews site
Newsgroups: comp.lang.ada,comp.lang.c,comp.lang.c++
Date: 1997-11-06T00:00:00+00:00
List-Id: <comp.lang.ada>


In article <34623DA4.7EE5@dynamite.com.au>,
Alan E & Carmel J Brain  <aebrain@dynamite.com.au> wrote:
>YES PLEASE. Any help from anyone is always welcome. So much to learn,
>and only one lifetime, *SIGH*
>
>OK, I would like the following:
>
>1st(0th) byte contains telegram type. This has legal values of FF,FE,and 00.
>If the value is FF, then bytes 4-7 contain an IEEE floating point between
>-1.7666 e6 and 2.34 e12. Byte 3 contains an enumeration type, which has the
>values Red,Green, Yellow, corresponding to 01, 10, and FF (hex).  If the value
>is FE, bytes 1-3 contain a binary angle (fixed point), first 13 bits before
>the binary point. Bytes 4-11 contain a packed ANSI 7-bit string.  If the value
>is 00, then the rest is "don't care".
>
>The reading in must check that all values are correct. The floating point will
>be decoded into the form of mantissa, sign, exponent etc.
>
>This is a nice, simple one. I'll get into the really polymorphic records
>later.
>
>Over 2 U.

Okay, first of all we make the assumption that each octet in this datagram is
mapped to one byte in the C execution environment; a C byte need not be an
octet.  This is an issue which has to do with how the data are communicated to
the execution environment of the C program. For example, on a 36-bit mainframe
that has 9-bit bytes, data brought in from the 8-bit world are not chopped into
9-bit units; rather, each octet is mapped to an individual 9-bit byte, which
then has a value from 0 to 255 (if interpreted as unsigned char).

The IEEE floating point handling is probably going to be a little shaky, but I
will do my best.

I haven't fully tested this (due to boredom), so let me know if I missed a
requirement, or botched something up.

I also have only implemented the decoding of the telegram, not encoding.

/*------------------------------------------------------------------------*/

    #include <stddef.h>	/* for size_t	*/
    #include <math.h>	/* for ldexp()	*/

    /* 
     * First, we declare some C data type that _internally_ represents the
     * telegram/datagram.
     */

    /*
     * Color code of a type 0xFF telegram.  The enumerator values are the
     * octet values used in the telegram itself, so telegram_decode() can
     * store the received octet directly and compare it against the
     * enumerators.
     */
    typedef enum { red = 0x01, green = 0x10, yellow = 0xff } telegram_color;

    /*
     * Decoded, in-memory form of one telegram.  Which members are
     * meaningful depends on `type': telegram_decode() fills in only the
     * members that belong to the telegram type it received.
     */
    typedef struct {
	int type;		/* octet 0 of the telegram (legal: 0xFF, 0xFE, 0x00) */
	double number;		/* type 0xFF: decoded floating point value */
	unsigned long angle;	/* type 0xFE: 24 bit fixed point angle as a scaled integer */
	telegram_color color;	/* type 0xFF: color octet */
	char string[10];	/* type 0xFE: nine unpacked 7 bit characters and a closing 0 */
    } telegram;

    /* Decode a complete telegram from buf into *tel; 1 on success, 0 if rejected. */
    int telegram_decode(telegram *tel, unsigned char *buf);
    /* Convert four octets holding a big endian IEEE 754 single into a double. */
    double ieee_single_decode(unsigned char *buf);
    /* Combine three octets, most significant first, into one unsigned value. */
    unsigned long fixed_24_decode(unsigned char *buf);
    /* Unpack 7 bit characters from src into dest; the count is in src octets. */
    void packed_string_decode(unsigned char *dest, unsigned char *src, size_t);

    /*
     * Decode the telegram held in buf into *tel.
     *
     * buf must hold a complete telegram, one octet per byte (octets 0-11
     * are read for the longest layout), each in the range 0 to 255 even if
     * unsigned char has a larger range.
     *
     * Octet 0 selects the layout:
     *   0xFF  octet 3 is a color code (red, green or yellow) and octets
     *         4-7 are an IEEE single that must lie in [-1.7666E6, 2.34E12]
     *   0xFE  octets 1-3 are a 24 bit fixed point angle and octets 4-11
     *         are a packed 7 bit string
     *   0x00  the remaining octets are ignored
     *
     * Returns 1 if the telegram is acceptable, 0 if anything is wrong with
     * its contents.  tel->type is always written; after a 0 return the
     * other members may be partially written and must not be relied on.
     */

    int telegram_decode(telegram *tel, unsigned char *buf)
    {
	tel->type = buf[0];

	switch (tel->type) {
	case 0xFF:
	    tel->number = ieee_single_decode(buf + 4);
	    /*
	     * Written as a negated "in range" test so that a NaN, which
	     * compares false against everything, can never be accepted.
	     */
	    if (!(tel->number >= -1.7666E6 && tel->number <= 2.34E12))
		return 0;
	    tel->color = buf[3];
	    if (tel->color != red && tel->color != green && tel->color != yellow)
		return 0;
	    break;
	case 0xFE:
	    tel->angle = fixed_24_decode(buf + 1);
	    /*
	     * tel->string is plain char but the unpacker works on unsigned
	     * char, so the pointer has to be converted explicitly.  Eight
	     * octets unpack to nine characters, string[0] .. string[8].
	     */
	    packed_string_decode((unsigned char *) tel->string, buf + 4, 8);
	    tel->string[9] = 0;
	    break;
	case 0x00:	/* the rest of the telegram is "don't care" */
	    break;
	default:	/* not a legal telegram type */
	    return 0;
	}

	return 1;
    }
  
    /*
     * Decode an IEEE 754 single precision number, stored big endian in the
     * four octets buf[0] .. buf[3], into a value of type double.
     *
     * Most significant bit first, the encoding consists of a sign bit,
     * eight bits of binary exponent biased by adding 127, and 23 bits of
     * fraction.  How the fraction is read depends on the exponent field:
     *
     *   1 .. 254  normal number: an implicit leading 1 is put back, giving
     *             a significand in [1,2) scaled by 2**(field - 127)
     *   0         zero or denormal: no implicit 1, scale is 2**-126
     *   255       infinity if the fraction is zero, otherwise NaN
     *
     * Every finite single is exactly representable in a double whose
     * format is at least as wide as IEEE double.  Infinities are returned
     * as +/-HUGE_VAL.  NaNs are returned as NAN where <math.h> provides
     * it, otherwise as HUGE_VAL so that they still fail any range check.
     */

    double ieee_single_decode(unsigned char *buf)
    {
	int sign, biased;
	unsigned long fraction;
	double result;

	/* sign bit as an int: 1 means negative, 0 means positive */
	sign = ((buf[0] & 0x80) != 0);

	/* the eight exponent bits, still biased: 0 .. 255 */
	biased = (buf[0] & 0x7f) << 1 | buf[1] >> 7;

	/*
	 * the 23 stored fraction bits; each octet is widened before it is
	 * shifted so that nothing is shifted inside a 16 bit int
	 */
	fraction = (unsigned long) (buf[1] & 0x7f) << 16
		 | (unsigned long) buf[2] << 8
		 | buf[3];

	if (biased == 0xff) {
	    if (fraction == 0)
		return sign ? -HUGE_VAL : HUGE_VAL;
    #ifdef NAN
	    return NAN;
    #else
	    return HUGE_VAL;	/* no portable NaN available here */
    #endif
	}

	/*
	 * The fraction is an integer count of units of 2**-23, so the
	 * scaling by 2**-23 is folded into the ldexp() exponent.
	 */
	if (biased == 0)
	    result = ldexp((double) fraction, -126 - 23);
	else
	    result = ldexp((double) (fraction | (1UL << 23)), biased - 127 - 23);

	/* factor in the sign; this also yields -0.0 for a negative zero */
	return sign ? -result : result;
    }

    /*
     * Build the 24 bit fixed point number held in buf[0] .. buf[2], most
     * significant octet first.  The number is returned as the raw scaled
     * unsigned integer; the position of the binary point is left to the
     * caller.
     */

    unsigned long fixed_24_decode(unsigned char *buf)
    {
	unsigned long value = buf[0];

	/* append the two remaining octets, eight bits at a time */
	value = value << 8 | buf[1];
	value = value << 8 | buf[2];

	return value;
    }

    /*
     * Unpack a string of 7 bit characters, packed most significant bit
     * first with no padding between characters, into one character per
     * byte of dest.
     *
     * Parameter n is the number of octets of the input packet, NOT the
     * number of 7 bit characters to be extracted.  Every complete group of
     * 7 input bits yields one output byte; bits left over at the end of
     * the input that do not form a complete character are dropped.  No
     * terminating 0 is stored.
     *
     * The input octets are assumed to be in the range 0 to 255.
     */

    void packed_string_decode(unsigned char *dest, unsigned char *src, size_t n)
    {
	unsigned pending = 0;	/* bits taken from src but not yet stored  */
	unsigned count = 0;	/* number of such bits, 0 .. 6 between octets */

	while (n-- > 0) {
	    /* append the next octet below the bits still pending */
	    pending = pending << 8 | *src++;
	    count += 8;

	    /* one octet completes one character, or two on every 7th octet */
	    while (count >= 7) {
		count -= 7;
		*dest++ = (pending >> count) & 0x7f;
	    }

	    /* keep only the bits that have not been stored yet */
	    pending &= (1u << count) - 1;
	}
    }


/*------------------------------------------------------------------------*/

Here is an example of a packed six-bit data encoding routine which has been
aggressively coded to save the compiler from making pessimistic aliasing
assumptions. Here, the variables x and y are used as temporary values
which eliminate redundant loads from the source array, so that the bare minimum
number of pointer dereference operations is performed. In principle,
the same sort of technique can be applied to packed_string_decode().

    /*
     * Split the size bytes at src into 6 bit values, most significant bits
     * first, storing one value (0 .. 63) per byte of dest.  Three input
     * bytes give four output bytes; a final group of two gives three and
     * a final single byte gives two, the last value being padded with
     * zero bits.  Returns the number of bytes stored in dest.
     *
     * Each source byte is loaded exactly once, into x or y.
     */
    size_t sixbit_encode(unsigned char *dest, unsigned char *src, size_t size)
    {
	unsigned char *start = dest;
	unsigned x, y;

	/* complete groups of three input bytes */
	for (; size >= 3; size -= 3) {
	    x = *src++;
	    *dest++ = x >> 2;
	    y = *src++;
	    *dest++ = (x & 3) << 4 | y >> 4;
	    x = *src++;
	    *dest++ = (y & 15) << 2 | x >> 6;
	    *dest++ = x & 63;
	}

	/* zero, one or two bytes are left over */
	if (size == 2) {
	    x = src[0];
	    *dest++ = x >> 2;
	    y = src[1];
	    *dest++ = (x & 3) << 4 | y >> 4;
	    *dest++ = (y & 15) << 2;
	} else if (size == 1) {
	    x = src[0];
	    *dest++ = x >> 2;
	    *dest++ = (x & 3) << 4;
	}

	return (size_t) (dest - start);
    }

Oh, and to dispel any myth that assembly language is easier to maintain than C,
here is a hand-coded 80x86 equivalent:


		.file	"sixbit-i386.s"

# value handed to the .align directive in front of the main loop
ALIGN		= 16

# offsets of the three stack arguments from %esp as it stands on entry,
# i.e. with the return address at offset 0; code that has pushed
# registers since then must add the size of what it pushed
arg1		= 4
arg2		= 8
arg3		= 12

		.text

# RCS identification string; note that it is emitted into the text section
rcsid:		.ascii	"$Id: sixbit-i386.s,v 1.1 1997/07/26 18:35:35 kaz Exp $"

# 
#  ... snip ...
#

		.globl	sixbit_encode
#
#  sixbit_encode: hand-coded counterpart of the C sixbit_encode(dest, src,
#  size).  The three arguments are read from the stack and the number of
#  bytes stored at dest is returned in %eax.  %edi, %esi and %ebx are
#  saved and restored; %ecx and %edx are overwritten.  The direction flag
#  is cleared and left clear.
#
#  Register use:  %edi = dest cursor, %esi = src cursor,
#                 %ecx = input bytes left, %edx = output bytes so far.
#
sixbit_encode:
		pushl	%edi
		pushl	%esi
		pushl 	%ebx
		movl	arg1+12(%esp), %edi	# +12 skips the three saved registers
		movl	arg2+12(%esp), %esi
		movl	arg3+12(%esp), %ecx
		xorl	%edx, %edx		# output count = 0
		cld				# lods/stos move upwards

		cmpl	$2, %ecx
		je	encode_two		# exactly two input bytes
		jb	encode_one		# zero or one input byte

		.align	ALIGN

# three input bytes -> four output bytes, stored with a single 32 bit write
encode_threes:
		lodsw	(%esi), %ax		# %al = first byte, %ah = second byte
		movb	%al, %bh
		andb	$3, %bh
		shrb	$2, %al			# %al = out0: top 6 bits of first byte
		shlb	$4, %bh			# %bh = low 2 bits of first byte, moved up
		movb	%ah, %bl		# keep a copy of the second byte
		shrb	$4, %ah
		orb	%bh, %ah		# %ah = out1
		shll	$16, %eax		# park out0/out1 in the upper half
		andb	$15, %bl
		lodsb	(%esi), %al		# %al = third byte
		shlb	$2, %bl	
		movb	%al, %bh		# keep a copy of the third byte
		shrb	$6, %al	
		orb	%bl, %al		# %al = out2
		movb	%bh, %ah
		andb	$63, %ah		# %ah = out3: low 6 bits of third byte
		roll	$16, %eax		# swap halves: bytes now out0,out1,out2,out3
		stosl	%eax, (%edi)		# store all four, advance dest by 4

		addl	$4, %edx
		subl	$3, %ecx
		cmp	$2, %ecx
		ja	encode_threes		# more than two bytes left: loop
		jb	encode_one		# zero or one left
						# exactly two left: fall through

# two input bytes -> three output bytes
encode_two:	
		lodsw	(%esi), %ax		# %al = first byte, %ah = second byte
		movb	%al, %bh
		shrb	$2, %al			# %al = out0
		andb	$3, %bh	
		shlb	$4, %bh

		movb	%ah, %bl		# keep a copy of the second byte
		shrb	$4, %ah	
		orb	%bh, %ah		# %ah = out1
		stosw	%ax, (%edi)		# store out0, out1; advance dest by 2

		andb	$15, %bl
		shlb	$2, %bl			# out2: low 4 bits of second byte, zero padded
		movb	%bl, (%edi)

		addl	$3, %edx
		movl	%edx, %eax		# return value

		popl	%ebx
		popl	%esi
		popl	%edi
		ret

# one input byte -> two output bytes; nothing to do if none are left
encode_one:	
		testl	%ecx, %ecx
		je	encode_done

		lodsb	(%esi), %al
		movb	%al, %ah
		shrb	$2, %al			# %al = out0
		andb	$3, %ah
		shlb	$4, %ah			# %ah = out1: low 2 bits, zero padded
		movw	%ax, (%edi)		# store out0, out1

		addl	$2, %edx

encode_done:
		movl	%edx, %eax		# return value
		popl	%ebx
		popl	%esi
		popl	%edi
		ret

-- 
"In My Egotistical Opinion, most people's C programs should be
indented six feet downward and covered with dirt."
	-- Blair P. Houghton