home *** CD-ROM | disk | FTP | other *** search
- From ISO10646@JHUVM.BITNET Wed Sep 4 02:03:48 1991
- Received: from danpost2.uni-c.dk by dkuug.dk via EUnet with SMTP (5.64+/8+bit/IDA-1.2.8)
- id AA12183; Wed, 4 Sep 91 02:03:43 +0200
- Received: from vm.uni-c.dk by danpost2.uni-c.dk (5.65c+/1.34)
- id AA15375; Wed, 4 Sep 1991 00:04:03 GMT
- Message-Id: <199109040004.AA15375@danpost2.uni-c.dk>
- Received: from vm.uni-c.dk by vm.uni-c.dk (IBM VM SMTP V2R1) with BSMTP id 0815;
- Wed, 04 Sep 91 02:05:47 DNT
- Received: from SEARN.SUNET.SE by vm.uni-c.dk (Mailer R2.07) with BSMTP id 5755;
- Wed, 04 Sep 91 02:05:45 DNT
- Received: from SEARN.BITNET by SEARN.SUNET.SE (Mailer R2.05) with BSMTP id
- 0515; Wed, 04 Sep 91 02:07:34 +0200
- Date: Tue, 3 Sep 1991 17:01:01 U
- Reply-To: Multi-byte Code Issues <ISO10646@JHUVM.BITNET>
- Sender: Multi-byte Code Issues <ISO10646@JHUVM.BITNET>
- From: Mark Davis <mark_davis@gateway.qm.apple.com>
- Subject: ATM C Implementation
- X-To: unicore@Eng.Sun.COM, ISO10646%JHUVM.BITNET@cunyvm.cuny.edu,
- ansix3l2%JHUVM.BITNET@cunyvm.cuny.edu
- To: Multiple recipients of list ISO10646 <ISO10646@JHUVM>
- X-Charset: US-DK
- X-Char-Esc: 29
- Status: RO
-
- Subject: Time:4:34 PM
- OFFICE MEMO ATM C Implementation Date:9/3/91
- Here is an implementation of the ATM algorithm, for those interested.
- Any feedback would be welcome.--Mark
-
- /////////////////////////////////////////////////////////////////
- // A Transformation Method
- // Author: Mark Davis
- // Date: August 30, 1991
- // The following is a C test implementation of the ATM algorithm
- // described in the C0 committee report (see that text for
- // details as to the purpose and requirements).
- // The details of the algorithm are somewhat changed from that
- // report, to correct some bugs and take into account some
- // results of the WG2 meeting.
- /////////////////////////////////////////////////////////////////
-
- #include <STDIO.H>
- #include <TYPES.h>
-
- typedef unsigned char ubyte; // one byte of an ATM sequence (ToAtm output, FromAtm input)
- typedef unsigned short ushort; // not referenced in the visible code
- typedef unsigned long ucs; // a UCS character value; width is whatever unsigned long is on the platform
- typedef short index; // loop counter type, used by FillSkipTables
- typedef short bufferLength; // number of bytes in an ATM buffer or sequence
- enum {false,true}; // false == 0, true == 1
- typedef unsigned char Boolean; // not referenced in the visible code
-
- #define c0Start 0x00 // first byte of the c0 (control) range
- #define c0End 0x20 // last byte of the c0 range; note that 0x20 (SPACE) is included
- #define g0Start 0x21 // first byte of the g0 (non-control) range
- #define g0End 0x7E // last byte of the g0 range
- #define c1Start 0x7F // first byte of the c1 (control) range; note that 0x7F (DEL) is included
- #define c1End 0x9F // last byte of the c1 range
- #define g1Start 0xA0 // first byte of the g1 (non-control) range
- #define g1End 0xFF // last byte of the g1 range
- #define uStart 0x100 // first value that no longer fits in one byte
-
- #define g0Count (c1Start - g0Start) // 94 bytes in g0
- #define g1Count (uStart - g1Start) // 96 bytes in g1
- #define c0Count (g0Start - c0Start) // 33 bytes in c0
- #define c1Count (g1Start - c1Start) // 33 bytes in c1
-
- #define gCount (g0Count + g1Count) // 190 non-control bytes; the radix used by ToAtm and FromAtm
- #define cCount (c0Count + c1Count) // 66 control bytes; gCount + cCount == 256
-
- #define section0 0x000000A0 // characters below this are encoded by ToAtm as one byte (the character itself)
- #define section1 0x00000100 // characters below this are encoded as break0 followed by the character
- #define section2 0x00004016 // section1 + (break2 - break1) * gCount; below this: 2 bytes
- #define section3 0x00038E2E // section2 + (break3 - break2) * gCount * gCount; below this: 3 bytes
- #define section4 0xFFFFFFFF // not referenced in the visible code; everything from section3 up takes 5 bytes
-
- #define break0 0xA0 // lead byte of the 2-byte form whose second byte is the raw character
- #define break1 0xA1 // first lead byte of the 2-byte form whose second byte goes through SkipTable
- #define break2 0xF6 // first lead byte of the 3-byte form
- #define break3 0xFC // first lead byte of the 5-byte form
- #define break4 0x100 // not referenced in the visible code; one past the largest byte value
-
- #define errorChar 0xFFFFFFFF // returned by FromAtm when maxLength is too small for the sequence
-
- /////////////////////////////////////////////////////////////////
- // SkipTable is used to map a contiguous range onto values that
- // do not include control bytes.
- // It maps the values from 0 to 255 as follows:
- // 0 .. g0Count-1 => g0Start..g0End
- // g0Count .. gCount-1 => g1Start..g1End
- // gCount .. gCount+c0Count-1 => c0Start..c0End
- // gCount+c0Count .. g1End => c1Start..c1End
- // UnskipTable reverses the effect of SkipTable.
- // The last two ranges are not, strictly speaking, necessary,
- // but make it injective and surjective, providing
- // predictability for out-of-range cases.
- // Call FillSkipTables before using any other routine.
- /////////////////////////////////////////////////////////////////
-
- ubyte SkipTable [256]; // slot 0..255 -> byte value; written by FillSkipTables, read by ToAtm
- ucs UnskipTable [256]; // byte value -> slot, the inverse of SkipTable; written by FillSkipTables, read by FromAtm
-
- // Fill SkipTable with the slot -> byte mapping and UnskipTable with its
- // inverse.  Slots are assigned to the g0 bytes first, then g1, then c0,
- // then c1, so the first gCount slots never yield a control byte.
- void FillSkipTables (void) {
-     int slot;
-
-     for (slot = 0; slot < 256; slot++) {
-         int byteValue;
-
-         // Test the ranges from the top down; each branch rebases the slot
-         // onto the first byte of the range it falls in.
-         if (slot >= gCount + c0Count) {
-             byteValue = slot - (gCount + c0Count) + c1Start;
-         } else if (slot >= gCount) {
-             byteValue = slot - gCount + c0Start;
-         } else if (slot >= g0Count) {
-             byteValue = slot - g0Count + g1Start;
-         } else {
-             byteValue = slot + g0Start;
-         }
-
-         SkipTable[slot] = (ubyte) byteValue;
-         UnskipTable[(ubyte) byteValue] = (ucs) slot;
-     }
- }
-
- /////////////////////////////////////////////////////////////////
- // The procedure ToAtm takes a UCS character (0..4G) and maps it
- // to a sequence of bytes that do not include control characters
- // (C0 or C1), SPACE, or DEL.
- // The length of the sequence is 1, 2, 3, or 5 bytes, depending
- // on the first byte.
- /////////////////////////////////////////////////////////////////
-
- // Encode one UCS character as an ATM byte sequence.
- //   ch  - the character to encode.
- //   a   - output buffer; up to 5 bytes are written starting at a[0].
- //   len - receives the number of bytes written (1, 2, 3 or 5).  It receives
- //         0, and nothing is written, when ch is so large that the lead byte
- //         would reach break4; that can only happen where ucs is wider than
- //         32 bits.
- // SkipTable must already have been filled by FillSkipTables.
- void ToAtm(ucs ch, ubyte* a, bufferLength *len) {
-     ubyte lead;          // first lead byte of the form chosen for ch
-     bufferLength count;  // total length of the sequence
-     bufferLength pos;
-
-     if (ch < section0) {
-         // Below section0 the character is its own one-byte encoding.
-         a[0] = (ubyte) ch;
-         *len = 1;
-         return;
-     }
-     if (ch < section1) {
-         // break0 acts as an escape; the character follows unchanged.
-         a[0] = break0;
-         a[1] = (ubyte) ch;
-         *len = 2;
-         return;
-     }
-
-     // Remaining forms: rebase ch to the start of its section, then emit
-     // it as base-gCount digits (mapped through SkipTable) behind a lead
-     // byte that carries the most significant part.
-     if (ch < section2) {
-         ch -= section1;
-         lead = break1;
-         count = 2;
-     } else if (ch < section3) {
-         ch -= section2;
-         lead = break2;
-         count = 3;
-     } else {
-         ch -= section3;
-         lead = break3;
-         count = 5;
-         // Only break4 - break3 lead bytes exist for this form.  Without
-         // this check a larger quotient would be truncated by the ubyte
-         // cast below and produce a lead byte belonging to another form.
-         if (ch / ((ucs) gCount * gCount * gCount * gCount)
-                 >= (ucs) (break4 - break3)) {
-             *len = 0;
-             return;
-         }
-     }
-
-     // Fill the trailing bytes from last to first, least significant digit
-     // first; what is left of ch afterwards is the lead byte's offset.
-     for (pos = count - 1; pos > 0; pos--) {
-         a[pos] = SkipTable[ch % gCount];
-         ch /= gCount;
-     }
-     a[0] = (ubyte) (lead + ch);
-     *len = count;
- }
-
- /////////////////////////////////////////////////////////////////
- // The procedure FromAtm takes a sequence of ATM bytes (as
- // generated by ToAtm) and maps it to the UCS character (0..4G)
- // that generated it.
- // Note that there are a number of byte sequences that cannot
- // be produced by the ATM algorithm, and are invalid input.
- // As written, this procedure checks for some of the obvious
- // invalid values, such as insufficient bufferLength (based on
- // the first byte), but does not do full-fledged checking for
- // invalid sequences (such as <A0,21>).
- /////////////////////////////////////////////////////////////////
-
- // Decode one ATM byte sequence back into a UCS character.
- //   bufferStart - in: address of a pointer to the first byte of the
- //                 sequence; out: on success the pointer is advanced past
- //                 the bytes consumed.  It is left untouched on error.
- //   maxLength   - number of bytes available at *bufferStart.
- // Returns the decoded character, or errorChar when maxLength is smaller
- // than the sequence length implied by the first byte.  Apart from that
- // length check the bytes are not validated.
- // UnskipTable must already have been filled by FillSkipTables.
- ucs FromAtm(ubyte** bufferStart, bufferLength maxLength) {
-     ubyte *p;
-     ubyte lead;
-     ucs result;
-
-     if (maxLength < 1) return errorChar;
-     p = *bufferStart;
-     lead = *p++;
-
-     if (lead < break0) {
-         // One-byte form: the byte is the character.
-         result = lead;
-     } else if (lead < break1) {
-         // Escape form: break0 followed by the raw character.  The second
-         // byte must be present; without this check a lone break0 at the
-         // end of the buffer would be read past.
-         if (maxLength < 2) return errorChar;
-         result = *p++;
-     } else {
-         bufferLength need;  // total bytes in this form
-         bufferLength k;
-         ucs base;           // first character of the form's section
-
-         if (lead < break2) {
-             need = 2;
-             result = lead - break1;
-             base = section1;
-         } else if (lead < break3) {
-             need = 3;
-             result = lead - break2;
-             base = section2;
-         } else {
-             need = 5;
-             result = lead - break3;
-             base = section3;
-         }
-         if (maxLength < need) return errorChar;
-
-         // The trailing bytes are base-gCount digits, most significant
-         // first, each mapped back to its digit value by UnskipTable.
-         for (k = 1; k < need; k++) {
-             result = result * gCount + UnskipTable[*p++];
-         }
-         result += base;
-     }
-
-     *bufferStart = p; // pass back new starting point
-     return result;
- }
-
-