1 /// \file LEB128.d
2 
3 module tagion.betterC.utils.LEB128;
4 
5 @nogc:
6 
7 import std.format;
8 import traits = std.traits : isIntegral, isSigned, isUnsigned;
9 import std.typecons;
10 
11 //import tagion.basic.tagionexceptions;
12 import std.algorithm.comparison : min;
13 import std.algorithm.iteration : map, sum;
14 import tagion.betterC.utils.Bailout;
15 import tagion.betterC.utils.BinBuffer;
16 
17 //import std.stdio;
18 
19 // @safe
20 // class LEB128Exception : TagionException {
21 //     this(string msg, string file = __FILE__, size_t line = __LINE__ ) pure {
22 //         super( msg, file, line );
23 //     }
24 // }
25 
26 // alias check=Check!LEB128Exception;
27 
28 size_t calc_size(const(ubyte[]) data) {
29     foreach (i, d; data) {
30         if ((d & 0x80) == 0) {
31             //check(i <= ulong.sizeof+1, "LEB128 overflow");
32             return i + 1;
33         }
34     }
35     // check(0, "LEB128 bad format");
36     assert(0);
37 }
38 
39 @safe
40 size_t calc_size(T)(const T v) pure if (isUnsigned!(T)) {
41     size_t result;
42     ulong value = v;
43     do {
44         result++;
45         value >>= 7;
46     }
47     while (value);
48     return result;
49 }
50 
51 @safe
52 size_t calc_size(T)(const T v) pure if (isSigned!(T)) {
53     if (v == T.min) {
54         return T.sizeof + (is(T == int) ? 1 : 2);
55     }
56     ulong value = ulong((v < 0) ? -v : v);
57     static if (is(T == long)) {
58         if ((value >> (long.sizeof * 8 - 2)) == 1UL) {
59             return long.sizeof + 2;
60         }
61     }
62     size_t result;
63     // auto uv=(v < 0)?-v:v;
64     // T nv=-v;
65 
66     ubyte d;
67     do {
68         d = value & 0x7f;
69         result++;
70         value >>= 7;
71     }
72     while ((((value != 0) || (d & 0x40)) && ((value != -1) || !(d & 0x40))));
73     return result;
74 }
75 
76 void encode(T)(ref BinBuffer buffer, const T v) if (isUnsigned!T && isIntegral!T) {
77     ubyte[T.sizeof + 2] data;
78     alias BaseT = TypedefType!T;
79     BaseT value = cast(BaseT) v;
80     foreach (i, ref d; data) {
81         d = value & 0x7f;
82         value >>= 7;
83         if (value == 0) {
84             buffer.write(data[0 .. i + 1]);
85             return;
86         }
87         d |= 0x80;
88     }
89     assert(0);
90 }
91 
92 void encode(T)(ref BinBuffer buffer, const T v) if (isSigned!T && isIntegral!T) {
93     enum DATA_SIZE = (T.sizeof * 9 + 1) / 8 + 1;
94     ubyte[DATA_SIZE] data;
95     //    size_t index;
96     if (v == T.min) {
97         foreach (ref d; data[0 .. $ - 1]) {
98             d = 0x80;
99         }
100         data[$ - 1] = (T.min >> (7 * (DATA_SIZE - 1))) & 0x7F;
101         buffer.write(data);
102         return;
103     }
104     T value = v;
105     foreach (i, ref d; data) {
106         d = value & 0x7f;
107         value >>= 7;
108         /* sign bit of byte is second high order bit (0x40) */
109         if (((value == 0) && !(d & 0x40)) || ((value == -1) && (d & 0x40))) {
110             buffer.write(data[0 .. i + 1]);
111             return;
112         }
113         d |= 0x80;
114     }
115     // check(0, "Bad LEB128 format");
116     assert(0);
117 }
118 
119 alias DecodeLEB128(T) = Tuple!(T, "value", size_t, "size");
120 
121 DecodeLEB128!T decode(T = ulong)(const(ubyte[]) data) if (isUnsigned!T) {
122     alias BaseT = TypedefType!T;
123     ulong result;
124     uint shift;
125     enum MAX_LIMIT = T.sizeof * 8;
126     size_t len;
127     foreach (i, d; data) {
128         // check(shift < MAX_LIMIT,
129         //     message("LEB128 decoding buffer over limit of %d %d", MAX_LIMIT, shift));
130 
131         result |= (d & 0x7FUL) << shift;
132         if ((d & 0x80) == 0) {
133             len = i + 1;
134             static if (!is(BaseT == ulong)) {
135                 check(result <= BaseT.max, message("LEB128 decoding overflow of %x for %s", result, T
136                         .stringof));
137             }
138             return DecodeLEB128!T(cast(BaseT) result, len);
139         }
140         shift += 7;
141     }
142     // check(0, message("Bad LEB128 format for type %s", T.stringof));
143     assert(0);
144 }
145 
146 DecodeLEB128!T decode(T = long)(const(ubyte[]) data) if (isSigned!T) {
147     alias BaseT = TypedefType!T;
148     long result;
149     uint shift;
150     enum MAX_LIMIT = T.sizeof * 8;
151     size_t len;
152     foreach (i, d; data) {
153         // check(shift < MAX_LIMIT, "LEB128 decoding buffer over limit");
154         result |= (d & 0x7FL) << shift;
155         shift += 7;
156         if ((d & 0x80) == 0) {
157             if ((shift < long.sizeof * 8) && ((d & 0x40) != 0)) {
158                 result |= (~0L << shift);
159             }
160             len = i + 1;
161             // static if (!is(BaseT==long)) {
162             //     // check((T.min <= result) && (result <= T.max),
163             //     //     message("LEB128 out of range %d for %s", result, T.stringof));
164             // }
165             return DecodeLEB128!T(cast(BaseT) result, len);
166         }
167     }
168     // check(0, message("Bad LEB128 format for type %s", T.stringof));
169     assert(0);
170 }
171 
172 ///
173 unittest {
174     import std.algorithm.comparison : equal;
175 
176     void ok(T)(T x, const(ubyte[]) expected) {
177         BinBuffer encoded;
178         encode(encoded, x);
179         assert(equal(encoded.serialize, expected));
180         assert(calc_size(x) == expected.length);
181         assert(calc_size(expected) == expected.length);
182         const decoded = decode!T(expected);
183         assert(decoded.size == expected.length);
184         assert(decoded.value == x);
185     }
186 
187     {
188 
189         const(ubyte[5]) buffer_0 = [255, 255, 255, 255, 7];
190         ok!int(int.max, buffer_0);
191         const(ubyte[1]) buffer_1 = [27];
192         ok!ulong(27, buffer_1);
193         const(ubyte[2]) buffer_2 = [167, 21];
194         ok!ulong(2727, buffer_2);
195         const(ubyte[3]) buffer_3 = [215, 210, 16];
196         ok!ulong(272727, buffer_3);
197         const(ubyte[4]) buffer_4 = [151, 204, 128, 13];
198         ok!ulong(27272727, buffer_4);
199         const(ubyte[5]) buffer_5 = [181, 202, 212, 168, 5];
200         ok!ulong(1427449141, buffer_5);
201         const(ubyte[10]) buffer_6 = [
202             255, 255, 255, 255, 255, 255, 255, 255, 255, 1
203         ];
204         ok!ulong(ulong.max, buffer_6);
205     }
206 
207     {
208         const(ubyte[1]) buffer_0 = [127];
209         ok!int(-1, buffer_0);
210         const(ubyte[5]) buffer_1 = [255, 255, 255, 255, 7];
211         ok!int(int.max, buffer_1);
212         const(ubyte[5]) buffer_2 = [128, 128, 128, 128, 120];
213         ok!int(int.min, buffer_2);
214         const(ubyte[5]) buffer_3 = [255, 255, 255, 255, 7];
215         ok!int(int.max, buffer_3);
216         const(ubyte[5]) buffer_4 = [128, 128, 128, 128, 120];
217         ok!long(int.min, buffer_4);
218         const(ubyte[5]) buffer_5 = [255, 255, 255, 255, 7];
219         ok!long(int.max, buffer_5);
220 
221         const(ubyte[1]) buffer_6 = [27];
222         ok!long(27, buffer_6);
223         const(ubyte[2]) buffer_7 = [167, 21];
224         ok!long(2727, buffer_7);
225         const(ubyte[3]) buffer_8 = [215, 210, 16];
226         ok!long(272727, buffer_8);
227         const(ubyte[4]) buffer_9 = [151, 204, 128, 13];
228         ok!long(27272727, buffer_9);
229 
230         const(ubyte[5]) buffer_10 = [181, 202, 212, 168, 5];
231         ok!long(1427449141, buffer_10);
232 
233         const(ubyte[3]) buffer_11 = [192, 187, 120];
234         ok!int(-123456, buffer_11);
235 
236         const(ubyte[1]) buffer_12 = [101];
237         ok!long(-27, buffer_12);
238         const(ubyte[2]) buffer_13 = [217, 106];
239         ok!long(-2727, buffer_13);
240         const(ubyte[3]) buffer_14 = [169, 173, 111];
241         ok!long(-272727, buffer_14);
242         const(ubyte[4]) buffer_15 = [233, 179, 255, 114];
243         ok!long(-27272727, buffer_15);
244         const(ubyte[5]) buffer_16 = [203, 181, 171, 215, 122];
245         ok!long(-1427449141L, buffer_16);
246 
247         const(ubyte[1]) buffer_17 = [127];
248         ok!long(-1L, buffer_17);
249 
250         const(ubyte[10]) buffer_18 = [
251             254, 255, 255, 255, 255, 255, 255, 255, 255, 0
252         ];
253         ok!long(long.max - 1, buffer_18);
254         const(ubyte[10]) buffer_19 = [
255             255, 255, 255, 255, 255, 255, 255, 255, 255, 0
256         ];
257         ok!long(long.max, buffer_19);
258         const(ubyte[10]) buffer_20 = [
259             129, 128, 128, 128, 128, 128, 128, 128, 128, 127
260         ];
261         ok!long(long.min + 1, buffer_20);
262         const(ubyte[10]) buffer_21 = [
263             128, 128, 128, 128, 128, 128, 128, 128, 128, 127
264         ];
265         ok!long(long.min, buffer_21);
266     }
267 
268     { // Bug fix
269         assert(calc_size(-77) == 2);
270         const(ubyte[2]) buffer_22 = [179, 127];
271         ok!int(-77, buffer_22);
272     }
273 
274 }