1 module tagion.utils.LEB128;
2 
3 import std.algorithm.comparison : min;
4 import std.algorithm.iteration : map, sum;
5 import std.format;
6 import traits = std.traits : isIntegral, isSigned, isUnsigned;
7 import std.typecons;
8 import tagion.basic.tagionexceptions;
9 
10 @safe:
11 @nogc
12 class LEB128Exception : TagionException {
13     this(string msg, string file = __FILE__, size_t line = __LINE__) pure nothrow {
14         super(msg, file, line);
15     }
16 }
17 
18 alias check = Check!LEB128Exception;
19 
20 /++
21  Returns:
22  The size in bytes of the LEB128
23  No error size 0 is returned
24 +/
25 @nogc
26 size_t calc_size(const(ubyte[]) data) pure nothrow {
27     foreach (i, d; data) {
28         if ((d & 0x80) == 0) {
29             if (i > ulong.sizeof + 1) {
30                 return 0;
31             }
32             return i + 1;
33         }
34     }
35     return data.length;
36 }
37 
38 @nogc
39 size_t calc_size(T)(const T v) pure nothrow if (isUnsigned!(T)) {
40     size_t result;
41     ulong value = v;
42     do {
43         result++;
44         value >>= 7;
45     }
46     while (value);
47     return result;
48 }
49 
50 /// Returns: the max size to store a T as a
51 template DataSize(T) if (isIntegral!T) {
52     static if (isUnsigned!T) {
53         enum DataSize = T.sizeof + 2;
54     }
55     else {
56         enum DataSize = (T.sizeof * 9 + 1) / 8 + 1;
57     }
58 }
59 
60 @nogc
61 size_t calc_size(T)(const T v) pure nothrow if (isSigned!(T)) {
62     if (v == T.min) {
63         return T.sizeof + (is(T == int) ? 1 : 2);
64     }
65     T value = v;
66     static if (is(T == long)) {
67         if ((value >> (long.sizeof * 8 - 2)) == 1UL) {
68             return long.sizeof + 2;
69         }
70     }
71     size_t result;
72     // auto uv=(v < 0)?-v:v;
73     // T nv=-v;
74 
75     ubyte d;
76     do {
77         d = value & 0x7f;
78         result++;
79         value >>= 7;
80     }
81     while ((((value != 0) || (d & 0x40)) && ((value != -1) || !(d & 0x40))));
82     return result;
83 }
84 
85 immutable(ubyte[]) encode(T)(const T v) pure if (isUnsigned!T && isIntegral!T) {
86     ubyte[DataSize!T] data;
87     alias BaseT = TypedefType!T;
88     BaseT value = cast(BaseT) v;
89     foreach (i, ref d; data) {
90         d = value & 0x7f;
91         value >>= 7;
92         if (value == 0) {
93             return data[0 .. i + 1].idup;
94         }
95         d |= 0x80;
96     }
97     assert(0);
98 }
99 
100 immutable(ubyte[]) encode(T)(const T v) pure if (isSigned!T && isIntegral!T) {
101     enum DATA_SIZE = (T.sizeof * 9 + 1) / 8 + 1;
102     ubyte[DataSize!T] data;
103     if (v == T.min) {
104         foreach (ref d; data[0 .. $ - 1]) {
105             d = 0x80;
106         }
107         data[$ - 1] = (T.min >> (7 * (DATA_SIZE - 1))) & 0x7F;
108         return data.dup;
109     }
110     T value = v;
111     foreach (i, ref d; data) {
112         d = value & 0x7f;
113         value >>= 7;
114         /* sign bit of byte is second high order bit (0x40) */
115         if (((value == 0) && !(d & 0x40)) || ((value == -1) && (d & 0x40))) {
116             return data[0 .. i + 1].idup;
117         }
118         d |= 0x80;
119     }
120     check(0, "Bad LEB128 format");
121     assert(0);
122 }
123 
124 alias DecodeLEB128(T) = Tuple!(T, "value", size_t, "size");
125 enum ErrorValue(T) = DecodeLEB128!T(T.init, 0);
126 /++
127  Converts a ubyte string to LEB128 number
128  Returns:
129  The value and the size
130  In case of an error this size is set to zero
131 +/
132 @nogc
133 DecodeLEB128!T decode(T = ulong)(const(ubyte[]) data) pure nothrow
134 if (isUnsigned!T) {
135     alias BaseT = TypedefType!T;
136     ulong result;
137     uint shift;
138     enum MAX_LIMIT = T.sizeof * 8;
139     size_t len;
140     foreach (i, d; data) {
141         if (shift >= MAX_LIMIT) {
142             return ErrorValue!T;
143         }
144         result |= (d & 0x7FUL) << shift;
145         if ((d & 0x80) == 0) {
146             len = i + 1;
147             static if (!is(BaseT == ulong)) {
148                 if (result > BaseT.max) {
149                     return ErrorValue!T;
150                 }
151             }
152             return DecodeLEB128!T(cast(BaseT) result, len);
153         }
154         shift += 7;
155     }
156     return ErrorValue!T;
157 }
158 
159 /++
160  Converts a ubyte string to LEB128 number
161  Returns:
162  The value and the size
163  In case of an error this size is set to zero
164 +/
165 @nogc
166 DecodeLEB128!T decode(T = long)(const(ubyte[]) data) pure nothrow if (isSigned!T) {
167     alias BaseT = TypedefType!T;
168     long result;
169     uint shift;
170     enum MAX_LIMIT = T.sizeof * 8;
171     size_t len;
172     foreach (i, d; data) {
173         if (shift >= MAX_LIMIT) {
174             return ErrorValue!T;
175         }
176         //        check(shift < MAX_LIMIT, "LEB128 decoding buffer over limit");
177         result |= (d & 0x7FL) << shift;
178         shift += 7;
179         if ((d & 0x80) == 0) {
180             if ((shift < long.sizeof * 8) && ((d & 0x40) != 0)) {
181                 result |= (~0L << shift);
182             }
183             len = i + 1;
184             static if (!is(BaseT == long)) {
185                 if (T.min > result) {
186                     return ErrorValue!T;
187                 }
188             }
189             return DecodeLEB128!T(cast(BaseT) result, len);
190         }
191     }
192     return ErrorValue!T;
193 }
194 
195 ///
196 unittest {
197     import std.algorithm.comparison : equal;
198 
199     void ok(T)(T x, const(ubyte[]) expected) {
200         const encoded = encode(x);
201         assert(equal(encoded, expected));
202         assert(calc_size(x) == expected.length);
203         assert(calc_size(expected) == expected.length);
204         const decoded = decode!T(expected);
205         assert(decoded.size == expected.length);
206         assert(decoded.value == x);
207     }
208 
209     {
210         ok!int(int.max, [255, 255, 255, 255, 7]);
211         ok!ulong(27, [27]);
212         ok!ulong(2727, [167, 21]);
213         ok!ulong(272727, [215, 210, 16]);
214         ok!ulong(27272727, [151, 204, 128, 13]);
215         ok!ulong(1427449141, [181, 202, 212, 168, 5]);
216         ok!ulong(ulong.max, [255, 255, 255, 255, 255, 255, 255, 255, 255, 1]);
217     }
218 
219     {
220         ok!int(-1, [127]);
221         ok!int(int.max, [255, 255, 255, 255, 7]);
222         ok!int(int.min, [128, 128, 128, 128, 120]);
223         ok!int(int.max, [255, 255, 255, 255, 7]);
224         ok!long(int.min, [128, 128, 128, 128, 120]);
225         ok!long(int.max, [255, 255, 255, 255, 7]);
226 
227         ok!long(27, [27]);
228         ok!long(2727, [167, 21]);
229         ok!long(272727, [215, 210, 16]);
230         ok!long(27272727, [151, 204, 128, 13]);
231         ok!long(1427449141, [181, 202, 212, 168, 5]);
232 
233         ok!int(-123456, [192, 187, 120]);
234         ok!long(-27, [101]);
235         ok!long(-2727, [217, 106]);
236         ok!long(-272727, [169, 173, 111]);
237         ok!long(-27272727, [233, 179, 255, 114]);
238         ok!long(-1427449141L, [203, 181, 171, 215, 122]);
239 
240         ok!long(-1L, [127]);
241         ok!long(long.max - 1, [254, 255, 255, 255, 255, 255, 255, 255, 255, 0]);
242         ok!long(long.max, [255, 255, 255, 255, 255, 255, 255, 255, 255, 0]);
243         ok!long(long.min + 1, [129, 128, 128, 128, 128, 128, 128, 128, 128, 127]);
244         ok!long(long.min, [128, 128, 128, 128, 128, 128, 128, 128, 128, 127]);
245     }
246 
247     {
248         assert(decode!int([127]).value == -1);
249     }
250 
251     { // Bug fix
252         assert(calc_size(-77) == 2);
253         ok!int(-77, [179, 127]);
254     }
255 }
256 
257 DecodeLEB128!T read(T, U:
258         const(ubyte))(ref U[] data) pure nothrow {
259     const result = decode!T(data);
260     data = data[result.size .. $];
261     return result;
262 }
263 
264 ///
265 unittest {
266     const(ubyte)[] buf;
267     { // read of empty
268         const dec_range = read!ulong(buf);
269         assert(dec_range.size == 0);
270     }
271 
272     buf ~= 1234.encode;
273     { // read of one
274         const dec_range = read!ulong(buf);
275         assert(dec_range.value == 1234);
276         assert(dec_range.size == 2);
277     }
278     buf ~= 2755.encode ~ encode(-0x1245);
279     { // read of two
280         const dec_range = read!ulong(buf);
281         assert(dec_range.value == 2755);
282         assert(dec_range.size == 2);
283     }
284     { // read of the last one
285         const dec_range = read!long(buf);
286         assert(dec_range.value == -0x1245);
287         assert(dec_range.size == 2);
288     }
289 }
290 
291 bool isInvariant(T)(const(ubyte[]) data) pure nothrow @nogc if (isIntegral!T) {
292     import std.algorithm;
293 
294     bool result = (data.length > 0) && ((data[0] != 0x80) || ((data.length > 1) && data[0 .. 2] != [0x80, 0x00]));
295     static if (isSigned!T) {
296         if (result) {
297             const negative_count = data.countUntil(0xFF);
298             return (negative_count < 0) || ((negative_count + 1 < data.length) && data[negative_count] == 0x7F);
299         }
300     }
301     return result;
302 }
303 
304 unittest {
305     const(ubyte[]) correct_buf = [0];
306     const(ubyte[]) not_correct_buf = [0x80, 0];
307     { /// Unsigned data invariant LEB128
308         assert(correct_buf.decode!uint.value == not_correct_buf.decode!uint.value);
309         assert(correct_buf.isInvariant!uint);
310         assert(!not_correct_buf.isInvariant!uint);
311     }
312 
313     {
314         assert(correct_buf.isInvariant!int);
315         assert(!not_correct_buf.isInvariant!int);
316         const(ubyte[]) correct_buf_negative = [0x7F];
317         const(ubyte[]) not_correct_buf_negative = [0xFF, 0x7F];
318         const(ubyte[]) not_correct_buf_negative_2 = [0xFF, 0xFF, 0xFF, 0x7F];
319         assert(correct_buf_negative.isInvariant!int);
320         assert(!not_correct_buf_negative.isInvariant!int);
321         assert(!not_correct_buf_negative_2.isInvariant!int);
322 
323     }
324 }
325 
326 bool isCompleat(const(ubyte[]) data) pure nothrow @nogc {
327     import std.algorithm;
328 
329     return data.countUntil!((b) => (b & 0x80) == 0) >= 0;
330 }
331 
332 unittest {
333     assert(isCompleat([0x07]));
334     assert(!isCompleat([0x87]));
335     assert(isCompleat([0x87, 0x07]));
336 }