1 module tagion.utils.LEB128; 2 3 import std.algorithm.comparison : min; 4 import std.algorithm.iteration : map, sum; 5 import std.format; 6 import traits = std.traits : isIntegral, isSigned, isUnsigned; 7 import std.typecons; 8 import tagion.basic.tagionexceptions; 9 10 @safe: 11 @nogc 12 class LEB128Exception : TagionException { 13 this(string msg, string file = __FILE__, size_t line = __LINE__) pure nothrow { 14 super(msg, file, line); 15 } 16 } 17 18 alias check = Check!LEB128Exception; 19 20 /++ 21 Returns: 22 The size in bytes of the LEB128 23 No error size 0 is returned 24 +/ 25 @nogc 26 size_t calc_size(const(ubyte[]) data) pure nothrow { 27 foreach (i, d; data) { 28 if ((d & 0x80) == 0) { 29 if (i > ulong.sizeof + 1) { 30 return 0; 31 } 32 return i + 1; 33 } 34 } 35 return data.length; 36 } 37 38 @nogc 39 size_t calc_size(T)(const T v) pure nothrow if (isUnsigned!(T)) { 40 size_t result; 41 ulong value = v; 42 do { 43 result++; 44 value >>= 7; 45 } 46 while (value); 47 return result; 48 } 49 50 /// Returns: the max size to store a T as a 51 template DataSize(T) if (isIntegral!T) { 52 static if (isUnsigned!T) { 53 enum DataSize = T.sizeof + 2; 54 } 55 else { 56 enum DataSize = (T.sizeof * 9 + 1) / 8 + 1; 57 } 58 } 59 60 @nogc 61 size_t calc_size(T)(const T v) pure nothrow if (isSigned!(T)) { 62 if (v == T.min) { 63 return T.sizeof + (is(T == int) ? 1 : 2); 64 } 65 T value = v; 66 static if (is(T == long)) { 67 if ((value >> (long.sizeof * 8 - 2)) == 1UL) { 68 return long.sizeof + 2; 69 } 70 } 71 size_t result; 72 // auto uv=(v < 0)?-v:v; 73 // T nv=-v; 74 75 ubyte d; 76 do { 77 d = value & 0x7f; 78 result++; 79 value >>= 7; 80 } 81 while ((((value != 0) || (d & 0x40)) && ((value != -1) || !(d & 0x40)))); 82 return result; 83 } 84 85 immutable(ubyte[]) encode(T)(const T v) pure if (isUnsigned!T && isIntegral!T) { 86 ubyte[DataSize!T] data; 87 alias BaseT = TypedefType!T; 88 BaseT value = cast(BaseT) v; 89 foreach (i, ref d; data) { 90 d = value & 0x7f; 91 value >>= 7; 92 if (value == 0) { 93 return data[0 .. i + 1].idup; 94 } 95 d |= 0x80; 96 } 97 assert(0); 98 } 99 100 immutable(ubyte[]) encode(T)(const T v) pure if (isSigned!T && isIntegral!T) { 101 enum DATA_SIZE = (T.sizeof * 9 + 1) / 8 + 1; 102 ubyte[DataSize!T] data; 103 if (v == T.min) { 104 foreach (ref d; data[0 .. $ - 1]) { 105 d = 0x80; 106 } 107 data[$ - 1] = (T.min >> (7 * (DATA_SIZE - 1))) & 0x7F; 108 return data.dup; 109 } 110 T value = v; 111 foreach (i, ref d; data) { 112 d = value & 0x7f; 113 value >>= 7; 114 /* sign bit of byte is second high order bit (0x40) */ 115 if (((value == 0) && !(d & 0x40)) || ((value == -1) && (d & 0x40))) { 116 return data[0 .. i + 1].idup; 117 } 118 d |= 0x80; 119 } 120 check(0, "Bad LEB128 format"); 121 assert(0); 122 } 123 124 alias DecodeLEB128(T) = Tuple!(T, "value", size_t, "size"); 125 enum ErrorValue(T) = DecodeLEB128!T(T.init, 0); 126 /++ 127 Converts a ubyte string to LEB128 number 128 Returns: 129 The value and the size 130 In case of an error this size is set to zero 131 +/ 132 @nogc 133 DecodeLEB128!T decode(T = ulong)(const(ubyte[]) data) pure nothrow 134 if (isUnsigned!T) { 135 alias BaseT = TypedefType!T; 136 ulong result; 137 uint shift; 138 enum MAX_LIMIT = T.sizeof * 8; 139 size_t len; 140 foreach (i, d; data) { 141 if (shift >= MAX_LIMIT) { 142 return ErrorValue!T; 143 } 144 result |= (d & 0x7FUL) << shift; 145 if ((d & 0x80) == 0) { 146 len = i + 1; 147 static if (!is(BaseT == ulong)) { 148 if (result > BaseT.max) { 149 return ErrorValue!T; 150 } 151 } 152 return DecodeLEB128!T(cast(BaseT) result, len); 153 } 154 shift += 7; 155 } 156 return ErrorValue!T; 157 } 158 159 /++ 160 Converts a ubyte string to LEB128 number 161 Returns: 162 The value and the size 163 In case of an error this size is set to zero 164 +/ 165 @nogc 166 DecodeLEB128!T decode(T = long)(const(ubyte[]) data) pure nothrow if (isSigned!T) { 167 alias BaseT = TypedefType!T; 168 long result; 169 uint shift; 170 enum MAX_LIMIT = T.sizeof * 8; 171 size_t len; 172 foreach (i, d; data) { 173 if (shift >= MAX_LIMIT) { 174 return ErrorValue!T; 175 } 176 // check(shift < MAX_LIMIT, "LEB128 decoding buffer over limit"); 177 result |= (d & 0x7FL) << shift; 178 shift += 7; 179 if ((d & 0x80) == 0) { 180 if ((shift < long.sizeof * 8) && ((d & 0x40) != 0)) { 181 result |= (~0L << shift); 182 } 183 len = i + 1; 184 static if (!is(BaseT == long)) { 185 if (T.min > result) { 186 return ErrorValue!T; 187 } 188 } 189 return DecodeLEB128!T(cast(BaseT) result, len); 190 } 191 } 192 return ErrorValue!T; 193 } 194 195 /// 196 unittest { 197 import std.algorithm.comparison : equal; 198 199 void ok(T)(T x, const(ubyte[]) expected) { 200 const encoded = encode(x); 201 assert(equal(encoded, expected)); 202 assert(calc_size(x) == expected.length); 203 assert(calc_size(expected) == expected.length); 204 const decoded = decode!T(expected); 205 assert(decoded.size == expected.length); 206 assert(decoded.value == x); 207 } 208 209 { 210 ok!int(int.max, [255, 255, 255, 255, 7]); 211 ok!ulong(27, [27]); 212 ok!ulong(2727, [167, 21]); 213 ok!ulong(272727, [215, 210, 16]); 214 ok!ulong(27272727, [151, 204, 128, 13]); 215 ok!ulong(1427449141, [181, 202, 212, 168, 5]); 216 ok!ulong(ulong.max, [255, 255, 255, 255, 255, 255, 255, 255, 255, 1]); 217 } 218 219 { 220 ok!int(-1, [127]); 221 ok!int(int.max, [255, 255, 255, 255, 7]); 222 ok!int(int.min, [128, 128, 128, 128, 120]); 223 ok!int(int.max, [255, 255, 255, 255, 7]); 224 ok!long(int.min, [128, 128, 128, 128, 120]); 225 ok!long(int.max, [255, 255, 255, 255, 7]); 226 227 ok!long(27, [27]); 228 ok!long(2727, [167, 21]); 229 ok!long(272727, [215, 210, 16]); 230 ok!long(27272727, [151, 204, 128, 13]); 231 ok!long(1427449141, [181, 202, 212, 168, 5]); 232 233 ok!int(-123456, [192, 187, 120]); 234 ok!long(-27, [101]); 235 ok!long(-2727, [217, 106]); 236 ok!long(-272727, [169, 173, 111]); 237 ok!long(-27272727, [233, 179, 255, 114]); 238 ok!long(-1427449141L, [203, 181, 171, 215, 122]); 239 240 ok!long(-1L, [127]); 241 ok!long(long.max - 1, [254, 255, 255, 255, 255, 255, 255, 255, 255, 0]); 242 ok!long(long.max, [255, 255, 255, 255, 255, 255, 255, 255, 255, 0]); 243 ok!long(long.min + 1, [129, 128, 128, 128, 128, 128, 128, 128, 128, 127]); 244 ok!long(long.min, [128, 128, 128, 128, 128, 128, 128, 128, 128, 127]); 245 } 246 247 { 248 assert(decode!int([127]).value == -1); 249 } 250 251 { // Bug fix 252 assert(calc_size(-77) == 2); 253 ok!int(-77, [179, 127]); 254 } 255 } 256 257 DecodeLEB128!T read(T, U: 258 const(ubyte))(ref U[] data) pure nothrow { 259 const result = decode!T(data); 260 data = data[result.size .. $]; 261 return result; 262 } 263 264 /// 265 unittest { 266 const(ubyte)[] buf; 267 { // read of empty 268 const dec_range = read!ulong(buf); 269 assert(dec_range.size == 0); 270 } 271 272 buf ~= 1234.encode; 273 { // read of one 274 const dec_range = read!ulong(buf); 275 assert(dec_range.value == 1234); 276 assert(dec_range.size == 2); 277 } 278 buf ~= 2755.encode ~ encode(-0x1245); 279 { // read of two 280 const dec_range = read!ulong(buf); 281 assert(dec_range.value == 2755); 282 assert(dec_range.size == 2); 283 } 284 { // read of the last one 285 const dec_range = read!long(buf); 286 assert(dec_range.value == -0x1245); 287 assert(dec_range.size == 2); 288 } 289 } 290 291 bool isInvariant(T)(const(ubyte[]) data) pure nothrow @nogc if (isIntegral!T) { 292 import std.algorithm; 293 294 bool result = (data.length > 0) && ((data[0] != 0x80) || ((data.length > 1) && data[0 .. 2] != [0x80, 0x00])); 295 static if (isSigned!T) { 296 if (result) { 297 const negative_count = data.countUntil(0xFF); 298 return (negative_count < 0) || ((negative_count + 1 < data.length) && data[negative_count] == 0x7F); 299 } 300 } 301 return result; 302 } 303 304 unittest { 305 const(ubyte[]) correct_buf = [0]; 306 const(ubyte[]) not_correct_buf = [0x80, 0]; 307 { /// Unsigned data invariant LEB128 308 assert(correct_buf.decode!uint.value == not_correct_buf.decode!uint.value); 309 assert(correct_buf.isInvariant!uint); 310 assert(!not_correct_buf.isInvariant!uint); 311 } 312 313 { 314 assert(correct_buf.isInvariant!int); 315 assert(!not_correct_buf.isInvariant!int); 316 const(ubyte[]) correct_buf_negative = [0x7F]; 317 const(ubyte[]) not_correct_buf_negative = [0xFF, 0x7F]; 318 const(ubyte[]) not_correct_buf_negative_2 = [0xFF, 0xFF, 0xFF, 0x7F]; 319 assert(correct_buf_negative.isInvariant!int); 320 assert(!not_correct_buf_negative.isInvariant!int); 321 assert(!not_correct_buf_negative_2.isInvariant!int); 322 323 } 324 } 325 326 bool isCompleat(const(ubyte[]) data) pure nothrow @nogc { 327 import std.algorithm; 328 329 return data.countUntil!((b) => (b & 0x80) == 0) >= 0; 330 } 331 332 unittest { 333 assert(isCompleat([0x07])); 334 assert(!isCompleat([0x87])); 335 assert(isCompleat([0x87, 0x07])); 336 }