module tagion.wasm.WastParser;

import core.exception : RangeError;
import std.algorithm;
import std.array;
import std.conv;
import std.exception : ifThrown;
import std.format;
import std.stdio;
import std.traits;
import tagion.basic.Debug;
import tagion.wasm.WasmBase;
import tagion.wasm.WasmException;
import tagion.wasm.WasmWriter;
import tagion.wasm.WastAssert;
import tagion.wasm.WastTokenizer;

@safe
struct WastParser {
    WasmWriter writer;
    SectionAssert wast_assert;
    private void writeCustomAssert() {
        if (wast_assert !is SectionAssert.init) {
            auto _custom = new CustomType("assert", wast_assert.toDoc);
            writer.mod[Section.CUSTOM].list[Section.DATA] ~= _custom;
        }
    }

    alias WasmSection = WasmWriter.WasmSection;
    this(WasmWriter writer) @nogc pure nothrow {
        this.writer = writer;
    }

    alias GlobalDesc = WasmSection.ImportType.ImportDesc.GlobalDesc;
    alias Global = WasmSection.Global;
    alias Type = WasmSection.Type;
    alias Function = WasmSection.Function;
    alias Code = WasmSection.Code;
    alias GlobalType = WasmSection.GlobalType;
    alias FuncType = WasmSection.FuncType;
    alias TypeIndex = WasmSection.TypeIndex;
    alias CodeType = WasmSection.CodeType;
    alias ExportType = WasmSection.ExportType;
    alias CustomType = WasmSection.Custom;

    enum ParserStage {
        BASE,
        COMMENT,
        ASSERT,
        MODULE,
        TYPE,
        FUNC,
        PARAM,
        RESULT,
        FUNC_BODY,
        CODE,
        END_FUNC,
        EXPORT,
        IMPORT,
        MEMORY,
        EXPECTED,
        END,
        UNDEFINED,
    }

    struct Register {
        int idx;
        Types type;
    }

    private ParserStage parseInstr(ref WastTokenizer r,
            const ParserStage stage,
            ref CodeType code_type,
            ref const(FuncType) func_type,
            ref scope int[string] params) {
        import std.outbuffer;
        import tagion.wasm.WasmExpr;

        immutable number_of_func_arguments = func_type.params.length;
        scope immutable(Types)[] locals = func_type.params;
        auto bout = new OutBuffer;
        auto wasmexpr = WasmExpr(bout);
        // Resolves a local either by its $name (via params) or by a numeric index.
        int getLocal(string text) @trusted {
            int result = params[text].ifThrown!RangeError(int(-1));
            if (result < 0) {
                result = text
                    .to!int
                    .ifThrown!ConvException(-1);
                r.check(result >= 0);
            }
            return result;
        }

        int getFuncIdx() @trusted {
            int innerFunc(string text) {
                int result = func_idx[text].ifThrown!RangeError(int(-1));
                if (result < 0) {
                    result = text.to!int
                        .ifThrown!ConvException(-1);
                    r.check(result >= 0, format("Invalid function %s name or index", text));
                }
                return result;
            }

            switch (r.type) {
            case TokenType.WORD:
                return innerFunc(r.token);
            case TokenType.STRING:
                if (writer.mod[Section.EXPORT] !is null) {
                    auto export_found = writer.mod[Section.EXPORT].sectypes
                        .find!(exp => exp.name == r.token.stripQuotes);
                    if (!export_found.empty) {
                        return export_found.front.idx;
                    }
                }
                break;
            default:
                // empty
            }
            r.check(0, format("Export %s is not defined", r.token));
            return -1;
        }

        // writefln("%s %s", __FUNCTION__, params.dup);
        ParserStage innerInstr(ref WastTokenizer r, const ParserStage) {
            r.check(r.type == TokenType.BEGIN);
            scope (exit) {
                r.check(r.type == TokenType.END);
                r.nextToken;
            }
            r.nextToken;
            r.check(r.type == TokenType.WORD);
            const instr = instrWastLookup.get(r.token, Instr.init);
            string label;
            if (instr !is Instr.init) {
                with (IRType) {
                    final switch (instr.irtype) {
                    case CODE:
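                        // Plain instruction: parse the expected number of nested
                        // operand expressions first (instr.pops), so their code is
                        // emitted before this opcode, then append the opcode itself.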
                        r.nextToken;
                        foreach (i; 0 .. instr.pops) {
                            innerInstr(r, ParserStage.CODE);
                        }
                        wasmexpr(irLookupTable[instr.name]);
                        break;
                    case BLOCK:
                        string arg;
                        r.nextToken;
                        if (r.type == TokenType.WORD) {
                            //r.check(r.type == TokenType.WORD);
                            label = r.token;
                            r.nextToken;
                        }
                        if (r.type == TokenType.WORD) {
                            arg = r.token;
                            r.nextToken;
                        }
                        while (r.type == TokenType.BEGIN) {
                            innerInstr(r, ParserStage.CODE);
                        }
                        return stage;
                    case BRANCH:
                        r.nextToken;
                        if (r.type == TokenType.WORD) {
                            label = r.token;
                            r.nextToken;
                        }
                        while (r.type == TokenType.BEGIN) {
                            innerInstr(r, ParserStage.CODE);
                        }
                        break;
                    case BRANCH_IF:
                        r.nextToken;
                        innerInstr(r, ParserStage.CODE);
                        r.check(r.type == TokenType.WORD);
                        label = r.token;
                        r.nextToken;
                        if (r.type == TokenType.BEGIN) {
                            innerInstr(r, ParserStage.CODE);
                        }
                        break;
                    case BRANCH_TABLE:
                        break;
                    case CALL:
                        r.nextToken;
                        const idx = getFuncIdx();
                        //writefln("CALL %s %d", r.token, idx);
                        label = r.token;
                        r.nextToken;
                        while (r.type == TokenType.BEGIN) {
                            innerInstr(r, ParserStage.CODE);
                        }
                        wasmexpr(IR.CALL, idx);
                        break;
                    case CALL_INDIRECT:
                        break;
                    case LOCAL:
                        r.nextToken;
                        label = r.token;
                        r.check(r.type == TokenType.WORD);
                        const local_idx = getLocal(r.token);
                        wasmexpr(irLookupTable[instr.name], local_idx);
                        r.nextToken;
                        foreach (i; 0 .. instr.pops) {
                            innerInstr(r, ParserStage.CODE);
                        }
                        break;
                    case GLOBAL:
                        r.nextToken;
                        label = r.token;
                        r.check(r.type == TokenType.WORD);
                        r.nextToken;
                        break;
                    case MEMORY:
                        r.nextToken;
                        for (uint i = 0; (i < 2) && (r.type == TokenType.WORD); i++) {
                            label = r.token; // Fix this later
                            r.nextToken;
                        }
                        foreach (i; 0 .. instr.pops) {
                            innerInstr(r, ParserStage.CODE);
                        }
                        break;
                    case MEMOP:
                        r.nextToken;
                        foreach (i; 0 .. instr.pops) {
                            innerInstr(r, ParserStage.CODE);
                        }
                        break;
                    case CONST:
                        r.nextToken;
                        r.check(r.type == TokenType.WORD);
                        const ir = irLookupTable[instr.name];
                        with (IR) switch (ir) {
                        case I32_CONST:
                            wasmexpr(ir, r.get!int);
                            break;
                        case I64_CONST:
                            wasmexpr(ir, r.get!long);
                            break;
                        case F32_CONST:
                            wasmexpr(ir, r.get!float);
                            break;
                        case F64_CONST:
                            wasmexpr(ir, r.get!double);
                            break;
                        default:
                            r.check(0, "Bad const instruction");
                        }
                        //label = r.token;
                        r.nextToken;
                        break;
                    case END:
                        break;
                    case PREFIX:
                        break;
                    case ILLEGAL:
                        throw new WasmException("Undefined instruction %s", r.token);
                        break;
                    case SYMBOL:
                        r.nextToken;
                        string[] labels;
                        for (uint i = 0; (instr.push == uint.max) ? r.type == TokenType.WORD : i < instr.push; i++) {
                            labels ~= r.token;
                            r.nextToken;
                        }
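                        // Any nested s-expressions that follow are parsed as the
                        // operands of this pseudo instruction.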
                        for (uint i = 0; (instr.pops == uint.max) ? r.type == TokenType.BEGIN : i < instr.pops; i++) {
                            innerInstr(r, ParserStage.CODE);
                        }
                        switch (instr.wast) {
                        case PseudoWastInstr.local:
                            r.check(labels.length >= 1);
                            if ((labels.length == 2) && (labels[1].getType !is Types.EMPTY)) {
                                params[labels[0]] = cast(int) locals.length;
                                locals ~= labels[1].getType;
                                break;
                            }
                            locals ~= labels.map!(l => l.getType).array;
                            break;
                        default:
                        }
                    }
                }
            }
            else {
                r.check(false);
            }
            return stage;
        }

        scope (exit) {
            code_type = CodeType(locals[number_of_func_arguments .. $], wasmexpr.serialize);
        }
        return innerInstr(r, stage);
    }

    private ParserStage parseModule(ref WastTokenizer r, const ParserStage stage) {
        if (r.type == TokenType.COMMENT) {
            r.nextToken;
        }
        if (r.type == TokenType.BEGIN) {
            string label;
            string arg;
            r.nextToken;
            bool not_ended;
            scope (exit) {
                r.check(r.type == TokenType.END || not_ended);
                r.nextToken;
            }
            switch (r.token) {
            case "module":
                r.check(stage < ParserStage.MODULE);
                r.nextToken;
                while (r.type == TokenType.BEGIN) {
                    parseModule(r, ParserStage.MODULE);
                }
                return ParserStage.MODULE;
            case "type":
                r.nextToken;
                if (r.type == TokenType.WORD) {
                    label = r.token;
                    r.nextToken;
                }
                parseModule(r, ParserStage.TYPE);
                return stage;
            case "func": // Example (func $name (param ...) (result i32) )
                return parseTypeSection(r, stage);
            case "param": // Example (param $y i32)
                r.nextToken;
                if (stage == ParserStage.IMPORT) {
                    Types[] wasm_types;
                    while (r.token.getType !is Types.EMPTY) {
                        wasm_types ~= r.token.getType;
                        r.nextToken;
                    }
                }
                else {
                    r.check(stage == ParserStage.FUNC);
                    if (r.type == TokenType.WORD && r.token.getType is Types.EMPTY) {
                        label = r.token;
                        r.nextToken;
                        r.check(r.type == TokenType.WORD);
                    }
                    while (r.type == TokenType.WORD && r.token.getType !is Types.EMPTY) {
                        arg = r.token;
                        r.nextToken;
                    }
                }
                return ParserStage.PARAM;
            case "result":
                r.check(stage == ParserStage.FUNC);
                r.nextToken;
                r.check(r.type == TokenType.WORD);
                arg = r.token;
                r.nextToken;
                return ParserStage.RESULT;
            case "memory":
                r.check(stage == ParserStage.MODULE);
                r.nextToken;
                r.check(r.type == TokenType.WORD);
                label = r.token;
                r.nextToken;
                if (r.type == TokenType.WORD) {
                    arg = r.token;
                    r.nextToken;
                }
                while (r.type == TokenType.BEGIN) {
                    parseModule(r, ParserStage.MEMORY);
                }
                return ParserStage.MEMORY;
            case "segment":
                r.nextToken;
                r.check(r.type == TokenType.WORD);
                label = r.token;
                r.nextToken;
                r.check(r.type == TokenType.STRING);
                arg = r.token;
                r.nextToken;
                break;
            case "export":
                ExportType export_type;
                scope (exit) {
                    writer.section!(Section.EXPORT).sectypes ~= export_type;
                }
                r.check(stage == ParserStage.MODULE);
                r.nextToken;
                r.check(r.type == TokenType.STRING);
                export_type.name = r.token.stripQuotes;
                r.nextToken;
                r.check(r.type == TokenType.WORD);
                export_type.desc = IndexType.FUNC;
                export_type.idx = func_idx.get(r.token, -1);
                r.check(export_type.idx >= 0);
                r.nextToken;
                return ParserStage.EXPORT;
            case "import":
                string arg2;
                r.nextToken;
                r.check(r.type == TokenType.WORD);
                label = r.token;
                r.nextToken;
                r.check(r.type == TokenType.STRING);
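                // The two strings are the import's module name followed by the
                // field name, as in (import "env" "print" (func $print (param i32))).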
                arg = r.token;
                r.nextToken;
                r.check(r.type == TokenType.STRING);
                arg2 = r.token;
                r.nextToken;
                FuncType func_type;
                scope int[string] params;
                const ret = parseFuncArgs(r, ParserStage.IMPORT, func_type, params);
                r.check(ret == ParserStage.TYPE || ret == ParserStage.PARAM);
                return stage;
            case "assert_return":
            case "assert_return_nan":
                r.check(stage == ParserStage.BASE);
                Assert assert_type;
                assert_type.method = Assert.Method.Return;
                assert_type.name = r.token;
                r.nextToken;
                FuncType func_type;
                CodeType code_invoke;
                CodeType code_result;
                scope int[string] params;
                // Invoke call
                parseInstr(r, ParserStage.ASSERT, code_invoke, func_type, params);
                if (r.type == TokenType.BEGIN) {
                    parseInstr(r, ParserStage.EXPECTED, code_result, func_type, params);
                }
                assert_type.invoke = code_invoke.serialize;
                assert_type.result = code_result.serialize;
                wast_assert.asserts ~= assert_type;
                return ParserStage.ASSERT;
            case "assert_trap":
                r.check(stage == ParserStage.BASE);
                Assert assert_type;
                assert_type.method = Assert.Method.Trap;
                assert_type.name = r.token;
                label = r.token;
                r.nextToken;
                FuncType func_type;
                CodeType code_invoke;
                scope int[string] params;
                // Invoke call
                parseInstr(r, ParserStage.ASSERT, code_invoke, func_type, params);
                assert_type.invoke = code_invoke.serialize;
                r.check(r.type == TokenType.STRING);
                assert_type.message = r.token;
                wast_assert.asserts ~= assert_type;
                r.nextToken;
                return ParserStage.ASSERT;
            case "assert_invalid":
                r.check(stage == ParserStage.BASE);
                r.nextToken;
                parseModule(r, ParserStage.ASSERT);
                r.check(r.type == TokenType.STRING);
                arg = r.token;
                r.nextToken;
                return ParserStage.ASSERT;
            default:
                if (r.type == TokenType.COMMENT) {
                    r.nextToken;
                    return ParserStage.COMMENT;
                }
                not_ended = true;
                r.nextToken;
                return ParserStage.UNDEFINED;
            }
        }
        if (r.type == TokenType.COMMENT) {
            r.nextToken;
        }
        return ParserStage.END;
    }

    private ParserStage parseFuncArgs(
            ref WastTokenizer r,
            const ParserStage stage,
            ref FuncType func_type,
            ref scope int[string] params) {
        if (r.type == TokenType.BEGIN) {
            //string label;
            string arg;
            r.nextToken;
            bool not_ended;
            scope (exit) {
                r.check(r.type == TokenType.END || not_ended);
                r.nextToken;
            }
            switch (r.token) {
            case "type":
                r.nextToken;
                r.check(r.type == TokenType.WORD);
                //label = r.token;
                r.nextToken;
                return ParserStage.TYPE;
            case "param": // Example (param $y i32)
                r.nextToken;
                if (stage == ParserStage.IMPORT) {
                    while (r.token.getType !is Types.EMPTY) {
                        func_type.params ~= r.token.getType;
                        r.nextToken;
                    }
                }
                else {
                    r.check(stage == ParserStage.FUNC);
                    if (r.type == TokenType.WORD && r.token.getType is Types.EMPTY) {
                        const label = r.token;
                        r.nextToken;
                        r.check(r.type == TokenType.WORD);
                        params[label] = cast(int) func_type.params.length;
                        func_type.params ~= r.token.getType;
                        r.check(r.token.getType !is Types.EMPTY);
                        r.nextToken;
                    }
                    while (r.type == TokenType.WORD && r.token.getType !is Types.EMPTY) {
                        func_type.params ~= r.token.getType;
                        //arg = r.token;
                        r.nextToken;
                    }
                }
                return ParserStage.PARAM;
            case "result":
                r.check(stage == ParserStage.FUNC);
                r.nextToken;
                r.check(r.type == TokenType.WORD);
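                // Only a single result type is read here (one type token after "result").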
                //arg = r.token;
                func_type.results = [r.token.getType];
                r.check(r.token.getType !is Types.EMPTY);
                r.nextToken;
                return ParserStage.RESULT;
            default:
                not_ended = true;
                r.nextToken;
                return ParserStage.UNDEFINED;
            }
        }
        return ParserStage.UNDEFINED;
    }

    private ParserStage parseTypeSection(ref WastTokenizer r, const ParserStage stage) {
        CodeType code_type;
        //writeln("Function code");
        scope (exit) {
            const type_index = cast(uint) writer.section!(Section.CODE).sectypes.length;
            writer.section!(Section.FUNCTION).sectypes ~= TypeIndex(type_index);
            writer.section!(Section.CODE).sectypes ~= code_type;
            //writefln("%s code.length=%s %s", Section.CODE, code_type.expr.length, writer.section!(Section.CODE).sectypes.length);
        }

        r.check(stage < ParserStage.FUNC);
        auto type_section = writer.section!(Section.TYPE);

        const type_idx = cast(int) type_section.sectypes.length;
        FuncType func_type;
        func_type.type = Types.FUNC;
        scope int[string] params;
        //scope Types[] locals;
        scope (exit) {
            type_section.sectypes ~= func_type;
        }

        r.nextToken;
        if (r.type == TokenType.WORD) {
            func_idx[r.token] = type_idx;
            r.nextToken;
        }
        ParserStage arg_stage;
        WastTokenizer rewined;
        uint only_one_type_allowed;
        do {
            rewined = r.save;
            arg_stage = parseFuncArgs(r, ParserStage.FUNC, func_type, params);
            only_one_type_allowed += (only_one_type_allowed > 0) || (arg_stage == ParserStage.TYPE);
        }
        while ((arg_stage == ParserStage.PARAM) || (only_one_type_allowed == 1));
        //auto result_r=r.save;
        if (arg_stage != ParserStage.TYPE && arg_stage != ParserStage.RESULT ||
                arg_stage == ParserStage.UNDEFINED) {
            r = rewined;
        }
        while (r.type == TokenType.BEGIN) {
            const ret = parseInstr(r, ParserStage.FUNC_BODY, code_type, func_type, params);
            r.check(ret == ParserStage.FUNC_BODY);
        }
        return ParserStage.FUNC;
    }

    private {
        int[string] func_idx;
    }

    void parse(ref WastTokenizer tokenizer) {
        while (parseModule(tokenizer, ParserStage.BASE) !is ParserStage.END) {
            //empty
        }
        writeCustomAssert;
    }
}

version (WAST) @safe
unittest {
    import std.file : readText;
    import std.stdio;
    import tagion.basic.basic : unitfile;

    immutable wast_test_files = [
        "i32.wast",
        /*
        "f32.wast",
        "i64.wast",
        "f64.wast",
        "f32_cmp.wast",
        "f64_cmp.wast",
        "float_exprs.wast",
        "unreachable.wast",
        "float_literals.wast",
        "float_memory.wast",
        "float_misc.wast",
        "conversions.wast",
        "endianness.wast",
        "traps.wast",
        "runaway-recursion.wast",
        "nan-propagation.wast",
        "forward.wast",
        "func_ptrs.wast",
        "functions.wast",
        // "has_feature.wast",
        "imports.wast",
        "int_exprs.wast",
        "int_literals.wast",
        "labels.wast",
        "left-to-right.wast",
        "memory_redundancy.wast",
        "memory_trap.wast",
        "memory.wast",
        "resizing.wast",
        "select.wast",
        "store_retval.wast",
        "switch.wast",
        */
    ];
    version (none) immutable wast_test_files = [
        "unreachable.wast",
        "float_literals.wast",
        "float_memory.wast",
        "float_misc.wast",
        "conversions.wast",
        "endianness.wast",
        "traps.wast",
        "runaway-recursion.wast",
        "nan-propagation.wast",
        "forward.wast",
        "func_ptrs.wast",
        "functions.wast",
        "has_feature.wast",
        "imports.wast",
"int_exprs.wast", 675 "int_literals.wast", 676 "labels.wast", 677 "left-to-right.wast", 678 "memory_redundancy.wast", 679 "memory_trap.wast", 680 "memory.wast", 681 "resizing.wast", 682 "select.wast", 683 "store_retval.wast", 684 "switch.wast", 685 ]; 686 import std.file : fwrite = write; 687 688 foreach (wast_file; wast_test_files) { 689 immutable wast_text = wast_file.unitfile.readText; 690 auto tokenizer = WastTokenizer(wast_text); 691 auto writer = new WasmWriter; 692 auto wast_parser = WastParser(writer); 693 wast_parser.parse(tokenizer); 694 if (wast_file == "i32.wast") { 695 "/tmp/i32.wasm".fwrite(writer.serialize); 696 } 697 } 698 699 }