module tagion.wasm.WastTokenizer;

import std.traits;
import tagion.basic.Debug;

/// Character constants used by the tokenizer.
enum Chars : char {
    NUL = '\0',
    SPACE = char(0x20),
    DOUBLE_QUOTE = '"',
    PARENTHESES_BEGIN = '(',
    PARENTHESES_END = ')',
    DEL = char(127),
    NEWLINE = '\n',
    SEMICOLON = ';',
}

/// Classification of the token currently held by the tokenizer.
enum TokenType {
    EOF,
    BEGIN,
    END,
    COMMENT,
    WORD,
    STRING,
}

@safe @nogc pure nothrow {
    /// True when ch may appear inside a bare word token
    /// (printable ASCII excluding space, double quote and parentheses).
    bool isWordChar(const char ch) {
        with (Chars) {
            return (ch > SPACE) && (ch < DEL) &&
                (ch != DOUBLE_QUOTE) && (ch != PARENTHESES_BEGIN) && (ch != PARENTHESES_END);
        }
    }

    /// True when ch may appear inside a quoted string token.
    bool isStringChar(const char ch) {
        with (Chars) {
            return (ch >= SPACE) && (ch < DEL) && (ch != DOUBLE_QUOTE);
        }
    }

    /// True for whitespace/control characters that separate tokens.
    /// NOTE(review): keeps the original (misspelled) public name for API compatibility.
    bool isInvisiable(const char ch) {
        with (Chars) {
            return (ch <= SPACE) || (ch == DEL);
        }
    }

    /// Removes a leading and/or trailing double quote when present.
    /// Fix: the original used `text.length > 2`, so the two-character
    /// empty string `""` was returned unstripped; `>= 2` strips it correctly.
    string stripQuotes(string text) {
        if (text.length >= 2) {
            return text[text[0] == '"' .. $ - (text[$ - 1] == '"')];
        }
        return text;
    }
}

/**
 * Simple forward-range tokenizer for the WebAssembly text format (WAST).
 * Produces parentheses, words, quoted strings and comments as tokens and
 * tracks line/column information for error reporting.
 */
@safe
struct WastTokenizer {
    /// Human readable description of the current token (for diagnostics).
    string toString() const pure nothrow @trusted {
        import std.exception : assumeWontThrow;
        import std.format;

        return assumeWontThrow(format("%s:%s:%d:%d", token, type, line, line_pos));

    }

    /// Reports an error message with token position when flag is false.
    /// Does not throw; intended for best-effort diagnostics during parsing.
    void check(const bool flag, string msg = null, string file = __FILE__, const size_t code_line = __LINE__) nothrow {
        import std.exception : assumeWontThrow;
        import std.stdio;

        if (!flag) {
            assumeWontThrow((() {
                writefln("Error:%s %s:%s:%d:%d", msg, token, type, line, line_pos);
                writefln("%s:%d", file, code_line);
            })());

        }
    }

    /**
     * Converts the current token to an integral value.
     * Accepts plain decimal as well as "0x"-prefixed hexadecimal.
     * Fix: an optional sign before a hex literal (e.g. "-0x10", used by
     * WAST integer literals) is now handled; std.conv cannot parse a
     * signed string together with an explicit radix.
     * On conversion failure an error is reported via check and T.init returned.
     */
    T get(T)() nothrow if (isIntegral!T) {
        import std.algorithm.comparison : min;
        import std.conv;

        try {
            enum hex_prefix = "0x";
            string num = token;
            bool negative;
            if (num.length && (num[0] == '-' || num[0] == '+')) {
                negative = (num[0] == '-');
                num = num[1 .. $];
            }
            if (num[0 .. min(hex_prefix.length, $)] == hex_prefix) {
                const magnitude = cast(T)(num[hex_prefix.length .. $].to!(Unsigned!T)(16));
                return negative ? cast(T)(-magnitude) : magnitude;
            }
            return token.to!T;
        }
        catch (Exception e) {
            check(false, e.msg);
        }
        return T.init;
    }

    /**
     * Converts the current token to a floating point value.
     * On conversion failure an error is reported via check and T.init returned.
     */
    T get(T)() nothrow if (isFloatingPoint!T) {
        import std.format;

        try {
            const spec = singleSpec("%f");
            auto number = token;
            return unformatValue!T(number, spec);
        }
        catch (Exception e) {
            check(false, e.msg);
        }
        return T.init;

    }

    private string text; /// Full input being tokenized
    string token; /// Slice of text holding the current token
    uint line; /// Current line number (1-based)
    uint pos; /// Current byte offset into text
    uint start_line_pos; /// Offset of the first byte of the current line
    @nogc pure nothrow {
        this(string text) {
            line = 1;
            this.text = text;
            popFront;
        }

        /// End of input reached.
        bool empty() const {
            return pos >= text.length;
        }

        /// The range "element" is the tokenizer itself (token + position info).
        const(WastTokenizer) front() const {
            return this;
        }

        /// Consumes and returns one character, maintaining line bookkeeping.
        /// Returns NUL at end of input.
        char next() {
            if (!empty) {
                scope (exit) {
                    pos++;
                }

                if (text[pos] == Chars.NEWLINE) {
                    start_line_pos = pos + 1;
                    line++;
                }
                return text[pos];
            }
            return Chars.NUL;
        }

        /// Consumes characters while the predicate fun (over paramName) holds.
        void nextUntil(string fun, string paramName = "a")() {
            import std.format;

            enum code = format(q{
                alias goUntil=(%1$s) => %2$s;
                while(!empty && goUntil(text[pos])) {
                    next;
                    // empty
                }
            }, paramName, fun);
            mixin(code);
        }

        /// Column of the current position within its line (0-based).
        uint line_pos() const {
            return pos - start_line_pos;
        }

        /// Classifies the current token from its first character.
        TokenType type() const {
            if (empty) {
                return TokenType.EOF;
            }
            with (Chars) {
                switch (token[0]) {
                case NUL:
                    return TokenType.EOF;
                case PARENTHESES_BEGIN:
                    // "(;" opens a block comment, otherwise a list begin.
                    if (token.length > 1 && token[1] == SEMICOLON) {
                        return TokenType.COMMENT;
                    }
                    return TokenType.BEGIN;
                case PARENTHESES_END:
                    return TokenType.END;
                case SEMICOLON:
                    return TokenType.COMMENT;
                case DOUBLE_QUOTE:
                    return TokenType.STRING;
                default:
                    return TokenType.WORD;
                }
            }
            assert(0);
        }

        /// Advances to the next token (comments are produced as tokens).
        void popFront() {
            trim;
            const begin_pos = pos;
            with (Chars) {
                switch (currentChar) {
                case PARENTHESES_BEGIN:
                    next; // consume '('
                    if (!empty && text[pos] == SEMICOLON) {
                        // Block comment "(; ... ;)".
                        // Fix: the original scan stopped at the first ')',
                        // mis-parsing comments that contain ')' and never
                        // supporting nesting; track depth and require the
                        // two-character ";)" terminator instead.
                        next; // consume ';'
                        uint depth = 1;
                        while (!empty && depth > 0) {
                            const ch = next;
                            if (ch == PARENTHESES_BEGIN && !empty && text[pos] == SEMICOLON) {
                                next;
                                depth++;
                            }
                            else if (ch == SEMICOLON && !empty && text[pos] == PARENTHESES_END) {
                                next;
                                depth--;
                            }
                        }
                    }
                    break;
                case PARENTHESES_END:
                    next;
                    break;

                case SEMICOLON:
                    // Line comment ";; ..." runs to end of line.
                    next;
                    nextUntil!q{a == Chars.SEMICOLON};
                    nextUntil!q{a != Chars.NEWLINE};
                    next; // consume the newline
                    break;

                case DOUBLE_QUOTE:
                    next; // consume opening quote
                    // Fix: honor backslash escapes so an escaped quote (\")
                    // does not terminate the string token prematurely.
                    while (!empty && text[pos] != DOUBLE_QUOTE) {
                        if (text[pos] == '\\') {
                            next; // skip the backslash; escaped char consumed below
                        }
                        next;
                    }
                    next; // consume closing quote
                    break;
                default:
                    // Bare word token.
                    nextUntil!q{a.isWordChar};
                }
                token = text[begin_pos .. pos];
            }
        }

        // Like popFront exception that it skips the Comment token
        void nextToken() {
            do {
                popFront;
            }
            while (type == TokenType.COMMENT);
        }

        /// Skips whitespace/control characters before the next token.
        /// (Removed dead `version (none)` duplicate of this loop; next()
        /// already maintains the line bookkeeping.)
        void trim() {
            nextUntil!q{a.isInvisiable};
        }

        /// Character at the current position, or NUL at end of input.
        char currentChar() const {
            if (!empty) {
                return text[pos];
            }
            return '\0';
        }

        /// Forward-range save: the struct is a value type, so a copy suffices.
        WastTokenizer save() {
            return this;
        }
    }
}

version (unittest) {
    import std.file : readText;
    import tagion.basic.basic : unitfile;

    immutable(string) wast_text;
    shared static this() {
        // wast_text = "i32.wast".unitfile.readText;
        //wast_text = "f32.wast".unitfile.readText;
        //wast_text = "i64.wast".unitfile.readText;
        // wast_text = "f64.wast".unitfile.readText;
        //wast_text = "f32_cmp.wast".unitfile.readText;
        //wast_text = "f64_cmp.wast".unitfile.readText;
        //wast_text = "float_exprs.wast".unitfile.readText;
        //wast_text = "unreachable.wast".unitfile.readText;
        //wast_text = "float_literals.wast".unitfile.readText;
        //wast_text = "float_memory.wast".unitfile.readText;
        //wast_text = "float_misc.wast".unitfile.readText;
        //wast_text = "conversions.wast".unitfile.readText;
        //wast_text = "endianness.wast".unitfile.readText;
        //wast_text = "traps.wast".unitfile.readText;
        //wast_text = "runaway-recursion.wast".unitfile.readText;
        //wast_text = "nan-propagation.wast".unitfile.readText;
        // wast_text = "forward.wast".unitfile.readText;
        //wast_text = "func_ptrs.wast".unitfile.readText;
        // wast_text = "functions.wast".unitfile.readText;
        /// -- wast_text = "has_feature.wast".unitfile.readText;
        //wast_text = "imports.wast".unitfile.readText;
        //wast_text = "int_exprs.wast".unitfile.readText;
        //wast_text = "int_literals.wast".unitfile.readText;
        //wast_text = "labels.wast".unitfile.readText;
        // wast_text = "left-to-right.wast".unitfile.readText;
        //wast_text = "memory_redundancy.wast".unitfile.readText;
        // wast_text = "memory_trap.wast".unitfile.readText;
        //wast_text = "memory.wast".unitfile.readText;
        //wast_text = "resizing.wast".unitfile.readText;
        //wast_text = "select.wast".unitfile.readText;
        //wast_text = "store_retval.wast".unitfile.readText;
        wast_text = "switch.wast".unitfile.readText;
    }
}

version (WAST) @safe
unittest {
    import std.stdio;
    import tagion.basic.basic;

    // writefln("Unitfile file %s", mangle!(WastParser)(""));
    //writefln("Unitfile file %s", wast_text);
    auto r = WastTokenizer(wast_text);
    while (!r.empty) {
        // writefln("Token %s", r);
        r.popFront;
    }
}