/// Tokenizer for the WebAssembly text format (.wast files).
module tagion.wasm.WastTokenizer;

import std.traits;
import tagion.basic.Debug;

/// Character constants used by the tokenizer.
enum Chars : char {
    NUL = '\0',
    SPACE = char(0x20),
    DOUBLE_QUOTE = '"',
    PARENTHESES_BEGIN = '(',
    PARENTHESES_END = ')',
    DEL = char(127),
    NEWLINE = '\n',
    SEMICOLON = ';',
}

/// Classification of the current token.
enum TokenType {
    EOF, /// End of input
    BEGIN, /// '('
    END, /// ')'
    COMMENT, /// Line comment ";;" or block comment "(; ... ;)"
    WORD, /// Unquoted word token
    STRING, /// Double quoted string
}

@safe @nogc pure nothrow {
    /// Returns: true if ch can be part of an unquoted word token.
    bool isWordChar(const char ch) {
        with (Chars) {
            return (ch > SPACE) && (ch < DEL) &&
                (ch != DOUBLE_QUOTE) && (ch != PARENTHESES_BEGIN) && (ch != PARENTHESES_END);
        }
    }

    /// Returns: true if ch is allowed inside a double quoted string.
    bool isStringChar(const char ch) {
        with (Chars) {
            return (ch >= SPACE) && (ch < DEL) && (ch != DOUBLE_QUOTE);
        }
    }

    /// Returns: true if ch is whitespace or a control character (token separator).
    bool isInvisiable(const char ch) {
        with (Chars) {
            return (ch <= SPACE) || (ch == DEL);
        }
    }

    /// Strips a leading and trailing double quote from text; unquoted text is returned unchanged.
    string stripQuotes(string text) {
        if (text.length >= 2) {
            return text[text[0] == '"' .. $ - (text[$ - 1] == '"')];
        }
        return text;
    }
}
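
// Hedged usage sketch of the helper functions above (added for illustration; the
// input literals are assumptions, not taken from the original test suite).
@safe pure nothrow unittest {
    assert(isWordChar('i') && !isWordChar('(') && !isWordChar(' '));
    assert(isStringChar(' ') && !isStringChar('"'));
    assert(isInvisiable('\n') && !isInvisiable('a'));
    assert(stripQuotes(`"hello"`) == "hello");
    assert(stripQuotes("word") == "word");
}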

@safe
struct WastTokenizer {
    /// Human readable token description used for error reporting.
    string toString() const pure nothrow @trusted {
        import std.exception : assumeWontThrow;
        import std.format;

        return assumeWontThrow(format("%s:%s:%d:%d", token, type, line, line_pos));
    }

    /// Prints an error message with the current token position if flag is false (never throws).
    void check(const bool flag, string msg = null, string file = __FILE__, const size_t code_line = __LINE__) nothrow {
        import std.exception : assumeWontThrow;
        import std.stdio;

        if (!flag) {
            assumeWontThrow((() {
                    writefln("Error:%s %s:%s:%d:%d", msg, token, type, line, line_pos);
                    writefln("%s:%d", file, code_line);
                })());
        }
    }

    /// Converts the current token to an integral type (supports the "0x" hexadecimal prefix).
    T get(T)() nothrow if (isIntegral!T) {
        import std.algorithm.comparison : min;
        import std.conv;

        try {
            enum hex_prefix = "0x";
            if (token[0 .. min(hex_prefix.length, $)] == hex_prefix) {
                return cast(T)(token[hex_prefix.length .. $].to!(Unsigned!T)(16));
            }
            return token.to!T;
        }
        catch (Exception e) {
            check(false, e.msg);
        }
        return T.init;
    }

    /// Converts the current token to a floating point type.
    T get(T)() nothrow if (isFloatingPoint!T) {
        import std.format;

        try {
            const spec = singleSpec("%f");
            auto number = token;
            return unformatValue!T(number, spec);
        }
        catch (Exception e) {
            check(false, e.msg);
        }
        return T.init;
    }
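
    // Hedged sketch of the numeric getters above (added for illustration; the token
    // values "0x2a" and "-7" are assumed inputs, not taken from the original tests).
    unittest {
        auto tok = WastTokenizer("0x2a -7");
        assert(tok.get!int == 42); // hexadecimal prefix path
        tok.popFront;
        assert(tok.get!long == -7); // plain decimal path
    }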

    private string text; /// The whole source text being tokenized
    string token; /// Slice of text holding the current token
    uint line; /// Current line number (1 based)
    uint pos; /// Current byte position in text
    uint start_line_pos; /// Position in text where the current line starts
    @nogc pure nothrow {
        this(string text) {
            line = 1;
            this.text = text;
            popFront;
        }

        /// Returns: true when the whole text has been consumed.
        bool empty() const {
            return pos >= text.length;
        }

        /// InputRange primitive: the tokenizer itself acts as the current element.
        const(WastTokenizer) front() const {
            return this;
        }

        /// Consumes and returns the current character (keeps track of line numbers).
        char next() {
            if (!empty) {
                scope (exit) {
                    pos++;
                }

                if (text[pos] == Chars.NEWLINE) {
                    start_line_pos = pos + 1;
                    line++;
                }
                return text[pos];
            }
            return Chars.NUL;
        }

        /// Consumes characters while the predicate given by fun holds for the current character.
        void nextUntil(string fun, string paramName = "a")() {
            import std.format;

            enum code = format(q{
                alias goUntil=(%1$s) => %2$s;
                while(!empty && goUntil(text[pos])) {
                    next;
                }
            }, paramName, fun);
            mixin(code);
        }

        /// Returns: the column position within the current line.
        uint line_pos() const {
            return pos - start_line_pos;
        }

        /// Classifies the current token.
        TokenType type() const {
            if (empty) {
                return TokenType.EOF;
            }
            with (Chars) {
                switch (token[0]) {
                case NUL:
                    return TokenType.EOF;
                case PARENTHESES_BEGIN:
                    if (token.length > 1 && token[1] == SEMICOLON) {
                        return TokenType.COMMENT;
                    }
                    return TokenType.BEGIN;
                case PARENTHESES_END:
                    return TokenType.END;
                case SEMICOLON:
                    return TokenType.COMMENT;
                case DOUBLE_QUOTE:
                    return TokenType.STRING;
                default:
                    return TokenType.WORD;
                }
            }
            assert(0);
        }

        /// Advances to the next token (comments are returned as tokens, see nextToken).
        void popFront() {
            trim;
            const begin_pos = pos;
            with (Chars) {
                switch (currentChar) {
                case PARENTHESES_BEGIN:
                    next;
                    if (!empty && text[pos] == SEMICOLON) {
                        // Block comment "(; ... ;)"; the scan stops at the first ')'
                        next;
                        nextUntil!q{a != Chars.PARENTHESES_END};
                        next;
                    }
                    break;
                case PARENTHESES_END:
                    next;
                    break;
                case SEMICOLON:
                    // Line comment ";; ..." running to the end of the line
                    next;
                    nextUntil!q{a == Chars.SEMICOLON};
                    nextUntil!q{a != Chars.NEWLINE};
                    next;
                    break;
                case DOUBLE_QUOTE:
                    // Double quoted string
                    next;
                    nextUntil!q{a != Chars.DOUBLE_QUOTE};
                    next;
                    break;
                default:
                    // Unquoted word
                    nextUntil!q{a.isWordChar};
                }
                token = text[begin_pos .. pos];
            }
        }

        /// Like popFront except that it skips comment tokens.
        void nextToken() {
            do {
                popFront;
            }
            while (type == TokenType.COMMENT);
        }

        /// Skips whitespace and control characters.
        void trim() {
            nextUntil!q{a.isInvisiable};
            // Disabled alternative implementation
            version (none)
                while (!empty && text[pos].isInvisiable) {
                    if (text[pos] == Chars.NEWLINE) {
                        start_line_pos = pos + 1;
                        line++;
                    }
                    pos++;
                }
        }

        /// Returns: the current character, or NUL when the input is exhausted.
        char currentChar() const {
            if (!empty) {
                return text[pos];
            }
            return '\0';
        }

        /// ForwardRange primitive: returns a copy of the tokenizer state.
        WastTokenizer save() {
            return this;
        }
    }
}
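
// Hedged usage sketch of the tokenizer itself (added for illustration; the WAT snippet
// below is an assumed input, not one of the original .wast test files).
@safe unittest {
    auto tok = WastTokenizer("(module (memory 1))");
    assert(tok.type == TokenType.BEGIN && tok.token == "(");
    tok.nextToken;
    assert(tok.type == TokenType.WORD && tok.token == "module");
    tok.nextToken;
    assert(tok.type == TokenType.BEGIN);
    tok.nextToken;
    assert(tok.type == TokenType.WORD && tok.token == "memory");
}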

version (unittest) {
    import std.file : readText;
    import tagion.basic.basic : unitfile;

    immutable(string) wast_text;
    shared static this() {
        //        wast_text = "i32.wast".unitfile.readText;
        //wast_text = "f32.wast".unitfile.readText;
        //wast_text = "i64.wast".unitfile.readText;
        // wast_text = "f64.wast".unitfile.readText;
        //wast_text = "f32_cmp.wast".unitfile.readText;
        //wast_text = "f64_cmp.wast".unitfile.readText;
        //wast_text = "float_exprs.wast".unitfile.readText;
        //wast_text = "unreachable.wast".unitfile.readText;
        //wast_text = "float_literals.wast".unitfile.readText;
        //wast_text = "float_memory.wast".unitfile.readText;
        //wast_text = "float_misc.wast".unitfile.readText;
        //wast_text = "conversions.wast".unitfile.readText;
        //wast_text = "endianness.wast".unitfile.readText;
        //wast_text = "traps.wast".unitfile.readText;
        //wast_text = "runaway-recursion.wast".unitfile.readText;
        //wast_text = "nan-propagation.wast".unitfile.readText;
        // wast_text = "forward.wast".unitfile.readText;
        //wast_text = "func_ptrs.wast".unitfile.readText;
        //        wast_text = "functions.wast".unitfile.readText;
        /// -- wast_text = "has_feature.wast".unitfile.readText;
        //wast_text = "imports.wast".unitfile.readText;
        //wast_text = "int_exprs.wast".unitfile.readText;
        //wast_text = "int_literals.wast".unitfile.readText;
        //wast_text = "labels.wast".unitfile.readText;
        //        wast_text = "left-to-right.wast".unitfile.readText;
        //wast_text = "memory_redundancy.wast".unitfile.readText;
        //        wast_text = "memory_trap.wast".unitfile.readText;
        //wast_text = "memory.wast".unitfile.readText;
        //wast_text = "resizing.wast".unitfile.readText;
        //wast_text = "select.wast".unitfile.readText;
        //wast_text = "store_retval.wast".unitfile.readText;
        wast_text = "switch.wast".unitfile.readText;
    }
}

version (WAST) @safe
unittest {
    import std.stdio;
    import tagion.basic.basic;

    //    writefln("Unitfile file %s", mangle!(WastParser)(""));
    //writefln("Unitfile file %s", wast_text);
    auto r = WastTokenizer(wast_text);
    while (!r.empty) {
        //        writefln("Token %s", r);
        r.popFront;
    }
}