-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathtokenizer.js
125 lines (122 loc) · 4.18 KB
/
tokenizer.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"use strict";
/**
 * Splits assembly-language source text into a flat array of tokens.
 *
 * Every token is a `TreeNode` constructed as `new TreeNode(text, line)`,
 * where `line` is the 1-based line the token was found on (columns are not
 * tracked). The rules applied are:
 *  - `;` outside of a string starts a comment that runs to the end of the
 *    line; the comment is replaced by a single `"\n"` token.
 *  - `"..."` is one token, quotes included. Whitespace, `;`, `:` and the
 *    operator characters have no special meaning inside a string.
 *  - Spaces and tabs separate tokens; new-lines are emitted as `"\n"` tokens
 *    because the language is whitespace-sensitive.
 *  - Each of `( ) [ ] { } , / * - + ^ < > = & | ? :` is a token of its own.
 *  - A `:` that ends a line, or that follows the first token of a line, is
 *    glued to the preceding token (labels are single tokens).
 *  - `invertbits`, `bitand`, `bitor` and `mod` (case-insensitive) directly
 *    followed by `(` are merged with that parenthesis into one token.
 * The returned array always ends with a `"\n"` token, even for empty input.
 *
 * @param {string} input The source text to tokenize.
 * @returns {TreeNode[]} The tokens, or an empty array if a string literal is
 *     not closed before the end of its line (the error is reported using
 *     `alert`).
 */
function tokenize(input) {
  const operatorCharacters = "()[]{},/*-+^<>=&|?:";
  const preprocessorFunctions = [ "invertbits", "bitand", "bitor", "mod" ];
  let tokenized = [];
  let areWeInAString = false;
  let areWeInAComment = false;
  let currentLine = 1; // Don't care about columns, lines in assembly language
                       // are always short.
  let currentToken = "";
  const pushToken = (text) => {
    tokenized.push(new TreeNode(text, currentLine));
  };

  for (let i = 0; i < input.length; i++) {
    const character = input[i];

    if (areWeInAComment) {
      // Everything up to the end of the line is ignored, including any
      // further semicolons. The "\n" token for this line was already emitted
      // when the comment started.
      if (character === "\n") {
        areWeInAComment = false;
        currentLine++;
      }
      continue;
    }

    if (areWeInAString) {
      if (character === "\n") {
        alert("Unterminated string literal on line " + currentLine);
        return [];
      }
      // Inside a string every character (including whitespace, ';', ':' and
      // operators) belongs to the string token.
      // https://github.com/FlatAssembler/PicoBlaze_Simulator_in_JS/issues/5
      currentToken += character;
      if (character === '"') {
        areWeInAString = false;
        pushToken(currentToken);
        currentToken = "";
      }
      continue;
    }

    if (character === ";") {
      areWeInAComment = true;
      pushToken(currentToken);
      pushToken("\n");
      // Reset, so that the token is not emitted a second time if the input
      // ends before the comment does.
      currentToken = "";
      continue;
    }
    if (character === '"') {
      areWeInAString = true;
      pushToken(currentToken);
      currentToken = '"';
      continue;
    }
    if (character === "\n") {
      pushToken(currentToken);
      currentToken = "";
      // Because assembly language is a whitespace-sensitive language, the
      // new-line characters are tokens visible to the parser.
      pushToken("\n");
      currentLine++;
      continue;
    }
    if (character === " " || character === "\t") {
      pushToken(currentToken);
      currentToken = "";
      continue;
    }
    if (operatorCharacters.includes(character)) {
      pushToken(currentToken);
      pushToken(character);
      currentToken = "";
      continue;
    }
    currentToken += character;
  }

  if (currentToken.length) {
    pushToken(currentToken);
    pushToken("\n");
  }
  // Guarantee a terminating new-line token; the length check covers input
  // that produced no tokens at all (for example, an empty string).
  if (tokenized.length === 0 ||
      tokenized[tokenized.length - 1].text !== "\n")
    pushToken("\n");

  // The loop above pushes `currentToken` without checking whether it is
  // empty, so drop the empty tokens here.
  tokenized = tokenized.filter((token) => token.text !== "");

  // Labels are single tokens.
  for (let i = 1; i < tokenized.length; i++) {
    if (tokenized[i].text !== ":")
      continue;
    // A colon at the very beginning of a line has no token to attach to;
    // leave it as it is.
    if (tokenized[i - 1].text === "\n")
      continue;
    const isAtTheEndOfLine =
        i + 1 >= tokenized.length || tokenized[i + 1].text === "\n";
    const followsFirstTokenOfLine =
        i < 2 // https://github.com/FlatAssembler/PicoBlaze_Simulator_in_JS/issues/32
        ||
        tokenized[i - 2].text ===
            "\n"; // https://github.com/FlatAssembler/PicoBlaze_Simulator_in_JS/issues/31
    if (isAtTheEndOfLine || followsFirstTokenOfLine) {
      tokenized[i - 1].text += ":";
      tokenized.splice(i, 1);
      i--;
    }
  }

  // Functions in the preprocessor.
  for (let i = 0; i < tokenized.length - 1; i++)
    if (preprocessorFunctions.includes(tokenized[i].text.toLowerCase()) &&
        tokenized[i + 1].text === "(") {
      tokenized[i].text += "(";
      tokenized.splice(i + 1, 1);
    }
  return tokenized;
}