00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "ModelState.H"
00030 #include "Lex.H"
00031 #include "Location.H"
00032 #include "Expr.H"
00033 #include "Val.H"
00034 #include "Err.H"
00035 #include <Table.H>
00036 #include <iostream>
00037
00038 using std::istream;
00039
00040
00041 SrcLoc *noLoc = NEW(SrcLoc);
00042
00043
00044 istream *lexIn;
00045 char lookAhead1, lookAhead2;
00046 int lookaheads;
00047 int lineNumber, charNumber;
00048
00049 static TokenClass CharMap[300];
00050 static Table<Text,TokenClass>::Default ReservedWords(64);
00051 static SrcLoc *currentLoc = NEW(SrcLoc);
00052
00053 Token::Token()
00054 : bytesLength(128), length(0), loc(noLoc) {
00055 bytes = NEW_PTRFREE_ARRAY(char, 128);
00056 bytes[length] = 0;
00057 }
00058
00059 void Token::AppendChar(char c) {
00060 bytes[length++] = c;
00061 if (length == bytesLength) {
00062 bytesLength = bytesLength * 2;
00063 char *newBytes = NEW_PTRFREE_ARRAY(char, bytesLength);
00064 memcpy(newBytes, bytes, length);
00065 delete[] bytes;
00066 bytes = newBytes;
00067 }
00068 }
00069
00070 void Token::TokenAssign(Token& tk) {
00071 this->tclass = tk.tclass;
00072 this->expr = tk.expr;
00073 this->loc = tk.loc;
00074 char* tempBytes = this->bytes;
00075 int tempLength = this->length, tempBytesLength = this->bytesLength;
00076 this->bytes = tk.bytes;
00077 this->length = tk.length;
00078 this->bytesLength = tk.bytesLength;
00079 tk.bytes = tempBytes;
00080 tk.length = this->length;
00081 tk.bytesLength = tempBytesLength;
00082 }
00083
00084 char GetChar() {
00085 char res;
00086 switch (lookaheads) {
00087 case 0:
00088 res = (char)lexIn->get();
00089 if (res == '\n') {
00090 lineNumber++;
00091 charNumber = 0;
00092 }
00093 else
00094 charNumber++;
00095 break;
00096 case 1:
00097 res = lookAhead1;
00098 lookaheads--;
00099 break;
00100 case 2:
00101 res = lookAhead1;
00102 lookAhead1 = lookAhead2;
00103 lookaheads--;
00104 break;
00105 default:
00106 outputMu.lock();
00107 InternalError("GetChar");
00108 outputMu.unlock();
00109 }
00110 return res;
00111 }
00112
00113 inline static void UngetChar(char c) {
00114 switch (lookaheads) {
00115 case 0:
00116 lookAhead1 = c;
00117 lookaheads++;
00118 break;
00119 case 1:
00120 lookAhead2 = lookAhead1;
00121 lookAhead1 = c;
00122 lookaheads++;
00123 break;
00124 default:
00125 outputMu.lock();
00126 InternalError("UngetChar");
00127 outputMu.unlock();
00128 }
00129 }
00130
00131 static char SkipWhitespace() {
00132 while (true) {
00133 char c = GetChar();
00134 switch (c) {
00135 case ' ': case '\t': case '\f': case '\n': case '\r':
00136 break;
00137 default:
00138 return c;
00139 }
00140 }
00141 }
00142
00143
00144 void Token::ScanComment(char c) {
00145 bool done = false;
00146
00147
00148 if (c == '*') {
00149 while (!done) {
00150 c = GetChar();
00151 switch (c) {
00152 case '/':
00153 c = GetChar();
00154 if (c == '*') {
00155 SrcLoc loc(lineNumber, charNumber, currentLoc->file, currentLoc->shortId);
00156 outputMu.lock();
00157 Error("Nested /* comment.\n", &loc);
00158 outputMu.unlock();
00159 throw "\nParsing terminated.\n";
00160 }
00161 else
00162 UngetChar(c);
00163 break;
00164 case '*':
00165 c = GetChar();
00166 if (c == '/')
00167 done = true;
00168 else
00169 UngetChar(c);
00170 break;
00171 case ((char) EOF):
00172 {
00173 SrcLoc loc(lineNumber, charNumber, currentLoc->file, currentLoc->shortId);
00174 outputMu.lock();
00175 Error("Unterminated comment. EOF in ScanComment.\n", &loc);
00176 outputMu.unlock();
00177 throw "\nParsing terminated.\n";
00178 }
00179 default:
00180 break;
00181 }
00182 }
00183 }
00184 else {
00185 while (c != '\n' && c != ((char) EOF))
00186 c = GetChar();
00187 }
00188 }
00189
00190
00191 bool Token::ScanPragma() {
00192 bool done = false;
00193 char c = SkipWhitespace();
00194
00195 while (!done) {
00196 switch (c) {
00197 case '*':
00198 {
00199 char c1 = GetChar();
00200 char c2 = GetChar();
00201
00202 if ((c1 == '*') && (c2 == '/'))
00203 done = true;
00204
00205 else if(c1 == '/')
00206 {
00207
00208 StartBytes();
00209
00210 UngetChar(c2);
00211
00212 return false;
00213 }
00214 else {
00215 AppendChar(c);
00216 c = c1;
00217 UngetChar(c2);
00218 }
00219 break;
00220 }
00221 case ((char) EOF):
00222 outputMu.lock();
00223 Error("Unterminated pragma.\n", currentLoc);
00224 outputMu.unlock();
00225 throw "\nParsing terminated.\n";
00226 default:
00227 AppendChar(c);
00228 c = GetChar();
00229 break;
00230 }
00231 }
00232
00233 int index = length - 1;
00234 while (index >= 0) {
00235 switch (bytes[index]) {
00236 case ' ': case '\t': case '\f': case '\n':
00237 index--;
00238 break;
00239 default:
00240 length = index + 1;
00241 index = -1;
00242 break;
00243 }
00244 }
00245 EndBytes();
00246 this->tclass = TkPragma;
00247 this->loc = currentLoc->Copy();
00248
00249
00250 return true;
00251 }
00252
00253 void Token::ScanIdNumber(char c) {
00254 AppendChar(c);
00255 while (true) {
00256 c = GetChar();
00257 if ((c >= 'a') && (c <= 'z') ||
00258 (c >= 'A') && (c <= 'Z') ||
00259 (c >= '0') && (c <= '9') ||
00260 (c == '_') ||
00261 (c == '.'))
00262 AppendChar(c);
00263 else {
00264 UngetChar(c);
00265 break;
00266 }
00267 }
00268 EndBytes();
00269 this->loc = currentLoc->Copy();
00270 Text id(Bytes());
00271 if (!ReservedWords.Get(id, tclass)) {
00272
00273 Basics::int32 n = 0;
00274 unsigned int idx = 0, base = 10;
00275 if (id[0] == '0') {
00276 if (id[1] == 'x' || id[1] == 'X') {
00277 idx = 2;
00278 base = 16;
00279 } else {
00280 idx = 1;
00281 base = 8;
00282 }
00283 }
00284 bool isNumber = true;
00285 for (int i = idx; i < id.Length(); i++) {
00286 char ch = id[i];
00287 switch (ch) {
00288 case '0': case '1': case '2': case '3': case '4': case '5':
00289 case '6': case '7':
00290 n = base * n + (ch - '0');
00291 break;
00292 case '8': case '9':
00293 if (base == 8)
00294 isNumber = false;
00295 else
00296 n = base * n + (ch - '0');
00297 break;
00298 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
00299 if (base != 16)
00300 isNumber = false;
00301 else
00302 n = base * n + (10 + ch - 'a');
00303 break;
00304 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
00305 if (base != 16)
00306 isNumber = false;
00307 else
00308 n = base * n + (10 + ch - 'A');
00309 break;
00310 default:
00311 isNumber = false;
00312 break;
00313 }
00314 if (n < 0) {
00315 outputMu.lock();
00316 Error("Integer too large.\n", currentLoc);
00317 outputMu.unlock();
00318 throw "\nParsing terminated.\n";
00319 }
00320 if (!isNumber) break;
00321 }
00322
00323 if (isNumber) {
00324 tclass = TkNumber;
00325 expr = NEW_CONSTR(ConstantEC, (NEW_CONSTR(IntegerVC, (n)), this->loc));
00326 }
00327 else {
00328 tclass = TkId;
00329 expr = NEW_CONSTR(NameEC, (id, this->loc));
00330 }
00331 }
00332 }
00333
00334 void Token::ScanText() {
00335 char c;
00336 int val, j;
00337 bool done = false;
00338
00339 while (!done) {
00340 c = GetChar();
00341 switch (c) {
00342 case '"':
00343 done = true;
00344 break;
00345 case '\\':
00346 c = GetChar();
00347 switch (c) {
00348 case 'n': AppendChar('\n'); break;
00349 case 't': AppendChar('\t'); break;
00350 case 'v': AppendChar('\v'); break;
00351 case 'b': AppendChar('\b'); break;
00352 case 'r': AppendChar('\r'); break;
00353 case 'f': AppendChar('\f'); break;
00354 case 'a': AppendChar('\a'); break;
00355 case '\\': AppendChar('\\'); break;
00356 case '"': AppendChar('\"'); break;
00357 case 'x': case 'X':
00358 val = 0;
00359 c = GetChar();
00360 for (j = 0; j < 2; j++) {
00361 switch (c) {
00362 case '0': case '1': case '2': case '3': case '4': case '5':
00363 case '6': case '7': case '8': case '9':
00364 val = 16 * val + c - '0';
00365 c = GetChar();
00366 break;
00367 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
00368 val = 16 * val + 10 + c - 'A';
00369 c = GetChar();
00370 break;
00371 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
00372 val = 16 * val + 10 + c - 'a';
00373 c = GetChar();
00374 break;
00375 default:
00376 if (j == 0) {
00377 UngetChar(c);
00378 outputMu.lock();
00379 Error("Illegal escape character in text.\n", currentLoc);
00380 outputMu.unlock();
00381 throw "\nParsing terminated.\n";
00382 }
00383 goto xend;
00384 }
00385 }
00386 xend: UngetChar(c);
00387 AppendChar((unsigned char)val);
00388 break;
00389 default:
00390 val = 0;
00391 for (j = 0; j < 3; j++) {
00392 switch (c) {
00393 case '0': case '1': case '2': case '3': case '4': case '5':
00394 case '6': case '7':
00395 val = 8 * val + c - '0';
00396 c = GetChar();
00397 break;
00398 default:
00399 if (j == 0) {
00400 UngetChar(c);
00401 outputMu.lock();
00402 Error("Illegal escape character in text.\n", currentLoc);
00403 outputMu.unlock();
00404 throw "\nParsing terminated.\n";
00405 }
00406 goto oend;
00407 }
00408 }
00409 oend: UngetChar(c);
00410 AppendChar((unsigned char)val);
00411 break;
00412 }
00413 break;
00414 case ((char) EOF): case '\n':
00415 outputMu.lock();
00416 Error("Text not terminated at end of line or file.\n", currentLoc);
00417 outputMu.unlock();
00418 throw("\nParsing terminated.\n");
00419 default:
00420 AppendChar(c);
00421 break;
00422 }
00423 }
00424 EndBytes();
00425 this->tclass = TkString;
00426 this->loc = currentLoc->Copy();
00427 expr = NEW_CONSTR(ConstantEC,
00428 (NEW_CONSTR(TextVC, (Text(Bytes()))), this->loc));
00429 }
00430
00431 void Token::Next() {
00432 while (true) {
00433 char c = SkipWhitespace();
00434 currentLoc->line = lineNumber;
00435 currentLoc->character = charNumber;
00436 StartBytes();
00437 switch (c) {
00438 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
00439 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
00440 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
00441 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
00442 case 'y': case 'z':
00443 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
00444 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
00445 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
00446 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
00447 case 'Y': case 'Z':
00448 case '.': case '_':
00449 case '0': case '1': case '2': case '3': case '4': case '5':
00450 case '6': case '7': case '8': case '9':
00451 this->ScanIdNumber(c);
00452 return;
00453 case '"':
00454 this->ScanText();
00455 return;
00456 case '/':
00457 {
00458 char c1 = GetChar();
00459 if (c1 == '/') {
00460 this->ScanComment(c);
00461 break;
00462 }
00463 else if (c1 == '*') {
00464 char c2 = GetChar();
00465 if (c2 == '*') {
00466 if(this->ScanPragma())
00467 {
00468 return;
00469 }
00470 else
00471 {
00472 break;
00473 }
00474 }
00475 else {
00476 UngetChar(c2);
00477 this->ScanComment(c1);
00478 break;
00479 }
00480 }
00481 tclass = TkSlash;
00482 this->AppendChar(c);
00483 UngetChar(c1);
00484 this->EndBytes();
00485 this->loc = currentLoc->Copy();
00486 return;
00487 }
00488 case '<':
00489 {
00490 this->AppendChar(c);
00491 c = GetChar();
00492 if (c == '=') {
00493 this->AppendChar(c);
00494 tclass = TkLessEq;
00495 }
00496 else {
00497 UngetChar(c);
00498 tclass = TkLess;
00499 }
00500 this->EndBytes();
00501 this->loc = currentLoc->Copy();
00502 return;
00503 }
00504 case '&': case '|': case '=': case '!': case '>': case '+':
00505 this->AppendChar(c);
00506 tclass = CharMap[c];
00507 c = GetChar();
00508 switch(c) {
00509 case '&': case '|': case '=': case '>': case '+':
00510 this->AppendChar(c);
00511 this->EndBytes();
00512 if (ReservedWords.Get(Text(bytes, (void*)1), tclass)) {
00513 this->loc = currentLoc->Copy();
00514 return;
00515 }
00516 this->UnAppendChar();
00517
00518 default:
00519 UngetChar(c);
00520 this->EndBytes();
00521 this->loc = currentLoc->Copy();
00522 return;
00523 }
00524 case '\\': case ':': case ',': case '$': case '-': case '%': case '?':
00525 case ';': case '*':
00526 case '[': case ']': case '{': case '}': case '(': case ')':
00527 tclass = CharMap[c];
00528 this->AppendChar(c);
00529 this->EndBytes();
00530 this->loc = currentLoc->Copy();
00531 return;
00532 case ((char) EOF):
00533 tclass = TkEOF;
00534 return;
00535 default:
00536 outputMu.lock();
00537 Error(Text("Bad character `") + c + "'.\n", currentLoc);
00538 outputMu.unlock();
00539 throw("\nParsing terminated.\n");
00540 }
00541 }
00542 }
00543
00544 void Token::LexFlush() {
00545 while (true) {
00546 int c = lexIn->get();
00547 if (c == EOF) break;
00548 }
00549 }
00550
00551 void Token::Init(const Text& model, ShortId sid) {
00552 tclass = TkErr;
00553 expr = NULL;
00554 currentLoc->Init(model, sid);
00555 this->loc = currentLoc;
00556 }
00557
00558
00559
00560 Text TokenNames[TkIllegal+1] =
00561 { "binding", "bool", "do", "else", "ERR", "FALSE", "files",
00562 "foreach", "from", "function", "if", "in", "import", "int",
00563 "list", "return", "text", "then", "TRUE", "type", "value",
00564
00565 "Id", "Number", "String",
00566
00567 "And", "EqEq", "NotEq", "GreaterEq", "Implies", "LessEq", "Or",
00568 "PlusPlus",
00569
00570 "BackSlash", "Bang", "Colon", "Comma", "Dollar", "Equal", "Greater",
00571 "Less", "Minus", "Percent", "Plus", "Query", "Semicolon", "Slash",
00572 "Star", "Underscore",
00573
00574 "LBrace", "RBrace", "LBracket", "RBracket", "LParen", "RParen",
00575
00576 "Pragma",
00577
00578 "End of File", "Illegal Token"
00579 };
00580
00581 void LexInit() {
00582
00583 CharMap['\\']= TkBackSlash;
00584 CharMap['!'] = TkBang;
00585 CharMap[':'] = TkColon;
00586 CharMap[','] = TkComma;
00587 CharMap['$'] = TkDollar;
00588 CharMap['='] = TkEqual;
00589 CharMap['>'] = TkGreater;
00590 CharMap['<'] = TkLess;
00591 CharMap['-'] = TkMinus;
00592 CharMap['%'] = TkPercent;
00593 CharMap['+'] = TkPlus;
00594 CharMap['?'] = TkQuery;
00595 CharMap[';'] = TkSemicolon;
00596 CharMap['/'] = TkSlash;
00597 CharMap['*'] = TkStar;
00598 CharMap['_'] = TkUnderscore;
00599 CharMap['{'] = TkLBrace;
00600 CharMap['}'] = TkRBrace;
00601 CharMap['['] = TkLBracket;
00602 CharMap[']'] = TkRBracket;
00603 CharMap['('] = TkLParen;
00604 CharMap[')'] = TkRParen;
00605
00606 ReservedWords.Put("binding", TkBinding);
00607 ReservedWords.Put("bool", TkBool);
00608 ReservedWords.Put("do", TkDo);
00609 ReservedWords.Put("else", TkElse);
00610 ReservedWords.Put("ERR", TkErr);
00611 ReservedWords.Put("FALSE", TkFalse);
00612 ReservedWords.Put("files", TkFiles);
00613 ReservedWords.Put("foreach", TkForeach);
00614 ReservedWords.Put("from", TkFrom);
00615 ReservedWords.Put("function", TkFunction);
00616 ReservedWords.Put("if", TkIf);
00617 ReservedWords.Put("in", TkIn);
00618 ReservedWords.Put("import", TkImport);
00619 ReservedWords.Put("int", TkInt);
00620 ReservedWords.Put("list", TkList);
00621 ReservedWords.Put("return", TkReturn);
00622 ReservedWords.Put("text", TkText);
00623 ReservedWords.Put("then", TkThen);
00624 ReservedWords.Put("TRUE", TkTrue);
00625 ReservedWords.Put("type", TkType);
00626 ReservedWords.Put("value", TkValue);
00627 ReservedWords.Put("&&", TkAnd);
00628 ReservedWords.Put("==", TkEqEq);
00629 ReservedWords.Put("!=", TkNotEq);
00630 ReservedWords.Put(">=", TkGreaterEq);
00631 ReservedWords.Put("=>", TkImplies);
00632 ReservedWords.Put("<=", TkLessEq);
00633 ReservedWords.Put("||", TkOr);
00634 ReservedWords.Put("++", TkPlusPlus);
00635 ReservedWords.Put("