You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2415 lines
74 KiB
2415 lines
74 KiB
/+
|
|
* Copyright (c) 1999-2006 by Digital Mars
|
|
* All Rights Reserved
|
|
* written by Walter Bright www.digitalmars.com
|
|
* License for redistribution is by either the Artistic License in artistic.txt, or the GNU General Public License in gnu.txt.
|
|
* See the included readme.txt for details.
|
|
* D Language conversion by: J Duncan
|
|
+/
|
|
|
|
/**
|
|
* d language lexer
|
|
*/
|
|
|
|
module dparser.Lexer;
|
|
|
|
import dparser.Root;
|
|
|
|
import dparser.Tokens;
|
|
import dparser.Token;
|
|
import dparser.Keyword;
|
|
|
|
import dparser.Types;
|
|
|
|
import dparser.Module;
|
|
import dparser.Identifier;
|
|
import dparser.unialpha;
|
|
|
|
import dparser.OutBuffer;
|
|
|
|
//private import std.ctype;
|
|
//private import std.string;
|
|
//import dwf.core.debugapi;
|
|
|
|
int errno = 0;
|
|
|
|
//#if _WIN32 && __DMC__
|
|
// from \dm\src\include\setlocal.h
|
|
//extern "C" char * __cdecl __locale_decpoint;
|
|
char* __locale_decpoint;
|
|
//#endif
|
|
//const uint LS = 0x2028; // UTF line separator
|
|
//const uint PS = 0x2029; // UTF paragraph separator
|
|
|
|
//extern int isUniAlpha(unsigned u);
|
|
//extern int HtmlNamedEntity(unsigned char *p, int length);
|
|
|
|
/**
|
|
* Lexer object
|
|
*/
|
|
|
|
class Lexer
|
|
{
|
|
static Identifier[char[]] stringtable;
|
|
static OutBuffer stringbuffer;
|
|
static Token * freelist;
|
|
|
|
Token token; // current token
|
|
Module mod; // current module
|
|
Loc loc; // for error messages
|
|
ubyte *base; // pointer to start of buffer
|
|
ubyte *end; // past end of buffer
|
|
ubyte *p; // current character
|
|
int doDocComment; // collect doc comment information
|
|
int anyToken; // !=0 means seen at least one token
|
|
int commentToken; // !=0 means comments are TOKcomment's
|
|
|
|
|
|
this(Module mod, ubyte* base, uint begoffset, uint endoffset, int doDocComment, int commentToken)
|
|
{
|
|
if (stringbuffer is null) {
|
|
stringbuffer = new OutBuffer;
|
|
}
|
|
loc = Loc(mod, 1);
|
|
|
|
this.base = base;
|
|
this.end = base + endoffset;
|
|
this.p = base + begoffset;
|
|
this.mod = mod;
|
|
this.doDocComment = doDocComment;
|
|
this.commentToken = commentToken;
|
|
|
|
/*
|
|
* If first line starts with '#!', ignore the line
|
|
*/
|
|
|
|
if (p[0] == '#' && p[1] == '!') {
|
|
p += 2;
|
|
while (true) {
|
|
ubyte c = *p;
|
|
switch (c) {
|
|
case '\n':
|
|
p++;
|
|
break;
|
|
|
|
case '\r':
|
|
p++;
|
|
if (*p == '\n') {
|
|
p++;
|
|
}
|
|
break;
|
|
|
|
case 0:
|
|
case 0x1A:
|
|
break;
|
|
|
|
default:
|
|
if (c & 0x80) {
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS) {
|
|
break;
|
|
}
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
loc.linnum = 2;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// generate a unique identifier for this string
|
|
static Identifier idPool(in char[] str) {
|
|
// StringValue sv;
|
|
// uint len = s.length;
|
|
// StringValue sv = stringtable.update(s, len);
|
|
// Identifier* id = cast(Identifier*) sv.ptrvalue;
|
|
// if( id is null )
|
|
if ((str in stringtable) == null) {
|
|
stringtable[str] = new Identifier(str, TOK.TOKidentifier);
|
|
}
|
|
return(stringtable[str]);
|
|
}
|
|
|
|
static void initKeywords() {
|
|
// build character map
|
|
cmtable_init();
|
|
|
|
// create keyword tokens & identifiers
|
|
dparser.Keyword.initKeywords();
|
|
|
|
// create standard lexer tokens
|
|
dparser.Token.createLexerTokens();
|
|
}
|
|
|
|
// Combine two document comments into one.
|
|
static char[] combineComments(char[] c1, char[] c2) {
|
|
char[] c = c2;
|
|
if (c1.length) {
|
|
c = c1;
|
|
if (c2.length) {
|
|
c = c1 ~ "\n" ~ c2;
|
|
}
|
|
}
|
|
return(c);
|
|
}
|
|
|
|
// Decode UTF character. Issue error messages for invalid sequences. Return decoded character, advance p to last character in UTF sequence.
|
|
//! fix
|
|
uint decodeUTF() {
|
|
ubyte * s = p;
|
|
ubyte c = *s;
|
|
|
|
assert(c & 0x80);
|
|
if (!(c & 0x80)) {
|
|
return(c);
|
|
}
|
|
|
|
return(cast(uint)'X');
|
|
/*
|
|
* dchar u;
|
|
* uint len;
|
|
*
|
|
*
|
|
*
|
|
* // Check length of remaining string up to 6 UTF-8 characters
|
|
* for( len = 1; len < 6 && s[len]; len++ )
|
|
* {
|
|
*
|
|
* }
|
|
* /+
|
|
* uint idx = 0;
|
|
* char* msg = utf_decodeChar( s, len, &idx, &u );
|
|
* p += idx - 1;
|
|
* if( msg )
|
|
* {
|
|
* error(msg);
|
|
* }
|
|
* +/
|
|
* return u;
|
|
*/
|
|
}
|
|
|
|
void error(...) {
|
|
if ((mod !is null) && !global.gag) {
|
|
writefln(formatLoc(loc, _arguments, _argptr));
|
|
/*
|
|
* char[] p = loc.toChars();
|
|
* if( p.length )
|
|
* writef( "%s: ", p );
|
|
* writefx( stdout, _arguments, _argptr, 1 );
|
|
*/
|
|
if (global.errors >= global.max_errors) { // moderate blizzard of cascading messages
|
|
throw new Exception("too many errors");
|
|
}
|
|
}
|
|
|
|
global.errors++;
|
|
}
|
|
|
|
void errorLoc(Loc loc, ...) {
|
|
if ((mod !is null) && !global.gag) {
|
|
writefln(formatLoc(loc, _arguments, _argptr));
|
|
/*
|
|
* char[] p = loc.toChars();
|
|
* if( p.length )
|
|
* writef("%s: ", p);
|
|
* writefx(stdout, _arguments, _argptr, 1);
|
|
*/
|
|
if (global.errors >= 20) { // moderate blizzard of cascading messages
|
|
throw new Exception("too many errors");
|
|
}
|
|
}
|
|
|
|
global.errors++;
|
|
}
|
|
|
|
|
|
TOK nextToken() {
|
|
if (token.next) {
|
|
Token* t = token.next;
|
|
memcpy(&token, t, Token.sizeof);
|
|
// t.next = freelist;
|
|
// freelist = t;
|
|
}
|
|
else {
|
|
scan(&token);
|
|
}
|
|
// token.print();
|
|
return(token.value);
|
|
}
|
|
|
|
Token* peek(inout Token ct) {
|
|
Token* t;
|
|
|
|
if (ct.next) {
|
|
t = ct.next;
|
|
}
|
|
else {
|
|
t = new Token;
|
|
scan(t);
|
|
t.next = null;
|
|
ct.next = t;
|
|
}
|
|
return(t);
|
|
}
|
|
|
|
// Turn next token in buffer into a token.
|
|
|
|
void scan(Token* t) {
|
|
// debug writefln("scan token");
|
|
uint lastLine = loc.linnum;
|
|
uint linnum;
|
|
|
|
t.blockComment = null;
|
|
t.lineComment = null;
|
|
while (true) {
|
|
t.ptr = p;
|
|
// debug writefln( " p = %d, *p = ", cast(uint)p, cast(char)*p );
|
|
switch (*p) {
|
|
case 0:
|
|
case 0x1a:
|
|
t.value = TOK.TOKeof; // end of file
|
|
// debug writefln( " EOF" );
|
|
return;
|
|
|
|
case ' ':
|
|
case '\t':
|
|
case '\v':
|
|
case '\f':
|
|
p++;
|
|
// debug writefln( " whitespace" );
|
|
continue; // skip white space
|
|
|
|
case '\r':
|
|
// debug writefln( " cr" );
|
|
p++;
|
|
if (*p != '\n') { // if CR stands by itself
|
|
loc.linnum++;
|
|
}
|
|
continue; // skip white space
|
|
|
|
case '\n':
|
|
// debug writefln( " nl" );
|
|
p++;
|
|
loc.linnum++;
|
|
continue; // skip white space
|
|
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
case '8':
|
|
case '9':
|
|
t.value = number(t);
|
|
return;
|
|
|
|
/*
|
|
* #if CSTRINGS
|
|
* case '\'':
|
|
* t.value = charConstant(t, 0);
|
|
* return;
|
|
*
|
|
* case '"':
|
|
* t.value = stringConstant(t,0);
|
|
* return;
|
|
*
|
|
* case 'l':
|
|
* case 'L':
|
|
* if( p[1] == '\'')
|
|
* {
|
|
* p++;
|
|
* t.value = charConstant(t, 1);
|
|
* return;
|
|
* }
|
|
* else if( p[1] == '"')
|
|
* {
|
|
* p++;
|
|
* t.value = stringConstant(t, 1);
|
|
* return;
|
|
* }
|
|
* #else
|
|
*/
|
|
case '\'':
|
|
// debug writefln( " char" );
|
|
t.value = charConstant(t, 0);
|
|
return;
|
|
|
|
case 'r':
|
|
// debug writefln( " wysiwyg" );
|
|
if (p[1] != '"') {
|
|
goto case_identifier;
|
|
}
|
|
p++;
|
|
|
|
case '`':
|
|
t.value = wysiwygStringConstant(t, *p);
|
|
return;
|
|
|
|
case 'x':
|
|
// debug writefln( " hex string" );
|
|
if (p[1] != '"') {
|
|
goto case_identifier;
|
|
}
|
|
p++;
|
|
t.value = hexStringConstant(t);
|
|
return;
|
|
|
|
|
|
case '"':
|
|
// debug writefln( " string" );
|
|
t.value = escapeStringConstant(t, 0);
|
|
// debug writefln( t.ustring );
|
|
return;
|
|
|
|
case '\\': // escaped string literal
|
|
// debug writefln( " escaped string literal" );
|
|
uint c;
|
|
stringbuffer.offset = 0;
|
|
do {
|
|
p++;
|
|
c = escapeSequence();
|
|
stringbuffer.write(c);
|
|
} while (*p == '\\');
|
|
// t.len = stringbuffer.offset;
|
|
// stringbuffer.write(cast(byte)0);
|
|
t.ustring = stringbuffer.toString;
|
|
// memcpy( t.ustring.ptr, stringbuffer.data, stringbuffer.offset );
|
|
t.postfix = 0;
|
|
t.value = TOK.TOKstring;
|
|
return;
|
|
|
|
case 'l':
|
|
case 'L':
|
|
// #endif
|
|
|
|
case 'a':
|
|
case 'b':
|
|
case 'c':
|
|
case 'd':
|
|
case 'e':
|
|
case 'f':
|
|
case 'g':
|
|
case 'h':
|
|
case 'i':
|
|
case 'j':
|
|
case 'k':
|
|
case 'm':
|
|
case 'n':
|
|
case 'o':
|
|
case 'p':
|
|
case 'q': /*case 'r':*/
|
|
case 's':
|
|
case 't':
|
|
case 'u':
|
|
case 'v':
|
|
case 'w': /*case 'x':*/
|
|
case 'y':
|
|
case 'z':
|
|
case 'A':
|
|
case 'B':
|
|
case 'C':
|
|
case 'D':
|
|
case 'E':
|
|
case 'F':
|
|
case 'G':
|
|
case 'H':
|
|
case 'I':
|
|
case 'J':
|
|
case 'K':
|
|
case 'M':
|
|
case 'N':
|
|
case 'O':
|
|
case 'P':
|
|
case 'Q':
|
|
case 'R':
|
|
case 'S':
|
|
case 'T':
|
|
case 'U':
|
|
case 'V':
|
|
case 'W':
|
|
case 'X':
|
|
case 'Y':
|
|
case 'Z':
|
|
case '_':
|
|
case_identifier:
|
|
{
|
|
// debug writefln( " identifier" );
|
|
ubyte c;
|
|
do {
|
|
c = *++p;
|
|
} while (isidchar(c) || (c & 0x80 && isUniAlpha(decodeUTF())));
|
|
|
|
// sv = stringtable.update((char *)t.ptr, p - t.ptr);
|
|
char[] tmp;
|
|
tmp.length = p - t.ptr;
|
|
memcpy(tmp.ptr, t.ptr, p - t.ptr);
|
|
Identifier id;
|
|
Identifier * pid = tmp in stringtable;
|
|
if (pid) {
|
|
id = *pid;
|
|
}
|
|
|
|
if (id is null) {
|
|
id = new Identifier(tmp, TOK.TOKidentifier);
|
|
stringtable[tmp] = id;
|
|
}
|
|
|
|
t.identifier = id;
|
|
t.value = cast(TOK)id.value;
|
|
anyToken = 1;
|
|
|
|
// if special identifier token
|
|
if (*t.ptr == '_') {
|
|
static char date[11 + 1];
|
|
static char time[8 + 1];
|
|
static char timestamp[24 + 1];
|
|
|
|
if (!date[0]) { // lazy evaluation
|
|
//!!
|
|
/+
|
|
* time_t t;
|
|
* char *p;
|
|
* .time(&t);
|
|
* p = ctime(&t);
|
|
* assert(p);
|
|
* sprintf(date.ptr, "%.6s %.4s", p + 4, p + 20);
|
|
* sprintf(time.ptr, "%.8s", p + 11);
|
|
* sprintf(timestamp.ptr, "%.24s", p);
|
|
+/
|
|
}
|
|
|
|
if (mod && id is Id.FILE) {
|
|
t.value = TOK.TOKstring;
|
|
if (loc.filename.length) {
|
|
t.ustring = loc.filename;
|
|
}
|
|
else {
|
|
t.ustring = mod.identifier.toChars();
|
|
}
|
|
goto Llen;
|
|
}
|
|
else if (mod && id == Id.LINE) {
|
|
t.value = TOK.TOKint64v;
|
|
t.uns64value = loc.linnum;
|
|
}
|
|
else if (id == Id.DATE) {
|
|
t.value = TOK.TOKstring;
|
|
//! t.ustring = date;
|
|
goto Llen;
|
|
}
|
|
else if (id == Id.TIME) {
|
|
t.value = TOK.TOKstring;
|
|
//! t.ustring = time;
|
|
goto Llen;
|
|
}
|
|
else if (id == Id.TIMESTAMP) {
|
|
t.value = TOK.TOKstring;
|
|
//! t.ustring = timestamp;
|
|
Llen:
|
|
t.postfix = 0;
|
|
// t.len = strlen((char *)t.ustring);
|
|
}
|
|
}
|
|
//printf("t.value = %d\n",t.value);
|
|
return;
|
|
}
|
|
|
|
// comments
|
|
case '/':
|
|
p++;
|
|
switch (*p) {
|
|
case '=':
|
|
p++;
|
|
t.value = TOK.TOKdivass;
|
|
return;
|
|
|
|
case '*': // '/*'
|
|
p++;
|
|
linnum = loc.linnum;
|
|
while (true) {
|
|
while (true) {
|
|
ubyte c = *p;
|
|
switch (c) {
|
|
case '/':
|
|
break;
|
|
|
|
case '\n':
|
|
loc.linnum++;
|
|
p++;
|
|
continue;
|
|
|
|
case '\r':
|
|
p++;
|
|
if (*p != '\n') {
|
|
loc.linnum++;
|
|
}
|
|
continue;
|
|
|
|
case 0:
|
|
case 0x1A:
|
|
error("unterminated /* */ comment");
|
|
p = end;
|
|
t.value = TOK.TOKeof;
|
|
return;
|
|
|
|
default:
|
|
if (c & 0x80) {
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS) {
|
|
loc.linnum++;
|
|
}
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
p++;
|
|
if (p[-2] == '*' && p - 3 != t.ptr) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (commentToken) {
|
|
t.value = TOK.TOKcomment;
|
|
return;
|
|
}
|
|
// if /** but not /**/
|
|
else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr) {
|
|
getDocComment(t, lastLine == linnum); //! ?
|
|
}
|
|
continue;
|
|
|
|
case '/': // do // style comments
|
|
linnum = loc.linnum;
|
|
while (1) {
|
|
ubyte c = *++p;
|
|
switch (c) {
|
|
case '\n':
|
|
break;
|
|
|
|
case '\r':
|
|
if (p[1] == '\n') {
|
|
p++;
|
|
}
|
|
break;
|
|
|
|
case 0:
|
|
case 0x1a:
|
|
if (commentToken) {
|
|
p = end;
|
|
t.value = TOK.TOKcomment;
|
|
return;
|
|
}
|
|
if (doDocComment && t.ptr[2] == '/') {
|
|
getDocComment(t, lastLine == linnum);
|
|
}
|
|
p = end;
|
|
t.value = TOK.TOKeof;
|
|
return;
|
|
|
|
default:
|
|
if (c & 0x80) {
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS) {
|
|
break;
|
|
}
|
|
}
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (commentToken) {
|
|
p++;
|
|
loc.linnum++;
|
|
t.value = TOK.TOKcomment;
|
|
return;
|
|
}
|
|
if (doDocComment && t.ptr[2] == '/') {
|
|
getDocComment(t, lastLine == linnum);
|
|
}
|
|
|
|
p++;
|
|
loc.linnum++;
|
|
continue;
|
|
|
|
case '+':
|
|
{
|
|
int nest;
|
|
linnum = loc.linnum;
|
|
p++;
|
|
nest = 1;
|
|
while (1) {
|
|
ubyte c = *p;
|
|
switch (c) {
|
|
case '/':
|
|
p++;
|
|
if (*p == '+') {
|
|
p++;
|
|
nest++;
|
|
}
|
|
continue;
|
|
|
|
case '+':
|
|
p++;
|
|
if (*p == '/') {
|
|
p++;
|
|
if (--nest == 0) {
|
|
break;
|
|
}
|
|
}
|
|
continue;
|
|
|
|
case '\r':
|
|
p++;
|
|
if (*p != '\n') {
|
|
loc.linnum++;
|
|
}
|
|
continue;
|
|
|
|
case '\n':
|
|
loc.linnum++;
|
|
p++;
|
|
continue;
|
|
|
|
case 0:
|
|
case 0x1A:
|
|
error("unterminated /+ +/ comment");
|
|
p = end;
|
|
t.value = TOK.TOKeof;
|
|
return;
|
|
|
|
default:
|
|
if (c & 0x80) {
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS) {
|
|
loc.linnum++;
|
|
}
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
if (commentToken) {
|
|
t.value = TOK.TOKcomment;
|
|
return;
|
|
}
|
|
if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr) {
|
|
// if /++ but not /++/
|
|
getDocComment(t, lastLine == linnum);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
t.value = TOK.TOKdiv;
|
|
return;
|
|
|
|
case '.':
|
|
p++;
|
|
if (isdigit(*p)) {
|
|
p--;
|
|
t.value = inreal(t);
|
|
}
|
|
else if (p[0] == '.') {
|
|
if (p[1] == '.') {
|
|
p += 2;
|
|
t.value = TOK.TOKdotdotdot;
|
|
}
|
|
else {
|
|
p++;
|
|
t.value = TOK.TOKslice;
|
|
}
|
|
}
|
|
else {
|
|
t.value = TOK.TOKdot;
|
|
}
|
|
return;
|
|
|
|
case '&':
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKandass;
|
|
}
|
|
else if (*p == '&') {
|
|
p++;
|
|
t.value = TOK.TOKandand;
|
|
}
|
|
else {
|
|
t.value = TOK.TOKand;
|
|
}
|
|
return;
|
|
|
|
// |, ||, |=
|
|
case '|':
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKorass;
|
|
}
|
|
else if (*p == '|') {
|
|
p++;
|
|
t.value = TOK.TOKoror;
|
|
}
|
|
else {
|
|
t.value = TOK.TOKor;
|
|
}
|
|
return;
|
|
|
|
case '-':
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKminass;
|
|
}
|
|
else if (*p == '-') {
|
|
p++;
|
|
t.value = TOK.TOKminusminus;
|
|
}
|
|
else {
|
|
t.value = TOK.TOKmin;
|
|
}
|
|
return;
|
|
|
|
// +, +=, ++
|
|
case '+':
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKaddass; // +=
|
|
}
|
|
else if (*p == '+') {
|
|
p++;
|
|
t.value = TOK.TOKplusplus; // ++
|
|
}
|
|
else {
|
|
t.value = TOK.TOKadd; // +
|
|
}
|
|
return;
|
|
|
|
// <, <=, <<=, <<, <>=, <>
|
|
case '<':
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKle; // <=
|
|
}
|
|
else if (*p == '<') {
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKshlass; // <<=
|
|
}
|
|
else {
|
|
t.value = TOK.TOKshl; // <<
|
|
}
|
|
}
|
|
else if (*p == '>') {
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKleg; // <>=
|
|
}
|
|
else {
|
|
t.value = TOK.TOKlg; // <>
|
|
}
|
|
}
|
|
else {
|
|
t.value = TOK.TOKlt; // <
|
|
}
|
|
return;
|
|
|
|
// >, >>, >>>, >=, >>=, >>>=
|
|
case '>':
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKge; // >=
|
|
}
|
|
else if (*p == '>') {
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKshrass; // >>=
|
|
}
|
|
else if (*p == '>') {
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKushrass; // >>>=
|
|
}
|
|
else {
|
|
t.value = TOK.TOKushr; // >>>
|
|
}
|
|
}
|
|
else {
|
|
t.value = TOK.TOKshr; // >>
|
|
}
|
|
}
|
|
else {
|
|
t.value = TOK.TOKgt; // >
|
|
}
|
|
return;
|
|
|
|
case '!':
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKnotidentity; // !==
|
|
}
|
|
else {
|
|
t.value = TOK.TOKnotequal; // !=
|
|
}
|
|
}
|
|
else if (*p == '<') {
|
|
p++;
|
|
if (*p == '>') {
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKunord; // !<>=
|
|
}
|
|
else {
|
|
t.value = TOK.TOKue; // !<>
|
|
}
|
|
}
|
|
else if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKug; // !<=
|
|
}
|
|
else {
|
|
t.value = TOK.TOKuge; // !<
|
|
}
|
|
}
|
|
else if (*p == '>') {
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKul; // !>=
|
|
}
|
|
else {
|
|
t.value = TOK.TOKule; // !>
|
|
}
|
|
}
|
|
else {
|
|
t.value = TOK.TOKnot; // !
|
|
}
|
|
return;
|
|
|
|
case '=':
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKidentity; // ===
|
|
}
|
|
else {
|
|
t.value = TOK.TOKequal; // ==
|
|
}
|
|
}
|
|
else {
|
|
t.value = TOK.TOKassign; // =
|
|
}
|
|
return;
|
|
|
|
case '~':
|
|
p++;
|
|
if (*p == '=') {
|
|
p++;
|
|
t.value = TOK.TOKcatass; // ~=
|
|
}
|
|
else {
|
|
t.value = TOK.TOKtilde; // ~
|
|
}
|
|
return;
|
|
|
|
// SINGLE
|
|
case '(': p++; t.value = TOK.TOKlparen; return;
|
|
|
|
case ')': p++; t.value = TOK.TOKrparen; return;
|
|
|
|
case '[': p++; t.value = TOK.TOKlbracket; return;
|
|
|
|
case ']': p++; t.value = TOK.TOKrbracket; return;
|
|
|
|
case '{': p++; t.value = TOK.TOKlcurly; return;
|
|
|
|
case '}': p++; t.value = TOK.TOKrcurly; return;
|
|
|
|
case '?': p++; t.value = TOK.TOKquestion; return;
|
|
|
|
case ',': p++; t.value = TOK.TOKcomma; return;
|
|
|
|
case ';': p++; t.value = TOK.TOKsemicolon; return;
|
|
|
|
case ':': p++; t.value = TOK.TOKcolon; return;
|
|
|
|
case '$': p++; t.value = TOK.TOKdollar; return;
|
|
|
|
// DOUBLE
|
|
case '*': p++; if (*p == '=') {
|
|
p++; t.value = TOK.TOKmulass;
|
|
}
|
|
else {
|
|
t.value = TOK.TOKmul;
|
|
} return;
|
|
|
|
case '%': p++; if (*p == '=') {
|
|
p++; t.value = TOK.TOKmodass;
|
|
}
|
|
else {
|
|
t.value = TOK.TOKmod;
|
|
} return;
|
|
|
|
case '^': p++; if (*p == '=') {
|
|
p++; t.value = TOK.TOKxorass;
|
|
}
|
|
else {
|
|
t.value = TOK.TOKxor;
|
|
} return;
|
|
|
|
// removed 148 case '~': p++; if( *p == '=' ) { p++; t.value = TOK.TOKcatass; } else t.value = TOK.TOKtilde; return;
|
|
|
|
|
|
case '#':
|
|
p++;
|
|
Pragma();
|
|
continue;
|
|
|
|
default:
|
|
{
|
|
debug writefln(" default char");
|
|
ubyte c = *p;
|
|
if (c & 0x80) {
|
|
uint u = decodeUTF();
|
|
// Check for start of unicode identifier
|
|
if (isUniAlpha(u)) {
|
|
goto case_identifier;
|
|
}
|
|
|
|
if (u == PS || u == LS) {
|
|
loc.linnum++;
|
|
p++;
|
|
continue;
|
|
}
|
|
}
|
|
if (isprint(c)) {
|
|
error("unsupported char '%s'", cast(char)c);
|
|
}
|
|
else {
|
|
error("unsupported char 0x%02x", cast(ubyte)c);
|
|
}
|
|
p++;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// Parse escape sequence.
|
|
uint escapeSequence() {
|
|
uint c;
|
|
int n;
|
|
int ndigits;
|
|
|
|
c = *p;
|
|
switch (c) {
|
|
case '\'':
|
|
case '"':
|
|
case '?':
|
|
case '\\':
|
|
Lconsume:
|
|
p++;
|
|
break;
|
|
|
|
case 'a': c = 7; goto Lconsume;
|
|
|
|
case 'b': c = 8; goto Lconsume;
|
|
|
|
case 'f': c = 12; goto Lconsume;
|
|
|
|
case 'n': c = 10; goto Lconsume;
|
|
|
|
case 'r': c = 13; goto Lconsume;
|
|
|
|
case 't': c = 9; goto Lconsume;
|
|
|
|
case 'v': c = 11; goto Lconsume;
|
|
|
|
case 'u':
|
|
ndigits = 4;
|
|
goto Lhex;
|
|
|
|
case 'U':
|
|
ndigits = 8;
|
|
goto Lhex;
|
|
|
|
case 'x':
|
|
ndigits = 2;
|
|
Lhex:
|
|
p++;
|
|
c = *p;
|
|
if (ishex(c)) {
|
|
uint v;
|
|
n = 0;
|
|
v = 0;
|
|
while (1) {
|
|
if (isdigit(c)) {
|
|
c -= '0';
|
|
}
|
|
else if (islower(c)) {
|
|
c -= 'a' - 10;
|
|
}
|
|
else {
|
|
c -= 'A' - 10;
|
|
}
|
|
v = v * 16 + c;
|
|
c = *++p;
|
|
if (++n == ndigits) {
|
|
break;
|
|
}
|
|
if (!ishex(c)) {
|
|
error("escape hex sequence has %d hex digits instead of %d", n, ndigits);
|
|
break;
|
|
}
|
|
}
|
|
//! if( ndigits != 2 && !utf_isValidDchar(v))
|
|
//! error("invalid UTF character \\U%08x", v);
|
|
c = v;
|
|
}
|
|
else {
|
|
error("undefined escape hex sequence \\%s\n", c);
|
|
}
|
|
break;
|
|
|
|
case '&': // named character entity
|
|
for (ubyte *idstart = ++p; 1; p++) {
|
|
switch (*p) {
|
|
case ';':
|
|
//!!!
|
|
/+
|
|
* c = HtmlNamedEntity(idstart, p - idstart);
|
|
* if( c == ~0 )
|
|
* {
|
|
* error("unnamed character entity &%.*s;", p - idstart, idstart);
|
|
* c = ' ';
|
|
* }
|
|
*
|
|
* p++;
|
|
+/
|
|
break;
|
|
|
|
default:
|
|
if (isalpha(*p) || (p != idstart + 1 && isdigit(*p))) {
|
|
continue;
|
|
}
|
|
error("unterminated named entity");
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case 0:
|
|
case 0x1a: // end of file
|
|
c = '\\';
|
|
break;
|
|
|
|
default:
|
|
if (isoctal(c)) {
|
|
ubyte v;
|
|
n = 0;
|
|
do {
|
|
v = v * 8 + (c - '0');
|
|
c = *++p;
|
|
} while (++n < 3 && isoctal(c));
|
|
c = v;
|
|
}
|
|
else {
|
|
error("undefined escape sequence \\%s\n", c);
|
|
}
|
|
break;
|
|
}
|
|
return(c);
|
|
}
|
|
|
|
/**************************************
|
|
*/
|
|
|
|
TOK wysiwygStringConstant(Token *t, int tc) {
|
|
uint c;
|
|
Loc start = loc;
|
|
|
|
p++;
|
|
stringbuffer.offset = 0;
|
|
while (1) {
|
|
c = *p++;
|
|
switch (c) {
|
|
case '\n':
|
|
loc.linnum++;
|
|
break;
|
|
|
|
case '\r':
|
|
if (*p == '\n') {
|
|
continue; // ignore
|
|
}
|
|
c = '\n'; // treat EndOfLine as \n character
|
|
loc.linnum++;
|
|
break;
|
|
|
|
case 0:
|
|
case 0x1a:
|
|
error("unterminated string constant starting at %s", start.toChars());
|
|
t.ustring = "";
|
|
t.postfix = 0;
|
|
return(TOK.TOKstring);
|
|
|
|
case '"':
|
|
case '`':
|
|
if (c == tc) {
|
|
// t.len = stringbuffer.offset;
|
|
stringbuffer.write(cast(byte)0);
|
|
t.ustring = stringbuffer.toString;
|
|
// t.ustring = (ubyte *)mem.malloc(stringbuffer.offset);
|
|
// memcpy(t.ustring, stringbuffer.data, stringbuffer.offset);
|
|
stringPostfix(t);
|
|
return(TOK.TOKstring);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
if (c & 0x80) {
|
|
p--;
|
|
uint u = decodeUTF();
|
|
p++;
|
|
if (u == PS || u == LS) {
|
|
loc.linnum++;
|
|
}
|
|
stringbuffer.write(u);
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
stringbuffer.write(c);
|
|
}
|
|
}
|
|
|
|
/**************************************
|
|
* Lex hex strings:
|
|
* x"0A ae 34FE BD"
|
|
*/
|
|
|
|
TOK hexStringConstant(Token *t) {
|
|
uint c;
|
|
Loc start = loc;
|
|
uint n = 0;
|
|
uint v;
|
|
|
|
p++;
|
|
stringbuffer.offset = 0;
|
|
while (1) {
|
|
c = *p++;
|
|
switch (c) {
|
|
case ' ':
|
|
case '\t':
|
|
case '\v':
|
|
case '\f':
|
|
continue; // skip white space
|
|
|
|
case '\r':
|
|
if (*p == '\n') {
|
|
continue; // ignore
|
|
}
|
|
|
|
// Treat isolated '\r' as if it were a '\n'
|
|
case '\n':
|
|
loc.linnum++;
|
|
continue;
|
|
|
|
case 0:
|
|
case 0x1a:
|
|
error("unterminated string constant starting at %s", start.toChars());
|
|
t.ustring = "";
|
|
t.postfix = 0;
|
|
return(TOK.TOKstring);
|
|
|
|
case '"':
|
|
if (n & 1) {
|
|
error("odd number (%d) of hex characters in hex string", n);
|
|
stringbuffer.write(v);
|
|
}
|
|
// t.len = stringbuffer.offset;
|
|
// stringbuffer.write(cast(byte)0);
|
|
t.ustring = stringbuffer.toString;
|
|
// t.ustring = (ubyte *)mem.malloc(stringbuffer.offset);
|
|
// memcpy(t.ustring, stringbuffer.data, stringbuffer.offset);
|
|
stringPostfix(t);
|
|
return(TOK.TOKstring);
|
|
|
|
default:
|
|
if (c >= '0' && c <= '9') {
|
|
c -= '0';
|
|
}
|
|
else if (c >= 'a' && c <= 'f') {
|
|
c -= 'a' - 10;
|
|
}
|
|
else if (c >= 'A' && c <= 'F') {
|
|
c -= 'A' - 10;
|
|
}
|
|
else if (c & 0x80) {
|
|
p--;
|
|
uint u = decodeUTF();
|
|
p++;
|
|
if (u == PS || u == LS) {
|
|
loc.linnum++;
|
|
}
|
|
else {
|
|
error("non-hex character \\u%x", u);
|
|
}
|
|
}
|
|
else {
|
|
error("non-hex character '%s'", c);
|
|
}
|
|
if (n & 1) {
|
|
v = (v << 4) | c;
|
|
stringbuffer.write(v);
|
|
}
|
|
else {
|
|
v = c;
|
|
}
|
|
n++;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**************************************
|
|
*/
|
|
|
|
TOK escapeStringConstant(Token *t, int wide) {
|
|
uint c;
|
|
Loc start = loc;
|
|
|
|
p++;
|
|
stringbuffer.offset = 0;
|
|
// debug writefln( "escape string constant: %s", std.string.toString( cast(char*)p ) );
|
|
while (1) {
|
|
c = *p++;
|
|
switch (c) {
|
|
case '\\':
|
|
switch (*p) {
|
|
case 'u':
|
|
case 'U':
|
|
case '&':
|
|
c = escapeSequence();
|
|
stringbuffer.write(c);
|
|
continue;
|
|
|
|
default:
|
|
c = escapeSequence();
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case '\n':
|
|
loc.linnum++;
|
|
break;
|
|
|
|
case '\r':
|
|
if (*p == '\n') {
|
|
continue; // ignore
|
|
}
|
|
c = '\n'; // treat EndOfLine as \n character
|
|
loc.linnum++;
|
|
break;
|
|
|
|
case '"':
|
|
// writefln( "end of string: ", stringbuffer.toString );
|
|
t.ustring = stringbuffer.toString().dup;
|
|
// t.len = stringbuffer.offset;
|
|
// stringbuffer.write(cast(byte)0);
|
|
// t.ustring = (ubyte *)mem.malloc(stringbuffer.offset);
|
|
// memcpy(t.ustring, stringbuffer.data, stringbuffer.offset);
|
|
stringPostfix(t);
|
|
|
|
return(TOK.TOKstring);
|
|
|
|
case 0:
|
|
case 0x1a:
|
|
p--;
|
|
error("unterminated string constant starting at %s", start.toChars());
|
|
t.ustring = "";
|
|
// t.len = 0;
|
|
t.postfix = 0;
|
|
return(TOK.TOKstring);
|
|
|
|
default:
|
|
if (c & 0x80) {
|
|
p--;
|
|
c = decodeUTF();
|
|
if (c == LS || c == PS) {
|
|
c = '\n';
|
|
loc.linnum++;
|
|
}
|
|
p++;
|
|
stringbuffer.write(cast(char)c);
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
stringbuffer.write(cast(char)c);
|
|
// writefln( stringbuffer.toString );
|
|
}
|
|
}
|
|
|
|
//**************************************
|
|
TOK charConstant(Token *t, int wide) {
|
|
uint c;
|
|
TOK tk = TOK.TOKcharv;
|
|
|
|
//printf("Lexer.charConstant\n");
|
|
p++;
|
|
c = *p++;
|
|
switch (c) {
|
|
case '\\':
|
|
switch (*p) {
|
|
case 'u':
|
|
t.uns64value = escapeSequence();
|
|
tk = TOK.TOKwcharv;
|
|
break;
|
|
|
|
case 'U':
|
|
case '&':
|
|
t.uns64value = escapeSequence();
|
|
tk = TOK.TOKdcharv;
|
|
break;
|
|
|
|
default:
|
|
t.uns64value = escapeSequence();
|
|
break;
|
|
}
|
|
break;
|
|
|
|
case '\n':
|
|
L1:
|
|
loc.linnum++;
|
|
|
|
case '\r':
|
|
case 0:
|
|
case 0x1a:
|
|
case '\'':
|
|
error("unterminated character constant");
|
|
return(tk);
|
|
|
|
default:
|
|
if (c & 0x80) {
|
|
p--;
|
|
c = decodeUTF();
|
|
p++;
|
|
if (c == LS || c == PS) {
|
|
goto L1;
|
|
}
|
|
if (c < 0xd800 || (c >= 0xe000 && c < 0xfffe)) {
|
|
tk = TOK.TOKwcharv;
|
|
}
|
|
else {
|
|
tk = TOK.TOKdcharv;
|
|
}
|
|
}
|
|
t.uns64value = c;
|
|
break;
|
|
}
|
|
|
|
if (*p != '\'') {
|
|
error("unterminated character constant");
|
|
return(tk);
|
|
}
|
|
p++;
|
|
return(tk);
|
|
}
|
|
|
|
// Get postfix of string literal.
|
|
void stringPostfix(Token *t) {
|
|
switch (*p) {
|
|
case 'c':
|
|
case 'w':
|
|
case 'd':
|
|
t.postfix = *p;
|
|
p++;
|
|
break;
|
|
|
|
default:
|
|
t.postfix = 0;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/***************************************
|
|
* Read \u or \U unicode sequence
|
|
* Input:
|
|
* u 'u' or 'U'
|
|
*/
|
|
/*
|
|
* uint Wchar(uint u)
|
|
* {
|
|
* uint value;
|
|
* uint n;
|
|
* ubyte c;
|
|
* uint nchars;
|
|
*
|
|
* nchars = (u == 'U') ? 8 : 4;
|
|
* value = 0;
|
|
* for (n = 0; 1; n++)
|
|
* {
|
|
* ++p;
|
|
* if( n == nchars)
|
|
* break;
|
|
* c = *p;
|
|
* if( !ishex(c))
|
|
* {
|
|
* error("\\%s sequence must be followed by %d hex characters", u, nchars);
|
|
* break;
|
|
* }
|
|
* if( isdigit(c))
|
|
* c -= '0';
|
|
* else if( islower(c))
|
|
* c -= 'a' - 10;
|
|
* else
|
|
* c -= 'A' - 10;
|
|
* value <<= 4;
|
|
* value |= c;
|
|
* }
|
|
* return value;
|
|
* }
|
|
*/
|
|
|
|
/**************************************
|
|
* Read in a number.
|
|
* If it's an integer, store it in tok.TKutok.Vlong.
|
|
* integers can be decimal, octal or hex
|
|
* Handle the suffixes U, UL, LU, L, etc.
|
|
* If it's double, store it in tok.TKutok.Vdouble.
|
|
* Returns:
|
|
* TKnum
|
|
* TKdouble,...
|
|
*/
|
|
|
|
TOK number(Token *t) {
|
|
//debug writefln("Lexer.number()");
|
|
// We use a state machine to collect numbers
|
|
enum STATE {
|
|
STATE_initial,
|
|
STATE_0,
|
|
STATE_decimal,
|
|
STATE_octal,
|
|
STATE_octale,
|
|
STATE_hex,
|
|
STATE_binary,
|
|
STATE_hex0,
|
|
STATE_binary0,
|
|
STATE_hexh,
|
|
STATE_error
|
|
}
|
|
|
|
enum FLAGS {
|
|
FLAGS_decimal = 1, // decimal
|
|
FLAGS_unsigned = 2, // u or U suffix
|
|
FLAGS_long = 4, // l or L suffix
|
|
}
|
|
FLAGS flags = FLAGS.FLAGS_decimal;
|
|
|
|
int i;
|
|
TOK result;
|
|
int base;
|
|
|
|
stringbuffer.offset = 0;
|
|
// stringbuffer.data = null;
|
|
STATE state = STATE.STATE_initial;
|
|
ubyte * start = p;
|
|
|
|
TOK _isreal() {
|
|
p = start;
|
|
return(inreal(t));
|
|
}
|
|
|
|
while (true) {
|
|
char c = cast(char)*p;
|
|
switch (state) {
|
|
case STATE.STATE_initial: // opening state
|
|
if (c == '0') {
|
|
state = STATE.STATE_0;
|
|
}
|
|
else {
|
|
state = STATE.STATE_decimal;
|
|
}
|
|
break;
|
|
|
|
case STATE.STATE_0:
|
|
flags = cast(FLAGS)(flags & ~FLAGS.FLAGS_decimal);
|
|
switch (c) {
|
|
// #if ZEROH
|
|
// case 'H': // 0h
|
|
// case 'h':
|
|
// goto hexh;
|
|
// #endif
|
|
case 'X':
|
|
case 'x':
|
|
state = STATE.STATE_hex0;
|
|
break;
|
|
|
|
case '.':
|
|
if (p[1] == '.') { // .. is a separate token
|
|
goto done;
|
|
}
|
|
|
|
case 'i':
|
|
case 'f':
|
|
case 'F':
|
|
goto _Real;
|
|
|
|
// #if ZEROH
|
|
// case 'E':
|
|
// case 'e':
|
|
// goto case_hex;
|
|
// #endif
|
|
case 'B':
|
|
case 'b':
|
|
state = STATE.STATE_binary0;
|
|
break;
|
|
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
state = STATE.STATE_octal;
|
|
break;
|
|
|
|
// #if ZEROH
|
|
// case '8': case '9': case 'A':
|
|
// case 'C': case 'D': case 'F':
|
|
// case 'a': case 'c': case 'd': case 'f':
|
|
// case_hex:
|
|
// state = STATE.STATE_hexh;
|
|
// break;
|
|
// #endif
|
|
case '_':
|
|
state = STATE.STATE_octal;
|
|
p++;
|
|
continue;
|
|
|
|
default:
|
|
goto done;
|
|
}
|
|
break;
|
|
|
|
case STATE.STATE_decimal: // reading decimal number
|
|
|
|
// if its not a digit - decimal complete or not a decimal
|
|
if (!isdigit(c)) {
|
|
// debug writefln( "\tnon-digit( %s )", c );
|
|
// #if ZEROH
|
|
// if( ishex(c) || c == 'H' || c == 'h' )
|
|
// goto hexh;
|
|
// #endif
|
|
//! wtf ?
|
|
// ignore embedded _
|
|
if (c == '_') {
|
|
p++;
|
|
continue;
|
|
}
|
|
|
|
// check decimal point - make real
|
|
if (c == '.' && p[1] != '.') {
|
|
goto _Real;
|
|
}
|
|
|
|
// check for mantra - make real
|
|
if (c == 'i' || c == 'f' || c == 'F' || c == 'e' || c == 'E') {
|
|
_Real: // It's a real number. Back up and rescan as a real
|
|
p = start;
|
|
return(inreal(t));
|
|
}
|
|
|
|
goto done;
|
|
}
|
|
break;
|
|
|
|
case STATE.STATE_hex0: // reading hex number
|
|
case STATE.STATE_hex:
|
|
if (!ishex(c)) {
|
|
if (c == '_') { // ignore embedded _
|
|
p++;
|
|
continue;
|
|
}
|
|
if (c == '.' && p[1] != '.') {
|
|
goto _Real;
|
|
}
|
|
if (c == 'P' || c == 'p' || c == 'i') {
|
|
goto _Real;
|
|
}
|
|
if (state == STATE.STATE_hex0) {
|
|
error("Hex digit expected, not '%s'", c);
|
|
}
|
|
goto done;
|
|
}
|
|
state = STATE.STATE_hex;
|
|
break;
|
|
|
|
// #if ZEROH
|
|
// hexh:
|
|
// state = STATE.STATE_hexh;
|
|
//
|
|
// case STATE.STATE_hexh: // parse numbers like 0FFh
|
|
// if( !ishex(c))
|
|
// {
|
|
// if( c == 'H' || c == 'h')
|
|
// {
|
|
// p++;
|
|
// base = 16;
|
|
// goto done;
|
|
// }
|
|
// else
|
|
// {
|
|
// // Check for something like 1E3 or 0E24
|
|
// if( memchr(stringbuffer.data.ptr, 'E', stringbuffer.offset) || memchr( stringbuffer.data.ptr, 'e', stringbuffer.offset))
|
|
// goto _Real;
|
|
// error("Hex digit expected, not '%s'", c);
|
|
// goto done;
|
|
// }
|
|
// }
|
|
// break;
|
|
// #endif
|
|
|
|
case STATE.STATE_octal: // reading octal number
|
|
case STATE.STATE_octale: // reading octal number with non-octal digits
|
|
if (!isoctal(c)) {
|
|
// #if ZEROH
|
|
// if( ishex(c) || c == 'H' || c == 'h' )
|
|
// goto hexh;
|
|
// #endif
|
|
if (c == '_') { // ignore embedded _
|
|
p++;
|
|
continue;
|
|
}
|
|
if (c == '.' && p[1] != '.') {
|
|
goto _Real;
|
|
}
|
|
if (c == 'i') {
|
|
goto _Real;
|
|
}
|
|
if (isdigit(c)) {
|
|
state = STATE.STATE_octale;
|
|
}
|
|
else {
|
|
goto done;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case STATE.STATE_binary0: // starting binary number
|
|
case STATE.STATE_binary: // reading binary number
|
|
if (c != '0' && c != '1') {
|
|
// #if ZEROH
|
|
// if( ishex(c) || c == 'H' || c == 'h' )
|
|
// goto hexh;
|
|
// #endif
|
|
if (c == '_') { // ignore embedded _
|
|
p++;
|
|
continue;
|
|
}
|
|
if (state == STATE.STATE_binary0) {
|
|
error("binary digit expected");
|
|
state = STATE.STATE_error;
|
|
break;
|
|
}
|
|
else {
|
|
goto done;
|
|
}
|
|
}
|
|
state = STATE.STATE_binary;
|
|
break;
|
|
|
|
case STATE.STATE_error: // for error recovery
|
|
if (!isdigit(c)) { // scan until non-digit
|
|
goto done;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
assert(0);
|
|
}
|
|
stringbuffer.write(cast(char)c);
|
|
p++;
|
|
}
|
|
done:
|
|
stringbuffer.write(cast(char)0); // terminate string
|
|
|
|
// debug writefln( "\tdigit complete( %s )", stringbuffer.toString );
|
|
|
|
if (state == STATE.STATE_octale) {
|
|
error("Octal digit expected");
|
|
}
|
|
|
|
uinteger_t n; // unsigned >=64 bit integer type
|
|
|
|
if (stringbuffer.offset == 2 && (state == STATE.STATE_decimal || state == STATE.STATE_0)) {
|
|
n = stringbuffer.data[0] - '0';
|
|
}
|
|
else {
|
|
// Convert string to integer
|
|
char* p = cast(char*)stringbuffer.data.ptr;
|
|
int r = 10;
|
|
int d;
|
|
if (*p == '0') {
|
|
if (p[1] == 'x' || p[1] == 'X') {
|
|
// "0x#"
|
|
p += 2;
|
|
r = 16;
|
|
}
|
|
else if (p[1] == 'b' || p[1] == 'B') {
|
|
// "0b#" - binary
|
|
p += 2;
|
|
r = 2;
|
|
}
|
|
else if (isdigit(p[1])) {
|
|
p += 1;
|
|
r = 8;
|
|
}
|
|
}
|
|
|
|
n = 0;
|
|
|
|
while (true) {
|
|
if (*p >= '0' && *p <= '9') {
|
|
d = *p - '0';
|
|
}
|
|
else if (*p >= 'a' && *p <= 'z') {
|
|
d = *p - 'a' + 10;
|
|
}
|
|
else if (*p >= 'A' && *p <= 'Z') {
|
|
d = *p - 'A' + 10;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
|
|
if (d >= r) {
|
|
break;
|
|
}
|
|
|
|
if (n * r + d < n) {
|
|
error("integer overflow");
|
|
break;
|
|
}
|
|
|
|
n = n * r + d;
|
|
p++;
|
|
}
|
|
|
|
// if n needs more than 64 bits
|
|
if (n.sizeof > 8 && n > 0xffffffffffffffffL) {
|
|
error("integer overflow");
|
|
}
|
|
}
|
|
|
|
// Parse trailing 'u', 'U', 'l' or 'L' in any combination
|
|
while (true) {
|
|
ubyte f;
|
|
switch (*p) {
|
|
case 'U':
|
|
case 'u':
|
|
f = FLAGS.FLAGS_unsigned;
|
|
goto L1;
|
|
|
|
case 'L':
|
|
case 'l':
|
|
f = FLAGS.FLAGS_long;
|
|
L1:
|
|
p++;
|
|
if (flags & f) {
|
|
error("unrecognized token");
|
|
}
|
|
flags = cast(FLAGS)(flags | f);
|
|
continue;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
break;
|
|
}
|
|
|
|
switch (flags) {
|
|
case 0:
|
|
/* Octal or Hexadecimal constant.
|
|
* First that fits: int, uint, long, ulong
|
|
*/
|
|
if (n & 0x8000000000000000L) {
|
|
result = TOK.TOKuns64v;
|
|
}
|
|
else if (n & 0xffffffff00000000L) {
|
|
result = TOK.TOKint64v;
|
|
}
|
|
else if (n & 0x80000000) {
|
|
result = TOK.TOKuns32v;
|
|
}
|
|
else {
|
|
result = TOK.TOKint32v;
|
|
}
|
|
break;
|
|
|
|
case FLAGS.FLAGS_decimal:
|
|
/* First that fits: int, long, long long
|
|
*/
|
|
if (n & 0x8000000000000000L) {
|
|
error("signed integer overflow");
|
|
result = TOK.TOKuns64v;
|
|
}
|
|
else if (n & 0xffffffff80000000L) {
|
|
result = TOK.TOKint64v;
|
|
}
|
|
else {
|
|
result = TOK.TOKint32v;
|
|
}
|
|
break;
|
|
|
|
case FLAGS.FLAGS_unsigned:
|
|
case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned:
|
|
/* First that fits: uint, ulong
|
|
*/
|
|
if (n & 0xffffffff00000000L) {
|
|
result = TOK.TOKuns64v;
|
|
}
|
|
else {
|
|
result = TOK.TOKuns32v;
|
|
}
|
|
break;
|
|
|
|
case FLAGS.FLAGS_decimal | FLAGS.FLAGS_long:
|
|
if (n & 0x8000000000000000L) {
|
|
error("signed integer overflow");
|
|
result = TOK.TOKuns64v;
|
|
}
|
|
else {
|
|
result = TOK.TOKint64v;
|
|
}
|
|
break;
|
|
|
|
case FLAGS.FLAGS_long:
|
|
if (n & 0x8000000000000000L) {
|
|
result = TOK.TOKuns64v;
|
|
}
|
|
else {
|
|
result = TOK.TOKint64v;
|
|
}
|
|
break;
|
|
|
|
case FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
|
|
case FLAGS.FLAGS_decimal | FLAGS.FLAGS_unsigned | FLAGS.FLAGS_long:
|
|
result = TOK.TOKuns64v;
|
|
break;
|
|
|
|
default:
|
|
debug writefln("%x", flags);
|
|
assert(0);
|
|
}
|
|
t.uns64value = n;
|
|
return(result);
|
|
}
|
|
|
|
/**************************************
|
|
* Read in characters, converting them to real.
|
|
* Bugs:
|
|
* Exponent overflow not detected.
|
|
* Too much requested precision is not detected.
|
|
*/
|
|
|
|
TOK inreal(Token *t) {
|
|
int dblstate;
|
|
uint c;
|
|
char hex; // is this a hexadecimal-floating-constant?
|
|
TOK result;
|
|
|
|
//printf("Lexer.inreal()\n");
|
|
stringbuffer.offset = 0;
|
|
dblstate = 0;
|
|
hex = 0;
|
|
Lnext:
|
|
while (1) {
|
|
// Get next char from input
|
|
c = *p++;
|
|
//printf("dblstate = %d, c = '%s'\n", dblstate, c);
|
|
while (1) {
|
|
switch (dblstate) {
|
|
case 0: // opening state
|
|
if (c == '0') {
|
|
dblstate = 9;
|
|
}
|
|
else if (c == '.') {
|
|
dblstate = 3;
|
|
}
|
|
else {
|
|
dblstate = 1;
|
|
}
|
|
break;
|
|
|
|
case 9:
|
|
dblstate = 1;
|
|
if (c == 'X' || c == 'x') {
|
|
hex++;
|
|
break;
|
|
}
|
|
|
|
case 1: // digits to left of .
|
|
case 3: // digits to right of .
|
|
case 7: // continuing exponent digits
|
|
if (!isdigit(c) && !(hex && isxdigit(c))) {
|
|
if (c == '_') {
|
|
goto Lnext; // ignore embedded '_'
|
|
}
|
|
dblstate++;
|
|
continue;
|
|
}
|
|
break;
|
|
|
|
case 2: // no more digits to left of .
|
|
if (c == '.') {
|
|
dblstate++;
|
|
break;
|
|
}
|
|
|
|
case 4: // no more digits to right of .
|
|
if ((c == 'E' || c == 'e') || hex && (c == 'P' || c == 'p')) {
|
|
dblstate = 5;
|
|
hex = 0; // exponent is always decimal
|
|
break;
|
|
}
|
|
if (hex) {
|
|
error("binary-exponent-part required");
|
|
}
|
|
goto done;
|
|
|
|
case 5: // looking immediately to right of E
|
|
dblstate++;
|
|
if (c == '-' || c == '+') {
|
|
break;
|
|
}
|
|
|
|
case 6: // 1st exponent digit expected
|
|
if (!isdigit(c)) {
|
|
error("exponent expected");
|
|
}
|
|
dblstate++;
|
|
break;
|
|
|
|
case 8: // past end of exponent digits
|
|
goto done;
|
|
}
|
|
break;
|
|
}
|
|
stringbuffer.write(c);
|
|
}
|
|
done:
|
|
p--;
|
|
|
|
stringbuffer.write(cast(byte)0);
|
|
|
|
// #if _WIN32 && __DMC__
|
|
char *save = __locale_decpoint;
|
|
|
|
__locale_decpoint = ".";
|
|
// #endif
|
|
t.float80value = strtold(cast(char *)stringbuffer.data.ptr, null);
|
|
errno = 0;
|
|
switch (*p) {
|
|
case 'F':
|
|
case 'f':
|
|
strtof(cast(char *)stringbuffer.data.ptr, null);
|
|
result = TOK.TOKfloat32v;
|
|
p++;
|
|
break;
|
|
|
|
default:
|
|
strtod(cast(char *)stringbuffer.data.ptr, null);
|
|
result = TOK.TOKfloat64v;
|
|
break;
|
|
|
|
case 'L':
|
|
case 'l':
|
|
result = TOK.TOKfloat80v;
|
|
p++;
|
|
break;
|
|
}
|
|
if (*p == 'i' || *p == 'I') {
|
|
p++;
|
|
switch (result) {
|
|
case TOK.TOKfloat32v:
|
|
result = TOK.TOKimaginary32v;
|
|
break;
|
|
|
|
case TOK.TOKfloat64v:
|
|
result = TOK.TOKimaginary64v;
|
|
break;
|
|
|
|
case TOK.TOKfloat80v:
|
|
result = TOK.TOKimaginary80v;
|
|
break;
|
|
}
|
|
}
|
|
// #if _WIN32 && __DMC__
|
|
__locale_decpoint = save;
|
|
// #endif
|
|
if (errno == ERANGE) {
|
|
error("number is not representable");
|
|
}
|
|
return(result);
|
|
}
|
|
|
|
|
|
|
|
|
|
/*********************************************
|
|
* Do pragma.
|
|
* Currently, the only pragma supported is:
|
|
* #line linnum [filespec]
|
|
*/
|
|
|
|
void Pragma() {
|
|
Token tok;
|
|
int linnum;
|
|
|
|
char[] filespec;
|
|
Loc loc = this.loc;
|
|
|
|
scan(&tok);
|
|
|
|
if (tok.value != TOK.TOKidentifier || tok.identifier != Id.line) {
|
|
goto Lerr;
|
|
}
|
|
|
|
scan(&tok);
|
|
if (tok.value == TOK.TOKint32v || tok.value == TOK.TOKint64v) {
|
|
linnum = tok.uns64value - 1;
|
|
}
|
|
else {
|
|
goto Lerr;
|
|
}
|
|
|
|
while (1) {
|
|
switch (*p) {
|
|
case 0:
|
|
case 0x1a:
|
|
case '\n':
|
|
Lnewline:
|
|
this.loc.linnum = linnum;
|
|
if (filespec.length) {
|
|
this.loc.filename = filespec;
|
|
}
|
|
return;
|
|
|
|
case '\r':
|
|
p++;
|
|
if (*p != '\n') {
|
|
p--;
|
|
goto Lnewline;
|
|
}
|
|
continue;
|
|
|
|
case ' ':
|
|
case '\t':
|
|
case '\v':
|
|
case '\f':
|
|
p++;
|
|
continue; // skip white space
|
|
|
|
case '_':
|
|
if (mod && memcmp(p, cast(char*)"__FILE__", 8) == 0) {
|
|
p += 8;
|
|
//! filespec = mem.strdup(loc.filename ? loc.filename : mod.identifier.toChars());
|
|
}
|
|
continue;
|
|
|
|
case '"':
|
|
if (filespec) {
|
|
goto Lerr;
|
|
}
|
|
stringbuffer.offset = 0;
|
|
p++;
|
|
while (1) {
|
|
uint c;
|
|
c = *p;
|
|
switch (c) {
|
|
case '\n':
|
|
case '\r':
|
|
case 0:
|
|
case 0x1a:
|
|
goto Lerr;
|
|
|
|
case '"':
|
|
stringbuffer.write(cast(byte)0);
|
|
// filespec = mem.strdup((char *)stringbuffer.data);
|
|
filespec = stringbuffer.toString.dup;
|
|
p++;
|
|
break;
|
|
|
|
default:
|
|
if (c & 0x80) {
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS) {
|
|
goto Lerr;
|
|
}
|
|
}
|
|
stringbuffer.write(c);
|
|
p++;
|
|
continue;
|
|
}
|
|
break;
|
|
}
|
|
continue;
|
|
|
|
default:
|
|
if (*p & 0x80) {
|
|
uint u = decodeUTF();
|
|
if (u == PS || u == LS) {
|
|
goto Lnewline;
|
|
}
|
|
}
|
|
goto Lerr;
|
|
}
|
|
}
|
|
|
|
Lerr:
|
|
errorLoc(loc, "#line integer [\"filespec\"]\\n expected");
|
|
}
|
|
|
|
|
|
|
|
/***************************************************
|
|
* Parse doc comment embedded between t.ptr and p.
|
|
* Remove trailing blanks and tabs from lines.
|
|
* Replace all newlines with \n.
|
|
* Remove leading comment character from each line.
|
|
* Decide if it's a lineComment or a blockComment.
|
|
* Append to previous one for this token.
|
|
*/
|
|
|
|
void getDocComment(Token *t, uint lineComment) {
|
|
auto OutBuffer buf = new OutBuffer;
|
|
ubyte ct = t.ptr[2];
|
|
ubyte *q = t.ptr + 3; // start of comment text
|
|
int linestart = 0;
|
|
|
|
ubyte *qend = p;
|
|
|
|
if (ct == '*' || ct == '+') {
|
|
qend -= 2;
|
|
}
|
|
|
|
// Scan over initial row of ****'s or ++++'s or ////'s
|
|
for (; q < qend; q++) {
|
|
if (*q != ct) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Remove trailing row of ****'s or ++++'s
|
|
if (ct != '/') {
|
|
for (; q < qend; qend--) {
|
|
if (qend[-1] != ct) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (; q < qend; q++) {
|
|
ubyte c = *q;
|
|
|
|
switch (c) {
|
|
case '*':
|
|
case '+':
|
|
if (linestart && c == ct) {
|
|
linestart = 0;
|
|
// Trim preceding whitespace up to preceding \n
|
|
while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) {
|
|
buf.offset--;
|
|
}
|
|
continue;
|
|
}
|
|
break;
|
|
|
|
case ' ':
|
|
case '\t':
|
|
break;
|
|
|
|
case '\r':
|
|
if (q[1] == '\n') {
|
|
continue; // skip the \r
|
|
}
|
|
goto Lnewline;
|
|
|
|
default:
|
|
if (c == 226) {
|
|
// If LS or PS
|
|
if (q[1] == 128 &&
|
|
(q[2] == 168 || q[2] == 169)) {
|
|
q += 2;
|
|
goto Lnewline;
|
|
}
|
|
}
|
|
linestart = 0;
|
|
break;
|
|
|
|
Lnewline:
|
|
c = '\n'; // replace all newlines with \n
|
|
|
|
case '\n':
|
|
linestart = 1;
|
|
|
|
// Trim trailing whitespace
|
|
while (buf.offset && (buf.data[buf.offset - 1] == ' ' || buf.data[buf.offset - 1] == '\t')) {
|
|
buf.offset--;
|
|
}
|
|
|
|
break;
|
|
}
|
|
buf.write(c);
|
|
}
|
|
|
|
// Always end with a newline
|
|
if (!buf.offset || buf.data[buf.offset - 1] != '\n') {
|
|
buf.writenl();
|
|
}
|
|
|
|
//buf.write(cast(char)0);
|
|
|
|
// It's a line comment if the start of the doc comment comes
|
|
// after other non-whitespace on the same line.
|
|
// ubyte** dc = (lineComment && anyToken)
|
|
// ? &t.lineComment
|
|
// : &t.blockComment;
|
|
|
|
char[] dc = (lineComment && anyToken) ? t.lineComment : t.blockComment;
|
|
|
|
// Combine with previous doc comment, if any
|
|
if (dc.length) {
|
|
dc = combineComments(dc, buf.toString().dup);
|
|
}
|
|
else {
|
|
dc = buf.toString().dup;
|
|
}
|
|
|
|
// writefln( dc );
|
|
|
|
if (lineComment && anyToken) {
|
|
t.lineComment = dc;
|
|
}
|
|
else {
|
|
t.blockComment = dc;
|
|
}
|
|
}
|
|
}
|
|
|
|
// character maps
|
|
static ubyte[256] cmtable;
|
|
|
|
const int CMoctal = 0x1;
|
|
const int CMhex = 0x2;
|
|
const int CMidchar = 0x4;
|
|
|
|
ubyte isoctal(ubyte c) {
|
|
return(cmtable[c] & CMoctal);
|
|
}
|
|
ubyte ishex(ubyte c) {
|
|
return(cmtable[c] & CMhex);
|
|
}
|
|
ubyte isidchar(ubyte c) {
|
|
return(cmtable[c] & CMidchar);
|
|
}
|
|
|
|
static void cmtable_init() {
|
|
for (uint c = 0; c < cmtable.length; c++) {
|
|
if ('0' <= c && c <= '7') {
|
|
cmtable[c] |= CMoctal;
|
|
}
|
|
if (isdigit(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')) {
|
|
cmtable[c] |= CMhex;
|
|
}
|
|
if (isalnum(c) || c == '_') {
|
|
cmtable[c] |= CMidchar;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/+
|
|
* struct StringValue
|
|
* {
|
|
* union
|
|
* {
|
|
* int intvalue;
|
|
* void *ptrvalue;
|
|
* dchar *string;
|
|
* }
|
|
*
|
|
* char[] lstring;
|
|
* }
|
|
* #define CASE_BASIC_TYPES
|
|
* case TOKwchar: case TOKdchar:
|
|
* case TOKbit: case TOKbool: case TOKchar:
|
|
* case TOKint8: case TOKuns8:
|
|
* case TOKint16: case TOKuns16:
|
|
* case TOKint32: case TOKuns32:
|
|
* case TOKint64: case TOKuns64:
|
|
* case TOKfloat32: case TOKfloat64: case TOKfloat80:
|
|
* case TOKimaginary32: case TOKimaginary64: case TOKimaginary80:
|
|
* case TOKcomplex32: case TOKcomplex64: case TOKcomplex80:
|
|
* case TOKvoid:
|
|
*
|
|
* #define CASE_BASIC_TYPES_X(t) \
|
|
* case TOKvoid: t = Type::tvoid; goto LabelX; \
|
|
* case TOKint8: t = Type::tint8; goto LabelX; \
|
|
* case TOKuns8: t = Type::tuns8; goto LabelX; \
|
|
* case TOKint16: t = Type::tint16; goto LabelX; \
|
|
* case TOKuns16: t = Type::tuns16; goto LabelX; \
|
|
* case TOKint32: t = Type::tint32; goto LabelX; \
|
|
* case TOKuns32: t = Type::tuns32; goto LabelX; \
|
|
* case TOKint64: t = Type::tint64; goto LabelX; \
|
|
* case TOKuns64: t = Type::tuns64; goto LabelX; \
|
|
* case TOKfloat32: t = Type::tfloat32; goto LabelX; \
|
|
* case TOKfloat64: t = Type::tfloat64; goto LabelX; \
|
|
* case TOKfloat80: t = Type::tfloat80; goto LabelX; \
|
|
* case TOKimaginary32: t = Type::timaginary32; goto LabelX; \
|
|
* case TOKimaginary64: t = Type::timaginary64; goto LabelX; \
|
|
* case TOKimaginary80: t = Type::timaginary80; goto LabelX; \
|
|
* case TOKcomplex32: t = Type::tcomplex32; goto LabelX; \
|
|
* case TOKcomplex64: t = Type::tcomplex64; goto LabelX; \
|
|
* case TOKcomplex80: t = Type::tcomplex80; goto LabelX; \
|
|
* case TOKbit: t = Type::tbit; goto LabelX; \
|
|
* case TOKchar: t = Type::tchar; goto LabelX; \
|
|
* case TOKwchar: t = Type::twchar; goto LabelX; \
|
|
* case TOKdchar: t = Type::tdchar; goto LabelX; \
|
|
* LabelX
|
|
+/
|