windows-nt/Source/XPSP1/NT/sdktools/rcdll/p0gettok.c
2020-09-26 16:20:57 +08:00

529 lines
20 KiB
C

/************************************************************************/
/* */
/* RCPP - Resource Compiler Pre-Processor for NT system */
/* */
/* P0GETTOK.C - Tokenization routines */
/* */
/* 29-Nov-90 w-BrianM Update for NT from PM SDK RCPP */
/* */
/************************************************************************/
#include "rc.h"
/************************************************************************
** MAP_TOKEN : a token has two representations and additional information.
** (ex : const, has basic token of L_CONST,
** mapped token of [L_TYPE | L_MODIFIER]
** and info based on what the map token is)
** MAP_AND_FILL : has two representations, but none of the extra info.
** (ex : '<', has basic of L_LT, and map of L_RELOP)
** NOMAP_TOKEN : has 1 representation and additional info.
** (ex: a string, basic and 'map' type L_STRING and ptrs to the actual str)
** NOMAP_AND_FILL : has 1 representation and no additional info.
** (ex : 'while', has basic and 'map' of L_WHILE)
** the FILL versions fill the token with the basic token type.
************************************************************************/
/* Remember `tok` as the basic token; the result is its mapped value. */
#define MAP_TOKEN(tok) (Basic_token = (tok), TS_VALUE(Basic_token))
/* Same as MAP_TOKEN, additionally storing the basic token in yylval. */
#define MAP_AND_FILL(tok) (yylval.yy_token = Basic_token = (tok), TS_VALUE(Basic_token))
/* Remember `tok` as the basic token; the result is `tok` itself. */
#define NOMAP_TOKEN(tok) (Basic_token = (tok))
/* Same as NOMAP_TOKEN, additionally storing the basic token in yylval. */
#define NOMAP_AND_FILL(tok) (yylval.yy_token = Basic_token = (tok))
/************************************************************************/
/* yylex - main tokenization routine */
/************************************************************************/
token_t
yylex(
void
)
{
REG WCHAR last_mapped;
WCHAR mapped_c;
WCHAR buf[5];
REG token_t lex_token;
for(;;) {
last_mapped = mapped_c = CHARMAP(GETCH());
first_switch:
switch(mapped_c) {
case LX_EACH:
case LX_ASCII:
if (fAFXSymbols && PREVCH() == SYMUSESTART || PREVCH() == SYMDEFSTART
|| PREVCH() == SYMDELIMIT) {
myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
continue;
}
Msg_Temp = GET_MSG(2018);
SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, PREVCH());
error(2018);
continue;
break;
case LX_OBRACE:
return(NOMAP_AND_FILL(L_LCURLY));
break;
case LX_CBRACE:
return(NOMAP_AND_FILL(L_RCURLY));
break;
case LX_OBRACK:
return(NOMAP_AND_FILL(L_LBRACK));
break;
case LX_CBRACK:
return(NOMAP_AND_FILL(L_RBRACK));
break;
case LX_OPAREN:
return(NOMAP_AND_FILL(L_LPAREN));
break;
case LX_CPAREN:
return(NOMAP_AND_FILL(L_RPAREN));
break;
case LX_COMMA:
return(NOMAP_AND_FILL(L_COMMA));
break;
case LX_QUEST:
return(NOMAP_AND_FILL(L_QUEST));
break;
case LX_SEMI:
return(NOMAP_AND_FILL(L_SEMI));
break;
case LX_TILDE:
return(NOMAP_AND_FILL(L_TILDE));
break;
case LX_NUMBER:
return(MAP_TOKEN(getnum(PREVCH())));
break;
case LX_MINUS:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_MINUSEQ));
break;
case LX_GT:
return(MAP_AND_FILL(L_POINTSTO));
break;
case LX_MINUS:
return(MAP_AND_FILL(L_DECR));
break;
default:
lex_token = L_MINUS;
break;
}
break;
case LX_PLUS:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_PLUSEQ));
break;
case LX_PLUS:
return(MAP_AND_FILL(L_INCR));
break;
default:
lex_token = L_PLUS;
break;
}
break;
case LX_AND:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_ANDEQ));
break;
case LX_AND:
return(MAP_AND_FILL(L_ANDAND));
break;
default:
lex_token = L_AND;
break;
}
break;
case LX_OR:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_OREQ));
break;
case LX_OR:
return(MAP_AND_FILL(L_OROR));
break;
default:
lex_token = L_OR;
break;
}
break;
case LX_COLON:
return(NOMAP_AND_FILL(L_COLON));
break;
case LX_HAT:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_XOREQ));
}
lex_token = L_XOR;
break;
case LX_PERCENT:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_MODEQ));
}
lex_token = L_MOD;
break;
case LX_EQ:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_EQUALS));
}
lex_token = L_ASSIGN;
break;
case LX_BANG:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_NOTEQ));
}
lex_token = L_EXCLAIM;
break;
case LX_SLASH:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_STAR:
dump_comment();
continue;
break;
case LX_SLASH:
DumpSlashComment();
continue;
break;
case LX_EQ:
return(MAP_AND_FILL(L_DIVEQ));
break;
default:
lex_token = L_DIV;
break;
}
break;
case LX_STAR:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_SLASH:
if( ! Prep ) {
strcpy (Msg_Text, GET_MSG(2138));
error(2138); /* (nested comments) */
} else {
myfwrite(L"*/", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
}
continue;
case LX_EQ:
return(MAP_AND_FILL(L_MULTEQ));
break;
default:
lex_token = L_MULT;
break;
}
break;
case LX_LT:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_LT:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_LSHFTEQ));
}
mapped_c = LX_LSHIFT;
lex_token = L_LSHIFT;
break;
case LX_EQ:
return(MAP_AND_FILL(L_LTEQ));
break;
default:
lex_token = L_LT;
break;
}
break;
case LX_LSHIFT:
/*
** if the next char is not an =, then we unget and return,
** since the only way in here is if we broke on the char
** following '<<'. since we'll have already worked the handle_eos()
** code prior to getting here, we'll not see another eos,
** UNLESS i/o buffering is char by char. ???
** see also, LX_RSHIFT
*/
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_LSHFTEQ));
}
UNGETCH();
return(MAP_AND_FILL(L_LSHIFT));
break;
case LX_GT:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_GTEQ));
case LX_GT:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_RSHFTEQ));
}
mapped_c = LX_RSHIFT;
lex_token = L_RSHIFT;
break;
default:
lex_token = L_GT;
break;
}
break;
case LX_RSHIFT:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_RSHFTEQ));
}
UNGETCH();
return(MAP_AND_FILL(L_RSHIFT));
break;
case LX_POUND:
if( ! Prep ) {
strcpy (Msg_Text, GET_MSG(2014));
error(2014);/* # sign must be first non-whitespace */
UNGETCH(); /* replace it */
Linenumber--; /* do_newline counts a newline */
do_newline(); /* may be a 'real' prepro line */
} else {
myfwrite(L"#", sizeof(WCHAR), 1, OUTPUTFILE);
}
continue;
break;
case LX_EOS:
if(PREVCH() == L'\\') {
if( ! Prep ) {
if( ! checknl()) { /* ignore the new line */
strcpy (Msg_Text, GET_MSG(2017));
error(2017);/* illegal escape sequence */
}
} else {
myfwrite(L"\\", sizeof(WCHAR), 1, OUTPUTFILE);
*buf = get_non_eof();
myfwrite(buf, sizeof(WCHAR), 1, OUTPUTFILE);
}
continue;
}
if(Macro_depth == 0) {
if( ! io_eob()) { /* not the end of the buffer */
continue;
}
if(fpop()) { /* have more files to read */
continue;
}
return(MAP_AND_FILL(L_EOF)); /* all gone . . . */
}
handle_eos(); /* found end of macro */
continue;
break;
case LX_DQUOTE:
if( ! Prep ) {
str_const();
return(NOMAP_TOKEN(L_STRING));
}
prep_string(L'\"');
continue;
break;
case LX_SQUOTE:
if( ! Prep ) {
return(MAP_TOKEN(char_const()));
}
prep_string(L'\'');
continue;
break;
case LX_CR: /* ??? check for nl next */
continue;
break;
case LX_NL:
if(On_pound_line) {
UNGETCH();
return(NOMAP_TOKEN(L_NOTOKEN));
}
if(Prep) {
// must manually write '\r' with '\n' when writing 16-bit strings
myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
}
do_newline();
continue;
break;
case LX_WHITE: /* skip all white space */
if( ! Prep ) { /* check only once */
do {
;
} while(LXC_IS_WHITE(GETCH()));
}
else {
WCHAR c;
c = PREVCH();
do {
myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
} while(LXC_IS_WHITE(c = GETCH()));
}
UNGETCH();
continue;
break;
case LX_ILL:
if( ! Prep ) {
Msg_Temp = GET_MSG(2018);
SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, PREVCH());
error(2018);/* unknown character */
} else {
myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
}
continue;
break;
case LX_BACKSLASH:
if( ! Prep ) {
if( ! checknl()) { /* ignore the new line */
strcpy (Msg_Text, GET_MSG(2017));
error(2017);/* illegal escape sequence */
}
}
else {
myfwrite(L"\\", sizeof(WCHAR), 1, OUTPUTFILE);
*buf = get_non_eof();
myfwrite(buf, sizeof(WCHAR), 1, OUTPUTFILE);
}
continue;
break;
case LX_DOT:
dot_switch:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_BACKSLASH:
if(checknl()) {
goto dot_switch;
}
UNGETCH();
break;
case LX_EOS:
if(handle_eos() == BACKSLASH_EOS) {
break;
}
goto dot_switch;
break;
case LX_DOT:
if( ! checkop(L'.') ) {
strcpy (Msg_Text, GET_MSG(2142));
error(2142);/* ellipsis requires three '.'s */
}
return(NOMAP_AND_FILL(L_ELLIPSIS));
break;
case LX_NUMBER:
/*
** don't worry about getting correct hash value.
** The text equivalent of a real number is never
** hashed
*/
Reuse_W[0] = L'.';
Reuse_W[1] = PREVCH();
return(MAP_TOKEN(get_real(&Reuse_W[2])));
break;
}
UNGETCH();
return(MAP_AND_FILL(L_PERIOD));
break;
case LX_NOEXPAND:
SKIPCH(); /* just skip length */
continue;
case LX_ID:
{
pdefn_t pdef;
if(Macro_depth > 0) {
if( ! lex_getid(PREVCH())) {
goto avoid_expand;
}
}
else {
getid(PREVCH());
}
if( ((pdef = get_defined()) != 0)
&&
( ! DEFN_EXPANDING(pdef))
&&
( can_expand(pdef))
) {
continue;
}
avoid_expand:
if( ! Prep ) {
/* M00BUG get near copy of identifier???? */
HLN_NAME(yylval.yy_ident) = Reuse_W;
HLN_HASH(yylval.yy_ident) = Reuse_W_hash;
HLN_LENGTH(yylval.yy_ident) = (UINT)Reuse_W_length;
return(L_IDENT);
} else {
myfwrite(Reuse_W, (Reuse_W_length - 1) * sizeof(WCHAR), 1, OUTPUTFILE);
return(NOMAP_TOKEN(L_NOTOKEN));
}
}
continue;
break;
}
/*
** all the multichar ( -> -- -= etc ) operands
** must come through here. we've gotten the next char,
** and not matched one of the possiblities, but we have to check
** for the end of the buffer character and act accordingly
** if it is the eob, then we handle it and go back for another try.
** otherwise, we unget the char we got, and return the base token.
*/
if(last_mapped == LX_EOS) {
if(handle_eos() != BACKSLASH_EOS) {
goto first_switch;
}
}
UNGETCH(); /* cause we got an extra one to check */
return(MAP_AND_FILL(lex_token));
}
}
/************************************************************************
**
**  lex_getid: reads an identifier for the main lexer.  The
**  identifier is read into Reuse_W.  This function should not handle
**  an end of string if it is rescanning a macro expansion, because
**  this could switch the context with regards to whether the macro
**  is expandable or not.  Similarly, the noexpand marker must only be
**  allowed if a macro is being rescanned, otherwise let this character
**  be caught as an illegal character in text
**
**  c : first character of the identifier (already read by the caller).
**
**  On return Reuse_W holds the null terminated identifier,
**  Reuse_W_hash its hash and Reuse_W_length its length including the
**  terminator.
**
**  Returns zero when the value that followed a noexpand marker equals
**  the length of the identifier, non-zero otherwise.  yylex skips
**  macro expansion of the identifier on a zero return.
**
**  An identifier that would not fit in Reuse_W raises fatal error
**  1067.  The bound is tested before every store, so nothing is ever
**  written at or beyond LIMIT(Reuse_W).
************************************************************************/
int
lex_getid(
    WCHAR c
    )
{
    REG WCHAR   *p;
    int         length = 0;     /* value read after a noexpand marker */

    p = Reuse_W;
    *p++ = c;
    c &= HASH_MASK;             /* c doubles as the running hash */
    for(;;) {
        while(LXC_IS_IDENT(*p = GETCH())) {     /* collect character */
            c += (*p & HASH_MASK);              /* hash it */
            p++;
            /*
            ** p is where the next character (or the terminator) will
            ** be stored, so stop before it leaves the buffer.
            ** fatal() is presumed not to return.
            */
            if(p >= LIMIT(Reuse_W)) {   /* is this error # correct? */
                strcpy (Msg_Text, GET_MSG(1067));
                fatal(1067);
            }
        }
        if(CHARMAP(*p) == LX_NOEXPAND ) {
            /* the character after the marker is a length, not text */
            length = (int)GETCH();
            continue;
        }
        UNGETCH();
        break;          /* out of for loop - only way out */
    }

    if(((p - Reuse_W) > LIMIT_ID_LENGTH) && ( ! Prep )) {
        /* truncate, then rehash what is left of the identifier */
        p = Reuse_W + LIMIT_ID_LENGTH;
        *p = L'\0';
        c = local_c_hash(Reuse_W);
        Msg_Temp = GET_MSG(4011);
        SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, Reuse_W);
        warning(4011);  /* id truncated */
    } else {
        *p = L'\0';     /* terminates identifier for expandable check */
    }
    Reuse_W_hash = (hash_t)c;
    Reuse_W_length = (UINT)((p - Reuse_W) + 1);
    return(length != (p - Reuse_W));
}