windows-nt/Source/XPSP1/NT/sdktools/hivepp/p0gettok.c
2020-09-26 16:20:57 +08:00

558 lines
13 KiB
C

/************************************************************************/
/* */
/* RCPP - Resource Compiler Pre-Processor for NT system */
/* */
/* P0GETTOK.C - Tokenization routines */
/* */
/* 29-Nov-90 w-BrianM Update for NT from PM SDK RCPP */
/* */
/************************************************************************/
#include <stdio.h>
#include "rcpptype.h"
#include "rcppdecl.h"
#include "rcppext.h"
#include "grammar.h"
#include "p0defs.h"
#include "charmap.h"
/************************************************************************
** Token-return helpers used by yylex().  Each one records the token in
** Basic_token and evaluates to the value yylex() should return.
**
** MAP_TOKEN      : the token has a basic and a mapped representation plus
**                  additional information (ex : const, has basic token of
**                  L_CONST, mapped token of [L_TYPE | L_MODIFIER] and info
**                  based on what the map token is).
**                  Evaluates to TS_VALUE(Basic_token).
** MAP_AND_FILL   : two representations, but none of the extra info
**                  (ex : '<', has basic of L_LT, and map of L_RELOP).
**                  Evaluates to TS_VALUE(Basic_token).
** NOMAP_TOKEN    : one representation and additional info (ex : a string,
**                  basic and 'map' type L_STRING and ptrs to the actual
**                  str).  Evaluates to the basic token itself.
** NOMAP_AND_FILL : one representation and no additional info
**                  (ex : 'while', has basic and 'map' of L_WHILE).
**                  Evaluates to the basic token itself.
**
** The *_FILL versions additionally store the basic token type in
** yylval.yy_token.
************************************************************************/
#define MAP_TOKEN(tok) ((Basic_token = (tok)), TS_VALUE(Basic_token))
#define MAP_AND_FILL(tok) ((Basic_token = (tok)), (yylval.yy_token = Basic_token), TS_VALUE(Basic_token))
#define NOMAP_TOKEN(tok) (Basic_token = (tok))
#define NOMAP_AND_FILL(tok) ((Basic_token = (tok)), (yylval.yy_token = Basic_token))
/************************************************************************
** yylex - main tokenization routine
**
** Loops forever reading one character at a time with GETCH(), classifies
** it through CHARMAP() and dispatches on the resulting character class.
** A case either
**   - returns a token (through one of the *_TOKEN / *_FILL macros, or
**     L_IDENT directly),
**   - 'continue's to fetch the next character (white space, comments,
**     errors, text that was echoed to OUTPUTFILE, ...), or
**   - 'break's out of the switch with lex_token set to the one-character
**     operator; the code after the switch then pushes the look-ahead
**     character back and returns that operator.
**
** When Prep is non-zero several cases write the text they consumed to
** OUTPUTFILE instead of diagnosing it or turning it into a token.
**
** Returns: the token value; L_EOF is returned from the LX_EOS case once
** Macro_depth is 0, io_eob() reports true and fpop() reports false.
************************************************************************/
token_t yylex(void)
{
/* class of the most recently read character; tested for LX_EOS after the switch */
REG UCHAR last_mapped;
/* class the switch dispatches on; rewritten to LX_LSHIFT / LX_RSHIFT below */
UCHAR mapped_c;
/* one-character operator to return when the look-ahead matched nothing */
REG token_t lex_token;
for(;;) {
last_mapped = mapped_c = CHARMAP(GETCH());
/*
** re-entry point used after the switch: when the look-ahead character
** was LX_EOS and handle_eos() did not report BACKSLASH_EOS, the same
** mapped_c is dispatched again so the operator case re-reads its
** look-ahead character.
*/
first_switch:
switch(mapped_c) {
case LX_EACH:
case LX_ASCII:
/* report error 2018 with the offending character, then keep scanning */
Msg_Temp = GET_MSG(2018);
SET_MSG (Msg_Text, Msg_Temp, PREVCH());
error(2018);
continue;
break;
/* single-character punctuators : returned as-is */
case LX_OBRACE:
return(NOMAP_AND_FILL(L_LCURLY));
break;
case LX_CBRACE:
return(NOMAP_AND_FILL(L_RCURLY));
break;
case LX_OBRACK:
return(NOMAP_AND_FILL(L_LBRACK));
break;
case LX_CBRACK:
return(NOMAP_AND_FILL(L_RBRACK));
break;
case LX_OPAREN:
return(NOMAP_AND_FILL(L_LPAREN));
break;
case LX_CPAREN:
return(NOMAP_AND_FILL(L_RPAREN));
break;
case LX_COMMA:
return(NOMAP_AND_FILL(L_COMMA));
break;
case LX_QUEST:
return(NOMAP_AND_FILL(L_QUEST));
break;
case LX_SEMI:
return(NOMAP_AND_FILL(L_SEMI));
break;
case LX_TILDE:
return(NOMAP_AND_FILL(L_TILDE));
break;
case LX_NUMBER:
/* getnum() is handed the first character and yields the token */
return(MAP_TOKEN(getnum(PREVCH())));
break;
/*
** operators that may be one or two characters long : read one
** look-ahead character; on a match return the combined token,
** otherwise remember the one-character token in lex_token and break
** to the common code after the switch.
*/
case LX_MINUS:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_MINUSEQ));
break;
case LX_GT:
return(MAP_AND_FILL(L_POINTSTO));
break;
case LX_MINUS:
return(MAP_AND_FILL(L_DECR));
break;
default:
lex_token = L_MINUS;
break;
}
break;
case LX_PLUS:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_PLUSEQ));
break;
case LX_PLUS:
return(MAP_AND_FILL(L_INCR));
break;
default:
lex_token = L_PLUS;
break;
}
break;
case LX_AND:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_ANDEQ));
break;
case LX_AND:
return(MAP_AND_FILL(L_ANDAND));
break;
default:
lex_token = L_AND;
break;
}
break;
case LX_OR:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_OREQ));
break;
case LX_OR:
return(MAP_AND_FILL(L_OROR));
break;
default:
lex_token = L_OR;
break;
}
break;
case LX_COLON:
return(NOMAP_AND_FILL(L_COLON));
break;
case LX_HAT:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_XOREQ));
}
lex_token = L_XOR;
break;
case LX_PERCENT:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_MODEQ));
}
lex_token = L_MOD;
break;
case LX_EQ:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_EQUALS));
}
lex_token = L_ASSIGN;
break;
case LX_BANG:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_NOTEQ));
}
lex_token = L_EXCLAIM;
break;
case LX_SLASH:
/* slash : either of the two comment openers, a divide-assign, or a divide */
switch(last_mapped = CHARMAP(GETCH())) {
case LX_STAR:
/* comment opener : hand off to dump_comment(), then rescan */
dump_comment();
continue;
break;
case LX_SLASH:
/* double slash : hand off to DumpSlashComment(), then rescan */
DumpSlashComment();
continue;
break;
case LX_EQ:
return(MAP_AND_FILL(L_DIVEQ));
break;
default:
lex_token = L_DIV;
break;
}
break;
case LX_STAR:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_SLASH:
/*
** a comment terminator seen outside of a comment : error 2138
** unless Prep is set, in which case the two characters are
** copied to OUTPUTFILE.  Either way, go back for more input.
*/
if( ! Prep ) {
Msg_Temp = GET_MSG(2138);
SET_MSG (Msg_Text, Msg_Temp);
error(2138); /* (nested comments) */
}
else {
fwrite("*/", 2, 1, OUTPUTFILE);
}
continue;
case LX_EQ:
return(MAP_AND_FILL(L_MULTEQ));
break;
default:
lex_token = L_MULT;
break;
}
break;
case LX_LT:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_LT:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_LSHFTEQ));
}
/*
** two characters are already consumed : switch mapped_c to
** LX_LSHIFT so that a re-dispatch through first_switch lands in
** the LX_LSHIFT case instead of rescanning from LX_LT.
*/
mapped_c = LX_LSHIFT;
lex_token = L_LSHIFT;
break;
case LX_EQ:
return(MAP_AND_FILL(L_LTEQ));
break;
default:
lex_token = L_LT;
break;
}
break;
case LX_LSHIFT:
/*
** if the next char is not an =, then we unget and return,
** since the only way in here is if we broke on the char
** following '<<'. since we'll have already worked the handle_eos()
** code prior to getting here, we'll not see another eos,
** UNLESS i/o buffering is char by char. ???
** see also, LX_RSHIFT
*/
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_LSHFTEQ));
}
UNGETCH();
return(MAP_AND_FILL(L_LSHIFT));
break;
case LX_GT:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_EQ:
return(MAP_AND_FILL(L_GTEQ));
case LX_GT:
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_RSHFTEQ));
}
/* same re-dispatch arrangement as for LX_LSHIFT above */
mapped_c = LX_RSHIFT;
lex_token = L_RSHIFT;
break;
default:
lex_token = L_GT;
break;
}
break;
case LX_RSHIFT:
/* reached only through first_switch after '>>' ; see LX_LSHIFT */
if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
return(MAP_AND_FILL(L_RSHFTEQ));
}
UNGETCH();
return(MAP_AND_FILL(L_RSHIFT));
break;
case LX_POUND:
if( ! Prep ) {
Msg_Temp = GET_MSG(2014);
SET_MSG (Msg_Text, Msg_Temp);
error(2014);/* # sign must be first non-whitespace */
UNGETCH(); /* replace it */
Linenumber--; /* do_newline counts a newline */
do_newline(); /* may be a 'real' prepro line */
}
else {
/* Prep : copy the '#' through to the output */
fwrite("#", 1, 1, OUTPUTFILE);
}
continue;
break;
case LX_EOS:
/*
** a character of class LX_EOS whose actual value is a backslash
** is treated like LX_BACKSLASH below.
*/
if(PREVCH() == '\\') {
if( ! Prep ) {
if( ! checknl()) { /* ignore the new line */
Msg_Temp = GET_MSG(2017);
SET_MSG (Msg_Text, Msg_Temp);
error(2017);/* illegal escape sequence */
}
}
else {
fputc('\\', OUTPUTFILE);
fputc(get_non_eof(), OUTPUTFILE);
}
continue;
}
if(Macro_depth == 0) {
if( ! io_eob()) { /* not the end of the buffer */
continue;
}
if(fpop()) { /* have more files to read */
continue;
}
return(MAP_AND_FILL(L_EOF)); /* all gone . . . */
}
handle_eos(); /* found end of macro */
continue;
break;
case LX_DQUOTE:
/* non-Prep : str_const() then return L_STRING ; Prep : prep_string() and rescan */
if( ! Prep ) {
str_const();
return(NOMAP_TOKEN(L_STRING));
}
prep_string('\"');
continue;
break;
case LX_SQUOTE:
/* non-Prep : char_const() supplies the token ; Prep : prep_string() and rescan */
if( ! Prep ) {
return(MAP_TOKEN(char_const()));
}
prep_string('\'');
continue;
break;
case LX_CR: /* ??? check for nl next */
continue;
break;
case LX_NL:
/*
** on a '#' line the newline ends the line : push it back and
** return L_NOTOKEN.  Otherwise (after echoing it when Prep is
** set) let do_newline() process it and keep scanning.
*/
if(On_pound_line) {
UNGETCH();
return(NOMAP_TOKEN(L_NOTOKEN));
}
if(Prep) {
fputc('\n', OUTPUTFILE);
}
do_newline();
continue;
break;
case LX_WHITE: /* skip all white space */
if( ! Prep ) { /* check only once */
do {
;
} while(LXC_IS_WHITE(GETCH()));
}
else {
/* Prep : copy every white space character to the output */
UCHAR c;
c = PREVCH();
do {
fputc(c, OUTPUTFILE);
} while(LXC_IS_WHITE(c = GETCH()));
}
/* push back the first character that was not white space */
UNGETCH();
continue;
break;
/* Note:
* RCPP.EXE does not support DBCS code.
* Therefore, an error message should be displayed.
* IBM-J PTR 12JP-0092
* MSHQ PTR xxxxx
*/
case LX_LEADBYTE:
if( ! Prep ) { /* check only once */
/* error 2018 is reported once for the lead byte ... */
Msg_Temp = GET_MSG(2018);
SET_MSG (Msg_Text, Msg_Temp, PREVCH());
error(2018);
/* ... and once for the byte that follows it (consumed here) */
Msg_Temp = GET_MSG(2018);
SET_MSG (Msg_Text, Msg_Temp, GETCH());
error(2018);
}
else {
/* Prep : copy both bytes through to the output */
fputc(PREVCH(), OUTPUTFILE);
#ifdef DBCS // token_t yylex(void)
fputc(get_non_eof(), OUTPUTFILE);
#else
fputc(GETCH(), OUTPUTFILE);
#endif // DBCS
}
continue;
break;
case LX_ILL:
if( ! Prep ) {
Msg_Temp = GET_MSG(2018);
SET_MSG (Msg_Text, Msg_Temp, PREVCH());
error(2018);/* unknown character */
} else {
fputc(PREVCH(), OUTPUTFILE);
}
continue;
break;
case LX_BACKSLASH:
if( ! Prep ) {
if( ! checknl()) { /* ignore the new line */
Msg_Temp = GET_MSG(2017);
SET_MSG (Msg_Text, Msg_Temp);
error(2017);/* illegal escape sequence */
}
}
else {
/* Prep : copy the backslash and the character after it */
fputc('\\', OUTPUTFILE);
fputc(get_non_eof(), OUTPUTFILE);
}
continue;
break;
case LX_DOT:
/*
** '.' : look at the following character to tell apart an
** ellipsis, a number starting with '.', and a plain period.
** dot_switch is re-entered whenever the look-ahead had to be
** discarded (checknl() returned true, or an end of string that
** handle_eos() did not report as BACKSLASH_EOS).
*/
dot_switch:
switch(last_mapped = CHARMAP(GETCH())) {
case LX_BACKSLASH:
if(checknl()) {
goto dot_switch;
}
UNGETCH();
break;
case LX_EOS:
if(handle_eos() == BACKSLASH_EOS) {
break;
}
goto dot_switch;
break;
case LX_DOT:
if( ! checkop('.') ) {
Msg_Temp = GET_MSG(2142);
SET_MSG (Msg_Text, Msg_Temp);
error(2142);/* ellipsis requires three '.'s */
}
/* L_ELLIPSIS is returned whether or not the error was reported */
return(NOMAP_AND_FILL(L_ELLIPSIS));
break;
case LX_NUMBER:
/*
** don't worry about getting correct hash value.
** The text equivalent of a real number is never
** hashed
*/
Reuse_1[0] = '.';
Reuse_1[1] = PREVCH();
return(MAP_TOKEN(get_real(&Reuse_1[2])));
break;
}
/* anything else : push the look-ahead back and return a period */
UNGETCH();
return(MAP_AND_FILL(L_PERIOD));
break;
case LX_NOEXPAND:
SKIPCH(); /* just skip length */
continue;
case LX_ID:
{
pdefn_t pdef;
/*
** collect the identifier : lex_getid() while inside a macro
** (Macro_depth > 0), getid() otherwise.  A zero return from
** lex_getid() skips the macro lookup altogether.
*/
if(Macro_depth > 0) {
if( ! lex_getid(PREVCH())) {
goto avoid_expand;
}
}
else {
getid(PREVCH());
}
/*
** the identifier names a definition that is not currently being
** expanded and can_expand() returned true : go back for more
** input (presumably can_expand() has set up the expansion text
** to be read next -- confirm against can_expand()).
*/
if( ((pdef = get_defined()) != 0)
&&
( ! DEFN_EXPANDING(pdef))
&&
( can_expand(pdef))
) {
continue;
}
avoid_expand:
if( ! Prep ) {
/* M00BUG get near copy of identifier???? */
HLN_NAME(yylval.yy_ident) = Reuse_1;
HLN_HASH(yylval.yy_ident) = Reuse_1_hash;
HLN_LENGTH(yylval.yy_ident) = (UCHAR)Reuse_1_length;
/* returned directly : Basic_token is not updated on this path */
return(L_IDENT);
}
else {
/* Prep : write the identifier text (length less one) and return L_NOTOKEN */
fwrite(Reuse_1, Reuse_1_length - 1, 1, OUTPUTFILE);
return(NOMAP_TOKEN(L_NOTOKEN));
}
}
/* not reached : both branches above return */
continue;
break;
}
/*
** all the multichar ( -> -- -= etc ) operands
** must come through here. we've gotten the next char,
** and not matched one of the possibilities, but we have to check
** for the end of the buffer character and act accordingly
** if it is the eob, then we handle it and go back for another try.
** otherwise, we unget the char we got, and return the base token.
*/
if(last_mapped == LX_EOS) {
if(handle_eos() != BACKSLASH_EOS) {
goto first_switch;
}
}
UNGETCH(); /* cause we got an extra one to check */
return(MAP_AND_FILL(lex_token));
}
}
/************************************************************************
**
**  lex_getid: reads an identifier for the main lexer.  The
**  identifier is read into Reuse_1.  This function should not handle
**  an end of string if it is rescanning a macro expansion, because
**  this could switch the context with regards to whether the macro
**  is expandable or not.  Similarly, the noexpand marker must only be
**  allowed if a macro is being rescanned, otherwise let this character
**  be caught as an illegal character in text
**
**  c       : first character of the identifier (already consumed by the
**            caller).
**
**  On exit : Reuse_1 holds the NUL terminated identifier, Reuse_1_hash
**            its hash and Reuse_1_length its length including the
**            terminator.
**
**  Returns : non-zero when the length byte read after an LX_NOEXPAND
**            marker (0 when no marker was seen) differs from the length
**            of the identifier; zero when they are equal.  yylex() skips
**            the macro lookup on a zero return.
**
**  The Reuse_1 bound is checked BEFORE every character is stored.  The
**  check used to run only after the whole identifier had been collected,
**  i.e. after an over-long identifier had already been written past the
**  end of the buffer.  The same inputs still end in fatal(1067).
**  NOTE(review): this relies on fatal() not returning -- confirm.
************************************************************************/
int lex_getid(UCHAR c)
{
    REG UCHAR *p;
    int length = 0;

    p = Reuse_1;
    *p++ = c;
    c &= HASH_MASK;
    for(;;) {
        if(p >= LIMIT(Reuse_1)) {       /* is this error # correct? */
            Msg_Temp = GET_MSG(1067);
            SET_MSG (Msg_Text, Msg_Temp);
            fatal(1067);
        }
        *p = GETCH();
        if(LXC_IS_IDENT(*p)) {          /* collect character */
            c += (*p & HASH_MASK);      /* hash it */
            p++;
            continue;
        }
        if(CHARMAP(*p) == LX_NOEXPAND ) {
            /*
            ** the marker is followed by a length byte; remember it and
            ** keep collecting (the marker itself is overwritten by the
            ** next character read).
            */
            length = (int)GETCH();
            continue;
        }
        UNGETCH();      /* give back the character that ended the id */
        break;          /* out of for loop - only way out */
    }
    if(((p - Reuse_1) > LIMIT_ID_LENGTH) && ( ! Prep )) {
        /* too long : truncate, rehash the truncated text and warn */
        p = Reuse_1 + LIMIT_ID_LENGTH;
        *p = '\0';
        c = local_c_hash(Reuse_1);
        Msg_Temp = GET_MSG(4011);
        SET_MSG (Msg_Text, Msg_Temp, Reuse_1);
        warning(4011);  /* id truncated */
    }
    else {
        *p = '\0';      /* terminates identifier for expandable check */
    }
    Reuse_1_hash = c;
    Reuse_1_length = (UCHAR)((p - Reuse_1) + 1);
    return(length != (p - Reuse_1));
}