/************************************************************************/
/*                                                                      */
/* RCPP - Resource Compiler Pre-Processor for NT system                 */
/*                                                                      */
/* P0GETTOK.C - Tokenization routines                                   */
/*                                                                      */
/* 29-Nov-90 w-BrianM  Update for NT from PM SDK RCPP                   */
/*                                                                      */
/************************************************************************/

#include "rc.h"

/************************************************************************
**  MAP_TOKEN : a token has two representations and additional information.
**      (ex : const, has basic token of L_CONST,
**                   mapped token of [L_TYPE | L_MODIFIER]
**                   and info based on what the map token is)
**  MAP_AND_FILL : has two representations, but none of the extra info.
**      (ex : '<', has basic of L_LT, and map of L_RELOP)
**  NOMAP_TOKEN : has 1 representation and additional info.
**      (ex : a string, basic and 'map' type L_STRING and ptrs to the actual str)
**  NOMAP_AND_FILL : has 1 representation and no additional info.
**      (ex : 'while', has basic and 'map' of L_WHILE)
**  the FILL versions fill the token with the basic token type.
**
**  All four macros record the basic token in Basic_token.  The MAP
**  versions evaluate to TS_VALUE(Basic_token) (TS_VALUE is defined
**  elsewhere; presumably it yields the mapped token - confirm), the
**  NOMAP versions evaluate to the basic token itself.
************************************************************************/
/* record the basic token, yield its TS_VALUE() */
#define MAP_TOKEN(otok)\
        (Basic_token = (otok), TS_VALUE(Basic_token))
/* as MAP_TOKEN, but also store the basic token in yylval.yy_token */
#define MAP_AND_FILL(otok)\
        (yylval.yy_token = Basic_token = (otok), TS_VALUE(Basic_token))
/* record the basic token and yield it unchanged */
#define NOMAP_TOKEN(otok)\
        (Basic_token = (otok))
/* as NOMAP_TOKEN, but also store the basic token in yylval.yy_token */
#define NOMAP_AND_FILL(otok)\
        (yylval.yy_token = Basic_token = (otok))


/************************************************************************/
/* yylex - main tokenization routine                                    */
/*                                                                      */
/* Reads characters with GETCH(), classifies each one with CHARMAP()    */
/* and dispatches on the resulting class.  Characters that do not       */
/* produce a token (white space, comments, newlines, expanded macros,   */
/* ...) restart the loop; everything else returns a token through one   */
/* of the MAP_/NOMAP_ macros, or L_IDENT for an identifier.             */
/*                                                                      */
/* When Prep is set, several cases copy the text they consume to        */
/* OUTPUTFILE with myfwrite() instead of reporting an error or          */
/* building a token.                                                    */
/*                                                                      */
/* Multi-character operators read one character of lookahead into       */
/* last_mapped.  If no longer operator matches, the case sets lex_token */
/* to the base token and breaks out of the switch; the code after the   */
/* switch then deals with an end-of-string marker or ungets the         */
/* lookahead and returns lex_token.                                     */
/************************************************************************/

token_t
yylex(
    void
    )
{
    REG WCHAR           last_mapped;    /* class of the most recently read char */
    WCHAR               mapped_c;       /* class the outer switch dispatches on */
    WCHAR               buf[5];
    REG token_t         lex_token;      /* base token of a multi-char operator */

    /*
    **  NOTE(review): the outer switch has no default case.  A character
    **  class not listed below would reach the code after the switch with
    **  lex_token never assigned - confirm CHARMAP() cannot produce one.
    */
    for(;;) {
        last_mapped = mapped_c = CHARMAP(GETCH());
first_switch:
        switch(mapped_c) {
            case LX_EACH:
            case LX_ASCII:
                /*
                **  The symbol marker characters are only passed through
                **  to the output when fAFXSymbols is set.  && binds
                **  tighter than ||, so the three comparisons must be
                **  grouped explicitly; without the parentheses the flag
                **  guarded the SYMUSESTART test only.
                */
                if (fAFXSymbols
                    && (PREVCH() == SYMUSESTART || PREVCH() == SYMDEFSTART
                        || PREVCH() == SYMDELIMIT)) {
                    myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
                    continue;
                }
                Msg_Temp = GET_MSG(2018);
                SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, PREVCH());
                error(2018);
                continue;
                break;
            case LX_OBRACE:
                return(NOMAP_AND_FILL(L_LCURLY));
                break;
            case LX_CBRACE:
                return(NOMAP_AND_FILL(L_RCURLY));
                break;
            case LX_OBRACK:
                return(NOMAP_AND_FILL(L_LBRACK));
                break;
            case LX_CBRACK:
                return(NOMAP_AND_FILL(L_RBRACK));
                break;
            case LX_OPAREN:
                return(NOMAP_AND_FILL(L_LPAREN));
                break;
            case LX_CPAREN:
                return(NOMAP_AND_FILL(L_RPAREN));
                break;
            case LX_COMMA:
                return(NOMAP_AND_FILL(L_COMMA));
                break;
            case LX_QUEST:
                return(NOMAP_AND_FILL(L_QUEST));
                break;
            case LX_SEMI:
                return(NOMAP_AND_FILL(L_SEMI));
                break;
            case LX_TILDE:
                return(NOMAP_AND_FILL(L_TILDE));
                break;
            case LX_NUMBER:
                return(MAP_TOKEN(getnum(PREVCH())));
                break;

            case LX_MINUS:          /* -  -=  ->  -- */
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_MINUSEQ));
                        break;
                    case LX_GT:
                        return(MAP_AND_FILL(L_POINTSTO));
                        break;
                    case LX_MINUS:
                        return(MAP_AND_FILL(L_DECR));
                        break;
                    default:
                        lex_token = L_MINUS;
                        break;
                }
                break;
            case LX_PLUS:           /* +  +=  ++ */
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_PLUSEQ));
                        break;
                    case LX_PLUS:
                        return(MAP_AND_FILL(L_INCR));
                        break;
                    default:
                        lex_token = L_PLUS;
                        break;
                }
                break;
            case LX_AND:            /* &  &=  && */
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_ANDEQ));
                        break;
                    case LX_AND:
                        return(MAP_AND_FILL(L_ANDAND));
                        break;
                    default:
                        lex_token = L_AND;
                        break;
                }
                break;
            case LX_OR:             /* |  |=  || */
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_OREQ));
                        break;
                    case LX_OR:
                        return(MAP_AND_FILL(L_OROR));
                        break;
                    default:
                        lex_token = L_OR;
                        break;
                }
                break;
            case LX_COLON:
                return(NOMAP_AND_FILL(L_COLON));
                break;
            case LX_HAT:            /* ^  ^= */
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_XOREQ));
                }
                lex_token = L_XOR;
                break;
            case LX_PERCENT:        /* %  %= */
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_MODEQ));
                }
                lex_token = L_MOD;
                break;
            case LX_EQ:             /* =  == */
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_EQUALS));
                }
                lex_token = L_ASSIGN;
                break;
            case LX_BANG:           /* !  != */
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_NOTEQ));
                }
                lex_token = L_EXCLAIM;
                break;
            case LX_SLASH:          /* /  /=  and both comment openers */
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_STAR:
                        dump_comment();
                        continue;
                        break;
                    case LX_SLASH:
                        DumpSlashComment();
                        continue;
                        break;
                    case LX_EQ:
                        return(MAP_AND_FILL(L_DIVEQ));
                        break;
                    default:
                        lex_token = L_DIV;
                        break;
                }
                break;
            case LX_STAR:           /* *  *=  and a stray comment closer */
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_SLASH:
                        if( ! Prep ) {
                            strcpy (Msg_Text, GET_MSG(2138));
                            error(2138); /* (nested comments) */
                        } else {
                            myfwrite(L"*/", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
                        }
                        continue;
                    case LX_EQ:
                        return(MAP_AND_FILL(L_MULTEQ));
                        break;
                    default:
                        lex_token = L_MULT;
                        break;
                }
                break;
            case LX_LT:             /* <  <=  <<  <<= */
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_LT:
                        if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                            return(MAP_AND_FILL(L_LSHFTEQ));
                        }
                        /*
                        **  remember that '<<' has been consumed, so a
                        **  retry through first_switch resumes in the
                        **  LX_LSHIFT case rather than here
                        */
                        mapped_c = LX_LSHIFT;
                        lex_token = L_LSHIFT;
                        break;
                    case LX_EQ:
                        return(MAP_AND_FILL(L_LTEQ));
                        break;
                    default:
                        lex_token = L_LT;
                        break;
                }
                break;
            case LX_LSHIFT:
                /*
                **  if the next char is not an =, then we unget and return,
                **  since the only way in here is if we broke on the char
                **  following '<<'. since we'll have already worked the handle_eos()
                **  code prior to getting here, we'll not see another eos,
                **  UNLESS i/o buffering is char by char. ???
                **  see also, LX_RSHIFT
                */
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_LSHFTEQ));
                }
                UNGETCH();
                return(MAP_AND_FILL(L_LSHIFT));
                break;
            case LX_GT:             /* >  >=  >>  >>= */
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_EQ:
                        return(MAP_AND_FILL(L_GTEQ));
                    case LX_GT:
                        if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                            return(MAP_AND_FILL(L_RSHFTEQ));
                        }
                        /* '>>' consumed; a retry resumes in LX_RSHIFT */
                        mapped_c = LX_RSHIFT;
                        lex_token = L_RSHIFT;
                        break;
                    default:
                        lex_token = L_GT;
                        break;
                }
                break;
            case LX_RSHIFT:         /* reached only by retry, see LX_LSHIFT */
                if((last_mapped = CHARMAP(GETCH())) == LX_EQ) {
                    return(MAP_AND_FILL(L_RSHFTEQ));
                }
                UNGETCH();
                return(MAP_AND_FILL(L_RSHIFT));
                break;
            case LX_POUND:
                if( ! Prep ) {
                    strcpy (Msg_Text, GET_MSG(2014));
                    error(2014);/* # sign must be first non-whitespace */
                    UNGETCH();              /* replace it */
                    Linenumber--;   /* do_newline counts a newline */
                    do_newline();   /* may be a 'real' prepro line */
                } else {
                    myfwrite(L"#", sizeof(WCHAR), 1, OUTPUTFILE);
                }
                continue;
                break;
            case LX_EOS:
                /* a backslash gets the same treatment as LX_BACKSLASH */
                if(PREVCH() == L'\\') {
                    if( ! Prep ) {
                        if( ! checknl()) {  /* ignore the new line */
                            strcpy (Msg_Text, GET_MSG(2017));
                            error(2017);/* illegal escape sequence */
                        }
                    } else {
                        myfwrite(L"\\", sizeof(WCHAR), 1, OUTPUTFILE);
                        *buf = get_non_eof();
                        myfwrite(buf, sizeof(WCHAR), 1, OUTPUTFILE);
                    }
                    continue;
                }
                if(Macro_depth == 0) {
                    if( ! io_eob()) {       /* not the end of the buffer */
                        continue;
                    }
                    if(fpop()) {            /* have more files to read */
                        continue;
                    }
                    return(MAP_AND_FILL(L_EOF));    /* all gone . . . */
                }
                handle_eos();                       /* found end of macro */
                continue;
                break;
            case LX_DQUOTE:
                if( ! Prep ) {
                    str_const();
                    return(NOMAP_TOKEN(L_STRING));
                }
                prep_string(L'\"');
                continue;
                break;
            case LX_SQUOTE:
                if( ! Prep ) {
                    return(MAP_TOKEN(char_const()));
                }
                prep_string(L'\'');
                continue;
                break;
            case LX_CR:             /*  ??? check for nl next  */
                continue;
                break;
            case LX_NL:
                if(On_pound_line) {
                    /* leave the newline in the input and report no token */
                    UNGETCH();
                    return(NOMAP_TOKEN(L_NOTOKEN));
                }
                if(Prep) {
                    // must manually write '\r' with '\n' when writing 16-bit strings
                    myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
                }
                do_newline();
                continue;
                break;
            case LX_WHITE:          /* skip all white space */
                if( ! Prep ) {      /* check only once */
                    do {
                        ;
                    } while(LXC_IS_WHITE(GETCH()));
                }
                else {
                    WCHAR   c;

                    /* copy the run of white space to the output */
                    c = PREVCH();
                    do {
                        myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
                    } while(LXC_IS_WHITE(c = GETCH()));
                }
                UNGETCH();          /* put back the first non-white char */
                continue;
                break;
            case LX_ILL:
                if( ! Prep ) {
                    Msg_Temp = GET_MSG(2018);
                    SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, PREVCH());
                    error(2018);/* unknown character */
                } else {
                    myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
                }
                continue;
                break;
            case LX_BACKSLASH:
                if( ! Prep ) {
                    if( ! checknl()) {      /* ignore the new line */
                        strcpy (Msg_Text, GET_MSG(2017));
                        error(2017);/* illegal escape sequence */
                    }
                }
                else {
                    myfwrite(L"\\", sizeof(WCHAR), 1, OUTPUTFILE);
                    *buf = get_non_eof();
                    myfwrite(buf, sizeof(WCHAR), 1, OUTPUTFILE);
                }
                continue;
                break;
            case LX_DOT:            /* .  ...  or a number starting with '.' */
dot_switch:
                switch(last_mapped = CHARMAP(GETCH())) {
                    case LX_BACKSLASH:
                        if(checknl()) {
                            goto dot_switch;
                        }
                        UNGETCH();
                        break;
                    case LX_EOS:
                        if(handle_eos() == BACKSLASH_EOS) {
                            break;
                        }
                        goto dot_switch;
                        break;
                    case LX_DOT:
                        if( ! checkop(L'.') ) {
                            strcpy (Msg_Text, GET_MSG(2142));
                            error(2142);/* ellipsis requires three '.'s */
                        }
                        return(NOMAP_AND_FILL(L_ELLIPSIS));
                        break;
                    case LX_NUMBER:
                        /*
                        **      don't worry about getting correct hash value.
                        **      The text equivalent of a real number is never
                        **      hashed
                        */
                        Reuse_W[0] = L'.';
                        Reuse_W[1] = PREVCH();
                        return(MAP_TOKEN(get_real(&Reuse_W[2])));
                        break;
                }
                UNGETCH();
                return(MAP_AND_FILL(L_PERIOD));
                break;
            case LX_NOEXPAND:
                SKIPCH();                   /* just skip length */
                continue;
            case LX_ID:
                {
                    pdefn_t pdef;

                    if(Macro_depth > 0) {
                        /* lex_getid() returns 0 when the id must not be expanded */
                        if( ! lex_getid(PREVCH())) {
                            goto avoid_expand;
                        }
                    }
                    else {
                        getid(PREVCH());
                    }

                    /* a defined macro that was expanded: rescan its text */
                    if( ((pdef = get_defined()) != 0)
                        &&
                        ( ! DEFN_EXPANDING(pdef))
                        &&
                        ( can_expand(pdef))
                        ) {
                        continue;
                    }

avoid_expand:
                    if( ! Prep ) {
                        /* M00BUG get near copy of identifier???? */
                        HLN_NAME(yylval.yy_ident) = Reuse_W;
                        HLN_HASH(yylval.yy_ident) = Reuse_W_hash;
                        HLN_LENGTH(yylval.yy_ident) = (UINT)Reuse_W_length;
                        return(L_IDENT);
                    } else {
                        /* Reuse_W_length counts the terminator; do not write it */
                        myfwrite(Reuse_W, (Reuse_W_length - 1) * sizeof(WCHAR), 1, OUTPUTFILE);
                        return(NOMAP_TOKEN(L_NOTOKEN));
                    }
                }
                continue;
                break;
        }
        /*
        **  all the multichar ( -> -- -= etc ) operands
        **  must come through here. we've gotten the next char,
        **  and not matched one of the possibilities, but we have to check
        **  for the end of the buffer character and act accordingly
        **  if it is the eob, then we handle it and go back for another try.
        **  otherwise, we unget the char we got, and return the base token.
        */
        if(last_mapped == LX_EOS) {
            if(handle_eos() != BACKSLASH_EOS) {
                goto first_switch;
            }
        }
        UNGETCH();      /* cause we got an extra one to check */
        return(MAP_AND_FILL(lex_token));
    }
}


/************************************************************************
**
**      lex_getid: reads an identifier for the main lexer.  The
**              identifier is read into Reuse_W. This function should not
**              handle an end of string if it is rescanning a macro
**              expansion, because this could switch the context with
**              regards to whether the macro is expandable or not.
**              Similarly, the noexpand marker must only be allowed if a
**              macro is being rescanned, otherwise let this character
**              be caught as an illegal character in text
**
**      c : first character of the identifier (already read by the caller)
**
**      On return Reuse_W holds the null terminated identifier,
**      Reuse_W_hash its hash and Reuse_W_length its length including
**      the terminator.
**
**      returns : zero when a noexpand marker followed the identifier and
**              the length stored after that marker equals the length of
**              the identifier, non-zero otherwise.  yylex() skips macro
**              expansion when the result is zero.
**
**      Running out of room in Reuse_W is reported with fatal error 1067.
************************************************************************/
int
lex_getid(
    WCHAR c
    )
{
    REG WCHAR   *p;
    int         length = 0;     /* length read after a noexpand marker */

    p = Reuse_W;
    *p++ = c;
    c &= HASH_MASK;             /* c doubles as the hash accumulator */
    for(;;) {
        while(LXC_IS_IDENT(*p = GETCH())) { /* collect character */
            c += (*p & HASH_MASK);                      /* hash it */
            p++;
            /*
            **  Check the limit before the next store rather than after
            **  the whole identifier has been collected, so an over-long
            **  identifier cannot run past the end of Reuse_W.
            **  (assumes fatal() does not return - confirm)
            */
            if(p >= LIMIT(Reuse_W)) {   /* is this error # correct? */
                strcpy (Msg_Text, GET_MSG(1067));
                fatal(1067);
            }
        }
        if(CHARMAP(*p) == LX_NOEXPAND ) {
            /* the character after the marker is the recorded length */
            length = (int)GETCH();
            continue;
        }
        UNGETCH();
        break;                          /* out of for loop  -  only way out */
    }
    if(((p - Reuse_W) > LIMIT_ID_LENGTH) && ( ! Prep )) {
        /* truncate, then rehash what is left of the identifier */
        p = Reuse_W + LIMIT_ID_LENGTH;
        *p = L'\0';
        c = local_c_hash(Reuse_W);
        Msg_Temp = GET_MSG(4011);
        SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, Reuse_W);
        warning(4011);  /* id truncated */
    } else {
        *p = L'\0';              /* terminates identifier for expandable check */
    }
    Reuse_W_hash = (hash_t)c;
    Reuse_W_length = (UINT)((p - Reuse_W) + 1);
    return(length != (p - Reuse_W));
}