/************************************************************************/
/*                                                                      */
/* RCPP - Resource Compiler Pre-Processor for NT system                 */
/*                                                                      */
/* SCANNER.C - Routines for token scanning                              */
/*                                                                      */
/* 29-Nov-90 w-BrianM  Update for NT from PM SDK RCPP                   */
/*                                                                      */
/************************************************************************/

#include "rc.h"

/* Arguments are fully parenthesized so that ABS(a - b) expands correctly. */
#define ABS(x)      (((x) > 0) ? (x) : -1 * (x))
#define ALERT_CHAR  L'\007'     /* ANSI alert character is ASCII BEL */

ULONG lCPPTotalLinenumber = 0;

extern int vfCurrFileType;      //- Added for 16-bit file support.

/************************************************************************/
/* Local Function Prototypes                                            */
/************************************************************************/
token_t c_size(long);
int     ctoi(int);
int     escape(int);
token_t get_real(PWCHAR);
token_t l_size(long);
long    matol(PWCHAR, int);
token_t uc_size(long);
token_t ul_size(long);
void    skip_1comment(void);


/************************************************************************
** local_c_hash : hash a NUL-terminated wide string.
**      Returns the sum of (character & HASH_MASK) over every character
**      of 'name'; this is the same hash getid() accumulates on the fly.
************************************************************************/
hash_t
local_c_hash(
    REG WCHAR *name
    )
{
    REG hash_t  i;

    i = 0;
    while (*name) {
        i += (*name & HASH_MASK);
        name++;
    }
    return(i);
}


/************************************************************************
** GETID - Get an identifier or keyword.
**      (we know that we're given at least 1 id char)
**      in addition, we'll hash the value using 'c'.
**
** Input  : c - the first identifier character, already read.
** Output : the NUL-terminated identifier in Reuse_W, its hash in
**          Reuse_W_hash and its length (terminator included) in
**          Reuse_W_length.
**
** Identifiers longer than LIMIT_ID_LENGTH are truncated with warning
** 4011 unless we are only preprocessing.  Running past LIMIT(Reuse_W)
** is fatal error 1067; end of file inside an identifier is fatal 1004.
************************************************************************/
void
getid(
    REG UINT c
    )
{
    REG WCHAR   *p;

    p = Reuse_W;
    *p++ = (WCHAR)c;
    c &= HASH_MASK;

repeat:
    while (LXC_IS_IDENT(*p = GETCH())) {    /* while it's an id char . . . */
        c += (*p & HASH_MASK);              /* hash it */
        p++;
        /*
        **  Stop as soon as the buffer limit is reached.  The same test
        **  is made once the identifier ends, but by then an over-long
        **  identifier would already have been written past the limit.
        */
        if (p >= LIMIT(Reuse_W)) {
            strcpy (Msg_Text, GET_MSG (1067));
            fatal(1067);
        }
    }
    if (*p != EOS_CHAR) {
        if ((*p == L'\\') && (checknl())) {
            /* backslash-newline inside the identifier: keep gathering */
            goto repeat;
        }
        UNGETCH();
        if (p >= LIMIT(Reuse_W)) {
            strcpy (Msg_Text, GET_MSG (1067));
            fatal(1067);
        }
        if (((p - Reuse_W) > LIMIT_ID_LENGTH) && ( ! Prep )) {
            p = Reuse_W + LIMIT_ID_LENGTH;
            *p++ = L'\0';
            /* the running hash covered the dropped tail; recompute it */
            c = local_c_hash(Reuse_W);
            Msg_Temp = GET_MSG (4011);
            SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, Reuse_W);
            warning(4011);                  /* id truncated */
        } else {
            *p++ = L'\0';
        }
        Reuse_W_hash = (hash_t)c;
        Reuse_W_length = (UINT)(p - Reuse_W);
        return;
    }
    if (io_eob()) {                         /* end of file in middle of id */
        strcpy (Msg_Text, GET_MSG (1004));
        fatal(1004);
    }
    goto repeat;
}


/************************************************************************
** prep_string : outputs char/string constants when preprocessing only
**
** Input  : c - the opening quote character, already read.
**
** The text is staged in Reuse_W and written to OUTPUTFILE, flushing
** whenever the staging pointer reaches &Reuse_W[MED_BUFFER - 1].
** A newline ends the constant and is pushed back for the caller.
************************************************************************/
void
prep_string(
    REG WCHAR c
    )
{
    REG WCHAR   *p_buf;
    int         term_char;

    p_buf = Reuse_W;
    term_char = c;

    *p_buf++ = c;                           /* save the open quote */
    for (;;) {
        switch (CHARMAP(c = GETCH())) {
        case LX_DQUOTE:
        case LX_SQUOTE:
            if (c == (WCHAR)term_char) {
                *p_buf++ = (WCHAR)term_char;    /* save the terminating quote */
                goto out_of_loop;
            }
            break;                          /* the other quote kind: plain text */

        case LX_BACKSLASH:
            /*
            **  NOTE(review): c is stored here and again by the common
            **  store below the switch - confirm whether CHARMAP ever
            **  yields LX_BACKSLASH on this path.
            */
            *p_buf++ = c;
            break;

        case LX_CR:
            continue;                       /* carriage returns are dropped */

        case LX_NL:
            UNGETCH();
            goto out_of_loop;

        case LX_EOS:
            if (c == L'\\') {
                /* keep the backslash and the character it escapes */
                *p_buf++ = c;
                c = get_non_eof();
                break;
            }
            handle_eos();
            continue;
        }

        *p_buf++ = c;
        if (p_buf >= &Reuse_W[MED_BUFFER - 1]) {
            *p_buf = L'\0';
            myfwrite(Reuse_W, (size_t)(p_buf - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
            p_buf = Reuse_W;
        }
    }

out_of_loop:
    *p_buf = L'\0';
    myfwrite(Reuse_W, (size_t)(p_buf - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
}


/************************************************************************
** char_const : gather up a character constant
**      we're called after finding the opening single quote.
**
** Output : the token type, L_CUNSIGNED when Jflag is set and
**          L_CINTEGER otherwise; the constant's tree is left in
**          yylval.yy_tree.
**
** Errors : 2137 empty constant, 2001 newline in constant,
**          2015 too many characters, 2016 missing closing quote.
************************************************************************/
token_t
char_const(
    void
    )
{
    REG WCHAR   c;
    value_t     value;
    token_t     tok;

    tok = (token_t)(Jflag ? L_CUNSIGNED : L_CINTEGER);

    /*
    **  The LX_BACKSLASH case below assigns nothing, so give the value a
    **  defined state before it can reach build_const().
    */
    value.v_long = 0;

first_switch:
    switch (CHARMAP(c = GETCH())) {
    case LX_BACKSLASH:
        break;

    case LX_SQUOTE:
        strcpy (Msg_Text, GET_MSG (2137));  //"empty character constant"
        error(2137);
        value.v_long = 0;
        UNGETCH();
        break;

    case LX_EOS:        /* ??? assumes i/o buffering > 1 char */
        if (handle_eos() != BACKSLASH_EOS) {
            goto first_switch;
        }
        value.v_long = escape(get_non_eof());
        if (tok == L_CUNSIGNED) {           /* don't sign extend */
            value.v_long &= 0xff;
        }
        break;

    case LX_NL:
        /* newline in character constant */
        strcpy (Msg_Text, GET_MSG (2001));
        error (2001);
        UNGETCH();
        /*
        **  FALLTHROUGH
        */
    default:
        value.v_long = c;
        break;
    }

    if ((c = get_non_eof()) != L'\'') {
        strcpy (Msg_Text, GET_MSG (2015));
        error (2015);                       /* too many chars in constant */
        do {
            if (c == L'\n') {
                strcpy (Msg_Text, GET_MSG (2016));
                error(2016);                /* missing closing ' */
                break;
            }
        } while ((c = get_non_eof()) != L'\'');
    }
    yylval.yy_tree = build_const(tok, &value);
    return(tok);
}


/************************************************************************
** str_const : gather up a string constant
**
** Output : yylval.yy_string.str_ptr points at the string, stored in
**          Macro_buffer; yylval.yy_string.str_len is its length,
**          terminating NUL included.
**
** A newline ends the string with error 2001 and is pushed back.
** Characters beyond LIMIT_STRING_LENGTH are dropped after a single
** warning 4009.
************************************************************************/
void
str_const(
    VOID
    )
{
    REG WCHAR   c;
    REG PWCHAR  p_buf;
    int         not_warned_yet = TRUE;

    p_buf = yylval.yy_string.str_ptr = Macro_buffer;
    /*
    **  Is it possible that reading this string during a rescan will
    **  overwrite the expansion being rescanned?  No, because a macro
    **  expansion is limited to the top half of Macro_buffer.
    **  For Macro_depth > 0, this is like copying the string from
    **  somewhere in the top half of Macro_buffer to the bottom half
    **  of Macro_buffer.
    **  Note that the restriction on the size of an expanded macro is
    **  stricter than the limit on an L_STRING length.  An expanded
    **  macro is limited to around 1019 bytes, but an L_STRING is
    **  limited to 2043 bytes.
    */
    for (;;) {
        switch (CHARMAP(c = GETCH())) {
        case LX_NL:
            UNGETCH();
            strcpy (Msg_Text, GET_MSG (2001));
            error(2001);
            /*
            **  FALLTHROUGH
            */
        case LX_DQUOTE:
            *p_buf++ = L'\0';
            yylval.yy_string.str_len = (USHORT)(p_buf - yylval.yy_string.str_ptr);
            return;

        case LX_EOS:
            if (handle_eos() != BACKSLASH_EOS) {
                continue;
            }
            if ( ! InInclude) {
                /* process escaped char; inside an include the
                ** backslash itself is stored untranslated */
                c = (WCHAR)escape(get_non_eof());
            }
            break;
        }

        if (p_buf - Macro_buffer > LIMIT_STRING_LENGTH) {
            if (not_warned_yet) {
                strcpy (Msg_Text, GET_MSG (4009));
                warning(4009);              /* string too big, truncating */
                not_warned_yet = FALSE;
            }
        } else {
            *p_buf++ = c;
        }
    }
}


/************************************************************************
** do_newline : does work after a newline has been found.
**
** Bumps Linenumber, then consumes what may start the following
** line(s): byte order marks, carriage returns, white space, comments,
** blank lines and '#' directives (handed to preprocess()).  Returns
** with the first other character pushed back, or at end of input.
** When only preprocessing, white space and newlines are echoed to
** OUTPUTFILE rather than skipped.
************************************************************************/
void
do_newline(
    void
    )
{
    ++Linenumber;
    for (;;) {
        switch (CHARMAP(GETCH())) {
        case LX_BOM:                        // ignore Byte Order Mark
            break;

        case LX_CR:
            break;

        case LX_POUND:
            preprocess();
            break;

        case LX_SLASH:
            if ( ! skip_comment()) {
                goto leave_do_newline;
            }
            break;

        case LX_NL:
            if ((lCPPTotalLinenumber++ & RC_PREPROCESS_UPDATE) == 0)
                UpdateStatus(1, lCPPTotalLinenumber);
            Linenumber++;
            // must manually write '\r' with '\n' when writing 16-bit strings
            if (Prep) {                     /* preprocessing only */
                myfwrite(L"\r", sizeof(WCHAR), 1, OUTPUTFILE);
            }
            /*
            **  FALLTHROUGH
            */
        case LX_WHITE:
            if (Prep) {     /* preprocessing only, output whitespace */
                myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
            } else {
                do {
                    ;
                } while (LXC_IS_WHITE(GETCH()));
                UNGETCH();
            }
            break;

        case LX_EOS:
            if (PREVCH() == EOS_CHAR || PREVCH() == CONTROL_Z) {
                if (io_eob()) {     /* leaves us pointing at a valid char */
                    return;
                }
                break;
            }
            if (checknl()) {
                continue;
            }
            /* it's a backslash */
            /*
            **  FALLTHROUGH
            */
        default:            /* first non-white is not a '#', leave */
leave_do_newline:
            UNGETCH();
            return;
        }
    }
}


/************************************************************************
** GETNUM - Get a number from the input stream.
* * ARGUMENTS * radix - the radix of the number to be accumulated. Can only be 8, 10, * or 16 * pval - a pointer to a VALUE union to be filled in with the value * * RETURNS - type of the token (L_CINTEGER or L_CFLOAT) * * SIDE EFFECTS - * does push back on the input stream. * writes into pval by reference * uses buffer Reuse_W * * DESCRIPTION - * Accumulate the number according to the rules for each radix. * Set up the format string according to the radix (or distinguish * integer from float if radix is 10) and convert to binary. * * AUTHOR - Ralph Ryan, Sept. 8, 1982 * * MODIFICATIONS - none * ************************************************************************/ token_t getnum( REG WCHAR c ) { REG WCHAR *p; WCHAR *start; int radix; token_t tok; value_t value; tok = L_CINTEGER; start = (Tiny_lexer_nesting ? Exp_ptr : Reuse_W); p = start; if( c == L'0' ) { c = get_non_eof(); if( IS_X(c) ) { radix = 16; if( Prep ) { *p++ = L'0'; *p++ = L'x'; } for(c = get_non_eof(); LXC_IS_XDIGIT(c); c = get_non_eof()) { /* no check for overflow? 
*/ *p++ = c; } if((p == Reuse_W) && (Tiny_lexer_nesting == 0)) { strcpy (Msg_Text, GET_MSG (2153)); error(2153); } goto check_suffix; } else { radix = 8; *p++ = L'0'; /* for preprocessing or 0.xxx case */ } } else { radix = 10; } while( LXC_IS_DIGIT((WCHAR)c) ) { *p++ = c; c = get_non_eof(); } if( IS_DOT(c) || IS_E(c) ) { UNGETCH(); return(get_real(p)); } check_suffix: if( IS_EL(c) ) { if( Prep ) { *p++ = c; } c = get_non_eof(); if( IS_U(c) ) { if(Prep) { *p++ = c; } tok = L_LONGUNSIGNED; } else { tok = L_LONGINT; UNGETCH(); } } else if( IS_U(c) ) { if( Prep ) { *p++ = c; } c = get_non_eof(); if( IS_EL(c) ) { if( Prep ) { *p++ = c; } tok = L_LONGUNSIGNED; } else { tok = L_CUNSIGNED; UNGETCH(); } } else { UNGETCH(); } *p = L'\0'; if( start == Exp_ptr ) { Exp_ptr = p; return(L_NOTOKEN); } else if( Prep ) { myfwrite( Reuse_W, (size_t)(p - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE); return(L_NOTOKEN); } value.v_long = matol(Reuse_W,radix); switch(tok) { case L_CINTEGER: tok = (radix == 10) ? c_size(value.v_long) : uc_size(value.v_long) ; break; case L_LONGINT: tok = l_size(value.v_long); break; case L_CUNSIGNED: tok = ul_size(value.v_long); break; } yylval.yy_tree = build_const(tok, &value); return(tok); } /************************************************************************ ** get_real : gathers the real part/exponent of a real number. ** Input : ptr to the null terminator of the whole part ** pointer to receive value. ** Output : L_CFLOAT ** ** ASSUMES whole part is either at Exp_ptr or Reuse_W. ************************************************************************/ token_t get_real( REG PWCHAR p ) { REG int c; token_t tok; c = get_non_eof(); if(Cross_compile && (Tiny_lexer_nesting == 0)) { strcpy (Msg_Text, GET_MSG (4012)); warning(4012); /* float constant in cross compilation */ Cross_compile = FALSE; /* only one msg per file */ } /* ** if the next char is a digit, then we've been called after ** finding a '.'. 
if this is true, then ** we want to find the fractional part of the number. ** if it's a '.', then we've been called after finding ** a whole part, and we want the fraction. */ if( LXC_IS_DIGIT((WCHAR)c) || IS_DOT(c) ) { do { *p++ = (WCHAR)c; c = (int)get_non_eof(); } while( LXC_IS_DIGIT((WCHAR)c) ); } if( IS_E((WCHAR)c) ) { /* now have found the exponent */ *p++ = (WCHAR)c; /* save the 'e' */ c = (WCHAR)get_non_eof(); /* skip it */ if( IS_SIGN(c) ) { /* optional sign */ *p++ = (WCHAR)c; /* save the sign */ c = (int)get_non_eof(); } if( ! LXC_IS_DIGIT((WCHAR)c)) { if( ! Rflag ) { if(Tiny_lexer_nesting == 0) { Msg_Temp = GET_MSG (2021); SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, c); error(2021); /* missing or malformed exponent */ } *p++ = L'0'; } } else { do { /* gather the exponent */ *p++ = (WCHAR)c; c = (int)get_non_eof(); } while( LXC_IS_DIGIT((WCHAR)c) ); } } if( IS_F((WCHAR)c) ) { tok = L_CFLOAT; if( Prep ) { *p++ = (WCHAR)c; } } else if( IS_EL((WCHAR)c) ) { tok = L_CLDOUBLE; if( Prep ) { *p++ = (WCHAR)c; } } else { UNGETCH(); tok = L_CDOUBLE; } *p = L'\0'; if( Tiny_lexer_nesting > 0 ) { Exp_ptr = p; return(L_NOTOKEN); } else if( Prep ) { myfwrite( Reuse_W, (size_t)(p - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE); return(L_NOTOKEN); } /* ** reals aren't used during preprocessing */ return(tok); } /************************************************************************ ** matol : ascii to long, given a radix. ************************************************************************/ long matol( REG PWCHAR p_start, REG int radix ) { long result, old_result; unsigned int i; old_result = result = 0; while(*p_start) { result *= radix; i = ctoi(*p_start); if( ((int)i >= radix) && (! 
Prep) ) { Msg_Temp = GET_MSG (2020); SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, *p_start, radix); error(2020); /* illegal digit % for base % */ } result += i; p_start++; if(radix == 10) { if(result < old_result) { p_start--; /* fix the string ptr since we have overflowed */ break; } } else if(*p_start) { /* ** the loop is not finished. ** we will multiply by the radix again ** check the upper bits. if they're on, then ** that mult will overflow the value */ if(radix == 8) { if(result & 0xe0000000) { break; } } else if(result & 0xf0000000) { break; } } old_result = result; } if(*p_start) { strcpy (Msg_Text, GET_MSG (2177)); error(2177); /* constant too big */ result = 0; } return(result); } /************************************************************************ ** uc_size : returns 'int' or 'long' (virtual unsigned). ** if their are no bits in the upper part of the value, ** then it's an int. otherwise, it's a long. ** this is valid too if target sizeof(int) != sizeof(long). ** then L_CINTEGER and L_LONGINT are synonymous. ************************************************************************/ token_t uc_size( long value ) { return((token_t)((value > INT_MAX) ? L_CUNSIGNED : L_CINTEGER)); } /************************************************************************ ** c_size : returns 'int' or 'long' for signed numbers. ** if the sign bit of the lower word is on or any bits ** in the upper word are on, then we must use 'long'. ************************************************************************/ token_t c_size( long value ) { return((token_t)((ABS(value) > INT_MAX) ? L_LONGINT : L_CINTEGER)); } /************************************************************************ ** l_size : returns 'longint' or 'longunsigned' for long numbers. ** if the sign bit of the high word is on this is 'longunsigned'; ************************************************************************/ token_t l_size( long value ) { return((token_t)((value > LONG_MAX) ? 
L_LONGUNSIGNED : L_LONGINT)); } /************************************************************************ ** ul_size : returns 'unsigned' or 'longunsigned' for unsigned numbers. ** if the number can't be represented as unsigned, it is promoted to ** unsignedlong. ************************************************************************/ token_t ul_size( long value ) { return((token_t)((ABS(value) > UINT_MAX-1) ? L_LONGUNSIGNED : L_CUNSIGNED)); } /************************************************************************ ** ctoi : character to int. ************************************************************************/ int ctoi( int c ) { if(LXC_IS_DIGIT((WCHAR)c)) { return(c - L'0'); } else { return(towupper((WCHAR)c) - towupper(L'A') + 10); } } /************************************************************************ * ESCAPE - get an escaped character * * ARGUMENTS - none * * RETURNS - value of escaped character * * SIDE EFFECTS - may push back input * * DESCRIPTION - An escape ( '\' ) was discovered in the input. Translate * the next symbol or symbols into an escape sequence. * * AUTHOR - Ralph Ryan, Sept. 7, 1982 * * MODIFICATIONS - none * ************************************************************************/ int escape( REG int c ) { REG int value; int cnt; escape_again: if( LXC_IS_ODIGIT((WCHAR)c) ) {/* \ooo is an octal number, must fit into a byte */ cnt = 1; for(value = ctoi(c), c = get_non_eof(); (cnt < 3) && LXC_IS_ODIGIT((WCHAR)c); cnt++, c = get_non_eof() ) { value *= 8; value += ctoi(c); } if( ! Prep ) { if(value > 255) { Msg_Temp = GET_MSG (2022); SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, value); error (2022); } } UNGETCH(); return((char)value); } switch( c ) { case L'a': return(ALERT_CHAR); break; case L'b': return(L'\b'); break; case L'f': return(L'\f'); break; case L'n': return fMacRsrcs ? (L'\r') : (L'\n'); break; case L'r': return fMacRsrcs ? 
(L'\n') : (L'\r'); break; case L't': return(L'\t'); break; case L'v': return(L'\v'); break; case L'x': cnt = 0; value = 0; c = get_non_eof(); while((cnt < 3) && LXC_IS_XDIGIT((WCHAR)c)) { value *= 16; value += ctoi(c); c = get_non_eof(); cnt++; } if(cnt == 0) { strcpy (Msg_Text, GET_MSG (2153)); error (2153); } UNGETCH(); return((char)value); /* cast to get sign extend */ default: if(c != L'\\') { return(c); } else { if(checknl()) { c = get_non_eof(); goto escape_again; } else { return(c); } } } } /************************************************************************ * CHECKOP - Check whether the next input character matches the argument. * * ARGUMENTS * short op - the character to be checked against * * RETURNS * TRUE or FALSE * * SIDE EFFECTS * Will push character back onto the input if there is no match. * * DESCRIPTION * If the next input character matches op, return TRUE. Otherwise * push it back onto the input. * * AUTHOR - Ralph Ryan, Sept. 9, 1982 * * MODIFICATIONS - none * ************************************************************************/ int checkop( int op ) { if(op == (int)get_non_eof()) { return(TRUE); } UNGETCH(); return(FALSE); } /************************************************************************ ** DumpSlashComment : while skipping a comment, output it. ************************************************************************/ void DumpSlashComment( VOID ) { if( ! Cflag ) { skip_NLonly(); return; } myfwrite(L"//", 2 * sizeof(WCHAR), 1, OUTPUTFILE); for(;;) { WCHAR c; switch(CHARMAP(c = GETCH())) { // must manually write '\r' with '\n' when writing 16-bit strings //case LX_CR: // continue; case LX_EOS: handle_eos(); continue; case LX_NL: UNGETCH(); return; } myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE); } } /************************************************************************ ** dump_comment : while skipping a comment, output it. 
************************************************************************/ void dump_comment( void ) { if( ! Cflag ) { skip_1comment(); return; } myfwrite(L"/*", 2 * sizeof(WCHAR), 1, OUTPUTFILE); for(;;) { WCHAR c; switch(CHARMAP(c = GETCH())) { case LX_STAR: if(checkop(L'/')) { myfwrite(L"*/", 2 * sizeof(WCHAR), 1, OUTPUTFILE); return; } break; case LX_EOS: handle_eos(); continue; case LX_NL: Linenumber++; break; /* output below */ // must manually write '\r' with '\n' when writing 16-bit strings //case LX_CR: // continue; } myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE); } } /************************************************************************/ /* skip_comment() */ /************************************************************************/ int skip_comment( void ) { if(checkop(L'*')) { skip_1comment(); return(TRUE); } else if(checkop(L'/')) { skip_NLonly(); return(TRUE); } else { return(FALSE); } } /************************************************************************ ** skip_1comment : we're called when we're already in a comment. ** we're looking for the comment close. we also count newlines ** and output them if we're preprocessing. ************************************************************************/ void skip_1comment( void ) { UINT c; for(;;) { c = GETCH(); if(c == L'*') { recheck: c = GETCH(); if(c == L'/') { /* end of comment */ return; } else if(c == L'*') { /* ** if we get another '*' go back and check for a slash */ goto recheck; } else if(c == EOS_CHAR) { handle_eos(); goto recheck; } } /* ** note we fall through here. we know this baby is not a '*' ** we used to unget the char and continue. since we check for ** another '*' inside the above test, we can fall through here ** without ungetting/getting and checking again. 
*/ if(c <= L'\n') { /* ** hopefully, the above test is less expensive than doing two tests */ if(c == L'\n') { Linenumber++; if(Prep) { myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE); } } else if(c == EOS_CHAR) { handle_eos(); } } } } /************************************************************************ ** skip_cwhite : while the current character is whitespace or a comment. ** a newline is NOT whitespace. ************************************************************************/ WCHAR skip_cwhite( void ) { REG WCHAR c; skip_cwhite_again: while((c = GETCH()) <= L'/') { /* many chars are above this */ if(c == L'/') { if( ! skip_comment()) { return(L'/'); } } else if(c > L' ') { /* char is between '!' and '.' */ return(c); } else { switch(CHARMAP(c)) { case LX_EOS: handle_eos(); break; case LX_WHITE: continue; break; case LX_CR: continue; break; default: return(c); break; } } } if((c == L'\\') && (checknl())) { goto skip_cwhite_again; } return(c); } /************************************************************************ ** checknl : check for newline, skipping carriage return if there is one. ** also increments Linenumber, so this should be used by routines which ** will not push the newline back in such a way that rawtok() will be invoked, ** find the newline and do another increment. ************************************************************************/ int checknl( void ) { REG WCHAR c; for(;;) { c = GETCH(); if(c > L'\r') { UNGETCH(); return(FALSE); } switch(c) { case L'\n': Linenumber++; // must manually write '\r' with '\n' when writing 16-bit strings if( Prep ) { myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE); } return(TRUE); break; case L'\r': continue; break; case EOS_CHAR: handle_eos(); PREVCH() = L'\\'; /* M00HACK - needs pushback */ continue; break; default: UNGETCH(); return(FALSE); break; } } } /************************************************************************ ** get_non_eof : get a real char. 
************************************************************************/
/*
**  Returns the next input character, skipping carriage returns and
**  backslash-newline pairs.  A buffer-end marker (EOS_CHAR) is handed
**  to handle_eos() and reading continues - except while
**  Tiny_lexer_nesting is positive, when the marker itself is returned.
*/
WCHAR
get_non_eof(
    void
    )
{
    WCHAR ch;

    for (;;) {
        ch = GETCH();

        if (ch == L'\r') {
            continue;
        }

        /* the range test comes first, as EOS_CHAR is only looked for
        ** among the characters at or below '\r' */
        if ((ch <= L'\r') && (ch == EOS_CHAR) && ! (Tiny_lexer_nesting > 0)) {
            handle_eos();
            continue;
        }

        /* checknl() is only consulted after a backslash */
        if ((ch == L'\\') && (checknl())) {
            continue;
        }

        return(ch);
    }
}