1103 lines
30 KiB
C
1103 lines
30 KiB
C
/************************************************************************/
|
|
/* */
|
|
/* RCPP - Resource Compiler Pre-Processor for NT system */
|
|
/* */
|
|
/* SCANNER.C - Routines for token scanning */
|
|
/* */
|
|
/* 29-Nov-90 w-BrianM Update for NT from PM SDK RCPP */
|
|
/* */
|
|
/************************************************************************/
|
|
|
|
#include "rc.h"
|
|
|
|
|
|
/*
** ABS : absolute value of an arithmetic expression.
** The argument is fully parenthesized so that compound expressions such
** as ABS(a - b) expand correctly. Note that 'x' is evaluated twice, so
** do not pass an expression with side effects.
*/
#define ABS(x)  (((x) > 0) ? (x) : -(x))
|
|
|
|
|
|
#define ALERT_CHAR L'\007' /* ANSI alert character is ASCII BEL */
|
|
|
|
ULONG lCPPTotalLinenumber = 0;
|
|
|
|
extern int vfCurrFileType; //- Added for 16-bit file support.
|
|
|
|
|
|
/************************************************************************/
|
|
/* Local Function Prototypes */
|
|
/************************************************************************/
|
|
token_t c_size(long);
|
|
int ctoi(int);
|
|
int escape(int);
|
|
token_t get_real(PWCHAR);
|
|
token_t l_size(long);
|
|
long matol(PWCHAR, int);
|
|
token_t uc_size(long);
|
|
token_t ul_size(long);
|
|
void skip_1comment(void);
|
|
|
|
|
|
|
|
/************************************************************************/
|
|
/* local_c_hash */
|
|
/************************************************************************/
|
|
hash_t
|
|
local_c_hash(
|
|
REG WCHAR *name
|
|
)
|
|
{
|
|
REG hash_t i;
|
|
|
|
i = 0;
|
|
while(*name) {
|
|
i += (*name & HASH_MASK);
|
|
name++;
|
|
}
|
|
return(i);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* GETID - Get an identifier or keyword.
|
|
* (we know that we're given at least 1 id char)
|
|
* in addition, we'll hash the value using 'c'.
|
|
************************************************************************/
|
|
void
|
|
getid(
|
|
REG UINT c
|
|
)
|
|
{
|
|
REG WCHAR *p;
|
|
|
|
p = Reuse_W;
|
|
*p++ = (WCHAR)c;
|
|
c &= HASH_MASK;
|
|
|
|
repeat:
|
|
while(LXC_IS_IDENT(*p = GETCH())) { /* while it's an id char . . . */
|
|
c += (*p & HASH_MASK); /* hash it */
|
|
p++;
|
|
}
|
|
if(*p != EOS_CHAR) {
|
|
if((*p == L'\\') && (checknl())) {
|
|
goto repeat;
|
|
}
|
|
UNGETCH();
|
|
if(p >= LIMIT(Reuse_W)) {
|
|
strcpy (Msg_Text, GET_MSG (1067));
|
|
fatal(1067);
|
|
}
|
|
if( ((p - Reuse_W) > LIMIT_ID_LENGTH) && ( ! Prep )) {
|
|
p = Reuse_W + LIMIT_ID_LENGTH;
|
|
*p++ = L'\0';
|
|
c = local_c_hash(Reuse_W);
|
|
Msg_Temp = GET_MSG (4011);
|
|
SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, Reuse_W);
|
|
warning(4011); /* id truncated */
|
|
} else {
|
|
*p++ = L'\0';
|
|
}
|
|
Reuse_W_hash = (hash_t)c;
|
|
Reuse_W_length = (UINT)(p - Reuse_W);
|
|
return;
|
|
}
|
|
if(io_eob()) { /* end of file in middle of id */
|
|
strcpy (Msg_Text, GET_MSG (1004));
|
|
fatal(1004);
|
|
}
|
|
goto repeat;
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** prep_string : outputs char/string constants when preprocessing only
|
|
************************************************************************/
|
|
void
|
|
prep_string(
|
|
REG WCHAR c
|
|
)
|
|
{
|
|
REG WCHAR *p_buf;
|
|
int term_char;
|
|
|
|
p_buf = Reuse_W;
|
|
|
|
term_char = c;
|
|
|
|
*p_buf++ = c; /* save the open quote */
|
|
|
|
for(;;) {
|
|
switch(CHARMAP(c = GETCH())) {
|
|
case LX_DQUOTE:
|
|
case LX_SQUOTE:
|
|
if(c == (WCHAR)term_char) {
|
|
*p_buf++ = (WCHAR)term_char;/* save the terminating quote */
|
|
goto out_of_loop;
|
|
}
|
|
break;
|
|
case LX_BACKSLASH:
|
|
*p_buf++ = c;
|
|
break;
|
|
case LX_CR:
|
|
continue;
|
|
case LX_NL:
|
|
UNGETCH();
|
|
goto out_of_loop;
|
|
case LX_EOS:
|
|
if(c == L'\\') {
|
|
*p_buf++ = c;
|
|
c = get_non_eof();
|
|
break;
|
|
}
|
|
handle_eos();
|
|
continue;
|
|
}
|
|
*p_buf++ = c;
|
|
if(p_buf >= &Reuse_W[MED_BUFFER - 1]) {
|
|
*p_buf = L'\0';
|
|
myfwrite(Reuse_W, (size_t)(p_buf - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
|
|
p_buf = Reuse_W;
|
|
}
|
|
}
|
|
|
|
out_of_loop:
|
|
*p_buf = L'\0';
|
|
myfwrite(Reuse_W, (size_t)(p_buf - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** char_const : gather up a character constant
|
|
** we're called after finding the openning single quote.
|
|
************************************************************************/
|
|
token_t
|
|
char_const(
|
|
void
|
|
)
|
|
{
|
|
REG WCHAR c;
|
|
value_t value;
|
|
token_t tok;
|
|
|
|
tok = (token_t)(Jflag ? L_CUNSIGNED : L_CINTEGER);
|
|
|
|
first_switch:
|
|
|
|
switch(CHARMAP(c = GETCH())) {
|
|
case LX_BACKSLASH:
|
|
break;
|
|
case LX_SQUOTE:
|
|
strcpy (Msg_Text, GET_MSG (2137)); //"empty character constant"
|
|
error(2137);
|
|
value.v_long = 0;
|
|
UNGETCH();
|
|
break;
|
|
case LX_EOS: /* ??? assumes i/o buffering > 1 char */
|
|
if(handle_eos() != BACKSLASH_EOS) {
|
|
goto first_switch;
|
|
}
|
|
value.v_long = escape(get_non_eof());
|
|
if( tok == L_CUNSIGNED ) { /* don't sign extend */
|
|
value.v_long &= 0xff;
|
|
}
|
|
break;
|
|
case LX_NL:
|
|
/* newline in character constant */
|
|
strcpy (Msg_Text, GET_MSG (2001));
|
|
error (2001);
|
|
UNGETCH();
|
|
/*
|
|
** FALLTHROUGH
|
|
*/
|
|
default:
|
|
value.v_long = c;
|
|
break;
|
|
}
|
|
|
|
if((c = get_non_eof()) != L'\'') {
|
|
strcpy (Msg_Text, GET_MSG (2015));
|
|
error (2015); /* too many chars in constant */
|
|
do {
|
|
if(c == L'\n') {
|
|
strcpy (Msg_Text, GET_MSG (2016));
|
|
error(2016); /* missing closing ' */
|
|
break;
|
|
}
|
|
} while((c = get_non_eof()) != L'\'');
|
|
}
|
|
yylval.yy_tree = build_const(tok, &value);
|
|
return(tok);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** str_const : gather up a string constant
|
|
************************************************************************/
|
|
void
|
|
str_const(
|
|
VOID
|
|
)
|
|
{
|
|
REG WCHAR c;
|
|
REG PWCHAR p_buf;
|
|
int not_warned_yet = TRUE;
|
|
|
|
p_buf = yylval.yy_string.str_ptr = Macro_buffer;
|
|
/*
|
|
** Is it possible that reading this string during a rescan will
|
|
** overwrite the expansion being rescanned? No, because a macro
|
|
** expansion is limited to the top half of Macro_buffer.
|
|
** For Macro_depth > 0, this is like copying the string from
|
|
** somewhere in the top half of Macro_buffer to the bottom half
|
|
** of Macro_buffer.
|
|
** Note that the restriction on the size of an expanded macro is
|
|
** stricter than the limit on an L_STRING length. An expanded
|
|
** macro is limited to around 1019 bytes, but an L_STRING is
|
|
** limited to 2043 bytes.
|
|
*/
|
|
for(;;) {
|
|
switch(CHARMAP(c = GETCH())) {
|
|
case LX_NL:
|
|
UNGETCH();
|
|
strcpy (Msg_Text, GET_MSG (2001));
|
|
error(2001);
|
|
/*
|
|
** FALLTHROUGH
|
|
*/
|
|
case LX_DQUOTE:
|
|
*p_buf++ = L'\0';
|
|
yylval.yy_string.str_len = (USHORT)(p_buf-yylval.yy_string.str_ptr);
|
|
return;
|
|
break;
|
|
case LX_EOS:
|
|
if(handle_eos() != BACKSLASH_EOS) {
|
|
continue;
|
|
}
|
|
if(InInclude) {
|
|
break;
|
|
}
|
|
else {
|
|
c = (WCHAR)escape(get_non_eof()); /* process escaped char */
|
|
}
|
|
break;
|
|
}
|
|
if(p_buf - Macro_buffer > LIMIT_STRING_LENGTH) {
|
|
if( not_warned_yet ) {
|
|
strcpy (Msg_Text, GET_MSG (4009));
|
|
warning(4009); /* string too big, truncating */
|
|
not_warned_yet = FALSE;
|
|
}
|
|
} else {
|
|
*p_buf++ = c;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** do_newline : does work after a newline has been found.
|
|
************************************************************************/
|
|
|
|
void
|
|
do_newline(
|
|
void
|
|
)
|
|
{
|
|
++Linenumber;
|
|
for(;;) {
|
|
switch(CHARMAP(GETCH())) {
|
|
case LX_BOM: // ignore Byte Order Mark
|
|
break;
|
|
case LX_CR:
|
|
break;
|
|
case LX_POUND:
|
|
preprocess();
|
|
break;
|
|
case LX_SLASH:
|
|
if( ! skip_comment()) {
|
|
goto leave_do_newline;
|
|
}
|
|
break;
|
|
case LX_NL:
|
|
if ((lCPPTotalLinenumber++ & RC_PREPROCESS_UPDATE) == 0)
|
|
UpdateStatus(1, lCPPTotalLinenumber);
|
|
Linenumber++;
|
|
// must manually write '\r' with '\n' when writing 16-bit strings
|
|
if( Prep ) { /* preprocessing only */
|
|
myfwrite(L"\r", sizeof(WCHAR), 1, OUTPUTFILE);
|
|
}
|
|
/*
|
|
** FALLTHROUGH
|
|
*/
|
|
case LX_WHITE:
|
|
if( Prep ) { /* preprocessing only, output whitespace */
|
|
myfwrite(&(PREVCH()), sizeof(WCHAR), 1, OUTPUTFILE);
|
|
} else {
|
|
do {
|
|
;
|
|
} while(LXC_IS_WHITE(GETCH()));
|
|
UNGETCH();
|
|
}
|
|
break;
|
|
case LX_EOS:
|
|
if(PREVCH() == EOS_CHAR || PREVCH() == CONTROL_Z) {
|
|
if(io_eob()) { /* leaves us pointing at a valid char */
|
|
return;
|
|
}
|
|
break;
|
|
}
|
|
if(checknl()) {
|
|
continue;
|
|
}
|
|
/* it's a backslash */
|
|
/*
|
|
** FALLTHROUGH
|
|
*/
|
|
default: /* first non-white is not a '#', leave */
|
|
|
|
leave_do_newline:
|
|
|
|
UNGETCH();
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* GETNUM - Get a number from the input stream.
|
|
*
|
|
* ARGUMENTS
|
|
* radix - the radix of the number to be accumulated. Can only be 8, 10,
|
|
* or 16
|
|
* pval - a pointer to a VALUE union to be filled in with the value
|
|
*
|
|
* RETURNS - type of the token (L_CINTEGER or L_CFLOAT)
|
|
*
|
|
* SIDE EFFECTS -
|
|
* does push back on the input stream.
|
|
* writes into pval by reference
|
|
* uses buffer Reuse_W
|
|
*
|
|
* DESCRIPTION -
|
|
* Accumulate the number according to the rules for each radix.
|
|
* Set up the format string according to the radix (or distinguish
|
|
* integer from float if radix is 10) and convert to binary.
|
|
*
|
|
* AUTHOR - Ralph Ryan, Sept. 8, 1982
|
|
*
|
|
* MODIFICATIONS - none
|
|
*
|
|
************************************************************************/
|
|
token_t
|
|
getnum(
|
|
REG WCHAR c
|
|
)
|
|
{
|
|
REG WCHAR *p;
|
|
WCHAR *start;
|
|
int radix;
|
|
token_t tok;
|
|
value_t value;
|
|
|
|
tok = L_CINTEGER;
|
|
start = (Tiny_lexer_nesting ? Exp_ptr : Reuse_W);
|
|
p = start;
|
|
if( c == L'0' ) {
|
|
c = get_non_eof();
|
|
if( IS_X(c) ) {
|
|
radix = 16;
|
|
if( Prep ) {
|
|
*p++ = L'0';
|
|
*p++ = L'x';
|
|
}
|
|
for(c = get_non_eof(); LXC_IS_XDIGIT(c); c = get_non_eof()) {
|
|
/* no check for overflow? */
|
|
*p++ = c;
|
|
}
|
|
if((p == Reuse_W) && (Tiny_lexer_nesting == 0)) {
|
|
strcpy (Msg_Text, GET_MSG (2153));
|
|
error(2153);
|
|
}
|
|
goto check_suffix;
|
|
} else {
|
|
radix = 8;
|
|
*p++ = L'0'; /* for preprocessing or 0.xxx case */
|
|
}
|
|
} else {
|
|
radix = 10;
|
|
}
|
|
|
|
while( LXC_IS_DIGIT((WCHAR)c) ) {
|
|
*p++ = c;
|
|
c = get_non_eof();
|
|
}
|
|
|
|
if( IS_DOT(c) || IS_E(c) ) {
|
|
UNGETCH();
|
|
return(get_real(p));
|
|
}
|
|
|
|
check_suffix:
|
|
if( IS_EL(c) ) {
|
|
if( Prep ) {
|
|
*p++ = c;
|
|
}
|
|
c = get_non_eof();
|
|
if( IS_U(c) ) {
|
|
if(Prep) {
|
|
*p++ = c;
|
|
}
|
|
tok = L_LONGUNSIGNED;
|
|
} else {
|
|
tok = L_LONGINT;
|
|
UNGETCH();
|
|
}
|
|
} else if( IS_U(c) ) {
|
|
if( Prep ) {
|
|
*p++ = c;
|
|
}
|
|
c = get_non_eof();
|
|
if( IS_EL(c) ) {
|
|
if( Prep ) {
|
|
*p++ = c;
|
|
}
|
|
tok = L_LONGUNSIGNED;
|
|
} else {
|
|
tok = L_CUNSIGNED;
|
|
UNGETCH();
|
|
}
|
|
} else {
|
|
UNGETCH();
|
|
}
|
|
*p = L'\0';
|
|
if( start == Exp_ptr ) {
|
|
Exp_ptr = p;
|
|
return(L_NOTOKEN);
|
|
} else if( Prep ) {
|
|
myfwrite( Reuse_W, (size_t)(p - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
|
|
return(L_NOTOKEN);
|
|
}
|
|
value.v_long = matol(Reuse_W,radix);
|
|
switch(tok) {
|
|
case L_CINTEGER:
|
|
tok = (radix == 10)
|
|
? c_size(value.v_long)
|
|
: uc_size(value.v_long)
|
|
;
|
|
break;
|
|
case L_LONGINT:
|
|
tok = l_size(value.v_long);
|
|
break;
|
|
case L_CUNSIGNED:
|
|
tok = ul_size(value.v_long);
|
|
break;
|
|
}
|
|
yylval.yy_tree = build_const(tok, &value);
|
|
return(tok);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** get_real : gathers the real part/exponent of a real number.
|
|
** Input : ptr to the null terminator of the whole part
|
|
** pointer to receive value.
|
|
** Output : L_CFLOAT
|
|
**
|
|
** ASSUMES whole part is either at Exp_ptr or Reuse_W.
|
|
************************************************************************/
|
|
token_t
|
|
get_real(
|
|
REG PWCHAR p
|
|
)
|
|
{
|
|
REG int c;
|
|
token_t tok;
|
|
|
|
c = get_non_eof();
|
|
if(Cross_compile && (Tiny_lexer_nesting == 0)) {
|
|
strcpy (Msg_Text, GET_MSG (4012));
|
|
warning(4012); /* float constant in cross compilation */
|
|
Cross_compile = FALSE; /* only one msg per file */
|
|
}
|
|
/*
|
|
** if the next char is a digit, then we've been called after
|
|
** finding a '.'. if this is true, then
|
|
** we want to find the fractional part of the number.
|
|
** if it's a '.', then we've been called after finding
|
|
** a whole part, and we want the fraction.
|
|
*/
|
|
if( LXC_IS_DIGIT((WCHAR)c) || IS_DOT(c) ) {
|
|
do {
|
|
*p++ = (WCHAR)c;
|
|
c = (int)get_non_eof();
|
|
} while( LXC_IS_DIGIT((WCHAR)c) );
|
|
}
|
|
if( IS_E((WCHAR)c) ) { /* now have found the exponent */
|
|
*p++ = (WCHAR)c; /* save the 'e' */
|
|
c = (WCHAR)get_non_eof(); /* skip it */
|
|
if( IS_SIGN(c) ) { /* optional sign */
|
|
*p++ = (WCHAR)c; /* save the sign */
|
|
c = (int)get_non_eof();
|
|
}
|
|
if( ! LXC_IS_DIGIT((WCHAR)c)) {
|
|
if( ! Rflag ) {
|
|
if(Tiny_lexer_nesting == 0) {
|
|
Msg_Temp = GET_MSG (2021);
|
|
SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, c);
|
|
error(2021); /* missing or malformed exponent */
|
|
}
|
|
*p++ = L'0';
|
|
}
|
|
} else {
|
|
do { /* gather the exponent */
|
|
*p++ = (WCHAR)c;
|
|
c = (int)get_non_eof();
|
|
} while( LXC_IS_DIGIT((WCHAR)c) );
|
|
}
|
|
}
|
|
if( IS_F((WCHAR)c) ) {
|
|
tok = L_CFLOAT;
|
|
if( Prep ) {
|
|
*p++ = (WCHAR)c;
|
|
}
|
|
} else if( IS_EL((WCHAR)c) ) {
|
|
tok = L_CLDOUBLE;
|
|
if( Prep ) {
|
|
*p++ = (WCHAR)c;
|
|
}
|
|
} else {
|
|
UNGETCH();
|
|
tok = L_CDOUBLE;
|
|
}
|
|
*p = L'\0';
|
|
if( Tiny_lexer_nesting > 0 ) {
|
|
Exp_ptr = p;
|
|
return(L_NOTOKEN);
|
|
}
|
|
else if( Prep ) {
|
|
myfwrite( Reuse_W, (size_t)(p - Reuse_W) * sizeof(WCHAR), 1, OUTPUTFILE);
|
|
return(L_NOTOKEN);
|
|
}
|
|
/*
|
|
** reals aren't used during preprocessing
|
|
*/
|
|
return(tok);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** matol : ascii to long, given a radix.
|
|
************************************************************************/
|
|
long
|
|
matol(
|
|
REG PWCHAR p_start,
|
|
REG int radix
|
|
)
|
|
{
|
|
long result, old_result;
|
|
unsigned int i;
|
|
|
|
old_result = result = 0;
|
|
while(*p_start) {
|
|
result *= radix;
|
|
i = ctoi(*p_start);
|
|
if( ((int)i >= radix) && (! Prep) ) {
|
|
Msg_Temp = GET_MSG (2020);
|
|
SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, *p_start, radix);
|
|
error(2020); /* illegal digit % for base % */
|
|
}
|
|
result += i;
|
|
p_start++;
|
|
if(radix == 10) {
|
|
if(result < old_result) {
|
|
p_start--; /* fix the string ptr since we have overflowed */
|
|
break;
|
|
}
|
|
} else if(*p_start) {
|
|
/*
|
|
** the loop is not finished.
|
|
** we will multiply by the radix again
|
|
** check the upper bits. if they're on, then
|
|
** that mult will overflow the value
|
|
*/
|
|
if(radix == 8) {
|
|
if(result & 0xe0000000) {
|
|
break;
|
|
}
|
|
} else if(result & 0xf0000000) {
|
|
break;
|
|
}
|
|
}
|
|
old_result = result;
|
|
}
|
|
if(*p_start) {
|
|
strcpy (Msg_Text, GET_MSG (2177));
|
|
error(2177); /* constant too big */
|
|
result = 0;
|
|
}
|
|
return(result);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** uc_size : returns 'int' or 'long' (virtual unsigned).
|
|
** if their are no bits in the upper part of the value,
|
|
** then it's an int. otherwise, it's a long.
|
|
** this is valid too if target sizeof(int) != sizeof(long).
|
|
** then L_CINTEGER and L_LONGINT are synonymous.
|
|
************************************************************************/
|
|
token_t
|
|
uc_size(
|
|
long value
|
|
)
|
|
{
|
|
return((token_t)((value > INT_MAX) ? L_CUNSIGNED : L_CINTEGER));
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** c_size : returns 'int' or 'long' for signed numbers.
|
|
** if the sign bit of the lower word is on or any bits
|
|
** in the upper word are on, then we must use 'long'.
|
|
************************************************************************/
|
|
token_t
|
|
c_size(
|
|
long value
|
|
)
|
|
{
|
|
return((token_t)((ABS(value) > INT_MAX) ? L_LONGINT : L_CINTEGER));
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** l_size : returns 'longint' or 'longunsigned' for long numbers.
|
|
** if the sign bit of the high word is on this is 'longunsigned';
|
|
************************************************************************/
|
|
token_t
|
|
l_size(
|
|
long value
|
|
)
|
|
{
|
|
return((token_t)((value > LONG_MAX) ? L_LONGUNSIGNED : L_LONGINT));
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** ul_size : returns 'unsigned' or 'longunsigned' for unsigned numbers.
|
|
** if the number can't be represented as unsigned, it is promoted to
|
|
** unsignedlong.
|
|
************************************************************************/
|
|
token_t
|
|
ul_size(
|
|
long value
|
|
)
|
|
{
|
|
return((token_t)((ABS(value) > UINT_MAX-1) ? L_LONGUNSIGNED : L_CUNSIGNED));
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** ctoi : character to int.
|
|
************************************************************************/
|
|
int
|
|
ctoi(
|
|
int c
|
|
)
|
|
{
|
|
if(LXC_IS_DIGIT((WCHAR)c)) {
|
|
return(c - L'0');
|
|
} else {
|
|
return(towupper((WCHAR)c) - towupper(L'A') + 10);
|
|
}
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* ESCAPE - get an escaped character
|
|
*
|
|
* ARGUMENTS - none
|
|
*
|
|
* RETURNS - value of escaped character
|
|
*
|
|
* SIDE EFFECTS - may push back input
|
|
*
|
|
* DESCRIPTION - An escape ( '\' ) was discovered in the input. Translate
|
|
* the next symbol or symbols into an escape sequence.
|
|
*
|
|
* AUTHOR - Ralph Ryan, Sept. 7, 1982
|
|
*
|
|
* MODIFICATIONS - none
|
|
*
|
|
************************************************************************/
|
|
int
|
|
escape(
|
|
REG int c
|
|
)
|
|
{
|
|
REG int value;
|
|
int cnt;
|
|
|
|
escape_again:
|
|
|
|
if( LXC_IS_ODIGIT((WCHAR)c) ) {/* \ooo is an octal number, must fit into a byte */
|
|
cnt = 1;
|
|
for(value = ctoi(c), c = get_non_eof();
|
|
(cnt < 3) && LXC_IS_ODIGIT((WCHAR)c);
|
|
cnt++, c = get_non_eof()
|
|
) {
|
|
value *= 8;
|
|
value += ctoi(c);
|
|
}
|
|
if( ! Prep ) {
|
|
if(value > 255) {
|
|
Msg_Temp = GET_MSG (2022);
|
|
SET_MSG (Msg_Text, sizeof(Msg_Text), Msg_Temp, value);
|
|
error (2022);
|
|
}
|
|
}
|
|
UNGETCH();
|
|
return((char)value);
|
|
}
|
|
switch( c ) {
|
|
case L'a':
|
|
return(ALERT_CHAR);
|
|
break;
|
|
case L'b':
|
|
return(L'\b');
|
|
break;
|
|
case L'f':
|
|
return(L'\f');
|
|
break;
|
|
case L'n':
|
|
return fMacRsrcs ? (L'\r') : (L'\n');
|
|
break;
|
|
case L'r':
|
|
return fMacRsrcs ? (L'\n') : (L'\r');
|
|
break;
|
|
case L't':
|
|
return(L'\t');
|
|
break;
|
|
case L'v':
|
|
return(L'\v');
|
|
break;
|
|
case L'x':
|
|
cnt = 0;
|
|
value = 0;
|
|
c = get_non_eof();
|
|
while((cnt < 3) && LXC_IS_XDIGIT((WCHAR)c)) {
|
|
value *= 16;
|
|
value += ctoi(c);
|
|
c = get_non_eof();
|
|
cnt++;
|
|
}
|
|
if(cnt == 0) {
|
|
strcpy (Msg_Text, GET_MSG (2153));
|
|
error (2153);
|
|
}
|
|
UNGETCH();
|
|
return((char)value); /* cast to get sign extend */
|
|
default:
|
|
if(c != L'\\') {
|
|
return(c);
|
|
} else {
|
|
if(checknl()) {
|
|
c = get_non_eof();
|
|
goto escape_again;
|
|
} else {
|
|
return(c);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* CHECKOP - Check whether the next input character matches the argument.
|
|
*
|
|
* ARGUMENTS
|
|
* short op - the character to be checked against
|
|
*
|
|
* RETURNS
|
|
* TRUE or FALSE
|
|
*
|
|
* SIDE EFFECTS
|
|
* Will push character back onto the input if there is no match.
|
|
*
|
|
* DESCRIPTION
|
|
* If the next input character matches op, return TRUE. Otherwise
|
|
* push it back onto the input.
|
|
*
|
|
* AUTHOR - Ralph Ryan, Sept. 9, 1982
|
|
*
|
|
* MODIFICATIONS - none
|
|
*
|
|
************************************************************************/
|
|
int
|
|
checkop(
|
|
int op
|
|
)
|
|
{
|
|
if(op == (int)get_non_eof()) {
|
|
return(TRUE);
|
|
}
|
|
UNGETCH();
|
|
return(FALSE);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** DumpSlashComment : while skipping a comment, output it.
|
|
************************************************************************/
|
|
void
|
|
DumpSlashComment(
|
|
VOID
|
|
)
|
|
{
|
|
if( ! Cflag ) {
|
|
skip_NLonly();
|
|
return;
|
|
}
|
|
myfwrite(L"//", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
|
|
for(;;) {
|
|
WCHAR c;
|
|
|
|
switch(CHARMAP(c = GETCH())) {
|
|
// must manually write '\r' with '\n' when writing 16-bit strings
|
|
//case LX_CR:
|
|
// continue;
|
|
case LX_EOS:
|
|
handle_eos();
|
|
continue;
|
|
case LX_NL:
|
|
UNGETCH();
|
|
return;
|
|
}
|
|
myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
|
|
}
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** dump_comment : while skipping a comment, output it.
|
|
************************************************************************/
|
|
void
|
|
dump_comment(
|
|
void
|
|
)
|
|
{
|
|
if( ! Cflag ) {
|
|
skip_1comment();
|
|
return;
|
|
}
|
|
myfwrite(L"/*", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
|
|
for(;;) {
|
|
WCHAR c;
|
|
|
|
switch(CHARMAP(c = GETCH())) {
|
|
case LX_STAR:
|
|
if(checkop(L'/')) {
|
|
myfwrite(L"*/", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
|
|
return;
|
|
}
|
|
break;
|
|
case LX_EOS:
|
|
handle_eos();
|
|
continue;
|
|
case LX_NL:
|
|
Linenumber++;
|
|
break; /* output below */
|
|
// must manually write '\r' with '\n' when writing 16-bit strings
|
|
//case LX_CR:
|
|
// continue;
|
|
}
|
|
myfwrite(&c, sizeof(WCHAR), 1, OUTPUTFILE);
|
|
}
|
|
}
|
|
|
|
/************************************************************************/
|
|
/* skip_comment() */
|
|
/************************************************************************/
|
|
int
|
|
skip_comment(
|
|
void
|
|
)
|
|
{
|
|
if(checkop(L'*')) {
|
|
skip_1comment();
|
|
return(TRUE);
|
|
} else if(checkop(L'/')) {
|
|
skip_NLonly();
|
|
return(TRUE);
|
|
} else {
|
|
return(FALSE);
|
|
}
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** skip_1comment : we're called when we're already in a comment.
|
|
** we're looking for the comment close. we also count newlines
|
|
** and output them if we're preprocessing.
|
|
************************************************************************/
|
|
void
|
|
skip_1comment(
|
|
void
|
|
)
|
|
{
|
|
UINT c;
|
|
|
|
for(;;) {
|
|
c = GETCH();
|
|
if(c == L'*') {
|
|
recheck:
|
|
c = GETCH();
|
|
if(c == L'/') { /* end of comment */
|
|
return;
|
|
} else if(c == L'*') {
|
|
/*
|
|
** if we get another '*' go back and check for a slash
|
|
*/
|
|
goto recheck;
|
|
} else if(c == EOS_CHAR) {
|
|
handle_eos();
|
|
goto recheck;
|
|
}
|
|
}
|
|
/*
|
|
** note we fall through here. we know this baby is not a '*'
|
|
** we used to unget the char and continue. since we check for
|
|
** another '*' inside the above test, we can fall through here
|
|
** without ungetting/getting and checking again.
|
|
*/
|
|
if(c <= L'\n') {
|
|
/*
|
|
** hopefully, the above test is less expensive than doing two tests
|
|
*/
|
|
if(c == L'\n') {
|
|
Linenumber++;
|
|
if(Prep) {
|
|
myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
|
|
}
|
|
} else if(c == EOS_CHAR) {
|
|
handle_eos();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** skip_cwhite : while the current character is whitespace or a comment.
|
|
** a newline is NOT whitespace.
|
|
************************************************************************/
|
|
WCHAR
|
|
skip_cwhite(
|
|
void
|
|
)
|
|
{
|
|
REG WCHAR c;
|
|
|
|
skip_cwhite_again:
|
|
while((c = GETCH()) <= L'/') { /* many chars are above this */
|
|
if(c == L'/') {
|
|
if( ! skip_comment()) {
|
|
return(L'/');
|
|
}
|
|
} else if(c > L' ') { /* char is between '!' and '.' */
|
|
return(c);
|
|
} else {
|
|
switch(CHARMAP(c)) {
|
|
case LX_EOS:
|
|
handle_eos();
|
|
break;
|
|
case LX_WHITE:
|
|
continue;
|
|
break;
|
|
case LX_CR:
|
|
continue;
|
|
break;
|
|
default:
|
|
return(c);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if((c == L'\\') && (checknl())) {
|
|
goto skip_cwhite_again;
|
|
}
|
|
return(c);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** checknl : check for newline, skipping carriage return if there is one.
|
|
** also increments Linenumber, so this should be used by routines which
|
|
** will not push the newline back in such a way that rawtok() will be invoked,
|
|
** find the newline and do another increment.
|
|
************************************************************************/
|
|
int
|
|
checknl(
|
|
void
|
|
)
|
|
{
|
|
REG WCHAR c;
|
|
|
|
for(;;) {
|
|
c = GETCH();
|
|
if(c > L'\r') {
|
|
UNGETCH();
|
|
return(FALSE);
|
|
}
|
|
switch(c) {
|
|
case L'\n':
|
|
Linenumber++;
|
|
// must manually write '\r' with '\n' when writing 16-bit strings
|
|
if( Prep ) {
|
|
myfwrite(L"\r\n", 2 * sizeof(WCHAR), 1, OUTPUTFILE);
|
|
}
|
|
return(TRUE);
|
|
break;
|
|
case L'\r':
|
|
continue;
|
|
break;
|
|
case EOS_CHAR:
|
|
handle_eos();
|
|
PREVCH() = L'\\'; /* M00HACK - needs pushback */
|
|
continue;
|
|
break;
|
|
default:
|
|
UNGETCH();
|
|
return(FALSE);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
** get_non_eof : get a real char.
|
|
************************************************************************/
|
|
WCHAR
|
|
get_non_eof(
|
|
void
|
|
)
|
|
{
|
|
WCHAR c;
|
|
|
|
get_non_eof_again:
|
|
while((c = GETCH()) <= L'\r') {
|
|
if(c == L'\r') {
|
|
continue;
|
|
} else if(c != EOS_CHAR) {
|
|
break;
|
|
}
|
|
if(Tiny_lexer_nesting > 0) {
|
|
break;
|
|
}
|
|
handle_eos();
|
|
}
|
|
if((c == L'\\') && (checknl())) {
|
|
goto get_non_eof_again;
|
|
}
|
|
return(c);
|
|
}
|