windows-nt/Source/XPSP1/NT/com/rpc/midl/front/lex.cxx

/********************************** module *********************************/
/*              Copyright (c) 1993-2000 Microsoft Corporation              */
/*                                                                         */
/*                                  cclex                                  */
/*                  lexical analyser for the C compiler                    */
/*                                                                         */
/***************************************************************************/
/*                                                                         */
/*    @ Purpose:                                                           */
/*                                                                         */
/*    @ Functions included:                                                */
/*                                                                         */
/*                                                                         */
/*    @ Author: Gerd Immeyer              @ Version:                       */
/*                                                                         */
/*    @ Creation Date: 1987.02.09         @ Modification Date:             */
/*                                                                         */
/***************************************************************************/


#pragma warning ( disable : 4514 4310 4710 )

#include "nulldefs.h"
extern "C" {
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <ctype.h>
}

#include "common.hxx"
#include "errors.hxx"
#include "midlnode.hxx"
#include "listhndl.hxx"
#include "filehndl.hxx"
#include "lextable.hxx"
#include "lexutils.hxx"
#include "grammar.h"
#include "gramutil.hxx"
#include "cmdana.hxx"
#include "mbcs.hxx"

extern "C" {
    #include "lex.h"
}

extern void ParseError( STATUS_T, char *);
extern  NFA_INFO *pImportCntrl;

extern  lextype_t   yylval;

extern token_t  toktyp_G;           /* token type */
extern short    toklen_G;           /* len of token string */
extern char     *tokptr_G;          /* pointer to token string */
extern long     tokval_G;           /* value of constant token */
extern short    curr_line_G;


extern LexTable *pMidlLexTable;
extern short    CompileMode;
extern CMD_ARG * pCommand;

int chCached = 0;

char NewCCputbackc( char ch )
    {
    if (chCached)
    {
        pImportCntrl->UnGetChar(short(chCached));
    }
    chCached = ch;

    if ( ch == '\n' )
        curr_line_G--;
    return ch;
    }


/*****              definition of state table fields            ****/

#define ERR 0x7f0c          /* character not in character set */

#define X10 0x0100
#define X11 0x0101
#define X20 0x0200
#define X21 0x0201
#define X23 0x0203
#define X30 0x0300
#define X40 0x0400
#define X41 0x0401
#define X43 0x0403
#define X50 0x0500
#define X51 0x0501
#define X53 0x0503
#define X62 0x0602
#define X70 0x0700
#define X71 0x0701
#define X73 0x0703
#define X82 0x0802
#define X90 0x0900
#define X91 0x0901

#define XLQ 0x0a00
#define XLD 0x0b00

/*----              define of single operators          ----*/

#define O65     0x410d                      /* ' 65 */
#define O43     ('(' * 256 + 12)            /* ( 43 */
#define O44     (')' * 256 + 12)            /* ) 44 */
#define O49     (',' *256 + 12)             /* , 49 */
#define O24     ('.' *256 + 10)             /* . 24 */
#define O14     (':'  *256 + 12)            /* : 14 */
#define O50     (';'  *256 + 12)            /* ; 50 */
#define O13     ('?'  *256 + 12)            /* ? 13 */
#define O47     ('['  *256 + 12)            /* [ 47 */
#define O48     (']'  *256 + 12)            /* ] 48 */
#define O45     ('{'  *256 + 12)            /* { 45 */
#define O46     ('}'  *256 + 12)            /* } 46 */
#define O23     ('~'  *256 + 12)            /* ~ 23 */
#define OHS     ('#'  *256 + 12)            /* #    */
#define O64     0x400e                      /* " 64 */
#define O7d     0x0000                      /*  eol */
#define O7e     (short)0x9f0c               /*  eof */

/*----         define of possible multi character operator      ----*/

#define D00 0x000b      /* - 00 */
#define D01 0x010c      /* / 01 */
#define D02 0x020c      /* < 02 */
#define D03 0x030c      /* > 03 */
#define D04 0x040c      /* ! 04 */
#define D05 0x050c      /* % 05 */
#define D06 0x060c      /* & 06 */
#define D07 0x070c      /* * 07 */
#define D08 0x080b      /* + 08 */
#define D09 0x090c      /* = 09 */
#define D0a 0x0a0c      /* ^ 0a */
#define D0b 0x0b0c      /* | 0b */


/*****             character table              *****/
/*  MIDL supports the ANSI character set as input   */

const extern short ct[256]= {

/*     0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f    */
     O7e,ERR,ERR,ERR,ERR,  0,ERR,ERR,ERR,  0,O7d,ERR,  0,  0,ERR,ERR,
/*    10  11  12  13  14  15  16  17  18  19  1a  1b  1c  1d  1e  1f    */
     ERR,ERR,ERR,ERR,ERR,ERR,ERR,ERR,ERR,ERR,O7e,ERR,ERR,ERR,ERR,ERR,
/*         !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /    */
       0,D04,O64,OHS,ERR,D05,D06,O65,O43,O44,D07,D08,O49,D00,O24,D01,
/*     0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?    */
       7,  8,  8,  8,  8,  8,  8,  8,  9,  9,O14,O50,D02,D09,D03,O13,
/*     @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O    */
     ERR,  1,  1,  1,  1,  2,  3,  4,  4,  4,  4,  4, 15,  4,  4,  4,
/*     P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _    */
       4,  4,  4,  4,  4,  4,  4,  4,  6,  4,  4,O47,ERR,O48,D0a,  4,
/*     `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o    */
     ERR,  1,  1,  1,  1,  2,  3,  4,  4,  4,  4,  4,  5,  4,  4,  4,
/*     p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~ DEL    */
       4,  4,  4,  4,  4,  4,  4,  4,  6,  4,  4,O45,D0b,O46,O23,ERR,
/*    80  81  82  83  84  85  86  87  88  89  8a  8b  8c  8d  8e  8f    */
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
/*    90  91  92  93  94  95  96  97  98  99  9a  9b  9c  9d  9e  9f    */
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
/*    a0  a1  a2  a3  a4  a5  a6  a7  a8  a9  aa  ab  ac  ad  ae  af    */
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
/*    b0  b1  b2  b3  b4  b5  b6  b7  b8  b9  ba  bb  bc  bd  be  bf    */
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
/*    c0  c1  c2  c3  c4  c5  c6  c7  c8  c9  ca  cb  cc  cd  ce  cf    */
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
/*    d0  d1  d2  d3  d4  d5  d6  d7  d8  d9  da  db  dc  dd  de  df    */
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
/*    e0  e1  e2  e3  e4  e5  e6  e7  e8  e9  ea  eb  ec  ed  ee  ef    */
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
/*    f0  f1  f2  f3  f4  f5  f6  f7  f8  f9  fa  fb  fc  fd  fe  ff    */
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4};


/*****              state transition table          *****/

const extern short st[ 13 ][ 16 ] = {

//               0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
//             spc a-d   e   f g-z   l   x   0 1-7 8-9   . + -  op   '  "    L
//
/* start 0 */    0,  1,  1,  1,  1,  1,  1,  2,  5,  5,X90,X90,X90,X90,X90, 12,
/* name  1 */  X10,  1,  1,  1,  1,  1,  1,  1,  1,  1,X11,X11,X11,X11,X11,  1,
/* 0     2 */  X20,X23,  9,X23,X23,X30,  3,  6,  6,  6,X23,X21,X21,X21,X21,X30,
/* 0x    3 */  X53,  4,  4,  4,X53,X53,X53,  4,  4,  4,X53,X53,X53,X53,X53,X53,
/* hex   4 */  X50,  4,  4,  4,X53,X53,X53,  4,  4,  4,X53,X51,X51,X51,X51,X53,
/* int   5 */  X20,X23,  9,X23,X23,X23,X23,  5,  5,  5,X21,X21,X21,X21,X21,X23,
/* oct   6 */  X70,X73,  9,X73,X73,X73,X73,  6,  6,  5,  8,X71,X71,X71,X71,X73,
/* .     7 */  X91,X91,X91,X91,X91,X91,X91,X91,X91,X91,X91,X91,X91,X91,X91,X91,
/* int.  8 */  X40,X43,  9,X43,X43,X43,X43,  8,  8,  8,X43,X41,X41,X41,X41,X43,
/* .e    9 */  X40,X43,X43,X43,X43,X43,X43, 11, 11, 11,X43, 10,X41,X41,X41,X43,
/* .e-   10*/  X43,X43,X43,X43,X43,X43,X43, 11, 11, 11,X43,X43,X43,X43,X43,X43,
/* .e-i  11*/  X40,X43,X43,X43,X43,X43,X43, 11, 11, 11,X43,X41,X41,X41,X41,X43,
/* L     12*/  X10,  1,  1,  1,  1,  1,  1,  1,  1,  1,X11,X11,X11,XLQ,XLD,  1

};


/*****             multi character operator table           *****/

const token_t moptab[]     = {

/*             0   1   2   3   4   5   6   7   8   9  10  11      */
/*             -   /   <   >   !   %   &   *   +   =   ^   |      */
/*                                                                */
/*  single */ MINUS,DIV,LT,GT,EXCLAIM, MOD,
                                AND,  MULT, PLUS, ASSIGN, XOR, OR,
/*  op =   */ SUBASSIGN, DIVASSIGN, LTEQ, GTEQ, NOTEQ,
              MODASSIGN, ANDASSIGN, MULASSIGN, ADDASSIGN, EQUALS,
              XORASSIGN, ORASSIGN,
/*  op op  */ DECOP, GARBAGETOKEN, LSHIFT, RSHIFT, 0, 0,
                                ANDAND, 0, INCOP, EQUALS, 0, OROR   };

/*****          define of the action routines           *****/

token_t name(void);
token_t mulop(void);
token_t character(void);
token_t string(void);
token_t ProcessHash();
token_t ProcessComplexDefine( char *, char *, int );
token_t LChar();
token_t LStr();

extern token_t ScanGuid( void );
extern token_t ScanVersion( void );
extern token_t ScanImplicitImports(void);

typedef token_t (*TOKEN_PFN)(void);

const static TOKEN_PFN action[] = {
    0,              /* unused */
    name,               /* handle name token */
    cnv_int,            /* convert integer token */
    cnv_int,            /* convert integer token */
    cnv_hex,                /* convert hex constant */
    cnv_hex,                /* convert hex constant */
    cnv_octal,          /* convert octal constant */
    cnv_octal,          /* convert octal constant */
    cnv_float,          /* convert floating point constant */
    mulop,          /* handle multi character operator */
    LChar,          /* wide character */
    LStr,           /* wide character string */
    };

/*****          declare of global varables          *****/

static short ci;            /* current state character index */
static char ch;         /* current character */
static int pbch;        /* flag describing whether to take the next char
                            or not */
char LastLexChar;

unsigned short LexContext   = LEX_NORMAL;

#define MAX_LINE_SIZE 256
static char tok_buffer[MAX_LINE_SIZE];

token_t IsValidPragma( char *);


/*............................. internal function ..........................*/
/*                                      */
/*              comment analyzer                */
/*                                      */

token_t comment()
{
    BOOL    fParseError = FALSE;

    for (;;)
        {
        ch = NewCCGetch();
        if (ch == 0)
            {
            fParseError = TRUE;
            break;
            }
        if (CurrentCharSet.IsMbcsLeadByte(ch))
            {
            NewCCGetch();
            }
        else if (ch == '*')
            {
            char    chNext = NewCCGetch();
            if (chNext == 0)
                {
                fParseError = TRUE;
                break;
                }
            if (CurrentCharSet.IsMbcsLeadByte(chNext))
                {
                NewCCGetch();
                }
            else if (chNext == '/')
                {
                break;
                }
            else if (chNext == '*')
                {
                NewCCputbackc(chNext);
                }
            }
        }
    if (fParseError)
        {
        ParseError(EOF_IN_COMMENT, (char *)NULL);   /*   no end of comment operator */
        exit( EOF_IN_COMMENT );
        }
    return ( NOTOKEN );
}

token_t commentline()
{
    for (;;)
        {
        ch =  NewCCGetch();
        if( ch == 0 )
            {
            ParseError(EOF_IN_COMMENT, (char *)NULL);
            exit( EOF_IN_COMMENT );
            break;
            }
        else if (CurrentCharSet.IsMbcsLeadByte(ch))
            {
            NewCCGetch();
            }
        else if (ch == '\n')
            {
            break;
            }
        }
    return ( NOTOKEN );         /* get the next token */
}


/*............................. internal function ..........................*/
/*                                      */
/*              multi character operator                */
/*                                      */

const static token_t *snglop = &moptab[0];  /* adr of single character operator */
const static token_t *assgop = &moptab[12]; /* adr of assignment operator */
const static token_t *dblop  = &moptab[24]; /* adr of double character operator */

token_t mulop()
    {
    REG unsigned short i;                   /* index into multi operator table */
    REG char lstch;
//printf ("in mulop ch = %c\n", ch);

    i = unsigned short(((unsigned short)ci) >> 8);  /* get high byte of character index */
    if( i > 11 ) {          /* is it a type specification ? */
        // check for EOI
        if ( ci == short(0x9f0c) )
            {
            if(pImportCntrl->GetLexLevel() == 0)
                {
                if(pImportCntrl->GetEOIFlag())
                    return 0;
                else
                    pImportCntrl->SetEOIFlag();
                }
            return EOI;
            }
        if( i == 64 )           /* character is " */
            return ( string() );    /* handle string token */
        if( i == 65 )           /* character is ' */
            return ( character() ); /* handle character constant */
        if( i == '#' )
            return ProcessHash();   /* process any hash tokens */
        if( i == '.' )
            {
            if( (ch = NewCCGetch()) == '.' )
                {
                return DOTDOT;
                }
            NewCCputbackc( ch );
            }

        if ( i == LBRACK )
            {
            inside_rpc++;
            return i;
            }
        if ( i == RBRACK )
            {
            inside_rpc--;
            return i;
            }

        return ( i );               /* return type of single operator */
    }
    lstch = ch;                     /* save entry character */
    ch = NewCCGetch();              /* get a new one */
    if (CurrentCharSet.IsMbcsLeadByte(ch))
        {
        toklen_G = 1;
        tokptr_G[1] = 0;
        NewCCputbackc(ch);
        return *(snglop+i);
        }
    tokptr_G[1] = ch; tokptr_G[2] = 0;
    toklen_G = 2;                   /* add to token string */
    if( ch == '=' ) {               /* is next character an equal op. */
        return *(assgop+i);         /* return an assign operator */
    }
    if( lstch == ch ) {             /* is next char. = current char. ? */
        toktyp_G = *(dblop+i);      /*   yes, get its type */
        if( !toktyp_G ) {           /* is it a doppel operator ? */
            toklen_G = 1;           /* update token string */
            tokptr_G[1] = 0;
            NewCCputbackc(ch);      /* deliberate, puback of EOF is ignored */
            return *(snglop+i);     /*   no, return single operator */
        }
        if( ch == '/' )             /* if the operator is double // */
            {
            // potentially an error

//          ParseError( SINGLE_LINE_COMMENT, (char *)0 );
            return(commentline());  /*   the next line is a comment */
            }
        ch = NewCCGetch();                  /* get next character */
        if (ch == '=') {
            tokptr_G[2] = '='; tokptr_G[3] = '\0';
            toklen_G = 3;           /* update token string */
                        if(toktyp_G == LSHIFT) {              /* if shift op.and equal sign ? */
                                return (LEFTASSIGN);                     /* return as assign operator */
                        }
                        if(toktyp_G == RSHIFT) {
                                return (RIGHTASSIGN);
            }
            tokptr_G[2] = '\0'; toklen_G = 2;
        }
        NewCCputbackc(ch);                  /* put back unused character */
        return (toktyp_G);              /* else return doppel char. operator */
    }
    if( lstch == '-' && ch == '>' ) {   /* if structure operator */
                return (POINTSTO);                    /* return structure operator */
    }
    if( lstch == '/' && ch == '*' ) {   /* if comment */
        return( comment() );            /* ignore the comment */
    }
    tokptr_G[1] = '\0'; toklen_G = 1;   /* remove from token string */
    NewCCputbackc(ch);                      /* putback unused character */
    return *(snglop+i);                 /* return single character operator */
}

/*............................. internal function ..........................*/
/*                                      */
/*          convert escape (\) character                */
/*                                      */

char convesc()
    {
    unsigned short  value = 0;
    unsigned short  tmp;
    BOOL            fConstantIsIllegal  = FALSE;

    ch = NewCCGetch();

    if ( ch == 'n' )
        ch = 0xa;
    else if (ch == 't')
        ch = 0x9;
    else if (ch == 'v')
        ch = 0xb;
    else if (ch == 'b')
        ch = 0x8;
    else if( ch == 'r' )
        ch = 0xd;
    else if( ch == 'f' )
        ch = 0xc;
    else if( ch == 'a' )
        ch = 0x7;
    else if( (ch == 'x') || (ch == 'X') )
        {
        int i;

        for( i = 0, value = 0, fConstantIsIllegal = FALSE; i < 2; ++i )
            {
            tmp = ch = NewCCGetch();
            tmp = (unsigned short)toupper( tmp );
            if( isxdigit( tmp ) )
                {
                tmp = unsigned short( (tmp >= '0') && (tmp <= '9') ? (tmp - '0') : (tmp - 'A') + 0xa );
                }
            else if( ch == '\'' )
                {
                NewCCputbackc( ch );
                break;
                }
            else
                {
                fConstantIsIllegal  = TRUE;
                }
            value = unsigned short( value * 16 + tmp );
        }

        if( fConstantIsIllegal || (value > (unsigned short) 0x00ff) )
            ParseError( ILLEGAL_CONSTANT, (char *)0 );

        ch = (char )value;
        }
    else if( (ch >= '0') && (ch <= '7'))
        {
        int i;
        value = unsigned short(ch - '0');

        // the limit for this for loop is 2 because we already saw 1 character

        for ( i = 0, value = unsigned short(ch - '0'), fConstantIsIllegal = FALSE; i < 2; ++i)
            {
            tmp = ch = NewCCGetch();
            if( (ch >= '0') && (ch <= '7'))
                {
                tmp = unsigned short(tmp - '0');
                value = unsigned short(value * 8 + tmp);
                }
            else if( ch == '\'' )
                {
                NewCCputbackc( ch );
                break;
                }
            else
                fConstantIsIllegal = TRUE;
            }


        if( fConstantIsIllegal || (value > (unsigned short) 0x00ff) )
            ParseError( ILLEGAL_CONSTANT, (char *)0 );
        ch = (char )value;
        }

    return ( ch );
    }


/*............................. internal function ..........................*/
/*                                      */
/*               string analyzer                */
/*                                      */

token_t
character()
    {
        ch = NewCCGetch();
        if (CurrentCharSet.IsMbcsLeadByte(ch))
            {
            tokptr_G[0] = ch;
            tokptr_G[1] = NewCCGetch();
            tokptr_G[2] = 0;
            toklen_G = 2;
            }
        else
            {
            if (ch == '\\')
                {
                ch = convesc();
                }
            tokptr_G[0] = ch;
            tokptr_G[1] = '\0';

            yylval.yy_numeric.Val = tokval_G = ch;
            }
        if (NewCCGetch() != '\'')
            {
            ParseError(CHAR_CONST_NOT_TERMINATED,(char *)NULL );
            exit( CHAR_CONST_NOT_TERMINATED );
            }
         return (CHARACTERCONSTANT);
    }

// this rtn is called when the quote has been sensed.

char*    g_pchStrBuffer   = 0;
unsigned long g_ulStrBufferLen = 1024;

//
// Scan ahead in the current file to see if the next non-space character is a
// quote.  If it is then this is a string constant that is split into two
// pieces (e.g. "this" ... " is a " ... "test").  If the next character is
// not a quote reset the file back to where we started from.
//
// If this is a multi-string situation return MULTIPLE_PROPERTY_ATTRIBUTES.
// This is a bit of a mis-use of that error code but it sounds nice.
//
// HACKHACK: The routine depends on internal knowledge of how NewCCGetch works.
//           Doing something similiar using the grammar was tried but failed
//           when processing imports because of details of how the trickery
//           played on the lexer works to get it to change streams in mid-go.
//

STATUS_T spacereadahead()
{
    fpos_t  fpos;
    short   newlines = 0;
    int     ch = ' ';

    if (0 != fgetpos(hFile_G, &fpos))
        return INPUT_READ;

    while (isspace(ch) && !feof(hFile_G))
        {
        ch = getc(hFile_G);

        if ( '\n' == ch )
            ++newlines;
        }

    if ('\"' == ch)
        {
        curr_line_G = (short) (curr_line_G + newlines);
        return MULTIPLE_PROPERTY_ATTRIBUTES;
        }
    else
        {
        if (0 != fsetpos(hFile_G, &fpos))
            return INPUT_READ;

        return STATUS_OK;
        }
}


token_t
string()
    {
    STATUS_T    Status          = STATUS_OK;
    char    *   ptr;

    if ( !g_pchStrBuffer )
        {
        g_pchStrBuffer = ( char * )malloc( sizeof( char ) * g_ulStrBufferLen );
        if ( NULL == g_pchStrBuffer )
            {
            RpcError( 0, 0, OUT_OF_MEMORY, 0 );
            exit( OUT_OF_MEMORY );
            }
        }

    strncpy( g_pchStrBuffer, tokptr_G, toklen_G );
    ptr = g_pchStrBuffer;

    ch  = 0;
    while( ( ch != '"' ) && ( Status == STATUS_OK ) )
        {
        if ( ( unsigned long ) ( ptr - g_pchStrBuffer ) > ( g_ulStrBufferLen - 3 ) )
            {
            char* pTempStrBuffer = ( char* ) realloc( g_pchStrBuffer, g_ulStrBufferLen * 2 );
            if ( pTempStrBuffer )
                {
                ptr = ( g_pchStrBuffer - ptr ) + pTempStrBuffer;
                g_pchStrBuffer = pTempStrBuffer;
                g_ulStrBufferLen = g_ulStrBufferLen * 2;
                }
            else
                {
                Status = STRING_TOO_LONG;
                }
            }

        ch  = NewCCGetch();

        if( ch == 0 )
            {
            Status  = EOF_IN_STRING;
            }
        else if ( ch == '\\' )
            {
            *ptr++ = ch;
            *ptr++ = NewCCGetch();
            ch = 0;
            }
        else if ( ch != '\"' )
            {
            *ptr++  = ch;
            if (CurrentCharSet.IsMbcsLeadByte(ch))
                *ptr++ = NewCCGetch();
            }
        else
            {
            Status = spacereadahead();

            if ( MULTIPLE_PROPERTY_ATTRIBUTES == Status )
                {
                Status = STATUS_OK;
                ch = 0;
                }
            }
        }

    *ptr = 0;

    if( Status != STATUS_OK )
        {
        ParseError( Status, (char *)0 );
        exit( Status );
        }

    yylval.yy_string = pMidlLexTable->LexInsert( g_pchStrBuffer );
    return ( STRING );
    }
/****************************** external function ***************************/
/*                                      */
/*              lexical analyzer                */
/*                                      */

static BOOL     fLastToken  = 0;
static BOOL     fLineLengthError = 0;
static token_t  LastToken;

void
initlex()
    {
    fLastToken  = 0;
    }


void
yyunlex( token_t T )
    {
    LastToken   = T;
    fLastToken  = 1;
    }

token_t yylex()
{
    REG short state;        /* token state */
    REG char *ptr;

    if( fLastToken )
        {
        fLastToken  = 0;
        return LastToken;
        }

    if ( LexContext != LEX_NORMAL )
        {
        switch ( LexContext )
            {
            case LEX_GUID:
                {
                LexContext = LEX_NORMAL;
                return ScanGuid();
                }
            case LEX_VERSION:
                {
                LexContext = LEX_NORMAL;
                return ScanVersion();
                }
            case LEX_ODL_BASE_IMPORT:
            case LEX_ODL_BASE_IMPORT2:
                {
                return ScanImplicitImports();
                break;
                }
            default:
                MIDL_ASSERT(0);

            }
        }

again:
    state = 0;              /* initial state */
    ptr = tokptr_G = tok_buffer;    /* remember token begin position */
    toklen_G = 0;

    do
        {
        ci = ct[ (unsigned char) (ch=NewCCGetch()) ];         /* character index out of char.tab. */
        state = st[ state ][ ci & 0x00ff ]; /* determine new state */
        } while ( state == 0 );                 /* skip white space */

    *(ptr++) = ch;
    toklen_G++;             /* add chacter to token string */

    if (CurrentCharSet.IsMbcsLeadByte(ch))
        {
        *(ptr++) = NewCCGetch();
        toklen_G++;
        }

    while( state < 13 )
        {           /* loop til end state */
        ci = ct[ (unsigned char) (ch=NewCCGetch()) ];       /* character index out of char.tab. */
        state = st[ state ][ ci & 0x00ff ]; /* determine new state */
        if (state < 13)
            {                   /* if still going, */
            if (toklen_G + 1 != MAX_LINE_SIZE) /* and the token isn't too large */
                {
                *(ptr++) = ch;  toklen_G++;     /* add chacter to token string */
                if (CurrentCharSet.IsMbcsLeadByte(ch))
                    {
                    *(ptr++) = NewCCGetch();
                    toklen_G++;
                    }
                }
            else
                {
                fLineLengthError = 1;
                }
            }
        };

    *ptr = '\0';
    LastLexChar = ch;

    if (fLineLengthError)
        {
        ParseError(IDENTIFIER_TOO_LONG, (char *)0 );
        fLineLengthError = 0;
        }

    switch( state & 0x00ff )
        {
        case 2: ch = NewCCGetch();      /* position to next character */
            break;

        case 3:
        case 1: NewCCputbackc(ch);          /* position to current character */
            break;
        /* case 0 - do nothing */
        }
//printf ("current ch = %c\n", ch);
    toktyp_G = (*action[ state >> 8 ])();   /* execute action */

    // skip fluff like #line
    if (toktyp_G == NOTOKEN)
        goto again;

    return(toktyp_G);
}

token_t
LChar()
    {
    character();
    return WIDECHARACTERCONSTANT;
    }

token_t
LStr()
    {
    string();
    return WIDECHARACTERSTRING;
    }

// process line number tokens
token_t
ProcessLine()
    {
    char    *   ptr     = tokptr_G;

    curr_line_G = short( atoi( ptr ) - 1 );

    // skip spaces before file name
    while ( ( ch = NewCCGetch() ) == ' ' )
        ;

    ptr = tokptr_G;
    // see if we got a filename
    if ( ch == '\"' )
        {
        for (;;)
            {
            ch = NewCCGetch();
            *ptr++ = ch;
            if (CurrentCharSet.IsMbcsLeadByte(ch))
                {
                *ptr++ = NewCCGetch();
                }
            else if (ch == '\"')
                {
                break;
                }
            }
        *(--ptr) = '\0';

        StripSlashes( tokptr_G );

        pImportCntrl->SetLineFilename( tokptr_G );

        }

    // skip to end of line
    for (;;)
        {
        ch = NewCCGetch();
        if (ch == 0)
            {
            break;
            }
        else if (CurrentCharSet.IsMbcsLeadByte(ch))
            {
            ch = NewCCGetch();
            }
        else if (ch == '\n')
            {
            break;
            }
        }

    return NOTOKEN;
    }

// process # <something>
token_t
ProcessHash()
    {
    char    *   ptr     = tokptr_G,
            *   ptrsave = ptr;
    token_t     PragmaToken;

    do  // eat spaces
        {
        ch = NewCCGetch();
        } while( isspace( ch ) );

    // collect first token
    while( !isspace( ch ) )
        {
        *ptr++ = ch;
        if (CurrentCharSet.IsMbcsLeadByte(ch))
            {
            *ptr++ = NewCCGetch();
            }
        ch = NewCCGetch();
        }
    *ptr = '\0';

    // is this hash a pragma starter ?

#define PRAGMA_STRING               ("pragma")
#define LEN_PRAGMA_STRING           (6)
#define MIDL_PRAGMA_PREFIX          ("midl_")
#define LEN_MIDL_PRAGMA_PREFIX      (5)
#define LINE_STRING                 ("line")
#define LEN_LINE_STRING             (4)

    // we handle #pragma and #line directives

    // #line found
    if (strncmp( tokptr_G, LINE_STRING, LEN_LINE_STRING ) == 0 )
        {
        ptr = tokptr_G;
        // get the next token (the number)
        do  // eat spaces
            {
            ch = NewCCGetch();
            } while( isspace( ch ) );

        // collect first token
        while( !isspace( ch ) )
            {
            *ptr++ = ch;
            if (CurrentCharSet.IsMbcsLeadByte(ch))
                {
                *ptr++ = NewCCGetch();
                }
            ch = NewCCGetch();
            }
        *ptr = '\0';
        return ProcessLine();   // this needs to be called with tokptr_G pointing after
                                // the #line part
        }
    // # <number> found
    else if ( isdigit(*tokptr_G) )
        {
        *ptr = '\0';
        return ProcessLine();
        }
    else if( strncmp( tokptr_G, PRAGMA_STRING, LEN_PRAGMA_STRING ) == 0 )
        {
        // eat white space between #pragma and next word
        for(;;)
            {
            ch = NewCCGetch();
            if(!isspace(ch) ) break;
            *ptr++ = ch;
            }

        ptrsave = ptr;

        *ptr++ = ch;

        // pull next word in
        for(;;)
            {
            ch = NewCCGetch();
            if(!isalpha(ch) && (ch != '_') ) break;
            *ptr++ = ch;
            if (CurrentCharSet.IsMbcsLeadByte(ch))
                {
                *ptr++ = NewCCGetch();
                }
            }

        // put back next char (it may even be the \n)
        NewCCputbackc( ch );
        *ptr = 0;

        // check if it is a MIDL pragma or not
        if ( ( PragmaToken  = IsValidPragma( ptrsave ) ) != 0 )
            {
            return PragmaToken;
            }


        // assume it is some other C pragma, so return the string

        for (;;)
            {
            ch = NewCCGetch();
            *ptr++ = ch;
            if (CurrentCharSet.IsMbcsLeadByte(ch))
                {
                *ptr++ = NewCCGetch();
                }
            else if (ch == '\n')
                {
                break;
                }
            }
        *(--ptr) = 0;

        yylval.yy_string = pMidlLexTable->LexInsert( ptrsave );
        return KWCPRAGMA;

        }
    else
        {
        // some graceful recovery by the parser
        return GARBAGETOKEN;
        }
    }

token_t
IsValidPragma(
    char    *   p )
    {
static char * agPragmaNames[] = {
       "midl_import"
      ,"midl_echo"
      ,"midl_import_clnt_aux"
      ,"midl_import_srvr_aux"
      ,"pack"
};
static token_t agTokenVal[] = {
      KWMPRAGMAIMPORT
     ,KWMPRAGMAECHO
     ,KWMPRAGMAIMPORTCLNTAUX
     ,KWMPRAGMAIMPORTSRVRAUX
     ,KWCPRAGMAPACK
};

    short   Index = 0;

    while( Index < sizeof( agPragmaNames ) / sizeof(char *) )
        {
        if( !strcmp( p, agPragmaNames[ Index ] ) )
            return agTokenVal[ Index ];
        ++Index;
        }
    return 0;
    }

token_t
ScanGuid( void )
    {
    char        c;
    char    *   p = tokptr_G;

    if( (c = NewCCGetch()) == '\"' )
        {
        string();
        ParseError( QUOTED_UUID_NOT_OSF, (char *)0 );
        return UUIDTOKEN;
        }

    NewCCputbackc( c );

    // remove leading spaces.

    while ( (c = NewCCGetch()) != 0  && isspace( c ) )
        ;

    while ( c && (c  != ')') && (c != ',') && !isspace(c) )
        {
        *p++ = c;
        if (CurrentCharSet.IsMbcsLeadByte(c))
            {
            *p++ = NewCCGetch();
            }
        c = NewCCGetch();
        }

    NewCCputbackc( c );
    *p++ = 0;
    yylval.yy_string = pMidlLexTable->LexInsert(tokptr_G);

    return UUIDTOKEN;
    }

token_t
ScanVersion( void )
    {
    char        c;
    char    *   p = tokptr_G;

    //
    // remove leading spaces.
    //

    while ( (c = NewCCGetch()) != 0 && isspace(c) )
        ;

    while ( c && (c  != ')') && !isspace(c) )
        {
        *p++ = c;
        if (CurrentCharSet.IsMbcsLeadByte(c))
            {
            *p++ = NewCCGetch();
            }
        c    = NewCCGetch();
        }

    NewCCputbackc( c );
    *p++ = 0;
    yylval.yy_string = pMidlLexTable->LexInsert(tokptr_G);

    return VERSIONTOKEN;
    }

token_t
ScanImplicitImports( void )
    {
    switch ( LexContext )
        {
        case LEX_ODL_BASE_IMPORT:
            {
            tokptr_G    = "import";
            toktyp_G    = KWIMPORTODLBASE;
            LexContext  = LEX_ODL_BASE_IMPORT2;
            break;
            }
        case LEX_ODL_BASE_IMPORT2:
            {
            tokptr_G    = "oaidl.idl";
            toktyp_G    = STRING;
            yylval.yy_string = pMidlLexTable->LexInsert(tokptr_G);
            LexContext  = LEX_NORMAL;
            break;
            }
        default:
            MIDL_ASSERT(0);
        }


    return toktyp_G;

    }