/*++ Copyright (c) 1997-2000 Microsoft Corporation Module Name: EzParse.cpp Abstract: Poor man C/C++/any file parser. Author: Gor Nishanov (gorn) 03-Apr-1999 Revision History: Gor Nishanov (gorn) 03-Apr-1999 -- hacked together to prove that this can work GorN: 29-Sep-2000 - fix enumeration bug GorN: 29-Sep-2000 - add support for KdPrintEx like function GorN: 09-Oct-2000 - fixed "//" in the string bug GorN: 23-Oct-2000 - IGNORE_CPP_COMMENT, IGNORE_POUND_COMMENT options added GorN: 16-Apr-2001 - Properly handle \" within a string ToDo: Clean it up --*/ #define STRICT #include #include #pragma warning(disable: 4100) #include #include #include "ezparse.h" DWORD ErrorCount = 0; PEZPARSE_CONTEXT EzParseCurrentContext = NULL; // To force build tool to recognize our errors #define BUILD_PREFIX_FNAME "cl %s\n" #define BUILD_PREFIX "cl wpp\n" void ExParsePrintErrorPrefix(FILE* f, char * func) { ++ErrorCount; if (EzParseCurrentContext) { fprintf(f,BUILD_PREFIX_FNAME "%s(%d) : error : (%s)", EzParseCurrentContext->filename, EzParseCurrentContext->filename, EzGetLineNo(EzParseCurrentContext->currentStart, EzParseCurrentContext), func); } else { fprintf(f,BUILD_PREFIX "wpp : error : (%s)", func); } } LPCSTR skip_stuff_in_quotes(LPCSTR q, LPCSTR begin) { char ch = *q; if (q > begin) { if (q[-1] == '\\') { return q - 1; } } for(;;) { if (q == begin) { return 0; } --q; if (*q == ch && ( (q == begin) || (q[-1] != '\\') ) ) { return q; } } } void adjust_pair( STR_PAIR& str ) /*++ Shrink the pair to remote leading and trailing whitespace */ { while (str.beg < str.end && isspace(*str.beg)) { ++str.beg; } while (str.beg < str.end && isspace(str.end[-1])) { --str.end; } } void remove_cpp_comment(STR_PAIR& str) { LPCSTR p = str.beg; // printf("rcb: %s\n", std::string(str.beg, str.end).c_str()); // let's cut the comment in the beginning of the string for(;;) { // skip the whitespace for(;;) { if (p == str.end) return; if (!isspace(*p)) break; ++p; } str.beg = p; if (p + 1 == str.end) return; if (p[0] == '/' && p[1] == '/') { // we have a comment. Need to get to the end of the comment p += 2; // printf("rcd: %s %s\n", std::string(str.beg, p).c_str(), std::string(p,str.end).c_str()); for(;;) { if (p == str.end) return; if (*p == '\r' || *p == '\n') { str.beg = p; break; } ++p; } } else { // no leading comment break; } } // printf("rcc: %s %s\n", std::string(str.beg, p).c_str(), std::string(p,str.end).c_str()); for(;;) { if (p == str.end) return; if (*p == '"') { // don't look for comments within a string for(;;) { if (++p == str.end) return; if (*p == '"' && p[-1] != '\\') break; } ++p; continue; } if (p + 1 == str.end) return; if (p[0] == '/') if (p[1] == '/') break; else p += 2; else p += 1; } str.end = p; // printf("rce: %s\n", std::string(str.beg, str.end).c_str()); } DWORD ScanForFunctionCallsEx( IN LPCSTR begin, IN LPCSTR end, IN EZPARSE_CALLBACK Callback, IN PVOID Context, IN OUT PEZPARSE_CONTEXT ParseContext, IN DWORD Options ) /*++ Routine Description: Scan the buffer for expressions that looks like function calls, i.e name(sd,sdf,sdf,sdf,sdf); . It will treat variable declaration with constructor call as a function call as well. Inputs: begin, end -- pointers to the beginning and the end of the buffer Callback -- to be called for every function Context -- opaque context to be passed to callback ParseContext -- holds current parse state information --*/ { LPCSTR p = begin; LPCSTR q, funcNameEnd; DWORD Status = ERROR_SUCCESS; bool double_par = FALSE; no_match: if (Options & NO_SEMICOLON) { q = end; Options &= ~NO_SEMICOLON; } else { do { ++p; if (p == end) { return Status; } } while ( *p != ';' ); // Ok. Now p points to ';' // q = p; } do { if (--q <= begin) { goto no_match; } } while ( isspace(*q) ); // Now q points on the first non white space character // // If it is not a ')' then we need to search for the next ';' // if (*q != ')') { goto no_match; } ParseContext->macroEnd = q; // Ok. This is a function call (definition). // Now, let's go and collect all the arguments of the first level and // get to the name of the function // HACKHACK // We need a special case for functions that looks like // KdPrintEx((Level, Indent, Msg, ...)); // Essentially, we need to treat them as // KdPrintEx(Level, Indent, Msg, ...); const char *r = q; // check if we have )); do { if (--r <= begin) break; // no "));" } while ( isspace(*r) ); double_par = r > begin && *r == ')'; if (double_par) { q = r; // we assume that this is KdPrint((a,b,c,d,...)); at the moment // if our assumtion is wrong, we will retry the loop below } retry: { int level = 0; LPCSTR ends[128], *current = ends; STR_PAIR strs[128]; // LPCSTR closing_parenthisis = q; *current = q; for(;;) { --q; if (q <= begin) { goto no_match; } switch (*q) { case ',': if (!level) *++current = q; break; case '(': if (level) --level; else goto maybe_match; break; case ')': ++level; break; case '\'': case '"': q = skip_stuff_in_quotes(q, begin); if(!q) goto no_match; } } maybe_match: *++current = q; funcNameEnd = q; // now q point to '(' we need to find name of the function // do { --q; if (q <= begin) { goto no_match; } } while(isspace(*q)); // now q points to first not white character if (double_par) { // if we see )); and found a matching // parenthesis for the inner one, we can have // one of two cases // 1) KdPrint((a,b,c,d,...)); // or // 2) DebugPrint(a,b,(c,d)); // If it is the latter, we just need to // retry the scanning, now using leftmost bracket as a starting point if (*q != '(') { // restore q to the rightmost parenthesis q = ParseContext->macroEnd; double_par = FALSE; goto retry; } funcNameEnd = q; // now q point to '(' we need to find name of the function // do { --q; if (q <= begin) { goto no_match; } } while(isspace(*q)); } // now q points to first non white character // BUGBUG '{' and '}' are allowed only in config files if (*q == '}') { for(;;) { if (--q < begin) goto no_match; if (*q == '{') break; } if (--q < begin) goto no_match; } if (!(isalpha(*q) || isdigit(*q) || *q == '_')) { goto no_match; } do { --q; if (q <= begin) { goto found; } } while ( isalpha(*q) || isdigit(*q) || *q == '_'); ++q; if (isdigit(*q)) { goto no_match; } found: if (Options & IGNORE_COMMENT) // Verify that it is not a comment // # sign in the beginning of the line { LPCSTR line = q; // // Find the beginning of the line or file // for(;;) { if (line == begin) { // Beginning of the file. Good enough break; } if (Options & IGNORE_CPP_COMMENT && line[0] == '/' && line[1] == '/') { // C++ comment. Ignore goto no_match; } if (*line == 13 || *line == 10) { ++line; break; } --line; } // // If the first non-white character is #, ignore it // while (line <= q) { if ( *line != ' ' && *line != '\t' ) { break; } ++line; } if (Options & IGNORE_POUND_COMMENT && *line == '#') { goto no_match; } } { int i = 0; strs[0].beg = q; strs[0].end = funcNameEnd; adjust_pair(strs[0]); while (current != ends) { // putchar('<');printrange(current[0]+1, current[-1]); putchar('>'); ++i; strs[i].beg = current[0]+1; --current; strs[i].end = current[0]; adjust_pair(strs[i]); remove_cpp_comment(strs[i]); } ParseContext->currentStart = strs[0].beg; ParseContext->currentEnd = strs[0].end; ParseContext->doubleParent = double_par; Status = Callback(strs, i+1, Context, ParseContext); if (Status != ERROR_SUCCESS) { return Status; } } goto no_match; } // return ERROR_SUCCESS; // unreachable code } DWORD ScanForFunctionCalls( IN LPCSTR begin, IN LPCSTR end, IN EZPARSE_CALLBACK Callback, IN PVOID Context, IN OUT PEZPARSE_CONTEXT ParseContext ) { return ScanForFunctionCallsEx( begin, end, Callback, Context, ParseContext, IGNORE_COMMENT); } DWORD EzGetLineNo( IN LPCSTR Ptr, IN OUT PEZPARSE_CONTEXT ParseContext ) /*++ Computes a line number based on an pointer within a buffer. Last known lineno/pointer is cached in ParseContext for performance */ { int count = ParseContext->scannedLineCount; LPCSTR downto = ParseContext->lastScanned; LPCSTR p = Ptr; if (downto > p) { count = 1; downto = ParseContext->start; } while (p > downto) { if (*p == '\n') { ++count; } --p; } ParseContext->scannedLineCount = count; ParseContext->lastScanned = Ptr; return count; } const char begin_wpp[] = "begin_wpp"; const char end_wpp[] = "end_wpp"; const char define_[] = "#define"; const char enum_[] = "enum "; enum { begin_wpp_size = (sizeof(begin_wpp)-1), end_wpp_size = (sizeof(end_wpp)-1), define_size = (sizeof(define_)-1), enum_size = (sizeof(enum_)-1), }; typedef struct _SmartContext { EZPARSE_CALLBACK Callback; PVOID Context; OUT PEZPARSE_CONTEXT ParseContext; std::string buf; } SMART_CONTEXT, *PSMART_CONTEXT; void DoEnumItems(PSTR_PAIR name, LPCSTR begin, LPCSTR end, PSMART_CONTEXT ctx) { LPCSTR p,q; ULONG value = 0; STR_PAIR Item; BOOL First = TRUE; ctx->buf.assign("CUSTOM_TYPE("); ctx->buf.append(name->beg, name->end); ctx->buf.append(", ItemListLong"); p = begin; while(begin < end && isspace(*--end)); // skip spaces if (begin < end && *end != ',') ++end; for(;p < end;) { Item.beg = p; q = p; for(;;) { if (q == end) { goto enum_end; } if (*q == ',' || *q == '}') { // valueless item. Use current Item.end = q; break; } else if (*q == '=') { // need to calc the value. Skip for now // Item.end = q; while (q < end && *q != ',') ++q; break; } ++q; } adjust_pair(Item); if (Item.beg == Item.end) { break; } if (First) {ctx->buf.append("("); First = FALSE;} else ctx->buf.append(","); ctx->buf.append(Item.beg, Item.end); if (q == end) break; p = q+1; ++value; } enum_end:; ctx->buf.append(") )"); ScanForFunctionCallsEx( &ctx->buf[0], &ctx->buf[0] + ctx->buf.size(), ctx->Callback, ctx->Context, ctx->ParseContext, NO_SEMICOLON); Flood("enum %s\n", ctx->buf.c_str()); } void DoEnum(LPCSTR begin, LPCSTR end, PSMART_CONTEXT Ctx) { LPCSTR p, q, current = begin; for(;;) { p = std::search(current, end, enum_, enum_ + enum_size); if (p == end) break; q = std::find(p, end, '{'); if (q == end) break; // let's figure out enum name // STR_PAIR name; name.beg = p + enum_size; name.end = q; adjust_pair(name); if ( *name.beg == '_' ) ++name.beg; p = q+1; // past "{"; q = std::find(p, end, '}'); if (q == end) break; if (name.end > name.beg) { DoEnumItems(&name, p, q, Ctx); } else { ReportError("Cannot handle tagless enums yet"); } current = q; } } DWORD SmartScan( IN LPCSTR begin, IN LPCSTR end, IN EZPARSE_CALLBACK Callback, IN PVOID Context, IN OUT PEZPARSE_CONTEXT ParseContext ) { LPCSTR block_start, block_end, current = begin; SMART_CONTEXT Ctx; Ctx.Callback = Callback; Ctx.Context = Context; Ctx.ParseContext = ParseContext; for(;;) { block_start = std::search(current, end, begin_wpp, begin_wpp + begin_wpp_size); if (block_start == end) break; current = block_start; block_end = std::search(block_start, end, end_wpp, end_wpp + end_wpp_size); if (block_end == end) break; Flood("Block Found\n"); // determine block type // // begin_wpp enum // begin_wpp config // begin_wpp func // begin_wpp define LPCSTR block_type = block_start + begin_wpp_size + 1; Flood("block_type = %c%c%c%c\n", block_type[0],block_type[1],block_type[2],block_type[3]); if (memcmp(block_type, "enum", 4) == 0) { // do enum block // DoEnum( block_type + 4, block_end, &Ctx ); } else if (memcmp(block_type, "config", 6) == 0) { // do config block // ScanForFunctionCallsEx(block_type + 6, block_end, Callback, Context, ParseContext, IGNORE_POUND_COMMENT); } else if (memcmp(block_type, "func", 4) == 0) { LPCSTR func_start, func_end; current = block_type + 6; for(;;) { func_start = std::search(current, block_end, define_, define_ + define_size); if (func_start == block_end) break; func_start += define_size; while (isspace(*func_start)) { if(++func_start == block_end) goto no_func; } func_end = func_start; while (!isspace(*func_end)) { if(*func_end == '(') break; if(++func_end == block_end) goto no_func; } if(*func_end != '(') { Ctx.buf.assign(func_start, func_end); Ctx.buf.append("(MSGARGS)"); } else { func_end = std::find(func_start, block_end, ')'); if (func_end == block_end) break; ++func_end; // include ")" Ctx.buf.assign(func_start, func_end); } Flood("Func %s\n", Ctx.buf.c_str()); ScanForFunctionCallsEx( Ctx.buf.begin(), Ctx.buf.end(), Callback, Context, ParseContext, NO_SEMICOLON); current = func_end; } no_func:; } else if (memcmp(block_type, "define", 6) == 0) { // do define block } else { ReportError("Unknown block"); } current = block_end + end_wpp_size; } if (current == begin) { // file without marking, let's do default processing Unusual("Reverting back to plain scan\n"); ScanForFunctionCalls(begin, end, Callback, Context, ParseContext); } return ERROR_SUCCESS; } DWORD EzParse( IN LPCSTR filename, IN EZPARSE_CALLBACK Callback, IN PVOID Context) { // return EzParseEx(filename, SmartScan, Callback, Context); return EzParseEx(filename, ScanForFunctionCalls, Callback, Context, IGNORE_POUND_COMMENT); } DWORD EzParseWithOptions( IN LPCSTR filename, IN EZPARSE_CALLBACK Callback, IN PVOID Context, IN DWORD Options) { return EzParseEx(filename, ScanForFunctionCalls, Callback, Context, Options); } DWORD EzParseEx( IN LPCSTR filename, IN PROCESSFILE_CALLBACK ProcessData, IN EZPARSE_CALLBACK Callback, IN PVOID Context, IN DWORD Options ) { DWORD Status = ERROR_SUCCESS; HANDLE mapping; HANDLE file = CreateFileA(filename, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, 0); if (file == INVALID_HANDLE_VALUE) { Status = GetLastError(); ReportError("Cannot open file %s, error %u\n", filename, Status ); return Status; } DWORD size = GetFileSize(file, 0); mapping = CreateFileMapping(file,0,PAGE_READONLY,0,0, 0); if (!mapping) { Status = GetLastError(); ReportError("Cannot create mapping, error %u\n", Status ); CloseHandle(file); return Status; } PCHAR buf = (PCHAR)MapViewOfFileEx(mapping, FILE_MAP_READ,0,0,0,0); if (buf) { EZPARSE_CONTEXT ParseContext; ZeroMemory(&ParseContext, sizeof(ParseContext) ); ParseContext.start = buf; ParseContext.filename = filename; ParseContext.scannedLineCount = 1; ParseContext.lastScanned = buf; ParseContext.previousContext = EzParseCurrentContext; ParseContext.Options = Options; EzParseCurrentContext = &ParseContext; Status = (*ProcessData)(buf, buf + size, Callback, Context, &ParseContext); EzParseCurrentContext = ParseContext.previousContext; UnmapViewOfFile( buf ); } else { Status = GetLastError(); ReportError("MapViewOfFileEx failed, error %u\n", Status ); } CloseHandle(mapping); CloseHandle(file); return Status; } DWORD EzParseResourceEx( IN LPCSTR ResName, IN PROCESSFILE_CALLBACK ProcessData, IN EZPARSE_CALLBACK Callback, IN PVOID Context) { DWORD Status = ERROR_SUCCESS; HRSRC hRsrc; hRsrc = FindResource( NULL, //this Module ResName, RT_RCDATA); if (hRsrc == NULL) { Status = GetLastError(); ReportError("Cannot open resource %s, error %u\n", ResName, Status ); return Status; } HGLOBAL hGlobal = LoadResource(NULL, hRsrc); if (!hGlobal) { Status = GetLastError(); ReportError("LockResource failed, error %u\n", Status ); return Status; } DWORD size = SizeofResource(NULL, hRsrc); PCHAR buf = (PCHAR)LockResource(hGlobal); if (buf) { EZPARSE_CONTEXT ParseContext; ZeroMemory(&ParseContext, sizeof(ParseContext) ); ParseContext.start = buf; ParseContext.filename = ResName; ParseContext.scannedLineCount = 1; ParseContext.lastScanned = buf; ParseContext.previousContext = EzParseCurrentContext; EzParseCurrentContext = &ParseContext; Status = (*ProcessData)(buf, buf + size, Callback, Context, &ParseContext); EzParseCurrentContext = ParseContext.previousContext; } else { Status = GetLastError(); ReportError("LockResource failed, error %u\n", Status ); } // According to MSDN. There is no need to call Unlock/Free Resource return Status; }