#include #include #include #include #include "extract.h" #include "tags.h" #define SPACE(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') /* Output routine error messages */ char errOut[] = "%s: Error writing to file.\n"; /* Standard templated error messages */ static char errmsg[] = "%s (%u): %s\n"; /* File private functions */ static WORD CommonGetBlock(NPSourceFile sf, PSTR p); /* * @doc EXTRACT * * @api void | OutputTag | Print a tag name to the output file. * * @parm NPSourceFile | sf | Specifies the source file buffer block. * * @parm WORD | wBlock | Specifies the current outerlevel block type. * * @parm WORD | wTag | Gives the index of the tag to print. * * @comm Prints the innerlevel block tag specified by

, as * determined from the global tag array. The output tag printed is * affected by the current outerlevel block type, so that different * outerlevel blocks will generate different output tags for the same * input tag. * * The tag text is followed by a tab character. No output will occur if * the global fNoOutput flag is True. * */ void OutputTag(NPSourceFile sf, WORD wBlock, WORD wTag) { if (fNoOutput) return; /* Output text, and if error occurs, exit() for now. HACK! */ putc(TAG, fpOutput); assert(wBlock < NUM_LEVELS); /* Make sure there's a valid output tag to print */ assert(DocTags[wBlock][wTag] != NULL); if (fputs(DocTags[wBlock][wTag], fpOutput)) { fprintf(stderr, errOut, sf->fileEntry->filename); exit(4); } putc('\t', fpOutput); } /* * @doc EXTRACT * @api void | OutputTagText | Print a tag to the output file, where * the tag is specified by an immediate string. * * @parm NPSourceFile | sf | Specifies the source file buffer block. * @parm PSTR | szTag | Specifies the tag to output. * * @comm Prints tag

szTag to the output file.  The tag text is
 * followed by a tab character.

should not include the tag * prefix character (ie the '@') sign, as this is printed automatically. * No output will occur if the global flag fNoOutput is set. * */ void OutputTagText(NPSourceFile sf, PSTR szTag) { if (fNoOutput) return; /* Output text, and if error occurs, exit() for now. HACK! */ putc(TAG, fpOutput); if (fputs(szTag, fpOutput)) { fprintf(stderr, errOut, sf->fileEntry->filename); exit(4); } putc('\t', fpOutput); } /* * @doc EXTRACT * @api void | OutputRegion | Print the text between the point and * the mark, inclusive. * * @parm NPSourceFile | sf | Specifies the source file buffer block, * containing the output buffer, point, and mark. * @parm char | chPost | Specifies character with which to output * after outputing the text region. Usually a newline. This character * is ignored if NULL. * * @comm Prints the region given by the pt and mark fields of

sf.  The text printed is inclusive from the point to the mark.  The
 * chPost character is appended to the output if

is * non-NULL (useful for printing newlines or tabs). * * If a write error occurs, an error message is printed to stderr and * the program exited. * * No output occurs if the global fNoOutput flag is TRUE. * */ void OutputRegion(NPSourceFile sf, char chPost) { char c; if (fNoOutput) return; /* Save char following mark, replace with NULL for printing */ if (*sf->mark) { c = *(sf->mark + 1); *(sf->mark + 1) = '\0'; } if (fputs(sf->pt, fpOutput)) { TextOutputError: fprintf(stderr, errOut, sf->fileEntry->filename); exit(4); } /* Send newline if one was asked for */ if (chPost) if (EOF == putc(chPost, fpOutput)) goto TextOutputError; /* Restored NULLed over character */ if (*sf->mark) *(sf->mark + 1) = c; } /* * @doc EXTRACT * @api void | OutputText | Outputs an arbitrary text string to the * output file. * * @parm NPSourceFile | sf | Specifies the source file buffer block. * @parm PSTR | szText | Specifies the text string to print. * * @comm Prints

to the output file. If a write error * occurs, an error message is printed and the program exited. If the * global fNoOutput flag is set, no output occurs. No newlines or other * formatting characters are appended to the output. * */ void OutputText(NPSourceFile sf, PSTR szText) { if (fNoOutput) return; if (fputs(szText, fpOutput)) { TextOutputError: fprintf(stderr, errOut, sf->fileEntry->filename); exit(4); } } /* * @doc EXTRACT * @api void | CopyRegion | Copies the current region from point to * mark inclusive into a null terminated buffer. * @parm NPSourceFile | sf | Specifies the source file buffer block. * @parm PSTR | buf | Pointer to buffer into which region will be * placed. * @parm WORD | wLen | Length in bytes of buffer

buf.
 *
 * @comm Copies the region from point to mark inclusive into the
 * buffer buf.  Up to wLen - 1 characters will be copied, and

is * guaranteed to be NULL terminated. * */ void CopyRegion(NPSourceFile sf, PSTR buf, WORD wLen) { PSTR p; PSTR end; /* Fixup end to smaller of length of buffer, or region to copy */ end = sf->pt + (int) min(wLen, ((int) (sf->mark - sf->pt))); for (p = sf->pt; *p && p < end; *buf++ = *p++); *buf = '\0'; } /* * @doc EXTRACT * @api BOOL | FindNextTag | Moves the point forward until it points * to the next tag in a comment block, and moves the mark to the end of * the tag word. * * @parm NPSourceFile | sf | Specifies the source file buffer block. * * @rdesc Returns TRUE if a tag was found, or FALSE if no tag was found * in the comment block. * * @comm Starting from the current point, moves the point forward to * the next tag in the block. The mark is moved to the end of the tag * that is found. If no next tag exists in the buffer, FALSE is * returned and the mark and point are undefined. * * Note that multiple calls to without intervening calls * to move the point will cause the same tag to be repeatadly * found, as the search for tags begins at the point. * */ BOOL FindNextTag(NPSourceFile sf) { PSTR p; /* move forward until finding next tag, put point there */ p = sf->pt; BogusNextTag: for (; *p && *p != TAG; p++); /* Make sure that this is a tag by testing for a \n before the TAG char */ if (p > sf->lpbuf && *(p-1) != '\n') { p++; goto BogusNextTag; } if (!*p) return FALSE; // end of comment block! p++; if (!(*p && !SPACE(*p))) { sf->mark = p; return FALSE; } /* save beginning of tag */ sf->pt = p - 1; /* now move forward until finding next space, set mark there */ for (; *p && !SPACE(*p); p++); sf->mark = p; return TRUE; } /* * @doc EXTRACT * @api WORD | GetFirstBlock | Moves the point and mark to surround * the first block of text following a tag that has been located with * . * * @parm NPSourceFile | sf | Identifies the source file buffer * block. 
* * @rdesc If the call succeeds, the point is set to the start of the * text block that immediately follows the tag. The mark is set to the * end of this block, and either RET_ENDTAG or RET_ENDBLOCK is returned, * depending on if there are no more blocks in the tag or if there is a block * following respectively. * * If the call fails, the point is set to the start of the next tag or * the end of the comment buffer if no more tags exist, and * RET_EMPTYBLOCK is returned. * * In any case, if this function is followed by a call to * , no problems will result. * * @comm This call expects the point to be pointing the beginning of * the tag upon entry (as setup by ). Error conditions * should be checked upon exit from this function. * */ WORD GetFirstBlock(NPSourceFile sf) { PSTR p; p = sf->pt; /* Assumes that I'm on beginning of tag */ assert(*p == TAG); /* Move forward to first non-whitespace, to skip over tag */ for (; *p && !SPACE(*p); p++); // skip word for (; *p && SPACE(*p); p++); // skip whitespace /* Set point to this location, the beginning of the text */ sf->pt = p; return CommonGetBlock(sf, p); } /* * @doc EXTRACT * @api WORD | GetNextBlock | Moves the point and mark to surround * the next block of text of a particular tag. * * @parm NPSourceFile | sf | Identifies the source file buffer * information. * * @rdesc If the call succeeds, the point is set to the start of the * text block that follows the initial mark. The mark upon return is * set the end of the next text block. Either RET_ENDTAG or * RET_ENDBLOCK is returned. * * If the call fails due to a non existent block, or encountering the * end of the comment buffer, RET_ENDCOMMENT is returned and the point * is set to the start of the next tag or the end of the comment * buffer. * * @comm This procedure, in combination with , allows * the tag reader to step through the text fields associated with a tag. * Contiguous calls to are possible, which will * move the region forward to surround each field. 
If the tag's text * fields end prematurely, RET_EMPTYBLOCK will be returned as an error * flag. * * Calls to may always be followed by a call to * . * */ WORD GetNextBlock(NPSourceFile sf) { PSTR p; WORD ret; /* Entry: mark is at end of previous block of text. Move forward * to find the start of the next block (the one we want). */ p = sf->mark; /* If I'm on a block char, then this is an empty block being exited, * So we want to not skip whitespace */ if (*p != BLOCK) p++; /* Skip whitespace, till `|' char found */ for (; *p && SPACE(*p); p++); /* This should be the start of next block. If not, then puke */ if (*p != BLOCK) { sf->pt = sf->mark = p; // reset mark and point for FindNextTag. return RET_EMPTYBLOCK; } /* Don't bother with END_COMMENT conditions (ie NULL), as CommonGetBlock * will return RET_EMPTYBLOCK for this case. The next FindNextTag() * will then fail, causing a general comment buffer failure to result! */ #if 0 if (!*p) { sf->pt = p; return RET_ENDCOMMENT; } #endif /* Skip more whitespace, to start of actual text, set point there */ /* (if this under EOF, no pt advance is done */ if (*p) // skip the '|' char if there is one. p++; for (; *p && SPACE(*p); p++); sf->pt = p; // point at beginning of text return CommonGetBlock(sf, p); } /* * @doc EXTRACT * @api WORD | CommonGetBlock | Common block searcher routine for use * by and . * * @parm NPSourceFile | sf | Blah. * @parm PSTR | p | Point to start searching for the beginning of a * text block from. * * @rdesc Returns RET_ENDBLOCK when there are text blocks following * this tag, RET_ENDTAG when no more text blocks follow for this tag, or * RET_EMPTYBLOCK when this block has no text. Current region (point to * mark inclusive) is set to the selected block. * * @comm Performs magic. This does the real work for * and . * */ static WORD CommonGetBlock(NPSourceFile sf, PSTR p) { PSTR porig; PSTR psave; WORD ret; /* Entry: Save the initial p, as this is assumed to be the * start of the current block. 
*/ porig = p; /* Scan forward until end of this block, either @ or | or EOF */ GetBlockScan: for (; *p && !(*p == TAG || *p == BLOCK); p++); /* Make sure there isn't an escaped char kicking off the scan */ if (*p == BLOCK) if (p > sf->lpbuf && *(p-1) == '\\') { p++; goto GetBlockScan; } /* Check the same thing for at characters */ if (*p == TAG) /* Tag must be on start of new line, so if not there, kick it out */ if (p > sf->lpbuf && *(p-1) != '\n') { p++; goto GetBlockScan; } /* Encountered another tag, or another block. For both, backup to * last non-white character, set mark there. Return appropriate * condition codes. */ ret = RET_ENDTAG; // the default return value. if (*p == BLOCK) ret = RET_ENDBLOCK; // if encountered another block following /* Now back up whitespaces until last non-whitespace is found. * If we end up backing up over the original setting of p on entry, * then this is an empty block, and return error condition. */ psave = p; // hang onto this location, if EMPTYBLOCK occurs. for (p--; *p && SPACE(*p) && p >= porig; p--); if (p < porig) { // emptyblock, so pt = end of prev block. sf->mark = sf->pt = psave; // point to next tag return RET_EMPTYBLOCK; } else { // normal backed up to end of block, set mark there. sf->mark = p; return ret; } } /* * @doc EXTRACT * @api void | FixLineCounts | Updates the line counts of the current * point and mark for error reporting purposes. * * @parm NPSourceFile | sf | Blah. * * @parm PSTR | pt | Point to return the line number of. This must be * a valid point within the comment buffer of

sf.
 *
 * @rdesc Returns the line number of point pt within the comment
 * buffer of

. Newlines are counted to determine the line offset * within the buffer, and the resulting number of newlines added to the * initial line number of the first line of the comment buffer. This * value is returned. It is thus important for other tag reader * routines not to alter the original comment buffer, as the line number * returned from this routine would then be invalid. * */ WORD FixLineCounts(NPSourceFile sf, PSTR pt) { PSTR c; WORD w; /* Update the line counts for the point and mark by counting * newlines in the buffer */ w = 0; for (c = sf->lpbuf; c <= pt; c++) { if (*c == '\n') w++; if (c == pt) return (sf->wLineBuf + w); } /* something bogus happened */ return 0; } /* * @doc EXTRACT * @api void | PrintError | Prints an error message in a standard * format, and sets the exit condition flag for the source file block. * * @parm NPSourceFile | sf | Specifies the source file buffer block. * @parm PSTR | szMessage | Error message to print. * @parm BOOL | fExit | Indicates whether this is a fatal exit. If * TRUE, the program will exit when the current file has been completely * parsed. * * @comm Prints the source file filename and the line number of the * current point to standard error, followed by

. * */ void PrintError(NPSourceFile sf, PSTR szMessage, BOOL fExit) { WORD w; w = FixLineCounts(sf, sf->pt); fprintf(stderr, errmsg, sf->fileEntry->filename, w, szMessage); if (fExit) sf->fExitAfter = TRUE; } /* * @doc EXTRACT * @api WORD | ProcessWordList | Process a whitespace or comma * separated list of words following a tag, formatting * them as a space separated list of words. * * @parm NPSourceFile | sf | Blah. * @parm PSTR * | bufPt | Pointer to a buffer pointer, which should * initially contain a near buffer obtained with , where * the formatted word list will be placed. The buffer pointed to will * be automatically expanded as necessary. * * @parm BOOL | fCap | Specifies whether to convert to uppercase * the processed list of words. * * @rdesc Returns either RET_ENDBLOCK or RET_ENDTAG, depending on * whether there are following blocks within the tag's text or not, * respectively. (What a horrible sentence). The point and mark will * be at the end of the text block upon return. If there is no text * block following the tag, then RET_EMPTYBLOCK is returned, and the * point and mark point to the next tag in the comment block, or the * end of the comment block. 
* */ #define SEPSPACE(c) ((c)==' ' || (c)=='\n' || (c)=='\t' ||(c)==','||(c)==';') WORD ProcessWordList(NPSourceFile sf, PSTR *bufPt, BOOL fCap) { WORD ret; // hold return code PSTR pNew; // runner on copy buffer PSTR pOldMark; // keep the old mark around PSTR p; // runner on comment block ret = RET_ENDTAG; /* Get the text of the first block, ie the doclevel specification */ ret = GetFirstBlock(sf); if (ret == RET_EMPTYBLOCK) return ret; /* Warn if there's extra text blocks on DOC tag, ie ret == RET_ENDBLOCK */ /* Grow the memory copy buffer if needed */ if (NearSize(*bufPt) < (int) (sf->mark - sf->pt) + 5) *bufPt = NearRealloc(*bufPt, (WORD) (sf->mark - sf->pt) + 10); /* Save away copy buffer status */ pNew = *bufPt; pOldMark = sf->mark + 1; // save mark plus one p = sf->pt; while (1) { /* skip whitespace before doc level word */ for (; p < pOldMark && SEPSPACE(*p); p++); if (p >= pOldMark) { dprintf("ProcessWordList: Breaking loop after space skip\n"); break; } /* Save this location, beginning of word, and move to end of word */ for (sf->pt = p; p < pOldMark && !SEPSPACE(*p); p++) if (fCap) *pNew++ = (char) toupper(*p); else *pNew++ = *p; /* Put a space between the words, and then null terminate in * case this is the last word in a list */ *pNew++ = ' '; *pNew = '\0'; /* Check if we're at end of buffer */ if (p >= pOldMark) { dprintf("ProcessWordList: Breaking loop after word copy.\n"); break; // get out of loop } } // while loop /* Restore point and mark to the end of @doc text block */ sf->pt = sf->mark = pOldMark - 1; return ret; } /* * @doc EXTRACT * @api void | OutputFileHeader | Prints an output file header using * compiled in constants and system information. * * @parm FILE * | fpOut | File pointer to which to write header. * * @comm Currently, only the program name, version, and the current * time (in UNIX ) format. The file header is surrounded by * header begin and end tags. 
* */ #include #include "version.h" void OutputFileHeader(FILE *fpOut) { time_t curtime; fprintf(fpOut, "@%s\t\n", T2TEXT_BEGINHEADER); fprintf(fpOut, "@%s\t%s\n", T2TEXT_EXTRACTID, VERSIONNAME); fprintf(fpOut, "@%s\t%d.%d.%d\n", T2TEXT_EXTRACTVER, rmj, rmm, rup); time(&curtime); fprintf(fpOut, "@%s\t%s", T2TEXT_EXTRACTDATE, asctime(localtime(&curtime))); fprintf(fpOut, "@%s\t\n", T2TEXT_ENDHEADER); }