736 lines
33 KiB
C
736 lines
33 KiB
C
|
/*static char *SCCSID = "@(#)qmatch.c 13.7 90/08/13";*/
|
||
|
|
||
|
|
||
|
#include <stdio.h>
|
||
|
#include <ctype.h>
|
||
|
#include <assert.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
|
||
|
/* ---- Sizes ---------------------------------------------------------- */
#define ASCLEN  128     /* Size of the ASCII character set */
#define BUFLEN  256     /* Size of scratch buffers */
#define EOS     ('\r')  /* Character that marks end of string */
#define PATMAX  512     /* Upper bound on a parsed pattern, in bytes */

/* ---- Bits tested in the `flags' word -------------------------------- */
#define BEGLINE 0x08    /* Match only at beginning of line */
#define ENDLINE 0x10    /* Match only at end of line */
#define DEBUG   0x20    /* Emit debugging output */

/* ---- Token type bytes used in a parsed expression -------------------- */
#define T_END    0      /* Terminates the expression */
#define T_STRING 1      /* Literal string */
#define T_SINGLE 2      /* One literal character */
#define T_CLASS  3      /* Character class */
#define T_ANY    4      /* Any character */
#define T_STAR   5      /* Prefix of a *-expression */
|
||
|
|
||
|
|
||
|
/*
 * One entry of a singly linked list of parsed expressions.
 */
typedef struct exprnode
{
    struct exprnode *ex_next;       /* Following entry, or NULL at the tail */
    unsigned char   *ex_pattern;    /* Parsed pattern this entry refers to */
}
EXPR;
|
||
|
|
||
|
|
||
|
static int clists = 1; /* One is first available index */
|
||
|
static int toklen[] = /* Table of token lengths */
|
||
|
{
|
||
|
32767, /* T_END: invalid */
|
||
|
32767, /* T_STRING: invalid */
|
||
|
2, /* T_SINGLE */
|
||
|
ASCLEN/8+1, /* T_CLASS */
|
||
|
1, /* T_ANY */
|
||
|
32767 /* T_STAR: invalid */
|
||
|
};
|
||
|
int ( __cdecl *ncmp)(const char *,const char *,size_t);
|
||
|
/* String comparison pointer */
|
||
|
|
||
|
unsigned char *exprparse(unsigned char *p);
|
||
|
|
||
|
extern int casesen; /* Case-sensitivity flag */
|
||
|
extern char *(*find)(); /* Pointer to search function */
|
||
|
extern int flags; /* Flags */
|
||
|
extern int strcnt; /* String count */
|
||
|
extern char transtab[]; /* Translation table */
|
||
|
EXPR *stringlist[ASCLEN];
|
||
|
/* String table */
|
||
|
|
||
|
|
||
|
void addexpr(char *e, int n); /* Add expression */
|
||
|
char *get1stcharset(unsigned char *e, char *bitvec);
|
||
|
extern char *alloc(); /* User-defined heap allocator */
|
||
|
unsigned char *simpleprefix();/* Match simple prefix */
|
||
|
char *strnupr(); /* See QGREPSUB.ASM */
|
||
|
|
||
|
|
||
|
unsigned char *simpleprefix(s,pp)
|
||
|
register unsigned char *s; /* String pointer */
|
||
|
unsigned char **pp; /* Pointer to pattern pointer */
|
||
|
{
|
||
|
register unsigned char *p; /* Simple pattern pointer */
|
||
|
register int c; /* Single character */
|
||
|
|
||
|
p = *pp; /* Initialize */
|
||
|
while(*p != T_END && *p != T_STAR) /* While not at end of pattern */
|
||
|
{
|
||
|
switch(*p++) /* Switch on token type */
|
||
|
{
|
||
|
case T_STRING: /* String to compare */
|
||
|
if((*ncmp)(s,p + 1,*p) != 0) return(NULL);
|
||
|
/* Fail if mismatch found */
|
||
|
s += *p; /* Skip matched portion */
|
||
|
p += *p + 1; /* Skip to next token */
|
||
|
break;
|
||
|
|
||
|
case T_SINGLE: /* Single character */
|
||
|
c = *s++; /* Get character */
|
||
|
if(!casesen) c = toupper(c);
|
||
|
/* Map to upper case if necessary */
|
||
|
if(c != (int)*p++) return(NULL);
|
||
|
/* Fail if mismatch found */
|
||
|
break;
|
||
|
|
||
|
case T_CLASS: /* Class of characters */
|
||
|
if(!isascii(*s) || !(p[*s >> 3] & (1 << (*s & 7)))) return(NULL);
|
||
|
/* Failure if bit not set */
|
||
|
p += ASCLEN/8; /* Skip bit vector */
|
||
|
++s; /* Skip character */
|
||
|
break;
|
||
|
|
||
|
case T_ANY: /* Any character */
|
||
|
if(*s++ == EOS) return(NULL);
|
||
|
/* Match all but end of string */
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
*pp = p; /* Update pointer */
|
||
|
return(s); /* Pattern is prefix of s */
|
||
|
}
|
||
|
|
||
|
|
||
|
int match(s,p)
|
||
|
register unsigned char *s; /* String to match */
|
||
|
unsigned char *p; /* Pattern to match against */
|
||
|
{
|
||
|
register unsigned char *q; /* Temporary pointer */
|
||
|
unsigned char *r; /* Temporary pointer */
|
||
|
register int c; /* Character */
|
||
|
|
||
|
if(*p != T_END && *p != T_STAR && (s = simpleprefix(s,&p)) == NULL)
|
||
|
return(0); /* Failure if prefix mismatch */
|
||
|
if(*p++ == T_END) return(1); /* Match if end of pattern */
|
||
|
q = r = p; /* Point to repeated token */
|
||
|
r += toklen[*q]; /* Skip repeated token */
|
||
|
switch(*q++) /* Switch on token type */
|
||
|
{
|
||
|
case T_ANY: /* Any character */
|
||
|
while(match(s,r) == 0) /* While match not found */
|
||
|
{
|
||
|
if(*s++ == EOS) return(0);/* Match all but end of string */
|
||
|
}
|
||
|
return(1); /* Success */
|
||
|
|
||
|
case T_SINGLE: /* Single character */
|
||
|
while(match(s,r) == 0) /* While match not found */
|
||
|
{
|
||
|
c = *s++; /* Get character */
|
||
|
if(!casesen) c = toupper(c);
|
||
|
/* Map to upper case if necessary */
|
||
|
if((unsigned char) c != *q) return(0);
|
||
|
/* Fail if mismatch found */
|
||
|
}
|
||
|
return(1); /* Success */
|
||
|
|
||
|
case T_CLASS: /* Class of characters */
|
||
|
while(match(s,r) == 0) /* While match not found */
|
||
|
{
|
||
|
if(!isascii(*s) || !(q[*s >> 3] & (1 << (*s & 7)))) return(0);
|
||
|
/* Fail if bit not set */
|
||
|
++s; /* Else skip character */
|
||
|
}
|
||
|
return(1); /* Success */
|
||
|
}
|
||
|
return(0); /* Return failure */
|
||
|
}
|
||
|
|
||
|
|
||
|
int exprmatch(s,p)
|
||
|
char *s; /* String */
|
||
|
char *p; /* Pattern */
|
||
|
{
|
||
|
ncmp = strncmp; /* Assume case-sensitive */
|
||
|
if(!casesen)
|
||
|
{
|
||
|
ncmp = _strnicmp;
|
||
|
} /* Be case-insensitive if flag set */
|
||
|
return(match(s,p)); /* See if pattern matches string */
|
||
|
}
|
||
|
|
||
|
|
||
|
#ifndef ASCLEN
#define ASCLEN 128                  /* Number of bits in a character vector */
#endif

/*
 * bitset - set or clear the bits first..last (inclusive) of a bit vector.
 *
 * bitvec   vector of ASCLEN bits (ASCLEN/8 bytes), bit n of byte k
 *          standing for character 8*k + n
 * first    first bit number of the range
 * last     last bit number of the range
 * bitval   0 clears the bits, any non-zero value sets them
 *
 * The range is clipped to 0..ASCLEN-1 so that callers handing in
 * character values of 128 and above (unsigned char data) can never
 * write outside the vector.  An empty range (first > last) is a no-op.
 */
void bitset(bitvec,first,last,bitval)
char *bitvec;                       /* Bit vector */
int first;                          /* First character */
int last;                           /* Last character */
int bitval;                         /* Bit value (0 or 1) */
{
    unsigned char *vec;             /* Vector viewed as raw bytes */
    unsigned char mask;             /* Mask selecting a single bit */

    vec = (unsigned char *) bitvec;
    if(first < 0) first = 0;        /* Clip to the vector */
    if(last >= ASCLEN) last = ASCLEN - 1;
    bitval = (bitval != 0);         /* Normalize to 0 or 1 */

    while(first <= last)
    {
        if((first & 7) == 0 && last - first >= 7)
        {                           /* A whole byte lies inside the range */
            vec[first >> 3] = (unsigned char)(bitval? 0xFF: 0x00);
            first += 8;
            continue;
        }
        mask = (unsigned char)(1u << (first & 7));
        if(bitval) vec[first >> 3] |= mask;
        else vec[first >> 3] &= (unsigned char) ~mask;
        ++first;
    }
}
|
||
|
|
||
|
|
||
|
unsigned char *exprparse(p)
|
||
|
register unsigned char *p; /* Raw pattern */
|
||
|
{
|
||
|
register char *cp; /* Char pointer */
|
||
|
unsigned char *cp2; /* Char pointer */
|
||
|
int i; /* Counter/index */
|
||
|
int j; /* Counter/index */
|
||
|
int m; /* Counter/index */
|
||
|
int n; /* Counter/index */
|
||
|
int bitval; /* Bit value */
|
||
|
char buffer[PATMAX]; /* Temporary buffer */
|
||
|
|
||
|
if(!casesen) strnupr(p,strlen(p)); /* Force pattern to upper case */
|
||
|
cp = buffer; /* Initialize pointer */
|
||
|
if(*p == '^') *cp++ = *p++; /* Copy leading caret if any */
|
||
|
while(*p != '\0') /* While not end of pattern */
|
||
|
{
|
||
|
i = -2; /* Initialize */
|
||
|
for(n = 0;;) /* Loop to delimit ordinary string */
|
||
|
{
|
||
|
n += strcspn(p + n,".\\[*");/* Look for a special character */
|
||
|
if(p[n] != '\\') break; /* Break if not backslash */
|
||
|
i = n; /* Remember where backslash is */
|
||
|
if(p[++n] == '\0') return(NULL);
|
||
|
/* Cannot be at very end */
|
||
|
++n; /* Skip escaped character */
|
||
|
}
|
||
|
if(p[n] == '*') /* If we found a *-expr. */
|
||
|
{
|
||
|
if(n-- == 0) return(NULL); /* Illegal first character */
|
||
|
if(i == n - 1) n = i; /* Escaped single-char. *-expr. */
|
||
|
}
|
||
|
if(n > 0) /* If we have string or single */
|
||
|
{
|
||
|
if(n == 1 || (n == 2 && *p == '\\'))
|
||
|
{ /* If single character */
|
||
|
*cp++ = T_SINGLE; /* Set type */
|
||
|
if(*p == '\\') ++p; /* Skip escape if any */
|
||
|
*cp++ = *p++; /* Copy single character */
|
||
|
}
|
||
|
else /* Else we have a string */
|
||
|
{
|
||
|
*cp++ = T_STRING; /* Set type */
|
||
|
cp2 = cp++; /* Save pointer to length byte */
|
||
|
while(n-- > 0) /* While bytes to copy remain */
|
||
|
{
|
||
|
if(*p == '\\') /* If escape found */
|
||
|
{
|
||
|
++p; /* Skip escape */
|
||
|
--n; /* Adjust length */
|
||
|
}
|
||
|
*cp++ = *p++; /* Copy character */
|
||
|
}
|
||
|
*cp2 = (char)((cp - cp2) - 1);
|
||
|
/* Set string length */
|
||
|
}
|
||
|
}
|
||
|
if(*p == '\0') break; /* Break if end of pattern */
|
||
|
if(*p == '.') /* If matching any */
|
||
|
{
|
||
|
if(*++p == '*') /* If star follows any */
|
||
|
{
|
||
|
++p; /* Skip star, too */
|
||
|
*cp++ = T_STAR; /* Insert prefix ahead of token */
|
||
|
}
|
||
|
*cp++ = T_ANY; /* Match any character */
|
||
|
continue; /* Next iteration */
|
||
|
}
|
||
|
if(*p == '[') /* If character class */
|
||
|
{
|
||
|
if(*++p == '\0') return(NULL);
|
||
|
/* Skip '[' */
|
||
|
*cp++ = T_CLASS; /* Set type */
|
||
|
memset(cp,'\0',ASCLEN/8); /* Clear the vector */
|
||
|
bitval = 1; /* Assume we're setting bits */
|
||
|
if(*p == '^') /* If inverted class */
|
||
|
{
|
||
|
++p; /* Skip '^' */
|
||
|
memset(cp,'\xFF',ASCLEN/8);
|
||
|
/* Set all bits */
|
||
|
bitset(cp,EOS,EOS,0); /* All except end-of-string */
|
||
|
bitset(cp,'\n','\n',0); /* And linefeed! */
|
||
|
bitval = 0; /* Now we're clearing bits */
|
||
|
}
|
||
|
while(*p != ']') /* Loop to find ']' */
|
||
|
{
|
||
|
if(*p == '\0') return(NULL);
|
||
|
/* Check for malformed string */
|
||
|
if(*p == '\\') /* If escape found */
|
||
|
{
|
||
|
if(*++p == '\0') return(NULL);
|
||
|
/* Skip escape */
|
||
|
}
|
||
|
i = *p++; /* Get first character in range */
|
||
|
if(*p == '-' && p[1] != '\0' && p[1] != ']')
|
||
|
{ /* If range found */
|
||
|
++p; /* Skip hyphen */
|
||
|
if(*p == '\\' && p[1] != '\0') ++p;
|
||
|
/* Skip escape character */
|
||
|
j = *p++; /* Get end of range */
|
||
|
}
|
||
|
else j = i; /* Else just one character */
|
||
|
bitset(cp,i,j,bitval); /* Set bits in vector */
|
||
|
if(!casesen) /* If ignoring case */
|
||
|
{
|
||
|
m = (i < 'A')? 'A': i;
|
||
|
/* m = max(i,'A') */
|
||
|
n = (j > 'Z')? 'Z': j;
|
||
|
/* n = min(j,'Z') */
|
||
|
if(m <= n) bitset(cp,tolower(m),tolower(n),bitval);
|
||
|
/* Whack corresponding lower case */
|
||
|
m = (i < 'a')? 'a': i;
|
||
|
/* m = max(i,'a') */
|
||
|
n = (j > 'z')? 'z': j;
|
||
|
/* n = min(j,'z') */
|
||
|
if(m <= n) bitset(cp,toupper(m),toupper(n),bitval);
|
||
|
/* Whack corresponding upper case */
|
||
|
}
|
||
|
}
|
||
|
if(*++p == '*') /* If repeated class */
|
||
|
{
|
||
|
memmove(cp,cp - 1,ASCLEN/8 + 1);
|
||
|
/* Move vector forward 1 byte */
|
||
|
cp[-1] = T_STAR; /* Insert prefix */
|
||
|
++cp; /* Skip to start of vector */
|
||
|
++p; /* Skip star */
|
||
|
}
|
||
|
cp += ASCLEN/8; /* Skip over vector */
|
||
|
continue; /* Next iteration */
|
||
|
}
|
||
|
*cp++ = T_STAR; /* Repeated single character */
|
||
|
*cp++ = T_SINGLE;
|
||
|
if(*p == '\\') ++p; /* Skip escape if any */
|
||
|
*cp++ = *p++; /* Copy the character */
|
||
|
assert(*p == '*'); /* Validate assumption */
|
||
|
++p; /* Skip the star */
|
||
|
}
|
||
|
*cp++ = T_END; /* Mark end of parsed expression */
|
||
|
cp2 = alloc(cp - buffer); /* Allocate buffer */
|
||
|
memmove(cp2,buffer,(size_t)(cp - buffer)); /* Copy expression to buffer */
|
||
|
return(cp2); /* Return buffer pointer */
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * istoken - report whether the n-character string s is a "token"
 * expression, i.e. begins with \< or ends with an unescaped-backslash \>.
 *
 * Returns 1 if it is, 0 otherwise.
 */
int istoken(s,n)
unsigned char *s;                   /* String */
int n;                              /* Length */
{
    int i;                          /* Index into s */

    if(n >= 2 && s[0] == '\\' && s[1] == '<') return(1);

    i = 0;
    while(i < n)
    {
        if(s[i] != '\\')
        {
            ++i;
            continue;
        }
        /* Backslash at i escapes the character at i + 1 */
        if(i + 2 == n && s[i + 1] == '>') return(1);
        i += 2;
    }
    return(0);
}
|
||
|
|
||
|
|
||
|
int isexpr(s,n)
|
||
|
unsigned char *s; /* String */
|
||
|
int n; /* Length */
|
||
|
{
|
||
|
unsigned char *cp; /* Char pointer */
|
||
|
int status; /* Return status */
|
||
|
char buffer[BUFLEN]; /* Temporary buffer */
|
||
|
|
||
|
if(istoken(s,n)) return(1); /* Tokens are exprs */
|
||
|
memmove(buffer,s,n); /* Copy string to buffer */
|
||
|
buffer[n] = '\0'; /* Null-terminate string */
|
||
|
if((s = exprparse(buffer)) == NULL) return(0);
|
||
|
/* Not an expression if parse fails */
|
||
|
status = 1; /* Assume we have an expression */
|
||
|
if(*s != '^' && *s != T_END) /* If no caret and not empty */
|
||
|
{
|
||
|
status = 0; /* Assume not an expression */
|
||
|
cp = s; /* Initialize */
|
||
|
do /* Loop to find special tokens */
|
||
|
{
|
||
|
switch(*cp++) /* Switch on token type */
|
||
|
{
|
||
|
case T_STAR: /* Repeat prefix */
|
||
|
case T_CLASS: /* Character class */
|
||
|
case T_ANY: /* Any character */
|
||
|
++status; /* This is an expression */
|
||
|
break;
|
||
|
|
||
|
case T_SINGLE: /* Single character */
|
||
|
++cp; /* Skip character */
|
||
|
break;
|
||
|
|
||
|
case T_STRING: /* String */
|
||
|
cp += *cp + 1; /* Skip string */
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
while(!status && *cp != T_END); /* Do while not at end of expression */
|
||
|
}
|
||
|
free(s); /* Free expression */
|
||
|
return(status); /* Return status */
|
||
|
}
|
||
|
|
||
|
|
||
|
void exprprint(p,fo)
|
||
|
unsigned char *p; /* Pointer to expression */
|
||
|
FILE *fo; /* File pointer */
|
||
|
{
|
||
|
int bit; /* Bit value */
|
||
|
int count; /* Count of characters in string */
|
||
|
int first; /* First character in range */
|
||
|
int last; /* Last character in range */
|
||
|
int star; /* Repeat prefix flag */
|
||
|
|
||
|
if(*p == '^') fputc(*p++,fo); /* Print leading caret */
|
||
|
while(*p != T_END) /* While not at end of expression */
|
||
|
{
|
||
|
star = 0; /* Assume no prefix */
|
||
|
if(*p == T_STAR) /* If repeat prefix found */
|
||
|
{
|
||
|
++star; /* Set flag */
|
||
|
++p; /* Skip prefix */
|
||
|
}
|
||
|
switch(*p++) /* Switch on token type */
|
||
|
{
|
||
|
case T_END: /* End of expression */
|
||
|
case T_STAR: /* Repeat prefix */
|
||
|
fprintf(stderr,"Internal error: exprprint\n");
|
||
|
/* Not valid */
|
||
|
exit(2); /* Die abnormal death */
|
||
|
|
||
|
case T_STRING: /* String */
|
||
|
count = *p++; /* Get string length */
|
||
|
goto common; /* Forgive me, Djikstra! */
|
||
|
|
||
|
case T_SINGLE: /* Single character */
|
||
|
count = 1; /* Only one character */
|
||
|
common:
|
||
|
while(count-- > 0) /* While bytes remain */
|
||
|
{
|
||
|
if(*p == EOS) /* If end-of-string found */
|
||
|
{
|
||
|
++p; /* Skip character */
|
||
|
fputc('$',fo); /* Emit special marker */
|
||
|
continue; /* Next iteration */
|
||
|
}
|
||
|
if(strchr("*.[\\$",*p) != NULL) fputc('\\',fo);
|
||
|
/* Emit escape if needed */
|
||
|
fputc(*p++,fo); /* Emit the character */
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case T_ANY: /* Match any */
|
||
|
fputc('.',fo); /* Emit dot */
|
||
|
break;
|
||
|
|
||
|
case T_CLASS:
|
||
|
first = -1; /* Initialize */
|
||
|
fputc('[',fo); /* Open braces */
|
||
|
for(count = ' '; count <= '~'; ++count)
|
||
|
{ /* Loop through printable characters */
|
||
|
if((bit = p[count >> 3] & (1 << (count & 7))) != 0)
|
||
|
{ /* If bit is set */
|
||
|
if(first == -1) first = count;
|
||
|
/* Set first bit */
|
||
|
last = count; /* Set last bit */
|
||
|
}
|
||
|
if((!bit || count == '~') && first != -1)
|
||
|
{ /* If range to print */
|
||
|
if(strchr("\\]-",first) != NULL) fputc('\\',fo);
|
||
|
/* Emit escape if needed */
|
||
|
fputc(first,fo); /* Print first character in range */
|
||
|
if(last != first) /* If we have a range */
|
||
|
{
|
||
|
if(last > first + 1) fputc('-',fo);
|
||
|
/* Emit hyphen if needed */
|
||
|
if(strchr("\\]-",last) != NULL) fputc('\\',fo);
|
||
|
/* Emit escape if needed */
|
||
|
fputc(last,fo);
|
||
|
/* Print last character in range */
|
||
|
}
|
||
|
first = -1; /* Range printed */
|
||
|
}
|
||
|
}
|
||
|
fputc(']',fo); /* Close braces */
|
||
|
p += ASCLEN/8; /* Skip bit vector */
|
||
|
break;
|
||
|
}
|
||
|
if(star) fputc('*',fo); /* Print star if needed */
|
||
|
}
|
||
|
fputc('\n',fo); /* Print newline */
|
||
|
}
|
||
|
|
||
|
|
||
|
char *get1stcharset(e,bitvec)
|
||
|
unsigned char *e; /* Pointer to expression */
|
||
|
char *bitvec; /* Pointer to bit vector */
|
||
|
{
|
||
|
unsigned char *cp; /* Char pointer */
|
||
|
int i; /* Index/counter */
|
||
|
int star; /* Repeat prefix flag */
|
||
|
|
||
|
if(*e == '^') ++e; /* Skip leading caret if any */
|
||
|
memset(bitvec,'\0',ASCLEN/8); /* Clear bit vector */
|
||
|
cp = e; /* Initialize */
|
||
|
while(*e != T_END) /* Loop to process leading *-expr.s */
|
||
|
{
|
||
|
star = 0; /* Assume no repeat prefix */
|
||
|
if(*e == T_STAR) /* If repeat prefix found */
|
||
|
{
|
||
|
++star; /* Set flag */
|
||
|
++e; /* Skip repeat prefix */
|
||
|
}
|
||
|
switch(*e++) /* Switch on token type */
|
||
|
{
|
||
|
case T_END: /* End of expression */
|
||
|
case T_STAR: /* Repeat prefix */
|
||
|
fprintf(stderr,"Internal error: get1stcharset\n");
|
||
|
/* Not valid */
|
||
|
exit(2); /* Die abnormal death */
|
||
|
|
||
|
case T_STRING: /* String */
|
||
|
if(star || *e++ == '\0') /* If repeat prefix or zero count */
|
||
|
{
|
||
|
fprintf(stderr,"Internal error: get1stcharset\n");
|
||
|
/* Not valid */
|
||
|
exit(2); /* Die abnormal death */
|
||
|
}
|
||
|
/* Drop through */
|
||
|
|
||
|
case T_SINGLE: /* Single character */
|
||
|
bitset(bitvec,*e,*e,1); /* Set the bit */
|
||
|
++e; /* Skip the character */
|
||
|
break;
|
||
|
|
||
|
case T_ANY: /* Match any */
|
||
|
memset(bitvec,'\xFF',ASCLEN/8);
|
||
|
/* Set all the bits */
|
||
|
bitset(bitvec,EOS,EOS,0); /* Except end-of-string */
|
||
|
bitset(bitvec,'\n','\n',0);
|
||
|
/* And linefeed! */
|
||
|
break;
|
||
|
|
||
|
case T_CLASS:
|
||
|
for(i = 0; i < ASCLEN/8; ++i) bitvec[i] |= *e++;
|
||
|
/* Or in all the bits */
|
||
|
break;
|
||
|
}
|
||
|
if(!star) break; /* Break if not repeated */
|
||
|
cp = e; /* Update pointer */
|
||
|
}
|
||
|
return(cp); /* Point to 1st non-repeated expr. */
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * findall - search function that matches everything.
 *
 * Returns buffer when the buffer is non-empty, NULL when it is empty.
 */
char *findall(buffer,bufend)
char *buffer;                       /* Start of search area */
char *bufend;                       /* End of search area */
{
    if(buffer >= bufend) return(NULL);
    return(buffer);
}
|
||
|
|
||
|
|
||
|
void addtoken(e,n)
|
||
|
char *e; /* Raw token expression */
|
||
|
int n; /* Length of expression */
|
||
|
{
|
||
|
static char achpref[] = "^";/* Prefix */
|
||
|
static char achprefsuf[] = "[^A-Za-z0-9_]";
|
||
|
/* Prefix/suffix */
|
||
|
static char achsuf[] = "$"; /* Suffix */
|
||
|
char buffer[BUFLEN]; /* Temporary buffer */
|
||
|
|
||
|
assert(n >= 2); /* Must have at least two characters */
|
||
|
if(e[0] == '\\' && e[1] == '<') /* If begin token */
|
||
|
{
|
||
|
if(!(flags & BEGLINE)) /* If not matching at beginning only */
|
||
|
{
|
||
|
memcpy(buffer,achprefsuf,sizeof achprefsuf - 1);
|
||
|
/* Copy first prefix */
|
||
|
memcpy(buffer + sizeof achprefsuf - 1,e + 2,n - 2);
|
||
|
/* Attach expression */
|
||
|
addexpr(buffer,n + sizeof achprefsuf - 3);
|
||
|
/* Add expression */
|
||
|
}
|
||
|
memcpy(buffer,achpref,sizeof achpref - 1);
|
||
|
/* Copy second prefix */
|
||
|
memcpy(buffer + sizeof achpref - 1,e + 2,n - 2);
|
||
|
/* Attach expression */
|
||
|
addexpr(buffer,n + sizeof achpref - 3);
|
||
|
/* Add expression */
|
||
|
return; /* Done */
|
||
|
}
|
||
|
assert(e[n-2] == '\\' && e[n - 1] == '>');
|
||
|
/* Must be end token */
|
||
|
if(!(flags & ENDLINE)) /* If not matching at end only */
|
||
|
{
|
||
|
memcpy(buffer,e,n - 2); /* Copy expression */
|
||
|
memcpy(buffer + n - 2,achprefsuf,sizeof achprefsuf - 1);
|
||
|
/* Attach first suffix */
|
||
|
addexpr(buffer,n + sizeof achprefsuf - 3);
|
||
|
/* Add expression */
|
||
|
}
|
||
|
memcpy(buffer,e,n - 2); /* Copy expression */
|
||
|
memcpy(buffer + n - 2,achsuf,sizeof achsuf - 1);
|
||
|
/* Attach second suffix */
|
||
|
addexpr(buffer,n + sizeof achsuf - 3);
|
||
|
/* Add expression */
|
||
|
}
|
||
|
|
||
|
|
||
|
void addexpr(e,n)
|
||
|
char *e; /* Expression to add */
|
||
|
int n; /* Length of expression */
|
||
|
{
|
||
|
EXPR *expr; /* Expression node pointer */
|
||
|
int i; /* Index */
|
||
|
int j; /* Index */
|
||
|
int locflags; /* Local copy of flags */
|
||
|
char bitvec[ASCLEN/8];
|
||
|
/* First char. bit vector */
|
||
|
char buffer[BUFLEN]; /* Temporary buffer */
|
||
|
|
||
|
if(find == findall) return; /* Return if matching everything */
|
||
|
if(istoken(e,n)) /* If expr is token */
|
||
|
{
|
||
|
addtoken(e,n); /* Convert and add tokens */
|
||
|
return; /* Done */
|
||
|
}
|
||
|
locflags = flags; /* Initialize local copy */
|
||
|
if(*e == '^') locflags |= BEGLINE; /* Set flag if match must begin line */
|
||
|
j = -2; /* Assume no escapes in string */
|
||
|
for(i = 0; i < n - 1; ++i) /* Loop to find last escape */
|
||
|
{
|
||
|
if(e[i] == '\\') j = i++; /* Save index of last escape */
|
||
|
}
|
||
|
if(n > 0 && e[n-1] == '$' && j != n-2)
|
||
|
{ /* If expr. ends in unescaped '$' */
|
||
|
--n; /* Skip dollar sign */
|
||
|
locflags |= ENDLINE; /* Match must be at end */
|
||
|
}
|
||
|
strncpy(buffer,e,n); /* Copy pattern to buffer */
|
||
|
if(locflags & ENDLINE) buffer[n++] = EOS;
|
||
|
/* Add end character if needed */
|
||
|
buffer[n] = '\0'; /* Null-terminate string */
|
||
|
if((e = exprparse(buffer)) == NULL) return;
|
||
|
/* Return if invalid expression */
|
||
|
++strcnt; /* Increment string count */
|
||
|
if(!(locflags & BEGLINE)) /* If match needn't be at beginning */
|
||
|
{
|
||
|
e = get1stcharset(e,bitvec); /* Remove leading *-expr.s */
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* E now points to a buffer containing a preprocessed expression.
|
||
|
* We need to find the set of allowable first characters and make
|
||
|
* the appropriate entries in the string node table.
|
||
|
*/
|
||
|
|
||
|
if(*get1stcharset(e,bitvec) == T_END)
|
||
|
{ /* If expression will match anything */
|
||
|
find = findall; /* Match everything */
|
||
|
return; /* All done */
|
||
|
}
|
||
|
for(j = 0; j < ASCLEN; ++j) /* Loop to examine bit vector */
|
||
|
{
|
||
|
if(bitvec[j >> 3] & (1 << (j & 7)))
|
||
|
{ /* If the bit is set */
|
||
|
expr = (EXPR *) alloc(sizeof(EXPR));
|
||
|
/* Allocate record */
|
||
|
expr->ex_pattern = e; /* Point it at pattern */
|
||
|
if((i = transtab[j]) == 0) /* If no existing list */
|
||
|
{
|
||
|
if((i = clists++) >= ASCLEN)
|
||
|
{ /* If too many string lists */
|
||
|
fprintf(stderr,"Too many string lists\n");
|
||
|
/* Error message */
|
||
|
exit(2); /* Die */
|
||
|
}
|
||
|
stringlist[i] = NULL; /* Initialize */
|
||
|
transtab[j] = (char) i; /* Set pointer to new list */
|
||
|
if(!casesen && isalpha(j)) transtab[j ^ 0x20] = (char) i;
|
||
|
/* Set pointer for other case */
|
||
|
}
|
||
|
expr->ex_next = stringlist[i];
|
||
|
/* Link new record into table */
|
||
|
stringlist[i] = expr;
|
||
|
}
|
||
|
}
|
||
|
if(locflags & DEBUG) exprprint(e,stderr);
|
||
|
/* Print the expression if debugging */
|
||
|
}
|
||
|
|
||
|
|
||
|
char *findexpr(buffer,bufend)
|
||
|
unsigned char *buffer; /* Buffer in which to search */
|
||
|
char *bufend; /* End of buffer */
|
||
|
{
|
||
|
EXPR *expr; /* Expression list pointer */
|
||
|
unsigned char *pattern; /* Pattern */
|
||
|
int i; /* Index */
|
||
|
|
||
|
while(buffer < bufend) /* Loop to find match */
|
||
|
{
|
||
|
if((i = transtab[*buffer++]) == 0) continue;
|
||
|
/* Continue if not valid 1st char */
|
||
|
if((expr = (EXPR *) stringlist[i]) == NULL)
|
||
|
{ /* If null pointer */
|
||
|
fprintf(stderr,"Internal error: findexpr\n");
|
||
|
/* Print error message */
|
||
|
exit(2); /* Die */
|
||
|
}
|
||
|
--buffer; /* Back up to first character */
|
||
|
while(expr != NULL) /* Loop to find match */
|
||
|
{
|
||
|
pattern = expr->ex_pattern; /* Point to pattern */
|
||
|
expr = expr->ex_next; /* Point to next record */
|
||
|
if(pattern[0] == '^') /* If match begin line */
|
||
|
{
|
||
|
++pattern; /* Skip caret */
|
||
|
if(buffer[-1] != '\n') continue;
|
||
|
/* Don't bother if not at beginning */
|
||
|
}
|
||
|
if(exprmatch(buffer,pattern)) return(buffer);
|
||
|
/* Return pointer if match found */
|
||
|
}
|
||
|
++buffer; /* Skip first character */
|
||
|
}
|
||
|
return(NULL); /* No match */
|
||
|
}
|