windows-nt/Source/XPSP1/NT/base/ntsetup/oobe/msobdl/boyer.cpp
2020-09-26 16:20:57 +08:00

188 lines
4.9 KiB
C++

/*-----------------------------------------------------------------------------
Program Specification
in: search space s, pattern p
out: a pointer where p is exactly matched at s[i], NULL indicates fail
why: Boyer-Moore algorithm is best for general text search. On
"average" it takes length(s)/length(p) steps to match p in s.
ref: I recommend the following references:
"Algorithms". Robert Sedgewick. Addison Wesley Publishing Company.
1988. 2nd edition. p286. QA76.6.S435 1983
"Faster String Searches". Doctor Dobb's Journal. Volume 14
Issue 7 July 1989. Costas Menico. p74.
usage: e.g. to find a pattern "tiger" in a text in RAM starting at
pointer "txtp" with a length of 1,000,000 characters,
program like this (release hfind with FreeFindPattern() when done):
HFIND hfind;
LPSTR matchp;
hfind = SetFindPattern( "tiger" );
matchp = Find( hfind, txtp, 1000000L );
if (matchp != NULL)
// found
else
// not found
matchp = FindBackward( txtp + 1000000L - 1, 1000000L);
if (matchp != NULL)
// found
else
// not found
Q: Can I use Find() with a GlobalLock() pointer in Windows?
A: Yes.
Q: Must I declare my pointer as HPSTR (huge pointer) ?
A: Not necessary. Find() and FindBackward() will convert your
LPSTR as HPSTR. However, in your own code you must be aware
that you are holding a LPSTR and take care of the pointer
arithmetic and conversion. (see demo.c for example)
Q: What is the limit of the memory space I can search?
A: To the limit of huge pointer implementation and your hardware.
-----------------------------------------------------------------------------*/
#include "pch.hpp"
/*-----------------------------------------------------------------------------
func: SetFindPattern
desc: initialize the pattern to be matched and generate skip table
pass: lpszPattern = pattern string; only the first MAXPAT characters are
      used, anything beyond that is ignored
rtrn: HFIND - the find handle for further text search, or NULL when
      lpszPattern is NULL or the handle could not be allocated
-----------------------------------------------------------------------------*/
HFIND SetFindPattern( LPSTR lpszPattern )
{
    unsigned int j;
    unsigned int plen;
    unsigned char c;
    LPSTR lpDst;
    HFIND hfind;

    if (lpszPattern == NULL)
    {
        return ((HFIND)NULL);
    }

    hfind = (HFIND)MyAlloc(sizeof(FINDSTRUCT));
    if (hfind == NULL)
    {
        return ((HFIND)NULL);
    }

    plen = (unsigned int)lstrlenA( lpszPattern );
    if (plen > MAXPAT)
    {
        plen = MAXPAT;
    }
    hfind->plen = plen;

    /* A character that does not occur in the pattern lets the search move
       forward by the whole pattern length. */
    for (j = 0; j < 256; j++)
    {
        hfind->skip[j] = plen;
    }

    /* Copy only the (possibly truncated) pattern so that an over-long input
       cannot run past the pattern buffer, and record for every character its
       distance from the end of the pattern (the last occurrence wins).
       NOTE(review): assumes hfind->p holds at least MAXPAT characters plus a
       terminator - FINDSTRUCT is declared elsewhere, confirm. */
    lpDst = (LPSTR)(hfind->p);
    for (j = 0; j < plen; j++)
    {
        lpDst[j] = lpszPattern[j];
        /* Index with the unsigned byte value: a signed CHAR >= 0x80 would
           otherwise address memory in front of the skip table. */
        c = (unsigned char)lpszPattern[j];
        hfind->skip[c] = plen - (j + 1);
    }
    lpDst[plen] = '\0';

    return (hfind);
}
/*-----------------------------------------------------------------------------
func: FreeFindPattern
desc: release the find handle that SetFindPattern allocated
pass: hfind - the find handle; it is handed to MyFree as-is
rtrn: nothing
-----------------------------------------------------------------------------*/
void FreeFindPattern( HFIND hfind )
{
    LPSTR lpBlock = (LPSTR)hfind;

    MyFree(lpBlock);
}
/*-----------------------------------------------------------------------------
func: Find
desc: match a pattern defined in SetFindPattern against string s
pass: hfind = the find handle created by SetFindPattern
      s = start of search space, slen = length of s
rtrn: NULL = match fail; also returned when hfind or s is NULL, when the
             pattern is empty, or when s is shorter than the pattern
      else = an LPSTR to the place in s where the first match of the
             pattern begins
-----------------------------------------------------------------------------*/
LPSTR Find( HFIND hfind, LPSTR s, long slen )
{
    long i;             /* 1-based position in s of the character under test */
    unsigned int j;     /* 1-based position in the pattern under test */
    unsigned int plen;
    unsigned int n;
    unsigned int step;
    unsigned char c;

    if (hfind == NULL || s == NULL)
    {
        return ((LPSTR)NULL);
    }

    plen = hfind->plen;

    /* Without these guards the very first comparison would read outside s
       (s[plen - 1] for a short buffer, s[-1] for an empty pattern). */
    if (plen == 0 || slen < (long)plen)
    {
        return ((LPSTR)NULL);
    }

    i = (long)plen;
    j = plen;

    /* The pattern is compared right to left; j reaching 0 means that every
       pattern character matched. */
    while (j >= 1)
    {
        c = (unsigned char)s[i - 1];
        if (c == (unsigned char)hfind->p[j - 1])
        {
            i--;
            j--;
            continue;
        }

        /* Mismatch: move forward by the skip table entry, but always by at
           least one position past the current alignment (n). */
        n = plen - j + 1;
        step = hfind->skip[c];
        if (step < n)
        {
            step = n;
        }

        /* Test against the remaining space before adding, so that i can
           neither leave the buffer nor overflow. */
        if ((long)step > slen - i)
        {
            return ((LPSTR)NULL);
        }
        i += (long)step;
        j = plen;
    }

    /* i was decremented once per matched character, so it is now the 0-based
       offset of the first character of the match. */
    return (s + i);
}
#ifdef TEST_MAIN
#pragma message("Building with TEST_MAIN")
#include <stdio.h>
/* Self-test data: the pattern occurs at offset 0 of the buffer and once more
   further in. These are CHAR arrays, so the literals must be narrow strings. */
CHAR test_buffer[]="___________12191919191919This is string for testing our find ___________12191919191919function 12abE Is it in here somehwere ?";
CHAR test_pattern[]="___________12191919191919";
/* Minimal self-test: builds a find handle for test_pattern, searches
   test_buffer and prints the offset and the text of the first match.
   Returns 0 on completion, 1 if the find handle could not be created. */
int main(void)
{
    HFIND hFind;
    CHAR *tmp;

    hFind=SetFindPattern(test_pattern);
    if (hFind==NULL)
    {
        printf("SetFindPattern failed");
        return 1;
    }
    tmp=Find(hFind, test_buffer, (long)lstrlenA(test_buffer));
    /* printf rather than wsprintf: wsprintf takes the OUTPUT buffer as its
       first argument, so passing the format string there printed nothing. */
    if (tmp!=NULL) printf("Found pattern at offset %u, %s", (unsigned int)(tmp-test_buffer),tmp);
    FreeFindPattern(hFind);
    return 0;
}
#endif