795 lines
28 KiB
C
795 lines
28 KiB
C
|
/*
 *  rhizome.c
 *
 *  author: John R. Douceur
 *  date:   28 April 1997
 *
 *  This source file provides functions that implement insertion, removal, and
 *  search operations on the rhizome database.  The code is object-oriented C,
 *  transliterated from a C++ implementation.
 *
 *  The rhizome is a database that stores patterns containing wildcards.
 *  Each pattern defines a set of keys that it matches; if a pattern contains
 *  N wildcards, then it matches 2^N keys.  Since each pattern can match
 *  multiple keys, it is possible for a given key to match multiple patterns
 *  in the database.  The rhizome requires that all patterns stored therein
 *  have a strict hierarchical interrelationship.  Two patterns may match no
 *  common keys (in which case the patterns are said to be independent), or
 *  one pattern may match all the keys matched by a second pattern as well as
 *  additional keys (in which case the second pattern is said to be more general
 *  than the first, and the first more specific than the second).  The database
 *  will not accept two patterns which match some keys in common but each of
 *  which also matches additional keys that the other does not.
 *
 *  The database can be searched for patterns that match a given search key.
 *  When the database is searched for a given key, the most specifically
 *  matching pattern is found.  If no patterns in the database match the key,
 *  an appropriate indication is returned.
 *
 *  None of the code or comments in this file needs to be understood by writers
 *  of client code; all explanatory information for clients is found in the
 *  associated header file, rhizome.h.
 *
 */
|
||
|
|
||
|
#include "precomp.h"
|
||
|
#pragma hdrstop
|
||
|
|
||
|
// Field accessors for the RhizomeNode structure.
//
// CHILDREN, REFERENCE, and GODPARENT name members of the node's udata union
// and are used as  node->CHILDREN[...],  node->REFERENCE,  node->GODPARENT.
//
// VALUE, MASK, and IMASK rely on an agreed usage of the node's cdata array:
// the first keybytes locations store the value field of the node; the second
// keybytes locations store the mask field; and the third keybytes locations
// store the imask field.
//
// MASK and IMASK read keybytes through a variable named rhizome, so they can
// only be used where a Rhizome pointer of that name is in scope.
//
// The macro argument is parenthesized so that any pointer-valued expression
// (for example  &node  or  base + 1) can be passed without being re-parsed
// around the -> operator.
//
#define CHILDREN udata.branch.children
#define REFERENCE udata.leaf.reference
#define GODPARENT udata.leaf.godparent
#define VALUE(pointer) ((pointer)->cdata)
#define MASK(pointer) ((pointer)->cdata + rhizome->keybytes)
#define IMASK(pointer) ((pointer)->cdata + 2 * rhizome->keybytes)
|
||
|
|
||
|
// Allocation macro for rhizome nodes.
//
// A node needs three bytes of storage (value, mask, imask) for every byte of
// pattern length, so the amount requested depends on keybytes.  The cdata
// array is the last field of RhizomeNode and is declared with a single
// element; the request therefore adds 3 * keybytes and subtracts the one
// element already counted by sizeof, letting cdata extend past the declared
// end of the structure into the extra space.
//
// _pa is handed to GpcAllocMem as the location that receives the new node.
// Callers in this file compare the received pointer with 0 to detect
// allocation failure.  The macro reads keybytes through a variable named
// rhizome, which must be in scope at the point of use.
//
#define NEW_RhizomeNode(_pa) \
    GpcAllocMem((_pa), \
                sizeof(RhizomeNode) + 3 * rhizome->keybytes - 1, \
                NAT_TAG_RHIZOME)
|
||
|
|
||
|
|
||
|
// This macro gets the indexed bit of a byte array, where the most-significant
// bit of byte 0 is defined as bit 0.  The result is the integer 0 or 1.
//
// The selected byte is converted to unsigned char before it is shifted.  Keys,
// values, and masks are passed around as plain char, which may be a signed
// type; shifting a negative value right is implementation-defined, so the
// conversion keeps the extraction well-defined for bytes with the top bit set.
//
#define BIT_OF(value, index) \
    (((unsigned char)(value)[(index) >> 3] >> (7 - ((index) & 0x7))) & 0x1)
|
||
|
|
||
|
// Following are prototypes for static functions that are used internally by
|
||
|
// the implementation of the rhizome routines.
|
||
|
|
||
|
static int
|
||
|
node_insert(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode *new_leaf,
|
||
|
RhizomeNode **ppoint,
|
||
|
int prev_bit);
|
||
|
|
||
|
static void
|
||
|
node_remove(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode *leaf,
|
||
|
RhizomeNode **ppoint);
|
||
|
|
||
|
static RhizomeNode *
|
||
|
replicate(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode *source,
|
||
|
int pivot_bit);
|
||
|
|
||
|
static void
|
||
|
eliminate(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode *point);
|
||
|
|
||
|
static void
|
||
|
coalesce(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode **leaf_list,
|
||
|
RhizomeNode *point);
|
||
|
|
||
|
|
||
|
// Since this is not C++, the Rhizome structure is not self-constructing;
|
||
|
// therefore, the following constructor code must be called on the Rhizome
|
||
|
// structure after it is allocated. The argument keybits specifies the size
|
||
|
// (in bits) of each pattern that will be stored in the database.
|
||
|
//
|
||
|
void
|
||
|
constructRhizome(
|
||
|
Rhizome *rhizome,
|
||
|
int keybits)
|
||
|
{
|
||
|
rhizome->keybits = keybits;
|
||
|
rhizome->keybytes = (keybits - 1) / 8 + 1;
|
||
|
rhizome->root = 0;
|
||
|
}
|
||
|
|
||
|
// Since this is not C++, the Rhizome structure is not self-destructing;
|
||
|
// therefore, the following destructor code must be called on the Rhizome
|
||
|
// structure before it is deallocated.
|
||
|
//
|
||
|
// If the structure is non-empty, call coalesce() to eliminate
|
||
|
// all branch nodes and to string leaf nodes into a list; then delete list.
|
||
|
//
|
||
|
void
|
||
|
destructRhizome(
|
||
|
Rhizome *rhizome)
|
||
|
{
|
||
|
RhizomeNode *leaf_list, *next;
|
||
|
if (rhizome->root != 0)
|
||
|
{
|
||
|
leaf_list = 0;
|
||
|
coalesce(rhizome, &leaf_list, rhizome->root);
|
||
|
while (leaf_list != 0)
|
||
|
{
|
||
|
next = leaf_list->GODPARENT;
|
||
|
GpcFreeMem(leaf_list, NAT_TAG_RHIZOME);
|
||
|
leaf_list = next;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// This function searches the database for the pattern that most specifically
|
||
|
// matches the given key. The key is passed as an array of bytes. When the
|
||
|
// most specific match is found, the PatternHandle of that matching pattern is
|
||
|
// returned. From the PatternHandle can be gotten the reference value via the
|
||
|
// macro GetReferenceFromPatternHandle. If no pattern in the database is found
|
||
|
// to match the key, then a value of 0 is returned as the PatternHandle.
|
||
|
//
|
||
|
PatternHandle
|
||
|
searchRhizome(
|
||
|
Rhizome *rhizome,
|
||
|
char *key)
|
||
|
{
|
||
|
int index;
|
||
|
RhizomeNode *point;
|
||
|
// If tree is empty, search fails.
|
||
|
if (rhizome->root == 0)
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
// Otherwise, start at rhizome->root and navigate tree until reaching a leaf.
|
||
|
point = rhizome->root;
|
||
|
while (point->pivot_bit < rhizome->keybits)
|
||
|
{
|
||
|
point = point->CHILDREN[BIT_OF(key, point->pivot_bit)];
|
||
|
}
|
||
|
// Check value for match, one byte at a time. If any byte fails to match,
|
||
|
// continue checking godparent with same byte; since previous bytes matched
|
||
|
// godchild, they are guaranteed to match godparent also.
|
||
|
index = 0;
|
||
|
while (index < rhizome->keybytes)
|
||
|
{
|
||
|
if ((((key)[index]) & MASK(point)[index]) != VALUE(point)[index])
|
||
|
{
|
||
|
if (point->GODPARENT != 0)
|
||
|
{
|
||
|
point = point->GODPARENT;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
index++;
|
||
|
}
|
||
|
}
|
||
|
return point;
|
||
|
}
|
||
|
|
||
|
// This function inserts a new pattern into the database. The pattern is
|
||
|
// specified by a value and a mask. Each bit of the mask determines whether
|
||
|
// the bit position is specified or is a wildcard: A 1 in a mask bit indicates
|
||
|
// that the value of that bit is specified by the pattern; a 0 indicates that
|
||
|
// the value of that bit is a wildcard. If a mask bit is 1, then the
|
||
|
// corresponding bit in the value field indicates the specified value of that
|
||
|
// bit. Value and mask fields are passed as arrays of bytes.
|
||
|
//
|
||
|
// The client specifies a void pointer reference value to associate with the
|
||
|
// pattern. When the pattern is installed, the insertRhizome function returns
|
||
|
// a pointer to a PatternHandle.
|
||
|
//
|
||
|
// If the new pattern conflicts with a pattern already installed in the
|
||
|
// database, meaning that the two patterns match some keys in common but each
|
||
|
// also matches additional keys that the other does not, then the new pattern
|
||
|
// is not inserted, and a value of 0 is returned as the PatternHandle.
|
||
|
//
|
||
|
PatternHandle
|
||
|
insertRhizome(
|
||
|
Rhizome *rhizome,
|
||
|
char *value,
|
||
|
char *mask,
|
||
|
void *reference,
|
||
|
ulong *status)
|
||
|
{
|
||
|
RhizomeNode *new_leaf;
|
||
|
int index0, insert_status;
|
||
|
|
||
|
*status = GPC_STATUS_SUCCESS;
|
||
|
|
||
|
// Create new leaf and copy data into it; restrict set bits of value to
|
||
|
// those set in mask, since later code assumes this is the case. Add new
|
||
|
// leaf to reference table.
|
||
|
NEW_RhizomeNode(&new_leaf);
|
||
|
if (new_leaf == 0)
|
||
|
{
|
||
|
// Memory could not be allocated for this new node. Therefore, we
|
||
|
// return an indication of failure to the client.
|
||
|
*status = GPC_STATUS_RESOURCES;
|
||
|
return 0;
|
||
|
}
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
VALUE(new_leaf)[index0] = value[index0] & mask[index0];
|
||
|
MASK(new_leaf)[index0] = mask[index0];
|
||
|
IMASK(new_leaf)[index0] = mask[index0];
|
||
|
}
|
||
|
new_leaf->REFERENCE = reference;
|
||
|
new_leaf->pivot_bit = rhizome->keybits;
|
||
|
new_leaf->GODPARENT = 0;
|
||
|
// If tree is empty, leaf becomes first node; otherwise, attempt to insert
|
||
|
// using recursive node_insert() routine. If new leaf conflicts with
|
||
|
// existing leaf, node_insert() throws exception; then remove new leaf and
|
||
|
// return failure code.
|
||
|
if (rhizome->root == 0)
|
||
|
{
|
||
|
rhizome->root = new_leaf;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
insert_status = node_insert(rhizome, new_leaf, &rhizome->root, -1);
|
||
|
if (insert_status != GPC_STATUS_SUCCESS)
|
||
|
{
|
||
|
removeRhizome(rhizome, new_leaf);
|
||
|
*status = GPC_STATUS_CONFLICT;
|
||
|
return 0; // return null pointer
|
||
|
};
|
||
|
}
|
||
|
return new_leaf;
|
||
|
}
|
||
|
|
||
|
// This function removes a pattern from the rhizome. The pattern is specified
|
||
|
// by the PatternHandle that was returned by the insertRhizome function. No
|
||
|
// checks are performed to insure that this is a valid handle.
|
||
|
//
|
||
|
void
|
||
|
removeRhizome(
|
||
|
Rhizome *rhizome,
|
||
|
PatternHandle phandle)
|
||
|
{
|
||
|
// Call recursive node_remove() routine to remove all references to leaf;
|
||
|
// then delete leaf.
|
||
|
node_remove(rhizome, phandle, &rhizome->root);
|
||
|
//TRACE(RHIZOME, rhizome, phandle, "removeRhizome")
|
||
|
GpcFreeMem(phandle, NAT_TAG_RHIZOME);
|
||
|
}
|
||
|
|
||
|
// Insert new_leaf into subtree pointed to by *ppoint. Update *ppoint to point
|
||
|
// to newly created nodes if necessary. Index of most recently examined bit
|
||
|
// is given by prev_bit. The return value is a status code: Normally, it
|
||
|
// returns GPC_STATUS_SUCCESS; if there is a conflict, then it returns NDIS_STATUS_CONFLICT;
|
||
|
// if there is insufficient memory available to perform the insertion, then it
|
||
|
// returns GPC_STATUS_RESOURCES.
|
||
|
//
|
||
|
static int
|
||
|
node_insert(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode *new_leaf,
|
||
|
RhizomeNode **ppoint,
|
||
|
int prev_bit)
|
||
|
{
|
||
|
int index, index0, bit_value, insert_status;
|
||
|
char sub, super;
|
||
|
RhizomeNode *point, *child, *new_branch;
|
||
|
// This routine has a recursive structure, but unnecessary recursions have
|
||
|
// been replaced by iteration, in order to improve performance. This
|
||
|
// recursion removal has introduced a forever loop which encloses the
|
||
|
// entirety of the routine; looping back to the beginning of this loop is
|
||
|
// thus the equivalent of recursing.
|
||
|
while (1)
|
||
|
{
|
||
|
point = *ppoint;
|
||
|
// Examine each bit index beginnig with that following last bit index
|
||
|
// examined previously. Continue examining bits until pivot bit of
|
||
|
// current node is reached (unless loop is terminated prematurely).
|
||
|
for (index = prev_bit + 1; index < point->pivot_bit; index++)
|
||
|
{
|
||
|
// If some leaf in the current subtree cares about the value of the
|
||
|
// current bit, and if the new leaf cares about the value of the
|
||
|
// current bit, and these two leaves disagree about the value of
|
||
|
// this bit, then a new branch node should be inserted here.
|
||
|
if (BIT_OF(MASK(new_leaf), index) == 1 &&
|
||
|
BIT_OF(MASK(point), index) == 1 &&
|
||
|
BIT_OF(VALUE(new_leaf), index) != BIT_OF(VALUE(point), index))
|
||
|
{
|
||
|
// Create new branch node; insert into tree; and set fields.
|
||
|
bit_value = BIT_OF(VALUE(new_leaf), index);
|
||
|
NEW_RhizomeNode(&new_branch);
|
||
|
if (new_branch == 0)
|
||
|
{
|
||
|
// Memory could not be allocated for this new node.
|
||
|
// Therefore, we pass an indication of failure up the stack.
|
||
|
return GPC_STATUS_RESOURCES;
|
||
|
}
|
||
|
*ppoint = new_branch;
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
VALUE(new_branch)[index0] =
|
||
|
VALUE(point)[index0] | VALUE(new_leaf)[index0];
|
||
|
MASK(new_branch)[index0] =
|
||
|
MASK(point)[index0] | MASK(new_leaf)[index0];
|
||
|
IMASK(new_branch)[index0] =
|
||
|
IMASK(point)[index0] & IMASK(new_leaf)[index0];
|
||
|
}
|
||
|
// Pivot bit of new branch node is the bit that inspired the
|
||
|
// creation of this branch.
|
||
|
new_branch->pivot_bit = index;
|
||
|
// The earlier subtree becomes the child whose bit disagreed
|
||
|
// with that of the new leaf.
|
||
|
new_branch->CHILDREN[1 - bit_value] = point;
|
||
|
// If every leaf in the subtree cares about the value of this
|
||
|
// bit, then we can insert the new leaf as the other child of
|
||
|
// this branch.
|
||
|
if (BIT_OF(IMASK(point), index) == 1)
|
||
|
{
|
||
|
// Insert new leaf here and return.
|
||
|
new_branch->CHILDREN[bit_value] = new_leaf;
|
||
|
return GPC_STATUS_SUCCESS;
|
||
|
}
|
||
|
// Otherwise, at least one leaf in the earlier subtree does not
|
||
|
// care about the value of this bit. Copy all such leaves
|
||
|
// (and necessary branches) to the other child of the new
|
||
|
// branch node.
|
||
|
child = replicate(rhizome, point, index);
|
||
|
if (child == 0)
|
||
|
{
|
||
|
// Memory could not be allocated for the replica.
|
||
|
// Therefore, we remove the new node from the structure,
|
||
|
// delete the new node, and pass an indication of failure
|
||
|
// up the stack.
|
||
|
*ppoint = point;
|
||
|
GpcFreeMem(new_branch, NAT_TAG_RHIZOME);
|
||
|
return GPC_STATUS_RESOURCES;
|
||
|
}
|
||
|
new_branch->CHILDREN[bit_value] = child;
|
||
|
// Continue search on newly copied subtree.
|
||
|
ppoint = &new_branch->CHILDREN[bit_value];
|
||
|
point = *ppoint;
|
||
|
}
|
||
|
}
|
||
|
// All bits have been examined up to the pivot bit of the current node.
|
||
|
// If this node is a leaf, then we have found a leaf with which the new
|
||
|
// leaf has no disagreements over bit values.
|
||
|
if (point->pivot_bit >= rhizome->keybits)
|
||
|
{
|
||
|
// Loop up the chain of godparents until one of the four cases
|
||
|
// below causes an exit from the subroutine.
|
||
|
while (1)
|
||
|
{
|
||
|
// Case 1: We have reached the end of the godparent chain.
|
||
|
if (point == 0)
|
||
|
{
|
||
|
// Insert new leaf at this point and return.
|
||
|
*ppoint = new_leaf;
|
||
|
return GPC_STATUS_SUCCESS;
|
||
|
}
|
||
|
// Case 2: We discover that we have already inserted this leaf
|
||
|
// at the appropriate location. This can happen because two
|
||
|
// leaves in separate parts of the tree may have a common god-
|
||
|
// ancestor, and a leaf which is a further god-ancestor of that
|
||
|
// leaf will be reached more than once. Since the first
|
||
|
// occasion inserted the leaf, the second one can return without
|
||
|
// performing any action.
|
||
|
if (point == new_leaf)
|
||
|
{
|
||
|
return GPC_STATUS_SUCCESS;
|
||
|
}
|
||
|
// Compare mask bits of the new leaf to the current leaf.
|
||
|
sub = 0;
|
||
|
super = 0;
|
||
|
for (index = 0; index < rhizome->keybytes; index++)
|
||
|
{
|
||
|
sub |= MASK(new_leaf)[index] & ~MASK(point)[index];
|
||
|
super |= ~MASK(new_leaf)[index] & MASK(point)[index];
|
||
|
}
|
||
|
// Case 3: The new leaf cares about at least one bit that the
|
||
|
// current leaf does not; and the current leaf does not care
|
||
|
// about any bits that the new leaf does not; thus, the new leaf
|
||
|
// should be a godchild of the current leaf.
|
||
|
if (sub != 0 && super == 0)
|
||
|
{
|
||
|
// Update imask field of new leaf; insert into chain;
|
||
|
// and return.
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
IMASK(new_leaf)[index0] &= IMASK(point)[index0];
|
||
|
}
|
||
|
new_leaf->GODPARENT = point;
|
||
|
*ppoint = new_leaf;
|
||
|
return GPC_STATUS_SUCCESS;
|
||
|
}
|
||
|
// Case 4: Either the new leaf has the same value and mask as
|
||
|
// the current leaf, or there is a hierarchy conflict between
|
||
|
// the two leaves. In either case, terminate the insertion
|
||
|
// process and clean up (in insert() routine) anything done
|
||
|
// already.
|
||
|
if (sub != 0 || super == 0)
|
||
|
{
|
||
|
return GPC_STATUS_CONFLICT;
|
||
|
}
|
||
|
// None of the above cases occurred; thus, the new leaf should
|
||
|
// be a god-ancestor of the current leaf. Update the imask
|
||
|
// field of the current leaf, and continue with godparent of
|
||
|
// current leaf.
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
IMASK(point)[index0] &= IMASK(new_leaf)[index0];
|
||
|
}
|
||
|
ppoint = &point->GODPARENT;
|
||
|
point = *ppoint;
|
||
|
}
|
||
|
}
|
||
|
// The current node is not a leaf node. Thus, we recurse on one or both
|
||
|
// of the child nodes of the current node. First, update the fields of
|
||
|
// the current node to reflect the insertion of the new leaf into the
|
||
|
// subtree.
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
VALUE(point)[index0] |= VALUE(new_leaf)[index0];
|
||
|
MASK(point)[index0] |= MASK(new_leaf)[index0];
|
||
|
IMASK(point)[index0] &= IMASK(new_leaf)[index0];
|
||
|
}
|
||
|
// If the new leaf doesn't care about the value of the pivot bit of the
|
||
|
// current leaf, then we must recurse on both children. We can only
|
||
|
// replace a single recursive call with iteration, so we perform a true
|
||
|
// recursion in this case, and we recurse on child 1.
|
||
|
if (BIT_OF(MASK(new_leaf), point->pivot_bit) == 0)
|
||
|
{
|
||
|
insert_status =
|
||
|
node_insert(rhizome, new_leaf, &point->CHILDREN[1],
|
||
|
point->pivot_bit);
|
||
|
if (insert_status != GPC_STATUS_SUCCESS)
|
||
|
{
|
||
|
return insert_status;
|
||
|
}
|
||
|
}
|
||
|
// Update the values of prev_bit and ppoint to reflect the same
|
||
|
// conditions that would hold in a recursive call. The pseudo-recursion
|
||
|
// is performed on the bit indicated by the value of the pivot bit of
|
||
|
// the new leaf. If the new leaf does not care about this bit, then
|
||
|
// this value will be a 0, and we recursed on child 1 above. If the new
|
||
|
// leaf does care about the value of this bit, then we continue down the
|
||
|
// appropriate path.
|
||
|
prev_bit = point->pivot_bit;
|
||
|
ppoint = &point->CHILDREN[BIT_OF(VALUE(new_leaf), point->pivot_bit)];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Remove references to leaf from subtree pointed to by *ppoint. Update *ppoint
|
||
|
// if necessary due to removal of branch nodes.
|
||
|
//
|
||
|
static void
|
||
|
node_remove(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode *leaf,
|
||
|
RhizomeNode **ppoint)
|
||
|
{
|
||
|
int pivot_bit, bit_value, index0;
|
||
|
RhizomeNode *point, *child, *child0, *child1;
|
||
|
point = *ppoint;
|
||
|
pivot_bit = point->pivot_bit;
|
||
|
if (pivot_bit < rhizome->keybits)
|
||
|
{
|
||
|
// The current node is a branch node.
|
||
|
if (BIT_OF(MASK(leaf), pivot_bit) == 1)
|
||
|
{
|
||
|
// The leaf to be removed cares about this node's pivot bit;
|
||
|
// therefore, we need only recurse on one of the current node's
|
||
|
// children.
|
||
|
bit_value = BIT_OF(VALUE(leaf), pivot_bit);
|
||
|
node_remove(rhizome, leaf, &point->CHILDREN[bit_value]);
|
||
|
child = point->CHILDREN[bit_value];
|
||
|
if (child != 0 && BIT_OF(MASK(child), pivot_bit) == 1)
|
||
|
{
|
||
|
// Some leaf in the same subtree as the removed leaf cares about
|
||
|
// the value of this node's pivot bit; therefore, this node
|
||
|
// still has reason to exist. Update its fields to reflect the
|
||
|
// change in one of its subtrees.
|
||
|
child0 = point->CHILDREN[0];
|
||
|
child1 = point->CHILDREN[1];
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
VALUE(point)[index0] =
|
||
|
VALUE(child0)[index0] | VALUE(child1)[index0];
|
||
|
MASK(point)[index0] =
|
||
|
MASK(child0)[index0] | MASK(child1)[index0];
|
||
|
IMASK(point)[index0] =
|
||
|
IMASK(child0)[index0] & IMASK(child1)[index0];
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// No leaf in the same subtree as the removed leaf cares about
|
||
|
// the value of this node's pivot bit; therefore, there is no
|
||
|
// longer any reason for this node to exist. Have the other
|
||
|
// subtree take the current node's place in the tree; call
|
||
|
// remove() to remove the unneeded subtree; and delete the
|
||
|
// current node.
|
||
|
*ppoint = point->CHILDREN[1 - bit_value];
|
||
|
if (child != 0)
|
||
|
{
|
||
|
eliminate(rhizome, child);
|
||
|
}
|
||
|
GpcFreeMem(point, NAT_TAG_RHIZOME);
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// The leaf to be removed does not care about this node's pivot bit;
|
||
|
// therefore, we must recurse on both of the current node's
|
||
|
// children. This node must still be necessary, since we have not
|
||
|
// removed any leaf which cares about this node's value. So we
|
||
|
// update its fields to reflect the change in its two subtrees.
|
||
|
node_remove(rhizome, leaf, &point->CHILDREN[0]);
|
||
|
node_remove(rhizome, leaf, &point->CHILDREN[1]);
|
||
|
child0 = point->CHILDREN[0];
|
||
|
child1 = point->CHILDREN[1];
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
VALUE(point)[index0] =
|
||
|
VALUE(child0)[index0] | VALUE(child1)[index0];
|
||
|
MASK(point)[index0] =
|
||
|
MASK(child0)[index0] | MASK(child1)[index0];
|
||
|
IMASK(point)[index0] =
|
||
|
IMASK(child0)[index0] & IMASK(child1)[index0];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// The current node is a leaf node.
|
||
|
if (point == leaf)
|
||
|
{
|
||
|
// The current node is the leaf to be removed; therefore, remove it
|
||
|
// from chain of godparents.
|
||
|
*ppoint = leaf->GODPARENT;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// The current node is not leaf to be removed. Therefore, if this
|
||
|
// node has a godparent, then recurse on that godparent. If this
|
||
|
// node does not have a godparent, then the to-be-removed leaf
|
||
|
// either already was removed by a different path, or it was never
|
||
|
// inserted to begin with. The latter might be the case if remove()
|
||
|
// was called from the catch clause of insert().
|
||
|
if (point->GODPARENT != 0)
|
||
|
{
|
||
|
node_remove(rhizome, leaf, &point->GODPARENT);
|
||
|
}
|
||
|
// We are now popping back up the recursion stack. If this node
|
||
|
// does not have a godparent, or if it did but it does not anymore,
|
||
|
// then initialize imask to mask; otherwise, copy the godparent's
|
||
|
// value of imask. Since the godparent chain follows a strict
|
||
|
// hierarchy, and since imask is formed by successive conjunction,
|
||
|
// all leaves in any given godparent chain will have the same value
|
||
|
// of imask, namely the mask value of the highest god-ancestor.
|
||
|
if (point->GODPARENT == 0)
|
||
|
{
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
IMASK(point)[index0] = MASK(point)[index0];
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
IMASK(point)[index0] = IMASK(point->GODPARENT)[index0];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Replicate all nodes in a subtree which do not care about the value of
|
||
|
// pivot_bit.
|
||
|
//
|
||
|
static RhizomeNode *
|
||
|
replicate(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode *source,
|
||
|
int pivot_bit)
|
||
|
{
|
||
|
int index0, current_bit;
|
||
|
RhizomeNode *new_node, *child0, *child1;
|
||
|
// If this routine were fully recursive, the following while statement
|
||
|
// would be an if statement. However, recursion has been replaced by
|
||
|
// iteration where possible, so the following code loops until bottoming
|
||
|
// out when a leaf node is reached.
|
||
|
while (source->pivot_bit < rhizome->keybits)
|
||
|
{
|
||
|
if (BIT_OF(IMASK(source->CHILDREN[0]), pivot_bit) == 0)
|
||
|
{
|
||
|
if (BIT_OF(IMASK(source->CHILDREN[1]), pivot_bit) == 0)
|
||
|
{
|
||
|
// Both subtrees contain leaves which do not care about the
|
||
|
// pivot bit; therefore, we may need to make a copy of the
|
||
|
// current node. It is not guaranteed that we need to make
|
||
|
// a copy, since it may be a common leaf in both subtrees
|
||
|
// that does not care about the pivot bit. This may happen
|
||
|
// for a leaf which is a godparent of two leaves, one in each
|
||
|
// subtree. Recurse on each child and examine results.
|
||
|
child0 = replicate(rhizome, source->CHILDREN[0], pivot_bit);
|
||
|
if (child0 == 0)
|
||
|
{
|
||
|
// Memory could not be allocated for the child replica.
|
||
|
// Therefore, we abort the replication process and pass an
|
||
|
// indication of failure op the stack.
|
||
|
return 0;
|
||
|
}
|
||
|
child1 = replicate(rhizome, source->CHILDREN[1], pivot_bit);
|
||
|
if (child1 == 0)
|
||
|
{
|
||
|
// Memory could not be allocated for the child replica.
|
||
|
// Therefore, we abort the replication process, eliminate
|
||
|
// the other child replica, and pass an indication of
|
||
|
// failure op the stack.
|
||
|
eliminate(rhizome, child0);
|
||
|
return 0; // return null pointer
|
||
|
}
|
||
|
current_bit = source->pivot_bit;
|
||
|
if (BIT_OF(MASK(child0), current_bit) == 1)
|
||
|
{
|
||
|
if (BIT_OF(MASK(child1), current_bit) == 1)
|
||
|
{
|
||
|
// Both replicated child subtrees contain leaves which
|
||
|
// care about the current node's bit. Since any node
|
||
|
// which is a godparent of nodes in both subtrees could
|
||
|
// not possibly care about the current node's bit, we
|
||
|
// know that we need to make a copy of the current node.
|
||
|
NEW_RhizomeNode(&new_node);
|
||
|
if (new_node == 0)
|
||
|
{
|
||
|
// Memory could not be allocated for this new node.
|
||
|
// Therefore, we have to eliminate both children
|
||
|
// and pass an indication of failure up the stack.
|
||
|
eliminate(rhizome, child0);
|
||
|
eliminate(rhizome, child1);
|
||
|
return 0; // return null pointer
|
||
|
}
|
||
|
for (index0 = 0; index0 < rhizome->keybytes; index0++)
|
||
|
{
|
||
|
VALUE(new_node)[index0] =
|
||
|
VALUE(child0)[index0] | VALUE(child1)[index0];
|
||
|
MASK(new_node)[index0] =
|
||
|
MASK(child0)[index0] | MASK(child1)[index0];
|
||
|
IMASK(new_node)[index0] =
|
||
|
IMASK(child0)[index0] & IMASK(child1)[index0];
|
||
|
}
|
||
|
new_node->pivot_bit = current_bit;
|
||
|
new_node->CHILDREN[0] = child0;
|
||
|
new_node->CHILDREN[1] = child1;
|
||
|
return new_node;
|
||
|
}
|
||
|
// Child 0's subtree contains a leaf that cares about the
|
||
|
// current bit; however, child 1's subtree does not. Thus,
|
||
|
// all leaves which are in child 1's subtree are also in
|
||
|
// child 0's subtree, so we only need to keep the latter.
|
||
|
// We therefore eliminate child 1's subtree, and we return
|
||
|
// child 0 as the new subtree at this location, since we
|
||
|
// do not need to create a new branch node here.
|
||
|
eliminate(rhizome, child1);
|
||
|
return child0;
|
||
|
}
|
||
|
// Child 0's subtree does not contain a leaf that cares about
|
||
|
// the current node's bit. Thus, all leaves which are in child
|
||
|
// 0's subtree are also in child 1's subtree, so we only need to
|
||
|
// keep the latter. We therefore eliminate child 0's subtree,
|
||
|
// and we return child 1 as the new subtree at this location,
|
||
|
// since we do not need to create a new branch node here.
|
||
|
eliminate(rhizome, child0);
|
||
|
return child1;
|
||
|
}
|
||
|
// Child 0's subtree contains a leaf which does not care about the
|
||
|
// pivot bit; however, child 1's subtree does not. Therefore, we
|
||
|
// recurse on child 0. Rather than truly recursing, we update the
|
||
|
// value of source and iterate once through the while loop.
|
||
|
source = source->CHILDREN[0];
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
// Child 0's subtree does not contain a leaf which does not care
|
||
|
// about the pivot bit. Child 1's subtree must contain such a leaf,
|
||
|
// since the current node's subtree contains such a leaf. Thus, we
|
||
|
// recurse on child 1. Rather than truly recursing, we update the
|
||
|
// value of source and iterate once through the while loop.
|
||
|
source = source->CHILDREN[1];
|
||
|
}
|
||
|
}
|
||
|
// A leaf node has been reached. We now iterate through the godparents of
|
||
|
// the leaf until we find one which does not care about the pivot bit.
|
||
|
// Once we find it, we know that all godparents of that leaf also do not
|
||
|
// care about the pivot bit, since the godparents are arranged in a strict
|
||
|
// hierarchy. We thus return the first leaf found which does not care about
|
||
|
// the value of the pivot bit.
|
||
|
while (BIT_OF(MASK(source), pivot_bit) == 1)
|
||
|
{
|
||
|
source = source->GODPARENT;
|
||
|
}
|
||
|
return source;
|
||
|
}
|
||
|
|
||
|
// Eliminate an entire subtree.
|
||
|
//
|
||
|
static void
|
||
|
eliminate(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode *point)
|
||
|
{
|
||
|
RhizomeNode *child;
|
||
|
// Partial recursion removal. The while loop takes the place of one of the
|
||
|
// recursive calls to eliminate(). We eliminate each node and recursively
|
||
|
// eleminate each subtree under the node. We do not eliminate leaves, since
|
||
|
// there is only one copy of each leaf stored in the entire structure.
|
||
|
while (point->pivot_bit < rhizome->keybits)
|
||
|
{
|
||
|
eliminate(rhizome, point->CHILDREN[0]);
|
||
|
child = point->CHILDREN[1];
|
||
|
GpcFreeMem(point, NAT_TAG_RHIZOME);
|
||
|
point = child;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Coalesce leaves of subtree into a linked list and eliminate subtree. This
|
||
|
// routine is called by the destructor so that it can deallocate the leaf nodes
|
||
|
// after the branch nodes are eliminated.
|
||
|
//
|
||
|
static void
|
||
|
coalesce(
|
||
|
Rhizome *rhizome,
|
||
|
RhizomeNode **leaf_list,
|
||
|
RhizomeNode *point)
|
||
|
{
|
||
|
RhizomeNode *child, *godparent;
|
||
|
// Partial recursion removal. This while loop takes the place of one of
|
||
|
// the recursive calls to coalesce(). This performs an inorder traversal.
|
||
|
// We delete each branch node after we have visited it, just as in the
|
||
|
// eliminate() routine.
|
||
|
while (point->pivot_bit < rhizome->keybits && point->pivot_bit >= 0)
|
||
|
{
|
||
|
coalesce(rhizome, leaf_list, point->CHILDREN[0]);
|
||
|
child = point->CHILDREN[1];
|
||
|
GpcFreeMem(point, NAT_TAG_RHIZOME);
|
||
|
point = child;
|
||
|
}
|
||
|
// Once we have found a leaf, we search through the chain of godparents,
|
||
|
// adding to the list each leaf node that is not already in the list.
|
||
|
// A pivot_bit of -1 indicates that the leaf is already in the list.
|
||
|
// If a leaf is in the list, then so are all of its godparents.
|
||
|
while (point != 0 && point->pivot_bit >= 0)
|
||
|
{
|
||
|
godparent = point->GODPARENT;
|
||
|
point->pivot_bit = -1;
|
||
|
point->GODPARENT = *leaf_list;
|
||
|
*leaf_list = point;
|
||
|
point = godparent;
|
||
|
}
|
||
|
}
|