:::::::::::::: skiplist.c :::::::::::::: /* Skiplist module -- Warwick */ /* ------------------------------------------------------------------------- *\ Skiplists, also known as self-balancing trees, have most of the advantages of normal trees, but are a little more efficient with memory, and have considerably better deletion properties. Insertion is about the same. In fact, you are able to 'tune' skiplists to particular situations with the 'attrition rate' parameter. This determines the memory/speed tradeoff. A 'Header' consists essentially of a vector of pointers. The topmost [0] pointer leads to a conventional sorted linked list of nodes. The second, however leads to a list of some of the items in the second list, which in addition to the linked list 'next' have a 'next[1]' pointer, forming the second level list. A third level has some of the items in the second list, and so on. An item in the first level only has one 'next' pointer. An item in the second level has two 'next's. An item in the third level has three 'next's. and so on. Searching requires a look along the deepest level, until the item is found and returned, or something later than the item is found, when the previous item is taken, and its link to the next level is used to search for the item, and so on, until the item is either found, or the plain linked list is exhausted. Adding is a similar process, but depending on whether duplicates are allowed, the addition will either fail, or a set of pointers to the deepest level will be gained. The order of the future item is then randomly determined, and it is slipped into the lists it will need to span. Deletion is simply a matter of finding the item and tying the pointers around it, before freeing the item. * ------------------------------------------------------------------------- * Skipnode arrays have the simple chain in [0] No node-depth is kept since it can be inferred from the header. Attrition rate specifies the approximate ratio of items on one level to items on the lower level. Ie, a rate of '2' will approximate to a binary tree, 3 to a ternary tree. Fractional rates are possible but unimplemented. When the item count of the top level exceeds the attrition rate, a new level will be added. No level removal occurs at present, although this may be added later. \* ------------------------------------------------------------------------- */ #include /* For rand() only, I think. */ #include /* For memcpy. */ #ifdef AMIGA #include #include #else #define AllocMem(a,b) calloc(a) #define FreeMem(a,b) free(a) #define G(a) {} #endif /*--------------------------------------------------------------------------*/ typedef struct SKIPNODE { void *data; /* Pointer to list-item. */ struct SKIPNODE *vec[0]; /* Arbitrary depth vector */ } SKIPNODE; typedef struct SKIPHEADER { int (*cmp)(void *, void *); /* Comparison function */ unsigned char attrition; /* Attrition rate */ unsigned char allocdepth; /* Available max index of vector */ unsigned char depth; /* Used max depth of vector */ /* NB. vec[depth] and vec[allocdepth] are vaild */ /* ie, they count from 0. */ SKIPNODE *initvec; /* Pointer to 'null' item of full depth. */ long *count; /* Count of items in each list. */ } SKIPHEADER; #define unless(a) if(!(a)) /*----------------------------------------------------------------------------*/ extern void skip_print( SKIPHEADER *head, void (*printf)(char *,...), char *(*node_to_string)(void*) ) { int depth, level; if ( !head ) { printf("skip_print: Null 'head' supplied.\n"); return; } depth = head->depth; printf("\nHeader. Attrn: %ld AllocD: %ld Depth: %ld\n\n", head->attrition, head->allocdepth, head->depth); for ( level=0; level<=depth; level++ ) { SKIPNODE *base = head->initvec->vec[0], *chain = head->initvec->vec[level]; printf("%3ld(%3ld) :",level,head->count[level]); while ( chain ) { char *buf; if ( node_to_string ) buf=DataToStr(base->data); else buf=base->data; if ( chain == base ) { printf("-->%s",buf); /* Print string at head */ chain=chain->vec[level]; } else { printf("---"); while (*buf++) printf("-"); } base=base->vec[0]; } printf("--|\n"); } printf("Done.\n"); } /*----------------------------------------------------------------------------*/ static void free_skip_node ( SKIPNODE *node, int deep ) { if ( node ) FreeMem( node, sizeof(SKIPNODE) + (deep+1)*sizeof(SKIPNODE*)); } static void free_skip_count ( long *count, int deep ) { if ( count ) FreeMem( count, (deep+1)*sizeof(long)); } static SKIPNODE *new_skip_node ( int deep ) { return AllocMem( sizeof(SKIPNODE) + (deep+1)*sizeof(SKIPNODE*), MEMF_CLEAR); } static long *new_skip_count ( int deep ) { return AllocMem( (deep+1)*sizeof(long), MEMF_CLEAR); } /*--------------------------------------------------*/ extern void free_skip_head ( SKIPHEADER *head ) { G(bug("free_skip_head: Entry\n")); if ( head ) { if ( head->initvec ) { free_skip_node( head->initvec, head->allocdepth ); head->initvec = NULL; } if ( head->count ) { free_skip_count( head->count, head->allocdepth ); head->count = NULL; } FreeMem( head, sizeof(SKIPHEADER)); } G(bug("free_skip_head: Entry\n")); } /* Trivial default for cmp==NULL: first pointer in node is a string */ static int default_cmp ( void *a, void *b ) { return strcmp(*(char **)a, *(char **)b); } extern SKIPHEADER *new_skip_head( int (*cmp)(void*,void*) , int attrition ) { SKIPHEADER *head; G(bug("new_skip_head: Entry\n")); unless( head=AllocMem( sizeof(SKIPHEADER) , MEMF_CLEAR ) ) return NULL; head->cmp = (cmp?cmp:&default_cmp); head->attrition = attrition; head->allocdepth = 5; /* Initial allocated depth */ head->depth = 0; unless( head->initvec = new_skip_node(head->allocdepth) ) { free_skip_head(head); return NULL; } unless( head->count = new_skip_count(head->allocdepth) ) { free_skip_head(head); return NULL; } G(bug("new_skip_head: ExitOK\n")); return head; } /*----------------------------------------------------------------------------*/ /* Adds a few more levels to the initial header vectors for SKIPNODEs and * counts. It may be possible to reduce too, but in practice all I'd do * is decrement head->depth. */ #define LEVEL_INC 10 static void skip_inc_depth ( SKIPHEADER *head ) { SKIPNODE *nuvec; long *nucount, depth=head->allocdepth; if ( head->depth != depth ) { head->depth++; return; } unless( nuvec = new_skip_node( depth+LEVEL_INC ) ) return; unless( nucount = new_skip_count( depth+LEVEL_INC ) ) { free_skip_node( nuvec, depth+LEVEL_INC ); return; } G(bug("got new vec and count.\n")); memcpy(nuvec, head->initvec, sizeof(SKIPNODE) + (depth+1)*sizeof(SKIPNODE*)); memcpy(nucount, head->count, (depth+1)*sizeof(long)); G(bug("copying done.\n")); free_skip_node( head->initvec, depth ); free_skip_count( head->count, depth ); G(bug("Freeing done.\n")); head->initvec = nuvec; head->count = nucount; head->allocdepth=depth+LEVEL_INC; head->depth++; G(bug("All done.\n")); } /*----------------------------------------------------------------------------*/ /* skip_seek builds an insertion vector 'deep' items deep for the given item, * which may or may not exist. * * On return, the vector points to the SKIPNODEs preceeding the proposed or * existing site, ready for removal/insertion. The data entry is NULL. * Returns NULL if it couldn't make vec, or it couldn't find the item. * Examine vec if clarification is required. * * Removal MUST request a head->depth-deep vector. * * Note that if a precise match is found, that item will be sought by address * in lower lists, to ensure that items with duplicates are correctly removed. */ static void *skip_seek( void * find, SKIPNODE **vec, int deep, SKIPHEADER *head ) { SKIPNODE *node = head->initvec, *back = head->initvec; int depth = head->depth, cmp; unless( *vec=new_skip_node( deep ) ) return NULL; G(bug("skip_seek: Entry\n")); while ( depth>=0 ) { if ( depth <= deep ) (*vec)->vec[depth]=back; if ( node=back->vec[depth] ) { cmp = head->cmp(find,node->data); if ( cmp < 0 ) depth--; /* No further match possible */ else if ( cmp > 0 ) back=node; /* Match is later on (if it exists) */ else { /* Found a precise match. Must match this particular item now. */ /* But we know it exists, so we needn't use the cmp() function. */ SKIPNODE *hit=node; G(bug("skip_seek: Exit found. node=%lx. Completing\n",node)); depth--; while ( depth>=0 ) { if ( depth <= deep ) (*vec)->vec[depth]=back; if ( node=back->vec[depth] ) { if ( node == hit ) depth--; /* No further match possible */ else back=node; /* Match is later on (if it exists) */ } else depth--; } G(bug("skip_seek: Exit found. node=%lx\n",node)); return node->data; } } else depth--; } G(bug("skip_seek: Exit notfound\n")); return NULL; } /*----------------------------------------------------------------------------*/ extern void *skip_find( void * find, SKIPHEADER *head ) { if ( head ) { SKIPNODE *node = head->initvec, *back = head->initvec; int depth = head->depth; while ( depth>=0 ) if ( node=back->vec[depth] ) { signed char cmp = head->cmp(find,node->data); if ( cmp < 0 ) depth--; /* No match possible on this level */ else if ( cmp > 0 ) back=node; /* Match is later on (if it exists) */ else return node->data; /* Found a match! */ } else depth--; } return NULL; } /*------------------------------------------------------------*/ /* skip_add adds a vector into the skiplist and points it to data. * Data is returned, unless: * DUPS_DATA and the actual entry for addition is already in there ==NULL * DUPS_NONE and the actual entry for addition is already in there ==NULL * DUPS_NONE and the value of the entry for addition is already in there. * NULL also indicates insufficient memory * * BUG: There's no way to distinguish an actual match from a memory problem, * apart from looking for it. This shouldn't be a problem in practice. */ #define DUPS_ANY 0 #define DUPS_DATA 1 #define DUPS_NONE 2 static void *skip_add_int( void *data, int dups, SKIPHEADER *head ) { SKIPNODE *vec; void *found; int deep=0; for ( ; deep<=head->depth && ((rand()>>16)/91)%head->attrition==0; deep++); if ( deep>head->depth ) deep=0; /* not too much in top level. */ found=skip_seek( data, &vec, deep, head ); if ( !vec ) { return NULL; } if ( dups ) { if ( found==data ) { free_skip_node( vec, deep ); return NULL; } if ( dups == DUPS_NONE ) if ( found ) { free_skip_node( vec, deep ); return found; } } G(bug(" head->count[head->depth %ld] %ld > head->attrition %ld\n", head->depth,head->count[head->depth],head->attrition)); if ( head->count[head->depth] > head->attrition ) skip_inc_depth( head ); G( int i; for ( i=0; i<=deep; i++ ) bug("vec[%ld] == %lx\n",i,vec->vec[i]); ) vec->data=data; for ( ; deep>=0 ; deep-- ) { SKIPNODE *pt=vec->vec[deep]; G(bug("Making %lx[%ld] --> %lx[%ld] --> %lx\n", pt, deep, vec, deep, pt->vec[deep])) vec->vec[deep]=pt->vec[deep]; pt->vec[deep]=vec; (head->count[deep])++; } return data; } /*---------------------*/ /* Add item, duplicate data not allowed. */ extern void *skip_add ( void *data, SKIPHEADER *head ) { return skip_add_int( data, DUPS_NONE, head ); } /* Add item, duplicate data values allowed, but not duplicate items. */ extern void *skip_insert ( void *data, SKIPHEADER *head ) { return skip_add_int( data, DUPS_DATA, head ); } /* Add item, no restrictions on data added. */ extern void *skip_jam ( void *data, SKIPHEADER *head ) { return skip_add_int( data, DUPS_ANY, head ); } /*---------------------*/ /* skip_remove removes a vector from the skiplist. * Returns data or NULL if unfound. */ extern void *skip_remove( void *data, SKIPHEADER *head ) { SKIPNODE *vec, *me; void *found; int deep=head->depth, medeep=0; G(bug("skip_remove: Entry\n")); found=skip_seek( data, &vec, deep, head ); if ( !found ) { free_skip_node( vec, deep ); G(bug("skip_remove: BadExit\n")); return found; } G( int i; for ( i=0; i<=deep; i++ ) bug("vec[%ld] == %lx\n",i,vec->vec[i]); ) me=vec->vec[0]->vec[0]; for ( ; deep>=0 ; deep-- ) { SKIPNODE *pt=vec->vec[deep]; G(bug("Making %lx[%ld] -X-> %lx[%ld] --> %lx\n", pt, deep, vec, deep, pt->vec[deep])) if ( pt ) if ( pt->vec[deep] == me ) { if ( !medeep ) medeep=deep; pt->vec[deep] = me->vec[deep]; (head->count[deep])--; } } free_skip_node( vec, head->depth ); free_skip_node( me, medeep ); G(bug("skip_remove: GoodExit\n")); return found; } /*------------------------------------------------------------*/ /* Destroys a whole list quickly. * Takes a list header and a destructor for the *data pointer. */ extern void skip_destruct ( void (*kill)(void *), SKIPHEADER *head ) { SKIPNODE *pt = head->initvec, *pt2 = pt->vec[0]; int depth = head->depth, deep; while (pt2) { if ( kill ) kill(pt2->data); /* That's the easy bit... :3 */ for ( deep = 0; deep<=depth && pt2 == pt->vec[deep]; deep++ ) pt->vec[deep]=pt->vec[deep]->vec[deep]; free_skip_node( pt2, deep-1 ); pt2=pt->vec[0]; } free_skip_head( head ); } /*------------------------------------------------------------*/ /* Pointer-to pointer-to is pointer to SKIPNODE, * first entry of which is pointer to data. ANSI says this is safe. * * Returns pointer-to pointer-to the data entry. NULL for empty. */ extern void **skip_first ( SKIPHEADER *head ) { return head?&(head->initvec->vec[0]->data):(void**)0; /* Cast is wrong, SAS RQs it. */ } /* Returns pointer-to pointer-to the next data entry from value from * skip_first or skip_next. NULL for end-of-list. */ extern void **skip_next ( void **node ) { if ( !node || !((SKIPNODE*)node)->vec) return NULL; return &(((SKIPNODE*)node)->vec[0]->data); } /*------------------------------------------------------------*/ :::::::::::::: skiplist.h :::::::::::::: /* Skiplist module -- Warwick */ /* ------------------------------------------------------------------------- *\ Skiplists, also known as self-balancing trees, have most of the advantages of normal trees, but are a little more efficient with memory, and have considerably better deletion properties. Insertion is about the same. In fact, you are able to 'tune' skiplists to particular situations with the 'attrition rate' parameter. This determines the memory/speed tradeoff. A 'Header' consists essentially of a vector of pointers. The topmost [0] pointer leads to a conventional sorted linked list of nodes. The second, however leads to a list of some of the items in the first list, which in addition to the linked list 'next' have a 'next[1]' pointer, forming the second level list. A third level has some of the items in the second list, and so on. An item in the first level only has one 'next' pointer. An item in the second level has two 'next's. An item in the third level has three 'next's. and so on. A parameter called 'attrition rate' determines how likely it is that an item will be added just to the simple chain (most likely), or to higher chains. A value of n approximates an n-ary tree. Searching requires a look along the deepest level, until the item is found and returned, or something later than the item is found, when the previous item is taken, and its link to the nexl level is used to search for the item, and so on, until the item is either found, or the first level list is exhausted. Adding is a similar process, but depending on whether duplicates are allowed, the addition will either fail, or a set of pointers to the deepest level will be gained. The order of the future item is then randomly determined, and it is slipped into the lists it will need to span. Deletion is simply a matter of finding the item and tying the pointers around it, before freeing the item. * ------------------------------------------------------------------------- * Hopefully, use is fairly clear. Inserting items in multiple lists is similarly simple, although clearly, skip_destruct should only be given destructors for data not previously deallocated. int Mycmp ( void *a, void *b ) { return strcmp(((MYNODE)a)->name,((MYNODE)b)->name); } char *MyShow ( void *a ) { return ((MYNODE)a)->name; } void MyKiller( void *a ) { if (a) free(((MYNODE)a)->name), free(a); } main () { SKIPHEADER *MyList=new_skip_head( Mycmp, 4); MYNODE *node=0; while ( getitem(&node) ) printf( skip_add(node,MyList) ? node=0,"Added.\n" : "Not added.\n" ); skip_print( MyList, printf, MyShow ); while ( getitem(&node) ) printf( skip_find(node,MyList) ? "Found that.\n" : "Not found.\n" ); skip_destruct( MyKiller, MyList ); } \* ------------------------------------------------------------------------- */ /* SKIPHEADER is the 'handle' on the list. */ typedef struct SKIPHEADER SKIPHEADER; /* skip_error is set after most functions. */ enum { SKIP_NOERROR=0, /* No worries. */ SKIP_NOMEM, /* malloc failed during insertion. */ SKIP_SAMEVALUE, SKIP_SAMEITEM, /* Insertion found duplicate. */ SKIP_BADPARAMS, /* A function was given null pointers */ SKIP_CORRUPT /* An insert/remove went sadly wrong */ }; extern char skip_error; /*--------------------------------------------------------------------------*/ /* skip_print is a slightly specialised routine, mainly for debugging. * It shows the internal structure of the skiplist. You should supply * a header, a printf-like routine for printing, and a function which * returns a pointer to a static string representation of the data * sent to it. * * If NULL, node_to_string assumes that the first item in *data is a pointer to * a null-terminated string. */ extern void skip_print( SKIPHEADER *head, void (*printf)(char *,...), char *(*node_to_string)(void*) ); /*--------------------------------------------------------------------------*/ /* Creator of SKIPHEADERs. You shall provide a comparison * function, much as required by qsort() in stdlib.h. * * The attrition rate gives the approximate order of tree to which * the list aspires. */ extern SKIPHEADER *new_skip_head( int (*cmp)(void*,void*) , int attrition ); /*--------------------------------------------------------------------------*/ /* skip_destruct destroys a whole list, including header, quickly. * Takes a list header and a destructor for the *data pointer. * If kill is NULL, it is assumed that the data needs no deallocation. * (For lists sharing items, for instance) */ extern void skip_destruct ( void (*kill)(void *), SKIPHEADER *head ); /* You should almost certainly use skip_destruct() rather than free_skip_head() * since skip_destruct() kills both the header and any items in the list * if non-empty. It is provided mainly for symmetry with new_skip_head(). */ extern void free_skip_head ( SKIPHEADER *head ); /*--------------------------------------------------------------------------*/ /* Search for a match of 'find' in the list. If found, return a pointer to * the entry. Otherwise, return NULL. */ extern void *skip_find( void * find, SKIPHEADER *head ); /* skip_add: Add item, duplicate data-values and data-items not allowed. * Returns extant data, (SKIP_SAMEVALUE) * added data or (skip_error unchanged) * NULL. (SKIP_NOMEM or SKIP_SAMEITEM) */ /* skip_insert: Add item, duplicate data-values allowed, but not data-items. * Returns added data or NULL (SKIP_NOMEM or SKIP_SAMEITEM). */ /* skip_jam: Add item, no restrictions on data added. * Returns added data or NULL (SKIP_NOMEM). */ extern void *skip_add ( void *data, SKIPHEADER *head ); extern void *skip_insert ( void *data, SKIPHEADER *head ); extern void *skip_jam ( void *data, SKIPHEADER *head ); /*--------------------------------------------------------------------------*/ /* skip_remove removes a vector from the skiplist. * Returns data for free()ing (or whatever), or NULL if unfound. */ extern void *skip_remove( void *data, SKIPHEADER *head ); /*--------------------------------------------------------------------------*/ /* Ordered-list interrogation routines: * * Both return a pointer to a structure, the first item of which is a * pointer to the data item. Both return NULL for end-of-list or NULL. * * You may record returned values if you wish, but be certain that the item is * still validly in the list before subsequent use with skip_next(). * * An item's position in the list will always be current and sorted. * * skip_next() should be given a value returned by skip_first() or previous * skip_next()s. */ extern void **skip_first ( SKIPHEADER *head ); extern void **skip_next ( void **node );