/* Copyright (c) 2008 James Dow Allen. All Rights Reserved; see License. */ /* * This is Tight Hash, a hash table method * which is *very* efficient in both time and space. * NB: It operates with integer keys that fit into 64 * (or 32) bits, and therefore *CAN NOT* be used * (at least without major change) with the many applications * whose keys are variable-lenght strings. * * The size parameters are constants found in a parameter * file -- the fact that they're constants improves speed. * Yes, this will make it more difficult (though not *that* * difficult) to use multiple tables with different size * parameters in a single application. * */ #include #include #include /* We have two unrelated demonstration applications * in this directory; hence the need for the if-else. */ #ifdef WORDTEST #include "parm_tword.h" #else #include "parm_tpeg.h" #endif #include "bitaccess.h" #include "tighthash.h" #include "genhash.h" /* * Here's an overview of Tight Hash Table. It's only very * loosely based on Valmari's "Very Tight Hash Table." * (In fact, essentially none of the following details * are applicable to Valmari's Hash Table.) * * Conceptually, Tight Hash Table is like a simple chained hash table * with linked lists, but we want to minimize overhead * for "pointers" (usually to 0, 2 or 3 bits). * Doing this efficiently gets complicated. * * Each table bucket ('Bgroup') has 8 bits of allocation flags, * M "main" slots, and A+B overflow slots divided into * an "A block" and a "B block". * The main slots are only used for entries that hash to that * bucket. When the main slots are all used up, further * entries are placed into the A or B block in this bucket, * or into the A or B block in a nearby bucket. The eight * bits of allocation flags serve as three pointers to * indicate which blocks follow the M, A and B blocks. * When all "nearby" A and B blocks are used (this is fairly rare), * a (distant) block is selected and a pointer to it is * placed in the last slot of the from-block. This means * *two* slots will be immediately used in the pointed-to * block: one for the new insertion, and one for the old * entry just "bumped" to mke room for the pointer. * When slots aren't large enough to contain a pointer, * "#define SLOTPP 2" will turn on, and *two* entries will * need to be bumped. * The code needs to make conditional branches on these allocation * flags *before* it gets to end-of-block. This makes * it a bit confusing. * * I mentioned elsewhere that Tight Hash supports "stable * element tagging" (useful for space-efficient hash trees). * The bumping (dynamic movement) of entries complicates * but does not prevent this. This "tagging" is not * demonstrated in this source module. * * Normally you will let the 'genparms' executable pick * good choices for {M+A+B, A, B} * ({30, 4, 3} might be typical values) * but there are two cautions: * 1. since a Bgroup is an array of longs (or long longs) * you will have wasted-space (padding) when a bucket's * bits number slightly more than a multiple of 32 (or 64). * 'genparms' warns you of this with a "Bucket utilization" * comment, and you can rerun with a different value of * #Blk (M+A+B). * 2. the "optimal" A,B values output by 'genparms' assume * excellent hash function. Increase A,B slightly if your * hash function is poor (causes more long collision chains). * To achieve max Speed AND max storage utilization AND * portability, it is hard to unionize the data and flags * as we would like. (Using separate arrays would cause * an extra cache miss.) But it is possible: we simply * need an Endianness/compiler definition for a padded * bitfield structure (struct bg2bf). We expect genparms * to create such a structure, than access the bitfield * flags using macro following. */ union bgbf { unsigned long bgbf_b[1]; struct bg2bf bgbf_s[1]; /* embedded inside is unsigned long tmmai:2, tmofa:3, tmofb:3; */ }; #define B_LAST (ALLOSZ(TOTSIZE*BENTRY + 8) - 1) #define FLAGS(tp) (((union bgbf *)((tp)->ddd + B_LAST))->bgbf_s) /* Definitions for tmmai */ #define TMD_EMPTY 0 #define TMD_POINT 1 #define TMD_USEB 2 #define TMD_USEA 3 /* Definitions for tmofb, tmofa */ #define TMB_EMPTY 0 #define TMB_POINT 7 #define TMA_USEB 6 /* We allow deletions but, to keep this sample code simple, * do not reuse deleted space. */ #define DELETED ((Bgroup)1) /* Deletion Marker */ /* Bucket slots are partitioned into Main, A, and B blocks */ #define MBSIZE (TOTSIZE - ABSIZE - BBSIZE) /* Scramble a grid, for good hashing */ #define SCRAMBLE(g) SCRAM(g, IGBITS, REV_FMPY, REV_PRIM) /* Unscramble a scrambled grid */ #define UNSCRAMBLE(g) SCRAM(g, IGBITS, REV_IMPY, REV_PRIM) #define setslot(p, n, v) Jsetslot(p, n, v, BENTRY) #define setkey(p, n, v) Jsetkey(p, n, v, BENTRY, QSIZE) #define setpay(p, n, v) Jsetpay(p, n, v, PSIZE, BENTRY) #define getslot(p, n) Jgetslot(p, n, BENTRY) #define getkey(p, n) Jgetkey(p, n, PSIZE, BENTRY) #define getpay(p, n) Jgetpay(p, n, PSIZE, QSIZE) struct hashtab *makehashtable() { struct hashtab *p; p = malloc(sizeof *p); if (!p) { fprintf(stderr, "Malloc(%d) didn't alloc.\n", sizeof *p); exit(EXIT_MEMORY); } memset(p, 0, sizeof *p); return p; } /* Just a hook in case you want module to * print statistics when application exits. */ void showhashstat(struct hashtab *tabp) { } int getnextg(struct hashtab *tabp, int resetf, Key *gp) { struct tabblock *tp; int tval, tof; Key g; if (resetf) { tabp->pix = -1; nxt_pix: tabp->slotno = 0; do { tabp->pix += 1; if (tabp->pix >= TABSZ01) return 0; tval = getslot(tabp->tb[tabp->pix].ddd, 0); } while (tval == 0); tabp->ofix = tabp->pix; goto normexit; } skipthis: tp = &tabp->tb[tabp->ofix]; tabp->slotno += 1; switch (tabp->slotno) { default: ordinary: tval = getslot(tp->ddd, tabp->slotno); if (tval == 0) goto nxt_pix; normexit: g = tval >> PSIZE; if (g == DELETED) goto skipthis; g = (g - SKIPQUO) * (Key)TABSZ01 + tabp->pix; *gp = UNSCRAMBLE(g); return OLDEN(tval & PMASK); case MBSIZE + ABSIZE - SLOTPP: if (FLAGS(tp)->tmofa != TMB_POINT) goto ordinary; goto lookpoint; case MBSIZE + ABSIZE + BBSIZE - SLOTPP: if (FLAGS(tp)->tmofb != TMB_POINT) goto ordinary; goto lookpoint; case MBSIZE - SLOTPP: if (FLAGS(tp)->tmmai != TMD_POINT) goto ordinary; lookpoint: #if SLOTPP==2 tval = getslot(tp->ddd, tabp->slotno) - 1; tval = tval << BENTRY | getslot(tp->ddd, tabp->slotno + 1); #else tval = getslot(tp->ddd, tabp->slotno) - 1; #endif tabp->slotno = tval & 1 ? MBSIZE + ABSIZE : MBSIZE; tval >>= 1; tabp->ofix = tval; simpexit: tp = &tabp->tb[tabp->ofix]; almordin: tval = getslot(tp->ddd, tabp->slotno); goto normexit; case MBSIZE - 0: switch (FLAGS(tp)->tmmai) { case TMD_EMPTY: goto nxt_pix; case TMD_POINT: fprintf(stderr, "Cant happen 1\n"); exit(EXIT_SOFTWARE); case TMD_USEB: tabp->slotno += ABSIZE; case TMD_USEA: break; } goto almordin; case MBSIZE + ABSIZE - 0: switch (tof = FLAGS(tp)->tmofa) { case TMB_EMPTY: goto nxt_pix; case TMB_POINT: fprintf(stderr, "Cant happen 3\n"); exit(EXIT_SOFTWARE); default: tabp->ofix = tabp->ofix - tof; goto simpexit; case TMA_USEB: goto almordin; } case MBSIZE + ABSIZE + BBSIZE - 0: switch (tof = FLAGS(tp)->tmofb) { case TMB_EMPTY: goto nxt_pix; case TMB_POINT: fprintf(stderr, "Cant happen 8\n"); exit(EXIT_SOFTWARE); default: tabp->ofix = tabp->ofix - tof; tabp->slotno -= BBSIZE; goto simpexit; } } } static long getfree(struct hashtab *tabp, int whichs, int incmode) { long bnum; bnum = tabp->nextavail[whichs] | !!whichs; switch (tabp->passno[whichs]) { default: for (; bnum < TABSZ01; bnum += incmode) { if (! getkey(tabp->tb[bnum].ddd, MBSIZE)) if (! getkey(tabp->tb[bnum].ddd, MBSIZE + ABSIZE)) if (! getkey(tabp->tb[bnum].ddd, MBSIZE - 3)) { reta: tabp->nextavail[whichs] = bnum + incmode; if (incmode == 2) return bnum & ~1; else return bnum << 1; } } tabp->passno[whichs] = 2; bnum = whichs; case 2: for (; bnum < TABSZ01; bnum += incmode) { if (! getkey(tabp->tb[bnum].ddd, MBSIZE)) { goto reta; } } tabp->passno[whichs] = 3; bnum = whichs; case 3: for (; bnum < TABSZ01; bnum += incmode) { if (! getkey(tabp->tb[bnum].ddd, MBSIZE + ABSIZE)) { tabp->nextavail[whichs] = bnum + incmode; if (incmode == 2) return bnum | 1; else return bnum << 1 | 1; } } break; } fprintf(stderr, "Getfree gave up\n"); showhashstat(tabp); exit(EXIT_EXCEED); } int find(struct hashtab *tabp, Key g, int val, hashop hop, int *tagp) { struct tabblock *tp; unsigned int jj, tval, tvalo, ovflag; unsigned int rem; Quoti quo; unsigned long bnum; int this_tmai; int firemo, fijjo; if (val < 0 || val > (int)PMASK) return -1; g = SCRAMBLE(g); rem = g % TABSZ01; quo = g / TABSZ01 + SKIPQUO; tp = &tabp->tb[rem]; for (jj = 0; ; jj++) { tval = getslot(tp->ddd, jj); if (tval == 0) { notfound: if (hop < HCreate) return 0; setnew: setslot(tp->ddd, jj, (Bgroup)quo << PSIZE | val); if (tagp) *tagp = 1 + rem * TOTSIZE + jj; return 0; } if (jj == MBSIZE - SLOTPP) break; if (tval >> PSIZE == quo) { isfound: firemo = rem; fijjo = jj; isxfound: tval &= PMASK; if (tagp) *tagp = 1 + firemo * TOTSIZE + fijjo; switch (hop) { case Hadd: case HCadd: val += tval; if (val > (int)PMASK) goto norep; else goto dorep; case HLrep: case HLmak: if (val >= (int)tval) goto norep; case HArep: case HAmak: dorep: setpay(tp->ddd, jj, val); return OLDEN(tval); case HHrep: case HHmak: if (val > (int)tval) goto dorep; case HLook: norep: return OLDEN(tval); case HDelete: setkey(tp->ddd, jj, DELETED); return OLDEN(tval); default: fprintf(stderr, "Unknown hashop %d\n", hop); exit(EXIT_INVOKE); } } } this_tmai = FLAGS(tp)->tmmai; if (this_tmai == TMD_POINT) goto ptrhandle; if (tval >> PSIZE == quo) goto isfound; #if SLOTPP==2 tvalo = tval; ++jj; tval = getslot(tp->ddd, jj); if (tval >> PSIZE == quo) goto isfound; #endif switch (this_tmai) { default: case TMD_POINT: fprintf(stderr, "Impossible case for this_tmai\n"); exit(EXIT_SOFTWARE); case TMD_EMPTY: if (hop < HCreate) { return 0; } else if (tval == 0) { goto setnew; } else if (getslot(tp->ddd, MBSIZE) == 0) { FLAGS(tp)->tmmai = TMD_USEA; jj = MBSIZE; goto setnew; } else if (getslot(tp->ddd, MBSIZE + ABSIZE) == 0) { FLAGS(tp)->tmmai = TMD_USEB; jj = MBSIZE + ABSIZE; goto setnew; } else { FLAGS(tp)->tmmai = TMD_POINT; jj = MBSIZE; goto comoset; } /* Not reached */ case TMD_USEA: jj += 1; break; case TMD_USEB: jj += ABSIZE + 1; break; } while (1) { tval = getslot(tp->ddd, jj); if (tval == 0) { goto notfound; } else if (jj == MBSIZE + ABSIZE - SLOTPP) { ovflag = FLAGS(tp)->tmofa; } else if (jj == MBSIZE + ABSIZE + BBSIZE - SLOTPP) { ovflag = FLAGS(tp)->tmofb; } else if (tval >> PSIZE == quo) { goto isfound; } else { ppjj: ++jj; continue; } if (ovflag == TMB_POINT) { ptrhandle: firemo = rem; fijjo = jj; tval -= 1; #if SLOTPP==2 tval = tval << BENTRY | getslot(tp->ddd, jj + 1); #endif jj = tval & 1 ? MBSIZE + ABSIZE : MBSIZE; #ifdef HALFADDR rem = (tval & ~1) | rem & 1; #else rem = tval >> 1; #endif tp = &tabp->tb[rem]; tval = getslot(tp->ddd, jj); if (tval >> PSIZE == quo) goto isxfound; else { #if SLOTPP==2 ++jj; tval = getslot(tp->ddd, jj); if (tval == 0) { goto notfound; } else if (tval >> PSIZE == quo) { ++fijjo; goto isxfound; } #endif goto ppjj; } } else if (tval >> PSIZE == quo) { goto isfound; } #if SLOTPP==2 tvalo = tval; ++jj; tval = getslot(tp->ddd, jj); if (tval >> PSIZE == quo) { goto isfound; } #endif ++jj; if (ovflag == TMB_EMPTY) { break; } if (jj == MBSIZE + ABSIZE) { if (ovflag == TMA_USEB) continue; } else { jj = MBSIZE + ABSIZE; } rem -= ovflag; tp -= ovflag; } if (hop < HCreate) { return 0; } else if (tval == 0) { jj -= 1; goto setnew; } #define OSET(n) \ if (((n) == 0 || tp >= tabp->tb + (n)) \ && getslot(tp[-n].ddd, MBSIZE + ABSIZE) == 0) { \ if (jj == MBSIZE + ABSIZE) \ FLAGS(tp)->tmofa = (n) ? (n) : TMA_USEB; \ else \ FLAGS(tp)->tmofb = (n); \ rem -= (n); \ tp -= (n); \ jj = MBSIZE + ABSIZE; \ goto setnew; \ } if (jj == MBSIZE + ABSIZE) { OSET(0); } else { OSET(6); } OSET(5); OSET(4); OSET(3); OSET(2); OSET(1); if (jj == MBSIZE + ABSIZE) { FLAGS(tp)->tmofa = TMB_POINT; } else { FLAGS(tp)->tmofb = TMB_POINT; } comoset: #ifdef HALFADDR bnum = getfree(tabp, rem & 1, 2); #else bnum = getfree(tabp, 0, 1); #endif #if SLOTPP==2 setslot(tp->ddd, jj - 2, (bnum >> BENTRY) + 1); setslot(tp->ddd, jj - 1, bnum & (1 << BENTRY) - 1); #else setslot(tp->ddd, jj - 1, bnum + 1); #endif jj = bnum & 1 ? MBSIZE + ABSIZE : MBSIZE; #ifdef HALFADDR rem = (bnum & ~1) | rem & 1; #else rem = bnum >> 1; #endif tp = &tabp->tb[rem]; #if SLOTPP==2 setslot(tp->ddd, jj, tvalo); setslot(tp->ddd, jj + 1, tval); #else setslot(tp->ddd, jj, tval); #endif jj += SLOTPP; goto setnew; } void makecuckoos(Key n, int qq, int paysz, int mincuck, int maxcuck, int numtabs, struct hashtab **p) { fprintf(stderr, "This isn't the Cuckoo system!\n"); exit(EXIT_SWCONF); } /* Return the maximum number of pointers possible in tree node * (doesn't depend on its argument!) */ int maxppn(struct hashtab *tabp) { return ((1 << QSIZE) - SKIPQUO) / TOTSIZE; }