💾 Archived View for gmi.noulin.net › gitRepositories › perfecthash › file › genHanov.c.gmi captured on 2024-09-29 at 01:15:32. Gemini links have been rewritten to link to archived content
⬅️ Previous capture (2023-01-29)
-=-=-=-=-=-=-
genHanov.c (8309B)
1 #! /usr/bin/env sheepy 2 /* or direct path to sheepy: #! /usr/local/bin/sheepy */ 3 4 /** 5 * simple C implementation of hanov's perfect hash: http://stevehanov.ca/blog/index.php?id=119 6 * Minimal Perfect Hashing - Dr. Amjad M Daoud - http://iswsa.acm.org/mphf/index.html 7 */ 8 9 /* Libsheepy documentation: http://spartatek.se/libsheepy/ */ 10 #include "libsheepyObject.h" 11 #include "shpPackages/crc/crc.h" 12 13 /* enable/disable logging */ 14 /* #undef pLog */ 15 /* #define pLog(...) */ 16 17 const char *type = "hanovt"; 18 const char *perfectHashData = "perfectHash"; 19 const char *keysArray = "keys"; 20 const char *funcScope = "static inline"; 21 const char *hashFunc = "hanovHash"; 22 const char *lookupFunc = "hanovLookup"; 23 const char *findFunc = "hanovFind"; 24 25 #define CFG_FILENAME "hanovConfig.yml" 26 27 // column count in the perfectHashData and keysArray arrays 28 #define colCount 4 29 30 u32 hash(u32 d, const char *str) { 31 if (!d) d = 0x811c9dc5; 32 ret crc32_Buf(d, (void*)str, lenG(str)) & 0x7fffffff; 33 } 34 35 typ struct {smallArrayt *G; u32 size;} hanovt; 36 37 hanovt create(const char **keys) { 38 var size = lenG(keys); 39 createSmallArray(buckets); 40 loop(size) {pushG(&buckets, NULL);} 41 hanovt res; 42 res.G = allocG(rtSmallArrayt); 43 loop(size) {pushG(res.G, NULL);} 44 res.size = size; 45 46 // Place all of the keys into buckets 47 cast(char**, ks, keys); 48 forEachS(ks, key) { 49 var bkey = hash(0, key) % size; 50 var bucket = getG(&buckets, rtSmallArrayt, bkey); 51 if (!bucket) { 52 bucket = allocG(bucket); 53 } 54 pushG(bucket, key); 55 setNFreePG(&buckets, bkey, bucket); 56 } 57 58 int bucketCmp(const void *a, const void *b) { 59 smallArrayt *A = (smallArrayt*)a; 60 smallArrayt *B = (smallArrayt*)b; 61 size_t la,lb; 62 if (!A or !isOSmallArray(A)) la = 0; else la = lenG(A); 63 if (!B or !isOSmallArray(B)) lb = 0; else lb = lenG(B); 64 ret lb - la; 65 } 66 67 sortFG(&buckets, bucketCmp); 68 69 staticBitsetT(usedSlotst, u64 , size); 70 usedSlotst usedSlots; 71 staticBitsetClear(&usedSlots); 72 73 size_t b; 74 for(b = 0; b < size; b++) { 75 var bucket = getG(&buckets, rtSmallArrayt, b); 76 if (!bucket) continue; 77 78 if(lenG(bucket) <= 1) { finishG(bucket); break;} 79 80 u32 d = 1; size_t item = 0; u32 slots[size]; u32 slot; bool used[size]; 81 indexer slotsi; 82 indexerInit(slotsi, size); 83 range(i, size) used[i] = false; 84 85 // Repeatedly try different values of d until we find a hash function 86 // that places all items in the bucket into free slots 87 while(item < lenG(bucket)) { 88 slot = hash(d, getG(bucket, rtChar, item)) % size; 89 // need hasObjectG 90 if(staticBitsetGet(&usedSlots, slot) || used[slot]) { 91 d++; 92 item = 0; 93 indexerInit(slotsi, size); 94 range(i, size) used[i] = false; 95 } else { 96 used[slot] = true; 97 indexerPush(slotsi); 98 slots[slotsi.last] = slot; 99 item++; 100 } 101 } 102 103 setG(res.G, hash(0, getG(bucket, rtChar, 0)) % size, d); 104 range(i, lenG(bucket)) { 105 staticBitset1(&usedSlots, slots[i]); 106 } 107 setNFreePG(&buckets, b, bucket); 108 } 109 110 // Only buckets with 1 item remain. Process them more quickly by directly 111 // placing them into a free slot. Use a negative value of d to indicate 112 // this. 113 114 createSmallArray(freelist); 115 range(i, size) { 116 if (!staticBitsetGet(&usedSlots, i)) { 117 pushG(&freelist, i); 118 } 119 } 120 121 for(; b < size; b++ ) { 122 var bucket = getG(&buckets, rtSmallArrayt, b); 123 if (!bucket) break; 124 var slot = popG(&freelist, rtU32); 125 126 // We subtract one to ensure it's negative even if the zeroeth slot was used. 127 setG(res.G, hash(0, getG(bucket, rtChar, 0)) % size, 0-slot-1); 128 setNFreePG(&buckets, b, bucket); 129 } 130 131 freeG(&freelist); 132 freeG(&buckets); 133 134 ret res; 135 } 136 137 u32 lookup(hanovt *GV, const char *key) { 138 var d = getG(GV->G, rtI32, hash(0, key) % lenG(GV->G)); 139 if (d < 0) ret 0-d-1; 140 ret hash(d, key) % GV->size; 141 } 142 143 void freeHanov(hanovt *h) { 144 terminateG(h->G); 145 } 146 147 int main(int ARGC, char** ARGV) { 148 149 initLibsheepy(ARGV[0]); 150 setLogMode(LOG_CONCISE); 151 setLogSymbols(LOG_UTF8); 152 153 char **vector = NULL; 154 155 if (ARGC < 2) { 156 logE("key list missing"); 157 logP("\n"BLD GRN"Usage:"RST); 158 puts(BLD "genHanov keyFileList\n\n" RST 159 "Optional configuration file: "CFG_FILENAME"\n" 160 "(located in directory where genHanov runs)\n\n" 161 "---\n" 162 " type: hanovt\n" 163 " perfectHashData: perfectHash\n" 164 " keysArray: keys\n" 165 " funcScope: static inline\n" 166 " hashFunc: hanovHash\n" 167 " lookupFunc: hanovLookup\n" 168 " findFunc: hanovFind" 169 ); 170 XFAILURE; 171 } 172 173 vector = readFileG(&vector, ARGV[1]); 174 175 if (!vector) { 176 logE("Couldn't key list file "BLD RED"'%s'"RST, ARGV[1]); 177 XFAILURE; 178 } 179 180 var han = create((const char**)vector); 181 182 // load configuration 183 createSmallJson(cfg); 184 if (fileExists(CFG_FILENAME)) { 185 readFileG(&cfg, CFG_FILENAME); 186 type = (const char *) getG(&cfg, rtChar, "type"); 187 perfectHashData = (const char *) getG(&cfg, rtChar, "perfectHashData"); 188 keysArray = (const char *) getG(&cfg, rtChar, "keysArray"); 189 funcScope = (const char *) getG(&cfg, rtChar, "funcScope"); 190 hashFunc = (const char *) getG(&cfg, rtChar, "hashFunc"); 191 lookupFunc = (const char *) getG(&cfg, rtChar, "lookupFunc"); 192 findFunc = (const char *) getG(&cfg, rtChar, "findFunc"); 193 } 194 195 // print minimal perfect hash function C code 196 197 char *dataDecl = formatS("const %s %s = {{", type, perfectHashData); 198 var ts = getCurrentDateYMD(); 199 200 printf("// generated by genHanov.c from perfecthash sheepy package at %s\n\n" 201 "#include "_"libsheepyObject.h"_"\n" 202 "#include "_"shpPackages/crc/crc.h"_"\n\n" 203 "typ struct { u32 G[%zu]; u32 size;} %s;\n\n" 204 "%s", 205 ts, 206 lenG(han.G), type, 207 dataDecl); 208 free(ts); 209 210 size_t dataDeclLen = lenG(dataDecl) -1; 211 free(dataDecl); 212 213 char *comma[] = {"", ","}; 214 215 indexer cols; 216 indexerInit(cols, colCount); 217 218 iter(han.G, D) { 219 cast(smallIntt*,d,D); 220 if (isOSmallInt(D)) { 221 printf("%s%u", comma[iterIndexG(han.G) ? 1 : 0], getValG(d)); 222 } 223 else printf("%s0", comma[iterIndexG(han.G) ? 1 : 0]); 224 indexerPush(cols); 225 if (indexerLast(cols) == cols.maxCount -1) printf("\n%*c",dataDeclLen,' '); 226 } 227 228 printf("}, %u};\n\n", han.size); 229 230 staticBitsetT(usedSlotst, u64 , lenG(vector)); 231 usedSlotst usedSlots; 232 staticBitsetClear(&usedSlots); 233 234 char *keys[han.size]; 235 236 forEachS(vector, key) { 237 var o = lookup(&han, key); 238 if (staticBitsetGet(&usedSlots, o)) { 239 logE("Collision: hash %u", o); 240 XFAILURE; 241 } 242 keys[o] = key; 243 /* logVarG(key); */ 244 /* logVarG(o); */ 245 staticBitset1(&usedSlots, o); 246 } 247 248 char *keysDecl = formatS("const char *%s[%u] = {", keysArray, han.size); 249 printf("%s", keysDecl); 250 251 dataDeclLen = lenG(keysDecl) -1; 252 free(keysDecl); 253 254 indexerInit(cols, colCount); 255 256 range(i, han.size) { 257 printf("%s"_"%s"_, comma[i ? 1 : 0], keys[i]); 258 indexerPush(cols); 259 if (indexerLast(cols) == cols.maxCount -1) printf("\n%*c",dataDeclLen,' '); 260 } 261 262 printf("};\n\n"); 263 264 printf("%s u32 %s(u32 d, const char *str) {\n" 265 " if (!d) d = 0x811c9dc5;\n" 266 " ret crc32_Buf(d, (void*)str, lenG(str)) & 0x7fffffff;\n" 267 "}\n\n" 268 "%s u32 %s(const %s *GV, const char *key) {\n" 269 " var d = (i32)GV->G[%s(0, key) %% ARRAY_SIZE(GV->G)];\n" 270 " if (d < 0) ret 0-d-1;\n" 271 " ret %s(d, key) %% GV->size;\n" 272 "}\n\n" 273 "// lookup and return -1 when key is not found\n" 274 "%s ssize_t %s(const %s *GV, const char *key) {\n" 275 " var idx = %s(GV, key);\n\n" 276 " if (eqG(key, %s[idx])) ret idx;\n" 277 " else ret -1; // key is not found\n" 278 "}\n\n" 279 "// Loopup: u32 idx = %s(&%s, "_"key"_");\n" 280 "// Find key: u32 idx = %s(&%s, "_"key"_");", 281 funcScope, hashFunc, 282 funcScope, lookupFunc, type, 283 hashFunc, 284 hashFunc, 285 funcScope, findFunc, type, 286 lookupFunc, 287 keysArray, 288 lookupFunc, perfectHashData, 289 findFunc, perfectHashData); 290 291 freeG(&cfg); 292 freeG(vector); 293 freeHanov(&han); 294 } 295 // vim: set expandtab ts=2 sw=2: