/* nfs-ganesha 1.4 — lookup3 hash functions (Bob Jenkins, public domain) */
/*
-------------------------------------------------------------------------------
lookup3.c, by Bob Jenkins, May 2006, Public Domain.

These are functions for producing 32-bit hashes for hash table lookup.
hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
are externally useful functions.  You can use this free for any purpose.
It's in the public domain.  It has no warranty.

You probably want to use hashlittle().  hashlittle() and hashbig() hash
byte arrays.  hashlittle() is faster than hashbig() on little-endian
machines (Intel and AMD are little-endian).  hashlittle2() is identical
to hashlittle() except it returns two 32-bit hashes for the price of one.

If you want to find a hash of, say, exactly 7 integers, do
  a = i1;  b = i2;  c = i3;
  mix(a, b, c);
  a += i4;  b += i5;  c += i6;
  mix(a, b, c);
  a += i7;
  final(a, b, c);
then use c as the hash value.  If you have a variable length array of
4-byte integers to hash, use hashword().  If you have a byte array (like
a character string), use hashlittle().

Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
then mix those integers.  This is fast (you can do a lot more thorough
mixing with 12*3 instructions on 3 integers than you can with 3
instructions on 1 byte), but shoehorning those bytes into integers
efficiently is messy.

Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/
/* NOTE(review): the reference implementation's SELF_TEST harness is not
 * present in this copy, so the stray "#define SELF_TEST 1" was dropped. */

#include <stdio.h>		/* defines printf for tests */
#include <time.h>		/* defines time_t for timings in the test */
#include <stdint.h>		/* defines uint32_t etc */
#include <sys/param.h>		/* attempt to define endianness */
#if defined(__linux__) || defined(linux)	/* strict ISO modes define only __linux__ */
#include <endian.h>		/* attempt to define endianness */
#endif

#include <inttypes.h>
#ifdef HAVE_CONFIG_H
#include "config.h"		/* may define LITTLEEND */
#endif

/*
 * My best guess at if you are big-endian or little-endian.  This may
 * need adjustment.
 */
#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
     __BYTE_ORDER == __LITTLE_ENDIAN) || \
    (defined(i386) || defined(__i386__) || defined(__i486__) || \
     defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
#define HASH_LITTLE_ENDIAN 1
#define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
#define HASH_LITTLE_ENDIAN 0
#define HASH_BIG_ENDIAN 1
#else
#define HASH_LITTLE_ENDIAN 0
#define HASH_BIG_ENDIAN 0
#endif

#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
#define rot(x, k) (((x)<<(k)) | ((x)>>(32-(k))))

/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.

This is reversible, so any information in (a,b,c) before mix() is still
in (a,b,c) after mix().  The rotate constants were chosen (via
http://burtleburtle.net/bob/hash/avalanche.html) so that pairs of inputs
differing in a few top or bottom bits produce outputs differing in at
least 32 bits.  This does not achieve full avalanche by itself — final()
is responsible for that on the last block.  Rotates are used rather than
shifts because they cost the same on every machine tested and are much
kinder to the top and bottom bits.
-------------------------------------------------------------------------------
*/
#define mix(a, b, c) \
do { \
	a -= c;  a ^= rot(c, 4);   c += b; \
	b -= a;  b ^= rot(a, 6);   a += c; \
	c -= b;  c ^= rot(b, 8);   b += a; \
	a -= c;  a ^= rot(c, 16);  c += b; \
	b -= a;  b ^= rot(a, 19);  a += c; \
	c -= b;  c ^= rot(b, 4);   b += a; \
} while (0)

/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c

Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different.  Tested (like mix) for
one- and two-bit deltas over +, -, ^, ~^ against pseudorandom,
single-bit, and counter base values.
-------------------------------------------------------------------------------
*/
#define final(a, b, c) \
do { \
	c ^= b;  c -= rot(b, 14); \
	a ^= c;  a -= rot(c, 11); \
	b ^= a;  b -= rot(a, 25); \
	c ^= b;  c -= rot(b, 16); \
	a ^= c;  a -= rot(c, 4); \
	b ^= a;  b -= rot(a, 14); \
	c ^= b;  c -= rot(b, 24); \
} while (0)

#ifndef LITTLEEND

/*
--------------------------------------------------------------------
hashword() -- hash an array of uint32_t into a 32-bit value.

This works on all machines.  To be useful, it requires
 -- that the key be an array of uint32_t's, and
 -- that the length be the number of uint32_t's in the key

It is identical to hashlittle() on little-endian machines and to
hashbig() on big-endian machines, except that the length is measured in
uint32_t's rather than bytes.
--------------------------------------------------------------------
*/
static uint32_t hashword(const uint32_t *k,	/* the key, an array of uint32_t values */
			 size_t length,		/* the length of the key, in uint32_ts */
			 uint32_t initval)	/* the previous hash, or an arbitrary value */
{
	uint32_t a, b, c;

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + (((uint32_t)length) << 2) + initval;

	/* handle most of the key, three words per round */
	while (length > 3) {
		a += k[0];
		b += k[1];
		c += k[2];
		mix(a, b, c);
		length -= 3;
		k += 3;
	}

	/* handle the last 3 uint32_t's; all case statements fall through */
	switch (length) {
	case 3:
		c += k[2];		/* fall through */
	case 2:
		b += k[1];		/* fall through */
	case 1:
		a += k[0];
		final(a, b, c);		/* fall through */
	case 0:				/* nothing left to add */
		break;
	}
	return c;
}

/*
--------------------------------------------------------------------
hashword2() -- same as hashword(), but take two seeds and return two
32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
both be initialized with seeds.  If you pass in (*pb)==0, the output
(*pc) will be the same as the return value from hashword().
--------------------------------------------------------------------
*/
static void hashword2(const uint32_t *k,	/* the key, an array of uint32_t values */
		      size_t length,		/* the length of the key, in uint32_ts */
		      uint32_t *pc,		/* IN: seed OUT: primary hash value */
		      uint32_t *pb)		/* IN: more seed OUT: secondary hash value */
{
	uint32_t a, b, c;

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t)(length << 2)) + *pc;
	c += *pb;

	/* handle most of the key */
	while (length > 3) {
		a += k[0];
		b += k[1];
		c += k[2];
		mix(a, b, c);
		length -= 3;
		k += 3;
	}

	/* handle the last 3 uint32_t's; all case statements fall through */
	switch (length) {
	case 3:
		c += k[2];		/* fall through */
	case 2:
		b += k[1];		/* fall through */
	case 1:
		a += k[0];
		final(a, b, c);		/* fall through */
	case 0:				/* nothing left to add */
		break;
	}
	*pc = c;
	*pb = b;
}

#endif				/* !LITTLEEND */

#ifdef LITTLEEND

/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
  key     : the key (the unaligned variable-length array of bytes)
  length  : the length of the key, counting by bytes
  initval : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of the
return value.  Two keys differing by one or two bits will have totally
different hash values.

The best hash table sizes are powers of 2.  There is no need to mod by a
prime.  If you need fewer than 32 bits, use a bitmask: for 10 bits, do
  h = (h & hashmask(10));
and give the table hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i = 0, h = 0; i < n; ++i) h = hashlittle(k[i], len[i], h);

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.
-------------------------------------------------------------------------------
*/
static uint32_t hashlittle(const void *key, size_t length, uint32_t initval)
{
	uint32_t a, b, c;	/* internal state */
	union { const void *ptr; size_t i; } u;	/* needed for Mac Powerbook G4 */

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

	u.ptr = key;
	if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
		const uint32_t *k = (const uint32_t *)key;	/* read 32-bit chunks */
#ifdef VALGRIND
		const uint8_t *k8;
#endif

		/* all but last block: aligned reads, affect 32 bits of (a,b,c) */
		while (length > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			mix(a, b, c);
			length -= 12;
			k += 3;
		}

		/*
		 * Handle the last (probably partial) block.
		 *
		 * "k[2]&0xffffff" actually reads beyond the end of the string,
		 * but then masks off the part it's not allowed to read.
		 * Because the string is aligned, the masked-off tail is in the
		 * same word as the rest of the string.  Every machine with
		 * memory protection I've seen does it on word boundaries, so
		 * is OK with this.  But VALGRIND will still catch it and
		 * complain.  The masking trick does make the hash noticeably
		 * faster for short strings (like English words).
		 */
#ifndef VALGRIND
		switch (length) {
		case 12: c += k[2];            b += k[1]; a += k[0]; break;
		case 11: c += k[2] & 0xffffff; b += k[1]; a += k[0]; break;
		case 10: c += k[2] & 0xffff;   b += k[1]; a += k[0]; break;
		case 9:  c += k[2] & 0xff;     b += k[1]; a += k[0]; break;
		case 8:  b += k[1]; a += k[0]; break;
		case 7:  b += k[1] & 0xffffff; a += k[0]; break;
		case 6:  b += k[1] & 0xffff;   a += k[0]; break;
		case 5:  b += k[1] & 0xff;     a += k[0]; break;
		case 4:  a += k[0]; break;
		case 3:  a += k[0] & 0xffffff; break;
		case 2:  a += k[0] & 0xffff;   break;
		case 1:  a += k[0] & 0xff;     break;
		case 0:  return c;	/* zero length strings require no mixing */
		}
#else				/* make valgrind happy: byte-exact reads */
		k8 = (const uint8_t *)k;
		switch (length) {
		case 12: c += k[2]; b += k[1]; a += k[0]; break;
		case 11: c += ((uint32_t)k8[10]) << 16;	/* fall through */
		case 10: c += ((uint32_t)k8[9]) << 8;	/* fall through */
		case 9:  c += k8[8];			/* fall through */
		case 8:  b += k[1]; a += k[0]; break;
		case 7:  b += ((uint32_t)k8[6]) << 16;	/* fall through */
		case 6:  b += ((uint32_t)k8[5]) << 8;	/* fall through */
		case 5:  b += k8[4];			/* fall through */
		case 4:  a += k[0]; break;
		case 3:  a += ((uint32_t)k8[2]) << 16;	/* fall through */
		case 2:  a += ((uint32_t)k8[1]) << 8;	/* fall through */
		case 1:  a += k8[0]; break;
		case 0:  return c;
		}
#endif				/* !VALGRIND */

	} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
		const uint16_t *k = (const uint16_t *)key;	/* read 16-bit chunks */
		const uint8_t *k8;

		/* all but last block: aligned reads and different mixing */
		while (length > 12) {
			a += k[0] + (((uint32_t)k[1]) << 16);
			b += k[2] + (((uint32_t)k[3]) << 16);
			c += k[4] + (((uint32_t)k[5]) << 16);
			mix(a, b, c);
			length -= 12;
			k += 6;
		}

		/* handle the last (probably partial) block */
		k8 = (const uint8_t *)k;
		switch (length) {
		case 12:
			c += k[4] + (((uint32_t)k[5]) << 16);
			b += k[2] + (((uint32_t)k[3]) << 16);
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 11:
			c += ((uint32_t)k8[10]) << 16;	/* fall through */
		case 10:
			c += k[4];
			b += k[2] + (((uint32_t)k[3]) << 16);
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 9:
			c += k8[8];			/* fall through */
		case 8:
			b += k[2] + (((uint32_t)k[3]) << 16);
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 7:
			b += ((uint32_t)k8[6]) << 16;	/* fall through */
		case 6:
			b += k[2];
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 5:
			b += k8[4];			/* fall through */
		case 4:
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 3:
			a += ((uint32_t)k8[2]) << 16;	/* fall through */
		case 2:
			a += k[0];
			break;
		case 1:
			a += k8[0];
			break;
		case 0:
			return c;	/* zero length requires no mixing */
		}

	} else {		/* need to read the key one byte at a time */
		const uint8_t *k = (const uint8_t *)key;

		/* all but the last block: affect some 32 bits of (a,b,c) */
		while (length > 12) {
			a += k[0];
			a += ((uint32_t)k[1]) << 8;
			a += ((uint32_t)k[2]) << 16;
			a += ((uint32_t)k[3]) << 24;
			b += k[4];
			b += ((uint32_t)k[5]) << 8;
			b += ((uint32_t)k[6]) << 16;
			b += ((uint32_t)k[7]) << 24;
			c += k[8];
			c += ((uint32_t)k[9]) << 8;
			c += ((uint32_t)k[10]) << 16;
			c += ((uint32_t)k[11]) << 24;
			mix(a, b, c);
			length -= 12;
			k += 12;
		}

		/* last block: affect all 32 bits of (c); all cases fall through */
		switch (length) {
		case 12: c += ((uint32_t)k[11]) << 24;	/* fall through */
		case 11: c += ((uint32_t)k[10]) << 16;	/* fall through */
		case 10: c += ((uint32_t)k[9]) << 8;	/* fall through */
		case 9:  c += k[8];			/* fall through */
		case 8:  b += ((uint32_t)k[7]) << 24;	/* fall through */
		case 7:  b += ((uint32_t)k[6]) << 16;	/* fall through */
		case 6:  b += ((uint32_t)k[5]) << 8;	/* fall through */
		case 5:  b += k[4];			/* fall through */
		case 4:  a += ((uint32_t)k[3]) << 24;	/* fall through */
		case 3:  a += ((uint32_t)k[2]) << 16;	/* fall through */
		case 2:  a += ((uint32_t)k[1]) << 8;	/* fall through */
		case 1:  a += k[0]; break;
		case 0:  return c;
		}
	}

	final(a, b, c);
	return c;
}

/*
 * hashlittle2: return 2 32-bit hash values
 *
 * This is identical to hashlittle(), except it returns two 32-bit hash
 * values instead of just one.  This is good enough for hash table
 * lookup with 2^^64 buckets, or if you want a second hash if you're not
 * happy with the first, or if you want a probably-unique 64-bit ID for
 * the key.  *pc is better mixed than *pb, so use *pc first.  If you want
 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
 */
static void hashlittle2(const void *key,	/* the key to hash */
			size_t length,		/* length of the key, in bytes */
			uint32_t *pc,		/* IN: primary initval, OUT: primary hash */
			uint32_t *pb)		/* IN: secondary initval, OUT: secondary hash */
{
	uint32_t a, b, c;	/* internal state */
	union { const void *ptr; size_t i; } u;	/* needed for Mac Powerbook G4 */

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
	c += *pb;

	u.ptr = key;
	if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
		const uint32_t *k = (const uint32_t *)key;	/* read 32-bit chunks */
#ifdef VALGRIND
		const uint8_t *k8;
#endif

		/* all but last block: aligned reads, affect 32 bits of (a,b,c) */
		while (length > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			mix(a, b, c);
			length -= 12;
			k += 3;
		}

		/*
		 * Handle the last (probably partial) block; see the matching
		 * comment in hashlittle() for why the out-of-bounds read in
		 * the masked cases below is safe (and why VALGRIND objects).
		 */
#ifndef VALGRIND
		switch (length) {
		case 12: c += k[2];            b += k[1]; a += k[0]; break;
		case 11: c += k[2] & 0xffffff; b += k[1]; a += k[0]; break;
		case 10: c += k[2] & 0xffff;   b += k[1]; a += k[0]; break;
		case 9:  c += k[2] & 0xff;     b += k[1]; a += k[0]; break;
		case 8:  b += k[1]; a += k[0]; break;
		case 7:  b += k[1] & 0xffffff; a += k[0]; break;
		case 6:  b += k[1] & 0xffff;   a += k[0]; break;
		case 5:  b += k[1] & 0xff;     a += k[0]; break;
		case 4:  a += k[0]; break;
		case 3:  a += k[0] & 0xffffff; break;
		case 2:  a += k[0] & 0xffff;   break;
		case 1:  a += k[0] & 0xff;     break;
		case 0:			/* zero length strings require no mixing */
			*pc = c;
			*pb = b;
			return;
		}
#else				/* make valgrind happy: byte-exact reads */
		k8 = (const uint8_t *)k;
		switch (length) {
		case 12: c += k[2]; b += k[1]; a += k[0]; break;
		case 11: c += ((uint32_t)k8[10]) << 16;	/* fall through */
		case 10: c += ((uint32_t)k8[9]) << 8;	/* fall through */
		case 9:  c += k8[8];			/* fall through */
		case 8:  b += k[1]; a += k[0]; break;
		case 7:  b += ((uint32_t)k8[6]) << 16;	/* fall through */
		case 6:  b += ((uint32_t)k8[5]) << 8;	/* fall through */
		case 5:  b += k8[4];			/* fall through */
		case 4:  a += k[0]; break;
		case 3:  a += ((uint32_t)k8[2]) << 16;	/* fall through */
		case 2:  a += ((uint32_t)k8[1]) << 8;	/* fall through */
		case 1:  a += k8[0]; break;
		case 0:
			*pc = c;
			*pb = b;
			return;
		}
#endif				/* !VALGRIND */

	} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
		const uint16_t *k = (const uint16_t *)key;	/* read 16-bit chunks */
		const uint8_t *k8;

		/* all but last block: aligned reads and different mixing */
		while (length > 12) {
			a += k[0] + (((uint32_t)k[1]) << 16);
			b += k[2] + (((uint32_t)k[3]) << 16);
			c += k[4] + (((uint32_t)k[5]) << 16);
			mix(a, b, c);
			length -= 12;
			k += 6;
		}

		/* handle the last (probably partial) block */
		k8 = (const uint8_t *)k;
		switch (length) {
		case 12:
			c += k[4] + (((uint32_t)k[5]) << 16);
			b += k[2] + (((uint32_t)k[3]) << 16);
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 11:
			c += ((uint32_t)k8[10]) << 16;	/* fall through */
		case 10:
			c += k[4];
			b += k[2] + (((uint32_t)k[3]) << 16);
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 9:
			c += k8[8];			/* fall through */
		case 8:
			b += k[2] + (((uint32_t)k[3]) << 16);
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 7:
			b += ((uint32_t)k8[6]) << 16;	/* fall through */
		case 6:
			b += k[2];
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 5:
			b += k8[4];			/* fall through */
		case 4:
			a += k[0] + (((uint32_t)k[1]) << 16);
			break;
		case 3:
			a += ((uint32_t)k8[2]) << 16;	/* fall through */
		case 2:
			a += k[0];
			break;
		case 1:
			a += k8[0];
			break;
		case 0:		/* zero length strings require no mixing */
			*pc = c;
			*pb = b;
			return;
		}

	} else {		/* need to read the key one byte at a time */
		const uint8_t *k = (const uint8_t *)key;

		/* all but the last block: affect some 32 bits of (a,b,c) */
		while (length > 12) {
			a += k[0];
			a += ((uint32_t)k[1]) << 8;
			a += ((uint32_t)k[2]) << 16;
			a += ((uint32_t)k[3]) << 24;
			b += k[4];
			b += ((uint32_t)k[5]) << 8;
			b += ((uint32_t)k[6]) << 16;
			b += ((uint32_t)k[7]) << 24;
			c += k[8];
			c += ((uint32_t)k[9]) << 8;
			c += ((uint32_t)k[10]) << 16;
			c += ((uint32_t)k[11]) << 24;
			mix(a, b, c);
			length -= 12;
			k += 12;
		}

		/* last block: affect all 32 bits of (c); all cases fall through */
		switch (length) {
		case 12: c += ((uint32_t)k[11]) << 24;	/* fall through */
		case 11: c += ((uint32_t)k[10]) << 16;	/* fall through */
		case 10: c += ((uint32_t)k[9]) << 8;	/* fall through */
		case 9:  c += k[8];			/* fall through */
		case 8:  b += ((uint32_t)k[7]) << 24;	/* fall through */
		case 7:  b += ((uint32_t)k[6]) << 16;	/* fall through */
		case 6:  b += ((uint32_t)k[5]) << 8;	/* fall through */
		case 5:  b += k[4];			/* fall through */
		case 4:  a += ((uint32_t)k[3]) << 24;	/* fall through */
		case 3:  a += ((uint32_t)k[2]) << 16;	/* fall through */
		case 2:  a += ((uint32_t)k[1]) << 8;	/* fall through */
		case 1:  a += k[0]; break;
		case 0:		/* zero length strings require no mixing */
			*pc = c;
			*pb = b;
			return;
		}
	}

	final(a, b, c);
	*pc = c;
	*pb = b;
}

#else				/* !LITTLEEND */

/*
 * hashbig():
 * This is the same as hashword() on big-endian machines.  It is different
 * from hashlittle() on all machines.  hashbig() takes advantage of
 * big-endian byte ordering.
 */
static uint32_t hashbig(const void *key, size_t length, uint32_t initval)
{
	uint32_t a, b, c;
	union { const void *ptr; size_t i; } u;	/* to cast key to (size_t) happily */

	/* Set up the internal state */
	a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

	u.ptr = key;
	if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
		const uint32_t *k = (const uint32_t *)key;	/* read 32-bit chunks */
#ifdef VALGRIND
		const uint8_t *k8;
#endif

		/* all but last block: aligned reads, affect 32 bits of (a,b,c) */
		while (length > 12) {
			a += k[0];
			b += k[1];
			c += k[2];
			mix(a, b, c);
			length -= 12;
			k += 3;
		}

		/*
		 * Handle the last (probably partial) block.
		 *
		 * "k[2]&0xffffff00" actually reads beyond the end of the
		 * string, but then masks off the part it's not allowed to
		 * read.  Because the string is aligned, the illegal read is in
		 * the same word as the rest of the string, so machines that
		 * protect memory on word boundaries are OK with this.  But
		 * VALGRIND will still catch it and complain.  The masking
		 * trick does make the hash noticeably faster for short
		 * strings (like English words).
		 */
#ifndef VALGRIND
		switch (length) {
		case 12: c += k[2];              b += k[1]; a += k[0]; break;
		case 11: c += k[2] & 0xffffff00; b += k[1]; a += k[0]; break;
		case 10: c += k[2] & 0xffff0000; b += k[1]; a += k[0]; break;
		case 9:  c += k[2] & 0xff000000; b += k[1]; a += k[0]; break;
		case 8:  b += k[1]; a += k[0]; break;
		case 7:  b += k[1] & 0xffffff00; a += k[0]; break;
		case 6:  b += k[1] & 0xffff0000; a += k[0]; break;
		case 5:  b += k[1] & 0xff000000; a += k[0]; break;
		case 4:  a += k[0]; break;
		case 3:  a += k[0] & 0xffffff00; break;
		case 2:  a += k[0] & 0xffff0000; break;
		case 1:  a += k[0] & 0xff000000; break;
		case 0:  return c;	/* zero length strings require no mixing */
		}
#else				/* make valgrind happy: byte-exact reads */
		k8 = (const uint8_t *)k;
		switch (length) {	/* all the case statements fall through */
		case 12: c += k[2]; b += k[1]; a += k[0]; break;
		case 11: c += ((uint32_t)k8[10]) << 8;	/* fall through */
		case 10: c += ((uint32_t)k8[9]) << 16;	/* fall through */
		case 9:  c += ((uint32_t)k8[8]) << 24;	/* fall through */
		case 8:  b += k[1]; a += k[0]; break;
		case 7:  b += ((uint32_t)k8[6]) << 8;	/* fall through */
		case 6:  b += ((uint32_t)k8[5]) << 16;	/* fall through */
		case 5:  b += ((uint32_t)k8[4]) << 24;	/* fall through */
		case 4:  a += k[0]; break;
		case 3:  a += ((uint32_t)k8[2]) << 8;	/* fall through */
		case 2:  a += ((uint32_t)k8[1]) << 16;	/* fall through */
		case 1:  a += ((uint32_t)k8[0]) << 24; break;
		case 0:  return c;
		}
#endif				/* !VALGRIND */

	} else {		/* need to read the key one byte at a time */
		const uint8_t *k = (const uint8_t *)key;

		/* all but the last block: affect some 32 bits of (a,b,c) */
		while (length > 12) {
			a += ((uint32_t)k[0]) << 24;
			a += ((uint32_t)k[1]) << 16;
			a += ((uint32_t)k[2]) << 8;
			a += ((uint32_t)k[3]);
			b += ((uint32_t)k[4]) << 24;
			b += ((uint32_t)k[5]) << 16;
			b += ((uint32_t)k[6]) << 8;
			b += ((uint32_t)k[7]);
			c += ((uint32_t)k[8]) << 24;
			c += ((uint32_t)k[9]) << 16;
			c += ((uint32_t)k[10]) << 8;
			c += ((uint32_t)k[11]);
			mix(a, b, c);
			length -= 12;
			k += 12;
		}

		/* last block: affect all 32 bits of (c); all cases fall through */
		switch (length) {
		case 12: c += k[11];			/* fall through */
		case 11: c += ((uint32_t)k[10]) << 8;	/* fall through */
		case 10: c += ((uint32_t)k[9]) << 16;	/* fall through */
		case 9:  c += ((uint32_t)k[8]) << 24;	/* fall through */
		case 8:  b += k[7];			/* fall through */
		case 7:  b += ((uint32_t)k[6]) << 8;	/* fall through */
		case 6:  b += ((uint32_t)k[5]) << 16;	/* fall through */
		case 5:  b += ((uint32_t)k[4]) << 24;	/* fall through */
		case 4:  a += k[3];			/* fall through */
		case 3:  a += ((uint32_t)k[2]) << 8;	/* fall through */
		case 2:  a += ((uint32_t)k[1]) << 16;	/* fall through */
		case 1:  a += ((uint32_t)k[0]) << 24; break;
		case 0:  return c;
		}
	}

	final(a, b, c);
	return c;
}

#endif				/* LITTLEEND */

/**
 * Lookup3_hash_buff - hash an arbitrary byte buffer into a 32-bit value.
 * @str: the buffer to hash (need not be aligned or NUL-terminated)
 * @len: number of bytes of @str to hash
 *
 * Uses the endianness-appropriate lookup3 variant with a fixed seed of 13.
 * Suitable for hash table lookup only; NOT for cryptographic purposes.
 */
uint32_t Lookup3_hash_buff(char *str, uint32_t len)
{
	/* The original routed the result through a needless function-local
	 * "static" variable; a plain return avoids that shared state. */
#ifdef LITTLEEND
	return hashlittle(str, (size_t)len, 13);
#else
	return hashbig(str, (size_t)len, 13);
#endif
}				/* Lookup3_hash_buff */

/**
 * Lookup3_hash_buff_dual - produce two 32-bit hashes of a byte buffer.
 * @str:   the buffer to hash (need not be aligned or NUL-terminated)
 * @len:   number of bytes of @str to hash
 * @pval1: IN: primary seed,   OUT: primary hash value
 * @pval2: IN: secondary seed, OUT: secondary hash value
 */
void Lookup3_hash_buff_dual(char *str, uint32_t len, uint32_t *pval1,
			    uint32_t *pval2)
{
#ifdef LITTLEEND
	hashlittle2(str, (size_t)len, pval1, pval2);
#else
	/* BUG FIX: this branch used to call hashword2(str, len, ...), but
	 * hashword2 measures length in uint32_t WORDS (and requires 4-byte
	 * alignment) while len here is in BYTES — it therefore read roughly
	 * 4*len bytes, far past the end of the buffer.  Derive the two
	 * hashes from hashbig() instead, which handles unaligned byte
	 * buffers of any length and matches Lookup3_hash_buff() above. */
	*pval1 = hashbig(str, (size_t)len, *pval1);
	*pval2 = hashbig(str, (size_t)len, *pval2);
#endif
}				/* Lookup3_hash_buff_dual */