💾 Archived View for gmi.noulin.net › gitRepositories › hashfunctions › file › siphash.c.gmi captured on 2024-08-19 at 05:10:06. Gemini links have been rewritten to link to archived content

View Raw

More Information

⬅️ Previous capture (2023-01-29)

-=-=-=-=-=-=-

hashfunctions

Log

Files

Refs

LICENSE

siphash.c (13845B)

     1 /*
     2    SipHash reference C implementation
     3 
     4    Copyright (c) 2012-2016 Jean-Philippe Aumasson
     5    <jeanphilippe.aumasson@gmail.com>
     6    Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
     7    Copyright (c) 2017 Salvatore Sanfilippo <antirez@gmail.com>
     8 
     9    To the extent possible under law, the author(s) have dedicated all copyright
    10    and related and neighboring rights to this software to the public domain
    11    worldwide. This software is distributed without any warranty.
    12 
    13    You should have received a copy of the CC0 Public Domain Dedication along
    14    with this software. If not, see
    15    <http://creativecommons.org/publicdomain/zero/1.0/>.
    16 
    17    ----------------------------------------------------------------------------
    18 
    19    This version was modified by Salvatore Sanfilippo <antirez@gmail.com>
    20    in the following ways:
    21 
    22    1. We use SipHash 1-2. This is not believed to be as strong as the
    23       suggested 2-4 variant, but AFAIK there are not trivial attacks
    24       against this reduced-rounds version, and it runs at the same speed
    25       as Murmurhash2 that we used previously, why the 2-4 variant slowed
    26       down Redis by a 4% figure more or less.
    27    2. Hard-code rounds in the hope the compiler can optimize it more
    28       in this raw from. Anyway we always want the standard 2-4 variant.
    29    3. Modify the prototype and implementation so that the function directly
    30       returns an uint64_t value, the hash itself, instead of receiving an
    31       output buffer. This also means that the output size is set to 8 bytes
    32       and the 16 bytes output code handling was removed.
    33    4. Provide a case insensitive variant to be used when hashing strings that
    34       must be considered identical by the hash table regardless of the case.
    35       If we don't have directly a case insensitive hash function, we need to
    36       perform a text transformation in some temporary buffer, which is costly.
    37    5. Remove debugging code.
    38    6. Modified the original test.c file to be a stand-alone function testing
    39       the function in the new form (returing an uint64_t) using just the
    40       relevant test vector.
    41  */
    42 #include <assert.h>
    43 #include <stdint.h>
    44 #include <stdio.h>
    45 #include <string.h>
    46 #include <ctype.h>
    47 
    48 /* Fast tolower() alike function that does not care about locale
    49  * but just returns a-z insetad of A-Z. */
    50 int siptlw(int c) {
    51     if (c >= 'A' && c <= 'Z') {
    52         return c+('a'-'A');
    53     } else {
    54         return c;
    55     }
    56 }
    57 
    58 /* Test of the CPU is Little Endian and supports not aligned accesses.
    59  * Two interesting conditions to speedup the function that happen to be
    60  * in most of x86 servers. */
    61 #if defined(__X86_64__) || defined(__x86_64__) || defined (__i386__)
    62 #define UNALIGNED_LE_CPU
    63 #endif
    64 
    65 #define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
    66 
    67 #define U32TO8_LE(p, v)                                                        \
    68     (p)[0] = (uint8_t)((v));                                                   \
    69     (p)[1] = (uint8_t)((v) >> 8);                                              \
    70     (p)[2] = (uint8_t)((v) >> 16);                                             \
    71     (p)[3] = (uint8_t)((v) >> 24);
    72 
    73 #define U64TO8_LE(p, v)                                                        \
    74     U32TO8_LE((p), (uint32_t)((v)));                                           \
    75     U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
    76 
    77 #ifdef UNALIGNED_LE_CPU
    78 #define U8TO64_LE(p) (*((uint64_t*)(p)))
    79 #else
    80 #define U8TO64_LE(p)                                                           \
    81     (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                        \
    82      ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                 \
    83      ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                 \
    84      ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
    85 #endif
    86 
    87 #define U8TO64_LE_NOCASE(p)                                                    \
    88     (((uint64_t)(siptlw((p)[0]))) |                                           \
    89      ((uint64_t)(siptlw((p)[1])) << 8) |                                      \
    90      ((uint64_t)(siptlw((p)[2])) << 16) |                                     \
    91      ((uint64_t)(siptlw((p)[3])) << 24) |                                     \
    92      ((uint64_t)(siptlw((p)[4])) << 32) |                                              \
    93      ((uint64_t)(siptlw((p)[5])) << 40) |                                              \
    94      ((uint64_t)(siptlw((p)[6])) << 48) |                                              \
    95      ((uint64_t)(siptlw((p)[7])) << 56))
    96 
    97 #define SIPROUND                                                               \
    98     do {                                                                       \
    99         v0 += v1;                                                              \
   100         v1 = ROTL(v1, 13);                                                     \
   101         v1 ^= v0;                                                              \
   102         v0 = ROTL(v0, 32);                                                     \
   103         v2 += v3;                                                              \
   104         v3 = ROTL(v3, 16);                                                     \
   105         v3 ^= v2;                                                              \
   106         v0 += v3;                                                              \
   107         v3 = ROTL(v3, 21);                                                     \
   108         v3 ^= v0;                                                              \
   109         v2 += v1;                                                              \
   110         v1 = ROTL(v1, 17);                                                     \
   111         v1 ^= v2;                                                              \
   112         v2 = ROTL(v2, 32);                                                     \
   113     } while (0)
   114 
   115 uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) {
   116 #ifndef UNALIGNED_LE_CPU
   117     uint64_t hash;
   118     uint8_t *out = (uint8_t*) &hash;
   119 #endif
   120     uint64_t v0 = 0x736f6d6570736575ULL;
   121     uint64_t v1 = 0x646f72616e646f6dULL;
   122     uint64_t v2 = 0x6c7967656e657261ULL;
   123     uint64_t v3 = 0x7465646279746573ULL;
   124     uint64_t k0 = U8TO64_LE(k);
   125     uint64_t k1 = U8TO64_LE(k + 8);
   126     uint64_t m;
   127     const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
   128     const int left = inlen & 7;
   129     uint64_t b = ((uint64_t)inlen) << 56;
   130     v3 ^= k1;
   131     v2 ^= k0;
   132     v1 ^= k1;
   133     v0 ^= k0;
   134 
   135     for (; in != end; in += 8) {
   136         m = U8TO64_LE(in);
   137         v3 ^= m;
   138 
   139         SIPROUND;
   140 
   141         v0 ^= m;
   142     }
   143 
   144     switch (left) {
   145     case 7: b |= ((uint64_t)in[6]) << 48;
   146     case 6: b |= ((uint64_t)in[5]) << 40;
   147     case 5: b |= ((uint64_t)in[4]) << 32;
   148     case 4: b |= ((uint64_t)in[3]) << 24;
   149     case 3: b |= ((uint64_t)in[2]) << 16;
   150     case 2: b |= ((uint64_t)in[1]) << 8;
   151     case 1: b |= ((uint64_t)in[0]); break;
   152     case 0: break;
   153     }
   154 
   155     v3 ^= b;
   156 
   157     SIPROUND;
   158 
   159     v0 ^= b;
   160     v2 ^= 0xff;
   161 
   162     SIPROUND;
   163     SIPROUND;
   164 
   165     b = v0 ^ v1 ^ v2 ^ v3;
   166 #ifndef UNALIGNED_LE_CPU
   167     U64TO8_LE(out, b);
   168     return hash;
   169 #else
   170     return b;
   171 #endif
   172 }
   173 
   174 uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k)
   175 {
   176 #ifndef UNALIGNED_LE_CPU
   177     uint64_t hash;
   178     uint8_t *out = (uint8_t*) &hash;
   179 #endif
   180     uint64_t v0 = 0x736f6d6570736575ULL;
   181     uint64_t v1 = 0x646f72616e646f6dULL;
   182     uint64_t v2 = 0x6c7967656e657261ULL;
   183     uint64_t v3 = 0x7465646279746573ULL;
   184     uint64_t k0 = U8TO64_LE(k);
   185     uint64_t k1 = U8TO64_LE(k + 8);
   186     uint64_t m;
   187     const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
   188     const int left = inlen & 7;
   189     uint64_t b = ((uint64_t)inlen) << 56;
   190     v3 ^= k1;
   191     v2 ^= k0;
   192     v1 ^= k1;
   193     v0 ^= k0;
   194 
   195     for (; in != end; in += 8) {
   196         m = U8TO64_LE_NOCASE(in);
   197         v3 ^= m;
   198 
   199         SIPROUND;
   200 
   201         v0 ^= m;
   202     }
   203 
   204     switch (left) {
   205     case 7: b |= ((uint64_t)siptlw(in[6])) << 48;
   206     case 6: b |= ((uint64_t)siptlw(in[5])) << 40;
   207     case 5: b |= ((uint64_t)siptlw(in[4])) << 32;
   208     case 4: b |= ((uint64_t)siptlw(in[3])) << 24;
   209     case 3: b |= ((uint64_t)siptlw(in[2])) << 16;
   210     case 2: b |= ((uint64_t)siptlw(in[1])) << 8;
   211     case 1: b |= ((uint64_t)siptlw(in[0])); break;
   212     case 0: break;
   213     }
   214 
   215     v3 ^= b;
   216 
   217     SIPROUND;
   218 
   219     v0 ^= b;
   220     v2 ^= 0xff;
   221 
   222     SIPROUND;
   223     SIPROUND;
   224 
   225     b = v0 ^ v1 ^ v2 ^ v3;
   226 #ifndef UNALIGNED_LE_CPU
   227     U64TO8_LE(out, b);
   228     return hash;
   229 #else
   230     return b;
   231 #endif
   232 }
   233 
   234 
   235 /* --------------------------------- TEST ------------------------------------ */
   236 
   237 #ifdef SIPHASH_TEST
   238 
   239 const uint8_t vectors_sip64[64][8] = {
   240     { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
   241     { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
   242     { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
   243     { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
   244     { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
   245     { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
   246     { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
   247     { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
   248     { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
   249     { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
   250     { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
   251     { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
   252     { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
   253     { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
   254     { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
   255     { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
   256     { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
   257     { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
   258     { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
   259     { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
   260     { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
   261     { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
   262     { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
   263     { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
   264     { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
   265     { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
   266     { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
   267     { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
   268     { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
   269     { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
   270     { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
   271     { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
   272     { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
   273     { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
   274     { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
   275     { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
   276     { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
   277     { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
   278     { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
   279     { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
   280     { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
   281     { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
   282     { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
   283     { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
   284     { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
   285     { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
   286     { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
   287     { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
   288     { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
   289     { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
   290     { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
   291     { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
   292     { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
   293     { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
   294     { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
   295     { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
   296     { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
   297     { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
   298     { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
   299     { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
   300     { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
   301     { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
   302     { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
   303     { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, },
   304 };
   305 
   306 
   307 /* Test siphash using a test vector. Returns 0 if the function passed
   308  * all the tests, otherwise 1 is returned.
   309  *
   310  * IMPORTANT: The test vector is for SipHash 2-4. Before running
   311  * the test revert back the siphash() function to 2-4 rounds since
   312  * now it uses 1-2 rounds. */
   313 int siphash_test(void) {
   314     uint8_t in[64], k[16];
   315     int i;
   316     int fails = 0;
   317 
   318     for (i = 0; i < 16; ++i)
   319         k[i] = i;
   320 
   321     for (i = 0; i < 64; ++i) {
   322         in[i] = i;
   323         uint64_t hash = siphash(in, i, k);
   324         const uint8_t *v = NULL;
   325         v = (uint8_t *)vectors_sip64;
   326         if (memcmp(&hash, v + (i * 8), 8)) {
   327             /* printf("fail for %d bytes\n", i); */
   328             fails++;
   329         }
   330     }
   331 
   332     /* Run a few basic tests with the case insensitive version. */
   333     uint64_t h1, h2;
   334     h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
   335     h2 = siphash_nocase((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
   336     if (h1 != h2) fails++;
   337 
   338     h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
   339     h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
   340     if (h1 != h2) fails++;
   341 
   342     h1 = siphash((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
   343     h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
   344     if (h1 == h2) fails++;
   345 
   346     if (!fails) return 0;
   347     return 1;
   348 }
   349 
   350 int main(void) {
   351     if (siphash_test() == 0) {
   352         printf("SipHash test: OK\n");
   353         return 0;
   354     } else {
   355         printf("SipHash test: FAILED\n");
   356         return 1;
   357     }
   358 }
   359 
   360 #endif