💾 Archived View for gmi.noulin.net › gitRepositories › sodiumTest › file › bloom.c.gmi captured on 2024-09-29 at 01:16:48. Gemini links have been rewritten to link to archived content

View Raw

More Information

⬅️ Previous capture (2023-01-29)

-=-=-=-=-=-=-

sodiumTest

Log

Files

Refs

README

bloom.c (7564B)

     1 /*
     2  *  Copyright (c) 2012-2022, Jyri J. Virkki
     3  *  All rights reserved.
     4  *
     5  *  This file is under BSD license. See LICENSE file.
     6  */
     7 
     8 /*
     9  * Refer to bloom.h for documentation on the public interfaces.
    10  */
    11 
    12 #include <assert.h>
    13 #include <fcntl.h>
    14 #include <math.h>
    15 #include <stdint.h>
    16 #include <stdio.h>
    17 #include <stdlib.h>
    18 #include <string.h>
    19 #include <sys/stat.h>
    20 #include <sys/types.h>
    21 #include <unistd.h>
    22 
    23 #include "bloom.h"
    24 #include "murmurhash2.h"
    25 
    26 #define MAKESTRING(n) STRING(n)
    27 #define STRING(n) #n
    28 #define BLOOM_MAGIC "libbloom2"
    29 
    30 inline static int test_bit_set_bit(unsigned char * buf,
    31                                    unsigned long int bit, int set_bit)
    32 {
    33   unsigned long int byte = bit >> 3;
    34   unsigned char c = buf[byte];        // expensive memory access
    35   unsigned char mask = 1 << (bit % 8ul);
    36 
    37   if (c & mask) {
    38     return 1;
    39   } else {
    40     if (set_bit) {
    41       buf[byte] = c | mask;
    42     }
    43     return 0;
    44   }
    45 }
    46 
    47 
    48 static int bloom_check_add(struct bloom * bloom,
    49                            const void * buffer, int len, int add)
    50 {
    51   if (bloom->ready == 0) {
    52     printf("bloom at %p not initialized!\n", (void *)bloom);
    53     return -1;
    54   }
    55 
    56   unsigned char hits = 0;
    57   unsigned int a = murmurhash2(buffer, len, 0x9747b28c);
    58   unsigned int b = murmurhash2(buffer, len, a);
    59   unsigned long int x;
    60   unsigned long int i;
    61 
    62   for (i = 0; i < bloom->hashes; i++) {
    63     x = (a + b*i) % bloom->bits;
    64     if (test_bit_set_bit(bloom->bf, x, add)) {
    65       hits++;
    66     } else if (!add) {
    67       // Don't care about the presence of all the bits. Just our own.
    68       return 0;
    69     }
    70   }
    71 
    72   if (hits == bloom->hashes) {
    73     return 1;                // 1 == element already in (or collision)
    74   }
    75 
    76   return 0;
    77 }
    78 
    79 
    80 // DEPRECATED - Please migrate to bloom_init2.
    81 int bloom_init(struct bloom * bloom, int entries, double error)
    82 {
    83   return bloom_init2(bloom, (unsigned int)entries, error);
    84 }
    85 
    86 
    87 int bloom_init2(struct bloom * bloom, unsigned int entries, double error)
    88 {
    89   if (sizeof(unsigned long int) < 8) {
    90     printf("error: libbloom will not function correctly because\n");
    91     printf("sizeof(unsigned long int) == %ld\n", sizeof(unsigned long int));
    92     exit(1);
    93   }
    94 
    95   memset(bloom, 0, sizeof(struct bloom));
    96 
    97   if (entries < 1000 || error <= 0 || error >= 1) {
    98     return 1;
    99   }
   100 
   101   bloom->entries = entries;
   102   bloom->error = error;
   103 
   104   double num = -log(bloom->error);
   105   double denom = 0.480453013918201; // ln(2)^2
   106   bloom->bpe = (num / denom);
   107 
   108   long double dentries = (long double)entries;
   109   long double allbits = dentries * bloom->bpe;
   110   bloom->bits = (unsigned long int)allbits;
   111 
   112   if (bloom->bits % 8) {
   113     bloom->bytes = (bloom->bits / 8) + 1;
   114   } else {
   115     bloom->bytes = bloom->bits / 8;
   116   }
   117 
   118   bloom->hashes = (unsigned char)ceil(0.693147180559945 * bloom->bpe); // ln(2)
   119 
   120   bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
   121   if (bloom->bf == NULL) {                                   // LCOV_EXCL_START
   122     return 1;
   123   }                                                          // LCOV_EXCL_STOP
   124 
   125   bloom->ready = 1;
   126 
   127   bloom->major = BLOOM_VERSION_MAJOR;
   128   bloom->minor = BLOOM_VERSION_MINOR;
   129 
   130   return 0;
   131 }
   132 
   133 
   134 int bloom_check(struct bloom * bloom, const void * buffer, int len)
   135 {
   136   return bloom_check_add(bloom, buffer, len, 0);
   137 }
   138 
   139 
   140 int bloom_add(struct bloom * bloom, const void * buffer, int len)
   141 {
   142   return bloom_check_add(bloom, buffer, len, 1);
   143 }
   144 
   145 
   146 void bloom_print(struct bloom * bloom)
   147 {
   148   printf("bloom at %p\n", (void *)bloom);
   149   if (!bloom->ready) { printf(" *** NOT READY ***\n"); }
   150   printf(" ->version = %d.%d\n", bloom->major, bloom->minor);
   151   printf(" ->entries = %u\n", bloom->entries);
   152   printf(" ->error = %f\n", bloom->error);
   153   printf(" ->bits = %lu\n", bloom->bits);
   154   printf(" ->bits per elem = %f\n", bloom->bpe);
   155   printf(" ->bytes = %lu", bloom->bytes);
   156   unsigned int KB = bloom->bytes / 1024;
   157   unsigned int MB = KB / 1024;
   158   printf(" (%u KB, %u MB)\n", KB, MB);
   159   printf(" ->hash functions = %d\n", bloom->hashes);
   160 }
   161 
   162 
   163 void bloom_free(struct bloom * bloom)
   164 {
   165   if (bloom->ready) {
   166     free(bloom->bf);
   167   }
   168   bloom->ready = 0;
   169 }
   170 
   171 
   172 int bloom_reset(struct bloom * bloom)
   173 {
   174   if (!bloom->ready) return 1;
   175   memset(bloom->bf, 0, bloom->bytes);
   176   return 0;
   177 }
   178 
   179 
   180 int bloom_save(struct bloom * bloom, char * filename)
   181 {
   182   if (filename == NULL || filename[0] == 0) {
   183     return 1;
   184   }
   185 
   186   int fd = open(filename, O_WRONLY | O_CREAT, 0644);
   187   if (fd < 0) {
   188     return 1;
   189   }
   190 
   191   ssize_t out = write(fd, BLOOM_MAGIC, strlen(BLOOM_MAGIC));
   192   if (out != strlen(BLOOM_MAGIC)) { goto save_error; }       // LCOV_EXCL_LINE
   193 
   194   uint16_t size = sizeof(struct bloom);
   195   out = write(fd, &size, sizeof(uint16_t));
   196   if (out != sizeof(uint16_t)) { goto save_error; }          // LCOV_EXCL_LINE
   197 
   198   out = write(fd, bloom, sizeof(struct bloom));
   199   if (out != sizeof(struct bloom)) { goto save_error; }      // LCOV_EXCL_LINE
   200 
   201   out = write(fd, bloom->bf, bloom->bytes);
   202   if (out != bloom->bytes) { goto save_error; }              // LCOV_EXCL_LINE
   203 
   204   close(fd);
   205   return 0;
   206                                                              // LCOV_EXCL_START
   207  save_error:
   208   close(fd);
   209   return 1;
   210                                                              // LCOV_EXCL_STOP
   211 }
   212 
   213 
   214 int bloom_load(struct bloom * bloom, char * filename)
   215 {
   216   int rv = 0;
   217 
   218   if (filename == NULL || filename[0] == 0) { return 1; }
   219   if (bloom == NULL) { return 2; }
   220 
   221   memset(bloom, 0, sizeof(struct bloom));
   222 
   223   int fd = open(filename, O_RDONLY);
   224   if (fd < 0) { return 3; }
   225 
   226   char line[30];
   227   memset(line, 0, 30);
   228   ssize_t in = read(fd, line, strlen(BLOOM_MAGIC));
   229 
   230   if (in != strlen(BLOOM_MAGIC)) {
   231     rv = 4;
   232     goto load_error;
   233   }
   234 
   235   if (strncmp(line, BLOOM_MAGIC, strlen(BLOOM_MAGIC))) {
   236     rv = 5;
   237     goto load_error;
   238   }
   239 
   240   uint16_t size;
   241   in = read(fd, &size, sizeof(uint16_t));
   242   if (in != sizeof(uint16_t)) {
   243     rv = 6;
   244     goto load_error;
   245   }
   246 
   247   if (size != sizeof(struct bloom)) {
   248     rv = 7;
   249     goto load_error;
   250   }
   251 
   252   in = read(fd, bloom, sizeof(struct bloom));
   253   if (in != sizeof(struct bloom)) {
   254     rv = 8;
   255     goto load_error;
   256   }
   257 
   258   bloom->bf = NULL;
   259   if (bloom->major != BLOOM_VERSION_MAJOR) {
   260     rv = 9;
   261     goto load_error;
   262   }
   263 
   264   bloom->bf = (unsigned char *)malloc(bloom->bytes);
   265   if (bloom->bf == NULL) { rv = 10; goto load_error; }       // LCOV_EXCL_LINE
   266 
   267   in = read(fd, bloom->bf, bloom->bytes);
   268   if (in != bloom->bytes) {
   269     rv = 11;
   270     free(bloom->bf);
   271     bloom->bf = NULL;
   272     goto load_error;
   273   }
   274 
   275   close(fd);
   276   return rv;
   277 
   278  load_error:
   279   close(fd);
   280   bloom->ready = 0;
   281   return rv;
   282 }
   283 
   284 
   285 int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src)
   286 {
   287   if (bloom_dest->ready == 0) {
   288     printf("bloom at %p not initialized!\n", (void *)bloom_dest);
   289     return -1;
   290   }
   291 
   292   if (bloom_src->ready == 0) {
   293     printf("bloom at %p not initialized!\n", (void *)bloom_src);
   294     return -1;
   295   }
   296 
   297   if (bloom_dest->entries != bloom_src->entries) {
   298     return 1;
   299   }
   300 
   301   if (bloom_dest->error != bloom_src->error) {
   302     return 1;
   303   }
   304 
   305   if (bloom_dest->major != bloom_src->major) {
   306     return 1;
   307   }
   308 
   309   if (bloom_dest->minor != bloom_src->minor) {
   310     return 1;
   311   }
   312 
   313   // Not really possible if properly used but check anyway to avoid the
   314   // possibility of buffer overruns.
   315   if (bloom_dest->bytes != bloom_src->bytes) {
   316     return 1;                                                // LCOV_EXCL_LINE
   317   }
   318 
   319   unsigned long int p;
   320   for (p = 0; p < bloom_dest->bytes; p++) {
   321     bloom_dest->bf[p] |= bloom_src->bf[p];
   322   }
   323 
   324   return 0;
   325 }
   326 
   327 
   328 const char * bloom_version()
   329 {
   330   return MAKESTRING(BLOOM_VERSION);
   331 }