bloom.h (6598B)
1 /* 2 * Copyright (c) 2012-2022, Jyri J. Virkki 3 * All rights reserved. 4 * 5 * This file is under BSD license. See LICENSE file. 6 */ 7 8 #ifndef _BLOOM_H 9 #define _BLOOM_H 10 11 #ifdef __cplusplus 12 extern "C" { 13 #endif 14 15 #define BLOOM_VERSION_MAJOR 2 16 #define BLOOM_VERSION_MINOR 0 17 18 #define NULL_BLOOM_FILTER { 0, 0, 0, 0, 0.0, 0, 0, 0, 0.0, NULL } 19 20 #define ENTRIES_T unsigned int 21 #define BYTES_T unsigned long int 22 #define BITS_T unsigned long int 23 24 25 /** *************************************************************************** 26 * Structure to keep track of one bloom filter. Caller needs to 27 * allocate this and pass it to the functions below. First call for 28 * every struct must be to bloom_init(). 29 * 30 */ 31 struct bloom 32 { 33 // These fields are part of the public interface of this structure. 34 // Client code may read these values if desired. Client code MUST NOT 35 // modify any of these. 36 unsigned int entries; 37 unsigned long int bits; 38 unsigned long int bytes; 39 unsigned char hashes; 40 double error; 41 42 // Fields below are private to the implementation. These may go away or 43 // change incompatibly at any moment. Client code MUST NOT access or rely 44 // on these. 45 unsigned char ready; 46 unsigned char major; 47 unsigned char minor; 48 double bpe; 49 unsigned char * bf; 50 }; 51 52 53 /** *************************************************************************** 54 * Initialize the bloom filter for use. 55 * 56 * The filter is initialized with a bit field and number of hash functions 57 * according to the computations from the wikipedia entry: 58 * http://en.wikipedia.org/wiki/Bloom_filter 59 * 60 * Optimal number of bits is: 61 * bits = (entries * ln(error)) / ln(2)^2 62 * 63 * Optimal number of hash functions is: 64 * hashes = bpe * ln(2) 65 * 66 * Parameters: 67 * ----------- 68 * bloom - Pointer to an allocated struct bloom (see above). 69 * entries - The expected number of entries which will be inserted. 70 * Must be at least 1000 (in practice, likely much larger). 71 * error - Probability of collision (as long as entries are not 72 * exceeded). 73 * 74 * Return: 75 * ------- 76 * 0 - on success 77 * 1 - on failure 78 * 79 */ 80 int bloom_init2(struct bloom * bloom, unsigned int entries, double error); 81 82 83 /** 84 * DEPRECATED. 85 * Kept for compatibility with libbloom v.1. To be removed in v3.0. 86 * 87 */ 88 int bloom_init(struct bloom * bloom, int entries, double error); 89 90 91 /** *************************************************************************** 92 * Check if the given element is in the bloom filter. Remember this may 93 * return false positive if a collision occurred. 94 * 95 * Parameters: 96 * ----------- 97 * bloom - Pointer to an allocated struct bloom (see above). 98 * buffer - Pointer to buffer containing element to check. 99 * len - Size of 'buffer'. 100 * 101 * Return: 102 * ------- 103 * 0 - element is not present 104 * 1 - element is present (or false positive due to collision) 105 * -1 - bloom not initialized 106 * 107 */ 108 int bloom_check(struct bloom * bloom, const void * buffer, int len); 109 110 111 /** *************************************************************************** 112 * Add the given element to the bloom filter. 113 * The return code indicates if the element (or a collision) was already in, 114 * so for the common check+add use case, no need to call check separately. 115 * 116 * Parameters: 117 * ----------- 118 * bloom - Pointer to an allocated struct bloom (see above). 119 * buffer - Pointer to buffer containing element to add. 120 * len - Size of 'buffer'. 121 * 122 * Return: 123 * ------- 124 * 0 - element was not present and was added 125 * 1 - element (or a collision) had already been added previously 126 * -1 - bloom not initialized 127 * 128 */ 129 int bloom_add(struct bloom * bloom, const void * buffer, int len); 130 131 132 /** *************************************************************************** 133 * Print (to stdout) info about this bloom filter. Debugging aid. 134 * 135 */ 136 void bloom_print(struct bloom * bloom); 137 138 139 /** *************************************************************************** 140 * Deallocate internal storage. 141 * 142 * Upon return, the bloom struct is no longer usable. You may call bloom_init 143 * again on the same struct to reinitialize it again. 144 * 145 * Parameters: 146 * ----------- 147 * bloom - Pointer to an allocated struct bloom (see above). 148 * 149 * Return: none 150 * 151 */ 152 void bloom_free(struct bloom * bloom); 153 154 155 /** *************************************************************************** 156 * Erase internal storage. 157 * 158 * Erases all elements. Upon return, the bloom struct returns to its initial 159 * (initialized) state. 160 * 161 * Parameters: 162 * ----------- 163 * bloom - Pointer to an allocated struct bloom (see above). 164 * 165 * Return: 166 * 0 - on success 167 * 1 - on failure 168 * 169 */ 170 int bloom_reset(struct bloom * bloom); 171 172 173 /** *************************************************************************** 174 * Save a bloom filter to a file. 175 * 176 * Parameters: 177 * ----------- 178 * bloom - Pointer to an allocated struct bloom (see above). 179 * filename - Create (or overwrite) bloom data to this file. 180 * 181 * Return: 182 * 0 - on success 183 * 1 - on failure 184 * 185 */ 186 int bloom_save(struct bloom * bloom, char * filename); 187 188 189 /** *************************************************************************** 190 * Load a bloom filter from a file. 191 * 192 * This functions loads a file previously saved with bloom_save(). 193 * 194 * Parameters: 195 * ----------- 196 * bloom - Pointer to an allocated struct bloom (see above). 197 * filename - Load bloom filter data from this file. 198 * 199 * Return: 200 * 0 - on success 201 * > 0 - on failure 202 * 203 */ 204 int bloom_load(struct bloom * bloom, char * filename); 205 206 207 /** *************************************************************************** 208 * Merge two compatible bloom filters. 209 * 210 * On success, bloom_dest will contain all elements of bloom_src in addition 211 * to its own. The bloom_src bloom filter is never modified. 212 * 213 * Both bloom_dest and bloom_src must be initialized and both must have 214 * identical parameters. 215 * 216 * Parameters: 217 * ----------- 218 * bloom_dest - will contain the merged elements from bloom_src 219 * bloom_src - its elements will be merged into bloom_dest 220 * 221 * Return: 222 * ------- 223 * 0 - on success 224 * 1 - incompatible bloom filters 225 * -1 - bloom not initialized 226 * 227 */ 228 int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src); 229 230 231 /** *************************************************************************** 232 * Returns version string compiled into library. 233 * 234 * Return: version string 235 * 236 */ 237 const char * bloom_version(); 238 239 #ifdef __cplusplus 240 } 241 #endif 242 243 #endif