IBR-DTNSuite
0.8
|
00001 /* 00002 * BloomFilter.cpp 00003 * 00004 * Created on: 03.03.2010 00005 * Author: morgenro 00006 */ 00007 00008 #include "ibrcommon/data/BloomFilter.h" 00009 #include "ibrcommon/Exceptions.h" 00010 #include <limits> 00011 #include <math.h> 00012 00013 namespace ibrcommon 00014 { 00015 DefaultHashProvider::DefaultHashProvider(size_t salt_count) 00016 : salt_count_(salt_count) 00017 { 00018 generate_salt(); 00019 } 00020 00021 DefaultHashProvider::~DefaultHashProvider() 00022 { 00023 } 00024 00025 bool DefaultHashProvider::operator==(const DefaultHashProvider &provider) const 00026 { 00027 return ( provider.salt_count_ == salt_count_); 00028 } 00029 00030 void DefaultHashProvider::clear() 00031 { 00032 _salt.clear(); 00033 } 00034 00035 void DefaultHashProvider::add(bloom_type hash) 00036 { 00037 _salt.push_back(hash); 00038 } 00039 00040 size_t DefaultHashProvider::count() const 00041 { 00042 return _salt.size(); 00043 } 00044 00045 const std::list<bloom_type> DefaultHashProvider::hash(const unsigned char* begin, std::size_t remaining_length) const 00046 { 00047 std::list<bloom_type> hashes; 00048 00049 for (std::vector<bloom_type>::const_iterator iter = _salt.begin(); iter != _salt.end(); iter++) 00050 { 00051 hashes.push_back(hash_ap(begin, remaining_length, (*iter))); 00052 } 00053 00054 return hashes; 00055 } 00056 00057 00058 void DefaultHashProvider::generate_salt() 00059 { 00060 const unsigned int predef_salt_count = 64; 00061 static const bloom_type predef_salt[predef_salt_count] = 00062 { 00063 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC, 00064 0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B, 00065 0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66, 00066 0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA, 00067 0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99, 00068 0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33, 00069 0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5, 00070 0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000, 00071 0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F, 00072 0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63, 00073 0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7, 00074 0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492, 00075 0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A, 00076 0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B, 00077 0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3, 00078 0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432 00079 }; 00080 00081 if (salt_count_ > predef_salt_count) 00082 { 00083 throw ibrcommon::Exception("Max. 64 hash salts supported!"); 00084 } 00085 00086 for(unsigned int i = 0; i < salt_count_; ++i) 00087 { 00088 add(predef_salt[i]); 00089 } 00090 } 00091 00092 bloom_type DefaultHashProvider::hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const 00093 { 00094 const unsigned char* it = begin; 00095 while(remaining_length >= 2) 00096 { 00097 hash ^= (hash << 7) ^ (*it++) * (hash >> 3); 00098 hash ^= (~((hash << 11) + ((*it++) ^ (hash >> 5)))); 00099 remaining_length -= 2; 00100 } 00101 if (remaining_length) 00102 { 00103 hash ^= (hash << 7) ^ (*it) * (hash >> 3); 00104 } 00105 return hash; 00106 } 00107 00108 const unsigned char BloomFilter::bit_mask[bits_per_char] = { 00109 0x01, //00000001 00110 0x02, //00000010 00111 0x04, //00000100 00112 0x08, //00001000 00113 0x10, //00010000 00114 0x20, //00100000 00115 0x40, //01000000 00116 0x80 //10000000 00117 }; 00118 00119 BloomFilter::BloomFilter(std::size_t table_size, std::size_t salt_count) 00120 : _hashp(salt_count), table_size_(table_size), _itemcount(0), salt_count_(salt_count) 00121 { 00122 bit_table_ = new cell_type[table_size_ / bits_per_char]; 00123 std::fill_n(bit_table_,(table_size_ / bits_per_char),0x00); 00124 } 00125 00126 BloomFilter::BloomFilter(const BloomFilter& filter) 00127 : _hashp(filter._hashp), table_size_(filter.table_size_), _itemcount(filter._itemcount), salt_count_(filter.salt_count_) 00128 { 00129 bit_table_ = new cell_type[table_size_ / bits_per_char]; 00130 std::copy(filter.bit_table_, filter.bit_table_ + (table_size_ / bits_per_char), bit_table_); 00131 } 00132 00133 BloomFilter::~BloomFilter() 00134 { 00135 delete[] bit_table_; 00136 } 00137 00138 BloomFilter& BloomFilter::operator=(const BloomFilter& filter) 00139 { 00140 _hashp = filter._hashp; 00141 table_size_ = filter.table_size_; 00142 delete[] bit_table_; 00143 bit_table_ = new cell_type[table_size_ / bits_per_char]; 00144 std::copy(filter.bit_table_, filter.bit_table_ + (table_size_ / bits_per_char), bit_table_); 00145 return *this; 00146 } 00147 00148 bool BloomFilter::operator!() const 00149 { 00150 return (0 == table_size_); 00151 } 00152 00153 void BloomFilter::clear() 00154 { 00155 std::fill_n(bit_table_,(table_size_ / bits_per_char),0x00); 00156 _itemcount = 0; 00157 } 00158 00159 void BloomFilter::insert(const unsigned char* key_begin, const std::size_t length) 00160 { 00161 std::size_t bit_index = 0; 00162 std::size_t bit = 0; 00163 00164 std::list<bloom_type> hashes = _hashp.hash(key_begin, length); 00165 00166 for (std::list<bloom_type>::iterator iter = hashes.begin(); iter != hashes.end(); iter++) 00167 { 00168 compute_indices( (*iter), bit_index, bit ); 00169 bit_table_[bit_index / bits_per_char] |= bit_mask[bit]; 00170 } 00171 00172 if (_itemcount < std::numeric_limits<unsigned int>::max()) _itemcount++; 00173 } 00174 00175 void BloomFilter::insert(const std::string& key) 00176 { 00177 insert(reinterpret_cast<const unsigned char*>(key.c_str()),key.size()); 00178 } 00179 00180 void BloomFilter::insert(const char* data, const std::size_t& length) 00181 { 00182 insert(reinterpret_cast<const unsigned char*>(data),length); 00183 } 00184 00185 bool BloomFilter::contains(const unsigned char* key_begin, const std::size_t length) const 00186 { 00187 std::size_t bit_index = 0; 00188 std::size_t bit = 0; 00189 00190 const std::list<bloom_type> hashes = _hashp.hash(key_begin, length); 00191 00192 for (std::list<bloom_type>::const_iterator iter = hashes.begin(); iter != hashes.end(); iter++) 00193 { 00194 compute_indices( (*iter), bit_index, bit ); 00195 if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit]) 00196 { 00197 return false; 00198 } 00199 } 00200 00201 return true; 00202 } 00203 00204 bool BloomFilter::contains(const std::string& key) const 00205 { 00206 return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size()); 00207 } 00208 00209 bool BloomFilter::contains(const char* data, const std::size_t& length) const 00210 { 00211 return contains(reinterpret_cast<const unsigned char*>(data),length); 00212 } 00213 00214 std::size_t BloomFilter::size() const 00215 { 00216 return (table_size_ / bits_per_char); 00217 } 00218 00219 BloomFilter& BloomFilter::operator &= (const BloomFilter& filter) 00220 { 00221 /* intersection */ 00222 if ( 00223 (_hashp == filter._hashp) && 00224 (table_size_ == filter.table_size_) 00225 ) 00226 { 00227 for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i) 00228 { 00229 bit_table_[i] &= filter.bit_table_[i]; 00230 } 00231 } 00232 return *this; 00233 } 00234 00235 BloomFilter& BloomFilter::operator |= (const BloomFilter& filter) 00236 { 00237 /* union */ 00238 if ( 00239 (_hashp == filter._hashp) && 00240 (table_size_ == filter.table_size_) 00241 ) 00242 { 00243 for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i) 00244 { 00245 bit_table_[i] |= filter.bit_table_[i]; 00246 } 00247 } 00248 return *this; 00249 } 00250 00251 BloomFilter& BloomFilter::operator ^= (const BloomFilter& filter) 00252 { 00253 /* difference */ 00254 if ( 00255 (_hashp == filter._hashp) && 00256 (table_size_ == filter.table_size_) 00257 ) 00258 { 00259 for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i) 00260 { 00261 bit_table_[i] ^= filter.bit_table_[i]; 00262 } 00263 } 00264 return *this; 00265 } 00266 00267 void BloomFilter::load(const cell_type* data, size_t len) 00268 { 00269 if (len < table_size_) 00270 { 00271 std::copy(data, data + len, bit_table_); 00272 } 00273 else 00274 { 00275 std::copy(data, data + table_size_, bit_table_); 00276 } 00277 00278 _itemcount = 0; 00279 } 00280 00281 const cell_type* BloomFilter::table() const 00282 { 00283 return bit_table_; 00284 } 00285 00286 void BloomFilter::compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const 00287 { 00288 bit_index = hash % table_size_; 00289 bit = bit_index % bits_per_char; 00290 } 00291 00292 float BloomFilter::getAllocation() const 00293 { 00294 float m = table_size_; 00295 float n = _itemcount; 00296 float k = salt_count_; 00297 00298 return pow(1 - pow(1 - (1 / m), k * n), k); 00299 } 00300 }