IBR-DTNSuite  0.10
BloomFilter.cpp
Go to the documentation of this file.
1 /*
2  * BloomFilter.cpp
3  *
4  * This bloom filter implementation is based on
5  * Open Bloom Filter (http://www.partow.net) written by Arash Partow.
6  *
7  * Copyright (C) 2011 IBR, TU Braunschweig
8  *
9  * Written-by: Johannes Morgenroth <morgenroth@ibr.cs.tu-bs.de>
10  *
11  * Licensed under the Apache License, Version 2.0 (the "License");
12  * you may not use this file except in compliance with the License.
13  * You may obtain a copy of the License at
14  *
15  * http://www.apache.org/licenses/LICENSE-2.0
16  *
17  * Unless required by applicable law or agreed to in writing, software
18  * distributed under the License is distributed on an "AS IS" BASIS,
19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20  * See the License for the specific language governing permissions and
21  * limitations under the License.
22  *
23  */
24 
26 #include "ibrcommon/Exceptions.h"
27 #include <limits>
28 #include <math.h>
29 
30 namespace ibrcommon
31 {
33  { }
34 
36  : salt_count_(salt_count)
37  {
38  generate_salt();
39  }
40 
42  {
43  }
44 
46  {
47  return ( provider.salt_count_ == salt_count_);
48  }
49 
51  {
52  _salt.clear();
53  }
54 
55  void DefaultHashProvider::add(bloom_type hash)
56  {
57  _salt.push_back(hash);
58  }
59 
61  {
62  return _salt.size();
63  }
64 
65  const std::list<bloom_type> DefaultHashProvider::hash(const unsigned char* begin, std::size_t remaining_length) const
66  {
67  std::list<bloom_type> hashes;
68 
69  for (std::vector<bloom_type>::const_iterator iter = _salt.begin(); iter != _salt.end(); ++iter)
70  {
71  hashes.push_back(hash_ap(begin, remaining_length, (*iter)));
72  }
73 
74  return hashes;
75  }
76 
77 
78  void DefaultHashProvider::generate_salt()
79  {
80  const unsigned int predef_salt_count = 64;
81  static const bloom_type predef_salt[predef_salt_count] =
82  {
83  0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
84  0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
85  0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
86  0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
87  0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
88  0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
89  0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
90  0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
91  0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
92  0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
93  0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
94  0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
95  0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
96  0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
97  0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
98  0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432
99  };
100 
101  if (salt_count_ > predef_salt_count)
102  {
103  throw ibrcommon::Exception("Max. 64 hash salts supported!");
104  }
105 
106  for(unsigned int i = 0; i < salt_count_; ++i)
107  {
108  add(predef_salt[i]);
109  }
110  }
111 
112  bloom_type DefaultHashProvider::hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const
113  {
114  const unsigned char* it = begin;
115  while(remaining_length >= 2)
116  {
117  hash ^= (hash << 7) ^ (*it++) * (hash >> 3);
118  hash ^= (~((hash << 11) + ((*it++) ^ (hash >> 5))));
119  remaining_length -= 2;
120  }
121  if (remaining_length)
122  {
123  hash ^= (hash << 7) ^ (*it) * (hash >> 3);
124  }
125  return hash;
126  }
127 
128  const unsigned char BloomFilter::bit_mask[bits_per_char] = {
129  0x01, //00000001
130  0x02, //00000010
131  0x04, //00000100
132  0x08, //00001000
133  0x10, //00010000
134  0x20, //00100000
135  0x40, //01000000
136  0x80 //10000000
137  };
138 
139  BloomFilter::BloomFilter(std::size_t table_size, std::size_t salt_count)
140  : _hashp(salt_count), table_size_(table_size), _itemcount(0), salt_count_(salt_count)
141  {
142  // check if table_size_ is multiple of 8
143  if (table_size_ % bits_per_char != 0) {
145  }
146 
148  std::fill_n(bit_table_,(table_size_ / bits_per_char),0x00);
149  }
150 
152  : _hashp(filter._hashp), table_size_(filter.table_size_), _itemcount(filter._itemcount), salt_count_(filter.salt_count_)
153  {
155  std::copy(filter.bit_table_, filter.bit_table_ + (table_size_ / bits_per_char), bit_table_);
156  }
157 
159  {
160  delete[] bit_table_;
161  }
162 
164  {
165  _hashp = filter._hashp;
166  table_size_ = filter.table_size_;
167  delete[] bit_table_;
169  std::copy(filter.bit_table_, filter.bit_table_ + (table_size_ / bits_per_char), bit_table_);
170  return *this;
171  }
172 
174  {
175  return (0 == table_size_);
176  }
177 
179  {
180  std::fill_n(bit_table_,(table_size_ / bits_per_char),0x00);
181  _itemcount = 0;
182  }
183 
184  void BloomFilter::insert(const unsigned char* key_begin, const std::size_t length)
185  {
186  std::size_t bit_index = 0;
187  std::size_t bit = 0;
188 
189  std::list<bloom_type> hashes = _hashp.hash(key_begin, length);
190 
191  for (std::list<bloom_type>::iterator iter = hashes.begin(); iter != hashes.end(); ++iter)
192  {
193  compute_indices( (*iter), bit_index, bit );
194  bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
195  }
196 
197  if (_itemcount < std::numeric_limits<unsigned int>::max()) _itemcount++;
198  }
199 
200  void BloomFilter::insert(const std::string& key)
201  {
202  insert(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
203  }
204 
205  void BloomFilter::insert(const char* data, const std::size_t& length)
206  {
207  insert(reinterpret_cast<const unsigned char*>(data),length);
208  }
209 
210  bool BloomFilter::contains(const unsigned char* key_begin, const std::size_t length) const
211  {
212  std::size_t bit_index = 0;
213  std::size_t bit = 0;
214 
215  const std::list<bloom_type> hashes = _hashp.hash(key_begin, length);
216 
217  for (std::list<bloom_type>::const_iterator iter = hashes.begin(); iter != hashes.end(); ++iter)
218  {
219  compute_indices( (*iter), bit_index, bit );
220  if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
221  {
222  return false;
223  }
224  }
225 
226  return true;
227  }
228 
229  bool BloomFilter::contains(const std::string& key) const
230  {
231  return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
232  }
233 
234  bool BloomFilter::contains(const char* data, const std::size_t& length) const
235  {
236  return contains(reinterpret_cast<const unsigned char*>(data),length);
237  }
238 
239  std::size_t BloomFilter::size() const
240  {
241  return (table_size_ / bits_per_char);
242  }
243 
245  {
246  /* intersection */
247  if (
248  (_hashp == filter._hashp) &&
249  (table_size_ == filter.table_size_)
250  )
251  {
252  for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i)
253  {
254  bit_table_[i] &= filter.bit_table_[i];
255  }
256  }
257  return *this;
258  }
259 
261  {
262  /* union */
263  if (
264  (_hashp == filter._hashp) &&
265  (table_size_ == filter.table_size_)
266  )
267  {
268  for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i)
269  {
270  bit_table_[i] |= filter.bit_table_[i];
271  }
272  }
273  return *this;
274  }
275 
277  {
278  /* difference */
279  if (
280  (_hashp == filter._hashp) &&
281  (table_size_ == filter.table_size_)
282  )
283  {
284  for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i)
285  {
286  bit_table_[i] ^= filter.bit_table_[i];
287  }
288  }
289  return *this;
290  }
291 
292  void BloomFilter::load(const cell_type* data, size_t len)
293  {
294  if (len < table_size_)
295  {
296  std::copy(data, data + len, bit_table_);
297  }
298  else
299  {
300  std::copy(data, data + table_size_, bit_table_);
301  }
302 
303  _itemcount = 0;
304  }
305 
307  {
308  return bit_table_;
309  }
310 
311  void BloomFilter::compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
312  {
313  bit_index = hash % table_size_;
314  bit = bit_index % bits_per_char;
315  }
316 
318  {
319  double m = static_cast<double>(table_size_);
320  double n = static_cast<double>(_itemcount);
321  double k = static_cast<double>(salt_count_);
322 
323  return pow(1 - pow(1 - (1 / m), k * n), k);
324  }
325 }