IBR-DTNSuite  0.8
ibrcommon/ibrcommon/data/BloomFilter.cpp
Go to the documentation of this file.
00001 /*
00002  * BloomFilter.cpp
00003  *
00004  *  Created on: 03.03.2010
00005  *      Author: morgenro
00006  */
00007 
00008 #include "ibrcommon/data/BloomFilter.h"
00009 #include "ibrcommon/Exceptions.h"
00010 #include <limits>
00011 #include <math.h>
00012 
00013 namespace ibrcommon
00014 {
00015         DefaultHashProvider::DefaultHashProvider(size_t salt_count)
00016          : salt_count_(salt_count)
00017         {
00018                 generate_salt();
00019         }
00020 
00021         DefaultHashProvider::~DefaultHashProvider()
00022         {
00023         }
00024 
00025         bool DefaultHashProvider::operator==(const DefaultHashProvider &provider) const
00026         {
00027                 return ( provider.salt_count_ == salt_count_);
00028         }
00029 
00030         void DefaultHashProvider::clear()
00031         {
00032                 _salt.clear();
00033         }
00034 
00035         void DefaultHashProvider::add(bloom_type hash)
00036         {
00037                 _salt.push_back(hash);
00038         }
00039 
00040         size_t DefaultHashProvider::count() const
00041         {
00042                 return _salt.size();
00043         }
00044 
00045         const std::list<bloom_type> DefaultHashProvider::hash(const unsigned char* begin, std::size_t remaining_length) const
00046         {
00047                 std::list<bloom_type> hashes;
00048 
00049                 for (std::vector<bloom_type>::const_iterator iter = _salt.begin(); iter != _salt.end(); iter++)
00050                 {
00051                         hashes.push_back(hash_ap(begin, remaining_length, (*iter)));
00052                 }
00053 
00054                 return hashes;
00055         }
00056 
00057 
00058         void DefaultHashProvider::generate_salt()
00059         {
00060                 const unsigned int predef_salt_count = 64;
00061                 static const bloom_type predef_salt[predef_salt_count] =
00062                                                                         {
00063                                                                                   0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
00064                                                                                   0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
00065                                                                                   0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
00066                                                                                   0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
00067                                                                                   0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
00068                                                                                   0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
00069                                                                                   0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
00070                                                                                   0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
00071                                                                                   0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
00072                                                                                   0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
00073                                                                                   0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
00074                                                                                   0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
00075                                                                                   0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
00076                                                                                   0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
00077                                                                                   0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
00078                                                                                   0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432
00079                                                                         };
00080 
00081                 if (salt_count_ > predef_salt_count)
00082                 {
00083                         throw ibrcommon::Exception("Max. 64 hash salts supported!");
00084                 }
00085 
00086                 for(unsigned int i = 0; i < salt_count_; ++i)
00087                 {
00088                         add(predef_salt[i]);
00089                 }
00090         }
00091 
00092         bloom_type DefaultHashProvider::hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const
00093         {
00094                 const unsigned char* it = begin;
00095                 while(remaining_length >= 2)
00096                 {
00097                         hash ^=    (hash <<  7) ^  (*it++) * (hash >> 3);
00098                         hash ^= (~((hash << 11) + ((*it++) ^ (hash >> 5))));
00099                         remaining_length -= 2;
00100                 }
00101                 if (remaining_length)
00102                 {
00103                         hash ^= (hash <<  7) ^ (*it) * (hash >> 3);
00104                 }
00105                 return hash;
00106         }
00107 
00108         const unsigned char BloomFilter::bit_mask[bits_per_char] = {
00109                                                                                                                    0x01,  //00000001
00110                                                                                                                    0x02,  //00000010
00111                                                                                                                    0x04,  //00000100
00112                                                                                                                    0x08,  //00001000
00113                                                                                                                    0x10,  //00010000
00114                                                                                                                    0x20,  //00100000
00115                                                                                                                    0x40,  //01000000
00116                                                                                                                    0x80   //10000000
00117                                                                                                                  };
00118 
00119         BloomFilter::BloomFilter(std::size_t table_size, std::size_t salt_count)
00120          : _hashp(salt_count), table_size_(table_size), _itemcount(0), salt_count_(salt_count)
00121         {
00122                 bit_table_ = new cell_type[table_size_ / bits_per_char];
00123                 std::fill_n(bit_table_,(table_size_ / bits_per_char),0x00);
00124         }
00125 
00126         BloomFilter::BloomFilter(const BloomFilter& filter)
00127          : _hashp(filter._hashp), table_size_(filter.table_size_), _itemcount(filter._itemcount), salt_count_(filter.salt_count_)
00128         {
00129                 bit_table_ = new cell_type[table_size_ / bits_per_char];
00130                 std::copy(filter.bit_table_, filter.bit_table_ + (table_size_ / bits_per_char), bit_table_);
00131         }
00132 
00133         BloomFilter::~BloomFilter()
00134         {
00135                 delete[] bit_table_;
00136         }
00137 
00138         BloomFilter& BloomFilter::operator=(const BloomFilter& filter)
00139         {
00140                 _hashp = filter._hashp;
00141                 table_size_ = filter.table_size_;
00142                 delete[] bit_table_;
00143                 bit_table_ = new cell_type[table_size_ / bits_per_char];
00144                 std::copy(filter.bit_table_, filter.bit_table_ + (table_size_ / bits_per_char), bit_table_);
00145                 return *this;
00146         }
00147 
00148         bool BloomFilter::operator!() const
00149         {
00150                 return (0 == table_size_);
00151         }
00152 
00153         void BloomFilter::clear()
00154         {
00155                 std::fill_n(bit_table_,(table_size_ / bits_per_char),0x00);
00156                 _itemcount = 0;
00157         }
00158 
00159         void BloomFilter::insert(const unsigned char* key_begin, const std::size_t length)
00160         {
00161                 std::size_t bit_index = 0;
00162                 std::size_t bit = 0;
00163 
00164                 std::list<bloom_type> hashes = _hashp.hash(key_begin, length);
00165 
00166                 for (std::list<bloom_type>::iterator iter = hashes.begin(); iter != hashes.end(); iter++)
00167                 {
00168                         compute_indices( (*iter), bit_index, bit );
00169                         bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
00170                 }
00171 
00172                 if (_itemcount < std::numeric_limits<unsigned int>::max()) _itemcount++;
00173         }
00174 
00175         void BloomFilter::insert(const std::string& key)
00176         {
00177                 insert(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
00178         }
00179 
00180         void BloomFilter::insert(const char* data, const std::size_t& length)
00181         {
00182                 insert(reinterpret_cast<const unsigned char*>(data),length);
00183         }
00184 
00185         bool BloomFilter::contains(const unsigned char* key_begin, const std::size_t length) const
00186         {
00187                 std::size_t bit_index = 0;
00188                 std::size_t bit = 0;
00189 
00190                 const std::list<bloom_type> hashes = _hashp.hash(key_begin, length);
00191 
00192                 for (std::list<bloom_type>::const_iterator iter = hashes.begin(); iter != hashes.end(); iter++)
00193                 {
00194                         compute_indices( (*iter), bit_index, bit );
00195                         if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
00196                         {
00197                                 return false;
00198                         }
00199                 }
00200 
00201                 return true;
00202         }
00203 
00204         bool BloomFilter::contains(const std::string& key) const
00205         {
00206                 return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
00207         }
00208 
00209         bool BloomFilter::contains(const char* data, const std::size_t& length) const
00210         {
00211                 return contains(reinterpret_cast<const unsigned char*>(data),length);
00212         }
00213 
00214         std::size_t BloomFilter::size() const
00215         {
00216                 return (table_size_ / bits_per_char);
00217         }
00218 
00219         BloomFilter& BloomFilter::operator &= (const BloomFilter& filter)
00220         {
00221                   /* intersection */
00222                   if (
00223                           (_hashp  == filter._hashp) &&
00224                           (table_size_  == filter.table_size_)
00225                          )
00226                   {
00227                          for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i)
00228                          {
00229                                 bit_table_[i] &= filter.bit_table_[i];
00230                          }
00231                   }
00232                   return *this;
00233         }
00234 
00235         BloomFilter& BloomFilter::operator |= (const BloomFilter& filter)
00236         {
00237           /* union */
00238           if (
00239                   (_hashp  == filter._hashp) &&
00240                   (table_size_  == filter.table_size_)
00241                  )
00242           {
00243                  for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i)
00244                  {
00245                         bit_table_[i] |= filter.bit_table_[i];
00246                  }
00247           }
00248           return *this;
00249         }
00250 
00251         BloomFilter& BloomFilter::operator ^= (const BloomFilter& filter)
00252         {
00253                   /* difference */
00254                   if (
00255                           (_hashp  == filter._hashp) &&
00256                           (table_size_  == filter.table_size_)
00257                          )
00258                   {
00259                          for (std::size_t i = 0; i < (table_size_ / bits_per_char); ++i)
00260                          {
00261                                 bit_table_[i] ^= filter.bit_table_[i];
00262                          }
00263                   }
00264                   return *this;
00265         }
00266 
00267         void BloomFilter::load(const cell_type* data, size_t len)
00268         {
00269                 if (len < table_size_)
00270                 {
00271                         std::copy(data, data + len, bit_table_);
00272                 }
00273                 else
00274                 {
00275                         std::copy(data, data + table_size_, bit_table_);
00276                 }
00277 
00278                 _itemcount = 0;
00279         }
00280 
00281         const cell_type* BloomFilter::table() const
00282         {
00283                 return bit_table_;
00284         }
00285 
00286         void BloomFilter::compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
00287         {
00288                 bit_index = hash % table_size_;
00289                 bit = bit_index % bits_per_char;
00290         }
00291 
00292         float BloomFilter::getAllocation() const
00293         {
00294                 float m = table_size_;
00295                 float n = _itemcount;
00296                 float k = salt_count_;
00297 
00298                 return pow(1 - pow(1 - (1 / m), k * n), k);
00299         }
00300 }