barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
freqtable.hpp
Go to the documentation of this file.
1 #ifndef BARRY_STATSDB_HPP
2 #define BARRY_STATSDB_HPP 1
3 
21 template<typename T = double>
22 class FreqTable {
23 private:
24 
25  std::unordered_map<size_t, size_t> index;
26  std::vector< double > data;
27  size_t k = 0u;
28  size_t n = 0u;
29 
30  typename std::unordered_map<size_t, size_t>::iterator iter;
31 
32 public:
33  // size_t ncols;
34  FreqTable() {};
35  ~FreqTable() {};
36 
37  size_t add(const std::vector< T > & x, size_t * h_precomp);
38 
39  Counts_type as_vector() const;
40  const std::vector< double > & get_data() const {return data;};
41  const std::unordered_map<size_t,size_t> & get_index() const {return index;};
42 
43  void clear();
44  void reserve(size_t n, size_t k);
45  void print() const;
46 
52  size_t size() const noexcept;
53 
54  size_t make_hash(const std::vector< T > & x) const;
55 
56 };
57 
58 template<typename T>
59 inline size_t FreqTable<T>::add(
60  const std::vector< T > & x,
61  size_t * h_precomp
62  ) {
63 
64  // The term exists, then we add it to the list and we initialize it
65  // with a single count
66  size_t h;
67  if (h_precomp == nullptr)
68  h = make_hash(x);
69  else
70  h = *h_precomp;
71 
72  if (k == 0u)
73  {
74 
75  index.insert({h, 0u});
76 
77  data.push_back(1.0);
78  data.insert(data.end(), x.begin(), x.end());
79 
80  k = x.size();
81  n++;
82 
83  return h;
84 
85  }
86  else
87  {
88 
89 
90  if (x.size() != k)
91  throw std::length_error(
92  "The value you are trying to add doesn't have the same lenght used in the database."
93  );
94 
95  #if __cplusplus > 201700L
96  auto iter2 = index.try_emplace(h, data.size());
97 
98  if (!iter2.second)
99  {
100 
101  data[(iter2.first)->second] += 1.0;
102 
103  }
104  else
105  {
106  data.push_back(1.0);
107  data.insert(data.end(), x.begin(), x.end());
108  n++;
109  }
110  #else
111  iter = index.find(h);
112 
113  if (iter == index.end())
114  {
115 
116 
117  index.insert({h, data.size()});
118  data.push_back(1.0);
119  data.insert(data.end(), x.begin(), x.end());
120 
121  n++;
122 
123  return h;
124 
125  }
126 
127  data[(*iter).second] += 1.0;
128 
129  #endif
130 
131 
132  }
133 
134  return h;
135 
136 }
137 
138 template<typename T>
140 {
141 
142  Counts_type ans;
143 
144  ans.reserve(index.size());
145 
146  for (size_t i = 0u; i < n; ++i)
147  {
148 
149  std::vector< double > tmp(k, 0.0);
150 
151  for (size_t j = 1u; j < (k + 1u); ++j)
152  tmp[j - 1u] = data[i * (k + 1) + j];
153 
154  ans.push_back(
155  std::make_pair<std::vector<double>,size_t>(
156  std::move(tmp),
157  static_cast<size_t>(data[i * (k + 1u)])
158  )
159  );
160 
161  }
162 
163 
164  return ans;
165 }
166 
167 template<typename T>
168 inline void FreqTable<T>::clear()
169 {
170 
171  index.clear();
172  data.clear();
173 
174  n = 0u;
175  k = 0u;
176 
177  return;
178 
179 }
180 
181 template<typename T>
183  size_t n,
184  size_t k
185 )
186 {
187 
188  // Figuring out the max size
189  auto nk = std::min(BARRY_MAX_NUM_ELEMENTS, n * k);
190  n = nk / k;
191  data.reserve(nk);
192  index.reserve(n);
193 
194  return;
195 
196 }
197 
198 // inline void StatsDB::rehash() {
199 // stats.rehash();
200 // return;
201 // }
202 
203 template<typename T>
204 inline void FreqTable<T>::print() const
205 {
206 
207  size_t grand_total = 0u;
208 
209  printf_barry("%7s | %s\n", "Counts", "Stats");
210 
211  for (size_t i = 0u; i < n; ++i)
212  {
213 
214  printf_barry("%7i | ", static_cast<int>(data[i * (k + 1u)]));
215 
216  for (size_t j = 1u; j < (k + 1u); ++j)
217  printf_barry(" %.2f", data[i * (k + 1) + j]);
218  printf_barry("\n");
219 
220  grand_total += static_cast<size_t>(data[i * (k + 1u)]);
221 
222  }
223 
224  printf_barry("Grand total: %li\n", grand_total);
225 
226  return;
227 
228 }
229 
230 template<typename T>
231 inline size_t FreqTable<T>::size() const noexcept
232 {
233 
234  return index.size();
235 
236 }
237 
238 template<typename T>
239 inline size_t FreqTable<T>::make_hash(const std::vector< T > & x) const
240 {
241 
242  std::hash< T > hasher;
243  std::size_t hash = hasher(x[0u]);
244 
245  // ^ makes bitwise XOR
246  // 0x9e3779b9 is a 32 bit constant (comes from the golden ratio)
247  // << is a shift operator, something like lhs * 2^(rhs)
248  if (x.size() > 1u)
249  for (size_t i = 1u; i < x.size(); ++i)
250  hash ^= hasher(x[i]) + 0x9e3779b9 + (hash<<6) + (hash>>2);
251 
252  return hash;
253 
254 }
255 
256 #endif
#define printf_barry
#define BARRY_MAX_NUM_ELEMENTS
Frequency table of vectors.
Definition: freqtable.hpp:22
void reserve(size_t n, size_t k)
Definition: freqtable.hpp:182
Counts_type as_vector() const
Definition: freqtable.hpp:139
void clear()
Definition: freqtable.hpp:168
const std::vector< double > & get_data() const
Definition: freqtable.hpp:40
void print() const
Definition: freqtable.hpp:204
const std::unordered_map< size_t, size_t > & get_index() const
Definition: freqtable.hpp:41
size_t make_hash(const std::vector< T > &x) const
Definition: freqtable.hpp:239
size_t add(const std::vector< T > &x, size_t *h_precomp)
Definition: freqtable.hpp:59
size_t size() const noexcept
Number of unique elements in the table. (.
Definition: freqtable.hpp:231
Data_Type &&counter_ data(std::move(counter_.data))
Data_Type hasher(counter_.hasher)
size_t size_t j
size_t i
Data_Type &&counter_ noexcept
std::vector< std::pair< std::vector< double >, size_t > > Counts_type
Definition: typedefs.hpp:51