barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
support-meat.hpp
Go to the documentation of this file.
1 #ifndef BARRY_SUPPORT_MEAT
2 #define BARRY_SUPPORT_MEAT_HPP 1
3 
4 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
6  std::vector< Array_Type > * array_bank,
7  std::vector< double > * stats_bank
8 ) {
9 
10  // Resetting the counter
11  this->iter_counter = 0u;
12 
13  // Computing the locations
14  coordinates_free.clear();
15  coordinates_locked.clear();
16  rules->get_seq(EmptyArray, &coordinates_free, &coordinates_locked);
17 
18  coordiantes_n_free = coordinates_free.size() / 2u;
19  coordiantes_n_locked = coordinates_locked.size() / 2u;
20  n_counters = counters->size();
21 
22  hashes.resize(coordiantes_n_free, 0u);
23  hashes_initialized.resize(coordiantes_n_free, false);
24 
25  // Computing initial statistics
26  if (EmptyArray.nnozero() > 0u)
27  {
28 
29  for (size_t i = 0u; i < coordiantes_n_free; ++i)
30  EmptyArray.rm_cell(
31  coordinates_free[i * 2u],
32  coordinates_free[i * 2u + 1u],
33  false, true
34  );
35 
36  }
37 
38  // Looked coordinates should still be removed if these are
39  // equivalent to zero
40  for (size_t i = 0u; i < coordiantes_n_locked; ++i)
41  {
42 
43  if (static_cast<int>(EmptyArray(
44  coordinates_locked[i * 2u], coordinates_locked[i * 2u + 1u]
45  )) == 0)
46 
47  EmptyArray.rm_cell(
48  coordinates_locked[i * 2u],
49  coordinates_locked[i * 2u + 1u],
50  false, true
51  );
52 
53  }
54 
55  // Do we have any counter?
56  if (n_counters == 0u)
57  throw std::logic_error("No counters added: Cannot compute the support without knowning what to count!");
58 
59  // Initial count (including constrains)
60  if (coordiantes_n_locked)
61  {
62 
64  tmpcount.set_counters(counters);
65  current_stats = tmpcount.count_all();
66 
67  }
68  else
69  {
70 
71  current_stats.resize(n_counters, 0.0);
72 
73  // Initialize counters
74  for (size_t n = 0u; n < n_counters; ++n)
75  {
76 
77  current_stats[n] = counters->operator[](n).init(
78  EmptyArray,
79  coordinates_free[0u],
80  coordinates_free[1u]
81  );
82 
83  }
84 
85  }
86 
87  // Resizing support
88  data.reserve(
89  pow(2.0, static_cast<double>(coordiantes_n_free)),
90  counters->size()
91  );
92 
93  // Adding to the overall count
94  bool include_it = rules_dyn->operator()(EmptyArray, 0u, 0u);
95  if (include_it)
96  data.add(current_stats, nullptr);
97 
98  change_stats.resize(coordiantes_n_free * n_counters, 0.0);
99 
100  if (include_it && (array_bank != nullptr))
101  array_bank->push_back(EmptyArray);
102 
103  if (include_it && (stats_bank != nullptr))
104  std::copy(current_stats.begin(), current_stats.end(), std::back_inserter(*stats_bank));
105 
106  return;
107 
108 }
109 
110 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
112 
113  data.clear();
114 
115 }
116 
117 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
119 
120  data.clear();
121  EmptyArray = Array_;
122  N = Array_.nrow();
123  M = Array_.ncol();
124 
125 }
126 
127 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
129  size_t pos,
130  std::vector< Array_Type > * array_bank,
131  std::vector< double > * stats_bank
132  ) {
133 
134  #ifdef BARRY_USER_INTERRUPT
135  if (++iter_counter % 1000u == 0u)
136  {
137  BARRY_USER_INTERRUPT
138  }
139  #endif
140 
141  // Did we reached the end??
142  if (pos >= coordiantes_n_free)
143  return;
144 
145  // We will pass it to the next step, if the iteration makes sense.
146  calc_backend_sparse(pos + 1u, array_bank, stats_bank);
147 
148  // Once we have returned, everything will be back as it used to be, so we
149  // treat the data as if nothing has changed.
150  const size_t & coord_i = coordinates_free[pos * 2u];
151  const size_t & coord_j = coordinates_free[pos * 2u + 1u];
152 
153  // Toggle the cell (we will toggle it back after calling the counter)
154  EmptyArray.insert_cell(
155  coord_i,
156  coord_j,
157  EmptyArray.default_val().value,
158  false, false
159  );
160 
161  // Counting
162  // std::vector< double > change_stats(counters.size());
163  double tmp_chng;
164  size_t change_stats_different = hashes_initialized[pos] ? 0u : 1u;
165  for (size_t n = 0u; n < n_counters; ++n)
166  {
167 
168  tmp_chng = counters->operator[](n).count(
169  EmptyArray,
170  coord_i,
171  coord_j
172  );
173 
174  if ((tmp_chng < DBL_MIN) & (tmp_chng > -DBL_MIN))
175  {
176 
177  change_stats[pos * n_counters + n] = 0.0;
178 
179  }
180  else
181  {
182 
183  change_stats_different++;
184  current_stats[n] += tmp_chng;
185  change_stats[pos * n_counters + n] = tmp_chng;
186 
187  }
188 
189  }
190 
191  // Adding to the overall count
192  BARRY_CHECK_SUPPORT(data, max_num_elements)
193  if (rules_dyn->size() > 0u)
194  {
195 
196  if (rules_dyn->operator()(
197  EmptyArray,
198  coord_i,
199  coord_j
200  ))
201  {
202 
203  if (change_stats_different > 0u)
204  hashes[pos] = data.add(current_stats, nullptr);
205  else
206  (void) data.add(current_stats, &hashes[pos]);
207 
208  // Need to save?
209  if (array_bank != nullptr)
210  array_bank->push_back(EmptyArray);
211 
212  if (stats_bank != nullptr)
213  std::copy(current_stats.begin(), current_stats.end(), std::back_inserter(*stats_bank));
214 
215  }
216 
217 
218  } else {
219 
220  if (change_stats_different > 0u)
221  hashes[pos] = data.add(current_stats, nullptr);
222  else
223  (void) data.add(current_stats, &hashes[pos]);
224 
225  // Need to save?
226  if (array_bank != nullptr)
227  array_bank->push_back(EmptyArray);
228 
229  if (stats_bank != nullptr)
230  std::copy(current_stats.begin(), current_stats.end(), std::back_inserter(*stats_bank));
231 
232  }
233 
234  // Again, we only pass it to the next level iff the next level is not
235  // passed the last step.
236  calc_backend_sparse(pos + 1u, array_bank, stats_bank);
237 
238  // We need to restore the state of the cell
239  EmptyArray.rm_cell(
240  coord_i,
241  coord_j,
242  false, false
243  );
244 
245  if (change_stats_different > 0u)
246  {
247  #if defined(__OPENMP) || defined(_OPENMP)
248  #pragma omp simd
249  #endif
250  for (size_t n = 0u; n < n_counters; ++n)
251  current_stats[n] -= change_stats[pos * n_counters + n];
252  }
253 
254 
255  return;
256 
257 }
258 
259 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
261  size_t pos,
262  std::vector< Array_Type > * array_bank,
263  std::vector< double > * stats_bank
264  ) {
265 
266  #ifdef BARRY_USER_INTERRUPT
267  if (++iter_counter % 1000u == 0u)
268  {
269  BARRY_USER_INTERRUPT
270  }
271  #endif
272 
273  // Did we reached the end??
274  if (pos >= coordiantes_n_free)
275  return;
276 
277  // We will pass it to the next step, if the iteration makes sense.
278  calc_backend_dense(pos + 1u, array_bank, stats_bank);
279 
280  // Once we have returned, everything will be back as it used to be, so we
281  // treat the data as if nothing has changed.
282  const size_t & coord_i = coordinates_free[pos * 2u];
283  const size_t & coord_j = coordinates_free[pos * 2u + 1u];
284 
285  // Toggle the cell (we will toggle it back after calling the counter)
286  EmptyArray.insert_cell(coord_i, coord_j, 1, false, false);
287 
288  // Counting
289  // std::vector< double > change_stats(counters.size());
290  double tmp_chng;
291  size_t change_stats_different = hashes_initialized[pos] ? 0u : 1u;
292  for (size_t n = 0u; n < n_counters; ++n)
293  {
294 
295  tmp_chng = counters->operator[](n).count(
296  EmptyArray,
297  coord_i,
298  coord_j
299  );
300 
301  if ((tmp_chng < DBL_MIN) & (tmp_chng > -DBL_MIN))
302  {
303 
304  change_stats[pos * n_counters + n] = 0.0;
305 
306  }
307  else
308  {
309  if (std::isnan(tmp_chng))
310  throw std::domain_error("Undefined number.");
311 
312  change_stats_different++;
313  current_stats[n] += tmp_chng;
314  change_stats[pos * n_counters + n] = tmp_chng;
315 
316  }
317 
318  }
319 
320  // Adding to the overall count
321  BARRY_CHECK_SUPPORT(data, max_num_elements)
322  if (rules_dyn->size() > 0u)
323  {
324 
325  if (rules_dyn->operator()(EmptyArray, coord_i, coord_j))
326  {
327 
328  if (change_stats_different > 0u)
329  hashes[pos] = data.add(current_stats, nullptr);
330  else
331  (void) data.add(current_stats, &hashes[pos]);
332 
333  // Need to save?
334  if (array_bank != nullptr)
335  array_bank->push_back(EmptyArray);
336 
337  if (stats_bank != nullptr)
338  std::copy(current_stats.begin(), current_stats.end(), std::back_inserter(*stats_bank));
339 
340  }
341 
342 
343  }
344  else
345  {
346 
347  if (change_stats_different > 0u)
348  hashes[pos] = data.add(current_stats, nullptr);
349  else
350  (void) data.add(current_stats, &hashes[pos]);
351 
352  // Need to save?
353  if (array_bank != nullptr)
354  array_bank->push_back(EmptyArray);
355 
356  if (stats_bank != nullptr)
357  std::copy(current_stats.begin(), current_stats.end(), std::back_inserter(*stats_bank));
358 
359  }
360 
361  // Again, we only pass it to the next level iff the next level is not
362  // passed the last step.
363  calc_backend_dense(pos + 1u, array_bank, stats_bank);
364 
365  // We need to restore the state of the cell
366  EmptyArray.rm_cell(coord_i, coord_j, false, false);
367 
368  if (change_stats_different > 0u)
369  {
370  #if defined(__OPENMP) || defined(_OPENMP)
371  #pragma omp simd
372  #endif
373  for (size_t n = 0u; n < n_counters; ++n)
374  current_stats[n] -= change_stats[pos * n_counters + n];
375  }
376 
377  return;
378 
379 }
380 
381 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
382 inline void
384  std::vector< Array_Type > * array_bank,
385  std::vector< double > * stats_bank,
386  size_t max_num_elements_
387 ) {
388 
389  if (max_num_elements_ != 0u)
390  this->max_num_elements = max_num_elements_;
391 
392  // Generating sequence
393  this->init_support(array_bank, stats_bank);
394 
395  // Recursive function to count
396  if (EmptyArray.is_dense())
397  calc_backend_dense(0u, array_bank, stats_bank);
398  else
399  calc_backend_sparse(0u, array_bank, stats_bank);
400 
401  change_stats.clear();
402 
403  if (max_num_elements_ != 0u)
404  this->max_num_elements = BARRY_MAX_NUM_ELEMENTS;
405 
406  if (this->data.size() == 0u)
407  {
408  throw std::logic_error("The array has support of size 0 (i.e., empty support). This could be a problem in the rules (constraints).\n");
409  }
410 
411 
412  return;
413 
414 }
415 
416 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
419 ) {
420 
421  counters->add_counter(f_);
422  return;
423 
424 }
425 
426 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
429 ) {
430 
431  // Cleaning up before replacing the memory
432  if (delete_counters)
433  delete counters;
434  delete_counters = false;
436 
437  return;
438 
439 }
440 
442 
443 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
446 ) {
447 
448  rules->add_rule(f_);
449  return;
450 
451 }
452 
453 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
456 ) {
457 
458  rules->add_rule(f_);
459  return;
460 
461 }
462 
463 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
466 ) {
467 
468  // Cleaning up before replacing the memory
469  if (delete_rules)
470  delete rules;
471  delete_rules = false;
472  rules = rules_;
473 
474  return;
475 
476 }
477 
478 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
481 ) {
482 
483  rules_dyn->add_rule(f_);
484  return;
485 
486 }
487 
488 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
491 ) {
492 
493  rules_dyn->add_rule(f_);
494  return;
495 
496 }
497 
498 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
501 ) {
502 
503  // Cleaning up before replacing the memory
504  if (delete_rules_dyn)
505  delete rules_dyn;
506  delete_rules_dyn = false;
507  rules_dyn = rules_;
508 
509  return;
510 
511 }
512 
513 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
515  const std::vector< double > & counts,
516  const size_t & i,
517  const size_t & j
518 ) {
519 
520  if (rules_dyn->size() == 0u)
521  return true;
522 
523  // Swapping pointers for a while
524  std::vector< double > tmpstats = current_stats;
525  current_stats = counts;
526 
527  bool rule_res = rules_dyn->operator()(EmptyArray, i, j);
528  current_stats = tmpstats;
529 
530  return rule_res;
531 
532 }
533 
534 // template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
535 //inline bool Support<Array_Type,Data_Counter_Type,Data_Rule_Type, Data_Rule_Dyn_Type>::eval_rules_dyn(
536 // const double * counts,
537 // const size_t & i,
538 // const size_t & j
539 // ) {
540 
541 // if (rules_dyn->size() == 0u)
542 // return true;
543 
544 // // Swapping pointers for a while
545 // std::vector< double > tmpstats = current_stats;
546 // current_stats = counts;
547 
548 // bool rule_res = rules_dyn->operator()(EmptyArray, i, j);
549 // current_stats = tmpstats;
550 
551 // return rule_res;
552 
553 // }
554 
556 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
558 
559  return data.get_data();
560 
561 }
562 
563 // template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
564 // inline const MapVec_type<> * Support<Array_Type,Data_Counter_Type,Data_Rule_Type, Data_Rule_Dyn_Type>::get_counts_ptr() const {
565 
566 // return data.get_data_ptr();
567 
568 // }
569 
570 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
572  return &this->current_stats;
573 }
574 
575 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
577 
578  // Starting from the name of the stats
579  printf_barry("Position of variables:\n");
580  for (size_t i = 0u; i < n_counters; ++i) {
581  printf_barry("[% 2li] %s\n", i, counters->operator[](i).name.c_str());
582  }
583 
584  data.print();
585 }
586 
587 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
589  return this->data;
590 }
591 
592 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
594  return this->counters;
595 }
596 
597 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
599  return this->rules;
600 }
601 
602 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
604  return this->rules_dyn;
605 }
606 
607 
608 #endif
#define printf_barry
#define BARRY_MAX_NUM_ELEMENTS
#define BARRY_CHECK_SUPPORT(x, maxs)
A counter function based on change statistics.
Vector of counters.
Frequency table of vectors.
Definition: freqtable.hpp:22
Rule for determining if a cell should be included in a sequence.
Definition: rules-bones.hpp:20
Vector of objects of class Rule.
Definition: rules-bones.hpp:71
Count stats for a single Array.
void set_counters(Counters< Array_Type, Data_Type > *counters_)
std::vector< double > count_all()
Compute the support of sufficient statistics.
bool eval_rules_dyn(const std::vector< double > &counts, const size_t &i, const size_t &j)
void calc(std::vector< Array_Type > *array_bank=nullptr, std::vector< double > *stats_bank=nullptr, size_t max_num_elements_=0u)
Computes the entire support.
Rules< Array_Type, Data_Rule_Type > * get_rules()
Vector of static rules (cells to iterate).
void init_support(std::vector< Array_Type > *array_bank=nullptr, std::vector< double > *stats_bank=nullptr)
Definition: support-meat.hpp:5
Rules< Array_Type, Data_Rule_Dyn_Type > * get_rules_dyn()
Vector of dynamic rules (to include/exclude a realizaton).
void add_rule(Rule< Array_Type, Data_Rule_Type > *f_)
void print() const
void add_rule_dyn(Rule< Array_Type, Data_Rule_Dyn_Type > *f_)
void set_rules_dyn(Rules< Array_Type, Data_Rule_Dyn_Type > *rules_)
Counters< Array_Type, Data_Counter_Type > * get_counters()
Vector of couter functions.
const std::vector< double > & get_counts() const
void add_counter(Counter< Array_Type, Data_Counter_Type > f_)
const FreqTable< double > & get_data() const
void reset_array()
void set_rules(Rules< Array_Type, Data_Rule_Type > *rules_)
void set_counters(Counters< Array_Type, Data_Counter_Type > *counters_)
std::vector< double > * get_current_stats()
List current statistics.
Data_Type &&counter_ data(std::move(counter_.data))
if(add_dims)
size_t size_t j
size_t i
Data_Type * counters_
current_stats
Data_Type f_