barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
model-meat.hpp
Go to the documentation of this file.
1 #ifndef BARRY_MODEL_MEAT_HPP
2 #define BARRY_MODEL_MEAT_HPP 1
3 
10  const std::vector<double> & params,
11  const double * support,
12  size_t k,
13  size_t n
14 )
15 {
16  double res = 0.0;
17 
18  std::vector< double > resv(n, 0.0);
19 
20  for (size_t j = 0u; j < (k - 1u); ++j)
21  {
22 
23  const double p = params[j];
24 
25  #if defined(__OPENMP) || defined(_OPENMP)
26  #pragma omp simd
27  #elif defined(__GNUC__) && !defined(__clang__)
28  #pragma GCC ivdep
29  #endif
30  for (size_t i = 0u; i < n; ++i)
31  resv[i] += (*(support + i * k + 1u + j)) * p;
32 
33  }
34 
35  // Accumulate resv to a double res
36  #if defined(__OPENMP) || defined(_OPENMP)
37  #pragma omp simd reduction(+:res)
38  #elif defined(__GNUC__) && !defined(__clang__)
39  #pragma GCC ivdep
40  #endif
41  for (size_t i = 0u; i < n; ++i)
42  {
43  res += std::exp(resv[i] BARRY_SAFE_EXP) * (*(support + i * k));
44  }
45 
46 
47 
48 
49  #ifdef BARRY_DEBUG
50  if (std::isnan(res))
51  throw std::overflow_error(
52  std::string("NaN in update_normalizing_constant. ") +
53  std::string("res = ") + std::to_string(res) +
54  std::string(", k = ") + std::to_string(k) +
55  std::string(", n = ") + std::to_string(n)
56  );
57  if (std::isinf(res))
58  throw std::overflow_error(
59  std::string("Inf in update_normalizing_constant. ") +
60  std::string("res = ") + std::to_string(res) +
61  std::string(", k = ") + std::to_string(k) +
62  std::string(", n = ") + std::to_string(n)
63  );
64 
65  #endif
66 
67  return res;
68 
69 }
70 
71 inline double likelihood_(
72  const double * stats_target,
73  const std::vector< double > & params,
74  const double normalizing_constant,
75  size_t n_params,
76  bool log_ = false
77 ) {
78 
79  if (n_params != params.size())
80  throw std::length_error("-stats_target- and -params- should have the same length.");
81 
82  double numerator = 0.0;
83 
84  // Computing the numerator
85  #ifdef __INTEL_LLVM_COMPILER
86  #pragma code_align 32
87  #endif
88  #if defined(__OPENMP) || defined(_OPENMP)
89  #pragma omp simd reduction(+:numerator)
90  #endif
91  for (size_t j = 0u; j < n_params; ++j)
92  numerator += *(stats_target + j) * params[j];
93 
94  if (!log_)
95  numerator = std::exp(numerator BARRY_SAFE_EXP);
96  else
97  return numerator BARRY_SAFE_EXP - std::log(normalizing_constant);
98 
99  double ans = numerator/normalizing_constant;
100 
101  #ifdef BARRY_DEBUG
102  if (std::isnan(ans))
103  throw std::overflow_error(
104  std::string("NaN in likelihood_. ") +
105  std::string("numerator = ") + std::to_string(numerator) +
106  std::string(", normalizing_constant = ") +
107  std::to_string(normalizing_constant)
108  );
109  if (std::isinf(ans))
110  throw std::overflow_error(
111  std::string("Inf in likelihood_. ") +
112  std::string("numerator = ") + std::to_string(numerator) +
113  std::string(", normalizing_constant = ") +
114  std::to_string(normalizing_constant)
115  );
116 
117  if (ans > 1.0)
118  throw std::overflow_error(
119  std::string("Likelihood > 1.0") +
120  std::string("numerator = ") + std::to_string(numerator) +
121  std::string(", normalizing_constant = ") +
122  std::to_string(normalizing_constant)
123  );
124  #endif
125 
126  return ans;
127 
128 }
129 
130 template <
131  typename Array_Type,
132  typename Data_Counter_Type,
133  typename Data_Rule_Type,
134  typename Data_Rule_Dyn_Type
135  >
137  const std::vector< double > & params,
138  size_t ncores,
139  int i
140 ) {
141 
142  const size_t n = stats_support_sizes.size();
143 
144  // Barrier to make sure paralelization makes sense
145  if ((ncores > 1u) && (n < 128u))
146  ncores = 1u;
147 
148 
149  if (i >= 0)
150  ncores = 1u;
151 
152  #if defined(__OPENMP) || defined(_OPENMP)
153  #pragma omp parallel for firstprivate(params) num_threads(ncores) \
154  shared(n, normalizing_constants, first_calc_done, \
155  stats_support, stats_support_sizes, stats_support_sizes_acc, i) \
156  default(none)
157  #endif
158  for (size_t s = 0u; s < n; ++s)
159  {
160 
161  if ((i > -1) && (i != static_cast<int>(s)))
162  continue;
163 
164  size_t k = params.size() + 1u;
165  size_t n = stats_support_sizes[s];
166 
167  first_calc_done[s] = true;
168  normalizing_constants[s] = update_normalizing_constant(
169  params, &stats_support[
170  stats_support_sizes_acc[s] * k
171  ], k, n
172  );
173 
174  }
175 
176  return;
177 
178 }
179 
180 template <
181  typename Array_Type,
182  typename Data_Counter_Type,
183  typename Data_Rule_Type,
184  typename Data_Rule_Dyn_Type
185  >
187  const std::vector< double > & params,
188  size_t ncores
189 ) {
190 
191  update_normalizing_constants(params, ncores);
192 
193  size_t n_params = params.size();
194 
195  if (stats_likelihood.size() != stats_target.size())
196  stats_likelihood.resize(stats_target.size());
197 
198  #if defined(__OPENMP) || defined(_OPENMP)
199  #pragma omp parallel for simd num_threads(ncores) \
200  shared(n_params, stats_target, normalizing_constants, arrays2support, \
201  params) \
202  default(none)
203  #endif
204  for (size_t s = 0u; s < stats_target.size(); ++s)
205  {
206  stats_likelihood[s] = 0.0;
207  for (size_t j = 0u; j < n_params; ++j)
208  stats_likelihood[s] += stats_target[s][j] * params[j];
209 
210  stats_likelihood[s] =
211  std::exp(stats_likelihood[s] BARRY_SAFE_EXP)/
212  normalizing_constants[arrays2support[s]];
213  }
214 
215  return;
216 
217 }
218 
219 template <
220  typename Array_Type,
221  typename Data_Counter_Type,
222  typename Data_Rule_Type,
223  typename Data_Rule_Dyn_Type
224  >
226  const std::vector< double > & params,
227  size_t ncores,
228  int i
229 ) {
230 
231  update_normalizing_constants(params, ncores, i);
232 
233  if (i > -1)
234  params_last[i] = params;
235 
236  size_t n_params = params.size();
237  pset_probs.resize(
238  pset_locations.back() +
239  pset_sizes.back()
240  );
241 
242  // No need to paralelize if there is only one core
243  if (i >= 0)
244  ncores = 1u;
245 
246  #if defined(__OPENMP) || defined(_OPENMP)
247  #pragma omp parallel for num_threads(ncores) collapse(1) \
248  shared(n_params, pset_stats, pset_probs, normalizing_constants, pset_sizes, \
249  params, i) \
250  default(none)
251  #endif
252  for (size_t s = 0u; s < pset_sizes.size(); ++s)
253  {
254 
255  if ((i >= 0) && (i != static_cast<int>(s)))
256  continue;
257 
258  // When does the pset starts
259  size_t pset_start = pset_locations[s];
260 
261  // Looping over observations of the pset
262  #if defined(__OPENMP) || defined(_OPENMP)
263  #pragma omp simd
264  #endif
265  for (size_t a = 0u; a < pset_sizes[s]; ++a)
266  {
267 
268  // Start location in the array
269  size_t start_loc = pset_start * n_params + a * n_params;
270 
271  pset_probs[pset_start + a] = 0.0;
272 
273  // Looping over the parameters
274  for (size_t j = 0u; j < n_params; ++j)
275  pset_probs[pset_start + a] +=
276  pset_stats[start_loc + j] * params[j];
277 
278  // Now turning into a probability
279  pset_probs[pset_start + a] =
280  std::exp(pset_probs[pset_start + a] BARRY_SAFE_EXP)/
281  normalizing_constants[s];
282  }
283 
284  #ifdef BARRY_DEBUG
285  // Making sure the probabilities add to one
286  double totprob = 0.0;
287  for (size_t i_ = 0u; i_ < pset_sizes[s]; ++i)
288  totprob =+ pset_probs[pset_start + i_];
289 
290  if (std::abs(totprob - 1) > 1e-6)
291  throw std::runtime_error(
292  std::string("Probabilities do not add to one! ") +
293  std::string("totprob = ") + std::to_string(totprob)
294  );
295 
296  #endif
297  }
298 
299  return;
300 
301 }
302 
303 template <
304  typename Array_Type,
305  typename Data_Counter_Type,
306  typename Data_Rule_Type,
307  typename Data_Rule_Dyn_Type
308  >
310  stats_support(0u),
311  stats_support_sizes(0u),
312  stats_support_sizes_acc(0u),
313  stats_support_n_arrays(0u),
314  stats_target(0u),
315  stats_likelihood(0u),
316  arrays2support(0u),
317  keys2support(0u),
318  pset_arrays(0u), pset_stats(0u),
319  counters(new Counters<Array_Type,Data_Counter_Type>()),
320  rules(new Rules<Array_Type,Data_Rule_Type>()),
321  rules_dyn(new Rules<Array_Type,Data_Rule_Dyn_Type>()),
322  support_fun(), counter_fun(), delete_counters(true),
323  delete_rules(true),
324  delete_rules_dyn(true),
325  transform_model_fun(nullptr),
326  transform_model_term_names(0u)
327 {
328 
329  // Counters are shared
330  support_fun.set_counters(counters);
331  counter_fun.set_counters(counters);
332 
333  // Rules are shared
334  support_fun.set_rules(rules);
335  support_fun.set_rules_dyn(rules_dyn);
336 
337  return;
338 
339 }
340 
341 template <
342  typename Array_Type,
343  typename Data_Counter_Type,
344  typename Data_Rule_Type,
345  typename Data_Rule_Dyn_Type
346  >
348  size_t size_
349  ) :
350  stats_support(0u),
351  stats_support_sizes(0u),
352  stats_support_sizes_acc(0u),
353  stats_support_n_arrays(0u),
354  stats_target(0u),
355  stats_likelihood(0u),
356  arrays2support(0u), keys2support(0u),
357  pset_arrays(0u), pset_stats(0u),
358  counters(new Counters<Array_Type,Data_Counter_Type>()),
359  rules(new Rules<Array_Type,Data_Rule_Type>()),
360  rules_dyn(new Rules<Array_Type,Data_Rule_Dyn_Type>()),
361  support_fun(), counter_fun(), delete_counters(true),
362  delete_rules(true),
363  delete_rules_dyn(true),
364  transform_model_fun(nullptr),
365  transform_model_term_names(0u)
366 {
367 
368  stats_target.reserve(size_);
369  arrays2support.reserve(size_);
370 
371  // Counters are shared
372  support_fun.set_counters(counters);
373  counter_fun.set_counters(counters);
374 
375  // Rules are shared
376  support_fun.set_rules(rules);
377  support_fun.set_rules_dyn(rules_dyn);
378 
379  return;
380 
381 }
382 
383 template <
384  typename Array_Type,
385  typename Data_Counter_Type,
386  typename Data_Rule_Type,
387  typename Data_Rule_Dyn_Type
388  >
391  ) :
392  stats_support(Model_.stats_support),
393  stats_support_sizes(Model_.stats_support_sizes),
394  stats_support_sizes_acc(Model_.stats_support_sizes_acc),
395  stats_support_n_arrays(Model_.stats_support_n_arrays),
396  stats_target(Model_.stats_target),
397  stats_likelihood(Model_.stats_likelihood),
398  arrays2support(Model_.arrays2support),
399  keys2support(Model_.keys2support),
400  pset_arrays(Model_.pset_arrays),
401  pset_stats(Model_.pset_stats),
402  pset_probs(Model_.pset_probs),
403  pset_sizes(Model_.pset_sizes),
404  pset_locations(Model_.pset_locations),
405  counters(new Counters<Array_Type,Data_Counter_Type>(*(Model_.counters))),
406  rules(new Rules<Array_Type,Data_Rule_Type>(*(Model_.rules))),
407  rules_dyn(new Rules<Array_Type,Data_Rule_Dyn_Type>(*(Model_.rules_dyn))),
408  support_fun(),
409  counter_fun(),
410  params_last(Model_.params_last),
411  normalizing_constants(Model_.normalizing_constants),
412  first_calc_done(Model_.first_calc_done),
413  delete_counters(true),
414  delete_rules(true),
415  delete_rules_dyn(true),
416  transform_model_fun(Model_.transform_model_fun),
417  transform_model_term_names(Model_.transform_model_term_names)
418  {
419 
420  // Counters are shared
421  support_fun.set_counters(counters);
422  counter_fun.set_counters(counters);
423 
424  // Rules are shared
425  support_fun.set_rules(rules);
426  support_fun.set_rules_dyn(rules_dyn);
427 
428  return;
429 
430 }
431 
432 template <
433  typename Array_Type,
434  typename Data_Counter_Type,
435  typename Data_Rule_Type,
436  typename Data_Rule_Dyn_Type
437  >
441 ) {
442 
443  // Clearing
444  if (this != &Model_) {
445 
446  if (delete_counters)
447  delete counters;
448 
449  if (delete_rules)
450  delete rules;
451 
452  if (delete_rules_dyn)
453  delete rules_dyn;
454 
455  stats_support = Model_.stats_support;
456  stats_support_sizes = Model_.stats_support_sizes;
457  stats_support_sizes_acc = Model_.stats_support_sizes_acc;
458  stats_support_n_arrays = Model_.stats_support_n_arrays;
459  stats_target = Model_.stats_target;
460  stats_likelihood = Model_.stats_likelihood;
461  arrays2support = Model_.arrays2support;
462  keys2support = Model_.keys2support;
463  pset_arrays = Model_.pset_arrays;
464  pset_stats = Model_.pset_stats;
465  pset_probs = Model_.pset_probs;
466  pset_sizes = Model_.pset_sizes;
467  pset_locations = Model_.pset_locations;
469  rules = new Rules<Array_Type,Data_Rule_Type>(*(Model_.rules));
470  rules_dyn = new Rules<Array_Type,Data_Rule_Dyn_Type>(*(Model_.rules_dyn));
471  delete_counters = true;
472  delete_rules = true;
473  delete_rules_dyn = true;
474  params_last = Model_.params_last;
475  normalizing_constants = Model_.normalizing_constants;
476  first_calc_done = Model_.first_calc_done;
477  transform_model_fun = Model_.transform_model_fun;
478  transform_model_term_names = Model_.transform_model_term_names;
479 
480  // Counters are shared
481  support_fun.set_counters(counters);
482  counter_fun.set_counters(counters);
483 
484  // Rules are shared
485  support_fun.set_rules(rules);
486  support_fun.set_rules_dyn(rules_dyn);
487 
488  }
489 
490  return *this;
491 
492 }
493 
494 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
496  with_pset = true;
497  return;
498 }
499 
500 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
502  const Array_Type & Array_
503 ) {
504  return this->counters->gen_hash(Array_);
505 }
506 
507 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
510 ) {
511 
512  counters->add_counter(counter, Data_Counter_Type());
513  return;
514 }
515 
516 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
520  Data_Counter_Type data_
521 ) {
522 
523  counters->add_counter(
524  count_fun_,
525  init_fun_,
526  data_
527  );
528 
529  return;
530 
531 }
532 
533 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
536 ) {
537 
538  if (delete_counters) {
539  delete counters;
540  delete_counters = false;
541  }
542 
543  this->counters = counters_;
544  support_fun.set_counters(counters);
545  counter_fun.set_counters(counters);
546 
547  return;
548 
549 }
550 
551 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
554 ) {
555 
556  counters->add_hash(fun_);
557 
558 }
559 
561 
562 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
565 ) {
566 
567  rules->add_rule(rules, Data_Rule_Type());
568  return;
569 }
570 
571 
572 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
575 ) {
576 
577  if (delete_rules)
578  delete rules;
579 
580  this->rules = rules_;
581  this->delete_rules = false;
582 
583  support_fun.set_rules(rules);
584  return;
585 
586 }
587 
589 
590 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
593 ) {
594 
595  rules_dyn->add_rule(rules_, Data_Rule_Dyn_Type());
596  return;
597 }
598 
599 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
602  Data_Rule_Dyn_Type data_
603 ) {
604 
605  rules_dyn->add_rule(
606  rule_fun_,
607  data_
608  );
609 
610  return;
611 
612 }
613 
614 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
617 ) {
618 
619  if (delete_rules_dyn)
620  delete rules_dyn;
621 
622  this->rules_dyn = rules_;
623  this->delete_rules_dyn = false;
624  support_fun.set_rules_dyn(rules_dyn);
625  return;
626 
627 }
628 
630 
631 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
632 inline size_t
634  const Array_Type & Array_,
635  bool force_new
636 ) {
637 
638  // Array counts (target statistics)
639  counter_fun.reset_array(&Array_);
640 
641  if (transform_model_fun)
642  {
643 
644  auto tmpcounts = counter_fun.count_all();
645  stats_target.emplace_back(
646  transform_model_fun(&tmpcounts[0u], tmpcounts.size())
647  );
648 
649  } else
650  stats_target.push_back(counter_fun.count_all());
651 
652  // If the data hasn't been analyzed earlier, then we need to compute
653  // the support
654  std::vector< double > key = counters->gen_hash(Array_);
655  MapVec_type< double, size_t >::const_iterator locator = keys2support.find(key);
656  if (force_new | (locator == keys2support.end()))
657  {
658 
659  // Current size of the support stats
660  size_t stats_support_size = stats_support.size();
661 
662  // Adding to the map
663  keys2support[key] = stats_support_sizes.size();
664  stats_support_n_arrays.push_back(1u); // How many elements now
665  arrays2support.push_back(stats_support_sizes.size()); // Map of the array id to the support
666 
667  // Computing support using the counters included in the model
668  support_fun.reset_array(Array_);
669 
672  if (with_pset)
673  {
674 
675  // Making space for storing the support
676  pset_arrays.resize(pset_arrays.size() + 1u);
677 
678  // Current size of the powerset
679  size_t pset_stats_size = pset_stats.size();
680 
681  try
682  {
683 
684  support_fun.calc(
685  &(pset_arrays[pset_arrays.size() - 1u]),
686  &pset_stats
687  );
688 
689  }
690  catch (const std::exception& e)
691  {
692 
693  printf_barry(
694  "A problem ocurred while trying to add the array (and recording the powerset). "
695  );
696  printf_barry("with error %s\n", e.what());
697  printf_barry("Here is the array that generated the error.\n");
698  Array_.print();
699  throw std::logic_error("");
700 
701  }
702 
703  // Recording the number of elements
704  pset_locations.push_back(
705  pset_locations.size() == 0u ?
706  0u :
707  pset_locations.back() + pset_sizes.back()
708  );
709 
710  pset_sizes.push_back(
711  (pset_stats.size() - pset_stats_size) / (counter_fun.size())
712  );
713 
714 
715 
716 
717  }
718  else
719  {
720  try
721  {
722 
723  support_fun.calc();
724 
725  }
726  catch (const std::exception& e)
727  {
728 
729  printf_barry(
730  "A problem ocurred while trying to add the array (and recording the powerset). "
731  );
732  printf_barry("with error %s\n", e.what());
733  printf_barry("Here is the array that generated the error.\n");
734  Array_.print();
735  throw std::logic_error("");
736 
737  }
738  }
739 
740  if (transform_model_fun)
741  {
742  auto tmpsupport = support_fun.get_counts();
743  size_t k = counter_fun.size();
744  size_t n = tmpsupport.size() / (k + 1);
745 
746  std::vector< double > s_new(0u);
747  s_new.reserve(tmpsupport.size());
748 
749  for (size_t i = 0u; i < n; ++i)
750  {
751 
752  // Appending size
753  s_new.push_back(tmpsupport[i * (k + 1u)]);
754 
755  // Applying transformation and adding to the new set
756  auto res = transform_model_fun(&tmpsupport[i * (k + 1u) + 1u], k);
757  std::copy(res.begin(), res.end(), std::back_inserter(s_new));
758 
759  }
760 
761  for (auto & s : s_new)
762  stats_support.push_back(s);
763 
764 
765  } else {
766  for (const auto & s: support_fun.get_counts())
767  stats_support.push_back(s);
768  }
769 
770  // Making room for the previous parameters. This will be used to check if
771  // the normalizing constant has been updated or not.
772  params_last.push_back(stats_target[0u]);
773  normalizing_constants.push_back(0.0);
774  first_calc_done.push_back(false);
775 
776  // Incrementing the size of the support set
777  if (stats_support_sizes.size() == 0u)
778  {
779  stats_support_sizes_acc.push_back(0u);
780  } else {
781  stats_support_sizes_acc.push_back(
782  stats_support_sizes.back() +
783  stats_support_sizes_acc.back()
784  );
785  }
786 
787 
788  stats_support_sizes.push_back(
789 
790  (stats_support.size() - stats_support_size)/
791  (counter_fun.size() + 1u)
792 
793  );
794 
795  return arrays2support.size() - 1u;
796 
797  }
798 
799  // Increasing the number of arrays in that stat
800  ++stats_support_n_arrays[locator->second];
801 
802  // Adding the corresponding map
803  arrays2support.push_back(locator->second);
804 
805  return arrays2support.size() - 1u;
806 
807 }
808 
809 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
811  const std::vector<double> & params,
812  const size_t & i,
813  bool as_log,
814  bool no_update_normconst
815 ) {
816 
817  // Checking if the index exists
818  if (i >= arrays2support.size())
819  throw std::range_error("The requested support is out of range");
820 
821  size_t idx = arrays2support[i];
822 
823  // Checking if this actually has a change of happening
824  if (this->stats_support_sizes[idx] == 0u)
825  return as_log ? -std::numeric_limits<double>::infinity() : 0.0;
826 
827  // Checking if we have updated the normalizing constant or not
828  if (!no_update_normconst && (!first_calc_done[idx] || !vec_equal_approx(params, params_last[idx])))
829  {
830 
831  first_calc_done[idx] = true;
832 
833  size_t k = params.size() + 1u;
834  size_t n = stats_support_sizes[idx];
835 
836  normalizing_constants[idx] = update_normalizing_constant(
837  params, &stats_support[
838  stats_support_sizes_acc[idx] * k
839  ], k, n
840  );
841 
842  params_last[idx] = params;
843 
844  }
845 
846  return likelihood_(
847  &stats_target[i],
848  params,
849  normalizing_constants[idx],
850  nterms(),
851  as_log
852  );
853 
854 }
855 
856 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
858  const std::vector<double> & params,
859  const Array_Type & Array_,
860  int i,
861  bool as_log,
862  bool no_update_normconst
863 ) {
864 
865  // Key of the support set to use
866  int loc;
867 
868  if (i < 0)
869  {
870 
871  std::vector< double > key = counters->gen_hash(Array_);
872  MapVec_type< double, size_t >::const_iterator locator = keys2support.find(key);
873  if (locator == keys2support.end())
874  throw std::range_error(
875  "This type of array has not been included in the model."
876  );
877 
878  loc = locator->second;
879 
880  }
881  else
882  {
883 
884  if (static_cast<size_t>(i) >= arrays2support.size())
885  throw std::range_error(
886  "This type of array has not been included in the model."
887  );
888 
889  loc = arrays2support[i];
890 
891  }
892 
893  // Checking if this actually has a change of happening
894  if (this->stats_support_sizes[loc] == 0u)
895  return as_log ? -std::numeric_limits<double>::infinity() : 0.0;
896 
897  // Counting stats_target
899 
900  tmpstats.set_counters(this->counters);
901 
902  std::vector< double > target_ = tmpstats.count_all();
903 
904  if (transform_model_fun)
905  target_ = transform_model_fun(&target_[0u], target_.size());
906 
907  // Checking if we have updated the normalizing constant or not
908  if (!no_update_normconst && (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc])) )
909  {
910 
911  first_calc_done[loc] = true;
912 
913  size_t k = params.size() + 1u;
914  size_t n = stats_support_sizes[loc];
915 
916  normalizing_constants[loc] = update_normalizing_constant(
917  params, &stats_support[
918  stats_support_sizes_acc[loc] * k
919  ], k, n
920  );
921 
922  params_last[loc] = params;
923 
924  }
925 
926  // Checking if passes the rules
927  if (!support_fun.eval_rules_dyn(target_, 0u, 0u))
928  return as_log ? -std::numeric_limits<double>::infinity() : 0.0;
929 
930  return likelihood_(
931  &target_[0u],
932  params,
933  normalizing_constants[loc],
934  nterms(),
935  as_log
936  );
937 
938 }
939 
940 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
942  const std::vector<double> & params,
943  const std::vector<double> & target_,
944  const size_t & i,
945  bool as_log,
946  bool no_update_normconst
947 ) {
948 
949  // Checking if the index exists
950  if (i >= arrays2support.size())
951  throw std::range_error("The requested support is out of range");
952 
953  size_t loc = arrays2support[i];
954 
955  // Checking if passes the rules
956  if (!support_fun.eval_rules_dyn(target_, 0u, 0u))
957  {
958 
959  // Concatenating the elements of target_ into aa single string
960  std::string target_str = "";
961  for (size_t i = 0u; i < target_.size(); ++i)
962  target_str += std::to_string(target_[i]) + " ";
963 
964  throw std::range_error(
965  "The array is not in the support set. The array's statistics are: " +
966  target_str +
967  std::string(".")
968  );
969  }
970 
971 
972  // Checking if this actually has a change of happening
973  if (this->stats_support_sizes[loc] == 0u)
974  {
975  throw std::logic_error("The support set for this array is empty.");
976  }
977 
978  // Checking if we have updated the normalizing constant or not
979  if (!no_update_normconst && (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc])) ) {
980 
981  first_calc_done[loc] = true;
982 
983  size_t k = params.size() + 1u;
984  size_t n = stats_support_sizes[loc];
985 
986  normalizing_constants[loc] = update_normalizing_constant(
987  params, &stats_support[
988  stats_support_sizes_acc[loc] * k
989  ], k, n
990  );
991 
992  params_last[loc] = params;
993 
994  }
995 
996  return likelihood_(
997  &target_[0u],
998  params,
999  normalizing_constants[loc],
1000  nterms(),
1001  as_log
1002  );
1003 
1004 }
1005 
1006 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1008  const std::vector<double> & params,
1009  const double * target_,
1010  const size_t & i,
1011  bool as_log,
1012  bool no_update_normconst
1013 ) {
1014 
1015  // Checking if the index exists
1016  if (i >= arrays2support.size())
1017  throw std::range_error("The requested support is out of range");
1018 
1019  size_t loc = arrays2support[i];
1020 
1021  // Checking if passes the rules
1022  if (support_fun.get_rules_dyn()->size() > 0u)
1023  {
1024 
1025  std::vector< double > tmp_target;
1026  tmp_target.reserve(nterms());
1027  for (size_t t = 0u; t < nterms(); ++t)
1028  tmp_target.push_back(*(target_ + t));
1029 
1030  if (!support_fun.eval_rules_dyn(tmp_target, 0u, 0u))
1031  {
1032  // Concatenating the elements of target_ into aa single string
1033  std::string target_str = "";
1034  for (size_t i = 0u; i < nterms(); ++i)
1035  target_str += std::to_string((*target_ + i)) + " ";
1036 
1037  throw std::range_error(
1038  "The array is not in the support set. The array's statistics are: " + target_str + std::string(".")
1039  );
1040  }
1041 
1042  }
1043 
1044  // Checking if this actually has a change of happening
1045  if (this->stats_support_sizes[loc] == 0u)
1046  {
1047  throw std::logic_error("The support set for this array is empty.");
1048  }
1049 
1050  // Checking if we have updated the normalizing constant or not
1051  if (!no_update_normconst && (!first_calc_done[loc] || !vec_equal_approx(params, params_last[loc]) )) {
1052 
1053  first_calc_done[loc] = true;
1054 
1055  size_t k = params.size() + 1u;
1056  size_t n = stats_support_sizes[loc];
1057 
1058  normalizing_constants[loc] = update_normalizing_constant(
1059  params, &stats_support[
1060  stats_support_sizes_acc[loc] * k
1061  ], k, n
1062  );
1063 
1064  params_last[loc] = params;
1065 
1066  }
1067 
1068  return likelihood_(
1069  target_,
1070  params,
1071  normalizing_constants[loc],
1072  nterms(),
1073  as_log
1074  );
1075 
1076 }
1077 
1078 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1080  const std::vector<double> & params,
1081  bool as_log,
1082  BARRY_NCORES_ARG(),
1083  bool no_update_normconst
1084 ) {
1085 
1086  size_t params_last_size = params_last.size();
1087 
1088  if (!no_update_normconst)
1089  {
1090  #if defined(__OPENMP) || defined(_OPENMP)
1091  #pragma omp parallel for num_threads(ncores) \
1092  shared(normalizing_constants, params_last, first_calc_done, \
1093  stats_support, stats_support_sizes, stats_support_sizes_acc) \
1094  firstprivate(params)
1095  #endif
1096  for (size_t i = 0u; i < params_last_size; ++i)
1097  {
1098 
1099  if (!first_calc_done[i] || !vec_equal_approx(params, params_last[i]) )
1100  {
1101 
1102  size_t k = params.size() + 1u;
1103  size_t n = stats_support_sizes[i];
1104 
1105  first_calc_done[i] = true;
1106  normalizing_constants[i] = update_normalizing_constant(
1107  params, &stats_support[
1108  stats_support_sizes_acc[i] * k
1109  ], k, n
1110  );
1111 
1112  params_last[i] = params;
1113 
1114  }
1115 
1116  }
1117  }
1118 
1119  double res = 0.0;
1120  if (as_log)
1121  {
1122 
1123  for (size_t i = 0; i < stats_target.size(); ++i)
1124  res += vec_inner_prod(
1125  &stats_target[i][0u],
1126  &params[0u],
1127  params.size()
1128  ) BARRY_SAFE_EXP;
1129 
1130  #if defined(__OPENMP) || defined(_OPENMP)
1131  #pragma omp simd reduction(-:res)
1132  #endif
1133  for (size_t i = 0u; i < params_last_size; ++i)
1134  res -= (std::log(normalizing_constants[i]) * this->stats_support_n_arrays[i]);
1135 
1136  } else {
1137 
1138  res = 1.0;
1139  size_t stats_target_size = stats_target.size();
1140  #if defined(__OPENMP) || defined(_OPENMP)
1141  #pragma omp simd reduction(*:res)
1142  #endif
1143  for (size_t i = 0; i < stats_target_size; ++i)
1144  res *= std::exp(
1146  &stats_target[i][0u],
1147  &params[0u],
1148  params.size()
1149  ) BARRY_SAFE_EXP) /
1150  normalizing_constants[arrays2support[i]];
1151 
1152  }
1153 
1154  return res;
1155 
1156 }
1157 
1158 template <
1159  typename Array_Type,
1160  typename Data_Counter_Type,
1161  typename Data_Rule_Type,
1162  typename Data_Rule_Dyn_Type
1163  >
1164 inline const std::vector< double > &
1166 
1167  return normalizing_constants;
1168 
1169 }
1170 
1171 template<
1172  typename Array_Type,
1173  typename Data_Counter_Type,
1174  typename Data_Rule_Type,
1175  typename Data_Rule_Dyn_Type
1176  >
1177 inline const std::vector< double > &
1179 
1180  return stats_likelihood;
1181 
1182 }
1183 
1184 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1185 inline const std::vector< Array_Type > *
1187  const size_t & i
1188 ) {
1189 
1190  if (i >= arrays2support.size())
1191  throw std::range_error("The requested support is out of range");
1192 
1193 
1194  return &pset_arrays[arrays2support[i]];
1195 
1196 }
1197 
1198 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1199 inline const double *
1201  const size_t & i
1202 ) {
1203 
1204  if (i >= arrays2support.size())
1205  throw std::range_error("The requested support is out of range");
1206 
1207  return &pset_stats[pset_locations[arrays2support[i]] * counter_fun.size()];
1208 
1209 }
1210 
1211 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1213 {
1214 
1215  if (i >= arrays2support.size())
1216  throw std::range_error("The requested support is out of range");
1217 
1218  // const auto & S = stats_support[arrays2support[i]];
1219  size_t array_id = arrays2support[i];
1220 
1221  size_t k = nterms();
1222  size_t nunique = stats_support_sizes.size();
1223 
1224  for (size_t l = 0u; l < nunique; ++l)
1225  {
1226 
1227  printf_barry("% 5li ", l);
1228 
1229  printf_barry("counts: %.0f motif: ", stats_support[
1230  stats_support_sizes_acc[l] * (k + 1u)
1231  // l * (k + 1u)
1232  ]);
1233 
1234  for (size_t j = 0u; j < k; ++j)
1235  {
1236  printf_barry(
1237  "%.2f, ",
1238  stats_support[
1239  stats_support_sizes_acc[l] * (k + 1u) + j + 1u
1240  ]);
1241  }
1242 
1243  printf_barry("\n");
1244 
1245  }
1246 
1247  return;
1248 
1249 }
1250 
1251 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1253 {
1254 
1255  // Relevant information:
1256  // - Number of arrays involved
1257  // - Size of the support
1258  // - Terms involved
1259 
1260  int min_v = std::numeric_limits<int>::max();
1261  int max_v = 0;
1262 
1263  for (const auto & stat : this->stats_support_sizes)
1264  {
1265 
1266  if (static_cast<int>(stat) > max_v)
1267  max_v = static_cast<int>(stat);
1268 
1269  if (static_cast<int>(stat) < min_v)
1270  min_v = static_cast<int>(stat);
1271 
1272  }
1273 
1274  // The vectors in the support reflec the size of nterms x entries
1275  // max_v /= static_cast<int>(nterms() + 1);
1276  // min_v /= static_cast<int>(nterms() + 1);
1277 
1278  if (this->size() > 0u)
1279  {
1280  printf_barry("Num. of Arrays : %li\n", this->size());
1281  printf_barry("Support size : %li\n", this->size_unique());
1282  printf_barry("Support size range : [%i, %i]\n", min_v, max_v);
1283  }
1284  else
1285  {
1286  printf_barry("Num. of Arrays : 0\n");
1287  printf_barry("Support size : -\n");
1288  printf_barry("Support size range : -\n");
1289  }
1290 
1291 
1292  if (with_pset)
1293  {
1294  printf_barry("Arrays in powerset : %li\n",
1295  static_cast<size_t>(std::accumulate(pset_sizes.begin(), pset_sizes.end(), 0u))
1296  );
1297  }
1298 
1299  printf_barry("Transform. Fun. : %s\n", transform_model_fun ? "yes": "no");
1300  printf_barry("Model terms (% 2li) :\n", this->nterms());
1301  for (auto & cn : this->colnames())
1302  {
1303  printf_barry(" - %s\n", cn.c_str());
1304  }
1305 
1306  if (this->nrules() > 0u)
1307  {
1308  printf_barry("Model rules (%li) :\n", this->nrules());
1309 
1310  for (auto & rn : rules->get_names())
1311  {
1312  printf_barry(" - %s\n", rn.c_str());
1313  }
1314  }
1315 
1316  if (this->nrules_dyn() > 0u)
1317  {
1318  printf_barry("Model rules dyn (% 2li) :\n", this->nrules_dyn());
1319 
1320  for (auto & rn : rules_dyn->get_names())
1321  {
1322  printf_barry(" - %s\n", rn.c_str());
1323  }
1324  }
1325 
1326  return;
1327 
1328 }
1329 
1330 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1332 {
1333  // INITIALIZED()
1334  return this->stats_target.size();
1335 
1336 }
1337 
1338 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1340 {
1341 
1342  // INITIALIZED()
1343  return this->stats_support_sizes.size();
1344 
1345 }
1346 
1347 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1349 {
1350 
1351  if (transform_model_fun)
1352  return transform_model_term_names.size();
1353  else
1354  return this->counters->size();
1355 
1356 }
1357 
1358 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1360 {
1361 
1362  return this->rules->size();
1363 
1364 }
1365 
1366 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1368 {
1369 
1370  return this->rules_dyn->size();
1371 
1372 }
1373 
1374 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1376 {
1377 
1378  // INITIALIZED()
1379  return stats_support_sizes_acc.back();
1380  // size_t tot = 0u;
1381  // for (auto& a : stats_support)
1382  // tot += a.size();
1383 
1384  // return tot;
1385 
1386 }
1387 
1388 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1390 {
1391 
1392  if (transform_model_fun)
1393  return transform_model_term_names;
1394  else
1395  return counters->get_names();
1396 
1397 }
1398 
1399 template <
1400  typename Array_Type,
1401  typename Data_Counter_Type,
1402  typename Data_Rule_Type,
1403  typename Data_Rule_Dyn_Type
1404  >
1405 inline Array_Type
1407  const size_t & i,
1408  const std::vector<double> & params
1409 ) {
1410 
1411  // Are we recording this?
1412  if (!this->with_pset)
1413  throw std::logic_error("Sampling is only available when store_pset() is active.");
1414 
1415  if (i >= arrays2support.size())
1416  throw std::range_error("The requested support is out of range");
1417 
1418  // Getting the index
1419  size_t a = arrays2support[i];
1420 
1421  // Generating a random
1422  std::uniform_real_distribution<> urand(0, 1);
1423  double r = urand(*rengine);
1424  double cumprob = 0.0;
1425 
1426  // Updating the current pset
1427  if (pset_probs.size() == 0u)
1428  update_pset_probs(params, 1u, static_cast<int>(a));
1429 
1430  // Sampling an array
1431  size_t j = 0u;
1432  if (vec_equal_approx(params, params_last[a]))
1433  // If precomputed, then no need to recalc support
1434  {
1435 
1436  const double * probs = &pset_probs[pset_locations[a]];
1437  while (cumprob < r)
1438  cumprob += *(probs + j++);
1439 
1440  if (j > 0u)
1441  j--;
1442 
1443  } else {
1444 
1445  update_pset_probs(params, 1u, static_cast<int>(a));
1446 
1447  const double * probs = &pset_probs[pset_locations[a]];
1448  while (cumprob < r)
1449  cumprob += *(probs + j++);
1450 
1451  if (j > 0u)
1452  j--;
1453 
1454  #ifdef BARRY_DEBUG
1455  if (j > pset_arrays.at(a).size())
1456  throw std::logic_error(
1457  std::string(
1458  "Something went wrong when sampling from a different set of.") +
1459  std::string("parameters. Please report this bug: ") +
1460  std::string(" cumprob: ") + std::to_string(cumprob) +
1461  std::string(" r: ") + std::to_string(r)
1462  );
1463  #endif
1464 
1465  }
1466 
1467  #ifdef BARRY_DEBUG
1468  return this->pset_arrays.at(a).at(j);
1469  #else
1470  return this->pset_arrays[a][j];
1471  #endif
1472 
1473 }
1474 
1475 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1477  const Array_Type & Array_,
1478  const std::vector<double> & params
1479 ) {
1480 
1481  // Are we recording this?
1482  if (!this->with_pset)
1483  throw std::logic_error("Sampling is only available when store_pset() is active.");
1484 
1485  size_t i;
1486 
1487  // If the data hasn't been analyzed earlier, then we need to compute
1488  // the support
1489  std::vector< double > key = counters->gen_hash(Array_);
1490  MapVec_type< double, size_t >::const_iterator locator = keys2support.find(key);
1491  if (locator == keys2support.end())
1492  {
1493  size_t stats_support_size = stats_support.size();
1494 
1495  // Adding to the map
1496  keys2support[key] = stats_support_sizes.size();
1497  stats_support_n_arrays.push_back(1u); // How many elements now
1498  arrays2support.push_back(stats_support_sizes.size()); // Map of the array id to the support
1499 
1500  // Computing support using the counters included in the model
1501  support_fun.reset_array(Array_);
1502 
1505  if (with_pset)
1506  {
1507 
1508  // Current size of the powerset
1509  size_t pset_stats_size = pset_stats.size();
1510 
1511  // Making space for storing the support
1512  pset_arrays.resize(pset_arrays.size() + 1u);
1513  // pset_stats.resize(pset_stats.size() + 1u);
1514  // pset_probs.resize(pset_probs.size() + 1u);
1515 
1516  try
1517  {
1518 
1519  support_fun.calc(
1520  &(pset_arrays[pset_arrays.size() - 1u]),
1521  &pset_stats
1522  );
1523 
1524  }
1525  catch (const std::exception& e)
1526  {
1527 
1528  printf_barry(
1529  "A problem ocurred while trying to add the array (and recording the powerset). "
1530  );
1531  printf_barry("with error %s\n", e.what());
1532  throw std::logic_error("");
1533 
1534  }
1535 
1536  // Recording the number of elements
1537  pset_locations.push_back(
1538  pset_locations.size() == 0u ?
1539  0u :
1540  pset_locations.back() + pset_sizes.back()
1541  );
1542 
1543  pset_sizes.push_back(
1544  (pset_stats.size() - pset_stats_size) / (counter_fun.size())
1545  );
1546 
1547  // Increasing the space to store probabilities
1548  pset_probs.resize(pset_probs.size() + pset_sizes.back());
1549 
1550 
1551  }
1552  else
1553  {
1554  support_fun.calc();
1555  }
1556 
1557  if (transform_model_fun)
1558  {
1559  auto tmpsupport = support_fun.get_counts();
1560  size_t k = counter_fun.size();
1561  size_t n = tmpsupport.size() / (k + 1);
1562 
1563  std::vector< double > s_new(0u);
1564  s_new.reserve(tmpsupport.size());
1565 
1566  for (size_t i = 0u; i < n; ++i)
1567  {
1568 
1569  // Appending size
1570  s_new.push_back(tmpsupport[i * (k + 1u)]);
1571 
1572  // Applying transformation and adding to the new set
1573  auto res = transform_model_fun(&tmpsupport[i * (k + 1u) + 1u], k);
1574  std::copy(res.begin(), res.end(), std::back_inserter(s_new));
1575 
1576  }
1577 
1578  for (auto & s : s_new)
1579  stats_support.push_back(s);
1580  // stats_support.push_back(s_new);
1581 
1582  } else {
1583  for (auto & s : support_fun.get_counts())
1584  stats_support.push_back(s);
1585 
1586  // stats_support.push_back(support_fun.get_counts());
1587  }
1588 
1589  // Making room for the previous parameters. This will be used to check if
1590  // the normalizing constant has been updated or not.
1591  params_last.push_back(stats_target[0u]);
1592  normalizing_constants.push_back(0.0);
1593  first_calc_done.push_back(false);
1594 
1595  // Incrementing the size of the support set
1596  if (stats_support_sizes.size() == 0u)
1597  {
1598  stats_support_sizes_acc.push_back(0u);
1599  } else {
1600  stats_support_sizes_acc.push_back(
1601  stats_support_sizes.back() +
1602  stats_support_sizes_acc.back()
1603  );
1604  }
1605 
1606 
1607  stats_support_sizes.push_back(
1608 
1609  (stats_support.size() - stats_support_size)/
1610  (counter_fun.size() + 1u)
1611 
1612  );
1613 
1614 
1615  i = arrays2support.size() - 1u;
1616  } else
1617  // Retrieving the corresponding position in the support
1618  i = locator->second;
1619 
1620  // Getting the index
1621  size_t a = arrays2support[i];
1622 
1623  // Generating a random
1624  std::uniform_real_distribution<> urand(0, 1);
1625  double r = urand(*rengine);
1626  double cumprob = 0.0;
1627 
1628  size_t k = params.size();
1629 
1630  // Sampling an array
1631  size_t j = 0u;
1632  double * probs = &pset_probs[ pset_locations[a] ];
1633  if (first_calc_done[a] && (vec_equal_approx(params, params_last[a])))
1634  // If precomputed, then no need to recalc support
1635  {
1636 
1637  while (cumprob < r)
1638  cumprob += *(probs + j++);
1639 
1640  if (j > 0u)
1641  j--;
1642 
1643  } else {
1644 
1645  // probs.resize(pset_arrays[a].size());
1646  std::vector< double > temp_stats(params.size());
1647  const double * stats = &pset_stats[pset_locations[a] * k];
1648 
1649  int i_matches = -1;
1650  for (size_t array = 0u; array < pset_sizes[a]; ++array)
1651  {
1652 
1653  // Filling out the parameters
1654  for (auto p = 0u; p < params.size(); ++p)
1655  temp_stats[p] = stats[array * k + p];
1656 
1657  *(probs + array) = this->likelihood(params, temp_stats, i, false);
1658  cumprob += *(probs + array);
1659 
1660  if (i_matches == -1 && cumprob >= r)
1661  i_matches = array;
1662  }
1663 
1664  #ifdef BARRY_DEBUG
1665  if (i_matches < 0)
1666  throw std::logic_error(
1667  std::string(
1668  "Something went wrong when sampling from a different set of.") +
1669  std::string("parameters. Please report this bug: ") +
1670  std::string(" cumprob: ") + std::to_string(cumprob) +
1671  std::string(" r: ") + std::to_string(r)
1672  );
1673  #endif
1674 
1675  j = i_matches;
1676  first_calc_done[a] = true;
1677  }
1678 
1679 
1680  #ifdef BARRY_DEBUG
1681  return this->pset_arrays.at(a).at(j);
1682  #else
1683  return this->pset_arrays[a][j];
1684  #endif
1685 
1686 }
1687 
1688 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1690  const Array_Type & Array_,
1691  const std::vector< double > & params,
1692  size_t i,
1693  size_t j
1694 ) {
1695 
1696  // Generating a copy of the array so we can update
1697  Array_Type A(Array_, true);
1698 
1699  // Making sure we add it first
1700  A.insert_cell(i, j, A.default_val(), true, false);
1701 
1702  // Computing the change stats_target
1703  std::vector< double > tmp_counts;
1704  tmp_counts.reserve(counters->size());
1705  for (size_t ii = 0u; ii < counters->size(); ++ii)
1706  tmp_counts.push_back(counters->operator[](ii).count(A, i, j));
1707 
1708  // If there is a transformation function, it needs to be
1709  // applied before dealing with the likelihood.
1710  if (transform_model_fun)
1711  tmp_counts = transform_model_fun(&tmp_counts[0u], tmp_counts.size());
1712 
1713  return 1.0/
1714  (1.0 + std::exp(-vec_inner_prod<double>(
1715  &params[0u], &tmp_counts[0u], params.size()
1716  )));
1717 
1718 
1719 }
1720 
1721 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1723  return this->rengine;
1724 }
1725 
1726 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1728  return this->counters;
1729 }
1730 
1731 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1733  return this->rules;
1734 }
1735 
1736 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1738  return this->rules_dyn;
1739 }
1740 
1741 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1744  return &this->support_fun;
1745 }
1746 
1747 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1749 {
1750  return &stats_target;
1751 }
1752 
1753 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1754 inline std::vector< double > *
1756 {
1757  return &stats_support;
1758 }
1759 
1760 // Implementation of get_stats_support_sizes()
1761 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1762 inline std::vector< size_t > *
1764 {
1765  return &stats_support_sizes;
1766 }
1767 
1768 // Implementation of get_stats_support_sizes_acc()
1769 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1770 inline std::vector< size_t > *
1772 {
1773  return &stats_support_sizes_acc;
1774 }
1775 
1776 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1777 inline std::vector< size_t > *
1779 {
1780  return &arrays2support;
1781 }
1782 
1783 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1784 inline std::vector< std::vector< Array_Type > > *
1786  return &pset_arrays;
1787 }
1788 
1789 template <typename Array_Type, typename Data_Counter_Type, typename Data_Rule_Type, typename Data_Rule_Dyn_Type>
1790 inline std::vector<double> *
1792  return &pset_stats;
1793 }
1794 
1795 template <
1796  typename Array_Type,
1797  typename Data_Counter_Type,
1798  typename Data_Rule_Type,
1799  typename Data_Rule_Dyn_Type
1800  >
1801 inline std::vector<double> *
1803  return &pset_probs;
1804 }
1805 
1806 template <
1807  typename Array_Type,
1808  typename Data_Counter_Type,
1809  typename Data_Rule_Type,
1810  typename Data_Rule_Dyn_Type
1811  >
1813 {
1814  return &pset_sizes;
1815 }
1816 
1817 template <
1818  typename Array_Type,
1819  typename Data_Counter_Type,
1820  typename Data_Rule_Type,
1821  typename Data_Rule_Dyn_Type
1822  >
1824 {
1825  return &pset_locations;
1826 }
1827 
1828 template <
1829  typename Array_Type,
1830  typename Data_Counter_Type,
1831  typename Data_Rule_Type,
1832  typename Data_Rule_Dyn_Type
1833  >
1834 inline void
1836  std::function<std::vector<double>(double *,size_t)> fun,
1837  std::vector< std::string > names
1838  )
1839 {
1840 
1841  if (transform_model_fun)
1842  throw std::logic_error("A transformation function for the model has already been established.");
1843 
1844  transform_model_fun = fun;
1845  transform_model_term_names = names;
1846 
1847  size_t k = counters->size();
1848 
1849  auto stats_support_old = stats_support;
1850 
1851  // Applying over the support
1852  for (size_t nsupport = 0u; nsupport < stats_support_sizes.size(); ++nsupport)
1853  {
1854 
1855  // How many observations in the support
1856  size_t n = stats_support_sizes[nsupport];
1857 
1858  // Iterating through each observation in the nsupport'th
1859  for (size_t i = 0; i < n; ++i)
1860  {
1861 
1862  // Applying transformation and adding to the new set
1863  auto res = transform_model_fun(
1864  &stats_support_old[
1865  stats_support_sizes_acc[nsupport] * (k + 1u) +
1866  i * (k + 1u) + 1u
1867  ],
1868  k
1869  );
1870 
1871  if (res.size() != transform_model_term_names.size())
1872  throw std::length_error(
1873  std::string("The transform vector from -transform_model_fun- ") +
1874  std::string(" does not match the size of ") +
1875  std::string("-transform_model_term_names-.")
1876  );
1877 
1878  // Resizing stats_support if the transform stats do not match the
1879  // previous size
1880  if ((nsupport == 0u) && (i == 0u) && (res.size() != k))
1881  stats_support.resize(
1882  (res.size() + 1) * (
1883  stats_support_sizes_acc.back() +
1884  stats_support_sizes.back()
1885  )
1886  );
1887 
1888  // Weigth
1889  stats_support[
1890  stats_support_sizes_acc[nsupport] * (res.size() + 1u) +
1891  (res.size() + 1u) * i
1892  ] = stats_support_old[
1893  stats_support_sizes_acc[nsupport] * (k + 1u) +
1894  i * (k + 1u)
1895  ];
1896 
1897  // Copying the rest of the elements
1898  for (size_t j = 0u; j < res.size(); ++j)
1899  stats_support[
1900  stats_support_sizes_acc[nsupport] * (res.size() + 1u) +
1901  (res.size() + 1u) * i + j + 1u
1902  ] = res[j];
1903 
1904  }
1905 
1906  }
1907 
1908  // Applying over the target statistics
1909  for (auto & s : stats_target)
1910  s = transform_model_fun(&s[0u], k);
1911 
1912  // Checking if there is support included
1913  if (with_pset)
1914  {
1915 
1916  // Applying it to the support
1917  for (auto s = 0u; s < pset_arrays.size(); ++s)
1918  {
1919  std::vector< double > new_stats;
1920  size_t pset_stats_loc = pset_locations[s] * k;
1921 
1922  for (auto a = 0u; a < pset_arrays[s].size(); ++a)
1923  {
1924  // Computing the transformed version of the data
1925  auto tmpstats = transform_model_fun(
1926  &pset_stats[pset_stats_loc + a * k], k
1927  );
1928 
1929  // Storing the new values
1930  for (auto p = 0u; p < k; ++p)
1931  new_stats.push_back(tmpstats[p]);
1932  }
1933 
1934  // Updating the dataset
1935  for (size_t stat = 0u; stat < new_stats.size(); ++stat)
1936  pset_stats[pset_stats_loc + stat] = new_stats[stat];
1937 
1938  }
1939 
1940  }
1941 
1942  // And, resizing the last set of parameters
1943  for (auto & p : params_last)
1944  p.resize(transform_model_term_names.size());
1945 
1946  return;
1947 
1948 }
1949 
1950 #undef MODEL_TEMPLATE
1951 #undef MODEL_TEMPLATE_ARGS
1952 #undef MODEL_TYPE
1953 
1954 #endif
#define printf_barry
#define BARRY_SAFE_EXP
#define BARRY_NCORES_ARG(default)
A counter function based on change statistics.
Vector of counters.
General framework for discrete exponential models. This class allows generating discrete exponential ...
Definition: model-bones.hpp:34
size_t size_unique() const noexcept
std::vector< std::string > transform_model_term_names
const std::mt19937 * get_rengine() const
void update_normalizing_constants(const std::vector< double > &params, BARRY_NCORES_ARG(=1), int i=-1)
Computes the normalizing constant for a given set of parameters.
Definition: model-meat.hpp:136
std::vector< size_t > * get_stats_support_sizes_acc()
Accumulated number of vectors included in the support.
void update_pset_probs(const std::vector< double > &params, BARRY_NCORES_ARG(=1), int i=-1)
Definition: model-meat.hpp:225
Rules< Array_Type, Data_Rule_Dyn_Type > * rules_dyn
Definition: model-bones.hpp:98
std::vector< size_t > pset_sizes
Number of vectors included in the support.
Definition: model-bones.hpp:87
Rules< Array_Type, Data_Rule_Type > * rules
Definition: model-bones.hpp:97
std::vector< double > * get_pset_stats()
Statistics of the support(s)
StatsCounter< Array_Type, Data_Counter_Type > counter_fun
size_t size() const noexcept
void add_counter(Counter< Array_Type, Data_Counter_Type > &counter)
Definition: model-meat.hpp:508
Counters< Array_Type, Data_Counter_Type > * get_counters()
std::vector< std::vector< double > > stats_target
Target statistics of the model.
Definition: model-bones.hpp:66
std::vector< double > pset_probs
Probabilities of the support(s)
Definition: model-bones.hpp:86
Array_Type sample(const Array_Type &Array_, const std::vector< double > &params={})
void add_rule_dyn(Rule< Array_Type, Data_Rule_Dyn_Type > &rule)
Definition: model-meat.hpp:591
std::vector< double > stats_likelihood
Definition: model-bones.hpp:67
std::vector< double > stats_support
Sufficient statistics of the model (support)
Definition: model-bones.hpp:62
void store_psets() noexcept
Definition: model-meat.hpp:495
double conditional_prob(const Array_Type &Array_, const std::vector< double > &params, size_t i, size_t j)
Conditional probability ("Gibbs sampler")
Model< Array_Type, Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_Type > & operator=(const Model< Array_Type, Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_Type > &Model_)
Definition: model-meat.hpp:439
std::vector< std::vector< double > > * get_stats_target()
Raw pointers to the support and target statistics.
std::vector< std::vector< Array_Type > > * get_pset_arrays()
Support< Array_Type, Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_Type > * get_support_fun()
std::vector< double > gen_key(const Array_Type &Array_)
Definition: model-meat.hpp:501
std::vector< size_t > arrays2support
Definition: model-bones.hpp:68
size_t add_array(const Array_Type &Array_, bool force_new=false)
Adds an array to the support of not already included.
Definition: model-meat.hpp:633
void add_hasher(Hasher_fun_type< Array_Type, Data_Counter_Type > fun_)
Definition: model-meat.hpp:552
std::vector< std::string > colnames() const
double likelihood(const std::vector< double > &params, const size_t &i, bool as_log=false, bool no_update_normconst=false)
Definition: model-meat.hpp:810
std::vector< size_t > stats_support_sizes_acc
Accumulated number of vectors included in the support.
Definition: model-bones.hpp:64
std::vector< double > * get_stats_support()
Sufficient statistics of the support(s)
void set_rules(Rules< Array_Type, Data_Rule_Type > *rules_)
Definition: model-meat.hpp:573
size_t nrules() const noexcept
std::vector< size_t > * get_pset_locations()
const std::vector< double > & get_normalizing_constants() const
std::vector< size_t > stats_support_n_arrays
Number of arrays included per support.
Definition: model-bones.hpp:65
void print_stats(size_t i) const
size_t nterms() const noexcept
double likelihood_total(const std::vector< double > &params, bool as_log=false, BARRY_NCORES_ARG(=2), bool no_update_normconst=false)
Rules< Array_Type, Data_Rule_Type > * get_rules()
void set_counters(Counters< Array_Type, Data_Counter_Type > *counters_)
Definition: model-meat.hpp:534
size_t support_size() const noexcept
size_t nrules_dyn() const noexcept
std::vector< std::vector< double > > params_last
Vector of the previously used parameters.
virtual void print() const
Prints information about the model.
Support< Array_Type, Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_Type > support_fun
Definition: model-bones.hpp:99
void set_rules_dyn(Rules< Array_Type, Data_Rule_Dyn_Type > *rules_)
Definition: model-meat.hpp:615
std::vector< std::vector< Array_Type > > pset_arrays
Arrays of the support(s)
Definition: model-bones.hpp:84
std::function< std::vector< double >double *, size_t k)> transform_model_fun
Transformation of the model.
std::vector< size_t > * get_arrays2support()
std::vector< double > * get_pset_probs()
std::vector< size_t > * get_pset_sizes()
MapVec_type< double, size_t > keys2support
Map of types of arrays to support sets.
Definition: model-bones.hpp:75
std::vector< size_t > stats_support_sizes
Number of vectors included in the support.
Definition: model-bones.hpp:63
std::vector< double > normalizing_constants
std::vector< size_t > * get_stats_support_sizes()
Number of vectors included in the support.
std::vector< double > pset_stats
Statistics of the support(s)
Definition: model-bones.hpp:85
std::vector< size_t > pset_locations
Accumulated number of vectors included in the support.
Definition: model-bones.hpp:88
std::vector< bool > first_calc_done
const std::vector< double > & get_likelihoods() const
void update_likelihoods(const std::vector< double > &params,)
Definition: model-meat.hpp:186
void set_transform_model(std::function< std::vector< double >(double *, size_t)> fun, std::vector< std::string > names)
Set the transform_model_fun object.
Counters< Array_Type, Data_Counter_Type > * counters
Definition: model-bones.hpp:96
const std::vector< Array_Type > * get_pset(const size_t &i)
void add_rule(Rule< Array_Type, Data_Rule_Type > &rule)
Definition: model-meat.hpp:563
Rules< Array_Type, Data_Rule_Dyn_Type > * get_rules_dyn()
Rule for determining if a cell should be included in a sequence.
Definition: rules-bones.hpp:20
Vector of objects of class Rule.
Definition: rules-bones.hpp:71
Count stats for a single Array.
void set_counters(Counters< Array_Type, Data_Type > *counters_)
std::vector< double > count_all()
Compute the support of sufficient statistics.
Data_Type fun
Data_Type count_fun_
return res
Data_Type Counter_fun_type< Array_Type, Data_Type > init_fun_
Data_Type fun_
size_t size_t j
Data_Type counter
size_t i
Data_Type &&counter_ noexcept
Data_Type Counter_fun_type< Array_Type, Data_Type > Hasher_fun_type< Array_Type, Data_Type > Data_Type data_
double likelihood_(const double *stats_target, const std::vector< double > &params, const double normalizing_constant, size_t n_params, bool log_=false)
Definition: model-meat.hpp:71
double update_normalizing_constant(const std::vector< double > &params, const double *support, size_t k, size_t n)
Definition: model-meat.hpp:9
Data_Type * counters_
std::unordered_map< std::vector< Ta >, Tb, vecHasher< Ta > > MapVec_type
Definition: typedefs.hpp:128
std::function< bool(const Array_Type &, size_t, size_t, Data_Type &)> Rule_fun_type
Definition: typedefs.hpp:190
std::function< std::vector< double >(const Array_Type &, Data_Type *)> Hasher_fun_type
Hasher function used by the counter.
Definition: typedefs.hpp:200
T vec_inner_prod(const T *a, const T *b, size_t n)
Definition: typedefs.hpp:263
std::function< double(const Array_Type &, size_t, size_t, Data_Type &)> Counter_fun_type
Counter and rule functions.
Definition: typedefs.hpp:187
bool vec_equal_approx(const std::vector< T > &a, const std::vector< T > &b, double eps=1e-100)
Definition: typedefs.hpp:235