1 #ifndef BARRY_MODEL_MEAT_HPP
2 #define BARRY_MODEL_MEAT_HPP 1
10 const std::vector<double> & params,
11 const double * support,
18 std::vector< double > resv(n, 0.0);
20 for (
size_t j = 0u;
j < (k - 1u); ++
j)
23 const double p = params[
j];
25 #if defined(__OPENMP) || defined(_OPENMP)
27 #elif defined(__GNUC__) && !defined(__clang__)
30 for (
size_t i = 0u;
i < n; ++
i)
31 resv[
i] += (*(support +
i * k + 1u +
j)) * p;
36 #if defined(__OPENMP) || defined(_OPENMP)
37 #pragma omp simd reduction(+:res)
38 #elif defined(__GNUC__) && !defined(__clang__)
41 for (
size_t i = 0u;
i < n; ++
i)
51 throw std::overflow_error(
52 std::string(
"NaN in update_normalizing_constant. ") +
53 std::string(
"res = ") + std::to_string(
res) +
54 std::string(
", k = ") + std::to_string(k) +
55 std::string(
", n = ") + std::to_string(n)
58 throw std::overflow_error(
59 std::string(
"Inf in update_normalizing_constant. ") +
60 std::string(
"res = ") + std::to_string(
res) +
61 std::string(
", k = ") + std::to_string(k) +
62 std::string(
", n = ") + std::to_string(n)
72 const double * stats_target,
73 const std::vector< double > & params,
74 const double normalizing_constant,
79 if (n_params != params.size())
80 throw std::length_error(
"-stats_target- and -params- should have the same length.");
82 double numerator = 0.0;
85 #ifdef __INTEL_LLVM_COMPILER
88 #if defined(__OPENMP) || defined(_OPENMP)
89 #pragma omp simd reduction(+:numerator)
91 for (
size_t j = 0u;
j < n_params; ++
j)
92 numerator += *(stats_target +
j) * params[
j];
99 double ans = numerator/normalizing_constant;
103 throw std::overflow_error(
104 std::string(
"NaN in likelihood_. ") +
105 std::string(
"numerator = ") + std::to_string(numerator) +
106 std::string(
", normalizing_constant = ") +
107 std::to_string(normalizing_constant)
110 throw std::overflow_error(
111 std::string(
"Inf in likelihood_. ") +
112 std::string(
"numerator = ") + std::to_string(numerator) +
113 std::string(
", normalizing_constant = ") +
114 std::to_string(normalizing_constant)
118 throw std::overflow_error(
119 std::string(
"Likelihood > 1.0") +
120 std::string(
"numerator = ") + std::to_string(numerator) +
121 std::string(
", normalizing_constant = ") +
122 std::to_string(normalizing_constant)
132 typename Data_Counter_Type,
133 typename Data_Rule_Type,
134 typename Data_Rule_Dyn_Type
137 const std::vector< double > & params,
142 const size_t n = stats_support_sizes.size();
145 if ((ncores > 1u) && (n < 128u))
152 #if defined(__OPENMP) || defined(_OPENMP)
153 #pragma omp parallel for firstprivate(params) num_threads(ncores) \
154 shared(n, normalizing_constants, first_calc_done, \
155 stats_support, stats_support_sizes, stats_support_sizes_acc, i) \
158 for (
size_t s = 0u; s < n; ++s)
161 if ((
i > -1) && (
i !=
static_cast<int>(s)))
164 size_t k = params.size() + 1u;
165 size_t n = stats_support_sizes[s];
167 first_calc_done[s] =
true;
169 params, &stats_support[
170 stats_support_sizes_acc[s] * k
182 typename Data_Counter_Type,
183 typename Data_Rule_Type,
184 typename Data_Rule_Dyn_Type
187 const std::vector< double > & params,
191 update_normalizing_constants(params, ncores);
193 size_t n_params = params.size();
195 if (stats_likelihood.size() != stats_target.size())
196 stats_likelihood.resize(stats_target.size());
198 #if defined(__OPENMP) || defined(_OPENMP)
199 #pragma omp parallel for simd num_threads(ncores) \
200 shared(n_params, stats_target, normalizing_constants, arrays2support, \
204 for (
size_t s = 0u; s < stats_target.size(); ++s)
206 stats_likelihood[s] = 0.0;
207 for (
size_t j = 0u;
j < n_params; ++
j)
208 stats_likelihood[s] += stats_target[s][
j] * params[
j];
210 stats_likelihood[s] =
212 normalizing_constants[arrays2support[s]];
221 typename Data_Counter_Type,
222 typename Data_Rule_Type,
223 typename Data_Rule_Dyn_Type
226 const std::vector< double > & params,
231 update_normalizing_constants(params, ncores,
i);
234 params_last[
i] = params;
236 size_t n_params = params.size();
238 pset_locations.back() +
246 #if defined(__OPENMP) || defined(_OPENMP)
247 #pragma omp parallel for num_threads(ncores) collapse(1) \
248 shared(n_params, pset_stats, pset_probs, normalizing_constants, pset_sizes, \
252 for (
size_t s = 0u; s < pset_sizes.size(); ++s)
255 if ((
i >= 0) && (
i !=
static_cast<int>(s)))
259 size_t pset_start = pset_locations[s];
262 #if defined(__OPENMP) || defined(_OPENMP)
265 for (
size_t a = 0u; a < pset_sizes[s]; ++a)
269 size_t start_loc = pset_start * n_params + a * n_params;
271 pset_probs[pset_start + a] = 0.0;
274 for (
size_t j = 0u;
j < n_params; ++
j)
275 pset_probs[pset_start + a] +=
276 pset_stats[start_loc +
j] * params[
j];
279 pset_probs[pset_start + a] =
281 normalizing_constants[s];
286 double totprob = 0.0;
287 for (
size_t i_ = 0u; i_ < pset_sizes[s]; ++
i)
288 totprob =+ pset_probs[pset_start + i_];
290 if (std::abs(totprob - 1) > 1e-6)
291 throw std::runtime_error(
292 std::string(
"Probabilities do not add to one! ") +
293 std::string(
"totprob = ") + std::to_string(totprob)
305 typename Data_Counter_Type,
306 typename Data_Rule_Type,
307 typename Data_Rule_Dyn_Type
311 stats_support_sizes(0u),
312 stats_support_sizes_acc(0u),
313 stats_support_n_arrays(0u),
315 stats_likelihood(0u),
318 pset_arrays(0u), pset_stats(0u),
320 rules(new
Rules<Array_Type,Data_Rule_Type>()),
321 rules_dyn(new
Rules<Array_Type,Data_Rule_Dyn_Type>()),
322 support_fun(), counter_fun(), delete_counters(true),
324 delete_rules_dyn(true),
325 transform_model_fun(nullptr),
326 transform_model_term_names(0u)
343 typename Data_Counter_Type,
344 typename Data_Rule_Type,
345 typename Data_Rule_Dyn_Type
351 stats_support_sizes(0u),
352 stats_support_sizes_acc(0u),
353 stats_support_n_arrays(0u),
355 stats_likelihood(0u),
356 arrays2support(0u), keys2support(0u),
357 pset_arrays(0u), pset_stats(0u),
359 rules(new
Rules<Array_Type,Data_Rule_Type>()),
360 rules_dyn(new
Rules<Array_Type,Data_Rule_Dyn_Type>()),
361 support_fun(), counter_fun(), delete_counters(true),
363 delete_rules_dyn(true),
364 transform_model_fun(nullptr),
365 transform_model_term_names(0u)
385 typename Data_Counter_Type,
386 typename Data_Rule_Type,
387 typename Data_Rule_Dyn_Type
392 stats_support(Model_.stats_support),
393 stats_support_sizes(Model_.stats_support_sizes),
394 stats_support_sizes_acc(Model_.stats_support_sizes_acc),
395 stats_support_n_arrays(Model_.stats_support_n_arrays),
396 stats_target(Model_.stats_target),
397 stats_likelihood(Model_.stats_likelihood),
398 arrays2support(Model_.arrays2support),
399 keys2support(Model_.keys2support),
400 pset_arrays(Model_.pset_arrays),
401 pset_stats(Model_.pset_stats),
402 pset_probs(Model_.pset_probs),
403 pset_sizes(Model_.pset_sizes),
404 pset_locations(Model_.pset_locations),
406 rules(new
Rules<Array_Type,Data_Rule_Type>(*(Model_.rules))),
407 rules_dyn(new
Rules<Array_Type,Data_Rule_Dyn_Type>(*(Model_.rules_dyn))),
410 params_last(Model_.params_last),
411 normalizing_constants(Model_.normalizing_constants),
412 first_calc_done(Model_.first_calc_done),
413 delete_counters(true),
415 delete_rules_dyn(true),
416 transform_model_fun(Model_.transform_model_fun),
417 transform_model_term_names(Model_.transform_model_term_names)
434 typename Data_Counter_Type,
435 typename Data_Rule_Type,
436 typename Data_Rule_Dyn_Type
444 if (
this != &Model_) {
452 if (delete_rules_dyn)
471 delete_counters =
true;
473 delete_rules_dyn =
true;
485 support_fun.set_rules(rules);
486 support_fun.set_rules_dyn(rules_dyn);
494 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
500 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
502 const Array_Type & Array_
504 return this->
counters->gen_hash(Array_);
507 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
516 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
520 Data_Counter_Type
data_
533 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
538 if (delete_counters) {
540 delete_counters =
false;
551 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
562 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
567 rules->add_rule(rules, Data_Rule_Type());
572 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
580 this->rules = rules_;
581 this->delete_rules =
false;
583 support_fun.set_rules(rules);
590 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
595 rules_dyn->add_rule(rules_, Data_Rule_Dyn_Type());
599 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
602 Data_Rule_Dyn_Type
data_
614 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
619 if (delete_rules_dyn)
622 this->rules_dyn = rules_;
623 this->delete_rules_dyn =
false;
624 support_fun.set_rules_dyn(rules_dyn);
631 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
634 const Array_Type & Array_,
639 counter_fun.reset_array(&Array_);
641 if (transform_model_fun)
644 auto tmpcounts = counter_fun.count_all();
645 stats_target.emplace_back(
646 transform_model_fun(&tmpcounts[0u], tmpcounts.size())
650 stats_target.push_back(counter_fun.count_all());
654 std::vector< double > key =
counters->gen_hash(Array_);
656 if (force_new | (locator == keys2support.end()))
660 size_t stats_support_size = stats_support.size();
663 keys2support[key] = stats_support_sizes.size();
664 stats_support_n_arrays.push_back(1u);
665 arrays2support.push_back(stats_support_sizes.size());
668 support_fun.reset_array(Array_);
676 pset_arrays.resize(pset_arrays.size() + 1u);
679 size_t pset_stats_size = pset_stats.size();
685 &(pset_arrays[pset_arrays.size() - 1u]),
690 catch (
const std::exception& e)
694 "A problem ocurred while trying to add the array (and recording the powerset). "
697 printf_barry(
"Here is the array that generated the error.\n");
699 throw std::logic_error(
"");
704 pset_locations.push_back(
705 pset_locations.size() == 0u ?
707 pset_locations.back() + pset_sizes.back()
710 pset_sizes.push_back(
711 (pset_stats.size() - pset_stats_size) / (counter_fun.size())
726 catch (
const std::exception& e)
730 "A problem ocurred while trying to add the array (and recording the powerset). "
733 printf_barry(
"Here is the array that generated the error.\n");
735 throw std::logic_error(
"");
740 if (transform_model_fun)
742 auto tmpsupport = support_fun.get_counts();
743 size_t k = counter_fun.size();
744 size_t n = tmpsupport.size() / (k + 1);
746 std::vector< double > s_new(0u);
747 s_new.reserve(tmpsupport.size());
749 for (
size_t i = 0u;
i < n; ++
i)
753 s_new.push_back(tmpsupport[
i * (k + 1u)]);
756 auto res = transform_model_fun(&tmpsupport[
i * (k + 1u) + 1u], k);
757 std::copy(
res.begin(),
res.end(), std::back_inserter(s_new));
761 for (
auto & s : s_new)
762 stats_support.push_back(s);
766 for (
const auto & s: support_fun.get_counts())
767 stats_support.push_back(s);
772 params_last.push_back(stats_target[0u]);
773 normalizing_constants.push_back(0.0);
774 first_calc_done.push_back(
false);
777 if (stats_support_sizes.size() == 0u)
779 stats_support_sizes_acc.push_back(0u);
781 stats_support_sizes_acc.push_back(
782 stats_support_sizes.back() +
783 stats_support_sizes_acc.back()
788 stats_support_sizes.push_back(
790 (stats_support.size() - stats_support_size)/
791 (counter_fun.size() + 1u)
795 return arrays2support.size() - 1u;
800 ++stats_support_n_arrays[locator->second];
803 arrays2support.push_back(locator->second);
805 return arrays2support.size() - 1u;
809 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
811 const std::vector<double> & params,
814 bool no_update_normconst
818 if (
i >= arrays2support.size())
819 throw std::range_error(
"The requested support is out of range");
821 size_t idx = arrays2support[
i];
824 if (this->stats_support_sizes[idx] == 0u)
825 return as_log ? -std::numeric_limits<double>::infinity() : 0.0;
828 if (!no_update_normconst && (!first_calc_done[idx] || !
vec_equal_approx(params, params_last[idx])))
831 first_calc_done[idx] =
true;
833 size_t k = params.size() + 1u;
834 size_t n = stats_support_sizes[idx];
837 params, &stats_support[
838 stats_support_sizes_acc[idx] * k
842 params_last[idx] = params;
849 normalizing_constants[idx],
856 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
858 const std::vector<double> & params,
859 const Array_Type & Array_,
862 bool no_update_normconst
871 std::vector< double > key =
counters->gen_hash(Array_);
873 if (locator == keys2support.end())
874 throw std::range_error(
875 "This type of array has not been included in the model."
878 loc = locator->second;
884 if (
static_cast<size_t>(
i) >= arrays2support.size())
885 throw std::range_error(
886 "This type of array has not been included in the model."
889 loc = arrays2support[
i];
894 if (this->stats_support_sizes[loc] == 0u)
895 return as_log ? -std::numeric_limits<double>::infinity() : 0.0;
902 std::vector< double > target_ = tmpstats.
count_all();
904 if (transform_model_fun)
905 target_ = transform_model_fun(&target_[0u], target_.size());
908 if (!no_update_normconst && (!first_calc_done[loc] || !
vec_equal_approx(params, params_last[loc])) )
911 first_calc_done[loc] =
true;
913 size_t k = params.size() + 1u;
914 size_t n = stats_support_sizes[loc];
917 params, &stats_support[
918 stats_support_sizes_acc[loc] * k
922 params_last[loc] = params;
927 if (!support_fun.eval_rules_dyn(target_, 0u, 0u))
928 return as_log ? -std::numeric_limits<double>::infinity() : 0.0;
933 normalizing_constants[loc],
940 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
942 const std::vector<double> & params,
943 const std::vector<double> & target_,
946 bool no_update_normconst
950 if (
i >= arrays2support.size())
951 throw std::range_error(
"The requested support is out of range");
953 size_t loc = arrays2support[
i];
956 if (!support_fun.eval_rules_dyn(target_, 0u, 0u))
960 std::string target_str =
"";
961 for (
size_t i = 0u;
i < target_.size(); ++
i)
962 target_str += std::to_string(target_[
i]) +
" ";
964 throw std::range_error(
965 "The array is not in the support set. The array's statistics are: " +
973 if (this->stats_support_sizes[loc] == 0u)
975 throw std::logic_error(
"The support set for this array is empty.");
979 if (!no_update_normconst && (!first_calc_done[loc] || !
vec_equal_approx(params, params_last[loc])) ) {
981 first_calc_done[loc] =
true;
983 size_t k = params.size() + 1u;
984 size_t n = stats_support_sizes[loc];
987 params, &stats_support[
988 stats_support_sizes_acc[loc] * k
992 params_last[loc] = params;
999 normalizing_constants[loc],
1006 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1008 const std::vector<double> & params,
1009 const double * target_,
1012 bool no_update_normconst
1016 if (
i >= arrays2support.size())
1017 throw std::range_error(
"The requested support is out of range");
1019 size_t loc = arrays2support[
i];
1022 if (support_fun.get_rules_dyn()->size() > 0u)
1025 std::vector< double > tmp_target;
1026 tmp_target.reserve(nterms());
1027 for (
size_t t = 0u; t < nterms(); ++t)
1028 tmp_target.push_back(*(target_ + t));
1030 if (!support_fun.eval_rules_dyn(tmp_target, 0u, 0u))
1033 std::string target_str =
"";
1034 for (
size_t i = 0u;
i < nterms(); ++
i)
1035 target_str += std::to_string((*target_ +
i)) +
" ";
1037 throw std::range_error(
1038 "The array is not in the support set. The array's statistics are: " + target_str + std::string(
".")
1045 if (this->stats_support_sizes[loc] == 0u)
1047 throw std::logic_error(
"The support set for this array is empty.");
1051 if (!no_update_normconst && (!first_calc_done[loc] || !
vec_equal_approx(params, params_last[loc]) )) {
1053 first_calc_done[loc] =
true;
1055 size_t k = params.size() + 1u;
1056 size_t n = stats_support_sizes[loc];
1059 params, &stats_support[
1060 stats_support_sizes_acc[loc] * k
1064 params_last[loc] = params;
1071 normalizing_constants[loc],
1078 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1080 const std::vector<double> & params,
1083 bool no_update_normconst
1086 size_t params_last_size = params_last.size();
1088 if (!no_update_normconst)
1090 #if defined(__OPENMP) || defined(_OPENMP)
1091 #pragma omp parallel for num_threads(ncores) \
1092 shared(normalizing_constants, params_last, first_calc_done, \
1093 stats_support, stats_support_sizes, stats_support_sizes_acc) \
1094 firstprivate(params)
1096 for (
size_t i = 0u;
i < params_last_size; ++
i)
1102 size_t k = params.size() + 1u;
1103 size_t n = stats_support_sizes[
i];
1105 first_calc_done[
i] =
true;
1107 params, &stats_support[
1108 stats_support_sizes_acc[
i] * k
1112 params_last[
i] = params;
1123 for (
size_t i = 0;
i < stats_target.size(); ++
i)
1125 &stats_target[
i][0u],
1130 #if defined(__OPENMP) || defined(_OPENMP)
1131 #pragma omp simd reduction(-:res)
1133 for (
size_t i = 0u;
i < params_last_size; ++
i)
1134 res -= (std::log(normalizing_constants[
i]) * this->stats_support_n_arrays[
i]);
1139 size_t stats_target_size = stats_target.size();
1140 #if defined(__OPENMP) || defined(_OPENMP)
1141 #pragma omp simd reduction(*:res)
1143 for (
size_t i = 0;
i < stats_target_size; ++
i)
1146 &stats_target[
i][0u],
1150 normalizing_constants[arrays2support[
i]];
1159 typename Array_Type,
1160 typename Data_Counter_Type,
1161 typename Data_Rule_Type,
1162 typename Data_Rule_Dyn_Type
1164 inline const std::vector< double > &
1167 return normalizing_constants;
1172 typename Array_Type,
1173 typename Data_Counter_Type,
1174 typename Data_Rule_Type,
1175 typename Data_Rule_Dyn_Type
1177 inline const std::vector< double > &
1180 return stats_likelihood;
1184 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1185 inline const std::vector< Array_Type > *
1190 if (
i >= arrays2support.size())
1191 throw std::range_error(
"The requested support is out of range");
1194 return &pset_arrays[arrays2support[
i]];
1198 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1199 inline const double *
1204 if (
i >= arrays2support.size())
1205 throw std::range_error(
"The requested support is out of range");
1207 return &pset_stats[pset_locations[arrays2support[
i]] * counter_fun.size()];
1211 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1215 if (
i >= arrays2support.size())
1216 throw std::range_error(
"The requested support is out of range");
1219 size_t array_id = arrays2support[
i];
1221 size_t k = nterms();
1222 size_t nunique = stats_support_sizes.size();
1224 for (
size_t l = 0u; l < nunique; ++l)
1230 stats_support_sizes_acc[l] * (k + 1u)
1234 for (
size_t j = 0u;
j < k; ++
j)
1239 stats_support_sizes_acc[l] * (k + 1u) +
j + 1u
1251 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1260 int min_v = std::numeric_limits<int>::max();
1263 for (
const auto & stat : this->stats_support_sizes)
1266 if (
static_cast<int>(stat) > max_v)
1267 max_v =
static_cast<int>(stat);
1269 if (
static_cast<int>(stat) < min_v)
1270 min_v =
static_cast<int>(stat);
1278 if (this->size() > 0u)
1281 printf_barry(
"Support size : %li\n", this->size_unique());
1282 printf_barry(
"Support size range : [%i, %i]\n", min_v, max_v);
1295 static_cast<size_t>(std::accumulate(pset_sizes.begin(), pset_sizes.end(), 0u))
1299 printf_barry(
"Transform. Fun. : %s\n", transform_model_fun ?
"yes":
"no");
1300 printf_barry(
"Model terms (% 2li) :\n", this->nterms());
1301 for (
auto & cn : this->colnames())
1306 if (this->nrules() > 0u)
1310 for (
auto & rn : rules->get_names())
1316 if (this->nrules_dyn() > 0u)
1318 printf_barry(
"Model rules dyn (% 2li) :\n", this->nrules_dyn());
1320 for (
auto & rn : rules_dyn->get_names())
1330 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1334 return this->stats_target.size();
1338 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1343 return this->stats_support_sizes.size();
1347 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1351 if (transform_model_fun)
1352 return transform_model_term_names.size();
1358 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1362 return this->rules->size();
1366 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1370 return this->rules_dyn->size();
1374 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1379 return stats_support_sizes_acc.back();
1388 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1392 if (transform_model_fun)
1393 return transform_model_term_names;
1400 typename Array_Type,
1401 typename Data_Counter_Type,
1402 typename Data_Rule_Type,
1403 typename Data_Rule_Dyn_Type
1408 const std::vector<double> & params
1412 if (!this->with_pset)
1413 throw std::logic_error(
"Sampling is only available when store_pset() is active.");
1415 if (
i >= arrays2support.size())
1416 throw std::range_error(
"The requested support is out of range");
1419 size_t a = arrays2support[
i];
1422 std::uniform_real_distribution<> urand(0, 1);
1423 double r = urand(*rengine);
1424 double cumprob = 0.0;
1427 if (pset_probs.size() == 0u)
1428 update_pset_probs(params, 1u,
static_cast<int>(a));
1436 const double * probs = &pset_probs[pset_locations[a]];
1438 cumprob += *(probs +
j++);
1445 update_pset_probs(params, 1u,
static_cast<int>(a));
1447 const double * probs = &pset_probs[pset_locations[a]];
1449 cumprob += *(probs +
j++);
1455 if (
j > pset_arrays.at(a).size())
1456 throw std::logic_error(
1458 "Something went wrong when sampling from a different set of.") +
1459 std::string(
"parameters. Please report this bug: ") +
1460 std::string(
" cumprob: ") + std::to_string(cumprob) +
1461 std::string(
" r: ") + std::to_string(r)
1468 return this->pset_arrays.at(a).at(
j);
1470 return this->pset_arrays[a][
j];
1475 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1477 const Array_Type & Array_,
1478 const std::vector<double> & params
1482 if (!this->with_pset)
1483 throw std::logic_error(
"Sampling is only available when store_pset() is active.");
1489 std::vector< double > key =
counters->gen_hash(Array_);
1491 if (locator == keys2support.end())
1493 size_t stats_support_size = stats_support.size();
1496 keys2support[key] = stats_support_sizes.size();
1497 stats_support_n_arrays.push_back(1u);
1498 arrays2support.push_back(stats_support_sizes.size());
1501 support_fun.reset_array(Array_);
1509 size_t pset_stats_size = pset_stats.size();
1512 pset_arrays.resize(pset_arrays.size() + 1u);
1520 &(pset_arrays[pset_arrays.size() - 1u]),
1525 catch (
const std::exception& e)
1529 "A problem ocurred while trying to add the array (and recording the powerset). "
1532 throw std::logic_error(
"");
1537 pset_locations.push_back(
1538 pset_locations.size() == 0u ?
1540 pset_locations.back() + pset_sizes.back()
1543 pset_sizes.push_back(
1544 (pset_stats.size() - pset_stats_size) / (counter_fun.size())
1548 pset_probs.resize(pset_probs.size() + pset_sizes.back());
1557 if (transform_model_fun)
1559 auto tmpsupport = support_fun.get_counts();
1560 size_t k = counter_fun.size();
1561 size_t n = tmpsupport.size() / (k + 1);
1563 std::vector< double > s_new(0u);
1564 s_new.reserve(tmpsupport.size());
1566 for (
size_t i = 0u;
i < n; ++
i)
1570 s_new.push_back(tmpsupport[
i * (k + 1u)]);
1573 auto res = transform_model_fun(&tmpsupport[
i * (k + 1u) + 1u], k);
1574 std::copy(
res.begin(),
res.end(), std::back_inserter(s_new));
1578 for (
auto & s : s_new)
1579 stats_support.push_back(s);
1583 for (
auto & s : support_fun.get_counts())
1584 stats_support.push_back(s);
1591 params_last.push_back(stats_target[0u]);
1592 normalizing_constants.push_back(0.0);
1593 first_calc_done.push_back(
false);
1596 if (stats_support_sizes.size() == 0u)
1598 stats_support_sizes_acc.push_back(0u);
1600 stats_support_sizes_acc.push_back(
1601 stats_support_sizes.back() +
1602 stats_support_sizes_acc.back()
1607 stats_support_sizes.push_back(
1609 (stats_support.size() - stats_support_size)/
1610 (counter_fun.size() + 1u)
1615 i = arrays2support.size() - 1u;
1618 i = locator->second;
1621 size_t a = arrays2support[
i];
1624 std::uniform_real_distribution<> urand(0, 1);
1625 double r = urand(*rengine);
1626 double cumprob = 0.0;
1628 size_t k = params.size();
1632 double * probs = &pset_probs[ pset_locations[a] ];
1638 cumprob += *(probs +
j++);
1646 std::vector< double > temp_stats(params.size());
1647 const double * stats = &pset_stats[pset_locations[a] * k];
1650 for (
size_t array = 0u; array < pset_sizes[a]; ++array)
1654 for (
auto p = 0u; p < params.size(); ++p)
1655 temp_stats[p] = stats[array * k + p];
1657 *(probs + array) = this->likelihood(params, temp_stats,
i,
false);
1658 cumprob += *(probs + array);
1660 if (i_matches == -1 && cumprob >= r)
1666 throw std::logic_error(
1668 "Something went wrong when sampling from a different set of.") +
1669 std::string(
"parameters. Please report this bug: ") +
1670 std::string(
" cumprob: ") + std::to_string(cumprob) +
1671 std::string(
" r: ") + std::to_string(r)
1676 first_calc_done[a] =
true;
1681 return this->pset_arrays.at(a).at(
j);
1683 return this->pset_arrays[a][
j];
1688 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1690 const Array_Type & Array_,
1691 const std::vector< double > & params,
1697 Array_Type A(Array_,
true);
1700 A.insert_cell(
i,
j, A.default_val(),
true,
false);
1703 std::vector< double > tmp_counts;
1704 tmp_counts.reserve(
counters->size());
1705 for (
size_t ii = 0u; ii <
counters->size(); ++ii)
1706 tmp_counts.push_back(
counters->operator[](ii).count(A,
i,
j));
1710 if (transform_model_fun)
1711 tmp_counts = transform_model_fun(&tmp_counts[0u], tmp_counts.size());
1714 (1.0 + std::exp(-vec_inner_prod<double>(
1715 ¶ms[0u], &tmp_counts[0u], params.size()
1721 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1723 return this->rengine;
1726 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1731 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1736 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1738 return this->rules_dyn;
1741 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1744 return &this->support_fun;
1747 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1750 return &stats_target;
1753 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1754 inline std::vector< double > *
1757 return &stats_support;
1761 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1762 inline std::vector< size_t > *
1765 return &stats_support_sizes;
1769 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1770 inline std::vector< size_t > *
1773 return &stats_support_sizes_acc;
1776 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1777 inline std::vector< size_t > *
1780 return &arrays2support;
1783 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1784 inline std::vector< std::vector< Array_Type > > *
1786 return &pset_arrays;
1789 template <
typename Array_Type,
typename Data_Counter_Type,
typename Data_Rule_Type,
typename Data_Rule_Dyn_Type>
1790 inline std::vector<double> *
1796 typename Array_Type,
1797 typename Data_Counter_Type,
1798 typename Data_Rule_Type,
1799 typename Data_Rule_Dyn_Type
1801 inline std::vector<double> *
1807 typename Array_Type,
1808 typename Data_Counter_Type,
1809 typename Data_Rule_Type,
1810 typename Data_Rule_Dyn_Type
1818 typename Array_Type,
1819 typename Data_Counter_Type,
1820 typename Data_Rule_Type,
1821 typename Data_Rule_Dyn_Type
1825 return &pset_locations;
1829 typename Array_Type,
1830 typename Data_Counter_Type,
1831 typename Data_Rule_Type,
1832 typename Data_Rule_Dyn_Type
1836 std::function<std::vector<double>(
double *,
size_t)>
fun,
1837 std::vector< std::string > names
1841 if (transform_model_fun)
1842 throw std::logic_error(
"A transformation function for the model has already been established.");
1844 transform_model_fun =
fun;
1845 transform_model_term_names = names;
1849 auto stats_support_old = stats_support;
1852 for (
size_t nsupport = 0u; nsupport < stats_support_sizes.size(); ++nsupport)
1856 size_t n = stats_support_sizes[nsupport];
1859 for (
size_t i = 0;
i < n; ++
i)
1863 auto res = transform_model_fun(
1865 stats_support_sizes_acc[nsupport] * (k + 1u) +
1871 if (
res.size() != transform_model_term_names.size())
1872 throw std::length_error(
1873 std::string(
"The transform vector from -transform_model_fun- ") +
1874 std::string(
" does not match the size of ") +
1875 std::string(
"-transform_model_term_names-.")
1880 if ((nsupport == 0u) && (
i == 0u) && (
res.size() != k))
1881 stats_support.resize(
1882 (
res.size() + 1) * (
1883 stats_support_sizes_acc.back() +
1884 stats_support_sizes.back()
1890 stats_support_sizes_acc[nsupport] * (
res.size() + 1u) +
1891 (
res.size() + 1u) *
i
1892 ] = stats_support_old[
1893 stats_support_sizes_acc[nsupport] * (k + 1u) +
1898 for (
size_t j = 0u;
j <
res.size(); ++
j)
1900 stats_support_sizes_acc[nsupport] * (
res.size() + 1u) +
1901 (
res.size() + 1u) *
i +
j + 1u
1909 for (
auto & s : stats_target)
1910 s = transform_model_fun(&s[0u], k);
1917 for (
auto s = 0u; s < pset_arrays.size(); ++s)
1919 std::vector< double > new_stats;
1920 size_t pset_stats_loc = pset_locations[s] * k;
1922 for (
auto a = 0u; a < pset_arrays[s].size(); ++a)
1925 auto tmpstats = transform_model_fun(
1926 &pset_stats[pset_stats_loc + a * k], k
1930 for (
auto p = 0u; p < k; ++p)
1931 new_stats.push_back(tmpstats[p]);
1935 for (
size_t stat = 0u; stat < new_stats.size(); ++stat)
1936 pset_stats[pset_stats_loc + stat] = new_stats[stat];
1943 for (
auto & p : params_last)
1944 p.resize(transform_model_term_names.size());
1950 #undef MODEL_TEMPLATE
1951 #undef MODEL_TEMPLATE_ARGS
#define BARRY_NCORES_ARG(default)
A counter function based on change statistics.
General framework for discrete exponential models. This class allows generating discrete exponential ...
size_t size_unique() const noexcept
std::vector< std::string > transform_model_term_names
const std::mt19937 * get_rengine() const
void update_normalizing_constants(const std::vector< double > ¶ms, BARRY_NCORES_ARG(=1), int i=-1)
Computes the normalizing constant for a given set of parameters.
std::vector< size_t > * get_stats_support_sizes_acc()
Accumulated number of vectors included in the support.
void update_pset_probs(const std::vector< double > ¶ms, BARRY_NCORES_ARG(=1), int i=-1)
Rules< Array_Type, Data_Rule_Dyn_Type > * rules_dyn
std::vector< size_t > pset_sizes
Number of vectors included in the support.
Rules< Array_Type, Data_Rule_Type > * rules
std::vector< double > * get_pset_stats()
Statistics of the support(s)
StatsCounter< Array_Type, Data_Counter_Type > counter_fun
size_t size() const noexcept
void add_counter(Counter< Array_Type, Data_Counter_Type > &counter)
Counters< Array_Type, Data_Counter_Type > * get_counters()
std::vector< std::vector< double > > stats_target
Target statistics of the model.
std::vector< double > pset_probs
Probabilities of the support(s)
Array_Type sample(const Array_Type &Array_, const std::vector< double > ¶ms={})
void add_rule_dyn(Rule< Array_Type, Data_Rule_Dyn_Type > &rule)
std::vector< double > stats_likelihood
std::vector< double > stats_support
Sufficient statistics of the model (support)
void store_psets() noexcept
double conditional_prob(const Array_Type &Array_, const std::vector< double > ¶ms, size_t i, size_t j)
Conditional probability ("Gibbs sampler")
Model< Array_Type, Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_Type > & operator=(const Model< Array_Type, Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_Type > &Model_)
std::vector< std::vector< double > > * get_stats_target()
Raw pointers to the support and target statistics.
std::vector< std::vector< Array_Type > > * get_pset_arrays()
Support< Array_Type, Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_Type > * get_support_fun()
std::vector< double > gen_key(const Array_Type &Array_)
std::vector< size_t > arrays2support
size_t add_array(const Array_Type &Array_, bool force_new=false)
Adds an array to the support of not already included.
void add_hasher(Hasher_fun_type< Array_Type, Data_Counter_Type > fun_)
std::vector< std::string > colnames() const
double likelihood(const std::vector< double > ¶ms, const size_t &i, bool as_log=false, bool no_update_normconst=false)
std::vector< size_t > stats_support_sizes_acc
Accumulated number of vectors included in the support.
std::vector< double > * get_stats_support()
Sufficient statistics of the support(s)
void set_rules(Rules< Array_Type, Data_Rule_Type > *rules_)
size_t nrules() const noexcept
std::vector< size_t > * get_pset_locations()
const std::vector< double > & get_normalizing_constants() const
std::vector< size_t > stats_support_n_arrays
Number of arrays included per support.
void print_stats(size_t i) const
size_t nterms() const noexcept
double likelihood_total(const std::vector< double > ¶ms, bool as_log=false, BARRY_NCORES_ARG(=2), bool no_update_normconst=false)
Rules< Array_Type, Data_Rule_Type > * get_rules()
void set_counters(Counters< Array_Type, Data_Counter_Type > *counters_)
size_t support_size() const noexcept
size_t nrules_dyn() const noexcept
std::vector< std::vector< double > > params_last
Vector of the previously used parameters.
virtual void print() const
Prints information about the model.
Support< Array_Type, Data_Counter_Type, Data_Rule_Type, Data_Rule_Dyn_Type > support_fun
void set_rules_dyn(Rules< Array_Type, Data_Rule_Dyn_Type > *rules_)
std::vector< std::vector< Array_Type > > pset_arrays
Arrays of the support(s)
std::function< std::vector< double >double *, size_t k)> transform_model_fun
Transformation of the model.
std::vector< size_t > * get_arrays2support()
std::vector< double > * get_pset_probs()
std::vector< size_t > * get_pset_sizes()
MapVec_type< double, size_t > keys2support
Map of types of arrays to support sets.
std::vector< size_t > stats_support_sizes
Number of vectors included in the support.
std::vector< double > normalizing_constants
std::vector< size_t > * get_stats_support_sizes()
Number of vectors included in the support.
std::vector< double > pset_stats
Statistics of the support(s)
std::vector< size_t > pset_locations
Accumulated number of vectors included in the support.
std::vector< bool > first_calc_done
const std::vector< double > & get_likelihoods() const
void update_likelihoods(const std::vector< double > ¶ms,)
void set_transform_model(std::function< std::vector< double >(double *, size_t)> fun, std::vector< std::string > names)
Set the transform_model_fun object.
Counters< Array_Type, Data_Counter_Type > * counters
const std::vector< Array_Type > * get_pset(const size_t &i)
void add_rule(Rule< Array_Type, Data_Rule_Type > &rule)
Rules< Array_Type, Data_Rule_Dyn_Type > * get_rules_dyn()
Rule for determining if a cell should be included in a sequence.
Vector of objects of class Rule.
Count stats for a single Array.
void set_counters(Counters< Array_Type, Data_Type > *counters_)
std::vector< double > count_all()
Compute the support of sufficient statistics.
Data_Type Counter_fun_type< Array_Type, Data_Type > init_fun_
Data_Type &&counter_ noexcept
Data_Type Counter_fun_type< Array_Type, Data_Type > Hasher_fun_type< Array_Type, Data_Type > Data_Type data_
double likelihood_(const double *stats_target, const std::vector< double > ¶ms, const double normalizing_constant, size_t n_params, bool log_=false)
double update_normalizing_constant(const std::vector< double > ¶ms, const double *support, size_t k, size_t n)
std::unordered_map< std::vector< Ta >, Tb, vecHasher< Ta > > MapVec_type
std::function< bool(const Array_Type &, size_t, size_t, Data_Type &)> Rule_fun_type
std::function< std::vector< double >(const Array_Type &, Data_Type *)> Hasher_fun_type
Hasher function used by the counter.
T vec_inner_prod(const T *a, const T *b, size_t n)
std::function< double(const Array_Type &, size_t, size_t, Data_Type &)> Counter_fun_type
Counter and rule functions.
bool vec_equal_approx(const std::vector< T > &a, const std::vector< T > &b, double eps=1e-100)