barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
counters.hpp
Go to the documentation of this file.
1 #ifndef BARRAY_DEFM_H
2 #define BARRAY_DEFM_H 1
3 
4 #include "formula.hpp"
5 
13 
14 
15 
16 
17 
19 
27 #define MAKE_DEFM_HASHER(hasher,a,cov) \
28  barry::Hasher_fun_type<DEFMArray, DEFMCounterData> \
29  hasher = [cov](const DEFMArray & array, DEFMCounterData * d) -> \
30  std::vector< double > { \
31  std::vector< double > res; \
32  /* Adding the column feature */ \
33  for (size_t i = 0u; i < array.nrow(); ++i) \
34  res.push_back(array.D()(i, cov)); \
35  /* Adding the fixed dims */ \
36  for (size_t i = 0u; i < (array.nrow() - 1); ++i) \
37  for (size_t j = 0u; j < array.ncol(); ++j) \
38  res.push_back(array(i, j)); \
39  return res;\
40  };
41 
42 
46 
47 #define DEFM_COUNTER(a) \
48 inline double (a) (const DEFMArray & Array, size_t i, size_t j, DEFMCounterData & data)
49 
51 #define DEFM_COUNTER_LAMBDA(a) \
52 barry::Counter_fun_type<DEFMArray, DEFMCounterData> a = \
53  [](const DEFMArray & Array, size_t i, size_t j, DEFMCounterData & data) -> double
54 
56 
60 
61 #define DEFM_RULE(a) \
62 inline bool (a) (const DEFMArray & Array, size_t i, size_t j, bool & data)
63 
65 #define DEFM_RULE_LAMBDA(a) \
66 barry::Rule_fun_type<DEFMArray, DEFMRuleData> a = \
67 [](const DEFMArray & Array, size_t i, size_t j, DEFMRuleData & data) -> bool
69 
71 #define DEFM_RULEDYN_LAMBDA(a) \
72 barry::Rule_fun_type<DEFMArray, DEFMRuleDynData> a = \
73 [](const DEFMArray & Array, size_t i, size_t j, DEFMRuleDynData & data) -> bool
75 
82 // -----------------------------------------------------------------------------
89 inline void counter_ones(
91  int covar_index = -1,
92  std::string vname = "",
93  const std::vector< std::string > * x_names = nullptr
94 )
95 {
96 
97  // Weighted by a feature of the array
98  if (covar_index >= 0)
99  {
100 
101  MAKE_DEFM_HASHER(hasher, array, covar_index)
102 
103  DEFM_COUNTER_LAMBDA(counter_tmp)
104  {
105 
106  // Only count the current
107  if (i != (Array.nrow() - 1))
108  return 0.0;
109 
110  return Array.D()(i, data.idx(0u));
111 
112  };
113 
114  if (vname == "")
115  {
116  if (x_names != nullptr)
117  vname = x_names->operator[](covar_index);
118  else
119  vname = std::string("attr")+ std::to_string(covar_index);
120  }
121 
122  counters->add_counter(
123  counter_tmp, nullptr, hasher,
124  DEFMCounterData({static_cast<size_t>(covar_index)}, {}, {}, true),
125  "Num. of ones x " + vname,
126  "Overall number of ones"
127  );
128 
129  }
130  else
131  {
132 
133  DEFM_COUNTER_LAMBDA(count_ones)
134  {
135 
136  // Only count the current
137  if (i != (Array.nrow() - 1))
138  return 0.0;
139 
140  return 1.0;
141  };
142 
143  DEFMCounterData dat;
144  dat.is_motif = true;
145 
146  counters->add_counter(
147  count_ones, nullptr, nullptr,
148  dat, // DEFMCounterData(),
149  "Num. of ones",
150  "Overall number of ones"
151  );
152  }
153 
154  return;
155 
156 }
157 
158 
172  size_t n_y,
173  std::vector< size_t > which = {},
174  int covar_index = -1,
175  std::string vname = "",
176  const std::vector< std::string > * x_names = nullptr,
177  const std::vector< std::string > * y_names = nullptr
178 ) {
179 
180 
181  if (which.size() == 0u)
182  {
183  which.resize(n_y, 0u);
184  std::iota(which.begin(), which.end(), 0u);
185  } else {
186  for (auto w : which)
187  if (w >= n_y)
188  throw std::logic_error("Values in `which` are out of range.");
189  }
190 
191  // Case when no interaction happens, whatsoever.
192  if (covar_index < 0)
193  {
194 
195  DEFM_COUNTER_LAMBDA(tmp_counter)
196  {
197  if (i != (Array.nrow() - 1))
198  return 0.0;
199 
200  if (j != data.idx(0u))
201  return 0.0;
202 
203  return 1.0;
204  };
205 
206  for (auto i : which)
207  {
208 
209  if (y_names != nullptr)
210  vname = y_names->operator[](i);
211  else
212  vname = std::to_string(i);
213 
214  counters->add_counter(
215  tmp_counter, nullptr, nullptr,
216  DEFMCounterData({i}, {}, {}, false),
217  "Logit intercept " + vname,
218  "Equal to one if the outcome " + vname + " is one. Equivalent to the logistic regression intercept."
219  );
220 
221  }
222 
223  } else {
224 
225  DEFM_COUNTER_LAMBDA(tmp_counter)
226  {
227  if (i != Array.nrow() - 1)
228  return 0.0;
229 
230  if (j != data.idx(0u))
231  return 0.0;
232 
233  return Array.D()(i, data.idx(1u));
234  };
235 
236  MAKE_DEFM_HASHER(hasher, array, covar_index)
237  bool hasher_added = false;
238 
239  std::string yname;
240  for (auto i : which)
241  {
242 
243  if (y_names != nullptr)
244  yname = y_names->operator[](i);
245  else
246  yname = std::to_string(i);
247 
248  if (vname == "")
249  {
250  if (x_names != nullptr)
251  vname = x_names->operator[](covar_index);
252  else
253  vname = std::string("attr")+ std::to_string(covar_index);
254  }
255 
256  if (hasher_added)
257  counters->add_counter(
258  tmp_counter, nullptr, nullptr,
259  DEFMCounterData({i, static_cast<size_t>(covar_index)}, {}, {}, false),
260  "Logit intercept " + yname + " x " + vname,
261  "Equal to one if the outcome " + yname + " is one. Equivalent to the logistic regression intercept."
262  );
263  else {
264 
265  hasher_added = true;
266 
267  counters->add_counter(
268  tmp_counter, nullptr, hasher,
269  DEFMCounterData({i, static_cast<size_t>(covar_index)}, {}, {}, false),
270  "Logit intercept " + yname + " x " + vname,
271  "Equal to one if the outcome " + yname + " is one. Equivalent to the logistic regression intercept."
272  );
273 
274  }
275 
276  }
277 
278  }
279 
280 
281 }
282 
289 inline void counter_transition(
291  std::vector< size_t > coords,
292  std::vector< bool > signs,
293  size_t m_order,
294  size_t n_y,
295  int covar_index = -1,
296  std::string vname = "",
297  const std::vector< std::string > * x_names = nullptr,
298  const std::vector< std::string > * y_names = nullptr
299 )
300 {
301 
302  // A vector to store the type of dat
303  if (signs.size() == 0u)
304  signs.resize(coords.size(), true);
305  else if (signs.size() != coords.size())
306  throw std::length_error("Size of -coords- and -signs- must match.");
307 
308  if (covar_index >= 0)
309  coords.push_back(static_cast<size_t>(covar_index));
310  else
311  coords.push_back(1000u);
312 
313  DEFM_COUNTER_LAMBDA(count_init)
314  {
315 
316  auto indices = data.indices;
317  auto sgn = data.logical;
318  int covaridx = indices[indices.size() - 1u];
319 
320  // Notice that the indices vector contains:
321  // - 1st, the indices of the motif. That's why we set the lenght
322  // using -1.
323  // - the last is, the covariate index
324  for (size_t k = 0u; k < (indices.size() - 1u); ++k)
325  {
326  if (indices[k] >= (Array.ncol()* Array.nrow()))
327  throw std::range_error("The motif includes entries out of range.");
328  }
329 
330  // Counting
331  const auto & array = Array.get_data();
332  for (size_t k = 0u; k < (indices.size() - 1); ++k)
333  {
334  auto cellv = array[indices[k]];
335  if (sgn[k] && (cellv != 1))
336  return 0.0;
337  }
338 
339  // If nothing happens, then is one or the covaridx
340  return (covaridx < 1000) ? Array.D()(Array.nrow() - 1u, covaridx) : 1.0;
341 
342  };
343 
344  DEFM_COUNTER_LAMBDA(count_ones)
345  {
346 
347  auto dat = data.indices;
348  auto sgn = data.logical;
349  int covaridx = dat[dat.size() - 1u];
350 
351  // Checking if the observation is in the stat. We
352  const auto & array = Array.get_data();
353  size_t loc = i + j * Array.nrow();
354  size_t n_cells = dat.size() - 1u;
355 
356  // Only one currently needs to be a zero for it
357  // to change
358  size_t n_now = 0;
359  bool baseline_value = false;
360  bool i_in_array = false;
361  for (size_t e = 0u; e < n_cells; ++e)
362  {
363 
364  // Is the current cell in the list?
365  if (dat[e] == loc)
366  {
367  i_in_array = true;
368  baseline_value = sgn[e];
369  }
370 
371  if ((sgn[e] && (array[dat[e]] == 1)) || (!sgn[e] && (array[dat[e]] == 0)))
372  n_now++;
373 
374  }
375 
376  // If i in array still false, then no change
377  if (!i_in_array)
378  return 0.0;
379 
380  size_t n_prev = n_now;
381  if (baseline_value)
382  n_prev--;
383  else
384  n_prev++;
385 
386  // Computing stats
387  if (covaridx < 1000)
388  {
389 
390  double val = Array.D()(Array.nrow() - 1u, covaridx);
391  double value_now = n_now == n_cells ? val : 0.0;
392  double value_prev = n_prev == n_cells ? val : 0.0;
393 
394  return value_now - value_prev;
395 
396  }
397  else
398  {
399 
400  double value_now = n_now == n_cells ? 1.0 : 0.0;
401  double value_prev = n_prev == n_cells ? 1.0 : 0.0;
402 
403  return value_now - value_prev;
404 
405  }
406 
407  };
408 
409  // Creating name of the structure
410  std::string name;
411  if (coords.size() == 1u)
412  name = "";
413  else
414  name = "Motif ";
415 
416  // Creating an empty motif filled with zeros
417  barry::BArrayDense<int> motif(m_order + 1u, n_y, 0);
418 
419  // Filling the matrix in, negative values are 0s and 1s are... 1s.
420  // Zero are values not used.
421  size_t n_cells = coords.size() - 1u;
422  for (size_t d = 0u; d < n_cells; ++d)
423  {
424  size_t c = std::floor(coords[d] / (m_order + 1u));
425  size_t r = coords[d] - c * (m_order + 1u);
426  motif(r, c) = signs[d] ? 1 : -1;
427 
428  }
429 
430  // Checking if any prior to the event
431  bool any_before_event = false;
432 
433  for (size_t i = 0u; i < m_order; ++i)
434  {
435  for (size_t j = 0u; j < n_y; ++j)
436  {
437  if (motif(i,j) != 0)
438  {
439  any_before_event = true;
440  break;
441  }
442 
443  }
444  }
445 
446  #ifdef BARRY_WITH_LATEX
447  name += "$";
448  #endif
449 
450  if (any_before_event)
451  #ifdef BARRY_WITH_LATEX
452  name += "(";
453  #else
454  name += "{";
455  #endif
456 
457  #ifdef BARRY_WITH_LATEX
458  #define UNI_SUB(a) \
459  (\
460  ((a) == 0) ? "_0" : (\
461  ((a) == 1) ? "_1" : (\
462  ((a) == 2) ? "_2" : (\
463  ((a) == 3) ? "_3" : (\
464  ((a) == 4) ? "_4" : (\
465  ((a) == 5) ? "_5" : (\
466  ((a) == 6) ? "_6" : (\
467  ((a) == 7) ? "_7" : (\
468  ((a) == 8) ? "_8" : \
469  "_9"))))))))\
470  )
471  #else
472  #define UNI_SUB(a) \
473  (\
474  ((a) == 0) ? u8"\u2080" : (\
475  ((a) == 1) ? u8"\u2081" : (\
476  ((a) == 2) ? u8"\u2082" : (\
477  ((a) == 3) ? u8"\u2083" : (\
478  ((a) == 4) ? u8"\u2084" : (\
479  ((a) == 5) ? u8"\u2085" : (\
480  ((a) == 6) ? u8"\u2086" : (\
481  ((a) == 7) ? u8"\u2087" : (\
482  ((a) == 8) ? u8"\u2088" : \
483  u8"\u2089"))))))))\
484  )
485  #endif
486 
487  // If order is greater than zero, the starting point of the transtion
488  for (size_t i = 0u; i < m_order; ++i)
489  {
490 
491  bool row_start = true;
492  for (size_t j = 0u; j < n_y; ++j)
493  {
494 
495  // Is it included?
496  if (motif(i,j) == 0)
497  continue;
498 
499  // Is not the first?
500  if (row_start)
501  row_start = false;
502  else
503  name += ", ";
504 
505  if (y_names != nullptr)
506  name += y_names->operator[](j);
507  else
508  name += (std::string("y") + std::to_string(j));
509 
510  #ifdef BARRY_WITH_LATEX
511  name += (motif(i,j) < 0 ? "^-" : "^+");
512  #else
513  name += (motif(i,j) < 0 ? u8"\u207B" : u8"\u207A");
514  #endif
515 
516  }
517 
518  }
519 
520  // If it has starting point, then need to close.
521  if (any_before_event & (m_order > 0u))
522  #ifdef BARRY_WITH_LATEX
523  name += ") -> (";
524  #else
525  name += std::string("}") + u8"\u21E8" + std::string("{");
526  #endif
527  else
528  #ifdef BARRY_WITH_LATEX
529  name += "(";
530  #else
531  name += "{";
532  #endif
533 
534  // Looking onto the transtions
535  bool row_start = true;
536  for (size_t j = 0u; j < n_y; ++j)
537  {
538 
539  if (motif(m_order, j) == 0)
540  continue;
541 
542  if (row_start)
543  row_start = false;
544  else
545  name += ", ";
546 
547  if (y_names != nullptr)
548  name += y_names->operator[](j);
549  else
550  name += (std::string("y") + std::to_string(j));
551 
552  #ifdef BARRY_WITH_LATEX
553  name += (motif(m_order, j) < 0 ? "^-" : "^+" );
554  #else
555  name += (motif(m_order, j) < 0 ? u8"\u207B" : u8"\u207A" );
556  #endif
557 
558 
559  }
560 
561  #undef UNI_SUB
562 
563  #ifdef BARRY_WITH_LATEX
564  name += ")$";
565  #else
566  name += "}";
567  #endif
568 
569  if (covar_index >= 0)
570  {
571 
572  MAKE_DEFM_HASHER(hasher, array, covar_index)
573 
574  if (vname == "")
575  {
576  if (x_names != nullptr)
577  vname = x_names->operator[](covar_index);
578  else
579  vname = std::string("attr")+ std::to_string(covar_index);
580  }
581 
582  counters->add_counter(
583  count_ones, count_init, hasher,
584  DEFMCounterData(coords, {}, signs, coords.size() > 1u ? true : false),
585  name + " x " + vname,
586  "Motif weighted by single attribute"
587  );
588 
589  } else {
590 
591  counters->add_counter(
592  count_ones, count_init, nullptr,
593  DEFMCounterData(coords, {}, signs, coords.size() > 1u ? true : false),
594  name,
595  "Motif"
596  );
597 
598  }
599 
600 
601  return;
602 
603 }
604 
613  std::string formula,
614  size_t m_order,
615  size_t n_y,
616  int covar_index = -1,
617  std::string vname = "",
618  const std::vector< std::string > * x_names = nullptr,
619  const std::vector< std::string > * y_names = nullptr
620 ) {
621 
622  std::vector< size_t > coords;
623  std::vector< bool > signs;
624  std::string covar_name = "";
625 
627  formula, coords, signs, m_order, n_y, covar_name, vname
628  );
629 
630  if ((covar_name != "") && (covar_index >= 0))
631  throw std::logic_error("Can't have both a formula and a covariate index.");
632 
633  if (covar_name != "")
634  {
635 
636  if (x_names != nullptr)
637  {
638  for (size_t i = 0u; i < x_names->size(); ++i)
639  if (x_names->operator[](i) == covar_name)
640  {
641  covar_index = static_cast<int>(i);
642  break;
643  }
644  }
645 
646  if (covar_index < 0)
647  throw std::logic_error(
648  std::string("The covariate name '") +
649  covar_name +
650  std::string("' was not found in the list of covariates.")
651  );
652 
653  }
654 
655  // Checking the number of coords, could be single intercept
656  if (coords.size() == 1u)
657  {
658 
659  // Getting the column
660  size_t coord = static_cast< size_t >(
661  std::floor(
662  static_cast<double>(coords[0u]) / static_cast<double>(m_order + 1)
663  ));
664 
666  counters, n_y, {coord},
667  covar_index,
668  vname,
669  x_names,
670  y_names
671  );
672 
673  }
674  else
675  {
676 
678  counters, coords, signs, m_order, n_y, covar_index, vname,
679  x_names, y_names
680  );
681 
682  }
683 
684 
685 }
686 
695  int covar_index,
696  double k,
697  std::string vname = "",
698  const std::vector< std::string > * x_names = nullptr
699 )
700 {
701 
702  DEFM_COUNTER_LAMBDA(count_init)
703  {
704  return std::pow(Array.D()((size_t) i, data.idx(0u)), data.num(0u));
705  };
706 
707  DEFM_COUNTER_LAMBDA(count_tmp)
708  {
709  return 0.0;
710  };
711 
712  MAKE_DEFM_HASHER(hasher, array, covar_index)
713 
714  if (x_names != nullptr)
715  vname = x_names->operator[](covar_index);
716  else
717  vname = std::string("attr")+ std::to_string(covar_index);
718 
719  counters->add_counter(
720  count_tmp, count_init, hasher,
721  DEFMCounterData({static_cast<size_t>(covar_index)}, {k}, {}),
722  "Fixed effect feature (" + vname + ")^" + std::to_string(k)
723  );
724 
725  return;
726 
727 }
728 
734 // -----------------------------------------------------------------------------
736 inline void rules_markov_fixed(
737  DEFMRules * rules,
738  size_t markov_order
739  ) {
740 
741  DEFM_RULE_LAMBDA(no_self_tie) {
742  return i >= data.idx(0u);
743  };
744 
745  rules->add_rule(
746  no_self_tie,
747  DEFMRuleData({},{markov_order}),
748  std::string("Markov model of order ") + std::to_string(markov_order),
749  std::string("Blocks the first morder cells of the array.")
750  );
751 
752  return;
753 }
754 
762  DEFMSupport * support,
763  std::vector<size_t> ids
764  ) {
765 
766  DEFM_RULE_LAMBDA(rule) {
767 
768  if (!data.init)
769  {
770  std::vector< size_t > tmp(Array.ncol(), 0u);
771 
772  for (auto v : data.indices)
773  {
774  if (v >= Array.ncol())
775  throw std::range_error("The specified id for `dont_become_zero` is out of range.");
776 
777  tmp[v] = 1u;
778  }
779 
780  data.indices.resize(Array.ncol());
781  for (size_t v = 0u; v < tmp.size(); ++v)
782  data.indices[v] = tmp[v];
783 
784  data.init = true;
785  }
786 
787  // If not considered, then continue
788  if (data.indices[j] == 0u)
789  return true;
790 
791  // The data outside of the markov chain is checked by other rule
792  if (i != (Array.nrow() - 1))
793  return true;
794 
795  // This is now one, is the next different zero? If so,
796  // we can include it (1->1)
797  return (Array(i - 1, j) != 1) || (Array(i, j) != 1);
798 
799  };
800 
801  support->get_rules()->add_rule(
802  rule,
803  DEFMRuleData({}, {ids}),
804  std::string("Ones can't become zero"),
805  std::string("Blocks cells that have became equal to one.")
806  );
807 
808  return;
809 }
810 
821  DEFMSupport * support,
822  size_t pos,
823  double lb,
824  double ub
825 )
826 {
827 
828  DEFM_RULEDYN_LAMBDA(tmp_rule)
829  {
830 
831  if (data() < data.lb)
832  return false;
833  else if (data() > data.ub)
834  return false;
835  else
836  return true;
837 
838  };
839 
840 
841  support->get_rules_dyn()->add_rule(
842  tmp_rule,
844  support->get_current_stats(),
845  pos, lb, ub
846  ),
847  support->get_counters()->get_names()[pos] +
848  "' within [" + std::to_string(lb) + ", " +
849  std::to_string(ub) + std::string("]"),
850  std::string("When the support is ennumerated, only states where the statistic '") +
851  support->get_counters()->get_names()[pos] +
852  std::to_string(pos) + "' falls within [" + std::to_string(lb) + ", " +
853  std::to_string(ub) + "] are included."
854  );
855 
856  return;
857 
858 }
859 
860 
862 
864 
865 #endif
Data class used to store arbitrary size_t or double vectors.
Definition: defm-types.hpp:66
Data_Type &&counter_ name(std::move(counter_.name))
Data_Type &&counter_ data(std::move(counter_.data))
Data_Type hasher(counter_.hasher)
size_t size_t j
size_t i
#define DEFM_COUNTER_LAMBDA(a)
Definition: counters.hpp:51
#define DEFM_RULE_LAMBDA(a)
Definition: counters.hpp:65
#define DEFM_RULEDYN_LAMBDA(a)
Definition: counters.hpp:71
void defm_motif_parser(std::string formula, std::vector< size_t > &locations, std::vector< bool > &signs, size_t m_order, size_t y_ncol, std::string &covar_name, std::string &vname)
Parses a motif formula.
Definition: formula.hpp:46
void counter_transition(DEFMCounters *counters, std::vector< size_t > coords, std::vector< bool > signs, size_t m_order, size_t n_y, int covar_index=-1, std::string vname="", const std::vector< std::string > *x_names=nullptr, const std::vector< std::string > *y_names=nullptr)
Prevalence of ones.
Definition: counters.hpp:289
void rules_markov_fixed(DEFMRules *rules, size_t markov_order)
Number of edges.
Definition: counters.hpp:736
void counter_ones(DEFMCounters *counters, int covar_index=-1, std::string vname="", const std::vector< std::string > *x_names=nullptr)
Prevalence of ones.
Definition: counters.hpp:89
void counter_fixed_effect(DEFMCounters *counters, int covar_index, double k, std::string vname="", const std::vector< std::string > *x_names=nullptr)
Prevalence of ones.
Definition: counters.hpp:693
void counter_logit_intercept(DEFMCounters *counters, size_t n_y, std::vector< size_t > which={}, int covar_index=-1, std::string vname="", const std::vector< std::string > *x_names=nullptr, const std::vector< std::string > *y_names=nullptr)
Definition: counters.hpp:170
void rule_constrain_support(DEFMSupport *support, size_t pos, double lb, double ub)
Overall functional gains.
Definition: counters.hpp:820
void rules_dont_become_zero(DEFMSupport *support, std::vector< size_t > ids)
Blocks switching a one to zero.
Definition: counters.hpp:761
void counter_transition_formula(DEFMCounters *counters, std::string formula, size_t m_order, size_t n_y, int covar_index=-1, std::string vname="", const std::vector< std::string > *x_names=nullptr, const std::vector< std::string > *y_names=nullptr)
Prevalence of ones.
Definition: counters.hpp:611
#define MAKE_DEFM_HASHER(hasher, a, cov)
Data for the counters.
Definition: counters.hpp:27
bool is_motif
If false, then is a logit intercept.
Definition: defm-types.hpp:72
barry::Counters< DEFMArray, DEFMCounterData > DEFMCounters
Definition: defm-types.hpp:187
barry::Rules< DEFMArray, DEFMRuleData > DEFMRules
Definition: defm-types.hpp:194
barry::Support< DEFMArray, DEFMCounterData, DEFMRuleData, DEFMRuleDynData > DEFMSupport
Definition: defm-types.hpp:188