barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
geese-bones.hpp
Go to the documentation of this file.
1 #ifndef GEESE_BONES_HPP
2 #define GEESE_BONES_HPP 1
3 
4 // #include <vector>
5 // #include <algorithm>
6 // #include <random>
7 // #include <stdexcept>
8 
9 template<typename Ta, typename Tb>
10 inline std::vector< Ta > vector_caster(const std::vector< Tb > & x) {
11 
12  std::vector< Ta > ans;
13  ans.reserve(x.size());
14 
15  for (auto i = x.begin(); i != x.end(); ++i)
16  ans.push_back(static_cast< Ta >(*i));
17 
18  return ans;
19 
20 }
21 
22 #define INITIALIZED() if (!this->initialized) \
23  throw std::logic_error("The model has not been initialized yet.");
24 
25 // The same need to be locked
26 RULE_FUNCTION(rule_empty_free) {
27 
28  return Array(i, j) == 9u;
29 
30 }
31 
32 
33 
34 // Hasher
35 
36 inline std::vector< double > keygen_full(
37  const PhyloArray & array,
38  const PhyloCounterData * d
39  ) {
40 
41  // Baseline data: nrows and columns
42  std::vector< double > dat = {
43  static_cast<double>(array.nrow()) * 100000 +
44  static_cast<double>(array.ncol()),
45  // state of the parent
46  1000000.0,
47  // type of the parent
48  array.D_ptr()->duplication ? 1.0 : 0.0,
49  // Annotations with zeros
50  0.0
51  };
52 
53  // State of the parent
54  double pow10 = 1.0;
55  for (bool i : array.D_ptr()->states) {
56  dat[1u] += (i ? 1.0 : 0.0) * pow10;
57  pow10 *= 10.0;
58  }
59 
60  // // Annotations with zeros
61  // pow10 = 1.0;
62  // for (const auto & cell: array.get_data()) {
63  // dat[3u] += (cell == 9u ? 2.0 : static_cast<double>(cell)) * pow10;
64  // pow10 *= 10.0;
65  // }
66 
67  return dat;
68 
69 }
70 
71 inline bool vec_diff(
72  const std::vector< size_t > & s,
73  const std::vector< size_t > & a
74 ) {
75 
76  for (size_t i = 0u; i < a.size(); ++i)
77  if ((a[i] != 9u) && (a[i] != s[i]))
78  return true;
79 
80  return false;
81 }
82 
83 class Flock;
84 
114 class Geese {
115  friend Flock;
116 private:
117 
129  std::mt19937 * rengine = nullptr;
130  PhyloModel * model = nullptr;
131  std::vector< std::vector< bool > > states;
132  size_t n_zeros = 0u;
133  size_t n_ones = 0u;
134  size_t n_dupl_events = 0u;
135  size_t n_spec_events = 0u;
137 
138 public:
139 
140  // Data
141  size_t nfunctions;
142  std::map< size_t, Node > nodes;
143 
144  barry::MapVec_type< size_t > map_to_state_id;
145  std::vector< std::vector< std::vector< size_t > > > pset_loc;
146 
147  // Tree-traversal sequence
148  std::vector< size_t > sequence;
149  std::vector< size_t > reduced_sequence;
150 
151  // Admin-related objects
152  bool initialized = false;
153  bool delete_rengine = false;
154  bool delete_support = false;
155 
156  // Information about the type of event
157 
158  /***
159  * @name Information about the type of event
160  * @details
161  * The type of event is stored in the `etype` member. The possible values
162  * are `etype_default`, `etype_speciation`, `etype_duplication`, and
163  * `etype_either`.
164  *
165  */
167  static const size_t etype_default = 1ul;
168  static const size_t etype_speciation = 0ul;
169  static const size_t etype_duplication = 1ul;
170  static const size_t etype_either = 2ul;
172 
193  Geese();
194 
195  Geese(
196  std::vector< std::vector<size_t> > & annotations,
197  std::vector< size_t > & geneid,
198  std::vector< int > & parent,
199  std::vector< bool > & duplication
200  );
201 
202  // Copy constructor
203  Geese(const Geese & model_, bool copy_data = true);
204 
205  // Constructor move
206  Geese(Geese && x) noexcept;
207 
208  // Copy assignment
209  Geese & operator=(const Geese & model_) = delete;
210 
211  // // Move assignment
212  Geese & operator=(Geese && model_) noexcept = delete;
213 
215 
216  ~Geese();
217 
218  void init(size_t bar_width = BARRY_PROGRESS_BAR_WIDTH);
219 
220  void inherit_support(const Geese & model_, bool delete_support_ = false);
221 
222  // Node * operator()(size_t & nodeid);
223  void calc_sequence(Node * n = nullptr);
224  void calc_reduced_sequence();
225 
226  double likelihood(
227  const std::vector< double > & par,
228  bool as_log = false,
229  bool use_reduced_sequence = true,
230  size_t ncores = 1u,
231  bool no_update_pset_probs = false
232  );
233 
234  double likelihood_exhaust(const std::vector< double > & par);
235 
236  std::vector< double > get_probabilities() const;
237 
238  void set_seed(const size_t & s);
239  std::vector< std::vector< size_t > > simulate(
240  const std::vector< double > & par
241  );
242 
249  size_t nfuns() const noexcept;
250  size_t nnodes() const noexcept;
251  size_t nleafs() const noexcept;
252  size_t nterms() const;
253  size_t support_size() const noexcept;
254  std::vector< size_t > nannotations() const noexcept;
255  std::vector< std::string > colnames() const;
256  size_t parse_polytomies(
257  bool verb = true,
258  std::vector< size_t > * dist = nullptr
259  ) const noexcept;
260 
262 
263  std::vector< std::vector<double> > observed_counts();
264  void print_observed_counts();
265 
269  void print() const;
270  void print_nodes() const;
271 
272 
294  std::vector< std::vector< double > > predict(
295  const std::vector< double > & par,
296  std::vector< std::vector< double > > * res_prob = nullptr,
297  bool leave_one_out = false,
298  bool only_annotated = false,
299  bool use_reduced_sequence = true
300  );
301 
302  std::vector< std::vector<double> > predict_backend(
303  const std::vector< double > & par,
304  bool use_reduced_sequence,
305  const std::vector< size_t > & preorder
306  );
307 
308  std::vector< std::vector< double > > predict_exhaust_backend(
309  const std::vector< double > & par,
310  const std::vector< size_t > & preorder
311  );
312 
313  std::vector< std::vector< double > > predict_exhaust(
314  const std::vector< double > & par
315  );
316 
317  std::vector< std::vector< double > > predict_sim(
318  const std::vector< double > & par,
319  bool only_annotated = false,
320  size_t nsims = 10000u
321  );
323 
324  void init_node(Node & n);
325  void update_annotations(
326  size_t nodeid,
327  std::vector< size_t > newann
328  );
329 
342  std::mt19937 * get_rengine();
344  PhyloModel * get_model();
347 
356  std::vector< std::vector< bool > > get_states() const;
357  std::vector< size_t > get_annotated_nodes() const;
358  std::vector< size_t > get_annotations() const;
359 
360 };
361 
362 #endif
A Flock is a group of Geese.
Definition: flock-bones.hpp:14
Annotated Phylo Model.
std::vector< size_t > reduced_sequence
std::vector< std::vector< bool > > get_states() const
Powerset of a gene's possible states.
Definition: geese-meat.hpp:767
std::vector< size_t > sequence
void calc_reduced_sequence()
Definition: geese-meat.hpp:361
static const size_t etype_either
std::vector< std::vector< std::vector< size_t > > > pset_loc
Locations of columns.
std::vector< std::vector< double > > predict(const std::vector< double > &par, std::vector< std::vector< double > > *res_prob=nullptr, bool leave_one_out=false, bool only_annotated=false, bool use_reduced_sequence=true)
std::vector< size_t > get_annotations() const
Returns the annotations of the nodes with at least one annotation.
Definition: geese-meat.hpp:794
void print() const
Prints information about the GEESE.
Definition: geese-meat.hpp:658
bool delete_rengine
PhyloSupport * get_support_fun()
Definition: geese-meat.hpp:763
PhyloCounters * get_counters()
Definition: geese-meat.hpp:754
size_t support_size() const noexcept
Number of unique sets of sufficient stats.
Definition: geese-meat.hpp:463
void init_node(Node &n)
Definition: geese-meat.hpp:6
void print_observed_counts()
Definition: geese-meat.hpp:595
size_t nfunctions
size_t parse_polytomies(bool verb=true, std::vector< size_t > *dist=nullptr) const noexcept
Check polytomies and return the largest.
Definition: geese-meat.hpp:489
std::vector< std::string > colnames() const
Names of the terms in the model.
Definition: geese-meat.hpp:482
void init(size_t bar_width=BARRY_PROGRESS_BAR_WIDTH)
Definition: geese-meat.hpp:131
size_t nnodes() const noexcept
Number of nodes (interior + leaf)
Definition: geese-meat.hpp:437
Geese & operator=(Geese &&model_) noexcept=delete
std::vector< double > get_probabilities() const
Definition: geese-meat.hpp:409
std::vector< std::vector< double > > predict_sim(const std::vector< double > &par, bool only_annotated=false, size_t nsims=10000u)
PhyloModel * get_model()
Definition: geese-meat.hpp:759
double likelihood(const std::vector< double > &par, bool as_log=false, bool use_reduced_sequence=true, size_t ncores=1u, bool no_update_pset_probs=false)
std::vector< std::vector< double > > observed_counts()
Definition: geese-meat.hpp:524
Geese & operator=(const Geese &model_)=delete
size_t nleafs() const noexcept
Number of leaf.
Definition: geese-meat.hpp:444
std::vector< std::vector< double > > predict_exhaust_backend(const std::vector< double > &par, const std::vector< size_t > &preorder)
bool delete_support
static const size_t etype_duplication
bool initialized
void calc_sequence(Node *n=nullptr)
Definition: geese-meat.hpp:317
void inherit_support(const Geese &model_, bool delete_support_=false)
Definition: geese-meat.hpp:260
std::vector< size_t > get_annotated_nodes() const
Returns the ids of the nodes with at least one annotation.
Definition: geese-meat.hpp:771
std::vector< std::vector< size_t > > simulate(const std::vector< double > &par)
std::vector< size_t > nannotations() const noexcept
Number of annotations.
Definition: geese-meat.hpp:473
void print_nodes() const
Definition: geese-meat.hpp:676
void update_annotations(size_t nodeid, std::vector< size_t > newann)
Definition: geese-meat.hpp:288
std::vector< std::vector< double > > predict_exhaust(const std::vector< double > &par)
std::map< size_t, Node > nodes
static const size_t etype_speciation
std::vector< std::vector< double > > predict_backend(const std::vector< double > &par, bool use_reduced_sequence, const std::vector< size_t > &preorder)
std::mt19937 * get_rengine()
Definition: geese-meat.hpp:749
barry::MapVec_type< size_t > map_to_state_id
size_t nterms() const
Number of terms included.
Definition: geese-meat.hpp:456
size_t nfuns() const noexcept
Number of functions analyzed.
Definition: geese-meat.hpp:430
void set_seed(const size_t &s)
double likelihood_exhaust(const std::vector< double > &par)
static const size_t etype_default
A single node for the model.
size_t size_t j
size_t i
Data_Type &&counter_ noexcept
RULE_FUNCTION(rule_empty_free)
Definition: geese-bones.hpp:26
std::vector< Ta > vector_caster(const std::vector< Tb > &x)
Definition: geese-bones.hpp:10
std::vector< double > keygen_full(const PhyloArray &array, const PhyloCounterData *d)
Definition: geese-bones.hpp:36
bool vec_diff(const std::vector< size_t > &s, const std::vector< size_t > &a)
Definition: geese-bones.hpp:71
barry::Counters< PhyloArray, PhyloCounterData > PhyloCounters
barry::Model< PhyloArray, PhyloCounterData, PhyloRuleData, PhyloRuleDynData > PhyloModel
barry::BArrayDense< size_t, NodeData > PhyloArray
barry::Support< PhyloArray, PhyloCounterData, PhyloRuleData, PhyloRuleDynData > PhyloSupport
#define BARRY_PROGRESS_BAR_WIDTH
Definition: progress.hpp:5