barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
geese-meat-predict_exhaust.hpp
Go to the documentation of this file.
1 
2 #ifndef GEESE_MEAT_PREDICT_EXHAUST_HPP
3 #define GEESE_MEAT_PREDICT_EXHAUST_HPP 1
4 
5 inline std::vector< std::vector<double> > Geese::predict_exhaust(
6  const std::vector< double > & par
7 ) {
8 
10 
11  // This is only worthwhile if the number of nodes is small
12  if (this->nnodes() > 6)
13  throw std::overflow_error("Too many nodes! Exhaust calculation of likelihood cannot be done for such cases.");
14 
15  if (this->nfuns() > 2)
16  throw std::overflow_error("Too many functions! Exhaust calculation of prediction cannot be done for such cases.");
17 
18 
19  // Generating the sequence preorder sequence -------------------------------
20  std::vector< size_t > preorder(this->sequence);
21  std::reverse(preorder.begin(), preorder.end());
22 
23  std::vector< std::vector< double > > res = predict_exhaust_backend(
24  par, preorder
25  );
26 
27  // Looping to do LOO
28  std::vector< size_t > annotated_ids = this->get_annotated_nodes();
29  std::vector< size_t > missing_vec(nfuns(), 9u);
30  for (auto & i : annotated_ids) {
31 
32  Node & n = nodes[i];
33 
34  auto old_ann = n.annotations;
35  update_annotations(i, missing_vec);
36 
37  res[n.ord] = predict_exhaust_backend(par, preorder)[n.ord];
38 
39  update_annotations(i, old_ann);
40 
41  }
42 
43  return res;
44 
45 }
46 
47 inline std::vector< std::vector<double> > Geese::predict_exhaust_backend(
48 
49  const std::vector< double > & par,
50  const std::vector< size_t > & preorder
51 ) {
52 
53  // Processing the probabilities --------------------------------------------
54  std::vector< double > par_terms(par.begin(), par.end() - nfuns());
55  std::vector< double > par_root(par.end() - nfuns(), par.end());
56 
57  // Scaling root
58  for (auto& p : par_root)
59  p = std::exp(p)/(std::exp(p) + 1);
60 
61  double baseline_likelihood = this->likelihood(par);
62 
63  // Computing all combinations ----------------------------------------------
64  // The base PhyloArray will store the original set of annotations.
65  PhyloArray base(nfuns(), nnodes());
66  for (auto& n : nodes)
67  {
68 
69  for (size_t f = 0u; f < nfuns(); ++f)
70  base(f, n.second.ord) = n.second.annotations[f];
71 
72  }
73 
74  PhyloPowerSet pset(base);//this->nfuns(), this->nnodes());
75  pset.add_rule(
76  rule_empty_free<PhyloArray,PhyloRuleData>,
78  );
79  pset.calc();
80 
81  // Making space for the expected values
82  std::vector< double > expected(nnodes() * nfuns(), 0.0);
83 
84  // This vector says whether the probability has to be included in
85  // the final likelihood or not.
86  for (size_t p = 0u; p < pset.size(); ++p)
87  {
88 
89  // ith state
90  const PhyloArray * s = &pset[p];
91 
92  // Computing the likelihood of the state s
93  double current_prob = 1.0;
94  for (auto & o: preorder)
95  {
96  // Getting the corresponding node
97  Node & n = nodes[o];
98 
99  // Nothing to do at the leaf level (leafs are calculated from parents)
100  if (n.is_leaf())
101  continue;
102 
103  // Extracting the parent column (without checking boundaries)
104  auto par_state = s->get_col_vec(n.ord, false);
105 
106  // Need to compute the root probability (if we havent before)
107  if (n.parent == nullptr)
108  {
109 
110  for (size_t f = 0u; f < nfuns(); ++f)
111  current_prob *= par_state[f] ? par_root[f] : (1.0 - par_root[f]);
112 
113  }
114 
115  // Generating a copy of the observed array
116  // (data is copied so that we can chage the state of the parent)
117  PhyloArray tmparray(n.array, true);
118 
119  // Updating the state of the parent
120  for (size_t f = 0u; f < nfuns(); ++f)
121  tmparray.D_ptr()->states[f] = par_state[f] == 1u;
122 
123  // Updating offspring annotations
124  int loc = 0;
125  for (auto & off : n.offspring) {
126 
127  for (size_t f = 0u; f < nfuns(); ++f)
128  {
129 
130  if (s->operator()(f, off->ord) == 1u)
131  tmparray(f, loc) = 1u;
132  else
133  tmparray.rm_cell(f, loc);
134 
135  }
136 
137  // Next offspring start in the next column of the array, Duh.
138  ++loc;
139 
140  }
141 
142  // Computing the likelihood
143  current_prob *= model->likelihood(par_terms, tmparray, -1, false);
144 
145  }
146  // this->update_annotations(n.second.id, s->get_col_vec(n.second.ord));
147 
148  // Adding to the overall probability
149  for (auto & n: nodes)
150  for (size_t j = 0u; j < nfuns(); ++j)
151  expected[n.second.ord + j * nnodes()] += s->operator()(j, n.second.ord) * current_prob/
152  baseline_likelihood;
153 
154  }
155 
156  // Coercing expected to a list vector
157  std::vector< std::vector< double > > res(nnodes());
158  std::vector< double > zerovec(nfuns(), 0.0);
159  for (auto & n: nodes)
160  {
161  res[n.second.ord] = zerovec;
162  for (size_t i = 0u; i < nfuns(); ++i)
163  res[n.second.ord][i] = expected[n.second.ord + i * nnodes()];
164  }
165 
166  return res;
167 
168 }
169 #endif
std::vector< size_t > sequence
size_t nnodes() const noexcept
Number of nodes (interior + leaf)
Definition: geese-meat.hpp:437
double likelihood(const std::vector< double > &par, bool as_log=false, bool use_reduced_sequence=true, size_t ncores=1u, bool no_update_pset_probs=false)
std::vector< std::vector< double > > predict_exhaust_backend(const std::vector< double > &par, const std::vector< size_t > &preorder)
std::vector< size_t > get_annotated_nodes() const
Returns the ids of the nodes with at least one annotation.
Definition: geese-meat.hpp:771
void update_annotations(size_t nodeid, std::vector< size_t > newann)
Definition: geese-meat.hpp:288
std::vector< std::vector< double > > predict_exhaust(const std::vector< double > &par)
std::map< size_t, Node > nodes
size_t nfuns() const noexcept
Number of functions analyzed.
Definition: geese-meat.hpp:430
A single node for the model.
std::vector< size_t > annotations
Observed annotations (only defined for Geese)
PhyloArray array
Array of the node.
std::vector< Node * > offspring
Offspring nodes.
Node * parent
Parent node.
size_t ord
Order in which the node was created.
bool is_leaf() const noexcept
return res
size_t size_t j
size_t i
#define INITIALIZED()
Definition: geese-bones.hpp:22
std::vector< std::pair< size_t, size_t > > PhyloRuleData
barry::BArrayDense< size_t, NodeData > PhyloArray
barry::PowerSet< PhyloArray, PhyloRuleData > PhyloPowerSet