3 #ifndef GEESE_MEAT_PREDICT_HPP
4 #define GEESE_MEAT_PREDICT_HPP 1
7 const std::vector< double > & par,
8 bool use_reduced_sequence,
9 const std::vector< size_t > & preorder
15 std::vector< double > par_terms(par.begin(), par.end() -
nfuns());
16 std::vector< double > par_root(par.end() -
nfuns(), par.end());
19 for (
auto& p : par_root)
20 p = std::exp(p)/(std::exp(p) + 1);
23 std::vector< double > rootp(this->states.size(), 1.0);
24 for (
size_t s = 0u; s < rootp.size(); ++s)
27 for (
size_t f = 0u; f <
nfuns(); ++f)
28 rootp[s] *= states[s][f] ? par_root[f] : (1.0 - par_root[f]);
33 std::vector< std::vector<double> >
res(
38 std::vector< double > tmp_prob(
nfuns(), 0.0);
39 size_t root_id = preorder[0u];
42 double tmp_likelihood =
likelihood(par,
false, use_reduced_sequence, 1u,
true);
44 if (!std::isfinite(tmp_likelihood))
46 throw std::runtime_error(
"Likelihood is not finite");
49 for (
size_t s = 0u; s < states.size(); ++s)
58 throw std::runtime_error(
"Probability is not finite");
62 for (
size_t f = 0u; f <
nfuns(); ++f)
79 const auto & pset_probs = *(model->get_pset_probs());
80 const auto & arrays2support = *(model->get_arrays2support());
81 const auto & pset_locations = *(model->get_pset_locations());
84 for (
auto &
i : preorder)
93 std::vector< std::vector< double > > everything_below;
94 std::vector< std::vector< double > > everything_above;
96 everything_below.reserve(states.size());
97 everything_above.reserve(states.size());
102 std::vector< std::vector< PhyloArray > > psets;
103 psets.reserve(states.size());
108 off->probability.resize(states.size(), 0.0);
109 std::fill(off->probability.begin(), off->probability.end(), 0.0);
113 for (
size_t s = 0u; s < states.size(); ++s)
115 std::vector< double > below;
116 std::vector< double > above;
117 std::vector< PhyloArray > pset;
120 size_t array_id = parent.
narray[s];
121 size_t support_id = arrays2support[array_id];
124 const auto & pset_arrays = model->get_pset(array_id);
127 for (
size_t p = 0u; p < pset_arrays->size(); ++p)
131 const PhyloArray & array_p = pset_arrays->at(p);
135 bool in_the_set =
true;
139 double everything_below_p = 1.0;
140 for (
size_t off = 0u; off < parent.
offspring.size(); ++off)
149 const auto & off_ann = parent.
offspring[off]->annotations;
150 for (
size_t f = 0u; f <
nfuns(); ++f)
153 if ((off_ann[f] != 9u) && (off_ann[f] != array_p(f, off)))
170 const auto & off_state = array_p.get_col_vec(off);
173 everything_below_p *= parent.
offspring[off]->subtree_prob[loc];
183 pset.push_back(array_p);
188 below.push_back(everything_below_p);
192 pset_probs[pset_locations[support_id] + p]
203 psets.push_back(std::move(pset));
204 everything_below.push_back(std::move(below));
205 everything_above.push_back(std::move(above));
211 for (
size_t s = 0u; s < states.size(); ++s)
214 for (
size_t p = 0u; p < everything_above[s].size(); ++p)
218 const auto & pset_p = psets[s][p];
221 everything_above[s][p] *= everything_below[s][p];
223 for (
size_t off = 0u; off < parent.
offspring.size(); ++off)
227 auto cvec = pset_p.get_col_vec(off);
229 parent.
offspring[off]->probability[off_s] += everything_above[s][p];
232 for (
size_t f = 0u; f <
nfuns(); ++f)
235 res[parent.
offspring[off]->ord][f] += everything_above[s][p];
247 for (
const auto & off : parent.
offspring)
251 for (
size_t f = 0u; f <
nfuns(); ++f)
253 if ((
res[off->ord][f] > 1.00001) || (
res[off->ord][f] < -.0000009))
255 auto msg =
"[geese] Out-of-range probability for node.id " +
256 std::to_string(off->id) +
" for function " +
257 std::to_string(f) +
": " +
258 std::to_string(
res[off->ord][f]);
260 throw std::logic_error(msg);
264 if (
res[off->ord][f] > 1.0)
265 res[off->ord][f] = 1.0;
266 else if (
res[off->ord][f] < 0.0)
267 res[off->ord][f] = 0.0;
280 const std::vector< double > & par,
281 std::vector< std::vector< double > > * res_prob,
284 bool use_reduced_sequence
291 std::vector< size_t > preorder;
297 std::reverse(preorder.begin(), preorder.end());
299 std::vector< double > par0(par.begin(), par.end() -
nfuns());
300 model->update_pset_probs(par0, 1u);
305 par, use_reduced_sequence, preorder
309 if (res_prob !=
nullptr)
312 res_prob->resize(
nnodes());
323 std::vector< size_t > default_empty(
nfuns(), 9u);
324 for (
auto& n :
nodes)
327 if (n.second.is_leaf())
330 Node & ntmp = n.second;
361 if (res_prob !=
nullptr)
std::vector< size_t > reduced_sequence
std::vector< size_t > sequence
std::vector< std::vector< double > > predict(const std::vector< double > &par, std::vector< std::vector< double > > *res_prob=nullptr, bool leave_one_out=false, bool only_annotated=false, bool use_reduced_sequence=true)
size_t nnodes() const noexcept
Number of nodes (interior + leaf)
double likelihood(const std::vector< double > &par, bool as_log=false, bool use_reduced_sequence=true, size_t ncores=1u, bool no_update_pset_probs=false)
void update_annotations(size_t nodeid, std::vector< size_t > newann)
std::map< size_t, Node > nodes
std::vector< std::vector< double > > predict_backend(const std::vector< double > &par, bool use_reduced_sequence, const std::vector< size_t > &preorder)
barry::MapVec_type< size_t > map_to_state_id
size_t nfuns() const noexcept
Number of functions analyzed.
A single node for the model.
std::vector< size_t > annotations
Observed annotations (only defined for Geese)
size_t id
Id of the node (as specified in the input)
std::vector< Node * > offspring
Offspring nodes.
std::vector< size_t > narray
ID of the array in the model.
std::vector< double > probability
The probability of observing each state.
size_t ord
Order in which the node was created.
bool is_leaf() const noexcept
std::vector< double > subtree_prob
Induced subtree probabilities.
barry::BArrayDense< size_t, NodeData > PhyloArray