4 #define GEESE_MEAT_HPP 1
12 std::vector< bool > tmp_state = vector_caster<bool,size_t>(n.
annotations);
14 std::vector< double > blen(n.
offspring.size(), 1.0);
38 if (o->annotations[k] != 0)
39 n.
array.insert_cell(k,
j, o->annotations[k],
false,
false);
50 n.
array.insert_cell(k,
j, 9u,
false,
false);
61 if (n.
arrays.size() != states.size())
64 n.
arrays.resize(states.size());
65 n.
narray.resize(states.size());
78 for (
size_t s = 0u; s < states.size(); ++s)
94 catch (
const std::exception & e)
96 auto err = std::string(e.what());
98 err =
"Array " + std::to_string(n.
id) +
99 " cannot be added to the model with error:\n" + err +
100 "\n. This is likely due to a dynamic rule. " +
101 "The array to be added was in the following state:";
103 std::string state_str =
"";
104 for (
const auto & ss : states[s])
105 state_str += std::to_string(ss) +
" ";
107 err += state_str +
"\n";
109 throw std::runtime_error(err);
134 if (this->model ==
nullptr)
142 this->model->store_psets();
151 if (this->model->get_rengine() ==
nullptr)
152 this->model->set_rengine(this->rengine,
false);
159 states.reserve(pset.data.size());
163 for (
auto& iter : pset.data)
166 states.emplace_back(std::vector< bool >(
nfunctions,
false));
171 if (!iter.is_empty(
j, 0u,
false))
185 printf_barry(
"Initializing nodes in Geese (this could take a while)...\n");
187 barry::Progress prog_bar(this->
nnodes(), bar_width);
190 for (
auto& iter :
nodes)
194 if (!iter.second.is_leaf())
209 for (
auto& iter :
nodes)
213 if (!iter.second.is_leaf())
221 for (
auto& n: this->
nodes)
222 n.second.visited =
false;
227 auto sup_arrays = model->get_pset_arrays();
229 pset_loc.resize(sup_arrays->size());
232 for (
auto s = 0u; s < sup_arrays->size(); ++s)
235 auto sup_array = sup_arrays->operator[](s);
236 pset_loc[s].resize(sup_array.size());
238 for (
auto a = 0u; a < sup_array.size(); ++a)
241 for (
auto o = 0u; o < sup_array[a].ncol(); ++o)
244 sup_array[a].get_col_vec(&tmpstate, o,
false);
263 if (this->model !=
nullptr)
264 throw std::logic_error(
265 "There is already a -model- in this Geese. Cannot set a -model- after one is present."
268 this->model = model_.model;
276 delete this->rengine;
282 this->rengine = model_.rengine;
290 std::vector< size_t > newann
298 throw std::length_error(
"The requested node is not present.");
300 if (
nodes[nodeid].annotations.size() != newann.size())
301 throw std::length_error(
"Incorrect length of the new annotations.");
305 nodes[nodeid].annotations = newann;
308 if (!
nodes[nodeid].is_leaf())
325 n = &(
nodes.begin()->second);
366 std::vector< bool > includeit(
nodes.size(),
false);
377 for (
size_t k = 0u; k <
nfuns(); ++k)
381 includeit[n.
ord] =
true;
394 if (includeit[o->ord])
397 includeit[n.
ord] =
true;
412 std::vector< double >
res;
415 this->states.size() *
nodes.size()
421 for (
auto& p : this->
nodes.at(
i).subtree_prob)
440 return this->
nodes.size();
449 for (
auto& iter : this->
nodes)
450 if (iter.second.is_leaf())
459 return model->nterms() + this->
nfuns();
466 if (model ==
nullptr)
469 return model->support_size();
476 std::vector< size_t > ans = {this->n_zeros, this->n_ones};
485 return this->model->colnames();
491 std::vector< size_t > * dist
496 for (
const auto& n : this->nodes)
499 if (n.second.is_leaf())
502 size_t noff = n.second.noffspring();
505 dist->push_back(noff);
511 printf_barry(
"Node id: %li has polytomy size %li\n", n.second.id, noff);
528 std::vector<std::vector<double>> ans;
535 tmpcount.set_counters(this->model->get_counters());
538 for (
auto& n :
nodes)
541 if (n.second.is_leaf())
544 ans.emplace_back(std::vector<double>({}));
553 for (
auto& o : n.second.offspring)
556 for (
size_t k = 0u; k <
nfuns(); ++k)
559 if (o->annotations.at(k) != 0)
562 tmparray.insert_cell(
563 k,
j, o->annotations.at(k),
false, false
574 std::vector< bool > tmp_state = vector_caster<bool,size_t>(
578 std::vector< double > blen(n.second.offspring.size(), 1.0);
581 new NodeData(blen, tmp_state, n.second.duplication),
585 tmpcount.reset_array(&tmparray);
587 ans.push_back(tmpcount.count_all());
599 std::vector<std::vector<double>> ans;
604 tmpcount.set_counters(this->model->get_counters());
607 for (
auto& n :
nodes) {
609 if (n.second.is_leaf()) {
610 ans.emplace_back(std::vector< double >({}));
617 for (
auto& o : n.second.offspring) {
618 for (
size_t k = 0u; k <
nfuns(); ++k) {
619 if (o->annotations.at(k) != 0) {
620 tmparray.insert_cell(
621 k,
j, o->annotations.at(k),
false, false
628 std::vector< bool > tmp_state =vector_caster<bool,size_t>(n.second.annotations);
629 std::vector< double > blen(n.second.offspring.size(), 1.0);
631 new NodeData(blen, tmp_state, n.second.duplication),
635 tmpcount.reset_array(&tmparray);
636 std::vector< double > counts = tmpcount.count_all();
639 auto dpl = n.second.duplication ?
"duplication" :
"speciation";
641 printf_barry(
"nodeid: % 3li (%s)\nstate: [", n.second.id, dpl);
642 for (
size_t f = 0u; f <
nfuns(); ++f)
643 printf_barry(
"%i, ", (tmparray.D_ptr()->states[f] ? 1 : 0));
648 for (
auto& c : counts)
668 printf_barry(
"# of ann. [zeros; ones] : [%li; %li]\n", this->n_zeros, this->n_ones);
669 printf_barry(
"# of events [dupl; spec] : [%li; %li]\n", this->n_dupl_events, this->n_spec_events);
672 this->model->print();
681 for (
const auto & n:
nodes)
683 printf_barry(
"% 4li - Id: %li -- ", n.second.ord, n.second.id);
689 std::string(
"leaf").c_str() :
690 std::string(
"internal").c_str()
695 "event type: %s -- ",
696 n.second.duplication ?
697 std::string(
"duplication").c_str() :
698 std::string(
"speciation").c_str()
703 for (
const auto & a: n.second.annotations)
706 if (&a == &n.second.annotations.back())
717 if (n.second.parent ==
nullptr)
721 printf_barry(
"parent id: %li -- ", n.second.parent->id);
725 if (n.second.offspring.size() > 0u)
728 for (
const auto & o: n.second.offspring)
731 if (&o == &n.second.offspring.back())
751 return this->rengine;
756 return this->model->get_counters();
764 return this->model->get_support_fun();
773 std::vector< size_t > ids(0u);
774 for (
auto & n :
nodes)
778 for (
size_t f = 0u; f <
nfuns(); ++f)
782 if (n.second.annotations[f] != 9u) {
783 ids.push_back(n.second.id);
797 std::vector< size_t > ann(this->
nfuns() * this->
nnodes(), 9u);
798 size_t nrows = this->
nnodes();
799 for (
auto & n :
nodes)
803 size_t row = n.second.ord;
806 for (
size_t f = 0u; f <
nfuns(); ++f)
810 if (n.second.annotations[f] != 9u) {
811 ann[f * nrows + row] = n.second.annotations[f];
std::vector< size_t > reduced_sequence
std::vector< std::vector< bool > > get_states() const
Powerset of a gene's possible states.
std::vector< size_t > sequence
void calc_reduced_sequence()
std::vector< std::vector< std::vector< size_t > > > pset_loc
Locations of columns.
std::vector< size_t > get_annotations() const
Returns the annotations of the nodes with at least one annotation.
void print() const
Prints information about the GEESE.
PhyloSupport * get_support_fun()
PhyloCounters * get_counters()
size_t support_size() const noexcept
Number of unique sets of sufficient stats.
void print_observed_counts()
size_t parse_polytomies(bool verb=true, std::vector< size_t > *dist=nullptr) const noexcept
Check polytomies and return the largest.
std::vector< std::string > colnames() const
Names of the terms in the model.
void init(size_t bar_width=BARRY_PROGRESS_BAR_WIDTH)
size_t nnodes() const noexcept
Number of nodes (interior + leaf)
std::vector< double > get_probabilities() const
std::vector< std::vector< double > > observed_counts()
size_t nleafs() const noexcept
Number of leaf.
void calc_sequence(Node *n=nullptr)
void inherit_support(const Geese &model_, bool delete_support_=false)
std::vector< size_t > get_annotated_nodes() const
Returns the ids of the nodes with at least one annotation.
std::vector< size_t > nannotations() const noexcept
Number of annotations.
void update_annotations(size_t nodeid, std::vector< size_t > newann)
std::map< size_t, Node > nodes
std::mt19937 * get_rengine()
barry::MapVec_type< size_t > map_to_state_id
size_t nterms() const
Number of terms included.
size_t nfuns() const noexcept
Number of functions analyzed.
Data definition for the PhyloArray class.
A single node for the model.
std::vector< size_t > annotations
Observed annotations (only defined for Geese)
PhyloArray array
Array of the node.
std::vector< PhyloArray > arrays
Arrays given all possible states.
size_t id
Id of the node (as specified in the input)
std::vector< Node * > offspring
Offspring nodes.
std::vector< size_t > narray
ID of the array in the model.
Node * parent
Parent node.
size_t ord
Order in which the node was created.
bool is_leaf() const noexcept
std::vector< double > subtree_prob
Induced subtree probabilities.
Data_Type &&counter_ noexcept
std::vector< double > keygen_full(const PhyloArray &array, const PhyloCounterData *d)
barry::Counters< PhyloArray, PhyloCounterData > PhyloCounters
barry::Model< PhyloArray, PhyloCounterData, PhyloRuleData, PhyloRuleDynData > PhyloModel
barry::BArrayDense< size_t, NodeData > PhyloArray
barry::StatsCounter< PhyloArray, PhyloCounterData > PhyloStatsCounter
barry::PowerSet< PhyloArray, PhyloRuleData > PhyloPowerSet
barry::Support< PhyloArray, PhyloCounterData, PhyloRuleData, PhyloRuleDynData > PhyloSupport