barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
geese-meat-constructors.hpp
Go to the documentation of this file.
1 // #include "geese-bones.hpp"
2 
3 #ifndef GEESE_MEAT_CONSTRUCTORS_HPP
4 #define GEESE_MEAT_CONSTRUCTORS_HPP 1
5 
6 inline Geese::Geese() {
7 
8  // In order to start...
9  this->rengine = new std::mt19937;
10  this->delete_rengine = true;
11  this->model = new PhyloModel();
12  this->delete_support = true;
13 
14  this->model->add_hasher(keygen_full);
15  this->model->store_psets();
16 
17  return;
18 }
19 
20 inline Geese::Geese(
21  std::vector< std::vector<size_t> > & annotations,
22  std::vector< size_t > & geneid,
23  std::vector< int > & parent,
24  std::vector< bool > & duplication
25 ) {
26 
27  // In order to start...
28  this->rengine = new std::mt19937;
29  this->delete_rengine = true;
30  this->model = new PhyloModel();
31  this->delete_support = true;
32 
33  this->model->add_hasher(keygen_full);
34  this->model->store_psets();
35 
36  // Check the lengths
37  if (annotations.size() == 0u)
38  throw std::logic_error("Annotations is empty");
39 
40  nfunctions = annotations.at(0u).size();
41 
42  // size_t n = annotations.size();
43  for (auto& iter : annotations)
44  {
45 
46  if (iter.size() != nfunctions)
47  throw std::length_error(
48  "Not all the annotations have the same length"
49  );
50 
51  }
52 
53  // Grouping up the data by parents -----------------------------------------
54  for (size_t i = 0u; i < geneid.size(); ++i)
55  {
56 
57  // Temp vector with the annotations
58  std::vector< size_t > & funs(annotations.at(i));
59 
60  // Case 1: Not the root node, and the parent does not exists
61  if ((parent.at(i) >= 0) && (nodes.find(parent.at(i)) == nodes.end()))
62  {
63 
64  // Adding parent
65  auto key_par = nodes.insert({
66  parent.at(i),
67  Node(parent.at(i), std::numeric_limits< size_t >::max(), true)
68  });
69 
70  // Case 1a: i does not exists
71  if (nodes.find(geneid.at(i)) == nodes.end())
72  {
73 
74  auto key_off = nodes.insert({
75  geneid.at(i),
76  Node(geneid.at(i), i, funs, duplication.at(i))
77  });
78 
79  // Adding the offspring to the parent
80  key_par.first->second.offspring.push_back(
81  &key_off.first->second
82  );
83 
84  // Adding the parent to the offspring
85  key_off.first->second.parent = &key_par.first->second;
86 
87  } else { // Case 1b: i does exists (we saw it earlier)
88 
89  // We just need to make sure that we update it!
90  nodes[geneid.at(i)].duplication = duplication.at(i);
91  nodes[geneid.at(i)].annotations = funs;
92  nodes[geneid.at(i)].parent = &nodes[parent.at(i)];
93  nodes[geneid.at(i)].ord = i;
94  nodes[geneid.at(i)].id = geneid.at(i);
95 
96  nodes[parent.at(i)].offspring.push_back(
97  &nodes[geneid.at(i)]
98  );
99 
100  }
101 
102  } else { // Case 2: Either this is the root, or the parent does exists
103 
104  // Case 2a: i does not exists (but its parent does)
105  if (nodes.find(geneid.at(i)) == nodes.end())
106  {
107 
108  // Adding i
109  auto key_off = nodes.insert({
110  geneid.at(i),
111  Node(geneid.at(i), i, funs, duplication.at(i))
112  });
113 
114  // We only do this if this is not the root
115  if (parent.at(i) >= 0)
116  {
117 
118  nodes[parent.at(i)].offspring.push_back(
119  &key_off.first->second
120  );
121 
122  // Adding the parent to the offspring
123  key_off.first->second.parent = &nodes[parent.at(i)];
124 
125  }
126 
127  } else { // Case 2b: i does exists (and so does its parent)
128 
129  // We just need to make sure that we update it!
130  nodes[geneid.at(i)].duplication = duplication.at(i);
131  nodes[geneid.at(i)].annotations = funs;
132  nodes[geneid.at(i)].ord = i;
133  nodes[geneid.at(i)].id = geneid.at(i);
134 
135  if (parent.at(i) >= 0)
136  {
137 
138  nodes[geneid.at(i)].parent = &nodes[parent.at(i)];
139  nodes[parent.at(i)].offspring.push_back(
140  &nodes[geneid.at(i)]
141  );
142 
143  }
144 
145  }
146  }
147 
148  }
149 
150  // Verifying that all have the variable ord, and that
151  // ord does not repeat
152  std::vector< size_t > ord_count(geneid.size(), 0u);
153  for (auto& n : nodes)
154  {
155 
156  Node & node = n.second;
157 
158  // Checking variable
159  if (node.ord == std::numeric_limits< size_t >::max())
160  {
161 
162  std::string msg = "Node id " +
163  std::to_string(node.id) +
164  " does not have an ord.";
165 
166  throw std::logic_error(msg);
167 
168  }
169 
170  // Checking duplication
171  if (node.duplication != duplication[node.ord])
172  {
173 
174  std::string msg = "Node id " +
175  std::to_string(node.id) +
176  "'s duplication was not properly recorded.";
177 
178  throw std::logic_error(msg);
179 
180  }
181 
182  // Counting the type of annotations
183  if (node.is_leaf())
184  {
185 
186  for (const auto & a : node.annotations)
187  {
188 
189  if (a == 1u)
190  this->n_ones++;
191  else if (a == 0u)
192  this->n_zeros++;
193 
194  }
195 
196  } else {
197 
198  if (node.duplication)
199  this->n_dupl_events++;
200  else
201  this->n_spec_events++;
202 
203  }
204 
205  if (++ord_count[node.ord] > 1u)
206  {
207 
208  std::string msg = "Node id " +
209  std::to_string(node.id) +
210  "'s ord was repeated.";
211  throw std::logic_error(msg);
212 
213  }
214 
215  }
216 
217 
218  // Computing the pruning sequence.
219  calc_sequence();
221 
222  // Are the sequences OK?
223  if (this->sequence.size() != this->nnodes())
224  throw std::logic_error("The pruning sequence's length is different from nnodes(). This should not happen! (contact the developers).");
225 
226  return;
227 
228 }
229 
230 inline Geese::Geese(const Geese & model_, bool copy_data) :
231  states(model_.states),
232  n_zeros(model_.n_zeros),
233  n_ones(model_.n_ones),
234  n_dupl_events(model_.n_dupl_events),
235  n_spec_events(model_.n_spec_events),
236  nfunctions(model_.nfunctions),
237  nodes(model_.nodes),
238  map_to_state_id(model_.map_to_state_id),
239  pset_loc(model_.pset_loc),
240  sequence(model_.sequence),
241  reduced_sequence(model_.reduced_sequence),
242  initialized(model_.initialized) {
243 
244 
245  // Replicating -------------------------------------------------------------
246  if (copy_data)
247  {
248 
249  if (model_.rengine != nullptr)
250  {
251  rengine = new std::mt19937(*(model_.rengine));
252  delete_rengine = true;
253  }
254 
255  if (model_.model != nullptr)
256  {
257  model = new PhyloModel(*(model_.model));
258  delete_support = true;
259  }
260 
261  } else {
262 
263  if (model_.rengine != nullptr)
264  {
265  rengine = model_.rengine;
266  delete_rengine = false;
267  }
268 
269  if (model_.model != nullptr)
270  {
271  model = model_.model;
272  delete_support = false;
273  }
274 
275  }
276 
277  // These should not be necesary as they are already initialized.
278  // this->model->set_keygen(keygen_full);
279  // this->model->store_psets();
280 
281  // Dealing with the nodes is a bit different -------------------------------
282  auto revseq = this->sequence;
283  std::reverse(revseq.begin(), revseq.end());
284 
285  for (auto& i : revseq)
286  {
287 
288  // Leaf do not have offspring
289  if (this->nodes[i].is_leaf())
290  continue;
291 
292  // Clearing offspring
293  this->nodes[i].offspring.clear();
294 
295  // I cannot directly access the node since, if non existent, it will
296  // create an entry with it (alegedly).
297  auto n = model_.nodes.find(i);
298 
299  for (const auto& off : n->second.offspring)
300  this->nodes[i].offspring.push_back(&this->nodes[off->id]);
301 
302  }
303 
304  return;
305 
306 }
307 
308 // Constructor move
310  rengine(nullptr),
311  model(nullptr),
312  states(std::move(x.states)),
313  n_zeros(std::move(x.n_zeros)),
314  n_ones(std::move(x.n_ones)),
315  n_dupl_events(std::move(x.n_dupl_events)),
316  n_spec_events(std::move(x.n_spec_events)),
317  nfunctions(x.nfunctions),
318  nodes(std::move(x.nodes)),
319  map_to_state_id(std::move(x.map_to_state_id)),
320  pset_loc(std::move(x.pset_loc)),
321  sequence(std::move(x.sequence)),
322  reduced_sequence(std::move(x.reduced_sequence)),
323  initialized(x.initialized)
324 {
325 
326  if (x.delete_rengine)
327  {
328 
329  rengine = new std::mt19937(*x.rengine);
330  delete_rengine = true;
331 
332  } else {
333 
334  rengine = x.rengine;
335  delete_rengine = false;
336 
337  }
338 
339  if (x.delete_support)
340  {
341 
342  model = new PhyloModel(*x.model);
343  delete_support = true;
344 
345  } else {
346 
347  model = x.model;
348  delete_support = false;
349 
350  }
351 
352  // Figuring out if model needs to be updated
353  if ((model != nullptr) && (x.delete_support | x.delete_rengine))
354  model->set_rengine(rengine, false);
355 
356  return;
357 
358 }
359 
360 
361 
362 #endif
return * this
Annotated Phylo Model.
std::vector< size_t > sequence
void calc_reduced_sequence()
Definition: geese-meat.hpp:361
bool delete_rengine
size_t nfunctions
bool delete_support
void calc_sequence(Node *n=nullptr)
Definition: geese-meat.hpp:317
std::map< size_t, Node > nodes
A single node for the model.
std::vector< size_t > annotations
Observed annotations (only defined for Geese)
bool duplication
size_t id
Id of the node (as specified in the input)
size_t ord
Order in which the node was created.
bool is_leaf() const noexcept
size_t i
Data_Type &&counter_ noexcept
std::vector< double > keygen_full(const PhyloArray &array, const PhyloCounterData *d)
Definition: geese-bones.hpp:36
barry::Model< PhyloArray, PhyloCounterData, PhyloRuleData, PhyloRuleDynData > PhyloModel