barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
flock-meat.hpp
Go to the documentation of this file.
1 #ifndef GEESE_FLOCK_MEET_HPP
2 #define GEESE_FLOCK_MEET_HPP 1
3 
4 // #include "flock-bones.hpp"
5 
6 inline size_t Flock::add_data(
7  std::vector< std::vector<size_t> > & annotations,
8  std::vector< size_t > & geneid,
9  std::vector< int > & parent,
10  std::vector< bool > & duplication
11 ) {
12 
13  // Setting up the model
14  if (dat.size() == 0u)
15  {
16 
17  model.set_rengine(&this->rengine, false);
18 
19  model.add_hasher(keygen_full);
20 
21  model.store_psets();
22 
23  }
24  else
25  {
26 
27  if (annotations[0u].size() != nfuns())
28  throw std::length_error("The number of functions in the new set of annotations does not match that of the first Geese.");
29 
30  }
31 
32  // Generating the Geese object
33  dat.emplace_back(Geese(annotations, geneid, parent, duplication));
34 
35  if (dat.size() == 1u)
36  this->nfunctions = dat[0].nfuns();
37 
38  return dat.size() - 1u;
39 
40 }
41 
42 inline void Flock::set_seed(const size_t & s)
43 {
44 
45  this->rengine.seed(s);
46 
47 }
48 
49 inline void Flock::init(size_t bar_width)
50 {
51 
52  // For some strange reason, pointing to model during
53  // the add_data function changes addresses once its out.
54  for (auto& a : dat)
55  {
56 
57  if (a.delete_support)
58  delete a.model;
59 
60  a.model = &model;
61  a.delete_support = false;
62 
63  if (a.delete_rengine)
64  delete a.rengine;
65 
66  a.rengine = &rengine;
67  a.delete_rengine = false;
68 
69  }
70 
71  // Initializing the models.
72  if (bar_width > 0u)
73  {
74 
75  printf_barry("Initializing nodes in Flock (this could take a while)...\n");
76  barry::Progress prog_bar(this->ntrees(), bar_width);
77  for (auto& d : dat)
78  {
79 
80  d.init(0u);
81  prog_bar.next();
82 
83  }
84 
85  prog_bar.end();
86 
87  }
88  else
89  {
90 
91  for (auto& d : dat)
92  d.init(0u);
93 
94  }
95 
96  this->initialized = true;
97 
98 }
99 
101 {
102 
103  if (dat.size() == 0u)
104  throw std::logic_error("The flock has no data yet.");
105 
106  return this->model.get_counters();
107 
108 }
109 
111 {
112 
113  return this->model.get_support_fun();
114 
115 }
116 
117 inline std::vector< double > * Flock::get_stats_support()
118 {
119 
120  return this->model.get_stats_support();
121 
122 }
123 
124 inline std::vector< std::vector< double > > * Flock::get_stats_target()
125 {
126 
127  return this->model.get_stats_target();
128 
129 }
130 
132 {
133 
134  return &this->model;
135 
136 }
137 
139  const std::vector< double > & par,
140  bool as_log,
141  bool use_reduced_sequence,
142  size_t ncores
143 )
144 {
145 
146  INITIALIZED()
147 
148  double ans = as_log ? 0.0: 1.0;
149 
150  std::vector< double > par0(par.begin(), par.end() - nfunctions);
151  model.update_pset_probs(par0, ncores);
152 
153  if (as_log) {
154 
155  if (ncores > 1u)
156  {
157  #if defined(_OPENMP) || defined(__OPENMP)
158  #pragma omp parallel for reduction(+:ans) num_threads(ncores)
159  #endif
160  for (auto& d : this->dat)
161  ans += d.likelihood(par, as_log, use_reduced_sequence, 1u, true);
162  } else {
163  for (auto& d : this->dat)
164  ans += d.likelihood(par, as_log, use_reduced_sequence, 1u, true);
165  }
166 
167  }
168  else
169  {
170 
171  if (ncores > 1u)
172  {
173  #if defined(_OPENMP) || defined(__OPENMP)
174  #pragma omp parallel for reduction(*:ans) num_threads(ncores)
175  #endif
176  for (auto& d : this->dat)
177  ans *= d.likelihood(par, as_log, use_reduced_sequence, 1u, true);
178  } else {
179  for (auto& d : this->dat)
180  ans *= d.likelihood(par, as_log, use_reduced_sequence, 1u, true);
181  }
182 
183  }
184 
185  return ans;
186 
187 }
188 
189 inline size_t Flock::nfuns() const noexcept
190 {
191 
192  return this->nfunctions;
193 
194 }
195 
196 inline size_t Flock::ntrees() const noexcept
197 {
198 
199  return this->dat.size();
200 
201 }
202 
203 inline std::vector< size_t > Flock::nnodes() const noexcept
204 {
205 
206  std::vector< size_t > res;
207 
208  res.reserve(this->ntrees());
209 
210  for (const auto& d : dat)
211  res.push_back(d.nnodes());
212 
213  return res;
214 
215 }
216 
217 inline std::vector< size_t > Flock::nleafs() const noexcept
218 {
219 
220  std::vector< size_t > res;
221 
222  res.reserve(this->ntrees());
223 
224  for (const auto& d : dat)
225  res.push_back(d.nleafs());
226 
227  return res;
228 
229 }
230 
231 inline size_t Flock::nterms() const
232 {
233 
234  INITIALIZED()
235  return model.nterms() + this->nfuns();
236 
237 }
238 
239 inline size_t Flock::support_size() const noexcept
240 {
241 
242  return this->model.support_size();
243 
244 }
245 
246 inline std::vector< std::string > Flock::colnames() const
247 {
248 
249  return this->model.colnames();
250 
251 }
252 
254  bool verb,
255  std::vector< size_t > * dist
256 ) const noexcept
257 {
258 
259  size_t ans = 0;
260 
261  int i = 0;
262 
263  for (const auto & d : dat)
264  {
265 
266  if (verb)
267  printf_barry("Checking tree %i\n", i);
268 
269  size_t tmp = d.parse_polytomies(verb, dist);
270 
271  if (tmp > ans)
272  ans = tmp;
273 
274  }
275 
276  return ans;
277 
278 }
279 
280 inline void Flock::print() const
281 {
282 
283  // Information relevant to print:
284  // - Number of phylogenies
285  // - Number of functions
286  // - Total number of annotations.
287 
288  // Computing total number of annotations and events
289  size_t nzeros = 0u;
290 
291  size_t nones = 0u;
292 
293  size_t ndpl = 0u;
294 
295  size_t nspe = 0u;
296 
297  for (const auto & tree : this->dat)
298  {
299  nzeros += tree.n_zeros;
300  nones += tree.n_ones;
301  ndpl += tree.n_dupl_events;
302  nspe += tree.n_spec_events;
303 
304  }
305 
306  printf_barry("FLOCK (GROUP OF GEESE)\nINFO ABOUT THE PHYLOGENIES\n");
307 
308  printf_barry("# of phylogenies : %li\n", ntrees());
309 
310  printf_barry("# of functions : %li\n", nfuns());
311 
312  printf_barry("# of ann. [zeros; ones] : [%li; %li]\n", nzeros, nones);
313 
314  printf_barry("# of events [dupl; spec] : [%li; %li]\n", ndpl, nspe);
315 
316  printf_barry("Largest polytomy : %li\n", parse_polytomies(false));
317 
318  printf_barry("\nINFO ABOUT THE SUPPORT\n");
319 
320  return this->model.print();
321 
322 }
323 
324 inline Geese* Flock::operator()(size_t i, bool check_bounds)
325 {
326 
327  if (check_bounds && i >= ntrees())
328  throw std::logic_error("Geese not found in the flock (out of range).");
329 
330  return &dat[i];
331 
332 }
333 
334 #endif
#define printf_barry
Geese * operator()(size_t i, bool check_bounds=true)
Access the i-th geese element.
Definition: flock-meat.hpp:324
PhyloModel model
Definition: flock-bones.hpp:23
std::mt19937 rengine
Definition: flock-bones.hpp:22
std::vector< std::string > colnames() const
Definition: flock-meat.hpp:246
PhyloCounters * get_counters()
Definition: flock-meat.hpp:100
std::vector< double > * get_stats_support()
Definition: flock-meat.hpp:117
void set_seed(const size_t &s)
Set the seed of the model.
Definition: flock-meat.hpp:42
std::vector< std::vector< double > > * get_stats_target()
Definition: flock-meat.hpp:124
size_t nfuns() const noexcept
Definition: flock-meat.hpp:189
size_t parse_polytomies(bool verb=true, std::vector< size_t > *dist=nullptr) const noexcept
Check polytomies and return the largest.
Definition: flock-meat.hpp:253
void print() const
Definition: flock-meat.hpp:280
void init(size_t bar_width=BARRY_PROGRESS_BAR_WIDTH)
Definition: flock-meat.hpp:49
size_t add_data(std::vector< std::vector< size_t > > &annotations, std::vector< size_t > &geneid, std::vector< int > &parent, std::vector< bool > &duplication)
Add a tree to the flock.
Definition: flock-meat.hpp:6
std::vector< size_t > nleafs() const noexcept
Definition: flock-meat.hpp:217
size_t nterms() const
Definition: flock-meat.hpp:231
PhyloModel * get_model()
Definition: flock-meat.hpp:131
bool initialized
Definition: flock-bones.hpp:19
size_t support_size() const noexcept
Definition: flock-meat.hpp:239
double likelihood_joint(const std::vector< double > &par, bool as_log=false, bool use_reduced_sequence=true, size_t ncores=1u)
Returns the joint likelihood of the model.
Definition: flock-meat.hpp:138
PhyloSupport * get_support_fun()
Definition: flock-meat.hpp:110
std::vector< size_t > nnodes() const noexcept
Definition: flock-meat.hpp:203
size_t ntrees() const noexcept
Definition: flock-meat.hpp:196
std::vector< Geese > dat
Definition: flock-bones.hpp:17
size_t nfunctions
Definition: flock-bones.hpp:18
Annotated Phylo Model.
return res
size_t i
Data_Type &&counter_ noexcept
#define INITIALIZED()
Definition: geese-bones.hpp:22
std::vector< double > keygen_full(const PhyloArray &array, const PhyloCounterData *d)
Definition: geese-bones.hpp:36
barry::Counters< PhyloArray, PhyloCounterData > PhyloCounters
barry::Model< PhyloArray, PhyloCounterData, PhyloRuleData, PhyloRuleDynData > PhyloModel
barry::Support< PhyloArray, PhyloCounterData, PhyloRuleData, PhyloRuleDynData > PhyloSupport