barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
counters.hpp
Go to the documentation of this file.
1 #ifndef BARRAY_PHYLO_H
2 #define BARRAY_PHYLO_H 1
3 
4 
5 
12 #define MAKE_DUPL_VARS() \
13  bool DPL = Array.D_ptr()->duplication; \
14  size_t DATA_AT = data[0u];
15 
16 #define IS_EITHER() (DATA_AT == Geese::etype_either)
17 #define IS_DUPLICATION() ((DATA_AT == Geese::etype_duplication) & (DPL))
18 #define IS_SPECIATION() ((DATA_AT == Geese::etype_speciation) & (!DPL))
19 
20 #define IF_MATCHES() MAKE_DUPL_VARS() \
21  if (IS_EITHER() || IS_DUPLICATION() || IS_SPECIATION())
22 #define IF_NOTMATCHES() MAKE_DUPL_VARS() \
23  if (!IS_EITHER() && !IS_DUPLICATION() && !IS_SPECIATION())
24 
25 
36 #define PHYLO_RULE_LAMBDA(a) barry::Rule_fun_type<PhyloArray, PhyloRuleData> a = \
37  [](const PhyloArray & Array, size_t i, size_t j, PhyloRuleData & data)
38 
39 #define PHYLO_COUNTER_LAMBDA(a) barry::Counter_fun_type<PhyloArray, PhyloCounterData> a = \
40  [](const PhyloArray & Array, size_t i, size_t j, PhyloCounterData & data)
41 
42 #define PHYLO_RULE_DYN_LAMBDA(a) barry::Rule_fun_type<PhyloArray, PhyloRuleDynData> a = \
43  [](const PhyloArray & Array, size_t i, size_t j, PhyloRuleDynData & data)
44 
45 #define PHYLO_CHECK_MISSING() if (Array.D_ptr() == nullptr) \
46  throw std::logic_error("The array data is nullptr."); \
47 
48 inline std::string get_last_name(size_t d) {return ((d == 1u)? " at duplication" : ((d == 0u)? " at speciation" : ""));}
49 
56 // -----------------------------------------------------------------------------
63  size_t duplication = Geese::etype_default
64 )
65 {
66 
67  PHYLO_COUNTER_LAMBDA(tmp_init)
68  {
69 
71 
72  return 0.0;
73 
74  };
75 
76  PHYLO_COUNTER_LAMBDA(tmp_count)
77  {
79  return 0.0;
80 
81  return Array.D_ptr()->states[i] ? 0.0 : 1.0;
82 
83  };
84 
85  counters->add_counter(
86  tmp_count, tmp_init, nullptr,
87  PhyloCounterData({duplication}),
88  "Overall gains" + get_last_name(duplication)
89  );
90 
91  return;
92 
93 }
94 
95 // -----------------------------------------------------------------------------
99 inline void counter_gains(
101  std::vector<size_t> nfun,
102  size_t duplication = Geese::etype_default
103 )
104 {
105 
106  PHYLO_COUNTER_LAMBDA(tmp_init)
107  {
108 
109  IF_NOTMATCHES()
110  return 0.0;
111 
112  double ngains = 0.0;
113  auto k = data[1u];
114  auto s = Array.D_ptr()->states[k];
115 
116  if (s)
117  return 0.0;
118 
119  for (auto o = 0u; o < Array.ncol(); ++o)
120  {
121  if (Array(k, o) == 1u)
122  ngains += 1.0;
123  }
124 
125  return ngains;
126 
127  };
128 
129  PHYLO_COUNTER_LAMBDA(tmp_count)
130  {
131 
132  // Is there any gain?
133  if (Array.D_ptr()->states[i])
134  return 0.0;
135 
136  IF_MATCHES()
137  return (i == data[1u]) ? 1.0 : 0.0;
138 
139  return 0.0;
140 
141  };
142 
143  for (auto& i : nfun)
144  counters->add_counter(
145  tmp_count, tmp_init, nullptr,
146  PhyloCounterData({duplication, i}),
147  "Gains " + std::to_string(i) + get_last_name(duplication)
148  );
149 
150  return;
151 
152 }
153 
154 
155 // -----------------------------------------------------------------------------
161  std::vector<size_t> nfun,
162  size_t k = 1u,
163  size_t duplication = Geese::etype_default
164 )
165 {
166 
167  PHYLO_COUNTER_LAMBDA(tmp_init)
168  {
169 
171  return 0.0;
172 
173  };
174 
175  PHYLO_COUNTER_LAMBDA(tmp_count)
176  {
177 
178  // Is this relevant?
179  if (i != data[1u])
180  return 0.0;
181 
182  IF_NOTMATCHES()
183  return 0.0;
184 
185  // Is there any gain?
186  if (Array.D_ptr()->states[i])
187  return 0.0;
188 
189  // Making the counts
190  int counts = 0;
191  for (size_t k = 0u; k < Array.ncol(); ++k)
192  if (k != j)
193  {
194  if (Array(i, k, false) == 1u)
195  ++counts;
196  }
197 
198  // Three cases: base on the diff
199  int diff = static_cast<int>(data[2u]) - counts + 1;
200  // (a) counts were 1 below k, then +1
201  if (diff == 1)
202  return -1.0;
203  // (b) counts were equal to k, then -1
204  else if (diff == 0)
205  {
206  return 1.0;
207  } else
208  // (c) Otherwise, nothing happens
209  return 0.0;
210 
211 
212  };
213 
214  for (auto& i : nfun)
215  counters->add_counter(
216  tmp_count, tmp_init, nullptr,
217  PhyloCounterData({duplication, i, k}),
218  std::to_string(k) + " genes gain " + std::to_string(i) +
219  get_last_name(duplication)
220  );
221 
222  return;
223 
224 }
225 
226 // -----------------------------------------------------------------------------
233  size_t duplication = Geese::etype_default
234 )
235 {
236 
237  PHYLO_COUNTER_LAMBDA(tmp_init)
238  {
239 
241 
242  IF_NOTMATCHES()
243  return 0.0;
244 
245  // At the beginning, all offspring are zero, so we need to
246  // find at least one state = true.
247  for (auto s : Array.D_ptr()->states)
248  {
249 
250  if (s)
251  // Yup, we are loosing a function, so break
252  return static_cast<double>(Array.ncol());
253 
254  }
255 
256  return 0.0;
257 
258 
259  };
260 
261  PHYLO_COUNTER_LAMBDA(tmp_count)
262  {
263 
264  // Checking the type of event
265  IF_NOTMATCHES()
266  return 0.0;
267 
268  // Need to check the other functions
269  for (size_t k = 0u; k < Array.nrow(); ++k)
270  {
271 
272  // Nah, this gene was already different.
273  if ((k != i) && (Array.D_ptr()->states[k] != (Array(k, j, false) == 1u)))
274  return 0.0;
275 
276 
277  }
278 
279  // Nope, this gene is now matching its parent, so we need to
280  // take it out from the count of genes that have changed.
281  return Array.D_ptr()->states[i] ? -1.0 : 1.0;
282 
283  };
284 
285  counters->add_counter(
286  tmp_count, tmp_init, nullptr,
287  PhyloCounterData({duplication}),
288  "Num. of genes changing" + get_last_name(duplication)
289  );
290 
291 
292  return;
293 
294 }
295 
296 // -----------------------------------------------------------------------------
302  size_t nfunA,
303  size_t nfunB,
304  size_t duplication = Geese::etype_default
305 )
306 {
307 
308  PHYLO_COUNTER_LAMBDA(tmp_init)
309  {
310 
312 
313  IF_NOTMATCHES()
314  return 0.0;
315 
316  // At the beginning, all offspring are zero, so we need to
317  // find at least one state = true.
318  if (Array.D_ptr()->states[data[1u]] || Array.D_ptr()->states[data[2u]])
319  return 0.0;
320 
321  double n = static_cast<double>(Array.ncol());
322  return n * (n - 1.0) / 2.0;
323 
324 
325  };
326 
327  PHYLO_COUNTER_LAMBDA(tmp_count)
328  {
329 
330  // Checking the type of event
331  IF_NOTMATCHES()
332  return 0.0;
333 
334  auto nfunA = data[1u];
335  auto nfunB = data[2u];
336 
337  if ((i != nfunA) & (i != nfunB))
338  return 0.0;
339 
340  if (Array.D_ptr()->states[data[1u]] || Array.D_ptr()->states[data[2u]])
341  return 0.0;
342 
343  size_t k = (i == nfunA) ? nfunB : nfunA;
344 
345  if (Array(k, j) == 1u)
346  return 0.0;
347 
348  double res = 0.0;
349  for (auto off = 0u; off < Array.ncol(); ++off)
350  {
351  if (off == j)
352  continue;
353 
354  if ((Array(i, off) == 0u) && (Array(k, off) == 0u))
355  res -= 1.0;
356 
357  }
358 
359  return res;
360 
361  };
362 
363  counters->add_counter(
364  tmp_count, tmp_init, nullptr,
365  PhyloCounterData({duplication}),
366  "Preserve pseudo gene (" +
367  std::to_string(nfunA) + ", " +
368  std::to_string(nfunB) + ")" + get_last_name(duplication)
369  );
370 
371 
372  return;
373 
374 }
375 
376 
377 // -----------------------------------------------------------------------------
384  size_t duplication = Geese::etype_default
385 )
386 {
387 
388  PHYLO_COUNTER_LAMBDA(tmp_init)
389  {
390 
392 
393  IF_NOTMATCHES()
394  return 0.0;
395 
396  // At the beginning, all offspring are zero, so we need to
397  // find at least one state = true.
398  for (auto s : Array.D_ptr()->states)
399  {
400  if (s)
401  return 1.0;
402  }
403 
404  return 0.0;
405 
406  };
407 
408  PHYLO_COUNTER_LAMBDA(tmp_count)
409  {
410 
411  // Checking the type of event
412  IF_NOTMATCHES()
413  return 0.0;
414 
415  // Setup
416  bool j_diverges = false;
417  const std::vector< bool > & par_state = Array.D_ptr()->states;
418 
419  for (size_t f = 0u; f < Array.nrow(); ++f)
420  {
421 
422  // Was the gene annotation different from the parent?
423  if (par_state[f] != (Array(f,j) == 1u))
424  {
425  j_diverges = true;
426  break;
427  }
428 
429  }
430 
431 
432  bool j_used_to_diverge = false;
433  for (size_t f = 0u; f < Array.nrow(); ++f)
434  {
435 
436  if (f == i)
437  {
438  if (par_state[f])
439  {
440  j_used_to_diverge = true;
441  break;
442  }
443  }
444  else
445  {
446 
447  if (par_state[f] != (Array(f,j) == 1u))
448  {
449  j_used_to_diverge = true;
450  break;
451  }
452 
453  }
454 
455  }
456 
457  // Case 1: j hasn't changed
458  if ((!j_used_to_diverge & !j_diverges) | (j_used_to_diverge & j_diverges))
459  return 0.0;
460  // Case 2: j NOW diverges
461  else if (j_diverges)
462  return 1.0/Array.ncol();
463  // Case 3: j USED to diverge, so no more
464  else
465  return -1.0/Array.ncol();
466 
467  };
468 
469  counters->add_counter(
470  tmp_count, tmp_init, nullptr,
471  PhyloCounterData({duplication}),
472  "Proportion of genes changing" + get_last_name(duplication)
473  );
474 
475 
476  return;
477 
478 }
479 
480 // -----------------------------------------------------------------------------
486  size_t duplication = Geese::etype_default
487  )
488 {
489 
490  PHYLO_COUNTER_LAMBDA(tmp_count)
491  {
492 
493  if (!Array.D_ptr()->states[i])
494  return 0.0;
495 
496  IF_MATCHES()
497  return -1.0;
498  else
499  return 0.0;
500 
501  };
502 
503  PHYLO_COUNTER_LAMBDA(tmp_init)
504  {
505 
506  IF_NOTMATCHES()
507  return 0.0;
508 
509  double res = 0.0;
510  for (auto s : Array.D_ptr()->states)
511  if (s)
512  res += 1.0;
513 
514  return res * static_cast<double>(Array.ncol());
515 
516  };
517 
518  counters->add_counter(
519  tmp_count, tmp_init, nullptr,
520  PhyloCounterData({duplication}),
521  "Overall loses" + get_last_name(duplication)
522  );
523 
524  return;
525 
526 }
527 
528 // -----------------------------------------------------------------------------
532 inline void counter_maxfuns(
534  size_t lb,
535  size_t ub,
536  size_t duplication = Geese::etype_default
537  )
538  {
539 
540  PHYLO_COUNTER_LAMBDA(tmp_init)
541  {
542 
544 
545  IF_NOTMATCHES()
546  return 0.0;
547 
548  // At first, all are zero, so we need to check if the lower
549  // bound is zero
550  if (data[1u] == 0)
551  return static_cast<double>(Array.ncol());
552 
553  return 0.0;
554 
555  };
556 
557  PHYLO_COUNTER_LAMBDA(tmp_count)
558  {
559 
560  IF_NOTMATCHES()
561  return 0.0;
562 
563  int count = Array.colsum(j);
564  int ub = data[2u];
565 
566  // It now matches
567  if (count == static_cast<int>(data[1u]))
568  return 1.0;
569 
570  // Was within, but now outside
571  if (count > ub && ((count - ub) == 1))
572  return -1.0;
573 
574  // Otherwise nothing happens.
575  return 0.0;
576 
577  };
578 
579  counters->add_counter(
580  tmp_count, tmp_init, nullptr,
581  PhyloCounterData({duplication, lb, ub}),
582  "Genes with [" + std::to_string(lb) + ", " + std::to_string(ub) +
583  "] funs" + get_last_name(duplication)
584  );
585 
586  return;
587 
588 }
589 
590 // -----------------------------------------------------------------------------
594 inline void counter_loss(
596  std::vector<size_t> nfun,
597  size_t duplication = Geese::etype_default
598 )
599 {
600 
601  PHYLO_COUNTER_LAMBDA(tmp_count)
602  {
603 
604  IF_NOTMATCHES()
605  return 0.0;
606 
607  if (!Array.D_ptr()->states[i])
608  return 0.0;
609 
610  return (i == data[1u]) ? -1.0 : 0.0;
611 
612  };
613 
614  PHYLO_COUNTER_LAMBDA(tmp_init)
615  {
616 
618 
619  IF_NOTMATCHES()
620  return 0.0;
621 
622  auto f = data[1u];
623 
624  if (!Array.D_ptr()->states[f])
625  return 0.0;
626 
627  return static_cast<double>(Array.ncol());
628 
629  };
630 
631  for (auto& i : nfun)
632  counters->add_counter(
633  tmp_count, tmp_init, nullptr,
634  PhyloCounterData({duplication, i}),
635  "Loss " + std::to_string(i) + get_last_name(duplication)
636  );
637 
638  return;
639 
640 }
641 
642 // -----------------------------------------------------------------------------
648  size_t duplication = Geese::etype_default
649 )
650 {
651 
652  PHYLO_COUNTER_LAMBDA(tmp_count)
653  {
654 
655  IF_NOTMATCHES()
656  return 0.0;
657 
658  if (Array.D_ptr()->states[i])
659  return -1.0;
660  else
661  return 1.0;
662 
663  };
664 
665  PHYLO_COUNTER_LAMBDA(tmp_init)
666  {
667 
668  IF_NOTMATCHES()
669  return 0.0;
670 
672 
673 
674  // Since we start with all the array at zero,
675  // As many chances to change as offspring
676  double noff = static_cast<double> (Array.ncol());
677  double counts = 0.0;
678  for (size_t k = 0u; k < Array.nrow(); ++k)
679  if (Array.D_ptr()->states[k])
680  counts += noff;
681 
682  return counts;
683 
684 
685 
686  };
687 
688  counters->add_counter(
689  tmp_count, tmp_init, nullptr,
690  PhyloCounterData({duplication}),
691  "Overall changes" + get_last_name(duplication)
692  );
693 
694 
695  return;
696 
697 }
698 
699 
700 // -----------------------------------------------------------------------------
705 inline void counter_subfun(
707  size_t nfunA,
708  size_t nfunB,
709  size_t duplication = Geese::etype_default
710 )
711 {
712 
713  PHYLO_COUNTER_LAMBDA(tmp_count)
714  {
715 
716  // Is this node duplication?
717  IF_NOTMATCHES()
718  return 0.0;
719 
720  auto funA = data[1u];
721  auto funB = data[2u];
722 
723  // Are we looking at either of the relevant functions?
724  if ((funA != i) && (funB != i))
725  return 0.0;
726 
727  // Are A and B existant? if not, no change
728  if (!Array.D_ptr()->states[funA] || !Array.D_ptr()->states[funB])
729  return 0.0;
730 
731  // Figuring out which is the first (reference) function
732  size_t other = (i == funA)? funB : funA;
733  double res = 0.0;
734  // There are 4 cases: (first x second) x (had the second function)
735  if (Array(other, j, false) == 1u)
736  {
737 
738  for (size_t off = 0u; off < Array.ncol(); ++off)
739  {
740 
741  // Not on self
742  if (off == j)
743  continue;
744 
745  if ((Array(i, off, false) == 1u) && (Array(other, off, false) == 0u))
746  res -= 1.0;
747 
748  }
749 
750  } else {
751 
752  for (size_t off = 0u; off < Array.ncol(); ++off)
753  {
754 
755  // Not on self
756  if (off == j)
757  continue;
758 
759  if ((Array(i, off, false) == 0u) && (Array(other, off, false) == 1u))
760  res += 1.0;
761 
762  }
763 
764  }
765 
766  return res;
767 
768  };
769 
770  PHYLO_COUNTER_LAMBDA(tmp_init)
771  {
772 
774  return 0.0;
775 
776  };
777 
778  counters->add_counter(
779  tmp_count, tmp_init, nullptr,
780  PhyloCounterData({duplication, nfunA, nfunB}),
781  "Subfun between " + std::to_string(nfunA) + " and " +
782  std::to_string(nfunB) + get_last_name(duplication)
783  );
784 
785  return;
786 
787 }
788 
789 // -----------------------------------------------------------------------------
794 inline void counter_cogain(
796  size_t nfunA,
797  size_t nfunB,
798  size_t duplication = Geese::etype_default
799 )
800 {
801 
802  PHYLO_COUNTER_LAMBDA(tmp_count)
803  {
804 
805  IF_NOTMATCHES()
806  return 0.0;
807 
808  auto d1 = data[1u];
809  auto d2 = data[2u];
810 
811  // Is the function in scope relevant?
812  if ((i != d1) && (i != d2))
813  return 0.0;
814 
815  // None should have it
816  if (!Array.D_ptr()->states[d1] && !Array.D_ptr()->states[d2])
817  {
818 
819  size_t other = (i == d1)? d2 : d1;
820 
821  if (Array(other, j, false) == 1u)
822  return 1.0;
823  else
824  return 0.0;
825 
826  } else
827  return 0.0;
828 
829  };
830 
831  PHYLO_COUNTER_LAMBDA(tmp_init) {
832 
834  return 0.0;
835 
836  };
837 
838  counters->add_counter(
839  tmp_count, tmp_init, nullptr,
840  PhyloCounterData({duplication, nfunA, nfunB}),
841  "Co-gains " + std::to_string(nfunA) + " & " + std::to_string(nfunB) +
842  get_last_name(duplication)
843  );
844 
845  return;
846 
847 }
848 
849 // -----------------------------------------------------------------------------
851 inline void counter_longest(
853  size_t duplication = Geese::etype_default
854  )
855 {
856 
857  PHYLO_COUNTER_LAMBDA(tmp_count)
858  {
859 
860  IF_NOTMATCHES()
861  return 0.0;
862 
863  // Figuring out which match
864  std::vector< bool> is_longest(Array.ncol(), false);
865  bool j_mutates = false;
866  int nmutate = 0;
867  int nmutate_longest = 0;
868 
869  auto states = Array.D_ptr()->states;
870 
871  for (auto off = 0u; off < Array.ncol(); ++off)
872  {
873 
874  // On the fly, figuring out if it is longest
875  for (auto & l : data)
876  if (l == off)
877  is_longest[off] = true;
878 
879  for (auto f = 0u; f < Array.nrow(); ++f)
880  {
881  if ((Array(f, off) == 1u) != states[f])
882  {
883 
884  // If it happens that j != off and is not longest
885  // then return 0 (a not longest was mutating prev)
886  if (is_longest[off] && (off != j))
887  return 0.0;
888 
889  if (off == j)
890  j_mutates = true;
891 
892  if (is_longest[j])
893  nmutate_longest++;
894  else
895  nmutate++;
896 
897  break;
898  }
899 
900  }
901  }
902 
903  // There was already more than one in difference
904  // so nothing to change
905  if (std::fabs(nmutate - nmutate_longest) > 1)
906  return 0.0;
907 
908  // Figuring out previously
909  bool j_mutates_prev = false;
910  for (auto f = 0u; f < Array.nrow(); ++f)
911  {
912  // Checking the previous function... was it
913  // different before?
914  if ((f == i) && states[i])
915  {
916  j_mutates_prev = true;
917  break;
918  }
919  else if ((Array(f, j) == 1u) != states[f])
920  {
921  j_mutates_prev = true;
922  break;
923  }
924 
925  }
926 
927  // Adjusting the previous count
928  auto nmutate_prev = nmutate;
929  auto nmutate_longest_prev = nmutate_longest;
930  if (j_mutates & !j_mutates_prev)
931  {
932  if (is_longest[j])
933  nmutate_longest_prev--;
934  else
935  nmutate_prev--;
936  }
937  else if (!j_mutates & j_mutates)
938  {
939  if (is_longest[j])
940  nmutate_longest_prev++;
941  else
942  nmutate_prev++;
943 
944  }
945 
946  // Just compute the change statistic directly
947  return
948  ( ((nmutate == 0) & (nmutate_longest > 0)) ? 1.0 : 0.0 ) +
949  ( ((nmutate_prev == 0) & (nmutate_longest_prev > 0)) ? 1.0 : 0.0 );
950 
951  };
952 
953  PHYLO_COUNTER_LAMBDA(tmp_init)
954  {
955 
957 
958  if (Array.D_ptr()->blengths.size() != Array.ncol())
959  throw std::logic_error(
960  "longest should be initialized with a vec of size Array.ncol()."
961  );
962 
963  // Finding the longest branch (or branches) --
964  size_t longest_idx = 0u;
965  double diff = 0.0;
966  data.reserve(Array.ncol());
967  data.push_back(0u);
968  for (size_t ii = 1u; ii < Array.ncol(); ++ii)
969  {
970 
971  diff = Array.D_ptr()->blengths[longest_idx] - Array.D_ptr()->blengths[ii];
972  if (diff > 0.0)
973  continue;
974  else if (diff < 0.0)
975  {
976 
977  data.empty();
978  data.push_back(ii);
979  longest_idx = ii;
980 
981  }
982  else if (diff == 0.0)
983  data.push_back(ii);
984 
985  }
986 
987  data.shrink_to_fit();
988 
989  if (data.size() == 0u)
990  throw std::logic_error("The data on the longest branch has size 0.");
991 
992  // Starting the counter, since all in zero, then this will be equal to
993  // the number of functions in 1 x number of longest branches
994  for (size_t ii = 0u; ii < Array.nrow(); ++ii)
995  {
996 
997  if (Array.D_ptr()->states[ii])
998  return (1.0 * static_cast<double>(data.size()));
999 
1000  }
1001 
1002  return 0.0;
1003 
1004  };
1005 
1006  counters->add_counter(
1007  tmp_count, tmp_init, nullptr,
1008  PhyloCounterData({duplication}),
1009  "Longest branch mutates" + get_last_name(duplication)
1010  );
1011 
1012  return;
1013 
1014 }
1015 
1016 //------------------------------------------------------------------------------
1021 inline void counter_neofun(
1023  size_t nfunA,
1024  size_t nfunB,
1025  size_t duplication = Geese::etype_default
1026 )
1027 {
1028 
1029  PHYLO_COUNTER_LAMBDA(tmp_count)
1030  {
1031 
1032  // Is this node duplication?
1033  IF_NOTMATCHES()
1034  return 0.0;
1035 
1036  auto funA = data[1u];
1037  auto funB = data[2u];
1038 
1039  // Is the function in scope relevant?
1040  if ((i != funA) && (i != funB))
1041  return 0.0;
1042 
1043  // Checking if the parent has both functions
1044  size_t other = (i == funA)? funB : funA;
1045  bool parent_i = Array.D_ptr()->states[i];
1046  bool parent_other = Array.D_ptr()->states[other];
1047 
1048  if (!parent_i & !parent_other)
1049  return 0.0;
1050  else if (parent_i & parent_other)
1051  return 0.0;
1052 
1053  // Figuring out which is the first (reference) function
1054  double res = 0.0;
1055 
1056  if (Array(other, j) == 0u)
1057  {
1058 
1059 
1060  for (auto off = 0u; off < Array.ncol(); ++off)
1061  if ((Array(i,off) == 0) && (Array(other,off) == 1))
1062  res += 1.0;
1063 
1064  }
1065  else
1066  {
1067 
1068  for (auto off = 0u; off < Array.ncol(); ++off)
1069  if ((Array(i,off) == 1) && (Array(other,off) == 0))
1070  res -= 1.0;
1071 
1072  }
1073 
1074  return res;
1075 
1076  };
1077 
1078  PHYLO_COUNTER_LAMBDA(tmp_init) {
1079 
1081  return 0.0;
1082 
1083  };
1084 
1085  counters->add_counter(
1086  tmp_count, tmp_init, nullptr,
1087  PhyloCounterData({duplication, nfunA, nfunB}),
1088  "Neofun between " + std::to_string(nfunA) + " and " +
1089  std::to_string(nfunB) + get_last_name(duplication)
1090  );
1091 
1092  return;
1093 
1094 }
1095 
1096 //------------------------------------------------------------------------------
1104  size_t nfunA,
1105  size_t duplication = Geese::etype_default
1106 )
1107 {
1108 
1109  PHYLO_COUNTER_LAMBDA(tmp_count)
1110  {
1111 
1112  // Is this node duplication?
1113  IF_NOTMATCHES()
1114  return 0.0;
1115 
1116  // Is the function in scope relevant?
1117  if (i != data[1u])
1118  return 0.0;
1119 
1120  // Checking if the parent has the function
1121  if (Array.D_ptr()->states[i])
1122  return 0.0;
1123 
1124  // Figuring out which is the first (reference) function
1125  double res = 0.0;
1126  for (auto off = 0u; off < Array.ncol(); ++off)
1127  {
1128 
1129  if (off == j)
1130  continue;
1131 
1132  if ((Array(i, off) == 0))
1133  res += 1.0;
1134  else
1135  res -= 1.0;
1136 
1137  }
1138 
1139  return res;
1140 
1141  };
1142 
1143  PHYLO_COUNTER_LAMBDA(tmp_init) {
1144 
1146  return 0.0;
1147 
1148  };
1149 
1150  counters->add_counter(
1151  tmp_count, tmp_init, nullptr,
1152  PhyloCounterData({duplication, nfunA}),
1153  "Pairwise neofun function " + std::to_string(nfunA) +
1154  get_last_name(duplication)
1155  );
1156 
1157  return;
1158 
1159 }
1160 
1161 //------------------------------------------------------------------------------
1168  size_t nfunA,
1169  size_t nfunB,
1170  size_t duplication = Geese::etype_default
1171 )
1172 {
1173 
1174  PHYLO_COUNTER_LAMBDA(tmp_count)
1175  {
1176 
1177  // Is this node duplication?
1178  IF_NOTMATCHES()
1179  return 0.0;
1180 
1181  const size_t & funA = data[1u];
1182  const size_t & funB = data[2u];
1183 
1184  // Checking scope
1185  if ((i != funA) && (i != funB))
1186  return 0.0;
1187 
1188  // Checking the parent doesn't have funA or has funB
1189  if (!Array.D_ptr()->states[funA] || Array.D_ptr()->states[funB])
1190  return 0.0;
1191 
1192  double res = 0.0;
1193 
1194  if (i == funA)
1195  {
1196 
1197  if (Array(funB, j) == 0u)
1198  {
1199 
1200  for (auto off = 0u; off < Array.ncol(); ++off)
1201  {
1202 
1203  if (off == j)
1204  continue;
1205 
1206  if ((Array(funA, off) == 0u) && (Array(funB, off) == 1u))
1207  res += 1.0;
1208 
1209  }
1210 
1211  }
1212  else
1213  {
1214 
1215  for (auto off = 0u; off < Array.ncol(); ++off)
1216  {
1217 
1218  if (off == j)
1219  continue;
1220 
1221  if ((Array(funA, off) == 1u) && (Array(funB, off) == 0u))
1222  res -= 1.0;
1223 
1224  }
1225 
1226  }
1227 
1228  }
1229  else
1230  {
1231 
1232  if (Array(funA, j) == 0u)
1233  {
1234 
1235  for (auto off = 0u; off < Array.ncol(); ++off)
1236  {
1237 
1238  if (off == j)
1239  continue;
1240 
1241  if ((Array(funA, off) == 1u) && (Array(funB, off) == 0u))
1242  res += 1.0;
1243 
1244  }
1245 
1246  }
1247  else
1248  {
1249 
1250  for (auto off = 0u; off < Array.ncol(); ++off)
1251  {
1252 
1253  if (off == j)
1254  continue;
1255 
1256  if ((Array(funA, off) == 0u) && (Array(funB, off) == 1u))
1257  res -= 1.0;
1258 
1259  }
1260 
1261  }
1262 
1263  }
1264 
1265  return res;
1266 
1267  };
1268 
1269  PHYLO_COUNTER_LAMBDA(tmp_init)
1270  {
1271 
1273  return 0.0;
1274 
1275  };
1276 
1277  counters->add_counter(
1278  tmp_count, tmp_init, nullptr,
1279  PhyloCounterData({duplication, nfunA, nfunB}),
1280  "Neofun from " + std::to_string(nfunA) + " to " +
1281  std::to_string(nfunB) + get_last_name(duplication)
1282  );
1283 
1284  return;
1285 
1286 }
1287 
1288 // -----------------------------------------------------------------------------
1299 inline void counter_co_opt(
1301  size_t nfunA,
1302  size_t nfunB,
1303  size_t duplication = Geese::etype_default
1304 ) {
1305 
1306  PHYLO_COUNTER_LAMBDA(tmp_count)
1307  {
1308 
1309  // Checking whether this is for duplication or not
1310  IF_NOTMATCHES()
1311  return 0.0;
1312 
1313  const size_t funA = data[1u];
1314  const size_t funB = data[2u];
1315 
1316  // If the change is out of scope, then nothing to do
1317  if ((i != funA) & (i != funB))
1318  return 0.0;
1319 
1320  // If the parent does not have the initial state, then it makes no sense
1321  if ((!Array.D_ptr()->states[funA]) || Array.D_ptr()->states[funB])
1322  return 0.0;
1323 
1324  // Checking whether function A or function B changed
1325  if (i == funA) {
1326 
1327  // What was the state of the other function? If B is present, then
1328  // nothing changes.
1329  if (Array(funB, j, false) == 1u)
1330  return 0.0;
1331 
1332  // Iterating through the sibs
1333  double res = 0.0;
1334  for (auto c = 0u; c < Array.ncol(); ++c)
1335  if ((c != j) && (Array(funA, c, false) == 1u) && (Array(funB, c, false) == 1u))
1336  res += 1.0;
1337 
1338  return res;
1339 
1340  } else {
1341 
1342  // What was the state of the other function? If A is not present, then
1343  // nothing changes.
1344  if (Array(funA, j, false) == 0u)
1345  return 0.0;
1346 
1347  // Iterating through the sibs
1348  double res = 0.0;
1349  for (auto c = 0u; c < Array.ncol(); ++c)
1350  if ((c != j) && (Array(funA, c, false) == 1u))
1351  res += (Array(funB, c, false) == 0u) ? 1.0 : -1.0;
1352 
1353  return res;
1354 
1355  }
1356 
1357 
1358 
1359  };
1360 
1361  PHYLO_COUNTER_LAMBDA(tmp_init) {
1362 
1364  if (data.size() != 3u)
1365  throw std::length_error("The counter data should be of length 2.");
1366 
1367  if (data[1u] == data[2u])
1368  throw std::logic_error("Functions A and B should be different from each other.");
1369 
1370  if (data[1u] >= Array.nrow())
1371  throw std::length_error("Function A in counter out of range.");
1372 
1373  if (data[2u] >= Array.nrow())
1374  throw std::length_error("Function B in counter out of range.");
1375 
1376  return 0.0;
1377 
1378  };
1379 
1380  counters->add_counter(
1381  tmp_count, tmp_init, nullptr,
1382  PhyloCounterData({duplication, nfunA, nfunB}),
1383  "Coopt of " + std::to_string(nfunA) + " by " +
1384  std::to_string(nfunB) + get_last_name(duplication)
1385  );
1386 
1387 
1388  return;
1389 
1390 }
1391 
1392 // -----------------------------------------------------------------------------
1399  size_t k,
1400  size_t duplication = Geese::etype_default
1401 )
1402 {
1403 
1404  PHYLO_COUNTER_LAMBDA(tmp_init)
1405  {
1406 
1408 
1409  IF_NOTMATCHES()
1410  return 0.0;
1411 
1412  // At the beginning, all offspring are zero, so we need to
1413  // find at least one state = true.
1414  for (auto s : Array.D_ptr()->states)
1415  if (s)
1416  return Array.ncol() == data[1u] ? 1.0 : 0.0;
1417 
1418  return data[1u] == 0 ? 1.0 : 0.0;
1419 
1420  };
1421 
1422  PHYLO_COUNTER_LAMBDA(tmp_count)
1423  {
1424 
1425  // Checking the type of event
1426  IF_NOTMATCHES()
1427  return 0.0;
1428 
1429  // How many genes diverge the parent
1430  int count = 0;
1431  bool j_diverges = false;
1432  const auto & par_state = Array.D_ptr()->states;
1433 
1434  int k = static_cast<int>(data[1u]);
1435 
1436  for (auto o = 0u; o < Array.ncol(); ++o)
1437  {
1438 
1439  for (auto f = 0u; f < Array.nrow(); ++f)
1440  {
1441 
1442  // Was the gene annotation different from the parent?
1443  if ((Array(f, o) == 1u) != par_state[f])
1444  {
1445 
1446  if (o == j)
1447  j_diverges = true;
1448 
1449  count++;
1450  break;
1451 
1452  }
1453 
1454  }
1455 
1456  }
1457 
1458  // Counts will only be relevant if (count - k) > 1. Otherwise,
1459  // having the j gene changed is not relevant
1460  if (std::abs(count - k) > 1)
1461  return 0.0;
1462 
1463  // Did it used to diverge?
1464  bool j_used_to_diverge = false;
1465  for (auto f = 0u; f < Array.nrow(); ++f)
1466  {
1467 
1468  if (f == i)
1469  {
1470  if (par_state[f]) // Since it is now true, it used to diverge
1471  {
1472  j_used_to_diverge = true;
1473  break;
1474  }
1475  }
1476  else
1477  {
1478 
1479  if (par_state[f] != (Array(f,j) == 1u))
1480  {
1481  j_used_to_diverge = true;
1482  break;
1483  }
1484 
1485  }
1486 
1487  }
1488 
1489  auto count_prev = count;
1490  // Case 1: j hasn't changed
1491  if ((!j_used_to_diverge & !j_diverges) | (j_used_to_diverge & j_diverges))
1492  return 0.0;
1493  // Case 2: j NOW diverges
1494  else if (j_diverges)
1495  count_prev--;
1496  // Case 3: j USED to diverge
1497  else
1498  count_prev++;
1499 
1500  return (count == k ? 1.0 : 0.0) - (count_prev == k ? 1.0 : 0.0);
1501 
1502  };
1503 
1504  counters->add_counter(
1505  tmp_count, tmp_init, nullptr,
1506  PhyloCounterData({duplication, k}),
1507  std::to_string(k) + " genes changing" + get_last_name(duplication)
1508  );
1509 
1510 }
1511 
1512 // -----------------------------------------------------------------------------
1519  double p,
1520  size_t duplication = Geese::etype_default
1521 )
1522 {
1523 
1524  PHYLO_COUNTER_LAMBDA(tmp_init)
1525  {
1526 
1528 
1529  IF_NOTMATCHES()
1530  return 0.0;
1531 
1532  for (auto s : Array.D_ptr()->states)
1533  if (s)
1534  return data[1u] == 100 ? 1.0 : 0.0;
1535 
1536  // Only one if it was specified it was zero
1537  return 1.0;
1538 
1539  };
1540 
1541  PHYLO_COUNTER_LAMBDA(tmp_count)
1542  {
1543 
1544  // Checking the type of event
1545  IF_NOTMATCHES()
1546  return 0.0;
1547 
1548  // Setup
1549  double count = 0.0;
1550 
1551  bool j_diverges = false;
1552  const std::vector< bool > & par_state = Array.D_ptr()->states;
1553 
1554  for (size_t o = 0u; o < Array.ncol(); ++o)
1555  {
1556 
1557  for (size_t f = 0u; f < Array.nrow(); ++f)
1558  {
1559 
1560  // Was the gene annotation different from the parent?
1561  if ((Array(f, o) == 1u) != par_state[f])
1562  {
1563 
1564  if (o == j)
1565  j_diverges = true;
1566 
1567  count += 1.0;
1568  break;
1569 
1570  }
1571 
1572  }
1573 
1574  }
1575 
1576 
1577  bool j_used_to_diverge = false;
1578  for (size_t f = 0u; f < Array.nrow(); ++f)
1579  {
1580 
1581  if (f == i)
1582  {
1583  if (par_state[f])
1584  {
1585  j_used_to_diverge = true;
1586  break;
1587  }
1588  }
1589  else
1590  {
1591 
1592  if (par_state[f] != (Array(f,j) == 1u))
1593  {
1594  j_used_to_diverge = true;
1595  break;
1596  }
1597 
1598  }
1599 
1600  }
1601 
1602  auto count_prev = count;
1603  // Case 1: j hasn't changed
1604  if ((!j_used_to_diverge & !j_diverges) | (j_used_to_diverge & j_diverges))
1605  return 0.0;
1606  // Case 2: j NOW diverges
1607  else if (j_diverges)
1608  count_prev -= 1.0;
1609  // Case 3: j USED to diverge
1610  else
1611  count_prev += 1.0;
1612 
1613  double ncol = static_cast<double>(Array.ncol());
1614  double p = static_cast<double>(data[1u]) / 100.0;
1615 
1616  return ((count/ncol) <= p ? 1.0 : 0.0) - ((count_prev/ncol) <= p ? 1.0 : 0.0);
1617 
1618  };
1619 
1620  counters->add_counter(
1621  tmp_count, tmp_init, nullptr,
1622  PhyloCounterData({duplication, static_cast<size_t>(p * 100)}),
1623  std::to_string(p) + " prop genes changing" + get_last_name(duplication)
1624  );
1625 
1626 }
1627 
1628 // -----------------------------------------------------------------------------
1635  std::vector< size_t > nfun,
1636  size_t duplication = Geese::etype_default
1637 )
1638 {
1639 
1640  PHYLO_COUNTER_LAMBDA(tmp_count)
1641  {
1642 
1643  IF_NOTMATCHES()
1644  return 0.0;
1645 
1646  // All must be false
1647  for (auto s : Array.D_ptr()->states)
1648  {
1649 
1650  if (s)
1651  return 0.0;
1652 
1653  }
1654 
1655  // Is this the function?
1656  if (i != data[1u])
1657  return 0.0;
1658 
1659  // Now computing the change stats
1660  double res = static_cast<double>(Array.ncol()) - 1.0;
1661  for (auto off = 0u; off < Array.ncol(); ++off)
1662  {
1663  if (off == j)
1664  continue;
1665 
1666  if (Array(i, off) == 1u)
1667  res -= 2.0;
1668  }
1669 
1670 
1671  return res;
1672 
1673  };
1674 
1675  PHYLO_COUNTER_LAMBDA(tmp_init) {
1676 
1678  return 0.0;
1679 
1680  };
1681 
1682  for (auto& i : nfun)
1683  counters->add_counter(
1684  tmp_count, tmp_init, nullptr,
1685  PhyloCounterData({duplication, i}),
1686  "First gain " + std::to_string(i) +
1687  get_last_name(duplication)
1688  );
1689 
1690  return;
1691 
1692 }
1693 
1694 // -----------------------------------------------------------------------------
1701  size_t duplication = Geese::etype_default
1702 )
1703 {
1704 
1705  PHYLO_COUNTER_LAMBDA(tmp_count)
1706  {
1707 
1708  IF_NOTMATCHES()
1709  return 0.0;
1710 
1711  // All must be false
1712  for (auto s : Array.D_ptr()->states)
1713  {
1714 
1715  if (s)
1716  return 0.0;
1717 
1718  }
1719 
1720  return 1.0;
1721 
1722  };
1723 
1724  PHYLO_COUNTER_LAMBDA(tmp_init) {
1725 
1727  return 0.0;
1728 
1729  };
1730 
1731  counters->add_counter(
1732  tmp_count, tmp_init, nullptr,
1733  PhyloCounterData({duplication}),
1734  "Overall first gains" +
1735  get_last_name(duplication)
1736  );
1737 
1738  return;
1739 
1740 }
1741 
1742 // -----------------------------------------------------------------------------
1749  size_t duplication = Geese::etype_default
1750 )
1751 {
1752 
1753  PHYLO_COUNTER_LAMBDA(tmp_count)
1754  {
1755 
1756  IF_NOTMATCHES()
1757  return 0.0;
1758 
1759  size_t funpar = Array.D_ptr()->states[i] == 1u;
1760 
1761  // All must be false
1762  double res = 0.0;
1763  for (auto off = 0u; off < Array.ncol(); ++off)
1764  {
1765  if (off == j)
1766  continue;
1767 
1768  if (funpar > Array(i, off))
1769  res -= 1.0;
1770  else if (funpar < Array(i, off))
1771  res += 1.0;
1772  }
1773 
1774  return res;
1775 
1776  };
1777 
1778  PHYLO_COUNTER_LAMBDA(tmp_init) {
1779 
1781 
1782  IF_NOTMATCHES()
1783  return 0.0;
1784 
1785  double res = 0.0;
1786  double n = static_cast<double>(Array.ncol());
1787  for (auto s : Array.D_ptr()->states)
1788  if (s)
1789  res += n * (n - 1.0) / 2.0;
1790 
1791  return res;
1792 
1793  };
1794 
1795  counters->add_counter(
1796  tmp_count, tmp_init, nullptr,
1797  PhyloCounterData({duplication}),
1798  "Pairs of genes changing" +
1799  get_last_name(duplication)
1800  );
1801 
1802  return;
1803 
1804 }
1805 
1806 // -----------------------------------------------------------------------------
1814  size_t nfunA,
1815  size_t nfunB,
1816  size_t duplication = Geese::etype_default
1817 )
1818 {
1819 
1820  PHYLO_COUNTER_LAMBDA(tmp_count)
1821  {
1822 
1823  IF_NOTMATCHES()
1824  return 0.0;
1825 
1826  // Not in the scope
1827  auto funA = data[1u];
1828  auto funB = data[2u];
1829  if ((funA != i) && (funB != i))
1830  return 0.0;
1831 
1832  size_t k = (funA == i) ? funB : funA;
1833 
1834  bool parent_i = Array.D_ptr()->states[i];
1835  bool parent_k = Array.D_ptr()->states[k];
1836 
1837  // if (!parent_i & !parent_k)
1838  // return 0.0;
1839 
1840  double res = 0.0;
1841  // Case 1: (0,0)
1842  if (!parent_i & !parent_k)
1843  {
1844 
1845  if (Array(k, j) == 1u)
1846  return 0.0;
1847 
1848  for (auto off = 0u; off < Array.ncol(); ++off)
1849  {
1850 
1851  if (off == j)
1852  continue;
1853 
1854  if ((Array(i, off) == 0u) && (Array(k, off) == 0u))
1855  res -= 1.0;
1856 
1857  }
1858 
1859  }
1860  else if (parent_i & !parent_k)
1861  {
1862 
1863  if (Array(k, j) == 1u)
1864  return 0.0;
1865 
1866  for (auto off = 0u; off < Array.ncol(); ++off)
1867  {
1868 
1869  if (off == j)
1870  continue;
1871 
1872  if ((Array(i, off) == 1u) && (Array(k, off) == 0u))
1873  res += 1.0;
1874 
1875  }
1876 
1877  }
1878  else if (!parent_i & parent_k)
1879  {
1880 
1881  if (Array(k, j) == 0u)
1882  return 0.0;
1883 
1884  for (auto off = 0u; off < Array.ncol(); ++off)
1885  {
1886 
1887  if (off == j)
1888  continue;
1889 
1890  if ((Array(i, off) == 0u) && (Array(k, off) == 1u))
1891  res += 1.0;
1892 
1893  }
1894 
1895  }
1896  else
1897  {
1898 
1899  if (Array(k, j) == 0u)
1900  return 0.0;
1901 
1902  for (auto off = 0u; off < Array.ncol(); ++off)
1903  {
1904 
1905  if (off == j)
1906  continue;
1907 
1908  if ((Array(i, off) == 1u) && (Array(k, off) == 1u))
1909  res += 1.0;
1910 
1911  }
1912  }
1913 
1914  return res;
1915 
1916  };
1917 
1918  PHYLO_COUNTER_LAMBDA(tmp_init) {
1919 
1920 
1921  IF_NOTMATCHES()
1922  return 0.0;
1923 
1925 
1926  double n = static_cast< double >(Array.ncol());
1927  if (!Array.D_ptr()->states[data[1u]] && !Array.D_ptr()->states[data[2u]])
1928  return n * (n - 1.0) / 2.0;
1929 
1930  return 0.0;
1931 
1932  };
1933 
1934  counters->add_counter(
1935  tmp_count, tmp_init, nullptr,
1936  PhyloCounterData({duplication, nfunA, nfunB}),
1937  "Pariwise preserve (" + std::to_string(nfunA) + ", " +
1938  std::to_string(nfunB) + ")" +get_last_name(duplication)
1939  );
1940 
1941  return;
1942 
1943 }
1944 
1945 // -----------------------------------------------------------------------------
1953  size_t nfunA,
1954  size_t nfunB,
1955  size_t duplication = Geese::etype_default
1956 )
1957 {
1958 
1959  PHYLO_COUNTER_LAMBDA(tmp_count)
1960  {
1961 
1962  IF_NOTMATCHES()
1963  return 0.0;
1964 
1965  // Not in the scope
1966  auto funA = data[1u];
1967  auto funB = data[2u];
1968  if ((funA != i) && (funB != i))
1969  return 0.0;
1970 
1971  size_t k = (funA == i) ? funB : funA;
1972 
1973  double res = 0.0;
1974  if (Array(k, j) == 1)
1975  {
1976 
1977  for (auto off = 0u; off < Array.ncol(); ++off)
1978  {
1979  if (off == j)
1980  continue;
1981 
1982  if ((Array(i,off) == 0u) && (Array(k,off) == 0u))
1983  res -= 1.0;
1984  }
1985 
1986  }
1987  else
1988  {
1989 
1990  for (auto off = 0u; off < Array.ncol(); ++off)
1991  {
1992 
1993  if (off == j)
1994  continue;
1995 
1996  if ((Array(i, off) == 1u))
1997  {
1998 
1999  // j: (0,0)\‍(1,0) -> (1,0)\‍(1,0), so less 1
2000  if (Array(k, off) == 0u)
2001  res -= 1.0;
2002 
2003  }
2004  else
2005  {
2006 
2007  if (Array(k, off) == 1u)
2008  // j: (0,0)\‍(0,1) -> (1,0)\‍(0,1), so less 1
2009  res -= 1.0;
2010  else
2011  // j: (0,0)\‍(0,0) -> (1,0)\‍(0,0), so plus 1
2012  res += 1.0;
2013 
2014  }
2015 
2016  }
2017 
2018  }
2019 
2020 
2021  return res;
2022 
2023  };
2024 
2025  PHYLO_COUNTER_LAMBDA(tmp_init) {
2026 
2028 
2029  return 0.0;
2030 
2031  };
2032 
2033  counters->add_counter(
2034  tmp_count, tmp_init, nullptr,
2035  PhyloCounterData({duplication, nfunA, nfunB}),
2036  "First gain (either " + std::to_string(nfunA) + " or " +
2037  std::to_string(nfunB) + ")" +get_last_name(duplication)
2038  );
2039 
2040  return;
2041 
2042 }
2043 
2045 
2052 inline void rule_leafs(
2053  PhyloSupport * support
2054 ) {
2055 
2056  PHYLO_RULE_LAMBDA(tmp_rule)
2057  {
2058  if (Array.D().has_leaf)
2059  {
2060  return Array(i, j) != 9u;
2061  }
2062 
2063  return true;
2064  };
2065 
2066  support->get_rules()->add_rule(
2067  tmp_rule,
2068  PhyloRuleData(),
2069  "Fix annotated leafs",
2070  "Reduces the support by fixing the cells of annotated leafs."
2071  );
2072 
2073  return;
2074 
2075 }
2076 
2077 
2088  PhyloSupport * support,
2089  size_t pos,
2090  size_t lb,
2091  size_t ub,
2092  size_t duplication = Geese::etype_default
2093 )
2094 {
2095 
2096  PHYLO_RULE_DYN_LAMBDA(tmp_rule)
2097  {
2098 
2099  size_t rule_type = data.duplication;
2100  if (rule_type != Geese::etype_either)
2101  {
2102 
2103  if (Array.D_ptr()->duplication & (rule_type != Geese::etype_duplication))
2104  return true;
2105  else if (!Array.D_ptr()->duplication & (rule_type != Geese::etype_speciation))
2106  return true;
2107 
2108  }
2109 
2110  if (data() < data.lb)
2111  return false;
2112  else if (data() > data.ub)
2113  return false;
2114  else
2115  return true;
2116 
2117  };
2118 
2119  // Checking whether the rule makes sense (dupl)
2120  if (duplication != Geese::etype_either)
2121  {
2122  if (support->get_counters()->operator[](pos).data[0] != duplication)
2123  throw std::logic_error(
2124  "The rule is not compatible with the duplication type of the model." +
2125  std::string("The rule is for ") + get_last_name(duplication) +
2126  std::string(" but the term is for ") + get_last_name(
2127  support->get_counters()->operator[](pos).data[0]
2128  )
2129  );
2130  }
2131 
2132  support->get_rules_dyn()->add_rule(
2133  tmp_rule,
2135  support->get_current_stats(),
2136  pos, lb, ub, duplication
2137  ),
2138  std::string("Limiting changes in '") +
2139  support->get_counters()->get_names()[pos] +
2140  "' to [" + std::to_string(lb) + ", " +
2141  std::to_string(ub) + std::string("]") +
2142  get_last_name(duplication),
2143  std::string("When the support is ennumerated, the number of changes in '") +
2144  support->get_counters()->get_names()[pos] +
2145  std::to_string(pos) + "' is limited to [" + std::to_string(lb) + ", " +
2146  std::to_string(ub) + "]" +
2147  get_last_name(duplication)
2148  );
2149 
2150  return;
2151 
2152 }
2153 
2155 
2156 #undef MAKE_DUPL_VARS
2157 #undef IS_EITHER
2158 #undef IS_DUPLICATION
2159 #undef IS_SPECIATION
2160 #undef IF_MATCHES
2161 #undef IF_NOTMATCHES
2162 
2163 
2164 #endif
static const size_t etype_either
static const size_t etype_duplication
static const size_t etype_speciation
static const size_t etype_default
return res
Data_Type &&counter_ data(std::move(counter_.data))
size_t size_t j
size_t i
barry::Counters< PhyloArray, PhyloCounterData > PhyloCounters
std::vector< std::pair< size_t, size_t > > PhyloRuleData
barry::Support< PhyloArray, PhyloCounterData, PhyloRuleData, PhyloRuleDynData > PhyloSupport
void counter_overall_gains(PhyloCounters *counters, size_t duplication=Geese::etype_default)
Overall functional gains.
Definition: counters.hpp:61
void counter_neofun_a2b(PhyloCounters *counters, size_t nfunA, size_t nfunB, size_t duplication=Geese::etype_default)
Total number of neofunctionalization events.
Definition: counters.hpp:1166
void counter_k_genes_changing(PhyloCounters *counters, size_t k, size_t duplication=Geese::etype_default)
Indicator function. Equals to one if genes changed and zero otherwise.
Definition: counters.hpp:1397
void counter_overall_changes(PhyloCounters *counters, size_t duplication=Geese::etype_default)
Total number of changes. Use this statistic to account for "preservation".
Definition: counters.hpp:646
void counter_subfun(PhyloCounters *counters, size_t nfunA, size_t nfunB, size_t duplication=Geese::etype_default)
Total count of Sub-functionalization events.
Definition: counters.hpp:705
void counter_genes_changing(PhyloCounters *counters, size_t duplication=Geese::etype_default)
Keeps track of how many genes are changing (either 0, 1, or 2 if dealing with regular trees....
Definition: counters.hpp:231
void counter_co_opt(PhyloCounters *counters, size_t nfunA, size_t nfunB, size_t duplication=Geese::etype_default)
Function co-opting.
Definition: counters.hpp:1299
#define PHYLO_RULE_DYN_LAMBDA(a)
Definition: counters.hpp:42
#define IF_NOTMATCHES()
Definition: counters.hpp:22
void counter_loss(PhyloCounters *counters, std::vector< size_t > nfun, size_t duplication=Geese::etype_default)
Total count of losses for an specific function.
Definition: counters.hpp:594
#define PHYLO_CHECK_MISSING()
Definition: counters.hpp:45
void counter_prop_genes_changing(PhyloCounters *counters, size_t duplication=Geese::etype_default)
Keeps track of how many genes are changing (either 0, 1, or 2 if dealing with regular trees....
Definition: counters.hpp:382
void counter_gains(PhyloCounters *counters, std::vector< size_t > nfun, size_t duplication=Geese::etype_default)
Functional gains for a specific function (nfun).
Definition: counters.hpp:99
void counter_pairwise_overall_change(PhyloCounters *counters, size_t duplication=Geese::etype_default)
Used when all the functions are in 0 (like the root node prob.)
Definition: counters.hpp:1747
void counter_gains_from_0(PhyloCounters *counters, std::vector< size_t > nfun, size_t duplication=Geese::etype_default)
Used when all the functions are in 0 (like the root node prob.)
Definition: counters.hpp:1633
void counter_overall_loss(PhyloCounters *counters, size_t duplication=Geese::etype_default)
Overall functional loss.
Definition: counters.hpp:484
void counter_cogain(PhyloCounters *counters, size_t nfunA, size_t nfunB, size_t duplication=Geese::etype_default)
Co-evolution (joint gain or loss)
Definition: counters.hpp:794
void counter_less_than_p_prop_genes_changing(PhyloCounters *counters, double p, size_t duplication=Geese::etype_default)
Indicator function. Equals to one if genes changed and zero otherwise.
Definition: counters.hpp:1517
void counter_pairwise_neofun_singlefun(PhyloCounters *counters, size_t nfunA, size_t duplication=Geese::etype_default)
Total number of neofunctionalization events sum_u sum_{w < u} [x(u,a)*(1 - x(w,a)) + (1 - x(u,...
Definition: counters.hpp:1102
void counter_preserve_pseudogene(PhyloCounters *counters, size_t nfunA, size_t nfunB, size_t duplication=Geese::etype_default)
Keeps track of how many pairs of genes preserve pseudostate.
Definition: counters.hpp:300
void counter_pairwise_preserving(PhyloCounters *counters, size_t nfunA, size_t nfunB, size_t duplication=Geese::etype_default)
Used when all the functions are in 0 (like the root node prob.)
Definition: counters.hpp:1812
#define IF_MATCHES()
Definition: counters.hpp:20
std::string get_last_name(size_t d)
Definition: counters.hpp:48
void counter_overall_gains_from_0(PhyloCounters *counters, size_t duplication=Geese::etype_default)
Used when all the functions are in 0 (like the root node prob.)
Definition: counters.hpp:1699
void counter_longest(PhyloCounters *counters, size_t duplication=Geese::etype_default)
Longest branch mutates (either by gain or by loss)
Definition: counters.hpp:851
void counter_gains_k_offspring(PhyloCounters *counters, std::vector< size_t > nfun, size_t k=1u, size_t duplication=Geese::etype_default)
k genes gain function nfun
Definition: counters.hpp:159
#define PHYLO_COUNTER_LAMBDA(a)
Definition: counters.hpp:39
#define PHYLO_RULE_LAMBDA(a)
Extension of a simple counter.
Definition: counters.hpp:36
void counter_neofun(PhyloCounters *counters, size_t nfunA, size_t nfunB, size_t duplication=Geese::etype_default)
Total number of neofunctionalization events.
Definition: counters.hpp:1021
void counter_maxfuns(PhyloCounters *counters, size_t lb, size_t ub, size_t duplication=Geese::etype_default)
Cap the number of functions per gene.
Definition: counters.hpp:532
void counter_pairwise_first_gain(PhyloCounters *counters, size_t nfunA, size_t nfunB, size_t duplication=Geese::etype_default)
Used when all the functions are in 0 (like the root node prob.)
Definition: counters.hpp:1951
void rule_dyn_limit_changes(PhyloSupport *support, size_t pos, size_t lb, size_t ub, size_t duplication=Geese::etype_default)
Overall functional gains.
Definition: counters.hpp:2087
void rule_leafs(PhyloSupport *support)
Definition: counters.hpp:2052