barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
typedefs.hpp
Go to the documentation of this file.
1 #ifndef BARRY_TYPEDEFS_HPP
2 #define BARRY_TYPEDEFS_HPP 1
3 
4 // Configuration ---------------------------------------------------------------
6 
7 // Debug
8 #include "barry-debug.hpp"
9 
10 // Progress bar
11 #include "progress.hpp"
12 
13 // -----------------------------------------------------------------------------
14 
15 // Basic types
16 // See this thread
17 // https://stackoverflow.com/questions/35055042/difference-between-uint8-t-uint-fast8-t-and-uint-least8-t
18 
19 // Mostly relevant for the BArray definition -----------------------------------
20 
21 // Constants
/**
 * Integer constants used to specify which cell should be checked.
 *
 * NOTE(review): ONE and TWO presumably refer to the first and second cell of
 * a two-cell operation -- confirm against the callers.
 */
namespace CHECK {
    constexpr int BOTH = -1;
    constexpr int NONE = 0;
    constexpr int ONE  = 1;
    constexpr int TWO  = 2;
}
32 
/**
 * Integer constants used to specify which cell should be checked to exist or
 * not.
 *
 * BOTH, NONE, ONE and TWO carry the same values as their counterparts in the
 * CHECK namespace. TWO has to differ from ONE: with both equal to 1, code that
 * compares a value against EXISTS::ONE and EXISTS::TWO cannot tell the two
 * cases apart.
 */
namespace EXISTS {
    const int BOTH = -1;
    const int NONE = 0;
    const int ONE  = 1;
    const int TWO  = 2;

    // NOTE(review): presumably these select how a cell of unknown status is
    // treated -- confirm against the callers. The spelling UKNOWN is kept
    // because it is part of the public interface.
    const int UKNOWN  = -1;
    const int AS_ZERO = 0;
    const int AS_ONE  = 1;
}
47 
/**
 * A collection of counts: every element pairs a vector of doubles with a
 * size_t.
 *
 * NOTE(review): presumably (vector of statistics, number of times observed)
 * -- confirm against the code that fills it.
 */
using Counts_type = std::vector< std::pair< std::vector< double >, size_t > >;
52 
53 // class Counts_type
54 // {
55 // private:
56 // std::vector< std::uint_fast32_t > stats_counts;
57 // std::vector< double > stats_values;
58 // size_t n_stats;
59 // size_t n_obs;
60 // public:
61 // std::vector< double > operator()
62 // }
63 
64 template <class Type_A > class Cell;
65 
66 template<typename Cell_Type>
68 
69 template<typename Cell_Type>
71 
/**
 * A wrapper class to store source, target, val from a BArray object.
 *
 * The three vectors are public and are resized together by resize().
 *
 * No destructor, copy or move members are declared on purpose: a
 * user-provided destructor would suppress the implicitly generated move
 * constructor and move assignment, turning every move of an Entries object
 * into a deep copy of its three vectors.
 *
 * @tparam Cell_Type Type of the values stored in `val`.
 */
template<typename Cell_Type>
class Entries {
public:
    std::vector< size_t > source;
    std::vector< size_t > target;
    std::vector< Cell_Type > val;

    /** Creates an object whose three vectors are empty. */
    Entries() = default;

    /**
     * Creates an object whose three vectors are empty but have storage
     * reserved for `n` elements each.
     *
     * @param n Number of elements to reserve space for.
     */
    Entries(size_t n) {
        source.reserve(n);
        target.reserve(n);
        val.reserve(n);
    }

    /**
     * Resizes `source`, `target` and `val` to hold exactly `n` elements.
     *
     * @param n New size of the three vectors.
     */
    void resize(size_t n) {
        source.resize(n);
        target.resize(n);
        val.resize(n);
    }

};
102 
// Relevant for anything using vecHasher function ------------------------------

/**
 * Hash functor for std::vector< T >, usable as the hasher of an unordered
 * container keyed by vectors.
 *
 * The hash of the first element seeds the result; every following element is
 * folded in with a shift/XOR mix that uses the 32 bit golden-ratio constant
 * 0x9e3779b9.
 *
 * @tparam T Element type; std::hash< T > must be available.
 */
template <typename T>
struct vecHasher
{

    /**
     * @param dat Vector to hash.
     * @return 0 for an empty vector, otherwise the combined hash of all the
     * elements of `dat`, taken in order.
     */
    std::size_t operator()(std::vector< T > const& dat) const noexcept
    {

        // An empty vector has no first element to seed the hash with; reading
        // dat[0u] would be out of bounds.
        if (dat.empty())
            return 0u;

        std::hash< T > hasher;
        std::size_t hash = hasher(dat[0u]);

        // ^ makes bitwise XOR
        // 0x9e3779b9 is a 32 bit constant (comes from the golden ratio)
        // << is a shift operator, something like lhs * 2^(rhs)
        for (std::size_t i = 1u; i < dat.size(); ++i)
            hash ^= hasher(dat[i]) + 0x9e3779b9 + (hash<<6) + (hash>>2);

        return hash;

    }

};

/**
 * Unordered map keyed by std::vector< Ta > and hashed with vecHasher< Ta >.
 *
 * @tparam Ta Element type of the key vectors (double by default).
 * @tparam Tb Mapped type (size_t by default).
 */
template<typename Ta = double, typename Tb = size_t>
using MapVec_type = std::unordered_map< std::vector< Ta >, Tb, vecHasher<Ta>>;
129 
/**
 * Ascending sorting an array.
 *
 * Computes the order of the rows of a matrix whose element (i, j) is read
 * from `v[nrows * j + i + start]`. Rows are compared column by column: the
 * first column in which two rows differ decides which one comes first.
 *
 * @param v Pointer to the data.
 * @param start Offset added to every position read from `v`.
 * @param ncols Number of columns compared.
 * @param nrows Number of rows to order.
 * @return Vector of length `nrows` with the row indices in ascending order.
 * The relative order of rows that are equal in all columns is unspecified
 * (std::sort is not stable).
 */
inline std::vector< size_t > sort_array(
    const double * v,
    size_t start,
    size_t ncols,
    size_t nrows
    ) {

    // Start from the identity permutation 0, 1, ..., nrows - 1
    std::vector< size_t > order(nrows);
    std::iota(order.begin(), order.end(), 0);

    // "Row lhs goes before row rhs": decided by the first differing column
    const auto row_less = [=](size_t lhs, size_t rhs) {

        for (size_t col = 0u; col < ncols; ++col)
        {
            const size_t offset = nrows * col + start;
            const double left   = v[offset + lhs];
            const double right  = v[offset + rhs];

            if (left != right)
                return left < right;
        }

        // All the columns are equal, so neither row goes first
        return false;
    };

    std::sort(order.begin(), order.end(), row_less);

    return order;

}
169 
170 
171 // Mostly relevant in the case of the stats count functions -------------------
172 template <typename Cell_Type, typename Data_Type> class BArray;
173 template <typename Array_Type, typename Counter_Type> class Counter;
174 template <typename Cell_Type, typename Data_Type> class BArrayDense;
175 
/**
 * Counter and rule functions.
 *
 * Callable that receives an array, two size_t values and a data object, and
 * returns a double.
 *
 * NOTE(review): the two size_t arguments are presumably the row and column
 * of the cell being evaluated -- confirm against the callers.
 */
template <typename Array_Type, typename Data_Type>
using Counter_fun_type = std::function<
    double(const Array_Type &, size_t, size_t, Data_Type &)
    >;

/**
 * Same arguments as Counter_fun_type, but the callable returns a bool.
 */
template <typename Array_Type, typename Data_Type>
using Rule_fun_type = std::function<
    bool(const Array_Type &, size_t, size_t, Data_Type &)
    >;

/**
 * Hasher function used by the counter.
 *
 * Callable that receives an array and a pointer to a data object, and
 * returns a vector of doubles.
 */
template <typename Array_Type, typename Data_Type>
using Hasher_fun_type = std::function<
    std::vector<double>(const Array_Type &, Data_Type *)
    >;
201 
202 // Misc ------------------------------------------------------------------------
/**
 * Compares if -a- and -b- are equal.
 *
 * @tparam T Element type; must support operator==.
 * @param a First vector.
 * @param b Second vector.
 * @return true when every element of `a` compares equal to the element of
 * `b` in the same position. Two empty vectors are equal.
 * @throws std::length_error when `a` and `b` have different lengths.
 */
template <typename T>
inline bool vec_equal(
    const std::vector< T > & a,
    const std::vector< T > & b
) {

    if (a.size() != b.size())
    {

        std::string err = "-a- and -b- should have the same length. length(a) = " +
            std::to_string(a.size()) + " and length(b) = " + std::to_string(b.size()) +
            std::string(".");
        throw std::length_error(err);

    }

    // The index is checked before any element is read, so empty vectors never
    // touch a[0].
    for (size_t i = 0u; i < a.size(); ++i)
    {
        if (!(a[i] == b[i]))
            return false;
    }

    return true;
}
233 
/**
 * Compares if -a- and -b- are approximately equal, element by element.
 *
 * @tparam T Element type; `a[i] - b[i]` must be accepted by std::fabs.
 * @param a First vector.
 * @param b Second vector.
 * @param eps Tolerance. Two elements match when the absolute value of their
 * difference is strictly smaller than `eps`. With the default of 1e-100 this
 * is practically an exact comparison for values of ordinary magnitude.
 * @return true when every pair of elements matches. Two empty vectors are
 * equal.
 * @throws std::length_error when `a` and `b` have different lengths.
 */
template <typename T>
inline bool vec_equal_approx(
    const std::vector< T > & a,
    const std::vector< T > & b,
    double eps = 1e-100
) {

    if (a.size() != b.size())
    {
        std::string err = "-a- and -b- should have the same length. length(a) = " +
            std::to_string(a.size()) + " and length(b) = " + std::to_string(b.size()) +
            std::string(".");
        throw std::length_error(err);
    }

    // The index is checked before any element is read, so empty vectors never
    // touch a[0]. The test is written as !(x < eps) so that a NaN difference
    // counts as a mismatch.
    for (size_t i = 0u; i < a.size(); ++i)
    {
        if (!(static_cast<double>(std::fabs(a[i] - b[i])) < eps))
            return false;
    }

    return true;
}
258 
/**
 * Inner product of the first `n` elements of `a` and `b`.
 *
 * Each product is computed in `T`; the sum is accumulated in a double and
 * converted back to `T` on return.
 *
 * @tparam T Element type.
 * @param a Pointer to the first array.
 * @param b Pointer to the second array.
 * @param n Number of elements to multiply and add.
 * @return The sum of a[k] * b[k] for k in [0, n); zero when `n` is 0.
 */
#if defined(__OPENMP) || defined(_OPENMP)
#pragma omp declare simd
#endif
template <typename T>
inline T vec_inner_prod(
    const T * a,
    const T * b,
    size_t n
) {

    double total = 0.0;
    #if defined(__OPENMP) || defined(_OPENMP)
    #pragma omp simd reduction(+:total)
    #elif defined(__GNUC__) && !defined(__clang__)
    #pragma GCC ivdep
    #endif
    for (size_t k = 0u; k < n; ++k)
        total += a[k] * b[k];

    return total;

}

/**
 * Explicit specialization of vec_inner_prod for double. Its body mirrors the
 * generic version.
 */
#if defined(__OPENMP) || defined(_OPENMP)
#pragma omp declare simd
#endif
template <>
inline double vec_inner_prod(
    const double * a,
    const double * b,
    size_t n
) {

    double total = 0.0;
    #if defined(__OPENMP) || defined(_OPENMP)
    #pragma omp simd reduction(+:total)
    #elif defined(__GNUC__) && !defined(__clang__)
    #pragma GCC ivdep
    #endif
    for (size_t k = 0u; k < n; ++k)
        total += a[k] * b[k];

    return total;

}
304 
305 #endif
306 
std::map< Ta, Tb > Map
Baseline class for binary arrays.
Baseline class for binary arrays.
Entries in BArray. For now, it only has two members:
Definition: cell-bones.hpp:10
A counter function based on change statistics.
A wrapper class to store source, target, val from a BArray object.
Definition: typedefs.hpp:78
void resize(size_t n)
Definition: typedefs.hpp:94
std::vector< size_t > target
Definition: typedefs.hpp:81
Entries()
Definition: typedefs.hpp:84
Entries(size_t n)
Definition: typedefs.hpp:85
std::vector< size_t > source
Definition: typedefs.hpp:80
std::vector< Cell_Type > val
Definition: typedefs.hpp:82
~Entries()
Definition: typedefs.hpp:92
return res
Data_Type hasher(counter_.hasher)
size_t i
Data_Type &&counter_ noexcept
Integer constants used to specify which cell should be checked.
Definition: typedefs.hpp:26
const int TWO
Definition: typedefs.hpp:30
const int NONE
Definition: typedefs.hpp:28
const int BOTH
Definition: typedefs.hpp:27
const int ONE
Definition: typedefs.hpp:29
Integer constants used to specify which cell should be checked to exist or not.
Definition: typedefs.hpp:37
const int AS_ZERO
Definition: typedefs.hpp:44
const int BOTH
Definition: typedefs.hpp:38
const int NONE
Definition: typedefs.hpp:39
const int ONE
Definition: typedefs.hpp:40
const int AS_ONE
Definition: typedefs.hpp:45
const int UKNOWN
Definition: typedefs.hpp:43
const int TWO
Definition: typedefs.hpp:41
std::size_t operator()(std::vector< T > const &dat) const noexcept
Definition: typedefs.hpp:108
std::unordered_map< std::vector< Ta >, Tb, vecHasher< Ta > > MapVec_type
Definition: typedefs.hpp:128
bool vec_equal(const std::vector< T > &a, const std::vector< T > &b)
Compares if -a- and -b- are equal.
Definition: typedefs.hpp:210
std::vector< std::pair< std::vector< double >, size_t > > Counts_type
Definition: typedefs.hpp:51
Map< size_t, Cell< Cell_Type > > Row_type
Definition: typedefs.hpp:67
std::function< bool(const Array_Type &, size_t, size_t, Data_Type &)> Rule_fun_type
Definition: typedefs.hpp:190
std::function< std::vector< double >(const Array_Type &, Data_Type *)> Hasher_fun_type
Hasher function used by the counter.
Definition: typedefs.hpp:200
std::vector< size_t > sort_array(const double *v, size_t start, size_t ncols, size_t nrows)
Ascending sorting an array.
Definition: typedefs.hpp:141
T vec_inner_prod(const T *a, const T *b, size_t n)
Definition: typedefs.hpp:263
std::function< double(const Array_Type &, size_t, size_t, Data_Type &)> Counter_fun_type
Counter and rule functions.
Definition: typedefs.hpp:187
Map< size_t, Cell< Cell_Type > * > Col_type
Definition: typedefs.hpp:70
bool vec_equal_approx(const std::vector< T > &a, const std::vector< T > &b, double eps=1e-100)
Definition: typedefs.hpp:235