barry: Your go-to motif accountant  0.0-1
Full enumeration of sample space and fast count of sufficient statistics for binary arrays
formula.hpp
Go to the documentation of this file.
1 #ifndef BARRY_DEFM_MOTIF_FORMULA_HPP
2 #define BARRY_DEFM_MOTIF_FORMULA_HPP
46 inline void defm_motif_parser(
47  std::string formula,
48  std::vector< size_t > & locations,
49  std::vector< bool > & signs,
50  size_t m_order,
51  size_t y_ncol,
52  std::string & covar_name,
53  std::string & vname
54 )
55 {
56  // Resetting the results
57  locations.clear();
58  signs.clear();
59 
60  std::regex pattern_intercept(
61  std::string("\\{\\s*[01]?y[0-9]+(_[0-9]+)?(\\s*,\\s*[01]?y[0-9]+(_[0-9]+)?)*\\s*\\}") +
62  std::string("(\\s*x\\s*[^\\s]+([(].+[)])?\\s*)?")
63  );
64  std::regex pattern_transition(
65  std::string("\\{\\s*[01]?y[0-9]+(_[0-9]+)?(\\s*,\\s*[01]?y[0-9]+(_[0-9]+)?)*\\}\\s*(>)\\s*") +
66  std::string("\\{\\s*[01]?y[0-9]+(_[0-9]+)?(\\s*,\\s*[01]?y[0-9]+(_[0-9]+)?)*\\s*\\}") +
67  std::string("(\\s*x\\s*[^\\s]+([(].+[)])?\\s*)?")
68  );
69 
70  auto empty = std::sregex_iterator();
71 
72  // This column-major vector indicates true if the variable has already been
73  // selected
74  std::vector< bool > selected((m_order + 1) * y_ncol, false);
75 
76  std::smatch match;
77  std::regex_match(formula, match, pattern_transition);
78  if (!match.empty())
79  {
80 
81  if (m_order == 0)
82  throw std::logic_error("Transition effects are only valid when the data is a markov process.");
83 
84  // Matching the pattern '| [no spaces]$'
85  std::regex pattern_conditional(".+[}]\\s+x\\s+([^(]+)([(][^)]+[)])?\\s*$");
86  std::smatch condmatch;
87  std::regex_match(formula, condmatch, pattern_conditional);
88  // Extracting the [no_spaces] part of the conditional
89  if (!condmatch.empty())
90  {
91  covar_name = condmatch[1].str();
92  vname = condmatch[2].str();
93 
94  // Removing starting and ending parenthesis
95  if (vname != "")
96  vname = vname.substr(1, vname.size() - 2);
97 
98  }
99 
100  // Will indicate where the arrow is located at
101  size_t arrow_position = match.position(4u);
102 
103  // This pattern will match
104  std::regex pattern("(0?)y([0-9]+)(_([0-9]+))?");
105 
106  auto iter = std::sregex_iterator(formula.begin(), formula.end(), pattern);
107 
108  for (auto i = iter; i != empty; ++i)
109  {
110 
111  // Baseline position
112  size_t current_location = i->position(0u);
113 
114  // First value true/false
115  bool is_positive = true;
116  if (i->operator[](1u).str() == "0")
117  is_positive = false;
118 
119  // Variable position
120  size_t y_col = std::stoul(i->operator[](2u).str());
121  if (y_col >= y_ncol)
122  throw std::logic_error("The proposed column is out of range.");
123 
124  // Time location
125  size_t y_row;
126  std::string tmp_str = i->operator[](4u).str();
127  if (m_order > 1)
128  {
129  // If missing, we replace with the location
130  if (tmp_str == "")
131  {
132 
133  if (current_location > arrow_position)
134  y_row = m_order;
135  else
136  throw std::logic_error("LHS of transition must specify time when m_order > 1");
137 
138  } else
139  y_row = std::stoul(tmp_str);
140 
141  if (y_row > m_order)
142  throw std::logic_error("The proposed row is out of range.");
143 
144 
145  } else {
146 
147  // If missing, we replace with the location
148  if (tmp_str != "")
149  y_row = std::stoul(tmp_str);
150  else
151  y_row = (current_location < arrow_position ? 0u: 1u);
152 
153  }
154 
155  if (selected[y_col * (m_order + 1) + y_row])
156  throw std::logic_error(
157  "The term " + i->str() + " shows more than once in the formula.");
158 
159  // Only the end of the chain can be located at position after the
160  // arrow
161  if ((current_location > arrow_position) && (y_row != m_order))
162  throw std::logic_error(
163  "Only the row " + std::to_string(m_order) +
164  " can be specified at the RHS of the motif."
165  );
166 
167  selected[y_col * (m_order + 1) + y_row] = true;
168 
169  locations.push_back(y_col * (m_order + 1) + y_row);
170  signs.push_back(is_positive);
171 
172 
173  }
174 
175  return;
176 
177  }
178 
179  std::regex_match(formula, match, pattern_intercept);
180  if (!match.empty())
181  {
182 
183  // Matching the pattern '| [no spaces]$'
184  std::regex pattern_conditional(".+[}]\\s+x\\s+([^(]+)([(][^)]+[)])?\\s*$");
185  std::smatch condmatch;
186  std::regex_match(formula, condmatch, pattern_conditional);
187  // Extracting the [no_spaces] part of the conditional
188  if (!condmatch.empty())
189  {
190  covar_name = condmatch[1].str();
191  vname = condmatch[2].str();
192 
193  // Removing starting and ending parenthesis
194  if (vname != "")
195  vname = vname.substr(1, vname.size() - 2);
196  }
197 
198  // This pattern will match
199  std::regex pattern("(0?)y([0-9]+)(_([0-9]+))?");
200 
201  auto iter = std::sregex_iterator(formula.begin(), formula.end(), pattern);
202 
203  for (auto i = iter; i != empty; ++i)
204  {
205 
206  // First value true/false
207  bool is_positive = true;
208  if (i->operator[](1u).str() == "0")
209  is_positive = false;
210 
211  // Variable position
212  size_t y_col = std::stoul(i->operator[](2u).str());
213  if (y_col >= y_ncol)
214  throw std::logic_error("The proposed column is out of range.");
215 
216  // Time location
217  size_t y_row;
218  if (i->operator[](4u).str() == "") // Assume is the last
219  y_row = m_order;
220  else {
221 
222  y_row = std::stoul(i->operator[](4u).str());
223 
224  if (y_row != m_order)
225  throw std::logic_error(
226  std::string("Intercept motifs cannot feature past events. ") +
227  std::string("Only transition motifs can: {...} > {...}.")
228  );
229 
230  }
231 
232  if (selected[y_col * (m_order + 1) + y_row])
233  throw std::logic_error(
234  "The term " + i->str() + " shows more than once in the formula.");
235 
236  selected[y_col * (m_order + 1) + y_row] = true;
237 
238  locations.push_back(y_col * (m_order + 1) + y_row);
239  signs.push_back(is_positive);
240 
241 
242  }
243 
244  return;
245 
246  }
247 
248  throw std::logic_error(
249  "The motif specified in the formula: " + formula +
250  " has the wrong syntax."
251  );
252 
253 }
254 #endif
255 
size_t i
void defm_motif_parser(std::string formula, std::vector< size_t > &locations, std::vector< bool > &signs, size_t m_order, size_t y_ncol, std::string &covar_name, std::string &vname)
Parses a motif formula.
Definition: formula.hpp:46