bpp-seq3  3.0.0
SymbolListTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Numeric/NumTools.h>
8 #include <Bpp/Utils/MapTools.h>
9 
10 #include "Alphabet/AlphabetTools.h"
11 #include "SymbolListTools.h"
12 
13 // From the STL:
14 #include <algorithm>
15 
16 using namespace std;
17 
18 using namespace bpp;
19 
20 /******************************************************************************/
21 
22 bool SymbolListTools::hasGap(const IntSymbolListInterface& list)
23 {
24  // Main loop : for all characters in list
25  for (size_t i = 0; i < list.size(); ++i)
26  {
27  if (list.getAlphabet()->isGap(list[i]))
28  return true;
29  }
30  return false;
31 }
32 
33 bool SymbolListTools::hasGap(const ProbabilisticSymbolListInterface& list)
34 {
35  // Main loop : for all characters in list
36  for (size_t i = 0; i < list.size(); ++i)
37  {
38  if (VectorTools::sum(list[i]) <= NumConstants::TINY())
39  return true;
40  }
41  return false;
42 }
43 
44 /******************************************************************************/
45 
46 bool SymbolListTools::hasUnresolved(const IntSymbolListInterface& list)
47 {
48  // Main loop : for all characters in list
49  for (size_t i = 0; i < list.size(); ++i)
50  {
51  if (list.getAlphabet()->isUnresolved(list[i]))
52  return true;
53  }
54  return false;
55 }
56 
57 /******************************************************************************/
58 
59 bool SymbolListTools::isGapOnly(const IntSymbolListInterface& list)
60 {
61  // Main loop : for all characters in list
62  for (size_t i = 0; i < list.size(); ++i)
63  {
64  if (!list.getAlphabet()->isGap(list[i]))
65  return false;
66  }
67  return true;
68 }
69 
70 
71 bool SymbolListTools::isGapOnly(const ProbabilisticSymbolListInterface& list)
72 {
73  // Main loop : for all characters in list
74  for (size_t i = 0; i < list.size(); ++i)
75  {
76  if (VectorTools::sum(list[i]) > NumConstants::TINY())
77  return false;
78  }
79  return true;
80 }
81 
82 /******************************************************************************/
83 
84 bool SymbolListTools::isGapOrUnresolvedOnly(const IntSymbolListInterface& list)
85 {
86  // Main loop : for all characters in list
87  for (size_t i = 0; i < list.size(); ++i)
88  {
89  if (!list.getAlphabet()->isGap(list[i]) && !list.getAlphabet()->isUnresolved(list[i]))
90  return false;
91  }
92  return true;
93 }
94 
95 bool SymbolListTools::isGapOrUnresolvedOnly(const ProbabilisticSymbolListInterface& list)
96 {
97  // Main loop : for all characters in list
98  for (size_t i = 0; i < list.size(); ++i)
99  {
100  double ss = VectorTools::sum(list[i]);
101  if (ss > NumConstants::TINY() && ss < 1.)
102  return false;
103  }
104  return true;
105 }
106 
107 /******************************************************************************/
108 
109 bool SymbolListTools::hasUnknown(const IntSymbolListInterface& list)
110 {
111  // Main loop : for all characters in list
112  for (size_t i = 0; i < list.size(); ++i)
113  {
114  if (list[i] == list.getAlphabet()->getUnknownCharacterCode())
115  return true;
116  }
117  return false;
118 }
119 
120 bool SymbolListTools::hasUnknown(const ProbabilisticSymbolListInterface& list)
121 {
122  // Main loop : for all characters in list
123  for (size_t i = 0; i < list.size(); ++i)
124  {
125  double ss = VectorTools::sum(list[i]);
126  if (ss > 1.)
127  return true;
128  }
129  return false;
130 }
131 
132 /******************************************************************************/
133 
134 bool SymbolListTools::isComplete(const IntSymbolListInterface& list)
135 {
136  // Main loop : for all characters in list
137  for (size_t i = 0; i < list.size(); ++i)
138  {
139  if (list.getAlphabet()->isGap(list[i]) || list.getAlphabet()->isUnresolved(list[i]))
140  return false;
141  }
142  return true;
143 }
144 
145 bool SymbolListTools::isComplete(const ProbabilisticSymbolListInterface& list)
146 {
147  // Main loop : for all characters in list
148  for (size_t i = 0; i < list.size(); ++i)
149  {
150  double ss = VectorTools::sum(list[i]);
151  if (ss < NumConstants::TINY())// || ss > 1.)
152  return false;
153  }
154  return true;
155 }
156 
157 /******************************************************************************/
158 
159 size_t SymbolListTools::numberOfGaps(const IntSymbolListInterface& list)
160 {
161  size_t n = 0;
162 
163  // Main loop : for all characters in list
164  for (size_t i = 0; i < list.size(); ++i)
165  {
166  if (list.getAlphabet()->isGap(list[i]))
167  n++;
168  }
169  return n;
170 }
171 
172 size_t SymbolListTools::numberOfGaps(const ProbabilisticSymbolListInterface& list)
173 {
174  size_t n = 0;
175 
176  // Main loop : for all characters in list
177  for (size_t i = 0; i < list.size(); ++i)
178  {
179  if (VectorTools::sum(list[i]) < NumConstants::TINY())
180  n++;
181  }
182  return n;
183 }
184 
185 /******************************************************************************/
186 
187 size_t SymbolListTools::numberOfUnresolved(const IntSymbolListInterface& list)
188 {
189  size_t n = 0;
190 
191  // Main loop : for all characters in list
192  for (size_t i = 0; i < list.size(); ++i)
193  {
194  if (list.getAlphabet()->isUnresolved(list[i]))
195  n++;
196  }
197  return n;
198 }
199 
200 size_t SymbolListTools::numberOfUnresolved(const ProbabilisticSymbolListInterface& list)
201 {
202  size_t n = 0;
203 
204  // Main loop : for all characters in list
205  for (size_t i = 0; i < list.size(); ++i)
206  {
207  if (VectorTools::sum(list[i]) > 1.)
208  n++;
209  }
210  return n;
211 }
212 
213 /******************************************************************************/
214 
215 
216 bool SymbolListTools::areSymbolListsIdentical(
217  const IntSymbolListInterface& list1,
218  const IntSymbolListInterface& list2)
219 {
220  // IntCoreSymbolList's size and content checking
221  if (list1.getAlphabet()->getAlphabetType() != list2.getAlphabet()->getAlphabetType())
222  return false;
223  if (list1.size() != list2.size())
224  return false;
225  else
226  {
227  for (size_t i = 0; i < list1.size(); ++i)
228  {
229  if (list1[i] != list2[i])
230  return false;
231  }
232  return true;
233  }
234 }
235 
236 bool SymbolListTools::areSymbolListsIdentical(
239 {
240  // IntCoreSymbolList's size and content checking
241  if (list1.getAlphabet()->getAlphabetType() != list2.getAlphabet()->getAlphabetType())
242  return false;
243  if (list1.size() != list2.size())
244  return false;
245  else
246  {
247  for (size_t i = 0; i < list1.size(); ++i)
248  {
249  if (list1[i] != list2[i])
250  return false;
251  }
252  return true;
253  }
254 }
255 
256 /******************************************************************************/
257 
258 bool SymbolListTools::isConstant(
259  const IntSymbolListInterface& list,
260  bool ignoreUnknown,
261  bool unresolvedRaisesException)
262 {
263  // Emspty list checking
264  if (list.size() == 0)
265  throw Exception("SymbolListTools::isConstant: Incorrect specified list, size must be > 0");
266 
267  // For all list's characters
268  int gap = list.getAlphabet()->getGapCharacterCode();
269  if (ignoreUnknown)
270  {
271  int s = list[0];
272  int unknown = list.getAlphabet()->getUnknownCharacterCode();
273  size_t i = 0;
274  while (i < list.size() && (s == gap || s == unknown))
275  {
276  s = list[i];
277  i++;
278  }
279  if (s == unknown || s == gap)
280  {
281  if (unresolvedRaisesException)
282  throw Exception("SymbolListTools::isConstant: IntCoreSymbolList is only made of gaps or generic characters.");
283  else
284  return false;
285  }
286  while (i < list.size())
287  {
288  if (list[i] != s && list[i] != gap && list[i] != unknown)
289  return false;
290  i++;
291  }
292  }
293  else
294  {
295  int s = list[0];
296  size_t i = 0;
297  while (i < list.size() && s == gap)
298  {
299  s = list[i];
300  i++;
301  }
302  if (s == gap)
303  {
304  if (unresolvedRaisesException)
305  throw Exception("SymbolListTools::isConstant: IntSymbolList is only made of gaps.");
306  else
307  return false;
308  }
309  while (i < list.size())
310  {
311  if (list[i] != s && list[i] != gap)
312  return false;
313  i++;
314  }
315  }
316 
317  return true;
318 }
319 
320 bool SymbolListTools::isConstant(
322  bool unresolvedRaisesException)
323 {
324  // Empty list checking
325  if (list.size() == 0)
326  throw Exception("SymbolListTools::isConstant: Incorrect specified list, size must be > 0");
327 
328  // For all list's characters
329  Vdouble s = list[0];
330  double ss = VectorTools::sum(s);
331  size_t i = 0;
332  while (i < list.size() && (ss < NumConstants::TINY()))
333  {
334  s = list[i];
335  ss = VectorTools::sum(s);
336 
337  i++;
338  }
339  if (ss < NumConstants::TINY())
340  {
341  if (unresolvedRaisesException)
342  throw Exception("SymbolListTools::isConstant: ProbabilisticSymbolList is only made of gaps.");
343  else
344  return false;
345  }
346  while (i < list.size())
347  {
348  if (list[i] != s && VectorTools::sum(list[i]) < NumConstants::TINY())
349  return false;
350  i++;
351  }
352 
353  return true;
354 }
355 
356 
357 void SymbolListTools::getCountsResolveUnknowns(
358  const IntSymbolListInterface& list1,
359  const IntSymbolListInterface& list2,
360  map< int, map<int, double>>& counts)
361 {
362  if (list1.size() != list2.size())
363  throw DimensionException("SymbolListTools::getCounts: the two lists must have the same size.", list1.size(), list2.size());
364  for (size_t i = 0; i < list1.size(); ++i)
365  {
366  vector<int> alias1 = list1.getAlphabet()->getAlias(list1[i]);
367  vector<int> alias2 = list2.getAlphabet()->getAlias(list2[i]);
368  double n1 = (double)alias1.size();
369  double n2 = (double)alias2.size();
370  for (auto j : alias1)
371  {
372  for (auto k : alias2)
373  {
374  counts[j][k] += 1. / (n1 * n2);
375  }
376  }
377  }
378 }
379 
380 void SymbolListTools::getFrequencies(
381  const CruxSymbolListInterface& list,
382  map<int, double>& frequencies,
383  bool resolveUnknowns)
384 {
385  double n = (double)list.size();
386  map<int, double> counts;
387 
388  getCounts(list, counts, resolveUnknowns);
389 
390  for (auto it : counts)
391  {
392  frequencies[it.first] = it.second / n;
393  }
394 }
395 
396 void SymbolListTools::getFrequencies(
397  const CruxSymbolListInterface& list1,
398  const CruxSymbolListInterface& list2,
399  map<int, map<int, double>>& frequencies,
400  bool resolveUnknowns)
401 {
402  double n2 = static_cast<double>(list1.size()) * static_cast<double>(list1.size());
403  map<int, map<int, double>> counts;
404  getCounts(list1, list2, counts, resolveUnknowns);
405 
406  for (auto it1 : counts)
407  {
408  for (auto it2 : it1.second)
409  {
410  frequencies[it1.first][it2.first] = it2.second / n2;
411  }
412  }
413 }
414 
415 double SymbolListTools::getGCContent(
416  const IntSymbolListInterface& list,
417  bool ignoreUnresolved,
418  bool ignoreGap)
419 {
420  auto alphabet = list.getAlphabet();
421  if (!AlphabetTools::isNucleicAlphabet(*alphabet))
422  throw AlphabetException("SymbolListTools::getGCContent. Method only works on nucleotides.", alphabet);
423  double gc = 0;
424  double total = 0;
425  for (size_t i = 0; i < list.size(); i++)
426  {
427  int state = list.getValue(i);
428  if (state > -1) // not a gap
429  {
430  if (state == 1 || state == 2) // G or C
431  {
432  gc++;
433  total++;
434  }
435  else if (state == 0 || state == 3) // A, T or U
436  {
437  total++;
438  }
439  else // Unresolved character
440  {
441  if (!ignoreUnresolved)
442  {
443  total++;
444  switch (state)
445  {
446  case (7): gc++; break; // G or C
447  case (4): gc += 0.5; break; // A or C
448  case (5): gc += 0.5; break; // A or G
449  case (6): gc += 0.5; break; // C or T
450  case (9): gc += 0.5; break; // G or T
451  case (10): gc += 2. / 3.; break; // A or C or G
452  case (11): gc += 1. / 3.; break; // A or C or T
453  case (12): gc += 1. / 3.; break; // A or G or T
454  case (13): gc += 2. / 3.; break; // C or G or T
455  case (14): gc += 0.5; break; // A or C or G or T
456  }
457  }
458  }
459  }
460  else
461  {
462  if (!ignoreGap)
463  total++;
464  }
465  }
466  return total != 0 ? gc / total : 0;
467 }
468 
469 size_t SymbolListTools::getNumberOfDistinctPositions(
470  const IntSymbolListInterface& l1,
471  const IntSymbolListInterface& l2)
472 {
473  if (l1.getAlphabet()->getAlphabetType() != l2.getAlphabet()->getAlphabetType())
474  throw AlphabetMismatchException("SymbolListTools::getNumberOfDistinctPositions.", l1.getAlphabet(), l2.getAlphabet());
475  size_t n = min(l1.size(), l2.size());
476  size_t count = 0;
477  for (size_t i = 0; i < n; i++)
478  {
479  if (l1[i] != l2[i])
480  count++;
481  }
482  return count;
483 }
484 
485 size_t SymbolListTools::getNumberOfPositionsWithoutGap(
486  const IntSymbolListInterface& l1,
487  const IntSymbolListInterface& l2)
488 {
489  if (l1.getAlphabet()->getAlphabetType() != l2.getAlphabet()->getAlphabetType())
490  throw AlphabetMismatchException("SymbolListTools::getNumberOfDistinctPositions.", l1.getAlphabet(), l2.getAlphabet());
491  size_t n = min(l1.size(), l2.size());
492  size_t count = 0;
493  for (size_t i = 0; i < n; i++)
494  {
495  if (l1[i] != -1 && l2[i] != -1)
496  count++;
497  }
498  return count;
499 }
500 
501 void SymbolListTools::changeGapsToUnknownCharacters(IntSymbolListInterface& l)
502 {
503  int unknownCode = l.getAlphabet()->getUnknownCharacterCode();
504  for (size_t i = 0; i < l.size(); i++)
505  {
506  if (l.getAlphabet()->isGap(l[i]))
507  l[i] = unknownCode;
508  }
509 }
510 
511 void SymbolListTools::changeUnresolvedCharactersToGaps(IntSymbolListInterface& l)
512 {
513  int gapCode = l.getAlphabet()->getGapCharacterCode();
514  for (size_t i = 0; i < l.size(); i++)
515  {
516  if (l.getAlphabet()->isUnresolved(l[i]))
517  l[i] = gapCode;
518  }
519 }
520 
521 
522 void SymbolListTools::getCountsResolveUnknowns(
525  map< int, map<int, double>>& counts)
526 {
527  if (list1.size() != list2.size())
528  throw DimensionException("SymbolListTools::getCounts: the two lists must have the same size.", list1.size(), list2.size());
529  for (size_t i = 0; i < list1.size(); ++i)
530  {
531  const std::vector<double>& c1(list1[i]), &c2(list2[i]);
532  double s12 = VectorTools::sum(c1) * VectorTools::sum(c2);
533  if ((s12 != 0))
534  for (size_t j = 0; j < c1.size(); ++j)
535  {
536  for (size_t k = 0; k < c2.size(); ++k)
537  {
538  counts[(int)j][(int)k] += c1.at(j) * c2.at(k) / s12;
539  }
540  }
541  }
542 }
543 
544 double SymbolListTools::getGCContent(
546  bool ignoreUnresolved,
547  bool ignoreGap)
548 {
549  auto alphabet = list.getAlphabet();
550  if (!AlphabetTools::isNucleicAlphabet(*alphabet))
551  throw AlphabetException("SymbolListTools::getGCContent. Method only works on nucleotides.", alphabet);
552  double gc = 0;
553  double total = 0;
554  for (size_t i = 0; i < list.size(); ++i)
555  {
556  const Vdouble& state = list.getValue(i);
557  double ss = VectorTools::sum(state);
558  if (ss != 0) // not a gap
559  {
560  if (ss < 1)
561  {
562  total++;
563  gc += state.at(1) + state.at(2);
564  }
565  else if (!ignoreUnresolved)
566  {
567  total++;
568  gc += (state.at(1) + state.at(2)) / ss;
569  }
570  }
571  else
572  {
573  if (!ignoreGap)
574  total++;
575  }
576  }
577 
578  return total != 0 ? gc / total : 0;
579 }
580 
581 size_t SymbolListTools::getNumberOfDistinctPositions(
584 {
585  if (l1.getAlphabet()->getAlphabetType() != l2.getAlphabet()->getAlphabetType())
586  throw AlphabetMismatchException("SymbolListTools::getNumberOfDistinctPositions.", l1.getAlphabet(), l2.getAlphabet());
587 
588  size_t n = min(l1.size(), l2.size());
589  size_t count = 0;
590  for (size_t i = 0; i < n; ++i)
591  {
592  if (l1[i] != l2[i])
593  count++;
594  }
595  return count;
596 }
597 
598 size_t SymbolListTools::getNumberOfPositionsWithoutGap(
601 {
602  if (l1.getAlphabet()->getAlphabetType() != l2.getAlphabet()->getAlphabetType())
603  throw AlphabetMismatchException("SymbolListTools::getNumberOfDistinctPositions.", l1.getAlphabet(), l2.getAlphabet());
604  size_t n = min(l1.size(), l2.size());
605  size_t count = 0;
606  for (size_t i = 0; i < n; ++i)
607  {
608  if (VectorTools::sum(l1[i]) > NumConstants::TINY() && VectorTools::sum(l2[i]) > NumConstants::TINY())
609  count++;
610  }
611  return count;
612 }
613 
614 void SymbolListTools::changeGapsToUnknownCharacters(ProbabilisticSymbolListInterface& l)
615 {
616  for (size_t i = 0; i < l.size(); ++i)
617  {
618  if (VectorTools::sum(l[i]) < NumConstants::TINY())
619  VectorTools::fill(l[i], 1.);
620  }
621 }
622 
623 void SymbolListTools::changeUnresolvedCharactersToGaps(ProbabilisticSymbolListInterface& l)
624 {
625  for (size_t i = 0; i < l.size(); ++i)
626  {
627  if (VectorTools::sum(l[i]) > 1.)
628  VectorTools::fill(l[i], 0.);
629  }
630 }
631 
632 double SymbolListTools::variabilityShannon(
633  const CruxSymbolListInterface& list,
634  bool resolveUnknown)
635 {
636  // Empty list checking
637  if (list.size() == 0)
638  throw Exception("SymbolListTools::variabilityShannon: Incorrect specified list, size must be > 0.");
639 
640  map<int, double> p;
641  getFrequencies(list, p, resolveUnknown);
642  // We need to correct frequencies for gaps:
643  double s = 0.;
644  for (int i = 0; i < static_cast<int>(list.getAlphabet()->getSize()); i++)
645  {
646  double f = p[i];
647  if (f > 0)
648  s += f * log(f);
649  }
650  return -s;
651 }
652 
653 /******************************************************************************/
654 
655 
656 double SymbolListTools::mutualInformation(
657  const CruxSymbolListInterface& list1,
658  const CruxSymbolListInterface& list2,
659  bool resolveUnknown)
660 {
661  // Empty list checking
662  if (list1.size() == 0)
663  throw Exception("SymbolListTools::mutualInformation: Incorrect specified list, size must be > 0");
664  if (list2.size() == 0)
665  throw Exception("SymbolListTools::mutualInformation: Incorrect specified list, size must be > 0");
666  if (list1.size() != list2.size())
667  throw DimensionException("SymbolListTools::mutualInformation: lists must have the same size!", list1.size(), list2.size());
668  vector<double> p1(list1.getAlphabet()->getSize());
669  vector<double> p2(list2.getAlphabet()->getSize());
670  map<int, map<int, double>> p12;
671  getCounts(list1, list2, p12, resolveUnknown);
672  double mi = 0, tot = 0, pxy;
673  // We need to correct frequencies for gaps:
674  for (size_t i = 0; i < list1.getAlphabet()->getSize(); i++)
675  {
676  for (size_t j = 0; j < list2.getAlphabet()->getSize(); j++)
677  {
678  pxy = p12[static_cast<int>(i)][static_cast<int>(j)];
679  tot += pxy;
680  p1[i] += pxy;
681  p2[j] += pxy;
682  }
683  }
684  for (size_t i = 0; i < list1.getAlphabet()->getSize(); i++)
685  {
686  p1[i] /= tot;
687  }
688  for (size_t j = 0; j < list2.getAlphabet()->getSize(); j++)
689  {
690  p2[j] /= tot;
691  }
692  for (size_t i = 0; i < list1.getAlphabet()->getSize(); i++)
693  {
694  for (size_t j = 0; j < list2.getAlphabet()->getSize(); j++)
695  {
696  pxy = p12[static_cast<int>(i)][static_cast<int>(j)] / tot;
697  if (pxy > 0)
698  mi += pxy * log(pxy / (p1[i] * p2[j]));
699  }
700  }
701  return mi;
702 }
703 
704 /******************************************************************************/
705 
706 double SymbolListTools::jointEntropy(
707  const CruxSymbolListInterface& list1,
708  const CruxSymbolListInterface& list2,
709  bool resolveUnknown)
710 {
711  // Empty list checking
712  if (list1.size() == 0)
713  throw Exception("SymbolListTools::jointEntropy: Incorrect specified list, size must be > 0");
714  if (list2.size() == 0)
715  throw Exception("SymbolListTools::jointEntropy: Incorrect specified list, size must be > 0");
716  if (list1.size() != list2.size())
717  throw DimensionException("SymbolListTools::jointEntropy: lists must have the same size!", list1.size(), list2.size());
718  map<int, map<int, double>> p12;
719  getCounts(list1, list2, p12, resolveUnknown);
720  double tot = 0, pxy, h = 0;
721  // We need to correct frequencies for gaps:
722  for (size_t i = 0; i < list1.getAlphabet()->getSize(); ++i)
723  {
724  for (size_t j = 0; j < list2.getAlphabet()->getSize(); ++j)
725  {
726  pxy = p12[static_cast<int>(i)][static_cast<int>(j)];
727  tot += pxy;
728  }
729  }
730  for (size_t i = 0; i < list1.getAlphabet()->getSize(); ++i)
731  {
732  for (size_t j = 0; j < list2.getAlphabet()->getSize(); ++j)
733  {
734  pxy = p12[static_cast<int>(i)][static_cast<int>(j)] / tot;
735  if (pxy > 0)
736  h += pxy * log(pxy);
737  }
738  }
739  return -h;
740 }
741 
742 /******************************************************************************/
743 
744 double SymbolListTools::variabilityFactorial(
745  const IntSymbolListInterface& list)
746 {
747  // Empty list checking
748  if (list.size() == 0)
749  throw Exception("SymbolListTools::variabilityFactorial: Incorrect specified list, size must be > 0");
750  map<int, size_t> p;
751  getCounts(list, p);
752  vector<size_t> c = MapTools::getValues(p);
753  size_t s = VectorTools::sum(c);
754  long double l = static_cast<long double>(NumTools::fact(s)) / static_cast<long double>(VectorTools::sum(VectorTools::fact(c)));
755  return static_cast<double>(std::log(l));
756 }
757 
758 /******************************************************************************/
759 
760 double SymbolListTools::heterozygosity(const CruxSymbolListInterface& list)
761 {
762  // Empty list checking
763  if (list.size() == 0)
764  throw Exception("SymbolListTools::heterozygosity: Incorrect specified list, size must be > 0");
765  map<int, double> p;
766  getFrequencies(list, p);
767  vector<double> c = MapTools::getValues(p);
768  double n = VectorTools::norm<double, double>(MapTools::getValues(p));
769  return 1. - n * n;
770 }
771 
772 /******************************************************************************/
773 
774 size_t SymbolListTools::getNumberOfDistinctCharacters(const IntSymbolListInterface& list)
775 {
776  // Empty list checking
777  if (list.size() == 0)
778  throw Exception("SymbolListTools::getNumberOfDistinctCharacters(): Incorrect specified list, size must be > 0");
779  // For all list's characters
780  if (SymbolListTools::isConstant(list))
781  return 1;
782  map<int, size_t> counts;
783  SymbolListTools::getCounts(list, counts);
784  size_t s = 0;
785  for (map<int, size_t>::iterator it = counts.begin(); it != counts.end(); it++)
786  {
787  if (it->second != 0)
788  s++;
789  }
790  return s;
791 }
792 
793 /******************************************************************************/
794 
795 size_t SymbolListTools::getMajorAlleleFrequency(const IntSymbolListInterface& list)
796 {
797  // Empty list checking
798  if (list.size() == 0)
799  throw Exception("SymbolListTools::getMajorAlleleFrequency(): Incorrect specified list, size must be > 0");
800  // For all list's characters
801  if (SymbolListTools::isConstant(list))
802  return list.size();
803  map<int, size_t> counts;
804  getCounts(list, counts);
805  size_t s = 0;
806  for (map<int, size_t>::iterator it = counts.begin(); it != counts.end(); it++)
807  {
808  if (it->second != 0)
809  if (it->second > s)
810  s = it->second;
811  }
812  return s;
813 }
814 
815 /******************************************************************************/
816 
817 int SymbolListTools::getMajorAllele(const CruxSymbolListInterface& list)
818 {
819  // Empty list checking
820  if (list.size() == 0)
821  throw Exception("SymbolListTools::getMajorAllele(): Incorrect specified list, size must be > 0");
822  // For all list's characters
823  if (dynamic_cast<const IntSymbolListInterface*>(&list) && SymbolListTools::isConstant(list))
824  return (dynamic_cast<const IntSymbolListInterface&>(list))[0];
825 
826  map<int, double> counts;
827  SymbolListTools::getCounts(list, counts);
828  double s = 0;
829  int ma = -100;
830  for (auto it : counts)
831  {
832  if (it.second != 0)
833  if (it.second > s)
834  {
835  s = it.second;
836  ma = it.first;
837  }
838  }
839  return ma;
840 }
841 
842 /******************************************************************************/
843 
844 size_t SymbolListTools::getMinorAlleleFrequency(const IntSymbolListInterface& list)
845 {
846  // Empty list checking
847  if (list.size() == 0)
848  throw Exception("SymbolListTools::getMinorAlleleFrequency(): Incorrect specified list, size must be > 0.");
849  // For all list's characters
850  if (SymbolListTools::isConstant(list))
851  return list.size();
852  map<int, size_t> counts;
853  SymbolListTools::getCounts(list, counts);
854  size_t s = list.size();
855  for (auto it : counts)
856  {
857  if (it.second != 0)
858  if (it.second < s)
859  s = it.second;
860  }
861  return s;
862 }
863 
864 /******************************************************************************/
865 
866 int SymbolListTools::getMinorAllele(const CruxSymbolListInterface& list)
867 {
868  // Empty list checking
869  if (list.size() == 0)
870  throw Exception("SymbolListTools::getMinorAllele(): Incorrect specified list, size must be > 0.");
871  // For all list's characters
872  if (dynamic_cast<const IntSymbolListInterface*>(&list) && SymbolListTools::isConstant(list))
873  return (dynamic_cast<const IntSymbolListInterface&>(list))[0];
874  map<int, double> counts;
875  SymbolListTools::getCounts(list, counts);
876  double s = (double)list.size();
877  int ma = -100;
878  for (auto it : counts)
879  {
880  if (it.second != 0)
881  if (it.second < s)
882  {
883  s = it.second;
884  ma = it.first;
885  }
886  }
887  return ma;
888 }
889 
890 /******************************************************************************/
891 
892 bool SymbolListTools::hasSingleton(const IntSymbolListInterface& list)
893 {
894  // Empty list checking
895  if (list.size() == 0)
896  throw Exception("SymbolListTools::hasSingleton: Incorrect specified list, size must be > 0");
897  // For all list's characters
898  if (SymbolListTools::isConstant(list))
899  return false;
900  map<int, size_t> counts;
901  getCounts(list, counts);
902  for (map<int, size_t>::iterator it = counts.begin(); it != counts.end(); it++)
903  {
904  if (it->second == 1)
905  return true;
906  }
907  return false;
908 }
909 
910 /******************************************************************************/
911 
912 bool SymbolListTools::isParsimonyInformativeSite(const IntSymbolListInterface& list)
913 {
914  // Empty list checking
915  if (list.size() == 0)
916  throw Exception("SymbolListTools::isParsimonyInformativeSite: Incorrect specified list, size must be > 0");
917  // For all list's characters
918  if (SymbolListTools::isConstant(list, false, false))
919  return false;
920  map<int, size_t> counts;
921  SymbolListTools::getCounts(list, counts);
922  size_t npars = 0;
923  for (map<int, size_t>::iterator it = counts.begin(); it != counts.end(); it++)
924  {
925  if (it->second > 1)
926  npars++;
927  }
928  if (npars > 1)
929  return true;
930  return false;
931 }
932 
933 /******************************************************************************/
934 
935 bool SymbolListTools::isTriplet(const IntSymbolListInterface& list)
936 {
937  // Empty list checking
938  if (list.size() == 0)
939  throw Exception("SymbolListTools::isTriplet: Incorrect specified list, size must be > 0");
940  // For all list's characters
941  return SymbolListTools::getNumberOfDistinctCharacters(list) >= 3;
942 }
943 
944 /******************************************************************************/
945 
946 bool SymbolListTools::isDoubleton(const IntSymbolListInterface& list)
947 {
948  // Empty list checking
949  if (list.size() == 0)
950  throw Exception("SymbolListTools::isDoubleton: Incorrect specified list, size must be > 0");
951  // For all list's characters
952  return SymbolListTools::getNumberOfDistinctCharacters(list) == 2;
953 }
954 
955 /******************************************************************************/
The alphabet exception base class.
Exception thrown when two alphabets do not match.
The CruxSymbolList interface.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get the alphabet associated to the list.
virtual size_t size() const =0
Get the number of elements in the list.
The specific IntSymbolList interface.
Definition: IntSymbolList.h:29
The ProbabilisticSymbolList interface.
virtual const T & getValue(size_t pos) const =0
checked access to a character in list.
std::size_t count(const std::string &s, const std::string &pattern)
This alphabet is used to deal NumericAlphabet.
std::vector< double > Vdouble