bpp-phyl3  3.0.0
SubstitutionMappingTools.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef _LEGACY_SUBSTITUTION_MAPPING_TOOLS_H_
6 #define _LEGACY_SUBSTITUTION_MAPPING_TOOLS_H_
7 
9 #include "../Likelihood/DRTreeLikelihood.h"
10 #include "../../Mapping/SubstitutionCount.h" // We use the new implementation here.
11 #include "../../Mapping/OneJumpSubstitutionCount.h"
12 
13 #include <memory>
14 
15 namespace bpp
16 {
31 {
32 public:
35 
36 public:
47  static std::unique_ptr<LegacyProbabilisticSubstitutionMapping> computeSubstitutionVectors(
48  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
49  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
50  bool verbose = true)
51  {
52  std::vector<int> nodeIds;
53  return computeSubstitutionVectors(drtl, nodeIds, substitutionCount, verbose);
54  }
55 
/**
 * @brief Compute the substitutions vectors for a particular dataset using the
 * double-recursive likelihood computation.
 *
 * @param drtl              Double-recursive tree likelihood object.
 * @param nodeIds           Node ids. NOTE(review): presumably the nodes whose
 *                          branches are mapped; the meaning of an empty list
 *                          is not visible here -- confirm.
 * @param substitutionCount Substitution count object used for the mapping.
 * @param verbose           Verbosity flag (defaults to true).
 * @return A newly allocated LegacyProbabilisticSubstitutionMapping owned by the caller.
 */
69  static std::unique_ptr<LegacyProbabilisticSubstitutionMapping> computeSubstitutionVectors(
70  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
71  const std::vector<int>& nodeIds,
72  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
73  bool verbose = true);
74 
/**
 * @brief Overload of computeSubstitutionVectors that additionally takes a
 * SubstitutionModelSet.
 *
 * NOTE(review): presumably the model set supplies per-branch models for the
 * non-homogeneous case -- confirm against the implementation.
 *
 * @param drtl              Double-recursive tree likelihood object.
 * @param modelSet          Substitution model set.
 * @param nodeIds           Node ids (see NOTE above; semantics not visible here).
 * @param substitutionCount Substitution count object used for the mapping.
 * @param verbose           Verbosity flag (defaults to true).
 * @return A newly allocated LegacyProbabilisticSubstitutionMapping owned by the caller.
 */
75  static std::unique_ptr<LegacyProbabilisticSubstitutionMapping> computeSubstitutionVectors(
76  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
77  const SubstitutionModelSet& modelSet,
78  const std::vector<int>& nodeIds,
79  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
80  bool verbose = true);
81 
/**
 * @brief Compute the substitutions vectors for a particular dataset using the
 * double-recursive likelihood computation.
 *
 * NOTE(review): how this variant differs from computeSubstitutionVectors is
 * not visible in this header; the name suggests no averaging step is applied
 * -- confirm against the implementation.
 *
 * @param drtl              Double-recursive tree likelihood object.
 * @param substitutionCount Substitution count object used for the mapping.
 * @param verbose           Verbosity flag (defaults to true).
 * @return A newly allocated LegacyProbabilisticSubstitutionMapping owned by the caller.
 */
102  static std::unique_ptr<LegacyProbabilisticSubstitutionMapping> computeSubstitutionVectorsNoAveraging(
103  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
104  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
105  bool verbose = true);
106 
107 
/**
 * @brief Compute the substitutions vectors for a particular dataset using the
 * double-recursive likelihood computation.
 *
 * NOTE(review): the name suggests a marginal-reconstruction variant without
 * averaging; the exact algorithm is not visible in this header -- confirm.
 *
 * @param drtl              Double-recursive tree likelihood object.
 * @param substitutionCount Substitution count object used for the mapping.
 * @param verbose           Verbosity flag (defaults to true).
 * @return A newly allocated LegacyProbabilisticSubstitutionMapping owned by the caller.
 */
126  static std::unique_ptr<LegacyProbabilisticSubstitutionMapping> computeSubstitutionVectorsNoAveragingMarginal(
127  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
128  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
129  bool verbose = true);
130 
131 
/**
 * @brief Compute the substitutions vectors for a particular dataset using the
 * double-recursive likelihood computation.
 *
 * NOTE(review): the name suggests a marginal-reconstruction variant; the
 * exact algorithm is not visible in this header -- confirm.
 *
 * @param drtl              Double-recursive tree likelihood object.
 * @param substitutionCount Substitution count object used for the mapping.
 * @param verbose           Verbosity flag (defaults to true).
 * @return A newly allocated LegacyProbabilisticSubstitutionMapping owned by the caller.
 */
147  static std::unique_ptr<LegacyProbabilisticSubstitutionMapping> computeSubstitutionVectorsMarginal(
148  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
149  std::shared_ptr<SubstitutionCountInterface> substitutionCount,
150  bool verbose = true);
151 
152 
160  static std::unique_ptr<LegacyProbabilisticSubstitutionMapping> computeOneJumpProbabilityVectors(
161  std::shared_ptr<const DRTreeLikelihoodInterface> drtl,
162  bool verbose = true)
163  {
164  std::shared_ptr<SubstitutionModelInterface> ptr = nullptr;
165  auto ojsm = std::make_shared<OneJumpSubstitutionCount>(ptr);
166  return computeSubstitutionVectors(drtl, drtl->tree().getNodesId(), ojsm, 0);
167  }
168 
169 
/**
 * @brief Write the substitutions vectors to a stream.
 *
 * @param substitutions The substitution mapping to write.
 * @param sites         Site container. NOTE(review): presumably the alignment
 *                      the mapping was computed on -- confirm.
 * @param type          NOTE(review): presumably the index of the substitution
 *                      type to output -- confirm.
 * @param out           Output stream written to.
 */
181  static void writeToStream(
182  const LegacyProbabilisticSubstitutionMapping& substitutions,
183  const SiteContainerInterface& sites,
184  size_t type,
185  std::ostream& out);
186 
187 
/**
 * @brief Read the substitutions vectors from a stream.
 *
 * @param in            Input stream read from.
 * @param substitutions Mapping object passed by non-const reference
 *                      (presumably filled with the data read -- confirm).
 * @param type          NOTE(review): presumably the index of the substitution
 *                      type being read -- confirm.
 */
196  static void readFromStream(std::istream& in, LegacyProbabilisticSubstitutionMapping& substitutions, size_t type);
197 
198 
/**
 * @brief Sum all types of substitutions for each branch of a given position
 * (specified by its index).
 *
 * @param smap      The substitution mapping to read.
 * @param siteIndex Index of the position.
 * @return A vector of doubles (presumably one sum per branch -- confirm ordering).
 */
208  static std::vector<double> computeTotalSubstitutionVectorForSitePerBranch(const LegacySubstitutionMappingInterface& smap, size_t siteIndex);
209 
/**
 * @brief Sum substitutions for each type at a given position (specified by
 * its index).
 *
 * @param smap      The substitution mapping to read.
 * @param siteIndex Index of the position.
 * @return A vector of doubles (presumably one sum per substitution type -- confirm).
 */
219  static std::vector<double> computeTotalSubstitutionVectorForSitePerType(const LegacySubstitutionMappingInterface& smap, size_t siteIndex);
220 
/**
 * @brief Compute the norm of a substitution vector for a given position
 * (specified by its index).
 *
 * @param smap      The substitution mapping to read.
 * @param siteIndex Index of the position.
 * @return The norm, as a double. NOTE(review): which norm is used is not
 *         visible in this header -- confirm.
 */
232  static double computeNormForSite(const LegacySubstitutionMappingInterface& smap, size_t siteIndex);
233 
/**
 * @brief Sum all substitutions for each type of a given branch (specified by
 * its index).
 *
 * @param smap        The substitution mapping to read.
 * @param branchIndex Index of the branch.
 * @return A vector of doubles (presumably one sum per substitution type -- confirm).
 */
241  static std::vector<double> computeSumForBranch(const LegacySubstitutionMappingInterface& smap, size_t branchIndex);
242 
243 
/**
 * @brief Sum all substitutions for each type of a given site (specified by
 * its index).
 *
 * @param smap      The substitution mapping to read.
 * @param siteIndex Index of the site.
 * @return A vector of doubles (presumably one sum per substitution type -- confirm).
 */
251  static std::vector<double> computeSumForSite(const LegacySubstitutionMappingInterface& smap, size_t siteIndex);
252 
253 
/**
 * @brief Per-branch counts for a single substitution model (first of the
 * "Per Branch methods" group).
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Node ids (presumably the branches to consider -- confirm).
 * @param model     Substitution model.
 * @param reg       Substitution register.
 * @param threshold Defaults to -1. NOTE(review): the meaning of the threshold,
 *                  and of a negative value, is not visible here -- confirm.
 * @param verbose   Verbosity flag (defaults to true).
 * @return A vector of vectors of doubles. NOTE(review): presumably indexed
 *         [branch][type] -- confirm.
 */
259  static std::vector<std::vector<double>> getCountsPerBranch(
260  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
261  const std::vector<int>& ids,
262  std::shared_ptr<SubstitutionModelInterface> model,
263  std::shared_ptr<const SubstitutionRegisterInterface> reg,
264  double threshold = -1,
265  bool verbose = true);
266 
/**
 * @brief Per-branch counts, overload taking a SubstitutionModelSet instead of
 * a single model.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Node ids (presumably the branches to consider -- confirm).
 * @param modelSet  Substitution model set.
 * @param reg       Substitution register.
 * @param threshold Defaults to -1. NOTE(review): semantics not visible here -- confirm.
 * @param verbose   Verbosity flag (defaults to true).
 * @return A vector of vectors of doubles. NOTE(review): presumably indexed
 *         [branch][type] -- confirm.
 */
267  static std::vector<std::vector<double>> getCountsPerBranch(
268  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
269  const std::vector<int>& ids,
270  const SubstitutionModelSet& modelSet,
271  std::shared_ptr<const SubstitutionRegisterInterface> reg,
272  double threshold = -1,
273  bool verbose = true);
274 
275 
288  static std::vector<std::vector<double>> getNormalizationsPerBranch(
289  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
290  const std::vector<int>& ids,
291  std::shared_ptr<const SubstitutionModelInterface> nullModel,
293  bool verbose = true);
294 
295 
308  static std::vector<std::vector<double>> getNormalizationsPerBranch(
309  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
310  const std::vector<int>& ids,
311  std::shared_ptr<const SubstitutionModelSet> nullModelSet,
313  bool verbose = true);
314 
315 
333  static std::vector<std::vector<double>> getRelativeCountsPerBranch(
334  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
335  const std::vector<int>& ids,
336  std::shared_ptr<SubstitutionModelInterface> model,
337  std::shared_ptr<const SubstitutionRegisterInterface> reg,
338  double threshold = -1,
339  bool verbose = true)
340  {
341  std::vector<std::vector<double>> result;
342  computeCountsPerTypePerBranch(drtl, ids, model, reg, result, threshold, verbose);
343  return result;
344  }
345 
362  static std::vector<std::vector<double>> getNormalizedCountsPerBranch(
363  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
364  const std::vector<int>& ids,
365  std::shared_ptr<SubstitutionModelInterface> model,
366  std::shared_ptr<SubstitutionModelInterface> nullModel,
367  std::shared_ptr<const SubstitutionRegisterInterface> reg,
368  bool perTime,
369  bool perWord,
370  bool verbose = true)
371  {
372  std::vector< std::vector<double>> result;
373  computeCountsPerTypePerBranch(drtl, ids, model, nullModel, reg, result, perTime, perWord, verbose);
374  return result;
375  }
376 
393  static std::vector<std::vector<double>> getNormalizedCountsPerBranch(
394  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
395  const std::vector<int>& ids,
396  std::shared_ptr<SubstitutionModelSet> modelSet,
397  std::shared_ptr<SubstitutionModelSet> nullModelSet,
398  std::shared_ptr<const SubstitutionRegisterInterface> reg,
399  bool perTime,
400  bool perWord,
401  bool verbose = true)
402  {
403  std::vector<std::vector<double>> result;
404  computeCountsPerTypePerBranch(drtl, ids, modelSet, nullModelSet, reg, result, perTime, perWord, verbose);
405  return result;
406  }
407 
/**
 * @brief Per-site, per-branch counts (first of the "Per Branch Per Site
 * methods" group).
 *
 * @param drtl  Double-recursive tree likelihood object.
 * @param ids   Node ids (presumably the branches to consider -- confirm).
 * @param model Substitution model.
 * @param reg   Substitution register.
 * @param array Table passed by non-const reference. NOTE(review): presumably
 *              the output, indexed [site][branch] -- confirm.
 */
428  static void computeCountsPerSitePerBranch(
429  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
430  const std::vector<int>& ids,
431  std::shared_ptr<SubstitutionModelInterface> model,
432  std::shared_ptr<const SubstitutionRegisterInterface> reg,
433  VVdouble& array);
434 
435 
/**
 * @brief Per-type, per-branch counts for a single model (first of the
 * "Per Type Per Branch methods" group).
 *
 * This is the overload called by getRelativeCountsPerBranch().
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Node ids (presumably the branches to consider -- confirm).
 * @param model     Substitution model.
 * @param reg       Substitution register.
 * @param result    Table passed by non-const reference. NOTE(review):
 *                  presumably the output; its index order is not visible
 *                  here -- confirm.
 * @param threshold Defaults to -1. NOTE(review): semantics not visible here -- confirm.
 * @param verbose   Verbosity flag (defaults to true).
 */
460  static void computeCountsPerTypePerBranch(
461  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
462  const std::vector<int>& ids,
463  std::shared_ptr<SubstitutionModelInterface> model,
464  std::shared_ptr<const SubstitutionRegisterInterface> reg,
465  VVdouble& result,
466  double threshold = -1,
467  bool verbose = true);
468 
/**
 * @brief Per-type, per-branch counts taking an additional null model.
 *
 * This is the overload called by the single-model
 * getNormalizedCountsPerBranch().
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Node ids (presumably the branches to consider -- confirm).
 * @param model     Substitution model.
 * @param nullModel Null substitution model (presumably used for
 *                  normalization -- confirm).
 * @param reg       Substitution register.
 * @param result    Table passed by non-const reference (presumably the output -- confirm).
 * @param perTime   NOTE(review): presumably requests per-unit-of-time
 *                  normalization -- confirm.
 * @param perWord   NOTE(review): presumably requests per-word (rather than
 *                  per-position) normalization -- confirm.
 * @param verbose   Verbosity flag (defaults to true).
 */
488  static void computeCountsPerTypePerBranch(
489  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
490  const std::vector<int>& ids,
491  std::shared_ptr<SubstitutionModelInterface> model,
492  std::shared_ptr<SubstitutionModelInterface> nullModel,
493  std::shared_ptr<const SubstitutionRegisterInterface> reg,
494  VVdouble& result,
495  bool perTime,
496  bool perWord,
497  bool verbose = true);
498 
/**
 * @brief Per-type, per-branch counts taking a model set and a null model set.
 *
 * This is the overload called by the model-set
 * getNormalizedCountsPerBranch().
 *
 * @param drtl         Double-recursive tree likelihood object.
 * @param ids          Node ids (presumably the branches to consider -- confirm).
 * @param modelSet     Substitution model set.
 * @param nullModelSet Null substitution model set (presumably used for
 *                     normalization -- confirm).
 * @param reg          Substitution register.
 * @param result       Table passed by non-const reference (presumably the output -- confirm).
 * @param perTime      NOTE(review): semantics not visible here -- confirm.
 * @param perWord      NOTE(review): semantics not visible here -- confirm.
 * @param verbose      Verbosity flag (defaults to true).
 */
520  static void computeCountsPerTypePerBranch(
521  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
522  const std::vector<int>& ids,
523  std::shared_ptr<SubstitutionModelSet> modelSet,
524  std::shared_ptr<SubstitutionModelSet> nullModelSet,
525  std::shared_ptr<const SubstitutionRegisterInterface> reg,
526  VVdouble& result,
527  bool perTime,
528  bool perWord,
529  bool verbose = true);
530 
/**
 * @brief Per-site, per-type counts for a single model (first of the
 * "Per Type Per Site methods" group).
 *
 * @param drtl   Double-recursive tree likelihood object.
 * @param ids    Node ids (presumably the branches to consider -- confirm).
 * @param model  Substitution model.
 * @param reg    Substitution register.
 * @param result Table passed by non-const reference. NOTE(review):
 *               presumably the output, indexed [site][type] -- confirm.
 */
551  static void computeCountsPerSitePerType(
552  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
553  const std::vector<int>& ids,
554  std::shared_ptr<SubstitutionModelInterface> model,
555  std::shared_ptr<const SubstitutionRegisterInterface> reg,
556  VVdouble& result);
557 
/**
 * @brief Per-site, per-type counts taking an additional null model.
 *
 * @param drtl      Double-recursive tree likelihood object.
 * @param ids       Node ids (presumably the branches to consider -- confirm).
 * @param model     Substitution model.
 * @param nullModel Null substitution model (presumably used for
 *                  normalization -- confirm).
 * @param reg       Substitution register.
 * @param result    Table passed by non-const reference (presumably the output -- confirm).
 * @param perTime   NOTE(review): semantics not visible here -- confirm.
 * @param perWord   NOTE(review): semantics not visible here -- confirm.
 */
576  static void computeCountsPerSitePerType(
577  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
578  const std::vector<int>& ids,
579  std::shared_ptr<SubstitutionModelInterface> model,
580  std::shared_ptr<SubstitutionModelInterface> nullModel,
581  std::shared_ptr<const SubstitutionRegisterInterface> reg,
582  VVdouble& result,
583  bool perTime,
584  bool perWord);
585 
/**
 * @brief Per-site, per-type counts taking a model set and a null model set.
 *
 * @param drtl         Double-recursive tree likelihood object.
 * @param ids          Node ids (presumably the branches to consider -- confirm).
 * @param modelSet     Substitution model set.
 * @param nullModelSet Null substitution model set (presumably used for
 *                     normalization -- confirm).
 * @param reg          Substitution register.
 * @param result       Table passed by non-const reference (presumably the output -- confirm).
 * @param perTime      NOTE(review): semantics not visible here -- confirm.
 * @param perWord      NOTE(review): semantics not visible here -- confirm.
 */
604  static void computeCountsPerSitePerType(
605  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
606  const std::vector<int>& ids,
607  std::shared_ptr<SubstitutionModelSet> modelSet,
608  std::shared_ptr<SubstitutionModelSet> nullModelSet,
609  std::shared_ptr<const SubstitutionRegisterInterface> reg,
610  VVdouble& result,
611  bool perTime,
612  bool perWord);
613 
636  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
637  const std::vector<int>& ids,
638  std::shared_ptr<SubstitutionModelInterface> model,
639  std::shared_ptr<const SubstitutionRegisterInterface> reg,
640  VVVdouble& result);
641 
660  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
661  const std::vector<int>& ids,
662  std::shared_ptr<SubstitutionModelInterface> model,
663  std::shared_ptr<SubstitutionModelInterface> nullModel,
664  std::shared_ptr<const SubstitutionRegisterInterface> reg,
665  VVVdouble& result,
666  bool perTime,
667  bool perWord);
668 
687  std::shared_ptr<DRTreeLikelihoodInterface> drtl,
688  const std::vector<int>& ids,
689  std::shared_ptr<SubstitutionModelSet> modelSet,
690  std::shared_ptr<SubstitutionModelSet> nullModelSet,
691  std::shared_ptr<const SubstitutionRegisterInterface> reg,
692  VVVdouble& result,
693  bool perTime,
694  bool perWord);
695 
/**
 * @brief Outputs of counts, per site and per branch.
 *
 * @param filename Name of the output file.
 * @param ids      Node ids (presumably labelling the branch columns -- confirm).
 * @param counts   Two-level table of counts. NOTE(review): presumably indexed
 *                 [site][branch] -- confirm.
 */
705  static void outputPerSitePerBranch(const std::string& filename,
706  const std::vector<int>& ids,
707  const VVdouble& counts);
708 
712  static void outputPerSitePerType(const std::string& filename,
714  const VVdouble& counts);
715 
719  static void outputPerSitePerBranchPerType(const std::string& filenamePrefix,
720  const std::vector<int>& ids,
722  const VVVdouble& counts);
723 
724 
733 };
734 } // end of namespace bpp.
735 
736 #endif // _LEGACY_SUBSTITUTION_MAPPING_TOOLS_H_
Legacy data storage class for probabilistic substitution mappings.
Legacy interface for storing mapping data.
Provide methods to compute substitution mappings.
static void outputPerSitePerBranch(const std::string &filename, const std::vector< int > &ids, const VVdouble &counts)
Outputs of counts.
static void outputPerSitePerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const VVdouble &counts)
Output Per Site Per Type.
static std::unique_ptr< LegacyProbabilisticSubstitutionMapping > computeOneJumpProbabilityVectors(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, bool verbose=true)
This method computes for each site and for each branch the probability that at least one jump occurred.
static std::vector< double > computeTotalSubstitutionVectorForSitePerBranch(const LegacySubstitutionMappingInterface &smap, size_t siteIndex)
Sum all types of substitutions for each branch of a given position (specified by its index).
static std::vector< double > computeSumForSite(const LegacySubstitutionMappingInterface &smap, size_t siteIndex)
Sum all substitutions for each type of a given site (specified by its index).
static void readFromStream(std::istream &in, LegacyProbabilisticSubstitutionMapping &substitutions, size_t type)
Read the substitutions vectors from a stream.
static std::unique_ptr< LegacyProbabilisticSubstitutionMapping > computeSubstitutionVectors(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, std::shared_ptr< SubstitutionCountInterface > substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static std::vector< std::vector< double > > getRelativeCountsPerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, double threshold=-1, bool verbose=true)
Returns the counts relative to the frequency of the states in case of non-stationarity.
static void computeCountsPerSitePerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, VVdouble &array)
Per Branch Per Site methods.
static std::vector< std::vector< double > > getNormalizationsPerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< const SubstitutionModelInterface > nullModel, const SubstitutionRegisterInterface &reg, bool verbose=true)
Returns the normalization factors due to the null model on each branch, for each register.
static void computeCountsPerSitePerType(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, VVdouble &result)
Per Type Per Site methods.
static std::vector< double > computeSumForBranch(const LegacySubstitutionMappingInterface &smap, size_t branchIndex)
Sum all substitutions for each type of a given branch (specified by its index).
static std::unique_ptr< LegacyProbabilisticSubstitutionMapping > computeSubstitutionVectorsNoAveragingMarginal(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, std::shared_ptr< SubstitutionCountInterface > substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static void outputPerSitePerBranchPerType(const std::string &filenamePrefix, const std::vector< int > &ids, const SubstitutionRegisterInterface &reg, const VVVdouble &counts)
Output Per Site Per Branch Per Type.
static std::vector< std::vector< double > > getNormalizedCountsPerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< SubstitutionModelInterface > nullModel, std::shared_ptr< const SubstitutionRegisterInterface > reg, bool perTime, bool perWord, bool verbose=true)
Returns the counts normalized by a null model.
static void writeToStream(const LegacyProbabilisticSubstitutionMapping &substitutions, const SiteContainerInterface &sites, size_t type, std::ostream &out)
Write the substitutions vectors to a stream.
static std::vector< double > computeTotalSubstitutionVectorForSitePerType(const LegacySubstitutionMappingInterface &smap, size_t siteIndex)
Sum all types of substitutions for each type of a given position (specified by its index).
static std::vector< std::vector< double > > getCountsPerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, double threshold=-1, bool verbose=true)
Per Branch methods.
static std::unique_ptr< LegacyProbabilisticSubstitutionMapping > computeSubstitutionVectorsNoAveraging(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, std::shared_ptr< SubstitutionCountInterface > substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static std::vector< std::vector< double > > getNormalizedCountsPerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelSet > modelSet, std::shared_ptr< SubstitutionModelSet > nullModelSet, std::shared_ptr< const SubstitutionRegisterInterface > reg, bool perTime, bool perWord, bool verbose=true)
Returns the counts normalized by a null model set.
static void computeCountsPerSitePerBranchPerType(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, VVVdouble &result)
Per Branch Per Site Per Type methods.
static std::unique_ptr< LegacyProbabilisticSubstitutionMapping > computeSubstitutionVectorsMarginal(std::shared_ptr< const DRTreeLikelihoodInterface > drtl, std::shared_ptr< SubstitutionCountInterface > substitutionCount, bool verbose=true)
Compute the substitutions vectors for a particular dataset using the double-recursive likelihood comp...
static double computeNormForSite(const LegacySubstitutionMappingInterface &smap, size_t siteIndex)
Compute the norm of a substitution vector for a given position (specified by its index).
static void computeCountsPerTypePerBranch(std::shared_ptr< DRTreeLikelihoodInterface > drtl, const std::vector< int > &ids, std::shared_ptr< SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, VVdouble &result, double threshold=-1, bool verbose=true)
Per Type Per Branch methods.
Substitution models manager for non-homogeneous / non-reversible models of evolution.
The SubstitutionRegister interface.
Defines the basic types of data flow nodes.
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble