bpp-phyl3  3.0.0
SubstitutionMappingTools.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_PHYL_MAPPING_SUBSTITUTIONMAPPINGTOOLS_H
6 #define BPP_PHYL_MAPPING_SUBSTITUTIONMAPPINGTOOLS_H
7 
9 
10 #include "../Likelihood/DataFlow/LikelihoodCalculationSingleProcess.h"
11 #include "BranchedModelSet.h"
14 #include "SubstitutionCount.h"
15 
16 namespace bpp
17 {
33 {
34 public:
47  static const short UNRESOLVED_ZERO = 0;
48  static const short UNRESOLVED_AVERAGE = 2;
49  static const short UNRESOLVED_ONE = 1;
50 
51 public:
53 
55 
56 public:
74  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeCounts(
76  SubstitutionCountInterface& substitutionCount,
77  short unresolvedOption = UNRESOLVED_ZERO,
78  double threshold = -1,
79  bool verbose = true)
80  {
81  std::vector<uint> edgeIds = rltc.substitutionProcess().parametrizablePhyloTree().getAllEdgesIndexes();
82  return computeCounts(rltc, edgeIds, substitutionCount, unresolvedOption, threshold, verbose);
83  }
84 
103  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeCounts(
105  std::shared_ptr<const SubstitutionRegisterInterface> reg,
106  std::shared_ptr<const AlphabetIndex2> weights = 0,
107  std::shared_ptr<const AlphabetIndex2> distances = 0,
108  short unresolvedOption = UNRESOLVED_ZERO,
109  double threshold = -1,
110  bool verbose = true)
111  {
112  std::vector<uint> edgeIds = rltc.substitutionProcess().parametrizablePhyloTree().getAllEdgesIndexes();
113  return computeCounts(rltc, edgeIds, reg, weights, distances, unresolvedOption, threshold, verbose);
114  }
115 
130  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeCounts(
132  const std::vector<uint>& speciesIds,
133  SubstitutionCountInterface& substitutionCount,
134  short unresolvedOption = UNRESOLVED_ZERO,
135  double threshold = -1,
136  bool verbose = true);
137 
159  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeCounts(
161  const std::vector<uint>& edgeIds,
162  std::shared_ptr<const SubstitutionRegisterInterface> reg,
163  std::shared_ptr<const AlphabetIndex2> weights = 0,
164  std::shared_ptr<const AlphabetIndex2> distances = 0,
165  short unresolvedOption = UNRESOLVED_ZERO,
166  double threshold = -1,
167  bool verbose = true);
168 
187  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizations(
189  const std::vector<uint>& edgeIds,
190  std::shared_ptr<const BranchedModelSet> nullModels,
191  std::shared_ptr<const SubstitutionRegisterInterface> reg,
192  std::shared_ptr<const AlphabetIndex2> distances = 0,
193  short unresolvedOption = UNRESOLVED_ZERO,
194  bool verbose = true);
195 
211  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizations(
213  std::shared_ptr<const BranchedModelSet> nullModels,
214  std::shared_ptr<const SubstitutionRegisterInterface> reg,
215  std::shared_ptr<const AlphabetIndex2> distances = 0,
216  short unresolvedOption = UNRESOLVED_ZERO,
217  bool verbose = true)
218  {
219  std::vector<uint> edgeIds = rltc.substitutionProcess().parametrizablePhyloTree().getAllEdgesIndexes();
220  return computeNormalizations(rltc, edgeIds, nullModels, reg, distances, unresolvedOption, verbose);
221  }
222 
254  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizedCounts(
256  const std::vector<uint>& edgeIds,
257  std::shared_ptr<const BranchedModelSet> nullModels,
258  std::shared_ptr<const SubstitutionRegisterInterface> reg,
259  std::shared_ptr<const AlphabetIndex2> weights = 0,
260  std::shared_ptr<const AlphabetIndex2> distances = 0,
261  bool perTimeUnit = false,
262  uint siteSize = 1,
263  short unresolvedOption = SubstitutionMappingTools::UNRESOLVED_ZERO,
264  double threshold = -1,
265  bool verbose = true);
266 
267  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizedCounts(
268  const ProbabilisticSubstitutionMapping& counts,
269  const ProbabilisticSubstitutionMapping& factors,
270  const std::vector<uint>& edgeIds,
271  bool perTimeUnit = false,
272  uint siteSize = 1);
273 
274  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizedCounts(
276  std::shared_ptr<const BranchedModelSet> nullModels,
277  std::shared_ptr<const SubstitutionRegisterInterface> reg,
278  std::shared_ptr<const AlphabetIndex2> weights = 0,
279  std::shared_ptr<const AlphabetIndex2> distances = 0,
280  bool perTimeUnit = false,
281  uint siteSize = 1,
282  short unresolvedOption = SubstitutionMappingTools::UNRESOLVED_ZERO,
283  double threshold = -1,
284  bool verbose = true)
285  {
286  std::vector<uint> edgeIds = rltc.substitutionProcess().parametrizablePhyloTree().getAllEdgesIndexes();
287  return computeNormalizedCounts(rltc, edgeIds, nullModels, reg, weights, distances, perTimeUnit, siteSize, unresolvedOption, threshold, verbose);
288  }
289 
290  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizedCounts(
291  const ProbabilisticSubstitutionMapping& counts,
292  const ProbabilisticSubstitutionMapping& factors,
293  bool perTimeUnit,
294  uint siteSize = 1)
295  {
296  std::vector<uint> edgeIds = factors.getAllEdgesIndexes();
297  return computeNormalizedCounts(counts, factors, edgeIds, perTimeUnit, siteSize);
298  }
299 
309  static std::unique_ptr<ProbabilisticSubstitutionMapping> computeOneJumpCounts(
311  bool verbose = true)
312  {
314  return computeCounts(rltc, ojsm, 0);
315  }
316 
317 
339  static std::unique_ptr<PhyloTree> getTreeForType(
340  const ProbabilisticSubstitutionMapping& counts,
341  size_t type);
342 
352  static std::unique_ptr<PhyloTree> getTreeForType(
353  const ProbabilisticSubstitutionMapping& counts,
354  const ProbabilisticSubstitutionMapping& factors,
355  size_t type);
356 
383  const ProbabilisticSubstitutionMapping& counts,
384  const std::vector<uint>& ids = Vuint(0));
385 
386 
397  const ProbabilisticSubstitutionMapping& counts,
398  size_t site);
399 
401  const ProbabilisticSubstitutionMapping& counts,
402  const ProbabilisticSubstitutionMapping& factors,
403  size_t site);
404 
415  const ProbabilisticSubstitutionMapping& counts,
416  const std::vector<uint>& ids = Vuint(0));
417 
419  const ProbabilisticSubstitutionMapping& counts,
420  const ProbabilisticSubstitutionMapping& factors,
421  const std::vector<uint>& ids = Vuint(0));
422 
431  const ProbabilisticSubstitutionMapping& counts,
432  uint branchId);
433 
445  const ProbabilisticSubstitutionMapping& counts,
446  const std::vector<uint>& ids = Vuint(0));
447 
449  const ProbabilisticSubstitutionMapping& counts,
450  const std::vector<uint>& ids = Vuint(0));
451 
476  const std::vector<uint>& ids,
477  std::shared_ptr<const SubstitutionRegisterInterface> reg,
478  std::shared_ptr<const AlphabetIndex2> weights = 0,
479  std::shared_ptr<const AlphabetIndex2> distances = 0,
480  short unresolvedOption = UNRESOLVED_ZERO,
481  double threshold = -1,
482  bool verbose = true);
483 
508  const ProbabilisticSubstitutionMapping& counts,
509  size_t site,
510  const std::vector<uint>& ids = Vuint(0));
511 
527  const ProbabilisticSubstitutionMapping& counts,
528  const ProbabilisticSubstitutionMapping& factors,
529  size_t site,
530  bool perTimeUnit,
531  uint siteSize = 1);
532 
550  const ProbabilisticSubstitutionMapping& counts,
551  const ProbabilisticSubstitutionMapping& factors,
552  size_t site,
553  const std::vector<uint>& ids,
554  bool perTimeUnit,
555  uint siteSize = 1);
556 
566  const ProbabilisticSubstitutionMapping& counts,
567  const std::vector<uint>& ids = Vuint(0));
568 
583  const ProbabilisticSubstitutionMapping& counts,
584  const ProbabilisticSubstitutionMapping& factors,
585  bool perTimeUnit,
586  uint siteSize = 1);
587 
604  const ProbabilisticSubstitutionMapping& counts,
605  const ProbabilisticSubstitutionMapping& factors,
606  const std::vector<uint>& ids,
607  bool perTimeUnit,
608  uint siteSize = 1);
609 
621  static double getNormForSite(
622  const ProbabilisticSubstitutionMapping& counts,
623  size_t site);
624 
642  static void outputPerSitePerBranch(const std::string& filename,
643  const std::vector<uint>& ids,
644  const AlignmentDataInterface& sites,
645  const VVdouble& counts);
646 
650  static void outputPerSitePerType(const std::string& filename,
652  const AlignmentDataInterface& sites,
653  const VVdouble& counts);
654 
658  static void outputPerType(const std::string& filename,
660  const AlignmentDataInterface& sites,
661  const VVdouble& counts);
662 
666  static void outputPerSitePerBranchPerType(const std::string& filenamePrefix,
667  const std::vector<uint>& ids,
669  const AlignmentDataInterface& sites,
670  const VVVdouble& counts);
671 
672 
684  static void writeToStream(
685  const ProbabilisticSubstitutionMapping& substitutions,
686  const AlignmentDataInterface& sites,
687  size_t type,
688  std::ostream& out);
689 
698  static void readFromStream(
699  std::istream& in,
700  ProbabilisticSubstitutionMapping& substitutions,
701  size_t type);
702 
711 };
712 } // end of namespace bpp.
713 
714 
715 #endif // BPP_PHYL_MAPPING_SUBSTITUTIONMAPPINGTOOLS_H
virtual std::vector< EdgeIndex > getAllEdgesIndexes() const=0
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
std::shared_ptr< const StateMapInterface > getStateMap() const
Computes the probability that at least one jump occurred on a branch, given the initial and final sta...
Data storage class for probabilistic substitution mappings.
The SubstitutionsCount interface.
Provide methods to compute substitution mappings.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeCounts(LikelihoodCalculationSingleProcess &rltc, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the substitutions tree for a particular dataset.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizedCounts(LikelihoodCalculationSingleProcess &rltc, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, bool perTimeUnit=false, uint siteSize=1, short unresolvedOption=SubstitutionMappingTools::UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
static Vdouble getCountsForSitePerType(const ProbabilisticSubstitutionMapping &counts, size_t site, const std::vector< uint > &ids=Vuint(0))
Methods that sum on branches need raw mapping trees, or raw mapping trees and normalization trees (ie...
static void readFromStream(std::istream &in, ProbabilisticSubstitutionMapping &substitutions, size_t type)
Read the substitutions std::vectors from a stream.
static const short UNRESOLVED_ZERO
Constants describing how unresolved characters are counted:
static std::unique_ptr< PhyloTree > getTreeForType(const ProbabilisticSubstitutionMapping &counts, size_t type)
Methods to get trees of counts. These methods can use raw or normalized counting trees (ie computed w...
static VVdouble getCountsPerSitePerBranch(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all type of substitutions for each site for each branch.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizedCounts(const ProbabilisticSubstitutionMapping &counts, const ProbabilisticSubstitutionMapping &factors, bool perTimeUnit, uint siteSize=1)
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeOneJumpCounts(LikelihoodCalculationSingleProcess &rltc, bool verbose=true)
This method computes for each site and for each branch the probability that at least one jump occurre...
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeCounts(LikelihoodCalculationSingleProcess &rltc, SubstitutionCountInterface &substitutionCount, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Methods to compute mapping Trees.
static void outputPerSitePerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVdouble &counts)
Output Per Site Per Type in SGED format.
static Vdouble getCountsForSitePerBranch(const ProbabilisticSubstitutionMapping &counts, size_t site)
Sum all type of substitutions for each branch of a given position.
static VVdouble computeCountsPerTypePerBranch(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &ids, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the sum over all branches of the counts per type per branch.
static VVVdouble getCountsPerSitePerBranchPerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Methods to get std::vectors of counts.
static void writeToStream(const ProbabilisticSubstitutionMapping &substitutions, const AlignmentDataInterface &sites, size_t type, std::ostream &out)
Write the substitutions std::vectors to a stream.
static Vdouble getCountsForBranchPerType(const ProbabilisticSubstitutionMapping &counts, uint branchId)
Sum all sites substitutions for each type of a given branch.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizedCounts(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &edgeIds, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, bool perTimeUnit=false, uint siteSize=1, short unresolvedOption=SubstitutionMappingTools::UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the substitution counts tree, normalized by the models of "null" process on each branch,...
static VVdouble getCountsPerTypePerBranch(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
static void outputPerSitePerBranch(const std::string &filename, const std::vector< uint > &ids, const AlignmentDataInterface &sites, const VVdouble &counts)
Outputs of counts.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizations(LikelihoodCalculationSingleProcess &rltc, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, bool verbose=true)
Compute the normalizations tree due to the models of "null" process on each branch,...
static void outputPerSitePerBranchPerType(const std::string &filenamePrefix, const std::vector< uint > &ids, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVVdouble &counts)
Output Per Site Per Branch Per Type, one SGED file per type.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizations(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &edgeIds, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, bool verbose=true)
Compute the normalizations tree due to the models of "null" process on each branch,...
static VVdouble getCountsPerBranchPerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all sites substitutions for each branch for each type.
static VVdouble getCountsPerSitePerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all type of substitutions for each site for each type.
static double getNormForSite(const ProbabilisticSubstitutionMapping &counts, size_t site)
Compute the norm of a substitution std::vector for a given position.
static void outputPerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVdouble &counts)
Output Per Type.
virtual const ParametrizablePhyloTree & parametrizablePhyloTree() const =0
The SubstitutionRegister interface.
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble
std::vector< unsigned int > Vuint