bpp-phyl3 3.0.0
SubstitutionMappingTools.h
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#ifndef BPP_PHYL_MAPPING_SUBSTITUTIONMAPPINGTOOLS_H
6#define BPP_PHYL_MAPPING_SUBSTITUTIONMAPPINGTOOLS_H
7
9
10#include "../Likelihood/DataFlow/LikelihoodCalculationSingleProcess.h"
11#include "BranchedModelSet.h"
14#include "SubstitutionCount.h"
15
16namespace bpp
17{
33{
34public:
47 static const short UNRESOLVED_ZERO = 0;
48 static const short UNRESOLVED_AVERAGE = 2;
49 static const short UNRESOLVED_ONE = 1;
50
51public:
53
55
56public:
74 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeCounts(
76 SubstitutionCountInterface& substitutionCount,
77 short unresolvedOption = UNRESOLVED_ZERO,
78 double threshold = -1,
79 bool verbose = true)
80 {
81 std::vector<uint> edgeIds = rltc.substitutionProcess().parametrizablePhyloTree().getAllEdgesIndexes();
82 return computeCounts(rltc, edgeIds, substitutionCount, unresolvedOption, threshold, verbose);
83 }
84
103 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeCounts(
105 std::shared_ptr<const SubstitutionRegisterInterface> reg,
106 std::shared_ptr<const AlphabetIndex2> weights = 0,
107 std::shared_ptr<const AlphabetIndex2> distances = 0,
108 short unresolvedOption = UNRESOLVED_ZERO,
109 double threshold = -1,
110 bool verbose = true)
111 {
112 std::vector<uint> edgeIds = rltc.substitutionProcess().parametrizablePhyloTree().getAllEdgesIndexes();
113 return computeCounts(rltc, edgeIds, reg, weights, distances, unresolvedOption, threshold, verbose);
114 }
115
130 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeCounts(
132 const std::vector<uint>& speciesIds,
133 SubstitutionCountInterface& substitutionCount,
134 short unresolvedOption = UNRESOLVED_ZERO,
135 double threshold = -1,
136 bool verbose = true);
137
159 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeCounts(
161 const std::vector<uint>& edgeIds,
162 std::shared_ptr<const SubstitutionRegisterInterface> reg,
163 std::shared_ptr<const AlphabetIndex2> weights = 0,
164 std::shared_ptr<const AlphabetIndex2> distances = 0,
165 short unresolvedOption = UNRESOLVED_ZERO,
166 double threshold = -1,
167 bool verbose = true);
168
187 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizations(
189 const std::vector<uint>& edgeIds,
190 std::shared_ptr<const BranchedModelSet> nullModels,
191 std::shared_ptr<const SubstitutionRegisterInterface> reg,
192 std::shared_ptr<const AlphabetIndex2> distances = 0,
193 short unresolvedOption = UNRESOLVED_ZERO,
194 bool verbose = true);
195
211 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizations(
213 std::shared_ptr<const BranchedModelSet> nullModels,
214 std::shared_ptr<const SubstitutionRegisterInterface> reg,
215 std::shared_ptr<const AlphabetIndex2> distances = 0,
216 short unresolvedOption = UNRESOLVED_ZERO,
217 bool verbose = true)
218 {
219 std::vector<uint> edgeIds = rltc.substitutionProcess().parametrizablePhyloTree().getAllEdgesIndexes();
220 return computeNormalizations(rltc, edgeIds, nullModels, reg, distances, unresolvedOption, verbose);
221 }
222
254 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizedCounts(
256 const std::vector<uint>& edgeIds,
257 std::shared_ptr<const BranchedModelSet> nullModels,
258 std::shared_ptr<const SubstitutionRegisterInterface> reg,
259 std::shared_ptr<const AlphabetIndex2> weights = 0,
260 std::shared_ptr<const AlphabetIndex2> distances = 0,
261 bool perTimeUnit = false,
262 uint siteSize = 1,
263 short unresolvedOption = SubstitutionMappingTools::UNRESOLVED_ZERO,
264 double threshold = -1,
265 bool verbose = true);
266
267 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizedCounts(
270 const std::vector<uint>& edgeIds,
271 bool perTimeUnit = false,
272 uint siteSize = 1);
273
274 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizedCounts(
276 std::shared_ptr<const BranchedModelSet> nullModels,
277 std::shared_ptr<const SubstitutionRegisterInterface> reg,
278 std::shared_ptr<const AlphabetIndex2> weights = 0,
279 std::shared_ptr<const AlphabetIndex2> distances = 0,
280 bool perTimeUnit = false,
281 uint siteSize = 1,
282 short unresolvedOption = SubstitutionMappingTools::UNRESOLVED_ZERO,
283 double threshold = -1,
284 bool verbose = true)
285 {
286 std::vector<uint> edgeIds = rltc.substitutionProcess().parametrizablePhyloTree().getAllEdgesIndexes();
287 return computeNormalizedCounts(rltc, edgeIds, nullModels, reg, weights, distances, perTimeUnit, siteSize, unresolvedOption, threshold, verbose);
288 }
289
290 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeNormalizedCounts(
293 bool perTimeUnit,
294 uint siteSize = 1)
295 {
296 std::vector<uint> edgeIds = factors.getAllEdgesIndexes();
297 return computeNormalizedCounts(counts, factors, edgeIds, perTimeUnit, siteSize);
298 }
299
309 static std::unique_ptr<ProbabilisticSubstitutionMapping> computeOneJumpCounts(
311 bool verbose = true)
312 {
314 return computeCounts(rltc, ojsm, 0);
315 }
316
317
339 static std::unique_ptr<PhyloTree> getTreeForType(
341 size_t type);
342
352 static std::unique_ptr<PhyloTree> getTreeForType(
355 size_t type);
356
384 const std::vector<uint>& ids = Vuint(0));
385
386
398 size_t site);
399
403 size_t site);
404
416 const std::vector<uint>& ids = Vuint(0));
417
421 const std::vector<uint>& ids = Vuint(0));
422
432 uint branchId);
433
446 const std::vector<uint>& ids = Vuint(0));
447
450 const std::vector<uint>& ids = Vuint(0));
451
476 const std::vector<uint>& ids,
477 std::shared_ptr<const SubstitutionRegisterInterface> reg,
478 std::shared_ptr<const AlphabetIndex2> weights = 0,
479 std::shared_ptr<const AlphabetIndex2> distances = 0,
480 short unresolvedOption = UNRESOLVED_ZERO,
481 double threshold = -1,
482 bool verbose = true);
483
509 size_t site,
510 const std::vector<uint>& ids = Vuint(0));
511
529 size_t site,
530 bool perTimeUnit,
531 uint siteSize = 1);
532
552 size_t site,
553 const std::vector<uint>& ids,
554 bool perTimeUnit,
555 uint siteSize = 1);
556
567 const std::vector<uint>& ids = Vuint(0));
568
585 bool perTimeUnit,
586 uint siteSize = 1);
587
606 const std::vector<uint>& ids,
607 bool perTimeUnit,
608 uint siteSize = 1);
609
621 static double getNormForSite(
623 size_t site);
624
642 static void outputPerSitePerBranch(const std::string& filename,
643 const std::vector<uint>& ids,
644 const AlignmentDataInterface& sites,
645 const VVdouble& counts);
646
650 static void outputPerSitePerType(const std::string& filename,
652 const AlignmentDataInterface& sites,
653 const VVdouble& counts);
654
658 static void outputPerType(const std::string& filename,
660 const AlignmentDataInterface& sites,
661 const VVdouble& counts);
662
666 static void outputPerSitePerBranchPerType(const std::string& filenamePrefix,
667 const std::vector<uint>& ids,
669 const AlignmentDataInterface& sites,
670 const VVVdouble& counts);
671
672
684 static void writeToStream(
685 const ProbabilisticSubstitutionMapping& substitutions,
686 const AlignmentDataInterface& sites,
687 size_t type,
688 std::ostream& out);
689
698 static void readFromStream(
699 std::istream& in,
701 size_t type);
702
711};
712} // end of namespace bpp.
713
714
715#endif // BPP_PHYL_MAPPING_SUBSTITUTIONMAPPINGTOOLS_H
virtual std::vector< EdgeIndex > getAllEdgesIndexes() const=0
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
std::shared_ptr< const StateMapInterface > getStateMap() const
Computes the probability that at least one jump occurred on a branch, given the initial and final sta...
Data storage class for probabilistic substitution mappings.
The SubstitutionsCount interface.
Provide methods to compute substitution mappings.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizedCounts(LikelihoodCalculationSingleProcess &rltc, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, bool perTimeUnit=false, uint siteSize=1, short unresolvedOption=SubstitutionMappingTools::UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
static Vdouble getCountsForSitePerType(const ProbabilisticSubstitutionMapping &counts, size_t site, const std::vector< uint > &ids=Vuint(0))
Methods that sum on branches need raw mapping trees, or raw mapping trees and normalization trees (ie...
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeCounts(LikelihoodCalculationSingleProcess &rltc, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the substitutions tree for a particular dataset.
static void readFromStream(std::istream &in, ProbabilisticSubstitutionMapping &substitutions, size_t type)
Read the substitutions std::vectors from a stream.
static const short UNRESOLVED_ZERO
Constants describing how unresolved characters are counted:
static std::unique_ptr< PhyloTree > getTreeForType(const ProbabilisticSubstitutionMapping &counts, size_t type)
Methods to get trees of counts. These methods can use raw or normalized counting trees (ie computed w...
static VVdouble getCountsPerSitePerBranch(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all type of substitutions for each site for each branch.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizedCounts(const ProbabilisticSubstitutionMapping &counts, const ProbabilisticSubstitutionMapping &factors, bool perTimeUnit, uint siteSize=1)
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizations(LikelihoodCalculationSingleProcess &rltc, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, bool verbose=true)
Compute the normalizations tree due to the models of "null" process on each branch,...
static void outputPerSitePerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVdouble &counts)
Output Per Site Per Type in SGED format.
static Vdouble getCountsForSitePerBranch(const ProbabilisticSubstitutionMapping &counts, size_t site)
Sum all type of substitutions for each branch of a given position.
static VVdouble computeCountsPerTypePerBranch(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &ids, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the sum over all branches of the counts per type per branch.
static VVVdouble getCountsPerSitePerBranchPerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Methods to get std::vectors of counts.
static void writeToStream(const ProbabilisticSubstitutionMapping &substitutions, const AlignmentDataInterface &sites, size_t type, std::ostream &out)
Write the substitutions std::vectors to a stream.
static Vdouble getCountsForBranchPerType(const ProbabilisticSubstitutionMapping &counts, uint branchId)
Sum all sites substitutions for each type of a given branch.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizedCounts(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &edgeIds, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=0, std::shared_ptr< const AlphabetIndex2 > distances=0, bool perTimeUnit=false, uint siteSize=1, short unresolvedOption=SubstitutionMappingTools::UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Compute the substitution counts tree, normalized by the models of "null" process on each branch,...
static VVdouble getCountsPerTypePerBranch(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
static void outputPerSitePerBranch(const std::string &filename, const std::vector< uint > &ids, const AlignmentDataInterface &sites, const VVdouble &counts)
Outputs of counts.
static void outputPerSitePerBranchPerType(const std::string &filenamePrefix, const std::vector< uint > &ids, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVVdouble &counts)
Output Per Site Per Branch Per Type, one SGED file per type.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeNormalizations(LikelihoodCalculationSingleProcess &rltc, const std::vector< uint > &edgeIds, std::shared_ptr< const BranchedModelSet > nullModels, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > distances=0, short unresolvedOption=UNRESOLVED_ZERO, bool verbose=true)
Compute the normalizations tree due to the models of "null" process on each branch,...
static VVdouble getCountsPerBranchPerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all sites substitutions for each branch for each type.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeOneJumpCounts(LikelihoodCalculationSingleProcess &rltc, bool verbose=true)
This method computes for each site and for each branch the probability that at least one jump occurre...
static VVdouble getCountsPerSitePerType(const ProbabilisticSubstitutionMapping &counts, const std::vector< uint > &ids=Vuint(0))
Sum all type of substitutions for each site for each type.
static double getNormForSite(const ProbabilisticSubstitutionMapping &counts, size_t site)
Compute the norm of a substitution std::vector for a given position.
static void outputPerType(const std::string &filename, const SubstitutionRegisterInterface &reg, const AlignmentDataInterface &sites, const VVdouble &counts)
Output Per Type.
static std::unique_ptr< ProbabilisticSubstitutionMapping > computeCounts(LikelihoodCalculationSingleProcess &rltc, SubstitutionCountInterface &substitutionCount, short unresolvedOption=UNRESOLVED_ZERO, double threshold=-1, bool verbose=true)
Methods to compute mapping Trees.
virtual const ParametrizablePhyloTree & parametrizablePhyloTree() const =0
The SubstitutionRegister interface.
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble
std::vector< unsigned int > Vuint