bpp-phyl3 3.0.0
BppPhylogeneticsApplication.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5// From the STL:
6#include <iostream>
7#include <iomanip>
8#include <limits>
9
13
14using namespace std;
15using namespace bpp;
16
17/******************************************************************************/
18
19map<size_t, std::shared_ptr<PhyloTree>> BppPhylogeneticsApplication::getPhyloTreesMap(
20 const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mSites,
21 map<string, string>& unparsedParams,
22 const std::string& prefix,
23 const std::string& suffix,
24 bool suffixIsOptional) const
25{
26 map<size_t, std::shared_ptr<PhyloTree>> mpTree = PhylogeneticsApplicationTools::getPhyloTrees(params_, mSites, unparsedParams, prefix, suffix, suffixIsOptional, verbose_, warn_);
27
28 // Scaling of trees:
29 double scale = ApplicationTools::getDoubleParameter("input.tree.scale", params_, 1, suffix, suffixIsOptional, warn_);
30
31 if (scale != 1)
32 {
33 if (verbose_)
34 {
35 ApplicationTools::displayResult("Trees are scaled by", scale);
36 }
37
38 for (auto it : mpTree)
39 {
40 it.second->scaleTree(scale);
41 }
42 }
43
44 return mpTree;
45}
46
47unique_ptr<SubstitutionProcessCollection> BppPhylogeneticsApplication::getCollection(
48 std::shared_ptr<const Alphabet> alphabet,
49 std::shared_ptr<const GeneticCode> gCode,
50 const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mSites,
51 map<string, string>& unparsedParams,
52 const std::string& prefix,
53 const std::string& suffix,
54 bool suffixIsOptional) const
55{
56 auto mpTree = getPhyloTreesMap(mSites, unparsedParams, prefix, suffix, suffixIsOptional);
57 auto SPC = getCollection(alphabet, gCode, mSites, mpTree, unparsedParams, prefix, suffix, suffixIsOptional);
58 return SPC;
59}
60
61std::unique_ptr<SubstitutionProcessCollection> BppPhylogeneticsApplication::getCollection(
62 std::shared_ptr<const Alphabet> alphabet,
63 std::shared_ptr<const GeneticCode> gCode,
64 const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mSites,
65 const map<size_t, std::shared_ptr<PhyloTree>>& mpTree,
66 map<string, string>& unparsedParams,
67 const std::string& prefix,
68 const std::string& suffix,
69 bool suffixIsOptional) const
70{
71 auto mDist = PhylogeneticsApplicationTools::getRateDistributions(params_, suffix, suffixIsOptional, verbose_);
72 auto mModU = PhylogeneticsApplicationTools::getBranchModels(alphabet, gCode, mSites, params_, unparsedParams, suffix, suffixIsOptional, verbose_, warn_);
73 auto mMod = PhylogeneticsApplicationTools::uniqueToSharedMap<BranchModelInterface>(mModU);
74 auto mRootFreqU = PhylogeneticsApplicationTools::getRootFrequencySets(alphabet, gCode, mSites, params_, unparsedParams, suffix, suffixIsOptional, verbose_, warn_);
75 auto mRootFreq = PhylogeneticsApplicationTools::uniqueToSharedMap<FrequencySetInterface>(mRootFreqU);
76 auto mModelPathU = PhylogeneticsApplicationTools::getModelPaths(params_, mMod, suffix, suffixIsOptional, verbose_, warn_);
77 auto mModelPath = PhylogeneticsApplicationTools::uniqueToSharedMap<ModelPath>(mModelPathU);
78 auto mScenarioU = PhylogeneticsApplicationTools::getModelScenarios(params_, mModelPath, mMod, suffix, suffixIsOptional, verbose_, warn_);
79 auto mScenario = PhylogeneticsApplicationTools::uniqueToSharedMap<ModelScenario>(mScenarioU);
80
81 auto SPC = PhylogeneticsApplicationTools::getSubstitutionProcessCollection(alphabet, gCode, mpTree, mMod, mRootFreq, mDist, mScenario, params_, unparsedParams, suffix, suffixIsOptional, verbose_, warn_);
82
83 return SPC;
84}
85
86
87map<size_t, std::unique_ptr<SequenceEvolution>> BppPhylogeneticsApplication::getProcesses(
88 shared_ptr<SubstitutionProcessCollection> collection,
89 map<string, string>& unparsedParams,
90 const std::string& suffix,
91 bool suffixIsOptional) const
92{
94 collection, params_, unparsedParams, suffix, suffixIsOptional, verbose_, warn_);
95}
96
97
98std::shared_ptr<PhyloLikelihoodContainer> BppPhylogeneticsApplication::getPhyloLikelihoods(
99 Context& context,
100 map<size_t, shared_ptr<SequenceEvolution>> mSeqEvol,
101 shared_ptr<SubstitutionProcessCollection> collection,
102 const map<size_t, shared_ptr<const AlignmentDataInterface>>& mSites,
103 const std::string& suffix,
104 bool suffixIsOptional) const
105{
107 context, collection, mSeqEvol, mSites, params_, suffix, suffixIsOptional, verbose_, warn_);
108}
109
110
112 shared_ptr<const Alphabet> alphabet,
113 shared_ptr<const GeneticCode> gCode,
114 shared_ptr<PhyloLikelihoodInterface> phylolik,
115 const std::string& suffix,
116 bool suffixIsOptional) const
117{
118 double logL = phylolik->getValue();
119
120 if (!std::isnormal(logL))
121 {
122 // This may be due to null branch lengths, leading to null likelihood!
123 ApplicationTools::displayWarning("!!! Warning!!! Initial likelihood is zero.");
124 ApplicationTools::displayWarning("!!! This may be due to branch length == 0.");
125 ApplicationTools::displayWarning("!!! All null branch lengths will be set to 0.001.");
126 ParameterList pl = phylolik->getBranchLengthParameters();
127
128 for (size_t i = 0; i < pl.size(); i++)
129 {
130 if (pl[i].getValue() < 0.000001)
131 pl[i].setValue(0.001);
132 }
133 phylolik->matchParametersValues(pl);
134 logL = phylolik->getValue();
135 }
136
138 ApplicationTools::displayResult("Initial log likelihood", TextTools::toString(-logL, 15));
139 if (!std::isnormal(logL))
140 {
141 ApplicationTools::displayError("!!! Unexpected initial likelihood == 0.");
142
143 map<size_t, shared_ptr<SingleDataPhyloLikelihoodInterface>> mSD;
144
145 if (dynamic_pointer_cast<SingleDataPhyloLikelihoodInterface>(phylolik))
146 mSD[1] = dynamic_pointer_cast<SingleDataPhyloLikelihoodInterface>(phylolik);
147 else
148 {
149 auto sOAP = dynamic_pointer_cast<PhyloLikelihoodSetInterface>(phylolik);
150 if (sOAP)
151 {
152 const vector<size_t>& nSD = sOAP->getNumbersOfPhyloLikelihoods();
153
154 for (size_t iSD = 0; iSD < nSD.size(); ++iSD)
155 {
156 auto pASDP = dynamic_pointer_cast<SingleDataPhyloLikelihoodInterface>(sOAP->getPhyloLikelihood(nSD[iSD]));
157
158 if (pASDP)
159 mSD[nSD[iSD]] = pASDP;
160 }
161 }
162 }
163
164 for (auto& itm : mSD)
165 {
166 ApplicationTools::displayWarning("Checking for phyloLikelihood " + TextTools::toString(itm.first));
167
168 if (!std::isnormal(itm.second->getValue()))
169 {
170 auto sDP = itm.second;
171
172 auto vData = std::shared_ptr<AlignmentDataInterface>(sDP->getData()->clone());
173
174 auto vSC = std::dynamic_pointer_cast<SiteContainerInterface>(vData);
175 auto pSC = std::dynamic_pointer_cast<ProbabilisticSiteContainerInterface>(vData);
176
177 if (AlphabetTools::isCodonAlphabet(*alphabet))
178 {
179 bool f = false;
180 size_t s;
181 for (size_t i = 0; i < vData->getNumberOfSites(); ++i)
182 {
183 if (!std::isnormal(sDP->getLogLikelihoodForASite(i)))
184 {
185 if (vSC)
186 {
187 const Site& site = vSC->site(i);
188 s = site.size();
189 for (size_t j = 0; j < s; ++j)
190 {
191 if (gCode->isStop(site.getValue(j)))
192 {
193 (*ApplicationTools::error << "Stop Codon at site " << site.getCoordinate() << " in sequence " << vData->sequence(j).getName()).endLine();
194 f = true;
195 }
196 }
197 }
198 else
199 {
200 const ProbabilisticSite& site = pSC->site(i);
201 s = site.size();
202 for (size_t j = 0; j < s; ++j)
203 {
204 bool g = false;
205 for (int st = 0; !g && st < static_cast<int>(alphabet->getSize()); ++st)
206 {
207 g = (site.getStateValueAt(j, st) != 0 && !gCode->isStop(st));
208 }
209
210 if (!g)
211 {
212 (*ApplicationTools::error << "Only stop Codons at site " << site.getCoordinate() << " in sequence " << vData->sequence(j).getName()).endLine();
213 f = true;
214 }
215 }
216 }
217 }
218 }
219 if (f)
220 exit(-1);
221 }
222
223 // Then remove saturated positions
224
225 bool removeSaturated = ApplicationTools::getBooleanParameter("input.sequence.remove_saturated_sites", params_, false, suffix, suffixIsOptional, warn_);
226
227
228 if (removeSaturated)
229 {
230 ApplicationTools::displayBooleanResult("Saturated site removal enabled", true);
231 for (size_t i = vData->getNumberOfSites(); i > 0; --i)
232 {
233 if (!std::isnormal(sDP->getLogLikelihoodForASite(i - 1)))
234 {
235 ApplicationTools::displayResult("Ignore saturated site", vData->site(i - 1).getCoordinate());
236 vData->deleteSites(i - 1, 1);
237 }
238 }
239 ApplicationTools::displayResult("Number of sites retained", vData->getNumberOfSites());
240
241 sDP->setData(vData);
242 }
243
244 logL = sDP->getValue();
245
246 vector<size_t> vsiteok, vsitemin;
247
248 if (!std::isnormal(logL))
249 {
250 ApplicationTools::displayError("!!! Removing problem sites:");
251 for (unsigned int i = 0; i < vData->getNumberOfSites(); i++)
252 {
253 auto x = sDP->getLogLikelihoodForASite(i);
254 if (!std::isnormal(x))
255 {
256 (*ApplicationTools::error << "Site " << vData->site(i).getCoordinate() << "\tlog likelihood = " << x).endLine();
257 vsitemin.push_back(i);
258 }
259 else
260 vsiteok.push_back(i);
261 }
262
263 shared_ptr<AlignmentDataInterface> vDataok = SiteContainerTools::getSelectedSites(*vData, vsiteok);
264 // auto vDatamin = SiteContainerTools::getSelectedSites(*vData, vsitemin); Not taken into account yet
265
266 sDP->setData(vDataok);
267 logL = sDP->getValue();
268 ApplicationTools::displayResult("Filtered log likelihood", TextTools::toString(-logL, 15));
269
270 // auto phylo2 = itm.second->clone(); To be finished
271 // phylo2->setData(*vDatamin);
272 // auto logL2 = phylo2->getValue();
273
274 // ApplicationTools::displayResult("Left log likelihood", TextTools::toString(-logL2, 15));
275 }
276 else
277 ApplicationTools::displayResult("Initial log likelihood", TextTools::toString(-logL, 15));
278 }
279 else
280 ApplicationTools::displayResult("Initial log likelihood", TextTools::toString(-itm.second->getValue(), 15));
281 }
282 }
283}
284
285
287{
288 // Write parameters to screen:
289 if (displaylL)
291
293 ApplicationTools::displayMessage("Too many parameters for screen output!");
294 else
295 {
296 ParameterList parameters = tl.getParameters();
298 for (size_t i = 0; i < parameters.size(); i++)
299 {
300 ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
301 }
302 }
303}
int getCoordinate() const override
const int & getValue(size_t pos) const override
static bool isCodonAlphabet(const Alphabet &alphabet)
static void displayMessage(const std::string &text)
static std::shared_ptr< OutputStream > error
static void displayError(const std::string &text)
static bool getBooleanParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, bool defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static void displayBooleanResult(const std::string &text, bool result)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
std::map< std::string, std::string > params_
virtual void fixLikelihood(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, std::shared_ptr< PhyloLikelihoodInterface > phylolik, const std::string &suffix="", bool suffixIsOptional=true) const
Method to have a clean likelihood (ie not saturated, nor infinite).
virtual std::map< size_t, std::unique_ptr< SequenceEvolution > > getProcesses(std::shared_ptr< SubstitutionProcessCollection > collection, std::map< std::string, std::string > &unparsedParams, const std::string &suffix="", bool suffixIsOptional=true) const
get the substitution processes.
virtual std::map< size_t, std::shared_ptr< PhyloTree > > getPhyloTreesMap(const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mSites, std::map< std::string, std::string > &unparsedParams, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true) const
Methods to build objects.
virtual std::unique_ptr< SubstitutionProcessCollection > getCollection(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mSites, const std::map< size_t, std::shared_ptr< PhyloTree > > &mpTree, std::map< std::string, std::string > &unparsedParams, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true) const
get the collection of objects necessary to build substitution processes.
virtual void displayParameters(const PhyloLikelihoodInterface &tl, bool displaylL=true) const
Display parameter values.
virtual std::shared_ptr< PhyloLikelihoodContainer > getPhyloLikelihoods(Context &context, std::map< size_t, std::shared_ptr< SequenceEvolution > > mSeqEvol, std::shared_ptr< SubstitutionProcessCollection > collection, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mSites, const std::string &suffix="", bool suffixIsOptional=true) const
get the phylolikelihoods.
Context for dataflow node construction.
Definition: DataFlow.h:527
virtual size_t size() const=0
virtual double getValue() const=0
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual bool matchParametersValues(const ParameterList &params, std::vector< size_t > *updatedParameters=0)
virtual void deleteParameters(const std::vector< std::string > &names, bool mustExist=true)
virtual size_t getNumberOfParameters() const=0
virtual const ParameterList & getParameters() const=0
The PhyloLikelihood interface, for phylogenetic likelihood.
virtual ParameterList getBranchLengthParameters() const =0
Get the independent branch lengths parameters.
static std::map< size_t, std::unique_ptr< SequenceEvolution > > getSequenceEvolutions(std::shared_ptr< SubstitutionProcessCollection > SPC, const std::map< std::string, std::string > &params, map< string, string > &unparsedParams, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a map of Sequence Evolution, ie ways how sequence evolve, which may use several processes.
static std::map< size_t, std::unique_ptr< FrequencySetInterface > > getRootFrequencySets(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &sharedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
The same, but returns a map <number, shared_ptr<FrequencySetInterface>>.
static std::shared_ptr< PhyloLikelihoodContainer > getPhyloLikelihoodContainer(Context &context, std::shared_ptr< SubstitutionProcessCollection > SPC, std::map< size_t, std::shared_ptr< SequenceEvolution > > &mSeqEvol, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, const std::map< std::string, std::string > &params, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a Phylogeny container from parameters map.
static std::map< size_t, std::unique_ptr< BranchModelInterface > > getBranchModels(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &unparsedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
The same as before, but returns a map <number, branch model>.
static map< size_t, std::unique_ptr< ModelScenario > > getModelScenarios(const std::map< std::string, std::string > &params, const map< size_t, std::shared_ptr< ModelPath > > &mModelPath, const map< size_t, std::shared_ptr< BranchModelInterface > > &mModel, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build map of ModelScenarios from a map of ModelPaths.
static std::map< size_t, std::shared_ptr< DiscreteDistributionInterface > > getRateDistributions(const std::map< std::string, std::string > &params, const string &suffix="", bool suffixIsOptional=true, bool verbose=true)
static map< size_t, std::unique_ptr< ModelPath > > getModelPaths(const std::map< std::string, std::string > &params, const map< size_t, std::shared_ptr< BranchModelInterface > > &mModel, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build map of ModelPaths from a map of BranchModel.
static std::map< size_t, std::shared_ptr< PhyloTree > > getPhyloTrees(const std::map< std::string, std::string > &params, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mSeq, std::map< std::string, std::string > &unparsedParams, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a map of <number,PhyloTree> according to options.
static std::unique_ptr< SubstitutionProcessCollection > getSubstitutionProcessCollection(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const map< size_t, std::shared_ptr< PhyloTree > > &mTree, const map< size_t, std::shared_ptr< BranchModelInterface > > &mMod, const map< size_t, std::shared_ptr< FrequencySetInterface > > &mRootFreq, const map< size_t, std::shared_ptr< DiscreteDistributionInterface > > &mDist, const map< size_t, std::shared_ptr< ModelScenario > > &mScen, const std::map< std::string, std::string > &params, map< string, string > &unparsedparams, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Builds a SubstitutionProcessCollection from many maps of relevant objects.
double getStateValueAt(size_t sequencePosition, int state) const
static void getSelectedSites(const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, const SiteSelection &selection, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites)
std::string toString(T t)
Defines the basic types of data flow nodes.