bpp-phyl3  3.0.0
BppPhylogeneticsApplication.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 // From the STL:
6 #include <iostream>
7 #include <iomanip>
8 #include <limits>
9 
13 
14 using namespace std;
15 using namespace bpp;
16 
17 /******************************************************************************/
18 
19 map<size_t, std::shared_ptr<PhyloTree>> BppPhylogeneticsApplication::getPhyloTreesMap(
20  const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mSites,
21  map<string, string>& unparsedParams,
22  const std::string& prefix,
23  const std::string& suffix,
24  bool suffixIsOptional) const
25 {
26  map<size_t, std::shared_ptr<PhyloTree>> mpTree = PhylogeneticsApplicationTools::getPhyloTrees(params_, mSites, unparsedParams, prefix, suffix, suffixIsOptional, verbose_, warn_);
27 
28  // Scaling of trees:
29  double scale = ApplicationTools::getDoubleParameter("input.tree.scale", params_, 1, suffix, suffixIsOptional, warn_);
30 
31  if (scale != 1)
32  {
33  if (verbose_)
34  {
35  ApplicationTools::displayResult("Trees are scaled by", scale);
36  }
37 
38  for (auto it : mpTree)
39  {
40  it.second->scaleTree(scale);
41  }
42  }
43 
44  return mpTree;
45 }
46 
47 unique_ptr<SubstitutionProcessCollection> BppPhylogeneticsApplication::getCollection(
48  std::shared_ptr<const Alphabet> alphabet,
49  std::shared_ptr<const GeneticCode> gCode,
50  const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mSites,
51  map<string, string>& unparsedParams,
52  const std::string& prefix,
53  const std::string& suffix,
54  bool suffixIsOptional) const
55 {
56  auto mpTree = getPhyloTreesMap(mSites, unparsedParams, prefix, suffix, suffixIsOptional);
57  auto SPC = getCollection(alphabet, gCode, mSites, mpTree, unparsedParams, prefix, suffix, suffixIsOptional);
58  return SPC;
59 }
60 
61 std::unique_ptr<SubstitutionProcessCollection> BppPhylogeneticsApplication::getCollection(
62  std::shared_ptr<const Alphabet> alphabet,
63  std::shared_ptr<const GeneticCode> gCode,
64  const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mSites,
65  const map<size_t, std::shared_ptr<PhyloTree>>& mpTree,
66  map<string, string>& unparsedParams,
67  const std::string& prefix,
68  const std::string& suffix,
69  bool suffixIsOptional) const
70 {
71  auto mDist = PhylogeneticsApplicationTools::getRateDistributions(params_, suffix, suffixIsOptional, verbose_);
72  auto mModU = PhylogeneticsApplicationTools::getBranchModels(alphabet, gCode, mSites, params_, unparsedParams, suffix, suffixIsOptional, verbose_, warn_);
73  auto mMod = PhylogeneticsApplicationTools::uniqueToSharedMap<BranchModelInterface>(mModU);
74  auto mRootFreqU = PhylogeneticsApplicationTools::getRootFrequencySets(alphabet, gCode, mSites, params_, unparsedParams, suffix, suffixIsOptional, verbose_, warn_);
75  auto mRootFreq = PhylogeneticsApplicationTools::uniqueToSharedMap<FrequencySetInterface>(mRootFreqU);
76  auto mModelPathU = PhylogeneticsApplicationTools::getModelPaths(params_, mMod, suffix, suffixIsOptional, verbose_, warn_);
77  auto mModelPath = PhylogeneticsApplicationTools::uniqueToSharedMap<ModelPath>(mModelPathU);
78  auto mScenarioU = PhylogeneticsApplicationTools::getModelScenarios(params_, mModelPath, mMod, suffix, suffixIsOptional, verbose_, warn_);
79  auto mScenario = PhylogeneticsApplicationTools::uniqueToSharedMap<ModelScenario>(mScenarioU);
80 
81  auto SPC = PhylogeneticsApplicationTools::getSubstitutionProcessCollection(alphabet, gCode, mpTree, mMod, mRootFreq, mDist, mScenario, params_, unparsedParams, suffix, suffixIsOptional, verbose_, warn_);
82 
83  return SPC;
84 }
85 
86 
87 map<size_t, std::unique_ptr<SequenceEvolution>> BppPhylogeneticsApplication::getProcesses(
88  shared_ptr<SubstitutionProcessCollection> collection,
89  map<string, string>& unparsedParams,
90  const std::string& suffix,
91  bool suffixIsOptional) const
92 {
93  return PhylogeneticsApplicationTools::getSequenceEvolutions(
94  collection, params_, unparsedParams, suffix, suffixIsOptional, verbose_, warn_);
95 }
96 
97 
98 std::shared_ptr<PhyloLikelihoodContainer> BppPhylogeneticsApplication::getPhyloLikelihoods(
99  Context& context,
100  map<size_t, shared_ptr<SequenceEvolution>> mSeqEvol,
101  shared_ptr<SubstitutionProcessCollection> collection,
102  const map<size_t, shared_ptr<const AlignmentDataInterface>>& mSites,
103  const std::string& suffix,
104  bool suffixIsOptional) const
105 {
106  return PhylogeneticsApplicationTools::getPhyloLikelihoodContainer(
107  context, collection, mSeqEvol, mSites, params_, suffix, suffixIsOptional, verbose_, warn_);
108 }
109 
110 
111 void BppPhylogeneticsApplication::fixLikelihood(
112  shared_ptr<const Alphabet> alphabet,
113  shared_ptr<const GeneticCode> gCode,
114  shared_ptr<PhyloLikelihoodInterface> phylolik,
115  const std::string& suffix,
116  bool suffixIsOptional) const
117 {
118  double logL = phylolik->getValue();
119 
120  if (!std::isnormal(logL))
121  {
122  // This may be due to null branch lengths, leading to null likelihood!
123  ApplicationTools::displayWarning("!!! Warning!!! Initial likelihood is zero.");
124  ApplicationTools::displayWarning("!!! This may be due to branch length == 0.");
125  ApplicationTools::displayWarning("!!! All null branch lengths will be set to 0.001.");
126  ParameterList pl = phylolik->getBranchLengthParameters();
127 
128  for (size_t i = 0; i < pl.size(); i++)
129  {
130  if (pl[i].getValue() < 0.000001)
131  pl[i].setValue(0.001);
132  }
133  phylolik->matchParametersValues(pl);
134  logL = phylolik->getValue();
135  }
136 
137  ApplicationTools::displayMessage("");
138  ApplicationTools::displayResult("Initial log likelihood", TextTools::toString(-logL, 15));
139  if (!std::isnormal(logL))
140  {
141  ApplicationTools::displayError("!!! Unexpected initial likelihood == 0.");
142 
143  map<size_t, shared_ptr<SingleDataPhyloLikelihoodInterface>> mSD;
144 
145  if (dynamic_pointer_cast<SingleDataPhyloLikelihoodInterface>(phylolik))
146  mSD[1] = dynamic_pointer_cast<SingleDataPhyloLikelihoodInterface>(phylolik);
147  else
148  {
149  auto sOAP = dynamic_pointer_cast<PhyloLikelihoodSetInterface>(phylolik);
150  if (sOAP)
151  {
152  const vector<size_t>& nSD = sOAP->getNumbersOfPhyloLikelihoods();
153 
154  for (size_t iSD = 0; iSD < nSD.size(); ++iSD)
155  {
156  auto pASDP = dynamic_pointer_cast<SingleDataPhyloLikelihoodInterface>(sOAP->getPhyloLikelihood(nSD[iSD]));
157 
158  if (pASDP)
159  mSD[nSD[iSD]] = pASDP;
160  }
161  }
162  }
163 
164  for (auto& itm : mSD)
165  {
166  ApplicationTools::displayWarning("Checking for phyloLikelihood " + TextTools::toString(itm.first));
167 
168  if (!std::isnormal(itm.second->getValue()))
169  {
170  auto sDP = itm.second;
171 
172  auto vData = std::shared_ptr<AlignmentDataInterface>(sDP->getData()->clone());
173 
174  auto vSC = std::dynamic_pointer_cast<SiteContainerInterface>(vData);
175  auto pSC = std::dynamic_pointer_cast<ProbabilisticSiteContainerInterface>(vData);
176 
177  if (AlphabetTools::isCodonAlphabet(*alphabet))
178  {
179  bool f = false;
180  size_t s;
181  for (size_t i = 0; i < vData->getNumberOfSites(); ++i)
182  {
183  if (!std::isnormal(sDP->getLogLikelihoodForASite(i)))
184  {
185  if (vSC)
186  {
187  const Site& site = vSC->site(i);
188  s = site.size();
189  for (size_t j = 0; j < s; ++j)
190  {
191  if (gCode->isStop(site.getValue(j)))
192  {
193  (*ApplicationTools::error << "Stop Codon at site " << site.getCoordinate() << " in sequence " << vData->sequence(j).getName()).endLine();
194  f = true;
195  }
196  }
197  }
198  else
199  {
200  const ProbabilisticSite& site = pSC->site(i);
201  s = site.size();
202  for (size_t j = 0; j < s; ++j)
203  {
204  bool g = false;
205  for (int st = 0; !g && st < static_cast<int>(alphabet->getSize()); ++st)
206  {
207  g = (site.getStateValueAt(j, st) != 0 && !gCode->isStop(st));
208  }
209 
210  if (!g)
211  {
212  (*ApplicationTools::error << "Only stop Codons at site " << site.getCoordinate() << " in sequence " << vData->sequence(j).getName()).endLine();
213  f = true;
214  }
215  }
216  }
217  }
218  }
219  if (f)
220  exit(-1);
221  }
222 
223  // Then remove saturated positions
224 
225  bool removeSaturated = ApplicationTools::getBooleanParameter("input.sequence.remove_saturated_sites", params_, false, suffix, suffixIsOptional, warn_);
226 
227 
228  if (removeSaturated)
229  {
230  ApplicationTools::displayBooleanResult("Saturated site removal enabled", true);
231  for (size_t i = vData->getNumberOfSites(); i > 0; --i)
232  {
233  if (!std::isnormal(sDP->getLogLikelihoodForASite(i - 1)))
234  {
235  ApplicationTools::displayResult("Ignore saturated site", vData->site(i - 1).getCoordinate());
236  vData->deleteSites(i - 1, 1);
237  }
238  }
239  ApplicationTools::displayResult("Number of sites retained", vData->getNumberOfSites());
240 
241  sDP->setData(vData);
242  }
243 
244  logL = sDP->getValue();
245 
246  vector<size_t> vsiteok, vsitemin;
247 
248  if (!std::isnormal(logL))
249  {
250  ApplicationTools::displayError("!!! Removing problem sites:");
251  for (unsigned int i = 0; i < vData->getNumberOfSites(); i++)
252  {
253  auto x = sDP->getLogLikelihoodForASite(i);
254  if (!std::isnormal(x))
255  {
256  (*ApplicationTools::error << "Site " << vData->site(i).getCoordinate() << "\tlog likelihood = " << x).endLine();
257  vsitemin.push_back(i);
258  }
259  else
260  vsiteok.push_back(i);
261  }
262 
263  shared_ptr<AlignmentDataInterface> vDataok = SiteContainerTools::getSelectedSites(*vData, vsiteok);
264 // auto vDatamin = SiteContainerTools::getSelectedSites(*vData, vsitemin); Not taken into account yet
265 
266  sDP->setData(vDataok);
267  logL = sDP->getValue();
268  ApplicationTools::displayResult("Filtered log likelihood", TextTools::toString(-logL, 15));
269 
270  // auto phylo2 = itm.second->clone(); To be finished
271  // phylo2->setData(*vDatamin);
272  // auto logL2 = phylo2->getValue();
273 
274  // ApplicationTools::displayResult("Left log likelihood", TextTools::toString(-logL2, 15));
275  }
276  else
277  ApplicationTools::displayResult("Initial log likelihood", TextTools::toString(-logL, 15));
278  }
279  else
280  ApplicationTools::displayResult("Initial log likelihood", TextTools::toString(-itm.second->getValue(), 15));
281  }
282  }
283 }
284 
285 
286 void BppPhylogeneticsApplication::displayParameters(const PhyloLikelihoodInterface& tl, bool displaylL) const{
287 
288  // Write parameters to screen:
289  if (displaylL)
290  ApplicationTools::displayResult("Log likelihood", TextTools::toString(-tl.getValue(), 15));
291 
292  if (tl.getNumberOfParameters() - tl.getBranchLengthParameters().size() >= 30)
293  ApplicationTools::displayMessage("Too many parameters for screen output!");
294  else
295  {
296  ParameterList parameters = tl.getParameters();
298  for (size_t i = 0; i < parameters.size(); i++)
299  {
300  ApplicationTools::displayResult(parameters[i].getName(), TextTools::toString(parameters[i].getValue()));
301  }
302  }
303 }
int getCoordinate() const override
const int & getValue(size_t pos) const override
Context for dataflow node construction.
Definition: DataFlow.h:527
virtual size_t size() const=0
virtual double getValue() const=0
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual bool matchParametersValues(const ParameterList &params, std::vector< size_t > *updatedParameters=0)
virtual void deleteParameters(const std::vector< std::string > &names, bool mustExist=true)
virtual size_t getNumberOfParameters() const=0
virtual const ParameterList & getParameters() const=0
The PhyloLikelihood interface, for phylogenetic likelihood.
virtual ParameterList getBranchLengthParameters() const =0
Get the independent branch lengths parameters.
double getStateValueAt(size_t sequencePosition, int state) const
Defines the basic types of data flow nodes.