bpp-phyl3  3.0.0
PhylogeneticsApplicationTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../Io/BppOBranchModelFormat.h"
6 #include "../Io/BppOFrequencySetFormat.h"
7 #include "../Io/BppOMultiTreeReaderFormat.h"
8 #include "../Io/BppOMultiTreeWriterFormat.h"
9 #include "../Io/BppORateDistributionFormat.h"
10 #include "../Io/BppOTreeReaderFormat.h"
11 #include "../Io/BppOTreeWriterFormat.h"
12 #include "../Io/Newick.h"
13 #include "../Io/NexusIoTree.h"
14 #include "../Io/Nhx.h"
15 #include "../Likelihood/AutoCorrelationSequenceEvolution.h"
16 #include "../Likelihood/HmmSequenceEvolution.h"
17 #include "../Likelihood/MixtureSequenceEvolution.h"
18 #include "../Likelihood/NonHomogeneousSubstitutionProcess.h"
19 #include "../Likelihood/OneProcessSequenceEvolution.h"
20 #include "../Likelihood/ParametrizablePhyloTree.h"
21 #include "../Likelihood/PartitionSequenceEvolution.h"
22 #include "../Likelihood/PhyloLikelihoods/AlignedPhyloLikelihoodAutoCorrelation.h"
23 #include "../Likelihood/PhyloLikelihoods/AutoCorrelationProcessPhyloLikelihood.h"
24 #include "../Likelihood/PhyloLikelihoods/PhyloLikelihoodFormula.h"
25 #include "../Likelihood/PhyloLikelihoods/AlignedPhyloLikelihoodHmm.h"
26 #include "../Likelihood/PhyloLikelihoods/HmmProcessPhyloLikelihood.h"
27 #include "../Likelihood/PhyloLikelihoods/AlignedPhyloLikelihoodMixture.h"
28 #include "../Likelihood/PhyloLikelihoods/MixtureProcessPhyloLikelihood.h"
29 #include "../Likelihood/PhyloLikelihoods/OneProcessSequencePhyloLikelihood.h"
30 #include "../Likelihood/PhyloLikelihoods/PartitionProcessPhyloLikelihood.h"
31 #include "../Likelihood/PhyloLikelihoods/AlignedPhyloLikelihoodProduct.h"
32 #include "../Likelihood/PhyloLikelihoods/SingleDataPhyloLikelihood.h"
33 #include "../Likelihood/PhyloLikelihoods/SingleProcessPhyloLikelihood.h"
34 #include "../Likelihood/RateAcrossSitesSubstitutionProcess.h"
35 #include "../Likelihood/SimpleSubstitutionProcess.h"
36 #include "../Likelihood/SubstitutionProcessCollection.h"
37 #include "../Likelihood/SubstitutionProcessCollectionMember.h"
38 #include "../Mapping/DecompositionSubstitutionCount.h"
39 #include "../Mapping/LaplaceSubstitutionCount.h"
40 #include "../Mapping/NaiveSubstitutionCount.h"
41 #include "../Mapping/OneJumpSubstitutionCount.h"
42 #include "../Mapping/UniformizationSubstitutionCount.h"
43 #include "../Model/FrequencySet/MvaFrequencySet.h"
44 #include "../Model/MixedTransitionModel.h"
45 #include "../Model/Protein/Coala.h"
46 #include "../Model/SubstitutionModel.h"
47 #include "../Model/WrappedModel.h"
48 #include "../Model/RateDistribution/ConstantRateDistribution.h"
49 #include "../OptimizationTools.h"
50 #include "../Tree/PhyloTree.h"
51 #include "../Tree/PhyloTreeTools.h"
53 
54 // From bpp-core
55 #include <Bpp/BppString.h>
58 #include <Bpp/Io/FileTools.h>
59 #include <Bpp/Text/TextTools.h>
62 #include <Bpp/Text/KeyvalTools.h>
64 #include <Bpp/Numeric/DataTable.h>
72 
73 // From bpp-seq:
78 
79 using namespace bpp;
80 
81 // From the STL:
82 #include <fstream>
83 #include <memory>
84 #include <set>
85 #include <vector>
86 #include <algorithm>
87 
88 using namespace std;
89 
90 /*************************************************************/
91 /***************** TREES ************************************/
92 /*************************************************************/
93 
94 
95 /******************************************************************************/
96 
98  const map<string, string>& params,
99  const string& prefix,
100  const string& suffix,
101  bool suffixIsOptional,
102  bool verbose,
103  int warn)
104 {
105  string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
106  string treeFilePath = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, true, suffix, suffixIsOptional, "none", warn);
107 
108  BppOTreeReaderFormat bppoReader(warn);
109  auto iTree = bppoReader.readITree(format);
110  if (verbose)
111  {
112  ApplicationTools::displayResult("Input tree file " + suffix, treeFilePath);
113  ApplicationTools::displayResult("Input tree format " + suffix, iTree->getFormatName());
114  }
115  return iTree->readTree(treeFilePath);
116 }
117 
118 /******************************************************************************/
119 
121  const map<string, string>& params,
122  const string& prefix,
123  const string& suffix,
124  bool suffixIsOptional,
125  bool verbose,
126  int warn)
127 {
128  string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
129  string treeFilePath = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, true, suffix, suffixIsOptional, "none", warn);
130 
131  BppOMultiTreeReaderFormat bppoReader(warn);
132  auto iTrees = bppoReader.readIMultiTree(format);
133  if (verbose)
134  {
135  ApplicationTools::displayResult("Input trees file " + suffix, treeFilePath);
136  ApplicationTools::displayResult("Input trees format " + suffix, iTrees->getFormatName());
137  }
138  vector<unique_ptr<Tree>> trees;
139  iTrees->readTrees(treeFilePath, trees);
140 
141  if (verbose)
142  {
143  ApplicationTools::displayResult("Number of trees in file", trees.size());
144  }
145  return trees;
146 }
147 
148 /******************************************************************************/
149 
150 map<size_t, std::shared_ptr<PhyloTree>> PhylogeneticsApplicationTools::getPhyloTrees(
151  const map<string, string>& params,
152  const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mSeq,
153  map<string, string>& unparsedParams,
154  const string& prefix,
155  const string& suffix,
156  bool suffixIsOptional,
157  bool verbose,
158  int warn)
159 {
160 
161  vector<string> vTreesName = ApplicationTools::matchingParameters(prefix + "tree*", params);
162 
163  map<size_t, shared_ptr<PhyloTree>> mTree;
164 
165  for (size_t nT = 0; nT < vTreesName.size(); nT++)
166  {
167  size_t poseq = vTreesName[nT].find("=");
168  size_t num = 0;
169  size_t len = (prefix + "tree").size();
170 
171  string suff = vTreesName[nT].substr(len, poseq - len);
172  bool flag = 0;
173  size_t nbTree = 1;
174 
175  if (TextTools::isDecimalInteger(suff, '$'))
176  num = static_cast<size_t>(TextTools::toInt(suff));
177  else
178  {
179  flag = 1;
180  num = 1;
181  }
182 
183  if (!flag)
184  {
187  }
188 
189  string treeDesc = ApplicationTools::getStringParameter(vTreesName[nT], params, "", suffix, suffixIsOptional);
190 
191  string treeName;
192 
193  map<string, string> args;
194 
195  KeyvalTools::parseProcedure(treeDesc, treeName, args);
196 
197  if (treeName == "user")
198  {
199  string format;
200 
201  if (args.find("format") != args.end())
202  format = args["format"];
203  else
204  {
205  format = "Newick";
206  ApplicationTools::displayWarning("Warning, " + vTreesName[nT] + " format set to Newick");
207  }
208 
209  string treeFilePath = ApplicationTools::getAFilePath("file", args, true, true, suffix, suffixIsOptional, "none", warn);
210 
211  IMultiPhyloTree* treeReader;
212  if (format == "Newick")
213  treeReader = new Newick(true);
214  else if (format == "Nexus")
215  treeReader = new NexusIOTree();
216  else if (format == "NHX")
217  treeReader = new Nhx();
218  else
219  throw Exception("Unknown format for tree reading: " + format);
220 
221  vector<unique_ptr<PhyloTree>> trees;
222  treeReader->readPhyloTrees(treeFilePath, trees);
223 
224  if (args.find("data") != args.end())
225  {
226  auto seqNum = (size_t) TextTools::toInt(args["data"]);
227  if (mSeq.find(seqNum) == mSeq.end())
228  throw Exception("Error : Wrong number of data " + TextTools::toString(seqNum));
229  else
230  {
231  ApplicationTools::displayMessage("Tree leaves pruned to fit data " + TextTools::toString(seqNum));
232  vector<string> names = mSeq.find(seqNum)->second->getSequenceNames();
233 
234  for (auto& tree:trees)
235  {
236  auto nb1 = tree->getNumberOfLeaves();
237  tree->pruneTree(names);
238  auto nb2 = tree->getNumberOfLeaves();
239  if (nb1 != nb2)
240  {
241  ApplicationTools::displayResult("Number of removed leaves", nb1 - nb2);
242  }
243  }
244  }
245  }
246 
247  if (verbose)
248  {
249  if (flag)
250  {
252  ApplicationTools::displayResult("Tree file", treeFilePath);
253  }
254 
255  ApplicationTools::displayResult("Number of trees in file", trees.size());
256  }
257 
258  if (flag)
259  {
260  nbTree = trees.size();
261 
262  for (size_t i2 = 0; i2 < trees.size(); i2++)
263  {
264  if (mTree.find(i2 + 1) != mTree.end())
265  {
266  ApplicationTools::displayWarning("Tree " + TextTools::toString(i2 + 1) + " already assigned, replaced by new one.");
267  mTree.erase(i2 + 1);
268  }
269 
270  mTree[i2 + 1] = std::move(trees[i2]);
271  ApplicationTools::displayResult("Number of leaves", mTree[i2 + 1]->getNumberOfLeaves());
272  }
273  }
274  else
275  {
276  if (trees.size() > 1)
277  throw Exception("Error : Several trees for description of " + vTreesName[nT] + ".");
278 
279  if (trees.size() == 1)
280  {
281  if (mTree.find(num) != mTree.end())
282  {
283  ApplicationTools::displayWarning("Tree " + TextTools::toString(num) + " already assigned, replaced by new one.");
284  mTree.erase(num);
285  }
286  mTree[num] = std::move(trees[0]);
287  ApplicationTools::displayResult("Number of leaves", mTree[num]->getNumberOfLeaves());
288  }
289  }
290  }
291  else if (treeName == "random")
292  {
293  size_t seqNum;
294 
295  if (args.find("data") == args.end())
296  {
297  ApplicationTools::displayWarning("Random tree set from data 1");
298  seqNum = 1;
299  }
300  else
301  seqNum = (size_t) TextTools::toInt(args["data"]);
302 
303 
304  if (mSeq.find(seqNum) == mSeq.end())
305  throw Exception("Error : Wrong number of data " + TextTools::toString(seqNum));
306 
307  vector<string> names = mSeq.find(seqNum)->second->getSequenceNames();
308 
309  // Not optimal process: make random PhlyoTree directly
310  auto treetemp = TreeTemplateTools::getRandomTree(names);
311  treetemp->setBranchLengths(1.);
312 
313  auto tree = PhyloTreeTools::buildFromTreeTemplate(*treetemp);
314 
315  if (mTree.find(num) != mTree.end())
316  {
317  ApplicationTools::displayWarning("Tree " + TextTools::toString(num) + " already assigned, replaced by new one.");
318  mTree.erase(num);
319  }
320  mTree[num] = tree;
321  ApplicationTools::displayResult("Number of leaves", tree->getNumberOfLeaves());
322  }
323 
324 
325  // //////////
326  // Setting branch lengths?
327  string initBrLenMethod = ApplicationTools::getStringParameter("init.brlen.method", args, "Input", "", true, 1);
328  string cmdName;
329  map<string, string> cmdArgs;
330 
331  KeyvalTools::parseProcedure(initBrLenMethod, cmdName, cmdArgs);
332 
333  ApplicationTools::displayResult("Branch lengths", cmdName);
334 
335  if (cmdName == "Input")
336  {
337  // Is the root has to be moved to the midpoint position along the branch that contains it ? If no, do nothing!
338  string midPointRootBrLengths = ApplicationTools::getStringParameter("midPointRootBrLengths", cmdArgs, "no", "", true, 2);
339  if (midPointRootBrLengths == "yes")
340  {
341  ApplicationTools::displayResult(" Mid Point Rooting", midPointRootBrLengths);
342  if (flag)
343  {
344  for (size_t i = 0; i < nbTree; i++)
345  {
347  }
348  }
349  else
351  }
352  }
353  else if (cmdName == "Equal")
354  {
355  double value = ApplicationTools::getDoubleParameter("value", cmdArgs, 0.1, "", true, 2);
356  if (value <= 0)
357  throw Exception("Value for branch length must be superior to 0");
358  ApplicationTools::displayResult("Branch lengths set to", value);
359  if (flag)
360  {
361  for (size_t i = 0; i < nbTree; i++)
362  {
363  mTree[i + 1]->setBranchLengths(value);
364  }
365  }
366  else
367  mTree[num]->setBranchLengths(value);
368  }
369  else if (cmdName == "Clock")
370  {
371  if (flag)
372  {
373  for (size_t i = 0; i < nbTree; i++)
374  {
375  PhyloTreeTools::convertToClockTree(*mTree[i + 1], mTree[i + 1]->getRoot());
376  }
377  }
378  else
379  PhyloTreeTools::convertToClockTree(*mTree[num], mTree[num]->getRoot());
380  }
381  else if (cmdName == "Grafen")
382  {
383  string grafenHeight = ApplicationTools::getStringParameter("height", cmdArgs, "input", "", true, 2);
384  double h;
385  if (flag)
386  {
387  for (size_t i = 0; i < nbTree; i++)
388  {
389  auto tree = mTree[i + 1];
390  if (grafenHeight == "input")
391  {
392  h = PhyloTreeTools::getHeight(*tree, tree->getRoot());
393  }
394  else
395  {
396  h = TextTools::toDouble(grafenHeight);
397  if (h <= 0)
398  throw Exception("Height must be positive in Grafen's method.");
399  }
401 
402  double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, 2);
403  ApplicationTools::displayResult("Grafen's rho", rho);
405 
406  double nh = PhyloTreeTools::getHeight(*tree, tree->getRoot());
407  tree->scaleTree(h / nh);
408  }
409  }
410  else
411  {
412  auto tree = mTree[num];
413  if (grafenHeight == "input")
414  h = PhyloTreeTools::getHeight(*tree, tree->getRoot());
415  else
416  {
417  h = TextTools::toDouble(grafenHeight);
418  if (h <= 0)
419  throw Exception("Height must be positive in Grafen's method.");
420  }
422 
423  double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, 2);
424  ApplicationTools::displayResult("Grafen's rho", rho);
425 
427  double nh = PhyloTreeTools::getHeight(*tree, tree->getRoot());
428 
429  tree->scaleTree(h / nh);
430  }
431  }
432  else
433  throw Exception("Method '" + initBrLenMethod + "' unknown for computing branch lengths.");
434 
435  // //////////// Setting branch lengths with aliases
436 
437  vector<string> vBrNb = ApplicationTools::matchingParameters("BrLen*", args);
438 
439  for (size_t ib = 0; ib < vBrNb.size(); ib++)
440  {
441  string apeq = args[vBrNb[ib]];
442  string aveq = vBrNb[ib];
443 
444  if (TextTools::isDecimalInteger(apeq))
445  {
446  shared_ptr<PhyloBranch> branch = mTree[num]->getEdgeToFather(mTree[num]->getNode(static_cast<PhyloTree::NodeIndex>(TextTools::toInt(aveq.substr(5, string::npos)))));
447  if (branch)
448  branch->setLength(TextTools::toDouble(apeq));
449  }
450  else
451  {
452  size_t posun = apeq.find("_");
453  size_t posd = aveq.find("_");
454  unparsedParams[aveq + (posd != string::npos ? "" : "_" + TextTools::toString(num))] = apeq + (posun != string::npos ? "" : "_" + TextTools::toString(num));
455  }
456  }
457  }
458 
459  return mTree;
460 }
461 
462 /******************************************************/
463 /**** SUBSTITUTION RATES *******************************/
464 /******************************************************/
465 
466 std::unique_ptr<SubstitutionModelInterface> PhylogeneticsApplicationTools::getSubstitutionModel(
467  std::shared_ptr<const Alphabet> alphabet,
468  std::shared_ptr<const GeneticCode> gCode,
469  std::shared_ptr<const AlignmentDataInterface> data,
470  const map<string, string>& params,
471  map<string, string>& unparsedParams,
472  const string& suffix,
473  bool suffixIsOptional,
474  bool verbose,
475  int warn)
476 {
477  BppOSubstitutionModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, verbose, warn + 1);
478  string modelDescription;
479  auto ca = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
480  if (ca)
481  {
482  modelDescription = ApplicationTools::getStringParameter("model", params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
483  if (!gCode)
484  throw Exception("PhylogeneticsApplicationTools::getSubstitutionModel(): a GeneticCode instance is required for instantiating a codon model.");
485  bIO.setGeneticCode(gCode);
486  }
487  else if (AlphabetTools::isWordAlphabet(*alphabet))
488  modelDescription = ApplicationTools::getStringParameter("model", params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
489  else
490  modelDescription = ApplicationTools::getStringParameter("model", params, "JC69", suffix, suffixIsOptional, warn);
491 
492  std::map<size_t, std::shared_ptr<const AlignmentDataInterface>> mData;
493  mData[1]=data;
494 
495  auto model = bIO.readSubstitutionModel(alphabet, modelDescription, mData, 1, true);
496 
497  unparsedParams.insert(bIO.getUnparsedArguments().begin(), bIO.getUnparsedArguments().end());
498 
499  return model;
500 }
501 
502 std::unique_ptr<BranchModelInterface> PhylogeneticsApplicationTools::getBranchModel(
503  std::shared_ptr<const Alphabet> alphabet,
504  std::shared_ptr<const GeneticCode> gCode,
505  std::shared_ptr<const AlignmentDataInterface> data,
506  const map<string, string>& params,
507  map<string, string>& unparsedParams,
508  const string& suffix,
509  bool suffixIsOptional,
510  bool verbose,
511  int warn)
512 {
513  BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, verbose, warn + 1);
514  string modelDescription;
515  auto ca = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
516  if (ca)
517  {
518  modelDescription = ApplicationTools::getStringParameter("model", params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
519  if (!gCode)
520  throw Exception("PhylogeneticsApplicationTools::getBranchModel(): a GeneticCode instance is required for instantiating a codon model.");
521  bIO.setGeneticCode(gCode);
522  }
523  else if (AlphabetTools::isWordAlphabet(*alphabet))
524  modelDescription = ApplicationTools::getStringParameter("model", params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
525  else
526  modelDescription = ApplicationTools::getStringParameter("model", params, "JC69", suffix, suffixIsOptional, warn);
527 
528  std::map<size_t, std::shared_ptr<const AlignmentDataInterface>> mData;
529  mData[1]=data;
530 
531  auto model = bIO.readBranchModel(alphabet, modelDescription, mData, 1, true);
532  map<string, string> tmpUnparsedParameterValues(bIO.getUnparsedArguments());
533 
534  unparsedParams.insert(tmpUnparsedParameterValues.begin(), tmpUnparsedParameterValues.end());
535 
536  return model;
537 }
538 
539 /******************************************************************************/
540 
541 map<size_t, std::shared_ptr<DiscreteDistributionInterface>> PhylogeneticsApplicationTools::getRateDistributions(
542  const map<string, string>& params,
543  const string& suffix,
544  bool suffixIsOptional,
545  bool verbose)
546 {
547  string DistFilePath = ApplicationTools::getAFilePath("rate_distribution.file", params, false, false, suffix, suffixIsOptional, "none", 1);
548 
549  map<string, string> paramDist;
550 
551  if (DistFilePath != "none")
552  paramDist = AttributesTools::getAttributesMapFromFile(DistFilePath, "=");
553 
554  paramDist.insert(params.begin(), params.end());
555 
556  vector<string> vratesName = ApplicationTools::matchingParameters("rate_distribution*", paramDist);
557 
558  BppORateDistributionFormat bIO(true);
559  map<size_t, std::shared_ptr<DiscreteDistributionInterface>> mDist;
560 
561 
562  for (size_t i = 0; i < vratesName.size(); ++i)
563  {
564  size_t poseq = vratesName[i].find("=");
565  size_t num = 0;
566  string suff = vratesName[i].substr(17, poseq - 17);
567  bool flag = 0;
568 
569 
570  if (TextTools::isDecimalInteger(suff, '$'))
571  num = static_cast<size_t>(TextTools::toInt(suff));
572  else
573  {
574  flag = 1;
575  num = 0;
576  }
577 
578  if (verbose)
579  if (!flag)
580  {
583  }
584 
585  string distDescription = ApplicationTools::getStringParameter(vratesName[i], paramDist, "", suffix, suffixIsOptional);
586 
587  if (num!=0)
588  mDist[num] = std::shared_ptr<DiscreteDistributionInterface>(bIO.readDiscreteDistribution(distDescription, true));
589  }
590 
591  if (mDist.size() == 0)
592  {
595  string distDescription = ApplicationTools::getStringParameter("rate_distribution", paramDist, "Constant()", suffix, suffixIsOptional);
596  mDist[1] = std::shared_ptr<DiscreteDistributionInterface>(bIO.readDiscreteDistribution(distDescription, true));
597  }
598 
599  return mDist;
600 }
601 
602 
603 /*************************************************************/
604 /******* MODELS **********************************************/
605 /*************************************************************/
606 
607 map<size_t, std::unique_ptr<BranchModelInterface>> PhylogeneticsApplicationTools::getBranchModels(
608  std::shared_ptr<const Alphabet> alphabet,
609  std::shared_ptr<const GeneticCode> gCode,
610  const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
611  const map<string, string>& params,
612  map<string, string>& unparsedParams,
613  const string& suffix,
614  bool suffixIsOptional,
615  bool verbose,
616  int warn)
617 {
618  if (dynamic_pointer_cast<const CodonAlphabet>(alphabet) && !gCode)
619  throw Exception("PhylogeneticsApplicationTools::getBranchModels(): a GeneticCode instance is required for instantiating codon models.");
620 
621  string ModelFilePath = ApplicationTools::getAFilePath("models.file", params, false, false, suffix, suffixIsOptional, "none", 1);
622 
623  map<string, string> paramModel;
624 
625  if (ModelFilePath != "none")
626  paramModel = AttributesTools::getAttributesMapFromFile(ModelFilePath, "=");
627 
628  paramModel.insert(params.begin(), params.end());
629 
630  vector<string> modelsName = ApplicationTools::matchingParameters("model*", paramModel);
631 
632  vector<size_t> modelsNum;
633  for (const auto& name : modelsName)
634  {
635  size_t poseq = name.find("=");
636  if (name.find("nodes_id") == string::npos)
637  {
638  modelsNum.push_back(TextTools::to<size_t>(name.substr(5, poseq - 5)));
639  }
640  }
641 
642  map<size_t, std::unique_ptr<BranchModelInterface>> mModel;
643 
644  BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, verbose, warn);
645  bIO.setGeneticCode(gCode);
646 
647  for (size_t i = 0; i < modelsNum.size(); ++i)
648  {
649  if (i >= 10)
650  {
651  bIO.setVerbose(false);
652  warn = 10;
653  if (i == 10)
655  ApplicationTools::displayResult("Model " + TextTools::toString(modelsNum[i]), string("..."));
656  }
657  else
658  {
660  ApplicationTools::displayMessage("Model " + TextTools::toString(modelsNum[i]));
661  }
662 
663  string modelDescription = ApplicationTools::getStringParameter("model" + TextTools::toString(modelsNum[i]), paramModel, "", suffix, suffixIsOptional, warn);
664 
665  unique_ptr<BranchModelInterface> model;
666  model = bIO.readBranchModel(alphabet, modelDescription, mData, 0, true);
667 
668  map<string, string> tmpUnparsedParameterValues(bIO.getUnparsedArguments());
669 
670  for (auto& it : tmpUnparsedParameterValues)
671  {
672  unparsedParams[it.first + "_" + TextTools::toString(modelsNum[i])] = it.second;
673  }
674 
675  mModel[modelsNum[i]] = std::move(model);
676  }
677 
678  return mModel;
679 }
680 
681 
682 /******************************************************/
683 /**** FREQUENCIES SET *********************************/
684 /******************************************************/
685 
686 std::unique_ptr<FrequencySetInterface> PhylogeneticsApplicationTools::getFrequencySet(
687  std::shared_ptr<const Alphabet> alphabet,
688  std::shared_ptr<const GeneticCode> gCode,
689  const string& freqDescription,
690  const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
691  size_t nData,
692  map<string, string>& sharedparams,
693  const vector<double>& rateFreqs,
694  bool verbose,
695  int warn)
696 {
697  map<string, string> unparsedParameterValues;
699  if (AlphabetTools::isCodonAlphabet(*alphabet))
700  {
701  if (!gCode)
702  throw Exception("PhylogeneticsApplicationTools::getFrequencySet(): a GeneticCode instance is required for instantiating a codon frequencies set.");
703  bIO.setGeneticCode(gCode);
704  }
705 
706  auto pFS = bIO.readFrequencySet(alphabet, freqDescription, mData, nData, true);
707 
708  map<string, string> unparsedparam = bIO.getUnparsedArguments();
709 
710  sharedparams.insert(unparsedparam.begin(), unparsedparam.end());
711 
712  // /////// To be changed for input normalization
713  if (rateFreqs.size() > 0)
714  {
715  pFS = std::make_unique<MarkovModulatedFrequencySet>(std::move(pFS), rateFreqs);
716  }
717 
718  return pFS;
719 }
720 
721 
722 std::unique_ptr<FrequencySetInterface> PhylogeneticsApplicationTools::getRootFrequencySet(
723  std::shared_ptr<const Alphabet> alphabet,
724  std::shared_ptr<const GeneticCode> gCode,
725  const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
726  size_t nData,
727  const map<string, string>& params,
728  map<string, string>& sharedparams,
729  const vector<double>& rateFreqs,
730  const string& suffix,
731  bool suffixIsOptional,
732  bool verbose,
733  int warn)
734 {
735  string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", params, "Full(init=observed)", suffix, suffixIsOptional, warn);
736  if (freqDescription == "None")
737  {
738  return 0;
739  }
740  else
741  {
742  map<string, string> unparams;
743 
744  auto freq = getFrequencySet(alphabet, gCode, freqDescription, mData, nData, unparams, rateFreqs, verbose, warn + 1);
745  freq->setNamespace("root." + freq->getNamespace());
746 
747  for (auto& it : unparams)
748  {
749  sharedparams["root." + it.first] = it.second;
750  }
751 
752  if (verbose)
753  ApplicationTools::displayResult("Root frequencies ", freq->getName());
754  return freq;
755  }
756 }
757 
758 
759 map<size_t, std::unique_ptr<FrequencySetInterface>> PhylogeneticsApplicationTools::getRootFrequencySets(
760  std::shared_ptr<const Alphabet> alphabet,
761  std::shared_ptr<const GeneticCode> gCode,
762  const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
763  const map<string, string>& params,
764  map<string, string>& sharedparams,
765  const string& suffix,
766  bool suffixIsOptional,
767  bool verbose,
768  int warn)
769 {
770  if (dynamic_pointer_cast<const CodonAlphabet>(alphabet) && !gCode)
771  throw Exception("PhylogeneticsApplicationTools::getRootFrequencySets(): a GeneticCode instance is required for instantiating codon frequencies sets.");
772 
773  string RootFilePath = ApplicationTools::getAFilePath("root_freq.file", params, false, false, suffix, suffixIsOptional, "none", 1);
774  map<string, string> paramRF;
775 
776  if (RootFilePath != "none")
777  paramRF = AttributesTools::getAttributesMapFromFile(RootFilePath, "=");
778 
779  paramRF.insert(params.begin(), params.end());
780 
781  vector<string> vrfName = ApplicationTools::matchingParameters("root_freq*", paramRF);
782 
783  vector<size_t> rfNum;
784  for (const auto& rfName : vrfName)
785  {
786  size_t poseq = rfName.find("=");
787  try
788  {
789  rfNum.push_back(TextTools::to<size_t>(rfName.substr(9, poseq - 9)));
790  }
791  catch (Exception& e)
792  {}
793  }
794 
796  bIO.setGeneticCode(gCode);
797 
798  map<size_t, std::unique_ptr<FrequencySetInterface>> mFS;
799 
800  for (size_t i = 0; i < rfNum.size(); ++i)
801  {
803  ApplicationTools::displayMessage("Root Frequencies Set " + TextTools::toString(rfNum[i]));
804 
805  string freqDescription = ApplicationTools::getStringParameter("root_freq" + TextTools::toString(rfNum[i]), paramRF, "", suffix, suffixIsOptional, warn);
806 
807  map<string, string> args;
808  string freqName;
809 
810  KeyvalTools::parseProcedure(freqDescription, freqName, args);
811 
812  size_t nData = 0;
813 
814  if (args.find("data") != args.end())
815  nData = TextTools::to<size_t>(args["data"]);
816 
817  unique_ptr<FrequencySetInterface> rFS;
818 
819  rFS = bIO.readFrequencySet(alphabet, freqDescription, mData, nData, true);
820 
821  rFS->setNamespace("root." + rFS->getNamespace());
822 
823  map<string, string> unparsedparam = bIO.getUnparsedArguments();
824 
825  for (auto& it : unparsedparam)
826  {
827  sharedparams["root." + it.first + "_" + TextTools::toString(rfNum[i])] = it.second;
828  }
829 
830  if (verbose)
831  {
832  // ApplicationTools::displayResult("Root Frequencies Set " + TextTools::toString(rfNum[i]), rFS->getName());
833  if (nData != 0)
835  }
836 
837  mFS[rfNum[i]] = std::move(rFS);
838  }
839 
840  return mFS;
841 }
842 
843 /******************************************************/
844 /**** SETOFMODELPATH **********************************/
845 /******************************************************/
846 
847 map<size_t, std::unique_ptr<ModelPath>> PhylogeneticsApplicationTools::getModelPaths(
848  const std::map<std::string, std::string>& params,
849  const map<size_t, std::shared_ptr<BranchModelInterface>>& mModel,
850  const string& suffix,
851  bool suffixIsOptional,
852  bool verbose,
853  int warn)
854 {
855  string ModelPathsPath = ApplicationTools::getAFilePath("path.file", params, false, false, suffix, suffixIsOptional, "none", warn);
856  map<string, string> paramMP;
857 
858  if (ModelPathsPath != "none")
859  paramMP = AttributesTools::getAttributesMapFromFile(ModelPathsPath, "=");
860 
861  paramMP.insert(params.begin(), params.end());
862 
863  vector<string> vmpName = ApplicationTools::matchingParameters("path*", paramMP);
864 
865  map<size_t, std::unique_ptr<ModelPath>> modelPaths;
866 
867  for (size_t i = 0; i < vmpName.size(); ++i)
868  {
869  const auto& name = vmpName[i];
870 
871  string desc = ApplicationTools::getStringParameter(name, paramMP, "", "", true);
872 
873  size_t num;
874  try
875  {
876  num = TextTools::to<size_t>(name.substr(4));
877  }
878  catch (const Exception& e)
879  {
880  throw Exception("PhylogeneticsApplicationTools::getModelPaths: bad path number in line " + name);
881  }
882 
883  modelPaths[num] = std::make_unique<ModelPath>();
884 
885  if (verbose)
886  {
887  if (i >= 10)
888  {
889  if (i == 10)
891  ApplicationTools::displayResult("Path " + TextTools::toString(num), string("..."));
892  }
893  else
894  {
897  }
898  }
899 
900  StringTokenizer st(desc, "&");
901  while (st.hasMoreToken())
902  {
903  string submodel = st.nextToken();
904  Vuint submodelNb;
905  auto indexo = submodel.find("[");
906  auto indexf = submodel.find("]");
907  if ((indexo == string::npos) | (indexf == string::npos))
908  throw Exception("PhylogeneticsApplicationTools::getModelPaths. Bad path syntax, should contain `[]' symbols: " + submodel);
909 
910  auto pos = submodel.find("model");
911  if (pos == string::npos)
912  throw Exception("PhylogeneticsApplicationTools::getModelPaths. Missing identifier 'model' in description: " + submodel);
913 
914  size_t num2 = TextTools::to<size_t>(submodel.substr(pos + 5, indexo - 5 - pos));
915  if (mModel.find(num2) == mModel.end())
916  throw BadIntegerException("PhylogeneticsApplicationTools::getModelPaths: Wrong model number", static_cast<int>(num2));
917 
918  auto pSM = std::dynamic_pointer_cast<MixedTransitionModelInterface>(mModel.at(num2));
919  if (!pSM)
920  throw Exception("PhylogeneticsApplicationTools::getModelPaths: Model number " + TextTools::toString(num2) + " ( " + mModel.at(num2)->getName() + " ) is not Mixed.");
921 
922  string lp2 = submodel.substr(indexo + 1, indexf - indexo - 1);
923  StringTokenizer stp2(lp2, ",");
924  while (stp2.hasMoreToken())
925  {
926  string p2 = stp2.nextToken();
927 
928  unsigned int n2;
929  bool n2ok = true;
930  try
931  {
932  n2 = TextTools::to<unsigned int>(p2);
933  if (n2 <= 0 || n2 > pSM->getNumberOfModels())
934  n2ok = false;
935  else
936  submodelNb.push_back(n2 - 1);
937  }
938  catch (Exception& e)
939  {
940  Vuint submodnb = pSM->getSubmodelNumbers(p2);
941  if (submodelNb.size() == 0)
942  submodelNb = submodnb;
943  else
944  submodelNb = VectorTools::vectorIntersection(submodelNb, submodnb);
945  }
946 
947  if (!n2ok)
948  throw BadIntegerException("PhylogeneticsApplicationTools::getModelPaths: Wrong model number for model " + TextTools::toString(num2), int(n2));
949  }
950 
951  modelPaths[num]->setModel(pSM, submodelNb);
952  if (!modelPaths[num]->getLeadModel())
953  modelPaths[num]->setLeadModel(pSM);
954  }
955 
956  if (verbose && (i < 10))
957  ApplicationTools::displayResult("Model Path", desc);
958  }
959 
960  return modelPaths;
961 }
962 
963 
964 map<size_t, std::unique_ptr<ModelScenario>> PhylogeneticsApplicationTools::getModelScenarios(
965  const std::map<std::string, std::string>& params,
966  const map<size_t, std::shared_ptr<ModelPath>>& mModelPath,
967  const map<size_t, std::shared_ptr<BranchModelInterface>>& mModel,
968  const string& suffix,
969  bool suffixIsOptional,
970  bool verbose,
971  int warn)
972 {
973  string ModelPathsPath = ApplicationTools::getAFilePath("scenario.file", params, false, false, suffix, suffixIsOptional, "none", warn);
974  map<string, string> paramMS;
975 
976  if (ModelPathsPath != "none")
977  paramMS = AttributesTools::getAttributesMapFromFile(ModelPathsPath, "=");
978 
979  paramMS.insert(params.begin(), params.end());
980 
981  vector<string> vmsName = ApplicationTools::matchingParameters("scenario*", paramMS);
982 
983  map<size_t, std::unique_ptr<ModelScenario>> somp;
984 
985  for (size_t i = 0; i < vmsName.size(); ++i)
986  {
987  const auto& name = vmsName[i];
988 
989  string desc = ApplicationTools::getStringParameter(name, paramMS, "", suffix, suffixIsOptional, warn);
990 
991  size_t num;
992  try
993  {
994  num = TextTools::to<size_t>(name.substr(8));
995  }
996  catch (const Exception& e)
997  {
998  throw Exception("PhylogeneticsApplicationTools::getModelScenarios: bad scenario number in line " + name);
999  }
1000 
1001  somp[num] = std::make_unique<ModelScenario>();
1002 
1003  if (verbose)
1004  {
1005  if (i >= 10)
1006  {
1007  if (i == 10)
1009  ApplicationTools::displayResult("Scenario " + TextTools::toString(num), string("..."));
1010  }
1011  else
1012  {
1015  }
1016  }
1017 
1018  bool complete = false;
1019  size_t numpath;
1020 
1021  StringTokenizer st(desc, "&");
1022  while (st.hasMoreToken())
1023  {
1024  string path = st.nextToken();
1025  bool numok = true;
1026  try
1027  {
1028  if (path == "complete")
1029  complete = true;
1030  else if (path.substr(0, 5) == "split")
1031  {
1032  auto pos = path.find("model");
1033  if (pos == string::npos)
1034  throw Exception("PhylogeneticsApplicationTools::getModelScenarios. Missing identifier 'model' in scenario description: " + path);
1035 
1036  auto poseq = path.find("=", pos);
1037  size_t num2 = TextTools::to<size_t>(path.substr(poseq + 1));
1038 
1039  if (mModel.find(num2) == mModel.end())
1040  throw BadIntegerException("PhylogeneticsApplicationTools::getModelScenarios: Wrong model number", static_cast<int>(num2));
1041 
1042  auto pSM = std::dynamic_pointer_cast<MixedTransitionModelInterface>(mModel.at(num2));
1043  if (!pSM)
1044  throw Exception("PhylogeneticsApplicationTools::getModelScenarios: Model number " + TextTools::toString(num2) + " ( " + mModel.at(num2)->getName() + " ) is not Mixed.");
1045 
1046  std::vector<std::shared_ptr<ModelPath>> modelPaths;
1047 
1048  auto nmod = pSM->getNumberOfModels();
1049 
1050  for (unsigned int nm = 0; nm < static_cast<unsigned int>(nmod); ++nm)
1051  {
1052  auto mp = std::make_shared<ModelPath>();
1053  mp->setModel(pSM, Vuint({nm}));
1054  mp->setLeadModel(pSM);
1055  somp[num]->addModelPath(mp);
1056  }
1057  }
1058  else
1059  {
1060  numpath = TextTools::to<size_t>(path.substr(4));
1061  if (mModelPath.find(numpath) == mModelPath.end())
1062  numok = false;
1063  else
1064  somp[num]->addModelPath(mModelPath.at(numpath));
1065  }
1066  }
1067  catch (Exception& e)
1068  {
1069  Exception("PhylogeneticsApplicationTools::getModelScenarios: wrong path description " + path);
1070  }
1071 
1072  if (!numok)
1073  throw BadIntegerException("PhylogeneticsApplicationTools::getModelScenarios: Wrong path number", static_cast<int>(numpath));
1074  }
1075 
1076 
1077  if (verbose && (i < 10))
1078  ApplicationTools::displayResult("Model Scenario", desc);
1079 
1080  if (complete)
1081  {
1082  if (somp[num]->getNumberOfModelPaths() == 0)
1083  throw Exception("PhylogeneticsApplicationTools::getModelScenarios: 'complete' is not possible on empty scenarios");
1084  somp[num]->complete();
1085  }
1086 
1087  somp[num]->computeModelPathsProbabilities();
1088  }
1089 
1090  return somp;
1091 }
1092 
1093 /******************************************************/
1094 /********** SUBSTITUTION PROCESS ********************/
1095 /******************************************************/
1096 
1097 unique_ptr<AutonomousSubstitutionProcessInterface> PhylogeneticsApplicationTools::getSubstitutionProcess(
1098  std::shared_ptr<const Alphabet> alphabet,
1099  std::shared_ptr<const GeneticCode> gCode,
1100  std::shared_ptr<const AlignmentDataInterface> pData,
1101  const vector< shared_ptr<PhyloTree>>& vTree,
1102  const map<string, string>& params,
1103  const string& suffix,
1104  bool suffixIsOptional,
1105  bool verbose,
1106  int warn)
1107 {
1108  // Read files with same process as SubstitutionCollection
1109 
1110  map<string, string> unparsedParams;
1111 
1112  map<size_t, std::shared_ptr<const AlignmentDataInterface>> mData;
1113  mData[1] = pData;
1114 
1115  map<size_t, std::shared_ptr<PhyloTree>> mTree;
1116  size_t i = 1;
1117  for (auto it : vTree)
1118  {
1119  mTree[i++] = it;
1120  }
1121 
1122  auto mModU = getBranchModels(alphabet, gCode, mData, params, unparsedParams, suffix, suffixIsOptional, verbose, warn);
1123  auto mMod = uniqueToSharedMap<BranchModelInterface>(mModU);
1124 
1125  auto mRootFreqU = getRootFrequencySets(alphabet, gCode, mData, params, unparsedParams, suffix, suffixIsOptional, verbose, warn);
1126  auto mRootFreq = uniqueToSharedMap<FrequencySetInterface>(mRootFreqU);
1127 
1128  auto mDist = getRateDistributions(params, suffix, suffixIsOptional, verbose);
1129 
1130  auto mPathU = getModelPaths(params, mMod, suffix, suffixIsOptional, verbose, warn);
1131  auto mPath = uniqueToSharedMap<ModelPath>(mPathU);
1132 
1133  auto mScenU = getModelScenarios(params, mPath, mMod, suffix, suffixIsOptional, verbose, warn);
1134  auto mScen = uniqueToSharedMap<ModelScenario>(mScenU);
1135 
1136  auto SPC = getSubstitutionProcessCollection(alphabet, gCode, mTree,
1137  mMod, mRootFreq, mDist, mScen,
1138  params, unparsedParams, suffix, suffixIsOptional, verbose, warn);
1139 
1140  // Get relevant objects from Collection to build an AutonomousSubstitutionProcess
1141  unique_ptr<AutonomousSubstitutionProcessInterface> ASP;
1142 
1143  auto psNum = SPC->getSubstitutionProcessNumbers();
1144  if (psNum.size() == 0)
1145  throw Exception("PhylogeneticsApplicationTools::getSubstitutionProcess : missing process in parameters.");
1146 
1147  size_t maxps = *max_element(psNum.begin(), psNum.end());
1148 
1149  SubstitutionProcessCollectionMember& procm = SPC->substitutionProcess(maxps);
1150 
1151  auto distproc = procm.getRateDistribution();
1152 
1153  auto rootproc = procm.getRootFrequencySet();
1154 
1155  auto scen = procm.getModelScenario();
1156 
1157  auto vmodnb = procm.getModelNumbers();
1158 
1159  if (vmodnb.size() == 1)
1160  {
1161  if (!distproc)
1162  ASP = make_unique<SimpleSubstitutionProcess>(procm.getModel(1), procm.getParametrizablePhyloTree(), rootproc);
1163  else
1164  ASP = make_unique<RateAcrossSitesSubstitutionProcess>(procm.getModel(1), procm.getRateDistribution(), procm.getParametrizablePhyloTree(), rootproc);
1165  }
1166  else
1167  {
1168  auto NHSP = make_unique<NonHomogeneousSubstitutionProcess>(procm.getRateDistribution(), procm.getParametrizablePhyloTree(), rootproc);
1169 
1170  for (auto nb:vmodnb)
1171  {
1172  NHSP->addModel(procm.getModel(nb), procm.getNodesWithModel(nb));
1173  }
1174 
1175  if (!NHSP->isFullySetUp(false))
1176  throw Exception("PhylogeneticsApplicationTools::getSubstitutionProcess: process not fully set up.");
1177 
1178  ASP = std::move(NHSP);
1179  }
1180 
1181  if (procm.getModelScenario())
1182  ASP->setModelScenario(procm.getModelScenario());
1183 
1184  return ASP;
1185 }
1186 
1187 
1188 /************************************************************/
1189 
1191  SubstitutionProcessCollection& SubProColl,
1192  size_t procNum,
1193  const map<string, string>& params,
1194  bool verbose,
1195  int warn)
1196 {
1197  string procName = "";
1198  map<string, string> args;
1199 
1200  string procDesc = ApplicationTools::getStringParameter("process", params, "", TextTools::toString(procNum), warn);
1201 
1202  KeyvalTools::parseProcedure(procDesc, procName, args);
1203 
1204  if ((procName != "OnePerBranch") && (procName != "Homogeneous") && (procName != "Nonhomogeneous") && (procName != "NonHomogeneous"))
1205  {
1206  if (warn >= 2)
1207  ApplicationTools::displayWarning("Warning, unknown process name: " + procName);
1208 
1209  return 0;
1210  }
1211 
1212  // ///
1213  // tree number
1214 
1215  size_t numTree;
1216 
1217  if (args.find("tree") == args.end())
1218  {
1219  if (warn)
1220  ApplicationTools::displayWarning("Warning, missing tree for process name: " + procName);
1221  numTree = 0;
1222  }
1223  else
1224  {
1225  numTree = (size_t) ApplicationTools::getIntParameter("tree", args, 1, "", true, warn);
1226 
1227  if (numTree != 0 && !SubProColl.hasTreeNumber(numTree))
1228  throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown tree number", (int)numTree);
1229  }
1230 
1231  // /////
1232  // rate number
1233 
1234  size_t numRate = 0;
1235  if (args.find("rate") == args.end())
1236  {
1237  const auto& vrdn = SubProColl.getRateDistributionNumbers();
1238  numRate = 0;
1239  for (auto rdn:vrdn)
1240  {
1241  if (SubProColl.rateDistribution(rdn).getName() == "Constant")
1242  {
1243  numRate = rdn;
1244  break;
1245  }
1246  }
1247  if (numRate == 0)
1248  {
1249  for (uint i = 1; i <= *std::max_element(vrdn.begin(), vrdn.end()) + 1; i++)
1250  {
1251  if (std::find(vrdn.begin(), vrdn.end(), i) == vrdn.end())
1252  {
1253  numRate = i;
1254  break;
1255  }
1256  }
1257  SubProColl.addDistribution(std::make_shared<ConstantRateDistribution>(), numRate);
1258  }
1259  }
1260  else
1261  {
1262  string sRate = ApplicationTools::getStringParameter("rate", args, "1", "", true, warn);
1263 
1264  size_t pp = sRate.find(".");
1265 
1266  numRate = TextTools::to<size_t>(sRate.substr(0, pp));
1267 
1268  if (!SubProColl.hasDistributionNumber(numRate))
1269  throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown rate number", (int)numRate);
1270 
1271  if (pp != string::npos)
1272  {
1273  size_t numSRate = TextTools::to<size_t>(sRate.substr(pp + 1));
1274  SubProColl.addDistribution(
1275  std::make_shared<ConstantDistribution>(
1276  SubProColl.rateDistribution(numRate).getCategory(numSRate)),
1277  10000 * (numRate + 1) + numSRate);
1278 
1279  numRate = 10000 * (numRate + 1) + numSRate;
1280  }
1281  }
1282 
1283  // ////////
1284  // root freq number
1285 
1286  bool stationarity = (args.find("root_freq") == args.end());
1287  size_t numFreq = 0;
1288 
1289  if (!stationarity)
1290  {
1291  numFreq = (size_t) ApplicationTools::getIntParameter("root_freq", args, 1, "", true, warn);
1292  if (!SubProColl.hasFrequenciesNumber(numFreq))
1293  throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown root frequencies number", (int)numFreq);
1294  }
1295 
1296  // ///
1297  // scenario number
1298 
1299  size_t numScen = 0;
1300 
1301  if (args.find("scenario") != args.end())
1302  {
1303  numScen = (size_t) ApplicationTools::getIntParameter("scenario", args, 1, "", true, warn);
1304 
1305  if (!SubProColl.hasModelScenario(numScen))
1306  throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown scenario number", (int)numScen);
1307  }
1308 
1309  // ////////////////
1310  // / models
1311 
1312  if (verbose)
1313  {
1316  }
1317 
1318  if (procName == "Homogeneous")
1319  {
1320  if (args.find("model") == args.end())
1321  throw Exception("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember. A model number is compulsory.");
1322 
1323  size_t numModel = (size_t) ApplicationTools::getIntParameter("model", args, 1, "", true, warn);
1324 
1325  if (!SubProColl.hasModelNumber(numModel))
1326  throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown model number", static_cast<int>(numModel));
1327 
1328  map<size_t, vector<unsigned int>> mModBr;
1329 
1330  vector<uint> vNodes;
1331  if (numTree != 0)
1332  vNodes = SubProColl.tree(numTree).getAllEdgesIndexes();
1333  else
1334  vNodes = {0}
1335  ;
1336  mModBr[numModel] = vNodes;
1337 
1338  if (verbose)
1339  {
1340  ApplicationTools::displayResult("Process type", string("Homogeneous"));
1341  ApplicationTools::displayResult (" Model number", TextTools::toString(numModel));
1342  ApplicationTools::displayResult (" Tree number", TextTools::toString(numTree));
1343  if (numRate < 10000)
1344  ApplicationTools::displayResult (" Rate number", TextTools::toString(numRate));
1345  else
1346  ApplicationTools::displayResult (" Rate number", TextTools::toString(numRate / 10000 - 1) + "." + TextTools::toString(numRate % 10000));
1347 
1348  if (numScen != 0)
1349  ApplicationTools::displayResult (" Scenario number", TextTools::toString(numScen));
1350 
1351  if (!stationarity)
1352  ApplicationTools::displayResult (" Root frequencies number", TextTools::toString(numFreq));
1353  else
1354  ApplicationTools::displayMessage(" Stationarity assumed.");
1355  }
1356 
1357  if (stationarity)
1358  SubProColl.addSubstitutionProcess(procNum, mModBr, numTree, numRate);
1359  else
1360  SubProColl.addSubstitutionProcess(procNum, mModBr, numTree, numRate, numFreq);
1361  }
1362 
1363  else if ((procName == "Nonhomogeneous") || (procName == "NonHomogeneous"))
1364  {
1365  if (numTree == 0)
1366  throw Exception("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : missing tree number for process " + TextTools::toString(procName));
1367 
1368  size_t indModel = 1;
1369  map<size_t, vector<unsigned int>> mModBr;
1370 
1371  while (args.find("model" + TextTools::toString(indModel)) != args.end())
1372  {
1373  size_t numModel = (size_t) ApplicationTools::getIntParameter("model" + TextTools::toString(indModel), args, 1, "", true, warn);
1374 
1375  if (mModBr.find(numModel) != mModBr.end())
1376  throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : model number seen twice.", (int)numModel);
1377 
1378  vector<unsigned int> nodesId;
1379 
1380  auto snodesid = "model" + TextTools::toString(indModel) + ".nodes_id";
1381  auto descnodes = ApplicationTools::getStringParameter(snodesid, args, "", "", true, warn);
1382 
1383 
1384  auto tree = SubProColl.getTree(numTree);
1385  if (descnodes == "All")
1386  {
1387  nodesId = tree->getEdgeIndexes(tree->getSubtreeEdges(tree->getRoot()));
1388  }
1389  else if (descnodes == "Leaves")
1390  {
1391  nodesId = tree->getNodeIndexes(tree->getLeavesUnderNode(tree->getRoot()));
1392  }
1393  else if (descnodes == "NoLeaves")
1394  {
1395  auto allIds = tree->getEdgeIndexes(tree->getSubtreeEdges(tree->getRoot()));
1396  auto leavesId = tree->getNodeIndexes(tree->getLeavesUnderNode(tree->getRoot()));
1397  VectorTools::diff(allIds, leavesId, nodesId);
1398  }
1399  else
1400  nodesId = ApplicationTools::getVectorParameter<unsigned int>(snodesid, args, ',', ':', "", "", true, warn);
1401 
1402  mModBr[numModel] = nodesId;
1403  indModel++;
1404  }
1405 
1406  if (verbose)
1407  {
1408  ApplicationTools::displayResult("Process type", string("NonHomogeneous"));
1409 
1410  for (auto& it : mModBr)
1411  {
1412  ApplicationTools::displayResult (" Model number" + TextTools::toString(it.first) + " associated to", TextTools::toString(it.second.size()) + " node(s).");
1413  }
1414  ApplicationTools::displayResult (" Tree number", TextTools::toString(numTree));
1415  ApplicationTools::displayResult (" Rate number", TextTools::toString(numRate));
1416  if (!stationarity)
1417  ApplicationTools::displayResult (" Root frequencies number", TextTools::toString(numFreq));
1418  else
1419  ApplicationTools::displayMessage(" Stationarity assumed.");
1420  }
1421 
1422  if (stationarity)
1423  SubProColl.addSubstitutionProcess(procNum, mModBr, numTree, numRate);
1424  else
1425  SubProColl.addSubstitutionProcess(procNum, mModBr, numTree, numRate, numFreq);
1426  }
1427  else if (procName == "OnePerBranch")
1428  {
1429  if (numTree == 0)
1430  throw Exception("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : missing tree number for process " + TextTools::toString(procName));
1431 
1432  if (args.find("model") == args.end())
1433  throw Exception("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember. A model number is compulsory.");
1434 
1435  size_t numModel = (size_t) ApplicationTools::getIntParameter("model", args, 1, "", true, warn);
1436 
1437  if (!SubProColl.hasModelNumber(numModel))
1438  throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown model number", (int)numModel);
1439 
1440  vector<string> sharedParameters = ApplicationTools::getVectorParameter<string>("shared_parameters", args, ',', "", "", true, 1);
1441 
1442  if (stationarity)
1443  SubProColl.addOnePerBranchSubstitutionProcess(procNum, numModel, numTree, numRate, sharedParameters);
1444  else
1445  SubProColl.addOnePerBranchSubstitutionProcess(procNum, numModel, numTree, numRate, numFreq, sharedParameters);
1446 
1447  if (verbose)
1448  {
1449  ApplicationTools::displayResult("Process type", string("OnePerBranch"));
1450 
1451  ApplicationTools::displayResult (" Model number", TextTools::toString(numModel));
1452  ApplicationTools::displayResult (" Tree number", TextTools::toString(numTree));
1453  ApplicationTools::displayResult (" Rate number", TextTools::toString(numRate));
1454  if (!stationarity)
1455  ApplicationTools::displayResult (" Root frequencies number", TextTools::toString(numFreq));
1456  else
1457  ApplicationTools::displayMessage(" Stationarity assumed.");
1458 
1459  for (const auto& sP : sharedParameters)
1460  {
1461  ApplicationTools::displayResult(" Shared parameter", sP);
1462  }
1463  }
1464  }
1465 
1466  if (numScen != 0)
1467  SubProColl.substitutionProcess(procNum).setModelScenario(numScen);
1468 
1469  return true;
1470 }
1471 
1472 
1473 /******************************************************************************/
1474 
1476  std::shared_ptr<const Alphabet> alphabet,
1477  std::shared_ptr<const GeneticCode> gCode,
1478  const map<size_t, std::shared_ptr<PhyloTree>>& mTree,
1479  const map<size_t, std::shared_ptr<BranchModelInterface>>& mMod,
1480  const map<size_t, std::shared_ptr<FrequencySetInterface>>& mRootFreq,
1481  const map<size_t, std::shared_ptr<DiscreteDistributionInterface>>& mDist,
1482  const map<size_t, std::shared_ptr<ModelScenario>>& mScen,
1483  const map<string, string>& params,
1484  map<string, string>& unparsedParams,
1485  const string& suffix,
1486  bool suffixIsOptional,
1487  bool verbose,
1488  int warn)
1489 {
1490  auto SPC = make_unique<SubstitutionProcessCollection>();
1491 
1492  map<string, double> existingParameters;
1493 
1494  // ///////////////////////
1495  // Trees
1496 
1497  for (const auto& itt : mTree)
1498  {
1499  if (itt.second)
1500  {
1501  SPC->addTree(std::make_shared<ParametrizablePhyloTree>(*(itt.second)), itt.first);
1502  }
1503  }
1504 
1505  // ///////////////////////
1506  // Rates
1507 
1508  for (const auto& itd : mDist)
1509  {
1510  SPC->addDistribution(itd.second, itd.first);
1511  }
1512 
1513  // ////////////////////////
1514  // Models
1515 
1516  for (const auto& itm : mMod)
1517  {
1518  SPC->addModel(itm.second, itm.first);
1519  }
1520 
1521  // ///////////////////////////
1522  // Root Frequencies
1523 
1524  for (const auto& itr : mRootFreq)
1525  {
1526  SPC->addFrequencies(itr.second, itr.first);
1527  }
1528 
1529  // ///////////////////////
1530  // Scenarios
1531 
1532  for (const auto& itt : mScen)
1533  {
1534  SPC->addScenario(itt.second, itt.first);
1535  }
1536 
1537  // //////////////////////////////
1538  // Now processes
1539 
1540  vector<string> vProcName = ApplicationTools::matchingParameters("process*", params);
1541 
1542  if (vProcName.size() == 0)
1543  throw Exception("Missing process in construction of SubstitutionProcessCollection.");
1544 
1545  for (size_t nT = 0; nT < vProcName.size(); nT++)
1546  {
1547  size_t poseq = vProcName[nT].find("=");
1548  size_t num;
1549  size_t len = 7;
1550 
1551  string suff = vProcName[nT].substr(len, poseq - len);
1552 
1553  if (TextTools::isDecimalInteger(suff, '$'))
1554  num = TextTools::to<size_t>(suff);
1555  else
1556  num = 1;
1557 
1558  bool addok = addSubstitutionProcessCollectionMember(*SPC, num, params, (nT < 10 ? verbose : false), warn);
1559 
1560  if (addok)
1561  {
1562  if (nT == 10)
1564 
1565  if (nT >= 10)
1566  ApplicationTools::displayResult("Process" + TextTools::toString(num), string("..."));
1567  }
1568  }
1569 
1570 
1571  // string ProcessFilePath = ApplicationTools::getAFilePath("processes.file", params, false, false, suffix, suffixIsOptional);
1572 
1573  // map<string, string> paramProcess;
1574 
1575  // if (ProcessFilePath!="none")
1576  // paramModel=AttributesTools::getAttributesMapFromFile(ProcessFilePath,"=");
1577 
1578  // paramProcess.insert(params.begin(), params.end());
1579 
1580  // vector<string> processName=ApplicationTools::matchingParameters("process*", paramProcess);
1581 
1582  // vector<size_t> processNum;
1583  // for (size_t i=0; i< processName.size(); i++)
1584  // {
1585  // size_t poseq=processName[i].find("=");
1586  // processNum.push_back((size_t)TextTools::toInt(processName[i].substr(7,poseq-7)));
1587  // }
1588 
1589  // if (processNum.size()==0)
1590  // throw Exception("Missing process in construction of SubstitutionProcessCollection.");
1591 
1592  // for (size_t i=0; i<processNum.size(); i++)
1593  // addSubstitutionProcessCollectionMember(*SPC, params, processNum[i]);
1594 
1595 
1596  // /////////////////////////
1597  // Now set shared parameters:
1598 
1599  // ////// Aliasing
1600  // Finally check parameter aliasing:
1601 
1602  for (const auto& param : params)
1603  {
1604  try
1605  {
1606  auto v2 = TextTools::toDouble(param.second);
1607  SPC->setParameterValue(param.first, v2);
1608  }
1609  catch (Exception& e)
1610  {
1611  if (SPC->hasParameter(param.first))
1612  unparsedParams[param.first] = param.second;
1613  }
1614  }
1615 
1616  SPC->aliasParameters(unparsedParams, verbose);
1617 
1618  return SPC;
1619 }
1620 
1621 /******************************************************/
1622 /**** SEQUENCE EVOLUTIONS *****************************/
1623 /******************************************************/
1624 
1625 map<size_t, unique_ptr<SequenceEvolution>> PhylogeneticsApplicationTools::getSequenceEvolutions(
1626  shared_ptr<SubstitutionProcessCollection> SPC,
1627  const map<string, string>& params,
1628  map<string, string>& unparsedParams,
1629  const string& suffix,
1630  bool suffixIsOptional,
1631  bool verbose,
1632  int warn)
1633 {
1634  map<string, string> paramEvol;
1635 
1636  paramEvol.insert(params.begin(), params.end());
1637 
1638  vector<string> evolsName = ApplicationTools::matchingParameters("process*", paramEvol);
1639 
1640  vector<size_t> evolsNum;
1641  for (size_t i = 0; i < evolsName.size(); ++i)
1642  {
1643  size_t poseq = evolsName[i].find("=");
1644  evolsNum.push_back(TextTools::to<size_t>(evolsName[i].substr(7, poseq - 7)));
1645  }
1646 
1647  map<size_t, unique_ptr<SequenceEvolution>> mEvol;
1648 
1649  for (size_t mPi = 0; mPi < evolsNum.size(); ++mPi)
1650  {
1651  if (SPC->hasSubstitutionProcessNumber(evolsNum[mPi]))
1652  continue;
1653 
1654  unique_ptr<SequenceEvolution> nEvol;
1655 
1656  string evolName = "";
1657  map<string, string> args;
1658 
1659  string evolDesc = ApplicationTools::getStringParameter("process", params, "", TextTools::toString(evolsNum[mPi]), warn);
1660 
1661  KeyvalTools::parseProcedure(evolDesc, evolName, args);
1662 
1663  // Process
1664  if (verbose)
1665  {
1667  ApplicationTools::displayMessage("Process " + TextTools::toString(evolsNum[mPi]));
1668  }
1669 
1670  if (evolName == "Simple")
1671  {
1672  size_t nproc = (size_t) ApplicationTools::getIntParameter("process", args, ',', "");
1673  if (!SPC->hasSubstitutionProcessNumber(nproc))
1674  throw BadIntegerException("PhylogeneticsApplicationTools::getEvolutions. Unknown process number:", (int)nproc);
1675 
1676  nEvol = make_unique<OneProcessSequenceEvolution>(SPC->getSubstitutionProcess(nproc), nproc);
1677  if (verbose)
1678  {
1679  ApplicationTools::displayResult("Process type", string("Simple"));
1680  ApplicationTools::displayResult (" Process number", TextTools::toString(nproc));
1681  }
1682  }
1683  else
1684  {
1685  size_t indProc = 1;
1686  vector<size_t> vproc;
1687 
1688  while (args.find("process" + TextTools::toString(indProc)) != args.end())
1689  {
1690  size_t numProc = (size_t) ApplicationTools::getIntParameter("process" + TextTools::toString(indProc), args, 1, "", true, warn);
1691 
1692  vproc.push_back(numProc);
1693  indProc++;
1694  }
1695 
1696  if (vproc.size() == 0)
1697  throw BadIntegerException("PhylogeneticsApplicationTools::getEvolutions. A process number is compulsory for process", (int)indProc);
1698 
1699  for (size_t i = 0; i < vproc.size(); ++i)
1700  {
1701  if (!SPC->hasSubstitutionProcessNumber(vproc[i]))
1702  throw BadIntegerException("PhylogeneticsApplicationTools::getEvolutions. Unknown process number:", (int)vproc[i]);
1703  }
1704 
1705  if (evolName == "Partition")
1706  {
1707  // parse all processes sites
1708 
1709  vector<size_t> vMap;
1710 
1711  map<size_t, size_t> posProc;
1712 
1713  for (size_t i = 0; i < vproc.size(); ++i)
1714  {
1715  string prefix = "process" + TextTools::toString(i + 1);
1716 
1717  vector<size_t> procPos = ApplicationTools::getVectorParameter<size_t>(prefix + ".sites", args, ',', ':', TextTools::toString(i), "", true, true);
1718 
1719  for (size_t j = 0; j < procPos.size(); ++j)
1720  {
1721  if (posProc.find(procPos[j]) != posProc.end())
1722  throw BadIntegerException("A process position is defined twice ", (int)procPos[j]);
1723  else
1724  posProc[procPos[j]] = vproc[i];
1725  }
1726  }
1727 
1728  size_t pos = posProc.begin()->first;
1729 
1730  while (posProc.find(pos) != posProc.end())
1731  {
1732  vMap.push_back(posProc[pos]);
1733  pos++;
1734  }
1735 
1736  if (vMap.size() != posProc.size())
1737  throw Exception("Error : there are gaps in the process sites");
1738 
1739  if (verbose)
1740  ApplicationTools::displayResult("Process type", string("Partition"));
1741 
1742  auto pMP = make_unique<PartitionSequenceEvolution>(SPC, vMap);
1743 
1744  nEvol = std::move(pMP);
1745  }
1746  else if (evolName == "Mixture")
1747  {
1748  auto pMP = make_unique<MixtureSequenceEvolution>(SPC, vproc);
1749 
1750  if (verbose)
1751  ApplicationTools::displayResult("Process type", string("Mixture"));
1752 
1753  size_t nbP = pMP->getNumberOfSubstitutionProcess();
1754 
1755  vector<double> vprob = ApplicationTools::getVectorParameter<double>("probas", args, ',', "(" + VectorTools::paste(vector<double>(nbP, 1. / (double)nbP)) + ")");
1756  if (vprob.size() != 1)
1757  {
1758  if (vprob.size() != nbP)
1759  throw BadSizeException("Wrong size of probas description in Mixture", vprob.size(), nbP);
1760  Simplex si(vprob);
1761  pMP->setSubProcessProb(si);
1762  }
1763 
1764  nEvol = std::move(pMP);
1765  }
1766  else if (evolName == "HMM")
1767  {
1768  auto pMP = make_unique<HmmSequenceEvolution>(SPC, vproc);
1769 
1770  if (verbose)
1771  ApplicationTools::displayResult("Process type", string("HMM"));
1772 
1773  size_t nbP = pMP->getNumberOfSubstitutionProcess();
1774 
1775  string vs = "(" + VectorTools::paste(vector<double>(nbP, 1. / (double)nbP), ",") + ")";
1776  string vvs = "(";
1777  for (size_t i = 0; i < nbP; ++i)
1778  {
1779  vvs += (i == 0 ? "" : ",") + vs;
1780  }
1781  vvs += ")";
1782 
1783  RowMatrix<double> mat = ApplicationTools::getMatrixParameter<double>("probas", args, ',', vvs);
1784 
1785  FullHmmTransitionMatrix fhtm(pMP->hmmTransitionMatrix().getHmmStateAlphabet(), pMP->getNamespace());
1786  fhtm.setTransitionProbabilities(mat);
1787 
1788  pMP->matchParametersValues(fhtm.getParameters());
1789 
1790  nEvol = std::move(pMP);
1791  }
1792  else if (evolName == "AutoCorr")
1793  {
1794  auto pMP = make_unique<AutoCorrelationSequenceEvolution>(SPC, vproc);
1795 
1796  size_t nbP = pMP->getNumberOfSubstitutionProcess();
1797 
1798  if (verbose)
1799  ApplicationTools::displayResult("Process type", string("AutoCorr"));
1800 
1801  string vs = "(" + VectorTools::paste(vector<double>(nbP, 0.95), ",") + ")";
1802 
1803  vector<double> v = ApplicationTools::getVectorParameter<double>("lambdas", args, ',', vs);
1804 
1805  ParameterList pl;
1806 
1807  for (size_t i = 0; i < v.size(); ++i)
1808  {
1809  pl.addParameter(Parameter("AutoCorr.lambda" + TextTools::toString(i + 1), v[i]));
1810  }
1811 
1812  pMP->matchParametersValues(pl);
1813 
1814  nEvol = std::move(pMP);
1815  }
1816  else
1817  throw Exception("Unknown Process description : " + evolName);
1818 
1819  if (verbose)
1820  {
1821  ApplicationTools::displayResult (" Process numbers", VectorTools::paste(vproc, ","));
1823  }
1824  }
1825 
1826  mEvol[evolsNum[mPi]] = std::move(nEvol);
1827  }
1828 
1829  return mEvol;
1830 }
1831 
1832 
1833 /******************************************************/
1834 /**** PHYLO LIKELIHOODS *********************************/
1835 /******************************************************/
1836 
1837 std::shared_ptr<PhyloLikelihoodContainer> PhylogeneticsApplicationTools::getPhyloLikelihoodContainer(
1838  Context& context,
1839  shared_ptr<SubstitutionProcessCollection> SPC,
1840  map<size_t, std::shared_ptr<SequenceEvolution>>& mSeqEvol,
1841  const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
1842  const map<string, string>& params,
1843  const string& suffix,
1844  bool suffixIsOptional,
1845  bool verbose,
1846  int warn)
1847 {
1848  auto mPhylo = std::make_shared<PhyloLikelihoodContainer>(context, SPC);
1849 
1850  // get all members of the collection and link then to Configured Objects
1851  auto collNodes = mPhylo->getCollectionNodes();
1852 
1853  // the phylo members
1854  map<string, string> paramPhyl;
1855  paramPhyl.insert(params.begin(), params.end());
1856  vector<string> phylosName = ApplicationTools::matchingParameters("phylo*", paramPhyl);
1857 
1858  // map of dependencies between phylolikelihoods
1859 
1860  map<size_t, vector<size_t>> phylosMap;
1861 
1862  for (size_t i = 0; i < phylosName.size(); ++i)
1863  {
1864  size_t poseq = phylosName[i].find("=");
1865  size_t phyln = TextTools::to<size_t>(phylosName[i].substr(5, poseq - 5));
1866 
1867  if (phyln == 0)
1868  throw BadIntegerException("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : Forbidden Phylo Number", 0);
1869 
1870  string phyloDesc = ApplicationTools::getStringParameter("phylo", params, "Single", TextTools::toString(phyln), 2);
1871 
1872  map<string, string> args;
1873 
1874  string phyloName = "";
1875 
1876  KeyvalTools::parseProcedure(phyloDesc, phyloName, args);
1877 
1878  size_t indPhyl = 1;
1879  vector<size_t> vphyl;
1880 
1881  while (args.find("phylo" + TextTools::toString(indPhyl)) != args.end())
1882  {
1883  size_t numPhyl = (size_t) ApplicationTools::getIntParameter("phylo" + TextTools::toString(indPhyl), args, 1, "", true, warn);
1884  vphyl.push_back(numPhyl);
1885  indPhyl++;
1886  }
1887 
1888  phylosMap[phyln] = vphyl;
1889  }
1890 
1891  vector<size_t> usedPhylo;
1892 
1893  // //////////////////////////////////////////
1894  // First the phylos that do not depend on other phylos
1895 
1896  uint nbPh(0);
1897  bool verbhere(verbose);
1898 
1899  for (const auto& it : phylosMap)
1900  {
1901  nbPh++;
1902 
1903  if (it.second.size() != 0)
1904  continue;
1905 
1906  size_t phylonum = it.first;
1907 
1908  std::shared_ptr<PhyloLikelihoodInterface> nPL;
1909  string phyloName = "";
1910 
1911  map<string, string> args;
1912 
1913  string phyloDesc = ApplicationTools::getStringParameter("phylo", params, "Single", TextTools::toString(phylonum), warn);
1914 
1915  if (verbose)
1916  {
1917  if (nbPh <= 20)
1919  else
1920  verbhere = false;
1921 
1922  ApplicationTools::displayMessage("Phylolikelihood " + TextTools::toString(phylonum));
1923  }
1924 
1925  KeyvalTools::parseProcedure(phyloDesc, phyloName, args);
1926 
1927  // Data
1928 
1929  size_t nData = (args.find("data") == args.end() ? 1 : TextTools::to<size_t>(args["data"]));
1930 
1931  if (mData.find(nData) == mData.end())
1932  {
1933  ApplicationTools::displayWarning("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer. Data number is wrong:" + TextTools::toString(nData) + ". Not built.");
1934  continue;
1935  }
1936 
1937  auto data = mData.find(nData)->second;
1938 
1939  if (!data)
1940  {
1941  ApplicationTools::displayWarning("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer. Data " + TextTools::toString(nData) + " does not match with aligned sequences. Not built.");
1942  continue;
1943  }
1944 
1945  if (verbhere)
1947 
1948  // Sequence Evolution or process
1949 
1950  size_t nProcess = (args.find("process") == args.end() ? 1 : TextTools::to<size_t>(args["process"]));
1951  if (verbhere)
1952  ApplicationTools::displayResult(" Process ", TextTools::toString(nProcess));
1953 
1954 
1955  // Construction
1956 
1957  if (SPC->hasSubstitutionProcessNumber(nProcess))
1958  {
1959  auto l = std::make_shared<LikelihoodCalculationSingleProcess>(collNodes, data, nProcess);
1960  nPL = make_unique<SingleProcessPhyloLikelihood>(context, l, nProcess, nData);
1961  }
1962  else if (mSeqEvol.find(nProcess) != mSeqEvol.end())
1963  {
1964  // ////////////////
1965  // / from sequence evolutions to phyloLikelihoods
1966 
1967  auto opse = dynamic_pointer_cast<OneProcessSequenceEvolution>(mSeqEvol[nProcess]);
1968 
1969  if (opse)
1970  nPL = make_unique<OneProcessSequencePhyloLikelihood>(data, opse, collNodes, nProcess, nData);
1971  else
1972  {
1973  auto mse = dynamic_pointer_cast<MixtureSequenceEvolution>(mSeqEvol[nProcess]);
1974 
1975  if (mse)
1976  nPL = make_unique<MixtureProcessPhyloLikelihood>(data, mse, collNodes, nProcess, nData);
1977 
1978  else
1979  {
1980  auto hse = dynamic_pointer_cast<HmmSequenceEvolution>(mSeqEvol[nProcess]);
1981 
1982  if (hse)
1983  nPL = make_unique<HmmProcessPhyloLikelihood>(data, hse, collNodes, nProcess, nData);
1984 
1985  else
1986  {
1987  auto ase = dynamic_pointer_cast<AutoCorrelationSequenceEvolution>(mSeqEvol[nProcess]);
1988 
1989  if (ase)
1990  nPL = make_unique<AutoCorrelationProcessPhyloLikelihood>(data, ase, collNodes, nProcess, nData);
1991  else
1992  {
1993  auto pse = dynamic_pointer_cast<PartitionSequenceEvolution>(mSeqEvol[nProcess]);
1994 
1995  if (pse)
1996  nPL = make_unique<PartitionProcessPhyloLikelihood>(data, pse, collNodes, nProcess, nData);
1997 
1998  else
1999  throw Exception("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : Unknown Sequence Evolution.");
2000  }
2001  }
2002  }
2003  }
2004  }
2005  else
2006  throw BadIntegerException("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : Unknown Process number.", (int)nProcess);
2007 
2008  mPhylo->addPhyloLikelihood(phylonum, std::move(nPL));
2009  usedPhylo.push_back(phylonum);
2010  }
2011 
2012  // Now clean the map
2013  for (auto it = phylosMap.begin(); it != phylosMap.end();)
2014  {
2015  if (it->second.size() == 0)
2016  {
2017  phylosMap.erase(it++);
2018  continue;
2019  }
2020 
2021  vector<size_t>& vphyl = it->second;
2022  for (size_t i = vphyl.size(); i > 0; i--)
2023  {
2024  vector<size_t>::iterator posp = find(usedPhylo.begin(), usedPhylo.end(), vphyl[i - 1]);
2025  if (posp != usedPhylo.end())
2026  vphyl.erase(vphyl.begin() + static_cast<ptrdiff_t>(i - 1));
2027  }
2028  ++it;
2029  }
2030 
2031  // Proceed the other phylos
2032 
2033  while (phylosMap.size() != 0) // there is still phylos to be treated
2034  {
2035  if (usedPhylo.size() == 0)
2036  {
2037  ApplicationTools::displayWarning("Warning, some phylolikelihoods are not used.");
2038  break;
2039  }
2040 
2041  usedPhylo.clear();
2042 
2043  for (map<size_t, vector<size_t>>::iterator it = phylosMap.begin(); it != phylosMap.end(); it++)
2044  {
2045  nbPh++;
2046 
2047  if (it->second.size() == 0)
2048  {
2049  size_t phylonum = it->first;
2050 
2051  unique_ptr<PhyloLikelihoodInterface> nPL;
2052  string phyloName = "";
2053 
2054  map<string, string> args;
2055 
2056  string phyloDesc = ApplicationTools::getStringParameter("phylo", params, "Single", TextTools::toString(phylonum), warn);
2057  KeyvalTools::parseProcedure(phyloDesc, phyloName, args);
2058 
2059  if (verbose)
2060  {
2061  if (nbPh <= 20)
2063  else
2064  verbhere = false;
2065 
2066  ApplicationTools::displayMessage("Phylolikelihood " + TextTools::toString(phylonum));
2067  }
2068 
2069  KeyvalTools::parseProcedure(phyloDesc, phyloName, args);
2070 
2071  size_t indPhylo = 1;
2072  vector<size_t> vPhylo;
2073 
2074  while (args.find("phylo" + TextTools::toString(indPhylo)) != args.end())
2075  {
2076  size_t numPhylo = (size_t) ApplicationTools::getIntParameter("phylo" + TextTools::toString(indPhylo), args, 1, "", true, warn);
2077 
2078  vPhylo.push_back(numPhylo);
2079  indPhylo++;
2080  }
2081 
2082  if (phyloName == "Mixture")
2083  {
2084  auto pMA = make_unique<AlignedPhyloLikelihoodMixture>(context, std::move(mPhylo), vPhylo);
2085  vector<double> vprob = ApplicationTools::getVectorParameter<double>("probas", args, ',', "(" + VectorTools::paste(vector<double>(vPhylo.size(), 1. / (double)vPhylo.size())) + ")");
2086  if (vprob.size() != 1)
2087  {
2088  if (vprob.size() != vPhylo.size())
2089  throw BadSizeException("Wrong size of probas description in Mixture", vprob.size(), vPhylo.size());
2090  Simplex si(vprob);
2091  pMA->setPhyloProb(si);
2092  }
2093 
2094  nPL = std::move(pMA);
2095  }
2096  else if (phyloName == "HMM")
2097  {
2098  auto pMA = make_unique<AlignedPhyloLikelihoodHmm>(context, std::move(mPhylo), vPhylo);
2099 
2100  size_t nbP = pMA->getNumbersOfPhyloLikelihoods().size();
2101 
2102  string vs = "(" + VectorTools::paste(vector<double>(nbP, 1. / static_cast<double>(nbP)), ",") + ")";
2103  string vvs = "(";
2104  for (size_t i = 0; i < nbP; ++i)
2105  {
2106  vvs += (i == 0 ? "" : ",") + vs;
2107  }
2108  vvs += ")";
2109 
2110  RowMatrix<double> mat = ApplicationTools::getMatrixParameter<double>("probas", args, ',', vvs);
2111 
2112  FullHmmTransitionMatrix fhtm(pMA->getHmmStateAlphabet(), pMA->getNamespace());
2113  fhtm.setTransitionProbabilities(mat);
2114 
2115  pMA->matchParametersValues(fhtm.getParameters());
2116 
2117  nPL = std::move(pMA);
2118  }
2119  else if (phyloName == "AutoCorr")
2120  {
2121  auto pMA = make_unique<AlignedPhyloLikelihoodAutoCorrelation>(context, std::move(mPhylo), vPhylo);
2122 
2123  size_t nbP = pMA->getNumbersOfPhyloLikelihoods().size();
2124 
2125  string vs = "(" + VectorTools::paste(vector<double>(nbP, 0.95), ",") + ")";
2126 
2127  vector<double> v = ApplicationTools::getVectorParameter<double>("lambdas", args, ',', vs);
2128 
2129  ParameterList pl;
2130 
2131  for (size_t i = 0; i < v.size(); ++i)
2132  {
2133  pl.addParameter(Parameter("AutoCorr.lambda" + TextTools::toString(i + 1), v[i]));
2134  }
2135 
2136  pMA->matchParametersValues(pl);
2137 
2138  nPL = std::move(pMA);
2139  }
2140  else if (phyloName == "Product")
2141  {
2142  auto pAP = make_unique<AlignedPhyloLikelihoodProduct>(context, std::move(mPhylo), vPhylo);
2143 
2144  nPL = std::move(pAP);
2145  }
2146  else
2147  throw Exception("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : Unknown Phylo name " + phyloName);
2148 
2149  if (verbhere)
2150  {
2151  ApplicationTools::displayResult(" Phylolikelihood type", phyloName);
2152  ApplicationTools::displayResult(" Phylo numbers", VectorTools::paste(vPhylo, ","));
2153  }
2154 
2155  mPhylo->addPhyloLikelihood(phylonum, std::move(nPL));
2156  usedPhylo.push_back(phylonum);
2157  }
2158  }
2159 
2160  // Now clean the map
2161  for (auto it = phylosMap.begin(); it != phylosMap.end();)
2162  {
2163  if (it->second.size() == 0)
2164  {
2165  phylosMap.erase(it++);
2166  continue;
2167  }
2168 
2169  vector<size_t>& vphyl = it->second;
2170  for (size_t i = vphyl.size(); i > 0; i--)
2171  {
2172  vector<size_t>::iterator posp = find(usedPhylo.begin(), usedPhylo.end(), vphyl[i - 1]);
2173  if (posp != usedPhylo.end())
2174  vphyl.erase(vphyl.begin() + static_cast<ptrdiff_t>(i - 1));
2175  }
2176  ++it;
2177  }
2178  }
2179 
2180 
2181  if (mPhylo->getNumbersOfPhyloLikelihoods().size() == 0)
2182  {
2183  if (warn)
2184  ApplicationTools::displayMessage("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : No phyloLikelihoods described");
2185  return mPhylo;
2186  }
2187 
2188  // get the result phylogeny => with number 0 in the
2189  // PhyloLikelihoodContainer
2190 
2192  ApplicationTools::displayMessage("Result Phylolikelihood ");
2193 
2194  string sumAll;
2195  const vector<size_t>& nPhyl = mPhylo->getNumbersOfPhyloLikelihoods();
2196 
2197  for (size_t i = 0; i < nPhyl.size(); i++)
2198  {
2199  if (i != 0)
2200  sumAll += " + ";
2201 
2202  sumAll += "phylo" + TextTools::toString(nPhyl[i]);
2203  }
2204 
2205  string resultDesc = ApplicationTools::getStringParameter("result", params, sumAll);
2206 
2207  // check if really formula, or previous phylo
2208 
2209  std::shared_ptr<PhyloLikelihoodInterface> nPL;
2210  size_t nP(0);
2211  bool flag(resultDesc.substr(0, 5) == "phylo");
2212 
2213  if (flag)
2214  {
2215  try
2216  {
2217  nP = TextTools::to<size_t>(resultDesc.substr(5));
2218  }
2219  catch (Exception& e)
2220  {
2221  flag = false;
2222  }
2223  }
2224 
2225  if (!flag)
2226  {
2227  nPL = make_shared<PhyloLikelihoodFormula>(context, mPhylo, resultDesc);
2228  if (verbose)
2229  ApplicationTools::displayResult(" Result", dynamic_cast<PhyloLikelihoodFormula*>(nPL.get())->output());
2230  }
2231  else
2232  {
2233  if (!mPhylo->hasPhyloLikelihood(nP))
2234  throw BadIntegerException("Unknown Phylolikelihood number for result", (int)nP);
2235  else
2236  nPL = mPhylo->getPhyloLikelihood(nP);
2237  if (verbose)
2238  ApplicationTools::displayResult(" Result", resultDesc);
2239  }
2240 
2241  mPhylo->addPhyloLikelihood(0, nPL);
2242 
2243  return mPhylo;
2244 }
2245 
2246 
2247 /******************************************************/
2248 /*** DISTRIBUTIONS ********************************/
2249 /******************************************************/
2250 
2251 
2253  const string& distDescription,
2254  map<string, string>& unparsedParameterValues,
2255  bool verbose)
2256 {
2257  string distName;
2258  MultipleDiscreteDistribution* pMDD = 0;
2259  map<string, string> args;
2260  KeyvalTools::parseProcedure(distDescription, distName, args);
2261 
2262  if (distName == "Dirichlet")
2263  {
2264  if (args.find("classes") == args.end())
2265  throw Exception("Missing argument 'classes' (vector of number of classes) in " + distName
2266  + " distribution");
2267  if (args.find("alphas") == args.end())
2268  throw Exception("Missing argument 'alphas' (vector of Dirichlet shape parameters) in Dirichlet distribution");
2269  vector<double> alphas;
2270  vector<size_t> classes;
2271 
2272  string rf = args["alphas"];
2273  StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
2274  while (strtok.hasMoreToken())
2275  alphas.push_back(TextTools::toDouble(strtok.nextToken()));
2276 
2277  rf = args["classes"];
2278  StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
2279  while (strtok2.hasMoreToken())
2280  classes.push_back(TextTools::to<size_t>(strtok2.nextToken()));
2281 
2282  pMDD = new DirichletDiscreteDistribution(classes, alphas);
2283  vector<string> v = pMDD->getParameters().getParameterNames();
2284 
2285  for (size_t i = 0; i < v.size(); i++)
2286  {
2287  unparsedParameterValues[v[i]] = TextTools::toString(pMDD->getParameterValue(pMDD->getParameterNameWithoutNamespace(v[i])));
2288  }
2289  }
2290  else
2291  throw Exception("Unknown multiple distribution name: " + distName);
2292 
2293  return pMDD;
2294 }
2295 
2296 /******************************************************************************/
2297 
2298 unique_ptr<DiscreteDistributionInterface> PhylogeneticsApplicationTools::getRateDistribution(
2299  const map<string, string>& params,
2300  const string& suffix,
2301  bool suffixIsOptional,
2302  bool verbose)
2303 {
2304  string distDescription = ApplicationTools::getStringParameter("rate_distribution", params, "Constant()", suffix, suffixIsOptional);
2305 
2306  string distName;
2307  map<string, string> args;
2308  KeyvalTools::parseProcedure(distDescription, distName, args);
2309 
2310  BppORateDistributionFormat bIO(true);
2311  unique_ptr<DiscreteDistributionInterface> rDist(bIO.readDiscreteDistribution(distDescription, true));
2312 
2313  if (verbose)
2314  {
2315  ApplicationTools::displayResult("Rate distribution", distName);
2316  ApplicationTools::displayResult("Number of classes", TextTools::toString(rDist->getNumberOfCategories()));
2317  }
2318 
2319  return rDist;
2320 }
2321 
2322 
2323 /*************************************************************/
2324 /***** OPTIMIZATORS *****************************************/
2325 /*************************************************************/
2326 
2327 std::shared_ptr<PhyloLikelihoodInterface> PhylogeneticsApplicationTools::optimizeParameters(
2328  std::shared_ptr<PhyloLikelihoodInterface> lik,
2329  const map<string, string>& params,
2330  const string& suffix,
2331  bool suffixIsOptional,
2332  bool verbose,
2333  int warn)
2334 {
2335  OptimizationTools::OptimizationOptions optopt(lik, params, suffix, suffixIsOptional, verbose, warn);
2336 
2337  if (optopt.optMethodModel == "None")
2338  return lik;
2339 
2340 
2341  unsigned int n = 0;
2342 
2344  {
2345  if (verbose && optopt.nstep > 1)
2346  ApplicationTools::displayResult("# of precision steps", TextTools::toString(optopt.nstep));
2347 
2348  optopt.parameters.matchParametersValues(lik->getParameters());
2350  }
2352  {
2353  // Uses Newton-raphson algorithm with numerical derivatives when required.
2354  optopt.parameters.matchParametersValues(lik->getParameters());
2355  if (dynamic_pointer_cast<SingleProcessPhyloLikelihood>(lik))
2357  dynamic_pointer_cast<SingleProcessPhyloLikelihood>(lik),
2358  optopt);
2359  else
2361  }
2362  else
2363  throw Exception("Unknown optimization method: " + optopt.optMethodModel);
2364 
2365  string finalMethod = ApplicationTools::getStringParameter("optimization.final", params, "none", suffix, suffixIsOptional, warn + 1);
2366  unique_ptr<OptimizerInterface> finalOptimizer = nullptr;
2367  if (finalMethod == "none")
2368  {}
2369  else if (finalMethod == "simplex")
2370  {
2371  finalOptimizer = make_unique<DownhillSimplexMethod>(lik);
2372  }
2373  else if (finalMethod == "powell")
2374  {
2375  finalOptimizer = make_unique<PowellMultiDimensions>(lik);
2376  }
2377  else
2378  throw Exception("Unknown final optimization method: " + finalMethod);
2379 
2380  if (finalOptimizer)
2381  {
2382  optopt.parameters.matchParametersValues(lik->getParameters());
2383  if (verbose)
2384  ApplicationTools::displayResult("Final optimization step", finalMethod);
2385  finalOptimizer->setProfiler(optopt.profiler);
2386  finalOptimizer->setMessageHandler(optopt.messenger);
2387  finalOptimizer->setMaximumNumberOfEvaluations(optopt.nbEvalMax);
2388  finalOptimizer->getStopCondition()->setTolerance(optopt.tolerance);
2389  finalOptimizer->setVerbose(verbose);
2390  finalOptimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
2391  finalOptimizer->init(optopt.parameters);
2392  finalOptimizer->optimize();
2393  n += finalOptimizer->getNumberOfEvaluations();
2394  }
2395 
2396  if (verbose)
2397  ApplicationTools::displayResult("Performed", TextTools::toString(n) + " function evaluations.");
2398  if (optopt.backupFile != "none")
2399  {
2400  string bf = optopt.backupFile + ".def";
2401  rename(optopt.backupFile.c_str(), bf.c_str());
2402  }
2403  return lik;
2404 }
2405 
2406 /******************************************************************************/
2407 
2409 {
2410  for (size_t i = 0; i < pl.size(); ++i)
2411  {
2412  auto constraint = pl[i].getConstraint();
2413  if (constraint)
2414  {
2415  double value = pl[i].getValue();
2416  if (!constraint->isCorrect(value - 1e-6) || !constraint->isCorrect(value + 1e-6))
2417  {
2418  ApplicationTools::displayWarning("This parameter has a value close to the boundary: " + pl[i].getName() + "(" + TextTools::toString(value) + ").");
2419  }
2420  }
2421  }
2422 }
2423 
2424 
2425 /******************************************************************************/
2426 /**************** Output ************************************/
2427 /******************************************************************************/
2428 
2430  const TreeTemplate<Node>& tree,
2431  const map<string, string>& params,
2432  const string& prefix,
2433  const string& suffix,
2434  bool suffixIsOptional,
2435  bool verbose,
2436  bool checkOnly,
2437  int warn)
2438 {
2439  string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
2440  string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, false, suffix, suffixIsOptional, "none", warn);
2441  OTree* treeWriter;
2442  if (format == "Newick")
2443  treeWriter = new Newick();
2444  else if (format == "Nexus")
2445  treeWriter = new NexusIOTree();
2446  else if (format == "NHX")
2447  treeWriter = new Nhx(false);
2448  else
2449  throw Exception("Unknown format for tree writing: " + format);
2450  if (!checkOnly)
2451  treeWriter->writeTree(tree, file, true);
2452  delete treeWriter;
2453  if (verbose)
2454  ApplicationTools::displayResult("Wrote tree to file ", file);
2455 }
2456 
2457 /******************************************************************************/
2458 
2460  const vector<const PhyloTree*>& trees,
2461  const map<string, string>& params,
2462  const string& prefix,
2463  const string& suffix,
2464  bool suffixIsOptional,
2465  bool verbose,
2466  bool checkOnly,
2467  int warn)
2468 {
2469  string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
2470  string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, false, suffix, suffixIsOptional, "none", warn);
2471  OMultiPhyloTree* treeWriter;
2472  if (format == "Newick")
2473  treeWriter = new Newick();
2474  else if (format == "Nexus")
2475  treeWriter = new NexusIOTree();
2476  else if (format == "NHX")
2477  treeWriter = new Nhx();
2478  else
2479  throw Exception("Unknown format for tree writing: " + format);
2480 
2481  if (!checkOnly)
2482  {
2483  treeWriter->writePhyloTrees(trees, file, true);
2484 
2485  if (verbose)
2486  ApplicationTools::displayResult("Wrote trees to file ", file);
2487  }
2488 
2489  delete treeWriter;
2490 }
2491 
2492 /******************************************************************************/
2493 
2495  const SubstitutionProcessCollection& spc,
2496  const map<string, string>& params,
2497  const string& prefix,
2498  const string& suffix,
2499  bool suffixIsOptional,
2500  bool verbose,
2501  bool checkOnly,
2502  bool withIds,
2503  int warn)
2504 {
2505  string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn + 1);
2506  string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, false, suffix, suffixIsOptional);
2507 
2508  OPhyloTree* treeWriter;
2509  if (format == "Newick")
2510  treeWriter = new Newick();
2511  else if (format == "Nexus")
2512  treeWriter = new NexusIOTree();
2513  else if (format == "NHX")
2514  treeWriter = new Nhx();
2515  else
2516  throw Exception("Unknown format for tree writing: " + format);
2517 
2518  if (!checkOnly)
2519  {
2520  vector<size_t> vTN = spc.getTreeNumbers();
2521 
2522  for (size_t i = 0; i < vTN.size(); i++)
2523  {
2524  auto tree = spc.getTree(vTN[i]);
2525 
2526  std::vector<shared_ptr<PhyloNode>> nodes = tree->getAllNodes();
2527 
2528  for (auto& node : nodes)
2529  {
2530  if (tree->isLeaf(node) && withIds)
2531  node->setName(TextTools::toString(tree->getNodeIndex(node)) + "_" + node->getName());
2532  else
2533  node->setProperty("NodeId", BppString(TextTools::toString(tree->getNodeIndex(node))));
2534  }
2535 
2536  Newick* nt = dynamic_cast<Newick*>(treeWriter);
2537  if (nt)
2538  nt->enableExtendedBootstrapProperty("NodeId");
2539 
2540  treeWriter->writePhyloTree(*tree, file + "_" + TextTools::toString(vTN[i]), true);
2541  }
2542  if (verbose)
2543  ApplicationTools::displayResult("Wrote trees to files : ", file + "_...");
2544  }
2545 
2546  delete treeWriter;
2547 }
2548 
2550 {
2551  out << "model=";
2552  map<string, string> globalAliases;
2553  vector<string> writtenNames;
2554  BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2555  bIO.write(model, out, globalAliases, writtenNames);
2556  out.endLine();
2557 }
2558 
2560  const SubstitutionProcessInterface& process,
2561  OutputStream& out,
2562  int warn)
2563 {
2564  try
2565  {
2566  auto& sp = dynamic_cast<const SimpleSubstitutionProcess&>(process);
2567  (out << "nonhomogeneous=no").endLine();
2568 
2569  out << "model=";
2570  map<string, string> globalAliases;
2571  vector<string> writtenNames;
2572  BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2573  bIO.write(sp.model(0, 0), out, globalAliases, writtenNames);
2574  out.endLine();
2575  return;
2576  }
2577  catch (bad_cast& e)
2578  {}
2579 
2580  try
2581  {
2582  const RateAcrossSitesSubstitutionProcess& pRA = dynamic_cast<const RateAcrossSitesSubstitutionProcess&>(process);
2583 
2584  (out << "nonhomogeneous=no").endLine();
2585 
2586  out << "model=";
2587  map<string, string> globalAliases;
2588  vector<string> writtenNames;
2589  const BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2590  bIO.write(process.model(0, 0), out, globalAliases, writtenNames);
2591  out.endLine();
2592  out.endLine();
2593 
2594  // Rate distribution
2595 
2596  out << "rate_distribution=";
2597  const BppORateDistributionFormat bIOR(true);
2598  bIOR.writeDiscreteDistribution(*pRA.getRateDistribution(), out, globalAliases, writtenNames);
2599  out.endLine();
2600  return;
2601  }
2602  catch (bad_cast& e)
2603  {}
2604 
2605  try
2606  {
2607  const NonHomogeneousSubstitutionProcess& pNH = dynamic_cast<const NonHomogeneousSubstitutionProcess&>(process);
2608 
2609  (out << "nonhomogeneous=general").endLine();
2610  (out << "nonhomogeneous.number_of_models=" << pNH.getNumberOfModels()).endLine();
2611 
2612  vector<string> writtenNames;
2613 
2614  // Loop over all models:
2615  for (size_t i = 0; i < pNH.getNumberOfModels(); ++i)
2616  {
2617  const auto model = pNH.getModel(i);
2618 
2619  // First get the aliases for this model:
2620  map<string, string> aliases;
2621 
2622  ParameterList pl = model->getParameters();
2623 
2624  for (size_t np = 0; np < pl.size(); ++np)
2625  {
2626  string nfrom = pNH.getFrom(pl[np].getName() + "_" + TextTools::toString(i + 1));
2627  if (nfrom != "")
2628  aliases[pl[np].getName()] = nfrom;
2629  }
2630 
2631  // Now print it:
2632  writtenNames.clear();
2633  out.endLine() << "model" << (i + 1) << "=";
2634  BppOBranchModelFormat bIOsm(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2635  bIOsm.write(*model, out, aliases, writtenNames);
2636  out.endLine();
2637  vector<unsigned int> ids = pNH.getNodesWithModel(i);
2638  out << "model" << (i + 1) << ".nodes_id=" << ids[0];
2639  for (size_t j = 1; j < ids.size(); ++j)
2640  {
2641  out << "," << ids[j];
2642  }
2643  out.endLine();
2644  }
2645 
2646  // Root frequencies:
2647  out.endLine();
2648  if (pNH.getRootFrequencySet())
2649  {
2650  out << "nonhomogeneous.root_freq=";
2651 
2652  map<string, string> aliases;
2653 
2655 
2656  for (size_t np = 0; np < pl.size(); ++np)
2657  {
2658  string nfrom = pNH.getFrom(pl[np].getName());
2659  if (nfrom != "")
2660  aliases[pl[np].getName()] = nfrom;
2661  }
2662 
2664  bIO.writeFrequencySet(pNH.rootFrequencySet(), out, aliases, writtenNames);
2665  }
2666  else
2667  out << "nonhomogeneous.stationarity=true";
2668  out.endLine();
2669 
2670  // Rate distribution
2671 
2672  map<string, string> aliases;
2673  auto& pdd = pNH.rateDistribution();
2674 
2675  ParameterList pl = pdd.getParameters();
2676  for (size_t np = 0; np < pl.size(); ++np)
2677  {
2678  string nfrom = pNH.getFrom(pl[np].getName());
2679  if (nfrom != "")
2680  aliases[pl[np].getName()] = nfrom;
2681  }
2682  out.endLine();
2683  out << "rate_distribution=";
2684  const BppORateDistributionFormat bIO(true);
2685  bIO.writeDiscreteDistribution(pdd, out, aliases, writtenNames);
2686  out.endLine();
2687  return;
2688  }
2689  catch (bad_cast& e)
2690  {}
2691 
2692  return;
2693 }
2694 
2696 {
2697  vector<string> writtenNames;
2698 
2699  // The models
2700  vector<size_t> vModN = collection.getModelNumbers();
2701 
2702  for (auto modn : vModN)
2703  {
2704  const auto& model = *collection.getModel(modn);
2705 
2706  // First get the aliases for this model:
2707  map<string, string> aliases;
2708 
2709  if (withAlias)
2710  {
2711  ParameterList pl = model.getParameters();
2712 
2713  for (size_t np = 0; np < pl.size(); ++np)
2714  {
2715  string nfrom = collection.getFrom(pl[np].getName() + "_" + TextTools::toString(modn));
2716  if (nfrom != "")
2717  aliases[pl[np].getName()] = nfrom;
2718  }
2719  }
2720 
2721  // Now print it:
2722  writtenNames.clear();
2723  out << "model" << modn << "=";
2724  BppOBranchModelFormat bIOsm(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2725  bIOsm.write(model, out, aliases, writtenNames);
2726  out.endLine();
2727  out.endLine();
2728  }
2729 
2730  // Root frequencies:
2731  vector<size_t> rootFreqN = collection.getFrequenciesNumbers();
2732 
2733  for (size_t i = 0; i < rootFreqN.size(); ++i)
2734  {
2735  auto rootFreq = collection.getFrequencySet(rootFreqN[i]);
2736 
2737  // Now print it:
2738  writtenNames.clear();
2739  out.endLine() << "root_freq" << rootFreqN[i] << "=";
2741 
2742  map<string, string> aliases;
2743 
2744  if (withAlias)
2745  {
2746  ParameterList pl = rootFreq->getParameters();
2747 
2748  for (size_t np = 0; np < pl.size(); ++np)
2749  {
2750  string nfrom = collection.getFrom(pl[np].getName() + "_" + TextTools::toString(rootFreqN[i]));
2751  if (nfrom != "")
2752  aliases[pl[np].getName()] = nfrom;
2753  }
2754  }
2755 
2756  bIOf.writeFrequencySet(*rootFreq, out, aliases, writtenNames);
2757  out.endLine();
2758  }
2759 
2760  // Rate distribution
2761 
2762  vector<size_t> vDistN = collection.getRateDistributionNumbers();
2763 
2764  for (auto distn : vDistN)
2765  {
2766  if (distn < 10000)
2767  {
2768  auto dist = collection.getRateDistribution(distn);
2769 
2770  // First get the aliases for this model:
2771  map<string, string> aliases;
2772 
2773  if (withAlias)
2774  {
2775  ParameterList pl = dist->getParameters();
2776 
2777  for (size_t np = 0; np < pl.size(); ++np)
2778  {
2779  string nfrom = collection.getFrom(pl[np].getName() + "_" + TextTools::toString(distn));
2780  if (nfrom != "")
2781  aliases[pl[np].getName()] = nfrom;
2782  }
2783  }
2784 
2785  // Now print it:
2786  writtenNames.clear();
2787  out.endLine() << "rate_distribution" << distn << "=";
2788  BppORateDistributionFormat bIOd(true);
2789  bIOd.writeDiscreteDistribution(*dist, out, aliases, writtenNames);
2790  out.endLine();
2791  }
2792  }
2793 
2794  // scenarios
2795 
2796  vector<size_t> vSce = collection.getScenarioNumbers();
2797 
2798  if (vSce.size() > 0)
2799  out.endLine();
2800 
2801  vector<const ModelPath*> vMP;
2802 
2803  // first output the scenarios
2804  for (const auto& scennum : vSce)
2805  {
2806  const auto scen = collection.getModelScenario(scennum);
2807 
2808  out.endLine();
2809 
2810  out << "scenario" << scennum << "=";
2811 
2812  size_t nbMP = scen->getNumberOfModelPaths();
2813 
2814  for (size_t mpn = 0; mpn < nbMP; mpn++)
2815  {
2816  const auto& mp = scen->getModelPath(mpn);
2817 
2818  auto itmp = find(vMP.begin(), vMP.end(), mp.get());
2819  auto inmp = std::distance(vMP.begin(), itmp);
2820  if (itmp == vMP.end())
2821  vMP.push_back(mp.get());
2822 
2823  if (mpn != 0)
2824  out << "&";
2825  out << "path" << TextTools::toString(inmp + 1);
2826  }
2827  out.endLine();
2828  }
2829 
2830  // then the model path
2831  for (size_t inmp = 0; inmp < vMP.size(); inmp++)
2832  {
2833  out.endLine();
2834  out << "path" << inmp + 1 << "=";
2835 
2836  const ModelPath& mp = *vMP[inmp];
2837 
2838  auto vMod = mp.getModels();
2839 
2840  bool dem = true;
2841  for (const auto& mod:vMod)
2842  {
2843  // look for model number in collection
2844  size_t modN = collection.getModelIndex(mod);
2845 
2846  if (!dem)
2847  out << "&";
2848 
2849  out << "model" << modN;
2850  out << "[" << mp.getPathNode(mod).to_string() << "]";
2851  dem = false;
2852  }
2853  out.endLine();
2854  }
2855 
2856  // processes
2857  out.endLine();
2858 
2859  vector<size_t> vprocN = collection.getSubstitutionProcessNumbers();
2860 
2861  for (size_t i = 0; i < vprocN.size(); ++i)
2862  {
2863  const auto& spcm = collection.substitutionProcess(vprocN[i]);
2864 
2865  out << "process" << vprocN[i] << "=";
2866 
2867  if (spcm.getNumberOfModels() == 1)
2868  out << "Homogeneous(model=" << spcm.getModelNumbers()[0];
2869  else
2870  {
2871  out << "Nonhomogeneous(";
2872  vector<size_t> vMN = spcm.getModelNumbers();
2873  for (size_t j = 0; j < vMN.size(); ++j)
2874  {
2875  if (j != 0)
2876  out << ",";
2877 
2878  out << "model" << (j + 1) << "=" << vMN[j];
2879  out << ",";
2880 
2881  vector<unsigned int> ids = spcm.getNodesWithModel(vMN[j]);
2882  out << "model" << (j + 1) << ".nodes_id=(" << ids[0];
2883  for (size_t k = 1; k < ids.size(); ++k)
2884  {
2885  out << "," << ids[k];
2886  }
2887  out << ")";
2888  }
2889  }
2890 
2891  out << ", tree=" << spcm.getTreeNumber();
2892 
2893  out << ", rate=";
2894  size_t dN = spcm.getRateDistributionNumber();
2895 
2896  if (dN < 10000)
2897  out << dN;
2898  else
2899  out << size_t(dN / 10000 - 1) << "." << dN % 10000;
2900 
2901  if (spcm.getRootFrequencySet())
2902  out << ", root_freq=" << spcm.getRootFrequenciesNumber();
2903 
2904  if (spcm.getModelScenario())
2905  out << ", scenario=" << spcm.getModelScenarioNumber();
2906 
2907  out << ")";
2908  out.endLine();
2909  out.endLine();
2910  }
2911 }
2912 
2913 
2915 {
2916  out << "# Log likelihood = ";
2917 
2918  std::shared_ptr<const PhyloLikelihoodInterface> result = phylocont[0];
2919 
2920  if (!result)
2921  {
2922  out << "Nan";
2923  out.endLine();
2924  return;
2925  }
2926 
2927  out.setPrecision(20) << (-result->getValue());
2928  out.endLine();
2929  out.endLine();
2930 
2931 
2932  // First output result
2933  out << "result=";
2934 
2935  auto pop = dynamic_pointer_cast<const PhyloLikelihoodFormula>(result);
2936 
2937  vector<size_t> phyldep;
2938 
2939  if (!pop)
2940  {
2941  out << "phylo1";
2942  phyldep.push_back(1);
2943  }
2944  else
2945  {
2946  string popout = pop->output();
2947 
2948  out << popout;
2949 
2950  StringTokenizer st(popout, "phylo", true, true);
2951  st.nextToken();
2952 
2953 
2954  while (st.hasMoreToken())
2955  {
2956  string ex = st.nextToken();
2957  phyldep.push_back((size_t)(atoi(ex.c_str())));
2958  }
2959  }
2960 
2961  out.endLine();
2962  out.endLine();
2963 
2964  // Then the other phylolikelihoods
2965 
2966  while (phyldep.size() != 0)
2967  {
2968  size_t num = phyldep[0];
2969  std::shared_ptr<const PhyloLikelihoodInterface> phyloLike = phylocont[num];
2970 
2971  // remove phylolikelihoods with this number
2972  auto itf = find(phyldep.begin(), phyldep.end(), num);
2973  while (itf != phyldep.end())
2974  {
2975  phyldep.erase(itf);
2976  itf = find(itf, phyldep.end(), num);
2977  }
2978 
2979 
2980  // then output
2981 
2982  if (dynamic_pointer_cast<const SingleDataPhyloLikelihoodInterface>(phyloLike))
2983  printParameters(*dynamic_pointer_cast<const SingleDataPhyloLikelihoodInterface>(phyloLike), out, num, warn);
2984  else
2985  {
2986  out << "phylo" << num << "=";
2987 
2988  auto mDP = dynamic_pointer_cast<const PhyloLikelihoodSetInterface>(phyloLike);
2989  if (mDP)
2990  {
2991  if (dynamic_pointer_cast<const AlignedPhyloLikelihoodMixture>(phyloLike))
2992  {
2993  auto pM = dynamic_pointer_cast<const AlignedPhyloLikelihoodMixture>(phyloLike);
2994 
2995  out << "Mixture(probas=(" << VectorTools::paste(pM->getPhyloProbabilities(), ",");
2996 
2997  out << "),";
2998  }
2999 
3000  else if (dynamic_pointer_cast<const AlignedPhyloLikelihoodHmm>(phyloLike))
3001  {
3002  auto pM = dynamic_pointer_cast<const AlignedPhyloLikelihoodHmm>(phyloLike);
3003  out << "HMM(probas=";
3004 
3005  RowMatrix<double> tMt;
3006  copyEigenToBpp(pM->getHmmTransitionMatrix(), tMt);
3007  MatrixTools::print(tMt, out);
3008 
3009  out << ",";
3010  }
3011 
3012  else if (dynamic_pointer_cast<const AlignedPhyloLikelihoodAutoCorrelation>(phyloLike))
3013  {
3014  auto pM = dynamic_pointer_cast<const AlignedPhyloLikelihoodAutoCorrelation>(phyloLike);
3015 
3016  out << "AutoCorr(lambdas=(";
3017 
3018  Vdouble vP;
3019  for (unsigned int i = 0; i < pM->getHmmTransitionMatrix().cols(); ++i)
3020  {
3021  vP.push_back(pM->getHmmTransitionMatrix()(i, i));
3022  }
3023 
3024  out << VectorTools::paste(vP, ",");
3025 
3026  out << "),";
3027  }
3028  else if (dynamic_pointer_cast<const AlignedPhyloLikelihoodProduct>(phyloLike))
3029  {
3030  out << "Product(";
3031  }
3032  else
3033  throw Exception("PhylogeneticsApplicationTools::printParameters - unknown phylolikelihood type : phylo " + TextTools::toString(num));
3034 
3035 
3036  vector<size_t> vPN = mDP->getNumbersOfPhyloLikelihoods();
3037 
3038  for (size_t i = 0; i < vPN.size(); i++)
3039  {
3040  out << "phylo" << i + 1 << "=" << vPN[i];
3041  if (i != vPN.size() - 1)
3042  out << ",";
3043  }
3044 
3045  out << ")";
3046 
3047  // update phyldep
3048  for (size_t i = 0; i < vPN.size(); i++)
3049  {
3050  if (find(phyldep.begin(), phyldep.end(), vPN[i]) == phyldep.end())
3051  phyldep.push_back(vPN[i]);
3052  }
3053  }
3054  out.endLine();
3055  }
3056  out.endLine();
3057  }
3058  out.endLine();
3059 }
3060 
3061 
3063 {
3064  out << "phylo" << TextTools::toString(nPhylo) << "=";
3065 
3066  out << "Single(";
3067 
3068  try
3069  {
3070  auto& pMP = dynamic_cast<const SequencePhyloLikelihoodInterface&>(phyloLike);
3071  out << "process=" << pMP.getSequenceEvolutionNumber();
3072  goto finish;
3073  }
3074  catch (bad_cast& e)
3075  {}
3076 
3077  try
3078  {
3079  auto& pS = dynamic_cast<const SingleProcessPhyloLikelihood&>(phyloLike);
3080  out << "process=" << pS.getSubstitutionProcessNumber();
3081  }
3082  catch (bad_cast& e)
3083  {}
3084 
3085 finish:
3086  out << ", data=" << TextTools::toString(phyloLike.getNData()) << ")";
3087  out.endLine();
3088 }
3089 
3091 {
3092  out << "process" << TextTools::toString(nEvol) << "=";
3093 
3094  if (dynamic_cast<const OneProcessSequenceEvolution*>(&evol))
3095  {
3096  const OneProcessSequenceEvolution* pOP = dynamic_cast<const OneProcessSequenceEvolution*>(&evol);
3097 
3098  out << "Simple(process=" << pOP->getSubstitutionProcessNumber() << ")";
3099  }
3100  else if (dynamic_cast<const MultiProcessSequenceEvolution*>(&evol))
3101  {
3102  const MultiProcessSequenceEvolution* pMP = dynamic_cast<const MultiProcessSequenceEvolution*>(&evol);
3103 
3104  if (dynamic_cast<const MixtureSequenceEvolution*>(&evol))
3105  {
3106  const MixtureSequenceEvolution* pM = dynamic_cast<const MixtureSequenceEvolution*>(&evol);
3107 
3108  out << "Mixture(probas=(" << VectorTools::paste(pM->getSubProcessProbabilities(), ",");
3109  out << "),";
3110  }
3111 
3112  else if (dynamic_cast<const HmmSequenceEvolution*>(&evol))
3113  {
3114  const HmmSequenceEvolution* pM = dynamic_cast<const HmmSequenceEvolution*>(&evol);
3115  out << "HMM(probas=";
3116 
3117  const Matrix<double>& tMt = pM->hmmTransitionMatrix().getPij();
3118  MatrixTools::print(tMt, out);
3119 
3120  out << ",";
3121  }
3122  else if (dynamic_cast<const AutoCorrelationSequenceEvolution*>(&evol))
3123  {
3124  const AutoCorrelationSequenceEvolution* pM = dynamic_cast<const AutoCorrelationSequenceEvolution*>(&evol);
3125 
3126  out << "AutoCorr(lambdas=(";
3127 
3128  Vdouble vP;
3129  for (unsigned int i = 0; i < pM->getNumberOfSubstitutionProcess(); i++)
3130  {
3131  vP.push_back(pM->hmmTransitionMatrix().Pij(i, i));
3132  }
3133 
3134  out << VectorTools::paste(vP, ",");
3135 
3136  out << "),";
3137  }
3138  else if (dynamic_cast<const PartitionSequenceEvolution*>(&evol))
3139  {
3140  const PartitionSequenceEvolution* pM = dynamic_cast<const PartitionSequenceEvolution*>(&evol);
3141 
3142  out << "Partition(";
3143 
3144  const map<size_t, vector<size_t>>& mProcPos = pM->mapOfProcessSites();
3145 
3146  vector<size_t> vP = pMP->getSubstitutionProcessNumbers();
3147 
3148  for (unsigned int i = 0; i < vP.size(); i++)
3149  {
3150  out << "process" << TextTools::toString(i + 1) << ".sites=";
3151 
3152  vector<size_t> v = mProcPos.find(vP[i])->second + 1;
3153 
3154  if (v.size() > 1)
3155  out << "(";
3156 
3157  VectorTools::printRange(v, out, ",", ":");
3158 
3159  if (v.size() > 1)
3160  out << ")";
3161 
3162  out << ",";
3163  }
3164  }
3165 
3166  vector<size_t> vPN = pMP->getSubstitutionProcessNumbers();
3167 
3168  for (size_t i = 0; i < vPN.size(); i++)
3169  {
3170  out << "process" << i + 1 << "=" << vPN[i];
3171  if (i != vPN.size() - 1)
3172  out << ",";
3173  }
3174 
3175  out << ")";
3176  }
3177 
3178  out.endLine();
3179 }
3180 
3181 
3182 // ///////////////////////////////////////////////////////
3183 // Analysis Information
3184 // //////////////////////////////////////////////////////
3185 
3186 
3187 void PhylogeneticsApplicationTools::printAnalysisInformation(const PhyloLikelihoodContainer& phylocont, const string& infosFile, int warn)
3188 {
3189  std::shared_ptr<const PhyloLikelihoodInterface> result = phylocont[0];
3190 
3191  if (!result)
3192  return;
3193 
3194  vector<size_t> phyldep = phylocont.getNumbersOfPhyloLikelihoods();
3195 
3196  while (phyldep.size() != 0)
3197  {
3198  size_t num = phyldep[0];
3199 
3200  phyldep.erase(phyldep.begin());
3201 
3202  std::shared_ptr<const PhyloLikelihoodInterface> phyloLike = phylocont[num];
3203  // output
3204 
3205  string info_out = infosFile + "_" + TextTools::toString(num);
3206 
3207  if (dynamic_pointer_cast<const SingleDataPhyloLikelihoodInterface>(phyloLike) && num != 0)
3208  printAnalysisInformation(*dynamic_pointer_cast<const SingleDataPhyloLikelihoodInterface>(phyloLike), info_out, warn);
3209  else
3210  {
3211  auto sOAP = dynamic_pointer_cast<const AlignedPhyloLikelihoodSetInterface>(phyloLike);
3212  if (sOAP)
3213  {
3214  if (num != 0)
3215  printAnalysisInformation(*sOAP, info_out, warn);
3216 
3217  vector<size_t> vPN = sOAP->getNumbersOfPhyloLikelihoods();
3218 
3219  // update phyldep
3220  phyldep.assign(vPN.begin(), vPN.end());
3221  phyldep = VectorTools::unique(phyldep);
3222  }
3223  else
3224  {
3225  auto sOAB = dynamic_pointer_cast<const PhyloLikelihoodSetInterface>(phyloLike);
3226  if (sOAB)
3227  {
3228  vector<size_t> vPN = sOAB->getNumbersOfPhyloLikelihoods();
3229 
3230  // update phyldep
3231  phyldep.assign(vPN.begin(), vPN.end());
3232  phyldep = VectorTools::unique(phyldep);
3233  }
3234  }
3235  }
3236  }
3237 }
3238 
3239 
3241 {
3242  const AlignedPhyloLikelihoodMixture* mOAP = nullptr;
3243  const AlignedPhyloLikelihoodHmm* hOAP = nullptr;
3244  const AlignedPhyloLikelihoodAutoCorrelation* aCOAP = nullptr;
3245 
3246  vector<size_t> phyloNum = sOAP.getNumbersOfPhyloLikelihoods();
3247  size_t nbP = phyloNum.size();
3248 
3249  if (dynamic_cast<const AlignedPhyloLikelihoodProduct*>(&sOAP) == nullptr)
3250  {
3251  StlOutputStream out(make_unique<ofstream>(infosFile.c_str(), ios::out));
3252 
3253  if (dynamic_cast<const AlignedPhyloLikelihoodMixture*>(&sOAP) != nullptr)
3254  mOAP = dynamic_cast<const AlignedPhyloLikelihoodMixture*>(&sOAP);
3255  else if (dynamic_cast<const AlignedPhyloLikelihoodHmm*>(&sOAP) != nullptr)
3256  hOAP = dynamic_cast<const AlignedPhyloLikelihoodHmm*>(&sOAP);
3257  else if (dynamic_cast<const AlignedPhyloLikelihoodAutoCorrelation*>(&sOAP) != nullptr)
3258  aCOAP = dynamic_cast<const AlignedPhyloLikelihoodAutoCorrelation*>(&sOAP);
3259 
3260  vector<string> colNames;
3261  colNames.push_back("Sites");
3262  colNames.push_back("lnL");
3263 
3264  for (size_t i = 0; i < nbP; i++)
3265  {
3266  colNames.push_back("lnL_phylo_" + TextTools::toString(phyloNum[i]));
3267  }
3268  for (size_t i = 0; i < nbP; i++)
3269  {
3270  colNames.push_back("prob_phylo_" + TextTools::toString(phyloNum[i]));
3271  }
3272 
3273 
3274  vector<string> row(2 + (nbP > 1 ? 2 * nbP : 0));
3275  DataTable* infos = new DataTable(colNames);
3276 
3277  Vdouble vap(0);
3278  Vdouble vlog(nbP);
3279 
3280  if (mOAP)
3281  vap = mOAP->getPhyloProbabilities();
3282 
3283  size_t nSites = sOAP.getNumberOfSites();
3284 
3285  for (size_t i = 0; i < nSites; i++)
3286  {
3287  row[0] = (string("[" + TextTools::toString(i) + "]"));
3289 
3290  if (nbP > 1)
3291  {
3292  for (size_t j = 0; j < nbP; j++)
3293  {
3294  vlog[j] = sOAP.getLogLikelihoodForASiteForAPhyloLikelihood(i, phyloNum[j]);
3295  }
3296 
3297  for (size_t j = 0; j < nbP; j++)
3298  {
3299  row[2 + j] = TextTools::toString(vlog[j]);
3300  }
3301 
3302  if (mOAP)
3303  {
3304  double sum = VectorTools::sumExp(vlog, vap);
3305  for (size_t j = 0; j < nbP; j++)
3306  {
3307  row[2 + nbP + j] = TextTools::toString(exp(vlog[j]) * vap[j] / sum);
3308  }
3309  }
3310  else
3311  {
3312  if (hOAP)
3314  else if (aCOAP)
3316 
3317  for (size_t j = 0; j < vap.size(); j++)
3318  {
3319  row[2 + nbP + j] = TextTools::toString(vap[j]);
3320  }
3321  }
3322  }
3323 
3324  infos->addRow(row);
3325  }
3326 
3327  DataTable::write(*infos, out, "\t");
3328  delete infos;
3329  }
3330 }
3331 
3332 /******************************************************************************/
3333 
3335  const SingleDataPhyloLikelihoodInterface& phyloLike,
3336  const string& infosFile,
3337  int warn)
3338 {
3339  try
3340  {
3341  auto& pSPL = dynamic_cast<const SingleProcessPhyloLikelihood&>(phyloLike);
3342 
3343  StlOutputStream out(make_unique<ofstream>(infosFile.c_str(), ios::out));
3344 
3345  std::shared_ptr<const SubstitutionProcessInterface> pSP = pSPL.getSubstitutionProcess();
3346 
3347  vector<string> colNames;
3348  colNames.push_back("Sites");
3349  colNames.push_back("is.complete");
3350  colNames.push_back("is.constant");
3351  colNames.push_back("lnL");
3352 
3353  auto pDD = pSP->getRateDistribution();
3354  size_t nbR = 0;
3355 
3356  if (pDD)
3357  {
3358  nbR = pDD->getNumberOfCategories();
3359 
3360  if (nbR > 1)
3361  for (size_t i = 0; i < nbR; ++i)
3362  {
3363  colNames.push_back("Pr_rate=" + TextTools::toString(pDD->getCategory(i)));
3364  }
3365  }
3366  colNames.push_back("rc");
3367  colNames.push_back("pr");
3368 
3369  std::shared_ptr<const AlignmentDataInterface> sites = phyloLike.getData();
3370 
3371  vector<string> row(6 + (nbR > 1 ? nbR : 0));
3372  auto infos = make_unique<DataTable>(colNames);
3373 
3374  VVdouble vvPP(pSPL.getPosteriorProbabilitiesPerSitePerClass());
3375 
3376  for (size_t i = 0; i < sites->getNumberOfSites(); ++i)
3377  {
3378  double lnL = phyloLike.getLogLikelihoodForASite(i);
3379 
3380  const CoreSiteInterface& currentSite = sites->site(i);
3381  int currentSiteCoordinate = currentSite.getCoordinate();
3382  string isCompl = "NA";
3383  string isConst = "NA";
3384  try
3385  {
3386  isCompl = (SymbolListTools::isComplete(currentSite) ? "1" : "0");
3387  }
3388  catch (EmptySiteException& ex)
3389  {}
3390  try
3391  {
3392  isConst = (SymbolListTools::isConstant(currentSite) ? "1" : "0");
3393  }
3394  catch (EmptySiteException& ex)
3395  {}
3396  row[0] = (string("[" + TextTools::toString(currentSiteCoordinate) + "]"));
3397  row[1] = isCompl;
3398  row[2] = isConst;
3399  row[3] = TextTools::toString(lnL);
3400 
3401  if (nbR > 1)
3402  {
3403  double pr = 0;
3404  for (size_t j = 0; j < nbR; ++j)
3405  {
3406  row[4 + j] = TextTools::toString(vvPP[i][j]);
3407  pr += vvPP[i][j] * pDD->getCategory(j);
3408  }
3409 
3410  row[4 + nbR] = TextTools::toString(VectorTools::whichMax(vvPP[i]) + 1);
3411  row[5 + nbR] = TextTools::toString(pr);
3412  }
3413  else
3414  {
3415  row[4] = "1";
3416  row[5] = "1";
3417  }
3418 
3419  infos->addRow(row);
3420  }
3421 
3422  DataTable::write(*infos, out, "\t");
3423  return;
3424  }
3425  catch (bad_cast& e)
3426  {}
3427 
3428  try
3429  {
3430  auto& pPPL = dynamic_cast<const PartitionProcessPhyloLikelihood&>(phyloLike);
3431 
3432  auto& pSE = dynamic_cast<const PartitionSequenceEvolution&>(pPPL.sequenceEvolution());
3433 
3434  const map<size_t, vector<size_t>>& mProcPos = pSE.mapOfProcessSites();
3435 
3436  vector<size_t> nbProc = pSE.getSubstitutionProcessNumbers();
3437 
3438  map<size_t, size_t> mNbr;
3439 
3440  for (auto nP : nbProc)
3441  {
3442  auto& sp = pSE.substitutionProcess(nP);
3443  auto pDD = sp.getRateDistribution();
3444  mNbr[nP] = (pDD ? pDD->getNumberOfCategories() : 1);
3445  }
3446 
3447  size_t maxR = max_element(mNbr.begin(), mNbr.end(), [](const std::pair<size_t, size_t>& p1, const std::pair<size_t, size_t>& p2){
3448  return p1.second < p2.second;
3449  })->second;
3450 
3451  StlOutputStream out(make_unique<ofstream>(infosFile.c_str(), ios::out));
3452 
3453  vector<string> colNames;
3454  colNames.push_back("Sites");
3455  colNames.push_back("is.complete");
3456  colNames.push_back("is.constant");
3457  colNames.push_back("lnL");
3458 
3459  if (maxR > 1)
3460  for (size_t i = 0; i < maxR; i++)
3461  {
3462  colNames.push_back("prob" + TextTools::toString(i + 1));
3463  }
3464 
3465  size_t nbSites = pSE.getNumberOfSites();
3466 
3467  vector<string> row(4 + (maxR > 1 ? maxR : 0));
3468  auto infos = make_unique<DataTable>(nbSites, colNames);
3469  for (auto nP : nbProc)
3470  {
3471  auto pSPPL = dynamic_pointer_cast<const SingleProcessPhyloLikelihood>(pPPL.getPhyloLikelihood(nP));
3472 
3473  if (!pSPPL)
3474  throw Exception("PhylogeneticsApplicationTools::printAnalysisInformation : no SingleProcessPhyloLikelihood in PartitionProcessPhyloLikelihood.");
3475 
3476  size_t nbr = mNbr[pSPPL->getSubstitutionProcessNumber()];
3477 
3478  const vector<size_t>& mPos = mProcPos.at(nP);
3479 
3480  auto sites = pSPPL->getData();
3481 
3482  for (size_t i = 0; i < sites->getNumberOfSites(); ++i)
3483  {
3484  double lnL = pSPPL->getLogLikelihoodForASite(i);
3485 
3486  const CoreSiteInterface& currentSite = sites->site(i);
3487  int currentSiteCoordinate = currentSite.getCoordinate();
3488  string isCompl = "NA";
3489  string isConst = "NA";
3490  try
3491  {
3492  isCompl = (SymbolListTools::isComplete(currentSite) ? "1" : "0");
3493  }
3494  catch (EmptySiteException& ex)
3495  {}
3496  try
3497  {
3498  isConst = (SymbolListTools::isConstant(currentSite) ? "1" : "0");
3499  }
3500  catch (EmptySiteException& ex)
3501  {}
3502  row[0] = (string("[" + TextTools::toString(currentSiteCoordinate) + "]"));
3503  row[1] = isCompl;
3504  row[2] = isConst;
3505  row[3] = TextTools::toString(lnL);
3506 
3507  if (nbr > 1)
3508  {
3509  Vdouble vPP = pSPPL->getPosteriorProbabilitiesForSitePerClass(i);
3510 
3511  for (size_t j = 0; j < nbr; ++j)
3512  {
3513  row[4 + j] = TextTools::toString(vPP[j]);
3514  }
3515  }
3516 
3517  for (size_t j = nbr; j < maxR; j++)
3518  {
3519  row[4 + j] = "NA";
3520  }
3521 
3522  infos->setRow(mPos[i], row);
3523  }
3524  }
3525 
3526  DataTable::write(*infos, out, "\t");
3527  return;
3528  }
3529  catch (bad_cast& e)
3530  {}
3531 
3532  try
3533  {
3534  auto& pMPL = dynamic_cast<const MultiProcessSequencePhyloLikelihood&>(phyloLike);
3535 
3536  StlOutputStream out(make_unique<ofstream>(infosFile.c_str(), ios::out));
3537 
3538  vector<string> colNames;
3539  colNames.push_back("Sites");
3540  colNames.push_back("is.complete");
3541  colNames.push_back("is.constant");
3542  colNames.push_back("lnL");
3543 
3544  size_t nbP = pMPL.getNumberOfSubstitutionProcess();
3545 
3546  if (nbP > 1)
3547  {
3548  for (size_t i = 0; i < nbP; ++i)
3549  {
3550  colNames.push_back("lnL" + TextTools::toString(i + 1));
3551  }
3552  for (size_t i = 0; i < nbP; ++i)
3553  {
3554  colNames.push_back("prob" + TextTools::toString(i + 1));
3555  }
3556  }
3557 
3558  auto sites = phyloLike.getData();
3559 
3560  vector<string> row(4 + (nbP > 1 ? 2 * nbP : 0));
3561  DataTable* infos = new DataTable(colNames);
3562 
3563  VVdouble vvPP = pMPL.getPosteriorProbabilitiesPerSitePerProcess();
3564  VVdouble vvL = pMPL.getLikelihoodPerSitePerProcess();
3565 
3566  for (size_t i = 0; i < sites->getNumberOfSites(); ++i)
3567  {
3568  double lnL = phyloLike.getLogLikelihoodForASite(i);
3569  const CoreSiteInterface& currentSite = sites->site(i);
3570  int currentSiteCoordinate = currentSite.getCoordinate();
3571  string isCompl = "NA";
3572  string isConst = "NA";
3573  try
3574  {
3575  isCompl = (SymbolListTools::isComplete(currentSite) ? "1" : "0");
3576  }
3577  catch (EmptySiteException& ex)
3578  {}
3579  try
3580  {
3581  isConst = (SymbolListTools::isConstant(currentSite) ? "1" : "0");
3582  }
3583  catch (EmptySiteException& ex)
3584  {}
3585  row[0] = (string("[" + TextTools::toString(currentSiteCoordinate) + "]"));
3586  row[1] = isCompl;
3587  row[2] = isConst;
3588  row[3] = TextTools::toString(lnL);
3589 
3590  if (nbP > 1)
3591  {
3592  for (size_t j = 0; j < nbP; j++)
3593  {
3594  row[4 + j] = TextTools::toString(std::log(vvL[i][j]));
3595  }
3596  for (size_t j = 0; j < nbP; j++)
3597  {
3598  row[4 + nbP + j] = TextTools::toString(vvPP[i][j]);
3599  }
3600  }
3601  infos->addRow(row);
3602  }
3603 
3604  DataTable::write(*infos, out, "\t");
3605  delete infos;
3606  return;
3607  }
3608  catch (bad_cast& e)
3609  {}
3610 
3611  return;
3612 }
3613 
3614 
3615 /******************************************************************************/
3616 
3618 {
3619  out << "rate_distribution=";
3620  map<string, string> globalAliases;
3621  vector<string> writtenNames;
3623  bIO->writeDiscreteDistribution(rDist, out, globalAliases, writtenNames);
3624  delete bIO;
3625  out.endLine();
3626 }
3627 
3628 /************************
3629 * Substitution Mapping *
3630 ************************/
3631 unique_ptr<SubstitutionCountInterface> PhylogeneticsApplicationTools::getSubstitutionCount(
3632  std::shared_ptr<const Alphabet> alphabet,
3633  std::shared_ptr<const SubstitutionModelInterface> model,
3634  const map<string, string>& params,
3635  string suffix,
3636  bool verbose,
3637  int warn)
3638 {
3639  auto stateMap = model->getStateMap();
3640 
3641  unique_ptr<SubstitutionCountInterface> substitutionCount = nullptr;
3642  string nijtOption;
3643  map<string, string> nijtParams;
3644  string nijtText = ApplicationTools::getStringParameter("nijt", params, "Uniformization", suffix, true, warn);
3645  KeyvalTools::parseProcedure(nijtText, nijtOption, nijtParams);
3646 
3647  if (nijtOption == "Laplace")
3648  {
3649  size_t trunc = ApplicationTools::getParameter<size_t>("trunc", nijtParams, 10, suffix, true, warn + 1);
3650  substitutionCount = make_unique<LaplaceSubstitutionCount>(model, trunc);
3651  }
3652  else if (nijtOption == "Uniformization")
3653  {
3654  string weightOption = ApplicationTools::getStringParameter("weight", nijtParams, "None", "", true, warn + 1);
3655  shared_ptr<const AlphabetIndex2> weights(SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:"));
3656  string distanceOption = ApplicationTools::getStringParameter("distance", nijtParams, "None", "", true, warn + 1);
3657  shared_ptr<const AlphabetIndex2> distances(SequenceApplicationTools::getAlphabetIndex2(alphabet, distanceOption, "Substitution distances:"));
3658  substitutionCount = make_unique<UniformizationSubstitutionCount>(model, make_shared<TotalSubstitutionRegister>(stateMap), weights, distances);
3659  }
3660  else if (nijtOption == "Decomposition")
3661  {
3662  string weightOption = ApplicationTools::getStringParameter("weight", nijtParams, "None", "", true, warn + 1);
3663  shared_ptr<const AlphabetIndex2> weights(SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:"));
3664  string distanceOption = ApplicationTools::getStringParameter("distance", nijtParams, "None", "", true, warn + 1);
3665  shared_ptr<const AlphabetIndex2> distances(SequenceApplicationTools::getAlphabetIndex2(alphabet, distanceOption, "Substitution distances:"));
3666  auto revModel = dynamic_pointer_cast<const ReversibleSubstitutionModelInterface>(model);
3667  if (revModel)
3668  substitutionCount = make_unique<DecompositionSubstitutionCount>(revModel, make_shared<TotalSubstitutionRegister>(stateMap), weights, distances);
3669  else
3670  throw Exception("Decomposition method can only be used with reversible substitution models.");
3671  }
3672  else if (nijtOption == "Naive")
3673  {
3674  string weightOption = ApplicationTools::getStringParameter("weight", nijtParams, "None", "", true, warn + 1);
3675  std::shared_ptr<const AlphabetIndex2> weights(SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:"));
3676  string distanceOption = ApplicationTools::getStringParameter("distance", nijtParams, "", "", true, warn + 1);
3677  if (distanceOption != "")
3678  ApplicationTools::displayMessage("Naive substitution count: distances not handled");
3679 
3680  substitutionCount = make_unique<NaiveSubstitutionCount>(model, make_shared<TotalSubstitutionRegister>(stateMap), false, weights);
3681  }
3682  else if (nijtOption == "Label")
3683  {
3684  substitutionCount = make_unique<LabelSubstitutionCount>(model);
3685  }
3686  else if (nijtOption == "ProbOneJump")
3687  {
3688  substitutionCount = make_unique<OneJumpSubstitutionCount>(model);
3689  }
3690  else
3691  {
3692  ApplicationTools::displayError("Invalid option '" + nijtOption + ", in 'nijt' parameter.");
3693  exit(-1);
3694  }
3695  ApplicationTools::displayResult("Substitution count procedure", nijtOption);
3696 
3697  // Send results:
3698  return substitutionCount;
3699 }
3700 
3701 /****************************************************************************/
3702 
3703 
3704 unique_ptr<SubstitutionRegisterInterface> PhylogeneticsApplicationTools::getSubstitutionRegister(
3705  const string& regTypeDesc,
3706  std::shared_ptr<const StateMapInterface> stateMap,
3707  std::shared_ptr<const GeneticCode> genCode,
3708  std::shared_ptr<AlphabetIndex2>& weights,
3709  std::shared_ptr<AlphabetIndex2>& distances,
3710  bool verbose)
3711 {
3712  string regType = "";
3713  map<string, string> regArgs;
3714  KeyvalTools::parseProcedure(regTypeDesc, regType, regArgs);
3715 
3716  auto alphabet = stateMap->getAlphabet();
3717 
3718  unique_ptr<SubstitutionRegisterInterface> reg = nullptr;
3719  weights = nullptr;
3720  distances = nullptr;
3721 
3722  string weightOption = ApplicationTools::getStringParameter("weight", regArgs, "None", "", true, 1);
3723  string distanceOption = ApplicationTools::getStringParameter("distance", regArgs, "None", "", true, 1);
3724 
3725  if (AlphabetTools::isCodonAlphabet(*alphabet))
3726  {
3727  weights = SequenceApplicationTools::getAlphabetIndex2(dynamic_pointer_cast<const CodonAlphabet>(alphabet), genCode, weightOption, "Substitution weight scheme:");
3728  distances = SequenceApplicationTools::getAlphabetIndex2(dynamic_pointer_cast<const CodonAlphabet>(alphabet), genCode, distanceOption, "Substitution distances:");
3729  }
3730  else
3731  {
3732  weights = SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:");
3733  distances = SequenceApplicationTools::getAlphabetIndex2(alphabet, distanceOption, "Substitution distances:");
3734  }
3735 
3736  if (regType == "Combination")
3737  {
3738  shared_ptr<AlphabetIndex2> w2;
3739  shared_ptr<AlphabetIndex2> d2;
3740 
3741  auto vreg = make_unique<VectorOfSubstitutionRegisters>(stateMap);
3742 
3743  size_t i = 0;
3744  while (++i)
3745  {
3746  string regDesc = ApplicationTools::getStringParameter("reg" + TextTools::toString(i), regArgs, "", "", false, 1);
3747  if (regDesc == "")
3748  break;
3749 
3750  auto sreg = getSubstitutionRegister(regDesc, stateMap, genCode, w2, d2);
3751 
3752  vreg->addRegister(std::move(sreg));
3753  }
3754 
3755  if (vreg->getNumberOfSubstitutionTypes()==0)
3756  throw Exception("Missing registers reg1, reg2, ... in description of Combination");
3757 
3758  reg = std::move(vreg);
3759  }
3760  else if (regType == "All")
3761  {
3762  reg = make_unique<ComprehensiveSubstitutionRegister>(stateMap, false);
3763  }
3764  else if (regType == "Total")
3765  {
3766  reg = make_unique<TotalSubstitutionRegister>(stateMap);
3767  }
3768  else if (regType == "Selected")
3769  {
3770  string subsList = ApplicationTools::getStringParameter("substitution.list", regArgs, "All", "", true, false);
3771  reg = make_unique<SelectedSubstitutionRegister>(stateMap, subsList);
3772  }
3773 
3774 
3775  // Alphabet dependent registers
3776  else if (AlphabetTools::isNucleicAlphabet(*alphabet))
3777  {
3778  if (regType == "GC")
3779  reg = make_unique<GCSubstitutionRegister>(stateMap, false);
3780  else if (regType == "GCw")
3781  reg = make_unique<GCSubstitutionRegister>(stateMap, true);
3782  else if (regType == "TsTv")
3783  reg = make_unique<TsTvSubstitutionRegister>(stateMap);
3784  else if (regType == "SW")
3785  reg = make_unique<SWSubstitutionRegister>(stateMap);
3786  else
3787  throw Exception("PhylogeneticsApplicationTools::getSubstitutionRegister: unsupported substitution categorization:" + regType + " for alphabet " + alphabet->getAlphabetType());
3788  }
3789  else if (AlphabetTools::isCodonAlphabet(*alphabet))
3790  {
3791  if (regType == "IntraAA")
3792  reg = make_unique<AAInteriorSubstitutionRegister>(stateMap, genCode);
3793  else if (regType == "InterAA")
3794  reg = make_unique<AAExteriorSubstitutionRegister>(stateMap, genCode);
3795  else if (regType == "GC")
3796  reg = make_unique<GCSynonymousSubstitutionRegister>(stateMap, genCode);
3797  else if (regType == "GC1")
3798  reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 0);
3799  else if (regType == "GC2")
3800  reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 1);
3801  else if (regType == "GC3")
3802  reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 2);
3803  else if (regType == "GCw")
3804  reg = make_unique<GCSynonymousSubstitutionRegister>(stateMap, genCode, true);
3805  else if (regType == "GC1w")
3806  reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 0, true);
3807  else if (regType == "GC2w")
3808  reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 1, true);
3809  else if (regType == "GC3w")
3810  reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 2, true);
3811  else if (regType == "TsTv")
3812  reg = make_unique<TsTvSubstitutionRegister>(stateMap, genCode);
3813  else if (regType == "SW")
3814  reg = make_unique<SWSubstitutionRegister>(stateMap, genCode);
3815  else if (regType == "KrKc")
3816  reg = make_unique<KrKcSubstitutionRegister>(stateMap, genCode);
3817  else if (regType == "DnDs")
3818  reg = make_unique<DnDsSubstitutionRegister>(stateMap, genCode, false);
3819  else
3820  throw Exception("Unsupported substitution categorization: " + regType + " for alphabet " + alphabet->getAlphabetType());
3821  }
3822 
3823  else if (AlphabetTools::isProteicAlphabet(*alphabet))
3824  {
3825  if (regType == "KrKc")
3826  reg = make_unique<KrKcSubstitutionRegister>(stateMap);
3827  else
3828  throw Exception("Unsupported substitution categorization: " + regType + " for alphabet " + alphabet->getAlphabetType());
3829  }
3830 
3831  auto csr = dynamic_cast<CategorySubstitutionRegister*>(reg.get());
3832  if (csr)
3833  csr->setStationarity(ApplicationTools::getBooleanParameter("stationarity", regArgs, true));
3834 
3835  if (verbose)
3836  ApplicationTools::displayResult("Substitution Register", regType);
3837 
3838  return reg;
3839 }
std::shared_ptr< const FrequencySetInterface > getRootFrequencySet() const
std::string getFrom(const std::string &name) const
Likelihood framework based on a hmm of simple likelihoods.
Likelihood framework based on a hmm of simple likelihoods.
Vdouble getPosteriorProbabilitiesForASitePerAligned(size_t site) const
virtual double getLogLikelihoodForASite(size_t site) const =0
Get the log likelihood for a site, and its derivatives.
virtual size_t getNumberOfSites() const =0
Get the number of sites in the dataset.
Likelihood framework based on a mixture of aligned likelihoods.
Vdouble getPhyloProbabilities() const
Get the probabilities of the simplex.
The AlignedPhyloLikelihoodProduct class, for phylogenetic likelihood on several independent data.
Joint interface SetOf+Aligned PhylLikelihood.
virtual double getLogLikelihoodForASiteForAPhyloLikelihood(size_t site, size_t nPhyl) const =0
Get the log likelihood for a site for an aligned phyloLikelihood.
static bool isProteicAlphabet(const Alphabet &alphabet)
static bool isNucleicAlphabet(const Alphabet &alphabet)
static bool isCodonAlphabet(const Alphabet &alphabet)
static bool isWordAlphabet(const Alphabet &alphabet)
static void displayMessage(const std::string &text)
static std::vector< std::string > matchingParameters(const std::string &pattern, const std::map< std::string, std::string > &params)
static void displayError(const std::string &text)
static bool getBooleanParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, bool defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static int getIntParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, int defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
static std::string getAFilePath(const std::string &parameter, const std::map< std::string, std::string > &params, bool isRequired=true, bool mustExist=true, const std::string &suffix="", bool suffixIsOptional=false, const std::string &defaultPath="none", int warn=0)
AssociationGraphObserver< N, E >::NodeIndex NodeIndex
virtual std::vector< EdgeIndex > getAllEdgesIndexes() const=0
static std::map< std::string, std::string > getAttributesMapFromFile(const std::string &file, const std::string &delimiter)
Sequence evolution framework based on an auto-correlation of substitution processes.
const AutoCorrelationTransitionMatrix & hmmTransitionMatrix() const
double Pij(size_t i, size_t j) const override
static std::string CONSTRAINTS_AUTO
Branch model I/O in BppO format.
std::unique_ptr< BranchModelInterface > readBranchModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, bool parseArguments=true)
Frequencies set I/O in BppO format.
void writeFrequencySet(const FrequencySetInterface &freqset, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
std::unique_ptr< FrequencySetInterface > readFrequencySet(std::shared_ptr< const Alphabet > alphabet, const std::string &freqDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, bool parseArguments=true) override
Read a frequencies set from a string.
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
const std::map< std::string, std::string > & getUnparsedArguments() const override
IMultiTree * readIMultiTree(const std::string &description)
Read a IMultiTree object from a string.
Rate Distribution I/O in BppO format.
std::unique_ptr< DiscreteDistributionInterface > readDiscreteDistribution(const std::string &distDescription, bool parseArguments)
void writeDiscreteDistribution(const DiscreteDistributionInterface &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
Substitution model I/O in BppO format.
void write(const BranchModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
std::unique_ptr< SubstitutionModelInterface > readSubstitutionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, bool parseArguments=true) override
Read a substitution model from a string.
const std::map< std::string, std::string > & getUnparsedArguments() const override
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
Tree I/O in BppO format.
std::unique_ptr< ITree > readITree(const std::string &description)
Read a ITree object from a string.
Interface for all Branch models.
The CategorySubstitutionRegisters.
Context for dataflow node construction.
Definition: DataFlow.h:527
virtual int getCoordinate() const=0
static void write(const DataTable &data, std::ostream &out, const std::string &sep="\t", bool alignHeaders=false)
void addRow(const std::vector< std::string > &newRow)
virtual double getCategory(size_t categoryIndex) const=0
virtual std::string getName() const=0
void setTransitionProbabilities(const Matrix< double > &mat)
const Matrix< double > & getPij() const
Sequence evolution framework based on a hmm.
const FullHmmTransitionMatrix & hmmTransitionMatrix() const
virtual void readPhyloTrees(const std::string &path, std::vector< std::unique_ptr< PhyloTree >> &trees) const =0
Read trees from a file.
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
static void print(const Matrix &m, std::ostream &out=std::cout)
Sequence evolution framework based on a mixture of substitution processes.
const std::vector< double > & getSubProcessProbabilities() const
std::string to_string() const
Output.
Definition: ModelPath.cpp:257
Organization of submodels in mixed substitution models in a path. See class ModelScenario for a thoro...
Definition: ModelPath.h:22
const PathNode & getPathNode(std::shared_ptr< MixedTransitionModelInterface > mMod) const
gets the pathnode associated with a model
Definition: ModelPath.h:251
std::vector< std::shared_ptr< MixedTransitionModelInterface > > getModels() const
gets the MixedTransitionModel used in the ModelPath
Definition: ModelPath.cpp:132
Partial implementation of multiple processes of sequences.
size_t getNumberOfSubstitutionProcess() const
Return the number of process used for computation.
const std::vector< size_t > & getSubstitutionProcessNumbers() const
Partial implementation of the Likelihood interface for multiple processes.
The so-called 'newick' parenthetic format.
Definition: Newick.h:58
void enableExtendedBootstrapProperty(const std::string &propertyName)
Definition: Newick.h:86
a simple parser for reading trees from a Nexus file.
Definition: NexusIoTree.h:36
The so-called 'Nhx - New Hampshire eXtended' parenthetic format.
Definition: Nhx.h:62
Substitution process manager for non-homogeneous / non-reversible models of evolution.
const DiscreteDistributionInterface & rateDistribution() const override
Get the rate distribution.
const std::vector< unsigned int > getNodesWithModel(size_t i) const override
Get a list of nodes id for which the given model is associated.
std::shared_ptr< const BranchModelInterface > getModel(size_t n) const override
General interface for tree writers.
Definition: IoTree.h:419
virtual void writePhyloTrees(const std::vector< const PhyloTree * > &trees, const std::string &path, bool overwrite) const =0
Write trees to a file.
General interface for tree writers.
Definition: IoTree.h:157
virtual void writePhyloTree(const PhyloTree &tree, const std::string &path, bool overwrite) const =0
Write a tree to a file.
General interface for tree writers.
Definition: IoTree.h:125
virtual void writeTree(const Tree &tree, const std::string &path, bool overwrite) const =0
Write a tree to a file.
Evolution of a sequence performed by a unique SubstitutionProcess all along the sequence.
std::shared_ptr< OutputStream > profiler
std::shared_ptr< OutputStream > messenger
static std::string OPTIMIZATION_NEWTON
static unsigned int optimizeNumericalParameters2(std::shared_ptr< PhyloLikelihoodInterface > lik, const OptimizationOptions &optopt)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static std::string OPTIMIZATION_BFGS
static unsigned int optimizeNumericalParameters(std::shared_ptr< PhyloLikelihoodInterface > lik, const OptimizationOptions &optopt)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static std::string OPTIMIZATION_BRENT
virtual OutputStream & endLine()=0
virtual OutputStream & setPrecision(int digit)=0
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual void addParameter(const Parameter &param)
virtual bool matchParametersValues(const ParameterList &params, std::vector< size_t > *updatedParameters=0)
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const=0
virtual double getParameterValue(const std::string &name) const=0
virtual const ParameterList & getParameters() const=0
Sequence evolution framework based on a mixture of substitution processes.
std::map< size_t, std::vector< size_t > > & mapOfProcessSites()
The PhyloLikelihoodContainer, owns and assigns numbers to Phylolikelihoods.
std::vector< size_t > getNumbersOfPhyloLikelihoods() const
The PhyloLikelihoodFormula class, for phylogenetic likelihood on several independent data.
virtual const std::vector< size_t > & getNumbersOfPhyloLikelihoods() const =0
static void constrainedMidPointRooting(PhyloTree &tree)
Determine the mid-point position of the root along the branch that already contains the root....
static std::shared_ptr< PhyloTree > buildFromTreeTemplate(const TreeTemplate< Node > &treetemp)
static double getHeight(const PhyloTree &tree, const std::shared_ptr< PhyloNode > node)
Get the height of the subtree defined by node 'node', i.e. the maximum distance between leaves and th...
static void computeBranchLengthsGrafen(PhyloTree &tree, double power=1, bool init=true)
Compute branch lengths using Grafen's method.
static double convertToClockTree(PhyloTree &tree, std::shared_ptr< PhyloNode > node)
Modify a tree's branch lengths to make a clock tree, by rebalancing branch lengths.
static void printAnalysisInformation(const PhyloLikelihoodContainer &phylocont, const std::string &infosFile, int warn=1)
Output information on the computation to a file.
static std::unique_ptr< BranchModelInterface > getBranchModel(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &unparsedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a BranchModel (most general class of branch models) object according to options.
static std::unique_ptr< FrequencySetInterface > getRootFrequencySet(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, size_t nData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &sharedparams, const std::vector< double > &rateFreqs, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Get A FrequencySet object for root frequencies (NH models) according to options.
static std::map< size_t, std::unique_ptr< SequenceEvolution > > getSequenceEvolutions(std::shared_ptr< SubstitutionProcessCollection > SPC, const std::map< std::string, std::string > &params, map< string, string > &unparsedParams, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a map of Sequence Evolution, ie ways how sequence evolve, which may use several processes.
static MultipleDiscreteDistribution * getMultipleDistributionDefaultInstance(const std::string &distDescription, std::map< std::string, std::string > &unparsedParameterValues, bool verbose=true)
Build a multi-dimension distribution as a MultipleDiscreteDistribution object with default parameter ...
static std::unique_ptr< FrequencySetInterface > getFrequencySet(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::string &freqDescription, std::shared_ptr< const AlignmentDataInterface > data, std::map< std::string, std::string > &sharedParams, const std::vector< double > &rateFreqs, bool verbose=true, int warn=1)
Get A FrequencySet object according to options.
static void printParameters(const BranchModelInterface &model, OutputStream &out, int warn=1)
Output a SubstitutionModel description to a file.
static void checkEstimatedParameters(const ParameterList &pl)
Check if parameter values are close to their definition boundary.
static std::unique_ptr< AutonomousSubstitutionProcessInterface > getSubstitutionProcess(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, std::shared_ptr< const AlignmentDataInterface > data, const vector< std::shared_ptr< PhyloTree >> &vTree, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Sets a SubstitutionProcess object according to options.
static std::map< size_t, std::shared_ptr< PhyloTree > > getPhyloTrees(const std::map< std::string, std::string > &params, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mSeq, std::map< std::string, std::string > &unparsedParams, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a map of <number,PhyloTree> according to options.
static std::unique_ptr< SubstitutionCountInterface > getSubstitutionCount(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const SubstitutionModelInterface > model, const std::map< std::string, std::string > &params, string suffix="", bool verbose=true, int warn=1)
Get a SubstitutionCount instance.
static std::shared_ptr< PhyloLikelihoodContainer > getPhyloLikelihoodContainer(Context &context, std::shared_ptr< SubstitutionProcessCollection > SPC, std::map< size_t, std::shared_ptr< SequenceEvolution >> &mSeqEvol, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, const std::map< std::string, std::string > &params, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a Phylogeny container from parameters map.
static std::vector< std::unique_ptr< Tree > > getTrees(const std::map< std::string, std::string > &params, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a list of Tree objects according to options.
static std::map< size_t, std::unique_ptr< FrequencySetInterface > > getRootFrequencySets(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &sharedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
The same, but returns a map <number, shared_ptr<FrequencySetInterface>>.
static std::unique_ptr< DiscreteDistributionInterface > getRateDistribution(const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true)
Build a DiscreteDistribution object according to options.
static std::unique_ptr< Tree > getTree(const std::map< std::string, std::string > &params, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a Tree object according to options.
static void writePhyloTrees(const std::vector< const PhyloTree * > &trees, const std::map< std::string, std::string > &params, const std::string &prefix="output.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, bool checkOnly=false, int warn=1)
Write a tree according to options.
static std::shared_ptr< PhyloLikelihoodInterface > optimizeParameters(std::shared_ptr< PhyloLikelihoodInterface > lik, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Optimize parameters according to options.
static map< size_t, std::unique_ptr< ModelPath > > getModelPaths(const std::map< std::string, std::string > &params, const map< size_t, std::shared_ptr< BranchModelInterface >> &mModel, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build map of ModelPaths from a map of BranchModel.
static std::map< size_t, std::shared_ptr< DiscreteDistributionInterface > > getRateDistributions(const std::map< std::string, std::string > &params, const string &suffix="", bool suffixIsOptional=true, bool verbose=true)
static std::map< size_t, std::unique_ptr< BranchModelInterface > > getBranchModels(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface >> &mData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &unparsedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
The same as before, but returns a map <number, branch model>.
static std::unique_ptr< SubstitutionProcessCollection > getSubstitutionProcessCollection(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const map< size_t, std::shared_ptr< PhyloTree >> &mTree, const map< size_t, std::shared_ptr< BranchModelInterface >> &mMod, const map< size_t, std::shared_ptr< FrequencySetInterface >> &mRootFreq, const map< size_t, std::shared_ptr< DiscreteDistributionInterface >> &mDist, const map< size_t, std::shared_ptr< ModelScenario >> &mScen, const std::map< std::string, std::string > &params, map< string, string > &unparsedparams, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Builds a SubstitutionProcessCollection from many maps of relevant objects.
static void writeTree(const TreeTemplate< Node > &tree, const std::map< std::string, std::string > &params, const std::string &prefix="output.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, bool checkOnly=false, int warn=1)
Output methods:
static std::unique_ptr< SubstitutionModelInterface > getSubstitutionModel(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &unparsedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a SubstitutionModel object according to options (ie a BranchModel with a generator).
static std::unique_ptr< SubstitutionRegisterInterface > getSubstitutionRegister(const std::string &regTypeDesc, std::shared_ptr< const StateMapInterface > stateMap, std::shared_ptr< const GeneticCode > genCode, std::shared_ptr< AlphabetIndex2 > &weights, std::shared_ptr< AlphabetIndex2 > &distances, bool verbose=true)
Get a Register instance.
static map< size_t, std::unique_ptr< ModelScenario > > getModelScenarios(const std::map< std::string, std::string > &params, const map< size_t, std::shared_ptr< ModelPath >> &mModelPath, const map< size_t, std::shared_ptr< BranchModelInterface >> &mModel, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build map of ModelScenarios from a map of ModelPaths.
static bool addSubstitutionProcessCollectionMember(SubstitutionProcessCollection &SubProColl, size_t procNum, const std::map< std::string, std::string > &params, bool verbose=true, int warn=1)
Adds a SubstitutionProcessCollectionMember to a Collection, under a given number.
std::shared_ptr< const DiscreteDistributionInterface > getRateDistribution() const override
Get a pointer to the rate distribution (or null if there is no rate distribution).
static std::unique_ptr< AlphabetIndex2 > getAlphabetIndex2(std::shared_ptr< const Alphabet > alphabet, const std::string &description, const std::string &message="Alphabet distance:", bool verbose=true)
This interface describes the evolution process of a sequence.
PhyloLikelihoods based on Sequence Evolution class, ie for which there is a (set of) processes able t...
Space and time homogeneous substitution process, without mixture.
The SingleDataPhyloLikelihood interface, for phylogenetic likelihood.
virtual std::shared_ptr< const AlignmentDataInterface > getData() const =0
Get the dataset for which the likelihood must be evaluated.
virtual size_t getNData() const =0
Get the number of dataset concerned.
Wraps a dataflow graph as a function: resultNode = f(variableNodes).
const std::string & nextToken()
bool hasMoreToken() const
std::vector< size_t > getModelNumbers() const override
std::shared_ptr< const DiscreteDistributionInterface > getRateDistribution() const override
Get a pointer to the rate distribution (or null if there is no rate distribution).
std::shared_ptr< const ModelScenario > getModelScenario() const override
Get the Model Scenario associated with this process, in case there are mixture models involved.
const std::vector< unsigned int > getNodesWithModel(size_t i) const override
Get a list of nodes id for which the given model is associated.
std::shared_ptr< const FrequencySetInterface > getRootFrequencySet() const override
std::shared_ptr< const BranchModelInterface > getModel(size_t n) const override
std::shared_ptr< const ParametrizablePhyloTree > getParametrizablePhyloTree() const override
Collection of Substitution Process, which owns all the necessary objects: Substitution models,...
std::vector< size_t > getRateDistributionNumbers() const
DiscreteDistributionInterface & rateDistribution(size_t distributionIndex)
std::vector< size_t > getSubstitutionProcessNumbers() const
std::vector< size_t > getModelNumbers() const
Get the numbers of the specified objects from the collections.
std::shared_ptr< const ModelScenario > getModelScenario(size_t numPath) const
Get a ModelScenario from the set.
std::shared_ptr< FrequencySetInterface > getFrequencySet(size_t frequenciesIndex)
size_t getModelIndex(std::shared_ptr< BranchModelInterface > model) const
Return the number of a BranchModel in the collection.
SubstitutionProcessCollectionMember & substitutionProcess(size_t i)
std::shared_ptr< DiscreteDistributionInterface > getRateDistribution(size_t distributionIndex)
Get a DiscreteDistribution from the collection.
std::vector< size_t > getScenarioNumbers() const
void addOnePerBranchSubstitutionProcess(size_t nProc, size_t nMod, size_t nTree, size_t nRate, size_t nFreq, const std::vector< std::string > &sharedParameterNames)
std::vector< size_t > getFrequenciesNumbers() const
bool hasModelScenario(size_t numPath) const
checks if the set has a ModelScenario
std::shared_ptr< BranchModelInterface > getModel(size_t modelIndex)
void addDistribution(std::shared_ptr< DiscreteDistributionInterface > distribution, size_t distributionIndex)
void addSubstitutionProcess(size_t nProc, std::map< size_t, std::vector< unsigned int >> mModBr, size_t nTree, size_t nRate, size_t nFreq)
std::shared_ptr< ParametrizablePhyloTree > getTree(size_t treeIndex)
ParametrizablePhyloTree & tree(size_t treeIndex)
Get a tree from the set.
This interface describes the substitution process along the tree and sites of the alignment.
virtual const BranchModelInterface & model(size_t i) const =0
static bool isConstant(const IntSymbolListInterface &site, bool ignoreUnknown=false, bool unresolvedRaisesException=true)
static bool isComplete(const IntSymbolListInterface &site)
static std::unique_ptr< TreeTemplate< Node > > getRandomTree(std::vector< std::string > &leavesNames, bool rooted=true)
Draw a random tree from a list of taxa, using a Yule process.
The phylogenetic tree class.
Definition: TreeTemplate.h:59
static T sumExp(const std::vector< T > &v1)
static std::string paste(const std::vector< T > &v, const std::string &delim=" ")
static std::vector< T > vectorIntersection(const std::vector< T > &vec1, const std::vector< T > &vec2)
static std::vector< T > unique(const std::vector< T > &v)
static void diff(std::vector< T > &v1, std::vector< T > &v2, std::vector< T > &v3)
static void printRange(const std::vector< T > &v1, OutputStream &out=*ApplicationTools::message, const std::string &delim=",", const std::string &rangeDelim="-")
static size_t whichMax(const std::vector< T > &v)
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
ExtendedFloat exp(const ExtendedFloat &ef)
template void copyEigenToBpp(const ExtendedFloatMatrixXd &eigenMatrix, std::vector< std::vector< double >> &bppMatrix)
std::vector< Vdouble > VVdouble
std::vector< unsigned int > Vuint