bpp-phyl3 3.0.0
PhylogeneticsApplicationTools.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include "../Io/BppOBranchModelFormat.h"
6#include "../Io/BppOFrequencySetFormat.h"
7#include "../Io/BppOMultiTreeReaderFormat.h"
8#include "../Io/BppOMultiTreeWriterFormat.h"
9#include "../Io/BppORateDistributionFormat.h"
10#include "../Io/BppOTreeReaderFormat.h"
11#include "../Io/BppOTreeWriterFormat.h"
12#include "../Io/Newick.h"
13#include "../Io/NexusIoTree.h"
14#include "../Io/Nhx.h"
15#include "../Likelihood/AutoCorrelationSequenceEvolution.h"
16#include "../Likelihood/HmmSequenceEvolution.h"
17#include "../Likelihood/MixtureSequenceEvolution.h"
18#include "../Likelihood/NonHomogeneousSubstitutionProcess.h"
19#include "../Likelihood/OneProcessSequenceEvolution.h"
20#include "../Likelihood/ParametrizablePhyloTree.h"
21#include "../Likelihood/PartitionSequenceEvolution.h"
22#include "../Likelihood/PhyloLikelihoods/AlignedPhyloLikelihoodAutoCorrelation.h"
23#include "../Likelihood/PhyloLikelihoods/AutoCorrelationProcessPhyloLikelihood.h"
24#include "../Likelihood/PhyloLikelihoods/PhyloLikelihoodFormula.h"
25#include "../Likelihood/PhyloLikelihoods/AlignedPhyloLikelihoodHmm.h"
26#include "../Likelihood/PhyloLikelihoods/HmmProcessPhyloLikelihood.h"
27#include "../Likelihood/PhyloLikelihoods/AlignedPhyloLikelihoodMixture.h"
28#include "../Likelihood/PhyloLikelihoods/MixtureProcessPhyloLikelihood.h"
29#include "../Likelihood/PhyloLikelihoods/OneProcessSequencePhyloLikelihood.h"
30#include "../Likelihood/PhyloLikelihoods/PartitionProcessPhyloLikelihood.h"
31#include "../Likelihood/PhyloLikelihoods/AlignedPhyloLikelihoodProduct.h"
32#include "../Likelihood/PhyloLikelihoods/SingleDataPhyloLikelihood.h"
33#include "../Likelihood/PhyloLikelihoods/SingleProcessPhyloLikelihood.h"
34#include "../Likelihood/RateAcrossSitesSubstitutionProcess.h"
35#include "../Likelihood/SimpleSubstitutionProcess.h"
36#include "../Likelihood/SubstitutionProcessCollection.h"
37#include "../Likelihood/SubstitutionProcessCollectionMember.h"
38#include "../Mapping/DecompositionSubstitutionCount.h"
39#include "../Mapping/LaplaceSubstitutionCount.h"
40#include "../Mapping/NaiveSubstitutionCount.h"
41#include "../Mapping/OneJumpSubstitutionCount.h"
42#include "../Mapping/UniformizationSubstitutionCount.h"
43#include "../Model/FrequencySet/MvaFrequencySet.h"
44#include "../Model/MixedTransitionModel.h"
45#include "../Model/Protein/Coala.h"
46#include "../Model/SubstitutionModel.h"
47#include "../Model/WrappedModel.h"
48#include "../Model/RateDistribution/ConstantRateDistribution.h"
49#include "../OptimizationTools.h"
50#include "../Tree/PhyloTree.h"
51#include "../Tree/PhyloTreeTools.h"
53
54// From bpp-core
55#include <Bpp/BppString.h>
58#include <Bpp/Io/FileTools.h>
59#include <Bpp/Text/TextTools.h>
72
73// From bpp-seq:
78
79using namespace bpp;
80
81// From the STL:
82#include <fstream>
83#include <memory>
84#include <set>
85#include <vector>
86#include <algorithm>
87
88using namespace std;
89
90/*************************************************************/
91/***************** TREES ************************************/
92/*************************************************************/
93
94
95/******************************************************************************/
96
97unique_ptr<Tree> PhylogeneticsApplicationTools::getTree(
98 const map<string, string>& params,
99 const string& prefix,
100 const string& suffix,
101 bool suffixIsOptional,
102 bool verbose,
103 int warn)
104{
105 string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
106 string treeFilePath = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, true, suffix, suffixIsOptional, "none", warn);
107
108 BppOTreeReaderFormat bppoReader(warn);
109 auto iTree = bppoReader.readITree(format);
110 if (verbose)
111 {
112 ApplicationTools::displayResult("Input tree file " + suffix, treeFilePath);
113 ApplicationTools::displayResult("Input tree format " + suffix, iTree->getFormatName());
114 }
115 return iTree->readTree(treeFilePath);
116}
117
118/******************************************************************************/
119
121 const map<string, string>& params,
122 const string& prefix,
123 const string& suffix,
124 bool suffixIsOptional,
125 bool verbose,
126 int warn)
127{
128 string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
129 string treeFilePath = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, true, suffix, suffixIsOptional, "none", warn);
130
131 BppOMultiTreeReaderFormat bppoReader(warn);
132 auto iTrees = bppoReader.readIMultiTree(format);
133 if (verbose)
134 {
135 ApplicationTools::displayResult("Input trees file " + suffix, treeFilePath);
136 ApplicationTools::displayResult("Input trees format " + suffix, iTrees->getFormatName());
137 }
138 vector<unique_ptr<Tree>> trees;
139 iTrees->readTrees(treeFilePath, trees);
140
141 if (verbose)
142 {
143 ApplicationTools::displayResult("Number of trees in file", trees.size());
144 }
145 return trees;
146}
147
148/******************************************************************************/
149
150map<size_t, std::shared_ptr<PhyloTree>> PhylogeneticsApplicationTools::getPhyloTrees(
151 const map<string, string>& params,
152 const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mSeq,
153 map<string, string>& unparsedParams,
154 const string& prefix,
155 const string& suffix,
156 bool suffixIsOptional,
157 bool verbose,
158 int warn)
159{
160 vector<string> vTreesName = ApplicationTools::matchingParameters(prefix + "tree*", params);
161
162 map<size_t, shared_ptr<PhyloTree>> mTree;
163
164 for (size_t nT = 0; nT < vTreesName.size(); nT++)
165 {
166 size_t poseq = vTreesName[nT].find("=");
167 size_t num = 0;
168 size_t len = (prefix + "tree").size();
169
170 string suff = vTreesName[nT].substr(len, poseq - len);
171 bool flag = 0;
172 size_t nbTree = 1;
173
174 if (TextTools::isDecimalInteger(suff, '$'))
175 num = static_cast<size_t>(TextTools::toInt(suff));
176 else
177 {
178 flag = 1;
179 num = 1;
180 }
181
182 if (!flag)
183 {
186 }
187
188 string treeDesc = ApplicationTools::getStringParameter(vTreesName[nT], params, "", suffix, suffixIsOptional);
189
190 string treeName;
191
192 map<string, string> args;
193
194 KeyvalTools::parseProcedure(treeDesc, treeName, args);
195
196 if (treeName == "user")
197 {
198 string format;
199
200 if (args.find("format") != args.end())
201 format = args["format"];
202 else
203 {
204 format = "Newick";
205 ApplicationTools::displayWarning("Warning, " + vTreesName[nT] + " format set to Newick");
206 }
207
208 string treeFilePath = ApplicationTools::getAFilePath("file", args, true, true, suffix, suffixIsOptional, "none", warn);
209
210 IMultiPhyloTree* treeReader;
211 if (format == "Newick")
212 treeReader = new Newick(true);
213 else if (format == "Nexus")
214 treeReader = new NexusIOTree();
215 else if (format == "NHX")
216 treeReader = new Nhx();
217 else
218 throw Exception("Unknown format for tree reading: " + format);
219
220 vector<unique_ptr<PhyloTree>> trees;
221 treeReader->readPhyloTrees(treeFilePath, trees);
222
223 if (args.find("data") != args.end())
224 {
225 auto seqNum = (size_t) TextTools::toInt(args["data"]);
226 if (mSeq.find(seqNum) == mSeq.end())
227 throw Exception("Error : Wrong number of data " + TextTools::toString(seqNum));
228 else
229 {
230 ApplicationTools::displayMessage("Tree leaves pruned to fit data " + TextTools::toString(seqNum));
231 vector<string> names = mSeq.find(seqNum)->second->getSequenceNames();
232
233 for (auto& tree:trees)
234 {
235 auto nb1 = tree->getNumberOfLeaves();
236 tree->pruneTree(names);
237 auto nb2 = tree->getNumberOfLeaves();
238 if (nb1 != nb2)
239 {
240 ApplicationTools::displayResult("Number of removed leaves", nb1 - nb2);
241 }
242 }
243 }
244 }
245
246 if (verbose)
247 {
248 if (flag)
249 {
251 ApplicationTools::displayResult("Tree file", treeFilePath);
252 }
253
254 ApplicationTools::displayResult("Number of trees in file", trees.size());
255 }
256
257 if (flag)
258 {
259 nbTree = trees.size();
260
261 for (size_t i2 = 0; i2 < trees.size(); i2++)
262 {
263 if (mTree.find(i2 + 1) != mTree.end())
264 {
265 ApplicationTools::displayWarning("Tree " + TextTools::toString(i2 + 1) + " already assigned, replaced by new one.");
266 mTree.erase(i2 + 1);
267 }
268
269 mTree[i2 + 1] = std::move(trees[i2]);
270 ApplicationTools::displayResult("Number of leaves", mTree[i2 + 1]->getNumberOfLeaves());
271 }
272 }
273 else
274 {
275 if (trees.size() > 1)
276 throw Exception("Error : Several trees for description of " + vTreesName[nT] + ".");
277
278 if (trees.size() == 1)
279 {
280 if (mTree.find(num) != mTree.end())
281 {
282 ApplicationTools::displayWarning("Tree " + TextTools::toString(num) + " already assigned, replaced by new one.");
283 mTree.erase(num);
284 }
285 mTree[num] = std::move(trees[0]);
286 ApplicationTools::displayResult("Number of leaves", mTree[num]->getNumberOfLeaves());
287 }
288 }
289 }
290 else if (treeName == "random")
291 {
292 size_t seqNum;
293
294 if (args.find("data") == args.end())
295 {
296 ApplicationTools::displayWarning("Random tree set from data 1");
297 seqNum = 1;
298 }
299 else
300 seqNum = (size_t) TextTools::toInt(args["data"]);
301
302
303 if (mSeq.find(seqNum) == mSeq.end())
304 throw Exception("Error : Wrong number of data " + TextTools::toString(seqNum));
305
306 vector<string> names = mSeq.find(seqNum)->second->getSequenceNames();
307
308 // Not optimal process: make random PhlyoTree directly
309 auto treetemp = TreeTemplateTools::getRandomTree(names);
310 treetemp->setBranchLengths(1.);
311
312 auto tree = PhyloTreeTools::buildFromTreeTemplate(*treetemp);
313
314 if (mTree.find(num) != mTree.end())
315 {
316 ApplicationTools::displayWarning("Tree " + TextTools::toString(num) + " already assigned, replaced by new one.");
317 mTree.erase(num);
318 }
319 mTree[num] = tree;
320 ApplicationTools::displayResult("Number of leaves", tree->getNumberOfLeaves());
321 }
322
323
324 // //////////
325 // Setting branch lengths?
326 string initBrLenMethod = ApplicationTools::getStringParameter("init.brlen.method", args, "Input", "", true, 1);
327 string cmdName;
328 map<string, string> cmdArgs;
329
330 KeyvalTools::parseProcedure(initBrLenMethod, cmdName, cmdArgs);
331
332 ApplicationTools::displayResult("Branch lengths", cmdName);
333
334 if (cmdName == "Input")
335 {
336 // Is the root has to be moved to the midpoint position along the branch that contains it ? If no, do nothing!
337 string midPointRootBrLengths = ApplicationTools::getStringParameter("midPointRootBrLengths", cmdArgs, "no", "", true, 2);
338 if (midPointRootBrLengths == "yes")
339 {
340 ApplicationTools::displayResult(" Mid Point Rooting", midPointRootBrLengths);
341 if (flag)
342 {
343 for (size_t i = 0; i < nbTree; i++)
344 {
346 }
347 }
348 else
350 }
351 }
352 else if (cmdName == "Equal")
353 {
354 double value = ApplicationTools::getDoubleParameter("value", cmdArgs, 0.1, "", true, 2);
355 if (value <= 0)
356 throw Exception("Value for branch length must be superior to 0");
357 ApplicationTools::displayResult("Branch lengths set to", value);
358 if (flag)
359 {
360 for (size_t i = 0; i < nbTree; i++)
361 {
362 mTree[i + 1]->setBranchLengths(value);
363 }
364 }
365 else
366 mTree[num]->setBranchLengths(value);
367 }
368 else if (cmdName == "Clock")
369 {
370 if (flag)
371 {
372 for (size_t i = 0; i < nbTree; i++)
373 {
374 PhyloTreeTools::convertToClockTree(*mTree[i + 1], mTree[i + 1]->getRoot());
375 }
376 }
377 else
378 PhyloTreeTools::convertToClockTree(*mTree[num], mTree[num]->getRoot());
379 }
380 else if (cmdName == "Grafen")
381 {
382 string grafenHeight = ApplicationTools::getStringParameter("height", cmdArgs, "input", "", true, 2);
383 double h;
384 if (flag)
385 {
386 for (size_t i = 0; i < nbTree; i++)
387 {
388 auto tree = mTree[i + 1];
389 if (grafenHeight == "input")
390 {
391 h = PhyloTreeTools::getHeight(*tree, tree->getRoot());
392 }
393 else
394 {
395 h = TextTools::toDouble(grafenHeight);
396 if (h <= 0)
397 throw Exception("Height must be positive in Grafen's method.");
398 }
400
401 double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, 2);
402 ApplicationTools::displayResult("Grafen's rho", rho);
404
405 double nh = PhyloTreeTools::getHeight(*tree, tree->getRoot());
406 tree->scaleTree(h / nh);
407 }
408 }
409 else
410 {
411 auto tree = mTree[num];
412 if (grafenHeight == "input")
413 h = PhyloTreeTools::getHeight(*tree, tree->getRoot());
414 else
415 {
416 h = TextTools::toDouble(grafenHeight);
417 if (h <= 0)
418 throw Exception("Height must be positive in Grafen's method.");
419 }
421
422 double rho = ApplicationTools::getDoubleParameter("rho", cmdArgs, 1., "", true, 2);
423 ApplicationTools::displayResult("Grafen's rho", rho);
424
426 double nh = PhyloTreeTools::getHeight(*tree, tree->getRoot());
427
428 tree->scaleTree(h / nh);
429 }
430 }
431 else
432 throw Exception("Method '" + initBrLenMethod + "' unknown for computing branch lengths.");
433
434 // //////////// Setting branch lengths with aliases
435
436 vector<string> vBrNb = ApplicationTools::matchingParameters("BrLen*", args);
437
438 for (size_t ib = 0; ib < vBrNb.size(); ib++)
439 {
440 string apeq = args[vBrNb[ib]];
441 string aveq = vBrNb[ib];
442
444 {
445 shared_ptr<PhyloBranch> branch = mTree[num]->getEdgeToFather(mTree[num]->getNode(static_cast<PhyloTree::NodeIndex>(TextTools::toInt(aveq.substr(5, string::npos)))));
446 if (branch)
447 branch->setLength(TextTools::toDouble(apeq));
448 }
449 else
450 {
451 size_t posun = apeq.find("_");
452 size_t posd = aveq.find("_");
453 unparsedParams[aveq + (posd != string::npos ? "" : "_" + TextTools::toString(num))] = apeq + (posun != string::npos ? "" : "_" + TextTools::toString(num));
454 }
455 }
456 }
457
458 return mTree;
459}
460
461/******************************************************/
462/**** SUBSTITUTION RATES *******************************/
463/******************************************************/
464
465std::unique_ptr<SubstitutionModelInterface> PhylogeneticsApplicationTools::getSubstitutionModel(
466 std::shared_ptr<const Alphabet> alphabet,
467 std::shared_ptr<const GeneticCode> gCode,
468 std::shared_ptr<const AlignmentDataInterface> data,
469 const map<string, string>& params,
470 map<string, string>& unparsedParams,
471 const string& suffix,
472 bool suffixIsOptional,
473 bool verbose,
474 int warn)
475{
476 BppOSubstitutionModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, verbose, warn + 1);
477 string modelDescription;
478 auto ca = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
479 if (ca)
480 {
481 modelDescription = ApplicationTools::getStringParameter("model", params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
482 if (!gCode)
483 throw Exception("PhylogeneticsApplicationTools::getSubstitutionModel(): a GeneticCode instance is required for instantiating a codon model.");
484 bIO.setGeneticCode(gCode);
485 }
486 else if (AlphabetTools::isWordAlphabet(*alphabet))
487 modelDescription = ApplicationTools::getStringParameter("model", params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
488 else
489 modelDescription = ApplicationTools::getStringParameter("model", params, "JC69", suffix, suffixIsOptional, warn);
490
491 std::map<size_t, std::shared_ptr<const AlignmentDataInterface>> mData;
492 mData[1] = data;
493
494 auto model = bIO.readSubstitutionModel(alphabet, modelDescription, mData, 1, true);
495
496 unparsedParams.insert(bIO.getUnparsedArguments().begin(), bIO.getUnparsedArguments().end());
497
498 return model;
499}
500
501std::unique_ptr<BranchModelInterface> PhylogeneticsApplicationTools::getBranchModel(
502 std::shared_ptr<const Alphabet> alphabet,
503 std::shared_ptr<const GeneticCode> gCode,
504 std::shared_ptr<const AlignmentDataInterface> data,
505 const map<string, string>& params,
506 map<string, string>& unparsedParams,
507 const string& suffix,
508 bool suffixIsOptional,
509 bool verbose,
510 int warn)
511{
512 BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, verbose, warn + 1);
513 string modelDescription;
514 auto ca = dynamic_pointer_cast<const CodonAlphabet>(alphabet);
515 if (ca)
516 {
517 modelDescription = ApplicationTools::getStringParameter("model", params, "CodonRate(model=JC69)", suffix, suffixIsOptional, warn);
518 if (!gCode)
519 throw Exception("PhylogeneticsApplicationTools::getBranchModel(): a GeneticCode instance is required for instantiating a codon model.");
520 bIO.setGeneticCode(gCode);
521 }
522 else if (AlphabetTools::isWordAlphabet(*alphabet))
523 modelDescription = ApplicationTools::getStringParameter("model", params, "Word(model=JC69)", suffix, suffixIsOptional, warn);
524 else
525 modelDescription = ApplicationTools::getStringParameter("model", params, "JC69", suffix, suffixIsOptional, warn);
526
527 std::map<size_t, std::shared_ptr<const AlignmentDataInterface>> mData;
528 mData[1] = data;
529
530 auto model = bIO.readBranchModel(alphabet, modelDescription, mData, 1, true);
531 map<string, string> tmpUnparsedParameterValues(bIO.getUnparsedArguments());
532
533 unparsedParams.insert(tmpUnparsedParameterValues.begin(), tmpUnparsedParameterValues.end());
534
535 return model;
536}
537
538/******************************************************************************/
539
540map<size_t, std::shared_ptr<DiscreteDistributionInterface>> PhylogeneticsApplicationTools::getRateDistributions(
541 const map<string, string>& params,
542 const string& suffix,
543 bool suffixIsOptional,
544 bool verbose)
545{
546 string DistFilePath = ApplicationTools::getAFilePath("rate_distribution.file", params, false, false, suffix, suffixIsOptional, "none", 1);
547
548 map<string, string> paramDist;
549
550 if (DistFilePath != "none")
551 paramDist = AttributesTools::getAttributesMapFromFile(DistFilePath, "=");
552
553 paramDist.insert(params.begin(), params.end());
554
555 vector<string> vratesName = ApplicationTools::matchingParameters("rate_distribution*", paramDist);
556
558 map<size_t, std::shared_ptr<DiscreteDistributionInterface>> mDist;
559
560
561 for (size_t i = 0; i < vratesName.size(); ++i)
562 {
563 size_t poseq = vratesName[i].find("=");
564 size_t num = 0;
565 string suff = vratesName[i].substr(17, poseq - 17);
566 bool flag = 0;
567
568
569 if (TextTools::isDecimalInteger(suff, '$'))
570 num = static_cast<size_t>(TextTools::toInt(suff));
571 else
572 {
573 flag = 1;
574 num = 0;
575 }
576
577 if (verbose)
578 if (!flag)
579 {
582 }
583
584 string distDescription = ApplicationTools::getStringParameter(vratesName[i], paramDist, "", suffix, suffixIsOptional);
585
586 if (num != 0)
587 mDist[num] = std::shared_ptr<DiscreteDistributionInterface>(bIO.readDiscreteDistribution(distDescription, true));
588 }
589
590 if (mDist.size() == 0)
591 {
594 string distDescription = ApplicationTools::getStringParameter("rate_distribution", paramDist, "Constant()", suffix, suffixIsOptional);
595 mDist[1] = std::shared_ptr<DiscreteDistributionInterface>(bIO.readDiscreteDistribution(distDescription, true));
596 }
597
598 return mDist;
599}
600
601
602/*************************************************************/
603/******* MODELS **********************************************/
604/*************************************************************/
605
606map<size_t, std::unique_ptr<BranchModelInterface>> PhylogeneticsApplicationTools::getBranchModels(
607 std::shared_ptr<const Alphabet> alphabet,
608 std::shared_ptr<const GeneticCode> gCode,
609 const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
610 const map<string, string>& params,
611 map<string, string>& unparsedParams,
612 const string& suffix,
613 bool suffixIsOptional,
614 bool verbose,
615 int warn)
616{
617 if (dynamic_pointer_cast<const CodonAlphabet>(alphabet) && !gCode)
618 throw Exception("PhylogeneticsApplicationTools::getBranchModels(): a GeneticCode instance is required for instantiating codon models.");
619
620 string ModelFilePath = ApplicationTools::getAFilePath("models.file", params, false, false, suffix, suffixIsOptional, "none", 1);
621
622 map<string, string> paramModel;
623
624 if (ModelFilePath != "none")
625 paramModel = AttributesTools::getAttributesMapFromFile(ModelFilePath, "=");
626
627 paramModel.insert(params.begin(), params.end());
628
629 vector<string> modelsName = ApplicationTools::matchingParameters("model*", paramModel);
630
631 vector<size_t> modelsNum;
632 for (const auto& name : modelsName)
633 {
634 size_t poseq = name.find("=");
635 if (name.find("nodes_id") == string::npos)
636 {
637 modelsNum.push_back(TextTools::to<size_t>(name.substr(5, poseq - 5)));
638 }
639 }
640
641 map<size_t, std::unique_ptr<BranchModelInterface>> mModel;
642
643 BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, verbose, warn);
644 bIO.setGeneticCode(gCode);
645
646 for (size_t i = 0; i < modelsNum.size(); ++i)
647 {
648 if (i >= 10)
649 {
650 bIO.setVerbose(false);
651 warn = 10;
652 if (i == 10)
654 ApplicationTools::displayResult("Model " + TextTools::toString(modelsNum[i]), string("..."));
655 }
656 else
657 {
660 }
661
662 string modelDescription = ApplicationTools::getStringParameter("model" + TextTools::toString(modelsNum[i]), paramModel, "", suffix, suffixIsOptional, warn);
663
664 unique_ptr<BranchModelInterface> model;
665 model = bIO.readBranchModel(alphabet, modelDescription, mData, 0, true);
666
667 map<string, string> tmpUnparsedParameterValues(bIO.getUnparsedArguments());
668
669 for (auto& it : tmpUnparsedParameterValues)
670 {
671 unparsedParams[it.first + "_" + TextTools::toString(modelsNum[i])] = it.second;
672 }
673
674 mModel[modelsNum[i]] = std::move(model);
675 }
676
677 return mModel;
678}
679
680
681/******************************************************/
682/**** FREQUENCIES SET *********************************/
683/******************************************************/
684
685std::unique_ptr<FrequencySetInterface> PhylogeneticsApplicationTools::getFrequencySet(
686 std::shared_ptr<const Alphabet> alphabet,
687 std::shared_ptr<const GeneticCode> gCode,
688 const string& freqDescription,
689 const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
690 size_t nData,
691 map<string, string>& sharedparams,
692 const vector<double>& rateFreqs,
693 bool verbose,
694 int warn)
695{
696 map<string, string> unparsedParameterValues;
698 if (AlphabetTools::isCodonAlphabet(*alphabet))
699 {
700 if (!gCode)
701 throw Exception("PhylogeneticsApplicationTools::getFrequencySet(): a GeneticCode instance is required for instantiating a codon frequencies set.");
702 bIO.setGeneticCode(gCode);
703 }
704
705 auto pFS = bIO.readFrequencySet(alphabet, freqDescription, mData, nData, true);
706
707 map<string, string> unparsedparam = bIO.getUnparsedArguments();
708
709 sharedparams.insert(unparsedparam.begin(), unparsedparam.end());
710
711 // /////// To be changed for input normalization
712 if (rateFreqs.size() > 0)
713 {
714 pFS = std::make_unique<MarkovModulatedFrequencySet>(std::move(pFS), rateFreqs);
715 }
716
717 return pFS;
718}
719
720
721std::unique_ptr<FrequencySetInterface> PhylogeneticsApplicationTools::getRootFrequencySet(
722 std::shared_ptr<const Alphabet> alphabet,
723 std::shared_ptr<const GeneticCode> gCode,
724 const std::map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
725 size_t nData,
726 const map<string, string>& params,
727 map<string, string>& sharedparams,
728 const vector<double>& rateFreqs,
729 const string& suffix,
730 bool suffixIsOptional,
731 bool verbose,
732 int warn)
733{
734 string freqDescription = ApplicationTools::getStringParameter("nonhomogeneous.root_freq", params, "Full(init=observed)", suffix, suffixIsOptional, warn);
735 if (freqDescription == "None")
736 {
737 return 0;
738 }
739 else
740 {
741 map<string, string> unparams;
742
743 auto freq = getFrequencySet(alphabet, gCode, freqDescription, mData, nData, unparams, rateFreqs, verbose, warn + 1);
744 freq->setNamespace("root." + freq->getNamespace());
745
746 for (auto& it : unparams)
747 {
748 sharedparams["root." + it.first] = it.second;
749 }
750
751 if (verbose)
752 ApplicationTools::displayResult("Root frequencies ", freq->getName());
753 return freq;
754 }
755}
756
757
758map<size_t, std::unique_ptr<FrequencySetInterface>> PhylogeneticsApplicationTools::getRootFrequencySets(
759 std::shared_ptr<const Alphabet> alphabet,
760 std::shared_ptr<const GeneticCode> gCode,
761 const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
762 const map<string, string>& params,
763 map<string, string>& sharedparams,
764 const string& suffix,
765 bool suffixIsOptional,
766 bool verbose,
767 int warn)
768{
769 if (dynamic_pointer_cast<const CodonAlphabet>(alphabet) && !gCode)
770 throw Exception("PhylogeneticsApplicationTools::getRootFrequencySets(): a GeneticCode instance is required for instantiating codon frequencies sets.");
771
772 string RootFilePath = ApplicationTools::getAFilePath("root_freq.file", params, false, false, suffix, suffixIsOptional, "none", 1);
773 map<string, string> paramRF;
774
775 if (RootFilePath != "none")
776 paramRF = AttributesTools::getAttributesMapFromFile(RootFilePath, "=");
777
778 paramRF.insert(params.begin(), params.end());
779
780 vector<string> vrfName = ApplicationTools::matchingParameters("root_freq*", paramRF);
781
782 vector<size_t> rfNum;
783 for (const auto& rfName : vrfName)
784 {
785 size_t poseq = rfName.find("=");
786 try
787 {
788 rfNum.push_back(TextTools::to<size_t>(rfName.substr(9, poseq - 9)));
789 }
790 catch (Exception& e)
791 {}
792 }
793
795 bIO.setGeneticCode(gCode);
796
797 map<size_t, std::unique_ptr<FrequencySetInterface>> mFS;
798
799 for (size_t i = 0; i < rfNum.size(); ++i)
800 {
802 ApplicationTools::displayMessage("Root Frequencies Set " + TextTools::toString(rfNum[i]));
803
804 string freqDescription = ApplicationTools::getStringParameter("root_freq" + TextTools::toString(rfNum[i]), paramRF, "", suffix, suffixIsOptional, warn);
805
806 map<string, string> args;
807 string freqName;
808
809 KeyvalTools::parseProcedure(freqDescription, freqName, args);
810
811 size_t nData = 0;
812
813 if (args.find("data") != args.end())
814 nData = TextTools::to<size_t>(args["data"]);
815
816 unique_ptr<FrequencySetInterface> rFS;
817
818 rFS = bIO.readFrequencySet(alphabet, freqDescription, mData, nData, true);
819
820 rFS->setNamespace("root." + rFS->getNamespace());
821
822 map<string, string> unparsedparam = bIO.getUnparsedArguments();
823
824 for (auto& it : unparsedparam)
825 {
826 sharedparams["root." + it.first + "_" + TextTools::toString(rfNum[i])] = it.second;
827 }
828
829 if (verbose)
830 {
831 // ApplicationTools::displayResult("Root Frequencies Set " + TextTools::toString(rfNum[i]), rFS->getName());
832 if (nData != 0)
834 }
835
836 mFS[rfNum[i]] = std::move(rFS);
837 }
838
839 return mFS;
840}
841
842/******************************************************/
843/**** SETOFMODELPATH **********************************/
844/******************************************************/
845
846map<size_t, std::unique_ptr<ModelPath>> PhylogeneticsApplicationTools::getModelPaths(
847 const std::map<std::string, std::string>& params,
848 const map<size_t, std::shared_ptr<BranchModelInterface>>& mModel,
849 const string& suffix,
850 bool suffixIsOptional,
851 bool verbose,
852 int warn)
853{
854 string ModelPathsPath = ApplicationTools::getAFilePath("path.file", params, false, false, suffix, suffixIsOptional, "none", warn);
855 map<string, string> paramMP;
856
857 if (ModelPathsPath != "none")
858 paramMP = AttributesTools::getAttributesMapFromFile(ModelPathsPath, "=");
859
860 paramMP.insert(params.begin(), params.end());
861
862 vector<string> vmpName = ApplicationTools::matchingParameters("path*", paramMP);
863
864 map<size_t, std::unique_ptr<ModelPath>> modelPaths;
865
866 for (size_t i = 0; i < vmpName.size(); ++i)
867 {
868 const auto& name = vmpName[i];
869
870 string desc = ApplicationTools::getStringParameter(name, paramMP, "", "", true);
871
872 size_t num;
873 try
874 {
875 num = TextTools::to<size_t>(name.substr(4));
876 }
877 catch (const Exception& e)
878 {
879 throw Exception("PhylogeneticsApplicationTools::getModelPaths: bad path number in line " + name);
880 }
881
882 modelPaths[num] = std::make_unique<ModelPath>();
883
884 if (verbose)
885 {
886 if (i >= 10)
887 {
888 if (i == 10)
890 ApplicationTools::displayResult("Path " + TextTools::toString(num), string("..."));
891 }
892 else
893 {
896 }
897 }
898
899 StringTokenizer st(desc, "&");
900 while (st.hasMoreToken())
901 {
902 string submodel = st.nextToken();
903 Vuint submodelNb;
904 auto indexo = submodel.find("[");
905 auto indexf = submodel.find("]");
906 if ((indexo == string::npos) | (indexf == string::npos))
907 throw Exception("PhylogeneticsApplicationTools::getModelPaths. Bad path syntax, should contain `[]' symbols: " + submodel);
908
909 auto pos = submodel.find("model");
910 if (pos == string::npos)
911 throw Exception("PhylogeneticsApplicationTools::getModelPaths. Missing identifier 'model' in description: " + submodel);
912
913 size_t num2 = TextTools::to<size_t>(submodel.substr(pos + 5, indexo - 5 - pos));
914 if (mModel.find(num2) == mModel.end())
915 throw BadIntegerException("PhylogeneticsApplicationTools::getModelPaths: Wrong model number", static_cast<int>(num2));
916
917 auto pSM = std::dynamic_pointer_cast<MixedTransitionModelInterface>(mModel.at(num2));
918 if (!pSM)
919 throw Exception("PhylogeneticsApplicationTools::getModelPaths: Model number " + TextTools::toString(num2) + " ( " + mModel.at(num2)->getName() + " ) is not Mixed.");
920
921 string lp2 = submodel.substr(indexo + 1, indexf - indexo - 1);
922 StringTokenizer stp2(lp2, ",");
923 while (stp2.hasMoreToken())
924 {
925 string p2 = stp2.nextToken();
926
927 unsigned int n2;
928 bool n2ok = true;
929 try
930 {
931 n2 = TextTools::to<unsigned int>(p2);
932 if (n2 <= 0 || n2 > pSM->getNumberOfModels())
933 n2ok = false;
934 else
935 submodelNb.push_back(n2 - 1);
936 }
937 catch (Exception& e)
938 {
939 Vuint submodnb = pSM->getSubmodelNumbers(p2);
940 if (submodelNb.size() == 0)
941 submodelNb = submodnb;
942 else
943 submodelNb = VectorTools::vectorIntersection(submodelNb, submodnb);
944 }
945
946 if (!n2ok)
947 throw BadIntegerException("PhylogeneticsApplicationTools::getModelPaths: Wrong model number for model " + TextTools::toString(num2), int(n2));
948 }
949
950 modelPaths[num]->setModel(pSM, submodelNb);
951 if (!modelPaths[num]->getLeadModel())
952 modelPaths[num]->setLeadModel(pSM);
953 }
954
955 if (verbose && (i < 10))
956 ApplicationTools::displayResult("Model Path", desc);
957 }
958
959 return modelPaths;
960}
961
962
963map<size_t, std::unique_ptr<ModelScenario>> PhylogeneticsApplicationTools::getModelScenarios(
964 const std::map<std::string, std::string>& params,
965 const map<size_t, std::shared_ptr<ModelPath>>& mModelPath,
966 const map<size_t, std::shared_ptr<BranchModelInterface>>& mModel,
967 const string& suffix,
968 bool suffixIsOptional,
969 bool verbose,
970 int warn)
971{
972 string ModelPathsPath = ApplicationTools::getAFilePath("scenario.file", params, false, false, suffix, suffixIsOptional, "none", warn);
973 map<string, string> paramMS;
974
975 if (ModelPathsPath != "none")
976 paramMS = AttributesTools::getAttributesMapFromFile(ModelPathsPath, "=");
977
978 paramMS.insert(params.begin(), params.end());
979
980 vector<string> vmsName = ApplicationTools::matchingParameters("scenario*", paramMS);
981
982 map<size_t, std::unique_ptr<ModelScenario>> somp;
983
984 for (size_t i = 0; i < vmsName.size(); ++i)
985 {
986 const auto& name = vmsName[i];
987
988 string desc = ApplicationTools::getStringParameter(name, paramMS, "", suffix, suffixIsOptional, warn);
989
990 size_t num;
991 try
992 {
993 num = TextTools::to<size_t>(name.substr(8));
994 }
995 catch (const Exception& e)
996 {
997 throw Exception("PhylogeneticsApplicationTools::getModelScenarios: bad scenario number in line " + name);
998 }
999
1000 somp[num] = std::make_unique<ModelScenario>();
1001
1002 if (verbose)
1003 {
1004 if (i >= 10)
1005 {
1006 if (i == 10)
1008 ApplicationTools::displayResult("Scenario " + TextTools::toString(num), string("..."));
1009 }
1010 else
1011 {
1014 }
1015 }
1016
1017 bool complete = false;
1018 size_t numpath;
1019
1020 StringTokenizer st(desc, "|");
1021 while (st.hasMoreToken())
1022 {
1023 string path = st.nextToken();
1024 bool numok = true;
1025
1026 if (path == "complete")
1027 complete = true;
1028 else if (path.substr(0, 5) == "split")
1029 {
1030 auto pos = path.find("model");
1031
1032 if (pos == string::npos)
1033 throw Exception("PhylogeneticsApplicationTools::getModelScenarios. Missing identifier 'model' in scenario description: " + path);
1034
1035 auto poseq = path.find("=", pos);
1036 StringTokenizer stm(path.substr(poseq + 1), " ,()");
1037
1038 // get all models in the split
1039 std::vector<std::shared_ptr<MixedTransitionModelInterface>> vpSM;
1040 while (stm.hasMoreToken())
1041 {
1042 string dmod = stm.nextToken();
1043 size_t num2 = TextTools::to<size_t>(dmod);
1044
1045 if (mModel.find(num2) == mModel.end())
1046 throw BadIntegerException("PhylogeneticsApplicationTools::getModelScenarios: Wrong model number", static_cast<int>(num2));
1047
1048 auto pSM = std::dynamic_pointer_cast<MixedTransitionModelInterface>(mModel.at(num2));
1049 if (!pSM)
1050 throw Exception("PhylogeneticsApplicationTools::getModelScenarios: Model number " + TextTools::toString(num2) + " ( " + mModel.at(num2)->getName() + " ) is not Mixed.");
1051
1052 vpSM.push_back(pSM);
1053 }
1054
1055 // Then makes all combinations of paths
1056 vector<vector<uint>> vvnmod;
1057 for (const auto& pSM:vpSM)
1058 {
1059 auto nmod = pSM->getNumberOfModels();
1060
1061 vector<vector<uint>> vvnmod2;
1062
1063 if (vvnmod.size() == 0)
1064 {
1065 for (uint nm = 0; nm < static_cast<unsigned int>(nmod); ++nm)
1066 {
1067 auto v2 = vector<uint>({nm});
1068 vvnmod2.push_back(v2);
1069 }
1070 }
1071 else
1072 {
1073 for (const auto& vnmod:vvnmod)
1074 {
1075 for (uint nm = 0; nm < static_cast<unsigned int>(nmod); ++nm)
1076 {
1077 auto v2 = vnmod;
1078 v2.push_back(nm);
1079 vvnmod2.push_back(v2);
1080 }
1081 }
1082 }
1083 vvnmod = vvnmod2;
1084 }
1085
1086 // Finally, sets all paths
1087
1088 for (const auto& vn:vvnmod)
1089 {
1090 auto mp = std::make_shared<ModelPath>();
1091
1092 for (size_t j = 0; j < vpSM.size(); j++)
1093 {
1094 mp->setModel(vpSM[j], Vuint({vn[j]}));
1095 }
1096
1097 mp->setLeadModel(vpSM[0]);
1098 somp[num]->addModelPath(mp);
1099 }
1100 }
1101 else
1102 {
1103 numpath = TextTools::to<size_t>(path.substr(4));
1104 if (mModelPath.find(numpath) == mModelPath.end())
1105 numok = false;
1106 else
1107 somp[num]->addModelPath(mModelPath.at(numpath));
1108 }
1109
1110 if (!numok)
1111 throw BadIntegerException("PhylogeneticsApplicationTools::getModelScenarios: Wrong path number", static_cast<int>(numpath));
1112 }
1113
1114 if (verbose && (i < 10))
1115 ApplicationTools::displayResult("Model Scenario", desc);
1116
1117 if (complete)
1118 {
1119 if (somp[num]->getNumberOfModelPaths() == 0)
1120 throw Exception("PhylogeneticsApplicationTools::getModelScenarios: 'complete' is not possible on empty scenarios");
1121 somp[num]->complete();
1122 }
1123
1124 if (somp[num]->getNumberOfModelPaths() == 0)
1125 somp.erase(somp.find(num));
1126 else
1127 somp[num]->computeModelPathsProbabilities();
1128 }
1129
1130
1131 return somp;
1132}
1133
1134/******************************************************/
1135/********** SUBSTITUTION PROCESS ********************/
1136/******************************************************/
1137
1138unique_ptr<AutonomousSubstitutionProcessInterface> PhylogeneticsApplicationTools::getSubstitutionProcess(
1139 std::shared_ptr<const Alphabet> alphabet,
1140 std::shared_ptr<const GeneticCode> gCode,
1141 std::shared_ptr<const AlignmentDataInterface> pData,
1142 const vector< shared_ptr<PhyloTree>>& vTree,
1143 const map<string, string>& params,
1144 const string& suffix,
1145 bool suffixIsOptional,
1146 bool verbose,
1147 int warn)
1148{
1149 // Read files with same process as SubstitutionCollection
1150
1151 map<string, string> unparsedParams;
1152
1153 map<size_t, std::shared_ptr<const AlignmentDataInterface>> mData;
1154 mData[1] = pData;
1155
1156 map<size_t, std::shared_ptr<PhyloTree>> mTree;
1157 size_t i = 1;
1158 for (auto it : vTree)
1159 {
1160 mTree[i++] = it;
1161 }
1162
1163 auto mModU = getBranchModels(alphabet, gCode, mData, params, unparsedParams, suffix, suffixIsOptional, verbose, warn);
1164 auto mMod = uniqueToSharedMap<BranchModelInterface>(mModU);
1165
1166 auto mRootFreqU = getRootFrequencySets(alphabet, gCode, mData, params, unparsedParams, suffix, suffixIsOptional, verbose, warn);
1167 auto mRootFreq = uniqueToSharedMap<FrequencySetInterface>(mRootFreqU);
1168
1169 auto mDist = getRateDistributions(params, suffix, suffixIsOptional, verbose);
1170
1171 auto mPathU = getModelPaths(params, mMod, suffix, suffixIsOptional, verbose, warn);
1172 auto mPath = uniqueToSharedMap<ModelPath>(mPathU);
1173
1174 auto mScenU = getModelScenarios(params, mPath, mMod, suffix, suffixIsOptional, verbose, warn);
1175 auto mScen = uniqueToSharedMap<ModelScenario>(mScenU);
1176
1177 auto SPC = getSubstitutionProcessCollection(alphabet, gCode, mTree,
1178 mMod, mRootFreq, mDist, mScen,
1179 params, unparsedParams, suffix, suffixIsOptional, verbose, warn);
1180
1181 // Get relevant objects from Collection to build an AutonomousSubstitutionProcess
1182 unique_ptr<AutonomousSubstitutionProcessInterface> ASP;
1183
1184 auto psNum = SPC->getSubstitutionProcessNumbers();
1185 if (psNum.size() == 0)
1186 throw Exception("PhylogeneticsApplicationTools::getSubstitutionProcess : missing process in parameters.");
1187
1188 size_t maxps = *max_element(psNum.begin(), psNum.end());
1189
1190 SubstitutionProcessCollectionMember& procm = SPC->substitutionProcess(maxps);
1191
1192 auto distproc = procm.getRateDistribution();
1193
1194 auto rootproc = procm.getRootFrequencySet();
1195
1196 auto scen = procm.getModelScenario();
1197
1198 auto vmodnb = procm.getModelNumbers();
1199
1200 if (vmodnb.size() == 1)
1201 {
1202 if (!distproc)
1203 ASP = make_unique<SimpleSubstitutionProcess>(procm.getModel(1), procm.getParametrizablePhyloTree(), rootproc);
1204 else
1205 ASP = make_unique<RateAcrossSitesSubstitutionProcess>(procm.getModel(1), procm.getRateDistribution(), procm.getParametrizablePhyloTree(), rootproc);
1206 }
1207 else
1208 {
1209 auto NHSP = make_unique<NonHomogeneousSubstitutionProcess>(procm.getRateDistribution(), procm.getParametrizablePhyloTree(), rootproc);
1210
1211 for (auto nb:vmodnb)
1212 {
1213 NHSP->addModel(procm.getModel(nb), procm.getNodesWithModel(nb));
1214 }
1215
1216 if (!NHSP->isFullySetUp(false))
1217 throw Exception("PhylogeneticsApplicationTools::getSubstitutionProcess: process not fully set up.");
1218
1219 ASP = std::move(NHSP);
1220 }
1221
1222 if (procm.getModelScenario())
1223 ASP->setModelScenario(procm.getModelScenario());
1224
1225 return ASP;
1226}
1227
1228
1229/************************************************************/
1230
1233 size_t procNum,
1234 const map<string, string>& params,
1235 bool verbose,
1236 int warn)
1237{
1238 string procName = "";
1239 map<string, string> args;
1240
1241 string procDesc = ApplicationTools::getStringParameter("process", params, "", TextTools::toString(procNum), warn);
1242
1243 KeyvalTools::parseProcedure(procDesc, procName, args);
1244
1245 if ((procName != "OnePerBranch") && (procName != "Homogeneous") && (procName != "Nonhomogeneous") && (procName != "NonHomogeneous"))
1246 {
1247 if (warn >= 2)
1248 ApplicationTools::displayWarning("Warning, unknown process name: " + procName);
1249
1250 return 0;
1251 }
1252
1253 // ///
1254 // tree number
1255
1256 size_t numTree;
1257
1258 if (args.find("tree") == args.end())
1259 {
1260 if (warn)
1261 ApplicationTools::displayWarning("Warning, missing tree for process name: " + procName);
1262 numTree = 0;
1263 }
1264 else
1265 {
1266 numTree = (size_t) ApplicationTools::getIntParameter("tree", args, 1, "", true, warn);
1267
1268 if (numTree != 0 && !SubProColl.hasTreeNumber(numTree))
1269 throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown tree number", (int)numTree);
1270 }
1271
1272 // /////
1273 // rate number
1274
1275 size_t numRate = 0;
1276 if (args.find("rate") == args.end())
1277 {
1278 const auto& vrdn = SubProColl.getRateDistributionNumbers();
1279 numRate = 0;
1280 for (auto rdn:vrdn)
1281 {
1282 if (SubProColl.rateDistribution(rdn).getName() == "Constant")
1283 {
1284 numRate = rdn;
1285 break;
1286 }
1287 }
1288 if (numRate == 0)
1289 {
1290 for (uint i = 1; i <= *std::max_element(vrdn.begin(), vrdn.end()) + 1; i++)
1291 {
1292 if (std::find(vrdn.begin(), vrdn.end(), i) == vrdn.end())
1293 {
1294 numRate = i;
1295 break;
1296 }
1297 }
1298 SubProColl.addDistribution(std::make_shared<ConstantRateDistribution>(), numRate);
1299 }
1300 }
1301 else
1302 {
1303 string sRate = ApplicationTools::getStringParameter("rate", args, "1", "", true, warn);
1304
1305 size_t pp = sRate.find(".");
1306
1307 numRate = TextTools::to<size_t>(sRate.substr(0, pp));
1308
1309 if (!SubProColl.hasDistributionNumber(numRate))
1310 throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown rate number", (int)numRate);
1311
1312 if (pp != string::npos)
1313 {
1314 size_t numSRate = TextTools::to<size_t>(sRate.substr(pp + 1));
1315 SubProColl.addDistribution(
1316 std::make_shared<ConstantDistribution>(
1317 SubProColl.rateDistribution(numRate).getCategory(numSRate)),
1318 10000 * (numRate + 1) + numSRate);
1319
1320 numRate = 10000 * (numRate + 1) + numSRate;
1321 }
1322 }
1323
1324 // ////////
1325 // root freq number
1326
1327 bool stationarity = (args.find("root_freq") == args.end());
1328 size_t numFreq = 0;
1329
1330 if (!stationarity)
1331 {
1332 numFreq = (size_t) ApplicationTools::getIntParameter("root_freq", args, 1, "", true, warn);
1333 if (!SubProColl.hasFrequenciesNumber(numFreq))
1334 throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown root frequencies number", (int)numFreq);
1335 }
1336
1337 // ///
1338 // scenario number
1339
1340 size_t numScen = 0;
1341
1342 if (args.find("scenario") != args.end())
1343 {
1344 numScen = (size_t) ApplicationTools::getIntParameter("scenario", args, 1, "", true, warn);
1345
1346 if (!SubProColl.hasModelScenario(numScen))
1347 throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown scenario number", (int)numScen);
1348 }
1349
1350 // ////////////////
1351 // / models
1352
1353 if (verbose)
1354 {
1357 }
1358
1359 if (procName == "Homogeneous")
1360 {
1361 if (args.find("model") == args.end())
1362 throw Exception("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember. A model number is compulsory.");
1363
1364 size_t numModel = (size_t) ApplicationTools::getIntParameter("model", args, 1, "", true, warn);
1365
1366 if (!SubProColl.hasModelNumber(numModel))
1367 throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown model number", static_cast<int>(numModel));
1368
1369 map<size_t, vector<unsigned int>> mModBr;
1370
1371 vector<uint> vNodes;
1372 if (numTree != 0)
1373 vNodes = SubProColl.tree(numTree).getAllEdgesIndexes();
1374 else
1375 vNodes = {0}
1376 ;
1377 mModBr[numModel] = vNodes;
1378
1379 if (verbose)
1380 {
1381 ApplicationTools::displayResult("Process type", string("Homogeneous"));
1382 ApplicationTools::displayResult (" Model number", TextTools::toString(numModel));
1383 ApplicationTools::displayResult (" Tree number", TextTools::toString(numTree));
1384 if (numRate < 10000)
1385 ApplicationTools::displayResult (" Rate number", TextTools::toString(numRate));
1386 else
1387 ApplicationTools::displayResult (" Rate number", TextTools::toString(numRate / 10000 - 1) + "." + TextTools::toString(numRate % 10000));
1388
1389 if (numScen != 0)
1390 ApplicationTools::displayResult (" Scenario number", TextTools::toString(numScen));
1391
1392 if (!stationarity)
1393 ApplicationTools::displayResult (" Root frequencies number", TextTools::toString(numFreq));
1394 else
1395 ApplicationTools::displayMessage(" Stationarity assumed.");
1396 }
1397
1398 if (stationarity)
1399 SubProColl.addSubstitutionProcess(procNum, mModBr, numTree, numRate);
1400 else
1401 SubProColl.addSubstitutionProcess(procNum, mModBr, numTree, numRate, numFreq);
1402 }
1403
1404 else if ((procName == "Nonhomogeneous") || (procName == "NonHomogeneous"))
1405 {
1406 if (numTree == 0)
1407 throw Exception("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : missing tree number for process " + TextTools::toString(procName));
1408
1409 size_t indModel = 1;
1410 map<size_t, vector<unsigned int>> mModBr;
1411
1412 while (args.find("model" + TextTools::toString(indModel)) != args.end())
1413 {
1414 size_t numModel = (size_t) ApplicationTools::getIntParameter("model" + TextTools::toString(indModel), args, 1, "", true, warn);
1415
1416 if (mModBr.find(numModel) != mModBr.end())
1417 throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : model number seen twice.", (int)numModel);
1418
1419 vector<unsigned int> nodesId;
1420
1421 auto snodesid = "model" + TextTools::toString(indModel) + ".nodes_id";
1422 auto descnodes = ApplicationTools::getStringParameter(snodesid, args, "", "", true, warn);
1423
1424
1425 auto tree = SubProColl.getTree(numTree);
1426 if (descnodes == "All")
1427 {
1428 nodesId = tree->getEdgeIndexes(tree->getSubtreeEdges(tree->getRoot()));
1429 }
1430 else if (descnodes == "Leaves")
1431 {
1432 nodesId = tree->getNodeIndexes(tree->getLeavesUnderNode(tree->getRoot()));
1433 }
1434 else if (descnodes == "NoLeaves")
1435 {
1436 auto allIds = tree->getEdgeIndexes(tree->getSubtreeEdges(tree->getRoot()));
1437 auto leavesId = tree->getNodeIndexes(tree->getLeavesUnderNode(tree->getRoot()));
1438 VectorTools::diff(allIds, leavesId, nodesId);
1439 }
1440 else
1441 nodesId = ApplicationTools::getVectorParameter<unsigned int>(snodesid, args, ',', ':', "", "", true, warn);
1442
1443 mModBr[numModel] = nodesId;
1444 indModel++;
1445 }
1446
1447 if (verbose)
1448 {
1449 ApplicationTools::displayResult("Process type", string("NonHomogeneous"));
1450
1451 for (auto& it : mModBr)
1452 {
1453 ApplicationTools::displayResult (" Model number" + TextTools::toString(it.first) + " associated to", TextTools::toString(it.second.size()) + " node(s).");
1454 }
1455 ApplicationTools::displayResult (" Tree number", TextTools::toString(numTree));
1456 ApplicationTools::displayResult (" Rate number", TextTools::toString(numRate));
1457 if (!stationarity)
1458 ApplicationTools::displayResult (" Root frequencies number", TextTools::toString(numFreq));
1459 else
1460 ApplicationTools::displayMessage(" Stationarity assumed.");
1461 }
1462
1463 if (stationarity)
1464 SubProColl.addSubstitutionProcess(procNum, mModBr, numTree, numRate);
1465 else
1466 SubProColl.addSubstitutionProcess(procNum, mModBr, numTree, numRate, numFreq);
1467 }
1468 else if (procName == "OnePerBranch")
1469 {
1470 if (numTree == 0)
1471 throw Exception("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : missing tree number for process " + TextTools::toString(procName));
1472
1473 if (args.find("model") == args.end())
1474 throw Exception("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember. A model number is compulsory.");
1475
1476 size_t numModel = (size_t) ApplicationTools::getIntParameter("model", args, 1, "", true, warn);
1477
1478 if (!SubProColl.hasModelNumber(numModel))
1479 throw BadIntegerException("PhylogeneticsApplicationTools::addSubstitutionProcessCollectionMember : unknown model number", (int)numModel);
1480
1481 vector<string> sharedParameters = ApplicationTools::getVectorParameter<string>("shared_parameters", args, ',', "", "", true, 1);
1482
1483 if (stationarity)
1484 SubProColl.addOnePerBranchSubstitutionProcess(procNum, numModel, numTree, numRate, sharedParameters);
1485 else
1486 SubProColl.addOnePerBranchSubstitutionProcess(procNum, numModel, numTree, numRate, numFreq, sharedParameters);
1487
1488 if (verbose)
1489 {
1490 ApplicationTools::displayResult("Process type", string("OnePerBranch"));
1491
1492 ApplicationTools::displayResult (" Model number", TextTools::toString(numModel));
1493 ApplicationTools::displayResult (" Tree number", TextTools::toString(numTree));
1494 ApplicationTools::displayResult (" Rate number", TextTools::toString(numRate));
1495 if (!stationarity)
1496 ApplicationTools::displayResult (" Root frequencies number", TextTools::toString(numFreq));
1497 else
1498 ApplicationTools::displayMessage(" Stationarity assumed.");
1499
1500 for (const auto& sP : sharedParameters)
1501 {
1502 ApplicationTools::displayResult(" Shared parameter", sP);
1503 }
1504 }
1505 }
1506
1507 if (numScen != 0)
1508 SubProColl.substitutionProcess(procNum).setModelScenario(numScen);
1509
1510 return true;
1511}
1512
1513
1514/******************************************************************************/
1515
1517 std::shared_ptr<const Alphabet> alphabet,
1518 std::shared_ptr<const GeneticCode> gCode,
1519 const map<size_t, std::shared_ptr<PhyloTree>>& mTree,
1520 const map<size_t, std::shared_ptr<BranchModelInterface>>& mMod,
1521 const map<size_t, std::shared_ptr<FrequencySetInterface>>& mRootFreq,
1522 const map<size_t, std::shared_ptr<DiscreteDistributionInterface>>& mDist,
1523 const map<size_t, std::shared_ptr<ModelScenario>>& mScen,
1524 const map<string, string>& params,
1525 map<string, string>& unparsedParams,
1526 const string& suffix,
1527 bool suffixIsOptional,
1528 bool verbose,
1529 int warn)
1530{
1531 auto SPC = make_unique<SubstitutionProcessCollection>();
1532
1533 map<string, double> existingParameters;
1534
1535 // ///////////////////////
1536 // Trees
1537
1538 for (const auto& itt : mTree)
1539 {
1540 if (itt.second)
1541 {
1542 SPC->addTree(std::make_shared<ParametrizablePhyloTree>(*(itt.second)), itt.first);
1543 }
1544 }
1545
1546 // ///////////////////////
1547 // Rates
1548
1549 for (const auto& itd : mDist)
1550 {
1551 SPC->addDistribution(itd.second, itd.first);
1552 }
1553
1554 // ////////////////////////
1555 // Models
1556
1557 for (const auto& itm : mMod)
1558 {
1559 SPC->addModel(itm.second, itm.first);
1560 }
1561
1562 // ///////////////////////////
1563 // Root Frequencies
1564
1565 for (const auto& itr : mRootFreq)
1566 {
1567 SPC->addFrequencies(itr.second, itr.first);
1568 }
1569
1570 // ///////////////////////
1571 // Scenarios
1572
1573 for (const auto& itt : mScen)
1574 {
1575 SPC->addScenario(itt.second, itt.first);
1576 }
1577
1578 // //////////////////////////////
1579 // Now processes
1580
1581 vector<string> vProcName = ApplicationTools::matchingParameters("process*", params);
1582
1583 if (vProcName.size() == 0)
1584 throw Exception("Missing process in construction of SubstitutionProcessCollection.");
1585
1586 for (size_t nT = 0; nT < vProcName.size(); nT++)
1587 {
1588 size_t poseq = vProcName[nT].find("=");
1589 size_t num;
1590 size_t len = 7;
1591
1592 string suff = vProcName[nT].substr(len, poseq - len);
1593
1594 if (TextTools::isDecimalInteger(suff, '$'))
1595 num = TextTools::to<size_t>(suff);
1596 else
1597 num = 1;
1598
1599 bool addok = addSubstitutionProcessCollectionMember(*SPC, num, params, (nT < 10 ? verbose : false), warn);
1600
1601 if (addok)
1602 {
1603 if (nT == 10)
1605
1606 if (nT >= 10)
1607 ApplicationTools::displayResult("Process" + TextTools::toString(num), string("..."));
1608 }
1609 }
1610
1611
1612 // string ProcessFilePath = ApplicationTools::getAFilePath("processes.file", params, false, false, suffix, suffixIsOptional);
1613
1614 // map<string, string> paramProcess;
1615
1616 // if (ProcessFilePath!="none")
1617 // paramModel=AttributesTools::getAttributesMapFromFile(ProcessFilePath,"=");
1618
1619 // paramProcess.insert(params.begin(), params.end());
1620
1621 // vector<string> processName=ApplicationTools::matchingParameters("process*", paramProcess);
1622
1623 // vector<size_t> processNum;
1624 // for (size_t i=0; i< processName.size(); i++)
1625 // {
1626 // size_t poseq=processName[i].find("=");
1627 // processNum.push_back((size_t)TextTools::toInt(processName[i].substr(7,poseq-7)));
1628 // }
1629
1630 // if (processNum.size()==0)
1631 // throw Exception("Missing process in construction of SubstitutionProcessCollection.");
1632
1633 // for (size_t i=0; i<processNum.size(); i++)
1634 // addSubstitutionProcessCollectionMember(*SPC, params, processNum[i]);
1635
1636
1637 // /////////////////////////
1638 // Now set shared parameters:
1639
1640 // ////// Aliasing
1641 // Finally check parameter aliasing:
1642
1643 for (const auto& param : params)
1644 {
1645 try
1646 {
1647 auto v2 = TextTools::toDouble(param.second);
1648 SPC->setParameterValue(param.first, v2);
1649 }
1650 catch (Exception& e)
1651 {
1652 if (SPC->hasParameter(param.first))
1653 unparsedParams[param.first] = param.second;
1654 }
1655 }
1656
1657 SPC->aliasParameters(unparsedParams, verbose);
1658
1659 return SPC;
1660}
1661
1662/******************************************************/
1663/**** SEQUENCE EVOLUTIONS *****************************/
1664/******************************************************/
1665
1666map<size_t, unique_ptr<SequenceEvolution>> PhylogeneticsApplicationTools::getSequenceEvolutions(
1667 shared_ptr<SubstitutionProcessCollection> SPC,
1668 const map<string, string>& params,
1669 map<string, string>& unparsedParams,
1670 const string& suffix,
1671 bool suffixIsOptional,
1672 bool verbose,
1673 int warn)
1674{
1675 map<string, string> paramEvol;
1676
1677 paramEvol.insert(params.begin(), params.end());
1678
1679 vector<string> evolsName = ApplicationTools::matchingParameters("process*", paramEvol);
1680
1681 vector<size_t> evolsNum;
1682 for (size_t i = 0; i < evolsName.size(); ++i)
1683 {
1684 size_t poseq = evolsName[i].find("=");
1685 evolsNum.push_back(TextTools::to<size_t>(evolsName[i].substr(7, poseq - 7)));
1686 }
1687
1688 map<size_t, unique_ptr<SequenceEvolution>> mEvol;
1689
1690 for (size_t mPi = 0; mPi < evolsNum.size(); ++mPi)
1691 {
1692 if (SPC->hasSubstitutionProcessNumber(evolsNum[mPi]))
1693 continue;
1694
1695 unique_ptr<SequenceEvolution> nEvol;
1696
1697 string evolName = "";
1698 map<string, string> args;
1699
1700 string evolDesc = ApplicationTools::getStringParameter("process", params, "", TextTools::toString(evolsNum[mPi]), warn);
1701
1702 KeyvalTools::parseProcedure(evolDesc, evolName, args);
1703
1704 // Process
1705 if (verbose)
1706 {
1708 ApplicationTools::displayMessage("Process " + TextTools::toString(evolsNum[mPi]));
1709 }
1710
1711 if (evolName == "Simple")
1712 {
1713 size_t nproc = (size_t) ApplicationTools::getIntParameter("process", args, ',', "");
1714 if (!SPC->hasSubstitutionProcessNumber(nproc))
1715 throw BadIntegerException("PhylogeneticsApplicationTools::getEvolutions. Unknown process number:", (int)nproc);
1716
1717 nEvol = make_unique<OneProcessSequenceEvolution>(SPC->getSubstitutionProcess(nproc), nproc);
1718 if (verbose)
1719 {
1720 ApplicationTools::displayResult("Process type", string("Simple"));
1721 ApplicationTools::displayResult (" Process number", TextTools::toString(nproc));
1722 }
1723 }
1724 else
1725 {
1726 size_t indProc = 1;
1727 vector<size_t> vproc;
1728
1729 while (args.find("process" + TextTools::toString(indProc)) != args.end())
1730 {
1731 size_t numProc = (size_t) ApplicationTools::getIntParameter("process" + TextTools::toString(indProc), args, 1, "", true, warn);
1732
1733 vproc.push_back(numProc);
1734 indProc++;
1735 }
1736
1737 if (vproc.size() == 0)
1738 throw BadIntegerException("PhylogeneticsApplicationTools::getEvolutions. A process number is compulsory for process", (int)indProc);
1739
1740 for (size_t i = 0; i < vproc.size(); ++i)
1741 {
1742 if (!SPC->hasSubstitutionProcessNumber(vproc[i]))
1743 throw BadIntegerException("PhylogeneticsApplicationTools::getEvolutions. Unknown process number:", (int)vproc[i]);
1744 }
1745
1746 if (evolName == "Partition")
1747 {
1748 // parse all processes sites
1749
1750 vector<size_t> vMap;
1751
1752 map<size_t, size_t> posProc;
1753
1754 for (size_t i = 0; i < vproc.size(); ++i)
1755 {
1756 string prefix = "process" + TextTools::toString(i + 1);
1757
1758 vector<size_t> procPos = ApplicationTools::getVectorParameter<size_t>(prefix + ".sites", args, ',', ':', TextTools::toString(i), "", true, true);
1759
1760 for (size_t j = 0; j < procPos.size(); ++j)
1761 {
1762 if (posProc.find(procPos[j]) != posProc.end())
1763 throw BadIntegerException("A process position is defined twice ", (int)procPos[j]);
1764 else
1765 posProc[procPos[j]] = vproc[i];
1766 }
1767 }
1768
1769 size_t pos = posProc.begin()->first;
1770
1771 while (posProc.find(pos) != posProc.end())
1772 {
1773 vMap.push_back(posProc[pos]);
1774 pos++;
1775 }
1776
1777 if (vMap.size() != posProc.size())
1778 throw Exception("Error : there are gaps in the process sites");
1779
1780 if (verbose)
1781 ApplicationTools::displayResult("Process type", string("Partition"));
1782
1783 auto pMP = make_unique<PartitionSequenceEvolution>(SPC, vMap);
1784
1785 nEvol = std::move(pMP);
1786 }
1787 else if (evolName == "Mixture")
1788 {
1789 auto pMP = make_unique<MixtureSequenceEvolution>(SPC, vproc);
1790
1791 if (verbose)
1792 ApplicationTools::displayResult("Process type", string("Mixture"));
1793
1794 size_t nbP = pMP->getNumberOfSubstitutionProcess();
1795
1796 vector<double> vprob = ApplicationTools::getVectorParameter<double>("probas", args, ',', "(" + VectorTools::paste(vector<double>(nbP, 1. / (double)nbP)) + ")");
1797 if (vprob.size() != 1)
1798 {
1799 if (vprob.size() != nbP)
1800 throw BadSizeException("Wrong size of probas description in Mixture", vprob.size(), nbP);
1801 Simplex si(vprob);
1802 pMP->setSubProcessProb(si);
1803 }
1804
1805 nEvol = std::move(pMP);
1806 }
1807 else if (evolName == "HMM")
1808 {
1809 auto pMP = make_unique<HmmSequenceEvolution>(SPC, vproc);
1810
1811 if (verbose)
1812 ApplicationTools::displayResult("Process type", string("HMM"));
1813
1814 size_t nbP = pMP->getNumberOfSubstitutionProcess();
1815
1816 string vs = "(" + VectorTools::paste(vector<double>(nbP, 1. / (double)nbP), ",") + ")";
1817 string vvs = "(";
1818 for (size_t i = 0; i < nbP; ++i)
1819 {
1820 vvs += (i == 0 ? "" : ",") + vs;
1821 }
1822 vvs += ")";
1823
1824 RowMatrix<double> mat = ApplicationTools::getMatrixParameter<double>("probas", args, ',', vvs);
1825
1826 FullHmmTransitionMatrix fhtm(pMP->hmmTransitionMatrix().getHmmStateAlphabet(), pMP->getNamespace());
1828
1829 pMP->matchParametersValues(fhtm.getParameters());
1830
1831 nEvol = std::move(pMP);
1832 }
1833 else if (evolName == "AutoCorr")
1834 {
1835 auto pMP = make_unique<AutoCorrelationSequenceEvolution>(SPC, vproc);
1836
1837 size_t nbP = pMP->getNumberOfSubstitutionProcess();
1838
1839 if (verbose)
1840 ApplicationTools::displayResult("Process type", string("AutoCorr"));
1841
1842 string vs = "(" + VectorTools::paste(vector<double>(nbP, 0.95), ",") + ")";
1843
1844 vector<double> v = ApplicationTools::getVectorParameter<double>("lambdas", args, ',', vs);
1845
1846 ParameterList pl;
1847
1848 for (size_t i = 0; i < v.size(); ++i)
1849 {
1850 pl.addParameter(Parameter("AutoCorr.lambda" + TextTools::toString(i + 1), v[i]));
1851 }
1852
1853 pMP->matchParametersValues(pl);
1854
1855 nEvol = std::move(pMP);
1856 }
1857 else
1858 throw Exception("Unknown Process description : " + evolName);
1859
1860 if (verbose)
1861 {
1862 ApplicationTools::displayResult (" Process numbers", VectorTools::paste(vproc, ","));
1864 }
1865 }
1866
1867 mEvol[evolsNum[mPi]] = std::move(nEvol);
1868 }
1869
1870 return mEvol;
1871}
1872
1873
1874/******************************************************/
1875/**** PHYLO LIKELIHOODS *********************************/
1876/******************************************************/
1877
1878std::shared_ptr<PhyloLikelihoodContainer> PhylogeneticsApplicationTools::getPhyloLikelihoodContainer(
1879 Context& context,
1880 shared_ptr<SubstitutionProcessCollection> SPC,
1881 map<size_t, std::shared_ptr<SequenceEvolution>>& mSeqEvol,
1882 const map<size_t, std::shared_ptr<const AlignmentDataInterface>>& mData,
1883 const map<string, string>& params,
1884 const string& suffix,
1885 bool suffixIsOptional,
1886 bool verbose,
1887 int warn)
1888{
1889 auto mPhylo = std::make_shared<PhyloLikelihoodContainer>(context, SPC);
1890
1891 // get all members of the collection and link then to Configured Objects
1892 auto collNodes = mPhylo->getCollectionNodes();
1893
1894 // the phylo members
1895 map<string, string> paramPhyl;
1896 paramPhyl.insert(params.begin(), params.end());
1897 vector<string> phylosName = ApplicationTools::matchingParameters("phylo*", paramPhyl);
1898
1899 // map of dependencies between phylolikelihoods
1900
1901 map<size_t, vector<size_t>> phylosMap;
1902
1903 for (size_t i = 0; i < phylosName.size(); ++i)
1904 {
1905 size_t poseq = phylosName[i].find("=");
1906 size_t phyln = TextTools::to<size_t>(phylosName[i].substr(5, poseq - 5));
1907
1908 if (phyln == 0)
1909 throw BadIntegerException("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : Forbidden Phylo Number", 0);
1910
1911 string phyloDesc = ApplicationTools::getStringParameter("phylo", params, "Single", TextTools::toString(phyln), 2);
1912
1913 map<string, string> args;
1914
1915 string phyloName = "";
1916
1917 KeyvalTools::parseProcedure(phyloDesc, phyloName, args);
1918
1919 size_t indPhyl = 1;
1920 vector<size_t> vphyl;
1921
1922 while (args.find("phylo" + TextTools::toString(indPhyl)) != args.end())
1923 {
1924 size_t numPhyl = (size_t) ApplicationTools::getIntParameter("phylo" + TextTools::toString(indPhyl), args, 1, "", true, warn);
1925 vphyl.push_back(numPhyl);
1926 indPhyl++;
1927 }
1928
1929 phylosMap[phyln] = vphyl;
1930 }
1931
1932 vector<size_t> usedPhylo;
1933
1934 // //////////////////////////////////////////
1935 // First the phylos that do not depend on other phylos
1936
1937 uint nbPh(0);
1938 bool verbhere(verbose);
1939
1940 for (const auto& it : phylosMap)
1941 {
1942 nbPh++;
1943
1944 if (it.second.size() != 0)
1945 continue;
1946
1947 size_t phylonum = it.first;
1948
1949 std::shared_ptr<PhyloLikelihoodInterface> nPL;
1950 string phyloName = "";
1951
1952 map<string, string> args;
1953
1954 string phyloDesc = ApplicationTools::getStringParameter("phylo", params, "Single", TextTools::toString(phylonum), warn);
1955
1956 if (verbose)
1957 {
1958 if (nbPh <= 20)
1960 else
1961 verbhere = false;
1962
1963 ApplicationTools::displayMessage("Phylolikelihood " + TextTools::toString(phylonum));
1964 }
1965
1966 KeyvalTools::parseProcedure(phyloDesc, phyloName, args);
1967
1968 // Data
1969
1970 size_t nData = (args.find("data") == args.end() ? 1 : TextTools::to<size_t>(args["data"]));
1971
1972 if (mData.find(nData) == mData.end())
1973 {
1974 ApplicationTools::displayWarning("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer. Data number is wrong:" + TextTools::toString(nData) + ". Not built.");
1975 continue;
1976 }
1977
1978 auto data = mData.find(nData)->second;
1979
1980 if (!data)
1981 {
1982 ApplicationTools::displayWarning("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer. Data " + TextTools::toString(nData) + " does not match with aligned sequences. Not built.");
1983 continue;
1984 }
1985
1986 if (verbhere)
1988
1989 // Sequence Evolution or process
1990
1991 size_t nProcess = (args.find("process") == args.end() ? 1 : TextTools::to<size_t>(args["process"]));
1992 if (verbhere)
1994
1995
1996 // Construction
1997
1998 if (SPC->hasSubstitutionProcessNumber(nProcess))
1999 {
2000 auto l = std::make_shared<LikelihoodCalculationSingleProcess>(collNodes, data, nProcess);
2001 nPL = make_unique<SingleProcessPhyloLikelihood>(context, l, nProcess, nData);
2002 }
2003 else if (mSeqEvol.find(nProcess) != mSeqEvol.end())
2004 {
2005 // ////////////////
2006 // / from sequence evolutions to phyloLikelihoods
2007
2008 auto opse = dynamic_pointer_cast<OneProcessSequenceEvolution>(mSeqEvol[nProcess]);
2009
2010 if (opse)
2011 nPL = make_unique<OneProcessSequencePhyloLikelihood>(data, opse, collNodes, nProcess, nData);
2012 else
2013 {
2014 auto mse = dynamic_pointer_cast<MixtureSequenceEvolution>(mSeqEvol[nProcess]);
2015
2016 if (mse)
2017 nPL = make_unique<MixtureProcessPhyloLikelihood>(data, mse, collNodes, nProcess, nData);
2018
2019 else
2020 {
2021 auto hse = dynamic_pointer_cast<HmmSequenceEvolution>(mSeqEvol[nProcess]);
2022
2023 if (hse)
2024 nPL = make_unique<HmmProcessPhyloLikelihood>(data, hse, collNodes, nProcess, nData);
2025
2026 else
2027 {
2028 auto ase = dynamic_pointer_cast<AutoCorrelationSequenceEvolution>(mSeqEvol[nProcess]);
2029
2030 if (ase)
2031 nPL = make_unique<AutoCorrelationProcessPhyloLikelihood>(data, ase, collNodes, nProcess, nData);
2032 else
2033 {
2034 auto pse = dynamic_pointer_cast<PartitionSequenceEvolution>(mSeqEvol[nProcess]);
2035
2036 if (pse)
2037 nPL = make_unique<PartitionProcessPhyloLikelihood>(data, pse, collNodes, nProcess, nData);
2038
2039 else
2040 throw Exception("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : Unknown Sequence Evolution.");
2041 }
2042 }
2043 }
2044 }
2045 }
2046 else
2047 throw BadIntegerException("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : Unknown Process number.", (int)nProcess);
2048
2049 mPhylo->addPhyloLikelihood(phylonum, std::move(nPL));
2050 usedPhylo.push_back(phylonum);
2051 }
2052
2053 // Now clean the map
2054 for (auto it = phylosMap.begin(); it != phylosMap.end();)
2055 {
2056 if (it->second.size() == 0)
2057 {
2058 phylosMap.erase(it++);
2059 continue;
2060 }
2061
2062 vector<size_t>& vphyl = it->second;
2063 for (size_t i = vphyl.size(); i > 0; i--)
2064 {
2065 vector<size_t>::iterator posp = find(usedPhylo.begin(), usedPhylo.end(), vphyl[i - 1]);
2066 if (posp != usedPhylo.end())
2067 vphyl.erase(vphyl.begin() + static_cast<ptrdiff_t>(i - 1));
2068 }
2069 ++it;
2070 }
2071
2072 // Proceed the other phylos
2073
2074 while (phylosMap.size() != 0) // there is still phylos to be treated
2075 {
2076 if (usedPhylo.size() == 0)
2077 {
2078 ApplicationTools::displayWarning("Warning, some phylolikelihoods are not used.");
2079 break;
2080 }
2081
2082 usedPhylo.clear();
2083
2084 for (map<size_t, vector<size_t>>::iterator it = phylosMap.begin(); it != phylosMap.end(); it++)
2085 {
2086 nbPh++;
2087
2088 if (it->second.size() == 0)
2089 {
2090 size_t phylonum = it->first;
2091
2092 unique_ptr<PhyloLikelihoodInterface> nPL;
2093 string phyloName = "";
2094
2095 map<string, string> args;
2096
2097 string phyloDesc = ApplicationTools::getStringParameter("phylo", params, "Single", TextTools::toString(phylonum), warn);
2098 KeyvalTools::parseProcedure(phyloDesc, phyloName, args);
2099
2100 if (verbose)
2101 {
2102 if (nbPh <= 20)
2104 else
2105 verbhere = false;
2106
2107 ApplicationTools::displayMessage("Phylolikelihood " + TextTools::toString(phylonum));
2108 }
2109
2110 KeyvalTools::parseProcedure(phyloDesc, phyloName, args);
2111
2112 size_t indPhylo = 1;
2113 vector<size_t> vPhylo;
2114
2115 while (args.find("phylo" + TextTools::toString(indPhylo)) != args.end())
2116 {
2117 size_t numPhylo = (size_t) ApplicationTools::getIntParameter("phylo" + TextTools::toString(indPhylo), args, 1, "", true, warn);
2118
2119 vPhylo.push_back(numPhylo);
2120 indPhylo++;
2121 }
2122
2123 if (phyloName == "Mixture")
2124 {
2125 auto pMA = make_unique<AlignedPhyloLikelihoodMixture>(context, std::move(mPhylo), vPhylo);
2126 vector<double> vprob = ApplicationTools::getVectorParameter<double>("probas", args, ',', "(" + VectorTools::paste(vector<double>(vPhylo.size(), 1. / (double)vPhylo.size())) + ")");
2127 if (vprob.size() != 1)
2128 {
2129 if (vprob.size() != vPhylo.size())
2130 throw BadSizeException("Wrong size of probas description in Mixture", vprob.size(), vPhylo.size());
2131 Simplex si(vprob);
2132 pMA->setPhyloProb(si);
2133 }
2134
2135 nPL = std::move(pMA);
2136 }
2137 else if (phyloName == "HMM")
2138 {
2139 auto pMA = make_unique<AlignedPhyloLikelihoodHmm>(context, std::move(mPhylo), vPhylo);
2140
2141 size_t nbP = pMA->getNumbersOfPhyloLikelihoods().size();
2142
2143 string vs = "(" + VectorTools::paste(vector<double>(nbP, 1. / static_cast<double>(nbP)), ",") + ")";
2144 string vvs = "(";
2145 for (size_t i = 0; i < nbP; ++i)
2146 {
2147 vvs += (i == 0 ? "" : ",") + vs;
2148 }
2149 vvs += ")";
2150
2151 RowMatrix<double> mat = ApplicationTools::getMatrixParameter<double>("probas", args, ',', vvs);
2152
2153 FullHmmTransitionMatrix fhtm(pMA->getHmmStateAlphabet(), pMA->getNamespace());
2155
2156 pMA->matchParametersValues(fhtm.getParameters());
2157
2158 nPL = std::move(pMA);
2159 }
2160 else if (phyloName == "AutoCorr")
2161 {
2162 auto pMA = make_unique<AlignedPhyloLikelihoodAutoCorrelation>(context, std::move(mPhylo), vPhylo);
2163
2164 size_t nbP = pMA->getNumbersOfPhyloLikelihoods().size();
2165
2166 string vs = "(" + VectorTools::paste(vector<double>(nbP, 0.95), ",") + ")";
2167
2168 vector<double> v = ApplicationTools::getVectorParameter<double>("lambdas", args, ',', vs);
2169
2170 ParameterList pl;
2171
2172 for (size_t i = 0; i < v.size(); ++i)
2173 {
2174 pl.addParameter(Parameter("AutoCorr.lambda" + TextTools::toString(i + 1), v[i]));
2175 }
2176
2177 pMA->matchParametersValues(pl);
2178
2179 nPL = std::move(pMA);
2180 }
2181 else if (phyloName == "Product")
2182 {
2183 auto pAP = make_unique<AlignedPhyloLikelihoodProduct>(context, std::move(mPhylo), vPhylo);
2184
2185 nPL = std::move(pAP);
2186 }
2187 else
2188 throw Exception("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : Unknown Phylo name " + phyloName);
2189
2190 if (verbhere)
2191 {
2192 ApplicationTools::displayResult(" Phylolikelihood type", phyloName);
2193 ApplicationTools::displayResult(" Phylo numbers", VectorTools::paste(vPhylo, ","));
2194 }
2195
2196 mPhylo->addPhyloLikelihood(phylonum, std::move(nPL));
2197 usedPhylo.push_back(phylonum);
2198 }
2199 }
2200
2201 // Now clean the map
2202 for (auto it = phylosMap.begin(); it != phylosMap.end();)
2203 {
2204 if (it->second.size() == 0)
2205 {
2206 phylosMap.erase(it++);
2207 continue;
2208 }
2209
2210 vector<size_t>& vphyl = it->second;
2211 for (size_t i = vphyl.size(); i > 0; i--)
2212 {
2213 vector<size_t>::iterator posp = find(usedPhylo.begin(), usedPhylo.end(), vphyl[i - 1]);
2214 if (posp != usedPhylo.end())
2215 vphyl.erase(vphyl.begin() + static_cast<ptrdiff_t>(i - 1));
2216 }
2217 ++it;
2218 }
2219 }
2220
2221
2222 if (mPhylo->getNumbersOfPhyloLikelihoods().size() == 0)
2223 {
2224 if (warn)
2225 ApplicationTools::displayMessage("PhylogeneticsApplicationTools::getPhyloLikelihoodContainer : No phyloLikelihoods described");
2226 return mPhylo;
2227 }
2228
2229 // get the result phylogeny => with number 0 in the
2230 // PhyloLikelihoodContainer
2231
2233 ApplicationTools::displayMessage("Result Phylolikelihood ");
2234
2235 string sumAll;
2236 const vector<size_t>& nPhyl = mPhylo->getNumbersOfPhyloLikelihoods();
2237
2238 for (size_t i = 0; i < nPhyl.size(); i++)
2239 {
2240 if (i != 0)
2241 sumAll += " + ";
2242
2243 sumAll += "phylo" + TextTools::toString(nPhyl[i]);
2244 }
2245
2246 string resultDesc = ApplicationTools::getStringParameter("result", params, sumAll);
2247
2248 // check if really formula, or previous phylo
2249
2250 std::shared_ptr<PhyloLikelihoodInterface> nPL;
2251 size_t nP(0);
2252 bool flag(resultDesc.substr(0, 5) == "phylo");
2253
2254 if (flag)
2255 {
2256 try
2257 {
2258 nP = TextTools::to<size_t>(resultDesc.substr(5));
2259 }
2260 catch (Exception& e)
2261 {
2262 flag = false;
2263 }
2264 }
2265
2266 if (!flag)
2267 {
2268 nPL = make_shared<PhyloLikelihoodFormula>(context, mPhylo, resultDesc);
2269 if (verbose)
2270 ApplicationTools::displayResult(" Result", dynamic_cast<PhyloLikelihoodFormula*>(nPL.get())->output());
2271 }
2272 else
2273 {
2274 if (!mPhylo->hasPhyloLikelihood(nP))
2275 throw BadIntegerException("Unknown Phylolikelihood number for result", (int)nP);
2276 else
2277 nPL = mPhylo->getPhyloLikelihood(nP);
2278 if (verbose)
2279 ApplicationTools::displayResult(" Result", resultDesc);
2280 }
2281
2282 mPhylo->addPhyloLikelihood(0, nPL);
2283
2284 return mPhylo;
2285}
2286
2287
2288/******************************************************/
2289/*** DISTRIBUTIONS ********************************/
2290/******************************************************/
2291
2292
2294 const string& distDescription,
2295 map<string, string>& unparsedParameterValues,
2296 bool verbose)
2297{
2298 string distName;
2300 map<string, string> args;
2301 KeyvalTools::parseProcedure(distDescription, distName, args);
2302
2303 if (distName == "Dirichlet")
2304 {
2305 if (args.find("classes") == args.end())
2306 throw Exception("Missing argument 'classes' (vector of number of classes) in " + distName
2307 + " distribution");
2308 if (args.find("alphas") == args.end())
2309 throw Exception("Missing argument 'alphas' (vector of Dirichlet shape parameters) in Dirichlet distribution");
2310 vector<double> alphas;
2311 vector<size_t> classes;
2312
2313 string rf = args["alphas"];
2314 StringTokenizer strtok(rf.substr(1, rf.length() - 2), ",");
2315 while (strtok.hasMoreToken())
2316 alphas.push_back(TextTools::toDouble(strtok.nextToken()));
2317
2318 rf = args["classes"];
2319 StringTokenizer strtok2(rf.substr(1, rf.length() - 2), ",");
2320 while (strtok2.hasMoreToken())
2321 classes.push_back(TextTools::to<size_t>(strtok2.nextToken()));
2322
2323 pMDD = new DirichletDiscreteDistribution(classes, alphas);
2324 vector<string> v = pMDD->getParameters().getParameterNames();
2325
2326 for (size_t i = 0; i < v.size(); i++)
2327 {
2328 unparsedParameterValues[v[i]] = TextTools::toString(pMDD->getParameterValue(pMDD->getParameterNameWithoutNamespace(v[i])));
2329 }
2330 }
2331 else
2332 throw Exception("Unknown multiple distribution name: " + distName);
2333
2334 return pMDD;
2335}
2336
2337/******************************************************************************/
2338
2339unique_ptr<DiscreteDistributionInterface> PhylogeneticsApplicationTools::getRateDistribution(
2340 const map<string, string>& params,
2341 const string& suffix,
2342 bool suffixIsOptional,
2343 bool verbose)
2344{
2345 string distDescription = ApplicationTools::getStringParameter("rate_distribution", params, "Constant()", suffix, suffixIsOptional);
2346
2347 string distName;
2348 map<string, string> args;
2349 KeyvalTools::parseProcedure(distDescription, distName, args);
2350
2352 unique_ptr<DiscreteDistributionInterface> rDist(bIO.readDiscreteDistribution(distDescription, true));
2353
2354 if (verbose)
2355 {
2356 ApplicationTools::displayResult("Rate distribution", distName);
2357 ApplicationTools::displayResult("Number of classes", TextTools::toString(rDist->getNumberOfCategories()));
2358 }
2359
2360 return rDist;
2361}
2362
2363
2364/*************************************************************/
2365/***** OPTIMIZATORS *****************************************/
2366/*************************************************************/
2367
2368std::shared_ptr<PhyloLikelihoodInterface> PhylogeneticsApplicationTools::optimizeParameters(
2369 std::shared_ptr<PhyloLikelihoodInterface> lik,
2370 const map<string, string>& params,
2371 const string& suffix,
2372 bool suffixIsOptional,
2373 bool verbose,
2374 int warn)
2375{
2376 OptimizationTools::OptimizationOptions optopt(lik, params, suffix, suffixIsOptional, verbose, warn);
2377
2378 if (optopt.optMethodModel == "None")
2379 return lik;
2380
2381
2382 unsigned int n = 0;
2383
2385 {
2386 if (verbose && optopt.nstep > 1)
2387 ApplicationTools::displayResult("# of precision steps", TextTools::toString(optopt.nstep));
2388
2389 optopt.parameters.matchParametersValues(lik->getParameters());
2391 }
2393 {
2394 // Uses Newton-raphson algorithm with numerical derivatives when required.
2395 optopt.parameters.matchParametersValues(lik->getParameters());
2396 if (dynamic_pointer_cast<SingleProcessPhyloLikelihood>(lik))
2398 dynamic_pointer_cast<SingleProcessPhyloLikelihood>(lik),
2399 optopt);
2400 else
2402 }
2403 else
2404 throw Exception("Unknown optimization method: " + optopt.optMethodModel);
2405
2406 string finalMethod = ApplicationTools::getStringParameter("optimization.final", params, "none", suffix, suffixIsOptional, warn + 1);
2407 unique_ptr<OptimizerInterface> finalOptimizer = nullptr;
2408 if (finalMethod == "none")
2409 {}
2410 else if (finalMethod == "simplex")
2411 {
2412 finalOptimizer = make_unique<DownhillSimplexMethod>(lik);
2413 }
2414 else if (finalMethod == "powell")
2415 {
2416 finalOptimizer = make_unique<PowellMultiDimensions>(lik);
2417 }
2418 else
2419 throw Exception("Unknown final optimization method: " + finalMethod);
2420
2421 if (finalOptimizer)
2422 {
2423 optopt.parameters.matchParametersValues(lik->getParameters());
2424 if (verbose)
2425 ApplicationTools::displayResult("Final optimization step", finalMethod);
2426 finalOptimizer->setProfiler(optopt.profiler);
2427 finalOptimizer->setMessageHandler(optopt.messenger);
2428 finalOptimizer->setMaximumNumberOfEvaluations(optopt.nbEvalMax);
2429 finalOptimizer->getStopCondition()->setTolerance(optopt.tolerance);
2430 finalOptimizer->setVerbose(verbose);
2431 finalOptimizer->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
2432 finalOptimizer->init(optopt.parameters);
2433 finalOptimizer->optimize();
2434 n += finalOptimizer->getNumberOfEvaluations();
2435 }
2436
2437 if (verbose)
2438 ApplicationTools::displayResult("Performed", TextTools::toString(n) + " function evaluations.");
2439 if (optopt.backupFile != "none")
2440 {
2441 string bf = optopt.backupFile + ".def";
2442 rename(optopt.backupFile.c_str(), bf.c_str());
2443 }
2444 return lik;
2445}
2446
2447/******************************************************************************/
2448
2450{
2451 for (size_t i = 0; i < pl.size(); ++i)
2452 {
2453 auto constraint = pl[i].getConstraint();
2454 if (constraint)
2455 {
2456 double value = pl[i].getValue();
2457 if (!constraint->isCorrect(value - 1e-6) || !constraint->isCorrect(value + 1e-6))
2458 {
2459 ApplicationTools::displayWarning("This parameter has a value close to the boundary: " + pl[i].getName() + "(" + TextTools::toString(value) + ").");
2460 }
2461 }
2462 }
2463}
2464
2465
2466/******************************************************************************/
2467/**************** Output ************************************/
2468/******************************************************************************/
2469
2471 const TreeTemplate<Node>& tree,
2472 const map<string, string>& params,
2473 const string& prefix,
2474 const string& suffix,
2475 bool suffixIsOptional,
2476 bool verbose,
2477 bool checkOnly,
2478 int warn)
2479{
2480 string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
2481 string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, false, suffix, suffixIsOptional, "none", warn);
2482 OTree* treeWriter;
2483 if (format == "Newick")
2484 treeWriter = new Newick();
2485 else if (format == "Nexus")
2486 treeWriter = new NexusIOTree();
2487 else if (format == "NHX")
2488 treeWriter = new Nhx(false);
2489 else
2490 throw Exception("Unknown format for tree writing: " + format);
2491 if (!checkOnly)
2492 treeWriter->writeTree(tree, file, true);
2493 delete treeWriter;
2494 if (verbose)
2495 ApplicationTools::displayResult("Wrote tree to file ", file);
2496}
2497
2498/******************************************************************************/
2499
2501 const vector<const PhyloTree*>& trees,
2502 const map<string, string>& params,
2503 const string& prefix,
2504 const string& suffix,
2505 bool suffixIsOptional,
2506 bool verbose,
2507 bool checkOnly,
2508 int warn)
2509{
2510 string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn);
2511 string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, false, suffix, suffixIsOptional, "none", warn);
2512 OMultiPhyloTree* treeWriter;
2513 if (format == "Newick")
2514 treeWriter = new Newick();
2515 else if (format == "Nexus")
2516 treeWriter = new NexusIOTree();
2517 else if (format == "NHX")
2518 treeWriter = new Nhx();
2519 else
2520 throw Exception("Unknown format for tree writing: " + format);
2521
2522 if (!checkOnly)
2523 {
2524 treeWriter->writePhyloTrees(trees, file, true);
2525
2526 if (verbose)
2527 ApplicationTools::displayResult("Wrote trees to file ", file);
2528 }
2529
2530 delete treeWriter;
2531}
2532
2533/******************************************************************************/
2534
2537 const map<string, string>& params,
2538 const string& prefix,
2539 const string& suffix,
2540 bool suffixIsOptional,
2541 bool verbose,
2542 bool checkOnly,
2543 bool withIds,
2544 int warn)
2545{
2546 string format = ApplicationTools::getStringParameter(prefix + "tree.format", params, "Newick", suffix, suffixIsOptional, warn + 1);
2547 string file = ApplicationTools::getAFilePath(prefix + "tree.file", params, true, false, suffix, suffixIsOptional);
2548
2549 OPhyloTree* treeWriter;
2550 if (format == "Newick")
2551 treeWriter = new Newick();
2552 else if (format == "Nexus")
2553 treeWriter = new NexusIOTree();
2554 else if (format == "NHX")
2555 treeWriter = new Nhx();
2556 else
2557 throw Exception("Unknown format for tree writing: " + format);
2558
2559 if (!checkOnly)
2560 {
2561 string outtrees = "";
2562 vector<size_t> vTN = spc.getTreeNumbers();
2563
2564 for (size_t i = 0; i < vTN.size(); i++)
2565 {
2566 auto tree = spc.getTree(vTN[i]);
2567
2568 std::vector<shared_ptr<PhyloNode>> nodes = tree->getAllNodes();
2569
2570 for (auto& node : nodes)
2571 {
2572 if (tree->isLeaf(node) && withIds)
2573 node->setName(TextTools::toString(tree->getNodeIndex(node)) + "_" + node->getName());
2574 else
2575 node->setProperty("NodeId", BppString(TextTools::toString(tree->getNodeIndex(node))));
2576 }
2577
2578 Newick* nt = dynamic_cast<Newick*>(treeWriter);
2579 if (nt)
2580 nt->enableExtendedBootstrapProperty("NodeId");
2581
2582 treeWriter->writePhyloTree(*tree, file + "_" + TextTools::toString(vTN[i]), true);
2583 outtrees += (i == 0 ? "" : " ") + file + "_" + TextTools::toString(vTN[i]);
2584 }
2585 if (verbose)
2586 ApplicationTools::displayResult("Wrote trees to files : ", outtrees);
2587 }
2588
2589 delete treeWriter;
2590}
2591
2593{
2594 out << "model=";
2595 map<string, string> globalAliases;
2596 vector<string> writtenNames;
2597 BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2598 bIO.write(model, out, globalAliases, writtenNames);
2599 out.endLine();
2600}
2601
2603 const SubstitutionProcessInterface& process,
2604 OutputStream& out,
2605 int warn)
2606{
2607 try
2608 {
2609 auto& sp = dynamic_cast<const SimpleSubstitutionProcess&>(process);
2610 (out << "nonhomogeneous=no").endLine();
2611
2612 out << "model=";
2613 map<string, string> globalAliases;
2614 vector<string> writtenNames;
2615 BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2616 bIO.write(sp.model(0, 0), out, globalAliases, writtenNames);
2617 out.endLine();
2618 return;
2619 }
2620 catch (bad_cast& e)
2621 {}
2622
2623 try
2624 {
2625 const RateAcrossSitesSubstitutionProcess& pRA = dynamic_cast<const RateAcrossSitesSubstitutionProcess&>(process);
2626
2627 (out << "nonhomogeneous=no").endLine();
2628
2629 out << "model=";
2630 map<string, string> globalAliases;
2631 vector<string> writtenNames;
2632 const BppOBranchModelFormat bIO(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2633 bIO.write(process.model(0, 0), out, globalAliases, writtenNames);
2634 out.endLine();
2635 out.endLine();
2636
2637 // Rate distribution
2638
2639 out << "rate_distribution=";
2640 const BppORateDistributionFormat bIOR(true);
2641 bIOR.writeDiscreteDistribution(*pRA.getRateDistribution(), out, globalAliases, writtenNames);
2642 out.endLine();
2643 return;
2644 }
2645 catch (bad_cast& e)
2646 {}
2647
2648 try
2649 {
2650 const NonHomogeneousSubstitutionProcess& pNH = dynamic_cast<const NonHomogeneousSubstitutionProcess&>(process);
2651
2652 (out << "nonhomogeneous=general").endLine();
2653 (out << "nonhomogeneous.number_of_models=" << pNH.getNumberOfModels()).endLine();
2654
2655 vector<string> writtenNames;
2656
2657 // Loop over all models:
2658 for (size_t i = 0; i < pNH.getNumberOfModels(); ++i)
2659 {
2660 const auto model = pNH.getModel(i);
2661
2662 // First get the aliases for this model:
2663 map<string, string> aliases;
2664
2665 ParameterList pl = model->getParameters();
2666
2667 for (size_t np = 0; np < pl.size(); ++np)
2668 {
2669 string nfrom = pNH.getFrom(pl[np].getName() + "_" + TextTools::toString(i + 1));
2670 if (nfrom != "")
2671 aliases[pl[np].getName()] = nfrom;
2672 }
2673
2674 // Now print it:
2675 writtenNames.clear();
2676 out.endLine() << "model" << (i + 1) << "=";
2677 BppOBranchModelFormat bIOsm(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2678 bIOsm.write(*model, out, aliases, writtenNames);
2679 out.endLine();
2680 vector<unsigned int> ids = pNH.getNodesWithModel(i);
2681 out << "model" << (i + 1) << ".nodes_id=" << ids[0];
2682 for (size_t j = 1; j < ids.size(); ++j)
2683 {
2684 out << "," << ids[j];
2685 }
2686 out.endLine();
2687 }
2688
2689 // Root frequencies:
2690 out.endLine();
2691 if (pNH.getRootFrequencySet())
2692 {
2693 out << "nonhomogeneous.root_freq=";
2694
2695 map<string, string> aliases;
2696
2698
2699 for (size_t np = 0; np < pl.size(); ++np)
2700 {
2701 string nfrom = pNH.getFrom(pl[np].getName());
2702 if (nfrom != "")
2703 aliases[pl[np].getName()] = nfrom;
2704 }
2705
2707 bIO.writeFrequencySet(pNH.rootFrequencySet(), out, aliases, writtenNames);
2708 }
2709 else
2710 out << "nonhomogeneous.stationarity=true";
2711 out.endLine();
2712
2713 // Rate distribution
2714
2715 map<string, string> aliases;
2716 auto& pdd = pNH.rateDistribution();
2717
2718 ParameterList pl = pdd.getParameters();
2719 for (size_t np = 0; np < pl.size(); ++np)
2720 {
2721 string nfrom = pNH.getFrom(pl[np].getName());
2722 if (nfrom != "")
2723 aliases[pl[np].getName()] = nfrom;
2724 }
2725 out.endLine();
2726 out << "rate_distribution=";
2727 const BppORateDistributionFormat bIO(true);
2728 bIO.writeDiscreteDistribution(pdd, out, aliases, writtenNames);
2729 out.endLine();
2730 return;
2731 }
2732 catch (bad_cast& e)
2733 {}
2734
2735 return;
2736}
2737
2739{
2740 vector<string> writtenNames;
2741
2742 // The models
2743 vector<size_t> vModN = collection.getModelNumbers();
2744
2745 for (auto modn : vModN)
2746 {
2747 const auto& model = *collection.getModel(modn);
2748
2749 // First get the aliases for this model:
2750 map<string, string> aliases;
2751
2752 if (withAlias)
2753 {
2754 ParameterList pl = model.getParameters();
2755
2756 for (size_t np = 0; np < pl.size(); ++np)
2757 {
2758 string nfrom = collection.getFrom(pl[np].getName() + "_" + TextTools::toString(modn));
2759 if (nfrom != "")
2760 aliases[pl[np].getName()] = nfrom;
2761 }
2762 }
2763
2764 // Now print it:
2765 writtenNames.clear();
2766 out << "model" << modn << "=";
2767 BppOBranchModelFormat bIOsm(BppOSubstitutionModelFormat::ALL, true, true, true, false, warn);
2768 bIOsm.write(model, out, aliases, writtenNames);
2769 out.endLine();
2770 out.endLine();
2771 }
2772
2773 // Root frequencies:
2774 vector<size_t> rootFreqN = collection.getFrequenciesNumbers();
2775
2776 for (size_t i = 0; i < rootFreqN.size(); ++i)
2777 {
2778 auto rootFreq = collection.getFrequencySet(rootFreqN[i]);
2779
2780 // Now print it:
2781 writtenNames.clear();
2782 out.endLine() << "root_freq" << rootFreqN[i] << "=";
2784
2785 map<string, string> aliases;
2786
2787 if (withAlias)
2788 {
2789 ParameterList pl = rootFreq->getParameters();
2790
2791 for (size_t np = 0; np < pl.size(); ++np)
2792 {
2793 string nfrom = collection.getFrom(pl[np].getName() + "_" + TextTools::toString(rootFreqN[i]));
2794 if (nfrom != "")
2795 aliases[pl[np].getName()] = nfrom;
2796 }
2797 }
2798
2799 bIOf.writeFrequencySet(*rootFreq, out, aliases, writtenNames);
2800 out.endLine();
2801 }
2802
2803 // Rate distribution
2804
2805 vector<size_t> vDistN = collection.getRateDistributionNumbers();
2806
2807 for (auto distn : vDistN)
2808 {
2809 if (distn < 10000)
2810 {
2811 auto dist = collection.getRateDistribution(distn);
2812
2813 // First get the aliases for this model:
2814 map<string, string> aliases;
2815
2816 if (withAlias)
2817 {
2818 ParameterList pl = dist->getParameters();
2819
2820 for (size_t np = 0; np < pl.size(); ++np)
2821 {
2822 string nfrom = collection.getFrom(pl[np].getName() + "_" + TextTools::toString(distn));
2823 if (nfrom != "")
2824 aliases[pl[np].getName()] = nfrom;
2825 }
2826 }
2827
2828 // Now print it:
2829 writtenNames.clear();
2830 out.endLine() << "rate_distribution" << distn << "=";
2831 BppORateDistributionFormat bIOd(true);
2832 bIOd.writeDiscreteDistribution(*dist, out, aliases, writtenNames);
2833 out.endLine();
2834 }
2835 }
2836
2837 // scenarios
2838
2839 vector<size_t> vSce = collection.getScenarioNumbers();
2840
2841 if (vSce.size() > 0)
2842 out.endLine();
2843
2844 vector<const ModelPath*> vMP;
2845
2846 // first output the scenarios
2847 for (const auto& scennum : vSce)
2848 {
2849 const auto scen = collection.getModelScenario(scennum);
2850
2851 out.endLine();
2852
2853 out << "scenario" << scennum << "=";
2854
2855 size_t nbMP = scen->getNumberOfModelPaths();
2856
2857 for (size_t mpn = 0; mpn < nbMP; mpn++)
2858 {
2859 const auto& mp = scen->getModelPath(mpn);
2860
2861 auto itmp = find(vMP.begin(), vMP.end(), mp.get());
2862 auto inmp = std::distance(vMP.begin(), itmp);
2863 if (itmp == vMP.end())
2864 vMP.push_back(mp.get());
2865
2866 if (mpn != 0)
2867 out << "|";
2868 out << "path" << TextTools::toString(inmp + 1);
2869 }
2870 out.endLine();
2871 }
2872
2873 // then the model paths
2874 for (size_t inmp = 0; inmp < vMP.size(); inmp++)
2875 {
2876 out.endLine();
2877 out << "path" << inmp + 1 << "=";
2878
2879 const ModelPath& mp = *vMP[inmp];
2880
2881 auto vMod = mp.getModels();
2882
2883 bool dem = true;
2884 for (const auto& mod:vMod)
2885 {
2886 // look for model number in collection
2887 size_t modN = collection.getModelIndex(mod);
2888
2889 if (!dem)
2890 out << "&";
2891
2892 out << "model" << modN;
2893 out << "[" << mp.getPathNode(mod).to_string() << "]";
2894 dem = false;
2895 }
2896 out.endLine();
2897 }
2898
2899 // processes
2900 out.endLine();
2901
2902 vector<size_t> vprocN = collection.getSubstitutionProcessNumbers();
2903
2904 for (size_t i = 0; i < vprocN.size(); ++i)
2905 {
2906 const auto& spcm = collection.substitutionProcess(vprocN[i]);
2907
2908 out << "process" << vprocN[i] << "=";
2909
2910 if (spcm.getNumberOfModels() == 1)
2911 out << "Homogeneous(model=" << spcm.getModelNumbers()[0];
2912 else
2913 {
2914 out << "Nonhomogeneous(";
2915 vector<size_t> vMN = spcm.getModelNumbers();
2916 for (size_t j = 0; j < vMN.size(); ++j)
2917 {
2918 if (j != 0)
2919 out << ",";
2920
2921 out << "model" << (j + 1) << "=" << vMN[j];
2922 out << ",";
2923
2924 vector<unsigned int> ids = spcm.getNodesWithModel(vMN[j]);
2925 out << "model" << (j + 1) << ".nodes_id=(" << ids[0];
2926 for (size_t k = 1; k < ids.size(); ++k)
2927 {
2928 out << "," << ids[k];
2929 }
2930 out << ")";
2931 }
2932 }
2933
2934 out << ", tree=" << spcm.getTreeNumber();
2935
2936 out << ", rate=";
2937 size_t dN = spcm.getRateDistributionNumber();
2938
2939 if (dN < 10000)
2940 out << dN;
2941 else
2942 out << size_t(dN / 10000 - 1) << "." << dN % 10000;
2943
2944 if (spcm.getRootFrequencySet())
2945 out << ", root_freq=" << spcm.getRootFrequenciesNumber();
2946
2947 if (spcm.getModelScenario())
2948 out << ", scenario=" << spcm.getModelScenarioNumber();
2949
2950 out << ")";
2951 out.endLine();
2952 out.endLine();
2953 }
2954}
2955
2956
2958{
2959 out << "# Log likelihood = ";
2960
2961 std::shared_ptr<const PhyloLikelihoodInterface> result = phylocont[0];
2962
2963 if (!result)
2964 {
2965 out << "Nan";
2966 out.endLine();
2967 return;
2968 }
2969
2970 out.setPrecision(20) << (-result->getValue());
2971 out.endLine();
2972 out.endLine();
2973
2974
2975 // First output result
2976 out << "result=";
2977
2978 auto pop = dynamic_pointer_cast<const PhyloLikelihoodFormula>(result);
2979
2980 vector<size_t> phyldep;
2981
2982 if (!pop)
2983 {
2984 out << "phylo1";
2985 phyldep.push_back(1);
2986 }
2987 else
2988 {
2989 string popout = pop->output();
2990
2991 out << popout;
2992
2993 StringTokenizer st(popout, "phylo", true, true);
2994 st.nextToken();
2995
2996
2997 while (st.hasMoreToken())
2998 {
2999 string ex = st.nextToken();
3000 phyldep.push_back((size_t)(atoi(ex.c_str())));
3001 }
3002 }
3003
3004 out.endLine();
3005 out.endLine();
3006
3007 // Then the other phylolikelihoods
3008
3009 while (phyldep.size() != 0)
3010 {
3011 size_t num = phyldep[0];
3012 std::shared_ptr<const PhyloLikelihoodInterface> phyloLike = phylocont[num];
3013
3014 // remove phylolikelihoods with this number
3015 auto itf = find(phyldep.begin(), phyldep.end(), num);
3016 while (itf != phyldep.end())
3017 {
3018 phyldep.erase(itf);
3019 itf = find(itf, phyldep.end(), num);
3020 }
3021
3022
3023 // then output
3024
3025 if (dynamic_pointer_cast<const SingleDataPhyloLikelihoodInterface>(phyloLike))
3026 printParameters(*dynamic_pointer_cast<const SingleDataPhyloLikelihoodInterface>(phyloLike), out, num, warn);
3027 else
3028 {
3029 out << "phylo" << num << "=";
3030
3031 auto mDP = dynamic_pointer_cast<const PhyloLikelihoodSetInterface>(phyloLike);
3032 if (mDP)
3033 {
3034 if (dynamic_pointer_cast<const AlignedPhyloLikelihoodMixture>(phyloLike))
3035 {
3036 auto pM = dynamic_pointer_cast<const AlignedPhyloLikelihoodMixture>(phyloLike);
3037
3038 out << "Mixture(probas=(" << VectorTools::paste(pM->getPhyloProbabilities(), ",");
3039
3040 out << "),";
3041 }
3042
3043 else if (dynamic_pointer_cast<const AlignedPhyloLikelihoodHmm>(phyloLike))
3044 {
3045 auto pM = dynamic_pointer_cast<const AlignedPhyloLikelihoodHmm>(phyloLike);
3046 out << "HMM(probas=";
3047
3049 copyEigenToBpp(pM->getHmmTransitionMatrix(), tMt);
3050 MatrixTools::print(tMt, out);
3051
3052 out << ",";
3053 }
3054
3055 else if (dynamic_pointer_cast<const AlignedPhyloLikelihoodAutoCorrelation>(phyloLike))
3056 {
3057 auto pM = dynamic_pointer_cast<const AlignedPhyloLikelihoodAutoCorrelation>(phyloLike);
3058
3059 out << "AutoCorr(lambdas=(";
3060
3061 Vdouble vP;
3062 for (unsigned int i = 0; i < pM->getHmmTransitionMatrix().cols(); ++i)
3063 {
3064 vP.push_back(pM->getHmmTransitionMatrix()(i, i));
3065 }
3066
3067 out << VectorTools::paste(vP, ",");
3068
3069 out << "),";
3070 }
3071 else if (dynamic_pointer_cast<const AlignedPhyloLikelihoodProduct>(phyloLike))
3072 {
3073 out << "Product(";
3074 }
3075 else
3076 throw Exception("PhylogeneticsApplicationTools::printParameters - unknown phylolikelihood type : phylo " + TextTools::toString(num));
3077
3078
3079 vector<size_t> vPN = mDP->getNumbersOfPhyloLikelihoods();
3080
3081 for (size_t i = 0; i < vPN.size(); i++)
3082 {
3083 out << "phylo" << i + 1 << "=" << vPN[i];
3084 if (i != vPN.size() - 1)
3085 out << ",";
3086 }
3087
3088 out << ")";
3089
3090 // update phyldep
3091 for (size_t i = 0; i < vPN.size(); i++)
3092 {
3093 if (find(phyldep.begin(), phyldep.end(), vPN[i]) == phyldep.end())
3094 phyldep.push_back(vPN[i]);
3095 }
3096 }
3097 out.endLine();
3098 }
3099 out.endLine();
3100 }
3101 out.endLine();
3102}
3103
3104
3106{
3107 out << "phylo" << TextTools::toString(nPhylo) << "=";
3108
3109 out << "Single(";
3110
3111 try
3112 {
3113 auto& pMP = dynamic_cast<const SequencePhyloLikelihoodInterface&>(phyloLike);
3114 out << "process=" << pMP.getSequenceEvolutionNumber();
3115 goto finish;
3116 }
3117 catch (bad_cast& e)
3118 {}
3119
3120 try
3121 {
3122 auto& pS = dynamic_cast<const SingleProcessPhyloLikelihood&>(phyloLike);
3123 out << "process=" << pS.getSubstitutionProcessNumber();
3124 }
3125 catch (bad_cast& e)
3126 {}
3127
3128finish:
3129 out << ", data=" << TextTools::toString(phyloLike.getNData()) << ")";
3130 out.endLine();
3131}
3132
3134{
3135 out << "process" << TextTools::toString(nEvol) << "=";
3136
3137 if (dynamic_cast<const OneProcessSequenceEvolution*>(&evol))
3138 {
3139 const OneProcessSequenceEvolution* pOP = dynamic_cast<const OneProcessSequenceEvolution*>(&evol);
3140
3141 out << "Simple(process=" << pOP->getSubstitutionProcessNumber() << ")";
3142 }
3143 else if (dynamic_cast<const MultiProcessSequenceEvolution*>(&evol))
3144 {
3145 const MultiProcessSequenceEvolution* pMP = dynamic_cast<const MultiProcessSequenceEvolution*>(&evol);
3146
3147 if (dynamic_cast<const MixtureSequenceEvolution*>(&evol))
3148 {
3149 const MixtureSequenceEvolution* pM = dynamic_cast<const MixtureSequenceEvolution*>(&evol);
3150
3151 out << "Mixture(probas=(" << VectorTools::paste(pM->getSubProcessProbabilities(), ",");
3152 out << "),";
3153 }
3154
3155 else if (dynamic_cast<const HmmSequenceEvolution*>(&evol))
3156 {
3157 const HmmSequenceEvolution* pM = dynamic_cast<const HmmSequenceEvolution*>(&evol);
3158 out << "HMM(probas=";
3159
3160 const Matrix<double>& tMt = pM->hmmTransitionMatrix().getPij();
3161 MatrixTools::print(tMt, out);
3162
3163 out << ",";
3164 }
3165 else if (dynamic_cast<const AutoCorrelationSequenceEvolution*>(&evol))
3166 {
3167 const AutoCorrelationSequenceEvolution* pM = dynamic_cast<const AutoCorrelationSequenceEvolution*>(&evol);
3168
3169 out << "AutoCorr(lambdas=(";
3170
3171 Vdouble vP;
3172 for (unsigned int i = 0; i < pM->getNumberOfSubstitutionProcess(); i++)
3173 {
3174 vP.push_back(pM->hmmTransitionMatrix().Pij(i, i));
3175 }
3176
3177 out << VectorTools::paste(vP, ",");
3178
3179 out << "),";
3180 }
3181 else if (dynamic_cast<const PartitionSequenceEvolution*>(&evol))
3182 {
3183 const PartitionSequenceEvolution* pM = dynamic_cast<const PartitionSequenceEvolution*>(&evol);
3184
3185 out << "Partition(";
3186
3187 const map<size_t, vector<size_t>>& mProcPos = pM->mapOfProcessSites();
3188
3189 vector<size_t> vP = pMP->getSubstitutionProcessNumbers();
3190
3191 for (unsigned int i = 0; i < vP.size(); i++)
3192 {
3193 out << "process" << TextTools::toString(i + 1) << ".sites=";
3194
3195 vector<size_t> v = mProcPos.find(vP[i])->second + 1;
3196
3197 if (v.size() > 1)
3198 out << "(";
3199
3200 VectorTools::printRange(v, out, ",", ":");
3201
3202 if (v.size() > 1)
3203 out << ")";
3204
3205 out << ",";
3206 }
3207 }
3208
3209 vector<size_t> vPN = pMP->getSubstitutionProcessNumbers();
3210
3211 for (size_t i = 0; i < vPN.size(); i++)
3212 {
3213 out << "process" << i + 1 << "=" << vPN[i];
3214 if (i != vPN.size() - 1)
3215 out << ",";
3216 }
3217
3218 out << ")";
3219 }
3220
3221 out.endLine();
3222}
3223
3224
3225// ///////////////////////////////////////////////////////
3226// Analysis Information
3227// //////////////////////////////////////////////////////
3228
3229
3230void PhylogeneticsApplicationTools::printAnalysisInformation(const PhyloLikelihoodContainer& phylocont, const string& infosFile, int warn)
3231{
3232 std::shared_ptr<const PhyloLikelihoodInterface> result = phylocont[0];
3233
3234 if (!result)
3235 return;
3236
3237 vector<size_t> phyldep = phylocont.getNumbersOfPhyloLikelihoods();
3238
3239 while (phyldep.size() != 0)
3240 {
3241 size_t num = phyldep[0];
3242
3243 phyldep.erase(phyldep.begin());
3244
3245 std::shared_ptr<const PhyloLikelihoodInterface> phyloLike = phylocont[num];
3246 // output
3247
3248 string info_out = infosFile + "_" + TextTools::toString(num);
3249
3250 if (dynamic_pointer_cast<const SingleDataPhyloLikelihoodInterface>(phyloLike) && num != 0)
3251 printAnalysisInformation(*dynamic_pointer_cast<const SingleDataPhyloLikelihoodInterface>(phyloLike), info_out, warn);
3252 else
3253 {
3254 auto sOAP = dynamic_pointer_cast<const AlignedPhyloLikelihoodSetInterface>(phyloLike);
3255 if (sOAP)
3256 {
3257 if (num != 0)
3258 printAnalysisInformation(*sOAP, info_out, warn);
3259
3260 vector<size_t> vPN = sOAP->getNumbersOfPhyloLikelihoods();
3261
3262 // update phyldep
3263 phyldep.assign(vPN.begin(), vPN.end());
3264 phyldep = VectorTools::unique(phyldep);
3265 }
3266 else
3267 {
3268 auto sOAB = dynamic_pointer_cast<const PhyloLikelihoodSetInterface>(phyloLike);
3269 if (sOAB)
3270 {
3271 vector<size_t> vPN = sOAB->getNumbersOfPhyloLikelihoods();
3272
3273 // update phyldep
3274 phyldep.assign(vPN.begin(), vPN.end());
3275 phyldep = VectorTools::unique(phyldep);
3276 }
3277 }
3278 }
3279 }
3280}
3281
3282
3284{
3285 const AlignedPhyloLikelihoodMixture* mOAP = nullptr;
3286 const AlignedPhyloLikelihoodHmm* hOAP = nullptr;
3287 const AlignedPhyloLikelihoodAutoCorrelation* aCOAP = nullptr;
3288
3289 vector<size_t> phyloNum = sOAP.getNumbersOfPhyloLikelihoods();
3290 size_t nbP = phyloNum.size();
3291
3292 if (dynamic_cast<const AlignedPhyloLikelihoodProduct*>(&sOAP) == nullptr)
3293 {
3294 StlOutputStream out(make_unique<ofstream>(infosFile.c_str(), ios::out));
3295
3296 if (dynamic_cast<const AlignedPhyloLikelihoodMixture*>(&sOAP) != nullptr)
3297 mOAP = dynamic_cast<const AlignedPhyloLikelihoodMixture*>(&sOAP);
3298 else if (dynamic_cast<const AlignedPhyloLikelihoodHmm*>(&sOAP) != nullptr)
3299 hOAP = dynamic_cast<const AlignedPhyloLikelihoodHmm*>(&sOAP);
3300 else if (dynamic_cast<const AlignedPhyloLikelihoodAutoCorrelation*>(&sOAP) != nullptr)
3301 aCOAP = dynamic_cast<const AlignedPhyloLikelihoodAutoCorrelation*>(&sOAP);
3302
3303 vector<string> colNames;
3304 colNames.push_back("Sites");
3305 colNames.push_back("lnL");
3306
3307 for (size_t i = 0; i < nbP; i++)
3308 {
3309 colNames.push_back("lnL_phylo_" + TextTools::toString(phyloNum[i]));
3310 }
3311 for (size_t i = 0; i < nbP; i++)
3312 {
3313 colNames.push_back("prob_phylo_" + TextTools::toString(phyloNum[i]));
3314 }
3315
3316
3317 vector<string> row(2 + (nbP > 1 ? 2 * nbP : 0));
3318 DataTable* infos = new DataTable(colNames);
3319
3320 Vdouble vap(0);
3321 Vdouble vlog(nbP);
3322
3323 if (mOAP)
3324 vap = mOAP->getPhyloProbabilities();
3325
3326 size_t nSites = sOAP.getNumberOfSites();
3327
3328 for (size_t i = 0; i < nSites; i++)
3329 {
3330 row[0] = (string("[" + TextTools::toString(i) + "]"));
3332
3333 if (nbP > 1)
3334 {
3335 for (size_t j = 0; j < nbP; j++)
3336 {
3337 vlog[j] = sOAP.getLogLikelihoodForASiteForAPhyloLikelihood(i, phyloNum[j]);
3338 }
3339
3340 for (size_t j = 0; j < nbP; j++)
3341 {
3342 row[2 + j] = TextTools::toString(vlog[j]);
3343 }
3344
3345 if (mOAP)
3346 {
3347 double sum = VectorTools::sumExp(vlog, vap);
3348 for (size_t j = 0; j < nbP; j++)
3349 {
3350 row[2 + nbP + j] = TextTools::toString(exp(vlog[j]) * vap[j] / sum);
3351 }
3352 }
3353 else
3354 {
3355 if (hOAP)
3357 else if (aCOAP)
3359
3360 for (size_t j = 0; j < vap.size(); j++)
3361 {
3362 row[2 + nbP + j] = TextTools::toString(vap[j]);
3363 }
3364 }
3365 }
3366
3367 infos->addRow(row);
3368 }
3369
3370 DataTable::write(*infos, out, "\t");
3371 delete infos;
3372 }
3373}
3374
3375/******************************************************************************/
3376
3378 const SingleDataPhyloLikelihoodInterface& phyloLike,
3379 const string& infosFile,
3380 int warn)
3381{
3382 try
3383 {
3384 auto& pSPL = dynamic_cast<const SingleProcessPhyloLikelihood&>(phyloLike);
3385
3386 StlOutputStream out(make_unique<ofstream>(infosFile.c_str(), ios::out));
3387
3388 std::shared_ptr<const SubstitutionProcessInterface> pSP = pSPL.getSubstitutionProcess();
3389
3390 vector<string> colNames;
3391 colNames.push_back("Sites");
3392 colNames.push_back("is.complete");
3393 colNames.push_back("is.constant");
3394 colNames.push_back("lnL");
3395
3396 auto pDD = pSP->getRateDistribution();
3397 size_t nbR = 0;
3398
3399 if (pDD)
3400 {
3401 nbR = pDD->getNumberOfCategories();
3402
3403 if (nbR > 1)
3404 for (size_t i = 0; i < nbR; ++i)
3405 {
3406 colNames.push_back("Pr_rate=" + TextTools::toString(pDD->getCategory(i)));
3407 }
3408 }
3409 colNames.push_back("rc");
3410 colNames.push_back("pr");
3411
3412 std::shared_ptr<const AlignmentDataInterface> sites = phyloLike.getData();
3413
3414 vector<string> row(6 + (nbR > 1 ? nbR : 0));
3415 auto infos = make_unique<DataTable>(colNames);
3416
3417 VVdouble vvPP(pSPL.getPosteriorProbabilitiesPerSitePerClass());
3418
3419 for (size_t i = 0; i < sites->getNumberOfSites(); ++i)
3420 {
3421 double lnL = phyloLike.getLogLikelihoodForASite(i);
3422
3423 const CoreSiteInterface& currentSite = sites->site(i);
3424 int currentSiteCoordinate = currentSite.getCoordinate();
3425 string isCompl = "NA";
3426 string isConst = "NA";
3427 try
3428 {
3429 isCompl = (SymbolListTools::isComplete(currentSite) ? "1" : "0");
3430 }
3431 catch (EmptySiteException& ex)
3432 {}
3433 try
3434 {
3435 isConst = (SymbolListTools::isConstant(currentSite) ? "1" : "0");
3436 }
3437 catch (EmptySiteException& ex)
3438 {}
3439 row[0] = (string("[" + TextTools::toString(currentSiteCoordinate) + "]"));
3440 row[1] = isCompl;
3441 row[2] = isConst;
3442 row[3] = TextTools::toString(lnL);
3443
3444 if (nbR > 1)
3445 {
3446 double pr = 0;
3447 for (size_t j = 0; j < nbR; ++j)
3448 {
3449 row[4 + j] = TextTools::toString(vvPP[i][j]);
3450 pr += vvPP[i][j] * pDD->getCategory(j);
3451 }
3452
3453 row[4 + nbR] = TextTools::toString(VectorTools::whichMax(vvPP[i]) + 1);
3454 row[5 + nbR] = TextTools::toString(pr);
3455 }
3456 else
3457 {
3458 row[4] = "1";
3459 row[5] = "1";
3460 }
3461
3462 infos->addRow(row);
3463 }
3464
3465 DataTable::write(*infos, out, "\t");
3466 return;
3467 }
3468 catch (bad_cast& e)
3469 {}
3470
3471 try
3472 {
3473 auto& pPPL = dynamic_cast<const PartitionProcessPhyloLikelihood&>(phyloLike);
3474
3475 auto& pSE = dynamic_cast<const PartitionSequenceEvolution&>(pPPL.sequenceEvolution());
3476
3477 const map<size_t, vector<size_t>>& mProcPos = pSE.mapOfProcessSites();
3478
3479 vector<size_t> nbProc = pSE.getSubstitutionProcessNumbers();
3480
3481 map<size_t, size_t> mNbr;
3482
3483 for (auto nP : nbProc)
3484 {
3485 auto& sp = pSE.substitutionProcess(nP);
3486 auto pDD = sp.getRateDistribution();
3487 mNbr[nP] = (pDD ? pDD->getNumberOfCategories() : 1);
3488 }
3489
3490 size_t maxR = max_element(mNbr.begin(), mNbr.end(), [](const std::pair<size_t, size_t>& p1, const std::pair<size_t, size_t>& p2){
3491 return p1.second < p2.second;
3492 })->second;
3493
3494 StlOutputStream out(make_unique<ofstream>(infosFile.c_str(), ios::out));
3495
3496 vector<string> colNames;
3497 colNames.push_back("Sites");
3498 colNames.push_back("is.complete");
3499 colNames.push_back("is.constant");
3500 colNames.push_back("lnL");
3501
3502 if (maxR > 1)
3503 for (size_t i = 0; i < maxR; i++)
3504 {
3505 colNames.push_back("prob" + TextTools::toString(i + 1));
3506 }
3507
3508 size_t nbSites = pSE.getNumberOfSites();
3509
3510 vector<string> row(4 + (maxR > 1 ? maxR : 0));
3511 auto infos = make_unique<DataTable>(nbSites, colNames);
3512 for (auto nP : nbProc)
3513 {
3514 auto pSPPL = dynamic_pointer_cast<const SingleProcessPhyloLikelihood>(pPPL.getPhyloLikelihood(nP));
3515
3516 if (!pSPPL)
3517 throw Exception("PhylogeneticsApplicationTools::printAnalysisInformation : no SingleProcessPhyloLikelihood in PartitionProcessPhyloLikelihood.");
3518
3519 size_t nbr = mNbr[pSPPL->getSubstitutionProcessNumber()];
3520
3521 const vector<size_t>& mPos = mProcPos.at(nP);
3522
3523 auto sites = pSPPL->getData();
3524
3525 for (size_t i = 0; i < sites->getNumberOfSites(); ++i)
3526 {
3527 double lnL = pSPPL->getLogLikelihoodForASite(i);
3528
3529 const CoreSiteInterface& currentSite = sites->site(i);
3530 int currentSiteCoordinate = currentSite.getCoordinate();
3531 string isCompl = "NA";
3532 string isConst = "NA";
3533 try
3534 {
3535 isCompl = (SymbolListTools::isComplete(currentSite) ? "1" : "0");
3536 }
3537 catch (EmptySiteException& ex)
3538 {}
3539 try
3540 {
3541 isConst = (SymbolListTools::isConstant(currentSite) ? "1" : "0");
3542 }
3543 catch (EmptySiteException& ex)
3544 {}
3545 row[0] = (string("[" + TextTools::toString(currentSiteCoordinate) + "]"));
3546 row[1] = isCompl;
3547 row[2] = isConst;
3548 row[3] = TextTools::toString(lnL);
3549
3550 if (nbr > 1)
3551 {
3552 Vdouble vPP = pSPPL->getPosteriorProbabilitiesForSitePerClass(i);
3553
3554 for (size_t j = 0; j < nbr; ++j)
3555 {
3556 row[4 + j] = TextTools::toString(vPP[j]);
3557 }
3558 }
3559
3560 for (size_t j = nbr; j < maxR; j++)
3561 {
3562 row[4 + j] = "NA";
3563 }
3564
3565 infos->setRow(mPos[i], row);
3566 }
3567 }
3568
3569 DataTable::write(*infos, out, "\t");
3570 return;
3571 }
3572 catch (bad_cast& e)
3573 {}
3574
3575 try
3576 {
3577 auto& pMPL = dynamic_cast<const MultiProcessSequencePhyloLikelihood&>(phyloLike);
3578
3579 StlOutputStream out(make_unique<ofstream>(infosFile.c_str(), ios::out));
3580
3581 vector<string> colNames;
3582 colNames.push_back("Sites");
3583 colNames.push_back("is.complete");
3584 colNames.push_back("is.constant");
3585 colNames.push_back("lnL");
3586
3587 size_t nbP = pMPL.getNumberOfSubstitutionProcess();
3588
3589 if (nbP > 1)
3590 {
3591 for (size_t i = 0; i < nbP; ++i)
3592 {
3593 colNames.push_back("lnL" + TextTools::toString(i + 1));
3594 }
3595 for (size_t i = 0; i < nbP; ++i)
3596 {
3597 colNames.push_back("prob" + TextTools::toString(i + 1));
3598 }
3599 }
3600
3601 auto sites = phyloLike.getData();
3602
3603 vector<string> row(4 + (nbP > 1 ? 2 * nbP : 0));
3604 DataTable* infos = new DataTable(colNames);
3605
3606 VVdouble vvPP = pMPL.getPosteriorProbabilitiesPerSitePerProcess();
3607 VVdouble vvL = pMPL.getLikelihoodPerSitePerProcess();
3608
3609 for (size_t i = 0; i < sites->getNumberOfSites(); ++i)
3610 {
3611 double lnL = phyloLike.getLogLikelihoodForASite(i);
3612 const CoreSiteInterface& currentSite = sites->site(i);
3613 int currentSiteCoordinate = currentSite.getCoordinate();
3614 string isCompl = "NA";
3615 string isConst = "NA";
3616 try
3617 {
3618 isCompl = (SymbolListTools::isComplete(currentSite) ? "1" : "0");
3619 }
3620 catch (EmptySiteException& ex)
3621 {}
3622 try
3623 {
3624 isConst = (SymbolListTools::isConstant(currentSite) ? "1" : "0");
3625 }
3626 catch (EmptySiteException& ex)
3627 {}
3628 row[0] = (string("[" + TextTools::toString(currentSiteCoordinate) + "]"));
3629 row[1] = isCompl;
3630 row[2] = isConst;
3631 row[3] = TextTools::toString(lnL);
3632
3633 if (nbP > 1)
3634 {
3635 for (size_t j = 0; j < nbP; j++)
3636 {
3637 row[4 + j] = TextTools::toString(std::log(vvL[i][j]));
3638 }
3639 for (size_t j = 0; j < nbP; j++)
3640 {
3641 row[4 + nbP + j] = TextTools::toString(vvPP[i][j]);
3642 }
3643 }
3644 infos->addRow(row);
3645 }
3646
3647 DataTable::write(*infos, out, "\t");
3648 delete infos;
3649 return;
3650 }
3651 catch (bad_cast& e)
3652 {}
3653
3654 return;
3655}
3656
3657
3658/******************************************************************************/
3659
3661{
3662 out << "rate_distribution=";
3663 map<string, string> globalAliases;
3664 vector<string> writtenNames;
3666 bIO->writeDiscreteDistribution(rDist, out, globalAliases, writtenNames);
3667 delete bIO;
3668 out.endLine();
3669}
3670
3671/************************
3672 * Substitution Mapping *
3673 ************************/
3674unique_ptr<SubstitutionCountInterface> PhylogeneticsApplicationTools::getSubstitutionCount(
3675 std::shared_ptr<const Alphabet> alphabet,
3676 std::shared_ptr<const SubstitutionModelInterface> model,
3677 const map<string, string>& params,
3678 string suffix,
3679 bool verbose,
3680 int warn)
3681{
3682 auto stateMap = model->getStateMap();
3683
3684 unique_ptr<SubstitutionCountInterface> substitutionCount = nullptr;
3685 string nijtOption;
3686 map<string, string> nijtParams;
3687 string nijtText = ApplicationTools::getStringParameter("nijt", params, "Uniformization", suffix, true, warn);
3688 KeyvalTools::parseProcedure(nijtText, nijtOption, nijtParams);
3689
3690 if (nijtOption == "Laplace")
3691 {
3692 size_t trunc = ApplicationTools::getParameter<size_t>("trunc", nijtParams, 10, suffix, true, warn + 1);
3693 substitutionCount = make_unique<LaplaceSubstitutionCount>(model, trunc);
3694 }
3695 else if (nijtOption == "Uniformization")
3696 {
3697 string weightOption = ApplicationTools::getStringParameter("weight", nijtParams, "None", "", true, warn + 1);
3698 shared_ptr<const AlphabetIndex2> weights(SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:"));
3699 string distanceOption = ApplicationTools::getStringParameter("distance", nijtParams, "None", "", true, warn + 1);
3700 shared_ptr<const AlphabetIndex2> distances(SequenceApplicationTools::getAlphabetIndex2(alphabet, distanceOption, "Substitution distances:"));
3701 substitutionCount = make_unique<UniformizationSubstitutionCount>(model, make_shared<TotalSubstitutionRegister>(stateMap), weights, distances);
3702 }
3703 else if (nijtOption == "Decomposition")
3704 {
3705 string weightOption = ApplicationTools::getStringParameter("weight", nijtParams, "None", "", true, warn + 1);
3706 shared_ptr<const AlphabetIndex2> weights(SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:"));
3707 string distanceOption = ApplicationTools::getStringParameter("distance", nijtParams, "None", "", true, warn + 1);
3708 shared_ptr<const AlphabetIndex2> distances(SequenceApplicationTools::getAlphabetIndex2(alphabet, distanceOption, "Substitution distances:"));
3709 auto revModel = dynamic_pointer_cast<const ReversibleSubstitutionModelInterface>(model);
3710 if (revModel)
3711 substitutionCount = make_unique<DecompositionSubstitutionCount>(revModel, make_shared<TotalSubstitutionRegister>(stateMap), weights, distances);
3712 else
3713 throw Exception("Decomposition method can only be used with reversible substitution models.");
3714 }
3715 else if (nijtOption == "Naive")
3716 {
3717 string weightOption = ApplicationTools::getStringParameter("weight", nijtParams, "None", "", true, warn + 1);
3718 std::shared_ptr<const AlphabetIndex2> weights(SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:"));
3719 string distanceOption = ApplicationTools::getStringParameter("distance", nijtParams, "", "", true, warn + 1);
3720 if (distanceOption != "")
3721 ApplicationTools::displayMessage("Naive substitution count: distances not handled");
3722
3723 substitutionCount = make_unique<NaiveSubstitutionCount>(model, make_shared<TotalSubstitutionRegister>(stateMap), false, weights);
3724 }
3725 else if (nijtOption == "Label")
3726 {
3727 substitutionCount = make_unique<LabelSubstitutionCount>(model);
3728 }
3729 else if (nijtOption == "ProbOneJump")
3730 {
3731 substitutionCount = make_unique<OneJumpSubstitutionCount>(model);
3732 }
3733 else
3734 {
3735 ApplicationTools::displayError("Invalid option '" + nijtOption + ", in 'nijt' parameter.");
3736 exit(-1);
3737 }
3738 ApplicationTools::displayResult("Substitution count procedure", nijtOption);
3739
3740 // Send results:
3741 return substitutionCount;
3742}
3743
3744/****************************************************************************/
3745
3746
3747unique_ptr<SubstitutionRegisterInterface> PhylogeneticsApplicationTools::getSubstitutionRegister(
3748 const string& regTypeDesc,
3749 std::shared_ptr<const StateMapInterface> stateMap,
3750 std::shared_ptr<const GeneticCode> genCode,
3751 std::shared_ptr<AlphabetIndex2>& weights,
3752 std::shared_ptr<AlphabetIndex2>& distances,
3753 bool verbose)
3754{
3755 string regType = "";
3756 map<string, string> regArgs;
3757 KeyvalTools::parseProcedure(regTypeDesc, regType, regArgs);
3758
3759 auto alphabet = stateMap->getAlphabet();
3760
3761 unique_ptr<SubstitutionRegisterInterface> reg = nullptr;
3762 weights = nullptr;
3763 distances = nullptr;
3764
3765 string weightOption = ApplicationTools::getStringParameter("weight", regArgs, "None", "", true, 1);
3766 string distanceOption = ApplicationTools::getStringParameter("distance", regArgs, "None", "", true, 1);
3767
3768 if (AlphabetTools::isCodonAlphabet(*alphabet))
3769 {
3770 weights = SequenceApplicationTools::getAlphabetIndex2(dynamic_pointer_cast<const CodonAlphabet>(alphabet), genCode, weightOption, "Substitution weight scheme:");
3771 distances = SequenceApplicationTools::getAlphabetIndex2(dynamic_pointer_cast<const CodonAlphabet>(alphabet), genCode, distanceOption, "Substitution distances:");
3772 }
3773 else
3774 {
3775 weights = SequenceApplicationTools::getAlphabetIndex2(alphabet, weightOption, "Substitution weight scheme:");
3776 distances = SequenceApplicationTools::getAlphabetIndex2(alphabet, distanceOption, "Substitution distances:");
3777 }
3778
3779 if (regType == "Combination")
3780 {
3781 shared_ptr<AlphabetIndex2> w2;
3782 shared_ptr<AlphabetIndex2> d2;
3783
3784 auto vreg = make_unique<VectorOfSubstitutionRegisters>(stateMap);
3785
3786 size_t i = 0;
3787 while (++i)
3788 {
3789 string regDesc = ApplicationTools::getStringParameter("reg" + TextTools::toString(i), regArgs, "", "", false, 1);
3790 if (regDesc == "")
3791 break;
3792
3793 auto sreg = getSubstitutionRegister(regDesc, stateMap, genCode, w2, d2);
3794
3795 vreg->addRegister(std::move(sreg));
3796 }
3797
3798 if (vreg->getNumberOfSubstitutionTypes() == 0)
3799 throw Exception("Missing registers reg1, reg2, ... in description of Combination");
3800
3801 reg = std::move(vreg);
3802 }
3803 else if (regType == "All")
3804 {
3805 reg = make_unique<ComprehensiveSubstitutionRegister>(stateMap, false);
3806 }
3807 else if (regType == "Total")
3808 {
3809 reg = make_unique<TotalSubstitutionRegister>(stateMap);
3810 }
3811 else if (regType == "Selected")
3812 {
3813 string subsList = ApplicationTools::getStringParameter("substitution.list", regArgs, "All", "", true, false);
3814 reg = make_unique<SelectedSubstitutionRegister>(stateMap, subsList);
3815 }
3816
3817
3818 // Alphabet dependent registers
3819 else if (AlphabetTools::isNucleicAlphabet(*alphabet))
3820 {
3821 if (regType == "GC")
3822 reg = make_unique<GCSubstitutionRegister>(stateMap, false);
3823 else if (regType == "GCw")
3824 reg = make_unique<GCSubstitutionRegister>(stateMap, true);
3825 else if (regType == "TsTv")
3826 reg = make_unique<TsTvSubstitutionRegister>(stateMap);
3827 else if (regType == "SW")
3828 reg = make_unique<SWSubstitutionRegister>(stateMap);
3829 else
3830 throw Exception("PhylogeneticsApplicationTools::getSubstitutionRegister: unsupported substitution categorization:" + regType + " for alphabet " + alphabet->getAlphabetType());
3831 }
3832 else if (AlphabetTools::isCodonAlphabet(*alphabet))
3833 {
3834 if (regType == "IntraAA")
3835 reg = make_unique<AAInteriorSubstitutionRegister>(stateMap, genCode);
3836 else if (regType == "InterAA")
3837 reg = make_unique<AAExteriorSubstitutionRegister>(stateMap, genCode);
3838 else if (regType == "GC")
3839 reg = make_unique<GCSynonymousSubstitutionRegister>(stateMap, genCode);
3840 else if (regType == "GC1")
3841 reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 0);
3842 else if (regType == "GC2")
3843 reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 1);
3844 else if (regType == "GC3")
3845 reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 2);
3846 else if (regType == "GCw")
3847 reg = make_unique<GCSynonymousSubstitutionRegister>(stateMap, genCode, true);
3848 else if (regType == "GC1w")
3849 reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 0, true);
3850 else if (regType == "GC2w")
3851 reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 1, true);
3852 else if (regType == "GC3w")
3853 reg = make_unique<GCPositionSubstitutionRegister>(stateMap, genCode, 2, true);
3854 else if (regType == "TsTv")
3855 reg = make_unique<TsTvSubstitutionRegister>(stateMap, genCode);
3856 else if (regType == "SW")
3857 reg = make_unique<SWSubstitutionRegister>(stateMap, genCode);
3858 else if (regType == "KrKc")
3859 reg = make_unique<KrKcSubstitutionRegister>(stateMap, genCode);
3860 else if (regType == "DnDs")
3861 reg = make_unique<DnDsSubstitutionRegister>(stateMap, genCode, false);
3862 else
3863 throw Exception("Unsupported substitution categorization: " + regType + " for alphabet " + alphabet->getAlphabetType());
3864 }
3865
3866 else if (AlphabetTools::isProteicAlphabet(*alphabet))
3867 {
3868 if (regType == "KrKc")
3869 reg = make_unique<KrKcSubstitutionRegister>(stateMap);
3870 else
3871 throw Exception("Unsupported substitution categorization: " + regType + " for alphabet " + alphabet->getAlphabetType());
3872 }
3873
3874 auto csr = dynamic_cast<CategorySubstitutionRegister*>(reg.get());
3875 if (csr)
3876 csr->setStationarity(ApplicationTools::getBooleanParameter("stationarity", regArgs, true));
3877
3878 if (verbose)
3879 ApplicationTools::displayResult("Substitution Register", regType);
3880
3881 return reg;
3882}
std::shared_ptr< const FrequencySetInterface > getRootFrequencySet() const
std::string getFrom(const std::string &name) const
Likelihood framework based on a hmm of simple likelihoods.
Likelihood framework based on a hmm of simple likelihoods.
Vdouble getPosteriorProbabilitiesForASitePerAligned(size_t site) const
virtual double getLogLikelihoodForASite(size_t site) const =0
Get the log likelihood for a site, and its derivatives.
virtual size_t getNumberOfSites() const =0
Get the number of sites in the dataset.
Likelihood framework based on a mixture of aligned likelihoods.
Vdouble getPhyloProbabilities() const
Get the probabilities of the simplex.
The AlignedPhyloLikelihoodProduct class, for phylogenetic likelihood on several independent data.
Joint interface SetOf+Aligned PhylLikelihood.
virtual double getLogLikelihoodForASiteForAPhyloLikelihood(size_t site, size_t nPhyl) const =0
Get the log likelihood for a site for an aligned phyloLikelihood.
static bool isProteicAlphabet(const Alphabet &alphabet)
static bool isNucleicAlphabet(const Alphabet &alphabet)
static bool isCodonAlphabet(const Alphabet &alphabet)
static bool isWordAlphabet(const Alphabet &alphabet)
static void displayMessage(const std::string &text)
static std::vector< std::string > matchingParameters(const std::string &pattern, const std::map< std::string, std::string > &params)
static void displayError(const std::string &text)
static bool getBooleanParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, bool defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static std::string getStringParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, const std::string &defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayWarning(const std::string &text)
static double getDoubleParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, double defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static int getIntParameter(const std::string &parameterName, const std::map< std::string, std::string > &params, int defaultValue, const std::string &suffix="", bool suffixIsOptional=true, int warn=0)
static void displayResult(const std::string &text, const T &result)
static std::string getAFilePath(const std::string &parameter, const std::map< std::string, std::string > &params, bool isRequired=true, bool mustExist=true, const std::string &suffix="", bool suffixIsOptional=false, const std::string &defaultPath="none", int warn=0)
AssociationGraphObserver< N, E >::NodeIndex NodeIndex
virtual std::vector< EdgeIndex > getAllEdgesIndexes() const=0
static std::map< std::string, std::string > getAttributesMapFromFile(const std::string &file, const std::string &delimiter)
Sequence evolution framework based on an auto-correlation of substitution processes.
const AutoCorrelationTransitionMatrix & hmmTransitionMatrix() const
double Pij(size_t i, size_t j) const override
static std::string CONSTRAINTS_AUTO
Branch model I/O in BppO format.
std::unique_ptr< BranchModelInterface > readBranchModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true)
Frequencies set I/O in BppO format.
void writeFrequencySet(const FrequencySetInterface &freqset, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
std::unique_ptr< FrequencySetInterface > readFrequencySet(std::shared_ptr< const Alphabet > alphabet, const std::string &freqDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true) override
Read a frequencies set from a string.
const std::map< std::string, std::string > & getUnparsedArguments() const override
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
IMultiTree * readIMultiTree(const std::string &description)
Read a IMultiTree object from a string.
Rate Distribution I/O in BppO format.
std::unique_ptr< DiscreteDistributionInterface > readDiscreteDistribution(const std::string &distDescription, bool parseArguments)
void writeDiscreteDistribution(const DiscreteDistributionInterface &dist, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const
Substitution model I/O in BppO format.
void write(const BranchModelInterface &model, OutputStream &out, std::map< std::string, std::string > &globalAliases, std::vector< std::string > &writtenNames) const override
Write a substitution model to a stream.
std::unique_ptr< SubstitutionModelInterface > readSubstitutionModel(std::shared_ptr< const Alphabet > alphabet, const std::string &modelDescription, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, bool parseArguments=true) override
Read a substitution model from a string.
void setGeneticCode(std::shared_ptr< const GeneticCode > gCode)
Set the genetic code to use in case a codon frequencies set should be built.
const std::map< std::string, std::string > & getUnparsedArguments() const override
Tree I/O in BppO format.
std::unique_ptr< ITree > readITree(const std::string &description)
Read a ITree object from a string.
Interface for all Branch models.
The CategorySubstitutionRegisters.
Context for dataflow node construction.
Definition: DataFlow.h:527
virtual int getCoordinate() const=0
static void write(const DataTable &data, std::ostream &out, const std::string &sep="\t", bool alignHeaders=false)
void addRow(const std::vector< std::string > &newRow)
virtual double getCategory(size_t categoryIndex) const=0
virtual std::string getName() const=0
void setTransitionProbabilities(const Matrix< double > &mat)
const Matrix< double > & getPij() const
Sequence evolution framework based on a hmm.
const FullHmmTransitionMatrix & hmmTransitionMatrix() const
virtual void readPhyloTrees(const std::string &path, std::vector< std::unique_ptr< PhyloTree > > &trees) const =0
Read trees from a file.
static void parseProcedure(const std::string &desc, std::string &name, std::map< std::string, std::string > &args)
static void print(const Matrix &m, std::ostream &out=std::cout)
Sequence evolution framework based on a mixture of substitution processes.
const std::vector< double > & getSubProcessProbabilities() const
std::string to_string() const
Output.
Definition: ModelPath.cpp:257
Organization of submodels in mixed substitution models in a path. See class ModelScenario for a thoro...
Definition: ModelPath.h:22
const PathNode & getPathNode(std::shared_ptr< MixedTransitionModelInterface > mMod) const
gets the pathnode associated with a model
Definition: ModelPath.h:251
std::vector< std::shared_ptr< MixedTransitionModelInterface > > getModels() const
gets the MixedTransitionModel used in the ModelPath
Definition: ModelPath.cpp:132
Partial implementation of multiple processes of sequences.
size_t getNumberOfSubstitutionProcess() const
Return the number of process used for computation.
const std::vector< size_t > & getSubstitutionProcessNumbers() const
Partial implementation of the Likelihood interface for multiple processes.
The so-called 'newick' parenthetic format.
Definition: Newick.h:58
void enableExtendedBootstrapProperty(const std::string &propertyName)
Definition: Newick.h:86
a simple parser for reading trees from a Nexus file.
Definition: NexusIoTree.h:36
The so-called 'Nhx - New Hampshire eXtended' parenthetic format.
Definition: Nhx.h:62
Substitution process manager for non-homogeneous / non-reversible models of evolution.
const DiscreteDistributionInterface & rateDistribution() const override
Get the rate distribution.
const std::vector< unsigned int > getNodesWithModel(size_t i) const override
Get a list of nodes id for which the given model is associated.
std::shared_ptr< const BranchModelInterface > getModel(size_t n) const override
General interface for tree writers.
Definition: IoTree.h:419
virtual void writePhyloTrees(const std::vector< const PhyloTree * > &trees, const std::string &path, bool overwrite) const =0
Write trees to a file.
General interface for tree writers.
Definition: IoTree.h:157
virtual void writePhyloTree(const PhyloTree &tree, const std::string &path, bool overwrite) const =0
Write a tree to a file.
General interface for tree writers.
Definition: IoTree.h:125
virtual void writeTree(const Tree &tree, const std::string &path, bool overwrite) const =0
Write a tree to a file.
Evolution of a sequence performed by a unique SubstitutionProcess all along the sequence.
std::shared_ptr< OutputStream > profiler
std::shared_ptr< OutputStream > messenger
static std::string OPTIMIZATION_NEWTON
static unsigned int optimizeNumericalParameters2(std::shared_ptr< PhyloLikelihoodInterface > lik, const OptimizationOptions &optopt)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static std::string OPTIMIZATION_BFGS
static unsigned int optimizeNumericalParameters(std::shared_ptr< PhyloLikelihoodInterface > lik, const OptimizationOptions &optopt)
Optimize numerical parameters (branch length, substitution model & rate distribution) of a TreeLikeli...
static std::string OPTIMIZATION_BRENT
virtual OutputStream & endLine()=0
virtual OutputStream & setPrecision(int digit)=0
size_t size() const
virtual std::vector< std::string > getParameterNames() const
virtual void addParameter(const Parameter &param)
virtual bool matchParametersValues(const ParameterList &params, std::vector< size_t > *updatedParameters=0)
virtual std::string getParameterNameWithoutNamespace(const std::string &name) const=0
virtual double getParameterValue(const std::string &name) const=0
virtual const ParameterList & getParameters() const=0
Sequence evolution framework based on a mixture of substitution processes.
std::map< size_t, std::vector< size_t > > & mapOfProcessSites()
The PhyloLikelihoodContainer, owns and assigns numbers to Phylolikelihoods.
std::vector< size_t > getNumbersOfPhyloLikelihoods() const
The PhyloLikelihoodFormula class, for phylogenetic likelihood on several independent data.
virtual const std::vector< size_t > & getNumbersOfPhyloLikelihoods() const =0
static void constrainedMidPointRooting(PhyloTree &tree)
Determine the mid-point position of the root along the branch that already contains the root....
static std::shared_ptr< PhyloTree > buildFromTreeTemplate(const TreeTemplate< Node > &treetemp)
static double getHeight(const PhyloTree &tree, const std::shared_ptr< PhyloNode > node)
Get the height of the subtree defined by node 'node', i.e. the maximum distance between leaves and th...
static void computeBranchLengthsGrafen(PhyloTree &tree, double power=1, bool init=true)
Compute branch lengths using Grafen's method.
static double convertToClockTree(PhyloTree &tree, std::shared_ptr< PhyloNode > node)
Modify a tree's branch lengths to make a clock tree, by rebalancing branch lengths.
static void printAnalysisInformation(const PhyloLikelihoodContainer &phylocont, const std::string &infosFile, int warn=1)
Output information on the computation to a file.
static std::unique_ptr< BranchModelInterface > getBranchModel(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &unparsedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a BranchModel (most general class of branch models) object according to options.
static std::map< size_t, std::unique_ptr< SequenceEvolution > > getSequenceEvolutions(std::shared_ptr< SubstitutionProcessCollection > SPC, const std::map< std::string, std::string > &params, map< string, string > &unparsedParams, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a map of Sequence Evolution, ie ways how sequence evolve, which may use several processes.
static MultipleDiscreteDistribution * getMultipleDistributionDefaultInstance(const std::string &distDescription, std::map< std::string, std::string > &unparsedParameterValues, bool verbose=true)
Build a multi-dimension distribution as a MultipleDiscreteDistribution object with default parameter ...
static std::map< size_t, std::unique_ptr< FrequencySetInterface > > getRootFrequencySets(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &sharedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
The same, but returns a map <number, shared_ptr<FrequencySetInterface>>.
static void printParameters(const BranchModelInterface &model, OutputStream &out, int warn=1)
Output a SubstitutionModel description to a file.
static std::shared_ptr< PhyloLikelihoodContainer > getPhyloLikelihoodContainer(Context &context, std::shared_ptr< SubstitutionProcessCollection > SPC, std::map< size_t, std::shared_ptr< SequenceEvolution > > &mSeqEvol, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, const std::map< std::string, std::string > &params, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a Phylogeny container from parameters map.
static void checkEstimatedParameters(const ParameterList &pl)
Check if parameter values are close to their definition boundary.
static std::map< size_t, std::unique_ptr< BranchModelInterface > > getBranchModels(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &unparsedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
The same as before, but returns a map <number, branch model>.
static std::unique_ptr< FrequencySetInterface > getFrequencySet(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::string &freqDescription, std::shared_ptr< const AlignmentDataInterface > data, std::map< std::string, std::string > &sharedParams, const std::vector< double > &rateFreqs, bool verbose=true, int warn=1)
Get A FrequencySet object according to options.
static std::unique_ptr< SubstitutionCountInterface > getSubstitutionCount(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const SubstitutionModelInterface > model, const std::map< std::string, std::string > &params, string suffix="", bool verbose=true, int warn=1)
Get a SubstitutionCount instance.
static std::vector< std::unique_ptr< Tree > > getTrees(const std::map< std::string, std::string > &params, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a list of Tree objects according to options.
static std::unique_ptr< FrequencySetInterface > getRootFrequencySet(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mData, size_t nData, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &sharedparams, const std::vector< double > &rateFreqs, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Get A FrequencySet object for root frequencies (NH models) according to options.
static std::unique_ptr< DiscreteDistributionInterface > getRateDistribution(const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true)
Build a DiscreteDistribution object according to options.
static void writePhyloTrees(const std::vector< const PhyloTree * > &trees, const std::map< std::string, std::string > &params, const std::string &prefix="output.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, bool checkOnly=false, int warn=1)
Write a tree according to options.
static map< size_t, std::unique_ptr< ModelScenario > > getModelScenarios(const std::map< std::string, std::string > &params, const map< size_t, std::shared_ptr< ModelPath > > &mModelPath, const map< size_t, std::shared_ptr< BranchModelInterface > > &mModel, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build map of ModelScenarios from a map of ModelPaths.
static std::shared_ptr< PhyloLikelihoodInterface > optimizeParameters(std::shared_ptr< PhyloLikelihoodInterface > lik, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Optimize parameters according to options.
static std::map< size_t, std::shared_ptr< DiscreteDistributionInterface > > getRateDistributions(const std::map< std::string, std::string > &params, const string &suffix="", bool suffixIsOptional=true, bool verbose=true)
static std::unique_ptr< AutonomousSubstitutionProcessInterface > getSubstitutionProcess(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, std::shared_ptr< const AlignmentDataInterface > data, const vector< std::shared_ptr< PhyloTree > > &vTree, const std::map< std::string, std::string > &params, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Sets a SubstitutionProcess object according to options.
static void writeTree(const TreeTemplate< Node > &tree, const std::map< std::string, std::string > &params, const std::string &prefix="output.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, bool checkOnly=false, int warn=1)
Output methods:
static std::unique_ptr< SubstitutionModelInterface > getSubstitutionModel(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, std::shared_ptr< const AlignmentDataInterface > data, const std::map< std::string, std::string > &params, std::map< std::string, std::string > &unparsedparams, const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a SubstitutionModel object according to options (ie a BranchModel with a generator).
static map< size_t, std::unique_ptr< ModelPath > > getModelPaths(const std::map< std::string, std::string > &params, const map< size_t, std::shared_ptr< BranchModelInterface > > &mModel, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build map of ModelPaths from a map of BranchModel.
static std::map< size_t, std::shared_ptr< PhyloTree > > getPhyloTrees(const std::map< std::string, std::string > &params, const std::map< size_t, std::shared_ptr< const AlignmentDataInterface > > &mSeq, std::map< std::string, std::string > &unparsedParams, const std::string &prefix="input.", const std::string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Build a map of <number,PhyloTree> according to options.
static std::unique_ptr< SubstitutionRegisterInterface > getSubstitutionRegister(const std::string &regTypeDesc, std::shared_ptr< const StateMapInterface > stateMap, std::shared_ptr< const GeneticCode > genCode, std::shared_ptr< AlphabetIndex2 > &weights, std::shared_ptr< AlphabetIndex2 > &distances, bool verbose=true)
Get a Register instance.
static std::unique_ptr< SubstitutionProcessCollection > getSubstitutionProcessCollection(std::shared_ptr< const Alphabet > alphabet, std::shared_ptr< const GeneticCode > gCode, const map< size_t, std::shared_ptr< PhyloTree > > &mTree, const map< size_t, std::shared_ptr< BranchModelInterface > > &mMod, const map< size_t, std::shared_ptr< FrequencySetInterface > > &mRootFreq, const map< size_t, std::shared_ptr< DiscreteDistributionInterface > > &mDist, const map< size_t, std::shared_ptr< ModelScenario > > &mScen, const std::map< std::string, std::string > &params, map< string, string > &unparsedparams, const string &suffix="", bool suffixIsOptional=true, bool verbose=true, int warn=1)
Builds a SubstitutionProcessCollection from many maps of relevant objects.
static bool addSubstitutionProcessCollectionMember(SubstitutionProcessCollection &SubProColl, size_t procNum, const std::map< std::string, std::string > &params, bool verbose=true, int warn=1)
Adds a SubstitutionProcessCollectionMember to a Collection, under a given number.
std::shared_ptr< const DiscreteDistributionInterface > getRateDistribution() const override
Get a pointer to the rate distribution (or null if there is no rate distribution).
static std::unique_ptr< AlphabetIndex2 > getAlphabetIndex2(std::shared_ptr< const Alphabet > alphabet, const std::string &description, const std::string &message="Alphabet distance:", bool verbose=true)
This interface describes the evolution process of a sequence.
PhyloLikelihoods based on Sequence Evolution class, ie for which there is a (set of) processes able t...
Space and time homogeneous substitution process, without mixture.
The SingleDataPhyloLikelihood interface, for phylogenetic likelihood.
virtual std::shared_ptr< const AlignmentDataInterface > getData() const =0
Get the dataset for which the likelihood must be evaluated.
virtual size_t getNData() const =0
Get the number of dataset concerned.
Wraps a dataflow graph as a function: resultNode = f(variableNodes).
const std::string & nextToken()
bool hasMoreToken() const
std::vector< size_t > getModelNumbers() const override
std::shared_ptr< const DiscreteDistributionInterface > getRateDistribution() const override
Get a pointer to the rate distribution (or null if there is no rate distribution).
const std::vector< unsigned int > getNodesWithModel(size_t i) const override
Get a list of nodes id for which the given model is associated.
std::shared_ptr< const ModelScenario > getModelScenario() const override
Get the Model Scenario associated with this process, in case there are mixture models involved.
std::shared_ptr< const FrequencySetInterface > getRootFrequencySet() const override
std::shared_ptr< const BranchModelInterface > getModel(size_t n) const override
std::shared_ptr< const ParametrizablePhyloTree > getParametrizablePhyloTree() const override
Collection of Substitution Process, which owns all the necessary objects: Substitution models,...
std::vector< size_t > getFrequenciesNumbers() const
std::shared_ptr< FrequencySetInterface > getFrequencySet(size_t frequenciesIndex)
std::vector< size_t > getModelNumbers() const
Get the numbers of the specified objects from the collections.
size_t getModelIndex(std::shared_ptr< BranchModelInterface > model) const
Return the number of a BranchModel in the collection.
DiscreteDistributionInterface & rateDistribution(size_t distributionIndex)
std::vector< size_t > getSubstitutionProcessNumbers() const
std::shared_ptr< ParametrizablePhyloTree > getTree(size_t treeIndex)
void addOnePerBranchSubstitutionProcess(size_t nProc, size_t nMod, size_t nTree, size_t nRate, size_t nFreq, const std::vector< std::string > &sharedParameterNames)
std::shared_ptr< BranchModelInterface > getModel(size_t modelIndex)
bool hasModelScenario(size_t numPath) const
checks if the set has a ModelScenario
std::vector< size_t > getScenarioNumbers() const
std::vector< size_t > getRateDistributionNumbers() const
std::shared_ptr< const ModelScenario > getModelScenario(size_t numPath) const
Get a ModelScenario from the set.
void addSubstitutionProcess(size_t nProc, std::map< size_t, std::vector< unsigned int > > mModBr, size_t nTree, size_t nRate, size_t nFreq)
void addDistribution(std::shared_ptr< DiscreteDistributionInterface > distribution, size_t distributionIndex)
SubstitutionProcessCollectionMember & substitutionProcess(size_t i)
std::shared_ptr< DiscreteDistributionInterface > getRateDistribution(size_t distributionIndex)
Get a DiscreteDistribution from the collection.
ParametrizablePhyloTree & tree(size_t treeIndex)
Get a tree from the set.
This interface describes the substitution process along the tree and sites of the alignment.
virtual const BranchModelInterface & model(size_t i) const =0
static bool isConstant(const IntSymbolListInterface &site, bool ignoreUnknown=false, bool unresolvedRaisesException=true)
static bool isComplete(const IntSymbolListInterface &site)
static std::unique_ptr< TreeTemplate< Node > > getRandomTree(std::vector< std::string > &leavesNames, bool rooted=true)
Draw a random tree from a list of taxa, using a Yule process.
The phylogenetic tree class.
Definition: TreeTemplate.h:59
static T sumExp(const std::vector< T > &v1)
static std::string paste(const std::vector< T > &v, const std::string &delim=" ")
static std::vector< T > vectorIntersection(const std::vector< T > &vec1, const std::vector< T > &vec2)
static std::vector< T > unique(const std::vector< T > &v)
static void diff(std::vector< T > &v1, std::vector< T > &v2, std::vector< T > &v3)
static void printRange(const std::vector< T > &v1, OutputStream &out=*ApplicationTools::message, const std::string &delim=",", const std::string &rangeDelim="-")
static size_t whichMax(const std::vector< T > &v)
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
bool isDecimalInteger(const std::string &s, char scientificNotation='e')
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
ExtendedFloat exp(const ExtendedFloat &ef)
template void copyEigenToBpp(const ExtendedFloatMatrixXd &eigenMatrix, std::vector< std::vector< double > > &bppMatrix)
std::vector< Vdouble > VVdouble
std::vector< unsigned int > Vuint