14#include "../Distance/BioNJ.h"
15#include "../Distance/DistanceEstimation.h"
16#include "../Legacy/OptimizationTools.h"
17#include "../Parsimony/DRTreeParsimonyScore.h"
18#include "../Model/Nucleotide/JCnuc.h"
37const string TreeTools::BOOTSTRAP =
"bootstrap";
41vector<int> TreeTools::getLeavesId(
const Tree& tree,
int nodeId)
54 leaves.push_back(nodeId);
56 vector<int> sons = tree.
getSonsId(nodeId);
57 for (
size_t i = 0; i < sons.size(); i++)
73 vector<int> sons = tree.
getSonsId(nodeId);
74 for (
size_t i = 0; i < sons.size(); ++i)
103 id =
new int(nodeId);
108 for (
size_t i = 0; i < sons.size(); i++)
127 vector<int> sons = tree.
getSonsId(nodeId);
128 for (
size_t i = 0; i < sons.size(); i++)
132 nodes.push_back(nodeId);
142 vector<int> sons = tree.
getSonsId(nodeId);
143 for (
size_t i = 0; i < sons.size(); i++)
145 size_t c =
getDepth(tree, sons[i]) + 1;
159 vector<int> sons = tree.
getSonsId(nodeId);
160 for (
size_t i = 0; i < sons.size(); i++)
162 size_t c =
getDepths(tree, sons[i], depths) + 1;
177 vector<int> sons = tree.
getSonsId(nodeId);
178 for (
size_t i = 0; i < sons.size(); i++)
185 double c =
getHeight(tree, sons[i]) + dist;
199 vector<int> sons = tree.
getSonsId(nodeId);
200 for (
size_t i = 0; i < sons.size(); i++)
207 double c =
getHeights(tree, sons[i], heights) + dist;
229 vector<int> sonsId = tree.
getSonsId(nodeId);
231 for (
size_t i = 1; i < sonsId.size(); i++)
268 vector<int> sonsId = tree.
getSonsId(nodeId);
270 for (
size_t i = 1; i < sonsId.size(); i++)
300 vector<int> sonsId = tree.
getSonsId(rootId);
304 for (
size_t i = 0; i < sonsId.size(); i++)
311 if (sonsId.size() > 0)
314 for (
size_t i = 1; i < sonsId.size(); i++)
332 vector<int> sonsId = tree.
getSonsId(rootId);
337 for (
size_t i = 0; i < sonsId.size(); i++)
345 for (
size_t i = 1; i < sonsId.size(); i++)
375 vector<int> pathMatrix1;
376 vector<int> pathMatrix2;
378 int nodeUp = nodeId1;
381 pathMatrix1.push_back(nodeUp);
384 pathMatrix1.push_back(nodeUp);
389 pathMatrix2.push_back(nodeUp);
392 pathMatrix2.push_back(nodeUp);
395 size_t tmp1 = pathMatrix1.size();
396 size_t tmp2 = pathMatrix2.size();
398 while ((tmp1 > 0) && (tmp2 > 0))
400 if (pathMatrix1[tmp1 - 1] != pathMatrix2[tmp2 - 1])
406 for (
size_t y = 0; y < tmp1; ++y)
408 path.push_back(pathMatrix1[y]);
411 path.push_back(pathMatrix1[tmp1]);
412 for (
size_t j = tmp2; j > 0; --j)
414 path.push_back(pathMatrix2[j - 1]);
429 for (
size_t i = 0; i < path.size(); i++)
446 throw NodeException(
"TreeTools::getbranchLengths(). No branch length.", nodeId);
447 vector<int> sons = tree.
getSonsId(nodeId);
448 for (
size_t i = 0; i < sons.size(); i++)
451 for (
size_t j = 0; j < sonBrLen.size(); j++)
453 brLen.push_back(sonBrLen[j]);
466 throw NodeException(
"TreeTools::getTotalLength(). No branch length.", nodeId);
468 vector<int> sons = tree.
getSonsId(nodeId);
469 for (
size_t i = 0; i < sons.size(); i++)
483 for (
size_t i = 0; i < nodes.size(); i++)
496 for (
size_t i = 0; i < nodes.size(); i++)
510 for (
size_t i = 0; i < nodes.size(); i++)
515 throw NodeException(
"TreeTools::scaleTree(). Branch with no length", nodes[i]);
528 vector<int> sons = tree.
getSonsId(nodeId);
529 vector<size_t> h(sons.size());
530 for (
size_t i = 0; i < sons.size(); i++)
534 size_t thish = sons.size() == 0 ? 0 : VectorTools::sum<size_t>(h) + sons.size() - 1;
535 for (
size_t i = 0; i < sons.size(); i++)
555 double& heightRaised)
559 vector<int> sons = tree.
getSonsId(nodeId);
560 vector<double> hr(sons.size());
562 for (
size_t i = 0; i < sons.size(); i++)
574 throw NodeException (
"TreeTools::computeBranchLengthsGrafen. Branch length lacking.", son);
576 heightRaised =
std::pow(height / total, power) * total;
577 for (
size_t i = 0; i < sons.size(); i++)
591 double totalHeight =
getHeight(tree, rootId);
602 vector<int> sons = tree.
getSonsId(nodeId);
603 vector<double> h(sons.size());
607 for (
size_t i = 0; i < sons.size(); i++)
618 throw NodeException (
"TreeTools::convertToClockTree. Branch length lacking.", son);
621 l /= (double)sons.size();
624 for (
size_t i = 0; i < sons.size(); i++)
637 vector<int> sons = tree.
getSonsId(nodeId);
638 vector<double> h(sons.size());
642 for (
size_t i = 0; i < sons.size(); i++)
653 throw NodeException (
"TreeTools::convertToClockTree2. Branch length lacking.", son);
656 l /= (double)sons.size();
657 for (
size_t i = 0; i < sons.size(); i++)
659 scaleTree(tree, sons[i], h[i] > 0 ? l / h[i] : 0);
669 auto mat = make_unique<DistanceMatrix>(names);
670 for (
size_t i = 0; i < names.size(); i++)
673 for (
size_t j = 0; j < i; j++)
685 throw Exception(
"TreeTools::midpointRooting(Tree). This function is deprecated, use TreeTemplateTools::midRoot instead!");
690 double dmid = (*dist)(pos[0], pos[1]) / 2;
691 int id1 = tree.
getLeafId(dist->getName(pos[0]));
692 int id2 = tree.
getLeafId(dist->getName(pos[1]));
696 int current = d2 > d1 ? id2 : id1;
707 if (brother == current)
719 for (
size_t i = 0; i < sonsId.size(); i++)
721 int subMax =
getMaxId(tree, sonsId[i]);
733 sort(ids.begin(), ids.end());
735 for (
size_t i = 0; i < ids.size(); i++)
737 if (ids[i] != (
int)i)
741 return (
int)ids.size();
749 sort(ids.begin(), ids.end());
750 for (
size_t i = 1; i < ids.size(); i++)
752 if (ids[i] == ids[i - 1])
766 vector<unique_ptr<BipartitionList>> vecBipL;
767 for (
size_t i = 0; i < vecTr.size(); i++)
769 vecBipL.push_back(make_unique<BipartitionList>(*vecTr[i]));
779 vector<unique_ptr<BipartitionList>> vecBipL;
780 for (
size_t i = 0; i < vecTr.size(); i++)
782 vecBipL.push_back(make_unique<BipartitionList>(*vecTr[i]));
793 unique_ptr<BipartitionList> bipL1, bipL2;
794 vector<size_t> size1, size2;
801 bipL1 = make_unique<BipartitionList>(tr1,
true);
802 bipL1->removeTrivialBipartitions();
803 bipL1->removeRedundantBipartitions();
804 bipL1->sortByPartitionSize();
805 bipL2 = make_unique<BipartitionList>(tr2,
true);
806 bipL2->removeTrivialBipartitions();
807 bipL2->removeRedundantBipartitions();
808 bipL2->sortByPartitionSize();
811 if (bipL1->getNumberOfBipartitions() != bipL2->getNumberOfBipartitions())
813 nbbip = bipL1->getNumberOfBipartitions();
816 for (
size_t i = 0; i < nbbip; i++)
818 size1.push_back(bipL1->getPartitionSize(i));
819 size2.push_back(bipL1->getPartitionSize(i));
820 if (size1[i] != size2[i])
825 for (
size_t i = 0; i < nbbip; i++)
827 for (jj = 0; jj < nbbip; jj++)
843 unique_ptr<BipartitionList> bipL1, bipL2;
845 vector<size_t> size1, size2;
850 throw Exception(
"Distinct leaf sets between trees ");
856 bipL1 = make_unique<BipartitionList>(tr1,
true);
857 bipL1->removeTrivialBipartitions();
858 bipL1->sortByPartitionSize();
859 bipL2 = make_unique<BipartitionList>(tr2,
true);
860 bipL2->removeTrivialBipartitions();
861 bipL2->sortByPartitionSize();
864 for (i = 0; i < bipL1->getNumberOfBipartitions(); i++)
866 size1.push_back(bipL1->getPartitionSize(i));
868 for (i = 0; i < bipL2->getNumberOfBipartitions(); i++)
870 size2.push_back(bipL2->getPartitionSize(i));
873 for (i = 0; i < bipL2->getNumberOfBipartitions(); i++)
875 bipOK2.push_back(
false);
880 for (i = 0; i < bipL1->getNumberOfBipartitions(); i++)
882 for (j = 0; j < bipL2->getNumberOfBipartitions(); j++)
892 if (j == bipL2->getNumberOfBipartitions())
896 missing1 =
static_cast<int>(bipL2->getNumberOfBipartitions()) -
static_cast<int>(bipL1->getNumberOfBipartitions()) + missing2;
899 *missing_in_tr1 = missing1;
901 *missing_in_tr2 = missing2;
902 return missing1 + missing2;
909 vector<unique_ptr<BipartitionList>> vecBipL;
910 unique_ptr<BipartitionList> mergedBipL;
911 vector<size_t> bipSize;
915 for (
size_t i = 0; i < vecTr.size(); i++)
917 vecBipL.push_back(make_unique<BipartitionList>(*vecTr[i]));
922 mergedBipL->removeTrivialBipartitions();
923 nbBip = mergedBipL->getNumberOfBipartitions();
925 for (
size_t i = 0; i < nbBip; i++)
927 bipSize.push_back(mergedBipL->getPartitionSize(i));
928 bipScore.push_back(1);
932 for (
size_t i = nbBip; i > 0; i--)
934 if (bipScore[i - 1] == 0)
936 for (
size_t j = i - 1; j > 0; j--)
938 if (bipScore[j - 1] && bipSize[i - 1] == bipSize[j - 1] && mergedBipL->areIdentical(i - 1, j - 1))
947 for (
size_t i = nbBip; i > 0; i--)
949 if (bipScore[i - 1] == 0)
951 bipScore.erase(bipScore.begin() +
static_cast<ptrdiff_t
>(i - 1));
952 mergedBipL->deleteBipartition(i - 1);
957 mergedBipL->addTrivialBipartitions(
false);
958 for (
size_t i = 0; i < mergedBipL->getNumberOfElements(); i++)
960 bipScore.push_back(vecTr.size());
970 vector<size_t> bipScore;
971 vector<string> tr0leaves;
972 unique_ptr<BipartitionList> bipL;
975 if (vecTr.size() == 0)
976 throw Exception(
"TreeTools::thresholdConsensus. Empty vector passed");
981 tr0leaves = vecTr[0]->getLeavesNames();
982 for (
size_t i = 1; i < vecTr.size(); i++)
985 throw Exception(
"TreeTools::thresholdConsensus. Distinct leaf sets between trees");
991 for (
size_t i = bipL->getNumberOfBipartitions(); i > 0; i--)
993 if (bipL->getPartitionSize(i - 1) == 1)
995 score =
static_cast<int>(bipScore[i - 1]) /
static_cast<double>(vecTr.size());
996 if (score <= threshold && score != 1.)
998 bipL->deleteBipartition(i - 1);
1003 for (
size_t j = bipL->getNumberOfBipartitions(); j > i; j--)
1005 if (!bipL->areCompatible(i - 1, j - 1))
1007 bipL->deleteBipartition(i - 1);
1013 return bipL->toTree();
1045 auto alphabet = dynamic_pointer_cast<const DNA>(sites->getAlphabet());
1046 auto jc = make_shared<JCnuc>(alphabet);
1047 auto constRate = make_shared<ConstantDistribution>(1.);
1049 BioNJ bionjTreeBuilder(
false,
false);
1057 auto MPScore = make_shared<DRTreeParsimonyScore>(startTree, sites,
false);
1059 auto retTree = make_unique<TreeTemplate<Node>>(MPScore->tree());
1071 vector<size_t> occurences;
1081 for (
size_t j = 0; j < bpList->getNumberOfBipartitions(); j++)
1085 bootstrapValues[i] = format >= 0 ? round(
static_cast<double>(occurences[j]) *
std::pow(10., 2 + format) /
static_cast<double>(vecTr.size())) /
std::pow(10., format) :
static_cast<double>(occurences[j]);
1091 for (
size_t i = 0; i < index.size(); i++)
1093 if (!tree.
isLeaf(index[i]))
1103 int currentId = nodeId;
1107 ids.push_back(currentId);
1116 if (nodeIds.size() == 0)
1117 throw Exception(
"TreeTools::getLastCommonAncestor(). You must provide at least one node id.");
1118 vector< vector<int>> ancestors(nodeIds.size());
1119 for (
size_t i = 0; i < nodeIds.size(); i++)
1122 ancestors[i].insert(ancestors[i].begin(), nodeIds[i]);
1128 if (ancestors[0].size() <=
count)
1130 int current = ancestors[0][ancestors[0].size() -
count - 1];
1131 for (
size_t i = 1; i < nodeIds.size(); i++)
1133 if (ancestors[i].size() <=
count)
1135 if (ancestors[i][ancestors[i].size() -
count - 1] != current)
1151 throw Exception(
"The tree has to be rooted on the branch of interest to determine the midpoint position of the root");
1154 throw Exception(
"The tree is multifurcated, which is not allowed.");
1178 A = 4 * m1.
N * (m2.
N * length) * length;
1179 B = 4 * length * (m2.
N * m1.
sum - m1.
N * m2.
sum - length * m1.
N * m2.
N);
1213 vector<int> sonsId = tree.
getSonsId(rootId);
1214 for (
size_t i = 0; i < sonsId.size(); i++)
1219 m.
sum += mtmp.
sum + bLength * mtmp.
N;
1231 throw Exception(
"TreeTools::MRPMultilabel not updated.");
1237 shared_ptr<const DNA> alphabet = dynamic_pointer_cast<const DNA>(sites->getAlphabet());
1238 auto jc = make_shared<JCnuc>(alphabet);
1239 auto constRate = make_shared<ConstantDistribution>(1.);
1241 BioNJ bionjTreeBuilder(
false,
false);
1249 auto MPScore = make_shared<DRTreeParsimonyScore>(startTree, sites,
false);
1251 auto retTree = make_unique<TreeTemplate<Node>>(MPScore->tree());
const Tree & tree() const override
The BioNJ distance method.
void setDistanceMatrix(const DistanceMatrix &matrix)
Set the distance matrix to use.
void computeTree()
Compute the tree corresponding to the distance matrix.
This class deals with the bipartitions defined by trees.
size_t getNumberOfBipartitions() const
std::unique_ptr< DistanceMatrix > getMatrix() const
Get the distance matrix.
General exception thrown when something is wrong with a particular node.
Exception thrown when something is wrong with a particular node.
The phylogenetic tree class.
Interface for phylogenetic tree objects.
virtual bool isRooted() const =0
Tell if the tree is rooted.
virtual bool hasFather(int nodeId) const =0
virtual void setDistanceToFather(int nodeId, double length)=0
virtual void setBranchProperty(int nodeId, const std::string &name, const Clonable &property)=0
virtual std::string getNodeName(int nodeId) const =0
virtual std::vector< int > getSonsId(int parentId) const =0
virtual int getFatherId(int parentId) const =0
virtual std::vector< int > getNodesId() const =0
virtual bool unroot()=0
Unroot a rooted tree.
virtual bool hasNode(int nodeId) const =0
virtual bool isLeaf(int nodeId) const =0
virtual Clonable * getBranchProperty(int nodeId, const std::string &name)=0
virtual bool isMultifurcating() const =0
Tell if the tree is multifurcating.
virtual bool hasDistanceToFather(int nodeId) const =0
virtual std::vector< std::string > getLeavesNames() const =0
virtual double getDistanceToFather(int nodeId) const =0
virtual bool hasNoSon(int nodeId) const =0
virtual void newOutGroup(int nodeId)=0
Root a tree by specifying an outgroup.
virtual bool hasBranchProperty(int nodeId, const std::string &name) const =0
virtual int getRootId() const =0
virtual int getLeafId(const std::string &name) const =0
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
ExtendedFloat pow(const ExtendedFloat &ef, double exp)