bpp-phyl3 3.0.0
DRTreeParsimonyScore.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
7
8#include "../PatternTools.h"
9#include "../Tree/TreeTemplateTools.h" // Needed for NNIs
10#include "../Tree/TreeIterator.h" // Needed for parsimony solution
12
13using namespace bpp;
14using namespace std;
15
16/******************************************************************************/
17
18DRTreeParsimonyScore::DRTreeParsimonyScore(
19 std::shared_ptr<TreeTemplate<Node>> tree,
20 std::shared_ptr<const SiteContainerInterface> data,
21 bool verbose,
22 bool includeGaps) :
23 AbstractTreeParsimonyScore(tree, data, verbose, includeGaps),
24 parsimonyData_(new DRTreeParsimonyData(tree)),
25 nbDistinctSites_()
26{
27 init_(data, verbose);
28}
29
31 std::shared_ptr<TreeTemplate<Node>> tree,
32 std::shared_ptr<const SiteContainerInterface> data,
33 std::shared_ptr<const StateMapInterface> statesMap,
34 bool verbose) :
35 AbstractTreeParsimonyScore(tree, data, statesMap, verbose),
36 parsimonyData_(new DRTreeParsimonyData(tree)),
37 nbDistinctSites_()
38{
39 init_(data, verbose);
40}
41
42void DRTreeParsimonyScore::init_(std::shared_ptr<const SiteContainerInterface> data, bool verbose)
43{
44 if (verbose)
45 ApplicationTools::displayTask("Initializing data structure");
46 parsimonyData_->init(data, getStateMap());
47 nbDistinctSites_ = parsimonyData_->getNumberOfDistinctSites();
49 if (verbose)
51 if (verbose)
52 ApplicationTools::displayResult("Number of distinct sites",
54}
55
56/******************************************************************************/
57
60 parsimonyData_(dynamic_cast<DRTreeParsimonyData*>(tp.parsimonyData_->clone())),
61 nbDistinctSites_(tp.nbDistinctSites_)
62{
64}
65
66/******************************************************************************/
67
69{
70 AbstractTreeParsimonyScore::operator=(tp);
71 parsimonyData_.reset(tp.parsimonyData_->clone());
74 return *this;
75}
76
77/******************************************************************************/
78
80
81/******************************************************************************/
82
84{
85 computeScoresPostorder(treeTemplate().getRootNode());
86 computeScoresPreorder(treeTemplate().getRootNode());
88 parsimonyData_->nodeData(treeTemplate().getRootId()),
89 parsimonyData_->getRootBitsets(),
90 parsimonyData_->getRootScores());
91}
92
94{
95 if (node->isLeaf())
96 return;
97 DRTreeParsimonyNodeData& pData = parsimonyData_->nodeData(node->getId());
98 for (unsigned int k = 0; k < node->getNumberOfSons(); k++)
99 {
100 const Node* son = node->getSon(k);
102 vector<Bitset>* bitsets = &pData.getBitsetsArrayForNeighbor(son->getId());
103 vector<unsigned int>* scores = &pData.getScoresArrayForNeighbor(son->getId());
104 if (son->isLeaf())
105 {
106 // son has no NodeData associated, must use LeafData instead
107 vector<Bitset>* sonBitsets = &parsimonyData_->leafData(son->getId()).getBitsetsArray();
108 for (unsigned int i = 0; i < sonBitsets->size(); i++)
109 {
110 (*bitsets)[i] = (*sonBitsets)[i];
111 (*scores)[i] = 0;
112 }
113 }
114 else
115 {
117 parsimonyData_->nodeData(son->getId()),
118 *bitsets,
119 *scores);
120 }
121 }
122}
123
124void DRTreeParsimonyScore::computeScoresPostorderForNode(const DRTreeParsimonyNodeData& pData, vector<Bitset>& rBitsets, vector<unsigned int>& rScores)
125{
126 // First initialize the vectors from input:
127 const Node* node = pData.getNode();
128 const Node* source = node->getFather();
129 vector<const Node*> neighbors = node->getNeighbors();
130 size_t nbNeighbors = node->degree();
131 vector< const vector<Bitset>*> iBitsets;
132 vector< const vector<unsigned int>*> iScores;
133 for (unsigned int k = 0; k < nbNeighbors; k++)
134 {
135 const Node* n = neighbors[k];
136 if (n != source)
137 {
138 iBitsets.push_back(&pData.getBitsetsArrayForNeighbor(n->getId()));
139 iScores.push_back(&pData.getScoresArrayForNeighbor(n->getId()));
140 }
141 }
142 // Then call the general method on these arrays:
143 computeScoresFromArrays(iBitsets, iScores, rBitsets, rScores);
144}
145
147{
148 if (node->getNumberOfSons() == 0)
149 return;
150 DRTreeParsimonyNodeData& pData = parsimonyData_->nodeData(node->getId());
151 if (node->hasFather())
152 {
153 const Node* father = node->getFather();
154 vector<Bitset>* bitsets = &pData.getBitsetsArrayForNeighbor(father->getId());
155 vector<unsigned int>* scores = &pData.getScoresArrayForNeighbor(father->getId());
156 if (father->isLeaf())
157 { // Means that the tree is rooted by a leaf... dunno if we must allow that! Let it be for now.
158 // son has no NodeData associated, must use LeafData instead
159 vector<Bitset>* sonBitsets = &parsimonyData_->leafData(father->getId()).getBitsetsArray();
160 for (unsigned int i = 0; i < sonBitsets->size(); i++)
161 {
162 (*bitsets)[i] = (*sonBitsets)[i];
163 (*scores)[i] = 0;
164 }
165 }
166 else
167 {
169 parsimonyData_->nodeData(father->getId()),
170 node,
171 *bitsets,
172 *scores);
173 }
174 }
175 // Recurse call:
176 for (unsigned int k = 0; k < node->getNumberOfSons(); k++)
177 {
179 }
180}
181
182void DRTreeParsimonyScore::computeScoresPreorderForNode(const DRTreeParsimonyNodeData& pData, const Node* source, std::vector<Bitset>& rBitsets, std::vector<unsigned int>& rScores)
183{
184 // First initialize the vectors from input:
185 const Node* node = pData.getNode();
186 vector<const Node*> neighbors = node->getNeighbors();
187 size_t nbNeighbors = node->degree();
188 vector< const vector<Bitset>*> iBitsets;
189 vector< const vector<unsigned int>*> iScores;
190 for (unsigned int k = 0; k < nbNeighbors; k++)
191 {
192 const Node* n = neighbors[k];
193 if (n != source)
194 {
195 iBitsets.push_back(&pData.getBitsetsArrayForNeighbor(n->getId()));
196 iScores.push_back(&pData.getScoresArrayForNeighbor(n->getId()));
197 }
198 }
199 // Then call the general method on these arrays:
200 computeScoresFromArrays(iBitsets, iScores, rBitsets, rScores);
201}
202
203void DRTreeParsimonyScore::computeScoresForNode(const DRTreeParsimonyNodeData& pData, std::vector<Bitset>& rBitsets, std::vector<unsigned int>& rScores)
204{
205 const Node* node = pData.getNode();
206 size_t nbNeighbors = node->degree();
207 vector<const Node*> neighbors = node->getNeighbors();
208 // First initialize the vectors from input:
209 vector< const vector<Bitset>*> iBitsets(nbNeighbors);
210 vector< const vector<unsigned int>*> iScores(nbNeighbors);
211 for (unsigned int k = 0; k < nbNeighbors; k++)
212 {
213 const Node* n = neighbors[k];
214 iBitsets[k] = &pData.getBitsetsArrayForNeighbor(n->getId());
215 iScores [k] = &pData.getScoresArrayForNeighbor(n->getId());
216 }
217 // Then call the general method on these arrays:
218 computeScoresFromArrays(iBitsets, iScores, rBitsets, rScores);
219}
220
221/******************************************************************************/
223{
224 unsigned int score = 0;
225 for (unsigned int i = 0; i < nbDistinctSites_; i++)
226 {
227 score += parsimonyData_->getRootScore(i) * parsimonyData_->getWeight(i);
228 }
229 return score;
230}
231
232/******************************************************************************/
233unsigned int DRTreeParsimonyScore::getScoreForSite(size_t site) const
234{
235 return parsimonyData_->getRootScore(parsimonyData_->getRootArrayPosition(site));
236}
237
238/******************************************************************************/
240 const vector< const vector<Bitset>*>& iBitsets,
241 const vector< const vector<unsigned int>*>& iScores,
242 vector<Bitset>& oBitsets,
243 vector<unsigned int>& oScores)
244{
245 size_t nbPos = oBitsets.size();
246 size_t nbNodes = iBitsets.size();
247 if (iScores.size() != nbNodes)
248 throw Exception("DRTreeParsimonyScore::computeScores(); Error, input arrays must have the same length.");
249 if (nbNodes < 1)
250 throw Exception("DRTreeParsimonyScore::computeScores(); Error, input arrays must have a size >= 1.");
251 const vector<Bitset>* bitsets0 = iBitsets[0];
252 const vector<unsigned int>* scores0 = iScores[0];
253 for (size_t i = 0; i < nbPos; i++)
254 {
255 oBitsets[i] = (*bitsets0)[i];
256 oScores[i] = (*scores0)[i];
257 }
258 for (size_t k = 1; k < nbNodes; k++)
259 {
260 const vector<Bitset>* bitsetsk = iBitsets[k];
261 const vector<unsigned int>* scoresk = iScores[k];
262 for (unsigned int i = 0; i < nbPos; i++)
263 {
264 Bitset bs = oBitsets[i] & (*bitsetsk)[i];
265 oScores[i] += (*scoresk)[i];
266 if (bs == 0)
267 {
268 bs = oBitsets[i] | (*bitsetsk)[i];
269 oScores[i] += 1;
270 }
271 oBitsets[i] = bs;
272 }
273 }
274}
275
276/******************************************************************************/
277double DRTreeParsimonyScore::testNNI(int nodeId) const
278{
279 const Node* son = treeTemplate().getNode(nodeId);
280 if (!son->hasFather())
281 throw NodePException("DRTreeParsimonyScore::testNNI(). Node 'son' must not be the root node.", son);
282 const Node* parent = son->getFather();
283 if (!parent->hasFather())
284 throw NodePException("DRTreeParsimonyScore::testNNI(). Node 'parent' must not be the root node.", parent);
285 const Node* grandFather = parent->getFather();
286 // From here: Bifurcation assumed.
287 // In case of multifurcation, an arbitrary uncle is chosen.
288 // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
289 size_t parentPosition = grandFather->getSonPosition(parent);
290 const Node* uncle = grandFather->getSon(parentPosition > 1 ? parentPosition - 1 : 1 - parentPosition);
291
292 // Retrieving arrays of interest:
293 const DRTreeParsimonyNodeData& parentData = parsimonyData_->nodeData(parent->getId());
294 const vector<Bitset>* sonBitsets = &parentData.getBitsetsArrayForNeighbor(son->getId());
295 const vector<unsigned int>* sonScores = &parentData.getScoresArrayForNeighbor(son->getId());
296 vector<const Node*> parentNeighbors = TreeTemplateTools::getRemainingNeighbors(parent, grandFather, son);
297 size_t nbParentNeighbors = parentNeighbors.size();
298 vector< const vector<Bitset>*> parentBitsets(nbParentNeighbors);
299 vector< const vector<unsigned int>*> parentScores(nbParentNeighbors);
300 for (unsigned int k = 0; k < nbParentNeighbors; k++)
301 {
302 const Node* n = parentNeighbors[k]; // This neighbor
303 parentBitsets[k] = &parentData.getBitsetsArrayForNeighbor(n->getId());
304 parentScores[k] = &parentData.getScoresArrayForNeighbor(n->getId());
305 }
306
307 const DRTreeParsimonyNodeData& grandFatherData = parsimonyData_->nodeData(grandFather->getId());
308 const vector<Bitset>* uncleBitsets = &grandFatherData.getBitsetsArrayForNeighbor(uncle->getId());
309 const vector<unsigned int>* uncleScores = &grandFatherData.getScoresArrayForNeighbor(uncle->getId());
310 vector<const Node*> grandFatherNeighbors = TreeTemplateTools::getRemainingNeighbors(grandFather, parent, uncle);
311 size_t nbGrandFatherNeighbors = grandFatherNeighbors.size();
312 vector< const vector<Bitset>*> grandFatherBitsets(nbGrandFatherNeighbors);
313 vector< const vector<unsigned int>*> grandFatherScores(nbGrandFatherNeighbors);
314 for (unsigned int k = 0; k < nbGrandFatherNeighbors; k++)
315 {
316 const Node* n = grandFatherNeighbors[k]; // This neighbor
317 grandFatherBitsets[k] = &grandFatherData.getBitsetsArrayForNeighbor(n->getId());
318 grandFatherScores[k] = &grandFatherData.getScoresArrayForNeighbor(n->getId());
319 }
320
321 // Compute arrays and scores for grand-father node:
322 grandFatherBitsets.push_back(sonBitsets);
323 grandFatherScores.push_back(sonScores);
324 // Init arrays:
325 vector<Bitset> gfBitsets(sonBitsets->size()); // All arrays supposed to have the same size!
326 vector<unsigned int> gfScores(sonScores->size());
327 // Fill arrays:
328 computeScoresFromArrays(grandFatherBitsets, grandFatherScores, gfBitsets, gfScores);
329
330 // Now computes arrays and scores for parent node:
331 parentBitsets.push_back(uncleBitsets);
332 parentScores.push_back(uncleScores);
333 parentBitsets.push_back(&gfBitsets);
334 parentScores.push_back(&gfScores);
335 // Init arrays:
336 vector<Bitset> pBitsets(sonBitsets->size()); // All arrays supposed to have the same size!
337 vector<unsigned int> pScores(sonScores->size());
338 // Fill arrays:
339 computeScoresFromArrays(parentBitsets, parentScores, pBitsets, pScores);
340
341 // Final computation:
342 unsigned int score = 0;
343 for (unsigned int i = 0; i < nbDistinctSites_; i++)
344 {
345 score += pScores[i] * parsimonyData_->getWeight(i);
346 }
347 return (double)score - (double)getScore();
348}
349
350/******************************************************************************/
351
353{
354 Node* son = treeTemplate_().getNode(nodeId);
355 if (!son->hasFather())
356 throw NodePException("DRTreeParsimonyScore::doNNI(). Node 'son' must not be the root node.", son);
357 Node* parent = son->getFather();
358 if (!parent->hasFather())
359 throw NodePException("DRTreeParsimonyScore::doNNI(). Node 'parent' must not be the root node.", parent);
360 Node* grandFather = parent->getFather();
361 // From here: Bifurcation assumed.
362 // In case of multifurcation, an arbitrary uncle is chosen.
363 // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
364 size_t parentPosition = grandFather->getSonPosition(parent);
365 Node* uncle = grandFather->getSon(parentPosition > 1 ? parentPosition - 1 : 1 - parentPosition);
366 // Swap nodes:
367 parent->removeSon(son);
368 grandFather->removeSon(uncle);
369 parent->addSon(uncle);
370 grandFather->addSon(son);
371}
372
373/******************************************************************************/
374
376
378{
379 Number<size_t> stateProperty(state);
380 node->setNodeProperty(PARSIMONY_SOLUTION_STATE, stateProperty);
381}
382
383/******************************************************************************/
384
386{
387 return (dynamic_cast<const Number<size_t>*>(node->getNodeProperty(PARSIMONY_SOLUTION_STATE)))->getValue(); // exception on root on the true history - why didn't the root receive a state?
388}
389
390/******************************************************************************/
391
393{
394 map< int, vector<size_t>> nodeToPossibleStates;
395 vector<Node*> nodes = treeTemplate_().getNodes();
396 vector<Bitset> nodeBitsets;
397 for (size_t n = 0; n < nodes.size(); ++n)
398 {
399 // extract the node's bisets (i.e, possible states assignments)
400 if (nodes[n]->isLeaf())
401 {
402 nodeBitsets = parsimonyData_->leafData(nodes[n]->getId()).getBitsetsArray();
403 }
404 else if (nodes[n]->hasFather())
405 {
406 nodeBitsets = parsimonyData_->nodeData(nodes[n]->getFather()->getId()).getBitsetsArrayForNeighbor(nodes[n]->getId()); // extract the bitset corresponding to the son from its father's bitSet array
407 }
408 else // get the node's possible states from its first internal neighbor
409 {
410 int neighborId = 0;
411 vector<Node*> neighbors = nodes[n]->getNeighbors();
412 for (size_t nn = 0; nn < neighbors.size(); ++nn)
413 {
414 if (!neighbors[nn]->isLeaf())
415 {
416 neighborId = neighbors[nn]->getId();
417 break;
418 }
419 }
420 nodeBitsets = parsimonyData_->nodeData(neighborId).getBitsetsArrayForNeighbor(nodes[n]->getId());
421 }
422 // map the node id to its possible states
423 vector<size_t> possibleStates;
424 for (size_t s = 0; s < stateMap().getNumberOfModelStates(); ++s)
425 {
426 if (nodeBitsets[0].test(s))
427 {
428 possibleStates.push_back(s);
429 }
430 }
431 nodeToPossibleStates[nodes[n]->getId()] = possibleStates;
432 }
433
434 // set states for the nodes according to their possible assignments and parent state
436 for (const Node* node = treeIt->begin(); node != treeIt->end(); node = treeIt->next())
437 {
438 size_t nodeState;
439 vector<size_t> possibleStates = nodeToPossibleStates[node->getId()];
440 if (possibleStates.size() == 1)
441 {
442 nodeState = possibleStates[0];
443 }
444 else if (node->hasFather()) // if the node has a father -> set its state according to its father's state
445 {
446 nodeState = getNodeState(node->getFather());
447 }
448 else // if there is no restriction from the father -> set the state randomely
449 {
450 nodeState = RandomTools::pickOne(possibleStates);
451 }
452 setNodeState(treeTemplate_().getNode(node->getId()), nodeState);
453 }
454 delete treeIt;
455}
Partial implementation of the TreeParsimonyScore interface.
std::shared_ptr< const StateMapInterface > getStateMap() const override
Get the state map associated to this instance.
virtual std::shared_ptr< const TreeTemplate< Node > > getTreeTemplate() const
virtual TreeTemplate< Node > & treeTemplate_()
virtual const TreeTemplate< Node > & treeTemplate() const
const StateMapInterface & stateMap() const override
Get the state map associated to this instance.
static void displayTask(const std::string &text, bool eof=false)
static void displayTaskDone()
static void displayResult(const std::string &text, const T &result)
Parsimony data structure for double-recursive (DR) algorithm.
Parsimony data structure for a node.
const Node * getNode() const
Get the node associated to this data structure.
std::vector< Bitset > & getBitsetsArrayForNeighbor(int neighborId)
std::vector< unsigned int > & getScoresArrayForNeighbor(int neighborId)
Double recursive implementation of interface TreeParsimonyScore.
static void computeScoresPostorderForNode(const DRTreeParsimonyNodeData &pData, std::vector< Bitset > &rBitsets, std::vector< unsigned int > &rScores)
Compute bitsets and scores for each site for a node, in postorder.
void init_(std::shared_ptr< const SiteContainerInterface > data, bool verbose)
static void computeScoresFromArrays(const std::vector< const std::vector< Bitset > * > &iBitsets, const std::vector< const std::vector< unsigned int > * > &iScores, std::vector< Bitset > &oBitsets, std::vector< unsigned int > &oScores)
Compute bitsets and scores from an array of arrays.
unsigned int getScoreForSite(size_t site) const override
Get the score for a given site for the current tree, i.e. the total minimum number of changes in the ...
DRTreeParsimonyScore(std::shared_ptr< TreeTemplate< Node > > tree, std::shared_ptr< const SiteContainerInterface > data, bool verbose=true, bool includeGaps=false)
virtual void computeScoresPreorder(const Node *)
Compute scores (preorder algorithm).
static std::string PARSIMONY_SOLUTION_STATE
std::unique_ptr< DRTreeParsimonyData > parsimonyData_
size_t getNodeState(const Node *node)
Extracts the state of a node in a mapping.
DRTreeParsimonyScore & operator=(const DRTreeParsimonyScore &tp)
unsigned int getScore() const override
Get the score for the current tree, i.e. the total minimum number of changes in the tree.
void setNodeState(Node *node, size_t state)
Sets the state of a node in a mapping.
virtual void computeScores()
Compute all scores.
double testNNI(int nodeId) const override
Send the score of a NNI movement, without performing it.
static void computeScoresForNode(const DRTreeParsimonyNodeData &pData, std::vector< Bitset > &rBitsets, std::vector< unsigned int > &rScores)
Compute bitsets and scores for each site for a node, in all directions.
virtual void computeScoresPostorder(const Node *)
Compute scores (postorder algorithm).
void doNNI(int nodeId) override
Perform a NNI movement.
static void computeScoresPreorderForNode(const DRTreeParsimonyNodeData &pData, const Node *source, std::vector< Bitset > &rBitsets, std::vector< unsigned int > &rScores)
Compute bitsets and scores for each site for a node, in preorder.
void computeSolution()
Compute a maximum parsimony solution in DELTRAN manner.
General exception thrown when something is wrong with a particular node.
The phylogenetic node class.
Definition: Node.h:59
virtual Node * removeSon(size_t pos)
Definition: Node.h:423
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual Clonable * getNodeProperty(const std::string &name)
Definition: Node.h:514
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:386
virtual void setNodeProperty(const std::string &name, const Clonable &property)
Set/add a node property.
Definition: Node.h:507
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:306
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual bool isLeaf() const
Definition: Node.h:679
virtual size_t getSonPosition(const Node *son) const
Definition: Node.cpp:153
std::vector< const Node * > getNeighbors() const
Definition: Node.cpp:129
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:346
virtual size_t getNumberOfSons() const
Definition: Node.h:355
virtual size_t degree() const
Definition: Node.h:475
static T pickOne(std::vector< T > &v, bool replace=false)
virtual size_t getNumberOfModelStates() const =0
const Node * end()
Definition: TreeIterator.h:72
const Node * begin()
virtual const Node * next()=0
static std::vector< const Node * > getRemainingNeighbors(const Node *node1, const Node *node2, const Node *node3)
Get a subset of node neighbors.
The phylogenetic tree class.
Definition: TreeTemplate.h:59
std::string toString(T t)
Defines the basic types of data flow nodes.
std::bitset< 21 > Bitset