bpp-phyl3  3.0.0
DRTreeParsimonyScore.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
7 
8 #include "../PatternTools.h"
9 #include "../Tree/TreeTemplateTools.h" // Needed for NNIs
10 #include "DRTreeParsimonyScore.h"
11 
12 using namespace bpp;
13 using namespace std;
14 
15 /******************************************************************************/
16 
18  std::shared_ptr<TreeTemplate<Node>> tree,
19  std::shared_ptr<const SiteContainerInterface> data,
20  bool verbose,
21  bool includeGaps) :
22  AbstractTreeParsimonyScore(tree, data, verbose, includeGaps),
23  parsimonyData_(new DRTreeParsimonyData(tree)),
24  nbDistinctSites_()
25 {
26  init_(data, verbose);
27 }
28 
30  std::shared_ptr<TreeTemplate<Node>> tree,
31  std::shared_ptr<const SiteContainerInterface> data,
32  std::shared_ptr<const StateMapInterface> statesMap,
33  bool verbose) :
34  AbstractTreeParsimonyScore(tree, data, statesMap, verbose),
35  parsimonyData_(new DRTreeParsimonyData(tree)),
36  nbDistinctSites_()
37 {
38  init_(data, verbose);
39 }
40 
41 void DRTreeParsimonyScore::init_(std::shared_ptr<const SiteContainerInterface> data, bool verbose)
42 {
43  if (verbose)
44  ApplicationTools::displayTask("Initializing data structure");
45  parsimonyData_->init(data, getStateMap());
46  nbDistinctSites_ = parsimonyData_->getNumberOfDistinctSites();
47  computeScores();
48  if (verbose)
50  if (verbose)
51  ApplicationTools::displayResult("Number of distinct sites",
53 }
54 
55 /******************************************************************************/
56 
59  parsimonyData_(dynamic_cast<DRTreeParsimonyData*>(tp.parsimonyData_->clone())),
60  nbDistinctSites_(tp.nbDistinctSites_)
61 {
62  parsimonyData_->setTree(getTreeTemplate());
63 }
64 
65 /******************************************************************************/
66 
68 {
69  AbstractTreeParsimonyScore::operator=(tp);
70  parsimonyData_.reset(tp.parsimonyData_->clone());
71  parsimonyData_->setTree(getTreeTemplate());
73  return *this;
74 }
75 
76 /******************************************************************************/
77 
79 
80 /******************************************************************************/
81 
83 {
84  computeScoresPostorder(treeTemplate().getRootNode());
85  computeScoresPreorder(treeTemplate().getRootNode());
87  parsimonyData_->nodeData(treeTemplate().getRootId()),
88  parsimonyData_->getRootBitsets(),
89  parsimonyData_->getRootScores());
90 }
91 
93 {
94  if (node->isLeaf())
95  return;
96  DRTreeParsimonyNodeData& pData = parsimonyData_->nodeData(node->getId());
97  for (unsigned int k = 0; k < node->getNumberOfSons(); k++)
98  {
99  const Node* son = node->getSon(k);
101  vector<Bitset>* bitsets = &pData.getBitsetsArrayForNeighbor(son->getId());
102  vector<unsigned int>* scores = &pData.getScoresArrayForNeighbor(son->getId());
103  if (son->isLeaf())
104  {
105  // son has no NodeData associated, must use LeafData instead
106  vector<Bitset>* sonBitsets = &parsimonyData_->leafData(son->getId()).getBitsetsArray();
107  for (unsigned int i = 0; i < sonBitsets->size(); i++)
108  {
109  (*bitsets)[i] = (*sonBitsets)[i];
110  (*scores)[i] = 0;
111  }
112  }
113  else
114  {
116  parsimonyData_->nodeData(son->getId()),
117  *bitsets,
118  *scores);
119  }
120  }
121 }
122 
123 void DRTreeParsimonyScore::computeScoresPostorderForNode(const DRTreeParsimonyNodeData& pData, vector<Bitset>& rBitsets, vector<unsigned int>& rScores)
124 {
125  // First initialize the vectors from input:
126  const Node* node = pData.getNode();
127  const Node* source = node->getFather();
128  vector<const Node*> neighbors = node->getNeighbors();
129  size_t nbNeighbors = node->degree();
130  vector< const vector<Bitset>*> iBitsets;
131  vector< const vector<unsigned int>*> iScores;
132  for (unsigned int k = 0; k < nbNeighbors; k++)
133  {
134  const Node* n = neighbors[k];
135  if (n != source)
136  {
137  iBitsets.push_back(&pData.getBitsetsArrayForNeighbor(n->getId()));
138  iScores.push_back(&pData.getScoresArrayForNeighbor(n->getId()));
139  }
140  }
141  // Then call the general method on these arrays:
142  computeScoresFromArrays(iBitsets, iScores, rBitsets, rScores);
143 }
144 
146 {
147  if (node->getNumberOfSons() == 0)
148  return;
149  DRTreeParsimonyNodeData& pData = parsimonyData_->nodeData(node->getId());
150  if (node->hasFather())
151  {
152  const Node* father = node->getFather();
153  vector<Bitset>* bitsets = &pData.getBitsetsArrayForNeighbor(father->getId());
154  vector<unsigned int>* scores = &pData.getScoresArrayForNeighbor(father->getId());
155  if (father->isLeaf())
156  { // Means that the tree is rooted by a leaf... dunno if we must allow that! Let it be for now.
157  // son has no NodeData associated, must use LeafData instead
158  vector<Bitset>* sonBitsets = &parsimonyData_->leafData(father->getId()).getBitsetsArray();
159  for (unsigned int i = 0; i < sonBitsets->size(); i++)
160  {
161  (*bitsets)[i] = (*sonBitsets)[i];
162  (*scores)[i] = 0;
163  }
164  }
165  else
166  {
168  parsimonyData_->nodeData(father->getId()),
169  node,
170  *bitsets,
171  *scores);
172  }
173  }
174  // Recurse call:
175  for (unsigned int k = 0; k < node->getNumberOfSons(); k++)
176  {
177  computeScoresPreorder(node->getSon(k));
178  }
179 }
180 
181 void DRTreeParsimonyScore::computeScoresPreorderForNode(const DRTreeParsimonyNodeData& pData, const Node* source, std::vector<Bitset>& rBitsets, std::vector<unsigned int>& rScores)
182 {
183  // First initialize the vectors from input:
184  const Node* node = pData.getNode();
185  vector<const Node*> neighbors = node->getNeighbors();
186  size_t nbNeighbors = node->degree();
187  vector< const vector<Bitset>*> iBitsets;
188  vector< const vector<unsigned int>*> iScores;
189  for (unsigned int k = 0; k < nbNeighbors; k++)
190  {
191  const Node* n = neighbors[k];
192  if (n != source)
193  {
194  iBitsets.push_back(&pData.getBitsetsArrayForNeighbor(n->getId()));
195  iScores.push_back(&pData.getScoresArrayForNeighbor(n->getId()));
196  }
197  }
198  // Then call the general method on these arrays:
199  computeScoresFromArrays(iBitsets, iScores, rBitsets, rScores);
200 }
201 
202 void DRTreeParsimonyScore::computeScoresForNode(const DRTreeParsimonyNodeData& pData, std::vector<Bitset>& rBitsets, std::vector<unsigned int>& rScores)
203 {
204  const Node* node = pData.getNode();
205  size_t nbNeighbors = node->degree();
206  vector<const Node*> neighbors = node->getNeighbors();
207  // First initialize the vectors from input:
208  vector< const vector<Bitset>*> iBitsets(nbNeighbors);
209  vector< const vector<unsigned int>*> iScores(nbNeighbors);
210  for (unsigned int k = 0; k < nbNeighbors; k++)
211  {
212  const Node* n = neighbors[k];
213  iBitsets[k] = &pData.getBitsetsArrayForNeighbor(n->getId());
214  iScores [k] = &pData.getScoresArrayForNeighbor(n->getId());
215  }
216  // Then call the general method on these arrays:
217  computeScoresFromArrays(iBitsets, iScores, rBitsets, rScores);
218 }
219 
220 /******************************************************************************/
221 unsigned int DRTreeParsimonyScore::getScore() const
222 {
223  unsigned int score = 0;
224  for (unsigned int i = 0; i < nbDistinctSites_; i++)
225  {
226  score += parsimonyData_->getRootScore(i) * parsimonyData_->getWeight(i);
227  }
228  return score;
229 }
230 
231 /******************************************************************************/
232 unsigned int DRTreeParsimonyScore::getScoreForSite(size_t site) const
233 {
234  return parsimonyData_->getRootScore(parsimonyData_->getRootArrayPosition(site));
235 }
236 
237 /******************************************************************************/
239  const vector< const vector<Bitset>*>& iBitsets,
240  const vector< const vector<unsigned int>*>& iScores,
241  vector<Bitset>& oBitsets,
242  vector<unsigned int>& oScores)
243 {
244  size_t nbPos = oBitsets.size();
245  size_t nbNodes = iBitsets.size();
246  if (iScores.size() != nbNodes)
247  throw Exception("DRTreeParsimonyScore::computeScores(); Error, input arrays must have the same length.");
248  if (nbNodes < 1)
249  throw Exception("DRTreeParsimonyScore::computeScores(); Error, input arrays must have a size >= 1.");
250  const vector<Bitset>* bitsets0 = iBitsets[0];
251  const vector<unsigned int>* scores0 = iScores[0];
252  for (size_t i = 0; i < nbPos; i++)
253  {
254  oBitsets[i] = (*bitsets0)[i];
255  oScores[i] = (*scores0)[i];
256  }
257  for (size_t k = 1; k < nbNodes; k++)
258  {
259  const vector<Bitset>* bitsetsk = iBitsets[k];
260  const vector<unsigned int>* scoresk = iScores[k];
261  for (unsigned int i = 0; i < nbPos; i++)
262  {
263  Bitset bs = oBitsets[i] & (*bitsetsk)[i];
264  oScores[i] += (*scoresk)[i];
265  if (bs == 0)
266  {
267  bs = oBitsets[i] | (*bitsetsk)[i];
268  oScores[i] += 1;
269  }
270  oBitsets[i] = bs;
271  }
272  }
273 }
274 
275 /******************************************************************************/
276 double DRTreeParsimonyScore::testNNI(int nodeId) const
277 {
278  const Node* son = treeTemplate().getNode(nodeId);
279  if (!son->hasFather())
280  throw NodePException("DRTreeParsimonyScore::testNNI(). Node 'son' must not be the root node.", son);
281  const Node* parent = son->getFather();
282  if (!parent->hasFather())
283  throw NodePException("DRTreeParsimonyScore::testNNI(). Node 'parent' must not be the root node.", parent);
284  const Node* grandFather = parent->getFather();
285  // From here: Bifurcation assumed.
286  // In case of multifurcation, an arbitrary uncle is chosen.
287  // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
288  size_t parentPosition = grandFather->getSonPosition(parent);
289  const Node* uncle = grandFather->getSon(parentPosition > 1 ? parentPosition - 1 : 1 - parentPosition);
290 
291  // Retrieving arrays of interest:
292  const DRTreeParsimonyNodeData& parentData = parsimonyData_->nodeData(parent->getId());
293  const vector<Bitset>* sonBitsets = &parentData.getBitsetsArrayForNeighbor(son->getId());
294  const vector<unsigned int>* sonScores = &parentData.getScoresArrayForNeighbor(son->getId());
295  vector<const Node*> parentNeighbors = TreeTemplateTools::getRemainingNeighbors(parent, grandFather, son);
296  size_t nbParentNeighbors = parentNeighbors.size();
297  vector< const vector<Bitset>*> parentBitsets(nbParentNeighbors);
298  vector< const vector<unsigned int>*> parentScores(nbParentNeighbors);
299  for (unsigned int k = 0; k < nbParentNeighbors; k++)
300  {
301  const Node* n = parentNeighbors[k]; // This neighbor
302  parentBitsets[k] = &parentData.getBitsetsArrayForNeighbor(n->getId());
303  parentScores[k] = &parentData.getScoresArrayForNeighbor(n->getId());
304  }
305 
306  const DRTreeParsimonyNodeData& grandFatherData = parsimonyData_->nodeData(grandFather->getId());
307  const vector<Bitset>* uncleBitsets = &grandFatherData.getBitsetsArrayForNeighbor(uncle->getId());
308  const vector<unsigned int>* uncleScores = &grandFatherData.getScoresArrayForNeighbor(uncle->getId());
309  vector<const Node*> grandFatherNeighbors = TreeTemplateTools::getRemainingNeighbors(grandFather, parent, uncle);
310  size_t nbGrandFatherNeighbors = grandFatherNeighbors.size();
311  vector< const vector<Bitset>*> grandFatherBitsets(nbGrandFatherNeighbors);
312  vector< const vector<unsigned int>*> grandFatherScores(nbGrandFatherNeighbors);
313  for (unsigned int k = 0; k < nbGrandFatherNeighbors; k++)
314  {
315  const Node* n = grandFatherNeighbors[k]; // This neighbor
316  grandFatherBitsets[k] = &grandFatherData.getBitsetsArrayForNeighbor(n->getId());
317  grandFatherScores[k] = &grandFatherData.getScoresArrayForNeighbor(n->getId());
318  }
319 
320  // Compute arrays and scores for grand-father node:
321  grandFatherBitsets.push_back(sonBitsets);
322  grandFatherScores.push_back(sonScores);
323  // Init arrays:
324  vector<Bitset> gfBitsets(sonBitsets->size()); // All arrays supposed to have the same size!
325  vector<unsigned int> gfScores(sonScores->size());
326  // Fill arrays:
327  computeScoresFromArrays(grandFatherBitsets, grandFatherScores, gfBitsets, gfScores);
328 
329  // Now computes arrays and scores for parent node:
330  parentBitsets.push_back(uncleBitsets);
331  parentScores.push_back(uncleScores);
332  parentBitsets.push_back(&gfBitsets);
333  parentScores.push_back(&gfScores);
334  // Init arrays:
335  vector<Bitset> pBitsets(sonBitsets->size()); // All arrays supposed to have the same size!
336  vector<unsigned int> pScores(sonScores->size());
337  // Fill arrays:
338  computeScoresFromArrays(parentBitsets, parentScores, pBitsets, pScores);
339 
340  // Final computation:
341  unsigned int score = 0;
342  for (unsigned int i = 0; i < nbDistinctSites_; i++)
343  {
344  score += pScores[i] * parsimonyData_->getWeight(i);
345  }
346  return (double)score - (double)getScore();
347 }
348 
349 /******************************************************************************/
350 
352 {
353  Node* son = treeTemplate_().getNode(nodeId);
354  if (!son->hasFather())
355  throw NodePException("DRTreeParsimonyScore::doNNI(). Node 'son' must not be the root node.", son);
356  Node* parent = son->getFather();
357  if (!parent->hasFather())
358  throw NodePException("DRTreeParsimonyScore::doNNI(). Node 'parent' must not be the root node.", parent);
359  Node* grandFather = parent->getFather();
360  // From here: Bifurcation assumed.
361  // In case of multifurcation, an arbitrary uncle is chosen.
362  // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
363  size_t parentPosition = grandFather->getSonPosition(parent);
364  Node* uncle = grandFather->getSon(parentPosition > 1 ? parentPosition - 1 : 1 - parentPosition);
365  // Swap nodes:
366  parent->removeSon(son);
367  grandFather->removeSon(uncle);
368  parent->addSon(uncle);
369  grandFather->addSon(son);
370 }
371 
372 /******************************************************************************/
373 
374 // /******************************************************************************/
375 //
376 // void DRTreeParsimonyScore::setNodeState(Node* node, size_t state)
377 // {
378 // Number<size_t>* stateProperty = new Number<size_t>(state);
379 // node->setNodeProperty(STATE, *stateProperty);
380 // delete stateProperty;
381 // }
382 //
383 // /******************************************************************************/
384 //
385 // size_t DRTreeParsimonyScore::getNodeState(const Node* node)
386 // {
387 // return (dynamic_cast<const Number<size_t>*>(node->getNodeProperty(STATE)))->getValue(); // exception on root on the true history - why didn't the root receive a state?
388 // }
389 //
390 // /******************************************************************************/
391 //
392 // void DRTreeParsimonyScore::computeSolution()
393 // {
394 // map< int, vector<size_t> > nodeToPossibleStates;
395 // TreeTemplate<Node>* tree = getTreeP_();
396 // vector<Node*> nodes = tree->getNodes();
397 // vector<Bitset> nodeBitsets;
398 // for (size_t n = 0; n < nodes.size(); ++n)
399 // {
400 // // extract the node's bisets (i.e, possible states assignments)
401 // if (nodes[n]->isLeaf())
402 // {
403 // nodeBitsets = (&parsimonyData_->getLeafData(nodes[n]->getId()))->getBitsetsArray();
404 // }
405 // else if (nodes[n]->hasFather())
406 // {
407 // nodeBitsets = (&parsimonyData_->getNodeData(nodes[n]->getFather()->getId()))->getBitsetsArrayForNeighbor(nodes[n]->getId()); // extract the bitset corresponding to the son from its father's bitSet array
408 // }
409 // else // get the node's possible states from its first internal neighbor
410 // {
411 // int neighborId = 0;
412 // vector<Node*> neighbors = nodes[n]->getNeighbors();
413 // for (unsigned int nn=0; nn<neighbors.size(); ++nn)
414 // {
415 // if (!neighbors[nn]->isLeaf())
416 // {
417 // neighborId = neighbors[nn]->getId();
418 // break;
419 // }
420 // }
421 // nodeBitsets = (&parsimonyData_->getNodeData(neighborId))->getBitsetsArrayForNeighbor(nodes[n]->getId());
422 // }
423 // // map the node id to its possible states
424 // vector <size_t> possibleStates;
425 // for (size_t s = 0; s < getStateMap().getNumberOfModelStates(); ++s)
426 // {
427 // if (nodeBitsets[0].test(s))
428 // {
429 // possibleStates.push_back(s);
430 // }
431 // }
432 // nodeToPossibleStates[nodes[n]->getId()] = possibleStates;
433 // }
434 //
435 // // set states for the nodes according to their possible assignments and parent state
436 // TreeIterator* treeIt = new PreOrderTreeIterator(*tree);
437 // for (const Node* node = treeIt->begin(); node != treeIt->end(); node = treeIt->next())
438 // {
439 // size_t nodeState;
440 // vector<size_t> possibleStates = nodeToPossibleStates[node->getId()];
441 // if (possibleStates.size() == 1)
442 // {
443 // nodeState = possibleStates[0];
444 // }
445 // else if (node->hasFather()) // if the node has a father -> set its state according to its father's state
446 // {
447 // nodeState = getNodeState(node->getFather());
448 // }
449 // else // if there is no restriction from the father -> set the state randomely
450 // {
451 // nodeState = RandomTools::pickOne(possibleStates);
452 // }
453 // setNodeState(tree->getNode(node->getId()), nodeState);
454 // }
455 // delete treeIt;
456 // }
Partial implementation of the TreeParsimonyScore interface.
virtual const TreeTemplate< Node > & treeTemplate() const
virtual std::shared_ptr< const TreeTemplate< Node > > getTreeTemplate() const
virtual TreeTemplate< Node > & treeTemplate_()
std::shared_ptr< const StateMapInterface > getStateMap() const override
Get the state map associated to this instance.
static void displayTask(const std::string &text, bool eof=false)
static void displayTaskDone()
static void displayResult(const std::string &text, const T &result)
Parsimony data structure for double-recursive (DR) algorithm.
Parsimony data structure for a node.
std::vector< Bitset > & getBitsetsArrayForNeighbor(int neighborId)
const Node * getNode() const
Get the node associated to this data structure.
std::vector< unsigned int > & getScoresArrayForNeighbor(int neighborId)
Double recursive implementation of interface TreeParsimonyScore.
static void computeScoresPostorderForNode(const DRTreeParsimonyNodeData &pData, std::vector< Bitset > &rBitsets, std::vector< unsigned int > &rScores)
Compute bitsets and scores for each site for a node, in postorder.
void init_(std::shared_ptr< const SiteContainerInterface > data, bool verbose)
static void computeScoresFromArrays(const std::vector< const std::vector< Bitset > * > &iBitsets, const std::vector< const std::vector< unsigned int > * > &iScores, std::vector< Bitset > &oBitsets, std::vector< unsigned int > &oScores)
Compute bitsets and scores from an array of arrays.
unsigned int getScoreForSite(size_t site) const override
Get the score for a given site for the current tree, i.e. the total minimum number of changes in the ...
virtual void computeScoresPreorder(const Node *)
Compute scores (preorder algorithm).
DRTreeParsimonyScore(std::shared_ptr< TreeTemplate< Node >> tree, std::shared_ptr< const SiteContainerInterface > data, bool verbose=true, bool includeGaps=false)
std::unique_ptr< DRTreeParsimonyData > parsimonyData_
DRTreeParsimonyScore & operator=(const DRTreeParsimonyScore &tp)
unsigned int getScore() const override
Get the score for the current tree, i.e. the total minimum number of changes in the tree.
virtual void computeScores()
Compute all scores.
double testNNI(int nodeId) const override
Send the score of a NNI movement, without performing it.
static void computeScoresForNode(const DRTreeParsimonyNodeData &pData, std::vector< Bitset > &rBitsets, std::vector< unsigned int > &rScores)
Compute bitsets and scores for each site for a node, in all directions.
virtual void computeScoresPostorder(const Node *)
Compute scores (postorder algorithm).
void doNNI(int nodeId) override
Perform a NNI movement.
static void computeScoresPreorderForNode(const DRTreeParsimonyNodeData &pData, const Node *source, std::vector< Bitset > &rBitsets, std::vector< unsigned int > &rScores)
Compute bitsets and scores for each site for a node, in preorder.
General exception thrown when something is wrong with a particular node.
The phylogenetic node class.
Definition: Node.h:59
virtual Node * removeSon(size_t pos)
Definition: Node.h:411
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:374
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:306
virtual bool isLeaf() const
Definition: Node.h:667
virtual size_t getSonPosition(const Node *son) const
Definition: Node.cpp:151
std::vector< const Node * > getNeighbors() const
Definition: Node.cpp:127
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:346
virtual size_t getNumberOfSons() const
Definition: Node.h:355
virtual size_t degree() const
Definition: Node.h:463
static std::vector< const Node * > getRemainingNeighbors(const Node *node1, const Node *node2, const Node *node3)
Get a subset of node neighbors.
The phylogenetic tree class.
Definition: TreeTemplate.h:59
std::string toString(T t)
Defines the basic types of data flow nodes.
std::bitset< 21 > Bitset