bpp-phyl3  3.0.0
BipartitionTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Exceptions.h>
6 #include <Bpp/Io/FileTools.h>
7 #include <Bpp/Text/TextTools.h>
8 
9 #include "BipartitionList.h"
10 #include "BipartitionTools.h"
11 #include "TreeTemplate.h"
12 
13 // From SeqLib
14 #include <Bpp/Seq/Alphabet/DNA.h>
17 
18 using namespace bpp;
19 
20 // From STL
21 #include <iostream>
22 #include <algorithm>
23 #include <limits.h> // defines CHAR_BIT
24 
25 using namespace std;
26 
27 /******************************************************************************/
28 
29 int BipartitionTools::LWORD = static_cast<int>(CHAR_BIT * sizeof(int));
30 
31 /******************************************************************************/
32 
33 /* functions dealing with int* seen as arrays of bits */
34 /* (provided by Manolo Gouy) */
35 
36 void BipartitionTools::bit1(int* plist, int num)
37 {
38  // num--;
39  plist += (num / LWORD);
40  *plist |= (1 << (num % LWORD));
41 }
42 
43 /******************************************************************************/
44 
45 void BipartitionTools::bit0(int* plist, int num)
46 {
47  // num--;
48  plist += (num / LWORD);
49  *plist &= ~(1 << (num % LWORD));
50 }
51 
52 /******************************************************************************/
53 
54 void BipartitionTools::bitAnd(int* listet, int* list1, int* list2, size_t len)
55 {
56  for (size_t i = 0; i < len; i++)
57  {
58  listet[i] = list1[i] & list2[i];
59  }
60 }
61 
62 /******************************************************************************/
63 
64 void BipartitionTools::bitOr(int* listou, int* list1, int* list2, size_t len)
65 {
66  for (size_t i = 0; i < len; i++)
67  {
68  listou[i] = list1[i] | list2[i];
69  }
70 }
71 
72 /******************************************************************************/
73 
74 void BipartitionTools::bitNot(int* listnon, int* list, size_t len)
75 {
76  for (size_t i = 0; i < len; i++)
77  {
78  listnon[i] = ~list[i];
79  }
80 }
81 
82 /******************************************************************************/
83 bool BipartitionTools::testBit(int* plist, int num)
84 {
85  // num--;
86  plist += (num / LWORD);
87  return (*plist) & (1 << (num % LWORD));
88 }
89 
90 /******************************************************************************/
91 
92 unique_ptr<BipartitionList> BipartitionTools::buildBipartitionPair(
93  const BipartitionList& bipartL1, size_t i1,
94  const BipartitionList& bipartL2, size_t i2,
95  bool checkElements)
96 {
97  vector<int*> bitBipL1, bitBipL2, twoBitBipL;
98  vector<string> elements;
99 
100  if (i1 >= bipartL1.getNumberOfBipartitions())
101  throw Exception("Bipartition index exceeds BipartitionList size");
102  if (i2 >= bipartL2.getNumberOfBipartitions())
103  throw Exception("Bipartition index exceeds BipartitionList size");
104 
105  if (checkElements && !VectorTools::haveSameElements(bipartL1.getElementNames(), bipartL2.getElementNames()))
106  throw Exception("Distinct bipartition element sets");
107 
108  /* get sorted bit bipartition lists */
109  /* (if input is sorted: easy; otherwise: first copy, then sort) */
110 
111  if (bipartL1.isSorted())
112  {
113  elements = bipartL1.getElementNames();
114  bitBipL1 = bipartL1.getBitBipartitionList();
115  }
116  else
117  {
118  BipartitionList provBipartL(bipartL1.getElementNames(), bipartL1.getBitBipartitionList());
119  provBipartL.sortElements();
120  elements = provBipartL.getElementNames();
121  bitBipL1 = provBipartL.getBitBipartitionList();
122  }
123 
124  if (bipartL2.isSorted())
125  {
126  bitBipL2 = bipartL2.getBitBipartitionList();
127  }
128  else
129  {
130  BipartitionList provBipartL(bipartL2.getElementNames(), bipartL2.getBitBipartitionList());
131  provBipartL.sortElements();
132  bitBipL2 = provBipartL.getBitBipartitionList();
133  }
134 
135  /* create a new BipartitionList with just the two focal bipartitions */
136 
137  twoBitBipL.push_back(bitBipL1[i1]);
138  twoBitBipL.push_back(bitBipL2[i2]);
139  return make_unique<BipartitionList>(elements, twoBitBipL);
140 }
141 
142 /******************************************************************************/
143 
145  const BipartitionList& bipartL1, size_t i1,
146  const BipartitionList& bipartL2, size_t i2,
147  bool checkElements)
148 {
149  auto twoBipL = buildBipartitionPair(bipartL1, i1, bipartL2, i2, checkElements);
150  return twoBipL->areIdentical(0, 1);
151 }
152 
153 /******************************************************************************/
154 
156  const BipartitionList& bipartL1, size_t i1,
157  const BipartitionList& bipartL2, size_t i2,
158  bool checkElements)
159 {
160  auto twoBipL = buildBipartitionPair(bipartL1, i1, bipartL2, i2, checkElements);
161  return twoBipL->areCompatible(0, 1);
162 }
163 
164 /******************************************************************************/
165 
166 std::unique_ptr<BipartitionList> BipartitionTools::mergeBipartitionLists(
167  const vector<unique_ptr<BipartitionList>>& vecBipartL,
168  bool checkElements)
169 {
170  vector<string> elements;
171  vector<int*> mergedBitBipL;
172  int* provBitBip;
173  unique_ptr<BipartitionList> mergedBipL;
174 
175  if (vecBipartL.size() == 0)
176  throw Exception("Empty vector passed");
177 
178  if (checkElements)
179  {
180  for (size_t i = 1; i < vecBipartL.size(); ++i)
181  {
182  if (!VectorTools::haveSameElements(vecBipartL[0]->getElementNames(), vecBipartL[0]->getElementNames()))
183  throw Exception("BipartitionTools::mergeBipartitionLists. Distinct bipartition element sets");
184  }
185  }
186 
187  size_t lword = static_cast<size_t>(BipartitionTools::LWORD);
188  size_t nbword = (vecBipartL[0]->getElementNames().size() + lword - 1) / lword;
189  size_t nbint = nbword * lword / (CHAR_BIT * sizeof(int));
190 
191  elements = vecBipartL[0]->getElementNames();
192  if (!vecBipartL[0]->isSorted())
193  std::sort(elements.begin(), elements.end());
194 
195  for (size_t i = 0; i < vecBipartL.size(); i++)
196  {
197  vector<int*> bitBipL;
198  if (vecBipartL[i]->isSorted())
199  {
200  bitBipL = vecBipartL[i]->getBitBipartitionList();
201  }
202  else
203  {
204  // We don't need the extra recopy here, do we?
205  // BipartitionList provBipartL(BipartitionList(vecBipartL[i]->getElementNames(), vecBipartL[i]->getBitBipartitionList()));
206  BipartitionList provBipartL(vecBipartL[i]->getElementNames(), vecBipartL[i]->getBitBipartitionList());
207  provBipartL.sortElements();
208  bitBipL = provBipartL.getBitBipartitionList();
209  }
210  for (size_t j = 0; j < bitBipL.size(); j++)
211  {
212  provBitBip = new int[nbint];
213  for (size_t k = 0; k < nbint; k++)
214  {
215  provBitBip[k] = bitBipL[j][k];
216  }
217  mergedBitBipL.push_back(provBitBip);
218  }
219  }
220 
221  mergedBipL = make_unique<BipartitionList>(elements, mergedBitBipL);
222  return mergedBipL;
223 }
224 
225 /******************************************************************************/
226 
227 unique_ptr<VectorSiteContainer> BipartitionTools::MRPEncode(
228  const vector<unique_ptr<BipartitionList>>& vecBipartL)
229 {
230  vector<string> all_elements;
231  map<string, bool> bip;
232  vector<string> bip_elements;
233  shared_ptr<const Alphabet> alpha = AlphabetTools::DNA_ALPHABET;
234  vector<string> sequences;
235 
236  if (vecBipartL.size() == 0)
237  throw Exception("Empty vector passed");
238 
239  vector< vector<string>> vecElementLists;
240  for (auto& vi : vecBipartL)
241  {
242  vecElementLists.push_back(vi->getElementNames());
243  }
244 
245  all_elements = VectorTools::vectorUnion(vecElementLists);
246 
247  sequences.resize(all_elements.size());
248 
249  for (auto& vi : vecBipartL)
250  {
251  for (size_t j = 0; j < vi->getNumberOfBipartitions(); j++)
252  {
253  bip = vi->getBipartition(j);
254  bip_elements = MapTools::getKeys(bip);
255 
256  for (size_t k = 0; k < all_elements.size(); k++)
257  {
258  if (VectorTools::contains(bip_elements, all_elements[k]))
259  {
260  if (bip[all_elements[k]])
261  sequences[k].push_back('C');
262  else
263  sequences[k].push_back('A');
264  }
265  else
266  sequences[k].push_back('N');
267  }
268  }
269  }
270 
271  vector<unique_ptr<Sequence>> vec_sequences;
272  for (size_t i = 0; i < all_elements.size(); i++)
273  {
274  vec_sequences.push_back(make_unique<Sequence>(all_elements[i], sequences[i], alpha));
275  }
276 
277  VectorSequenceContainer vec_seq_cont(alpha, vec_sequences);
278 
279  auto vec_site_cont = make_unique<VectorSiteContainer>(vec_seq_cont);
280 
281  return vec_site_cont;
282 }
283 
284 /******************************************************************************/
285 
286 unique_ptr<VectorSiteContainer> BipartitionTools::MRPEncodeMultilabel(
287  const vector<unique_ptr<BipartitionList>>& vecBipartL)
288 {
289  vector<string> all_elements;
290  map<string, bool> bip;
291  vector<string> bip_elements;
292  shared_ptr<const Alphabet> alpha = AlphabetTools::DNA_ALPHABET;
293  vector<string> sequences;
294 
295  if (vecBipartL.size() == 0)
296  throw Exception("Empty vector passed");
297 
298  vector< vector<string>> vecElementLists;
299  for (auto& vi : vecBipartL)
300  {
301  vecElementLists.push_back(vi->getElementNames());
302  }
303 
304  all_elements = VectorTools::vectorUnion(vecElementLists);
305 
306  sequences.resize(all_elements.size());
307 
308  for (auto& vi : vecBipartL)
309  {
310  for (size_t j = 0; j < vi->getNumberOfBipartitions(); j++)
311  {
312  bip = vi->getBipartition(j);
313  bip_elements = MapTools::getKeys(bip);
314  // Check for multilabel trees: if a taxa found on both sides, do not consider the entire bipartition
315  vector< string > zeroes;
316  vector< string > ones;
317  for (auto& element : all_elements)
318  {
319  if (VectorTools::contains(bip_elements, element))
320  {
321  if (bip[element])
322  ones.push_back(element);
323  else
324  zeroes.push_back(element);
325  }
326  }
327  vector<string> inter = VectorTools::vectorIntersection(ones, zeroes);
328  if (inter.size() != 0) // some taxa found on both sides of the bipartition
329  {
330  for (auto& sk : sequences)
331  {
332  sk.push_back('N');
333  }
334  }
335  else
336  {
337  for (size_t k = 0; k < all_elements.size(); k++)
338  {
339  if (VectorTools::contains(bip_elements, all_elements[k]))
340  {
341  if (bip[all_elements[k]])
342  sequences[k].push_back('C');
343  else
344  sequences[k].push_back('A');
345  }
346  else
347  sequences[k].push_back('N');
348  }
349  }
350  }
351  }
352 
353  vector<unique_ptr<Sequence>> vec_sequences;
354  for (size_t i = 0; i < all_elements.size(); i++)
355  {
356  vec_sequences.push_back(make_unique<Sequence>(all_elements[i], sequences[i], alpha));
357  }
358 
359  VectorSequenceContainer vec_seq_cont(alpha, vec_sequences);
360 
361  return make_unique<VectorSiteContainer>(vec_seq_cont);
362 }
363 
364 /******************************************************************************/
return element
static std::shared_ptr< const DNA > DNA_ALPHABET
This class deals with the bipartitions defined by trees.
size_t getNumberOfBipartitions() const
const std::vector< std::string > & getElementNames() const
const std::vector< int * > & getBitBipartitionList() const
static int LWORD
Unit length (in bits) of arrays of bits. Must be a multiple of CHAR_BIT*sizeof(int)....
static bool areCompatible(const BipartitionList &bipart1, size_t i1, const BipartitionList &bipart2, size_t i2, bool checkElements=true)
Tells whether two bipartitions from distinct lists are compatible.
static bool testBit(int *list, int num)
Tells whether bit number num in bit array list is one.
static std::unique_ptr< VectorSiteContainer > MRPEncodeMultilabel(const std::vector< std::unique_ptr< BipartitionList >> &vecBipartL)
Create a sequence data set corresponding to the Matrix Representation of the input BipartitionList ob...
static void bitAnd(int *listet, int *list1, int *list2, size_t len)
bit-wise logical AND between two arrays of bits
static void bit1(int *list, int num)
Sets bit number num of bit array list to one.
static void bitOr(int *listou, int *list1, int *list2, size_t len)
bit-wise logical OR between two arrays of bits
static std::unique_ptr< BipartitionList > buildBipartitionPair(const BipartitionList &bipartL1, size_t i1, const BipartitionList &bipartL2, size_t i2, bool checkElements=true)
Construct a BipartitionList containing two bipartitions taken from distinct input lists.
static std::unique_ptr< VectorSiteContainer > MRPEncode(const std::vector< std::unique_ptr< BipartitionList >> &vecBipartL)
Create a sequence data set corresponding to the Matrix Representation of the input BipartitionList ob...
static std::unique_ptr< BipartitionList > mergeBipartitionLists(const std::vector< std::unique_ptr< BipartitionList >> &vecBipartL, bool checkElements=true)
Makes one BipartitionList out of several.
static void bitNot(int *listnon, int *list, size_t len)
bit-wise logical NOT
static void bit0(int *list, int num)
Sets bit number num of bit array plist to zero.
static bool areIdentical(const BipartitionList &bipart1, size_t i1, const BipartitionList &bipart2, size_t i2, bool checkElements=true)
Tells whether two bipartitions from distinct lists are identical.
static std::vector< Key > getKeys(const std::map< Key, T, Cmp > &myMap)
static std::vector< T > vectorUnion(const std::vector< T > &vec1, const std::vector< T > &vec2)
static bool contains(const std::vector< T > &vec, T el)
static std::vector< T > vectorIntersection(const std::vector< T > &vec1, const std::vector< T > &vec2)
static bool haveSameElements(const std::vector< T > &v1, const std::vector< T > &v2)
Defines the basic types of data flow nodes.