bpp-phyl3 3.0.0
BipartitionTools.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include <Bpp/Exceptions.h>
6#include <Bpp/Io/FileTools.h>
8
9#include "BipartitionList.h"
10#include "BipartitionTools.h"
11#include "TreeTemplate.h"
12
13// From SeqLib
17
18using namespace bpp;
19
20// From STL
21#include <iostream>
22#include <algorithm>
23#include <limits.h> // defines CHAR_BIT
24
25using namespace std;
26
27/******************************************************************************/
28
29int BipartitionTools::LWORD = static_cast<int>(CHAR_BIT * sizeof(int));
30
31/******************************************************************************/
32
33/* functions dealing with int* seen as arrays of bits */
34/* (provided by Manolo Gouy) */
35
36void BipartitionTools::bit1(int* plist, int num)
37{
38 // num--;
39 plist += (num / LWORD);
40 *plist |= (1 << (num % LWORD));
41}
42
43/******************************************************************************/
44
45void BipartitionTools::bit0(int* plist, int num)
46{
47 // num--;
48 plist += (num / LWORD);
49 *plist &= ~(1 << (num % LWORD));
50}
51
52/******************************************************************************/
53
54void BipartitionTools::bitAnd(int* listet, int* list1, int* list2, size_t len)
55{
56 for (size_t i = 0; i < len; i++)
57 {
58 listet[i] = list1[i] & list2[i];
59 }
60}
61
62/******************************************************************************/
63
64void BipartitionTools::bitOr(int* listou, int* list1, int* list2, size_t len)
65{
66 for (size_t i = 0; i < len; i++)
67 {
68 listou[i] = list1[i] | list2[i];
69 }
70}
71
72/******************************************************************************/
73
74void BipartitionTools::bitNot(int* listnon, int* list, size_t len)
75{
76 for (size_t i = 0; i < len; i++)
77 {
78 listnon[i] = ~list[i];
79 }
80}
81
82/******************************************************************************/
83bool BipartitionTools::testBit(int* plist, int num)
84{
85 // num--;
86 plist += (num / LWORD);
87 return (*plist) & (1 << (num % LWORD));
88}
89
90/******************************************************************************/
91
92unique_ptr<BipartitionList> BipartitionTools::buildBipartitionPair(
93 const BipartitionList& bipartL1, size_t i1,
94 const BipartitionList& bipartL2, size_t i2,
95 bool checkElements)
96{
97 vector<int*> bitBipL1, bitBipL2, twoBitBipL;
98 vector<string> elements;
99
100 if (i1 >= bipartL1.getNumberOfBipartitions())
101 throw Exception("Bipartition index exceeds BipartitionList size");
102 if (i2 >= bipartL2.getNumberOfBipartitions())
103 throw Exception("Bipartition index exceeds BipartitionList size");
104
105 if (checkElements && !VectorTools::haveSameElements(bipartL1.getElementNames(), bipartL2.getElementNames()))
106 throw Exception("Distinct bipartition element sets");
107
108 /* get sorted bit bipartition lists */
109 /* (if input is sorted: easy; otherwise: first copy, then sort) */
110
111 if (bipartL1.isSorted())
112 {
113 elements = bipartL1.getElementNames();
114 bitBipL1 = bipartL1.getBitBipartitionList();
115 }
116 else
117 {
118 BipartitionList provBipartL(bipartL1.getElementNames(), bipartL1.getBitBipartitionList());
119 provBipartL.sortElements();
120 elements = provBipartL.getElementNames();
121 bitBipL1 = provBipartL.getBitBipartitionList();
122 }
123
124 if (bipartL2.isSorted())
125 {
126 bitBipL2 = bipartL2.getBitBipartitionList();
127 }
128 else
129 {
130 BipartitionList provBipartL(bipartL2.getElementNames(), bipartL2.getBitBipartitionList());
131 provBipartL.sortElements();
132 bitBipL2 = provBipartL.getBitBipartitionList();
133 }
134
135 /* create a new BipartitionList with just the two focal bipartitions */
136
137 twoBitBipL.push_back(bitBipL1[i1]);
138 twoBitBipL.push_back(bitBipL2[i2]);
139 return make_unique<BipartitionList>(elements, twoBitBipL);
140}
141
142/******************************************************************************/
143
145 const BipartitionList& bipartL1, size_t i1,
146 const BipartitionList& bipartL2, size_t i2,
147 bool checkElements)
148{
149 auto twoBipL = buildBipartitionPair(bipartL1, i1, bipartL2, i2, checkElements);
150 return twoBipL->areIdentical(0, 1);
151}
152
153/******************************************************************************/
154
156 const BipartitionList& bipartL1, size_t i1,
157 const BipartitionList& bipartL2, size_t i2,
158 bool checkElements)
159{
160 auto twoBipL = buildBipartitionPair(bipartL1, i1, bipartL2, i2, checkElements);
161 return twoBipL->areCompatible(0, 1);
162}
163
164/******************************************************************************/
165
166std::unique_ptr<BipartitionList> BipartitionTools::mergeBipartitionLists(
167 const vector<unique_ptr<BipartitionList>>& vecBipartL,
168 bool checkElements)
169{
170 vector<string> elements;
171 vector<int*> mergedBitBipL;
172 int* provBitBip;
173 unique_ptr<BipartitionList> mergedBipL;
174
175 if (vecBipartL.size() == 0)
176 throw Exception("Empty vector passed");
177
178 if (checkElements)
179 {
180 for (size_t i = 1; i < vecBipartL.size(); ++i)
181 {
182 if (!VectorTools::haveSameElements(vecBipartL[0]->getElementNames(), vecBipartL[0]->getElementNames()))
183 throw Exception("BipartitionTools::mergeBipartitionLists. Distinct bipartition element sets");
184 }
185 }
186
187 size_t lword = static_cast<size_t>(BipartitionTools::LWORD);
188 size_t nbword = (vecBipartL[0]->getElementNames().size() + lword - 1) / lword;
189 size_t nbint = nbword * lword / (CHAR_BIT * sizeof(int));
190
191 elements = vecBipartL[0]->getElementNames();
192 if (!vecBipartL[0]->isSorted())
193 std::sort(elements.begin(), elements.end());
194
195 for (size_t i = 0; i < vecBipartL.size(); i++)
196 {
197 vector<int*> bitBipL;
198 if (vecBipartL[i]->isSorted())
199 {
200 bitBipL = vecBipartL[i]->getBitBipartitionList();
201 }
202 else
203 {
204 // We don't need the extra recopy here, do we?
205 // BipartitionList provBipartL(BipartitionList(vecBipartL[i]->getElementNames(), vecBipartL[i]->getBitBipartitionList()));
206 BipartitionList provBipartL(vecBipartL[i]->getElementNames(), vecBipartL[i]->getBitBipartitionList());
207 provBipartL.sortElements();
208 bitBipL = provBipartL.getBitBipartitionList();
209 }
210 for (size_t j = 0; j < bitBipL.size(); j++)
211 {
212 provBitBip = new int[nbint];
213 for (size_t k = 0; k < nbint; k++)
214 {
215 provBitBip[k] = bitBipL[j][k];
216 }
217 mergedBitBipL.push_back(provBitBip);
218 }
219 }
220
221 mergedBipL = make_unique<BipartitionList>(elements, mergedBitBipL);
222 return mergedBipL;
223}
224
225/******************************************************************************/
226
227unique_ptr<VectorSiteContainer> BipartitionTools::MRPEncode(
228 const vector<unique_ptr<BipartitionList>>& vecBipartL)
229{
230 vector<string> all_elements;
231 map<string, bool> bip;
232 vector<string> bip_elements;
233 shared_ptr<const Alphabet> alpha = AlphabetTools::DNA_ALPHABET;
234 vector<string> sequences;
235
236 if (vecBipartL.size() == 0)
237 throw Exception("Empty vector passed");
238
239 vector< vector<string>> vecElementLists;
240 for (auto& vi : vecBipartL)
241 {
242 vecElementLists.push_back(vi->getElementNames());
243 }
244
245 all_elements = VectorTools::vectorUnion(vecElementLists);
246
247 sequences.resize(all_elements.size());
248
249 for (auto& vi : vecBipartL)
250 {
251 for (size_t j = 0; j < vi->getNumberOfBipartitions(); j++)
252 {
253 bip = vi->getBipartition(j);
254 bip_elements = MapTools::getKeys(bip);
255
256 for (size_t k = 0; k < all_elements.size(); k++)
257 {
258 if (VectorTools::contains(bip_elements, all_elements[k]))
259 {
260 if (bip[all_elements[k]])
261 sequences[k].push_back('C');
262 else
263 sequences[k].push_back('A');
264 }
265 else
266 sequences[k].push_back('N');
267 }
268 }
269 }
270
271 vector<unique_ptr<Sequence>> vec_sequences;
272 for (size_t i = 0; i < all_elements.size(); i++)
273 {
274 vec_sequences.push_back(make_unique<Sequence>(all_elements[i], sequences[i], alpha));
275 }
276
277 VectorSequenceContainer vec_seq_cont(alpha, vec_sequences);
278
279 auto vec_site_cont = make_unique<VectorSiteContainer>(vec_seq_cont);
280
281 return vec_site_cont;
282}
283
284/******************************************************************************/
285
286unique_ptr<VectorSiteContainer> BipartitionTools::MRPEncodeMultilabel(
287 const vector<unique_ptr<BipartitionList>>& vecBipartL)
288{
289 vector<string> all_elements;
290 map<string, bool> bip;
291 vector<string> bip_elements;
292 shared_ptr<const Alphabet> alpha = AlphabetTools::DNA_ALPHABET;
293 vector<string> sequences;
294
295 if (vecBipartL.size() == 0)
296 throw Exception("Empty vector passed");
297
298 vector< vector<string>> vecElementLists;
299 for (auto& vi : vecBipartL)
300 {
301 vecElementLists.push_back(vi->getElementNames());
302 }
303
304 all_elements = VectorTools::vectorUnion(vecElementLists);
305
306 sequences.resize(all_elements.size());
307
308 for (auto& vi : vecBipartL)
309 {
310 for (size_t j = 0; j < vi->getNumberOfBipartitions(); j++)
311 {
312 bip = vi->getBipartition(j);
313 bip_elements = MapTools::getKeys(bip);
314 // Check for multilabel trees: if a taxa found on both sides, do not consider the entire bipartition
315 vector< string > zeroes;
316 vector< string > ones;
317 for (auto& element : all_elements)
318 {
319 if (VectorTools::contains(bip_elements, element))
320 {
321 if (bip[element])
322 ones.push_back(element);
323 else
324 zeroes.push_back(element);
325 }
326 }
327 vector<string> inter = VectorTools::vectorIntersection(ones, zeroes);
328 if (inter.size() != 0) // some taxa found on both sides of the bipartition
329 {
330 for (auto& sk : sequences)
331 {
332 sk.push_back('N');
333 }
334 }
335 else
336 {
337 for (size_t k = 0; k < all_elements.size(); k++)
338 {
339 if (VectorTools::contains(bip_elements, all_elements[k]))
340 {
341 if (bip[all_elements[k]])
342 sequences[k].push_back('C');
343 else
344 sequences[k].push_back('A');
345 }
346 else
347 sequences[k].push_back('N');
348 }
349 }
350 }
351 }
352
353 vector<unique_ptr<Sequence>> vec_sequences;
354 for (size_t i = 0; i < all_elements.size(); i++)
355 {
356 vec_sequences.push_back(make_unique<Sequence>(all_elements[i], sequences[i], alpha));
357 }
358
359 VectorSequenceContainer vec_seq_cont(alpha, vec_sequences);
360
361 return make_unique<VectorSiteContainer>(vec_seq_cont);
362}
363
364/******************************************************************************/
static std::shared_ptr< const DNA > DNA_ALPHABET
This class deals with the bipartitions defined by trees.
const std::vector< std::string > & getElementNames() const
const std::vector< int * > & getBitBipartitionList() const
size_t getNumberOfBipartitions() const
static int LWORD
Unit length (in bits) of arrays of bits. Must be a multiple of CHAR_BIT*sizeof(int)....
static bool areCompatible(const BipartitionList &bipart1, size_t i1, const BipartitionList &bipart2, size_t i2, bool checkElements=true)
Tells whether two bipartitions from distinct lists are compatible.
static bool testBit(int *list, int num)
Tells whether bit number num in bit array list is one.
static void bitAnd(int *listet, int *list1, int *list2, size_t len)
bit-wise logical AND between two arrays of bits
static void bitOr(int *listou, int *list1, int *list2, size_t len)
bit-wise logical OR between two arrays of bits
static std::unique_ptr< BipartitionList > buildBipartitionPair(const BipartitionList &bipartL1, size_t i1, const BipartitionList &bipartL2, size_t i2, bool checkElements=true)
Construct a BipartitionList containing two bipartitions taken from distinct input lists.
static std::unique_ptr< VectorSiteContainer > MRPEncodeMultilabel(const std::vector< std::unique_ptr< BipartitionList > > &vecBipartL)
Create a sequence data set corresponding to the Matrix Representation of the input BipartitionList ob...
static std::unique_ptr< VectorSiteContainer > MRPEncode(const std::vector< std::unique_ptr< BipartitionList > > &vecBipartL)
Create a sequence data set corresponding to the Matrix Representation of the input BipartitionList ob...
static void bitNot(int *listnon, int *list, size_t len)
bit-wise logical NOT
static std::unique_ptr< BipartitionList > mergeBipartitionLists(const std::vector< std::unique_ptr< BipartitionList > > &vecBipartL, bool checkElements=true)
Makes one BipartitionList out of several.
static void bit0(int *list, int num)
Sets bit number num of bit array plist to zero.
static bool areIdentical(const BipartitionList &bipart1, size_t i1, const BipartitionList &bipart2, size_t i2, bool checkElements=true)
Tells whether two bipartitions from distinct lists are identical.
static std::vector< Key > getKeys(const std::map< Key, T, Cmp > &myMap)
static std::vector< T > vectorUnion(const std::vector< T > &vec1, const std::vector< T > &vec2)
static bool contains(const std::vector< T > &vec, T el)
static std::vector< T > vectorIntersection(const std::vector< T > &vec1, const std::vector< T > &vec2)
static bool haveSameElements(const std::vector< T > &v1, const std::vector< T > &v2)
Defines the basic types of data flow nodes.