bpp-popgen3  3.0.0
PolymorphismMultiGContainerTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 
8 #include <algorithm>
9 
10 using namespace std;
11 using namespace bpp;
12 
13 /******************************************************************************/
14 
15 unique_ptr<PolymorphismMultiGContainer> PolymorphismMultiGContainerTools::permuteMultiG(
16  const PolymorphismMultiGContainer& pmgc)
17 {
18  auto permutedPmgc = make_unique<PolymorphismMultiGContainer>(pmgc);
19  vector<size_t> groups;
20  for (size_t i = 0; i < permutedPmgc->size(); ++i)
21  {
22  groups.push_back(permutedPmgc->getGroupId(i));
23  }
24  std::shuffle(groups.begin(), groups.end(), RandomTools::DEFAULT_GENERATOR);
25  for (size_t i = 0; i < permutedPmgc->size(); ++i)
26  {
27  permutedPmgc->setGroupId(i, groups[i]);
28  }
29  return permutedPmgc;
30 }
31 
32 /******************************************************************************/
33 
34 unique_ptr<PolymorphismMultiGContainer> PolymorphismMultiGContainerTools::permuteMonoG(
35  const PolymorphismMultiGContainer& pmgc,
36  const std::set<size_t>& groups)
37 {
38  unique_ptr<PolymorphismMultiGContainer> permutedPmgc;
39  size_t locNum = pmgc.getNumberOfLoci();
40  vector<vector<unique_ptr<const MonolocusGenotypeInterface>>> monoGens;
41  monoGens.resize(locNum);
42  // Get all the MonolocusGenotypes to permute
43  for (size_t i = 0; i < pmgc.size(); ++i)
44  {
45  if (groups.find(pmgc.getGroupId(i)) != groups.end())
46  {
47  for (size_t j = 0; j < locNum; ++j)
48  {
49  monoGens[j].push_back(unique_ptr<const MonolocusGenotypeInterface>(pmgc.multilocusGenotype(i).monolocusGenotype(j).clone()));
50  }
51  }
52  }
53  // PermutE the MonolocusGenotypes
54  for (size_t i = 0; i < locNum; ++i)
55  {
56  std::shuffle(monoGens[i].begin(), monoGens[i].end(), RandomTools::DEFAULT_GENERATOR);
57  }
58  // Build the new PolymorphismMultiGContainer
59  size_t k = 0;
60  for (size_t i = 0; i < pmgc.size(); ++i)
61  {
62  if (groups.find(pmgc.getGroupId(i)) != groups.end())
63  {
64  auto tmpMg = make_unique<MultilocusGenotype>(locNum);
65  for (size_t j = 0; j < locNum; ++j)
66  {
67  if (monoGens[j][k])
68  tmpMg->setMonolocusGenotype(j, *(monoGens[j][k]));
69  }
70  permutedPmgc->addMultilocusGenotype(tmpMg, pmgc.getGroupId(i));
71  k++;
72  }
73  else
74  {
75  auto tmpMg = make_unique<MultilocusGenotype>(pmgc.multilocusGenotype(i));
76  permutedPmgc->addMultilocusGenotype(tmpMg, pmgc.getGroupId(i));
77  }
78  }
79 
80  // update group names
81  set<size_t> grpIds = pmgc.getAllGroupsIds();
82  for (auto& id : grpIds)
83  {
84  string name = pmgc.getGroupName(id);
85  permutedPmgc->setGroupName(id, name);
86  }
87 
88  return permutedPmgc;
89 }
90 
91 /******************************************************************************/
92 
93 unique_ptr<PolymorphismMultiGContainer> PolymorphismMultiGContainerTools::permuteIntraGroupMonoG(
94  const PolymorphismMultiGContainer& pmgc,
95  const set<size_t>& groups)
96 {
97  auto permutedPmgc = make_unique<PolymorphismMultiGContainer>();
98  size_t locNum = pmgc.getNumberOfLoci();
99  vector<vector<unique_ptr<const MonolocusGenotypeInterface>>> monoGens;
100  monoGens.resize(locNum);
101 
102  for (auto& g : groups) // for each group
103  {
104  size_t nbIndInGroup = 0;
105  // Get all the MonolocusGenotypes of group g to permute
106  for (size_t i = 0; i < pmgc.size(); ++i)
107  {
108  size_t indivGrp = pmgc.getGroupId(i);
109  if (groups.find(indivGrp) != groups.end())
110  {
111  if (indivGrp == g)
112  {
113  nbIndInGroup++;
114 
115  for (size_t j = 0; j < locNum; ++j)
116  {
117  monoGens[j].push_back(unique_ptr<const MonolocusGenotypeInterface>(pmgc.multilocusGenotype(i).monolocusGenotype(j).clone()));
118  }
119  }
120  }
121  else // insert as is
122  {
123  auto tmpMg = make_unique<MultilocusGenotype>(pmgc.multilocusGenotype(i));
124  permutedPmgc->addMultilocusGenotype(tmpMg, indivGrp);
125  }
126  } // for i
127 
128  // Permut the MonolocusGenotypes
129  if (nbIndInGroup > 0)
130  {
131  for (size_t j = 0; j < locNum; ++j)
132  {
133  std::shuffle(monoGens[j].begin(), monoGens[j].end(), RandomTools::DEFAULT_GENERATOR);
134  }
135 
136  // Build the new multilocus genotypes
137  auto tmpMg = make_unique<MultilocusGenotype>(locNum);
138  for (size_t k = 0; k < nbIndInGroup; ++k)
139  {
140  for (size_t j = 0; j < locNum; ++j)
141  {
142  if (monoGens[j][k])
143  tmpMg->setMonolocusGenotype(j, *(monoGens[j][k]));
144  } // for j
145 
146  permutedPmgc->addMultilocusGenotype(tmpMg, g);
147  } // for k
148  } // if nbIndInGroup
149  } // for g
150 
151  // update groups names
152  set<size_t> grpIds = pmgc.getAllGroupsIds();
153  for (auto& id : grpIds)
154  {
155  string name = pmgc.getGroupName(id);
156  permutedPmgc->setGroupName(id, name);
157  }
158 
159  return permutedPmgc;
160 }
161 
162 /******************************************************************************/
163 
164 unique_ptr<PolymorphismMultiGContainer> PolymorphismMultiGContainerTools::permuteAlleles(
165  const PolymorphismMultiGContainer& pmgc,
166  const std::set<size_t>& groups)
167 {
168  auto permutedPmgc = make_unique<PolymorphismMultiGContainer>();
169  size_t locNum = pmgc.getNumberOfLoci();
170  vector<vector<size_t>> alleles;
171  alleles.resize(locNum);
172  // Get all the alleles to permut
173  for (size_t i = 0; i < pmgc.size(); ++i)
174  {
175  if (groups.find(pmgc.getGroupId(i)) != groups.end())
176  {
177  for (size_t j = 0; j < locNum; ++j)
178  {
180  for (size_t k = 0; k < pmgc.multilocusGenotype(i).monolocusGenotype(j).getAlleleIndex().size(); ++k)
181  {
182  alleles[j].push_back(pmgc.multilocusGenotype(i).monolocusGenotype(j).getAlleleIndex()[k]);
183  }
184  }
185  }
186  }
187  // Permut the alleles
188  for (size_t i = 0; i < locNum; ++i)
189  {
190  std::shuffle(alleles[i].begin(), alleles[i].end(), RandomTools::DEFAULT_GENERATOR);
191  }
192  // Build the new PolymorphismMultiGContainer
193  vector<size_t> k(locNum, 0);
194  for (size_t i = 0; i < pmgc.size(); ++i)
195  {
196  if (groups.find(pmgc.getGroupId(i)) != groups.end())
197  {
198  auto tmpMg = make_unique<MultilocusGenotype>(locNum);
199  for (size_t j = 0; j < locNum; ++j)
200  {
202  {
203  if (pmgc.multilocusGenotype(i).monolocusGenotype(j).getAlleleIndex().size() == 1)
204  tmpMg->setMonolocusGenotype(j, MonoAlleleMonolocusGenotype(alleles[j][k[j]++]));
205  if (pmgc.multilocusGenotype(i).monolocusGenotype(j).getAlleleIndex().size() == 2)
206  tmpMg->setMonolocusGenotype(j, BiAlleleMonolocusGenotype(alleles[j][k[j]++], alleles[j][k[j]++]));
207  }
208  }
209  permutedPmgc->addMultilocusGenotype(tmpMg, pmgc.getGroupId(i));
210  }
211  else
212  {
213  auto tmpMg = make_unique<MultilocusGenotype>(pmgc.multilocusGenotype(i));
214  permutedPmgc->addMultilocusGenotype(tmpMg, pmgc.getGroupId(i));
215  }
216  }
217 
218  // update groups names
219  set<size_t> grpIds = pmgc.getAllGroupsIds();
220  for (auto& id : grpIds)
221  {
222  string name = pmgc.getGroupName(id);
223  permutedPmgc->setGroupName(id, name);
224  }
225 
226  return permutedPmgc;
227 }
228 
229 /******************************************************************************/
230 
231 unique_ptr<PolymorphismMultiGContainer> PolymorphismMultiGContainerTools::permuteIntraGroupAlleles(
232  const PolymorphismMultiGContainer& pmgc,
233  const set<size_t>& groups)
234 {
235  auto permutedPmgc = make_unique<PolymorphismMultiGContainer>();
236  size_t locNum = pmgc.getNumberOfLoci();
237  vector<vector<size_t>> alleles;
238  alleles.resize(locNum);
239 
240  for (auto& g : groups) // for each group
241  {
242  size_t nbIndInGroup = 0;
243 
244  vector<vector<size_t>> nbAllelesForInds;
245  nbAllelesForInds.resize(locNum);
246  // Get all the alleles to permute
247  for (size_t i = 0; i < pmgc.size(); ++i)
248  {
249  size_t indivGrp = pmgc.getGroupId(i);
250  if (groups.find(indivGrp) != groups.end() )
251  {
252  if (indivGrp == g)
253  {
254  nbIndInGroup++;
255  for (size_t j = 0; j < locNum; ++j)
256  {
258  {
259  size_t nbAlls = pmgc.multilocusGenotype(i).monolocusGenotype(j).getAlleleIndex().size();
260  nbAllelesForInds[j].push_back(nbAlls);
261  for (size_t k = 0; k < nbAlls; ++k)
262  {
263  alleles[j].push_back(pmgc.multilocusGenotype(i).monolocusGenotype(j).getAlleleIndex()[k]);
264  }
265  }
266  }
267  }
268  }
269  else // insert as is
270  {
271  auto tmpMg = make_unique<MultilocusGenotype>(pmgc.multilocusGenotype(i));
272  permutedPmgc->addMultilocusGenotype(tmpMg, indivGrp);
273  }
274  } // for i
275 
276  // Permute the alleles
277  if (nbIndInGroup > 0)
278  {
279  for (size_t i = 0; i < locNum; ++i)
280  {
281  // alleles[i] = RandomTools::getSample(alleles[i], alleles[i].size());
282  std::shuffle(alleles[i].begin(), alleles[i].end(), RandomTools::DEFAULT_GENERATOR);
283  }
284 
285  // Build the new PolymorphismMultiGContainer
286  vector<size_t> k(locNum, 0);
287 
288  for (size_t ind = 0; ind < nbIndInGroup; ind++)
289  {
290  auto tmpMg = make_unique<MultilocusGenotype>(locNum);
291  for (size_t j = 0; j < locNum; ++j)
292  {
293  if (nbAllelesForInds[j][ind] == 1)
294  tmpMg->setMonolocusGenotype(j, MonoAlleleMonolocusGenotype(alleles[j][k[j]++]));
295  if (nbAllelesForInds[j][ind] == 2)
296  tmpMg->setMonolocusGenotype(j, BiAlleleMonolocusGenotype(alleles[j][k[j]++], alleles[j][k[j]++]));
297  } // for j
298 
299  permutedPmgc->addMultilocusGenotype(tmpMg, g);
300  } // for ind
301  } // if nbIndInGroup
302  } // for g
303 
304 
305  // update groups names
306  auto grpIds = pmgc.getAllGroupsIds();
307  for (auto& id : grpIds)
308  {
309  string name = pmgc.getGroupName(id);
310  permutedPmgc->setGroupName(id, name);
311  }
312 
313  return permutedPmgc;
314 }
315 
316 /******************************************************************************/
317 
318 unique_ptr<PolymorphismMultiGContainer> PolymorphismMultiGContainerTools::extractGroups(
319  const PolymorphismMultiGContainer& pmgc,
320  const set<size_t>& groups)
321 {
322  auto subPmgc = make_unique<PolymorphismMultiGContainer>();
323  for (auto& g : groups) // for each group
324  {
325  // Get all the MonolocusGenotypes of group g to extract
326  for (size_t i = 0; i < pmgc.size(); ++i)
327  {
328  size_t indivGrp = pmgc.getGroupId(i);
329  if (groups.find(indivGrp) != groups.end() )
330  {
331  if (indivGrp == g)
332  {
333  auto tmpMg = make_unique<MultilocusGenotype>(pmgc.multilocusGenotype(i));
334  subPmgc->addMultilocusGenotype(tmpMg, indivGrp);
335  }
336  }
337  } // for i
338  } // for g
339 
340  // update group names
341  auto grpIds = subPmgc->getAllGroupsIds();
342  for (auto& id : grpIds)
343  {
344  string name = pmgc.getGroupName(id);
345  subPmgc->setGroupName(id, name);
346  }
347 
348  return subPmgc;
349 }
350 
351 /******************************************************************************/
The BiAlleleMonolocusGenotype class.
The MonoAlleleMonolocusGenotype class.
MonolocusGenotypeInterface * clone() const override=0
virtual std::vector< size_t > getAlleleIndex() const =0
Get the alleles' index.
const MonolocusGenotypeInterface & monolocusGenotype(size_t locusPosition) const
Get a MonolocusGenotype.
bool isMonolocusGenotypeMissing(size_t locusPosition) const
Tell if a MonolocusGenotype is a missing data.
The PolymorphismMultiGContainer class.
size_t getGroupId(size_t position) const
Get the Group id of a MultilocusGenotype.
size_t getNumberOfLoci() const
Get the number of loci if the MultilocusGenotypes are aligned.
const MultilocusGenotype & multilocusGenotype(size_t position) const
Get a MultilocusGenotype at a position.
std::string getGroupName(size_t groupId) const
Get the group name for a given group id or just the id if not available juste return it's id.
std::set< size_t > getAllGroupsIds() const
Get the groups' ids.
size_t size() const
Get the number of MultilocusGenotype.