bpp-popgen  3.0.0
PolymorphismMultiGContainerTools.cpp
Go to the documentation of this file.
1 //
2 // File PolymorphismMultiGContainerTools.cpp
3 // Author : Sylvain Gailard
4 // Khalid Belkhir
5 // Last modification : june 15 2006
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for population genetics analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39  */
40 
42 #include <algorithm>
43 
44 using namespace std;
45 using namespace bpp;
46 
47 /******************************************************************************/
48 
49 PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutMultiG(const PolymorphismMultiGContainer& pmgc)
50 {
51  PolymorphismMultiGContainer permuted_pmgc(pmgc);
52  vector<size_t> groups;
53  for (size_t i = 0; i < permuted_pmgc.size(); i++)
54  {
55  groups.push_back(permuted_pmgc.getGroupId(i));
56  }
57  // use std::random_shuffle instead of RandomTools::getSampl
58  // groups = RandomTools::getSample(groups, groups.size());
59  std::random_shuffle(groups.begin(), groups.end());
60  for (size_t i = 0; i < permuted_pmgc.size(); i++)
61  {
62  permuted_pmgc.setGroupId(i, groups[i]);
63  }
64  return permuted_pmgc;
65 }
66 
67 /******************************************************************************/
68 
69 PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutMonoG(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
70 {
71  PolymorphismMultiGContainer permuted_pmgc;
72  size_t loc_num = pmgc.getNumberOfLoci();
73  vector<vector<const MonolocusGenotype*> > mono_gens;
74  mono_gens.resize(loc_num);
75  // Get all the MonolocusGenotypes to permut
76  for (size_t i = 0; i < pmgc.size(); i++)
77  {
78  if (groups.find(pmgc.getGroupId(i)) != groups.end())
79  {
80  for (size_t j = 0; j < loc_num; j++)
81  {
82  mono_gens[j].push_back(&pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j));
83  }
84  }
85  }
86  // Permut the MonolocusGenotypes
87  for (size_t i = 0; i < loc_num; i++)
88  {
89  // mono_gens[i] = RandomTools::getSample(mono_gens[i], mono_gens[i].size());
90  std::random_shuffle(mono_gens[i].begin(), mono_gens[i].end());
91  }
92  // Build the new PolymorphismMultiGContainer
93  size_t k = 0;
94  for (size_t i = 0; i < pmgc.size(); i++)
95  {
96  if (groups.find(pmgc.getGroupId(i)) != groups.end())
97  {
98  MultilocusGenotype tmp_mg(loc_num);
99  for (size_t j = 0; j < loc_num; j++)
100  {
101  if (mono_gens[j][k] != NULL)
102  tmp_mg.setMonolocusGenotype(j, *(mono_gens[j][k]));
103  }
104  permuted_pmgc.addMultilocusGenotype(tmp_mg, pmgc.getGroupId(i));
105  k++;
106  }
107  else
108  {
109  permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), pmgc.getGroupId(i));
110  }
111  }
112 
113  // update groups names
114  set<size_t> grp_ids = pmgc.getAllGroupsIds();
115  for (set<size_t>::iterator it = grp_ids.begin(); it != grp_ids.end(); it++)
116  {
117  size_t id = *it;
118  string name = pmgc.getGroupName(id);
119  permuted_pmgc.setGroupName(id, name);
120  }
121 
122  return permuted_pmgc;
123 }
124 
125 /******************************************************************************/
126 
127 PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutIntraGroupMonoG(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
128 {
129  PolymorphismMultiGContainer permuted_pmgc;
130  size_t loc_num = pmgc.getNumberOfLoci();
131  vector<vector<const MonolocusGenotype*> > mono_gens;
132  mono_gens.resize(loc_num);
133 
134  for (set<size_t>::const_iterator g = groups.begin(); g != groups.end(); g++) // for each group
135  {
136  size_t nb_ind_in_group = 0;
137  // Get all the MonolocusGenotypes of group g to permut
138  for (size_t i = 0; i < pmgc.size(); i++)
139  {
140  size_t indiv_grp = pmgc.getGroupId(i);
141  if (groups.find(indiv_grp) != groups.end())
142  {
143  if (indiv_grp == *g)
144  {
145  nb_ind_in_group++;
146 
147  for (size_t j = 0; j < loc_num; j++)
148  {
149  mono_gens[j].push_back(&pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j));
150  }
151  }
152  }
153  else // insert as is
154  {
155  permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), indiv_grp);
156  }
157  } // for i
158 
159  // Permut the MonolocusGenotypes
160  if (nb_ind_in_group > 0)
161  {
162  for (size_t j = 0; j < loc_num; j++)
163  {
164  // mono_gens[j] = RandomTools::getSample(mono_gens[j], mono_gens[j].size());
165  std::random_shuffle(mono_gens[j].begin(), mono_gens[j].end());
166  }
167 
168  // Build the new multilocus genotypes
169  MultilocusGenotype tmp_mg(loc_num);
170  for (size_t k = 0; k < nb_ind_in_group; k++)
171  {
172  for (size_t j = 0; j < loc_num; j++)
173  {
174  if (mono_gens[j][k] != NULL)
175  tmp_mg.setMonolocusGenotype(j, *(mono_gens[j][k]));
176  } // for j
177 
178  permuted_pmgc.addMultilocusGenotype(tmp_mg, (*g));
179  } // for k
180  } // if nb_ind_in_group
181  } // for g
182 
183  // update groups names
184  set<size_t> grp_ids = pmgc.getAllGroupsIds();
185  for (set<size_t>::iterator it = grp_ids.begin(); it != grp_ids.end(); it++)
186  {
187  size_t id = *it;
188  string name = pmgc.getGroupName(id);
189  permuted_pmgc.setGroupName(id, name);
190  }
191 
192  return permuted_pmgc;
193 }
194 
195 /******************************************************************************/
196 
197 PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutAlleles(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
198 {
199  PolymorphismMultiGContainer permuted_pmgc;
200  size_t loc_num = pmgc.getNumberOfLoci();
201  vector<vector<size_t> > alleles;
202  alleles.resize(loc_num);
203  // Get all the alleles to permut
204  for (size_t i = 0; i < pmgc.size(); i++)
205  {
206  if (groups.find(pmgc.getGroupId(i)) != groups.end())
207  {
208  for (size_t j = 0; j < loc_num; j++)
209  {
211  for (size_t k = 0; k < pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex().size(); k++)
212  {
213  alleles[j].push_back(pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex()[k]);
214  }
215  }
216  }
217  }
218  // Permut the alleles
219  for (size_t i = 0; i < loc_num; i++)
220  {
221  // alleles[i] = RandomTools::getSample(alleles[i], alleles[i].size());
222  std::random_shuffle(alleles[i].begin(), alleles[i].end());
223  }
224  // Build the new PolymorphismMultiGContainer
225  vector<size_t> k(loc_num, 0);
226  for (size_t i = 0; i < pmgc.size(); i++)
227  {
228  if (groups.find(pmgc.getGroupId(i)) != groups.end())
229  {
230  MultilocusGenotype tmp_mg(loc_num);
231  for (size_t j = 0; j < loc_num; j++)
232  {
234  {
235  if (pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex().size() == 1)
236  tmp_mg.setMonolocusGenotype(j, MonoAlleleMonolocusGenotype(alleles[j][k[j]++]));
237  if (pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex().size() == 2)
238  tmp_mg.setMonolocusGenotype(j, BiAlleleMonolocusGenotype(alleles[j][k[j]++], alleles[j][k[j]++]));
239  }
240  }
241  permuted_pmgc.addMultilocusGenotype(tmp_mg, pmgc.getGroupId(i));
242  }
243  else
244  {
245  permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), pmgc.getGroupId(i));
246  }
247  }
248 
249  // update groups names
250  set<size_t> grp_ids = pmgc.getAllGroupsIds();
251  for (set<size_t>::iterator it = grp_ids.begin(); it != grp_ids.end(); it++)
252  {
253  size_t id = *it;
254  string name = pmgc.getGroupName(id);
255  permuted_pmgc.setGroupName(id, name);
256  }
257 
258  return permuted_pmgc;
259 }
260 
261 /******************************************************************************/
262 
263 PolymorphismMultiGContainer PolymorphismMultiGContainerTools::permutIntraGroupAlleles(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
264 {
265  PolymorphismMultiGContainer permuted_pmgc;
266  size_t loc_num = pmgc.getNumberOfLoci();
267  vector<vector<size_t> > alleles;
268  alleles.resize(loc_num);
269 
270  for (set<size_t>::const_iterator g = groups.begin(); g != groups.end(); g++) // for each group
271  {
272  size_t nb_ind_in_group = 0;
273 
274  vector< vector<size_t> > nb_alleles_for_inds;
275  nb_alleles_for_inds.resize(loc_num);
276  // Get all the alleles to permut
277  for (size_t i = 0; i < pmgc.size(); i++)
278  {
279  size_t indiv_grp = pmgc.getGroupId(i);
280  if (groups.find(indiv_grp) != groups.end() )
281  {
282  if (indiv_grp == *g)
283  {
284  nb_ind_in_group++;
285  for (size_t j = 0; j < loc_num; j++)
286  {
288  {
289  size_t nb_alls = pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex().size();
290  nb_alleles_for_inds[j].push_back(nb_alls);
291  for (size_t k = 0; k < nb_alls; k++)
292  {
293  alleles[j].push_back(pmgc.getMultilocusGenotype(i)->getMonolocusGenotype(j).getAlleleIndex()[k]);
294  }
295  }
296  }
297  }
298  }
299  else // inserer tel quel
300  {
301  permuted_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), indiv_grp);
302  }
303  } // for i
304 
305  // Permut the alleles
306  if (nb_ind_in_group > 0)
307  {
308  for (size_t i = 0; i < loc_num; i++)
309  {
310  // alleles[i] = RandomTools::getSample(alleles[i], alleles[i].size());
311  std::random_shuffle(alleles[i].begin(), alleles[i].end());
312  }
313 
314  // Build the new PolymorphismMultiGContainer
315  vector<size_t> k(loc_num, 0);
316 
317  for (size_t ind = 0; ind < nb_ind_in_group; ind++)
318  {
319  MultilocusGenotype tmp_mg(loc_num);
320  for (size_t j = 0; j < loc_num; j++)
321  {
322  if (nb_alleles_for_inds[j][ind] == 1)
323  tmp_mg.setMonolocusGenotype(j, MonoAlleleMonolocusGenotype(alleles[j][k[j]++]));
324  if (nb_alleles_for_inds[j][ind] == 2)
325  tmp_mg.setMonolocusGenotype(j, BiAlleleMonolocusGenotype(alleles[j][k[j]++], alleles[j][k[j]++]));
326  } // for j
327 
328  permuted_pmgc.addMultilocusGenotype(tmp_mg, (*g));
329  } // for ind
330  } // if nb_ind_in_group
331  } // for g
332 
333 
334  // update groups names
335  set<size_t> grp_ids = pmgc.getAllGroupsIds();
336  for (set<size_t>::iterator it = grp_ids.begin(); it != grp_ids.end(); it++)
337  {
338  size_t id = *it;
339  string name = pmgc.getGroupName(id);
340  permuted_pmgc.setGroupName(id, name);
341  }
342 
343  return permuted_pmgc;
344 }
345 
346 /******************************************************************************/
347 
348 PolymorphismMultiGContainer PolymorphismMultiGContainerTools::extractGroups(const PolymorphismMultiGContainer& pmgc, const std::set<size_t>& groups)
349 {
351  for (set<size_t>::const_iterator g = groups.begin(); g != groups.end(); g++) // for each group
352  {
353  // Get all the MonolocusGenotypes of group g to extract
354  for (size_t i = 0; i < pmgc.size(); i++)
355  {
356  size_t indiv_grp = pmgc.getGroupId(i);
357  if (groups.find(indiv_grp) != groups.end() )
358  {
359  if (indiv_grp == *g)
360  {
361  sub_pmgc.addMultilocusGenotype(*(pmgc.getMultilocusGenotype(i)), indiv_grp);
362  }
363  }
364  } // for i
365  } // for g
366 
367  // update groups names
368  set<size_t> grp_ids = sub_pmgc.getAllGroupsIds();
369  for (set<size_t>::iterator it = grp_ids.begin(); it != grp_ids.end(); it++)
370  {
371  size_t id = *it;
372  string name = pmgc.getGroupName(id);
373  sub_pmgc.setGroupName(id, name);
374  }
375 
376  return sub_pmgc;
377 }
378 
379 /******************************************************************************/
380 
The BiAlleleMonolocusGenotype class.
The MonoAlleleMonolocusGenotype class.
virtual std::vector< size_t > getAlleleIndex() const =0
Get the alleles' index.
The MultilocusGenotype class.
void setMonolocusGenotype(size_t locus_position, const MonolocusGenotype &monogen)
Set a MonolocusGenotype.
const MonolocusGenotype & getMonolocusGenotype(size_t locus_position) const
Get a MonolocusGenotype.
bool isMonolocusGenotypeMissing(size_t locus_position) const
Tell if a MonolocusGenotype is a missing data.
The PolymorphismMultiGContainer class.
size_t getGroupId(size_t position) const
Get the Group id of a MultilocusGenotype.
const MultilocusGenotype * getMultilocusGenotype(size_t position) const
Get a MultilocusGenotype at a position.
void setGroupId(size_t position, size_t group_id)
Set the Group id of a MultilocusGenotype.
size_t getNumberOfLoci() const
Get the number of loci if the MultilocusGenotypes are aligned.
std::string getGroupName(size_t group_id) const
Get the group name for a given group id or just the id if not available juste return it's id.
void addMultilocusGenotype(const MultilocusGenotype &mg, size_t group)
Add a MultilocusGenotype to the container.
std::set< size_t > getAllGroupsIds() const
Get the groups' ids.
size_t size() const
Get the number of MultilocusGenotype.
void setGroupName(size_t group_id, std::string name)
Set the name for the given group id.