bpp-popgen3  3.0.0
Genetix.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "Genetix.h"
6 
7 using namespace bpp;
8 using namespace std;
9 
11 
13 
14 void Genetix::read(istream& is, DataSet& data_set)
15 {
16  if (!is)
17  throw IOException("Genetix::read: fail to open stream.");
18  // Loci number
19  string temp = FileTools::getNextLine(is);
20  unsigned int loc_nbr;
21  stringstream(temp) >> loc_nbr;
22  data_set.initAnalyzedLoci(loc_nbr);
23 
24  // Groups number
25  temp = FileTools::getNextLine(is);
26  unsigned int grp_nbr;
27  stringstream(temp) >> grp_nbr;
28 
29  // Loci data
30  for (unsigned int i = 0; i < loc_nbr; i++)
31  {
32  // Locus name
33  string name = FileTools::getNextLine(is);
35  LocusInfo tmp_loc(name);
36  // Alleles
37  stringstream values(FileTools::getNextLine(is));
38  unsigned int nbr_al;
39  values >> nbr_al;
40  for (unsigned int j = 0; j < nbr_al; j++)
41  {
42  string al_id;
43  values >> al_id;
44  BasicAlleleInfo tmp_al(al_id);
45  tmp_loc.addAlleleInfo(tmp_al);
46  }
47  data_set.setLocusInfo(i, tmp_loc);
48  }
49 
50  // Groups
51  for (unsigned int i = 0; i < grp_nbr; i++)
52  {
53  data_set.addEmptyGroup(i);
54  // Group name ... Now used khalid
55  temp = FileTools::getNextLine(is);
56  data_set.setGroupName(i, temp);
57 
58  // Number of individuals
59  unsigned int ind_nbr;
60  temp = FileTools::getNextLine(is);
61  stringstream tmp(temp);
62  tmp >> ind_nbr;
63  for (unsigned int j = 0; j < ind_nbr; j++)
64  {
65  temp = FileTools::getNextLine(is);
66  string ind_name(temp.begin(), temp.begin() + 11);
67  temp = string(temp.begin() + 11, temp.end());
68  data_set.addEmptyIndividualToGroup(i, TextTools::removeSurroundingWhiteSpaces(ind_name) + string("_") + TextTools::toString(i + 1) + string("_") + TextTools::toString(j + 1));
69  data_set.initIndividualGenotypeInGroup(i, j);
70  StringTokenizer alleles(temp, string(" "));
71  // cout << alleles.numberOfRemainingTokens() << endl;
72  for (unsigned int k = 0; k < loc_nbr; k++)
73  {
74  string tmp_string = alleles.nextToken();
75  vector<string> tmp_alleles;
76  tmp_alleles.push_back(string(tmp_string.begin(), tmp_string.begin() + 3));
77  tmp_alleles.push_back(string(tmp_string.begin() + 3, tmp_string.begin() + 6));
78  if (tmp_alleles[0] != string("000") && tmp_alleles[1] != string("000"))
79  data_set.setIndividualMonolocusGenotypeByAlleleIdInGroup(i, j, k, tmp_alleles);
80  }
81  }
82  }
83 }
84 
85 void Genetix::read(const string& path, DataSet& data_set)
86 {
87  AbstractIDataSet::read(path, data_set);
88 }
89 
90 DataSet* Genetix::read(istream& is)
91 {
92  return AbstractIDataSet::read(is);
93 }
94 
95 DataSet* Genetix::read(const string& path)
96 {
97  return AbstractIDataSet::read(path);
98 }
virtual void read(std::istream &is, DataSet &data_set)=0
Read a DataSet from an istream.
The BasicAlleleInfo class.
The DataSet class.
Definition: DataSet.h:37
void initAnalyzedLoci(size_t numberOfLoci)
Initialize the AnalyzedLoci for number of loci.
Definition: DataSet.h:609
void setGroupName(size_t group_id, const std::string &group_name) const
Set the name of a Group.
Definition: DataSet.cpp:206
void setLocusInfo(size_t locus_position, const LocusInfo &locus)
Set a LocusInfo.
Definition: DataSet.cpp:998
void setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t groupPosition, size_t individualPosition, size_t locusPosition, const std::vector< std::string > alleleId)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:928
void addEmptyIndividualToGroup(size_t groupPosition, const std::string &individual_id)
Add an empty Individual to a Group.
Definition: DataSet.cpp:374
void initIndividualGenotypeInGroup(size_t groupPosition, size_t individualPosition)
Initialize the genotype of an Individual in a Group.
Definition: DataSet.cpp:828
void addEmptyGroup(size_t group_id)
Add an empty Group to the DataSet.
Definition: DataSet.cpp:166
static std::string getNextLine(std::istream &in)
void read(std::istream &is, DataSet &data_set)
Read a DataSet from an istream.
Definition: Genetix.cpp:14
The LocusInfo class.
Definition: LocusInfo.h:31
void addAlleleInfo(const AlleleInfo &allele)
Add an AlleleInfo to the LocusInfo.
Definition: LocusInfo.cpp:21
const std::string & nextToken()
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string toString(T t)