bpp-popgen3  3.0.0
Genepop.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "Genepop.h"
6 
7 using namespace bpp;
8 using namespace std;
9 
11 
13 
// Parse Genepop-formatted text from `is` and store the result in `data_set`.
//
// The stream is walked twice:
//  - First pass (structure): every line equal to "POP" (case-insensitive,
//    surrounding whitespace ignored) adds an empty group with ids 1, 2, ...
//    to data_set; lines before the first "POP" go through the locus-definition
//    branch; for individual lines the allele ids seen at each locus are
//    collected into al_ids. When the end of the stream is reached the code
//    seeks back to entry_point.
//  - Second pass (data): individuals are added to the current group and one
//    monolocus genotype per locus is set from the two halves of each token.
//
// Throws IOException when `is` is already in a failed state on entry.
//
// NOTE(review): this listing skips original line numbers 19, 56, 69, 78, 99,
// 127 and 141. As a result `ids` and `ind_id` are used without a visible
// declaration and two loops show no body. Check the complete file before
// changing any logic here.
14 void Genepop::read(istream& is, DataSet& data_set)
15 {
16  if (!is)
17  throw IOException("Genepop::read: fail to open stream.");
18  // Skip first line (NOTE(review): the statement that does this, original line 19, is not visible in this listing)
  // Position the first pass rewinds to, so the second pass starts from the same place.
20  ios::pos_type entry_point = is.tellg();
21  bool eof_ok = false;      // set once the first pass has hit end of stream
22  bool loc_def_ok = false;  // set once the first "POP" line has been seen
23  bool loc_nbr_ok = false;  // set once al_ids has been sized to tmp_loc.size()
24  size_t grp_nbr = 0;
25  vector<LocusInfo> tmp_loc;
26  vector<set<string>> al_ids;  // per locus: distinct allele ids found in the first pass
  // NOTE(review): nothing visible fills ind_id_count; presumably done at missing line 69 - confirm.
27  map<string, size_t> ind_id_count;
28  map<string, size_t> ind_id_index;
29 
30  string temp("");
31  // First pass: discover the file structure (groups, loci, allele ids)
32  while (!eof_ok)
33  {
  // `!eof_ok` is always true here, since the loop condition already guarantees it.
34  if (is.peek() == EOF && !eof_ok)
35  {
36  // End of stream reached: rewind to entry_point for the second pass.
  // NOTE(review): no clear() is visible before seekg; if failbit is set at
  // this point the seek may have no effect - confirm against the full file.
37  is.seekg(entry_point);
38  eof_ok = true;
39  }
40  else
41  {
42  // Count everything
43  temp = FileTools::getNextLine(is);
44  string cp_temp = TextTools::removeSurroundingWhiteSpaces(temp);
45  cp_temp = TextTools::toUpper(cp_temp);
  // A "POP" line opens a new group. The checks below are plain `if`s (no
  // `else`), so the "POP" line itself also reaches them, unlike in the second pass.
46  if (cp_temp == string("POP"))
47  {
48  loc_def_ok = true;
49  grp_nbr++;
50  data_set.addEmptyGroup(grp_nbr);
51  }
  // Lines before the first "POP" are tokenised on ", ".
52  if (!loc_def_ok)
53  {
54  StringTokenizer st(temp, string(", "), true);
55  while (st.hasMoreToken())
  // NOTE(review): the loop body (original line 56) is missing from this
  // listing; presumably it appends one LocusInfo per token to tmp_loc - confirm.
57  }
  // Runs once, at the first "POP": the locus count is now fixed, so size al_ids to match.
58  if (loc_def_ok && !loc_nbr_ok)
59  {
60  al_ids.resize(tmp_loc.size());
61  loc_nbr_ok = true;
62  }
63  if (loc_def_ok)
64  {
65  string alleles;
66  StringTokenizer st(temp, string(","));
  // Only lines made of exactly two comma-separated tokens are considered.
67  if (st.numberOfRemainingTokens() == 2)
68  {
  // NOTE(review): original line 69 is missing here; presumably it consumes
  // the first token (the individual id) and updates ind_id_count - confirm.
70  alleles = st.nextToken();
71  }
72  StringTokenizer st2(alleles);
  // Lines whose genotype count differs from the number of loci are silently ignored.
73  if ((size_t)st2.numberOfRemainingTokens() == tmp_loc.size())
74  {
75  size_t i = 0;
76  while (st2.hasMoreToken())
77  {
  // NOTE(review): `ids` is declared on missing line 78, presumably as
  // st2.nextToken(). It is split in the middle into two allele ids;
  // halves equal to "00" or "000" are not recorded (presumably the
  // missing-data code - confirm).
79  string tmp_id = string(ids.begin(), ids.begin() + (ids.size() / 2));
80  if (tmp_id != string("00") && tmp_id != string("000"))
81  al_ids[i].insert(tmp_id);
82  tmp_id = string(ids.begin() + (ids.size() / 2), ids.end());
83  if (tmp_id != string("00") && tmp_id != string("000"))
84  al_ids[i].insert(tmp_id);
85  i++;
86  }
87  }
88  }
89  }
90  }
91 
92  // Set AnalyzedLoci: one LocusInfo per entry of tmp_loc
93  data_set.initAnalyzedLoci(tmp_loc.size());
94  for (size_t i = 0; i < tmp_loc.size(); i++)
95  {
96  data_set.setLocusInfo(i, tmp_loc[i]);
97  for (set<string>::iterator it = al_ids[i].begin(); it != al_ids[i].end(); it++)
98  {
  // NOTE(review): the loop body (original line 99) is missing from this
  // listing; presumably it registers allele *it for locus i - confirm.
100  }
101  }
102 
103  // Second pass: read individuals and genotypes
104  grp_nbr = 0;
105  size_t grp_pos = 0;
106  loc_def_ok = false;
  // NOTE(review): termination relies on getNextLine() driving the stream to
  // eof; the stream state left by the first pass is not re-checked here.
107  while (!is.eof())
108  {
109  temp = FileTools::getNextLine(is);
110  string cp_temp = TextTools::removeSurroundingWhiteSpaces(temp);
111  cp_temp = TextTools::toUpper(cp_temp);
  // A "POP" line switches to the next group, using the ids assigned in the first pass.
112  if (cp_temp == string("POP"))
113  {
114  grp_nbr++;
115  loc_def_ok = true;
116  grp_pos = data_set.getGroupPosition(grp_nbr);
117  }
118  else
119  {
  // Lines before the first "POP" are skipped in this pass.
120  if (loc_def_ok)
121  {
122  string alleles;
123  StringTokenizer st(temp, string(","));
124  size_t ind_pos = 0;
125  if (st.numberOfRemainingTokens() == 2)
126  {
  // NOTE(review): `ind_id` is declared on missing line 127, presumably
  // as st.nextToken(). An id counted more than once gets a "_<k>" suffix,
  // k being a per-id counter incremented on each occurrence.
128  if (ind_id_count[ind_id] > 1)
129  ind_id = ind_id + string("_") + TextTools::toString(++ind_id_index[ind_id]);
130  data_set.addEmptyIndividualToGroup(grp_pos, ind_id);
131  ind_pos = data_set.getIndividualPositionInGroup(grp_pos, ind_id);
132  data_set.initIndividualGenotypeInGroup(grp_pos, ind_pos);
133  alleles = st.nextToken();
134  }
135  StringTokenizer st2(alleles);
  // Genotypes are stored only when the line provides one token per locus.
136  if ((size_t)st2.numberOfRemainingTokens() == tmp_loc.size())
137  {
138  size_t i = 0;
139  while (st2.hasMoreToken())
140  {
  // NOTE(review): `ids` is declared on missing line 141, presumably as st2.nextToken().
142  vector<string> tmp_ids;
  // Split the token in the middle: first half and second half are the two allele ids.
143  tmp_ids.push_back(string(ids.begin(), ids.begin() + (ids.size() / 2)));
144  tmp_ids.push_back(string(ids.begin() + (ids.size() / 2), ids.end()));
  // The genotype is set only when neither half is "00" or "000".
145  if (tmp_ids[0] != string("00") && tmp_ids[0] != string("000")
146  && tmp_ids[1] != string("00") && tmp_ids[1] != string("000"))
147  {
148  data_set.setIndividualMonolocusGenotypeByAlleleIdInGroup(grp_pos, ind_pos, i, tmp_ids);
149  }
150  i++;
  // tmp_ids is declared inside this loop, so this clear() precedes its destruction.
151  tmp_ids.clear();
152  }
153  }
154  }
155  }
156  }
157 }
158 
// Path-based overload: forwards `path` and `data_set` unchanged to
// AbstractIDataSet::read(path, data_set). No Genepop-specific work is done here.
159 void Genepop::read(const string& path, DataSet& data_set)
160 {
161  AbstractIDataSet::read(path, data_set);
162 }
163 
// Stream overload returning a DataSet pointer: forwards `is` to
// AbstractIDataSet::read(is) and returns its result as is.
// NOTE(review): ownership of the returned pointer is decided by
// AbstractIDataSet::read, which is not visible here - confirm before use.
164 DataSet* Genepop::read(istream& is)
165 {
166  return AbstractIDataSet::read(is);
167 }
168 
// Path overload returning a DataSet pointer: forwards `path` to
// AbstractIDataSet::read(path) and returns its result as is.
// NOTE(review): ownership of the returned pointer is decided by
// AbstractIDataSet::read, which is not visible here - confirm before use.
169 DataSet* Genepop::read(const string& path)
170 {
171  return AbstractIDataSet::read(path);
172 }
virtual void read(std::istream &is, DataSet &data_set)=0
Read a DataSet from an istream.
The BasicAlleleInfo class.
The DataSet class.
Definition: DataSet.h:37
void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
Definition: DataSet.cpp:1070
void initAnalyzedLoci(size_t numberOfLoci)
Initialize the AnalyzedLoci for the given number of loci.
Definition: DataSet.h:609
size_t getIndividualPositionInGroup(size_t groupPosition, const std::string &individual_id) const
Get the position of an Individual in a Group.
Definition: DataSet.cpp:399
size_t getGroupPosition(size_t group_id) const
Get the position of a Group.
Definition: DataSet.cpp:221
void setLocusInfo(size_t locus_position, const LocusInfo &locus)
Set a LocusInfo.
Definition: DataSet.cpp:998
void setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t groupPosition, size_t individualPosition, size_t locusPosition, const std::vector< std::string > alleleId)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:928
void addEmptyIndividualToGroup(size_t groupPosition, const std::string &individual_id)
Add an empty Individual to a Group.
Definition: DataSet.cpp:374
void initIndividualGenotypeInGroup(size_t groupPosition, size_t individualPosition)
Initialize the genotype of an Individual in a Group.
Definition: DataSet.cpp:828
void addEmptyGroup(size_t group_id)
Add an empty Group to the DataSet.
Definition: DataSet.cpp:166
static std::string getNextLine(std::istream &in)
void read(std::istream &is, DataSet &data_set)
Read a DataSet from an istream.
Definition: Genepop.cpp:14
The LocusInfo class.
Definition: LocusInfo.h:31
size_t numberOfRemainingTokens() const
const std::string & nextToken()
bool hasMoreToken() const
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string toUpper(const std::string &s)
std::string toString(T t)