bpp-popgen3  3.0.0
PopgenlibIO.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "PopgenlibIO.h"
6 
7 using namespace bpp;
8 using namespace std;
9 
10 const string PopgenlibIO::WHITESPACE = string("WHITESPACE");
11 const string PopgenlibIO::TAB = string("TAB");
12 const string PopgenlibIO::COMA = string("COMA");
13 const string PopgenlibIO::SEMICOLON = string("SEMICOLON");
14 
15 const string PopgenlibIO::DIPLOID = string("DIPLOID");
16 const string PopgenlibIO::HAPLOID = string("HAPLOID");
17 const string PopgenlibIO::HAPLODIPLOID = string("HAPLODIPLOID");
18 const string PopgenlibIO::UNKNOWN = string("UNKNOWN");
19 
20 PopgenlibIO::PopgenlibIO() : data_separator_(' '),
21  missing_data_symbol_('$') {}
22 
23 PopgenlibIO::PopgenlibIO(const std::string& missing_data_symbol,
24  const std::string& data_separator) :
25  data_separator_(' '),
26  missing_data_symbol_('$')
27 {
28  try
29  {
30  setDataSeparator(data_separator);
31  setMissingDataSymbol(missing_data_symbol);
32  }
33  catch (Exception& e)
34  {
35  throw e;
36  }
37 }
38 
40 
41 void PopgenlibIO::setMissingDataSymbol(const std::string& missing_data_symbol)
42 {
43  if (missing_data_symbol.size() != 1 || isdigit(missing_data_symbol[0])
44  || TextTools::isWhiteSpaceCharacter(missing_data_symbol[0])
45  || missing_data_symbol[0] == data_separator_
46  )
47  throw Exception("PopgenlibIO::setMissingData: not expected value for missing_data_symbol.");
48 
49  missing_data_symbol_ = missing_data_symbol[0];
50 }
51 
52 void PopgenlibIO::setDataSeparator(const std::string& data_separator)
53 {
54  if (data_separator == WHITESPACE)
55  data_separator_ = ' ';
56  else if (data_separator == TAB)
57  data_separator_ = '\t';
58  else if (data_separator == COMA)
59  data_separator_ = ',';
60  else if (data_separator == SEMICOLON)
61  data_separator_ = ';';
62  else
63  {
64  if (isdigit(data_separator[0])
65  || data_separator == getMissingDataSymbol()
66  )
67  throw Exception("PopgenlibIO::setDataSeparator: not expected value for data_separator.");
68  data_separator_ = data_separator.c_str()[0];
69  }
70 }
71 
73 {
75 }
76 
77 std::string PopgenlibIO::getDataSeparator() const
78 {
79  switch (data_separator_)
80  {
81  case (' '): return WHITESPACE;
82  case ('\t'): return TAB;
83  case (','): return COMA;
84  case (';'): return SEMICOLON;
85  default: return TextTools::toString(data_separator_);
86  }
87 }
88 
90 {
91  return missing_data_symbol_;
92 }
93 
95 {
96  return data_separator_;
97 }
98 
99 void PopgenlibIO::read(std::istream& is, DataSet& dataset)
100 {
101  if (!is)
102  throw IOException("PopgenlibIO::read: fail to open stream.");
103  string temp = "";
104  vector<string> temp_v;
105  stringstream tmp_ss;
106  VectorSequenceContainer* tmp_vsc = NULL;
107  Locality<double> tmp_locality("tmp");
108  vector<LocusInfo> tmp_locinf;
109  Individual tmpIndiv;
110  bool section1 = true;
111  bool section2 = true;
112  bool section3 = true;
113  bool section4 = true;
114  bool section5 = true;
115  size_t current_section = 0;
116  size_t previous_section = 0;
117  // size_t linenum = 0;
118  // Main loop for all file lines
119  while (!is.eof())
120  {
121  temp = FileTools::getNextLine(is);
122  // linenum++;
123  // Get the correct current section
124  if (temp.find("[General]", 0) != string::npos)
125  {
126  previous_section = current_section;
127  current_section = 1;
128  continue;
129  }
130  else if (temp.find("[Localities]", 0) != string::npos)
131  {
132  previous_section = current_section;
133  current_section = 2;
134  continue;
135  }
136  else if (temp.find("[Sequences]", 0) != string::npos)
137  {
138  previous_section = current_section;
139  current_section = 3;
140  continue;
141  }
142  else if (temp.find("[Loci]", 0) != string::npos)
143  {
144  previous_section = current_section;
145  current_section = 4;
146  continue;
147  }
148  else if (temp.find("[Individuals]", 0) != string::npos)
149  {
150  previous_section = current_section;
151  current_section = 5;
152  continue;
153  }
154  // General section ------------------------------------
155  if (current_section == 1 && previous_section < 1)
156  {
157  temp_v.push_back(temp);
158  }
159  if (section1 && current_section != 1 && previous_section == 1)
160  {
161  section1 = false;
162  parseGeneral_(temp_v, dataset);
163  temp_v.clear();
164  if (dataset.hasSequenceData() && tmp_vsc == NULL)
165  tmp_vsc = new VectorSequenceContainer(dataset.getAlphabet());
166  }
167 
168  // Localities section ---------------------------------
169  if (current_section == 2 && previous_section < 2)
170  {
171  if (temp.find(">", 0) != string::npos)
172  {
173  parseLocality_(temp_v, dataset);
174  temp_v.clear();
175  temp_v.push_back(temp);
176  }
177  else
178  temp_v.push_back(temp);
179  }
180  if (section2 && current_section != 2 && previous_section == 2)
181  {
182  section2 = false;
183  parseLocality_(temp_v, dataset);
184  temp_v.clear();
185  }
186 
187  // Sequences section ----------------------------------
188  if (current_section == 3 && previous_section < 3)
189  {
190  if (temp.find(">", 0) != string::npos)
191  {
192  parseSequence_(temp_v, *tmp_vsc);
193  temp_v.clear();
194  temp_v.push_back(temp);
195  }
196  else
197  temp_v.push_back(temp);
198  }
199  if (section3 && current_section != 3 && previous_section == 3)
200  {
201  section3 = false;
202  parseSequence_(temp_v, *tmp_vsc);
203  temp_v.clear();
204  }
205 
206  // Loci section ---------------------------------------
207  if (current_section == 4 && previous_section < 4)
208  {
209  if (temp.find(">", 0) != string::npos)
210  {
211  parseLoci_(temp_v, tmp_locinf);
212  temp_v.clear();
213  temp_v.push_back(temp);
214  }
215  else
216  temp_v.push_back(temp);
217  }
218  if (section4 && current_section != 4 && previous_section == 4)
219  {
220  section4 = false;
221  parseLoci_(temp_v, tmp_locinf);
222  temp_v.clear();
223  AnalyzedLoci tmp_anloc(tmp_locinf.size());
224  for (size_t i = 0; i < tmp_locinf.size(); i++)
225  {
226  tmp_anloc.setLocusInfo(i, tmp_locinf[i]);
227  }
228  dataset.setAnalyzedLoci(tmp_anloc);
229  }
230 
231  // Individuals section --------------------------------
232  if (current_section == 5 && previous_section < 5)
233  {
234  if (temp.find(">", 0) != string::npos)
235  {
236  parseIndividual_(temp_v, dataset, *tmp_vsc);
237  temp_v.clear();
238  temp_v.push_back(temp);
239  }
240  else
241  temp_v.push_back(temp);
242  }
243  if (section5 && current_section != 5 && previous_section == 5)
244  {
245  section5 = false;
246  parseIndividual_(temp_v, dataset, *tmp_vsc);
247  temp_v.clear();
248  }
249  }
250  // Emptied the buffer if eof.
251  if (section2 && current_section == 2)
252  parseLocality_(temp_v, dataset);
253  if (section3 && current_section == 3)
254  parseSequence_(temp_v, *tmp_vsc);
255  if (section5 && current_section == 5)
256  parseIndividual_(temp_v, dataset, *tmp_vsc);
257  temp_v.clear();
258 }
259 
260 void PopgenlibIO::parseGeneral_(const std::vector<std::string>& in, DataSet& dataset)
261 {
262  stringstream is;
263  for (size_t i = 0; i < in.size(); i++)
264  {
265  is << in[i] << endl;
266  }
267  string temp;
268  while (!is.eof() && in.size() != 0)
269  {
270  temp = FileTools::getNextLine(is);
271  if (temp.find("MissingData", 0) != string::npos)
272  setMissingDataSymbol(getValues_(temp, "=")[0]);
273  if (temp.find("DataSeparator", 0) != string::npos)
274  setDataSeparator(getValues_(temp, "=")[0]);
275  if (temp.find("SequenceType", 0) != string::npos)
276  dataset.setAlphabet(getValues_(temp, "=")[0]);
277  }
278 }
279 
280 void PopgenlibIO::parseLocality_(const std::vector<std::string>& in, DataSet& dataset)
281 {
282  stringstream is;
283  for (size_t i = 0; i < in.size(); i++)
284  {
285  is << in[i] << endl;
286  }
287  Locality<double> tmp_locality("");
288  string temp;
289  while (!is.eof() && in.size() != 0)
290  {
291  temp = FileTools::getNextLine(is);
292  // cout << "_parseLocality: " << temp << endl;
293  if (temp.find(">", 0) != string::npos)
294  {
295  tmp_locality.setName(TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + 1, temp.end())));
296  }
297  if (temp.find("Coord", 0) != string::npos)
298  {
299  vector<string> v = getValues_(temp, "=");
300  tmp_locality.setX(TextTools::toDouble(v[0]));
301  tmp_locality.setY(TextTools::toDouble(v[1]));
302  }
303  }
304  if (tmp_locality.getName() != "")
305  dataset.addLocality(tmp_locality);
306 }
307 
308 void PopgenlibIO::parseSequence_(const std::vector<std::string>& in, VectorSequenceContainer& vsc)
309 {
310  Fasta ifasta;
311  stringstream is;
312  for (size_t i = 0; i < in.size(); i++)
313  {
314  is << in[i] << endl;
315  }
316  ifasta.readSequences(is, vsc);
317 }
318 
319 void PopgenlibIO::parseLoci_(const std::vector<std::string>& in, std::vector<LocusInfo>& locus_info)
320 {
321  stringstream is;
322  for (size_t i = 0; i < in.size(); i++)
323  {
324  is << in[i] << endl;
325  }
326  string locinf_name = "";
327  unsigned int locinf_ploidy = LocusInfo::DIPLOID;
328  string temp;
329  while (!is.eof())
330  {
331  temp = FileTools::getNextLine(is);
332  if (temp.find(">", 0) != string::npos)
333  {
334  locinf_name = TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + 1, temp.end()));
335  }
336  if (temp.find("Ploidy", 0) != string::npos)
337  {
338  vector<string> v = getValues_(temp, "=");
339  string tmp_str_ploidy = TextTools::removeSurroundingWhiteSpaces(v[0]);
340  tmp_str_ploidy = TextTools::toUpper(tmp_str_ploidy);
341  // cout << "ploidy : " << tmp_str_ploidy << endl;
342  if (tmp_str_ploidy == DIPLOID)
343  locinf_ploidy = LocusInfo::DIPLOID;
344  else if (tmp_str_ploidy == HAPLOID)
345  locinf_ploidy = LocusInfo::HAPLOID;
346  else if (tmp_str_ploidy == HAPLODIPLOID)
347  locinf_ploidy = LocusInfo::HAPLODIPLOID;
348  else if (tmp_str_ploidy == UNKNOWN)
349  locinf_ploidy = LocusInfo::UNKNOWN;
350  }
351  if (temp.find("NbAlleles", 0) != string::npos)
352  {
353  // not used ...
354  }
355  }
356  if (locinf_name != "")
357  locus_info.push_back(LocusInfo(locinf_name, locinf_ploidy));
358 }
359 
360 void PopgenlibIO::parseIndividual_(const std::vector<std::string>& in, DataSet& dataset, const VectorSequenceContainer& vsc)
361 {
362  Individual tmpIndiv;
363  size_t tmp_group_pos = 0;
364  string temp = "";
365  for (size_t i = 0; i < in.size(); i++)
366  {
367  // Get Individual Id
368  if (in[i].find(">", 0) != string::npos)
369  {
370  tmpIndiv.setId(TextTools::removeSurroundingWhiteSpaces(string(in[i].begin() + 1, in[i].end())));
371  }
372  // Get the Group
373  if (in[i].find("Group", 0) != string::npos)
374  {
375  temp = in[i];
376  tmp_group_pos = TextTools::to<size_t>(getValues_(temp, "=")[0]);
377  try
378  {
379  dataset.addEmptyGroup(tmp_group_pos);
380  }
381  catch (...)
382  {}
383  }
384  // Find the locality
385  if (in[i].find("Locality", 0) != string::npos)
386  {
387  temp = in[i];
388  size_t sep_pos = temp.find("=", 0);
389  string loc_name = TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + static_cast<ptrdiff_t>(sep_pos + 1), temp.end()));
390  try
391  {
392  tmpIndiv.setLocality(dataset.getLocalityByName(loc_name));
393  }
394  catch (...)
395  {}
396  }
397  // Set the coord
398  if (in[i].find("Coord", 0) != string::npos)
399  {
400  temp = in[i];
401  tmpIndiv.setCoord(TextTools::toDouble(getValues_(temp, "=")[0]), TextTools::toDouble(getValues_(temp, "=")[1]));
402  }
403  // And the date
404  if (in[i].find("Date", 0) != string::npos)
405  {
406  int d, m, y;
407  temp = in[i];
408  string tmp_date = getValues_(temp, "=")[0];
409  d = TextTools::toInt(string(tmp_date.begin(), tmp_date.begin() + 2));
410  m = TextTools::toInt(string(tmp_date.begin() + 2, tmp_date.begin() + 4));
411  y = TextTools::toInt(string(tmp_date.begin() + 4, tmp_date.end()));
412  tmpIndiv.setDate(Date(d, m, y));
413  }
414  // Now the sequences
415  if (in[i].find("SequenceData", 0) != string::npos)
416  {
417  i++;
418  temp = in[i];
419  vector<string> seq_pos_str = getValues_(temp, "");
420  for (size_t j = 0; j < seq_pos_str.size(); ++j)
421  {
422  try
423  {
424  if (seq_pos_str[j] != getMissingDataSymbol())
425  {
426  auto tmpSeq = unique_ptr<Sequence>(vsc.sequence(TextTools::to<size_t>(seq_pos_str[j]) - 1).clone());
427  tmpIndiv.addSequence(j, tmpSeq);
428  }
429  }
430  catch (...)
431  {}
432  }
433  }
434  // Finally the loci
435  if (in[i].find("AllelicData", 0) != string::npos)
436  {
437  string temp1 = in[++i];
438  string temp2 = in[++i];
439  vector<string> allele_pos_str1 = getValues_(temp1, "");
440  vector<string> allele_pos_str2 = getValues_(temp2, "");
441  try
442  {
443  tmpIndiv.initGenotype(dataset.getNumberOfLoci());
444  }
445  catch (...)
446  {}
447  if (allele_pos_str1.size() == allele_pos_str2.size())
448  {
449  for (size_t j = 0; j < allele_pos_str1.size(); j++)
450  {
451  const LocusInfo& locus_info = dataset.getLocusInfoAtPosition(j);
452  allele_pos_str1[j] = TextTools::removeSurroundingWhiteSpaces(allele_pos_str1[j]);
453  vector<string> tmp_alleles_id;
454  if (allele_pos_str1[j] != getMissingDataSymbol())
455  {
456  BasicAlleleInfo tmp_allele_info(allele_pos_str1[j]);
457  try
458  {
459  dataset.addAlleleInfoByLocusPosition(j, tmp_allele_info);
460  }
461  catch (...)
462  {}
463  tmp_alleles_id.push_back(allele_pos_str1[j]);
464  }
465  allele_pos_str2[j] = TextTools::removeSurroundingWhiteSpaces(allele_pos_str2[j]);
466  if (allele_pos_str2[j] != getMissingDataSymbol())
467  {
468  BasicAlleleInfo tmp_allele_info(allele_pos_str2[j]);
469  try
470  {
471  dataset.addAlleleInfoByLocusPosition(j, tmp_allele_info);
472  }
473  catch (...)
474  {}
475  tmp_alleles_id.push_back(allele_pos_str2[j]);
476  }
477  try
478  {
479  tmpIndiv.setMonolocusGenotypeByAlleleId(j, tmp_alleles_id, locus_info);
480  }
481  catch (...)
482  {}
483  }
484  }
485  }
486  }
487  if (tmpIndiv.getId() != "")
488  {
489  try
490  {
491  dataset.addIndividualToGroup(dataset.getGroupPosition(tmp_group_pos), tmpIndiv);
492  }
493  catch (...)
494  {}
495  }
496 }
497 
498 void PopgenlibIO::read(const std::string& path, DataSet& dataset)
499 {
500  AbstractIDataSet::read(path, dataset);
501 }
502 
503 DataSet* PopgenlibIO::read(std::istream& is)
504 {
505  return AbstractIDataSet::read(is);
506 }
507 
508 DataSet* PopgenlibIO::read(const std::string& path)
509 {
510  return AbstractIDataSet::read(path);
511 }
512 
// Write `dataset` to `os` as sectioned text: [General] first, then
// [Localities], [Sequences] and [Loci] when the DataSet reports such data,
// and finally [Individuals], whose header is always written.
// Data values are joined with getDataSeparatorChar() and missing values are
// written as getMissingDataChar().
513 void PopgenlibIO::write(std::ostream& os, const DataSet& dataset) const
514 {
// Running 1-based number written in SequenceData lines; it advances for every
// sequence position that sequenceAtPosition() could retrieve.
515  size_t seqcpt = 1;
516  // General section --------------------------------------
517  os << "[General]" << endl;
518  os << "MissingData = " << getMissingDataSymbol() << endl;
519  os << "DataSeparator = " << getDataSeparator() << endl;
520  if (dataset.hasSequenceData())
521  {
522  string seq_type = dataset.getAlphabetType();
523  os << "SequenceType = " << seq_type << endl;
524  }
525  // Localities section -----------------------------------
526  if (dataset.hasLocality())
527  {
528  os << endl << "[Localities]" << endl;
529  for (size_t i = 0; i < dataset.getNumberOfLocalities(); i++)
530  {
531  os << ">" << (dataset.localityAtPosition(i)).getName() << endl;
// NOTE(review): X and Y are separated by a literal blank, whereas the reader
// splits "Coord" values on the configured data separator - confirm that this
// round-trips when the separator is not WHITESPACE.
532  os << "Coord = " << (dataset.localityAtPosition(i)).getX();
533  os << " " << (dataset.localityAtPosition(i)).getY() << endl;
534  }
535  }
536 
537  // Sequences section ------------------------------------
538  if (dataset.hasSequenceData())
539  {
// Fasta writer constructed with the argument 80.
540  Fasta fasta(80);
541  os << endl << "[Sequences]" << endl;
// Sequences are written group by group and individual by individual, the same
// traversal order used below when numbering them with seqcpt.
542  for (size_t i = 0; i < dataset.getNumberOfGroups(); i++)
543  {
544  for (size_t j = 0; j < dataset.getNumberOfIndividualsInGroup(i); j++)
545  {
546  fasta.writeSequences(os, dataset.getIndividualAtPositionFromGroup(i, j).sequences());
547  }
548  }
549  }
550 
551  // AllelicData section ----------------------------------
552  if (dataset.hasAlleleicData())
553  {
554  os << endl << "[Loci]" << endl;
555  for (size_t i = 0; i < dataset.getNumberOfLoci(); i++)
556  {
557  const LocusInfo& tmp_locus_info = dataset.getLocusInfoAtPosition(i);
558  os << ">" << tmp_locus_info.getName() << endl;
559  os << "Ploidy = ";
// A ploidy value matching none of the four constants leaves the line empty
// after "Ploidy = ".
560  if (tmp_locus_info.getPloidy() == LocusInfo::HAPLOID)
561  os << HAPLOID;
562  else if (tmp_locus_info.getPloidy() == LocusInfo::DIPLOID)
563  os << DIPLOID;
564  else if (tmp_locus_info.getPloidy() == LocusInfo::HAPLODIPLOID)
565  os << HAPLODIPLOID;
566  else if (tmp_locus_info.getPloidy() == LocusInfo::UNKNOWN)
567  os << UNKNOWN;
568  os << endl;
569  os << "NbAlleles = " << tmp_locus_info.getNumberOfAlleles() << endl;
570  }
571  }
572 
573  // Individuals section ----------------------------------
574  os << endl << "[Individuals]" << endl;
575  for (size_t i = 0; i < dataset.getNumberOfGroups(); i++)
576  {
577  for (size_t j = 0; j < dataset.getNumberOfIndividualsInGroup(i); j++)
578  {
// Blank line between records, but not before the very first one.
579  if (i > 0 || j > 0)
580  os << endl;
581  const auto& tmpInd = dataset.getIndividualAtPositionFromGroup(i, j);
582  os << ">" << tmpInd.getId() << endl;
583  os << "Group = " << TextTools::toString((dataset.getGroupAtPosition(i)).getGroupId()) << endl;
584  if (tmpInd.hasLocality())
585  os << "Locality = " << tmpInd.locality().getName() << endl;
586  if (tmpInd.hasCoord())
587  os << "Coord = " << tmpInd.getX() << " " << tmpInd.getY() << endl;
588  if (tmpInd.hasDate())
589  os << "Date = " << tmpInd.date().getDateStr() << endl;
590  if (tmpInd.hasSequences())
591  {
592  size_t nbss = tmpInd.getNumberOfSequences();
593  os << "SequenceData = {" << endl;
594  for (size_t k = 0; k < nbss; k++)
595  {
// sequenceAtPosition() is called only to find out whether position k holds a
// sequence; its result is not used.
596  try
597  {
598  tmpInd.sequenceAtPosition(k);
599  os << TextTools::toString(seqcpt++);
600  }
// NOTE(review): the catch clause that pairs with the try block above is not
// visible in this listing; the block below presumably is its handler and
// writes the missing-data character for position k.
602  {
603  os << getMissingDataChar();
604  }
605  if (k < nbss - 1)
606  os << getDataSeparatorChar();
607  else
608  os << endl;
609  }
610  os << "}" << endl;
611  }
612  if (tmpInd.hasGenotype())
613  {
614  const MultilocusGenotype& tmp_genotype = tmpInd.getGenotype();
// output[k][0] and output[k][1] hold the two allele ids written for locus k.
615  vector<vector<string>> output(tmp_genotype.size());
616  os << "AllelicData = {" << endl;
617  for (size_t k = 0; k < tmp_genotype.size(); k++)
618  {
619  output[k].resize(2);
620  if (tmp_genotype.isMonolocusGenotypeMissing(k))
621  {
622  output[k][0] = getMissingDataChar();
623  output[k][1] = getMissingDataChar();
624  }
625  else
626  {
627  vector<size_t> tmp_all_ind = tmp_genotype.monolocusGenotype(k).getAlleleIndex();
628  output[k][0] = dataset.getLocusInfoAtPosition(k).getAlleleInfoByKey(tmp_all_ind[0]).getId();
// A genotype with a single allele index gets the missing-data character as
// its second entry.
629  if (tmp_all_ind.size() > 1)
630  output[k][1] = dataset.getLocusInfoAtPosition(k).getAlleleInfoByKey(tmp_all_ind[1]).getId();
631  else
632  output[k][1] = getMissingDataChar();
633  }
634  }
// First line: the first allele of every locus.
635  for (size_t k = 0; k < output.size(); k++)
636  {
637  os << output[k][0];
638  if (k < output.size() - 1)
639  os << getDataSeparatorChar();
640  else
641  os << endl;
642  }
// Second line: the second allele of every locus.
643  for (size_t k = 0; k < output.size(); k++)
644  {
645  os << output[k][1];
646  if (k < output.size() - 1)
647  os << getDataSeparatorChar();
648  else
649  os << endl;
650  }
651  os << "}" << endl;
652  }
653  }
654  }
655 }
656 
657 void PopgenlibIO::write(const std::string& path, const DataSet& dataset, bool overwrite) const
658 {
659  AbstractODataSet::write(path, dataset, overwrite);
660 }
661 
662 std::vector<std::string> PopgenlibIO::getValues_(std::string& param_line, const std::string& delim)
663 {
664  vector<string> values;
665  size_t limit = param_line.find(delim, 0);
666  if (limit != string::npos)
667  param_line = string(param_line.begin() + static_cast<ptrdiff_t>(limit + delim.size()), param_line.end());
668  param_line = TextTools::removeSurroundingWhiteSpaces(param_line);
669 
670  size_t bi = 0;
671  size_t bs = param_line.find(getDataSeparatorChar(), bi);
672  while (bs > 0)
673  {
674  values.push_back(string(param_line.begin() + static_cast<ptrdiff_t>(bi), param_line.begin() + static_cast<ptrdiff_t>(bs)));
675  bi = bs + 1;
676  bs = param_line.find(getDataSeparatorChar(), bi);
677  }
678  values.push_back(string(param_line.begin() + static_cast<ptrdiff_t>(bi), param_line.end()));
679  return values;
680 }
virtual void read(std::istream &is, DataSet &data_set)=0
Read a DataSet on istream.
void readSequences(std::istream &input, SequenceContainerInterface &sc) const override
virtual void write(std::ostream &os, const DataSet &data_set) const =0
Write a DataSet on ostream.
virtual const std::string & getId() const =0
Get the identifier of the allele.
The AnalyzedLoci class.
Definition: AnalyzedLoci.h:31
void setLocusInfo(size_t locusPosition, const LocusInfo &locus)
Set a LocusInfo.
The BasicAlleleInfo class.
The DataSet class.
Definition: DataSet.h:37
const Group & getGroupAtPosition(size_t groupPosition) const
Get a group by position.
Definition: DataSet.cpp:233
size_t getNumberOfGroups() const
Get the number of Groups.
Definition: DataSet.cpp:251
std::string getAlphabetType() const
Get the alphabet type as a string.
Definition: DataSet.h:585
bool hasAlleleicData() const
Tell if there is allelic data.
Definition: DataSet.h:711
void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
Definition: DataSet.cpp:1070
std::shared_ptr< const Alphabet > getAlphabet() const
Get a pointer toward the alphabet if there is sequence data.
Definition: DataSet.h:561
const Individual & getIndividualAtPositionFromGroup(size_t groupPosition, size_t individualPosition) const
Get an Individual from a Group.
Definition: DataSet.cpp:415
void setAnalyzedLoci(const AnalyzedLoci &analyzedLoci)
Set the AnalyzedLoci to the DataSet.
Definition: DataSet.h:599
size_t getGroupPosition(size_t group_id) const
Get the position of a Group.
Definition: DataSet.cpp:221
size_t getNumberOfIndividualsInGroup(size_t groupPosition) const
Get the number of Individuals in a Group.
Definition: DataSet.cpp:390
bool hasSequenceData() const
Tell if at least one individual has at least one sequence.
Definition: DataSet.h:706
void setAlphabet(std::shared_ptr< const Alphabet > alpha)
Set the alphabet of the AnalyzedSequences.
Definition: DataSet.h:549
const Locality< double > & localityAtPosition(size_t localityPosition) const
Get a Locality by localityPosition.
Definition: DataSet.cpp:93
size_t getNumberOfLocalities() const
Get the number of Localities.
Definition: DataSet.h:138
std::shared_ptr< const Locality< double > > getLocalityByName(const std::string &name) const
Get a Locality by name.
Definition: DataSet.cpp:102
const LocusInfo & getLocusInfoAtPosition(size_t locus_position) const
Get a LocusInfo by its position.
Definition: DataSet.cpp:1030
size_t getNumberOfLoci() const
Get the number of loci.
Definition: DataSet.cpp:1090
void addIndividualToGroup(size_t groupPosition, const Individual &individual)
Add an Individual to a Group.
Definition: DataSet.cpp:356
bool hasLocality() const
Tell if there is at least one locality.
Definition: DataSet.h:143
void addEmptyGroup(size_t group_id)
Add an empty Group to the DataSet.
Definition: DataSet.cpp:166
void addLocality(const Locality< double > &locality)
Add a locality to the DataSet.
Definition: DataSet.cpp:60
The Date class.
Definition: Date.h:21
void writeSequences(std::ostream &output, const SequenceContainerInterface &sc) const override
static std::string getNextLine(std::istream &in)
The Individual class.
Definition: Individual.h:40
void addSequence(size_t sequenceKey, std::unique_ptr< Sequence > &sequence)
Add a sequence to the Individual.
Definition: Individual.cpp:254
const std::string & getId() const
Get the id of the Individual.
Definition: Individual.h:110
void setCoord(const Point2D< double > &coord)
Set the coordinates of the Individual.
Definition: Individual.cpp:162
void setId(const std::string &id)
Set the id of the Individual.
Definition: Individual.cpp:121
void setLocality(std::shared_ptr< const Locality< double >> locality)
Set the locality of the Individual.
Definition: Individual.h:215
void setDate(const Date &date)
Set the date of the Individual.
Definition: Individual.cpp:137
void initGenotype(size_t lociNumber)
Init the genotype.
Definition: Individual.cpp:425
const SequenceContainerInterface & sequences() const
Get a reference to the sequence container.
Definition: Individual.h:372
void setMonolocusGenotypeByAlleleId(size_t locusPosition, const std::vector< std::string > alleleId, const LocusInfo &locusInfo)
Set a MonolocusGenotype.
Definition: Individual.cpp:502
The Locality class.
Definition: Locality.h:25
const std::string & getName() const
Get the name of the locality.
Definition: Locality.h:90
void setName(const std::string &name)
Set the name of the locality.
Definition: Locality.h:85
The LocusInfo class.
Definition: LocusInfo.h:31
static unsigned int UNKNOWN
Definition: LocusInfo.h:41
unsigned int getPloidy() const
Get the ploidy of the locus.
Definition: LocusInfo.h:101
size_t getNumberOfAlleles() const
Get the number of alleles at this locus.
Definition: LocusInfo.h:134
static unsigned int DIPLOID
Definition: LocusInfo.h:40
const AlleleInfo & getAlleleInfoByKey(size_t key) const
Retrieve an AlleleInfo object of the LocusInfo.
Definition: LocusInfo.cpp:42
static unsigned int HAPLOID
Definition: LocusInfo.h:39
static unsigned int HAPLODIPLOID
Definition: LocusInfo.h:38
const std::string & getName() const
Get the name of the locus.
Definition: LocusInfo.h:94
virtual std::vector< size_t > getAlleleIndex() const =0
Get the alleles' index.
The MultilocusGenotype class.
const MonolocusGenotypeInterface & monolocusGenotype(size_t locusPosition) const
Get a MonolocusGenotype.
bool isMonolocusGenotypeMissing(size_t locusPosition) const
Tell if a MonolocusGenotype is a missing data.
size_t size() const
Count the number of loci.
void setY(const T y)
void setX(const T x)
static const std::string UNKNOWN
Definition: PopgenlibIO.h:42
void parseIndividual_(const std::vector< std::string > &in, DataSet &data_set, const VectorSequenceContainer &vsc)
static const std::string HAPLODIPLOID
Definition: PopgenlibIO.h:41
void parseLocality_(const std::vector< std::string > &in, DataSet &data_set)
static const std::string COMA
Definition: PopgenlibIO.h:36
void write(std::ostream &os, const DataSet &data_set) const
Write a DataSet on ostream.
char missing_data_symbol_
Definition: PopgenlibIO.h:46
static const std::string TAB
Definition: PopgenlibIO.h:35
static const std::string SEMICOLON
Definition: PopgenlibIO.h:37
void read(std::istream &is, DataSet &data_set)
Read a DataSet on istream.
Definition: PopgenlibIO.cpp:99
std::string getDataSeparator() const
Get the code for data separator.
Definition: PopgenlibIO.cpp:77
static const std::string DIPLOID
Definition: PopgenlibIO.h:39
std::string getMissingDataSymbol() const
Get the code for missing data.
Definition: PopgenlibIO.cpp:72
static const std::string WHITESPACE
Definition: PopgenlibIO.h:34
std::vector< std::string > getValues_(std::string &param_line, const std::string &delim)
char getMissingDataChar() const
Get the character for missing data.
Definition: PopgenlibIO.cpp:89
void parseLoci_(const std::vector< std::string > &in, std::vector< LocusInfo > &locus_info)
void setDataSeparator(const std::string &data_separator)
Set the code for data separator.
Definition: PopgenlibIO.cpp:52
static const std::string HAPLOID
Definition: PopgenlibIO.h:40
void parseGeneral_(const std::vector< std::string > &in, DataSet &data_set)
void setMissingDataSymbol(const std::string &missing_data_symbol)
Set the code for missing data.
Definition: PopgenlibIO.cpp:41
void parseSequence_(const std::vector< std::string > &in, VectorSequenceContainer &vsc)
char getDataSeparatorChar() const
Get the data separator char.
Definition: PopgenlibIO.cpp:94
const SequenceType & sequence(const std::string &sequenceKey) const override
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string toUpper(const std::string &s)
bool isWhiteSpaceCharacter(char c)
std::string toString(T t)
TemplateVectorSequenceContainer< Sequence > VectorSequenceContainer