bpp-popgen  3.0.0
PopgenlibIO.cpp
Go to the documentation of this file.
1 //
2 // File PopgenlibIO.cpp
3 // Created by: Sylvain Gaillard
4 // Created on: Thursday July 29 2004
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for population genetics analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
#include "PopgenlibIO.h"

#include <memory>
41 
42 using namespace bpp;
43 using namespace std;
44 
45 const string PopgenlibIO::WHITESPACE = string("WHITESPACE");
46 const string PopgenlibIO::TAB = string("TAB");
47 const string PopgenlibIO::COMA = string("COMA");
48 const string PopgenlibIO::SEMICOLON = string("SEMICOLON");
49 
50 const string PopgenlibIO::DIPLOID = string("DIPLOID");
51 const string PopgenlibIO::HAPLOID = string("HAPLOID");
52 const string PopgenlibIO::HAPLODIPLOID = string("HAPLODIPLOID");
53 const string PopgenlibIO::UNKNOWN = string("UNKNOWN");
54 
55 PopgenlibIO::PopgenlibIO() : data_separator_(' '),
56  missing_data_symbol_('$') {}
57 
58 PopgenlibIO::PopgenlibIO(const std::string& missing_data_symbol,
59  const std::string& data_separator) :
60  data_separator_(' '),
61  missing_data_symbol_('$')
62 {
63  try
64  {
65  setDataSeparator(data_separator);
66  setMissingDataSymbol(missing_data_symbol);
67  }
68  catch (Exception& e)
69  {
70  throw e;
71  }
72 }
73 
75 
76 void PopgenlibIO::setMissingDataSymbol(const std::string& missing_data_symbol)
77 {
78  if (missing_data_symbol.size() != 1 || isdigit(missing_data_symbol[0])
79  || TextTools::isWhiteSpaceCharacter(missing_data_symbol[0])
80  || missing_data_symbol[0] == data_separator_
81  )
82  throw Exception("PopgenlibIO::setMissingData: not expected value for missing_data_symbol.");
83 
84  missing_data_symbol_ = missing_data_symbol[0];
85 }
86 
87 void PopgenlibIO::setDataSeparator(const std::string& data_separator)
88 {
89  if (data_separator == WHITESPACE)
90  data_separator_ = ' ';
91  else if (data_separator == TAB)
92  data_separator_ = '\t';
93  else if (data_separator == COMA)
94  data_separator_ = ',';
95  else if (data_separator == SEMICOLON)
96  data_separator_ = ';';
97  else
98  {
99  if (isdigit(data_separator[0])
100  || data_separator == getMissingDataSymbol()
101  )
102  throw Exception("PopgenlibIO::setDataSeparator: not expected value for data_separator.");
103  data_separator_ = data_separator.c_str()[0];
104  }
105 }
106 
108 {
110 }
111 
112 std::string PopgenlibIO::getDataSeparator() const
113 {
114  switch (data_separator_)
115  {
116  case (' '): return WHITESPACE;
117  case ('\t'): return TAB;
118  case (','): return COMA;
119  case (';'): return SEMICOLON;
120  default: return TextTools::toString(data_separator_);
121  }
122 }
123 
125 {
126  return missing_data_symbol_;
127 }
128 
130 {
131  return data_separator_;
132 }
133 
134 void PopgenlibIO::read(std::istream& is, DataSet& data_set)
135 {
136  if (!is)
137  throw IOException("PopgenlibIO::read: fail to open stream.");
138  string temp = "";
139  vector<string> temp_v;
140  stringstream tmp_ss;
141  VectorSequenceContainer* tmp_vsc = NULL;
142  Locality<double> tmp_locality("tmp");
143  vector<LocusInfo> tmp_locinf;
144  Individual tmp_indiv;
145  bool section1 = true;
146  bool section2 = true;
147  bool section3 = true;
148  bool section4 = true;
149  bool section5 = true;
150  size_t current_section = 0;
151  size_t previous_section = 0;
152  size_t linenum = 0;
153  // Main loop for all file lines
154  while (!is.eof())
155  {
156  temp = FileTools::getNextLine(is);
157  linenum++;
158  // Get the correct current section
159  if (temp.find("[General]", 0) != string::npos)
160  {
161  previous_section = current_section;
162  current_section = 1;
163  continue;
164  }
165  else if (temp.find("[Localities]", 0) != string::npos)
166  {
167  previous_section = current_section;
168  current_section = 2;
169  continue;
170  }
171  else if (temp.find("[Sequences]", 0) != string::npos)
172  {
173  previous_section = current_section;
174  current_section = 3;
175  continue;
176  }
177  else if (temp.find("[Loci]", 0) != string::npos)
178  {
179  previous_section = current_section;
180  current_section = 4;
181  continue;
182  }
183  else if (temp.find("[Individuals]", 0) != string::npos)
184  {
185  previous_section = current_section;
186  current_section = 5;
187  continue;
188  }
189  // General section ------------------------------------
190  if (current_section == 1 && previous_section < 1)
191  {
192  temp_v.push_back(temp);
193  }
194  if (section1 && current_section != 1 && previous_section == 1)
195  {
196  section1 = false;
197  parseGeneral_(temp_v, data_set);
198  temp_v.clear();
199  if (data_set.hasSequenceData() && tmp_vsc == NULL)
200  tmp_vsc = new VectorSequenceContainer(data_set.getAlphabet());
201  }
202 
203  // Localities section ---------------------------------
204  if (current_section == 2 && previous_section < 2)
205  {
206  if (temp.find(">", 0) != string::npos)
207  {
208  parseLocality_(temp_v, data_set);
209  temp_v.clear();
210  temp_v.push_back(temp);
211  }
212  else
213  temp_v.push_back(temp);
214  }
215  if (section2 && current_section != 2 && previous_section == 2)
216  {
217  section2 = false;
218  parseLocality_(temp_v, data_set);
219  temp_v.clear();
220  }
221 
222  // Sequences section ----------------------------------
223  if (current_section == 3 && previous_section < 3)
224  {
225  if (temp.find(">", 0) != string::npos)
226  {
227  parseSequence_(temp_v, *tmp_vsc);
228  temp_v.clear();
229  temp_v.push_back(temp);
230  }
231  else
232  temp_v.push_back(temp);
233  }
234  if (section3 && current_section != 3 && previous_section == 3)
235  {
236  section3 = false;
237  parseSequence_(temp_v, *tmp_vsc);
238  temp_v.clear();
239  }
240 
241  // Loci section ---------------------------------------
242  if (current_section == 4 && previous_section < 4)
243  {
244  if (temp.find(">", 0) != string::npos)
245  {
246  parseLoci_(temp_v, tmp_locinf);
247  temp_v.clear();
248  temp_v.push_back(temp);
249  }
250  else
251  temp_v.push_back(temp);
252  }
253  if (section4 && current_section != 4 && previous_section == 4)
254  {
255  section4 = false;
256  parseLoci_(temp_v, tmp_locinf);
257  temp_v.clear();
258  AnalyzedLoci tmp_anloc(tmp_locinf.size());
259  for (size_t i = 0; i < tmp_locinf.size(); i++)
260  {
261  tmp_anloc.setLocusInfo(i, tmp_locinf[i]);
262  }
263  data_set.setAnalyzedLoci(tmp_anloc);
264  }
265 
266  // Individuals section --------------------------------
267  if (current_section == 5 && previous_section < 5)
268  {
269  if (temp.find(">", 0) != string::npos)
270  {
271  parseIndividual_(temp_v, data_set, *tmp_vsc);
272  temp_v.clear();
273  temp_v.push_back(temp);
274  }
275  else
276  temp_v.push_back(temp);
277  }
278  if (section5 && current_section != 5 && previous_section == 5)
279  {
280  section5 = false;
281  parseIndividual_(temp_v, data_set, *tmp_vsc);
282  temp_v.clear();
283  }
284  }
285  // Emptied the buffer if eof.
286  if (section2 && current_section == 2)
287  parseLocality_(temp_v, data_set);
288  if (section3 && current_section == 3)
289  parseSequence_(temp_v, *tmp_vsc);
290  if (section5 && current_section == 5)
291  parseIndividual_(temp_v, data_set, *tmp_vsc);
292  temp_v.clear();
293 }
294 
295 void PopgenlibIO::parseGeneral_(const std::vector<std::string>& in, DataSet& data_set)
296 {
297  stringstream is;
298  for (size_t i = 0; i < in.size(); i++)
299  {
300  is << in[i] << endl;
301  }
302  string temp;
303  while (!is.eof() && in.size() != 0)
304  {
305  temp = FileTools::getNextLine(is);
306  if (temp.find("MissingData", 0) != string::npos)
307  setMissingDataSymbol(getValues_(temp, "=")[0]);
308  if (temp.find("DataSeparator", 0) != string::npos)
309  setDataSeparator(getValues_(temp, "=")[0]);
310  if (temp.find("SequenceType", 0) != string::npos)
311  data_set.setAlphabet(getValues_(temp, "=")[0]);
312  }
313 }
314 
315 void PopgenlibIO::parseLocality_(const std::vector<std::string>& in, DataSet& data_set)
316 {
317  stringstream is;
318  for (size_t i = 0; i < in.size(); i++)
319  {
320  is << in[i] << endl;
321  }
322  Locality<double> tmp_locality("");
323  string temp;
324  while (!is.eof() && in.size() != 0)
325  {
326  temp = FileTools::getNextLine(is);
327  // cout << "_parseLocality: " << temp << endl;
328  if (temp.find(">", 0) != string::npos)
329  {
330  tmp_locality.setName(TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + 1, temp.end())));
331  }
332  if (temp.find("Coord", 0) != string::npos)
333  {
334  vector<string> v = getValues_(temp, "=");
335  tmp_locality.setX(TextTools::toDouble(v[0]));
336  tmp_locality.setY(TextTools::toDouble(v[1]));
337  }
338  }
339  if (tmp_locality.getName() != "")
340  data_set.addLocality(tmp_locality);
341 }
342 
343 void PopgenlibIO::parseSequence_(const std::vector<std::string>& in, VectorSequenceContainer& vsc)
344 {
345  Fasta ifasta;
346  stringstream is;
347  for (size_t i = 0; i < in.size(); i++)
348  {
349  is << in[i] << endl;
350  }
351  ifasta.readSequences(is, vsc);
352 }
353 
354 void PopgenlibIO::parseLoci_(const std::vector<std::string>& in, std::vector<LocusInfo>& locus_info)
355 {
356  stringstream is;
357  for (size_t i = 0; i < in.size(); i++)
358  {
359  is << in[i] << endl;
360  }
361  string locinf_name = "";
362  unsigned int locinf_ploidy = LocusInfo::DIPLOID;
363  string temp;
364  while (!is.eof())
365  {
366  temp = FileTools::getNextLine(is);
367  if (temp.find(">", 0) != string::npos)
368  {
369  locinf_name = TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + 1, temp.end()));
370  }
371  if (temp.find("Ploidy", 0) != string::npos)
372  {
373  vector<string> v = getValues_(temp, "=");
374  string tmp_str_ploidy = TextTools::removeSurroundingWhiteSpaces(v[0]);
375  tmp_str_ploidy = TextTools::toUpper(tmp_str_ploidy);
376  // cout << "ploidy : " << tmp_str_ploidy << endl;
377  if (tmp_str_ploidy == DIPLOID)
378  locinf_ploidy = LocusInfo::DIPLOID;
379  else if (tmp_str_ploidy == HAPLOID)
380  locinf_ploidy = LocusInfo::HAPLOID;
381  else if (tmp_str_ploidy == HAPLODIPLOID)
382  locinf_ploidy = LocusInfo::HAPLODIPLOID;
383  else if (tmp_str_ploidy == UNKNOWN)
384  locinf_ploidy = LocusInfo::UNKNOWN;
385  }
386  if (temp.find("NbAlleles", 0) != string::npos)
387  {
388  // not used ...
389  }
390  }
391  if (locinf_name != "")
392  locus_info.push_back(LocusInfo(locinf_name, locinf_ploidy));
393 }
394 
395 void PopgenlibIO::parseIndividual_(const std::vector<std::string>& in, DataSet& data_set, const VectorSequenceContainer& vsc)
396 {
397  Individual tmp_indiv;
398  size_t tmp_group_pos = 0;
399  string temp = "";
400  for (size_t i = 0; i < in.size(); i++)
401  {
402  // Get Individual Id
403  if (in[i].find(">", 0) != string::npos)
404  {
405  tmp_indiv.setId(TextTools::removeSurroundingWhiteSpaces(string(in[i].begin() + 1, in[i].end())));
406  }
407  // Get the Group
408  if (in[i].find("Group", 0) != string::npos)
409  {
410  temp = in[i];
411  tmp_group_pos = TextTools::to<size_t>(getValues_(temp, "=")[0]);
412  try
413  {
414  data_set.addEmptyGroup(tmp_group_pos);
415  }
416  catch (...)
417  {}
418  }
419  // Find the locality
420  if (in[i].find("Locality", 0) != string::npos)
421  {
422  temp = in[i];
423  size_t sep_pos = temp.find("=", 0);
424  string loc_name = TextTools::removeSurroundingWhiteSpaces(string(temp.begin() + static_cast<ptrdiff_t>(sep_pos + 1), temp.end()));
425  try
426  {
427  tmp_indiv.setLocality(&data_set.getLocalityByName(loc_name));
428  }
429  catch (...)
430  {}
431  }
432  // Set the coord
433  if (in[i].find("Coord", 0) != string::npos)
434  {
435  temp = in[i];
436  tmp_indiv.setCoord(TextTools::toDouble(getValues_(temp, "=")[0]), TextTools::toDouble(getValues_(temp, "=")[1]));
437  }
438  // And the date
439  if (in[i].find("Date", 0) != string::npos)
440  {
441  int d, m, y;
442  temp = in[i];
443  string tmp_date = getValues_(temp, "=")[0];
444  d = TextTools::toInt(string(tmp_date.begin(), tmp_date.begin() + 2));
445  m = TextTools::toInt(string(tmp_date.begin() + 2, tmp_date.begin() + 4));
446  y = TextTools::toInt(string(tmp_date.begin() + 4, tmp_date.end()));
447  tmp_indiv.setDate(Date(d, m, y));
448  }
449  // Now the sequences
450  if (in[i].find("SequenceData", 0) != string::npos)
451  {
452  i++;
453  temp = in[i];
454  vector<string> seq_pos_str = getValues_(temp, "");
455  for (size_t j = 0; j < seq_pos_str.size(); j++)
456  {
457  try
458  {
459  if (seq_pos_str[j] != getMissingDataSymbol())
460  tmp_indiv.addSequence(j, vsc.getSequence(TextTools::to<size_t>(seq_pos_str[j]) - 1));
461  }
462  catch (...)
463  {}
464  }
465  }
466  // Finally the loci
467  if (in[i].find("AllelicData", 0) != string::npos)
468  {
469  string temp1 = in[++i];
470  string temp2 = in[++i];
471  vector<string> allele_pos_str1 = getValues_(temp1, "");
472  vector<string> allele_pos_str2 = getValues_(temp2, "");
473  try
474  {
475  tmp_indiv.initGenotype(data_set.getNumberOfLoci());
476  }
477  catch (...)
478  {}
479  if (allele_pos_str1.size() == allele_pos_str2.size())
480  {
481  for (size_t j = 0; j < allele_pos_str1.size(); j++)
482  {
483  const LocusInfo& locus_info = data_set.getLocusInfoAtPosition(j);
484  allele_pos_str1[j] = TextTools::removeSurroundingWhiteSpaces(allele_pos_str1[j]);
485  vector<string> tmp_alleles_id;
486  if (allele_pos_str1[j] != getMissingDataSymbol())
487  {
488  BasicAlleleInfo tmp_allele_info(allele_pos_str1[j]);
489  try
490  {
491  data_set.addAlleleInfoByLocusPosition(j, tmp_allele_info);
492  }
493  catch (...)
494  {}
495  tmp_alleles_id.push_back(allele_pos_str1[j]);
496  }
497  allele_pos_str2[j] = TextTools::removeSurroundingWhiteSpaces(allele_pos_str2[j]);
498  if (allele_pos_str2[j] != getMissingDataSymbol())
499  {
500  BasicAlleleInfo tmp_allele_info(allele_pos_str2[j]);
501  try
502  {
503  data_set.addAlleleInfoByLocusPosition(j, tmp_allele_info);
504  }
505  catch (...)
506  {}
507  tmp_alleles_id.push_back(allele_pos_str2[j]);
508  }
509  try
510  {
511  tmp_indiv.setMonolocusGenotypeByAlleleId(j, tmp_alleles_id, locus_info);
512  }
513  catch (...)
514  {}
515  }
516  }
517  }
518  }
519  if (tmp_indiv.getId() != "")
520  {
521  try
522  {
523  data_set.addIndividualToGroup(data_set.getGroupPosition(tmp_group_pos), tmp_indiv);
524  }
525  catch (...)
526  {}
527  }
528 }
529 
530 void PopgenlibIO::read(const std::string& path, DataSet& data_set)
531 {
532  AbstractIDataSet::read(path, data_set);
533 }
534 
535 DataSet* PopgenlibIO::read(std::istream& is)
536 {
537  return AbstractIDataSet::read(is);
538 }
539 
540 DataSet* PopgenlibIO::read(const std::string& path)
541 {
542  return AbstractIDataSet::read(path);
543 }
544 
/**
 * Write a DataSet on a stream.
 *
 * Sections are emitted in this order: [General] (always), [Localities]
 * (if the data set has localities), [Sequences] (if it has sequence data),
 * [Loci] (if it has allelic data) and [Individuals] (always).
 *
 * @param os       Output stream.
 * @param data_set DataSet to write.
 */
545 void PopgenlibIO::write(std::ostream& os, const DataSet& data_set) const
546 {
// 1-based running number printed for each sequence found; it is shared by
// all individuals, in the order they are visited below.
547  size_t seqcpt = 1;
548  // General section --------------------------------------
549  os << "[General]" << endl;
550  os << "MissingData = " << getMissingDataSymbol() << endl;
551  os << "DataSeparator = " << getDataSeparator() << endl;
552  if (data_set.hasSequenceData())
553  {
554  string seq_type = data_set.getAlphabetType();
555  os << "SequenceType = " << seq_type << endl;
556  }
557  // Localities section -----------------------------------
558  if (data_set.hasLocality())
559  {
560  os << endl << "[Localities]" << endl;
561  for (size_t i = 0; i < data_set.getNumberOfLocalities(); i++)
562  {
563  os << ">" << (data_set.getLocalityAtPosition(i)).getName() << endl;
564  os << "Coord = " << (data_set.getLocalityAtPosition(i)).getX();
565  os << " " << (data_set.getLocalityAtPosition(i)).getY() << endl;
566  }
567  }
568 
569  // Sequences section ------------------------------------
570  if (data_set.hasSequenceData())
571  {
// Sequences are written with the Fasta writer, individual by individual,
// visiting groups and individuals in the same order as the
// [Individuals] loop further down.
572  Fasta fasta(80);
573  os << endl << "[Sequences]" << endl;
574  for (size_t i = 0; i < data_set.getNumberOfGroups(); i++)
575  {
576  for (size_t j = 0; j < data_set.getNumberOfIndividualsInGroup(i); j++)
577  {
578  fasta.writeSequences(os, data_set.getIndividualAtPositionFromGroup(i, j)->getSequences());
579  }
580  }
581  }
582 
583  // AllelicData section ----------------------------------
584  if (data_set.hasAlleleicData())
585  {
586  os << endl << "[Loci]" << endl;
587  for (size_t i = 0; i < data_set.getNumberOfLoci(); i++)
588  {
589  const LocusInfo& tmp_locus_info = data_set.getLocusInfoAtPosition(i);
590  os << ">" << tmp_locus_info.getName() << endl;
591  os << "Ploidy = ";
// No keyword is printed when the ploidy matches none of the four constants.
592  if (tmp_locus_info.getPloidy() == LocusInfo::HAPLOID)
593  os << HAPLOID;
594  else if (tmp_locus_info.getPloidy() == LocusInfo::DIPLOID)
595  os << DIPLOID;
596  else if (tmp_locus_info.getPloidy() == LocusInfo::HAPLODIPLOID)
597  os << HAPLODIPLOID;
598  else if (tmp_locus_info.getPloidy() == LocusInfo::UNKNOWN)
599  os << UNKNOWN;
600  os << endl;
601  os << "NbAlleles = " << tmp_locus_info.getNumberOfAlleles() << endl;
602  }
603  }
604 
605  // Individuals section ----------------------------------
606  os << endl << "[Individuals]" << endl;
607  for (size_t i = 0; i < data_set.getNumberOfGroups(); i++)
608  {
609  for (size_t j = 0; j < data_set.getNumberOfIndividualsInGroup(i); j++)
610  {
// Blank line between records, but not before the first one.
611  if (i > 0 || j > 0)
612  os << endl;
613  const Individual* tmp_ind = data_set.getIndividualAtPositionFromGroup(i, j);
614  os << ">" << tmp_ind->getId() << endl;
615  os << "Group = " << TextTools::toString((data_set.getGroupAtPosition(i)).getGroupId()) << endl;
616  if (tmp_ind->hasLocality())
617  os << "Locality = " << tmp_ind->getLocality()->getName() << endl;
618  if (tmp_ind->hasCoord())
619  os << "Coord = " << tmp_ind->getX() << " " << tmp_ind->getY() << endl;
620  if (tmp_ind->hasDate())
621  os << "Date = " << tmp_ind->getDate().getDateStr() << endl;
622  if (tmp_ind->hasSequences())
623  {
624  size_t nbss = tmp_ind->getNumberOfSequences();
625  os << "SequenceData = {" << endl;
626  for (size_t k = 0; k < nbss; k++)
627  {
// The call below is only a probe: when it succeeds the next sequence
// number is printed, otherwise the missing-data character is.
628  try
629  {
630  tmp_ind->getSequenceAtPosition(k);
631  os << TextTools::toString(seqcpt++);
632  }
// NOTE(review): the handler clause of this try block (listing line 633) is
// absent from this listing, so the exception type caught here cannot be
// read from it. Restore the line from the real source file.
634  {
635  os << getMissingDataChar();
636  }
// Separator between values, end of line after the last one.
637  if (k < nbss - 1)
638  os << getDataSeparatorChar();
639  else
640  os << endl;
641  }
642  os << "}" << endl;
643  }
644  if (tmp_ind->hasGenotype())
645  {
646  const MultilocusGenotype& tmp_genotype = tmp_ind->getGenotype();
// output[k] holds the two allele identifiers printed for locus k.
647  vector<vector<string> > output(tmp_genotype.size());
648  os << "AllelicData = {" << endl;
649  for (size_t k = 0; k < tmp_genotype.size(); k++)
650  {
651  output[k].resize(2);
652  if (tmp_genotype.isMonolocusGenotypeMissing(k))
653  {
654  output[k][0] = getMissingDataChar();
655  output[k][1] = getMissingDataChar();
656  }
657  else
658  {
659  vector<size_t> tmp_all_ind = tmp_genotype.getMonolocusGenotype(k).getAlleleIndex();
// NOTE(review): tmp_all_ind[0] is read without checking that the vector
// is not empty - confirm getAlleleIndex() always returns at least one key.
660  output[k][0] = data_set.getLocusInfoAtPosition(k).getAlleleInfoByKey(tmp_all_ind[0]).getId();
661  if (tmp_all_ind.size() > 1)
662  output[k][1] = data_set.getLocusInfoAtPosition(k).getAlleleInfoByKey(tmp_all_ind[1]).getId();
663  else
664  output[k][1] = getMissingDataChar();
665  }
666  }
// First row: first allele of every locus.
667  for (size_t k = 0; k < output.size(); k++)
668  {
669  os << output[k][0];
670  if (k < output.size() - 1)
671  os << getDataSeparatorChar();
672  else
673  os << endl;
674  }
// Second row: second allele of every locus (or the missing-data character).
675  for (size_t k = 0; k < output.size(); k++)
676  {
677  os << output[k][1];
678  if (k < output.size() - 1)
679  os << getDataSeparatorChar();
680  else
681  os << endl;
682  }
683  os << "}" << endl;
684  }
685  }
686  }
687 }
688 
689 void PopgenlibIO::write(const std::string& path, const DataSet& data_set, bool overwrite) const
690 {
691  AbstractODataSet::write(path, data_set, overwrite);
692 }
693 
694 std::vector<std::string> PopgenlibIO::getValues_(std::string& param_line, const std::string& delim)
695 {
696  vector<string> values;
697  size_t limit = param_line.find(delim, 0);
698  if (limit != string::npos)
699  param_line = string(param_line.begin() + static_cast<ptrdiff_t>(limit + delim.size()), param_line.end());
700  param_line = TextTools::removeSurroundingWhiteSpaces(param_line);
701 
702  size_t bi = 0;
703  size_t bs = param_line.find(getDataSeparatorChar(), bi);
704  while (bs > 0)
705  {
706  values.push_back(string(param_line.begin() + static_cast<ptrdiff_t>(bi), param_line.begin() + static_cast<ptrdiff_t>(bs)));
707  bi = bs + 1;
708  bs = param_line.find(getDataSeparatorChar(), bi);
709  }
710  values.push_back(string(param_line.begin() + static_cast<ptrdiff_t>(bi), param_line.end()));
711  return values;
712 }
713 
virtual void read(std::istream &is, DataSet &data_set)=0
Read a DataSet on istream.
virtual void readSequences(std::istream &input, SequenceContainer &sc) const
virtual void write(std::ostream &os, const DataSet &data_set) const =0
Write a DataSet on ostream.
virtual const std::string & getId() const =0
Get the identifier of the allele.
The AnalyzedLoci class.
Definition: AnalyzedLoci.h:65
void setLocusInfo(size_t locus_position, const LocusInfo &locus)
Set a LocusInfo.
The BasicAlleleInfo class.
The DataSet class.
Definition: DataSet.h:73
std::string getAlphabetType() const
Get the alphabet type as a string.
Definition: DataSet.cpp:1055
void setAnalyzedLoci(const AnalyzedLoci &analyzedLoci)
Set the AnalyzedLoci to the DataSet.
Definition: DataSet.cpp:1066
bool hasAlleleicData() const
Tell if there is allelic data.
Definition: DataSet.cpp:1356
size_t getNumberOfGroups() const
Get the number of Groups.
Definition: DataSet.cpp:300
size_t getNumberOfLocalities() const
Get the number of Localities.
Definition: DataSet.cpp:190
const Group & getGroupAtPosition(size_t group_position) const
Get a group by position.
Definition: DataSet.cpp:281
void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
Definition: DataSet.cpp:1182
bool hasLocality() const
Tell if there is at least one locality.
Definition: DataSet.cpp:197
size_t getNumberOfIndividualsInGroup(size_t group_position) const
Get the number of Individuals in a Group.
Definition: DataSet.cpp:439
void setAlphabet(const Alphabet *alpha)
Set the alphabet of the AnalyzedSequences.
Definition: DataSet.cpp:1028
bool hasSequenceData() const
Tell if at least one individual has at least one sequence.
Definition: DataSet.cpp:1349
size_t getGroupPosition(size_t group_id) const
Get the position of a Group.
Definition: DataSet.cpp:269
const Alphabet * getAlphabet() const
Get the alphabet if there is sequence data.
Definition: DataSet.cpp:1046
const Locality< double > & getLocalityAtPosition(size_t locality_position) const
Get a Locality by locality_position.
Definition: DataSet.cpp:143
const LocusInfo & getLocusInfoAtPosition(size_t locus_position) const
Get a LocusInfo by its position.
Definition: DataSet.cpp:1142
size_t getNumberOfLoci() const
Get the number of loci.
Definition: DataSet.cpp:1202
void addIndividualToGroup(size_t group_position, const Individual &individual)
Add an Individual to a Group.
Definition: DataSet.cpp:405
void addEmptyGroup(size_t group_id)
Add an empty Group to the DataSet.
Definition: DataSet.cpp:217
const Individual * getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const
Get an Individual from a Group.
Definition: DataSet.cpp:464
const Locality< double > & getLocalityByName(const std::string &name) const
Get a Locality by name.
Definition: DataSet.cpp:152
void addLocality(Locality< double > &locality)
Add a locality to the DataSet.
Definition: DataSet.cpp:119
The Date class.
Definition: Date.h:57
std::string getDateStr() const
Get the Date as a string.
Definition: Date.cpp:112
void writeSequences(std::ostream &output, const SequenceContainer &sc) const
static std::string getNextLine(std::istream &in)
The Individual class.
Definition: Individual.h:76
double getY() const
Get the Y coordinate of the Individual.
Definition: Individual.cpp:262
bool hasCoord() const
Tell if this Individual has coordinates.
Definition: Individual.cpp:225
const Sequence & getSequenceAtPosition(const size_t sequence_position) const
Get a sequence by its position.
Definition: Individual.cpp:338
bool hasSequences() const
Tell if the Individual has some sequences.
Definition: Individual.cpp:426
void addSequence(size_t sequence_key, const Sequence &sequence)
Add a sequence to the Individual.
Definition: Individual.cpp:298
void setMonolocusGenotypeByAlleleId(size_t locus_position, const std::vector< std::string > allele_id, const LocusInfo &locus_info)
Set a MonolocusGenotype.
Definition: Individual.cpp:567
void initGenotype(size_t loci_number)
Init the genotype.
Definition: Individual.cpp:492
const std::string & getId() const
Get the id of the Individual.
Definition: Individual.h:146
void setCoord(const Point2D< double > &coord)
Set the coordinates of the Individual.
Definition: Individual.cpp:201
void setId(const std::string &id)
Set the id of the Individual.
Definition: Individual.cpp:160
bool hasGenotype() const
Tell if the Individual has a MultilocusGenotype.
Definition: Individual.cpp:524
void setDate(const Date &date)
Set the date of the Individual.
Definition: Individual.cpp:176
const MultilocusGenotype & getGenotype() const
Get the genotype.
Definition: Individual.cpp:508
const OrderedSequenceContainer & getSequences() const
Get a reference to the sequence container.
Definition: Individual.cpp:474
const Locality< double > * getLocality() const
Get the locality of the Individual.
Definition: Individual.cpp:280
void setLocality(const Locality< double > *locality)
Set the locality of the Individual.
Definition: Individual.cpp:273
bool hasDate() const
Tell if this Individual has a date.
Definition: Individual.cpp:193
double getX() const
Get the X coordinate of the Individual.
Definition: Individual.cpp:252
const Date & getDate() const
Get the date of the Individual.
Definition: Individual.cpp:183
size_t getNumberOfSequences() const
Get the number of sequences.
Definition: Individual.cpp:458
bool hasLocality() const
Tell if this Individual has a locality.
Definition: Individual.cpp:290
const std::string & getName() const
Get the name of the locality.
Definition: Locality.h:125
void setName(const std::string &name)
Set the name of the locality.
Definition: Locality.h:120
The LocusInfo class.
Definition: LocusInfo.h:64
static unsigned int UNKNOWN
Definition: LocusInfo.h:74
unsigned int getPloidy() const
Get the ploidy of the locus.
Definition: LocusInfo.h:108
size_t getNumberOfAlleles() const
Get the number of alleles at this locus.
Definition: LocusInfo.cpp:121
static unsigned int DIPLOID
Definition: LocusInfo.h:73
const AlleleInfo & getAlleleInfoByKey(size_t key) const
Retrieve an AlleleInfo object of the LocusInfo.
Definition: LocusInfo.cpp:104
static unsigned int HAPLOID
Definition: LocusInfo.h:72
static unsigned int HAPLODIPLOID
Definition: LocusInfo.h:71
const std::string & getName() const
Get the name of the locus.
Definition: LocusInfo.h:101
virtual std::vector< size_t > getAlleleIndex() const =0
Get the alleles' index.
The MultilocusGenotype class.
const MonolocusGenotype & getMonolocusGenotype(size_t locus_position) const
Get a MonolocusGenotype.
bool isMonolocusGenotypeMissing(size_t locus_position) const
Tell if a MonolocusGenotype is a missing data.
size_t size() const
Count the number of loci.
void setY(const T y)
void setX(const T x)
static const std::string UNKNOWN
Definition: PopgenlibIO.h:77
void parseIndividual_(const std::vector< std::string > &in, DataSet &data_set, const VectorSequenceContainer &vsc)
static const std::string HAPLODIPLOID
Definition: PopgenlibIO.h:76
void parseLocality_(const std::vector< std::string > &in, DataSet &data_set)
static const std::string COMA
Definition: PopgenlibIO.h:71
void write(std::ostream &os, const DataSet &data_set) const
Write a DataSet on ostream.
char missing_data_symbol_
Definition: PopgenlibIO.h:81
static const std::string TAB
Definition: PopgenlibIO.h:70
static const std::string SEMICOLON
Definition: PopgenlibIO.h:72
void read(std::istream &is, DataSet &data_set)
Read a DataSet on istream.
std::string getDataSeparator() const
Get the code for data separator.
static const std::string DIPLOID
Definition: PopgenlibIO.h:74
std::string getMissingDataSymbol() const
Get the code for missing data.
static const std::string WHITESPACE
Definition: PopgenlibIO.h:69
std::vector< std::string > getValues_(std::string &param_line, const std::string &delim)
char getMissingDataChar() const
Get the character for missing data.
void parseLoci_(const std::vector< std::string > &in, std::vector< LocusInfo > &locus_info)
void setDataSeparator(const std::string &data_separator)
Set the code for data separator.
Definition: PopgenlibIO.cpp:87
static const std::string HAPLOID
Definition: PopgenlibIO.h:75
void parseGeneral_(const std::vector< std::string > &in, DataSet &data_set)
void setMissingDataSymbol(const std::string &missing_data_symbol)
Set the code for missing data.
Definition: PopgenlibIO.cpp:76
void parseSequence_(const std::vector< std::string > &in, VectorSequenceContainer &vsc)
char getDataSeparatorChar() const
Get the data separator char.
const Sequence & getSequence(const std::string &name) const
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string toUpper(const std::string &s)
bool isWhiteSpaceCharacter(char c)
std::string toString(T t)