56 missing_data_symbol_(
'$') {}
59 const std::string& data_separator) :
61 missing_data_symbol_(
'$')
78 if (missing_data_symbol.size() != 1 || isdigit(missing_data_symbol[0])
82 throw Exception(
"PopgenlibIO::setMissingData: not expected value for missing_data_symbol.");
91 else if (data_separator ==
TAB)
93 else if (data_separator ==
COMA)
99 if (isdigit(data_separator[0])
102 throw Exception(
"PopgenlibIO::setDataSeparator: not expected value for data_separator.");
117 case (
'\t'):
return TAB;
118 case (
','):
return COMA;
137 throw IOException(
"PopgenlibIO::read: fail to open stream.");
139 vector<string> temp_v;
143 vector<LocusInfo> tmp_locinf;
145 bool section1 =
true;
146 bool section2 =
true;
147 bool section3 =
true;
148 bool section4 =
true;
149 bool section5 =
true;
150 size_t current_section = 0;
151 size_t previous_section = 0;
159 if (temp.find(
"[General]", 0) != string::npos)
161 previous_section = current_section;
165 else if (temp.find(
"[Localities]", 0) != string::npos)
167 previous_section = current_section;
171 else if (temp.find(
"[Sequences]", 0) != string::npos)
173 previous_section = current_section;
177 else if (temp.find(
"[Loci]", 0) != string::npos)
179 previous_section = current_section;
183 else if (temp.find(
"[Individuals]", 0) != string::npos)
185 previous_section = current_section;
190 if (current_section == 1 && previous_section < 1)
192 temp_v.push_back(temp);
194 if (section1 && current_section != 1 && previous_section == 1)
204 if (current_section == 2 && previous_section < 2)
206 if (temp.find(
">", 0) != string::npos)
210 temp_v.push_back(temp);
213 temp_v.push_back(temp);
215 if (section2 && current_section != 2 && previous_section == 2)
223 if (current_section == 3 && previous_section < 3)
225 if (temp.find(
">", 0) != string::npos)
229 temp_v.push_back(temp);
232 temp_v.push_back(temp);
234 if (section3 && current_section != 3 && previous_section == 3)
242 if (current_section == 4 && previous_section < 4)
244 if (temp.find(
">", 0) != string::npos)
248 temp_v.push_back(temp);
251 temp_v.push_back(temp);
253 if (section4 && current_section != 4 && previous_section == 4)
259 for (
size_t i = 0; i < tmp_locinf.size(); i++)
267 if (current_section == 5 && previous_section < 5)
269 if (temp.find(
">", 0) != string::npos)
273 temp_v.push_back(temp);
276 temp_v.push_back(temp);
278 if (section5 && current_section != 5 && previous_section == 5)
286 if (section2 && current_section == 2)
288 if (section3 && current_section == 3)
290 if (section5 && current_section == 5)
298 for (
size_t i = 0; i < in.size(); i++)
303 while (!is.eof() && in.size() != 0)
306 if (temp.find(
"MissingData", 0) != string::npos)
308 if (temp.find(
"DataSeparator", 0) != string::npos)
310 if (temp.find(
"SequenceType", 0) != string::npos)
318 for (
size_t i = 0; i < in.size(); i++)
324 while (!is.eof() && in.size() != 0)
328 if (temp.find(
">", 0) != string::npos)
332 if (temp.find(
"Coord", 0) != string::npos)
339 if (tmp_locality.
getName() !=
"")
347 for (
size_t i = 0; i < in.size(); i++)
357 for (
size_t i = 0; i < in.size(); i++)
361 string locinf_name =
"";
367 if (temp.find(
">", 0) != string::npos)
371 if (temp.find(
"Ploidy", 0) != string::npos)
379 else if (tmp_str_ploidy ==
HAPLOID)
383 else if (tmp_str_ploidy ==
UNKNOWN)
386 if (temp.find(
"NbAlleles", 0) != string::npos)
391 if (locinf_name !=
"")
392 locus_info.push_back(
LocusInfo(locinf_name, locinf_ploidy));
398 size_t tmp_group_pos = 0;
400 for (
size_t i = 0; i < in.size(); i++)
403 if (in[i].find(
">", 0) != string::npos)
408 if (in[i].find(
"Group", 0) != string::npos)
411 tmp_group_pos = TextTools::to<size_t>(
getValues_(temp,
"=")[0]);
420 if (in[i].find(
"Locality", 0) != string::npos)
423 size_t sep_pos = temp.find(
"=", 0);
433 if (in[i].find(
"Coord", 0) != string::npos)
439 if (in[i].find(
"Date", 0) != string::npos)
450 if (in[i].find(
"SequenceData", 0) != string::npos)
454 vector<string> seq_pos_str =
getValues_(temp,
"");
455 for (
size_t j = 0; j < seq_pos_str.size(); j++)
467 if (in[i].find(
"AllelicData", 0) != string::npos)
469 string temp1 = in[++i];
470 string temp2 = in[++i];
471 vector<string> allele_pos_str1 =
getValues_(temp1,
"");
472 vector<string> allele_pos_str2 =
getValues_(temp2,
"");
479 if (allele_pos_str1.size() == allele_pos_str2.size())
481 for (
size_t j = 0; j < allele_pos_str1.size(); j++)
485 vector<string> tmp_alleles_id;
495 tmp_alleles_id.push_back(allele_pos_str1[j]);
507 tmp_alleles_id.push_back(allele_pos_str2[j]);
519 if (tmp_indiv.
getId() !=
"")
549 os <<
"[General]" << endl;
555 os <<
"SequenceType = " << seq_type << endl;
560 os << endl <<
"[Localities]" << endl;
573 os << endl <<
"[Sequences]" << endl;
586 os << endl <<
"[Loci]" << endl;
590 os <<
">" << tmp_locus_info.
getName() << endl;
606 os << endl <<
"[Individuals]" << endl;
614 os <<
">" << tmp_ind->
getId() << endl;
619 os <<
"Coord = " << tmp_ind->
getX() <<
" " << tmp_ind->
getY() << endl;
625 os <<
"SequenceData = {" << endl;
626 for (
size_t k = 0; k < nbss; k++)
647 vector<vector<string> > output(tmp_genotype.
size());
648 os <<
"AllelicData = {" << endl;
649 for (
size_t k = 0; k < tmp_genotype.
size(); k++)
661 if (tmp_all_ind.size() > 1)
667 for (
size_t k = 0; k < output.size(); k++)
670 if (k < output.size() - 1)
675 for (
size_t k = 0; k < output.size(); k++)
678 if (k < output.size() - 1)
696 vector<string> values;
697 size_t limit = param_line.find(delim, 0);
698 if (limit != string::npos)
699 param_line = string(param_line.begin() +
static_cast<ptrdiff_t
>(limit + delim.size()), param_line.end());
706 values.push_back(
string(param_line.begin() +
static_cast<ptrdiff_t
>(bi), param_line.begin() +
static_cast<ptrdiff_t
>(bs)));
710 values.push_back(
string(param_line.begin() +
static_cast<ptrdiff_t
>(bi), param_line.end()));
virtual void read(std::istream &is, DataSet &data_set)=0
Read a DataSet on istream.
virtual void readSequences(std::istream &input, SequenceContainer &sc) const
virtual void write(std::ostream &os, const DataSet &data_set) const =0
Write a DataSet on ostream.
virtual const std::string & getId() const =0
Get the identifier of the allele.
void setLocusInfo(size_t locus_position, const LocusInfo &locus)
Set a LocusInfo.
The BasicAlleleInfo class.
std::string getAlphabetType() const
Get the alphabet type as a string.
void setAnalyzedLoci(const AnalyzedLoci &analyzedLoci)
Set the AnalyzedLoci to the DataSet.
bool hasAlleleicData() const
Tell if there is allelic data.
size_t getNumberOfGroups() const
Get the number of Groups.
size_t getNumberOfLocalities() const
Get the number of Localities.
const Group & getGroupAtPosition(size_t group_position) const
Get a group by position.
void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
bool hasLocality() const
Tell if there is at least one locality.
size_t getNumberOfIndividualsInGroup(size_t group_position) const
Get the number of Individuals in a Group.
void setAlphabet(const Alphabet *alpha)
Set the alphabet of the AnalyzedSequences.
bool hasSequenceData() const
Tell if at least one individual has at least one sequence.
size_t getGroupPosition(size_t group_id) const
Get the position of a Group.
const Alphabet * getAlphabet() const
Get the alphabet if there is sequence data.
const Locality< double > & getLocalityAtPosition(size_t locality_position) const
Get a Locality by locality_position.
const LocusInfo & getLocusInfoAtPosition(size_t locus_position) const
Get a LocusInfo by its position.
size_t getNumberOfLoci() const
Get the number of loci.
void addIndividualToGroup(size_t group_position, const Individual &individual)
Add an Individual to a Group.
void addEmptyGroup(size_t group_id)
Add an empty Group to the DataSet.
const Individual * getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const
Get an Individual from a Group.
const Locality< double > & getLocalityByName(const std::string &name) const
Get a Locality by name.
void addLocality(Locality< double > &locality)
Add a locality to the DataSet.
std::string getDateStr() const
Get the Date as a string.
void writeSequences(std::ostream &output, const SequenceContainer &sc) const
double getY() const
Get the Y coordinate of the Individual.
bool hasCoord() const
Tell if this Individual has coordinates.
const Sequence & getSequenceAtPosition(const size_t sequence_position) const
Get a sequence by its position.
bool hasSequences() const
Tell if the Individual has some sequences.
void addSequence(size_t sequence_key, const Sequence &sequence)
Add a sequence to the Individual.
void setMonolocusGenotypeByAlleleId(size_t locus_position, const std::vector< std::string > allele_id, const LocusInfo &locus_info)
Set a MonolocusGenotype.
void initGenotype(size_t loci_number)
Init the genotype.
const std::string & getId() const
Get the id of the Individual.
void setCoord(const Point2D< double > &coord)
Set the coordinates of the Individual.
void setId(const std::string &id)
Set the id of the Individual.
bool hasGenotype() const
Tell if the Individual has a MultilocusGenotype.
void setDate(const Date &date)
Set the date of the Individual.
const MultilocusGenotype & getGenotype() const
Get the genotype.
const OrderedSequenceContainer & getSequences() const
Get a reference to the sequence container.
const Locality< double > * getLocality() const
Get the locality of the Individual.
void setLocality(const Locality< double > *locality)
Set the locality of the Individual.
bool hasDate() const
Tell if this Individual has a date.
double getX() const
Get the X coordinate of the Individual.
const Date & getDate() const
Get the date of the Individual.
size_t getNumberOfSequences() const
Get the number of sequences.
bool hasLocality() const
Tell if this Individual has a locality.
const std::string & getName() const
Get the name of the locality.
void setName(const std::string &name)
Set the name of the locality.
static unsigned int UNKNOWN
unsigned int getPloidy() const
Get the ploidy of the locus.
size_t getNumberOfAlleles() const
Get the number of alleles at this locus.
static unsigned int DIPLOID
const AlleleInfo & getAlleleInfoByKey(size_t key) const
Retrieve an AlleleInfo object of the LocusInfo.
static unsigned int HAPLOID
static unsigned int HAPLODIPLOID
const std::string & getName() const
Get the name of the locus.
virtual std::vector< size_t > getAlleleIndex() const =0
Get the alleles' index.
The MultilocusGenotype class.
const MonolocusGenotype & getMonolocusGenotype(size_t locus_position) const
Get a MonolocusGenotype.
bool isMonolocusGenotypeMissing(size_t locus_position) const
Tell if a MonolocusGenotype is a missing data.
size_t size() const
Count the number of loci.
static const std::string UNKNOWN
void parseIndividual_(const std::vector< std::string > &in, DataSet &data_set, const VectorSequenceContainer &vsc)
static const std::string HAPLODIPLOID
void parseLocality_(const std::vector< std::string > &in, DataSet &data_set)
static const std::string COMA
void write(std::ostream &os, const DataSet &data_set) const
Write a DataSet on ostream.
char missing_data_symbol_
static const std::string TAB
static const std::string SEMICOLON
void read(std::istream &is, DataSet &data_set)
Read a DataSet on istream.
std::string getDataSeparator() const
Get the code for data separator.
static const std::string DIPLOID
std::string getMissingDataSymbol() const
Get the code for missing data.
static const std::string WHITESPACE
std::vector< std::string > getValues_(std::string &param_line, const std::string &delim)
char getMissingDataChar() const
Get the character for missing data.
void parseLoci_(const std::vector< std::string > &in, std::vector< LocusInfo > &locus_info)
void setDataSeparator(const std::string &data_separator)
Set the code for data separator.
static const std::string HAPLOID
void parseGeneral_(const std::vector< std::string > &in, DataSet &data_set)
void setMissingDataSymbol(const std::string &missing_data_symbol)
Set the code for missing data.
void parseSequence_(const std::vector< std::string > &in, VectorSequenceContainer &vsc)
char getDataSeparatorChar() const
Get the data separator char.
const Sequence & getSequence(const std::string &name) const
int toInt(const std::string &s, char scientificNotation='e')
double toDouble(const std::string &s, char dec='.', char scientificNotation='e')
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string toUpper(const std::string &s)
bool isWhiteSpaceCharacter(char c)
std::string toString(T t)