bpp-popgen  3.0.0
DataSet.h
Go to the documentation of this file.
1 //
2 // File DataSet.h
3 // Author : Sylvain Gaillard
4 // Last modification : April 4, 2008
5 //
6 
7 /*
8  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10  This software is a computer program whose purpose is to provide classes
11  for population genetics analysis.
12 
13  This software is governed by the CeCILL license under French law and
14  abiding by the rules of distribution of free software. You can use,
15  modify and/ or redistribute the software under the terms of the CeCILL
16  license as circulated by CEA, CNRS and INRIA at the following URL
17  "http://www.cecill.info".
18 
19  As a counterpart to the access to the source code and rights to copy,
20  modify and redistribute granted by the license, users are provided only
21  with a limited warranty and the software's author, the holder of the
22  economic rights, and the successive licensors have only limited
23  liability.
24 
25  In this respect, the user's attention is drawn to the risks associated
26  with loading, using, modifying and/or developing or reproducing the
27  software by the user in light of its specific status of free software,
28  that may mean that it is complicated to manipulate, and that also
29  therefore means that it is reserved for developers and experienced
30  professionals having in-depth computer knowledge. Users are therefore
31  encouraged to load and test the software's suitability as regards their
32  requirements in conditions enabling the security of their systems and/or
33  data to be ensured and, more generally, to use and operate it in the
34  same conditions as regards security.
35 
36  The fact that you are presently reading this means that you have had
37  knowledge of the CeCILL license and that you accept its terms.
38  */
39 
40 #ifndef _DATASET_H_
41 #define _DATASET_H_
42 
43 // From the STL
44 #include <algorithm>
45 #include <vector>
46 #include <map>
47 #include <string>
48 
49 #include <Bpp/Exceptions.h>
50 #include <Bpp/Graphics/Point2D.h>
51 #include <Bpp/Utils/MapTools.h>
52 
53 #include "Group.h"
54 #include "Individual.h"
55 #include "Locality.h"
56 #include "../GeneralExceptions.h"
57 #include "AnalyzedLoci.h"
58 #include "AnalyzedSequences.h"
59 #include "../PolymorphismMultiGContainer.h"
60 #include "../PolymorphismSequenceContainer.h"
61 
62 namespace bpp
63 {
72 class DataSet
73 {
74 private:
77  std::vector<Locality<double>*> localities_;
78  std::vector<Group*> groups_;
79 
80 public:
81  // Constructor and destructor
85  DataSet();
86 
90  ~DataSet();
91 
95  DataSet(const DataSet& ds);
96 
97  DataSet& operator=(const DataSet& ds);
98 
99 public:
100  // Methodes
101 // ** Locality manipulation ***************************************************/
108  void addLocality(Locality<double>& locality);
109 
117  size_t getLocalityPosition(const std::string& name) const;
118 
126  const Locality<double>& getLocalityAtPosition(size_t locality_position) const;
127 
133  const Locality<double>& getLocalityByName(const std::string& name) const;
134 
140  void deleteLocalityAtPosition(size_t locality_position);
141 
147  void deleteLocalityByName(const std::string& name);
148 
152  size_t getNumberOfLocalities() const;
153 
157  bool hasLocality() const;
158 
159  // ** Group manipulation ******************************************************/
167  void addGroup(const Group& group);
168 
172  void addEmptyGroup(size_t group_id);
173 
177  const Group& getGroupById(size_t group_id) const;
178 
184  size_t getGroupPosition(size_t group_id) const;
185 
191  std::string getGroupName(size_t group_id) const;
197  void setGroupName(size_t group_id, const std::string& group_name) const;
198 
204  const Group& getGroupAtPosition(size_t group_position) const;
205 
211  void deleteGroupAtPosition(size_t group_position);
212 
216  size_t getNumberOfGroups() const;
217 
224  void mergeTwoGroups(size_t source_id, size_t target_id);
225 
236  void mergeGroups(std::vector<size_t>& group_ids);
237 
246  void splitGroup(size_t group_id, std::vector<size_t> individuals_selection);
247 
248  // ** Individuals manipulation ************************************************/
255  void addIndividualToGroup(size_t group_position, const Individual& individual);
256 
263  void addEmptyIndividualToGroup(size_t group_position, const std::string& individual_id);
264 
270  size_t getNumberOfIndividualsInGroup(size_t group_position) const;
271 
278  size_t getIndividualPositionInGroup(size_t group_position, const std::string& individual_id) const;
279 
286  const Individual* getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const;
287 
294  const Individual* getIndividualByIdFromGroup(size_t group_position, const std::string& individual_id) const;
295 
302  void deleteIndividualAtPositionFromGroup(size_t group_position, size_t individual_position);
303 
310  void deleteIndividualByIdFromGroup(size_t group_position, const std::string& individual_id);
311 
318  void setIndividualSexInGroup(size_t group_position, size_t individual_position, const unsigned short sex);
319 
326  unsigned short getIndividualSexInGroup(size_t group_position, size_t individual_position) const;
327 
334  void setIndividualDateInGroup(size_t group_position, size_t individual_position, const Date& date);
335 
343  const Date* getIndividualDateInGroup(size_t group_position, size_t individual_position) const;
344 
351  void setIndividualCoordInGroup(size_t group_position, size_t individual_position, const Point2D<double>& coord);
352 
360  const Point2D<double>* getIndividualCoordInGroup(size_t group_position, size_t individual_position) const;
361 
369  void setIndividualLocalityInGroupByName(size_t group_position, size_t individual_position, const std::string& locality_name);
370 
378  const Locality<double>* getIndividualLocalityInGroup(size_t group_position, size_t individual_position) const;
379 
388  void addIndividualSequenceInGroup(size_t group_position, size_t individual_position,
389  size_t sequence_position, const Sequence& sequence);
390 
400  const Sequence& getIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const;
401 
410  const Sequence& getIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) const;
411 
420  void deleteIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name);
421 
430  void deleteIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position);
431 
439  std::vector<std::string> getIndividualSequencesNamesInGroup(size_t group_position, size_t individual_position) const;
440 
449  size_t getIndividualSequencePositionInGroup(size_t group_position, size_t individual_position, const std::string& sequence_name) const;
450 
458  size_t getIndividualNumberOfSequencesInGroup(size_t group_position, size_t individual_position) const;
459 
466  void setIndividualGenotypeInGroup(size_t group_position, size_t individual_position, const MultilocusGenotype& genotype);
467 
477  void initIndividualGenotypeInGroup(size_t group_position, size_t individual_position);
478 
485  void deleteIndividualGenotypeInGroup(size_t group_position, size_t individual_position);
486 
495  void setIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position, const MonolocusGenotype& monogen);
496 
506  void setIndividualMonolocusGenotypeByAlleleKeyInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector<size_t> allele_keys);
507 
517  void setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector<std::string> allele_id);
518 
528  const MonolocusGenotype* getIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position) const;
529 
530  // ** AnalyzedSequences manipulation ******************************************/
534  void setAlphabet(const Alphabet* alpha);
535 
539  void setAlphabet(const std::string& alpha_type);
540 
546  const Alphabet* getAlphabet() const;
547 
553  std::string getAlphabetType() const;
554 
555  // ** AnalyzedLoci manipulation ***********************************************/
561  void setAnalyzedLoci(const AnalyzedLoci& analyzedLoci);
562 
568  void initAnalyzedLoci(size_t number_of_loci);
569 
575  const AnalyzedLoci* getAnalyzedLoci() const;
576 
580  void deleteAnalyzedLoci();
581 
588  void setLocusInfo(size_t locus_position, const LocusInfo& locus);
589 
593  const LocusInfo& getLocusInfoByName(const std::string& locus_name) const;
594 
598  const LocusInfo& getLocusInfoAtPosition(size_t locus_position) const;
599 
603  void addAlleleInfoByLocusName(const std::string& locus_name, const AlleleInfo& allele);
604 
608  void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo& allele);
609 
613  size_t getNumberOfLoci() const;
614 
618  size_t getPloidyByLocusName(const std::string& locus_name) const;
619 
623  size_t getPloidyByLocusPosition(size_t locus_position) const;
624 
625  // ** Container extraction ***************************************************/
630 
636  PolymorphismMultiGContainer* getPolymorphismMultiGContainer(const std::map<size_t, std::vector<size_t> >& selection) const;
637 
646  const std::map<size_t,
647  std::vector<size_t> >& selection,
648  size_t sequence_position) const;
649 
650  // ** General tests **********************************************************/
654  bool hasSequenceData() const;
655 
659  bool hasAlleleicData() const;
660 };
661 } // end of namespace bpp;
662 
663 #endif // _DATASET_H_
664 
The AlleleInfo interface.
Definition: AlleleInfo.h:60
The AnalyzedLoci class.
Definition: AnalyzedLoci.h:65
The AnalyzedSequences class.
The DataSet class.
Definition: DataSet.h:73
std::string getAlphabetType() const
Get the alphabet type as a string.
Definition: DataSet.cpp:1055
void setAnalyzedLoci(const AnalyzedLoci &analyzedLoci)
Set the AnalyzedLoci to the DataSet.
Definition: DataSet.cpp:1066
void setIndividualSexInGroup(size_t group_position, size_t individual_position, const unsigned short sex)
Set the sex of an Individual in a Group.
Definition: DataSet.cpp:528
void setIndividualDateInGroup(size_t group_position, size_t individual_position, const Date &date)
Set the Date of an Individual in a Group.
Definition: DataSet.cpp:560
bool hasAlleleicData() const
Tell if there is allelic data.
Definition: DataSet.cpp:1356
size_t getNumberOfGroups() const
Get the number of Groups.
Definition: DataSet.cpp:300
void setIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position, const MonolocusGenotype &monogen)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:921
size_t getNumberOfLocalities() const
Get the number of Localities.
Definition: DataSet.cpp:190
void initIndividualGenotypeInGroup(size_t group_position, size_t individual_position)
Initialize the genotype of an Individual in a Group.
Definition: DataSet.cpp:877
const Sequence & getIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string &sequence_name) const
Get a Sequence from an Individual of a Group.
Definition: DataSet.cpp:701
const Group & getGroupById(size_t group_id) const
Get a group by identifier.
Definition: DataSet.cpp:229
void setIndividualCoordInGroup(size_t group_position, size_t individual_position, const Point2D< double > &coord)
Set the coordinates of an Individual in a Group.
Definition: DataSet.cpp:596
const Group & getGroupAtPosition(size_t group_position) const
Get a group by position.
Definition: DataSet.cpp:281
std::vector< Locality< double > * > localities_
Definition: DataSet.h:77
const Locality< double > * getIndividualLocalityInGroup(size_t group_position, size_t individual_position) const
Get the Locality of an Individual in a Group.
Definition: DataSet.cpp:652
void addAlleleInfoByLocusName(const std::string &locus_name, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
Definition: DataSet.cpp:1162
void setIndividualLocalityInGroupByName(size_t group_position, size_t individual_position, const std::string &locality_name)
Set the Locality of an Individual in a Group.
Definition: DataSet.cpp:632
void addAlleleInfoByLocusPosition(size_t locus_position, const AlleleInfo &allele)
Add an AlleleInfo to a LocusInfo.
Definition: DataSet.cpp:1182
size_t getPloidyByLocusPosition(size_t locus_position) const
Get the ploidy of a locus.
Definition: DataSet.cpp:1227
void mergeTwoGroups(size_t source_id, size_t target_id)
Merge two groups.
Definition: DataSet.cpp:307
~DataSet()
Destroy a DataSet.
Definition: DataSet.cpp:98
void setIndividualGenotypeInGroup(size_t group_position, size_t individual_position, const MultilocusGenotype &genotype)
Set the MultilocusGenotype of an Individual in a Group.
Definition: DataSet.cpp:861
std::vector< Group * > groups_
Definition: DataSet.h:78
bool hasLocality() const
Tell if there is at least one locality.
Definition: DataSet.cpp:197
void initAnalyzedLoci(size_t number_of_loci)
Initialize the AnalyzedLoci for a given number of loci.
Definition: DataSet.cpp:1084
const Date * getIndividualDateInGroup(size_t group_position, size_t individual_position) const
Get the Date of an Individual in a Group.
Definition: DataSet.cpp:576
void deleteIndividualSequenceByNameInGroup(size_t group_position, size_t individual_position, const std::string &sequence_name)
Delete a Sequence of an Individual of a Group.
Definition: DataSet.cpp:749
void mergeGroups(std::vector< size_t > &group_ids)
Merge several Groups into one.
Definition: DataSet.cpp:338
void deleteLocalityByName(const std::string &name)
Delete a Locality from the DataSet.
Definition: DataSet.cpp:176
const MonolocusGenotype * getIndividualMonolocusGenotypeInGroup(size_t group_position, size_t individual_position, size_t locus_position) const
Get a MonolocusGenotype from an Individual of a Group.
Definition: DataSet.cpp:1002
size_t getNumberOfIndividualsInGroup(size_t group_position) const
Get the number of Individuals in a Group.
Definition: DataSet.cpp:439
void setGroupName(size_t group_id, const std::string &group_name) const
Set the name of a Group.
Definition: DataSet.cpp:254
void setAlphabet(const Alphabet *alpha)
Set the alphabet of the AnalyzedSequences.
Definition: DataSet.cpp:1028
void deleteIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position)
Delete a Sequence of an Individual of a Group.
Definition: DataSet.cpp:773
void deleteIndividualAtPositionFromGroup(size_t group_position, size_t individual_position)
Delete an Individual from a group.
Definition: DataSet.cpp:496
void deleteGroupAtPosition(size_t group_position)
Delete a Group from the DataSet.
Definition: DataSet.cpp:290
size_t getIndividualSequencePositionInGroup(size_t group_position, size_t individual_position, const std::string &sequence_name) const
Get the position of a Sequence in an Individual of a Group.
Definition: DataSet.cpp:817
void addIndividualSequenceInGroup(size_t group_position, size_t individual_position, size_t sequence_position, const Sequence &sequence)
Add a Sequence to an Individual in a Group.
Definition: DataSet.cpp:672
bool hasSequenceData() const
Tell if at least one individual has at least one sequence.
Definition: DataSet.cpp:1349
size_t getGroupPosition(size_t group_id) const
Get the position of a Group.
Definition: DataSet.cpp:269
unsigned short getIndividualSexInGroup(size_t group_position, size_t individual_position) const
Get the sex of an Individual in a Group.
Definition: DataSet.cpp:544
std::string getGroupName(size_t group_id) const
Get the name of a Group. If the name is an empty string it just returns the group_id.
Definition: DataSet.cpp:241
const Individual * getIndividualByIdFromGroup(size_t group_position, const std::string &individual_id) const
Get an Individual from a Group.
Definition: DataSet.cpp:480
DataSet & operator=(const DataSet &ds)
Definition: DataSet.cpp:78
void setIndividualMonolocusGenotypeByAlleleIdInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector< std::string > allele_id)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:973
void addGroup(const Group &group)
Add a Group to the DataSet.
Definition: DataSet.cpp:205
const Alphabet * getAlphabet() const
Get the alphabet if there is sequence data.
Definition: DataSet.cpp:1046
std::vector< std::string > getIndividualSequencesNamesInGroup(size_t group_position, size_t individual_position) const
Get the Sequences' names from an Individual of a Group.
Definition: DataSet.cpp:797
void setLocusInfo(size_t locus_position, const LocusInfo &locus)
Set a LocusInfo.
Definition: DataSet.cpp:1110
void setIndividualMonolocusGenotypeByAlleleKeyInGroup(size_t group_position, size_t individual_position, size_t locus_position, const std::vector< size_t > allele_keys)
Set a MonolocusGenotype of an Individual from a group.
Definition: DataSet.cpp:945
const Locality< double > & getLocalityAtPosition(size_t locality_position) const
Get a Locality by locality_position.
Definition: DataSet.cpp:143
PolymorphismSequenceContainer * getPolymorphismSequenceContainer(const std::map< size_t, std::vector< size_t > > &selection, size_t sequence_position) const
Get a PolymorphismSequenceContainer from a selection of groups and individuals.
Definition: DataSet.cpp:1307
void addEmptyIndividualToGroup(size_t group_position, const std::string &individual_id)
Add an empty Individual to a Group.
Definition: DataSet.cpp:423
AnalyzedSequences * analyzedSequences_
Definition: DataSet.h:76
void deleteAnalyzedLoci()
Delete the AnalyzedLoci.
Definition: DataSet.cpp:1102
const LocusInfo & getLocusInfoByName(const std::string &locus_name) const
Get a LocusInfo by its name.
Definition: DataSet.cpp:1126
size_t getPloidyByLocusName(const std::string &locus_name) const
Get the ploidy of a locus.
Definition: DataSet.cpp:1211
const Sequence & getIndividualSequenceAtPositionInGroup(size_t group_position, size_t individual_position, size_t sequence_position) const
Get a Sequence from an Individual of a Group.
Definition: DataSet.cpp:725
PolymorphismMultiGContainer * getPolymorphismMultiGContainer() const
Get a PolymorphismMultiGContainer with all allelic data of the DataSet.
Definition: DataSet.cpp:1245
size_t getLocalityPosition(const std::string &name) const
Get the position of a locality in the container.
Definition: DataSet.cpp:131
size_t getIndividualNumberOfSequencesInGroup(size_t group_position, size_t individual_position) const
Get the number of Sequences in an Individual of a Group.
Definition: DataSet.cpp:841
void deleteIndividualGenotypeInGroup(size_t group_position, size_t individual_position)
Delete the MultilocusGenotype of an Individual from a Group.
Definition: DataSet.cpp:905
DataSet()
Build a new empty DataSet.
Definition: DataSet.cpp:48
const LocusInfo & getLocusInfoAtPosition(size_t locus_position) const
Get a LocusInfo by its position.
Definition: DataSet.cpp:1142
size_t getNumberOfLoci() const
Get the number of loci.
Definition: DataSet.cpp:1202
const Point2D< double > * getIndividualCoordInGroup(size_t group_position, size_t individual_position) const
Get the coordinate of an Individual in a Group.
Definition: DataSet.cpp:612
const AnalyzedLoci * getAnalyzedLoci() const
Get the AnalyzedLoci if there is one.
Definition: DataSet.cpp:1093
AnalyzedLoci * analyzedLoci_
Definition: DataSet.h:75
void addIndividualToGroup(size_t group_position, const Individual &individual)
Add an Individual to a Group.
Definition: DataSet.cpp:405
size_t getIndividualPositionInGroup(size_t group_position, const std::string &individual_id) const
Get the position of an Individual in a Group.
Definition: DataSet.cpp:448
void deleteIndividualByIdFromGroup(size_t group_position, const std::string &individual_id)
Delete an Individual from a group.
Definition: DataSet.cpp:512
void deleteLocalityAtPosition(size_t locality_position)
Delete a Locality from the DataSet.
Definition: DataSet.cpp:166
void addEmptyGroup(size_t group_id)
Add an empty Group to the DataSet.
Definition: DataSet.cpp:217
void splitGroup(size_t group_id, std::vector< size_t > individuals_selection)
Split a group in two.
Definition: DataSet.cpp:369
const Individual * getIndividualAtPositionFromGroup(size_t group_position, size_t individual_position) const
Get an Individual from a Group.
Definition: DataSet.cpp:464
const Locality< double > & getLocalityByName(const std::string &name) const
Get a Locality by name.
Definition: DataSet.cpp:152
void addLocality(Locality< double > &locality)
Add a locality to the DataSet.
Definition: DataSet.cpp:119
The Date class.
Definition: Date.h:57
The Group class.
Definition: Group.h:71
The Individual class.
Definition: Individual.h:76
The LocusInfo class.
Definition: LocusInfo.h:64
The MonolocusGenotype virtual class.
The MultilocusGenotype class.
The PolymorphismMultiGContainer class.
The PolymorphismSequenceContainer class.