bpp-seq3  3.0.0
Mase.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_SEQ_IO_MASE_H
6 #define BPP_SEQ_IO_MASE_H
7 
8 #include <Bpp/Numeric/Range.h>
9 #include <Bpp/Utils/MapTools.h>
10 
11 #include "../Container/SequenceContainer.h"
12 #include "../Container/VectorSequenceContainer.h"
13 #include "../Sequence.h"
14 #include "AbstractIAlignment.h"
15 #include "AbstractISequence.h"
16 #include "AbstractOSequence.h"
17 
18 namespace bpp
19 {
26 {
27 private:
// Tree strings indexed by name (read by getTree(), written by setTree()).
// NOTE(review): `mutable` lets const member functions modify this map; it looks
// like it is only needed for operator[] access in the const getters - confirm.
28  mutable std::map<std::string, std::string> trees_;
// Site selections indexed by name (read by getSiteSelection(), written by setSiteSelection()).
29  mutable std::map<std::string, MultiRange<size_t>> siteSelections_;
// Sequence selections indexed by name, each stored as a vector of size_t values
// (presumably sequence positions/indices - confirm against the Mase reader).
30  mutable std::map<std::string, std::vector<size_t>> sequenceSelections_;
31 
32 public:
34  virtual ~MaseHeader() {}
35 
36 public:
37  size_t getNumberOfTrees() const { return trees_.size(); }
38  size_t getNumberOfSiteSelections() const { return siteSelections_.size(); }
39  size_t getNumberOfSequenceSelections() const { return sequenceSelections_.size(); }
40 
41  std::vector<std::string> getTreeNames() const { return MapTools::getKeys(trees_); }
42  std::vector<std::string> getSiteSelectionNames() const { return MapTools::getKeys(siteSelections_); }
43  std::vector<std::string> getSequenceSelectionNames() const { return MapTools::getKeys(sequenceSelections_); }
44 
45  const std::string& getTree(const std::string& name) const
46  {
47  if (trees_.find(name) != trees_.end())
48  {
49  return trees_[name];
50  }
51  else
52  {
53  throw Exception("MaseHeader::getTree. No tree with name " + name);
54  }
55  }
56  const MultiRange<size_t>& getSiteSelection(const std::string& name) const
57  {
58  if (siteSelections_.find(name) != siteSelections_.end())
59  {
60  return siteSelections_[name];
61  }
62  else
63  {
64  throw Exception("MaseHeader::getSiteSelection. No site selection with name " + name);
65  }
66  }
67  const std::vector<size_t>& getSequenceSelection(const std::string& name) const
68  {
69  if (sequenceSelections_.find(name) != sequenceSelections_.end())
70  {
71  return sequenceSelections_[name];
72  }
73  else
74  {
75  throw Exception("MaseHeader::getSequenceSelection. No sequence selection with name " + name);
76  }
77  }
78 
79  void setTree(const std::string& name, const std::string& tree)
80  {
81  trees_[name] = tree;
82  }
83  void setSiteSelection(const std::string& name, const MultiRange<size_t>& ranges)
84  {
85  siteSelections_[name] = ranges;
86  }
87  void setSequenceSelection(const std::string& name, const std::vector<size_t>& set)
88  {
89  sequenceSelections_[name] = set;
90  }
91 };
92 
103 class Mase :
104  public AbstractISequence,
105  public AbstractIAlignment,
106  public AbstractOSequence2
107 {
108 private:
112  unsigned int charsByLine_;
114 
115 public:
122  Mase(unsigned int charsByLine = 100, bool checkSequenceNames = true) : charsByLine_(charsByLine), checkNames_(checkSequenceNames) {}
123 
124  // Class destructor
125  virtual ~Mase() {}
126 
127 public:
133  std::unique_ptr<SequenceContainerInterface> readMeta(std::istream& input, std::shared_ptr<const Alphabet>& alpha, MaseHeader& header) const
134  {
135  readHeader_(input, header);
136  return AbstractISequence::readSequences(input, alpha);
137  }
138 
139  std::unique_ptr<SequenceContainerInterface> readMeta(std::string& path, std::shared_ptr<const Alphabet>& alpha, MaseHeader& header) const
140  {
141  std::ifstream input(path.c_str(), std::ios::in);
142  auto sc = readMeta(input, alpha, header);
143  input.close();
144  return sc;
145  }
153  void appendSequencesFromStream(std::istream& input, SequenceContainerInterface& sc) const override;
161  void appendAlignmentFromStream(std::istream& input, SequenceContainerInterface& sc) const override
162  {
163  appendSequencesFromStream(input, sc); // This might cast an exception if sequences are not aligned!
164  }
173  void writeSequences(std::ostream& output, const SequenceContainerInterface& sc) const override;
174 
175  void writeSequences(const std::string& path, const SequenceContainerInterface& sc, bool overwrite = true) const override
176  {
177  AbstractOSequence::writeSequences(path, sc, overwrite);
178  }
186  void writeMeta(std::ostream& output, const SequenceContainerInterface& sc, const MaseHeader& header) const
187  {
188  writeHeader_(output, header);
189  writeSequences(output, sc);
190  }
191  void writeMeta(const std::string& path, const SequenceContainerInterface& sc, const MaseHeader& header, bool overwrite = true) const
192  {
193  // Open file in specified mode
194  std::ofstream output(path.c_str(), overwrite ? (std::ios::out) : (std::ios::out | std::ios::app));
195  writeHeader_(output, header);
196  writeSequences(output, sc);
197  output.close();
198  }
206  const std::string getFormatName() const override { return "MASE file"; }
207 
208  const std::string getFormatDescription() const override
209  {
210  return "Optional file comments (preceded by ;;), sequence comments (preceded by ;), sequence name, sequence";
211  }
217  bool checkNames() const { return checkNames_; }
218 
224  void checkNames(bool yn) { checkNames_ = yn; }
225 
226 private:
227  void readHeader_(std::istream& input, MaseHeader& header) const;
228  void writeHeader_(std::ostream& output, const MaseHeader& header) const;
229 };
230 } // end of namespace bpp.
231 #endif // BPP_SEQ_IO_MASE_H
Partial implementation of the IAlignment interface, dedicated to alignment readers.
Partial implementation of the ISequence interface.
void readSequences(std::istream &input, SequenceContainerInterface &sc) const override
Add sequences to a container from a stream.
Partial implementation of the OAlignment interface.
void writeSequences(std::ostream &output, const SequenceContainerInterface &sc) const override=0
Write a container to a stream.
static std::vector< Key > getKeys(const std::map< Key, T, Cmp > &myMap)
A class to store information from the header of Mase files.
Definition: Mase.h:26
virtual ~MaseHeader()
Definition: Mase.h:34
size_t getNumberOfSiteSelections() const
Definition: Mase.h:38
std::vector< std::string > getTreeNames() const
Definition: Mase.h:41
const std::vector< size_t > & getSequenceSelection(const std::string &name) const
Definition: Mase.h:67
std::map< std::string, std::vector< size_t > > sequenceSelections_
Definition: Mase.h:30
void setSiteSelection(const std::string &name, const MultiRange< size_t > &ranges)
Definition: Mase.h:83
std::map< std::string, MultiRange< size_t > > siteSelections_
Definition: Mase.h:29
std::vector< std::string > getSiteSelectionNames() const
Definition: Mase.h:42
const MultiRange< size_t > & getSiteSelection(const std::string &name) const
Definition: Mase.h:56
std::map< std::string, std::string > trees_
Definition: Mase.h:28
const std::string & getTree(const std::string &name) const
Definition: Mase.h:45
void setSequenceSelection(const std::string &name, const std::vector< size_t > &set)
Definition: Mase.h:87
MaseHeader()
Definition: Mase.h:33
size_t getNumberOfTrees() const
Definition: Mase.h:37
std::vector< std::string > getSequenceSelectionNames() const
Definition: Mase.h:43
size_t getNumberOfSequenceSelections() const
Definition: Mase.h:39
void setTree(const std::string &name, const std::string &tree)
Definition: Mase.h:79
The mase sequence file format.
Definition: Mase.h:107
void appendSequencesFromStream(std::istream &input, SequenceContainerInterface &sc) const override
Append sequences to a container from a stream.
Definition: Mase.cpp:13
void writeMeta(std::ostream &output, const SequenceContainerInterface &sc, const MaseHeader &header) const
Definition: Mase.h:186
Mase(unsigned int charsByLine=100, bool checkSequenceNames=true)
Build a new Mase object.
Definition: Mase.h:122
void checkNames(bool yn)
Tell whether the sequence names should be checked when reading from files.
Definition: Mase.h:224
virtual ~Mase()
Definition: Mase.h:125
void writeSequences(std::ostream &output, const SequenceContainerInterface &sc) const override
Write a container to a stream.
Definition: Mase.cpp:93
void writeHeader_(std::ostream &output, const MaseHeader &header) const
Definition: Mase.cpp:270
bool checkNames() const
Definition: Mase.h:217
unsigned int charsByLine_
The maximum number of chars to be written on a line.
Definition: Mase.h:112
const std::string getFormatName() const override
Definition: Mase.h:206
void appendAlignmentFromStream(std::istream &input, SequenceContainerInterface &sc) const override
Append sequences to a container from a stream.
Definition: Mase.h:161
void writeSequences(const std::string &path, const SequenceContainerInterface &sc, bool overwrite=true) const override
Write a container to a file.
Definition: Mase.h:175
const std::string getFormatDescription() const override
Definition: Mase.h:208
bool checkNames_
Definition: Mase.h:113
std::unique_ptr< SequenceContainerInterface > readMeta(std::string &path, std::shared_ptr< const Alphabet > &alpha, MaseHeader &header) const
Definition: Mase.h:139
std::unique_ptr< SequenceContainerInterface > readMeta(std::istream &input, std::shared_ptr< const Alphabet > &alpha, MaseHeader &header) const
Definition: Mase.h:133
void writeMeta(const std::string &path, const SequenceContainerInterface &sc, const MaseHeader &header, bool overwrite=true) const
Definition: Mase.h:191
void readHeader_(std::istream &input, MaseHeader &header) const
Definition: Mase.cpp:159
The SequenceContainer interface.
This alphabet is used to deal with NumericAlphabet.