bpp-seq3  3.0.0
Dcse.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Io/FileTools.h>
7 #include <Bpp/Text/TextTools.h>
8 
9 #include "../Container/SequenceContainer.h"
10 #include "../Container/VectorSequenceContainer.h"
11 #include "../Sequence.h"
12 #include "AbstractIAlignment.h"
13 #include "Dcse.h"
14 
15 using namespace bpp;
16 using namespace std;
17 
19 {
20  // Checking the existence of specified file
21  if (!input)
22  {
23  throw IOException ("DCSE::read : fail to open file");
24  }
25 
26  // Initialization
27  auto alphaPtr = sc.getAlphabet();
28  string line, name, sequence = "";
29 
30  line = FileTools::getNextLine(input); // Copy current line in temporary string
31  // StringTokenizer st(line);
32  // st.nextToken();
33  // First line ignored for now!
34  // int n1 = TextTools::toInt(st.nextToken());
35  // int n2 = TextTools::toInt(st.nextToken());
36  // int nbSites = n2 - n1
37  // cout << nbSpecies << " species and " << nbSites << " sites." << endl;
38 
39  // Main loop : for all file lines
40  while (!input.eof())
41  {
42  line = FileTools::getNextLine(input); // Copy current line in temporary string
43  if (line == "")
44  break;
45  string::size_type endOfSeq = line.find(" ");
46  if (endOfSeq == line.npos)
47  break;
48  sequence = string(line.begin(), line.begin() + static_cast<ptrdiff_t>(endOfSeq));
49  sequence = TextTools::removeWhiteSpaces(sequence);
50  sequence = TextTools::removeChar(sequence, '{');
51  sequence = TextTools::removeChar(sequence, '}');
52  sequence = TextTools::removeChar(sequence, '[');
53  sequence = TextTools::removeChar(sequence, ']');
54  sequence = TextTools::removeChar(sequence, '(');
55  sequence = TextTools::removeChar(sequence, ')');
56  sequence = TextTools::removeChar(sequence, '^');
57  name = string(line.begin() + static_cast<ptrdiff_t>(endOfSeq + 1), line.end()),
59  if (name.find("Helix numbering") == name.npos
60  && name.find("mask") == name.npos)
61  {
62  auto seqPtr = make_unique<Sequence>(name, sequence, alphaPtr);
63  sc.addSequence(name, seqPtr);
64  }
65  }
66 }
67 
68 const string DCSE::getFormatName() const { return "DCSE"; }
69 
70 const string DCSE::getFormatDescription() const { return "RNA structure format"; }
void appendAlignmentFromStream(std::istream &input, SequenceContainerInterface &sc) const override
Append sequences to a container from a stream.
Definition: Dcse.cpp:18
const std::string getFormatName() const override
Definition: Dcse.cpp:68
const std::string getFormatDescription() const override
Definition: Dcse.cpp:70
static std::string getNextLine(std::istream &in)
The SequenceContainer interface.
virtual void addSequence(const HashType &sequenceKey, std::unique_ptr< SequenceType > &sequencePtr)=0
Add a sequence to the container.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get a pointer toward the container's alphabet.
std::string removeWhiteSpaces(const std::string &s)
std::string removeChar(const std::string &s, char c)
std::string removeFirstWhiteSpaces(const std::string &s)
This alphabet is used to deal with NumericAlphabet.