bpp-seq3  3.0.0
Clustal.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Io/FileTools.h>
7 #include <Bpp/Text/TextTools.h>
8 
9 #include "Clustal.h"
10 
11 using namespace bpp;
12 
13 // From the STL:
14 #include <iostream>
15 #include <iomanip>
16 using namespace std;
17 
19 {
20  // Checking the existence of specified file
21  if (!input)
22  {
23  throw IOException ("Clustal::read : fail to open file");
24  }
25 
26  auto alphaPtr = sc.getAlphabet();
27  vector< unique_ptr<Sequence>> sequences;
28 
29  string lineRead("");
30 
31  Comments comments(1);
32  comments[0] = FileTools::getNextLine(input); // First line gives file generator.
33 
34  lineRead = FileTools::getNextLine(input); // This is the first sequence of the first block.
35 
36  string::size_type beginSeq = 0;
37  unsigned int count = 0;
38  for (size_t i = lineRead.size(); i > 0; i--)
39  {
40  char c = lineRead[i - 1];
41  if (c == ' ')
42  {
43  count++;
44  if (count == nbSpacesBeforeSeq_)
45  {
46  beginSeq = i - 1 + nbSpacesBeforeSeq_;
47  break;
48  }
49  }
50  else
51  count = 0;
52  }
53  if (beginSeq == 0)
54  throw IOException("Clustal::read. Bad input file.");
55 
56  unsigned int countSequences = 0;
57 
58  // Read first sequences block:
59  bool test = true;
60  do
61  {
62  sequences.push_back(make_unique<Sequence>(TextTools::removeSurroundingWhiteSpaces(lineRead.substr(0, beginSeq - nbSpacesBeforeSeq_)), lineRead.substr(beginSeq), alphaPtr));
63  getline(input, lineRead, '\n');
64  countSequences++;
65  test = !TextTools::isEmpty(lineRead) && !TextTools::isEmpty(lineRead.substr(0, beginSeq - nbSpacesBeforeSeq_));
66  }
67  while (input && test);
68 
69  // Read other blocks
70  lineRead = FileTools::getNextLine(input); // Read first sequence of next block.
71  while (!TextTools::isEmpty(lineRead))
72  {
73  // Read next block:
74  for (unsigned int i = 0; i < countSequences; ++i)
75  {
76  // Complete sequences
77  if (TextTools::isEmpty(lineRead))
78  throw IOException("Clustal::read. Bad input file.");
79  sequences[i]->append(lineRead.substr(beginSeq));
80  getline(input, lineRead, '\n');
81  }
82  // At this point, lineRead is the first line after the current block.
83  lineRead = FileTools::getNextLine(input);
84  }
85 
86  for (unsigned int i = 0; i < countSequences; ++i)
87  {
88  sc.addSequence(sequences[i]->getName(), sequences[i]);
89  }
90  sc.setComments(comments);
91 }
92 
93 void Clustal::writeAlignment(std::ostream& output, const SiteContainerInterface& sc) const
94 {
95  output << "CLUSTAL W (1.81) multiple sequence alignment" << endl;
96  output << endl;
97  if (sc.getNumberOfSequences() == 0)
98  return;
99 
100  vector<string> text;
101  size_t length = 0;
102  for (size_t i = 0; i < sc.getNumberOfSequences(); ++i)
103  {
104  const Sequence& seq = sc.sequence(i);
105  if (seq.getName().size() > length)
106  length = seq.getName().size();
107  text.push_back(sc.sequence(i).toString());
108  }
109  length += nbSpacesBeforeSeq_;
110  for (unsigned int j = 0; j < text[0].size(); j += charsByLine_)
111  {
112  for (unsigned int i = 0; i < sc.getNumberOfSequences(); ++i)
113  {
114  output << TextTools::resizeRight(sc.sequence(i).getName(), length);
115  output << text[i].substr(j, charsByLine_) << endl;
116  }
117  output << endl;
118  }
119 }
const std::string & getName() const override
Get the name of this sequence.
Definition: CoreSequence.h:170
void writeAlignment(std::ostream &output, const SiteContainerInterface &sc) const override
Write a container to a stream.
Definition: Clustal.cpp:93
void appendAlignmentFromStream(std::istream &input, SequenceContainerInterface &sc) const override
Append sequences to a container from a stream.
Definition: Clustal.cpp:18
virtual void setComments(const Comments &comments)=0
Set the comments.
static std::string getNextLine(std::istream &in)
A basic implementation of the Sequence interface.
Definition: Sequence.h:117
The SequenceContainer interface.
virtual void addSequence(const HashType &sequenceKey, std::unique_ptr< SequenceType > &sequencePtr)=0
Add a sequence to the container.
virtual const SequenceType & sequence(const HashType &sequenceKey) const override=0
Retrieve a sequence object from the container.
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get a pointer toward the container's alphabet.
std::string removeSurroundingWhiteSpaces(const std::string &s)
bool isEmpty(const std::string &s)
std::string resizeRight(const std::string &s, std::size_t newSize, char fill)
std::size_t count(const std::string &s, const std::string &pattern)
This alphabet is used to deal with NumericAlphabet.
std::vector< std::string > Comments
Declaration of Comments type.
Definition: Commentable.h:21