52 if (!incomingBlock_)
return 0;
53 currentBlock_ = incomingBlock_;
54 incomingBlock_ = iterator_->nextBlock();
55 while (incomingBlock_) {
56 size_t globalSpace = 0;
57 for (
size_t i = 0; i < species_.size(); ++i) {
59 const MafSequence* seq1 = ¤tBlock_->getSequenceForSpecies(species_[i]);
60 const MafSequence* seq2 = &incomingBlock_->getSequenceForSpecies(species_[i]);
62 throw Exception(
"BlockMergerMafIterator::nextBlock. Species '" + species_[i] +
"' is missing coordinates in at least one block.");
66 size_t space = seq2->
start() - seq1->
stop();
72 if (space != globalSpace)
92 (*logstream_ <<
"BLOCK MERGER: merging two consecutive blocks.").endLine();
94 vector<string> sp1 = currentBlock_->getSpeciesList();
95 vector<string> sp2 = incomingBlock_->getSpeciesList();
100 unsigned int p1 = currentBlock_->getPass();
101 unsigned int p2 = incomingBlock_->getPass();
102 if (p1 == p2) mergedBlock->
setPass(p1);
103 double s1 = currentBlock_->getScore();
104 double n1 =
static_cast<double>(currentBlock_->getNumberOfSites());
105 double s2 = incomingBlock_->getScore();
106 double n2 =
static_cast<double>(incomingBlock_->getNumberOfSites());
107 mergedBlock->
setScore((s1 * n1 + s2 * n2) / (n1 + n2));
110 for (
size_t i = 0; i < allSp.size(); ++i) {
111 unique_ptr<MafSequence> seq;
113 seq.reset(
new MafSequence(currentBlock_->getSequenceForSpecies(allSp[i])));
117 unique_ptr<MafSequence> tmp(
new MafSequence(incomingBlock_->getSequenceForSpecies(allSp[i])));
118 string ref1 = seq->getDescription(), ref2 = tmp->getDescription();
120 if (globalSpace > 0) {
122 (*logstream_ <<
"BLOCK MERGER: a spacer of size " << globalSpace <<
" is inserted in sequence for species " << allSp[i] <<
".").endLine();
126 if (seq->getChromosome() != tmp->getChromosome()) {
127 if (renameChimericChromosomes_) {
128 if (seq->getChromosome().substr(0, 7) !=
"chimtig") {
130 chimericChromosomeCounts_[seq->getSpecies()]++;
131 seq->setChromosome(
"chimtig" +
TextTools::toString(chimericChromosomeCounts_[seq->getSpecies()]));
134 seq->setChromosome(seq->getChromosome() +
"-" + tmp->getChromosome());
136 seq->removeCoordinates();
138 if (seq->getStrand() != tmp->getStrand()) {
140 seq->removeCoordinates();
142 if (seq->getName() != tmp->getName())
143 tmp->setName(seq->getName());
146 (*logstream_ <<
"BLOCK MERGER: merging " << ref1 <<
" with " << ref2 <<
" into " << seq->getDescription()).endLine();
150 string ref1 = seq->getDescription();
151 seq->setToSizeR(seq->size() + incomingBlock_->getNumberOfSites() + globalSpace);
153 (*logstream_ <<
"BLOCK MERGER: extending " << ref1 <<
" with " << incomingBlock_->getNumberOfSites() <<
" gaps on the right.").endLine();
158 seq.reset(
new MafSequence(incomingBlock_->getSequenceForSpecies(allSp[i])));
159 string ref2 = seq->getDescription();
160 seq->setToSizeL(seq->size() + currentBlock_->getNumberOfSites() + globalSpace);
162 (*logstream_ <<
"BLOCK MERGER: adding " << ref2 <<
" and extend it with " << currentBlock_->getNumberOfSites() <<
" gaps on the left.").endLine();
168 delete currentBlock_;
169 delete incomingBlock_;
170 currentBlock_ = mergedBlock;
172 incomingBlock_ = iterator_->nextBlock();
174 return currentBlock_;
MafBlock * analyseCurrentBlock_()
A synteny block data structure, the basic unit of a MAF alignement file.
void setScore(double score)
void setPass(unsigned int pass)
void addSequence(const MafSequence &sequence)
A sequence class which is used to store data from MAF files.
const std::string & getChromosome() const
bool hasCoordinates() const
size_t getSrcSize() const
std::string toString(T t)