Lobry (2004) LNCS 3039:679
This page allows for the on-line reproduction of the figures
in the paper:
Lobry, J.R. (2004) Life history traits and genome structure: aerobiosis
and G+C content in bacteria. Lecture Notes in Computer Sciences , 3039 :679-686.
[
PDF ]
Abstract: Evolution is a tinkerer not an engineer: the term exaptation was coined to signify that old structures, that could be not significant in terms of fitness, get re-used when environmental conditions changed. Here I show that the average protein composition of G+C rich bacteria were exapted to the switch from anaerobic to aerobic conditions about 2 billions years ago. Because the proteome composition is under the strong control of directional mutation pressure, this is an example of exaptation at the molecular level for which the underlying mechanism is documented.
Note: Figure 1 is not a true reproduction
since clicking on the "do it again" button re-draws the figure from
the very last available data. The last figure about the linear geologic
time scale was not in the paper but just in my
talk , no extra charges,
but not automatically updated: based upon ICS 2004 data with USGS
color code (there's an alternative color code but it's ugly as far as
Archean Eras are concerned).
scale <- 1
cost <-
list(Ile = 32.3, Phe = 52.0, Lys = 30.3, Tyr = 50.0, Asn = 14.7,
Leu = 27.3, Met = 34.3, Asp = 12.7, Glu = 15.3, Ser = 11.7,
Val = 23.3, Thr = 18.7, His = 38.3, Gln = 16.3, Cys = 24.7,
Trp = 74.3, Arg = 27.3, Ala = 11.7, Pro = 20.3, Gly = 11.7)
gc.groups <- factor(rep(c(1, 2, 3), c(6, 10, 4)), ordered = TRUE,
label = c("low G+C", "mid G+C", "high G+C"))
stripchart(unlist(cost)~gc.groups, pch = 19, ylim = c(0.5,3.5),
xlim = c(0, max(unlist(cost))),
main = "Fig. 2 from Lobry (2004) LNCS, 3039:679\nMetabolic cost of the 20 amino-acids" )
boxplot(unlist(cost)~gc.groups, horizontal = TRUE, add = TRUE)
####################################################################
# #
# This is an R script to reproduce Figure 3 in #
# Lobry, J.R. (2004) Life History Traits and #
# Genome Structure: Aerobiosis and G+C Content #
# in Bacteria, submitted to LNCS #
# #
####################################################################
# #
# There are at least two ways to run it: #
# #
# 1) You don't have R at hand and don't even want to know more #
# about it. Then, just copy and paste this file in our RWeb #
# interface at the following URL: #
# #
# http://pbil.univ-lyon1.fr/Rweb/Rweb.general.html #
# #
# and click on the submit button (this may take a while if the #
# server is busy). #
# #
# 2) You do have R at hand. Check that capabilities("http/ftp") #
# is TRUE so that you can directly read data from the Web. #
# Make a local copy of this file in your working directory, #
# and then under R try: #
# #
# > pdf() #
# > source("Fig1.R") #
# > dev.off() #
# #
# You should get a file called "Rplots.pdf" with the figure. #
# Just sourcing the file should be OK, but I didn't try it #
# with all available devices (X11, pdf and postscript seems #
# to be OK. #
# #
# If capabilities("http/ftp") is FALSE, you have to copy #
# manually data files in the working folder and adapt the #
# script before runing it. #
# #
####################################################################
####################################################################
# #
# Get the data from the Lobry & Chessel 2003 JAG 44:235-261 paper #
# #
####################################################################
LCpath = "ftp://pbil.univ-lyon1.fr/pub/datasets/JAG2003/"
codcol <- read.table(paste(LCpath, "codcol.txt", sep=""))
codcol <- as.factor(codcol$V1)
labcol <- read.table(paste(LCpath, "labcol.txt", sep=""))
labcol <- as.character(labcol$V1)
lablig <- readLines(paste(LCpath, "bact293", sep=""))
tmp <- read.table(paste(LCpath, "bact293.udc", sep=""))
names(tmp) <- labcol
rownames(tmp) <- abbreviate(lablig)
#
# Re-organise by amino-acids:
#
bact293 <- tmp[, order(codcol)]
frasort <- labcol[order(codcol)]
codsort <- codcol[order(codcol)]
#
# Compute aa frequencies:
#
fbact1 <- function()
{
w <- data.frame(matrix(0,293,21))
names(w) <- levels(codsort)
row.names(w) <- row.names(bact293)
for( i in 1:21 )
{
a0 <- levels(codsort)[i]
c0 <- which(codsort==a0)
w[ ,i] <- rowSums(bact293[c0])
}
return(w)
}
fraaa <- fbact1()
fraaa <- fraaa[,names(fraaa)!="Stp"] # remove stop codons
fraaa <- fraaa/rowSums(fraaa) # as relative frequencies
#
# Compute base frequencies to get the G+C content:
#
fbact2 <- function()
{
codons <- names(bact293)
pos1 <- as.factor(substr(codons,1,1))
pos2 <- as.factor(substr(codons,2,2))
pos3 <- as.factor(substr(codons,3,3))
loc1 <- function(x)
{
x <- as.numeric(x)
w <- xtabs(x~pos1)
w <- w + xtabs(x~pos2)
w <- w + xtabs(x~pos3)
return(w)
}
w <- t(apply(bact293,1,loc1))
return(w)
}
frabase <- fbact2()
gc293 <- rowSums(frabase[,c(2,3)])/rowSums(frabase)
####################################################################
# #
# Compute observed average protein aerobic cost. Coefs are from #
# Table 1 in Akashi & Gojobori (2002) PNAS 99:3695-3700 paper. #
# #
####################################################################
ecoef <- c(11.7, 27.3, 14.7, 12.7, 24.7, 16.3,
15.3, 11.7, 38.3, 32.3, 27.3, 30.3,
34.3, 52.0, 20.3, 11.7, 18.7, 74.3,
50.0, 23.3)
e293 <- as.matrix(fraaa) %*% ecoef
####################################################################
# #
# Get the data from the Naya et al. (2002) JME 55:260-264 paper #
# #
####################################################################
Npath = "http://pbil.univ-lyon1.fr/R/donnees/"
gc02 <- read.table(paste(Npath, "gcO2.txt", sep=""))
aero <- gc02[gc02$V4 == "Aerobic",]$V3
anaero <- gc02[gc02$V4 != "Aerobic",]$V3
aerod <- density(aero/100)
anaerod <- density(anaero/100)
####################################################################
# #
# Define neutral model from Lobry (1997) Gene 205:309-316 paper #
# #
####################################################################
freqtheo <- function(x = 0.5, aa = "Ala")
{
denom = (8-(1-x)^2*(1+x))
switch( aa,
# Class 1
Ile = (1-x)^2*(2-x)/denom,
# Class 2
Phe = (1-x)^2/denom,
Lys = (1-x)^2/denom,
Tyr = (1-x)^2/denom,
Asn = (1-x)^2/denom,
# Class 3
Leu = (1-x^2)/denom,
# Class 4
Met = (1-x)^2*x/denom,
# Class 5
Asp = (1-x)*x/denom,
Glu = (1-x)*x/denom,
His = (1-x)*x/denom,
Gln = (1-x)*x/denom,
Cys = (1-x)*x/denom,
# Class 5bis
Val = 2*(1-x)*x/denom,
Thr = 2*(1-x)*x/denom,
# Class 5ter
Ser = 3*(1-x)*x/denom,
# Class 6
Trp = (1-x)*x^2/denom,
# Class 7
Arg = x*(x+1)/denom,
# Class 8
Gly = 2*x^2/denom,
Pro = 2*x^2/denom,
Ala = 2*x^2/denom
)
}
#
# Compute aerobic neutral cost as previously:
#
energy <- function(x)
{
11.7*freqtheo(x,"Ala") + 24.7*freqtheo(x,"Cys") + 12.7*freqtheo(x,"Asp") +
15.3*freqtheo(x,"Glu") + 52.0*freqtheo(x,"Phe") + 11.7*freqtheo(x,"Gly") +
38.8*freqtheo(x,"His") + 32.3*freqtheo(x,"Ile") + 30.3*freqtheo(x,"Lys") +
27.3*freqtheo(x,"Leu") + 34.3*freqtheo(x,"Met") + 14.7*freqtheo(x,"Asn") +
20.3*freqtheo(x,"Pro") + 16.3*freqtheo(x,"Gln") + 27.3*freqtheo(x,"Arg") +
11.7*freqtheo(x,"Ser") + 18.7*freqtheo(x,"Thr") + 23.3*freqtheo(x,"Val") +
74.3*freqtheo(x,"Trp") + 50.0*freqtheo(x,"Tyr")
}
####################################################################
# #
# And now, plot the whole thing #
# #
####################################################################
din <- par("din") # Device dimensions in inch
ratio <- din[2]/din[1]
#
# Top pannel:
#
par(fig=c(0, ratio, 0.5, 1.0))
par(omd=c(0, 0, 0, 0))
par(mai=c(0, 1, 0.8, 0.1))
plot(energy, type="l", ylim=c(21,26), xlim=c(0.25,0.75), col="black",
xlab="", xaxt="n", lty=3, lwd=2,
ylab="Cost per amino-acid in ~P",
main="Decrease of the average protein aerobic cost and\ndistribution of (an)erobic bacteria with G+C content")
points(gc293,e293, pch=20)
abline(lm(e293~gc293),lty=1)
rect(0.75, 24, 0.76, 26)
text(0.32, 21, "n = 293 bacteria")
text(0.57, 25.8, expression(paste(r^2 == ~~ 0.67," ; ", slope == -5.6 %+-% 0.2)))
#
# Bottom pannel:
#
par(fig=c(0, ratio, 0, 0.5))
par(mai=c(0.8, 1, 0.0, 0.1))
par(new=TRUE)
plot(aerod$x, aerod$y, type="l",xlab="Genomic G+C content", xlim=c(0.25,0.75),
ylab="Species density")
lines(anaerod$x, anaerod$y, lty=1, lwd=2, col="black")
text(0.35, 4.0, "Anaerobic")
text(0.35, 3.5, "n = 225")
text(0.67, 3.5, "Aerobic")
text(0.67, 3.0, "n = 326")
rug(jitter(anaero/100, amount = 0.01))
rug(jitter(aero/100, amount = 0.01), side = 3)
Bonus: not in the paper
gts <- read.table("http://pbil.univ-lyon1.fr/members/lobry/repro/lncs04/GeologicTimeScale.txt", sep = "\t", header = TRUE)
gts[,4] <- gts[,4]/1000
max <- gts[26, 4]
par(mar = c(0, 4, 4, 2) + 0.1)
plot(x = c(0, 0, 3, 3), y = c(0, -max, -max, 0),
xlab = "", ylab = "Age", las = 1, pch = ".", bty = "n", xaxt = "n",
main = expression(paste("Geologic Linear Time Scale in ",10^9," years")),
cex.main = 2, cex.axis = 1.5,
ylim = c(-5,0), xlim = c(0,4))
mtext("ICS 2004 data with USGS color code")
rect(xleft = 0, ybottom = -max, xright = 3, ytop = 0)
#
# Colored Periods
#
for( i in 1:25 )
{
rect(xleft = 2, ybottom = -gts[i+1,4], xright = 3, -gts[i,4],
col = rgb(gts[i,5],gts[i,6],gts[i,7],maxColorValue=255))
}
#
# White Period
#
#for( i in 21:25 )
#{
# rect(xleft = 2, ybottom = -gts[i+1,4], xright = 3, -gts[i,4])
#}
#
# Some Period names
#
for( i in 13:21 )
{
text( x = 2, y = -(gts[i,4]+gts[i+1,4])/2, labels = as.character(gts[i,3]), pos = 4)
}
#
# Cenozoic Era
#
rect(xleft = 1, ybottom = -gts[2+1,4], xright = 2, ytop = 0,
col = rgb(255, 255, 0,maxColorValue=255))
#
# Mezozoic Era
#
rect(xleft = 1, ybottom = -gts[5+1,4], xright = 2, ytop = -gts[2+1,4],
col = rgb(127, 173, 81,maxColorValue=255))
text(x = 1, y = -(gts[6,4]+gts[3,4])/2, label ="Mesozoic", pos = 4)
#
# Paleozoic Era
#
rect(xleft = 1, ybottom = -gts[11+1,4], xright = 2, ytop = -gts[5+1,4],
col = rgb(128, 181, 213,maxColorValue=255))
text(x = 1, y = -(gts[12,4]+gts[6,4])/2, label ="Paleozoic", pos = 4)
#
# Neo-proterozoic Era
#
rect(xleft = 1, ybottom = -gts[14+1,4], xright = 2, ytop = -gts[11+1,4],
col = rgb(202, 165, 149, maxColorValue=255))
text(x = 1, y = -(gts[15,4]+gts[12,4])/2, label ="Neo-\nproterozoic", pos = 4)
#
# Meso-proterozoic Era
#
rect(xleft = 1, ybottom = -gts[17+1,4], xright = 2, ytop = -gts[14+1,4],
col = rgb(221, 194, 136, maxColorValue=255))
text(x = 1, y = -(gts[18,4]+gts[15,4])/2, label ="Meso-\nproterozoic", pos = 4)
#
# Paleo-proterozoic Era
#
rect(xleft = 1, ybottom = -gts[21+1,4], xright = 2, ytop = -gts[17+1,4],
col = rgb(179, 178, 94, maxColorValue=255))
text(x = 1, y = -(gts[22,4]+gts[18,4])/2, label ="Paleo-\nproterozoic", pos = 4)
#
# Old Era
#
for( i in 22:25 )
{
rect(xleft = 1, ybottom = -gts[i+1,4], xright = 2, -gts[i,4],
col = rgb(gts[i,5],gts[i,6],gts[i,7],maxColorValue=255))
text( x = 1, y = -(gts[i,4]+gts[i+1,4])/2, labels = as.character(gts[i,2]), pos = 4)
}
#
# Phanerozoic Eon
#
rect(xleft = 0, ybottom = -gts[11+1,4], xright = 1, ytop = 0,
col = rgb(179, 226, 209, maxColorValue=255))
text(x = 0, y = -(gts[12,4])/2, label ="Phanerozoic", pos = 4)
#
# Proterozoic Eon
#
rect(xleft = 0, ybottom = -gts[21+1,4], xright = 1, ytop = -gts[11+1,4],
col = rgb(204, 216, 145, maxColorValue=255))
text(x = 0, y = -(gts[12,4]+gts[22,4])/2, label ="Proterozoic", pos = 4)
#
# Archaen Eon
#
rect(xleft = 0, ybottom = -gts[25+1,4], xright = 1, ytop = -gts[21+1,4],
col = rgb(153, 173, 172, maxColorValue=255))
text(x = 0, y = -(gts[26,4]+gts[22,4])/2, label ="Archean", pos = 4)
#
# Legends
#
text( x = 0, y = -4.8, label = "Eon", pos = 4)
text( x = 1, y = -4.8, label = "Era", pos = 4)
text( x = 2, y = -4.8, label = "Period", pos = 4)
#
# Arrows
#
arrows(x0 = 3.4, y0 = -2.3, x1 = 3.1 , y1 = -2.3, length = 0.1, col = "red")
text( x = 3.4, y = -2.35, label = expression(O[2]), pos = 4, cex = 3, col = "red")
arrows(x0 = 3.4, y0 = 0, x1 = 3.1 , y1 = 0, length = 0.1)
text( x = 3.4, y = 0, label = "Present", pos = 4)
arrows(x0 = 3.4, y0 = -max, x1 = 3.1 , y1 = -max, length = 0.1)
text( x = 3.4, y = -max, label = "Accretion\nof Earth", pos = 4)
arrows(x0 = 3.4, y0 = -0.57, x1 = 3.1 , y1 = -0.57, length = 0.1)
text( x = 3.4, y = -0.57, label = "Macro-\nfossils", pos = 4)
lines(c(3.1,3.1),c(-3.9,-4.2))
lines(c(3.1,3.4),c(-4.05,-4.05))
text( x = 3.4, y = -4.05, lab = "Last\nSterilizing\nImpact", pos = 4)
If you have any problems or comments, please contact
Jean Lobry .