Lobry & Necşulea (2006) Gene 385:128-136
This page allows for the on-line reproduction of the figures in the paper:
Lobry, J.R.,
Necşulea, A. (2006)
Synonymous Codon Usage and its Potential Link with Optimal Growth Temperature in Prokaryotes.
Gene,
385:128-136.
Abstract:
The relationship between codon usage in prokaryotes and their ability to grow at extreme temperatures has been given much attention over the past years. Previous studies have indicated that the difference in synonymous codon usage between thermophiles and mesophiles is a consequence of a selective pressure linked to growth temperature.
Here, we have performed an updated analysis of the variation in synonymous codon usage with growth temperature; our study includes a large number of species from a wide taxonomic and growth temperature range. The presence of psychrophilic species in our study allowed us to test whether the same selective pressure acts on synonymous codon usage at very low growth temperature.
Our results show that the synonymous codon usage for Arg (through the AGG, AGA and CGT codons) is the most discriminating between thermophilic and non-thermophilic species, thus confirming the previous studies. We report the unusual clustering of an Archaeal psychrophile with the thermophilic and hyperthermophilic species on the synonymous codon usage factorial map; the other psychrophiles in our studies cluster with the mesophilic species.
Our conclusion is that the difference in synonymous codon usage between thermophilic and non-thermophilic species cannot be clearly attributed to a selective pressure linked to growth at high temperatures. We propose an alternative point of view to explain the observed pattern.
1. First factorial map for synonymous codon usage analysis
2. First factorial map for synonymous codon usage analysis in codon space
#
# Load data for codon usage:
#
uco.january <- read.table("http://pbil.univ-lyon1.fr/members/lobry/repro/gene06/uco.january.curated.table")
uco.january$species <- as.character(uco.january$species)
species <- uco.january$species
# Row indices of the "carsonella ruddii" entries (case-insensitive match).
carsonella <- grep("carsonella ruddii", species, ignore.case = TRUE)
# curated = standard genetic codes only + removal of ambiguous species, such as "anabaena", "bacillus sp."
# Only drop rows when there is a match: x[-integer(0), ] would silently
# discard EVERY row instead of none.
if (length(carsonella) > 0) {
  uco.january <- uco.january[-carsonella, ]
  species <- species[-carsonella]
}
rownames(uco.january) <- as.character(species)
# Keep columns 3 to 66 (64 columns); their names are used as codons below.
uco.january <- uco.january[, 3:66]
# Remove the three stop codons with a logical mask, which is also safe when
# none of them is present (-which(...) would then drop all columns).
uco.january <- uco.january[, !(colnames(uco.january) %in% c("taa", "tag", "tga")), drop = FALSE]
#
# Compute within and between analyses:
#
library(ade4)
library(seqinr)
# Amino-acid label of each codon column (translate() then aaa() from seqinr);
# this factor groups synonymous codons together.
facaa <- as.factor(aaa(unlist(lapply(colnames(uco.january), function(x) translate(s2c(x))))))
# Correspondence analysis of the species x codon table, two axes kept.
afc <- dudi.coa(uco.january, scannf = FALSE, nf = 2)
# The analyses are run on the transposed dudi so that codons are the grouped
# units, then transposed back.
# NOTE(review): recent ade4 releases name these analyses wca()/bca() and treat
# within()/between() as legacy names -- confirm against the installed version.
# The newer names are used when available, the original ones otherwise.
if (exists("wca", mode = "function") && exists("bca", mode = "function")) {
  wit.codon <- wca(t(afc), fac = facaa, scannf = FALSE, nf = 2)
  btw <- t(bca(t(afc), fac = facaa, scannf = FALSE, nf = 2))
} else {
  wit.codon <- within(t(afc), fac = facaa, scannf = FALSE, nf = 2)
  btw <- t(between(t(afc), fac = facaa, scannf = FALSE, nf = 2))
}
wit <- t(wit.codon)
# Reuse the analysis computed above instead of running it a second time.
ratio <- wit.codon$ratio
#
# Plot the figure
#
par(mgp = c(1, 0, 0), mar = c(2, 2, 0, 0) + 0.1)
# Empty frame sized to the codon coordinates; axis labels report the share of
# the within-analysis eigenvalues carried by each of the first two factors.
plot(0,
  xlab = paste0("First Factor (", round(100 * wit$eig[1] / sum(wit$eig), 1), "%)"),
  ylab = paste0("Second Factor (", round(100 * wit$eig[2] / sum(wit$eig), 1), "%)"),
  type = "n",
  xaxt = "n", yaxt = "n",
  xlim = range(wit$co[, 1]), ylim = range(wit$co[, 2]))
# Third base of every codon (row names of wit$co), named by codon.
base3 <- substr(rownames(wit$co), 3, 3)
names(base3) <- rownames(wit$co)
# "blue" marks codons ending in c or g, "red" all the others. The labels are
# only used to split the codons into two groups; everything is drawn in black.
base3col <- ifelse(base3 %in% c("c", "g"), "blue", "red")
names(base3col) <- names(base3)
text(x = wit$co[, 1], y = wit$co[, 2], toupper(rownames(wit$co)), cex = 1.5,
  col = "black")
# Convex hull of the "red" group, drawn as a closed outline.
X <- as.matrix(wit$co[base3col == "red", 1:2])
hpts <- chull(X)
hpts <- c(hpts, hpts[1])
lines(X[hpts, ], col = "black")
# Convex hull of the "blue" group, drawn as a hatched polygon.
X <- as.matrix(wit$co[base3col == "blue", 1:2])
hpts <- chull(X)
hpts <- c(hpts, hpts[1])
polygon(X[hpts, ], density = 5, col = "black")
3. Evolution of codon frequencies with G+C content for the most salient codons
#
# Load data for codon usage:
#
uco.january <- read.table("http://pbil.univ-lyon1.fr/members/lobry/repro/gene06/uco.january.curated.table")
uco.january$species <- as.character(uco.january$species)
species <- uco.january$species
# Row indices of the "carsonella ruddii" entries (case-insensitive match);
# kept as integer indices because later code reuses this variable.
carsonella <- grep("carsonella ruddii", species, ignore.case = TRUE)
# curated = standard genetic codes only + removal of ambiguous species, such as "anabaena", "bacillus sp."
# Only drop rows when there is a match: x[-integer(0), ] would silently
# discard EVERY row instead of none.
if (length(carsonella) > 0) {
  uco.january <- uco.january[-carsonella, ]
  species <- species[-carsonella]
}
rownames(uco.january) <- as.character(species)
# Keep columns 3 to 66 (64 columns); their names are used as codons below.
uco.january <- uco.january[, 3:66]
# Remove the three stop codons with a logical mask, which is also safe when
# none of them is present (-which(...) would then drop all columns).
uco.january <- uco.january[, !(colnames(uco.january) %in% c("taa", "tag", "tga")), drop = FALSE]
#
# Compute G+C content:
#
library(seqinr)
# Count how many of the first three characters of `codon` are "c" or "g".
#
# codon: a single string; it is split into characters with s2c().
# Returns a number between 0 and 3. The comparison is case-sensitive, so
# upper-case bases are not counted.
gccodon <- function(codon) {
  bases <- s2c(codon)
  hits <- vapply(1:3, function(pos) {
    if (bases[pos] == "c" || bases[pos] == "g") 1 else 0
  }, numeric(1))
  return(sum(hits))
}
# Number of G/C bases (0 to 3) in each codon column, named by codon.
tmp <- vapply(colnames(uco.january), gccodon, numeric(1))
# G+C content in percent per species: G/C bases over all bases
# (3 bases per counted codon). The result is a one-column matrix.
gc.january <- 100 * (as.matrix(uco.january) %*% tmp) / (3 * rowSums(uco.january))
#
# Load data about domains:
#
domaine <- read.table("http://pbil.univ-lyon1.fr/members/lobry/repro/gene06/topt.allsources.january.table", header = TRUE)$domain
# NOTE(review): `carsonella` holds row indices computed on the codon usage
# table; this assumes both tables list species in the same order -- confirm.
# Guarded because domaine[-integer(0)] would return an empty vector.
if (length(carsonella) > 0) {
  domaine <- domaine[-carsonella]
}
#
# Load data about Topt:
#
topt.january <- read.table("http://pbil.univ-lyon1.fr/members/lobry/repro/gene06/toptsummary.january.table", header = TRUE)
topt.january$species <- as.character(topt.january$species)
# Logical mask instead of -grep(): a pattern without match must keep all rows.
topt.january <- topt.january[!grepl("carsonella ruddii", topt.january$species, ignore.case = TRUE), ]
topt <- topt.january$toptmean
#
# Define color to be used:
#
colpsy <- "blue"
colmes <- "palegreen2"
colther <- "orange"
colhyp <- "red"
thema <- "black"
#
# Plot the figure
#
# Scatter plot of the frequency of one codon against G+C content.
#
# codon: column name of `data` to plot (also used, upper-cased, as the title).
# data:  codon count table, one row per species.
# ...:   further arguments forwarded to plot().
#
# Reads the globals gc.january (x values), topt (temperature used to pick the
# point colour) and the colour settings colmes, colther, colhyp, colpsy, thema.
# Changes par(mgp, mar) and does not restore them.
tmpplot <- function(codon, data = uco.january, ...)
{
  par(mgp = c(2.5, 1, 0), mar = c(4, 4, 2, 0) + 0.1)
  # Codon frequency in percent of all counted codons of each species.
  y <- 100 * data[, codon] / rowSums(data)
  # All species are first drawn with the colmes fill colour.
  plot(gc.january, y,
    main = toupper(codon),
    xlab = "G+C content [%]", ylab = "Codon frequency [%]",
    las = 1,
    col.main = thema, col.lab = thema, col.axis = thema, fg = thema,
    pch = 21, bg = colmes, col = "black",
    cex = 1.5, cex.lab = 1.5,
    ...)
  # Overdraw in this order, so later layers win: topt >= 59, then topt >= 80,
  # and finally topt <= 20.
  layers <- list(
    list(mask = topt >= 59, fill = colther),
    list(mask = topt >= 80, fill = colhyp),
    list(mask = topt <= 20, fill = colpsy)
  )
  for (layer in layers) {
    points(gc.january[layer$mask], y[layer$mask],
      bg = layer$fill, col = "black", cex = 1.5, pch = 21)
  }
}
# Draw the four codon panels on a 2 x 2 grid, then put the graphics
# parameters back to what they were before.
op <- par(no.readonly = TRUE)
par(mfrow = c(2, 2))
invisible(lapply(c("agg", "ata", "aga", "cgt"), tmpplot))
par(op)
4. Evolution of codon frequencies (per thousand) with temperature
for WWS, SSW and AGG codons
#
# Load data for codon usage:
#
uco.january <- read.table("http://pbil.univ-lyon1.fr/members/lobry/repro/gene06/uco.january.curated.table")
uco.january$species <- as.character(uco.january$species)
species <- uco.january$species
# Row indices of the "carsonella ruddii" entries (case-insensitive match).
carsonella <- grep("carsonella ruddii", species, ignore.case = TRUE)
# curated = standard genetic codes only + removal of ambiguous species, such as "anabaena", "bacillus sp."
# Only drop rows when there is a match: x[-integer(0), ] would silently
# discard EVERY row instead of none.
if (length(carsonella) > 0) {
  uco.january <- uco.january[-carsonella, ]
  species <- species[-carsonella]
}
rownames(uco.january) <- as.character(species)
# Keep columns 3 to 66 (64 columns); their names are used as codons below.
uco.january <- uco.january[, 3:66]
# Remove the three stop codons with a logical mask, which is also safe when
# none of them is present (-which(...) would then drop all columns).
uco.january <- uco.january[, !(colnames(uco.january) %in% c("taa", "tag", "tga")), drop = FALSE]
#
# Load data about Topt:
#
topt.january <- read.table("http://pbil.univ-lyon1.fr/members/lobry/repro/gene06/toptsummary.january.table", header = TRUE)
topt.january$species <- as.character(topt.january$species)
# Logical mask instead of -grep(): a pattern without match must keep all rows
# (x[-integer(0), ] would drop every row).
topt.january <- topt.january[!grepl("carsonella ruddii", topt.january$species, ignore.case = TRUE), ]
topt <- topt.january$toptmean
#
# Plot the figure
#
# Relative codon frequencies: each row divided by its total codon count.
ucof <- uco.january / rowSums(uco.january)
# Expand a one-letter IUPAC nucleotide code into the lower-case bases it
# stands for.
#
# base: a single character, e.g. "A", "W" or "N" (case-insensitive).
# Returns a character vector of lower-case bases, e.g. c("a", "t") for "W".
# Stops with an error for anything that is not a supported one-letter code;
# the previous version returned NULL silently in that case and did not know
# the codes K and M.
iupac <- function(base) {
  expansions <- list(
    A = "a", C = "c", G = "g", T = "t",
    W = c("a", "t"),
    S = c("c", "g"),
    K = c("g", "t"),
    M = c("a", "c"),
    R = c("a", "g"),
    Y = c("c", "t"),
    B = c("c", "g", "t"),
    D = c("a", "g", "t"),
    H = c("a", "c", "t"),
    V = c("a", "c", "g"),
    N = c("a", "c", "g", "t")
  )
  if (!is.character(base) || length(base) != 1 || is.na(base)) {
    stop("`base` must be a single IUPAC nucleotide letter", call. = FALSE)
  }
  code <- toupper(base)
  if (!(code %in% names(expansions))) {
    stop("unknown IUPAC nucleotide code: ", base, call. = FALSE)
  }
  expansions[[code]]
}
# List every concrete codon matched by a three-letter IUPAC pattern.
#
# codon: a string whose first three characters are codes understood by
#        iupac(), e.g. "WWS".
# Returns the lower-case codons as built by kronecker(): the first position
# varies slowest and the third position fastest.
getcodonlist <- function(codon) {
  first <- iupac(substr(codon, 1, 1))
  second <- iupac(substr(codon, 2, 2))
  third <- iupac(substr(codon, 3, 3))
  # All (second, third) combinations, then prefix each with every first base.
  endings <- kronecker(second, third, paste, sep = "")
  kronecker(first, endings, paste, sep = "")
}
# Temperature classes from topt, using left-closed intervals:
# [0, 20.1) psychro, [20.1, 59) meso, [59, 80) thermo, [80, 120) hyper.
# `breaks` stays defined at top level, as it was before, but is now passed by
# name instead of through an assignment inside the call.
breaks <- c(0, 20.1, 59, 80, 120)
factemp <- cut(x = topt, breaks = breaks,
  labels = c("psychro", "meso", "thermo", "hyper"), right = FALSE)
# Notched boxplots, one per temperature class, of the pooled frequency
# (per thousand) of all codons matching an IUPAC pattern.
#
# codon: IUPAC codon pattern passed to getcodonlist(); also shown in the legend.
# ...:   further arguments forwarded to boxplot().
#
# Reads the globals ucof (relative codon frequencies) and factemp (class of
# each species).
plotbasak <- function(codon, ...) {
  matching <- getcodonlist(codon)
  selected <- which(colnames(ucof) %in% matching)
  # drop = FALSE keeps a table even when a single codon matches.
  y <- 1000 * rowSums(ucof[, selected, drop = FALSE])
  box_settings <- list(boxwex = 1, staplewex = 0.5, outwex = 0.5)
  boxplot(y ~ factemp, las = 1, notch = TRUE, varwidth = TRUE,
    pars = box_settings, ...)
  legend("topleft", codon)
}
# Stack the three boxplot panels vertically. The graphics parameters are saved
# first and restored afterwards so that the 3 x 1 layout and the reduced
# margins do not leak into later plots.
op <- par(no.readonly = TRUE)
par(mfrow = c(3, 1), mar = c(2, 4, 0, 1) + 0.1)
plotbasak("WWS", col = grey(0.8))
plotbasak("SSW", col = grey(0.8))
plotbasak("AGG", col = grey(0.8))
par(op)
If you have any problems or comments, please contact
Jean Lobry.