bootstrap codon alignment

Codon models start being used in the field of phylogenetic reconstruction. They were implemented by both bayesian method, like MrBayes and Beast, as well as ML method, like GARLI, IQPNNI and CodonPhyML. Although there are some approximate way to estimate tree reliability, bootstrap is still the most common used one in communication. However, some tools, for example CodonPhyML, do not provide tools to bootstrap codon independently. Bootstrap can cost a huge amount of time, even though they are sometimes implemented in a parallel way, which may not apply to servers of all users (like me ;-)). The easiest way would be just running bootstraps independently then generating consensus tree to get bootstrap value. Therefore, I wrote a small (dark and dirty) code to bootstrap codon alignment by R.

require("ape")
dnaToCodon <- function(dna.indices){
  #if(!is.matrix(dna.indices) || length(dna.indices) != 3) {
  #  stop("\"dna.indices\" must be a matrix of length 3")
  #}
  
  ncl <- ncol(dna.indices)
  nrw <- nrow(dna.indices)
  ncl <- ncl / 3
  codon <- matrix(nrow=nrw, ncol=ncl)
  
  for (j in seq(1, ncl)) {
    for (i in seq(1, nrw)) {
      codon[i,j] <- paste(dna.indices[i,(3*j-2)], 
                          dna.indices[i,(3*j-1)], 
                          dna.indices[i,(3*j)], 
                          sep="")
    }
  }
  row.names(codon) <- row.names(dna.indices)
  return(codon)
}

bootstrapCodon <- function(codon.indices) {
  c <- ncol(codon.indices)
  r <- nrow(codon.indices)
  rcodon <- matrix(nrow=r, ncol=c)
  sample.col <- sample(seq(1,c), c, replace=T)
  
  for (i in seq(1, c)) {
    rcodon[,i] <- codon.indices[,sample.col[i]]
  }
  
  row.names(rcodon) <- row.names(codon.indices)
  return(rcodon)
    
}

codonToDna <- function(codon.indices) {
  nrw <- nrow(codon.indices)
  ncl <- ncol(codon.indices)
  
  dna <- matrix(nrow=nrw, ncol=ncl*3)
  
  for (i in seq(1, nrw)) {
    for (j in seq(1, ncl)) {
      codonstr <- strsplit(codon.indices[i,j], split="")
      dna[i,(j*3-2)] <- codonstr[[1]][1]
      dna[i,(j*3-1)] <- codonstr[[1]][2]
      dna[i,(j*3)] <- codonstr[[1]][3]
    }
  }
  
  row.names(dna) <- row.names(codon.indices)
  
  return(dna)
}

Then, if I have a alignment codon by codon, I can bootstrap it with following script.

require("ape")
source("codon.R")

for (i in seq(1,100)) {
  my.align <- read.dna("seq.msa.phy", as.character=T)
  my.codon.align <- dnaToCodon(my.align)
  boots.codon <- bootstrapCodon(my.codon.align)
  boots.align <- codonToDna(boots.codon)
  file <- paste("./bootstrap/seq.msa.boot", i, ".fasta", sep="")
  write.dna(boots.align, file, format="fasta", colsep="")
}
__END__
Advertisements