# files can be downloaded from
# http://www.biomedcentral.com/content/supplementary/1471-2407-6-96-S1.xls
# http://www.biomedcentral.com/content/supplementary/1471-2407-6-96-S3.xls

library(curvclust)
library(WECCA)
library(CGHcall)
library(CGHbase)

# Read the probe-level table and the clinical table. Both files use "," as
# the decimal separator and the literal string "NA" for missing values.
data    = read.csv("1471-2407-6-96-s3.csv", dec = ",", na.strings = "NA")
# Columns 2 to 4 are not used by the rest of the script.
data    = data[, -c(2:4)]
# Drop the probes on chromosome 23. The explicit is.na() guard keeps a missing
# Chrom value from turning into an all-NA row, which is what a bare
# `data$Chrom != 23` index produces for NA entries.
data    = data[!is.na(data$Chrom) & data$Chrom != 23, ]
clinic  = read.csv("1471-2407-6-96-s1.csv", dec = ",", na.strings = "NA")

#### remove individuals without clinical info
# A case is kept only when its follow-up time is recorded in `clinic`.
has.followup = !is.na(clinic$follow.up..yrs.)
kept.cases   = as.character(clinic$Case[has.followup])

# The first three columns of `data` are probe annotation; every remaining
# column is one case, identified by its column name. Selecting all wanted
# columns in one step replaces the column-by-column cbind() loop and keeps
# the original column names and order.
is.kept = colnames(data)[-(1:3)] %in% kept.cases
data    = data[, c(1:3, 3 + which(is.kept)), drop = FALSE]

# Logical indexing instead of clinic[-which(is.na(...)), ]: when no follow-up
# value is missing, which() returns integer(0) and the negative index would
# select zero rows instead of all of them.
clinic  = clinic[has.followup, ]
# Dimensions of the filtered table: n counts all columns (annotation columns
# included), M counts the probes (rows).
n       = dim(data)[2]
M       = dim(data)[1]
colnames(data)[1:3] = c("BAC.clone","CHROMOSOME","START_POS")
dfraw   = as.data.frame(data)

# The table carries no end-position column, so one is inserted as the fourth
# column, set to the start position + 60.
# NOTE(review): the offset of 60 is in whatever unit START_POS uses - confirm
# that this is the intended clone length.
NoBpEnd = TRUE
if (NoBpEnd) {
  # drop = FALSE keeps the sample block a data frame even when a single sample
  # is left; otherwise it collapses to a vector and its column loses the
  # sample name.
  dfraw = data.frame(dfraw[, 1:3],
                     bpend = dfraw[, 3] + 60,
                     dfraw[, -(1:3), drop = FALSE])
}
raw       = cghRaw(dfraw)
prep      = preprocess(raw, maxmiss = 30, nchrom = 23)

# median normalized
nor          = normalize(prep,method = "median", cellularity = 1, smoothOutliers = TRUE)

# Copy-number matrix of the normalized object, one column per sample. The
# sample count is read from the matrix instead of being hard-coded (it was
# fixed at 55), so the script neither fails nor silently drops samples when
# the number of retained cases changes.
cn           = copynumber(nor)
n            = ncol(cn)

# Probe annotation followed by the per-sample copy-number columns.
A            = cbind(featureData(nor)@data, cn)
A$Chromosome = factor(paste("chr",A$Chromosome,sep=""))
colnames(A)  = c("Chromosome","Start","End",colnames(cn))

# format for curvclust
# Every column of A after the three annotation columns is one sample profile;
# curvclust receives them as a list of vectors named by sample.
n                = ncol(A) - 3
data.list        = lapply(4:(n + 3), FUN = function(j) A[[j]])
names(data.list) = colnames(A)[-(1:3)]

# Data object for curvclust, followed by the union-coefficient step.
CCD = new("CClustData", data.list, filter.number = 1)
CCD = getUnionCoef(CCD)

# Clustering options. The fitted models themselves are loaded from .RData
# files further down; the Gamma2.structure set here is reused to build those
# file names.
CCO                     = new("CClustO")
CCO["burn"]             = 500
CCO["init"]             = "rEM"
CCO["nbclust"]          = 5
CCO["Gamma2.structure"] = "group"


# Collect the model-selection criteria of previously fitted models. Each
# .RData file is expected to define an object called CCR in the workspace.
# Index k of BIC / ICL / LLik corresponds to the model with k clusters.
Res  = list()
BIC  = c()
ICL  = c()
LLik = c()

# One-cluster model: stored under the "constant" Gamma2 structure.
load("fridlyand-FCMM-global-nbclust-1-Gamma2-constant.RData")
BIC[1]  = getBIC(CCR, CCD)
ICL[1]  = getICL(CCR, CCD)
LLik[1] = CCR@loglik

# Models with 2 to 6 clusters, fitted with the Gamma2 structure set in CCO.
# Res[[1]] is intentionally left unset, as only these fits are stored.
for (ell in 2:6) {
  load(paste0("fridlyand-FCMM-global-nbclust-", ell,
              "-Gamma2-", CCO@Gamma2.structure, ".RData"))
  Res[[ell]] = CCR
  BIC[ell]   = getBIC(CCR, CCD)
  LLik[ell]  = CCR@loglik
  ICL[ell]   = getICL(CCR, CCD)
}

# Overlay the three criteria against the number of clusters (the vector
# index): log-likelihood in the default colour, BIC in red, ICL in blue.
# The vertical range runs from just below the smallest ICL to just above the
# largest log-likelihood.
crit.ylim = c(min(ICL) - 1, max(LLik) + 1)
plot(LLik, type = "b", ylim = crit.ylim)
lines(BIC, type = "b", col = "red")
lines(ICL, type = "b", col = "blue")

# Pick the number of clusters with the smallest ICL and reload that fit.
# NOTE(review): the plot above assumes ICL lies below the log-likelihood,
# which would suggest a "larger is better" criterion - confirm against the
# curvclust definition of getICL that which.min (not which.max) is intended.
ell = which.min(ICL)
load(paste0("fridlyand-FCMM-global-nbclust-", ell,
            "-Gamma2-", CCO@Gamma2.structure, ".RData"))

# Hard assignment: for each row of CCR@Tau, the column holding the largest
# value. Rows are presumably cases named as in clinic$Case - verify.
hard.label = apply(CCR@Tau, 1, which.max)
label      = data.frame(Case = names(hard.label), label = hard.label)

# Cross-tabulate the clinical CGH subtype against the cluster label.
clinic2 = merge(clinic, label, by = "Case")
table(clinic2$CGH.subtype, clinic2$label)



