#
#  event : A Library of Special Functions for Event Histories
#  Copyright (C) 1998 J.K. Lindsey
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  SYNOPSIS
#
#	pp(y, censor=1)
#	ident(y, id)
#	tpast(y)
#	ttime(y, id)
#	bp(y, id, cens=1)
#	tccov(y, x, id)
#	tvcov(y, x, tx)
#	vdm(y, x, id=NULL, tx=NULL, factor=F, time=F)
#	ehr(point, p=p, lambda=NULL, linear=NULL, wt=1,
#		print.level=0, typsiz=rep(1,length(p)), ndigit=10,
#		gradtol=0.00001, stepmax=max(10*sqrt(p%*%p),10),
#		steptol=0.0004, iterlim=100, fscale=1)
#
#  DESCRIPTION
#
#    Functions for setting up and fitting counting process models

# point process created from times (y) between events
# y must contain integers
pp <- function(y, censor=1) {
	if(min(y)<=0)stop("All times must be positive")
	if(any(round(y)!=y))stop("Times must be integers")
	if(any(censor!=0&&censor!=1))
		stop("Censor indicator must be zeros and ones")
	if(length(censor)!=1&&length(censor)!=length(y))
		stop("Time and censor vectors must be the same length")
	point <- rep(0, sum(y))
	point[cumsum(y)] <- censor
	point}

# individual identification vector
ident <- function(y, id) {
	if(min(y)<=0)stop("All times must be positive")
	if(length(y)!=length(id))
		stop("Time and id vectors must be the same length")
	rep(id, y)}

# time past since previous event
tpast <- function(y) {
	if(min(y)<=0)stop("All times must be positive")
	unlist(lapply(as.list(y), seq))}

#	sequence(y)}
#sequence <- function(y) unlist(lapply(as.list(y), seq))

# total time elapsed for each individual
ttime <- function(y, id) {
	if(length(idd <- ident(y,id))==1)return(idd)
	z <- collapse(rep(1,length(idd)),idd,cumsum)
	names(z) <- NULL
	z}

# number of previous events for each individual, for birth processes
# add one if process starts at an event
bp <- function(y, id, cens=1) {
	bp1 <- function(i) c(0,cumsum(i)[1:(length(i)-1)])
	if(length(point <- pp(y, cens=cens))==1)return(point)
	if(length(idd <- ident(y, id))==1)return(idd)
	z <- collapse(point, idd, bp1)
	names(z) <- NULL
	z}

# time-constant covariate - id must be numbered consecutively
# x has one value for each distinct id
tccov <- function(y, x, id) {
	if(length(y)!=length(id))stop("Time and id must be the same length")
	if(length(x)!=length(unique(id)))
		stop("There must be one covariate value per individual")
	if(length(idd <- ident(y, id))==1)return(idd)
	x[idd]}

# time-varying covariate - tx gives the times at which x changes
# may also be used to create weight vector
tvcov <- function(y, x, tx) {
	if(min(y)<=0|min(tx)<0)stop("All times must be positive")
	if(length(x)!=length(tx))
		stop("Covariate and time vectors must be the same length")
	if(sum(y)!=sum(tx))
		stop("Total response time must equal total covariate time")
	rep(x, tx)}

# design matrix
vdm <- function(y, x, id=NULL, tx=NULL, factor=F, time=F) {
	if(time) {if(length(xx <- tvcov(y, x, tx))==1)return(xx)}
	else if(length(xx <- tccov(y, x, id))==1)return(xx)
	if(factor)xx <- factor(xx)
	wr(~xx)$design}

# fit an intensity function to event histories, where point is
# produced by point <- pp(y) and lambda is the log intensity function
ehr <- function(point, p=p, lambda=NULL, linear=NULL, wt=1,
	print.level=0, typsiz=rep(1,length(p)), ndigit=10, gradtol=0.00001,
	stepmax=max(10*sqrt(p%*%p),10), steptol=0.0004, iterlim=100, fscale=1){
	call <- sys.call()
	if(point!=0&point!=1)stop("Response vector must be zeros and ones")
	if(is.language(linear)){
		mt <- terms(linear)
		if(is.numeric(mt[[2]])){
			dm1 <- matrix(1)
			colnames(dm1) <- "(Intercept)"
			lambda1 <- function(p) p[1]}
		else {
			mf <- model.frame(mt, sys.frame(sys.parent()),
				na.action=na.fail)
			dm1 <- model.matrix(mt, mf)
			nlp1 <- dim(dm1)[2]
			if(!is.function(lambda))
				lambda1 <- function(p) dm1 %*% p[1:nlp1]
			else lambda1 <- function (p) lambda(p, dm1 %*% p[1:nlp1])}}
	else if (!is.function(lambda))
		lambda1 <- function(p) p[1]
	else lambda1 <- lambda
	if(!is.language(linear))cname <- paste("p",1:length(p),sep="")
	else cname <- colnames(dm1)
	if(length(cname)<length(p))cname <- c(cname,paste("p",(length(cname)+1):length(p),sep=""))
	fn <- function(p) {
		l <- lambda1(p)
		sum(wt*(exp(l)-point*l))}
	if(fscale==1)fscale <- fn(p)
	if (is.na(fscale))
		stop("Non-numerical function value: probably invalid initial values")
	z0 <- nlm(fn, p=p, hessian=T, print.level=print.level, typsiz=typsiz,
		ndigit=ndigit, gradtol=gradtol, stepmax=stepmax,
		steptol=steptol, iterlim=iterlim, fscale=fscale)
	if(length(p)==1){
		cov <- 1/z0$hessian
		se <- sqrt(cov)}
	else {
		a <- qr(z0$hessian)
		if(a$rank==length(p))cov <- solve(z0$hessian)
		else cov <- matrix(NA,ncol=length(p),nrow=length(p))
		se <- sqrt(diag(cov))}
	if(is.function(lambda))lambda1 <- lambda
	z1 <- list(
		call=call,
		intensity=lambda1,
		linear=linear,
		maxlike=z0$minimum,
		aic=z0$minimum+length(p),
		coefficients=z0$estimate,
		cname=cname,
		se=se,
		cov=cov,
		corr=cov/(se%o%se),
		gradient=z0$gradient,
		iterations=z0$iter,
		error=z0$error,
		code=z0$code)
	class(z1) <- "intensity"
	return(z1)}

coefficients.intensity <- function(z) z$coefficients
deviance.intensity <- function(z) 2*z$maxlike

print.intensity <- function(z) {
	np <- length(z$coefficients)
	cat("\nCall:\n",deparse(z$call),"\n\n",sep="")
	if(z$code>2)cat("Warning: no convergence - error",z$code,"\n\n")
	t <- deparse(z$intensity)
	cat("Log intensity function:",t[2:length(t)],sep="\n")
	if(is.language(z$linear))
		cat("Linear part: ",deparse(z$linear),"\n")
	cat("\n-Log likelihood   ",z$maxlike,"\n")
	cat("AIC               ",z$aic,"\n")
	cat("Iterations        ",z$iterations,"\n\n")
	cat("Coefficients:\n")
	coef.table <- cbind(z$coefficients, z$se)
	dimnames(coef.table) <- list(z$cname, c("estimate", "se"))
	print.default(coef.table, digits=4, print.gap=2)
	if(np>1){
		cat("\nCorrelations:\n")
		dimnames(z$corr) <- list(seq(1,np),seq(1,np))
		print.default(z$corr, digits=4)}
	invisible(z)}

# examples of linear log intensity functions
#exponential <- ~1
#weibull <- ~log(time(y))
#extreme.value <- ~time(y)
#birth1 <- ~bp(y,id)
#birth2 <- ~log(1+bp(y,id))

# examples of nonlinear log intensity functions
#negative.binomial <- function(p) p[1]+log(p[2]+bp(y,id))
#gen.negative.binomial <- function(p) p[1]+p[3]*log(p[2]+bp(y,id))
