# require(anesrake)

#' @import weights
#' @import anesrake
#' @import graphics
#' @import grDevices
#' @import methods
#' @importFrom stats dbinom mad qbeta qnorm sd uniroot

# internal function to add region to plot given by lower and upper lines
addRegion <- function(x,l,u,col,transparency=0.3) {
	newcol <- col2rgb(col)/255
	newcol <- rgb(newcol[1],newcol[2],newcol[3],transparency)
	polygon(c(rev(x),x), c(rev(u),l), col=newcol, border=NA)
}

#' Statification of input data matrix into given strata
#' 
#' @param x Input data matrix.
#' @param S Strata by row in matrix S, with 2 columns per variable aka startpoint [included] and endpoint [excluded].
#' @param index Index of the stratum in S.
#' 
#' @return Vector of indices belong to the given stratum
#' 
#' @importFrom Rdpack reprompt
#' @references .
#' 
#' @examples
#' require(chartreview)
#' x <- matrix(runif(10),ncol=1)
#' strata <- (0:10)/10
#' S <- cbind(strata[-length(strata)],strata[-1])
#' print(stratum(x,S,1))
#' 
#' @export
stratum <- function(x,S,index) {
	if(is.null(x)) {
		return(NULL)
	}
	else {
		indices <- rep(TRUE,nrow(x))
		# go through all columns of x and check intervals in S
		for(j in 1:ncol(x)) {
			indices <- indices & ( (S[index,2*j-1]<=x[,j]) & (x[,j]<S[index,2*j]) )
		}
		return(which(indices))
	}
}

#' Lai confidence sequence for binomial quantity
#' 
#' @param n Number of experiments
#' @param x Observed number of successes.
#' @param alpha Error probability.
#' 
#' @return Binomial confidence interval.
#' 
#' @importFrom Rdpack reprompt
#' @references Lai, TL (1976). On Confidence Sequences. Ann Statist 4(2):265-280.
#' 
#' @examples
#' require(chartreview)
#' print(lai(10,5,0.05))
#' 
#' @export
lai <- function(n,x,alpha) {
	f <- function(p) { (n+1)*dbinom(x=x,size=n,prob=p)-alpha }
	if(x<n-0.5) U <- uniroot(f,lower=x/n,upper=1)$root
	else U <- 1
	if(x>0.5) L <- uniroot(f,lower=0,upper=x/n)$root
	else L <- 0
	return(c(L,U))
}

#' Normal confidence interval for continuous quantity
#' 
#' @param x Vector of samples.
#' @param a Error probability.
#' 
#' @return Normal confidence interval.
#' 
#' @importFrom Rdpack reprompt
#' @references .
#' 
#' @examples
#' require(chartreview)
#' x <- rnorm(10)
#' print(normalci(x,0.05))
#' 
#' @export
normalci <- function(x,a) {
	n <- length(x)
	return(c(mean(x)+qnorm(a/2)*sd(x)/sqrt(n),mean(x)+qnorm(1-a/2)*sd(x)/sqrt(n)))
}

#' Bayesian credible interval for binomial quantity
#' 
#' @param k Number of experiments.
#' @param S Observed number of successes.
#' @param alpha Level.
#' 
#' @return Bayesian credible interval.
#' 
#' @importFrom Rdpack reprompt
#' @references .
#' 
#' @examples
#' require(chartreview)
#' print(credibleinterval(10,5,0.05))
#' 
#' @export
credibleinterval <- function(k,S,alpha) {
	# get quantiles of Beta posterior
	l <- qbeta(alpha/2,shape1=1+S,shape2=1+k-S)
	u <- qbeta(1-alpha/2,shape1=1+S,shape2=1+k-S)
	return(c(l,u))
}

#' Check if some interval is a subset of another interval
#' 
#' @param x First interval given by tuple.
#' @param y Second interval given by tuple.
#' 
#' @return Boolean answer if "x subseteq y".
#' 
#' @importFrom Rdpack reprompt
#' @references .
#' 
#' @examples
#' require(chartreview)
#' x <- sort(runif(2))
#' y <- sort(runif(2))
#' print(subsetInterval(x,y))
#' 
#' @export
subsetInterval <- function(x,y) {
	(x[1]>=y[1]) & (x[2]<=y[2])
}

#' Different options for the stopping criterion
#' 
#' @param ci Confidence interval as tuple vector.
#' @param stopCI Either a confidence interval for stoppingoption=1 and stoppingoption=2, or a scalar for stoppingoption=3.
#' @param stoppingoption Option to determine if the stopping criterion is satisfied (1 for confidence interval included in stopCI, 2 for upper bound below or lower bound above stopCI, 3 for length restriction on confidence interval).
#' 
#' @return Boolean answer if stopping criterion reached.
#' 
#' @importFrom Rdpack reprompt
#' @references .
#' 
#' @examples
#' require(chartreview)
#' stoppingcriterion(c(0.5,0.6), c(0.7,0.8), stoppingoption=1)
#' 
#' @export
stoppingcriterion <- function(ci,stopCI,stoppingoption=2) {
	# ci included in stopCI
	if(stoppingoption==1) return(subsetInterval(ci,stopCI))
	# (lower>=stopCI[1]) then return(1) or (upper<=stopCI[2]) then return(2) or both then return(3)
	if(stoppingoption==2) return( (ci[1]>=stopCI[1]) + 2*(ci[2]<=stopCI[2]) )
	# length of ci
	if(stoppingoption==3) return( abs(ci[2]-ci[1])<=stopCI )
}

#' Adaptive sampling algorithm which implements several types of sampling strategies
#' 
#' @param dat1 First dataset on which the strata are computed.
#' @param S Matrix defining the strata.
#' @param dat2 Second dataset on which confidence intervals are computed.
#' @param mode Sampling mode (1 for random sampling, 2 for stratified random sampling, 3 for Neyman's sampling).
#' @param batchsize Batch size in each wave.
#' @param raking Boolean flag to switch on raking.
#' @param rakingmode Option for raking (1 for random sampling, 2 for deterministic allocation, 3 for residual resampling).
#' @param rakingthreshold Threshold for applying raking to a stratum.
#' @param sdEstimate The estimate of the standard deviation as a function handle (usually sd or mad).
#' @param minSamples Minimum number of samples used in each iteration.
#' 
#' @return List with the resampled datasets per wave.
#' 
#' @importFrom Rdpack reprompt
#' @references .
#' 
#' @examples
#' require(chartreview)
#' 
#' @export
fullrun <- function(dat1,S,dat2,mode=1,batchsize=100,raking=TRUE,rakingmode=3,rakingthreshold=0.05,sdEstimate=mad,minSamples=10) {
	dataset1 <- list()
	dataset2 <- list()
	current1 <- NULL
	current2 <- NULL
	pool <- dat1
	response <- dat2
	niter <- 0
	nstrata <- nrow(S)
	allocation <- NULL
	# proportion of strata in full dataset
	strataN <- sapply(1:nstrata,function(i) length(stratum(dat1,S,i)) )
	strataW <- strataN/sum(strataN)
	
	while(TRUE) {
		niter <- niter+1
		
		# random sampling
		if(mode==1) {
			indices <- sample(1:nrow(pool),size=min(batchsize,nrow(pool)),replace=FALSE)
			store1 <- pool[indices,]
			store1 <- matrix(store1,ncol=ncol(dat1))
			store2 <- response[indices]
			toRemove <- indices
		}
		else {
			samplesLeft <- sapply(1:nstrata,function(i) length(stratum(pool,S,i)) )
			# stratified random sampling (weighted by number of strata)
			if(mode==2) {
				n_ <- floor(batchsize/sum(samplesLeft>0)) * (samplesLeft>0)
			}
			# stratified random sampling (weighted by strata size)
			if(mode==3) {
				n_ <- floor(batchsize*strataW) * (samplesLeft>0)
			}
			# Neyman sampling
			if(mode==4) {
				# sdev=standard deviation per stratum in "dataset2"
				sdev <- numeric(nstrata)
				for(i in 1:nstrata) {
					indices <- stratum(current1,S,i)
					sdev[i] <- sdEstimate(current2[indices])
				}
				sdev[is.na(sdev) | (sdev==0)] <- 1
				# prepare formula
				N <- strataN
				fraction <- N*sdev/sum(N*sdev)
				fraction[is.na(fraction)] <- 0
				if(sum(fraction)==0) fraction <- rep(1,nstrata)
				# formula
				n_ <- batchsize*fraction
				n_[n_<minSamples] <- minSamples
				if(sum(n_)==0) n_ <- rep(1,nstrata)
				# reweighting to spend exactly "batchsize"
				n_ <- floor(n_*batchsize/sum(n_))
			}
			# sampling
			store1 <- NULL
			store2 <- NULL
			toRemove <- NULL
			toSample <- pmin(n_,samplesLeft)
			allocation <- rbind(allocation,toSample)
			for(i in 1:nstrata) {
				indices <- sample(stratum(pool,S,i),size=toSample[i],replace=FALSE)
				temp <- pool[indices,]
				temp <- matrix(temp,ncol=ncol(dat1))
				store1 <- rbind(store1,temp)
				store2 <- c(store2,response[indices])
				toRemove <- c(toRemove,indices)
			}
		}
		
		# raking
		if(raking) {
			# vector with stratum ID for each entry in the sample seen so far
			tempdat <- rbind(current1,store1)
			indices1 <- numeric(nrow(tempdat))
			for(i in 1:nstrata) {
				temp <- stratum(tempdat,S,i)
				indices1[temp] <- i
			}
			# filter for only those strata that occur
			index_filter <- sort(unique(indices1))
			targets <- list(indices=strataW[index_filter])
			# recode indices1 to start from 1
			indices1 <- sapply(indices1, function(i) which(i==index_filter))
			rakingdat <- list(indices=indices1,caseid=1:length(indices1))
			# compute offset
			varoffset <- anesrakefinder(targets, data.frame(rakingdat), choosemethod="total")
			varoffset <- as.numeric(varoffset)
			# raking
			if(varoffset>rakingthreshold) {
				res <- tryCatch({
					rakingres <- anesrake(targets, data.frame(rakingdat), caseid = rakingdat$caseid, choosemethod="total", type="pctlim", pctlim=rakingthreshold, verbose=F,force1=F)
				}, warning=function(w) {w}, error=function(e) {e})
				issue <- is(res,"warning") | is(res,"error")
				if(!issue) {
					# reweigh store2 aka replace by artificial copy
					prop <- 0
					res <- tryCatch({
						prop <- wpct(store2,rakingres$weightvec)
					}, warning=function(w) {w}, error=function(e) {e})
					issue <- is(res,"warning") | is(res,"error")
					if( (!issue) & (length(prop)==2) ) {
						# option1: random sampling
						if(rakingmode==1) {
							store2 <- sample(0:1,size=length(store2),prob=prop,replace=TRUE)
						}
						# option2: deterministic allocation
						if(rakingmode==2) {
							num0 <- round(prop[1]*length(store2))
							store2 <- c( rep(0,num0), rep(1,length(store2)-num0) )
						}
						# option3: residual resampling
						if(rakingmode==3) {
							num <- floor(prop*length(store2))
							size <- length(store2)-sum(num)
							prob <- prop*length(store2)-num
							if(sum(prob)>0)	residual <- sample(0:1,size=size,prob=prob,replace=TRUE)
							else residual <- NULL
							store2 <- c( rep(0,num[1]), rep(1,num[2]), residual)
						}
					}
				}
			}
		}
		
		# save in list
		dataset1[[niter]] <- store1
		dataset2[[niter]] <- store2
		current1 <- rbind(current1,store1)
		current2 <- c(current2,store2)
		# remove used samples
		pool <- pool[-toRemove,]
		pool <- matrix(pool,ncol=ncol(dat1))
		response <- response[-toRemove]
		
		# break sampling if pool empty
		if(nrow(pool)==0) { break }
	}
	return(list(dataset1=dataset1,dataset2=dataset2,dat2=dat2,batchsize=batchsize))
}

#' Generate plots on confidence intervals and prediction
#' 
#' @param dataset2 The output dataset of the function 'fullrun'.
#' @param dat2 Second dataset on which confidence intervals are computed, see function 'fullrun'.
#' @param optionCI Parameter to switch between confidence intervals (1 for Lai's confidence bands, 2 for Bayesian credible intervals, 3 for normal confidence intervals).
#' @param stopCI The stopping bounds.
#' @param alpha The error used to compute confidence bands.
#' @param stoppingoption Type of stopping criterion (1 for confidence interval included in stopCI, 2 for upper bound below or lower bound above stopCI, 3 for length restriction on confidence interval).
#' @param xlim Optional parameter to set x-axis in plots.
#' @param ylim Optional parameter to set y-axis in plots.
#' @param main Optional parameter to set title of plots.
#' @param makePlot Parameter to control plot output.
#' 
#' @return List with confidence intervals (slot CIs), the stopping point (slot stopline), and the reason for stopping (stopreason, see function 'stoppingcriterion').
#' 
#' @importFrom Rdpack reprompt
#' @references .
#' 
#' @examples
#' require(chartreview)
#' 
#' @export
makeplot <- function(dataset2,dat2,optionCI=1,stopCI=NULL,alpha=0.05,stoppingoption=2,xlim=NULL,ylim=NULL,main=NULL,makePlot=TRUE) {
	# compute confidence intervals
	niter <- length(dataset2)
	pointestimate <- numeric(niter)
	CIs <- matrix(0,nrow=niter,ncol=2)
	pred <- matrix(NA,nrow=niter,ncol=3)
	temp <- NULL
	stopline <- NULL
	stopreason <- NULL
	
	# compute metrics
	for(i in 1:niter) {
		# update confidence interval
		temp <- c(temp, dataset2[[i]] )
		pointestimate[i] <- mean(temp)
		if(optionCI==1) CIs[i,] <- lai(length(temp),sum(temp),alpha)
		if(optionCI==2) CIs[i,] <- credibleinterval(length(temp),sum(temp),alpha)
		if(optionCI==3) CIs[i,] <- normalci(temp,alpha/niter)
		if(is.null(stopline) & !is.null(stopCI)) {
			stopreason <- stoppingcriterion(CIs[i,],stopCI,stoppingoption=stoppingoption)
			if(stopreason>0) {
				stopline <- i
			}
		}
	}
	
	if(makePlot) {
		# confidence intervals
		x <- 1:niter
		plot(x,pointestimate,type="l",lty=1,col="black",xlab="batch number",ylab="confidence interval",xlim=xlim,ylim=ylim,main=main)
		addRegion(x,CIs[,1],CIs[,2],col="red")
		
		# vertical stopping line
		abline(v=stopline,col="black",lty=2)
		
		# use all samples (horizonal)
		if(optionCI==1) oneCI <- lai(length(dat2),sum(dat2),alpha)
		if(optionCI==2) oneCI <- credibleinterval(length(dat2),sum(dat2),alpha)
		if(optionCI==3) oneCI <- normalci(dat2,alpha)
		#abline(h=oneCI,col="black",lty=3)
		
		# horizonal stopCI
		abline(h=stopCI,col="black",lty=3)
	}
	
	return(list(CIs=CIs,stopline=stopline,stopreason=stopreason))
}
