## compareCstat package structure
# File: R/compare_c_stat.R

#' Compare C-statistics Between Two Models with Bootstrapped or Uno's C Confidence Intervals
#'
#' This function compares the C-statistics of two fitted models using either bootstrap resampling
#' (Harrell's C) or Uno's C via perturbation-resampling (survC1 package).
#'
#' @param model_raw A fitted model (e.g., coxph) representing the base model.
#' @param model_ext A fitted model (e.g., coxph) representing the extended model.
#' @param data The dataset used for fitting the models.
#' @param R Number of bootstrap or perturbation-resampling replications. Default is 100.
#' @param ci_type Type of confidence interval to return ("perc", "norm", "basic", etc., for Harrell's C).
#' @param method Which C-statistic to use: "Harrell" (default) or "Uno".
#' @param tau Truncation time for Uno's C (default is max observed time in your data).
#'
#' @return A data frame showing C-statistics for each model, their confidence intervals, and the p-value for the difference.
#' @importFrom boot boot boot.ci
#' @importFrom stats formula terms pnorm update
#' @examples
#' library(survival)
#' data(lung)
#' lung$status <- ifelse(lung$status == 2, 1, 0)
#' model1 <- coxph(Surv(time, status) ~ age, data = lung)
#' model2 <- coxph(Surv(time, status) ~ age + sex, data = lung)
#' compare_c_stat(model1, model2, data = lung, R = 10, method = "Harrell")
#' compare_c_stat(model1, model2, data = lung, R = 10, method = "Uno")
#' compare_c_stat(model1, model2, data = lung, R = 10, method = "Uno", tau = 365.25*2)
#' @references
#' Uno H, Cai T, Pencina MJ, D'Agostino RB, Wei LJ. (2011) On the C-statistics for evaluating overall adequacy of risk prediction procedures with censored survival data. \emph{Statistics in Medicine}, 30(10):1105-1117. \doi{10.1002/sim.4154}
#' @export
compare_c_stat <- function(model_raw, model_ext, data, R = 10, ci_type = "perc", method = "Harrell", tau = NULL) {

  if (method == "Harrell") {
    stat_function_model <- function(model, data, indices) {
      d <- data[indices, ]
      model_refit <- update(model, data = d)
      return(summary(model_refit)$concordance[1])
    }

    get_ci_bounds <- function(ci_obj) {
      if (ci_type == "perc") {
        return(c(round(ci_obj$percent[4], 3), round(ci_obj$percent[5], 3)))
      } else {
        stop("Only 'perc' CI type currently supported. Modify to support other types.")
      }
    }

    boot_results_raw <- boot::boot(
      data = data,
      statistic = function(data, indices) stat_function_model(model_raw, data, indices),
      R = R
    )
    c_stat_raw <- summary(model_raw)$concordance[1]
    ci_raw <- boot::boot.ci(boot_results_raw, type = ci_type)

    boot_results_ext <- boot::boot(
      data = data,
      statistic = function(data, indices) stat_function_model(model_ext, data, indices),
      R = R
    )
    c_stat_ext <- summary(model_ext)$concordance[1]
    ci_ext <- boot::boot.ci(boot_results_ext, type = ci_type)

    stat_function_c_stat_diff <- function(data, indices) {
      d <- data[indices, ]
      model_raw_refit <- update(model_raw, data = d)
      model_ext_refit <- update(model_ext, data = d)
      return(summary(model_ext_refit)$concordance[1] - summary(model_raw_refit)$concordance[1])
    }

    boot_results_diff <- boot::boot(
      data = data,
      statistic = stat_function_c_stat_diff,
      R = R
    )
    c_stat_diff <- mean(boot_results_diff$t)
    ci_diff <- boot::boot.ci(boot_results_diff, type = ci_type)

    ci_raw_bounds <- get_ci_bounds(ci_raw)
    ci_ext_bounds <- get_ci_bounds(ci_ext)
    ci_diff_bounds <- round(get_ci_bounds(ci_diff), 4)

    p_val_raw <- 2 * min(mean(boot_results_diff$t >= 0), 1 - mean(boot_results_diff$t >= 0))
    p_val_display <- ifelse(p_val_raw < 0.001, "<0.001", sprintf("%.3f", p_val_raw))

  } else if (method == "Uno") {
    stopifnot(requireNamespace("survC1", quietly = TRUE))

    # 1. Extract response and covariate names from model formula
    formula_raw <- formula(model_raw)
    formula_ext <- formula(model_ext)

    # Get Surv(time, status) variable names
    surv_call <- as.list(formula_raw[[2]])
    time_var <- as.character(surv_call[[2]])
    status_var <- as.character(surv_call[[3]])

    # Covariate names for both models
    covs0_terms <- attr(terms(formula_raw), "term.labels")
    covs1_terms <- attr(terms(formula_ext), "term.labels")

    # Covariate matrices (no intercept)
    covs0 <- as.matrix(data[, covs0_terms, drop = FALSE])
    covs1 <- as.matrix(data[, covs1_terms, drop = FALSE])

    # Time & status vectors
    time <- data[[time_var]]
    status <- data[[status_var]]

    # Ensure status coding is correct
    if (!all(status %in% c(0, 1))) {
      stop("Status must be coded 0 = censored, 1 = event for Uno C.")
    }

    # Data for Inf.Cval.Delta
    mydata <- data.frame(time = time, event = status)

    # Set tau if not provided
    if (is.null(tau)) tau <- max(time, na.rm = TRUE)

    # Run Uno's C index comparison
    uno.C.delta <- survC1::Inf.Cval.Delta(
      mydata = mydata,
      covs0 = covs0,
      covs1 = covs1,
      tau = tau,
      itr = R
    )

    # uno.C.delta is a 3x4 matrix (Model1, Model0, Delta; Est, SE, Lower95, Upper95)
    # Calculate Wald p-value for Delta
    delta_est <- uno.C.delta["Delta", "Est"]
    delta_se  <- uno.C.delta["Delta", "SE"]
    z         <- delta_est / delta_se
    p_val     <- 2 * (1 - pnorm(abs(z)))
    p_val_display <- ifelse(p_val < 0.001, "<0.001", sprintf("%.3f", p_val))

    # "Model1" is the extended, "Model0" is the raw/base
    c_stat_raw  <- uno.C.delta["Model0", "Est"]
    ci_raw_bounds <- round(uno.C.delta["Model0", c("Lower95", "Upper95")], 4)
    c_stat_ext  <- uno.C.delta["Model1", "Est"]
    ci_ext_bounds <- round(uno.C.delta["Model1", c("Lower95", "Upper95")], 4)
    c_stat_diff <- delta_est
    ci_diff_bounds <- round(uno.C.delta["Delta", c("Lower95", "Upper95")], 4)

  } else {
    stop("Invalid method specified. Use 'Harrell' or 'Uno'.")
  }

  # Output table (always in same format)
  result_table <- data.frame(
    Model = c("Model 1 (Raw)", "Model 2 (Extended)", "Difference"),
    C_Statistic = c(round(c_stat_raw, 4), round(c_stat_ext, 4), round(c_stat_diff, 4)),
    CI_Lower = c(ci_raw_bounds[1], ci_ext_bounds[1], ci_diff_bounds[1]),
    CI_Upper = c(ci_raw_bounds[2], ci_ext_bounds[2], ci_diff_bounds[2]),
    P_Value = c(NA, NA, p_val_display)
  )

  return(result_table)
}
