## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse  = TRUE,
  comment   = "#>",
  fig.width = 7,
  fig.height = 4.5,
  fig.align = "center"
)
set.seed(42L)

## ----install, eval = FALSE----------------------------------------------------
# install.packages("kronxNBC")

## ----synthetic-data-----------------------------------------------------------
library(kronxNBC)

set.seed(42L)
n <- 300L
mk <- n / 3L

make_regime <- function(ret_fn, vol_mu, vol_sd, dd_mu, dd_sd,
                        ts_mu, ts_sd, rp_a, rp_b, rp_lambda) {
  data.frame(
    log_return         = ret_fn(mk),
    rolling_volatility = abs(rnorm(mk, vol_mu,  vol_sd)),
    drawdown           = rnorm(mk, dd_mu,  dd_sd),
    transition_stress  = abs(rnorm(mk, ts_mu,  ts_sd)),
    residence_pressure = rpois(mk, rp_lambda),
    ruin_proxy         = rbeta(mk, rp_a, rp_b)
  )
}

X_syn <- rbind(
  # Calm: near-Gaussian, low volatility, shallow drawdowns
  make_regime(
    ret_fn    = function(n) rnorm(n, 0.0002, 0.003),
    vol_mu    = 0.004, vol_sd = 0.001,
    dd_mu     = -0.002, dd_sd = 0.002,
    ts_mu     = 0.001, ts_sd = 0.0005,
    rp_a = 1, rp_b = 20, rp_lambda = 1
  ),
  # Steady: positive drift, moderate drawdowns
  make_regime(
    ret_fn    = function(n) rnorm(n, 0.0005, 0.005),
    vol_mu    = 0.008, vol_sd = 0.002,
    dd_mu     = -0.008, dd_sd = 0.004,
    ts_mu     = 0.003, ts_sd = 0.001,
    rp_a = 2, rp_b = 10, rp_lambda = 3
  ),
  # Stress: fat-tailed (t_3), high volatility, deep drawdowns
  make_regime(
    ret_fn    = function(n) rt(n, df = 3L) * 0.012,
    vol_mu    = 0.022, vol_sd = 0.005,
    dd_mu     = -0.030, dd_sd = 0.010,
    ts_mu     = 0.015, ts_sd = 0.005,
    rp_a = 5, rp_b = 3, rp_lambda = 12
  )
)
X_syn <- as.matrix(X_syn)

y_syn <- factor(
  rep(c("Calm", "Steady", "Stress"), each = mk),
  levels = c("Calm", "Steady", "Stress")
)

# 80 / 20 random split (see Section 7 for the rationale)
set.seed(7L)
tr_idx  <- sample(n, size = floor(0.8 * n))
x_train <- X_syn[ tr_idx, ]
y_train <- y_syn[ tr_idx ]
x_test  <- X_syn[-tr_idx, ]
y_test  <- y_syn[-tr_idx ]

cat("Training set:", nrow(x_train), "observations\n")
cat("Test set:    ", nrow(x_test),  "observations\n")
cat("Class balance (train):\n")
print(table(y_train))

## ----fit-model----------------------------------------------------------------
model <- student_t_naive_bayes(x_train, y_train)
summary(model)

## ----print-model--------------------------------------------------------------
print(model)

## ----tables-all---------------------------------------------------------------
tabs <- tables(model)
print(tabs)

## ----tables-subset------------------------------------------------------------
# By name
tables(model, which = "log_return")

# By index (second feature: rolling_volatility)
tables(model, which = 2L)

## ----coef---------------------------------------------------------------------
cf <- coef(model)
cf

## ----nu-matrix----------------------------------------------------------------
nu_mat <- model$params$nu
nu_mat

## ----nu-interpretation--------------------------------------------------------
cat(sprintf(
  "log_return: nu(Calm) = %g  |  nu(Steady) = %g  |  nu(Stress) = %g\n",
  nu_mat["Calm",   "log_return"],
  nu_mat["Steady", "log_return"],
  nu_mat["Stress", "log_return"]
))

cat(sprintf(
  "drawdown:   nu(Calm) = %g  |  nu(Steady) = %g  |  nu(Stress) = %g\n",
  nu_mat["Calm",   "drawdown"],
  nu_mat["Steady", "drawdown"],
  nu_mat["Stress", "drawdown"]
))

## ----plot-conditional, fig.cap = "Conditional Student-t densities by regime for all six features. Note the heavier tails on the Stress curve for log_return."----
plot(model,
     prob      = "conditional",
     arg.num   = list(col = c("steelblue", "forestgreen", "firebrick"),
                      lty = c(1L, 2L, 4L)),
     lwd       = 2)

## ----plot-marginal, fig.cap = "Marginal (prior-weighted) Student-t densities."----
plot(model,
     prob    = "marginal",
     arg.num = list(col = c("steelblue", "forestgreen", "firebrick"),
                    lty = c(1L, 2L, 4L)),
     lwd     = 2)

## ----plot-subset, fig.cap = "Conditional densities for log_return and drawdown only."----
plot(model,
     which   = c("log_return", "drawdown"),
     prob    = "conditional",
     arg.num = list(col = c("steelblue", "forestgreen", "firebrick")),
     lwd     = 2)

## ----predict-class------------------------------------------------------------
pred_class <- predict(model, newdata = x_test, type = "class")

accuracy <- mean(pred_class == y_test)
cat(sprintf("Out-of-sample accuracy: %.1f%%\n", 100 * accuracy))

## ----confusion----------------------------------------------------------------
cm <- table(Actual = y_test, Predicted = pred_class)
cm

## ----predict-prob-------------------------------------------------------------
pred_prob <- predict(model, newdata = x_test, type = "prob")
cat("First 6 test observations — posterior probabilities:\n")
round(head(pred_prob), 4)

## ----stress-alert-------------------------------------------------------------
stress_prob  <- pred_prob[, "Stress"]
alert_flag   <- ifelse(stress_prob > 0.60, "Stress Alert", "No Alert")

cat("Alert summary (test period):\n")
print(table(alert_flag))

cat("\nHighest Stress probabilities in test set:\n")
top_stress <- sort(stress_prob, decreasing = TRUE)
round(head(top_stress, 8L), 4)

## ----gnb-setup----------------------------------------------------------------
set.seed(99L)
n_stress <- 40L
n_calm   <- 20L

X_stress_test <- cbind(
  log_return         = rt(n_stress, df = 3L) * 0.012,
  rolling_volatility = abs(rnorm(n_stress, 0.022, 0.005)),
  drawdown           = rnorm(n_stress, -0.030, 0.010),
  transition_stress  = abs(rnorm(n_stress, 0.015, 0.005)),
  residence_pressure = as.numeric(rpois(n_stress, 12L)),
  ruin_proxy         = rbeta(n_stress, 5, 3)
)

X_calm_test <- cbind(
  log_return         = rnorm(n_calm, 0.0002, 0.003),
  rolling_volatility = abs(rnorm(n_calm, 0.004, 0.001)),
  drawdown           = rnorm(n_calm, -0.002, 0.002),
  transition_stress  = abs(rnorm(n_calm, 0.001, 0.0005)),
  residence_pressure = as.numeric(rpois(n_calm, 1L)),
  ruin_proxy         = rbeta(n_calm, 1, 20)
)

X_mixed_test <- rbind(X_stress_test, X_calm_test)
y_mixed_test <- factor(
  c(rep("Stress", n_stress), rep("Calm", n_calm)),
  levels = c("Calm", "Steady", "Stress")
)

## ----gnb-fit------------------------------------------------------------------
library(naivebayes)

gnb <- naive_bayes(x_train, y_train, usekernel = FALSE)

## ----gnb-compare--------------------------------------------------------------
# Student-t NBC predictions
st_pred  <- predict(model, newdata = X_mixed_test, type = "class")
st_acc   <- mean(st_pred == y_mixed_test)

# Gaussian NBC predictions
gnb_pred <- predict(gnb, newdata = X_mixed_test, type = "class")
gnb_acc  <- mean(gnb_pred == y_mixed_test)

cat(sprintf("Student-t NBC overall accuracy: %.1f%%\n", 100 * st_acc))
cat(sprintf("Gaussian  NBC overall accuracy: %.1f%%\n", 100 * gnb_acc))

## ----gnb-confusion------------------------------------------------------------
cat("Student-t NBC:\n")
table(Actual = y_mixed_test, Predicted = st_pred)

cat("\nGaussian NBC:\n")
table(Actual = y_mixed_test, Predicted = gnb_pred)

## ----gnb-recall---------------------------------------------------------------
st_stress_recall  <- mean(st_pred[y_mixed_test == "Stress"]  == "Stress")
gnb_stress_recall <- mean(gnb_pred[y_mixed_test == "Stress"] == "Stress")

cat(sprintf("Student-t NBC Stress recall: %.1f%%\n", 100 * st_stress_recall))
cat(sprintf("Gaussian  NBC Stress recall: %.1f%%\n", 100 * gnb_stress_recall))

## ----tail-density-------------------------------------------------------------
x_extreme <- quantile(
  abs(x_train[y_train == "Stress", "log_return"]),
  probs = 0.95
)
cat("Extreme log_return value:", round(x_extreme, 5), "\n")

# Student-t log-density (Stress class parameters)
mu_st  <- model$params$mu["Stress", "log_return"]
sd_st  <- model$params$sd["Stress", "log_return"]
nu_st  <- model$params$nu["Stress", "log_return"]

ld_st <- stats::dt((x_extreme - mu_st) / sd_st, df = nu_st, log = TRUE) - log(sd_st)

# Gaussian log-density (same location/scale, normal tails)
ld_gn <- stats::dnorm(x_extreme, mean = mu_st, sd = sd_st, log = TRUE)

cat(sprintf("Student-t log-density at extreme point: %+.2f  (nu = %g)\n",
            ld_st, nu_st))
cat(sprintf("Gaussian  log-density at extreme point: %+.2f\n", ld_gn))
cat(sprintf("Difference (Student-t advantage):       %+.2f log-units\n",
            ld_st - ld_gn))

## ----custom-prior-------------------------------------------------------------
# Stress is rare in real time: 10% prior weight
model_informed <- student_t_naive_bayes(
  x     = x_train,
  y     = y_train,
  prior = c(Calm = 0.50, Steady = 0.40, Stress = 0.10)
)

cat("Priors (custom):\n")
print(model_informed$prior)

# The classifier becomes more conservative about Stress alerts
prob_informed <- predict(model_informed, newdata = x_test, type = "prob")
cat("\nMean posterior P(Stress) — uniform prior:  ",
    round(mean(pred_prob[, "Stress"]), 4), "\n")
cat("Mean posterior P(Stress) — informed prior: ",
    round(mean(prob_informed[, "Stress"]), 4), "\n")

## ----custom-grid-heavy--------------------------------------------------------
model_heavy <- student_t_naive_bayes(
  x       = x_train,
  y       = y_train,
  nu_grid = c(3, 4, 5, 6, 7, 8, 10)
)
model_heavy$params$nu

## ----custom-grid-gaussian-----------------------------------------------------
model_wide <- student_t_naive_bayes(
  x       = x_train,
  y       = y_train,
  nu_grid = c(3:30, 40, 60, 100, 200, 500)
)

## ----raw-params---------------------------------------------------------------
# Regime-mean volatility (mu of rolling_volatility by class)
model$params$mu[, "rolling_volatility"]

# Tail-thickness ranking across regimes for each feature
apply(model$params$nu, 2, which.min)  # index of class with lowest nu

## ----subset-predict-----------------------------------------------------------
# Predict using only the two most important risk features
x_reduced <- x_test[, c("log_return", "drawdown")]
pred_reduced <- predict(model, newdata = x_reduced, type = "class")

cat("Accuracy on full feature set:    ",
    round(mean(pred_class == y_test), 4), "\n")
cat("Accuracy on 2-feature subset:    ",
    round(mean(pred_reduced == y_test), 4), "\n")

## ----cor-features, eval = FALSE-----------------------------------------------
# library(zoo)
# 
# es_data <- read.csv("data.csv",           stringsAsFactors = FALSE)
# decoded <- read.csv("decoded_states.csv", stringsAsFactors = FALSE)
# es_data <- es_data[!is.na(es_data$ret), ]
# n_roll  <- 24L
# 
# # 1. Rolling volatility (SD of log-returns)
# rolling_vol <- rollapply(es_data$ret, n_roll, sd,   fill = NA, align = "right")
# rolling_vol <- pmax(rolling_vol, 0.0001)
# 
# # 2. Drawdown from rolling peak
# rolling_max <- rollapply(es_data$close, n_roll, max, fill = NA, align = "right")
# drawdown    <- (es_data$close - rolling_max) / rolling_max
# 
# # 3. Downside semi-deviation (left-tail sensitivity)
# downside_dev <- function(x) {
#   neg_x <- x[x < 0]
#   if (length(neg_x) == 0L) return(0)
#   sqrt(mean(neg_x^2))
# }
# trans_stress <- rollapply(es_data$ret, n_roll, downside_dev, fill = NA, align = "right")
# 
# # 4. Residence pressure (consecutive hours in drawdown)
# is_dd  <- as.integer(drawdown < -0.005)
# is_dd[is.na(is_dd)] <- 0L
# res_pr <- ave(is_dd, cumsum(is_dd == 0L), FUN = cumsum)
# 
# # 5. Ruin proxy: P(return <= -2%) under rolling distribution
# rolling_mean <- rollapply(es_data$ret, n_roll, mean, fill = NA, align = "right")
# ruin_proxy   <- pnorm(-0.02, mean = rolling_mean, sd = rolling_vol)
# 
# # Assemble and attach HMM labels
# state_labels <- c("1" = "Stress", "2" = "Calm", "3" = "Steady")
# 
# cor_data <- data.frame(
#   timestamp          = es_data$timestamp,
#   log_return         = es_data$ret,
#   rolling_volatility = rolling_vol,
#   drawdown           = drawdown,
#   transition_stress  = trans_stress,
#   residence_pressure = pmax(res_pr, 0.0001),
#   ruin_proxy         = pmax(ruin_proxy, 0.0001),
#   regime             = factor(
#     state_labels[as.character(decoded$state)],
#     levels = c("Calm", "Steady", "Stress")
#   )
# )
# cor_data <- cor_data[complete.cases(cor_data), ]

## ----cor-split, eval = FALSE--------------------------------------------------
# features <- c("log_return", "rolling_volatility", "drawdown",
#               "transition_stress", "residence_pressure", "ruin_proxy")
# 
# set.seed(123L)
# tr_idx  <- sample(nrow(cor_data), floor(0.80 * nrow(cor_data)))
# x_train <- as.matrix(cor_data[ tr_idx, features])
# y_train <- cor_data$regime[ tr_idx]
# x_test  <- as.matrix(cor_data[-tr_idx, features])
# y_test  <- cor_data$regime[-tr_idx]
# 
# model <- student_t_naive_bayes(x_train, y_train)

## ----session-info-------------------------------------------------------------
sessionInfo()

