## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse  = TRUE,
  comment   = "#>",
  fig.width = 7,
  fig.height = 4.5
)

## ----feature-engineering, eval = FALSE----------------------------------------
# library(zoo)
# 
# es_data <- read.csv("data.csv",          stringsAsFactors = FALSE)
# decoded <- read.csv("decoded_states.csv", stringsAsFactors = FALSE)
# 
# es_data <- es_data[!is.na(es_data$ret), ]          # drop leading NA
# stopifnot(nrow(es_data) == nrow(decoded))
# 
# n_roll <- 24L                                       # 24-hour window
# 
# cor_data <- data.frame(
#   timestamp  = es_data$timestamp,
#   log_return = es_data$ret
# )

## ----feat-vol, eval = FALSE---------------------------------------------------
# cor_data$rolling_volatility <- rollapply(
#   es_data$ret, width = n_roll, FUN = sd, fill = NA, align = "right"
# )
# cor_data$rolling_volatility <- pmax(cor_data$rolling_volatility, 0.0001)

## ----feat-dd, eval = FALSE----------------------------------------------------
# rolling_max          <- rollapply(es_data$close, width = n_roll,
#                                   FUN = max, fill = NA, align = "right")
# cor_data$drawdown    <- (es_data$close - rolling_max) / rolling_max

## ----feat-semidev, eval = FALSE-----------------------------------------------
# downside_dev <- function(x) {
#   neg_x <- x[x < 0]
#   if (length(neg_x) == 0L) return(0)
#   sqrt(mean(neg_x^2))
# }
# cor_data$transition_stress <- rollapply(
#   es_data$ret, width = n_roll, FUN = downside_dev, fill = NA, align = "right"
# )
# cor_data$transition_stress[is.na(cor_data$transition_stress)] <- 0.0001

## ----feat-res, eval = FALSE---------------------------------------------------
# is_dd <- ifelse(cor_data$drawdown < -0.005, 1L, 0L)
# is_dd[is.na(is_dd)] <- 0L
# cor_data$residence_pressure <- ave(
#   is_dd, cumsum(is_dd == 0L), FUN = cumsum
# )
# cor_data$residence_pressure <- pmax(cor_data$residence_pressure, 0.0001)

## ----feat-ruin, eval = FALSE--------------------------------------------------
# rolling_mean        <- rollapply(es_data$ret, width = n_roll,
#                                  FUN = mean, fill = NA, align = "right")
# cor_data$ruin_proxy <- pnorm(-0.02,
#                              mean = rolling_mean,
#                              sd   = cor_data$rolling_volatility)
# cor_data$ruin_proxy <- pmax(cor_data$ruin_proxy, 0.0001)

## ----feat-labels, eval = FALSE------------------------------------------------
# # KRONX empirical label mapping (derived from HMM state ordering)
# state_labels    <- c("1" = "Stress", "2" = "Calm", "3" = "Steady")
# cor_data$regime <- factor(
#   state_labels[as.character(decoded$state)],
#   levels = c("Calm", "Steady", "Stress")
# )
# 
# cor_data <- cor_data[complete.cases(cor_data), ]   # drop rolling-window NAs
# write.csv(cor_data, file = "nbc_analysis_report.txt", row.names = FALSE)

## ----train-test-split, eval = FALSE-------------------------------------------
# cor_data <- read.csv("nbc_analysis_report.txt", stringsAsFactors = FALSE)
# cor_data$regime <- factor(cor_data$regime, levels = c("Calm", "Steady", "Stress"))
# cor_data <- cor_data[!is.na(cor_data$regime), ]
# 
# features <- c("log_return", "rolling_volatility", "drawdown",
#               "transition_stress", "residence_pressure", "ruin_proxy")
# 
# set.seed(123)
# train_idx <- sample(seq_len(nrow(cor_data)), size = floor(0.80 * nrow(cor_data)))
# train     <- cor_data[ train_idx, ]
# test      <- cor_data[-train_idx, ]
# 
# x_train <- as.matrix(train[, features]);  y_train <- train$regime
# x_test  <- as.matrix(test[,  features]);  y_test  <- test$regime

## ----fit-model, eval = FALSE--------------------------------------------------
# library(kronxNBC)
# 
# model <- student_t_naive_bayes(x = x_train, y = y_train)
# print(model)

## ----fit-synthetic------------------------------------------------------------
library(kronxNBC)

set.seed(42L)
n  <- 300L
mk <- n / 3L

# Mimic the distributional shape of each regime
X_syn <- rbind(
  data.frame(                                          # Calm
    log_return         = rnorm(mk, 0.0002, 0.003),
    rolling_volatility = rnorm(mk, 0.004,  0.001),
    drawdown           = rnorm(mk, -0.002, 0.002),
    transition_stress  = abs(rnorm(mk, 0.001, 0.0005)),
    residence_pressure = rpois(mk, 1),
    ruin_proxy         = rbeta(mk, 1, 20)
  ),
  data.frame(                                          # Steady
    log_return         = rnorm(mk, 0.0005, 0.005),
    rolling_volatility = rnorm(mk, 0.008,  0.002),
    drawdown           = rnorm(mk, -0.008, 0.004),
    transition_stress  = abs(rnorm(mk, 0.003, 0.001)),
    residence_pressure = rpois(mk, 3),
    ruin_proxy         = rbeta(mk, 2, 10)
  ),
  data.frame(                                          # Stress: fat-tailed
    log_return         = rt(mk, df = 3) * 0.012,
    rolling_volatility = rnorm(mk, 0.022,  0.005),
    drawdown           = rnorm(mk, -0.030, 0.010),
    transition_stress  = abs(rnorm(mk, 0.015, 0.005)),
    residence_pressure = rpois(mk, 12),
    ruin_proxy         = rbeta(mk, 5, 3)
  )
)
X_syn <- as.matrix(X_syn)

y_syn <- factor(
  rep(c("Calm", "Steady", "Stress"), each = mk),
  levels = c("Calm", "Steady", "Stress")
)

set.seed(7L)
tr_idx  <- sample(n, size = floor(0.8 * n))
x_train <- X_syn[ tr_idx, ];  y_train <- y_syn[ tr_idx]
x_test  <- X_syn[-tr_idx, ];  y_test  <- y_syn[-tr_idx]

model <- student_t_naive_bayes(x_train, y_train)
summary(model)

## ----tables-------------------------------------------------------------------
tabs <- tables(model)
print(tabs)

## ----coef---------------------------------------------------------------------
coef(model)

## ----plot-model, fig.cap = "Per-feature Student-t densities by regime.  Heavier tails in the Stress curves are visible where nu is low."----
plot(model, prob = "conditional")

## ----predict------------------------------------------------------------------
pred_class <- predict(model, newdata = x_test, type = "class")
pred_prob  <- predict(model, newdata = x_test, type = "prob")

accuracy <- mean(pred_class == y_test)
cat("Out-of-sample accuracy:", round(accuracy, 4), "\n")

## ----confusion----------------------------------------------------------------
table(Actual = y_test, Predicted = pred_class)

## ----alert--------------------------------------------------------------------
stress_prob <- pred_prob[, "Stress"]
alert_flag  <- ifelse(stress_prob > 0.60, "COR Stress Alert", "No Alert")

cat("\nCOR Stress Alert Summary (test period):\n")
print(table(alert_flag))

cat("\nPosterior Stress probability — first 10 test observations:\n")
print(round(head(stress_prob, 10L), 4))

## ----nu-table-----------------------------------------------------------------
nu_df <- as.data.frame(t(model$params$nu))
colnames(nu_df) <- paste0("nu.", c("Calm", "Steady", "Stress"))
nu_df

## ----nu-commentary------------------------------------------------------------
nu_stress_ret <- model$params$nu["Stress", "log_return"]
nu_calm_ret   <- model$params$nu["Calm",   "log_return"]

cat(sprintf(
  "log_return: nu(Stress) = %.0f  |  nu(Calm) = %.0f\n",
  nu_stress_ret, nu_calm_ret
))

if (nu_stress_ret < nu_calm_ret) {
  cat("=> Stress regime shows heavier tails on log_return, as hypothesised.\n")
} else {
  cat("=> Note: with this synthetic data nu ordering may differ from empirical results.\n")
}

## ----session-info-------------------------------------------------------------
sessionInfo()