## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 4
)

## ----setup--------------------------------------------------------------------
library(rLifting)

if (!requireNamespace("ggplot2", quietly = TRUE)) {
  knitr::opts_chunk$set(eval = FALSE)
  message("'ggplot2' is required to render plots. Vignette code will not run.")
} else {
  library(ggplot2)
}

set.seed(20260521)

## ----motivate-----------------------------------------------------------------
n = 1024
t = seq(0, 1, length.out = n)
pure = sqrt(t * (1 - t)) * sin((2 * pi * 1.05) / (t + 0.05))
noisy = pure + rnorm(n, sd = 0.15)

scheme = lifting_scheme("cdf53")

# Three configurations
default_clean = denoise_signal_offline(
  noisy, scheme, levels = 5,
  shrinkage = "semisoft", alpha = 0.3, beta = 1.2
)

oversmooth_clean = denoise_signal_offline(
  noisy, scheme, levels = 5,
  shrinkage = "semisoft", alpha = 5, beta = 2.5
)

undersmooth_clean = denoise_signal_offline(
  noisy, scheme, levels = 5,
  shrinkage = "hard", alpha = 0.1, beta = 0.5
)

mse = function(est) mean((pure - est)^2)
data.frame(
  config = c(
    "default (semisoft, α=0.3, β=1.2)",
    "over (semisoft, α=5, β=2.5)",
    "under (hard, α=0.1, β=0.5)"
  ),
  MSE = c(
    mse(default_clean), 
    mse(oversmooth_clean), 
    mse(undersmooth_clean)
  )
)

## ----motivate-benchmark-------------------------------------------------------
data("benchmark_rlifting", package = "rLifting")
sub = subset(
  benchmark_rlifting,
  Mode == "offline" & Wavelet == "cdf53" & Boundary == "symmetric"
)

aggregate(
  MSE_median ~ Signal, data = sub,
  FUN = function(x) c(
    min = min(x), median = median(x),
    max = max(x), ratio = round(max(x)/min(x), 2)
  )
)

## ----motivate-plot, echo=FALSE, fig.cap="Figure 1: The same noisy Doppler signal denoised with three different parameter combinations. The default semisoft configuration tracks the underlying signal closely; the over-smoothed run loses fine detail; the under-smoothed hard configuration retains visible residual noise."----
df = data.frame(
  index = rep(seq_len(n), 4),
  value = c(pure, default_clean, oversmooth_clean, undersmooth_clean),
  config = factor(
    rep(
      c("Original", "Default", "Over-smoothed", "Under-smoothed"), 
      each = n
    ),
    levels = c(
      "Original", "Default", 
      "Over-smoothed", "Under-smoothed"
    )
  )
)
ggplot(df, aes(x = index, y = value, colour = config)) +
  geom_line(linewidth = 0.5) +
  facet_wrap(~ config, ncol = 2) +
  scale_colour_manual(
    values = c(
      "Original" = "black",
      "Default" = "#0072B2",
      "Over-smoothed" = "#D55E00",
      "Under-smoothed" = "#009E73"
    )
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(
    title = "Same signal, three parameter choices",
    x = "Sample index", y = "Amplitude"
  )

## ----mad-estimate-------------------------------------------------------------
lw = lwt(noisy, scheme, levels = 5)
d1 = lw$coeffs$d1

sigma_mad = mad(d1, constant = 1.4826) # 1/0.6745 ≈ 1.4826
sigma_sd = sd(d1) # for comparison

cat(sprintf("True noise sd  : 0.15\n"))
cat(sprintf("MAD-based  σ̂  : %.4f  (used by rLifting)\n", sigma_mad))
cat(
  sprintf(
    "Plain sd of d1 : %.4f  (sensitive to large signal coeffs)\n", sigma_sd
  )
)

## ----mad-robust---------------------------------------------------------------
# Inject five spurious large coefficients (simulating a sharp event)
d1_contam = d1
d1_contam[sample.int(length(d1), 5)] = c(2, -2, 1.8, -1.7, 1.6)
cat(sprintf("With 5 large coeffs added:\n"))

cat(
  sprintf(
    "  MAD-based σ̂ : %.4f  (essentially unchanged)\n",
    mad(d1_contam, constant = 1.4826)
  )
)

cat(
  sprintf(
    "  Plain sd     : %.4f  (inflated by outliers)\n",
    sd(d1_contam)
  )
)

## ----mad-hist, echo=FALSE, fig.cap="Figure 2: Distribution of the finest-level detail coefficients $d_1$ for the noisy Doppler signal. Dashed lines mark $\\pm\\hat{\\sigma}$, the MAD-based noise estimate used as input to the universal threshold rule."----
ggplot(data.frame(d1 = d1), aes(x = d1)) +
  geom_histogram(bins = 60, fill = "grey60", colour = "white") +
  geom_vline(
    xintercept = c(-sigma_mad, sigma_mad),
    linetype = "dashed", colour = "#D55E00", 
    linewidth = 0.6
  ) +
  annotate(
    "text", x = sigma_mad, y = Inf, label = "±σ̂",
    colour = "#D55E00", vjust = 1.5, hjust = -0.2
  ) +
  theme_minimal() +
  labs(
    title = "Distribution of d_1 coefficients with MAD-based σ̂",
    x = "d_1", y = "Count"
  )

## ----univ-vs-sure-------------------------------------------------------------
make_noisy = function(type, n, sd) {
  pure = rLifting:::.generate_signal(type, n = n)
  list(pure = pure, noisy = pure + rnorm(n, sd = sd))
}

n_3 = 1024
signals = list(
  doppler   = make_noisy("doppler",   n_3, sd = 0.15),
  heavisine = make_noisy("heavisine", n_3, sd = 0.35),
  blocks    = make_noisy("blocks",    n_3, sd = 0.50)
)

compare_rules = function(sig) {
  un = denoise_signal_offline(
    sig$noisy, scheme, levels = 5,
    threshold_method = "universal", shrinkage = "semisoft"
  )
  su = denoise_signal_offline(
    sig$noisy, scheme, levels = 5,
    threshold_method = "sure", shrinkage = "semisoft"
  )
  c(
    universal = mean((sig$pure - un)^2),
    sure = mean((sig$pure - su)^2)
  )
}

mse_table = t(sapply(signals, compare_rules))
round(mse_table, 4)

## ----univ-vs-sure-benchmark---------------------------------------------------
b = subset(
  benchmark_rlifting,
  Mode == "offline" & Wavelet == "cdf53" & Boundary == "symmetric" &
    ThresholdMethod %in% c("universal", "sure") & !grepl("tuned", Method)
)

b$rule = b$ThresholdMethod
mat = aggregate(MSE_median ~ Signal + Shrinkage + rule, data = b, FUN = mean)
rs = reshape(
  mat, idvar = c("Signal", "Shrinkage"),
  timevar = "rule", direction = "wide"
)

rs$winner = ifelse(
  rs$MSE_median.universal < rs$MSE_median.sure,
  "universal", "sure"
)

rs[
  order(rs$Signal, rs$Shrinkage),
  c("Signal", "Shrinkage", "MSE_median.universal", "MSE_median.sure", "winner")
]

## ----univ-vs-sure-lambdas-----------------------------------------------------
# Per-level lambda comparison on the Doppler signal
lw_dop = lwt(signals$doppler$noisy, scheme, levels = 5)
sigma_global = mad(lw_dop$coeffs$d1, constant = 1.4826)

# Universal: recursive decay with default alpha=0.3, beta=1.2
lam_univ = compute_adaptive_threshold(lw_dop, alpha = 0.3, beta = 1.2)
lam_univ_vec = unlist(lam_univ)

# SURE: per-level. Replicate by computing optimal lambda per level.
sure_lambda_level = function(d) {
  sigma_j = mad(d, constant = 1.4826)
  if (sigma_j < 1e-15) return(0)
  abs_d = sort(abs(d))
  m = length(d)
  cap = sigma_j * sqrt(2 * log(m))
  best = m * sigma_j^2
  bl = 0
  cs = c(0, cumsum(abs_d^2))
  for (k in seq_len(m)) {
    lam = abs_d[k]
    s = m * sigma_j^2 + cs[k + 1] + lam^2 * (m - k) - 2 * sigma_j^2 * k
    if (s < best) { best = s; bl = lam }
  }
  min(bl, cap)
}
lam_sure_vec = sapply(lw_dop$coeffs[paste0("d", 1:5)], sure_lambda_level)

data.frame(
  level = 1:5,
  universal = round(lam_univ_vec, 4),
  sure = round(unname(lam_sure_vec), 4)
)

## ----sure-inert---------------------------------------------------------------
a1 = denoise_signal_offline(
  signals$doppler$noisy, scheme, levels = 5,
  threshold_method = "sure", shrinkage = "semisoft",
  alpha = 0.3, beta = 1.2
)

a2 = denoise_signal_offline(
  signals$doppler$noisy, scheme, levels = 5,
  threshold_method = "sure", shrinkage = "semisoft",
  alpha = 999, beta = 0.01
)

identical(a1, a2)

## ----alpha-sweep--------------------------------------------------------------
n_alpha = 1024
sigma_demo = 0.15
levels_demo = 6
lam1 = 1.2 * sigma_demo * sqrt(2 * log(n_alpha))

recurse = function(alpha, k_max, lam1) {
  lam = numeric(k_max); lam[1] = lam1
  for (k in 2:k_max) lam[k] = lam[k-1] * (k-1) / (k + alpha - 1)
  lam
}

alphas = c(0, 0.1, 0.3, 1, 5, 50)
df_alpha = do.call(
  rbind, 
  lapply(
    alphas, 
    function(a) {
      data.frame(
        level = 1:levels_demo,
        lambda = recurse(a, levels_demo, lam1),
        alpha = factor(a)
      )
    }
  )
)

df_alpha

## ----alpha-plot, echo=FALSE, fig.cap="Figure 3: Effect of the parameter $\\alpha$ on the per-level threshold $\\lambda_k$, holding $\\lambda_1$ fixed and varying $\\alpha$ across six representative values. The y-axis is on a log scale. At $\\alpha = 0$ the threshold is constant across all levels; as $\\alpha$ grows the decay becomes increasingly aggressive."----
ggplot(df_alpha, aes(x = level, y = lambda, colour = alpha, group = alpha)) +
  geom_line(linewidth = 0.7) +
  geom_point(size = 2) +
  scale_y_log10() +
  theme_minimal() +
  labs(title = "α controls how aggressively λ decays across levels",
       subtitle = "Same λ_1; varying α only. Log scale on y.",
       x = "Decomposition level k", y = expression(lambda[k])) +
  scale_colour_brewer(palette = "RdBu", direction = -1)

## ----tune-run-----------------------------------------------------------------
opt = tune_alpha_beta(signals$heavisine$noisy, scheme, levels = 5)
opt

## ----tune-mse-----------------------------------------------------------------
def_clean = denoise_signal_offline(
  signals$heavisine$noisy, scheme, levels = 5,
  shrinkage = "semisoft", alpha = 0.3, beta = 1.2
)

tun_clean = denoise_signal_offline(
  signals$heavisine$noisy, scheme, levels = 5,
  shrinkage = "semisoft", alpha = opt$alpha, beta = opt$beta
)

c(
  default_MSE = mean((signals$heavisine$pure - def_clean)^2),
  tuned_MSE = mean((signals$heavisine$pure - tun_clean)^2)
)

## ----sure-landscape, fig.height=5, fig.cap="Figure 4: SURE landscape on the (α, β) grid for the noisy Doppler signal. Darker regions correspond to lower SURE. The white cross marks the optimum returned by tune_alpha_beta(). The landscape is broad and slightly multimodal, which is why phase 1 of the tuner is a grid search before Nelder-Mead refinement."----
lw_tune = lwt(signals$doppler$noisy, scheme, levels = 5)
details_tune = lw_tune$coeffs[paste0("d", 1:5)]
sigma_tune = mad(details_tune[[1]], constant = 1.4826)
n_finest = length(details_tune[[1]])

sure_at = function(a, b) {
  lam = numeric(5); lam[1] = b * sigma_tune * sqrt(2 * log(n_finest))
  for (k in 2:5) lam[k] = lam[k-1] * (k-1) / (k + a - 1)
  total = 0
  for (j in seq_along(details_tune)) {
    d = details_tune[[j]]; m = length(d)
    sj = mad(d, constant = 1.4826); if (sj < 1e-15) sj = sigma_tune
    total = total + m * sj^2 +
      sum(pmin(d^2, lam[j]^2)) -
      2 * sj^2 * sum(abs(d) <= lam[j])
  }
  total
}

a_grid = seq(0, 5, length.out = 26)
b_grid = seq(0.5, 3.0, length.out = 26)
grid_df = expand.grid(alpha = a_grid, beta = b_grid)
grid_df$sure = mapply(sure_at, grid_df$alpha, grid_df$beta)

ggplot(
  grid_df, aes(x = alpha, y = beta, fill = sure)
) +
  geom_raster(interpolate = TRUE) +
  geom_point(
    data = data.frame(alpha = opt$alpha, beta = opt$beta),
    aes(x = alpha, y = beta), inherit.aes = FALSE,
    colour = "white", size = 3, shape = 4, stroke = 1.2
  ) +
  scale_fill_viridis_c(option = "magma") +
  theme_minimal() +
  labs(
    title = "SURE landscape (white × marks the optimum)",
    x = expression(alpha), y = expression(beta), fill = "SURE"
  )

## ----shrinkage-curves, fig.height=3.5, echo=FALSE, fig.cap="Figure 5: The four shrinkage functions with $\\lambda = 1$. Soft introduces a constant bias of $\\lambda$ on large coefficients; hard is bias-free above $\\lambda$ but discontinuous; semisoft transitions continuously and asymptotes to the identity; SCAD is bias-free above $a\\lambda$ (default $a = 3.7$)."----
x_th = seq(-3, 3, length.out = 301)
lam = 1.0
df_th = data.frame(
  x = rep(x_th, 4),
  y = c(
    threshold(x_th, lam, "hard"),
    threshold(x_th, lam, "soft"),
    threshold(x_th, lam, "semisoft"),
    threshold(x_th, lam, "scad", a = 3.7)
  ),
  Method = factor(
    rep(
      c("Hard", "Soft", "Semisoft", "SCAD"),
      each = length(x_th)
    ),
    levels = c("Hard", "Soft", "Semisoft", "SCAD")
  )
)
ggplot(df_th, aes(x = x, y = y, colour = Method)) +
  geom_line(linewidth = 0.8) +
  geom_abline(
    slope = 1, intercept = 0,
    linetype = "dotted", colour = "grey60"
  ) +
  geom_vline(
    xintercept = c(-lam, lam),
    linetype = "dashed", colour = "grey40"
  ) +
  theme_minimal() +
  labs(
    title = "Four shrinkage functions (λ = 1)",
    x = "Input coefficient", y = "Output coefficient"
  ) +
  scale_colour_manual(
    values = c(
      "Hard" = "#E69F00", "Soft" = "#56B4E9",
      "Semisoft" = "#009E73", "SCAD" = "#CC79A7"
    )
  )

## ----shrinkage-mse------------------------------------------------------------
shrink_mse = function(sig, levels = 5) {
  m = sapply(c("hard", "soft", "semisoft", "scad"), function(s) {
    out = denoise_signal_offline(
      sig$noisy, scheme, levels = levels,
      shrinkage = s, threshold_method = "universal",
      alpha = 0.3, beta = 1.2
    )
    mean((sig$pure - out)^2)
  })
  round(m, 4)
}

shrink_table = t(sapply(signals, shrink_mse))
shrink_table

## ----shrinkage-by-signal, echo=FALSE, fig.height=3.5, fig.cap="Figure 6: MSE by shrinkage rule across three test signals (Doppler, HeaviSine, Blocks), holding the universal threshold rule and the default $\\alpha$, $\\beta$ fixed. Y-axes use independent scales."----
df_sh = as.data.frame(shrink_table)
df_sh$signal = rownames(df_sh)
df_long = reshape(
  df_sh, varying = c("hard", "soft", "semisoft", "scad"),
  v.names = "MSE", times = c("hard", "soft", "semisoft", "scad"),
  timevar = "shrinkage", direction = "long"
)

df_long$shrinkage = factor(
  df_long$shrinkage,
  levels = c("hard", "soft", "semisoft", "scad")
)

ggplot(df_long, aes(x = shrinkage, y = MSE, fill = shrinkage)) +
  geom_col() +
  facet_wrap(~ signal, scales = "free_y", ncol = 3) +
  scale_fill_manual(
    values = c(
      "hard" = "#E69F00", "soft" = "#56B4E9",
      "semisoft" = "#009E73", "scad" = "#CC79A7"
    )
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  labs(title = "Shrinkage MSE by signal",
       x = "Shrinkage", y = "MSE")

## ----shrinkage-benchmark------------------------------------------------------
bs = subset(
  benchmark_rlifting,
  Mode == "offline" & Wavelet == "cdf53" & Boundary == "symmetric" &
      ThresholdMethod == "universal" & !grepl("tuned", Method)
)
bs_agg = aggregate(MSE_median ~ Signal + Shrinkage, data = bs, FUN = mean)
bs_wide = reshape(
  bs_agg, idvar = "Signal", timevar = "Shrinkage", direction = "wide"
)

names(bs_wide) = sub("MSE_median\\.", "", names(bs_wide))
bs_wide$winner = c("hard", "soft", "semisoft", "scad")[
  apply(bs_wide[, c("hard", "soft", "semisoft", "scad")], 1, which.min)
]

bs_wide[, c("Signal", "hard", "soft", "semisoft", "scad", "winner")]