## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 4
)

## ----setup--------------------------------------------------------------------
library(rLifting)

if (!requireNamespace("ggplot2", quietly = TRUE)) {
  knitr::opts_chunk$set(eval = FALSE)
  message("'ggplot2' is required to render plots. Vignette code will not run.")
} else {
  library(ggplot2)
}

data("doppler_example", package = "rLifting")
data("benchmark_rlifting", package = "rLifting")
set.seed(20260522)

## ----causality-penalty--------------------------------------------------------
sub = subset(
  benchmark_rlifting,
  Wavelet == "cdf53" & Boundary == "symmetric" &
    ThresholdMethod == "universal" & !grepl("tuned", Method) &
    Shrinkage == "semisoft"
)

penalty = data.frame(
  Signal = unique(sub$Signal),
  offline_MSE = sapply(unique(sub$Signal),
  function(s)
    sub$MSE_median[sub$Signal == s & sub$Mode == "offline"]),

  causal_MSE_settled = sapply(unique(sub$Signal),
  function(s)
    sub$MSE_settled_median[sub$Signal == s & sub$Mode == "causal"]),

  stream_MSE_settled = sapply(unique(sub$Signal),
  function(s)
    sub$MSE_settled_median[sub$Signal == s & sub$Mode == "stream"])
)

penalty$causal_over_offline = round(
  penalty$causal_MSE_settled / penalty$offline_MSE, 2
)

penalty

## ----causality-plot, echo=FALSE, fig.cap="Figure 1: Causality penalty across the four Donoho–Johnstone signals. Offline (left) uses the full signal; causal and stream (right) only past samples. Penalty ranges from 1.3× on blocks to 3.3× on heavisine."----
df_pen = data.frame(
  Signal = rep(penalty$Signal, 3),
  Mode = factor(
    rep(
      c("Offline", "Causal", "Stream"), 
      each = nrow(penalty)
    ),
    levels = c("Offline", "Causal", "Stream")
  ),
  
  MSE = c(
    penalty$offline_MSE,
    penalty$causal_MSE_settled,
    penalty$stream_MSE_settled
  )
)

ggplot(df_pen, aes(x = Signal, y = MSE, fill = Mode)) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c(
      "Offline" = "#0072B2", "Causal" = "#D55E00", "Stream" = "#009E73"
    )
  ) +
  theme_minimal() +
  labs(
    title = "Causality penalty: MSE by mode and signal",
    subtitle = "cdf53, symmetric, universal/semisoft, default α/β",
    x = NULL, y = "MSE (settled for causal/stream)"
  )

## ----window-size-effect, eval=FALSE-------------------------------------------
# # Sketch of the trade-off (heuristic, not a chunk you should run blindly)
# window_size = 63 # short — fast, lower resolution, ~31 raw samples at start
# window_size = 127 # short-medium
# window_size = 255 # benchmark default
# window_size = 511 # long — better σ̂, more memory, longer warm-up

## ----latency-table------------------------------------------------------------
latency = aggregate(
  Per_sample_us_median ~ Mode + Wavelet, data = benchmark_rlifting,
  FUN = median
)

latency$Per_sample_us_median = round(latency$Per_sample_us_median, 2)

reshape(
  latency, idvar = "Wavelet", timevar = "Mode", direction = "wide"
)

## ----latency-plot, echo=FALSE, fig.cap="Figure 2: Median per-sample latency in microseconds across the three modes and five wavelets. Note the log scale — causal is ~80–115× slower per sample than offline, and stream adds another 1.5–2.1× over causal because of the R closure call overhead per sample."----
lat_long = aggregate(
  Per_sample_us_median ~ Mode + Wavelet, data = benchmark_rlifting,
  FUN = median
)

lat_long$Mode = factor(lat_long$Mode, levels = c("offline", "causal", "stream"))
ggplot(lat_long, aes(x = Wavelet, y = Per_sample_us_median, fill = Mode)) +
  geom_col(position = "dodge") +
  scale_y_log10() +
  scale_fill_manual(
    values = c(
      "offline" = "#0072B2", "causal" = "#D55E00", "stream" = "#009E73"
    )
  ) +
  theme_minimal() +
  labs(
    title = "Per-sample latency by mode and wavelet",
    subtitle = "Log scale on y; median across all benchmark configurations",
    x = NULL, y = "Microseconds per sample"
  )

## ----causal-best-wavelet------------------------------------------------------
best_causal = aggregate(
  MSE_settled_median ~ Signal + Wavelet,
  data = subset(benchmark_rlifting, Mode == "causal"),
  FUN = min
)

do.call(
  rbind,
  lapply(
    unique(best_causal$Signal),
    function(s) {
      ss = best_causal[best_causal$Signal == s, ]
      ss = ss[order(ss$MSE_settled_median), ]
      data.frame(
        Signal = s,
        ranking = paste(ss$Wavelet, collapse = " > ")
      )
    }
  )
)

## ----leakage-check------------------------------------------------------------
x = doppler_example$noisy
scheme = lifting_scheme("haar")
ws = 255

base_out = denoise_signal_causal(
  x, scheme,
  window_size = ws, levels = 4,
  shrinkage = "semisoft"
)

# Counterfactual: corrupt the second half of the signal severely
x_perturbed = x
half = floor(length(x) / 2)
x_perturbed[(half + 1):length(x)] = x_perturbed[(half + 1):length(x)] +
  rnorm(length(x) - half, sd = 5)

perturbed_out = denoise_signal_causal(
  x_perturbed, scheme,
  window_size = ws, levels = 4,
  shrinkage = "semisoft"
)

# Outputs at positions ≤ half must be unchanged
max_diff_before = max(abs(base_out[1:half] - perturbed_out[1:half]))
max_diff_after  = max(
  abs(
    base_out[(half + 1):length(x)] -
      perturbed_out[(half + 1):length(x)]
  )
)

data.frame(
  region = c(
    "samples 1..half (before perturbation)",
    "samples (half+1)..n (after perturbation)"
  ),
  max_abs_diff = c(max_diff_before, max_diff_after)
)