## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 4
)

## ----setup--------------------------------------------------------------------
library(rLifting)
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  knitr::opts_chunk$set(eval = FALSE)
  message("'ggplot2' is required to render plots. Vignette code will not run.")
} else {
  library(ggplot2)
}

data("benchmark_rlifting", package = "rLifting")
data("benchmark_wavethresh", package = "rLifting")
data("benchmark_adlift", package = "rLifting")
data("benchmark_nlt", package = "rLifting")
data("benchmark_rlifting_irregular", package = "rLifting")
data("benchmark_adlift_irregular", package = "rLifting")
data("benchmark_nlt_irregular", package = "rLifting")

## ----reg-mse------------------------------------------------------------------
reg_best = function(df, mse_col, time_col, pkg_label) {
  do.call(
    rbind, 
    by(
      df, df$Signal, function(s) {
        i = which.min(s[[mse_col]])
        data.frame(
          Signal = s$Signal[i],
          Package = pkg_label,
          MSE = s[[mse_col]][i],
          us = s[[time_col]][i] * 1e6 / s$N[i]
        )
      }
    )
  )
}

off_reg = benchmark_rlifting[benchmark_rlifting$Mode == "offline", ]
reg = rbind(
  reg_best(off_reg, "MSE_median", "Time_total_median", "rLifting (offline)"),
  reg_best(benchmark_wavethresh, "MSE_median", "Time_median", "wavethresh"),
  reg_best(benchmark_adlift, "MSE_median", "Time_median", "adlift"),
  reg_best(benchmark_nlt, "MSE_median", "Time_median", "nlt")
)

reg_wide = reshape(
  reg[, c("Signal", "Package", "MSE")],
  idvar = "Signal", timevar = "Package", direction = "wide"
)
names(reg_wide) = sub("MSE\\.", "", names(reg_wide))

knitr::kable(
  reg_wide, row.names = FALSE, digits = 4,
  caption = "Regular-grid MSE at each package's best configuration."
)

## ----reg-mse-fig, fig.cap = "Regular-grid MSE at each package's best configuration, by signal."----
ggplot(reg, aes(x = Package, y = MSE, fill = Package)) +
  geom_col() +
  facet_wrap(~ Signal, scales = "free_y") +
  scale_fill_brewer(palette = "Set2") +
  labs(x = NULL, y = "Median MSE") +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 30, hjust = 1)
  )

## ----reg-mse-geom-------------------------------------------------------------
geom_mean = function(x) exp(mean(log(x)))
geom_tbl = aggregate(MSE ~ Package, data = reg, FUN = geom_mean)
geom_tbl$MSE = round(geom_tbl$MSE, 4)
geom_tbl = geom_tbl[order(geom_tbl$MSE), ]
knitr::kable(
  geom_tbl, row.names = FALSE,
  caption = "Geometric mean MSE across the four DJ signals."
)

## ----reg-default--------------------------------------------------------------
rl_def = benchmark_rlifting[
  benchmark_rlifting$Mode == "offline" &
    benchmark_rlifting$Wavelet == "haar" &
    benchmark_rlifting$Boundary == "symmetric" &
    benchmark_rlifting$Method == "universal_semisoft",
  c("Signal", "MSE_median")
]

wt_def = benchmark_wavethresh[
  benchmark_wavethresh$Wavelet == "db1" &
    benchmark_wavethresh$Boundary == "BayesThresh_soft",
  c("Signal", "MSE_median")
]

ad_def = benchmark_adlift[
  benchmark_adlift$Wavelet == "LinearPred" &
    benchmark_adlift$Boundary == "linear_n1_int_noclo_mean",
  c("Signal", "MSE_median")
]

nlt_def = benchmark_nlt[
  benchmark_nlt$Wavelet == "LinearPred" &
    benchmark_nlt$Boundary == "linear_n1_int_noclo_mean",
  c("Signal", "MSE_median")
]

signals_ord = c("blocks", "bumps", "doppler", "heavisine")
def_wide = data.frame(
  Signal = signals_ord,
  rLifting = rl_def$MSE_median[match(signals_ord, rl_def$Signal)],
  wavethresh = wt_def$MSE_median[match(signals_ord, wt_def$Signal)],
  adlift = ad_def$MSE_median[match(signals_ord, ad_def$Signal)],
  nlt = nlt_def$MSE_median[match(signals_ord, nlt_def$Signal)]
)
knitr::kable(
  def_wide, row.names = FALSE, digits = 4,
  caption = "Regular-grid MSE at each package's default (out-of-the-box) configuration."
)

## ----reg-default-geom---------------------------------------------------------
def_geom = data.frame(
  Package = c("rLifting", "wavethresh", "adlift", "nlt"),
  Default = round(c(
    geom_mean(def_wide$rLifting),
    geom_mean(def_wide$wavethresh),
    geom_mean(def_wide$adlift),
    geom_mean(def_wide$nlt)
  ), 4)
)
knitr::kable(
  def_geom, row.names = FALSE,
  caption = "Geometric mean MSE at default configuration (regular grid)."
)

## ----reg-tuning-cost----------------------------------------------------------
best_geom = c(
  rLifting = geom_mean(reg$MSE[reg$Package == "rLifting (offline)"]),
  wavethresh = geom_mean(reg$MSE[reg$Package == "wavethresh"]),
  adlift = geom_mean(reg$MSE[reg$Package == "adlift"]),
  nlt = geom_mean(reg$MSE[reg$Package == "nlt"])
)

def_geom_v = c(
  rLifting = geom_mean(def_wide$rLifting),
  wavethresh = geom_mean(def_wide$wavethresh),
  adlift = geom_mean(def_wide$adlift),
  nlt = geom_mean(def_wide$nlt)
)

tuning_tbl = data.frame(
  Package = c("rLifting", "wavethresh", "adlift", "nlt"),
  Default = round(def_geom_v, 4),
  Best = round(best_geom,  4),
  Multiplier = round(def_geom_v / best_geom, 2)
)

knitr::kable(
  tuning_tbl, row.names = FALSE,
  caption = "Tuning cost: ratio of default to best geomean MSE. Larger = more benefit from tuning."
)

## ----reg-speed----------------------------------------------------------------
speed_tbl = aggregate(us ~ Package, data = reg, FUN = median)
speed_tbl$us = round(speed_tbl$us, 2)
speed_tbl = speed_tbl[order(speed_tbl$us), ]
names(speed_tbl) = c("Package", "us_per_sample_median")

knitr::kable(
  speed_tbl, row.names = FALSE,
  caption = "Per-sample time at best configuration, median across the four DJ signals."
)

## ----reg-speed-fig, fig.cap = "Per-sample time at each package's best configuration, log scale.", fig.height = 3.5----
ggplot(reg, aes(x = Package, y = us, fill = Package)) +
  geom_boxplot() +
  scale_y_log10() +
  scale_fill_brewer(palette = "Set2") +
  labs(x = NULL, y = expression("Per-sample time ("*mu*"s, log scale)")) +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 30, hjust = 1)
  )

## ----modes-mse-reg------------------------------------------------------------
mode_best = function(mode_name) {
  sub = benchmark_rlifting[benchmark_rlifting$Mode == mode_name, ]
  do.call(
    rbind,
    by(
      sub, sub$Signal, 
      function(s) {
        i = which.min(s$MSE_median)
        data.frame(
          Signal = s$Signal[i], Mode = mode_name,
          MSE = s$MSE_median[i], us = s$Per_sample_us_median[i]
        )
      }
    )
  )
}

modes_reg = rbind(
  mode_best("offline"),
  mode_best("causal"),
  mode_best("stream")
)

modes_mse = reshape(
  modes_reg[, c("Signal","Mode","MSE")],
  idvar = "Signal", timevar = "Mode", direction = "wide"
)
names(modes_mse) = sub("MSE\\.", "", names(modes_mse))

modes_mse$causal_penalty = round(modes_mse$causal / modes_mse$offline, 2)
modes_mse$stream_penalty = round(modes_mse$stream / modes_mse$offline, 2)

modes_mse[, c("offline","causal","stream")] = round(
  modes_mse[, c("offline","causal","stream")],
  4
)

knitr::kable(
  modes_mse, row.names = FALSE,
  caption = "MSE per mode, regular grid. Penalty = causal/offline or stream/offline."
)

## ----modes-speed-reg----------------------------------------------------------
modes_speed = reshape(
  modes_reg[, c("Signal","Mode","us")],
  idvar = "Signal", timevar = "Mode", 
  direction = "wide"
)
names(modes_speed) = sub("us\\.", "", names(modes_speed))

modes_speed[, c("offline", "causal", "stream")] = round(
  modes_speed[, c("offline", "causal", "stream")], 2
)

knitr::kable(
  modes_speed, row.names = FALSE,
  caption = "Per-sample time per mode, regular grid (us)."
)

## ----wavelet-ranking-per-mode-------------------------------------------------
wav_rank = do.call(
  rbind, lapply(
    c("offline", "causal", "stream"),
    function(mode) {
      mse_col = if (mode == "offline") "MSE_median" else "MSE_settled_median"
      sub = benchmark_rlifting[benchmark_rlifting$Mode == mode, ]
      agg = aggregate(sub[[mse_col]] ~ sub$Signal + sub$Wavelet, FUN = min)
      names(agg) = c("Signal", "Wavelet", "MSE")
      do.call(
        rbind, lapply(
          unique(agg$Signal), 
          function(s) {
            ss = agg[agg$Signal == s, ]
            ss = ss[order(ss$MSE), ]
            data.frame(
              Mode = mode,
              Signal = s,
              rank1 = ss$Wavelet[1],
              rank2 = ss$Wavelet[2],
              rank3 = ss$Wavelet[3],
              rank4 = ss$Wavelet[4]
            )
          }
        )
      )
    }
  )
)

knitr::kable(
  wav_rank, row.names = FALSE,
  caption = paste0(
    "Wavelet ranking per mode and signal (ranks 1-4 of 5 wavelets; ",
    "dd4 is consistently 5th and omitted)."
  )
)

## ----irr-mse------------------------------------------------------------------
irr_best = function(df, mse_col, time_col, pkg_label) {
  do.call(
    rbind, by(
      df, df$Signal, 
      function(s) {
        i = which.min(s[[mse_col]])
        data.frame(
          Signal = s$Signal[i],
          Package = pkg_label,
          MSE = s[[mse_col]][i],
          us = s[[time_col]][i] * 1e6 / s$N[i]
        )
      }
    )
  )
}

off_irr = benchmark_rlifting_irregular[
  benchmark_rlifting_irregular$Mode == "offline",
]

irr = rbind(
  irr_best(off_irr, "MSEpos_median", "Timepos_median", "rLifting (offline)"),
  irr_best(benchmark_adlift_irregular, "MSE_median", "Time_median", "adlift"),
  irr_best(benchmark_nlt_irregular, "MSE_median", "Time_median", "nlt")
)

irr_wide = reshape(
  irr[, c("Signal","Package","MSE")],
  idvar = "Signal", timevar = "Package", direction = "wide"
)
names(irr_wide) = sub("MSE\\.", "", names(irr_wide))

knitr::kable(
  irr_wide, row.names = FALSE, digits = 4,
  caption = "Irregular-grid MSE at each package's best configuration."
)

## ----irr-mse-fig, fig.cap = "Irregular-grid MSE at each package's best configuration, by signal.", fig.height = 4.5----
ggplot(irr, aes(x = Package, y = MSE, fill = Package)) +
  geom_col() +
  facet_wrap(~ Signal, scales = "free_y", nrow = 2) +
  scale_fill_brewer(palette = "Set2") +
  labs(x = NULL, y = "Median MSE") +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 30, hjust = 1)
  )

## ----irr-mse-geom-------------------------------------------------------------
geom_tbl_irr = aggregate(MSE ~ Package, data = irr, FUN = geom_mean)
geom_tbl_irr$MSE = round(geom_tbl_irr$MSE, 4)
geom_tbl_irr = geom_tbl_irr[order(geom_tbl_irr$MSE), ]
knitr::kable(
  geom_tbl_irr, row.names = FALSE,
  caption = "Geometric mean MSE across the seven irregular-grid signals."
)

## ----irr-default--------------------------------------------------------------
rl_irr_def = benchmark_rlifting_irregular[
  benchmark_rlifting_irregular$Mode == "offline" &
    benchmark_rlifting_irregular$Wavelet == "haar" &
    benchmark_rlifting_irregular$Boundary == "symmetric" &
    benchmark_rlifting_irregular$Method == "universal_semisoft",
  c("Signal", "MSEpos_median")
]
ad_irr_def = benchmark_adlift_irregular[
  benchmark_adlift_irregular$Wavelet == "LinearPred" &
    benchmark_adlift_irregular$Boundary == "linear_n1_int_noclo_mean",
  c("Signal", "MSE_median")
]
nlt_irr_def = benchmark_nlt_irregular[
  benchmark_nlt_irregular$Wavelet == "LinearPred" &
    benchmark_nlt_irregular$Boundary == "linear_n1_int_noclo_mean",
  c("Signal", "MSE_median")
]

irr_sigs_ord = c(
  "blocks_dj_irr", "bumps_dj_irr", "doppler_dj_irr", "heavisine_dj_irr",
  "blocks_gapped", "linear_phys", "trend_events"
)

irr_def_wide = data.frame(
  Signal = irr_sigs_ord,
  rLifting = rl_irr_def$MSEpos_median[match(irr_sigs_ord, rl_irr_def$Signal)],
  adlift = ad_irr_def$MSE_median[match(irr_sigs_ord, ad_irr_def$Signal)],
  nlt = nlt_irr_def$MSE_median[match(irr_sigs_ord, nlt_irr_def$Signal)]
)

knitr::kable(
  irr_def_wide, row.names = FALSE, digits = 4,
  caption = "Irregular-grid MSE at each package's default configuration."
)

## ----irr-tuning-cost----------------------------------------------------------
irr_best_geom = c(
  rLifting = geom_mean(irr$MSE[irr$Package == "rLifting (offline)"]),
  adlift = geom_mean(irr$MSE[irr$Package == "adlift"]),
  nlt = geom_mean(irr$MSE[irr$Package == "nlt"])
)

irr_def_geom = c(
  rLifting = geom_mean(irr_def_wide$rLifting),
  adlift = geom_mean(irr_def_wide$adlift),
  nlt = geom_mean(irr_def_wide$nlt)
)

irr_tuning_tbl = data.frame(
  Package = c("rLifting", "adlift", "nlt"),
  Default = round(irr_def_geom, 4),
  Best = round(irr_best_geom, 4),
  Multiplier = round(irr_def_geom / irr_best_geom, 2)
)

knitr::kable(
  irr_tuning_tbl, row.names = FALSE,
  caption = "Irregular-grid tuning cost: ratio of default to best geomean MSE."
)

## ----irr-speed----------------------------------------------------------------
irr_speed = aggregate(us ~ Package, data = irr, FUN = median)
irr_speed$us = round(irr_speed$us, 2)
irr_speed = irr_speed[order(irr_speed$us), ]
names(irr_speed) = c("Package", "us_per_sample_median")
knitr::kable(
  irr_speed, row.names = FALSE,
  caption = "Per-sample time at best configuration on irregular grids."
)

## ----irr-speed-fig, fig.cap = "Per-sample time on irregular grids, log scale.", fig.height = 3.5----
ggplot(irr, aes(x = Package, y = us, fill = Package)) +
  geom_boxplot() +
  scale_y_log10() +
  scale_fill_brewer(palette = "Set2") +
  labs(x = NULL, y = expression("Per-sample time ("*mu*"s, log scale)")) +
  theme_minimal(base_size = 10) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 30, hjust = 1)
  )

## ----modes-mse-irr------------------------------------------------------------
mode_best_irr = function(mode_name) {
  sub = benchmark_rlifting_irregular[
    benchmark_rlifting_irregular$Mode == mode_name &
      !is.na(benchmark_rlifting_irregular$MSEpos_median), 
  ]
  do.call(
    rbind, by(
      sub, sub$Signal, 
      function(s) {
        i = which.min(s$MSEpos_median)
        data.frame(
          Signal = s$Signal[i], Mode = mode_name,
          MSE = s$MSEpos_median[i],
          us = s$Timepos_median[i] * 1e6 / s$N[i]
        )
      }
    )
  )
}

modes_irr = rbind(
  mode_best_irr("offline"),
  mode_best_irr("causal"),
  mode_best_irr("stream")
)

modes_mse_irr = reshape(
  modes_irr[, c("Signal","Mode","MSE")],
  idvar = "Signal", timevar = "Mode", direction = "wide"
)
names(modes_mse_irr) = sub("MSE\\.", "", names(modes_mse_irr))

modes_mse_irr$causal_penalty = round(
  modes_mse_irr$causal /modes_mse_irr$offline, 
  2
)

modes_mse_irr[, c("offline","causal","stream")] = round(
  modes_mse_irr[, c("offline","causal","stream")], 
  4
)

knitr::kable(
  modes_mse_irr, row.names = FALSE,
  caption = "MSE per mode, irregular grid. Causal/stream are essentially identical; penalty = causal/offline."
)

## ----pareto, fig.cap = "Speed-quality trade-off: median MSE vs. per-sample time at each package's best configuration (regular grid, log time scale)."----
ggplot(reg, aes(x = us, y = MSE, colour = Package, shape = Package)) +
  geom_point(size = 3) +
  scale_x_log10() +
  scale_colour_brewer(palette = "Set1") +
  labs(
    x = expression("Per-sample time ("*mu*"s, log scale)"),
    y = "Median MSE"
  ) +
  facet_wrap(~ Signal, scales = "free_y") +
  theme_minimal(base_size = 10) +
  theme(legend.position = "bottom")

