mixedBayes

Bayesian Longitudinal Regularized Quantile Mixed Model

With high-dimensional omics features, repeated-measures ANOVA leads to longitudinal gene-environment interaction studies that have intra-cluster correlations, outlying observations and structured sparsity arising from the ANOVA design. In this package, we have developed robust sparse Bayesian mixed-effects models tailored for such studies (Fan et al., 2025). An efficient Gibbs sampler has been developed to facilitate fast computation. The Markov chain Monte Carlo algorithms of the proposed and alternative methods are efficiently implemented in ‘C++’. The development of this software package and the associated statistical methods has been partially supported by an Innovative Research Award from Johnson Cancer Research Center, Kansas State University.

How to install

To install from GitHub, run these two lines of code in R:

# Install devtools only when it is missing, then install mixedBayes from GitHub
if (!requireNamespace("devtools", quietly = TRUE)) install.packages("devtools")
devtools::install_github("kunfa/mixedBayes")

Released versions of mixedBayes are available on CRAN (https://cran.r-project.org/package=mixedBayes), and can be installed within R via

install.packages("mixedBayes")

Load the package

library(mixedBayes)

Data generation under random intercept-and-slope model with t(2) error

# Simulate longitudinal gene-environment interaction data under a random
# intercept-and-slope model with t(2) errors.
#
# Arguments:
#   n      number of subjects (at least 2, so the genetic factors form a matrix).
#   p      number of genetic factors (at least 15: genes 2, 4, 7 and 12 carry
#          main effects and genes 2, 4, 6, 7, 12 and 15 carry interactions).
#   k      number of repeated measurements per subject.
#   q      number of environmental dummy variables; subjects are assigned to
#          one of q + 1 groups, group 0 being the reference.
#   quant  quantile level; each subject's errors are shifted so that their
#          sample quantile at this level is zero.
#
# Returns a list with
#   y, g, e  response, genetic and environmental factors after reformat()
#            (conversion to long format),
#   X        stacked time design (intercept, linear and quadratic time),
#   coef     true coefficients: p main effects followed by p * q interactions.
#
# Requires the MASS package (mvrnorm) and reformat() from mixedBayes.
Data <- function(n, p, k, q, quant) {
  stopifnot(n >= 2, p >= 15, k >= 1, q >= 1)

  # Generate genetic factors with AR(1)-type correlation 0.5^|i - j|
  sig <- 0.5^abs(outer(seq_len(p), seq_len(p), "-"))
  g <- MASS::mvrnorm(n, rep(0, p), sig)
  g <- scale(as.matrix(g))

  # True main effects for genetic variables
  beta_true <- rep(0, p)
  beta_true[c(2, 4, 7, 12)] <- runif(4, 0.4, 0.8)

  # Generate environmental factors (a group of q dummy variables)
  group <- sample(0:q, size = n, replace = TRUE)
  e <- 1 * outer(group, seq_len(q), "==")
  colnames(e) <- paste0("e", seq_len(q))
  e <- scale(e)

  # Fixed effects for environmental main effects
  alpha1 <- runif(q, 0.4, 0.8)

  # Fixed effects for time (polynomial terms)
  alpha2 <- runif(3, 0.4, 0.8)

  # Interaction terms: for each gene, the gene multiplied by every column of
  # e, gene by gene, built in one pass instead of growing a matrix in a loop
  w <- do.call(cbind, lapply(seq_len(p), function(j) g[, j] * e))

  # True interaction effects: gene j owns columns ((j - 1) * q + 1):(j * q)
  eta_true <- rep(0, p * q)
  for (j in c(2, 4, 6, 7, 12, 15)) {
    eta_true[((j - 1) * q + 1):(j * q)] <- runif(q, 0.4, 0.8)
  }

  # Combine all coefficients
  betas_true <- c(beta_true, eta_true)

  # Random effects design: intercept and centered time
  time_centered <- seq_len(k) - mean(seq_len(k))
  z <- cbind(1, time_centered)

  # Fixed-effect design for time
  xi <- cbind(1, seq_len(k), seq_len(k)^2)
  X <- do.call(rbind, replicate(n, xi, simplify = FALSE))

  # Subject-level and time-level parts of the mean do not change inside the
  # loop, so compute them once
  subject_mean <- drop(e %*% alpha1 + g %*% beta_true + w %*% eta_true)
  time_trend <- drop(xi %*% alpha2)

  # Generate response
  y <- matrix(0, n, k)
  for (i in seq_len(n)) {
    # Subject-specific random intercept and slope
    ata <- runif(2, 0, 1)
    error <- rt(k, 2)
    error <- error - quantile(error, probs = quant, names = FALSE)
    y[i, ] <- subject_mean[i] + time_trend + drop(z %*% ata) + error
  }

  # Convert to long format
  y <- reformat(k, y, type = "r")
  g <- reformat(k, g, type = "d")
  e <- reformat(k, e, type = "d")

  list(y = y, g = g, e = e, X = X, coef = betas_true)
}

library(MASS)
library(mixedBayes)
# Simulation settings: subjects, genes, environmental dummies, time points
# and the quantile level
n <- 200
p <- 100
q <- 3
k <- 5
quant <- 0.5

# Simulate one data set and unpack the pieces used by the examples
data <- Data(n, p, k, q, quant)
y <- data[["y"]]
e <- data[["e"]]
g <- data[["g"]]
X <- data[["X"]]

Example.1 (proposed method: robust sparse bi-level selection under random intercept-and-slope model)

fit <- mixedBayes(y, e, X, g, k, structure = "bilevel")

# Estimated coefficients (posterior median)
fit$coefficient

# Identification: compare the selected effects with the true non-zero ones
beta_est <- selection(fit, sparse = TRUE)
coeff <- data$coef
index <- which(coeff != 0) # true active predictors
pos <- which(beta_est != 0) # selected active predictors
tp <- length(intersect(index, pos))
fp <- length(pos) - tp
# Report true and false positives, as the other examples do
list(tp = tp, fp = fp)

# Prediction
prediction <- predict_mixedBayes(fit, y, X, e, g, k, slope = TRUE, loss = "L1")
prediction

Example.2 (alternative: robust sparse individual level selection under random intercept-and-slope model)

fit <- mixedBayes(y, e, X, g, k, structure = "individual")

# Posterior-median coefficient estimates
fit$coefficient

# Identification: true positives and false positives of the selection
b <- selection(fit, sparse = TRUE)
coeff <- data[["coef"]]
index <- which(coeff != 0)
pos <- which(b != 0)
tp <- sum(pos %in% index)
fp <- length(pos) - tp
list(tp = tp, fp = fp)

# Prediction
prediction <- predict_mixedBayes(
  fit, y, X, e, g, k,
  slope = TRUE, loss = "L1"
)
prediction

Example.3 (alternative: non-robust sparse bi-level selection under random intercept-and-slope model)

fit <- mixedBayes(
  y, e, X, g, k,
  robust = FALSE, quant = NULL, structure = "bilevel"
)

# Posterior-median coefficient estimates
fit$coefficient

# Identification: true positives and false positives of the selection
b <- selection(fit, sparse = TRUE)
coeff <- data[["coef"]]
index <- which(coeff != 0)
pos <- which(b != 0)
tp <- sum(pos %in% index)
fp <- length(pos) - tp
list(tp = tp, fp = fp)

# Prediction
prediction <- predict_mixedBayes(
  fit, y, X, e, g, k,
  slope = TRUE, loss = "L2"
)
prediction

Example.4 (alternative: robust bi-level selection under random intercept-and-slope model)

fit <- mixedBayes(
  y, e, X, g, k,
  robust = TRUE, sparse = FALSE, structure = "bilevel"
)

# Posterior-median coefficient estimates
fit$coefficient

# Identification: true positives and false positives of the selection
b <- selection(fit, sparse = FALSE)
coeff <- data[["coef"]]
index <- which(coeff != 0)
pos <- which(b != 0)
tp <- sum(pos %in% index)
fp <- length(pos) - tp
list(tp = tp, fp = fp)

# Prediction
prediction <- predict_mixedBayes(
  fit, y, X, e, g, k,
  slope = TRUE, loss = "L1"
)
prediction

Data generation under random intercept model with t(2) error

# Simulate longitudinal gene-environment interaction data under a random
# intercept model with t(2) errors.
#
# Arguments:
#   n      number of subjects (at least 2, so the genetic factors form a matrix).
#   p      number of genetic factors (at least 15: genes 2, 4, 7 and 12 carry
#          main effects and genes 2, 4, 6, 7, 12 and 15 carry interactions).
#   k      number of repeated measurements per subject.
#   q      number of environmental dummy variables; subjects are assigned to
#          one of q + 1 groups, group 0 being the reference.
#   quant  quantile level; each subject's errors are shifted so that their
#          sample quantile at this level is zero.
#
# Returns a list with
#   y, g, e  response, genetic and environmental factors after reformat()
#            (conversion to long format),
#   X        stacked time design (intercept, linear and quadratic time),
#   coef     true coefficients: p main effects followed by p * q interactions.
#
# Requires the MASS package (mvrnorm) and reformat() from mixedBayes.
Data <- function(n, p, k, q, quant) {
  stopifnot(n >= 2, p >= 15, k >= 1, q >= 1)

  # Generate genetic factors with AR(1)-type correlation 0.5^|i - j|
  sig <- 0.5^abs(outer(seq_len(p), seq_len(p), "-"))
  g <- MASS::mvrnorm(n, rep(0, p), sig)
  g <- scale(as.matrix(g))

  # True main effects for genetic variables
  beta_true <- rep(0, p)
  beta_true[c(2, 4, 7, 12)] <- runif(4, 0.4, 0.8)

  # Generate environmental factors (a group of q dummy variables)
  group <- sample(0:q, size = n, replace = TRUE)
  e <- 1 * outer(group, seq_len(q), "==")
  colnames(e) <- paste0("e", seq_len(q))
  e <- scale(e)

  # Fixed effects for environmental main effects
  alpha1 <- runif(q, 0.4, 0.8)

  # Fixed effects for time (polynomial terms)
  alpha2 <- runif(3, 0.4, 0.8)

  # Interaction terms: for each gene, the gene multiplied by every column of
  # e, gene by gene, built in one pass instead of growing a matrix in a loop
  w <- do.call(cbind, lapply(seq_len(p), function(j) g[, j] * e))

  # True interaction effects: gene j owns columns ((j - 1) * q + 1):(j * q)
  eta_true <- rep(0, p * q)
  for (j in c(2, 4, 6, 7, 12, 15)) {
    eta_true[((j - 1) * q + 1):(j * q)] <- runif(q, 0.4, 0.8)
  }

  # Combine all coefficients
  betas_true <- c(beta_true, eta_true)

  # Random effects design: intercept only
  z <- rep(1, k)

  # Fixed-effect design for time
  xi <- cbind(1, seq_len(k), seq_len(k)^2)
  X <- do.call(rbind, replicate(n, xi, simplify = FALSE))

  # Subject-level and time-level parts of the mean do not change inside the
  # loop, so compute them once
  subject_mean <- drop(e %*% alpha1 + g %*% beta_true + w %*% eta_true)
  time_trend <- drop(xi %*% alpha2)

  # Generate response
  y <- matrix(0, n, k)
  for (i in seq_len(n)) {
    # Subject-specific random intercept
    ata <- runif(1, 0, 1)
    error <- rt(k, 2)
    error <- error - quantile(error, probs = quant, names = FALSE)
    y[i, ] <- subject_mean[i] + time_trend + z * ata + error
  }

  # Convert to long format
  y <- reformat(k, y, type = "r")
  g <- reformat(k, g, type = "d")
  e <- reformat(k, e, type = "d")

  list(y = y, g = g, e = e, X = X, coef = betas_true)
}

library(MASS)
library(mixedBayes)
# Simulation settings: subjects, genes, environmental dummies, time points
# and the quantile level
n <- 200
p <- 100
q <- 3
k <- 5
quant <- 0.5

# Simulate one data set and unpack the pieces used by the examples
data <- Data(n, p, k, q, quant)
y <- data[["y"]]
e <- data[["e"]]
g <- data[["g"]]
X <- data[["X"]]

Example.1 (proposed method: robust sparse bi-level selection under random intercept model)

fit <- mixedBayes(y, e, X, g, k, slope = FALSE, structure = "bilevel")

# Posterior-median coefficient estimates
fit$coefficient

# Identification: true positives and false positives of the selection
b <- selection(fit, sparse = TRUE)
coeff <- data[["coef"]]
index <- which(coeff != 0)
pos <- which(b != 0)
tp <- sum(pos %in% index)
fp <- length(pos) - tp
list(tp = tp, fp = fp)

# Prediction
prediction <- predict_mixedBayes(
  fit, y, X, e, g, k,
  slope = FALSE, loss = "L1"
)
prediction

Example.2 (alternative: robust sparse individual level selection under random intercept model)

fit <- mixedBayes(y, e, X, g, k, slope = FALSE, structure = "individual")

# Posterior-median coefficient estimates
fit$coefficient

# Identification: true positives and false positives of the selection
b <- selection(fit, sparse = TRUE)
coeff <- data[["coef"]]
index <- which(coeff != 0)
pos <- which(b != 0)
tp <- sum(pos %in% index)
fp <- length(pos) - tp
list(tp = tp, fp = fp)

# Prediction
prediction <- predict_mixedBayes(
  fit, y, X, e, g, k,
  slope = FALSE, loss = "L1"
)
prediction

Example.3 (alternative: non-robust sparse bi-level selection under random intercept model)

fit <- mixedBayes(
  y, e, X, g, k,
  slope = FALSE, robust = FALSE, quant = NULL, structure = "bilevel"
)

# Posterior-median coefficient estimates
fit$coefficient

# Identification: true positives and false positives of the selection
b <- selection(fit, sparse = TRUE)
coeff <- data[["coef"]]
index <- which(coeff != 0)
pos <- which(b != 0)
tp <- sum(pos %in% index)
fp <- length(pos) - tp
list(tp = tp, fp = fp)

# Prediction
prediction <- predict_mixedBayes(
  fit, y, X, e, g, k,
  slope = FALSE, loss = "L2"
)
prediction

Example.4 (alternative: robust bi-level selection under random intercept model)

fit <- mixedBayes(
  y, e, X, g, k,
  slope = FALSE, robust = TRUE, sparse = FALSE, structure = "bilevel"
)

# Posterior-median coefficient estimates
fit$coefficient

# Identification: true positives and false positives of the selection
b <- selection(fit, sparse = FALSE)
coeff <- data[["coef"]]
index <- which(coeff != 0)
pos <- which(b != 0)
tp <- sum(pos %in% index)
fp <- length(pos) - tp
list(tp = tp, fp = fp)

# Prediction
prediction <- predict_mixedBayes(
  fit, y, X, e, g, k,
  slope = FALSE, loss = "L1"
)
prediction

Methods

This package provides implementation for methods proposed in

Fan, K., Jiang, Y., Ma, S., Wang, W. and Wu, C. (2025). Robust Sparse Bayesian Regression for Longitudinal Gene-Environment Interactions. Journal of the Royal Statistical Society Series C: Applied Statistics, 74(5), 1372–1394.