% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ODBT.R
\name{ODBT}
\alias{ODBT}
\alias{ODBT.formula}
\alias{ODBT.default}
\title{Classification and Regression using the Ensemble of ODT-based Boosting Trees}
\usage{
ODBT(X, ...)

\method{ODBT}{formula}(
  formula,
  data = NULL,
  Xnew = NULL,
  type = "auto",
  model = c("ODT", "rpart", "rpart.cpp")[1],
  TreeRotate = TRUE,
  max.terms = 30,
  NodeRotateFun = "RotMatRF",
  FunDir = getwd(),
  paramList = NULL,
  ntrees = 100,
  storeOOB = TRUE,
  replacement = TRUE,
  stratify = TRUE,
  ratOOB = 0.368,
  parallel = TRUE,
  numCores = Inf,
  MaxDepth = Inf,
  numNode = Inf,
  MinLeaf = ceiling(sqrt(ifelse(replacement, 1, 1 - ratOOB) * ifelse(is.null(data),
    length(eval(formula[[2]])), nrow(data)))/3),
  subset = NULL,
  weights = NULL,
  na.action = na.fail,
  catLabel = NULL,
  Xcat = 0,
  Xscale = "No",
  ...
)

\method{ODBT}{default}(
  X,
  y,
  Xnew = NULL,
  type = "auto",
  model = c("ODT", "rpart", "rpart.cpp")[1],
  TreeRotate = TRUE,
  max.terms = 30,
  NodeRotateFun = "RotMatRF",
  FunDir = getwd(),
  paramList = NULL,
  ntrees = 100,
  storeOOB = TRUE,
  replacement = TRUE,
  stratify = TRUE,
  ratOOB = 0.368,
  parallel = TRUE,
  numCores = Inf,
  MaxDepth = Inf,
  numNode = Inf,
  MinLeaf = ceiling(sqrt(ifelse(replacement, 1, 1 - ratOOB) * length(y))/3),
  subset = NULL,
  weights = NULL,
  na.action = na.fail,
  catLabel = NULL,
  Xcat = 0,
  Xscale = "No",
  ...
)
}
\arguments{
\item{X}{An n by d numeric matrix (preferable) or data frame.}

\item{...}{Optional parameters to be passed to the low level function.}

\item{formula}{Object of class \code{formula} with a response describing the model to fit. If this is a data frame, it is taken as the model frame. (see \code{\link{model.frame}})}

\item{data}{Training data of class \code{data.frame} containing variables named in the formula. If \code{data} is missing it is obtained from the current environment by \code{formula}.}

\item{Xnew}{An n by d numeric matrix (preferable) or data frame containing predictors for the new data.}

\item{type}{Use \code{ODBT} for classification ("class") or regression ("reg"). With "auto" (default), "class" is used if the response in \code{data} or \code{y} is a factor; otherwise regression is assumed.}

\item{model}{The basic tree model for boosting. We offer three options: "ODT" (default), "rpart" and "rpart.cpp" (improved "rpart").}

\item{TreeRotate}{Whether to rotate the training data with the rotation matrix estimated by logistic regression before building the tree (default TRUE).}

\item{max.terms}{The maximum number of iterations for boosting trees (default 30).}

\item{NodeRotateFun}{Name (of class \code{character}) of the function that implements a linear combination of predictors in the split node.
The available options are: \itemize{
\item{"RotMatPPO": projection pursuit optimization model (\code{\link{PPO}}), see \code{\link{RotMatPPO}} (model="PPR").}
\item{"RotMatRF": single feature similar to Random Forest, see \code{\link{RotMatRF}} (default).}
\item{"RotMatRand": random rotation, see \code{\link{RotMatRand}}.}
\item{"RotMatMake": users can define this function, for details see \code{\link{RotMatMake}}.}
}}

\item{FunDir}{The path to the \code{function} of the user-defined \code{NodeRotateFun} (default current working directory).}

\item{paramList}{List of parameters used by the functions \code{NodeRotateFun}. If left unchanged, default values will be used, for details see \code{\link{defaults}}.}

\item{ntrees}{The number of trees in the forest (default 100).}

\item{storeOOB}{If TRUE then the samples omitted during the creation of a tree are stored as part of the tree (default TRUE).}

\item{replacement}{If TRUE then n samples are chosen, with replacement, from the training data (default TRUE).}

\item{stratify}{If TRUE then class sample proportions are maintained during the random sampling. Ignored if replacement = FALSE (default TRUE).}

\item{ratOOB}{Ratio of 'out-of-bag' samples (default 0.368).}

\item{parallel}{Parallel computing or not (default TRUE).}

\item{numCores}{Number of cores to be used for parallel computing (default \code{Inf}).}

\item{MaxDepth}{The maximum depth of the tree (default \code{Inf}).}

\item{numNode}{Number of nodes that can be used by the tree (default \code{Inf}).}

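\item{MinLeaf}{Minimal node size (default \code{ceiling(sqrt(m)/3)}, where m is the number of training samples n if \code{replacement = TRUE} and \code{n * (1 - ratOOB)} otherwise).}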

\item{subset}{An index vector indicating which rows should be used. (NOTE: If given, this argument must be named.)}

\item{weights}{Vector of non-negative observational weights; fractional weights are allowed (default NULL).}

\item{na.action}{A function to specify the action to be taken if NAs are found. (NOTE: If given, this argument must be named.)}

\item{catLabel}{A \code{list} of category labels for the categorical predictors (default NULL, for details see Examples).}

\item{Xcat}{A \code{vector} indicating which predictors are categorical variables. The default \code{Xcat}=0 means that no special treatment is given to categorical variables.
When Xcat=NULL, a predictor x that satisfies the condition (length(unique(x))<10) & (n>20) is judged to be a categorical variable.}

\item{Xscale}{Predictor standardization method. "Min-max", "Quantile" and "No" (default) denote Min-max transformation, Quantile transformation and no transformation, respectively.}

\item{y}{A response vector of length n.}
}
\value{
An object of class \code{ODBT}, which is a list containing the following components:
\itemize{
\item{\code{call}: The original call to ODBT.}
\item{\code{terms}: An object of class \code{c("terms", "formula")} (see \code{\link{terms.object}}) summarizing the formula. Used by various methods, but typically not of direct relevance to users.}
\item{\code{ppTrees}: Each tree used to build the forest. \itemize{
\item{\code{oobErr}: 'out-of-bag' error for tree, misclassification rate (MR) for classification or mean square error (MSE) for regression.}
\item{\code{oobIndex}: Which training data to use as 'out-of-bag'.}
\item{\code{oobPred}: Predicted value for 'out-of-bag'.}
\item{\code{other}: For other tree related values, see \code{\link{ODT}}.}
}}
\item{\code{oobErr}: 'out-of-bag' error for forest, misclassification rate (MR) for classification or mean square error (MSE) for regression.}
\item{\code{oobConfusionMat}: 'out-of-bag' confusion matrix for forest.}
\item{\code{split}, \code{Levels} and \code{NodeRotateFun} are important parameters for building the tree.}
\item{\code{paramList}: Parameters in a named list to be used by \code{NodeRotateFun}.}
\item{\code{data}: The list of data related parameters used to build the forest.}
\item{\code{tree}: The list of tree related parameters used to build the tree.}
\item{\code{forest}: The list of forest related parameters used to build the forest.}
\item{\code{results}: The prediction results for new data \code{Xnew} using \code{ODBT}.}
}
}
\description{
We use ODT as the basic tree model (base learner). To improve the performance of a boosting tree, we apply feature bagging in this process, in the same
way as the random forest. Our final estimator, called the ensemble of ODT-based boosting trees and denoted by \code{ODBT}, is the average of many boosting trees.
}
\examples{
# Classification with the ensemble of boosting trees (ODBT).
data(seeds)
set.seed(221212)
train <- sample(1:209, 100)
train_data <- data.frame(seeds[train, ])
test_data <- data.frame(seeds[-train, ])
\donttest{
forest <- ODBT(varieties_of_wheat ~ ., train_data, test_data[, -8],
  model = "rpart",
  type = "class", parallel = FALSE, NodeRotateFun = "RotMatRF"
)
pred <- forest$results$prediction
# classification error
(mean(pred != test_data[, 8]))
forest <- ODBT(varieties_of_wheat ~ ., train_data, test_data[, -8],
  model = "rpart.cpp",
  type = "class", parallel = FALSE, NodeRotateFun = "RotMatRF"
)
pred <- forest$results$prediction
# classification error
(mean(pred != test_data[, 8]))
}

# Regression with the ensemble of boosting trees (ODBT).
data(body_fat)
set.seed(221212)
train <- sample(1:252, 80)
train_data <- data.frame(body_fat[train, ])
test_data <- data.frame(body_fat[-train, ])
# To use ODT as the basic tree model for boosting, you need to set
# the parameters model = "ODT" and NodeRotateFun = "RotMatPPO".
\donttest{
forest <- ODBT(Density ~ ., train_data, test_data[, -1],
  type = "reg", parallel = FALSE, model = "ODT",
  NodeRotateFun = "RotMatPPO"
)
pred <- forest$results$prediction
# estimation error
mean((pred - test_data[, 1])^2)
forest <- ODBT(Density ~ ., train_data, test_data[, -1],
  type = "reg", parallel = FALSE, model = "rpart.cpp",
  NodeRotateFun = "RotMatRF"
)
pred <- forest$results$prediction
# estimation error
mean((pred - test_data[, 1])^2)
}

}
\references{
Zhan, H., Liu, Y., & Xia, Y. (2024). Consistency of Oblique Decision Tree and its Boosting and Random Forest. arXiv preprint arXiv:2211.12653.

Tomita, T. M., Browne, J., Shen, C., Chung, J., Patsolic, J. L., Falk, B., ... & Vogelstein, J. T. (2020). Sparse projection oblique randomer forests. Journal of machine learning research, 21(104).
}
\seealso{
\code{\link{ODT}} \code{\link{best.cut.node}}
}
\author{
Yu Liu and Yingcun Xia
}
\keyword{forest}
