\name{CDF}
\alias{CDF}
\title{Centroid Decision Forest}
\description{
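Trains an ensemble of centroid-splitting trees on the training data and predicts
class labels for new data. At each node, the top-\code{k} features are selected via a
multi-class class separability score (CSS) and observations are split by their nearest
class centroid; the trees' votes are then aggregated to form the final prediction.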
}
\usage{
CDF(xtrain, ytrain, xtest, ntrees = 500, depth = 3, mnode = 3,
    k = round(2 * log(ncol(xtrain))), mtry = round(0.2 * ncol(xtrain)), seed = NULL)
}
\arguments{
  \item{xtrain}{Numeric matrix or data frame of training predictors.}
  \item{ytrain}{Factor or character vector of class labels, of length \code{nrow(xtrain)}.}
  \item{xtest}{Numeric matrix or data frame of test predictors.}
  \item{ntrees}{Integer. Number of trees (default 500).}
  \item{depth}{Integer. Maximum tree depth (default 3).}
  \item{mnode}{Integer. Minimum node size to split (default 3).}
  \item{k}{Integer. Number of top CSS-ranked features used per split (default \code{round(2 * log(ncol(xtrain)))}).}
  \item{mtry}{Integer. Number of candidate features considered at each node (default \code{round(0.2 * ncol(xtrain))}).}
  \item{seed}{Optional integer seed for reproducibility.}
}
\value{
A list with:
\item{predictions}{Character vector of predicted classes for \code{xtest}.}
\item{probabilities}{Numeric matrix of class probabilities (columns are classes).}
\item{feature_importance}{Named numeric vector of normalized CSS importances.}
}
\author{
Amjad Ali, Saeed Aldahmani, Zardad Khan
}
\examples{
# Load the example data; the class label is stored in column Y and every
# other column is treated as a predictor.
data(DARWIN)
set.seed(2025)
n <- nrow(DARWIN)

# Split the data into training (70\%) and test (30\%) sets
tr <- sample(seq_len(n), floor(0.7 * n))
te <- setdiff(seq_len(n), tr)

# Prepare training and test matrices.
# Predictors are selected by name (everything except Y) rather than by
# position, so the label can never leak into the design matrix.
is_predictor <- names(DARWIN) != "Y"
Xtr <- as.matrix(DARWIN[tr, is_predictor, drop = FALSE])
ytr <- DARWIN$Y[tr]
Xte <- as.matrix(DARWIN[te, is_predictor, drop = FALSE])
yte <- DARWIN$Y[te]

# Fit the CDF model
FitCDF <- CDF(Xtr, ytr, Xte, ntrees = 100, seed = 2025)

# Compute classification accuracy.
# Predictions are character, so compare against the labels as character.
mean(FitCDF$predictions == as.character(yte))

# Predicted classes for the test data
FitCDF$predictions

# Predicted class probabilities for the test data
FitCDF$probabilities

# Top 10 most important features (names and normalized importances).
# head() truncates safely when fewer than 10 features are available.
head(sort(FitCDF$feature_importance, decreasing = TRUE), 10)
}
\references{
Ali, A., Khan, Z., and Aldahmani, S. (2025). \emph{Centroid Decision Forest}. arXiv:2503.19306. \doi{10.48550/arXiv.2503.19306}
}
