#' @title Introduces Missing at Random (MAR) Values into Data Sets.
#' @description
#' This function introduces missing values under the Missing at Random (MAR) mechanism into previously generated data sets (e.g., those produced by sim.skewed() or sim.normal()).
#' Under MAR, the probability of missingness is associated with other variables in the data set, but not with the variable itself.
#' If the baseV argument is not provided, two random variables (excluding the target variable itself) are selected. Their mean is then used to determine missingness in the target variable.
#' For example, assume a data set with 8 items where missing values are to be introduced for item 2. Two items are randomly selected from items 1, 3, 4, 5, 6, 7, and 8 (e.g., items 5 and 7).
#' Their mean is calculated, sorted, and used as the basis for assigning missingness to item 2. Following the MAR rule, 90 percent of the missing values are drawn from the highest scores, and the remaining 10 percent are drawn randomly from the rest.
#' For instance, with a sample size of 300 and 20 percent missingness (60 cases), the mean of the selected auxiliary variables is sorted in decreasing order.
#' Missing values are then introduced in 54 cases (90 percent of 60) drawn from the 60 highest-scoring observations, while 6 cases (10 percent of 60) are drawn randomly from the lower 240 observations.
#' The missing values are represented by NA in the output files. New data sets containing missing values are saved as separate files, preserving the originals.
#' Additionally, a file named "MAR_List.dat" is created, which contains the names of all data sets with MAR missingness.
#'
#' @author Fatih Orcan
#' @importFrom utils read.table write.table
#' @param misg A numeric vector of 0s and 1s specifying which items will contain missing values.
#' A value of 0 indicates the item will not include missingness, while 1 indicates missing values will be introduced.
#' If omitted, all items are treated as eligible for missingness.
#' @param perct The percentage of missingness to be applied (default = 10 percent).
#' @param dataList The file name containing the list of previously generated data sets (e.g., "Data_List.dat"), either created by this package or by external software.
#' @param f.loc The directory path where both the original data sets and the "dataList" file are located.
#' @param baseV A list specifying the auxiliary variables on which MAR missingness will be based.
#' This must match the structure of misg. If not provided, two random variables (excluding the variable itself) are chosen automatically.
#' @export
#' @examples
#'
#' # Step 1: Generate data sets
#'
#' fc<-fcors.value(nf=3, cors=c(1,.5,.6,.5,1,.4,.6,.4,1))
#' fl<-loading.value(nf=3, fl.loads=c(.5,.5,.5,0,0,0,0,0,0,0,0,.6,.6,.6,0,0,0,0,0,0,0,0,.4,.4))
#' floc<-tempdir()
#' sim.normal(nd=10, ss=100, fcors=fc, loading<-fl,  f.loc=floc)
#'
#' # Step 2: Introduce MAR missing values
#'
#' mis.items<-c(1,0,1,1,0,0,0,0)
#' bV<-list(c(0,0,0,0,0,0,1,1),NA,c(0,0,0,0,0,1,1,0),c(0,0,0,0,0,1,1,1), NA,NA,NA,NA)
#' dl<-"Data_List.dat"  # must be located in the f.loc directory
#' MAR.data(misg = mis.items, baseV=bV, perct = 20, dataList = dl, f.loc=floc )

MAR.data<-function(misg=NULL,baseV=NULL, perct=10, dataList="Data_List.dat", f.loc){

  # Reads every data set named in dataList (located in f.loc), sets a share of
  # the flagged items to NA based on the mean of auxiliary items, and writes
  # "MAR_<i>.dat", "MAR_List.dat" and "Model_MAR_relations.dat" into f.loc.
  # Called for its side effects; returns NULL invisibly.

  if(!is.numeric(perct) || length(perct)!=1 || is.na(perct) || perct<0 || perct>100){
    stop("perct must be a single number between 0 and 100")}

  data.names<-as.matrix(read.table(file.path(f.loc, dataList), header = FALSE))
  misg.names<-data.names
  nd<-nrow(data.names)

  # The first data set defines the layout: an ID column followed by the items.
  first.set<-read.table(file.path(f.loc, data.names[1,]))
  nitem<-ncol(first.set)-1
  n.vec<-seq_len(nitem)

  # When misg is omitted every item receives missing values.
  if(is.null(misg)){misg<-rep(1,nitem)}
  if(length(misg)<nitem){
    stop("misg must contain one entry (0 or 1) for each item")}
  misg<-misg[n.vec]
  if(!all(misg %in% c(0,1))){
    stop("Please use only 0s or 1s to indicated missingness")}

  if(!is.null(baseV) && length(baseV)<nitem){
    stop("baseV must contain one entry for each item")}

  # Randomly selected auxiliary items (used only when baseV is not supplied).
  # The draw is made for every item, flagged or not, so that results under a
  # fixed seed stay the same as in earlier versions of this function.
  basR<-vector("list", nitem)
  if(nitem>=3){
    for(bs in n.vec){
      r.i<-sample(n.vec[-bs],2)      # two items other than the target item
      aa<-rep(0,nitem)
      aa[r.i]<-1
      basR[[bs]]<-if(misg[bs]==1) aa else NA
    }
  } else if(is.null(baseV)){
    stop("At least 3 items are required to select two random auxiliary items; please provide baseV")
  }

  # Draws k elements from pool by position, so a pool of length one is never
  # expanded to 1:pool the way sample() would do.
  pick<-function(pool, k){
    k<-min(k, length(pool))
    if(k<1){return(pool[0])}
    pool[sample.int(length(pool), k)]
  }

  for(i in seq_len(nd)){
    veri<-read.table(file.path(f.loc, data.names[i,]))
    if(ncol(veri)!=nitem+1){
      stop(paste0("Data set ", i, " does not have the same number of columns as the first data set"))}

    ss<-nrow(veri)
    # Whole-number case counts: 90% from the top of the ranking, the remainder
    # from the rest, so that the total always equals the requested amount.
    n.mis<-min(round((perct/100)*ss), ss)
    n.top<-round(n.mis*.9)
    n.rest<-n.mis-n.top

    out.name<-paste0("MAR_",i,".dat")
    misg.names[i,]<-out.name

    mar.set<-veri
    for(j in which(misg==1)){
      aux<-if(is.null(baseV)) basR[[j]] else baseV[[j]]
      aux.cols<-which(aux==1)+1      # +1 skips the ID column
      if(length(aux.cols)==0){
        stop(paste0("No auxiliary items were specified for item ", j))}

      # Values that MAR is based on; drop = FALSE keeps a single auxiliary
      # item working. They are always computed from the original data.
      c.v<-rowMeans(veri[,aux.cols,drop=FALSE], na.rm=TRUE)

      ranked<-order(c.v, decreasing=TRUE)
      top.pool<-ranked[seq_len(n.mis)]
      rest.pool<-ranked[seq_len(ss)>n.mis]

      mar.set[pick(top.pool, n.top), j+1]<-NA
      mar.set[pick(rest.pool, n.rest), j+1]<-NA
    }

    write.table(mar.set, file=file.path(f.loc, out.name), sep = "\t",
                col.names = FALSE, row.names = FALSE,quote = FALSE)
    message(paste0(out.name, " was completed"))
  }

  write.table(misg.names,file=file.path(f.loc, "MAR_List.dat"),
              col.names = FALSE, row.names = FALSE, quote = FALSE)
  message("Done!...")

  # Record which items the missingness was based on. capture.output() restores
  # the output stream even if printing fails, unlike a bare sink() pair.
  rel.file<-file.path(f.loc, "Model_MAR_relations.dat")
  if(is.null(baseV)){
    utils::capture.output(print("Two random items that MAR was based on:"),
                          print(basR), file=rel.file)
  } else {
    utils::capture.output(print("The items that MAR was based on:"),
                          print(baseV), file=rel.file)
  }

  invisible(NULL)
}
