% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/sim_seq.R
\name{sim_seq}
\alias{sim_seq}
\title{Simulate categorical sequences from a mixture of Markov models}
\description{
	This function simulates a sequential dataset from a mixture of first-order Markov models generating categorical sequences. The output is a data frame with three columns: "id", which identifies a subject/sequence; "y", the categorical observation belonging to that sequence; and "clus", the cluster label.
}
\usage{
sim_seq(M, K, ini.prob, trans.prob, clust.size, T.range)
}
\arguments{
\item{M}{is the number of components}

\item{K}{is the number of Markov model states}

\item{ini.prob}{is a list of initial probability vectors for each component}

\item{trans.prob}{is a list of transition matrices for each component}

\item{clust.size}{is a list of component sizes, i.e. the number of sequences in each component}

\item{T.range}{is a vector of two elements: minimum and maximum sequence length}
}
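\value{Object of class data.frame with columns "id" (subject/sequence identifier), "y" (categorical observation) and "clus" (cluster label)}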
\author{ Furio Urso \email{furio.urso@unipa.it} }

\examples{
# Example: simulate sequences from a three-component mixture of
# first-order Markov models over five categorical states.
M <- 3    # number of mixture components
K <- 5    # number of Markov model states

# --- Component 1: initial probabilities and transition matrix ---
# Transition matrices are filled by row; each row sums to 1.
ini1 <- c(0.35, 0, 0.3, 0.2, 0.15)
A1 <- matrix(
  c(0.15, 0.10, 0.50, 0.00, 0.25,
    0.20, 0.00, 0.10, 0.20, 0.50,
    0.60, 0.10, 0.10, 0.20, 0.00,
    0.00, 0.45, 0.35, 0.10, 0.10,
    0.15, 0.25, 0.00, 0.10, 0.50),
  nrow = K, byrow = TRUE
)

# --- Component 2 ---
ini2 <- c(0.25, 0, 0.2, 0.25, 0.3)
A2 <- matrix(
  c(0.0, 0.8, 0.0, 0.0, 0.2,
    0.2, 0.0, 0.8, 0.0, 0.0,
    0.0, 0.2, 0.0, 0.8, 0.0,
    0.0, 0.0, 0.2, 0.0, 0.8,
    0.8, 0.0, 0.0, 0.2, 0.0),
  nrow = K, byrow = TRUE
)

# --- Component 3 ---
ini3 <- c(0.3, 0, 0.25, 0.3, 0.15)
A3 <- matrix(
  c(0.0, 0.1, 0.2, 0.0, 0.7,
    0.7, 0.0, 0.2, 0.1, 0.0,
    0.1, 0.8, 0.0, 0.1, 0.0,
    0.0, 0.1, 0.7, 0.0, 0.2,
    0.2, 0.0, 0.0, 0.8, 0.0),
  nrow = K, byrow = TRUE
)

# Collect the per-component parameters, in component order
ini.prob <- list(ini1, ini2, ini3)
trans.prob <- list(A1, A2, A3)

# Number of sequences to generate in each component
N.sim1 <- 20
N.sim2 <- 30
N.sim3 <- 50
clust.size <- list(N.sim1, N.sim2, N.sim3)

# Minimum and maximum sequence length
T.range <- c(5, 30)

# Run the simulation; arguments are passed by name, matching the usage above
data <- sim_seq(M = M, K = K, ini.prob = ini.prob, trans.prob = trans.prob,
                clust.size = clust.size, T.range = T.range)
}
