% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/vs_fastq_mergepairs.R
\name{vs_fastq_mergepairs}
\alias{vs_fastq_mergepairs}
\alias{vs_fastx_mergepairs}
\alias{vs_fasta_mergepairs}
\alias{vs_mergepairs}
\alias{fastq_mergepairs}
\alias{mergepairs}
\title{Merge paired-end sequence reads}
\usage{
vs_fastq_mergepairs(
  fastq_input,
  reverse = NULL,
  output_format = "fasta",
  fastaout = NULL,
  fastqout = NULL,
  minovlen = 10,
  minlen = 0,
  fasta_width = 0,
  sample = NULL,
  log_file = NULL,
  threads = 1,
  vsearch_options = NULL,
  tmpdir = NULL
)
}
\arguments{
\item{fastq_input}{(Required). A FASTQ file path, a FASTQ tibble (forward
reads), or a paired-end tibble of class \code{"pe_df"}. See \emph{Details}.}

\item{reverse}{(Optional). A FASTQ file path or a FASTQ tibble (reverse
reads). Optional if \code{fastq_input} is a \code{"pe_df"} object. See
\emph{Details}.}

\item{output_format}{(Optional). Desired output format of file or tibble:
\code{"fasta"} (default) or \code{"fastq"}.}

\item{fastaout}{(Optional). Name of the FASTA output file with the merged
reads. If \code{NULL} (default), no output is written to file. See
\emph{Details}.}

\item{fastqout}{(Optional). Name of the FASTQ output file with the merged
reads. If \code{NULL} (default), no output is written to file. See
\emph{Details}.}

\item{minovlen}{(Optional). Minimum overlap between the merged reads. Must be
at least 5. Defaults to \code{10}.}

\item{minlen}{(Optional). Minimum number of bases a sequence must have to be
retained. Defaults to \code{0}. See \emph{Details}.}

\item{fasta_width}{(Optional). Number of characters per line in the output
FASTA file. Only applies if the output file is in FASTA format. Defaults to
\code{0}, which eliminates wrapping.}

\item{sample}{(Optional). Add the given sample identifier string to sequence
headers. For instance, if the given string is "ABC", the text ";sample=ABC"
will be added to the header. If \code{NULL} (default), no identifier is added.}

\item{log_file}{(Optional). Name of the log file to capture messages from
\code{VSEARCH}. If \code{NULL} (default), no log file is created.}

\item{threads}{(Optional). Number of computational threads to be used by
\code{VSEARCH}. Defaults to \code{1}.}

\item{vsearch_options}{(Optional). Additional arguments to pass to
\code{VSEARCH}. Defaults to \code{NULL}. See \emph{Details}.}

\item{tmpdir}{(Optional). Path to the directory where temporary files should
be written when tables are used as input or output. Defaults to
\code{NULL}, which resolves to the session-specific temporary directory
(\code{tempdir()}).}
}
\value{
A tibble or \code{NULL}.

If \code{fastaout} or \code{fastqout} is specified, the merged sequences are
written to the specified output file, and no tibble is returned.

If both \code{fastaout} and \code{fastqout} are \code{NULL}, a tibble
containing the merged reads in the format specified by \code{output_format}
is returned.

The \code{"statistics"} attribute of the returned tibble (when both
\code{fastaout} and \code{fastqout} are \code{NULL}) is a tibble with the
following columns:
\itemize{
  \item \code{Tot_num_pairs}: Total number of read pairs before merging.
  \item \code{Merged}: Number of read pairs that merged.
  \item \code{Mean_Read_Length_before_merging}: Mean read length before
  merging (R1 and R2).
  \item \code{Mean_Read_Length_after_merging}: Mean read length after
  merging.
  \item \code{StdDev_Read_Length}: Standard deviation of read length
  after merging.
  \item \code{R1}: Name of the file/object with forward (R1) reads used in
  the merging.
  \item \code{R2}: Name of the file/object with reverse (R2) reads used in
  the merging.
}
}
\description{
\code{vs_fastq_mergepairs} merges paired-end sequence reads with
overlapping regions into one sequence using \code{VSEARCH}.
}
\details{
Read pairs from the input FASTQ files (\code{fastq_input} and \code{reverse})
are merged into a single sequence by overlapping regions. The resulting
sequences consist of the merged forward and reverse reads with the specified
minimum overlap.

\code{fastq_input} and \code{reverse} can either be file paths to FASTQ files
or FASTQ objects. FASTQ objects are tibbles that contain the columns
\code{Header}, \code{Sequence}, and \code{Quality}, see
\code{\link[microseq]{readFastq}}. Forward and reverse reads must appear in
the same order, and both inputs must contain the same number of reads.

If \code{fastq_input} is an object of class \code{"pe_df"}, the reverse reads
are automatically extracted from its \code{"reverse"} attribute unless
explicitly provided via the \code{reverse} argument. This allows streamlined
input handling for paired-end tibbles created by
\code{\link{fastx_synchronize}} or \code{\link{vs_fastx_trim_filt}}.

If \code{fastaout} or \code{fastqout} is specified, the merged reads are
written to the respective file in either FASTA or FASTQ format.

If both \code{fastaout} and \code{fastqout} are \code{NULL}, the results are
returned as a FASTA or FASTQ object, and no file is written.

\code{output_format} has to match the desired output files/objects.

Any input sequence with fewer bases than the value set in \code{minlen} will
be discarded. Default \code{minlen} is 0, meaning no sequences are removed.
However, using the default value may allow empty sequences to remain in
the results.

If \code{log_file} is \code{NULL} and \code{fastqout} or \code{fastaout} is
specified, merging statistics from \code{VSEARCH} will not be captured.

\code{vsearch_options} allows users to pass additional command-line arguments
to \code{VSEARCH} that are not directly supported by this function. Refer to
the \code{VSEARCH} manual for more details.
}
\examples{
\dontrun{
# Define arguments
fastq_input <- file.path(file.path(path.package("Rsearch"), "extdata"),
                         "small_R1.fq")
reverse <- file.path(file.path(path.package("Rsearch"), "extdata"),
                     "small_R2.fq")
output_format <- "fastq"

# Merge sequences and return a FASTQ tibble
merge_seqs <- vs_fastq_mergepairs(fastq_input = fastq_input,
                                  reverse = reverse,
                                  output_format = output_format)

# Extract merging statistics
statistics <- attr(merge_seqs, "statistics")

# Merge sequences and write sequences to a FASTQ file
vs_fastq_mergepairs(fastq_input = fastq_input,
                    reverse = reverse,
                    output_format = output_format,
                    fastqout = "merged_sequences.fq")
}

}
\references{
\url{https://github.com/torognes/vsearch}
}
