% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/gggenomes.R
\name{gggenomes}
\alias{gggenomes}
\title{Plot genomes, features and synteny maps}
\usage{
gggenomes(
  genes = NULL,
  seqs = NULL,
  feats = NULL,
  links = NULL,
  .id = "file_id",
  spacing = 0.05,
  wrap = NULL,
  adjacent_only = TRUE,
  infer_bin_id = seq_id,
  infer_start = min(start, end),
  infer_end = max(start, end),
  infer_length = max(start, end),
  theme = c("clean", NULL),
  .layout = NULL,
  ...
)
}
\arguments{
\item{genes, feats}{A data.frame, a list of data.frames, or a character vector
with paths to files containing gene data. Each item is added as a feature
track.

For a single data.frame the track_id will be "genes" and "feats",
respectively. For a list, track_ids are parsed from the list names, or, if
names are missing, from the name of the variable containing each data.frame.
Data columns:
\itemize{
\item required: \verb{seq_id,start,end}
\item recognized: \verb{strand,bin_id,feat_id,introns}
}}

\item{seqs}{A data.frame or a character vector with paths to files containing
sequence data. Data columns:
\itemize{
\item required: \verb{seq_id,length}
\item recognized: \verb{bin_id,start,end,strand}
}}

\item{links}{A data.frame or a character vector with paths to files
containing link data. Each item is added as a links track. Data columns:
\itemize{
\item required: \verb{seq_id,seq_id2}
\item recognized: \verb{start,end,bin_id,start2,end2,bin_id2,strand}
}}

\item{.id}{The name of the column for file labels that are created when
reading directly from files. Defaults to "file_id". Set to "bin_id" if
every file represents a different bin.}

\item{spacing}{Spacing between sequences, in bases (>1) or relative to the longest bin (<1).}

\item{wrap}{wrap bins into multiple lines with at most this many nucleotides
per line.}

\item{adjacent_only}{Indicates whether links should be created between adjacent sequences/chromosomes only.
By default it is set to \code{adjacent_only = TRUE}. If \code{FALSE}, links will be created between all sequences
(\emph{not recommended for large data sets}).}

\item{infer_length, infer_start, infer_end, infer_bin_id}{used to infer pseudo
seqs if only feats or links are provided, or if no bin_id column was
provided. The expressions are evaluated in the context of the first feat
or link track.

By default subregions of sequences from the first to the last feat/link
are generated. Set \code{infer_start} to 0 to show all sequences from their
true beginning.}

\item{theme}{choose a gggenomes default theme, NULL to omit.}

\item{.layout}{a pre-computed layout from \code{\link[=layout_genomes]{layout_genomes()}}. Useful for
development purposes.}

\item{...}{additional parameters, passed to layout}
}
\value{
gggenomes-flavored ggplot object
}
\description{
\code{gggenomes()} initializes a gggenomes-flavored ggplot object.
It is used to declare the input data for gggenomes' track system.

(\emph{For more details on the track system, see the gggenomes vignette or the Details/Arguments section})
}
\details{
\code{gggenomes::gggenomes()} resembles the functionality of \code{ggplot2::ggplot()}.
It is used to construct the initial plot object, and is often followed by "+" to add components to the plot (\emph{e.g. "+ geom_gene()"}).

A big difference between the two is that gggenomes has a multi-track setup (\emph{\code{'seqs'}, \code{'feats'}, \code{'genes'} and \code{'links'}}).
\code{gggenomes()} pre-computes a layout and adds coordinates (\verb{y,x,xend}) to each data frame prior to the actual plot construction.
This has some implications for the usage of gggenomes:
\itemize{
\item \strong{Data frames for tracks have required variables.} These predefined variables are used during import
to compute x/y coordinates (\emph{see arguments}).
\item \strong{gggenomes' geoms can often be used without explicit \code{aes()} mappings.} This works because
we always know the names of the plot variables ahead of time: they originate from the pre-computed layout,
and we can use that information to set sensible default aesthetic mappings for most cases.
}
}
\examples{
# Compare the genomic organization of three viral elements
# EMALEs: endogenous mavirus-like elements (example data shipped with gggenomes)
gggenomes(emale_genes, emale_seqs, emale_tirs, emale_ava) +
  geom_seq() + geom_bin_label() + # chromosomes and labels
  geom_feat(linewidth= 8) + # terminal inverted repeats
  geom_gene(aes(fill = strand), position = "strand") + # genes
  geom_link(offset = 0.15) # synteny-blocks

# with some more information
gggenomes(emale_genes, emale_seqs, emale_tirs, emale_ava) \%>\%
  add_feats(emale_ngaros, emale_gc) \%>\%
  add_clusters(emale_cogs) \%>\%
  sync() +
  geom_link(offset = 0.15, color = "white") + # synteny-blocks
  geom_seq() + geom_bin_label() + # chromosomes and labels
  # thistle4, salmon4, burlywood4
  geom_feat(linewidth= 6, position = "identity") + # terminal inverted repeats
  geom_feat(
    data = feats(emale_ngaros), color = "turquoise4", alpha = .3,
    position = "strand", linewidth = 16
  ) +
  geom_feat_note(aes(label = type),
    data = feats(emale_ngaros),
    position = "strand", nudge_y = .3
  ) +
  geom_gene(aes(fill = cluster_id), position = "strand") + # genes
  geom_wiggle(aes(z = score, linetype = "GC-content"), feats(emale_gc),
    fill = "lavenderblush4", position = position_nudge(y = -.2), height = .2
  ) +
  scale_fill_brewer("Conserved genes", palette = "Dark2", na.value = "cornsilk3")

# initialize plot directly from files
gggenomes(
  ex("emales/emales.gff"),
  ex("emales/emales.gff"),
  ex("emales/emales-tirs.gff"),
  ex("emales/emales.paf")
) + geom_seq() + geom_gene() + geom_feat() + geom_link()

# multi-contig genomes wrap to fixed width
s0 <- read_seqs(list.files(ex("cafeteria"), "Cr.*\\\\.fa.fai$", full.names = TRUE))
s1 <- s0 \%>\% dplyr::filter(length > 5e5)
gggenomes(seqs = s1, infer_bin_id = file_id, wrap = 5e6) +
  geom_seq() + geom_bin_label() + geom_seq_label()
}
