% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/pmc_text.R
\name{pmc_text}
\alias{pmc_text}
\title{Split section paragraphs into sentences}
\usage{
pmc_text(doc, sentence = TRUE)
}
\arguments{
\item{doc}{\code{xml_document} from PubMed Central}

\item{sentence}{split paragraphs into sentences, default TRUE}
}
\value{
a tibble with section, paragraph and sentence number and text
}
\description{
Split section paragraph tags into a table with subsection titles and
sentences using \code{tokenize_sentences}
}
\note{
Subsections may be nested to arbitrary depths and this function will
return the entire path to the subsection title as a delimited string like
"Results; Predicted functions; Pathogenicity".  Tables, figures and
formulas that are nested in section paragraphs are removed, superscripted
references are replaced with brackets, and any other superscripts or
subscripts are separated with ^ and _.
}
\examples{
# doc <- pmc_xml("PMC2231364")
doc <- xml2::read_xml(system.file("extdata/PMC2231364.xml",
  package = "tidypmc"
))
txt <- pmc_text(doc)
txt
dplyr::count(txt, section, sort = TRUE)
}
\author{
Chris Stubben
}
