#' Correct and Standardize References in a 'Word' Document
#'
#' Reads a 'Word' document containing bibliographic references with missing or
#' incomplete information (e.g., authors, title, year, volume, issue, or page
#' numbers) and produces an updated document with standardized and completed
#' references. Missing metadata are retrieved using the 'CrossRef' application
#' programming interface (API), and digital object identifiers (DOIs) are added
#' where available.
#'
#' @details
#' For every candidate reference the function (1) strips a leading
#' \code{"<number>. "} prefix, (2) sends the remaining text to the 'CrossRef'
#' works endpoint and takes the DOI of the first hit, and (3) asks
#' \code{citation.doi.org} to format that DOI in the requested style. If a DOI
#' is found but cannot be formatted, the original text is kept and the DOI is
#' appended. If no DOI is found at all, the original text is kept unchanged.
#' The first 'CrossRef' hit is used without any further plausibility check, so
#' the output should be reviewed manually. Both requests need internet access.
#'
#' @param input_path Character string specifying the path to the input 'Word'
#'   (.docx) document containing incomplete references.
#' @param output_path Character string specifying the path where the corrected
#'   'Word' document will be written.
#' @param reference_lines Integer specifying the number of trailing text
#'   elements of the document (as reported by \code{officer::docx_summary()})
#'   that are treated as references. Elements that are missing or not longer
#'   than 30 characters are ignored.
#' @param style Character string specifying the reference style (e.g.,
#'   \code{"vancouver"}).
#'
#' @import httr
#' @import jsonlite
#' @import stringr
#' @import officer
#' @importFrom utils tail URLencode
#'
#' @return Writes a corrected 'Word' document to the location specified by
#'   \code{output_path}. The character vector of numbered, cleaned references
#'   that was written is returned invisibly.
#'
#' @export
#'
#' @examples
#' \donttest{
#' library(officer)
#'
#' # Step 1: Create a temporary Word document with sample references
#' tmp_input <- file.path(tempdir(), "tmp_refs.docx")
#' doc <- read_docx()
#' doc <- body_add_par(doc, "1. Smith J. Some article. 2020;10(2):123-9.", style = "Normal")
#' doc <- body_add_par(doc, "2. Doe A. Another article. 2021;5(1):10-5.", style = "Normal")
#' print(doc, target = tmp_input)
#'
#' # Step 2: Specify output path
#' tmp_output <- file.path(tempdir(), "updated_refs.docx")
#'
#' # Step 3: Run the reference correction function
#' correct_ref(
#'   input_path = tmp_input,
#'   output_path = tmp_output,
#'   reference_lines = 2,
#'   style = "vancouver"
#' )
#'
#' # Step 4: The corrected Word file is saved in tmp_output
#' tmp_output
#' }
correct_ref <- function(input_path,
                        output_path,
                        reference_lines = 50,
                        style = "vancouver") {

  # Fail early with a clear message instead of deep inside officer/httr.
  stopifnot(
    is.character(input_path), length(input_path) == 1L, !is.na(input_path),
    is.character(output_path), length(output_path) == 1L, !is.na(output_path),
    is.numeric(reference_lines), length(reference_lines) == 1L,
    !is.na(reference_lines), reference_lines >= 1,
    is.character(style), length(style) == 1L, !is.na(style), nzchar(style)
  )
  if (!file.exists(input_path)) {
    stop("Input file not found: ", input_path, call. = FALSE)
  }

  # Upper bound (seconds) for each HTTP request so a stalled server cannot
  # block the whole run.
  request_timeout <- 30

  # Step 1: Read Word file and get last N reference lines
  doc <- read_docx(input_path)
  doc_text <- docx_summary(doc)$text
  references <- tail(doc_text, reference_lines)
  # Drop missing entries and very short lines. The explicit is.na() guard is
  # needed because nchar(NA_character_) is NA and would otherwise leak NA
  # elements into the result.
  references <- references[!is.na(references) & nchar(references) > 30]

  if (length(references) == 0L) {
    warning(
      "No reference lines longer than 30 characters were found in: ",
      input_path,
      call. = FALSE
    )
  }

  # Step 2: Function to get DOI from CrossRef.
  # Returns the DOI of the first hit, or NA_character_ on any failure.
  get_doi <- function(reference) {
    # Passing the text through `query =` lets httr percent-encode it,
    # including reserved characters such as "&" or "#" that would otherwise
    # cut the query short. Only the first hit is used, so request one row.
    response <- tryCatch(
      GET(
        "https://api.crossref.org/works",
        query = list(query = reference, rows = 1),
        timeout(request_timeout)
      ),
      error = function(e) NULL
    )
    if (is.null(response) || http_error(response)) return(NA_character_)

    data <- tryCatch(
      fromJSON(content(response, "text", encoding = "UTF-8")),
      error = function(e) NULL
    )

    # The payload shape is not guaranteed (no hits, missing DOI column,
    # unexpected message type), so extract defensively.
    doi <- tryCatch(data$message$items$DOI[1], error = function(e) NULL)
    if (is.null(doi) || length(doi) == 0L || is.na(doi) || !nzchar(doi)) {
      return(NA_character_)
    }

    doi
  }

  # Step 3: Function to fetch formatted reference from citation.doi.org.
  # Returns the trimmed citation text, or NA_character_ when unavailable.
  get_citation <- function(doi, style) {
    if (is.na(doi)) return(NA_character_)

    res <- tryCatch({
      r <- GET(
        "https://citation.doi.org/format",
        query = list(doi = doi, style = style),
        timeout(request_timeout)
      )
      if (status_code(r) == 200) {
        content(r, "text", encoding = "UTF-8")
      } else {
        NA_character_
      }
    }, error = function(e) NA_character_)

    if (is.null(res) || length(res) != 1L || is.na(res)) {
      return(NA_character_)
    }

    res <- str_trim(res)
    # An empty body is treated like a failed lookup so the caller falls back
    # to the original text instead of emitting a blank reference.
    if (nzchar(res)) res else NA_character_
  }

  # Step 4: Process each reference
  updated_references <- vapply(seq_along(references), function(i) {

    # Remove any existing numbering like "1. "
    ref <- str_remove(references[i], "^\\d+\\.\\s+")

    # CrossRef -> DOI, then citation.doi.org -> formatted reference
    doi <- get_doi(ref)
    citation <- get_citation(doi, style)

    # Fallback if citation not found: keep the original text and append the
    # DOI link only when a DOI was actually found (never ".../NA").
    if (is.na(citation)) {
      citation <- if (is.na(doi)) {
        ref
      } else {
        paste0(ref, " Available from: http://dx.doi.org/", doi)
      }
    }

    paste0(i, ". ", citation)
  }, character(1))

  # Step 5: POST-CLEANING (duplicate numbering, [Internet], Available from → DOI)
  updated_references <- updated_references |>

    # Remove duplicate numbering like "2. 1. Author..."
    str_replace("^(\\d+)\\.\\s+\\d+\\.\\s+", "\\1. ") |>

    # Remove [Internet]
    str_remove_all("\\s*\\[Internet\\]") |>

    # Replace "Available from:" with "DOI:"; accepts both the legacy
    # dx.doi.org host and the current doi.org resolver.
    str_replace_all(
      "Available from:\\s*(https?://(?:dx\\.)?doi\\.org/[^\\s]+)",
      "DOI: \\1"
    )

  # Step 6: Write cleaned references to Word
  doc_out <- read_docx()
  for (line in updated_references) {
    doc_out <- body_add_par(doc_out, line, style = "Normal")
  }

  print(doc_out, target = output_path)
  message("Updated references saved to: ", output_path)

  # Return updated references invisibly; the function is called mainly for
  # its side effect of writing the document.
  invisible(updated_references)
}


