dssr.r

DSSR

Function to submit a PDB file to the DSSR web API and save the text response under data/dssr/

In [1]:

#' Submit a PDB file to the DSSR web API and save the text response
#'
#' @param pdb Character string sent as the `url` form field of the request.
#'   Its basename, minus a trailing ".pdb", names the output file.
#'   NOTE(review): presumably a URL pointing to a PDB file -- confirm against
#'   the DSSR API documentation.
#' @return Called for its side effect: the response body is written to
#'   `data/dssr/dssr_results_<name>.txt` (relative to the working directory).
#'   Returns the value of `writeLines()`, i.e. `NULL` invisibly.
dssr <- function(pdb) {

  # extract filename: drop only a literal, trailing ".pdb" extension.
  # (An unescaped '.pdb' pattern would treat '.' as "any character" and also
  # strip matches from the middle of the name, e.g. "mypdb_1.pdb" -> "m_1".)
  filename <- sub('\\.pdb$', '', basename(pdb))

  # API url
  url <- 'http://api.x3dna.org/dssr'

  # request: the PDB location is passed as the form field named `url`
  req <- request(url) %>%
    req_body_form(url = pdb)

  # perform request
  resp <- req_perform(req)

  # make sure the output directory exists so a finished request is not lost
  # to a "cannot open file" error
  out.file <- paste0("data/dssr/dssr_results_", filename, ".txt")
  dir.create(dirname(out.file), recursive = TRUE, showWarnings = FALSE)

  # extract the response and save it in a text file
  writeLines(resp_body_string(resp), out.file)

}

Function to extract the base-pair table from a DSSR text output file

In [2]:

#' Extract the base-pair table from a DSSR text output file
#'
#' Finds the "List of N base pair(s)" section, takes the rows between its
#' two header lines and the next empty line (or the end of the file), and
#' splits each row on whitespace into eight columns.
#'
#' @param dssr.file Path to a DSSR text output file.
#' @return A data frame with one row per base pair and the columns
#'   `structure`, `id`, `nt1`, `nt2`, `bp`, `name`, `saenger`, `lw`, `dssr`.
#'   `structure` holds the digits found after the last underscore of the
#'   file name; "--" placeholders become `NA`.
#' @details Stops with an explicit error when the file has no base-pair
#'   section or the section has no rows. If several sections match (e.g.
#'   presumably multi-model output -- TODO confirm), only the first is read.
dssr.bp <- function(dssr.file) {

  filename <- basename(dssr.file)
  # keep the text right of the last underscore
  filename <- gsub('.+_', '', filename)
  # keep only numbers
  filename <- gsub('\\D', '', filename)

  # keep the path in `dssr.file`; the contents get their own name
  dssr.lines <- readLines(dssr.file)

  # find the start of the bp list ("pairs?" also accepts a singular
  # "List of 1 base pair" header)
  bp.start <- grep("List of [0-9]+ base pairs?", dssr.lines)
  if (length(bp.start) == 0) {
    stop("No 'List of N base pairs' section found in ", dssr.file,
         call. = FALSE)
  }
  # several headers would make the comparisons below recycle; use the first
  bp.start <- bp.start[1]

  # find the end of the bp list: the first empty line after the bp start,
  # or one past the last line when the list runs to the end of the file
  blank.lines <- grep("^$", dssr.lines)
  bp.end <- blank.lines[blank.lines > bp.start][1]
  if (is.na(bp.end)) {
    bp.end <- length(dssr.lines) + 1L
  }

  # rows start two lines below the header (header + column-name line)
  first.row <- bp.start + 2
  last.row <- bp.end - 1
  if (last.row < first.row) {
    stop("The base pair section of ", dssr.file, " contains no rows",
         call. = FALSE)
  }

  bp.df <- dssr.lines[first.row:last.row] %>%
    # replace all consecutive spaces by a _ to make it easier to split
    gsub(' +', '_', .) %>%
    # remove the first underscore
    gsub('^_', '', .) %>%
    as.data.frame() %>%
    separate_wider_delim(
      cols = 1,
      delim = '_',
      names = c('id', 'nt1', 'nt2', 'bp', 'name', 'saenger', 'lw', 'dssr')
    ) %>%
    mutate(
      id = as.integer(id),
      nt1 = gsub('D', '', gsub('5/1', '1', nt1)),
      nt2 = gsub('5/1', '1', gsub('D', '', nt2)),
      # an NA replacement turns every element that matches '--' into NA
      name = gsub('--', NA_character_, name),
      saenger = gsub('[0-9-]', '', gsub('--', NA_character_, saenger)),
      lw = gsub('--', NA_character_, lw),
      dssr = gsub('--', NA_character_, dssr)
    ) %>%
    mutate(structure = filename, .before = 'id')

  return(bp.df)
}