Dopamine aptamer structure determination

Eric Largy

In [1]:

# load packages (install where necessary)
if (!requireNamespace("librarian", quietly = TRUE)) {
  install.packages("librarian")
}

librarian::shelf(tidyverse, data.table, dtplyr, patchwork,
                 ggsci, ggthemes, ggtext, ggrepel,
                 reticulate, rmarkdown, readxl, writexl, DT, 
                 arrow, purrr, httr2, bio3d, NbClust, magrittr,
                 martinasladek/convertr)


  The 'cran_repo' argument in shelf() was not set, so it will use
  cran_repo = 'https://cran.r-project.org' by default.

  To avoid this message, set the 'cran_repo' argument to a CRAN
  mirror URL (see https://cran.r-project.org/mirrors.html) or set
  'quiet = TRUE'.

Warning: package 'reticulate' was built under R version 4.3.3

Warning: package 'bio3d' was built under R version 4.3.3

# source files and convert to notebooks for display in article
lapply(
  list('R/theme.R', 'R/rst.error.R', 'R/dssr.R',
       'R/chiR.R', 'R/nuR.R', 'R/Contact_analyzer.R'),
  function(x) {
    source(x)
    r_to_qmd(x, paste0('notebooks/', gsub('R/', '', x), '.qmd'))
  }
)

[[1]]
NULL

[[2]]
NULL

[[3]]
NULL

[[4]]
NULL

[[5]]
NULL

[[6]]
NULL

scaling <- 1

color.d3 <- c(
  "#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
  "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"
)

In [2]:

info.short <- read_table(
  'D:/Amber/dop/pmemd/out/4_prod_doprstslt01.out',
  col_names = FALSE,
  skip = 37,
  n_max = 8
) %>% 
  select(1) %>% 
  separate_wider_delim(delim = '=', col = 1, names = c('param', 'value')) %>% 
  mutate(value = as.numeric(gsub(',', '', value))) %>% 
  pivot_wider(names_from = param, values_from = value) %>% 
  summarise(
    time.per.frame = ntpr * dt / 1000, #ns
    total.t = time.per.frame * nstlim/ntpr
  )


── Column specification ────────────────────────────────────────────────────────
cols(
  X1 = col_character(),
  X2 = col_character(),
  X3 = col_character(),
  X4 = col_character(),
  X5 = col_character(),
  X6 = col_character(),
  X7 = col_character(),
  X8 = col_character()
)

Warning: 8 parsing failures.
row col  expected     actual                                            file
  2  -- 8 columns 12 columns 'D:/Amber/dop/pmemd/out/4_prod_doprstslt01.out'
  3  -- 8 columns 10 columns 'D:/Amber/dop/pmemd/out/4_prod_doprstslt01.out'
  4  -- 8 columns 12 columns 'D:/Amber/dop/pmemd/out/4_prod_doprstslt01.out'
  5  -- 8 columns 4 columns  'D:/Amber/dop/pmemd/out/4_prod_doprstslt01.out'
  6  -- 8 columns 12 columns 'D:/Amber/dop/pmemd/out/4_prod_doprstslt01.out'
... ... ......... .......... ...............................................
See problems(...) for more details.

info.long <- read_table(
  'D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200.out', 
  col_names = FALSE,
  skip = 37,
  n_max = 8
) %>% 
  select(1) %>% 
  separate_wider_delim(delim = '=', col = 1, names = c('param', 'value')) %>% 
  mutate(value = as.numeric(gsub(',', '', value))) %>% 
  pivot_wider(names_from = param, values_from = value) %>% 
  summarise(
    time.per.frame = ntpr * dt / 1000, #ns
    total.t = time.per.frame * nstlim/ntpr #ns
  )


── Column specification ────────────────────────────────────────────────────────
cols(
  X1 = col_character(),
  X2 = col_character(),
  X3 = col_character(),
  X4 = col_character(),
  X5 = col_character(),
  X6 = col_character(),
  X7 = col_character(),
  X8 = col_character()
)

Warning: 8 parsing failures.
row col  expected     actual                                                file
  2  -- 8 columns 12 columns 'D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200.out'
  3  -- 8 columns 10 columns 'D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200.out'
  4  -- 8 columns 12 columns 'D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200.out'
  5  -- 8 columns 4 columns  'D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200.out'
  6  -- 8 columns 12 columns 'D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200.out'
... ... ......... .......... ...................................................
See problems(...) for more details.

info.long.unrst <- read_table(
  'D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250.out', 
  col_names = FALSE,
  skip = 37,
  n_max = 8
) %>% 
  select(1) %>% 
  separate_wider_delim(delim = '=', col = 1, names = c('param', 'value')) %>% 
  mutate(value = as.numeric(gsub(',', '', value))) %>% 
  pivot_wider(names_from = param, values_from = value) %>% 
  summarise(
    time.per.frame = ntpr * dt / 1000, #ns
    total.t = time.per.frame * nstlim/ntpr #ns
  )


── Column specification ────────────────────────────────────────────────────────
cols(
  X1 = col_character(),
  X2 = col_character(),
  X3 = col_character(),
  X4 = col_character(),
  X5 = col_character(),
  X6 = col_character(),
  X7 = col_character(),
  X8 = col_character()
)

Warning: 8 parsing failures.
row col  expected     actual                                                  file
  2  -- 8 columns 12 columns 'D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250.out'
  3  -- 8 columns 10 columns 'D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250.out'
  4  -- 8 columns 12 columns 'D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250.out'
  5  -- 8 columns 4 columns  'D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250.out'
  6  -- 8 columns 12 columns 'D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250.out'
... ... ......... .......... .....................................................
See problems(...) for more details.

In [3]:

# Minimized structures---

#remove previous file
if (file.exists('images/multi.min.pdb')) {
  file.remove('images/multi.min.pdb')
}

[1] TRUE

# read all final minimization pdb files
min.pdb.list <- lapply(
  list.files(path = 'data', pattern = "5_min_dop.*\\.pdb$", full.names = TRUE), 
  read.pdb
)

# concatenate xyz coordinates
for (i in 1:length(min.pdb.list)) {
  if (i == 1) {
    xyz <- min.pdb.list[[i]]$xyz[1,]
  } else {
    xyz <- rbind(
      xyz,
      min.pdb.list[[i]]$xyz[1,]
    )
  }
}

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

Warning in rbind(xyz, min.pdb.list[[i]]$xyz[1, ]): number of columns of result
is not a multiple of vector length (arg 2)

# export multimodel pdb file
write.pdb(min.pdb.list[[1]], xyz = xyz, file = "data/input_multi.min.pdb")

# read multimodel pdb file, renumber, remove hydrogens and waters
min.pdb <- read.pdb("data/input_multi.min.pdb", multi = TRUE) %>% 
  convert.pdb(., renumber = TRUE,
              first.resno = 11,
              rm.h = FALSE,
              rm.wat = TRUE) %>% 
  trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))

     Retaining 938 non-water atoms
     Removing a total of 27966  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

# clean up of pdb file
min.pdb$atom <- min.pdb$atom %>% 
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DT3' ~ 'DT',
      resid == 'DT5' ~ 'DT',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    )
  )

#align all frames
min.pdb$xyz <- fit.xyz(
  fixed = min.pdb$xyz[1,],
  mobile = min.pdb$xyz,
  fixed.inds = atom.select(min.pdb, elety = unique(min.pdb$atom$elety))$xyz,
  mobile.inds = atom.select(min.pdb, elety = unique(min.pdb$atom$elety))$xyz
)

# write pdb file for deposition
if (!file.exists('images/multi.min.pdb')) {
  write.pdb(min.pdb, file = "images/multi.min.pdb")
}

# re-import without hydrogends for data processing below
pdb <- read.pdb('images/multi.min.pdb', multi = TRUE) %>% 
  convert.pdb(., rm.h = TRUE)

     Retaining 571 non-hydrogen atoms
     Removing a total of 320  atoms

In [4]:

for (i in 1:15) {
  
  if (i < 10) {
    j <- paste0('0', i)
  } else {
    j <- i
  }
  
  # Load the minimized structures
  # remove water and hydrogen atoms
  pdb.object <- as.pdb(
    prmtop = read.prmtop(paste0('data/doprstslt', j, '.prmtop')), 
    crd = read.ncdf(paste0('D:/Amber/dop/pmemd/out/4_prod_doprstslt', j, '.nc'))
  ) %>% 
    convert.pdb(., renumber = TRUE,
                first.resno = 11,
                rm.h = TRUE,
                rm.wat = TRUE) %>% 
    trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  
  # clean up pdb
  pdb.object$atom <- pdb.object$atom %>% 
    mutate(
      # clean up residue names
      resid = case_when(
        resid == 'DT3' ~ 'DT',
        resid == 'DT5' ~ 'DT',
        TRUE ~ resid
      ),
      # move dopamine to chain B
      chain = if_else(resid == 'LDP', 'B', 'A'),
      # rename dopamine atoms
      elety = case_when(
        resid == 'LDP' & elety == 'OAH' ~ 'O1',
        resid == 'LDP' & elety == 'HAH' ~ 'HO1',
        resid == 'LDP' & elety == 'OAI' ~ 'O2',
        resid == 'LDP' & elety == 'HAI' ~ 'HO2',
        resid == 'LDP' & elety == 'CAB' ~ 'C1',
        resid == 'LDP' & elety == 'CAD' ~ 'C2',
        resid == 'LDP' & elety == 'HAD' ~ 'H2',
        resid == 'LDP' & elety == 'CAG' ~ 'C3',
        resid == 'LDP' & elety == 'CAC' ~ 'C4',
        resid == 'LDP' & elety == 'CAF' ~ 'C5',
        resid == 'LDP' & elety == 'HAF' ~ 'H5',
        resid == 'LDP' & elety == 'CAE' ~ 'C6',
        resid == 'LDP' & elety == 'HAE' ~ 'H6',
        resid == 'LDP' & elety == 'CAA' ~ 'C7',
        resid == 'LDP' & elety == 'HAA' ~ 'H71',
        resid == 'LDP' & elety == 'HAB' ~ 'H72',
        resid == 'LDP' & elety == 'CAJ' ~ 'C8',
        resid == 'LDP' & elety == 'HAJ' ~ 'H81',
        resid == 'LDP' & elety == 'HAK' ~ 'H82',
        resid == 'LDP' & elety == 'NAK' ~ 'N1',
        resid == 'LDP' & elety == 'HAL' ~ 'HN11',
        resid == 'LDP' & elety == 'HAM' ~ 'HN13',
        resid == 'LDP' & elety == 'HAN' ~ 'HN12',
        TRUE ~ elety
      ),
      elesy = if_else(
        is.na(elesy) | elesy == 'not found',
        case_when(
          elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
          elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
          elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
          elety %in% c(paste0("N", 1:99)) ~ 'N',
          elety %in% c("P", paste0("P", 1:99)) ~ 'P',
          TRUE ~ 'not found'
        ),
        elesy
      )
    )
  
  #align all frames
  pdb.object$xyz <- fit.xyz(
    fixed = pdb.object$xyz[1,],
    mobile = pdb.object$xyz,
    fixed.inds = atom.select(pdb.object, elety = unique(pdb.object$atom$elety))$xyz,
    mobile.inds = atom.select(pdb.object, elety = unique(pdb.object$atom$elety))$xyz
  )
  
  #assign to numbered pdb object
  assign(paste0('pdb.traj.', j), pdb.object)
  
  #export a pdb trajectory with only 1000 frames
  #to avoid very big files
  if (!file.exists(paste0('images/4_prod_doprstslt', j, '.traj.1000frames.pdb'))) {
    write.pdb(
      pdb = pdb.object,
      xyz = pdb.object$xyz[seq(1, 5000, by = 5),],
      file = paste0('images/4_prod_doprstslt', j, '.traj.1000frames.pdb')
    )
  }
  
}

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt02.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 37252"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  37252 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9152 

     Retaining 936 non-water atoms
     Retaining 616 non-hydrogen atoms
     Removing a total of 36636  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt03.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 36364"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  36364 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 8930 

     Retaining 936 non-water atoms
     Retaining 616 non-hydrogen atoms
     Removing a total of 35748  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt04.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 34168"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  34168 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 8378 

     Retaining 932 non-water atoms
     Retaining 612 non-hydrogen atoms
     Removing a total of 33556  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt05.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 36656"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  36656 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9003 

     Retaining 936 non-water atoms
     Retaining 616 non-hydrogen atoms
     Removing a total of 36040  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt06.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 36972"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  36972 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9082 

     Retaining 936 non-water atoms
     Retaining 616 non-hydrogen atoms
     Removing a total of 36356  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt07.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38038"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38038 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9350 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37420  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt08.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 35250"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  35250 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 8650 

     Retaining 934 non-water atoms
     Retaining 614 non-hydrogen atoms
     Removing a total of 34636  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt09.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 37192"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  37192 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9137 

     Retaining 936 non-water atoms
     Retaining 616 non-hydrogen atoms
     Removing a total of 36576  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt10.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 37962"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  37962 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9331 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37344  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt11.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 36728"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  36728 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9021 

     Retaining 936 non-water atoms
     Retaining 616 non-hydrogen atoms
     Removing a total of 36112  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt12.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38566"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38566 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9482 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37948  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt13.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 36432"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  36432 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 8947 

     Retaining 936 non-water atoms
     Retaining 616 non-hydrogen atoms
     Removing a total of 35816  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt14.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 33620"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  33620 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 8241 

     Retaining 932 non-water atoms
     Retaining 612 non-hydrogen atoms
     Removing a total of 33008  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt15.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 35920"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  35920 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 8819 

     Retaining 936 non-water atoms
     Retaining 616 non-hydrogen atoms
     Removing a total of 35304  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

# create a xyz.list that contains all xyz 
xyz.list <- lapply(1:15, function(i) {
  pdb.traj <- get(paste0('pdb.traj.', sprintf('%02d', i)))
  return(pdb.traj$xyz)
})

In [5]:

pdb.long.files <- lapply(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_doprstslt01_200(\\_restart\\d*)?.nc$", 
    full.names = TRUE), 
  function(prmtop = 'data/doprstslt01.prmtop', crd) {
    as.pdb(
      prmtop = read.prmtop(prmtop),
      crd = read.ncdf(crd)
    ) %>%
      convert.pdb(., renumber = TRUE,
                  first.resno = 11,
                  rm.h = TRUE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }, 
  prmtop = 'data/doprstslt01.prmtop')

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 2246"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart4.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart5.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 3847"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

pdb.long <- pdb.long.files[[1]]
pdb.long$xyz <- do.call(rbind, lapply(pdb.long.files, function(x) x$xyz))

divider <- 5

pdb.long$xyz <- pdb.long$xyz[1:(5000*divider),] #trim the first 25000 frames (exactly one microsecond)

# clean up pdb
pdb.long$atom <- pdb.long$atom %>% 
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DT3' ~ 'DT',
      resid == 'DT5' ~ 'DT',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
    elesy = if_else(
      is.na(elesy) | elesy == 'not found',
      case_when(
        elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
        elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
        elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
        elety %in% c(paste0("N", 1:99)) ~ 'N',
        elety %in% c("P", paste0("P", 1:99)) ~ 'P',
        TRUE ~ 'not found'
      ),
      elesy
    )
  )

#align all frames
pdb.long$xyz <- fit.xyz(
  fixed = pdb.long$xyz[1,],
  mobile = pdb.long$xyz,
  fixed.inds = atom.select(pdb.long, elety = unique(pdb.long$atom$elety))$xyz,
  mobile.inds = atom.select(pdb.long, elety = unique(pdb.long$atom$elety))$xyz
)

###

#for reference, these are the imported files
# [1] "D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200.nc"         
# [2] "D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart.nc" 
# [3] "D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart2.nc"
# [4] "D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart3.nc"
# [5] "D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart4.nc"
# [6] "D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart5.nc"

In [6]:

pdb.long.h.files <- lapply(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_doprstslt01_200(\\_restart\\d*)?.nc$", 
    full.names = TRUE), 
  function(prmtop = 'data/doprstslt01.prmtop', crd) {
    as.pdb(
      prmtop = read.prmtop(prmtop),
      crd = read.ncdf(crd)
    ) %>%
      convert.pdb(., renumber = TRUE,
                  first.resno = 11,
                  rm.h = FALSE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }, 
  prmtop = 'data/doprstslt01.prmtop')

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 2246"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart4.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doprstslt01_200_restart5.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 3847"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

pdb.long.h <- pdb.long.h.files[[1]]
pdb.long.h$xyz <- do.call(rbind, lapply(pdb.long.h.files, function(x) x$xyz))

divider <- 5

pdb.long.h$xyz <- pdb.long.h$xyz[1:(5000*divider),] #trim the first 25000 frames (exactly one microsecond)

# clean up pdb
pdb.long.h$atom <- pdb.long.h$atom %>% 
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DT3' ~ 'DT',
      resid == 'DT5' ~ 'DT',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
    elesy = if_else(
      is.na(elesy) | elesy == 'not found',
      case_when(
        elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
        elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
        elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
        elety %in% c(paste0("N", 1:99)) ~ 'N',
        elety %in% c("P", paste0("P", 1:99)) ~ 'P',
        TRUE ~ 'not found'
      ),
      elesy
    )
  )

#align all frames
pdb.long.h$xyz <- fit.xyz(
  fixed = pdb.long.h$xyz[1,],
  mobile = pdb.long.h$xyz,
  fixed.inds = atom.select(pdb.long.h, elety = unique(pdb.long.h$atom$elety))$xyz,
  mobile.inds = atom.select(pdb.long.h, elety = unique(pdb.long.h$atom$elety))$xyz
)

In [7]:

pdb.unrst.long.files <- lapply(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_dopunrstslt01_250(\\_restart\\d*)?.nc$", 
    full.names = TRUE), 
  function(prmtop = 'data/doprstslt01.prmtop', crd) {
    as.pdb(
      prmtop = read.prmtop(prmtop),
      crd = read.ncdf(crd)
    ) %>%
      convert.pdb(., renumber = TRUE,
                  first.resno = 11,
                  rm.h = TRUE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }, 
  prmtop = 'data/doprstslt01.prmtop')

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250_restart.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250_restart2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250_restart3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Retaining 618 non-hydrogen atoms
     Removing a total of 37608  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

pdb.unrst.long <- pdb.unrst.long.files[[1]]
pdb.unrst.long$xyz <- do.call(rbind, lapply(pdb.unrst.long.files, function(x) x$xyz))

unrst.divider <- length(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_dopunrstslt01_250(\\_restart\\d*)?.nc$")
)

pdb.unrst.long$xyz <- pdb.unrst.long$xyz[1:(5000*25/20*unrst.divider),] #trim the first 25000 frames (exactly one microsecond)

# clean up pdb
pdb.unrst.long$atom <- pdb.unrst.long$atom %>% 
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DT3' ~ 'DT',
      resid == 'DT5' ~ 'DT',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
    elesy = if_else(
      is.na(elesy) | elesy == 'not found',
      case_when(
        elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
        elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
        elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
        elety %in% c(paste0("N", 1:99)) ~ 'N',
        elety %in% c("P", paste0("P", 1:99)) ~ 'P',
        TRUE ~ 'not found'
      ),
      elesy
    )
  )

#align all frames
pdb.unrst.long$xyz <- fit.xyz(
  fixed = pdb.unrst.long$xyz[1,],
  mobile = pdb.unrst.long$xyz,
  fixed.inds = atom.select(pdb.unrst.long, elety = unique(pdb.unrst.long$atom$elety))$xyz,
  mobile.inds = atom.select(pdb.unrst.long, elety = unique(pdb.unrst.long$atom$elety))$xyz
)

###
###

In [8]:

pdb.unrst.h.long.files <- lapply(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_dopunrstslt01_250(\\_restart\\d*)?.nc$", 
    full.names = TRUE), 
  function(prmtop = 'data/doprstslt01.prmtop', crd) {
    as.pdb(
      prmtop = read.prmtop(prmtop),
      crd = read.ncdf(crd)
    ) %>%
      convert.pdb(., renumber = TRUE,
                  first.resno = 11,
                  rm.h = FALSE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }, 
  prmtop = 'data/doprstslt01.prmtop')

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250_restart.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250_restart2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_dopunrstslt01_250_restart3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38226"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38226 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9397 

     Retaining 938 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

pdb.unrst.h.long <- pdb.unrst.h.long.files[[1]]
pdb.unrst.h.long$xyz <- do.call(rbind, lapply(pdb.unrst.h.long.files, function(x) x$xyz))

unrst.divider <- length(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_dopunrstslt01_250(\\_restart\\d*)?.nc$")
)

pdb.unrst.h.long$xyz <- pdb.unrst.h.long$xyz[1:(5000*25/20*unrst.divider),] #trim the first 25000 frames (exactly one microsecond)

# clean up pdb
pdb.unrst.h.long$atom <- pdb.unrst.h.long$atom %>% 
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DT3' ~ 'DT',
      resid == 'DT5' ~ 'DT',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
    elesy = if_else(
      is.na(elesy) | elesy == 'not found',
      case_when(
        elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
        elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
        elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
        elety %in% c(paste0("N", 1:99)) ~ 'N',
        elety %in% c("P", paste0("P", 1:99)) ~ 'P',
        TRUE ~ 'not found'
      ),
      elesy
    )
  )

#align all frames
pdb.unrst.h.long$xyz <- fit.xyz(
  fixed = pdb.unrst.h.long$xyz[1,],
  mobile = pdb.unrst.h.long$xyz,
  fixed.inds = atom.select(pdb.unrst.h.long, elety = unique(pdb.unrst.h.long$atom$elety))$xyz,
  mobile.inds = atom.select(pdb.unrst.h.long, elety = unique(pdb.unrst.h.long$atom$elety))$xyz
)

In [9]:

pdb.noldp.long.files <- lapply(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_noligand_250(\\_restart\\d*)?.nc$", 
    full.names = TRUE), 
  function(prmtop = 'D:/Amber/dop/leap/noligand.prmtop', crd) {
    as.pdb(
      prmtop = read.prmtop(prmtop),
      crd = read.ncdf(crd)
    ) %>%
      convert.pdb(., renumber = TRUE,
                  first.resno = 11,
                  rm.h = TRUE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }, 
  prmtop = 'D:/Amber/dop/leap/noligand.prmtop')

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_noligand_250.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38202"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38202 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9395 

     Retaining 914 non-water atoms
     Retaining 606 non-hydrogen atoms
     Removing a total of 37596  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_noligand_250_restart.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38202"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38202 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9395 

     Retaining 914 non-water atoms
     Retaining 606 non-hydrogen atoms
     Removing a total of 37596  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_noligand_250_restart2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38202"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38202 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9395 

     Retaining 914 non-water atoms
     Retaining 606 non-hydrogen atoms
     Removing a total of 37596  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_noligand_250_restart3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38202"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38202 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9395 

     Retaining 914 non-water atoms
     Retaining 606 non-hydrogen atoms
     Removing a total of 37596  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

pdb.noldp.long <- pdb.noldp.long.files[[1]]
pdb.noldp.long$xyz <- do.call(rbind, lapply(pdb.noldp.long.files, function(x) x$xyz))

noldp.divider <- length(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_noligand_250(\\_restart\\d*)?.nc$")
)

pdb.noldp.long$xyz <- pdb.noldp.long$xyz[1:(5000*25/20*noldp.divider),] #trim the first 25000 frames (exactly one microsecond)

# clean up pdb
pdb.noldp.long$atom <- pdb.noldp.long$atom %>% 
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DT3' ~ 'DT',
      resid == 'DT5' ~ 'DT',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
    elesy = if_else(
      is.na(elesy) | elesy == 'not found',
      case_when(
        elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
        elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
        elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
        elety %in% c(paste0("N", 1:99)) ~ 'N',
        elety %in% c("P", paste0("P", 1:99)) ~ 'P',
        TRUE ~ 'not found'
      ),
      elesy
    )
  )

#align all frames
pdb.noldp.long$xyz <- fit.xyz(
  fixed = pdb.noldp.long$xyz[1,],
  mobile = pdb.noldp.long$xyz,
  fixed.inds = atom.select(pdb.noldp.long, elety = unique(pdb.noldp.long$atom$elety))$xyz,
  mobile.inds = atom.select(pdb.noldp.long, elety = unique(pdb.noldp.long$atom$elety))$xyz
)

In [10]:

pdb.noldp.h.long.files <- lapply(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_noligand_250(\\_restart\\d*)?.nc$", 
    full.names = TRUE), 
  function(prmtop = 'D:/Amber/dop/leap/noligand.prmtop', crd) {
    as.pdb(
      prmtop = read.prmtop(prmtop),
      crd = read.ncdf(crd)
    ) %>%
      convert.pdb(., renumber = TRUE,
                  first.resno = 11,
                  rm.h = FALSE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }, 
  prmtop = 'D:/Amber/dop/leap/noligand.prmtop')

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_noligand_250.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38202"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38202 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9395 

     Retaining 914 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_noligand_250_restart.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38202"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38202 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9395 

     Retaining 914 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_noligand_250_restart2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38202"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38202 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9395 

     Retaining 914 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_noligand_250_restart3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 38202"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  38202 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 9395 

     Retaining 914 non-water atoms
     Removing a total of 37288  atoms
     Renumbering residues ( from 11 ) and atoms ( from 1 )

Warning in convert.pdb(., renumber = TRUE, first.resno = 11, rm.h = FALSE, :     Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

pdb.noldp.h.long <- pdb.noldp.h.long.files[[1]]
pdb.noldp.h.long$xyz <- do.call(rbind, lapply(pdb.noldp.h.long.files, function(x) x$xyz))

noldp.divider <- length(
  list.files(
    path = 'D:/Amber/dop/pmemd/out/', 
    pattern = "4_prod_noligand_250(\\_restart\\d*)?.nc$")
)

pdb.noldp.long$xyz <- pdb.noldp.long$xyz[1:(5000*25/20*noldp.divider),] #trim the first 25000 frames (exactly one microsecond)

# clean up pdb
pdb.noldp.h.long$atom <- pdb.noldp.h.long$atom %>% 
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DT3' ~ 'DT',
      resid == 'DT5' ~ 'DT',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
    elesy = if_else(
      is.na(elesy) | elesy == 'not found',
      case_when(
        elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
        elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
        elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
        elety %in% c(paste0("N", 1:99)) ~ 'N',
        elety %in% c("P", paste0("P", 1:99)) ~ 'P',
        TRUE ~ 'not found'
      ),
      elesy
    )
  )

#align all frames
pdb.noldp.h.long$xyz <- fit.xyz(
  fixed = pdb.noldp.h.long$xyz[1,],
  mobile = pdb.noldp.h.long$xyz,
  fixed.inds = atom.select(pdb.noldp.h.long, elety = unique(pdb.noldp.h.long$atom$elety))$xyz,
  mobile.inds = atom.select(pdb.noldp.h.long, elety = unique(pdb.noldp.h.long$atom$elety))$xyz
)

In [11]:

source('R/doplong.R')

[1] "file already exists"


  The 'cran_repo' argument in shelf() was not set, so it will use
  cran_repo = 'https://cran.r-project.org' by default.

  To avoid this message, set the 'cran_repo' argument to a CRAN
  mirror URL (see https://cran.r-project.org/mirrors.html) or set
  'quiet = TRUE'.


── Column specification ────────────────────────────────────────────────────────
cols(
  .default = col_character(),
  X7 = col_double(),
  X16 = col_double(),
  X20 = col_double(),
  X21 = col_double(),
  X22 = col_double(),
  X24 = col_double()
)
ℹ Use `spec()` for the full column specifications.

Warning: 196 parsing failures.
row col   expected     actual               file
 38  -- 24 columns 19 columns 'data/unambig.tbl'
 39  -- 24 columns 19 columns 'data/unambig.tbl'
 41  -- 24 columns 19 columns 'data/unambig.tbl'
 42  -- 24 columns 19 columns 'data/unambig.tbl'
 44  -- 24 columns 19 columns 'data/unambig.tbl'
... ... .......... .......... ..................
See problems(...) for more details.

[1] "group 0 processed"
[1] "group 1 processed"
[1] "group 2 processed"
[1] "group 3 processed"
[1] "group 4 processed"
[1] "group 5 processed"
[1] "group 6 processed"
[1] "group 7 processed"
[1] "group 8 processed"
[1] "group 9 processed"
[1] "group 10 processed"
[1] "group 11 processed"
[1] "group 12 processed"
[1] "group 13 processed"
[1] "group 14 processed"
[1] "group 15 processed"
[1] "group 16 processed"
[1] "group 17 processed"
[1] "group 18 processed"
[1] "group 19 processed"
[1] "group 20 processed"
[1] "group 21 processed"
[1] "group 22 processed"
[1] "group 23 processed"
[1] "group 24 processed"
[1] "group 25 processed"
[1] "group 26 processed"
[1] "group 27 processed"
[1] "group 28 processed"
[1] "group 29 processed"
[1] "group 30 processed"
[1] "group 31 processed"
[1] "group 32 processed"
[1] "group 33 processed"
[1] "group 34 processed"
[1] "group 35 processed"
[1] "group 36 processed"
[1] "group 37 processed"
[1] "group 38 processed"
[1] "group 39 processed"
[1] "group 40 processed"
[1] "group 41 processed"
[1] "group 42 processed"
[1] "group 43 processed"
[1] "group 44 processed"
[1] "group 45 processed"
[1] "group 46 processed"
[1] "group 47 processed"
[1] "group 48 processed"
[1] "group 49 processed"
[1] "group 50 processed"
[1] "group 51 processed"
[1] "group 52 processed"
[1] "group 53 processed"
[1] "group 54 processed"
[1] "group 55 processed"
[1] "group 56 processed"
[1] "group 57 processed"
[1] "group 58 processed"
[1] "group 59 processed"
[1] "group 60 processed"
[1] "group 61 processed"
[1] "group 62 processed"
[1] "group 63 processed"
[1] "group 64 processed"
[1] "group 65 processed"
[1] "group 66 processed"
[1] "group 67 processed"
[1] "group 68 processed"
[1] "group 69 processed"
[1] "group 70 processed"
[1] "group 71 processed"
[1] "group 72 processed"
[1] "group 73 processed"
[1] "group 74 processed"
[1] "group 75 processed"
[1] "group 76 processed"
[1] "group 77 processed"
[1] "group 78 processed"
[1] "group 79 processed"
[1] "group 80 processed"
[1] "group 81 processed"
[1] "group 82 processed"
[1] "group 83 processed"
[1] "group 84 processed"
[1] "group 85 processed"
[1] "group 86 processed"
[1] "group 87 processed"
[1] "group 88 processed"
[1] "group 89 processed"
[1] "group 90 processed"
[1] "group 91 processed"
[1] "group 92 processed"
[1] "group 93 processed"
[1] "group 94 processed"
[1] "group 95 processed"
[1] "group 96 processed"
[1] "group 97 processed"
[1] "group 98 processed"
[1] "group 99 processed"
[1] "group 100 processed"
[1] "group 101 processed"
[1] "group 102 processed"
[1] "group 103 processed"
[1] "group 104 processed"
[1] "group 105 processed"
[1] "group 106 processed"
[1] "group 107 processed"
[1] "group 108 processed"
[1] "group 109 processed"
[1] "group 110 processed"
[1] "group 111 processed"
[1] "group 112 processed"
[1] "group 113 processed"
[1] "group 114 processed"
[1] "group 115 processed"
[1] "group 116 processed"
[1] "group 117 processed"
[1] "group 118 processed"
[1] "group 119 processed"
[1] "group 120 processed"
[1] "group 121 processed"
[1] "group 122 processed"
[1] "group 123 processed"
[1] "group 124 processed"
[1] "group 125 processed"
[1] "group 126 processed"
[1] "group 127 processed"
[1] "group 128 processed"
[1] "group 129 processed"
[1] "group 130 processed"
[1] "group 131 processed"
[1] "group 132 processed"
[1] "group 133 processed"
[1] "group 134 processed"
[1] "group 135 processed"
[1] "group 136 processed"
[1] "group 137 processed"
[1] "group 138 processed"
[1] "group 139 processed"
[1] "group 140 processed"
[1] "group 141 processed"
[1] "group 142 processed"
[1] "group 143 processed"
[1] "group 144 processed"
[1] "group 145 processed"
[1] "group 146 processed"
[1] "group 147 processed"
[1] "group 148 processed"
[1] "group 149 processed"
[1] "group 150 processed"
[1] "group 151 processed"
[1] "group 152 processed"
[1] "group 153 processed"
[1] "group 154 processed"
[1] "group 155 processed"
[1] "group 156 processed"
[1] "group 157 processed"
[1] "group 158 processed"
[1] "group 159 processed"
[1] "group 160 processed"
[1] "group 161 processed"
[1] "group 162 processed"
[1] "group 163 processed"
[1] "group 164 processed"
[1] "group 165 processed"
[1] "group 166 processed"
[1] "group 167 processed"
[1] "group 168 processed"
[1] "group 169 processed"
[1] "group 170 processed"
[1] "group 171 processed"
[1] "group 172 processed"
[1] "group 173 processed"
[1] "group 174 processed"
[1] "group 175 processed"
[1] "group 176 processed"
[1] "group 177 processed"
[1] "group 178 processed"
[1] "group 179 processed"
[1] "group 180 processed"
[1] "group 181 processed"
[1] "group 182 processed"
[1] "group 183 processed"
[1] "group 184 processed"
[1] "group 185 processed"
[1] "group 186 processed"
[1] "group 187 processed"
[1] "group 188 processed"
[1] "group 189 processed"
[1] "group 190 processed"
[1] "group 191 processed"
[1] "group 192 processed"
[1] "group 193 processed"
[1] "group 194 processed"
[1] "group 195 processed"
[1] "group 196 processed"
[1] "group 197 processed"
[1] "group 198 processed"
[1] "group 199 processed"
[1] "group 200 processed"
[1] "group 201 processed"
[1] "group 202 processed"
[1] "group 203 processed"
[1] "group 204 processed"
[1] "group 205 processed"
[1] "group 206 processed"
[1] "group 207 processed"
[1] "group 208 processed"
[1] "group 209 processed"
[1] "group 210 processed"
[1] "group 211 processed"
[1] "group 212 processed"
[1] "group 213 processed"
[1] "group 214 processed"
[1] "group 215 processed"
[1] "group 216 processed"
[1] "group 217 processed"
[1] "group 218 processed"
[1] "group 219 processed"
[1] "group 220 processed"
[1] "group 221 processed"
[1] "group 222 processed"
[1] "group 223 processed"
[1] "group 224 processed"
[1] "group 225 processed"
[1] "group 226 processed"
[1] "group 227 processed"
[1] "group 228 processed"
[1] "group 229 processed"
[1] "group 230 processed"
[1] "group 231 processed"
[1] "group 232 processed"
[1] "group 233 processed"
[1] "group 234 processed"
[1] "group 235 processed"
[1] "group 236 processed"
[1] "group 237 processed"
[1] "group 238 processed"
[1] "group 239 processed"
[1] "group 240 processed"
[1] "group 241 processed"
[1] "group 242 processed"
[1] "group 243 processed"
[1] "group 244 processed"
[1] "group 245 processed"
[1] "group 246 processed"
[1] "group 247 processed"
[1] "group 248 processed"
[1] "group 249 processed"
[1] "group 250 processed"
[1] "group 251 processed"
[1] "group 252 processed"
[1] "group 253 processed"
[1] "group 254 processed"
[1] "group 255 processed"
[1] "group 256 processed"
[1] "group 257 processed"
[1] "group 258 processed"
[1] "group 259 processed"
[1] "group 260 processed"
[1] "group 261 processed"
[1] "group 262 processed"
[1] "group 263 processed"
[1] "group 264 processed"
[1] "group 265 processed"
[1] "group 266 processed"
[1] "group 267 processed"
[1] "group 268 processed"
[1] "group 269 processed"
[1] "group 270 processed"
[1] "group 271 processed"
[1] "group 272 processed"
[1] "group 273 processed"
[1] "group 274 processed"
[1] "group 275 processed"
[1] "group 276 processed"
[1] "group 277 processed"
[1] "group 278 processed"
[1] "group 279 processed"
[1] "group 280 processed"
[1] "group 281 processed"
[1] "group 282 processed"
[1] "group 283 processed"
[1] "group 284 processed"
[1] "group 285 processed"
[1] "group 286 processed"
[1] "group 287 processed"
[1] "group 288 processed"
[1] "group 289 processed"
[1] "group 290 processed"
[1] "group 291 processed"
[1] "group 292 processed"
[1] "group 293 processed"
[1] "group 294 processed"
[1] "group 295 processed"
[1] "group 296 processed"
[1] "group 297 processed"
[1] "group 298 processed"
[1] "group 299 processed"
[1] "group 300 processed"
[1] "group 301 processed"
[1] "group 302 processed"
[1] "group 303 processed"
[1] "group 304 processed"
[1] "group 305 processed"
[1] "group 306 processed"
[1] "group 307 processed"
[1] "group 308 processed"
[1] "group 309 processed"
[1] "group 310 processed"
[1] "group 311 processed"
[1] "group 312 processed"
[1] "group 313 processed"
[1] "group 314 processed"
[1] "group 315 processed"
[1] "group 316 processed"
[1] "group 317 processed"
[1] "group 318 processed"
[1] "group 319 processed"
[1] "group 320 processed"
[1] "group 321 processed"
[1] "group 322 processed"
[1] "group 323 processed"
[1] "group 324 processed"
[1] "group 325 processed"
[1] "group 326 processed"
[1] "group 327 processed"
[1] "group 328 processed"
[1] "group 329 processed"
[1] "group 330 processed"
[1] "group 331 processed"
[1] "group 332 processed"
[1] "group 333 processed"
[1] "group 334 processed"
[1] "group 335 processed"
[1] "group 336 processed"
[1] "group 337 processed"
[1] "group 338 processed"
[1] "group 339 processed"
[1] "group 340 processed"
[1] "group 341 processed"
[1] "group 342 processed"
[1] "group 343 processed"
[1] "group 344 processed"
[1] "group 345 processed"
[1] "group 346 processed"
[1] "group 347 processed"
[1] "group 348 processed"
[1] "group 349 processed"
[1] "group 350 processed"
[1] "group 351 processed"
[1] "group 352 processed"
[1] "group 353 processed"
[1] "group 354 processed"
[1] "group 355 processed"
[1] "group 356 processed"
[1] "group 357 processed"
[1] "group 358 processed"
[1] "group 359 processed"
[1] "group 360 processed"
[1] "group 361 processed"
[1] "group 362 processed"
[1] "group 363 processed"
[1] "group 364 processed"
[1] "group 365 processed"
[1] "group 366 processed"
[1] "group 367 processed"
[1] "group 368 processed"
[1] "group 369 processed"
[1] "group 370 processed"
[1] "group 371 processed"
[1] "group 372 processed"
[1] "group 373 processed"
[1] "group 374 processed"
[1] "group 375 processed"
[1] "group 376 processed"
[1] "group 377 processed"
[1] "group 378 processed"
[1] "group 379 processed"
[1] "group 380 processed"
[1] "group 381 processed"
[1] "group 382 processed"
[1] "group 383 processed"
[1] "group 384 processed"
[1] "group 385 processed"
[1] "group 386 processed"
[1] "group 387 processed"
[1] "group 388 processed"
[1] "group 389 processed"
[1] "group 390 processed"
[1] "group 391 processed"
[1] "group 392 processed"
[1] "group 393 processed"
[1] "group 394 processed"
[1] "group 395 processed"
[1] "group 396 processed"
[1] "group 397 processed"
[1] "group 398 processed"
[1] "group 399 processed"
[1] "group 400 processed"
[1] "group 401 processed"
[1] "group 402 processed"
[1] "group 403 processed"
[1] "group 404 processed"
[1] "group 405 processed"
[1] "group 406 processed"
[1] "group 407 processed"
[1] "group 408 processed"
[1] "group 409 processed"
[1] "group 410 processed"
[1] "group 411 processed"
[1] "group 412 processed"
[1] "group 413 processed"
[1] "group 414 processed"
[1] "group 415 processed"
[1] "group 416 processed"
[1] "group 417 processed"
[1] "group 418 processed"
[1] "group 419 processed"
[1] "group 420 processed"
[1] "group 421 processed"
[1] "group 422 processed"
[1] "group 423 processed"
[1] "group 424 processed"
[1] "group 425 processed"
[1] "group 426 processed"
[1] "group 427 processed"
[1] "group 428 processed"
[1] "group 429 processed"
[1] "group 430 processed"
[1] "group 431 processed"
[1] "group 432 processed"
[1] "group 433 processed"
[1] "group 434 processed"
[1] "group 435 processed"
[1] "group 436 processed"
[1] "group 437 processed"
[1] "group 438 processed"
[1] "group 439 processed"
[1] "group 440 processed"
[1] "group 441 processed"
[1] "group 442 processed"
[1] "group 443 processed"
[1] "group 444 processed"
[1] "group 445 processed"
[1] "group 446 processed"
[1] "group 447 processed"
[1] "group 448 processed"
[1] "group 449 processed"
[1] "group 450 processed"
[1] "group 451 processed"
[1] "group 452 processed"
[1] "group 453 processed"
[1] "group 454 processed"
[1] "group 455 processed"
[1] "group 456 processed"
[1] "group 457 processed"
[1] "group 458 processed"
[1] "group 459 processed"
[1] "group 460 processed"
[1] "group 461 processed"
[1] "group 462 processed"
[1] "group 463 processed"
[1] "group 464 processed"
[1] "group 465 processed"
[1] "group 466 processed"
[1] "group 467 processed"
[1] "group 468 processed"
[1] "group 469 processed"
[1] "group 470 processed"
[1] "group 471 processed"
[1] "group 472 processed"
[1] "group 473 processed"
[1] "group 474 processed"
[1] "group 475 processed"
[1] "group 476 processed"
[1] "group 477 processed"
[1] "group 478 processed"
[1] "group 479 processed"
[1] "group 480 processed"
[1] "group 481 processed"
[1] "group 482 processed"
[1] "group 483 processed"
[1] "group 484 processed"
[1] "group 485 processed"
[1] "group 486 processed"
[1] "group 487 processed"
[1] "group 488 processed"
[1] "group 489 processed"
[1] "group 490 processed"
[1] "group 491 processed"
[1] "group 492 processed"
[1] "group 493 processed"
[1] "group 494 processed"
[1] "group 495 processed"
[1] "group 496 processed"
[1] "group 497 processed"
[1] "group 498 processed"
[1] "group 499 processed"
[1] "group 500 processed"
[1] "group 501 processed"
[1] "group 502 processed"
[1] "group 503 processed"
[1] "group 504 processed"
[1] "group 505 processed"
[1] "group 506 processed"
[1] "group 507 processed"
[1] "group 508 processed"
[1] "group 509 processed"
[1] "group 510 processed"
[1] "group 511 processed"
[1] "group 512 processed"
[1] "group 513 processed"
[1] "group 514 processed"
[1] "group 515 processed"
[1] "group 516 processed"
[1] "group 517 processed"
[1] "group 518 processed"
[1] "group 519 processed"
[1] "group 520 processed"
[1] "group 521 processed"
[1] "group 522 processed"
[1] "group 523 processed"
[1] "group 524 processed"
[1] "group 525 processed"
[1] "group 526 processed"
[1] "group 527 processed"
[1] "group 528 processed"
[1] "group 529 processed"
[1] "group 530 processed"
[1] "group 531 processed"
[1] "group 532 processed"
[1] "group 533 processed"
[1] "group 534 processed"
[1] "group 535 processed"
[1] "group 536 processed"
[1] "group 537 processed"
[1] "group 538 processed"
[1] "group 539 processed"
[1] "group 540 processed"
[1] "group 541 processed"
[1] "group 542 processed"
[1] "group 543 processed"
[1] "group 544 processed"
[1] "group 545 processed"
[1] "group 546 processed"
[1] "group 547 processed"
[1] "group 548 processed"
[1] "group 549 processed"
[1] "group 550 processed"
[1] "group 551 processed"
[1] "group 552 processed"
[1] "group 553 processed"
[1] "group 554 processed"
[1] "group 555 processed"
[1] "group 556 processed"
[1] "group 557 processed"
[1] "group 558 processed"
[1] "group 559 processed"
[1] "group 560 processed"
[1] "group 561 processed"
[1] "group 562 processed"
[1] "group 563 processed"
[1] "group 564 processed"
[1] "group 565 processed"
[1] "group 566 processed"
[1] "group 567 processed"
[1] "group 568 processed"
[1] "group 569 processed"
[1] "group 570 processed"
[1] "group 571 processed"
[1] "group 572 processed"
[1] "group 573 processed"
[1] "group 574 processed"
[1] "group 575 processed"
[1] "group 576 processed"
[1] "group 577 processed"
[1] "group 578 processed"
[1] "group 579 processed"
[1] "group 580 processed"
[1] "group 581 processed"
[1] "group 582 processed"
[1] "group 583 processed"
[1] "group 584 processed"
[1] "group 585 processed"
[1] "group 586 processed"
[1] "group 587 processed"
[1] "group 588 processed"
[1] "group 589 processed"
[1] "group 590 processed"
[1] "group 591 processed"
[1] "group 592 processed"
[1] "group 593 processed"
[1] "group 594 processed"
[1] "group 595 processed"
[1] "group 596 processed"
[1] "group 597 processed"
[1] "group 598 processed"
[1] "group 599 processed"
[1] "group 600 processed"
[1] "group 601 processed"
[1] "group 602 processed"
[1] "group 603 processed"
[1] "group 604 processed"
[1] "group 605 processed"
[1] "group 606 processed"
[1] "group 607 processed"
[1] "group 608 processed"
[1] "group 609 processed"
[1] "group 610 processed"
[1] "group 611 processed"
[1] "group 612 processed"
[1] "group 613 processed"
[1] "group 614 processed"
[1] "group 615 processed"
[1] "group 616 processed"
[1] "group 617 processed"
[1] "group 618 processed"
[1] "group 619 processed"
[1] "group 620 processed"
[1] "group 621 processed"
[1] "group 622 processed"
[1] "group 623 processed"
[1] "group 624 processed"
[1] "group 625 processed"
[1] "group 626 processed"
[1] "group 627 processed"
[1] "group 628 processed"
[1] "group 629 processed"
[1] "group 630 processed"
[1] "group 631 processed"
[1] "group 632 processed"
[1] "group 633 processed"
[1] "group 634 processed"
[1] "group 635 processed"
[1] "group 636 processed"
[1] "group 637 processed"
[1] "group 638 processed"
[1] "group 639 processed"
[1] "group 640 processed"
[1] "group 641 processed"
[1] "group 642 processed"
[1] "group 643 processed"
[1] "group 644 processed"
[1] "group 645 processed"
[1] "group 646 processed"
[1] "group 647 processed"
[1] "group 648 processed"
[1] "group 649 processed"
[1] "group 650 processed"
[1] "group 651 processed"
[1] "group 652 processed"
[1] "group 653 processed"
[1] "group 654 processed"
[1] "group 655 processed"
[1] "group 656 processed"
[1] "group 657 processed"
[1] "group 658 processed"
[1] "group 659 processed"
[1] "group 660 processed"
[1] "group 661 processed"
[1] "group 662 processed"
[1] "group 663 processed"
[1] "group 664 processed"
[1] "group 665 processed"
[1] "group 666 processed"
[1] "group 667 processed"
[1] "group 668 processed"
[1] "group 669 processed"
[1] "group 670 processed"
[1] "group 671 processed"
[1] "group 672 processed"
[1] "group 673 processed"
[1] "group 674 processed"
[1] "group 675 processed"
[1] "group 676 processed"
[1] "group 677 processed"
[1] "group 678 processed"
[1] "group 679 processed"
[1] "group 680 processed"
[1] "group 681 processed"
[1] "group 682 processed"
[1] "group 683 processed"
[1] "group 684 processed"
[1] "group 685 processed"
[1] "group 686 processed"
[1] "group 687 processed"
[1] "group 688 processed"
[1] "group 689 processed"
[1] "group 690 processed"
[1] "group 691 processed"
[1] "group 692 processed"
[1] "group 693 processed"
[1] "group 694 processed"
[1] "group 695 processed"
[1] "group 696 processed"
[1] "group 697 processed"
[1] "group 698 processed"
[1] "group 699 processed"
[1] "group 700 processed"
[1] "group 701 processed"
[1] "group 702 processed"
[1] "group 703 processed"
[1] "group 704 processed"
[1] "group 705 processed"
[1] "group 706 processed"
[1] "group 707 processed"
[1] "group 708 processed"
[1] "group 709 processed"
[1] "group 710 processed"
[1] "group 711 processed"
[1] "group 712 processed"
[1] "group 713 processed"
[1] "group 714 processed"
[1] "group 715 processed"
[1] "group 716 processed"
[1] "group 717 processed"
[1] "group 718 processed"
[1] "group 719 processed"
[1] "group 720 processed"
[1] "group 721 processed"
[1] "group 722 processed"
[1] "group 723 processed"
[1] "group 724 processed"
[1] "group 725 processed"
[1] "group 726 processed"
[1] "group 727 processed"
[1] "group 728 processed"
[1] "group 729 processed"
[1] "group 730 processed"
[1] "group 731 processed"
[1] "group 732 processed"
[1] "group 733 processed"
[1] "group 734 processed"
[1] "group 735 processed"
[1] "group 736 processed"
[1] "group 737 processed"
[1] "group 738 processed"
[1] "group 739 processed"
[1] "group 740 processed"
[1] "group 741 processed"
[1] "group 742 processed"
[1] "group 743 processed"
[1] "group 744 processed"
[1] "group 745 processed"
[1] "group 746 processed"
[1] "group 747 processed"
[1] "group 748 processed"
[1] "group 749 processed"
[1] "group 750 processed"
[1] "group 751 processed"
[1] "group 752 processed"
[1] "group 753 processed"
[1] "group 754 processed"
[1] "group 755 processed"
[1] "group 756 processed"
[1] "group 757 processed"
[1] "group 758 processed"
[1] "group 759 processed"
[1] "group 760 processed"
[1] "group 761 processed"
[1] "group 762 processed"
[1] "group 763 processed"
[1] "group 764 processed"
[1] "group 765 processed"
[1] "group 766 processed"
[1] "group 767 processed"
[1] "group 768 processed"
[1] "group 769 processed"
[1] "group 770 processed"
[1] "group 771 processed"
[1] "group 772 processed"
[1] "group 773 processed"
[1] "group 774 processed"
[1] "group 775 processed"
[1] "group 776 processed"
[1] "group 777 processed"
[1] "group 778 processed"
[1] "group 779 processed"
[1] "group 780 processed"
[1] "group 781 processed"
[1] "group 782 processed"
[1] "group 783 processed"
[1] "group 784 processed"
[1] "group 785 processed"
[1] "group 786 processed"
[1] "group 787 processed"
[1] "group 788 processed"
[1] "group 789 processed"
[1] "group 790 processed"
[1] "group 791 processed"
[1] "group 792 processed"
[1] "group 793 processed"
[1] "group 794 processed"
[1] "group 795 processed"
[1] "group 796 processed"
[1] "group 797 processed"
[1] "group 798 processed"
[1] "group 799 processed"
[1] "group 800 processed"
[1] "group 801 processed"
[1] "group 802 processed"
[1] "group 803 processed"
[1] "group 804 processed"
[1] "group 805 processed"
[1] "group 806 processed"
[1] "group 807 processed"
[1] "group 808 processed"
[1] "group 809 processed"
[1] "group 810 processed"
[1] "group 811 processed"
[1] "group 812 processed"
[1] "group 813 processed"
[1] "group 814 processed"
[1] "group 815 processed"
[1] "group 816 processed"
[1] "group 817 processed"
[1] "group 818 processed"
[1] "group 819 processed"
[1] "group 820 processed"
[1] "group 821 processed"
[1] "group 822 processed"
[1] "group 823 processed"
[1] "group 824 processed"
[1] "group 825 processed"
[1] "group 826 processed"
[1] "group 827 processed"
[1] "group 828 processed"
[1] "group 829 processed"
[1] "group 830 processed"
[1] "group 831 processed"
[1] "group 832 processed"
[1] "group 833 processed"
[1] "group 834 processed"
[1] "group 835 processed"
[1] "group 836 processed"
[1] "group 837 processed"
[1] "group 838 processed"
[1] "group 839 processed"
[1] "group 840 processed"
[1] "group 841 processed"
[1] "group 842 processed"
[1] "group 843 processed"
[1] "group 844 processed"
[1] "group 845 processed"
[1] "group 846 processed"
[1] "group 847 processed"
[1] "group 848 processed"
[1] "group 849 processed"
[1] "group 850 processed"
[1] "group 851 processed"
[1] "group 852 processed"
[1] "group 853 processed"
[1] "group 854 processed"
[1] "group 855 processed"
[1] "group 856 processed"
[1] "group 857 processed"
[1] "group 858 processed"
[1] "group 859 processed"
[1] "group 860 processed"
[1] "group 861 processed"
[1] "group 862 processed"
[1] "group 863 processed"
[1] "group 864 processed"
[1] "group 865 processed"
[1] "group 866 processed"
[1] "group 867 processed"
[1] "group 868 processed"
[1] "group 869 processed"
[1] "group 870 processed"
[1] "group 871 processed"
[1] "group 872 processed"
[1] "Congratulations! All ambiguous assignments have been processed."


── Column specification ────────────────────────────────────────────────────────
cols(
  X1 = col_character(),
  X2 = col_character(),
  X3 = col_character(),
  X4 = col_double(),
  X5 = col_character(),
  X6 = col_character(),
  X7 = col_character(),
  X8 = col_character(),
  X9 = col_character(),
  X10 = col_character(),
  X11 = col_double(),
  X12 = col_character(),
  X13 = col_character(),
  X14 = col_character(),
  X15 = col_character(),
  X16 = col_double(),
  X17 = col_double(),
  X18 = col_double()
)


── Column specification ────────────────────────────────────────────────────────
cols(
  X1 = col_character(),
  X2 = col_character(),
  X3 = col_character(),
  X4 = col_double(),
  X5 = col_character(),
  X6 = col_character(),
  X7 = col_character(),
  X8 = col_character(),
  X9 = col_character(),
  X10 = col_character(),
  X11 = col_double(),
  X12 = col_character(),
  X13 = col_character(),
  X14 = col_character(),
  X15 = col_character(),
  X16 = col_double(),
  X17 = col_double(),
  X18 = col_double()
)

[1] "Reading file D:/Amber/dop/pmemd/out/3_eq_doplong7_5.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 10"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

[1] "Reading file D:/Amber/dop/pmemd/out/3_eq_doplong8_5.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 10"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

[1] "file already exists"

In [12]:

doplong7.finalmin <- read.pdb('D:/Amber/dop/pmemd/out/5_min_doplong7.pdb') %>%
  convert.pdb(
    .,
    rm.h = FALSE,
    rm.wat = TRUE
  ) %>%
  trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))

     Retaining 1485 non-water atoms
     Removing a total of 32166  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

doplong7.finalmin$atom <- doplong7.finalmin$atom %>%
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DG3' ~ 'DG',
      resid == 'DC5' ~ 'DC',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    )
  )

doplong8.finalmin <- read.pdb('D:/Amber/dop/pmemd/out/5_min_doplong8.pdb') %>%
  convert.pdb(
    .,
    rm.h = FALSE,
    rm.wat = TRUE
  ) %>%
  trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))

     Retaining 1505 non-water atoms
     Removing a total of 43839  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

doplong8.finalmin$atom <- doplong8.finalmin$atom %>%
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DG3' ~ 'DG',
      resid == 'DC5' ~ 'DC',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    )
  )

### Export----
if (!file.exists('data/doplong/5_min_doplong7.pdb')) {
  write.pdb(doplong7.finalmin, file = 'data/doplong/5_min_doplong7.pdb')
  write.pdb(doplong8.finalmin, file = 'data/doplong/5_min_doplong8.pdb')
} else {print('file exists')}

[1] "file exists"

In [13]:

doplong7.traj <- as.pdb(
  prm = read.prmtop('D:/Amber/dop/leap/doplong7.prmtop'),
  crd = read.ncdf('D:/Amber/dop/pmemd/out/4_prod_doplong7.nc'),
) %>%
  convert.pdb(
    .,
    rm.h = FALSE,
    rm.wat = TRUE
  ) %>%
  trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong7.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

     Retaining 1485 non-water atoms
     Removing a total of 42888  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

doplong7.traj$xyz <- fit.xyz(
  fixed = doplong7.traj$xyz[1,],
  mobile = doplong7.traj$xyz,
  fixed.inds = atom.select(doplong7.traj, elety = unique(doplong7.traj$atom$elety))$xyz,
  mobile.inds = atom.select(doplong7.traj, elety = unique(doplong7.traj$atom$elety))$xyz
)

doplong7.traj$atom <- doplong7.traj$atom %>%
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DG3' ~ 'DG',
      resid == 'DC5' ~ 'DC',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    )
  )

doplong8.traj <- as.pdb(
  prm = read.prmtop('D:/Amber/dop/leap/doplong8.prmtop'),
  crd = read.ncdf('D:/Amber/dop/pmemd/out/4_prod_doplong8.nc'),
) %>%
  convert.pdb(
    .,
    rm.h = FALSE,
    rm.wat = TRUE
  ) %>%
  trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong8.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 5000"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

     Retaining 1505 non-water atoms
     Removing a total of 58452  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

doplong8.traj$xyz <- fit.xyz(
  fixed = doplong8.traj$xyz[1,],
  mobile = doplong8.traj$xyz,
  fixed.inds = atom.select(doplong8.traj, elety = unique(doplong8.traj$atom$elety))$xyz,
  mobile.inds = atom.select(doplong8.traj, elety = unique(doplong8.traj$atom$elety))$xyz
)

doplong8.traj$atom <- doplong8.traj$atom %>%
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DG3' ~ 'DG',
      resid == 'DC5' ~ 'DC',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    )
  )

## Export----

doplong7.traj.1000f <- doplong7.traj
doplong8.traj.1000f <- doplong8.traj

doplong.traj.1000f <- lapply(
  list(doplong7.traj.1000f, doplong8.traj.1000f),
  function(x) {
    x$xyz <- x$xyz[seq(1, 5000, by = 5),]
    return(x)
  }
)

names(doplong.traj.1000f) <- c('doplong7', 'doplong8')

if (!file.exists('data/doplong/4_prod_doplong7_1000f.pdb')) {
  lapply(
    seq_along(doplong.traj.1000f),
    function(x) {
      write.pdb(
        pdb = doplong.traj.1000f[[x]],
        file = paste0('data/doplong/4_prod_', names(doplong.traj.1000f[x]), '_1000f.pdb')
      )
    }
  )
} else {print('file exists')}

[1] "file exists"

In [14]:

doplong7_longtrajfiles <- lapply(
  paste0('D:/Amber/dop/pmemd/out/4_prod_doplong7_250_', 1:4, '.nc'),
  function(x) {
    as.pdb(
      prm = read.prmtop('D:/Amber/dop/leap/doplong7.prmtop'),
      crd = read.ncdf(x)
    ) %>%
      convert.pdb(.,
                  rm.h = TRUE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }
)

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong7_250_1.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

     Retaining 1485 non-water atoms
     Retaining 975 non-hydrogen atoms
     Removing a total of 43398  atoms
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong7_250_2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

     Retaining 1485 non-water atoms
     Retaining 975 non-hydrogen atoms
     Removing a total of 43398  atoms
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong7_250_3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

     Retaining 1485 non-water atoms
     Retaining 975 non-hydrogen atoms
     Removing a total of 43398  atoms
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong7_250_4.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

     Retaining 1485 non-water atoms
     Retaining 975 non-hydrogen atoms
     Removing a total of 43398  atoms

doplong7_longtraj <- doplong7_longtrajfiles[[1]]

# rbind all $xyz items
doplong7_longtraj$xyz <- do.call(rbind, lapply(doplong7_longtrajfiles, function(x) x$xyz))

doplong7_longtraj$xyz <- fit.xyz(
  fixed = doplong7_longtraj$xyz[1,],
  mobile = doplong7_longtraj$xyz,
  fixed.inds = atom.select(doplong7_longtraj, elety = unique(doplong7_longtraj$atom$elety))$xyz,
  mobile.inds = atom.select(doplong7_longtraj, elety = unique(doplong7_longtraj$atom$elety))$xyz
)

doplong7_longtraj$atom <- doplong7_longtraj$atom %>%
    mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DG3' ~ 'DG',
      resid == 'DC5' ~ 'DC',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
      elesy = if_else(
        is.na(elesy) | elesy == 'not found',
        case_when(
          elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
          elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
          elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
          elety %in% c(paste0("N", 1:99)) ~ 'N',
          elety %in% c("P", paste0("P", 1:99)) ~ 'P',
          TRUE ~ 'not found'
        ),
        elesy
    )
  )

doplong8_longtrajfiles <- lapply(
  paste0('D:/Amber/dop/pmemd/out/4_prod_doplong8_250_', 1:4, '.nc'),
  function(x) {
    as.pdb(
      prm = read.prmtop('D:/Amber/dop/leap/doplong8.prmtop'),
      crd = read.ncdf(x)
    ) %>%
      convert.pdb(.,
                  rm.h = TRUE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }
)

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong8_250_1.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

     Retaining 1505 non-water atoms
     Retaining 995 non-hydrogen atoms
     Removing a total of 58962  atoms
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong8_250_2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

     Retaining 1505 non-water atoms
     Retaining 995 non-hydrogen atoms
     Removing a total of 58962  atoms
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong8_250_3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

     Retaining 1505 non-water atoms
     Retaining 995 non-hydrogen atoms
     Removing a total of 58962  atoms
[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong8_250_4.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

     Retaining 1505 non-water atoms
     Retaining 995 non-hydrogen atoms
     Removing a total of 58962  atoms

doplong8_longtraj <- doplong8_longtrajfiles[[1]]

# rbind all $xyz items
doplong8_longtraj$xyz <- do.call(rbind, lapply(doplong8_longtrajfiles, function(x) x$xyz))

doplong8_longtraj$xyz <- fit.xyz(
  fixed = doplong8_longtraj$xyz[1,],
  mobile = doplong8_longtraj$xyz,
  fixed.inds = atom.select(doplong8_longtraj, elety = unique(doplong8_longtraj$atom$elety))$xyz,
  mobile.inds = atom.select(doplong8_longtraj, elety = unique(doplong8_longtraj$atom$elety))$xyz
)

doplong8_longtraj$atom <- doplong8_longtraj$atom %>%
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DG3' ~ 'DG',
      resid == 'DC5' ~ 'DC',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
      elesy = if_else(
        is.na(elesy) | elesy == 'not found',
        case_when(
          elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
          elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
          elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
          elety %in% c(paste0("N", 1:99)) ~ 'N',
          elety %in% c("P", paste0("P", 1:99)) ~ 'P',
          TRUE ~ 'not found'
        ),
        elesy
    )
  )

gc()

            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   2402830  128.4    4015078   214.5    4015078   214.5
Vcells 280608944 2140.9 2660484864 20297.9 3590148510 27390.7

In [15]:

doplong7h_longtrajfiles <- lapply(
  paste0('D:/Amber/dop/pmemd/out/4_prod_doplong7_250_', 1:4, '.nc'),
  function(x) {
    as.pdb(
      prm = read.prmtop('D:/Amber/dop/leap/doplong7.prmtop'),
      crd = read.ncdf(x)
    ) %>%
      convert.pdb(.,
                  rm.h = FALSE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }
)

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong7_250_1.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

     Retaining 1485 non-water atoms
     Removing a total of 42888  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong7_250_2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

     Retaining 1485 non-water atoms
     Removing a total of 42888  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong7_250_3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

     Retaining 1485 non-water atoms
     Removing a total of 42888  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong7_250_4.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 44373"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  44373 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 10821 

     Retaining 1485 non-water atoms
     Removing a total of 42888  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

doplong7h_longtraj <- doplong7h_longtrajfiles[[1]]

# rbind all $xyz items
doplong7h_longtraj$xyz <- do.call(rbind, lapply(doplong7h_longtrajfiles, function(x) x$xyz))

doplong7h_longtraj$xyz <- fit.xyz(
  fixed = doplong7h_longtraj$xyz[1,],
  mobile = doplong7h_longtraj$xyz,
  fixed.inds = atom.select(doplong7h_longtraj, elety = unique(doplong7h_longtraj$atom$elety))$xyz,
  mobile.inds = atom.select(doplong7h_longtraj, elety = unique(doplong7h_longtraj$atom$elety))$xyz
)

doplong7h_longtraj$atom <- doplong7h_longtraj$atom %>%
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DG3' ~ 'DG',
      resid == 'DC5' ~ 'DC',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
    elesy = if_else(
      is.na(elesy) | elesy == 'not found',
      case_when(
        elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
        elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
        elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
        elety %in% c(paste0("N", 1:99)) ~ 'N',
        elety %in% c("P", paste0("P", 1:99)) ~ 'P',
        TRUE ~ 'not found'
      ),
      elesy
    )
  )

doplong8h_longtrajfiles <- lapply(
  paste0('D:/Amber/dop/pmemd/out/4_prod_doplong8_250_', 1:4, '.nc'),
  function(x) {
    as.pdb(
      prm = read.prmtop('D:/Amber/dop/leap/doplong8.prmtop'),
      crd = read.ncdf(x)
    ) %>%
      convert.pdb(.,
                  rm.h = FALSE,
                  rm.wat = TRUE) %>%
      trim.pdb(., atom.select(., elety = c('Na+', 'Cl-'), inverse = TRUE))
  }
)

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong8_250_1.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

     Retaining 1505 non-water atoms
     Removing a total of 58452  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong8_250_2.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

     Retaining 1505 non-water atoms
     Removing a total of 58452  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong8_250_3.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

     Retaining 1505 non-water atoms
     Removing a total of 58452  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

[1] "Reading file D:/Amber/dop/pmemd/out/4_prod_doplong8_250_4.nc"
[1] "Produced by program: pmemd"
[1] "File conventions AMBER version 1.0"
[1] "Frames: 6250"
[1] "Atoms: 59957"

 Summary of PDB generation:
 .. number of atoms in PDB determined by 'xyz'

 ..        0 atom(s) from 'string' selection 
 ..        0 atom(s) in final combined selection 

 .. number of atoms in PDB:  59957 
 .. number of calphas in PDB: 0 
 .. number of residues in PDB: 14732 

     Retaining 1505 non-water atoms
     Removing a total of 58452  atoms

Warning in convert.pdb(., rm.h = FALSE, rm.wat = TRUE):      Additional hydrogen elety names may need converting.
       N.B. It is often best to remove hydrogen (rm.h=TRUE)
            before building systems for simulation

doplong8h_longtraj <- doplong8h_longtrajfiles[[1]]

# rbind all $xyz items
doplong8h_longtraj$xyz <- do.call(rbind, lapply(doplong8h_longtrajfiles, function(x) x$xyz))

doplong8h_longtraj$xyz <- fit.xyz(
  fixed = doplong8h_longtraj$xyz[1,],
  mobile = doplong8h_longtraj$xyz,
  fixed.inds = atom.select(doplong8h_longtraj, elety = unique(doplong8h_longtraj$atom$elety))$xyz,
  mobile.inds = atom.select(doplong8h_longtraj, elety = unique(doplong8h_longtraj$atom$elety))$xyz
)

doplong8h_longtraj$atom <- doplong8h_longtraj$atom %>%
  mutate(
    # clean up residue names
    resid = case_when(
      resid == 'DG3' ~ 'DG',
      resid == 'DC5' ~ 'DC',
      TRUE ~ resid
    ),
    # move dopamine to chain B
    chain = if_else(resid == 'LDP', 'B', 'A'),
    # rename dopamine atoms
    elety = case_when(
      resid == 'LDP' & elety == 'OAH' ~ 'O1',
      resid == 'LDP' & elety == 'HAH' ~ 'HO1',
      resid == 'LDP' & elety == 'OAI' ~ 'O2',
      resid == 'LDP' & elety == 'HAI' ~ 'HO2',
      resid == 'LDP' & elety == 'CAB' ~ 'C1',
      resid == 'LDP' & elety == 'CAD' ~ 'C2',
      resid == 'LDP' & elety == 'HAD' ~ 'H2',
      resid == 'LDP' & elety == 'CAG' ~ 'C3',
      resid == 'LDP' & elety == 'CAC' ~ 'C4',
      resid == 'LDP' & elety == 'CAF' ~ 'C5',
      resid == 'LDP' & elety == 'HAF' ~ 'H5',
      resid == 'LDP' & elety == 'CAE' ~ 'C6',
      resid == 'LDP' & elety == 'HAE' ~ 'H6',
      resid == 'LDP' & elety == 'CAA' ~ 'C7',
      resid == 'LDP' & elety == 'HAA' ~ 'H71',
      resid == 'LDP' & elety == 'HAB' ~ 'H72',
      resid == 'LDP' & elety == 'CAJ' ~ 'C8',
      resid == 'LDP' & elety == 'HAJ' ~ 'H81',
      resid == 'LDP' & elety == 'HAK' ~ 'H82',
      resid == 'LDP' & elety == 'NAK' ~ 'N1',
      resid == 'LDP' & elety == 'HAL' ~ 'HN11',
      resid == 'LDP' & elety == 'HAM' ~ 'HN13',
      resid == 'LDP' & elety == 'HAN' ~ 'HN12',
      TRUE ~ elety
    ),
    elesy = if_else(
      is.na(elesy) | elesy == 'not found',
      case_when(
        elety %in% c(paste0("C", 1:99), paste0("C", 1:99, "'")) ~ 'C',
        elety %in% c(paste0("H", 1:99), paste0("H", 1:99, "'"), 'HN11', 'HN12', 'HN13', 'HO1', 'HO2') ~ 'H',
        elety %in% c(paste0("O", 1:99), paste0("O", 1:99, "'"), paste0("OP", 1:99)) ~ 'O',
        elety %in% c(paste0("N", 1:99)) ~ 'N',
        elety %in% c("P", paste0("P", 1:99)) ~ 'P',
        TRUE ~ 'not found'
      ),
      elesy
    )
  )

gc()

            used   (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells   2410536  128.8    4015078   214.5    4015078   214.5
Vcells 710089618 5417.6 2942469788 22449.3 3980524038 30369.0

In [16]:

if (!file.exists('images/longrMD.1000frames.pdb')) {
  write.pdb(
    pdb = pdb.long,
    xyz = pdb.long$xyz[seq(1, nrow(pdb.long$xyz), by = nrow(pdb.long$xyz)/1000),],
    file = 'images/longrMD.1000frames.pdb'
  )
} 

if (!file.exists('images/longuMD.1000frames.pdb')) {
  write.pdb(
    pdb = pdb.unrst.long,
    xyz = pdb.unrst.long$xyz[seq(1, nrow(pdb.unrst.long$xyz), by = nrow(pdb.unrst.long$xyz)/1000),],
    file = 'images/longuMD.1000frames.pdb'
  )
} 


if (!file.exists('images/longnoLDPMD.1000frames.pdb')) {
  write.pdb(
    pdb = pdb.noldp.long,
    xyz = pdb.noldp.long$xyz[seq(1, nrow(pdb.noldp.long$xyz), by = nrow(pdb.noldp.long$xyz)/1000),],
    file = 'images/longnoLDPMD.1000frames.pdb'
  )
} 

if (!file.exists('images/longdoplong7.1000frames.pdb')) {
  write.pdb(
    pdb = doplong7_longtraj,
    xyz = doplong7_longtraj$xyz[seq(1, nrow(doplong7_longtraj$xyz), by = nrow(doplong7_longtraj$xyz)/1000),],
    file = 'images/longdoplong7.1000frames.pdb'
  )
} 

if (!file.exists('images/longdoplong8.1000frames.pdb')) {
  write.pdb(
    pdb = doplong8_longtraj,
    xyz = doplong8_longtraj$xyz[seq(1, nrow(doplong8_longtraj$xyz), by = nrow(doplong8_longtraj$xyz)/1000),],
    file = 'images/longdoplong8.1000frames.pdb'
  )
}

In [17]:

.justify {
text-align: justify !important
}

1 Methods

1.1 System preparation

The system was prepared using the leap program from the Amber24 suite (Case et al. 2023). The OL21 and AMBER General Force Field for organic molecules (Version 1.81) were used to describe the complex (J. Wang et al. 2004; Zgarbová, Šponer, and Jurečka 2021; Love et al. 2023). The structure was explicitly solvated in a truncated octahedral box of water molecules, using the OPC model with the ad hoc Li/Merz ion parameters of atomic ions (12-6 set)(Izadi, Anandakrishnan, and Onufriev 2014; Li et al. 2020), with a minimum of 14 Å between the solute and the box edge. For simulations in absence of salts (case of unrestrained simulations), the system was neutralized by adding 25 Na⁺ counter-cations. In cases where 140 mM NaCl were added to better reflect the SELEX conditions (restrained simulations), the number of required ions (\(N_{\pm}\); here around 35 Na⁺ and 10 Cl^-) was determined following the SLTCAP method (Schmit et al. 2018), using Equation 1 simplified by Machado and Pantano into Equation 2, where \(\nu_w\) is the water volume of the simulation box (around \(3\times 10^5\) Å³ here) in reduced units, \(c_0\) the salt concentration, \(Q\) the total charge of the complex (here -25: 26 phosphates on the aptamer and 1 ammonium group on the dopamine), and \(N_0 = \frac{N_w \times c_0}{55.5}\), with \(N_w\). the number of water molecules in the simulation box (here, around \(9\times10^4\)) (Machado and Pantano 2020).

\[N_{\pm}=\nu_w c_0 e^{\mp ArcSinh(\frac{Q}{2 \nu_w c_0})} \tag{1}\] \[N_{\pm}=N_0\sqrt{1+(\frac{Q}{2N_0})^2 \mp \frac{Q}{2}} \tag{2}\]

Note that the SPLIT method describe by Machado and Pantano cannot be applied as our system does not satisfy the \(N_0 \gg Q\) condition; however it yields identical values in most cases or deviate by a single ion.

1.2 Restrained minimization, heating and molecular dynamics

Using an in-house R script (R Core Team 2023; Wickham et al. 2019), the distance restraints obtained in ARIA were converted to an 8-column format suitable for its processing by the makeDIST_RST function from the sander module of Amber. A custom map file was prepared to define common names for groups of protons sharing a given restraints (e.g. 3 H from a same methyl group). The resulting DISANG restraints file was then applied to all steps below.

All simulation steps were performed with pmemd.cuda (v. 18.0.0) from the CUDA version of AMBER (Götz et al. 2012; Salomon-Ferrer et al. 2013; Le Grand, Götz, and Walker 2013), on an NVIDIA H100 PCIe Tensor core GPU (CUDA version: 12.4) from the DOREMI CALI v3 cluster of the Mésocentre de Calcul Intensif Aquitain (Université de Bordeaux). The system was minimized for 20000 cycles using the steepest descent algorithm for the first 4000 steps and the conjugate gradient for the next 16000 steps. The weights of the restraints were kept constant at 100 kcal.mol^-1.Å^-2. The system was then heated at constant volume from 0 to 298 K over 18 ps then kept for 2 ps at the final temperature, using a time step of 2 fs, the Langevin thermostat with a 2.0 ps^-1 collision frequency and a different seed for the pseudo-random number generation for every run to avoid synchronization artifacts (Sindhikara et al. 2009), an 8 Å non-bonded cutoff, and the bonds involving hydrogen were constrained with the SHAKE algorithm. The system was further equilibrated five times at 298 K with the parameters above and the restraint weights ramping down from 100 to 5 kcal.mol^-1.Å^-2. The pressure was kept at 1 bar with the Berendsen barostat (Berendsen et al. 1984).

Restrained molecular dynamics for subsequent minimization were ran for 10 ns with the parameters above, and restraints weights set at 20 kcal.mol^-1.Å^-2. The final coordinates were further minimized, as described above except for the restraint weights set at 20 kcal.mol^-1.Å^-2. Remaining NMR violations were summarized with the sviol function from the Amber package.

Production MD were run over a microsecond with and without restraints, as well as in absence of dopamine, with the parameters above.

1.3 Data analysis

The bio3d package was used for minimized structure and trajectory files cleanup, alignment, filtering, averaging and analysis (B. J. et al. 2006). The determination of RMSD and RMSF was performed with the rmsd and rmsf functions. Dihedral angles, sugar pucker angles and amplitudes \(\theta_M\) were obtained with in-house R functions leveraging the torsion.xyz function, following Equation 3, where the pucker P is determined by Equation 4 and the sugar torsion angles \(\nu_i\) are defined by four atoms as shown below.

\[ \theta_M = \frac{\nu_2}{\cos P} \tag{3}\]

\[ \tan P = \frac{(\nu_4 + \nu_1)-(\nu_3 + \nu_0)}{2\nu_2(\sin(\frac{\pi}{5}) + \sin(\frac{2\pi}{5}))} \tag{4}\]

\[\nu_0=C4'-O4'-C1'-C2'\] \[\nu_1=O4'-C1'-C2'-C3'\] \[\nu_2=C1'-C2'-C3'-C4'\] \[\nu_3=C2'-C3'-C4'-O4'\] \[\nu_4=C3'-C4'-O4'-C1'\]

All atom-atom and ring-ring distances and angles were measured with the dist.xyz and angle.xyz functions. In-house R scripts were used to infer the formation of H-bonds from these values on full trajectories, by first selecting realistic donor/acceptor atoms for each residue/ligand, then verifying that the donor/acceptor distances and angles were compatible with H-bond formation.

Base pairs were further characterized with DSSR 2.3.2 (Lu, Bussemaker, and Olson 2015; Lu 2020), using the web API through the httr2 R package (Wickham 2023).

Principal component analysis on the final structures and trajectories (all atoms except hydrogen) was performed with the pca function from bio3d. The results were clustered with the k-means method using Euclidean distance and the best number of clusters determined by Nbclust (Charrad et al. 2014). Cross-correlation analysis was carried out with the dccm function of bio3d.

Molecular structures images were created in PyMOL 2.5 (Schrödinger, LLC 2021), interactive molecular structures with mol*, and ligand binding site diagram with LigPlot⁺ (Laskowski and Swindells 2011). All further data processing and all plotting was performed in R 4.3. Apache Arrow was used to write/read processed data files in feather format (Richardson et al. 2024).

2 Results

Hereafter, are described the results for the restrained simulations over 10 ns for the fifteen starting structures, the first of which was also simulated for a microsecond. As a control, the latter was also performed in absernce of restraints.

2.1 Trajectory

2.1.1 Visualization

2.1.1.1 Microsecond simulation

Below can be seen a comparison between the restrained and unrestrained microsecond simulations. One frame every 10 ns was extracted.

2.1.1.2 Production simulation for minimization

All twenty restrained molecular dynamic trajectories (10 ns) can be visualized below. Only one every 5 frames were extracted. Click on the top-left play button to animate the trajectory.

2.1.2 Average structures

In [18]:

# split pdb.long$xyz into 20 matrices of n.frames/20 rows
mean.xyz.list <- split(
  as.data.table(pdb.long$xyz), 
  rep(1:20, each = nrow(pdb.long$xyz) %/% 20)
)
# average each coordinate (per atom/dimension) for each matrix
mean.xyz <- lapply(mean.xyz.list, function(x) as.numeric(colMeans(x)))
# Collapse all numerics into a single matrix of 20 sets of coordinates
mean.xyz <- matrix(unlist(mean.xyz), nrow = 20, byrow = TRUE)

if (!file.exists('images/mean_long_rMD.pdb')) {
  write.pdb(pdb = pdb.long, xyz = mean.xyz, file = 'images/mean_long_rMD.pdb')
}

###

Twenty structures of the microsecond restrained simulation were averaged over 1250 frames of the simulation, without further minimization. It is not incredibly useful as it was not further minimized, but it can be done. The minimized structures can be found in Section 2.2.

2.1.3 RMSD

2.1.3.1 Restrained simulations

The trajectory of almost all fifteen simulations seem to converge after the first hundreds of ps (Figure 1). Only structure 2 requires around 5 ns to converge. Minimization of the coordinates after ten nanoseconds of simulation is appropriate.

In [19]:

#apply rmsd on all objects of xyz.list
rmsd.list <- lapply(1:15, function(i) {
  rmsd(
    a = xyz.list[[i]][1,],
    b = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5),],
    fit = FALSE #already aligned at the import stage
  )
})


# bind rows of rmsd.list to make a data.frame, and add a column for the frame number, and a column for the list number
rmsd.df <- do.call(rbind, lapply(1:15, function(i) {
  data.frame(
    frame = seq(1, nrow(xyz.list[[i]]), by = 5),
    rmsd = rmsd.list[[i]],
    structure = i
  ) 
})) %>% 
  mutate(t = frame * info.short$time.per.frame)


#plot rmsds
rmsd.df %>% 
  ggplot(aes(x = t, y = rmsd)) +
  geom_line() +
  custom.theme(0.8) +
  facet_wrap(~structure,
             ncol = 5,
             scales = 'free_x') +
  # scale_x_continuous(n.breaks = 3)  +
  labs(
    x = 't (ns)',
    y = 'RMSD (&angst;)',
  )

Figure 1: RMSD on all residues (including 5’ and 3’-ends) without hydrogens across the restrained simulations (10 ns), where the first frame is used as reference.

The microsecond simulation has a very stable RMSD vs. the first frame.

In [20]:

data.frame(
  rmsd = rmsd(
    a = pdb.long$xyz[1,],
    b = pdb.long$xyz[seq(1, nrow(pdb.long$xyz), by = divider), ],
    fit = FALSE
  ),
  frame = (seq(1, nrow(pdb.long$xyz), by = divider))
) %>% 
  mutate(t = frame * info.long$time.per.frame) %>% 
  ggplot(aes(x = t, y = rmsd)) +
  geom_line() +
  custom.theme(1) +
  labs(
    x = 't (ns)',
    y = 'RMSD (&angst;)',
  )

Figure 2: RMSD on all residues (including 5’ and 3’-ends) without hydrogens across the restrained simulation (1 microsecond), where the first frame is used as reference.

Two frames with similar one-dimensional RMSD are not necessarily similar. Pairwise RMSD better captures the extent of structural diversity across the simulation and avoids the bias from comparing all frames to a single (first) conformer. A few clusters of structures are visible in the RMSD matrices, suggesting that the simulations have converged to a few distinct states (Figure 3). However, there are no very significant variations along the trajectories (RMSD never exceeds ~ 2 Å). The nature of the visited states is investigated for the structure 1 in the sections below.

In [21]:

#calculate only for 1000 frames
rmsd.pairwise <- lapply(1:15, function(i) {
  rmsd(
    a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5),], #keep one in every 5 frames
    fit = FALSE
  ) %>%
    lazy_dt() %>%
    mutate(var1 = 1:nrow(.), .before = 1) %>%
    pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
    mutate(var2 = as.numeric(gsub("V", "", var2)),
           structure = i) %>%
    as.data.table()
}) %>%
  rbindlist()

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

Warning in rmsd(a = xyz.list[[i]][seq(1, nrow(xyz.list[[i]]), by = 5), ], : No indices provided, using the 571 non NA positions

arrow::write_feather(rmsd.pairwise, "data/rmsd.pairwise.feather")

In [22]:

#calculate only for 1000 frames

rmsd.pairwise.long <- rmsd(
  a = pdb.long$xyz[seq(1, nrow(pdb.long$xyz), by = nrow(pdb.long$xyz)/1000),], #keep 1000 frames only
  fit = FALSE
) %>%
  lazy_dt() %>%
  mutate(var1 = 1:nrow(.), .before = 1) %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  mutate(var2 = as.numeric(gsub("V", "", var2))) %>%
  as.data.table()

Warning in rmsd(a = pdb.long$xyz[seq(1, nrow(pdb.long$xyz), by = nrow(pdb.long$xyz)/1000), : No indices provided, using the 571 non NA positions

arrow::write_feather(rmsd.pairwise.long, "data/rmsd.pairwise.long.feather")

###

In [23]:

scaling <- 0.75

arrow::read_feather('data/rmsd.pairwise.feather') %>%
  mutate(t1 = var1 * info.short$time.per.frame * 5,
         t2 = var2 * info.short$time.per.frame * 5) %>%
  ggplot(aes(x = t1, y = t2, fill = value)) +
  geom_raster() +
  facet_wrap(~structure,
             ncol = 5,
             scales = 'free') +
  scale_fill_viridis_c(name = 'RMSD (&angst;)') +
  scale_x_continuous(expand = c(0, 0), limits = c(0, NA), 
                     breaks = c(0, 5, 10),
                     name = 't (ns)') +
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA), 
                     breaks = c(0, 5, 10),
                     name = 't (ns)') +
  ggthemes::theme_pander() +
  theme(
    axis.text.x = element_markdown(size = 12 * scaling),
    axis.text.y = element_markdown(size = 12 * scaling),
    axis.title.x = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.title.y = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.line = element_line(linewidth = 0.75 * scaling),
    axis.ticks = element_line(linewidth = 0.75 * scaling),
    legend.text = element_markdown(size = 12 * scaling),
    legend.title = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.x = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.y = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_markdown(size = 16 * scaling, face = 'bold'),
    plot.subtitle = element_markdown(size = 14 * scaling),
    plot.caption = element_markdown(hjust = 1, size = 10 * scaling)
  ) +
  theme(legend.position = 'bottom') + 
  theme(plot.margin = margin(1, 1, 0, 0, 'lines'))

Figure 3: All-to-all RMSD (all residues and ligand atoms except H) for the fifteen simulations

In [24]:

scaling <- 1.0

arrow::read_feather('data/rmsd.pairwise.long.feather') %>%
  mutate(t1 = var1 * info.long$time.per.frame * nrow(pdb.long$xyz)/1000,
         t2 = var2 * info.long$time.per.frame * nrow(pdb.long$xyz)/1000) %>%
  ggplot(aes(x = t1, y = t2, fill = value)) +
  geom_raster() +
  scale_fill_viridis_c(name = 'RMSD (&angst;)') +
  scale_x_continuous(expand = c(0, 0), limits = c(0, NA), 
                     name = 't (ns)') +
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA), 
                     name = 't (ns)') +
  ggthemes::theme_pander() +
  theme(
    axis.text.x = element_markdown(size = 12 * scaling),
    axis.text.y = element_markdown(size = 12 * scaling),
    axis.title.x = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.title.y = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.line = element_line(linewidth = 0.75 * scaling),
    axis.ticks = element_line(linewidth = 0.75 * scaling, color = 'black'),
    legend.text = element_markdown(size = 12 * scaling),
    legend.title = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.x = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.y = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_markdown(size = 16 * scaling, face = 'bold'),
    plot.subtitle = element_markdown(size = 14 * scaling),
    plot.caption = element_markdown(hjust = 1, size = 10 * scaling)
  ) +
  theme(legend.position = 'bottom') +
  theme(plot.margin = margin(1, 1, 0, 0, 'lines'))

###

Figure 4: All-to-all RMSD (all residues and ligand atoms except H) for the microsecond simulation

2.1.3.2 Unrestrained MD

The trajectory is relatively stable across the unrestrained microsecond simulation, although with a larger RMSD (Figure 5, Figure 6).

In [25]:

rmsd.pairwise.unrst.long <- rmsd(
  a = pdb.unrst.long$xyz[seq(1, nrow(pdb.unrst.long$xyz), by = nrow(pdb.unrst.long$xyz)/1000),], #keep 1000 frames only
  fit = FALSE
) %>%
  lazy_dt() %>%
  mutate(var1 = 1:nrow(.), .before = 1) %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  mutate(var2 = as.numeric(gsub("V", "", var2))) %>%
  as.data.table()

Warning in rmsd(a = pdb.unrst.long$xyz[seq(1, nrow(pdb.unrst.long$xyz), : No indices provided, using the 571 non NA positions

arrow::write_feather(rmsd.pairwise.unrst.long, "data/uMD.rmsd.pairwise.feather")

In [26]:

data.frame(
  rmsd = rmsd(
    a = pdb.unrst.long$xyz[1,],
    b = pdb.unrst.long$xyz[seq(1, nrow(pdb.unrst.long$xyz), by = unrst.divider), ],
    fit = FALSE
  ),
  frame = (seq(1, nrow(pdb.unrst.long$xyz), by = unrst.divider))
) %>% 
  mutate(t = frame * info.long$time.per.frame) %>% 
  ggplot(aes(x = t, y = rmsd)) +
  geom_line() +
  custom.theme(1) +
  labs(
    x = 't (ns)',
    y = 'RMSD (&angst;)',
  )

Figure 5: RMSD on all residues (including 5’ and 3’-ends) without hydrogens across the unrestrained simulation, where the first frame is used as reference.

In [27]:

scaling <- 1.0

arrow::read_feather('data/uMD.rmsd.pairwise.feather') %>%
  mutate(t1 = var1 * info.long.unrst$time.per.frame * nrow(pdb.unrst.long$xyz)/1000,
         t2 = var2 * info.long.unrst$time.per.frame * nrow(pdb.unrst.long$xyz)/1000) %>%
  ggplot(aes(x = t1, y = t2, fill = value)) +
  geom_raster() +
  scale_fill_viridis_c(name = 'RMSD (&angst;)') +
  scale_x_continuous(expand = c(0, 0), limits = c(0, NA), 
                     name = 't (ns)') +
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA), 
                     name = 't (ns)') +
  ggthemes::theme_pander() +
  theme(
    axis.text.x = element_markdown(size = 12 * scaling),
    axis.text.y = element_markdown(size = 12 * scaling),
    axis.title.x = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.title.y = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.line = element_line(linewidth = 0.75 * scaling),
    axis.ticks = element_line(linewidth = 0.75 * scaling, color = 'black'),
    #legend.text = element_markdown(size = 12 * scaling),
    legend.title = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.x = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.y = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_markdown(size = 16 * scaling, face = 'bold'),
    plot.subtitle = element_markdown(size = 14 * scaling),
    plot.caption = element_markdown(hjust = 1, size = 10 * scaling)
  ) +
  theme(legend.position = 'bottom') +
  theme(plot.margin = margin(1, 1, 0, 0, 'lines'))

##

Figure 6: All-to-all RMSD (all residues and ligand atoms except H) of the unrestrained simulation

2.1.4 PCA

Given the RMSD heatmaps shown above, it is likely that the aptamer visits different conformations. To isolate discrete states from the microsecond simulation, principal component analysis was performed.

In [28]:

#PCA
pca_pdb_traj <- pca.pdbs(
  pdb.long, 
  use.svd = FALSE, 
  rm.gaps = TRUE, 
  fit = FALSE #pdb models are already aligned at the import stage
)

scree <- data.frame(
  pc = 1:length(pca_pdb_traj$L),
  L = pca_pdb_traj$L
) %>% 
  mutate(
    var = L/sum(L) * 100,
    cum.var = cumsum(var)
  ) %>% 
  filter(pc <= 15) %>%
  select(-L) %>%
  mutate(
    label = ifelse(
      pc %in% 1:3 | 
        pc %in% seq(4, nrow(.), 3) | 
        pc == nrow(.), 
      signif(cum.var, 3), NA)
  ) %>% 
  ggplot(., aes(x = pc, y = var)) +
  geom_text_repel(aes(label = label), size = 5, fontface = 'bold', force = 100) +
  geom_line(linewidth = 0.75) +
  geom_point(size = 2) +
  custom.theme(scaling) +
  labs(
    x = 'Number of Principal Components',
    y = 'Proportion of variance (%)'
  ) 

###

In [29]:

pca.nb.traj <- NbClust(
  data.frame(pca_pdb_traj$z) %>% select(all_of(1:4)),
  distance = "euclidean",
  min.nc = 2, max.nc = 5,
  method = "kmeans"
)

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 10 proposed 2 as the best number of clusters 
* 12 proposed 3 as the best number of clusters 
* 1 proposed 4 as the best number of clusters 
* 1 proposed 5 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  3 
 
 
*******************************************************************

###

In [30]:

pca.plotr.traj <- function(pca_object, dim.1, dim.2, 
                           nb = pca.nb.traj,
                           scale = scaling){
  
  scores <- pca_object$z %>% 
    as_tibble() %>% 
    set_names(paste0('PC', 1:ncol(.))) %>%
    mutate(State = 1:n(), .before = 1) %>% 
    mutate(cluster = nb$Best.partition) %>% 
    select(State, cluster, !!sym(dim.1), !!sym(dim.2))
  
  # print(scores)
  
  # Function to calculate convex hull coordinates for each cluster
  get_hull_coordinates <- function(cluster_data) {
    hull_indices <- chull(cluster_data$PC1, cluster_data$PC2)
    hull_coordinates <- cluster_data[hull_indices, ]
    return(hull_coordinates)
  }
  
  # Calculate hull coordinates for each cluster
  renamed_scores <- scores %>% magrittr::set_colnames(c('State', 'cluster', 'PC1', 'PC2'))
  hull_coordinates <- do.call(rbind, lapply(split(renamed_scores, renamed_scores$cluster), get_hull_coordinates)) %>% 
    magrittr::set_colnames(c('State', 'cluster', dim.1, dim.2))
  
  
  # Calculate the variance explained by each PC
  var <- signif(pca_object$L/sum(pca_object$L) * 100, 2)[c(as.numeric(gsub('PC', '', dim.1)), as.numeric(gsub('PC', '', dim.2)))]
  
  representative_states <- renamed_scores %>%
    left_join(
      renamed_scores %>%
        group_by(cluster) %>%
        summarize(
          centroid_PC1 = mean(PC1),
          centroid_PC2 = mean(PC2)
        ), 
      by = "cluster") %>%
    mutate(
      distance_to_centroid = sqrt((PC1 - centroid_PC1)^2 + (PC2 - centroid_PC2)^2)
    ) %>%
    group_by(cluster) %>%
    slice_min(distance_to_centroid) %>% 
    pull(State)
  
  # Plot the scores and color by cluster assignment
  pca_plot <- hull_coordinates %>% 
    ggplot(., aes_string(x = dim.1, y = dim.2)) +
    geom_polygon(
      aes(group = cluster, fill = factor(cluster), color = factor(cluster)),
      alpha = 0.05,
      linewidth = 0.75
    ) +
    geom_point(
      mapping = aes(color = factor(cluster)),
      data = scores,
      alpha = 0.2
    ) +
    geom_text_repel(
      data = scores %>% 
        filter(State %in% representative_states),
      aes(label = State),
      size = 5,
      fontface = 'bold',
      force = 100
    ) +
    geom_point(
      data = scores %>% 
        filter(State %in% representative_states),
      aes(fill = factor(cluster)),
      size = 3, shape = 21, color = 'black', linewidth = 0.75
    ) +
    custom.theme(scale) +
    labs(
      x = glue::glue(dim.1, ' (', var[1], '%)'),
      y =  glue::glue(dim.2, ' (', var[2], '%)')
    ) +
    scale_color_d3(name = 'Cluster') +
    scale_fill_d3(name = 'Cluster')
  
  return(pca_plot)
  
}

In [31]:

p.loads.traj <- data.frame(
  u = c(pca_pdb_traj$U[,1], pca_pdb_traj$U[,2], pca_pdb_traj$U[,3]),
  eleno = rep(pdb.long$atom$eleno, 3),
  PC = c(rep('PC1', length(pca_pdb_traj$U[,1])),
         rep('PC2', length(pca_pdb_traj$U[,2])),
         rep('PC3', length(pca_pdb_traj$U[,3])))
) %>% 
  left_join(
    pdb.long$atom,
    by = c("eleno" = "eleno")
  ) %>% 
  group_by(resno, resid, PC, chain) %>%
  summarise(u = sum(abs(u))) %>% 
  mutate(resno = if_else(resid == 'LDP', 38, resno),
         resid = gsub('^D', '', resid),
         label = if_else(
           resid == 'LDP', 
           #make LDP bold and pink
           paste0('<span style="color:pink"><b>', resid, '</b></span>'),
           paste0('<b>', resid, resno, '</b>')
         )
  ) %>%
  filter(PC %in% paste0('PC', 1:2)) %>% 
  ggplot(aes(
    x = factor(
      resno,
      levels = unique(resno),
      labels = unique(label)
    ),
    y = u,
    fill = chain
  )
  ) +
  facet_wrap(~PC, ncol = 1) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c('grey', 'pink')) +
  scale_y_continuous(expand = c(0, 0)) +
  custom.theme(1) +
  theme(axis.title.x = element_blank(),
        legend.position = 'none') +
  labs(y = 'sum of absolute loadings')

`summarise()` has grouped output by 'resno', 'resid', 'PC'. You can override
using the `.groups` argument.

###

In [32]:

p.pca <- scree +
  pca.plotr.traj(pca_pdb_traj, 'PC1', 'PC2', nb = pca.nb.traj) +
  p.loads.traj +
  plot_layout(
    design = '
              AB
              CC
              ') &
  plot_annotation(tag_levels = c('A', 'B', 'C', 'D'))

Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.

Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.

Warning in geom_point(data = scores %>% filter(State %in%
representative_states), : Ignoring unknown parameters: `linewidth`

###

In [33]:

p.pca

Warning: Removed 7 rows containing missing values (`geom_text_repel()`).

###

Figure 7: Principal component analysis on the coordinates of the restrained microsecond trajectory. Scree plot (A) showing the contribution of each principal component on the total variance and the cumulative variance labelled on selected data points. (B) Score plots along the first two principal components, colored by kmeans clusters. (C) Sum of absolute loadings of residues for the first two first principal components

In [34]:

#if (!file.exists('data/representative_states_pca_microsec.pdb')) {
write.pdb(
  pdb = pdb.long,
  xyz = pdb.long$xyz[c(10062, 8099, 12207),],
  file = paste0('data/representative_states_pca_microsec.pdb')
)
# }

###

From the first four principal components, the data is best clustered in three groups. The states closest to the centroids of each cluster are shown below. The largest differences are at the 3’-end region (G36:T37 and G13:A14, different in cluster 3), the T23 chain reversal loop (different in cluster 1), and - to a lower extent - the G25. This correlates well with the root mean squared flucturations measured during the simulation (Figure 8)

Representative structures of the two PCA clusters. Differences are highlighted.

In [35]:

rmsf <- rmsf(pdb.long$xyz)

labels <- pdb.long$atom %>% 
  select(resid, resno, eleno) %>%
  group_by(resno) %>%
  filter(eleno == round(mean(eleno), 0)) %>% 
  mutate(residue = if_else(
    resno < 38,
    paste0(gsub('D', '', resid), resno),
    resid
  ))

rmsf.df <- tibble(
  rmsf = rmsf
) %>% 
  mutate(eleno = 1:nrow(.)) %>%
  left_join(pdb.long$atom, by = c('eleno' = 'eleno'))

rmsf.df %>% 
  filter(resno < 38) %>% 
  ggplot(aes(x = eleno, y = rmsf, fill = factor(resno), color = factor(resno))) +
  geom_vline(
    data = pdb.long$atom %>%
      select(resno, eleno) %>%
      filter(resno < 38) %>% 
      group_by(resno) %>%
      filter(eleno == max(eleno)),
    aes(xintercept = eleno),
    color = 'grey', linetype = 'dotted'
  ) +
  geom_text(
    data = labels %>% filter(resno < 38),
    aes(label = residue, y = 1.05*max(rmsf)),
    fontface = 'bold',
    show.legend = FALSE
  ) +
  geom_text(
    data = labels %>% filter(resno == 38),
    aes(label = residue, y = 1.05*max(rmsf)),
    fontface = 'bold',
    show.legend = FALSE,
    color = 'pink'
  ) +
  geom_col(
    show.legend = FALSE,
    width = 1.2,
    alpha = 0.9
  ) +
  geom_col(
    data = rmsf.df %>% filter(resno == 38),
    color = 'pink', fill = 'pink',
    show.legend = FALSE,
    width = 1.2,
    alpha = 0.9
  ) +
  scale_x_continuous(expand = c(0, 0), limits = c(0,NA)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0,NA)) +
  scale_color_viridis_d(option = 'D') +
  scale_fill_viridis_d(option = 'D') +
  labs(
    x = 'Atom number',
    y = 'RMSF (Å)'
  ) +
  custom.theme(1) +
  coord_cartesian(clip = 'off')

Warning: `position_stack()` requires non-overlapping x intervals
`position_stack()` requires non-overlapping x intervals

###

Figure 8: Root mean squared fluctuations (RMSF), highlighting the residues with the largest mobility during the restrained simulation.

2.1.5 Cross-correlation analysis

The correlation of atomic movements was assessed by calculation of the atom-wise cross-correlation matrix, shown in Figure 9 as a heatmap. Negative values indicate that the atoms/residues move in opposite directions.

Here, it is possible to observe a positive correlation of the dopamine binding site residues T18 and T19 H-bonded to the cathecol, A27 and G28 to the ammonium, and the stacked C26, T32 and G33 (see Section 2.5 below).

In [36]:

cij <- dccm(pdb.long$xyz)

cij.atoms <- cij %>% 
  lazy_dt() %>% 
  mutate(atom1 = 1:nrow(.), .before = 1) %>%
  pivot_longer(cols = -atom1, names_to = "atom2", values_to = "value") %>% 
  mutate(atom2 = as.numeric(gsub("V", "", atom2))) %>% 
  left_join(pdb.long$atom %>% 
              select(eleno, resno, resid), 
            by = c("atom1" = "eleno")) %>%
  rename(resno1 = resno, resid1 = resid) %>%
  left_join(pdb.long$atom %>% 
              select(eleno, resno, resid), 
            by = c("atom2" = "eleno")) %>%
  rename(resno2 = resno, resid2 = resid) %>% 
  filter(resid1 != 'LDP', resid2 != 'LDP') %>%
  as.data.table()



p.cij.atoms <- ggplot(cij.atoms, aes(x = atom1, y = atom2, fill = value)) +
  geom_raster() +
  scale_fill_gradientn(
    colors = hcl.colors(n = 1E5, palette = "Tropic"),
    limits = c(-1, 1),
    name = ''
  ) +
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0)) +
  custom.theme(1) +
  coord_cartesian(clip = "off") +
  labs(x = "Atom", y = "Atom")


cij.res <- cij.atoms %>%
  group_by(resno1, resno2) %>%
  summarise(value = mean(value))

`summarise()` has grouped output by 'resno1'. You can override using the
`.groups` argument.

p.cij.res <- ggplot(cij.res, aes(x = resno1, y = resno2, fill = value)) +
  geom_raster(show.legend = FALSE) +
  scale_fill_gradientn(
    colors = hcl.colors(n = 1E5, palette = "Tropic"),
    limits = c(-1, 1)
  ) +
  scale_x_continuous(expand = c(0,0), breaks = 11:37) +
  scale_y_continuous(expand = c(0,0), breaks = 11:37) +
  custom.theme(1) +
  coord_cartesian(clip = "off") +
  labs(x = "Residue", y = "Residue")



p.cij.atoms + p.cij.res +
  plot_layout(guides = "collect", ncol = 2) +
  plot_annotation(
    tag_levels = c('A', 'B')
  ) &
  theme(legend.position = 'bottom')

###

Figure 9: Dynamical cross-correlation map (DCCM) of the microsecond simulation across atoms (A) or residues (B)

2.1.6 Domain analysis

The identification of geometrically rigid regions of the aptamer in the microsecond simulation was performed with GeoStaS algorithm (Romanowska, Nowiński, and Trylska 2012). In Figure 10, we see that most of the aptamer is rigid, with the exception of the T23 loop and, to a lower extent, the nearby T30 loop.

In [37]:

gs <- geostas(
  pdb.long$xyz[seq(1, nrow(pdb.long$xyz), by = nrow(pdb.long$xyz)/1000),], 
  fit = FALSE
)

  .. 'xyz' coordinate data with 1000 frames 
  .. coordinates are not superimposed prior to geostas calculation
  .. calculating atomic movement similarity matrix ('amsm.xyz()') 
  .. dimensions of AMSM are 571x571
  .. clustering AMSM using 'kmeans'

###

In [38]:

gs.amsm <- gs$amsm %>%
  lazy_dt() %>% 
  mutate(atom1 = 1:nrow(.), .before = 1) %>%
  pivot_longer(cols = -atom1, names_to = "atom2", values_to = "value") %>% 
  mutate(atom2 = as.numeric(gsub("V", "", atom2))) %>% 
  left_join(pdb.long$atom %>% 
              select(eleno, resno, resid), 
            by = c("atom1" = "eleno")) %>%
  rename(resno1 = resno, resid1 = resid) %>%
  left_join(pdb.long$atom %>% 
              select(eleno, resno, resid), 
            by = c("atom2" = "eleno")) %>%
  rename(resno2 = resno, resid2 = resid) %>% 
  filter(resid1 != 'LDP', resid2 != 'LDP') %>%
  as.data.table()

p.gs.amsm <- ggplot(gs.amsm, aes(x = atom1, y = atom2, fill = value)) +
  geom_raster() +
  scale_fill_viridis_c(direction = -1, name = '') +
  scale_x_continuous(expand = c(0,0)) +
  scale_y_continuous(expand = c(0,0)) +
  custom.theme(1) +
  coord_cartesian(clip = "off") +
  labs(x = "Atom", y = "Atom")


gs.amsm.res <- gs.amsm %>%
  group_by(resno1, resno2) %>%
  summarise(value = mean(value))

`summarise()` has grouped output by 'resno1'. You can override using the
`.groups` argument.

p.gs.amsm.res <- ggplot(gs.amsm.res, aes(x = resno1, y = resno2, fill = value)) +
  geom_raster(show.legend = FALSE) +
  scale_fill_viridis_c(direction = -1, name = '') +
  scale_x_continuous(expand = c(0,0), breaks = 11:37) +
  scale_y_continuous(expand = c(0,0), breaks = 11:37) +
  custom.theme(1) +
  coord_cartesian(clip = "off") +
  labs(x = "Residue", y = "Residue") 

p.gs.amsm + p.gs.amsm.res +
  plot_layout(guides = "collect", ncol = 2) +
  plot_annotation(
    tag_levels = c('A', 'B')
  ) &
  theme(legend.position = 'bottom')

###

Figure 10: Atomic movement similarity matrix obtained with the GeoStaS analysis for the microsecond simulation across atoms (A) or residues (B). The calculation was carried out on 1000 evenly-spaced frames.

2.2 Minimized structures

In [39]:

rmsd.interstates <- rmsd(pdb, 
                         a.inds = atom.select(pdb),
                         b.inds = atom.select(pdb),
                         fit = TRUE) %>% 
  as.data.frame() %>%
  magrittr::set_colnames(as.character(1:15)) %>%
  mutate(State = 1:15, .before = 1) %>% 
  #replace all 0 with NA in columns 2:16
  mutate(across(2:16, ~ifelse(. == 0, NA_real_, .))) %>%
  rbind(c(NA_real_, sapply(.[, -1], mean, na.rm = TRUE))) %>% 
  mutate(State = ifelse(is.na(State), 'Mean', as.character(State)))

# Mean RMSD
mean.rmsd.doprstslt <- rmsd.interstates[rmsd.interstates$State == "Mean", 2:16] %>% 
  #collapse into a single vector
  unlist() %>% 
  mean(.) %>% 
  round(1)

# Std. dev. of RMSD
sd.rmsd.doprstslt <- rmsd.interstates[rmsd.interstates$State == "Mean", 2:16] %>% 
  #collapse into a single vector
  unlist() %>% 
  sd(.) %>% 
  round(2)

2.2.1 Structures and energies

The fifteen minimized structures of the dopamine aptamer obtained by restrained molecular dynamics are displayed in Figure 11 (mean RMSD across structures: 1.3 \(\pm\) 0.14 Å). The longer sequence (with duplex stem at the bottom) are shown alongside (also minimized after 10 ns rMD ; see Section 2.7).

Figure 11: Fifteen minimized structures obtained by restrained molecular dynamics and minimization (left) and corresponding structures for the longer sequence calculated with and without A38•T37 base pairing. The dopamine ligand is shown in yellow, guanines in tan, thymines in green, adenines in blue and cytosine in purple.

The summary of the simulation energies is given in Table 1.

The inter-states RMSD are given in Table 2

In [40]:

In [41]:

# list all files in sander/out with the format 5_min_doprstslt**.mdinfo where ** are numbers
files <- list.files('data/', pattern = "5_min_doprstslt[0-9]+.mdinfo", full.names = TRUE)

# function to read the mdinfo file
mdinfo.reader <- function(input.file) {
  
  blob <- fread(input.file, skip = 5, fill = TRUE) %>% 
    as.data.frame()
  
  mdinfo <- data.frame(
    file = basename(input.file),
    bond = blob[1,3],
    angle = blob[1,6],
    dihedral = blob[1,9],
    vdw = blob[2, 3],
    eel = blob[2, 6],
    hbond = blob[2, 9],
    vdw14 = blob[3, 4],
    eel14 = blob[3, 8],
    restraint = blob[3, 11],
    eamber = blob[4, 3],
    bond.rst = blob[5, 5],
    angle.rst = blob[5, 8],
    torsion.rst = blob[5, 11]
  ) %>% 
    mutate(
      structure = gsub('.mdinfo', '', gsub("5_min_doprstslt", "", file)),
      .before = 1
    ) %>% 
    select(structure, eamber, restraint, bond, angle, dihedral, vdw, eel,
           vdw14, eel14) %>% 
    mutate(across(2:ncol(.), as.double))
  
  return(mdinfo)
  
}

# read and bind all files
energies <- do.call(rbind, lapply(files, mdinfo.reader))

digits <- energies %>% 
  pivot_longer(2:ncol(.), names_to = "energy", values_to = "value") %>%
  group_by(energy) %>%
  summarise(rst = sd(value)/mean(value)) %>% 
  mutate(
    digits = case_when(
      abs(rst) < 0.001 ~ 5,
      abs(rst) < 0.01 ~ 4,
      abs(rst) < 0.1 ~ 3,
      abs(rst) < 1 ~ 2,
      TRUE ~ 1
    )
  )

DT::datatable(
  energies %>% 
    filter(structure <= 15),
  filter = 'top', 
  extensions = c('Buttons', 'FixedHeader', 'Scroller'),   
  options = list(     
    dom = 'Bfrtip',     
    buttons = c('copy', 'csv', 'excel', 'pdf'),     
    scrollX = TRUE,     
    scrollY = 450,     
    pageLength = 20   
  ),
  #change column names
  colnames = c(
    'Structure', 'Amber energy', 'NMR restraints', 'Bond', 'Angle', 
    'Dihedral', 'VdW', 'EEL', '1-4 VdW', '1-4 EEL'
  ),
  style = "bootstrap"
) %>%  
  formatSignif(columns = 'eamber', digits = digits %>% filter(energy == 'eamber') %>% pull(digits)) %>%
  formatSignif(columns = 'restraint', digits = digits %>% filter(energy == 'restraint') %>% pull(digits)) %>%
  formatSignif(columns = 'vdw', digits = digits %>% filter(energy == 'vdw') %>% pull(digits)) %>%
  formatSignif(columns = 'eel', digits = digits %>% filter(energy == 'eel') %>% pull(digits)) %>%
  formatSignif(columns = 'bond', digits = digits %>% filter(energy == 'bond') %>% pull(digits)) %>%
  formatSignif(columns = 'angle', digits = digits %>% filter(energy == 'angle') %>% pull(digits)) %>%
  formatSignif(columns = 'dihedral', digits = digits %>% filter(energy == 'dihedral') %>% pull(digits)) %>%
  formatSignif(columns = 'vdw14', digits = digits %>% filter(energy == 'vdw14') %>% pull(digits)) %>%
  formatSignif(columns = 'eel14', digits = digits %>% filter(energy == 'eel14') %>% pull(digits))
####

Table 1: Energies

In [42]:

In [43]:

brks <- quantile(rmsd.interstates[-1], probs = seq(0.005, 0.995, .0025), na.rm = TRUE)

rmsd.interstates %>% 
  DT::datatable(
    extensions = c('Buttons', 'FixedHeader', 'Scroller'),  
    rownames = FALSE,
    options = list(     
      dom = 'Brt',
      buttons = c('copy', 'csv', 'excel', 'prmsd.interstates'),     
      scrollX = TRUE,   
      pageLength = 16   
    ),   
    style = "bootstrap"
  ) %>% 
  formatRound(columns = 2:16, digits = 2) %>%
  formatStyle(
    columns = 1,
    fontWeight = 'bold'
  ) %>% 
  formatStyle(
    columns = 1:16,
    target = 'row',
    fontWeight = styleEqual(c('Mean'), c('bold'))
  ) %>% 
  formatStyle(
    columns = 2:16,
    background = styleInterval(
      quantile(rmsd.interstates[-1], probs = seq(0.005, 0.995, .0025), na.rm = TRUE), 
      c('steelblue', 
        colorRampPalette(c('steelblue', 'white', 'tomato'))(length(brks) - 1),
        'tomato')
    )
  ) %>% 
  formatStyle(
    columns = 1:16,
    textAlign = 'center'
  ) 
###

Table 2: Pairwise RMSD between the top 15 structures

2.2.2 PCA

It is apparent that some structures deviate more from others, e.g. 1, 4, and 8 and, to a lower extent, 9,12 and 14. We aimed at identifying clusters of structures to summarize the conformational space explored by the aptamer. Below are shown the results of a principal component analysis based on all atoms except hydrogen atoms.

In [44]:

pca_pdb <- pca.pdbs(
  pdb, 
  use.svd = FALSE, 
  rm.gaps = TRUE, 
  fit = FALSE #pdb models are already aligned at the import stage
)

scree <- data.frame(
  pc = 1:length(pca_pdb$L),
  L = pca_pdb$L
) %>% 
  mutate(
    var = L/sum(L) * 100,
    cum.var = cumsum(var)
  ) %>% 
  filter(pc <= 15) %>%
  select(-L) %>%
  mutate(
    label = ifelse(
      pc %in% 1:3 | 
        pc %in% seq(4, nrow(.), 3) | 
        pc == nrow(.), 
      signif(cum.var, 3), NA)
  ) %>% 
  ggplot(., aes(x = pc, y = var)) +
  geom_text_repel(aes(label = label), size = 5, fontface = 'bold', force = 100) +
  geom_line(linewidth = 0.75) +
  geom_point(size = 2) +
  custom.theme(scaling) +
  labs(
    x = 'Number of Principal Components',
    y = 'Proportion of variance (%)'
  ) 


pca.plotr <- function(pca_object, dim.1, dim.2, 
                      max.pc = 4, meth = "kmeans", dist = "euclidean", 
                      min_nc = 3, max_nc = 7,
                      scale = scaling){
  
  # Run NbClust to get the best number of clusters and cluster assignment
  nb <- NbClust(
    data.frame(pca_object$z) %>% select(all_of(1:max.pc)), 
    distance = dist, 
    min.nc = min_nc, max.nc = max_nc, 
    method = meth
  )
  
  # Extract and format scores
  scores <- pca_object$z %>% 
    as_tibble() %>% 
    set_names(paste0('PC', 1:ncol(.))) %>%
    mutate(State = 1:n(), .before = 1) %>% 
    mutate(cluster = nb$Best.partition) %>% 
    select(State, cluster, !!sym(dim.1), !!sym(dim.2))
  
  # print(scores)
  
  # Function to calculate convex hull coordinates for each cluster
  get_hull_coordinates <- function(cluster_data) {
    hull_indices <- chull(cluster_data$PC1, cluster_data$PC2)
    hull_coordinates <- cluster_data[hull_indices, ]
    return(hull_coordinates)
  }
  
  # Calculate hull coordinates for each cluster
  renamed_scores <- scores %>% magrittr::set_colnames(c('State', 'cluster', 'PC1', 'PC2'))
  hull_coordinates <- do.call(rbind, lapply(split(renamed_scores, renamed_scores$cluster), get_hull_coordinates)) %>% 
    magrittr::set_colnames(c('State', 'cluster', dim.1, dim.2))
  
  
  # Calculate the variance explained by each PC
  var <- signif(pca_object$L/sum(pca_object$L) * 100, 2)[c(as.numeric(gsub('PC', '', dim.1)), as.numeric(gsub('PC', '', dim.2)))]
  
  # Plot the scores and color by cluster assignment
  pca_plot <- hull_coordinates %>% 
    ggplot(., aes_string(x = dim.1, y = dim.2)) +
    geom_polygon(
      aes(group = cluster, fill = factor(cluster), color = factor(cluster)),
      alpha = 0.2
    ) +
    geom_point(
      mapping = aes(color = factor(cluster)),
      data = scores
    ) +
    geom_text_repel(
      mapping = aes(label = State, color = factor(cluster)),
      data = scores,
      size = 5,
      fontface = 'bold',
      show.legend = FALSE
    ) +
    custom.theme(scale) +
    labs(
      x = glue::glue(dim.1, ' (', var[1], '%)'),
      y =  glue::glue(dim.2, ' (', var[2], '%)')
    ) +
    scale_color_d3(name = 'Cluster') +
    scale_fill_d3(name = 'Cluster')
  
  return(pca_plot)
  
}

p.pca <- scree +
  pca.plotr(pca_pdb, 'PC1', 'PC2') +
  pca.plotr(pca_pdb, 'PC1', 'PC3') +
  pca.plotr(pca_pdb, 'PC2', 'PC3') +
  plot_layout(guides = 'collect') &
  plot_annotation(tag_levels = c('A', 'B', 'C', 'D'))

Warning in pf(beale, pp, df2): NaNs produced

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 6 proposed 3 as the best number of clusters 
* 3 proposed 4 as the best number of clusters 
* 8 proposed 5 as the best number of clusters 
* 4 proposed 6 as the best number of clusters 
* 2 proposed 7 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  5 
 
 
*******************************************************************

Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.

Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
ℹ Please use tidy evaluation idioms with `aes()`.
ℹ See also `vignette("ggplot2-in-packages")` for more information.

Warning in pf(beale, pp, df2): NaNs produced

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 6 proposed 3 as the best number of clusters 
* 3 proposed 4 as the best number of clusters 
* 8 proposed 5 as the best number of clusters 
* 4 proposed 6 as the best number of clusters 
* 2 proposed 7 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  5 
 
 
*******************************************************************

Warning in pf(beale, pp, df2): NaNs produced

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 6 proposed 3 as the best number of clusters 
* 3 proposed 4 as the best number of clusters 
* 8 proposed 5 as the best number of clusters 
* 4 proposed 6 as the best number of clusters 
* 2 proposed 7 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  5 
 
 
*******************************************************************

In [45]:

#plot
p.pca

Warning: Removed 7 rows containing missing values (`geom_text_repel()`).

Figure 12: Principal component analysis on the coordinates of the 15 minimized structures. Scree plot (A) showing the contribution of each principal component on the total variance and the cumulative variance labelled on selected data points, and (B) to (D): score plots along two dimensions for the first three principal components. Clusters determined by kmeans are colored.

In [46]:

data.frame(
  u = c(pca_pdb$U[,1], pca_pdb$U[,2], pca_pdb$U[,3]),
  eleno = rep(pdb$atom$eleno, 3),
  PC = c(rep('PC1', length(pca_pdb$U[,1])),
         rep('PC2', length(pca_pdb$U[,2])),
         rep('PC3', length(pca_pdb$U[,3])))
) %>% 
  left_join(
    pdb$atom,
    by = c("eleno" = "eleno")
  ) %>% 
  group_by(resno, resid, PC, chain) %>%
  summarise(u = sum(abs(u))) %>% 
  mutate(resno = if_else(resid == 'LDP', 38, resno),
         resid = gsub('^D', '', resid),
         label = if_else(
           resid == 'LDP', 
           #make LDP bold and pink
           paste0('<span style="color:tomato"><b>', resid, '</b></span>'),
           paste0('<b>', resid, resno, '</b>')
         )
  ) %>%
  ggplot(aes(
    x = factor(
      resno,
      levels = unique(resno),
      labels = unique(label)
    ),
    y = u,
    fill = chain
  )
  ) +
  facet_wrap(~PC, ncol = 1) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c('grey', 'pink')) +
  custom.theme(1) +
  theme(axis.title.x = element_blank(),
        legend.position = 'none') +
  labs(y = 'sum of absolute loadings')

`summarise()` has grouped output by 'resno', 'resid', 'PC'. You can override
using the `.groups` argument.

###

Figure 13: Sum of absolute loadings of residues for the first three first principal components

As expected from the analysis of trajectories, the key differences between the clusters are the loop thymine T23, the 3’end, and G25.

Minimized structures colored by clusters

2.2.3 NMR violations

In [47]:

rstslt.violations <- rst.error.batchr(input.file = 'data/nmr_viol_doprstslt.txt', error.thresh = 0.15, occurence.thresh = 0, states = 15)


── Column specification ────────────────────────────────────────────────────────
cols(
  .default = col_character(),
  X3 = col_double(),
  X9 = col_double(),
  X26 = col_double(),
  X29 = col_double(),
  X31 = col_double(),
  X32 = col_double()
)
ℹ Use `spec()` for the full column specifications.

Warning: There were 15 warnings in `mutate()`.
The first warning was:
ℹ In argument: `state.1 = .Primitive("as.double")(state.1)`.
Caused by warning:
! NAs introduced by coercion
ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.


── Column specification ────────────────────────────────────────────────────────
cols(
  X1 = col_double(),
  X2 = col_character(),
  X3 = col_character(),
  X4 = col_double(),
  X5 = col_character(),
  X6 = col_character(),
  X7 = col_double(),
  X8 = col_double()
)

writexl::write_xlsx(rstslt.violations$violations, 'data/rstslt_all_violations.xlsx')
writexl::write_xlsx(rstslt.violations$summary, 'data/rstslt_summary_violations.xlsx')

The average violations observed across the 20 minimized structures, with a cut-off of 0.15 Å are presented in Table 3. No violation exceeds a mean error of 0.25 Å. Out of the 10 violations above the cutoff, 8 are present in more than half of the structures.

Dopamine is involved in 0 violation(s).

In [48]:

In [49]:

DT::datatable(   
  rstslt.violations$summary %>%     
    filter(mean.error > 0.15) %>% 
    mutate(
      atom.1 = gsub('HAD', 'H2', atom.1)
    ),   
  filter = 'top',
  extensions = c('Buttons', 'FixedHeader', 'Scroller'),   
  options = list(     
    dom = 'Bfrtip',     
    buttons = c('copy', 'csv', 'excel', 'pdf'),     
    scrollX = TRUE,     
    scrollY = 400,     
    pageLength = 10   
  ),   
  #change column names   
  colnames = c(
    'Atom 1', 'Atom 2', 'Target', 'Mean error (&angst;)',
    'SD error (&angst;)', 'Occurence'
  ),
  style = "bootstrap"
)

Table 3: NMR restraint violations (mean error > 0.15 Å)

2.2.4 Base pairs

The base pairs identified in the 20 minimized structures are named, where possible, in Table 4, and further described using three nomenclatures.

The Saenger nomeclature is a list of 28 (plus one added later) base pairs identified by Roman numerals (Saenger 1984; Gesteland, Cech, and Atkins 2006). A canonical Watson-Crick base pair is XIX for GC, and XX for AT.

The Leontis-Westhof classification (LEONTIS and WESTHOF 2001), latter expanded by Lemieux and Major (Lemieux 2002), describes base-pairing by considering whether H-bonds are established on the Watson-Crick, Hoogsteen or sugar edges, and what is the glycosidic orientation (cis/trans: c/t) of the nucleotides. A canonical Watson-Crick base pair would then be cWW. Note that the sugar edge has been defined with RNA in mind, and therefore with 2’-OH that are absent here. This sugar edge is absent for syn glycosidic dihedral angles (see section Section 2.3 below), which is only the case of G31 here.

The DSSR classification is similar to the Leontis-Westhof, as it indicates the cis/trans (c/t) orientation followed by the interacting edges, which can be Watson-Crick (W), major (M; ~Hoogsteen) and minor grooves (m; ~sugar). It additionally incorporate the +/- base ’face indication as in the Saenger nomenclature. Here, a canonical Watson-Crick base pair is called cW-W.

In [50]:

c(1:15) %>% # initialize structure numbers
  as.character() %>%
  # if the number is less than 10, add a leading zero
  sapply(function(x) {
    if (nchar(x) == 1) {
      paste0('0', x)
    } else {
      x
    }
  }) %>%
  # paste the url
  sapply(function(x) {
    paste0('https://raw.githubusercontent.com/EricLarG4/EricLarG4.github.io/master/indiv_pdb/5_min_doprstslt', x, '.pdb')
  }) %>% 
  # feed to dssr function
  lapply(., dssr)

$`1`
NULL

$`2`
NULL

$`3`
NULL

$`4`
NULL

$`5`
NULL

$`6`
NULL

$`7`
NULL

$`8`
NULL

$`9`
NULL

$`10`
NULL

$`11`
NULL

$`12`
NULL

$`13`
NULL

$`14`
NULL

$`15`
NULL

In [51]:

In [52]:

dssrbp <- lapply(list.files('data/dssr', full.names = TRUE), dssr.bp) %>% 
  bind_rows() %>% 
  # filter(structure <= 15) %>% 
  #offset the numbers from nt1 and nt2 strings by 10
  mutate(
    nt1 = paste0(str_extract(nt1, '[A-Z]+'), as.numeric(str_extract(nt1, '\\d+')) + 10),
    nt2 = paste0(str_extract(nt2, '[A-Z]+'), as.numeric(str_extract(nt2, '\\d+')) + 10)
  )

DT::datatable(
  dssrbp,
  filter = 'top',
  extensions = c('Buttons', 'FixedHeader', 'Scroller'),
  options = list(
    dom = 'Bfrtip',
    buttons = c('copy', 'csv', 'excel', 'pdf'),
    scrollX = TRUE,
    scrollY = 450,
    pageLength = 30
  ),
  colnames = c(
    'Structure', 'ID', 'nucleotide 1', 'nucleotide 2', 'base pair', 'Name', 'Saenger', 'Leontis-Westhof', 'DSSR'
  ),
  style = "bootstrap"
)
###

Table 4: Base pairs identified by DSSR in the 15 minimized structures. Abbreviations. +/-: the paired nucleotides have same/opposite faces (as in canonical antiparallel Watson-Crick pairing); WC and W: Watson-Crick pairing/edge; H: Hoogsteen edge; S: sugar edge; M/m: Major/minor groove edges; .: undefined edge; r: reverse; c/t: cis/trans orientation along the glycosidic bond.

Remarkably, the whole structure only contains two canonical Watson-Crick base pairs (C24-G29 and C26-G33; Figure 14 (a)). One GC (C20-G28) and two AT ((T12-A35 and T19-A27) base pairs have rWC configurations (Figure 14 (b)). Note that T19 is bound by the dopamine ligand as well (see Section 2.5).

2.3 Dihedral angles

All residues are in the anti configuration except for G31 in syn (Table 5). G31 forms a trans Watson-Crick base pair with G21 (tW+W; Figure 15).

In [53]:

In [54]:

chi20 <- chiR(pdb, chain = 'A') %>% 
  rename(structure = frame) 


chi20.sum <- chi20 %>%
  group_by(residue) %>% 
  summarise(
    mean.chi = mean(chi),
    rsd.chi = sd(chi)/mean.chi*100
  )

chi20 %>%
  pivot_wider(
    names_from = structure,
    values_from = chi
  ) %>%
  left_join(chi20.sum, by = 'residue') %>% 
  select(residue, mean.chi, rsd.chi, everything()) %>%
  mutate(gba = ifelse(mean.chi > 60 & mean.chi < 90, 'syn', 'anti'),
         .before = mean.chi) %>%
  DT::datatable(
    extensions = c('Buttons', 'FixedHeader', 'Scroller'),   
    options = list(     
      dom = 'Bfrtip',     
      buttons = c('copy', 'csv', 'excel', 'pdf'),     
      scrollX = TRUE,     
      scrollY = 450,     
      pageLength = 30   
    ),
    #change column names
    colnames = c(
      'Residue', 'GBA', 'Mean', 'RSD (%)', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'
    ),
    style = "bootstrap"
  ) %>%
  formatSignif(columns = c(3, 5:19), digits = 3) %>%
  formatSignif(columns = 4, digits = 2) %>% 
  formatStyle(
    columns = 2,
    color = styleEqual(c('syn', 'anti'), c('lightcoral', 'black'))
  )

Table 5: Dihedral χ angles of the 15 minimized structures

Figure 15: Non-canonical base pair between G31 and G21 (tW+W), within a triplet with G29 (tm+m)

In [55]:

# cache-lazy: false

chi.rstslt <- chiR(pdb.long, chain = 'A', res.start = 11, res.end = 37)
chi.unrstslt <- chiR(pdb.unrst.long, chain = 'A', res.start = 11, res.end = 37)

###

Dihedral angles are stable along the simulation (Figure 16). The same is observed during the unrestrained simulation, albeit with more variance (Figure 17).

In [56]:

scaling <- 0.7

dihedrals_rstslt <- chi.rstslt %>%
  group_by(residue) %>% 
  mutate(
    chi = ifelse(chi > 180, chi - 360, chi),
    mean.chi = mean(chi),
    mod.chi = if_else(
      mean.chi < 20 & mean.chi > 0 & chi < 0, chi + 360,
      if_else(
        mean.chi < -160 & chi > 0, chi - 360, chi
      )
    ),
    mean.chi = mean(mod.chi),
    gba = case_when(
      mean.chi > 60 & mean.chi < 90 ~ 'syn',
      mean.chi < -60 | mean.chi > -180 ~ 'anti',
      TRUE ~ 'other'
    ),
    residue = str_replace_all(residue, 'D|3_|5_|_', ''),
    #add html colors to the residue as function of their gba
    residue.label = case_when(
      gba == 'syn' ~ paste0('<span style="color:tomato">', residue, '</span>'),
      gba == 'anti' ~ paste0('<span style="color:grey10">', residue, '</span>'),
      TRUE ~ '<span style="color:steelblue4">other</span>'
    ),
    residue.number = as.numeric(str_extract(residue, '\\d+'))
  ) 

ggplot(dihedrals_rstslt, aes(chi, frame, color = gba)) +
  facet_wrap(~factor(
    residue.label,
    levels = dihedrals_rstslt %>%
      arrange(residue.number) %>%
      pull(residue.label) %>%
      unique()
  )) +
  geom_point(alpha = 0.005) +
  scale_y_continuous(expand = c(0,0), breaks = c(99999)) +
  scale_x_continuous(limits = c(-180, 180), breaks = c(-90, 0, 90, 180)) +
  scale_color_manual(values = rev(c('tomato', 'grey10'))) +
  ggthemes::theme_pander() +
  theme(
    axis.text.x = element_text(size = 12 * scaling, face = 'bold'),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks = element_blank(),
    strip.text.x = element_markdown(size = 16 * scaling, face = 'bold'),
    strip.text.y = element_markdown(size = 16 * scaling, face = 'bold'),
    strip.background = element_rect(),
    panel.grid.major.x = element_line(linewidth = 0.5 * scaling, color = 'grey', linetype = 'dotted'),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(linewidth = 0.5 * scaling, color = 'grey', linetype = 'dotted'),
    panel.grid.minor.y = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    panel.spacing.y = unit(1, 'lines'),
    panel.spacing.x = unit(2, 'lines'),
    plot.title = element_markdown(size = 16 * scaling, face = 'bold', hjust = 0.5),
    plot.subtitle = element_markdown(size = 14 * scaling),
    plot.caption = element_markdown(hjust = 1, size = 14 * scaling),
    legend.position = 'none'
  ) +
  coord_polar(clip = 'off')

##

Figure 16: Dihedral angles of all residues for the rMD, wrapped in [-180;180], along the trajectory (from center to outside of circle)

In [57]:

scaling <- 0.7

dihedrals_unrstslt <- chi.unrstslt %>%
  group_by(residue) %>% 
  mutate(
    chi = ifelse(chi > 180, chi - 360, chi),
    mean.chi = mean(chi),
    mod.chi = if_else(
      mean.chi < 20 & mean.chi > 0 & chi < 0, chi + 360,
      if_else(
        mean.chi < -160 & chi > 0, chi - 360, chi
      )
    ),
    mean.chi = mean(mod.chi),
    gba = case_when(
      mean.chi > 60 & mean.chi < 90 ~ 'syn',
      mean.chi < -60 | mean.chi > -180 ~ 'anti',
      TRUE ~ 'other'
    ),
    residue = str_replace_all(residue, 'D|3_|5_|_', ''),
    #add html colors to the residue as function of their gba
    residue.label = case_when(
      gba == 'syn' ~ paste0('<span style="color:tomato">', residue, '</span>'),
      gba == 'anti' ~ paste0('<span style="color:grey10">', residue, '</span>'),
      TRUE ~ '<span style="color:steelblue4">other</span>'
    ),
    residue.number = as.numeric(str_extract(residue, '\\d+'))
  ) 

ggplot(dihedrals_unrstslt, aes(chi, frame, color = gba)) +
  facet_wrap(~factor(
    residue.label,
    levels = dihedrals_unrstslt %>%
      arrange(residue.number) %>%
      pull(residue.label) %>%
      unique()
  )) +
  geom_point(alpha = 0.005) +
  scale_y_continuous(expand = c(0,0), breaks = c(99999)) +
  scale_x_continuous(limits = c(-180, 180), breaks = c(-90, 0, 90, 180)) +
  scale_color_manual(values = rev(c('tomato', 'grey10'))) +
  ggthemes::theme_pander() +
  theme(
    axis.text.x = element_text(size = 12 * scaling, face = 'bold'),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks = element_blank(),
    strip.text.x = element_markdown(size = 16 * scaling, face = 'bold'),
    strip.text.y = element_markdown(size = 16 * scaling, face = 'bold'),
    strip.background = element_rect(),
    panel.grid.major.x = element_line(linewidth = 0.5 * scaling, color = 'grey', linetype = 'dotted'),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(linewidth = 0.5 * scaling, color = 'grey', linetype = 'dotted'),
    panel.grid.minor.y = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    panel.spacing.y = unit(1, 'lines'),
    panel.spacing.x = unit(2, 'lines'),
    plot.title = element_markdown(size = 16 * scaling, face = 'bold', hjust = 0.5),
    plot.subtitle = element_markdown(size = 14 * scaling),
    plot.caption = element_markdown(hjust = 1, size = 14 * scaling),
    legend.position = 'none'
  ) +
  coord_polar(clip = 'off')

Figure 17: Dihedral angles of all residues for the unrestrained MD, wrapped in [-180;180], along the trajectory (from center to outside of circle)

2.4 Puckers

Most nucleotides have their sugar adopting a C2’-endo (typical in B DNA) or close to it (C1’-exo, C3’-exo; Table 6, Figure 18). Among those, T23 also visits the C1’-endo pucker during a large portion of the simulation (Figure 20), and can be found in several of the minimized structures; however this is of little significance given that this residue does not form any intra- or inter-molecular contacts.

G25, whose amino group binds to the N7 of G28 (tm-M; Figure 21 (a)), is found in half the structures in C1’-exo or C2’-endo but adopts a variety of puckers in others. During the microsecond simulation, it mostly exist in C1’-exo, and only visits the other configurations for a short period of time.

T18, which is stacked on G17, is tightly bound by the dopamine ligand (Figure 21 (b)) and forms an off-plane tW-M base pair with G33, has a C1’-endo configuration in most conformers, which has been observed for dsDNA around intercalator drugs or when gaining increasing flexibility (A. H. J. Wang et al. 1987; Shen et al. 2011). It is therefore likely that the backbone of T18 adopts a non-canonical conformation to allow dopamine binding. It remains stable during the simulation.

G16, C20, and G34 mostly exist in the C4’-exo/O4’-endo configurations, which is not very common for DNA (Anosova et al. 2015). G16 pairs with the Hoogsteen face of G34 (Figure 21 (c); tW-M) and their puckering is quite unstable during the simulation (Figure 20). C20 is involved in a triplet made of a reverse Watson-Crick base pair with G28 (tW+W) and a single H-bond to T32 (tW-.; Figure 21 (d)).

G17 has an extremely varied pucker depending on the minimized structure (9 unique puckers observed!), and has a remarkably diffuse pucker angle and amplitude during the course of the microsecond simulation. G17 is involved in a non coplanar triplet with the Hoogsteen face of G33 (tW-M; stacked under the dopamine) and T11 (cW-m; Figure 21 (e)). It also binds to G34 (tW-M) and is stacked on G16, whose puckering is also fairly unstable.

A15 is the only nucleotide that mostly adopts a C4’-endo pucker. It forms an asymetric homopurine base pair with A35 (Figure 21 (f); tW-M) , and the sugar pucker is stable during the simulation (Figure 20).

In [58]:

scaling <- 0.7 

puckers.rstslt20 <- nuR(pdb, chain = 'A', res.start = 11, res.end = 37) %>% 
  puck.formatR(.) 


puckers.rstslt20 %>% 
  ungroup() %>% 
  mutate(
    puck = case_when(
      angle > 0 & angle < 36 ~ "C3'-endo",
      angle > 36 & angle < 72 ~ "C4'-exo",
      angle > 72 & angle < 108 ~ "O4'-endo",
      angle > 108 & angle < 144 ~ "C1'-exo",
      angle > 144 & angle < 180 ~ "C2'-endo",
      angle > 180 & angle < 216 ~ "C3'-exo",
      angle > 216 & angle < 252 ~ "C4'-endo",
      angle > 252 & angle < 288 ~ "O4'-exo",
      angle > 288 & angle < 324 ~ "C1'-endo",
      angle > 324 & angle < 360 ~ "C2'-exo"
    )
  ) %>% 
  ggplot(aes(x = angle, y = amplitude, fill = puck)) +
  geom_point(size = 3, alpha = 0.5, shape = 21, color = 'black') +
  scale_x_continuous(limits = c(0,360), breaks = 36*(0:10)) +
  scale_y_continuous(expand = c(0,0)) +
  scale_color_manual(name = 'pucker', values = color.d3, drop = FALSE) +
  scale_fill_manual(name = 'pucker', values = color.d3, drop = FALSE) +
  ggthemes::theme_pander() +
  theme(
    axis.text.x = element_text(size = 12 * scaling, face = 'bold'),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    strip.text.x = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.y = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.background = element_blank(),
    panel.grid.major.x = element_line(
      linewidth = 0.5 * scaling, color = 'grey',
      linetype = 'dotted'
    ),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(
      linewidth = 0.5 * scaling, color = 'grey',
      linetype = 'dotted'
    ),
    panel.grid.minor.y = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_markdown(size = 16 * scaling, face = 'bold'),
    plot.subtitle = element_markdown(size = 14 * scaling),
    plot.caption = element_markdown(hjust = 1, size = 10 * scaling)
  ) +
  coord_polar(clip = 'off') +
  facet_wrap(~residue)

Figure 18: Sugar puckering of the 15 minimized structures. Puckers are presented from most to least frequent.

In [59]:

In [60]:

puckers.rstslt20 <- nuR(pdb, chain = 'A', res.start = 11, res.end = 37) %>% 
  puck.formatR(.) %>% 
  group_by(residue) %>% 
  mutate(
    mean.angle = mean(angle),
    rsd.angle = sd(angle)/mean.angle*100
  ) %>% 
  ungroup() %>% 
  mutate(
    puck = case_when(
      angle > 0 & angle < 36 ~ "C3'-endo",
      angle > 36 & angle < 72 ~ "C4'-exo",
      angle > 72 & angle < 108 ~ "O4'-endo",
      angle > 108 & angle < 144 ~ "C1'-exo",
      angle > 144 & angle < 180 ~ "C2'-endo",
      angle > 180 & angle < 216 ~ "C3'-exo",
      angle > 216 & angle < 252 ~ "C4'-endo",
      angle > 252 & angle < 288 ~ "O4'-exo",
      angle > 288 & angle < 324 ~ "C1'-endo",
      angle > 324 & angle < 360 ~ "C2'-exo"
    )
  )


puckers.rstslt20 <- puckers.rstslt20 %>% 
  select(-c(puck, nu0, nu1, nu2, nu3, nu4, P0, amplitude, mean.amplitude, mod.angle)) %>% 
  left_join(
    puckers.rstslt20 %>% 
      group_by(residue) %>%
      count(puck) %>%
      arrange(-n) %>% 
      mutate(if_else(n == max(n), paste0('**', puck, '**'), puck)) %>%
      summarise(puck = paste(unique(puck), collapse = ", ")), 
    by = 'residue'
  ) 


puckers.rstslt20 %>%
  pivot_wider(
    names_from = frame,
    values_from = angle
  ) %>%  
  select(residue, puck, mean.angle, rsd.angle, everything()) %>%
  DT::datatable(
    extensions = c('Buttons', 'FixedHeader', 'Scroller'),   
    options = list(     
      dom = 'Bfrtip',     
      buttons = c('copy', 'csv', 'excel', 'pdf'),     
      scrollX = TRUE,     
      scrollY = 450,     
      pageLength = 30   
    ),
    #change column names
    colnames = c(
      'Residue', 'Pucker', 'Mean', 'RSD (%)', 
      '1', '2', '3', '4', '5', '6', '7', '8', 
      '9', '10', '11', '12', '13', '14', '15'
    ),
    style = "bootstrap"
  ) %>%
  formatSignif(columns = c(3, 5:19), digits = 3) %>%
  formatSignif(columns = 4, digits = 2)

Table 6: Sugar puckering of the 15 minimized structures. Puckers are presented from most to least frequent.

In [61]:

scaling <- 0.8

pdb.long.puck <- pdb.long
pdb.long.puck$xyz <- pdb.long$xyz[seq(1, nrow(pdb.long$xyz), by = nrow(pdb.long$xyz)/5000),]

puckers.rstslt <- nuR(
  pdb.long.puck, chain = 'A', res.start = 11, res.end = 37
) %>% 
  puck.formatR(.)

puck.plotR(puckers.rstslt, alpha = 0.01)

Figure 19: Sugar puckering calculated with the Altona & Sundaralingam method, wrapped from 0 to 360 for the restrained simulation. The position of the points relative to the center/border of the circles scales with the pucker amplitude (the larger the closer to the border). The circled points represent the median pucker/amplitude. Calculation performed on 5000 evenly spaced frames.

In [62]:

scaling <- 0.8

puckers.rstslt %>% 
  group_by(residue) %>%
  mutate(
    pucker.color = if_else(
      sd(angle)/mean(angle) > 0.20,
      'flexible', 'stable'
    ),
    pucker.color = if_else(residue == 'C10', 'stable', pucker.color),
    pucker.color = factor(pucker.color, levels = c('stable', 'flexible'))
  ) %>% 
  ggplot(aes(y = frame, x = angle, color = pucker.color)) +
  geom_point(alpha = 0.025) +
  geom_point(
    data = . %>% 
      filter(frame == 1)
  ) +
  facet_wrap(~factor(
    residue, levels = puckers.rstslt %>%
      arrange(mean.angle) %>%
      pull(residue) %>%
      unique())) +
  scale_x_continuous(limits = c(0,360), breaks = 36*(0:10)) +
  scale_y_continuous(expand = c(0,0)) +
  scale_color_manual(values = c('grey', 'pink'), name = 'pucker stability') +
  ggthemes::theme_pander() +
  theme(
    axis.text.x = element_text(size = 12 * scaling, face = 'bold'),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    strip.text.x = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.y = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.background = element_blank(),
    panel.grid.major.x = element_line(
      linewidth = 0.5 * scaling, color = 'grey',
      linetype = 'dotted'
    ),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_line(
      linewidth = 0.5 * scaling, color = 'grey',
      linetype = 'dotted'
    ),
    panel.grid.minor.y = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_markdown(size = 16 * scaling, face = 'bold'),
    plot.subtitle = element_markdown(size = 14 * scaling),
    plot.caption = element_markdown(hjust = 1, size = 10 * scaling)
  ) +
  coord_polar(clip = 'off')

Figure 20: Pucker angles of all residues for the rMD, along the trajectory (from center to outside of circle). Residues with relative standard deviation above 20% are labeled as flexible

(a) Sheared base pair between the sugar edge of G25 (left) and the Hoogsteen edge of G28 (tm-M)

In the unrestrained simulations, almost all nucleotides have their sugar adopting a C2’-endo (typical in B DNA) or close to it (C1’-exo; Figure 22). Only T18 adopts a C4’-exo configuration. The non-canonical puckers observed above are therefore imposed by the restraints. For the specific case of T18, the more RNA-like pucker observed in the unrestrained simulation still differs from the C1’-endo pucker of its restrained counterpart.

In [63]:

scaling <- 0.8

pdb.unrst.long.puck <- pdb.unrst.long
pdb.unrst.long.puck$xyz <- pdb.unrst.long$xyz[seq(1, nrow(pdb.unrst.long$xyz), by = nrow(pdb.unrst.long$xyz)/5000),]

puckers.rstslt <- nuR(
  pdb.unrst.long.puck, chain = 'A', res.start = 11, res.end = 37
) %>% 
  puck.formatR(.)

puck.plotR(puckers.rstslt, alpha = 0.01)


###

Figure 22: Sugar puckering calculated with the Altona & Sundaralingam method, wrapped from 0 to 360 for the unrestrained simulation. The position of the points relative to the center/border of the circles scales with the pucker amplitude (the larger the closer to the border). The circled points represent the median pucker/amplitude.

2.5 Dopamine binding

The dopamine is intercalated between T32 and G33/C16, and forms H-bond contacts with the O4s of T18 and T19 with its two hydroxy groups (O1, O2; Figure 25). The ammonium group (N1 forms two additional H-bonds, with the N3 of of A27 (and O4’; although not always canonically since the bond angle is sometimes below 120°; Figure 24) and the deoxyribose O4’ of G28.

The H-bonds between O1/O2 of the dopamine and T18/T19 are globally stable during the restrained simulation (Figure 23), in particular the short LDP:O2/T18:O4 (Figure 28). The ammonium group (N1) also establishes stable H-bonds with G28 and A27.

In absence of restraints, the H-bond with A27 and G28 are longer and less stable. Conversely, the bond to T19 is shorter and stable. H-bond angles are also significantly much less stable.

In [64]:

pair.list.rst <- contactr(
 min.pdb,
 dist.thresh = 3.1, freq.thresh = 0.2,
 lgd.resno = 38,
 lgd.donors = c('O2', 'O1', 'N1'),
 lgd.acceptors = NA,
 donor.atoms = c('N', 'O', 'S', 'F'),
 acceptor.atoms = c('O', 'N', 'S', 'F', 'Cl', 'Br', 'I')
  )

[1] "Looking for contacts within 3.1 angstroms in 15 frames"

`summarise()` has grouped output by 'resid1', 'resno1', 'elety1', 'resid2',
'resno2'. You can override using the `.groups` argument.

pair.dist.rst <- lapply(
  pair.list.rst$pair,
  atom.distr,
  pdb.input = pdb.long,
  span = 0.01
) %>%
  do.call(rbind, .)

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O1 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O1 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

pair.angle.rst <- lapply(
  pair.list.rst$pair,
  bond.angler,
  pdb.input = pdb.long.h, 
  info = info.long, 
  span = 0.01, 
  clean = TRUE, 
  rectify = 50
) %>%
  do.call(rbind, .)

[1] "Processing pair: T11:N3-G33:O6"
[1] "Residues: T T G"
[1] "Numbers: 11 11 33"
[1] "Elements: N3 H3 O6"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: T11:N3-G34:O6"
[1] "Residues: T T G"
[1] "Numbers: 11 11 34"
[1] "Elements: N3 H3 O6"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: T12:N3-A35:N1"
[1] "Residues: T T A"
[1] "Numbers: 12 12 35"
[1] "Elements: N3 H3 N1"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G13:N2-G36:N3"
[1] "Residues: G G G"
[1] "Numbers: 13 13 36"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G13:N1-T37:O4'"
[1] "Residues: G G T"
[1] "Numbers: 13 13 37"
[1] "Elements: N1 H1 O4'"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A15:N6-A35:N7"
[1] "Residues: A A A"
[1] "Numbers: 15 15 35"
[1] "Elements: N6 H61 N7" "Elements: N6 H62 N7"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G16:N1-G34:N7"
[1] "Residues: G G G"
[1] "Numbers: 16 16 34"
[1] "Elements: N1 H1 N7"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G16:N2-G34:N7"
[1] "Residues: G G G"
[1] "Numbers: 16 16 34"
[1] "Elements: N2 H21 N7" "Elements: N2 H22 N7"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G16:N2-G34:O6"
[1] "Residues: G G G"
[1] "Numbers: 16 16 34"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G17:N2-T11:O2"
[1] "Residues: G G T"
[1] "Numbers: 17 17 11"
[1] "Elements: N2 H21 O2" "Elements: N2 H22 O2"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G17:N1-G33:N7"
[1] "Residues: G G G"
[1] "Numbers: 17 17 33"
[1] "Elements: N1 H1 N7"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G17:N2-G34:O6"
[1] "Residues: G G G"
[1] "Numbers: 17 17 34"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: T19:N3-A27:N1"
[1] "Residues: T T A"
[1] "Numbers: 19 19 27"
[1] "Elements: N3 H3 N1"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C20:N4-T32:O4"
[1] "Residues: C C T"
[1] "Numbers: 20 20 32"
[1] "Elements: N4 H41 O4" "Elements: N4 H42 O4"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G21:N2-G29:N3"
[1] "Residues: G G G"
[1] "Numbers: 21 21 29"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G21:N1-G31:O6"
[1] "Residues: G G G"
[1] "Numbers: 21 21 31"
[1] "Elements: N1 H1 O6"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G21:N2-G31:O6"
[1] "Residues: G G G"
[1] "Numbers: 21 21 31"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C24:N4-T22:O4'"
[1] "Residues: C C T"
[1] "Numbers: 24 24 22"
[1] "Elements: N4 H41 O4'" "Elements: N4 H42 O4'"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C24:N4-G29:O6"
[1] "Residues: C C G"
[1] "Numbers: 24 24 29"
[1] "Elements: N4 H41 O6" "Elements: N4 H42 O6"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G25:N2-G28:N7"
[1] "Residues: G G G"
[1] "Numbers: 25 25 28"
[1] "Elements: N2 H21 N7" "Elements: N2 H22 N7"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G25:N2-G29:O6"
[1] "Residues: G G G"
[1] "Numbers: 25 25 29"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C26:N4-G33:O6"
[1] "Residues: C C G"
[1] "Numbers: 26 26 33"
[1] "Elements: N4 H41 O6" "Elements: N4 H42 O6"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A27:N6-T19:O2"
[1] "Residues: A A T"
[1] "Numbers: 27 27 19"
[1] "Elements: N6 H61 O2" "Elements: N6 H62 O2"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G28:N2-C20:N3"
[1] "Residues: G G C"
[1] "Numbers: 28 28 20"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G28:N1-C20:O2"
[1] "Residues: G G C"
[1] "Numbers: 28 28 20"
[1] "Elements: N1 H1 O2"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G28:N2-T32:O4"
[1] "Residues: G G T"
[1] "Numbers: 28 28 32"
[1] "Elements: N2 H21 O4" "Elements: N2 H22 O4"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G29:N2-G21:N3"
[1] "Residues: G G G"
[1] "Numbers: 29 29 21"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G29:N1-C24:N3"
[1] "Residues: G G C"
[1] "Numbers: 29 29 24"
[1] "Elements: N1 H1 N3"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G29:N2-C24:O2"
[1] "Residues: G G C"
[1] "Numbers: 29 29 24"
[1] "Elements: N2 H21 O2" "Elements: N2 H22 O2"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G31:N1-G21:O6"
[1] "Residues: G G G"
[1] "Numbers: 31 31 21"
[1] "Elements: N1 H1 O6"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G31:N2-G21:O6"
[1] "Residues: G G G"
[1] "Numbers: 31 31 21"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G31:N1-T30:O4'"
[1] "Residues: G G T"
[1] "Numbers: 31 31 30"
[1] "Elements: N1 H1 O4'"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G33:N1-C26:N3"
[1] "Residues: G G C"
[1] "Numbers: 33 33 26"
[1] "Elements: N1 H1 N3"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G33:N2-C26:O2"
[1] "Residues: G G C"
[1] "Numbers: 33 33 26"
[1] "Elements: N2 H21 O2" "Elements: N2 H22 O2"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G34:N1-T11:O4"
[1] "Residues: G G T"
[1] "Numbers: 34 34 11"
[1] "Elements: N1 H1 O4"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G34:N2-T11:O4"
[1] "Residues: G G T"
[1] "Numbers: 34 34 11"
[1] "Elements: N2 H21 O4" "Elements: N2 H22 O4"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A35:N6-T12:O2"
[1] "Residues: A A T"
[1] "Numbers: 35 35 12"
[1] "Elements: N6 H61 O2" "Elements: N6 H62 O2"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A35:N6-A15:N1"
[1] "Residues: A A A"
[1] "Numbers: 35 35 15"
[1] "Elements: N6 H61 N1" "Elements: N6 H62 N1"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N2-G13:N3"
[1] "Residues: G G G"
[1] "Numbers: 36 36 13"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N1-A14:O4'"
[1] "Residues: G G A"
[1] "Numbers: 36 36 14"
[1] "Elements: N1 H1 O4'"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N1-A15:N1"
[1] "Residues: G G A"
[1] "Numbers: 36 36 15"
[1] "Elements: N1 H1 N1"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N2-A15:N1"
[1] "Residues: G G A"
[1] "Numbers: 36 36 15"
[1] "Elements: N2 H21 N1" "Elements: N2 H22 N1"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:O1-T18:O4"
[1] "Residues: LDP LDP T"
[1] "Numbers: 38 38 18"
[1] "Elements: O1 HO1 O4"
[1] "Processing hydrogen: HO1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:O2-T18:O4"
[1] "Residues: LDP LDP T"
[1] "Numbers: 38 38 18"
[1] "Elements: O2 HO2 O4"
[1] "Processing hydrogen: HO2"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:O1-T19:O4"
[1] "Residues: LDP LDP T"
[1] "Numbers: 38 38 19"
[1] "Elements: O1 HO1 O4"
[1] "Processing hydrogen: HO1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:N1-A27:N3"
[1] "Residues: LDP LDP A"
[1] "Numbers: 38 38 27"
[1] "Elements: N1 HN11 N3" "Elements: N1 HN12 N3" "Elements: N1 HN13 N3"
[1] "Processing hydrogen: HN11"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN12"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN13"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 75000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:N1-A27:O4'"
[1] "Residues: LDP LDP A"
[1] "Numbers: 38 38 27"
[1] "Elements: N1 HN11 O4'" "Elements: N1 HN12 O4'" "Elements: N1 HN13 O4'"
[1] "Processing hydrogen: HN11"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN12"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN13"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 75000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:N1-G28:O4'"
[1] "Residues: LDP LDP G"
[1] "Numbers: 38 38 28"
[1] "Elements: N1 HN11 O4'" "Elements: N1 HN12 O4'" "Elements: N1 HN13 O4'"
[1] "Processing hydrogen: HN11"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN12"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN13"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 75000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"

###

In [65]:

# pdb.noldp.5000 <- pdb.noldp.long
# pdb.noldp.5000$xyz <- pdb.noldp.long$xyz[seq(1, nrow(pdb.noldp.long$xyz), by = nrow(pdb.noldp.long$xyz)/5000),]
# 
# pair.list.noldp <- contactr(pdb.noldp.5000, 3.1, freq.thresh = 0.05)

In [66]:

pair.dist.unrst <- lapply(
  pair.list.rst$pair, #recycling of same contacts than rst!
  atom.distr,
  pdb.input = pdb.unrst.long,
  span = 0.01
) %>%
  do.call(rbind, .)

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O1 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O1 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

pair.angle.unrst <- lapply(
  pair.list.rst$pair, #recycling of same contacts than rst!
  bond.angler,
  pdb.input = pdb.unrst.h.long, 
  info = info.long, 
  span = 0.01, 
  clean = TRUE, 
  rectify = 50
) %>%
  do.call(rbind, .)

[1] "Processing pair: T11:N3-G33:O6"
[1] "Residues: T T G"
[1] "Numbers: 11 11 33"
[1] "Elements: N3 H3 O6"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: T11:N3-G34:O6"
[1] "Residues: T T G"
[1] "Numbers: 11 11 34"
[1] "Elements: N3 H3 O6"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: T12:N3-A35:N1"
[1] "Residues: T T A"
[1] "Numbers: 12 12 35"
[1] "Elements: N3 H3 N1"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G13:N2-G36:N3"
[1] "Residues: G G G"
[1] "Numbers: 13 13 36"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G13:N1-T37:O4'"
[1] "Residues: G G T"
[1] "Numbers: 13 13 37"
[1] "Elements: N1 H1 O4'"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A15:N6-A35:N7"
[1] "Residues: A A A"
[1] "Numbers: 15 15 35"
[1] "Elements: N6 H61 N7" "Elements: N6 H62 N7"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G16:N1-G34:N7"
[1] "Residues: G G G"
[1] "Numbers: 16 16 34"
[1] "Elements: N1 H1 N7"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G16:N2-G34:N7"
[1] "Residues: G G G"
[1] "Numbers: 16 16 34"
[1] "Elements: N2 H21 N7" "Elements: N2 H22 N7"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G16:N2-G34:O6"
[1] "Residues: G G G"
[1] "Numbers: 16 16 34"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G17:N2-T11:O2"
[1] "Residues: G G T"
[1] "Numbers: 17 17 11"
[1] "Elements: N2 H21 O2" "Elements: N2 H22 O2"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G17:N1-G33:N7"
[1] "Residues: G G G"
[1] "Numbers: 17 17 33"
[1] "Elements: N1 H1 N7"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G17:N2-G34:O6"
[1] "Residues: G G G"
[1] "Numbers: 17 17 34"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: T19:N3-A27:N1"
[1] "Residues: T T A"
[1] "Numbers: 19 19 27"
[1] "Elements: N3 H3 N1"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C20:N4-T32:O4"
[1] "Residues: C C T"
[1] "Numbers: 20 20 32"
[1] "Elements: N4 H41 O4" "Elements: N4 H42 O4"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G21:N2-G29:N3"
[1] "Residues: G G G"
[1] "Numbers: 21 21 29"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G21:N1-G31:O6"
[1] "Residues: G G G"
[1] "Numbers: 21 21 31"
[1] "Elements: N1 H1 O6"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G21:N2-G31:O6"
[1] "Residues: G G G"
[1] "Numbers: 21 21 31"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C24:N4-T22:O4'"
[1] "Residues: C C T"
[1] "Numbers: 24 24 22"
[1] "Elements: N4 H41 O4'" "Elements: N4 H42 O4'"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C24:N4-G29:O6"
[1] "Residues: C C G"
[1] "Numbers: 24 24 29"
[1] "Elements: N4 H41 O6" "Elements: N4 H42 O6"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G25:N2-G28:N7"
[1] "Residues: G G G"
[1] "Numbers: 25 25 28"
[1] "Elements: N2 H21 N7" "Elements: N2 H22 N7"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G25:N2-G29:O6"
[1] "Residues: G G G"
[1] "Numbers: 25 25 29"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C26:N4-G33:O6"
[1] "Residues: C C G"
[1] "Numbers: 26 26 33"
[1] "Elements: N4 H41 O6" "Elements: N4 H42 O6"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A27:N6-T19:O2"
[1] "Residues: A A T"
[1] "Numbers: 27 27 19"
[1] "Elements: N6 H61 O2" "Elements: N6 H62 O2"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G28:N2-C20:N3"
[1] "Residues: G G C"
[1] "Numbers: 28 28 20"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G28:N1-C20:O2"
[1] "Residues: G G C"
[1] "Numbers: 28 28 20"
[1] "Elements: N1 H1 O2"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G28:N2-T32:O4"
[1] "Residues: G G T"
[1] "Numbers: 28 28 32"
[1] "Elements: N2 H21 O4" "Elements: N2 H22 O4"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G29:N2-G21:N3"
[1] "Residues: G G G"
[1] "Numbers: 29 29 21"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G29:N1-C24:N3"
[1] "Residues: G G C"
[1] "Numbers: 29 29 24"
[1] "Elements: N1 H1 N3"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G29:N2-C24:O2"
[1] "Residues: G G C"
[1] "Numbers: 29 29 24"
[1] "Elements: N2 H21 O2" "Elements: N2 H22 O2"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G31:N1-G21:O6"
[1] "Residues: G G G"
[1] "Numbers: 31 31 21"
[1] "Elements: N1 H1 O6"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G31:N2-G21:O6"
[1] "Residues: G G G"
[1] "Numbers: 31 31 21"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G31:N1-T30:O4'"
[1] "Residues: G G T"
[1] "Numbers: 31 31 30"
[1] "Elements: N1 H1 O4'"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G33:N1-C26:N3"
[1] "Residues: G G C"
[1] "Numbers: 33 33 26"
[1] "Elements: N1 H1 N3"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G33:N2-C26:O2"
[1] "Residues: G G C"
[1] "Numbers: 33 33 26"
[1] "Elements: N2 H21 O2" "Elements: N2 H22 O2"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G34:N1-T11:O4"
[1] "Residues: G G T"
[1] "Numbers: 34 34 11"
[1] "Elements: N1 H1 O4"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G34:N2-T11:O4"
[1] "Residues: G G T"
[1] "Numbers: 34 34 11"
[1] "Elements: N2 H21 O4" "Elements: N2 H22 O4"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A35:N6-T12:O2"
[1] "Residues: A A T"
[1] "Numbers: 35 35 12"
[1] "Elements: N6 H61 O2" "Elements: N6 H62 O2"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A35:N6-A15:N1"
[1] "Residues: A A A"
[1] "Numbers: 35 35 15"
[1] "Elements: N6 H61 N1" "Elements: N6 H62 N1"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N2-G13:N3"
[1] "Residues: G G G"
[1] "Numbers: 36 36 13"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N1-A14:O4'"
[1] "Residues: G G A"
[1] "Numbers: 36 36 14"
[1] "Elements: N1 H1 O4'"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N1-A15:N1"
[1] "Residues: G G A"
[1] "Numbers: 36 36 15"
[1] "Elements: N1 H1 N1"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N2-A15:N1"
[1] "Residues: G G A"
[1] "Numbers: 36 36 15"
[1] "Elements: N2 H21 N1" "Elements: N2 H22 N1"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:O1-T18:O4"
[1] "Residues: LDP LDP T"
[1] "Numbers: 38 38 18"
[1] "Elements: O1 HO1 O4"
[1] "Processing hydrogen: HO1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:O2-T18:O4"
[1] "Residues: LDP LDP T"
[1] "Numbers: 38 38 18"
[1] "Elements: O2 HO2 O4"
[1] "Processing hydrogen: HO2"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:O1-T19:O4"
[1] "Residues: LDP LDP T"
[1] "Numbers: 38 38 19"
[1] "Elements: O1 HO1 O4"
[1] "Processing hydrogen: HO1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:N1-A27:N3"
[1] "Residues: LDP LDP A"
[1] "Numbers: 38 38 27"
[1] "Elements: N1 HN11 N3" "Elements: N1 HN12 N3" "Elements: N1 HN13 N3"
[1] "Processing hydrogen: HN11"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN12"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN13"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 75000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:N1-A27:O4'"
[1] "Residues: LDP LDP A"
[1] "Numbers: 38 38 27"
[1] "Elements: N1 HN11 O4'" "Elements: N1 HN12 O4'" "Elements: N1 HN13 O4'"
[1] "Processing hydrogen: HN11"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN12"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN13"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 75000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: LDP38:N1-G28:O4'"
[1] "Residues: LDP LDP G"
[1] "Numbers: 38 38 28"
[1] "Elements: N1 HN11 O4'" "Elements: N1 HN12 O4'" "Elements: N1 HN13 O4'"
[1] "Processing hydrogen: HN11"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN12"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: HN13"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 75000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"

###

In [67]:

pair.dist.noldp <- lapply(
  #recycling of same contacts than rst!
  #but without LDP
  pair.list.rst$pair[!str_detect(pair.list.rst$pair, "LDP")], 
  atom.distr,
  pdb.input = pdb.noldp.long,
  span = 0.01
) %>%
  do.call(rbind, .)

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

pair.angle.noldp <- lapply(
  #recycling of same contacts than rst!
  #but without LDP
  pair.list.rst$pair[!str_detect(pair.list.rst$pair, "LDP")], 
  bond.angler,
  pdb.input = pdb.noldp.h.long, 
  info = info.long, 
  span = 0.01, 
  clean = TRUE, 
  rectify = 50
) %>%
  do.call(rbind, .)

[1] "Processing pair: T11:N3-G33:O6"
[1] "Residues: T T G"
[1] "Numbers: 11 11 33"
[1] "Elements: N3 H3 O6"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: T11:N3-G34:O6"
[1] "Residues: T T G"
[1] "Numbers: 11 11 34"
[1] "Elements: N3 H3 O6"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: T12:N3-A35:N1"
[1] "Residues: T T A"
[1] "Numbers: 12 12 35"
[1] "Elements: N3 H3 N1"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G13:N2-G36:N3"
[1] "Residues: G G G"
[1] "Numbers: 13 13 36"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G13:N1-T37:O4'"
[1] "Residues: G G T"
[1] "Numbers: 13 13 37"
[1] "Elements: N1 H1 O4'"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A15:N6-A35:N7"
[1] "Residues: A A A"
[1] "Numbers: 15 15 35"
[1] "Elements: N6 H61 N7" "Elements: N6 H62 N7"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G16:N1-G34:N7"
[1] "Residues: G G G"
[1] "Numbers: 16 16 34"
[1] "Elements: N1 H1 N7"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G16:N2-G34:N7"
[1] "Residues: G G G"
[1] "Numbers: 16 16 34"
[1] "Elements: N2 H21 N7" "Elements: N2 H22 N7"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G16:N2-G34:O6"
[1] "Residues: G G G"
[1] "Numbers: 16 16 34"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G17:N2-T11:O2"
[1] "Residues: G G T"
[1] "Numbers: 17 17 11"
[1] "Elements: N2 H21 O2" "Elements: N2 H22 O2"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G17:N1-G33:N7"
[1] "Residues: G G G"
[1] "Numbers: 17 17 33"
[1] "Elements: N1 H1 N7"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G17:N2-G34:O6"
[1] "Residues: G G G"
[1] "Numbers: 17 17 34"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: T19:N3-A27:N1"
[1] "Residues: T T A"
[1] "Numbers: 19 19 27"
[1] "Elements: N3 H3 N1"
[1] "Processing hydrogen: H3"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C20:N4-T32:O4"
[1] "Residues: C C T"
[1] "Numbers: 20 20 32"
[1] "Elements: N4 H41 O4" "Elements: N4 H42 O4"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G21:N2-G29:N3"
[1] "Residues: G G G"
[1] "Numbers: 21 21 29"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G21:N1-G31:O6"
[1] "Residues: G G G"
[1] "Numbers: 21 21 31"
[1] "Elements: N1 H1 O6"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G21:N2-G31:O6"
[1] "Residues: G G G"
[1] "Numbers: 21 21 31"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C24:N4-T22:O4'"
[1] "Residues: C C T"
[1] "Numbers: 24 24 22"
[1] "Elements: N4 H41 O4'" "Elements: N4 H42 O4'"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C24:N4-G29:O6"
[1] "Residues: C C G"
[1] "Numbers: 24 24 29"
[1] "Elements: N4 H41 O6" "Elements: N4 H42 O6"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G25:N2-G28:N7"
[1] "Residues: G G G"
[1] "Numbers: 25 25 28"
[1] "Elements: N2 H21 N7" "Elements: N2 H22 N7"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G25:N2-G29:O6"
[1] "Residues: G G G"
[1] "Numbers: 25 25 29"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: C26:N4-G33:O6"
[1] "Residues: C C G"
[1] "Numbers: 26 26 33"
[1] "Elements: N4 H41 O6" "Elements: N4 H42 O6"
[1] "Processing hydrogen: H41"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H42"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A27:N6-T19:O2"
[1] "Residues: A A T"
[1] "Numbers: 27 27 19"
[1] "Elements: N6 H61 O2" "Elements: N6 H62 O2"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G28:N2-C20:N3"
[1] "Residues: G G C"
[1] "Numbers: 28 28 20"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G28:N1-C20:O2"
[1] "Residues: G G C"
[1] "Numbers: 28 28 20"
[1] "Elements: N1 H1 O2"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G28:N2-T32:O4"
[1] "Residues: G G T"
[1] "Numbers: 28 28 32"
[1] "Elements: N2 H21 O4" "Elements: N2 H22 O4"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G29:N2-G21:N3"
[1] "Residues: G G G"
[1] "Numbers: 29 29 21"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G29:N1-C24:N3"
[1] "Residues: G G C"
[1] "Numbers: 29 29 24"
[1] "Elements: N1 H1 N3"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G29:N2-C24:O2"
[1] "Residues: G G C"
[1] "Numbers: 29 29 24"
[1] "Elements: N2 H21 O2" "Elements: N2 H22 O2"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G31:N1-G21:O6"
[1] "Residues: G G G"
[1] "Numbers: 31 31 21"
[1] "Elements: N1 H1 O6"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G31:N2-G21:O6"
[1] "Residues: G G G"
[1] "Numbers: 31 31 21"
[1] "Elements: N2 H21 O6" "Elements: N2 H22 O6"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G31:N1-T30:O4'"
[1] "Residues: G G T"
[1] "Numbers: 31 31 30"
[1] "Elements: N1 H1 O4'"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G33:N1-C26:N3"
[1] "Residues: G G C"
[1] "Numbers: 33 33 26"
[1] "Elements: N1 H1 N3"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G33:N2-C26:O2"
[1] "Residues: G G C"
[1] "Numbers: 33 33 26"
[1] "Elements: N2 H21 O2" "Elements: N2 H22 O2"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G34:N1-T11:O4"
[1] "Residues: G G T"
[1] "Numbers: 34 34 11"
[1] "Elements: N1 H1 O4"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G34:N2-T11:O4"
[1] "Residues: G G T"
[1] "Numbers: 34 34 11"
[1] "Elements: N2 H21 O4" "Elements: N2 H22 O4"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A35:N6-T12:O2"
[1] "Residues: A A T"
[1] "Numbers: 35 35 12"
[1] "Elements: N6 H61 O2" "Elements: N6 H62 O2"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: A35:N6-A15:N1"
[1] "Residues: A A A"
[1] "Numbers: 35 35 15"
[1] "Elements: N6 H61 N1" "Elements: N6 H62 N1"
[1] "Processing hydrogen: H61"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H62"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N2-G13:N3"
[1] "Residues: G G G"
[1] "Numbers: 36 36 13"
[1] "Elements: N2 H21 N3" "Elements: N2 H22 N3"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N1-A14:O4'"
[1] "Residues: G G A"
[1] "Numbers: 36 36 14"
[1] "Elements: N1 H1 O4'"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N1-A15:N1"
[1] "Residues: G G A"
[1] "Numbers: 36 36 15"
[1] "Elements: N1 H1 N1"
[1] "Processing hydrogen: H1"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 25000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"
[1] "Processing pair: G36:N2-A15:N1"
[1] "Residues: G G A"
[1] "Numbers: 36 36 15"
[1] "Elements: N2 H21 N1" "Elements: N2 H22 N1"
[1] "Processing hydrogen: H21"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Processing hydrogen: H22"
 Union of selects
 *  Selected a total of: 3 atoms  *
[1] "Length of angle list before cleaning: 50000"
[1] "Length of angle list after cleaning: 25000"
[1] "Finished angle list nrow 25000"

###

In [68]:

(
  pair.dist.rst %>%
    filter(
      str_detect(pair, 'LDP') | 
        (str_detect(pair, 'G33') & str_detect(pair, 'C26'))
    ) %>% 
    ggplot(., aes(x = t, y = dist.loess, color = pair)) +
    geom_hline(yintercept = 3.0, linetype = 'dashed') +
    geom_text_repel(
      data = . %>% filter(t == max(t)),
      aes(label = pair),
      xlim = c(1020, 1100),
      fontface = 'bold',
      show.legend = FALSE,
      size = 5*scaling
    ) +
    geom_line(show.legend = FALSE,
              linewidth = 0.75 * scaling) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(limits = c(2.5, 3.5), expand = c(0,0)) +
    scale_color_d3() +
    labs(x = 'Time (ns)', y = 'Distance (Å)') +
    custom.theme(scaling) +
    theme(plot.margin = margin(1, 150, 1, 1)) +
    coord_cartesian(clip = 'off')
) +
  (
    pair.dist.unrst %>%
      filter(
        str_detect(pair, 'LDP') | 
          (str_detect(pair, 'G33') & str_detect(pair, 'C26'))
      ) %>% 
      ggplot(., aes(x = t, y = dist.loess, color = pair)) +
      geom_hline(yintercept = 3.0, linetype = 'dashed') +
      geom_text_repel(
        data = . %>% filter(t == max(t)),
        aes(label = pair),
        xlim = c(1020, 1100),
        fontface = 'bold',
        show.legend = FALSE,
        size = 5*scaling
      ) +
      geom_line(show.legend = FALSE,
                linewidth = 0.75 * scaling) +
      scale_x_continuous(expand = c(0, 0)) +
      scale_y_continuous(limits = c(2.5, 3.5), expand = c(0,0)) +
      scale_color_d3() +
      labs(x = 'Time (ns)', y = 'Distance (Å)') +
      custom.theme(scaling) +
      theme(plot.margin = margin(1, 150, 1, 1)) +
      coord_cartesian(clip = 'off')
  ) +
  plot_layout(ncol = 2)

Warning: Removed 2 rows containing missing values (`geom_text_repel()`).

Warning: Removed 1938 rows containing missing values (`geom_line()`).

###

Figure 23: Lengths of H-bonds (donor-acceptor distance) involved in dopamine binding during the restrained (top) and unrestrained (bottom) simulations. The lines are the result of local polynomial regression fitting with a span of 0.01. The canonical C26•G33 bp was used as a reference.

In [69]:

(
  pair.angle.rst %>% 
    filter(
      str_detect(pair, 'LDP') | 
        (str_detect(pair, 'G33') & str_detect(pair, 'C26'))
    ) %>% 
    ggplot(., aes(x = t, y = angle.loess, color = pair)) +
    geom_hline(yintercept = c(120, 140), linetype = 'dashed', linewidth = 0.75 * scaling) +
    geom_text_repel(
      data = . %>% filter(t == max(t)),
      aes(label = pair),
      xlim = c(1020, 1100),
      fontface = 'bold',
      show.legend = FALSE,
      size = 5*scaling
    ) +
    geom_line(linewidth = 0.75, show.legend = FALSE) +
    labs(x = 't (ns)', y = 'Angle (°)') +
    custom.theme(1) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0), limits = c(100, 180),
                       breaks = c(100, 120, 140, 160, 180)) +
    scale_color_d3() +
    theme(plot.margin = margin(10, 150, 1, 1)) +
    coord_cartesian(clip = 'off')
) + (
  pair.angle.unrst %>% 
    filter(
      str_detect(pair, 'LDP') | 
        (str_detect(pair, 'G33') & str_detect(pair, 'C26'))
    ) %>% 
    ggplot(., aes(x = t, y = angle.loess, color = pair)) +
    geom_hline(yintercept = 120, linetype = 'dashed', linewidth = 0.75 * scaling) +
    geom_text_repel(
      data = . %>% filter(t == max(t)),
      aes(label = pair),
      xlim = c(1020, 1100),
      fontface = 'bold',
      show.legend = FALSE,
      size = 5*scaling
    ) +
    geom_line(linewidth = 0.75, show.legend = FALSE) +
    labs(x = 't (ns)', y = 'Angle (°)') +
    custom.theme(1) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_continuous(expand = c(0, 0), limits = c(100, 180),
                       breaks = c(100, 120, 140, 160, 180)) +
    scale_color_d3() +
    theme(plot.margin = margin(10, 150, 1, 1)) +
    coord_cartesian(clip = 'off')
)

###

Figure 24: Angles of H-bonds (donor-H-acceptor atoms) established by the dopamine ligand during the microsecond simulation, with the canonical C26•G33 bp used as a reference. When multiple interconverting hydrogen atoms are involved, all are considered and each data point reflect the angle formed by the hydrogen most likely to establish an H-bond. The lines are the result of local polynomial regression fitting with a span of 0.01. The angle stays below 140 ° during the simulation in the case of A27:O4’, which is suboptimal to establish an H-bond. Note that the hydrogen atoms interconvert frequently during the simulation; all three atoms were considered and .

T18 is off-plane as it also binds to the Hoogsteen face of G33 stacked ‘under’ the dopamine. G33 is itself base pairing canonically to C26 (see also Figure 14 (a)).

Conversely, T19 is coplanar to the dopamine, and the two form a triplet with A27. T19 and A27 are arranged in a reverse WC configuration (see also Figure 14 (b)).

In absence of NMR restraints, we noticed that the ammonium side-chain flips between configurations ‘above’ or ‘below’ the catechol plane. We performed PCA on the binding pocket (T18:T19-C26:G28-T32:G33 and dopamine) to identify frames representative of the different configurations.

In [70]:

# Create a binding pocket trimmed pdb object
# from the long unrestrained simulation

core.pdb <- pdb.unrst.long %>% 
  trim.pdb(
    .,
    atom.select(
      .,
      resno = c(18, 19, 26, 27, 28, 32, 33, 38)
    )
  )

# Re-align frames to the binding pocket only
core.pdb$xyz <- fit.xyz(
  fixed = core.pdb$xyz[1,],
  mobile = core.pdb$xyz,
  fixed.inds = atom.select(core.pdb, elety = unique(core.pdb$atom$elety))$xyz,
  mobile.inds = atom.select(core.pdb, elety = unique(core.pdb$atom$elety))$xyz
)


# Perform PCA
pca_unrst_core <- pca.pdbs(
  core.pdb, 
  use.svd = FALSE, 
  rm.gaps = TRUE, 
  fit = FALSE #pdb models are already aligned at the import stage
)

# Scree plot
scree_unrst_core <- data.frame(
  pc = 1:length(pca_unrst_core$L),
  L = pca_unrst_core$L
) %>% 
  mutate(
    var = L/sum(L) * 100,
    cum.var = cumsum(var)
  ) %>% 
  filter(pc <= 15) %>%
  select(-L) %>%
  mutate(
    label = ifelse(
      pc %in% 1:3 | 
        pc %in% seq(4, nrow(.), 3) | 
        pc == nrow(.), 
      signif(cum.var, 3), NA)
  ) %>% 
  ggplot(., aes(x = pc, y = var)) +
  geom_text_repel(aes(label = label), size = 5, fontface = 'bold', force = 100) +
  geom_line(linewidth = 0.75) +
  geom_point(size = 2) +
  custom.theme(scaling) +
  labs(
    x = 'Number of Principal Components',
    y = 'Proportion of variance (%)'
  ) 

# NbClust determination [LONG!]
pca.nb.unrst <- NbClust(
  data.frame(pca_unrst_core$z) %>% select(all_of(1:4)),
  distance = "euclidean",
  min.nc = 3, max.nc = 5,
  method = "kmeans"
)

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 5 proposed 3 as the best number of clusters 
* 15 proposed 4 as the best number of clusters 
* 3 proposed 5 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  4 
 
 
*******************************************************************

# count the number of times each number of clusters was selected as the best
tbl.clusters <- table(pca.nb.unrst$Best.partition)
frac.clust.1 <- round(tbl.clusters[1]/sum(tbl.clusters)*100, 1)
frac.clust.4 <- round(tbl.clusters[4]/sum(tbl.clusters)*100, 1)

# plot loadings
p.loads.unrst <- data.frame(
  u = c(pca_unrst_core$U[,1], pca_unrst_core$U[,2], pca_unrst_core$U[,3]),
  eleno = rep(core.pdb$atom$eleno, 3),
  PC = c(rep('PC1', length(pca_unrst_core$U[,1])),
         rep('PC2', length(pca_unrst_core$U[,2])),
         rep('PC3', length(pca_unrst_core$U[,3])))
) %>% 
  left_join(
    core.pdb$atom,
    by = c("eleno" = "eleno")
  ) %>% 
  group_by(resno, resid, PC, chain) %>%
  summarise(u = sum(abs(u))) %>% 
  mutate(resno = if_else(resid == 'LDP', 38, resno),
         resid = gsub('^D', '', resid),
         label = if_else(
           resid == 'LDP', 
           #make LDP bold and pink
           paste0('<span style="color:pink"><b>', resid, '</b></span>'),
           paste0('<b>', resid, resno, '</b>')
         )
  ) %>%
  filter(PC %in% paste0('PC', 1:2)) %>% 
  ggplot(aes(
    x = factor(
      resno,
      levels = unique(resno),
      labels = unique(label)
    ),
    y = u,
    fill = chain
  )
  ) +
  facet_wrap(~PC, ncol = 1) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c('grey', 'pink')) +
  scale_y_continuous(expand = c(0, 0)) +
  custom.theme(1) +
  theme(axis.title.x = element_blank(),
        legend.position = 'none') +
  labs(y = 'sum of absolute loadings')

`summarise()` has grouped output by 'resno', 'resid', 'PC'. You can override
using the `.groups` argument.

#full plot
p.pca.unrst <- scree_unrst_core +
  pca.plotr.traj(pca_unrst_core, 'PC1', 'PC2', nb = pca.nb.unrst) +
  p.loads.unrst +
  plot_layout(
    design = '
              AB
              CC
              ') &
  plot_annotation(tag_levels = c('A', 'B', 'C', 'D'))

Warning in geom_point(data = scores %>% filter(State %in%
representative_states), : Ignoring unknown parameters: `linewidth`

#remake a core but with hydrogens for figure below
core.pdb.h <- pdb.unrst.h.long %>% 
  trim.pdb(
    .,
    atom.select(
      .,
      resno = c(18, 19, 26, 27, 28, 32, 33, 38)
    )
  )

core.pdb.h$xyz <- core.pdb.h$xyz[c(22927, 13823, 11460, 18656),]

# Re-align frames to the binding pocket only
core.pdb.h$xyz <- fit.xyz(
  fixed = core.pdb.h$xyz[1,],
  mobile = core.pdb.h$xyz,
  fixed.inds = atom.select(core.pdb.h, elety = unique(core.pdb.h$atom$elety))$xyz,
  mobile.inds = atom.select(core.pdb.h, elety = unique(core.pdb.h$atom$elety))$xyz
)

if (!file.exists('data/core.h.pdb')) {
  write.pdb(core.pdb.h, file = 'data/core.h.pdb')
}

In [71]:

p.pca.unrst

Warning: Removed 7 rows containing missing values (`geom_text_repel()`).

Figure 26: Principal component analysis on the coordinates of the binding pocket in the unrestrained microsecond trajectory. Scree plot (A) showing the contribution of each principal component on the total variance and the cumulative variance labelled on selected data points. (B) Score plots along the first two principal components, colored by kmeans clusters. (C) Sum of absolute loadings of residues for the first two first principal components

Four clusters were identified and the structures closest to their centroids are shown in Figure 27. The main difference if the configuration of the ammonium chain. The ammonium binds to either:

The O4’ and N3 of G33, which is stacked under the dopamine (top-left structure)
the O2 of T32, which is stacked above the dopamine (top-right structure)
the O4’ and N3 of G28 (bottom-left structure)
the O2 of T32 and N3 of G28, in a configuration midway between the two above (bottom-right structure)

The first configuration only accounts for 2.6% of the simulation. In the three latter case, T32 overlaps more with the catechol moiety, which increases \(\pi-\pi\) interactions. The fourth configuration is the more frequent (41.9% of the frames are in this cluster).

Neither of the four configurations follows exactly the restrained one. The ‘above’ configurations are closer to it, but the ammonium binds differently in each three case. It is folded back above the chain (akin to a scorpio) and does not bind to A27. However, beyond these four discrete examples, we evidence below that the ammonium is still able to bind to A27:N3 in around of third of the frames (Figure 28). For reference, it is almost always bound when restrained (Figure 28).

Figure 27: Binding modes in absence of NMR restraints. Four main binding modes have been evidenced by PCA, mostly varying by the configuration of the amino chain.

2.6 H-bond formation and stability

2.6.1 Complex

Besides those from the binding site, the formation of all H-bonds was monitored over the course of the simulation. Below, the list of monitored bonds is based on H-bonds detected in the minimized structures obtained by rMD; therefore Figure 28 ignores by design bonds formed exclusively in the uMD simulation and/or in absence of dopamine. The purpose of Figure 28 is therefore to establish the stability of H-bonds during the rMD simulation and assess whether they ‘survived’ in absence of restraints and dopamine.

The tight binding of dopamine to T18:O4 by the catechol moiety is confirmed. It is still present in absence of restraints but less frequently. The ammonium group almost always bind the sugar of G28 and the N3 of A27. Similarly, the frequency of H-bond formation is decreased in absence of restraints.

The less canonical binding to the sugar of the A27 is also observed, with low frequency but across the whole simulation, in the rMD. It is almost absent in absence of restraints, as seen above.

Conversely to the observations above, the H-bonding of dopamine to T19 is less frequent and tight in the rMD than in the uMD, as expected from bond length analysis above (Figure 23).

In [72]:

pair.rst.freq <- bond.freqr(
  dist = pair.dist.rst,
  angl = pair.angle.rst,
  dist.cutoff = 3.0,
  angl.cutoff = 140,
  dist.lax.cutoff = 3.5,
  angl.lax.cutoff = 120
)

`summarise()` has grouped output by 'frame', 't'. You can override using the
`.groups` argument.

pair.unrst.freq <- bond.freqr(
  dist = pair.dist.unrst,
  angl = pair.angle.unrst,
  dist.cutoff = 3.0,
  angl.cutoff = 140,
  dist.lax.cutoff = 3.5,
  angl.lax.cutoff = 120
)

`summarise()` has grouped output by 'frame', 't'. You can override using the
`.groups` argument.

pair.noldp.freq <- bond.freqr(
  dist = pair.dist.noldp,
  angl = pair.angle.noldp,
  dist.cutoff = 3.0,
  angl.cutoff = 140,
  dist.lax.cutoff = 3.5,
  angl.lax.cutoff = 120
)

`summarise()` has grouped output by 'frame', 't'. You can override using the
`.groups` argument.

###

In [73]:

all.exp <- list(
  bond.index = rbind(
    pair.rst.freq$bond.index %>% mutate(exp = 'rMD'),
    pair.unrst.freq$bond.index %>% mutate(exp = 'uMD'),
    pair.noldp.freq$bond.index %>% mutate(exp = 'no LDP')
  ),
  bond.freq = rbind(
    pair.rst.freq$bond.freq %>% mutate(exp = 'rMD'),
    pair.unrst.freq$bond.freq %>% mutate(exp = 'uMD'),
    pair.noldp.freq$bond.freq %>% mutate(exp = 'no LDP')
  )
)

hbond.cumul.plotr(
  all.exp, freq.cutoff = 0.10,
                  facet = TRUE, 
  main.exp = 'rMD', 
  exp.order = c('rMD', 'uMD', 'no LDP'),
  highlight = 'LDP', 
  highlight.region = c(11:16, 34:37)
  )

###
###
###

Figure 28: Formation of H-bonds over the course of the simulations with restraints (rMD), without restraints (uMD) and unrestrained with no dopamine (no LDP). The list of H-bonds was established on the minimized structures from the rMD simulations, with a maximum donor-acceptor distance of maximum 3.1 Å, keeping those formed in at least 20% of structures. H-bonds in the 5’- and 3’-end regions (residues 11:17 and 33:37() are colored in blue, and those involving the dopamine in pink. Top: each line is a frame (one frame every 0.04 ns); the darker a frame, the shorter and more linear the H-bond (the transparency follows linearly the bond angle/length ratio). Bottom: Cumulative frequencies. The frequency of bond formation is shown for relaxed (3.5 Å and 120°; light colors) and more stringent (3.0 Å and 140°; dark colors) cut-offs. Only inter-residue H-bonds with a ‘stringent’ frequency above 0.10 are shown.

2.6.2 (Pre-folded) free DNA

Generally speaking, the residues in the 5’ and 3’-end regions (in blue in Figure 28) are remarkably well conserved in absence of dopamine, when starting the simulation from the folded structure. This may indicate that the oligonucleotide can prefold into a short stem intermediate before the binding occurs between this stem and the very dynamic loops, after which the latter could finish folding.

PCA on the trajectory yielded three clusters (Figure 29), whose centroid coordinates are shown in Figure 30. The terminus region can be nicely aligned across clusters, but also against the complex, further demonstrating that the stem is very stable during the simulation, and retains the complex structure.

The loop region also maintains the same local folding in all clusters (it can be locally aligned), however its orientation relative to the terminus stem changes dramatically. It can be orthogonal (cluster 1; Figure 30; left), yielding a linear configuration (‘open’), or parallel, forming a more globular structure (‘closed’), closer to that of the complex (cluster 3; Figure 30; right). The second cluster contain intermediate conformations between these two extrema (cluster 2; Figure 30; center).

In [74]:

#PCA
pca_noldp <- pca.pdbs(
  pdb.noldp.long, 
  use.svd = FALSE, 
  rm.gaps = TRUE, 
  fit = FALSE #pdb models are already aligned at the import stage
)

scree <- data.frame(
  pc = 1:length(pca_noldp$L),
  L = pca_noldp$L
) %>% 
  mutate(
    var = L/sum(L) * 100,
    cum.var = cumsum(var)
  ) %>% 
  filter(pc <= 15) %>%
  select(-L) %>%
  mutate(
    label = ifelse(
      pc %in% 1:3 | 
        pc %in% seq(4, nrow(.), 3) | 
        pc == nrow(.), 
      signif(cum.var, 3), NA)
  ) %>% 
  ggplot(., aes(x = pc, y = var)) +
  geom_text_repel(aes(label = label), size = 5, fontface = 'bold', force = 100) +
  geom_line(linewidth = 0.75) +
  geom_point(size = 2) +
  custom.theme(scaling) +
  labs(
    x = 'Number of Principal Components',
    y = 'Proportion of variance (%)'
  )

In [75]:

pca.nb.noldp <- NbClust(
  data.frame(pca_noldp$z) %>% select(all_of(1:4)),
  distance = "euclidean",
  min.nc = 3, max.nc = 5,
  method = "kmeans"
)

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 11 proposed 3 as the best number of clusters 
* 8 proposed 4 as the best number of clusters 
* 5 proposed 5 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  3 
 
 
*******************************************************************

In [76]:

p.loads.noldp <- data.frame(
  u = c(pca_noldp$U[,1], pca_noldp$U[,2], pca_noldp$U[,3]),
  eleno = rep(pdb.noldp.long$atom$eleno, 3),
  PC = c(rep('PC1', length(pca_noldp$U[,1])),
         rep('PC2', length(pca_noldp$U[,2])),
         rep('PC3', length(pca_noldp$U[,3])))
) %>% 
  left_join(
    pdb.noldp.long$atom,
    by = c("eleno" = "eleno")
  ) %>% 
  group_by(resno, resid, PC, chain) %>%
  summarise(u = sum(abs(u))) %>% 
  mutate(
    resno = if_else(resid == 'LDP', 38, resno),
    resid = gsub('^D', '', resid),
    label = if_else(
      resid == 'LDP', 
      #make LDP bold and pink
      paste0('<span style="color:pink"><b>', resid, '</b></span>'),
      paste0('<b>', resid, resno, '</b>')
    )
  ) %>%
  filter(PC %in% paste0('PC', 1:2)) %>% 
  ggplot(aes(
    x = factor(
      resno,
      levels = unique(resno),
      labels = unique(label)
    ),
    y = u,
    fill = chain
  )
  ) +
  facet_wrap(~PC, ncol = 1) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c('grey', 'pink')) +
  scale_y_continuous(expand = c(0, 0)) +
  custom.theme(1) +
  theme(axis.title.x = element_blank(),
        legend.position = 'none') +
  labs(y = 'sum of absolute loadings')

`summarise()` has grouped output by 'resno', 'resid', 'PC'. You can override
using the `.groups` argument.

p.pca.noldp <- scree +
  pca.plotr.traj(pca_noldp, 'PC1', 'PC2', nb = pca.nb.noldp) +
  p.loads.noldp +
  plot_layout(
    design = '
              AB
              CC
              ') &
  plot_annotation(tag_levels = c('A', 'B', 'C', 'D'))

Warning in geom_point(data = scores %>% filter(State %in%
representative_states), : Ignoring unknown parameters: `linewidth`

In [77]:

p.pca.noldp

Warning: Removed 7 rows containing missing values (`geom_text_repel()`).

Figure 29: Principal component analysis on the coordinates of the unbound oligonucleotide in the unrestrained microsecond trajectory. Scree plot (A) showing the contribution of each principal component on the total variance and the cumulative variance labelled on selected data points. (B) Score plots along the first two principal components, colored by kmeans clusters. (C) Sum of absolute loadings of residues for the first two first principal components

In [78]:

#create pdb object
pdb.noldp.pca <- pdb.noldp.h.long

#keep the coordinates of the centroids of the clusters
pdb.noldp.pca$xyz <- pdb.noldp.pca$xyz[c(4453, 14840, 20352),]

#align on the stem
pdb.noldp.pca$xyz <- fit.xyz(
  fixed = pdb.noldp.pca$xyz[1,],
  mobile = pdb.noldp.pca$xyz,
  fixed.inds = atom.select(
    pdb.noldp.pca, 
    resno = c(11:16, 34:36)
  )$xyz,
  mobile.inds = atom.select(
    pdb.noldp.pca,
    resno = c(11:16, 34:36)
  )$xyz
)

#write pdb file
if (!file.exists('data/pdb.noldp.pca.pdb')) {
  write.pdb(pdb.noldp.pca, file = 'data/pdb.noldp.pca.pdb')
}

Figure 30: Structures of the centroids of each cluster (1 to 3 from left to right) superimposed with the first minimized structure of the complex (grey).

In [79]:

tbl.clusters.noldp <- table(pca.nb.noldp$Best.partition)
frac.clust.no.ldp.1 <- round(tbl.clusters.noldp[1]/sum(tbl.clusters.noldp)*100, 1)
frac.clust.no.ldp.2 <- round(tbl.clusters.noldp[2]/sum(tbl.clusters.noldp)*100, 1)
frac.clust.no.ldp.3 <- round(tbl.clusters.noldp[3]/sum(tbl.clusters.noldp)*100, 1)

open.times <- tibble(
  frame = 1:length(pca.nb.noldp$Best.partition),
  cluster = pca.nb.noldp$Best.partition
) %>% 
  #create a group column that increments every time the cluster changes
  mutate(
    group = cumsum(c(1, diff(cluster) != 0)),
    t = frame * info.long$time.per.frame
  ) %>% 
  group_by(cluster, group) %>%
  #calculate the time spent in each cluster
  summarise(
    t = max(t) - min(t)
  ) %>%
  group_by(cluster) %>%
  summarise(
    max.t = signif(max(t), 2),
    mean.t = signif(mean(t), 2)
  )

`summarise()` has grouped output by 'cluster'. You can override using the
`.groups` argument.

The unbound aptamer spends most of its time in the ‘closed’ (43.8%) and ‘intermediate’ (43.5%) configurations, and only occasionally visits the ‘open’, linear conformation (12.7%) (Figure 31). The ‘closed’ conformation has a mean lifetime of 0.7 ns and a max residency time of 51 ns. The ‘open’ configuration has a lower mean lifetime 0.54 ns, and is never visited more than 24 ns. The so-called ‘intermediate’ conformation is aptly named as it has an even lower mean lifetime 0.5 ns, and visits never exceed 9.2 ns.

In [80]:

p.open <- (
  tibble(
    frame = 1:length(pca.nb.noldp$Best.partition),
    cluster = pca.nb.noldp$Best.partition
  ) %>% 
    mutate(
      t = frame * info.long$time.per.frame,
      cluster = case_when(
        cluster == 1 ~ 'open',
        cluster == 2 ~ 'intermediate',
        cluster == 3 ~ 'closed',
        TRUE ~ 'who knows'
      ),
      cluster = factor(
        cluster,
        levels = c('open', 'intermediate', 'closed')
      )
    ) %>% 
    ggplot(aes(t, cluster)) +
    geom_point(aes(color = cluster),
               show.legend = FALSE,
               alpha = 0.25) +
    geom_line(alpha = 0.1) +
    custom.theme(1) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_y_discrete(limits = rev) +
    scale_color_d3(name = 'Cluster') +
    labs(
      x = '',
      y = 'Conformation'
    ) 
) + (
  atom.distr(
    pair = "C24:C1'-A35:C1'", 
    pdb.noldp.long, span = 0.001, 
    info = info.long) %>% 
    ggplot(aes(x = t, y = dist.loess)) +
    geom_hline(
      aes(color = factor(frame), yintercept = dist),
      data = atom.distr(
        pair = "C24:C1'-A35:C1'", 
        pdb.noldp.pca, span = 1, 
        info = info.long),
      linetype = 'dashed', linewidth = 0.75
    ) +
    geom_line(alpha = 0.75) +
    custom.theme(1) +
    scale_x_continuous(expand = c(0, 0)) +
    scale_color_d3(name = 'Cluster') +
    labs(y = "Distance [C24:C1'-A35:C1'] (Å)",
         x = 't (ns)') 
) +
  plot_layout(guides = 'collect', nrow = 2)

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element C1' to C

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element C1' to C

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element C1' to C

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element C1' to C

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 0.99

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 2.01

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 0

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 4.0401

###

In [81]:

p.open

Figure 31: Opening/closing of the unbound oligonucleotide during the microsecond simulation. Top: Cluster assignment for each frame. Bottom: Distance between the C1’ atoms of C24 (loop region) and A35 (stem region). The distance measured for each cluster centroid is shown as a dashed line.

2.7 Long aptamer

The long aptamer was studied from starting structure including (doplong8) or devoid of (doplong7) base pairing between A8 and T37. The minimized structures shown in Figure 11 were obtained by minimizing the last frame of 10-ns *restrained* simulations. The dynamics of these systems was explored with 1 microsecond unrestrained simulations.

2.7.1 RMSD

2.7.1.1 10 ns restrained simulations (for minimization)

The RMSD (vs. the first frame) is quickly plateauing when the A38•T37 is pre-formed (and restrained), but never does in absence of this base pair (Figure 32, Figure 33).

In [82]:

short.longdop.traj <- list('doplong7' = doplong7.traj, 'doplong8' = doplong8.traj)

lapply(
  seq_along(short.longdop.traj),
  function(x) {
    data.frame(
      rmsd = rmsd(
        a = short.longdop.traj[[x]]$xyz[1,],
        b = short.longdop.traj[[x]]$xyz[seq(1, nrow(short.longdop.traj[[x]]$xyz), by = 1), ],
        fit = FALSE
      ),
      frame = (seq(1, nrow(short.longdop.traj[[x]]$xyz), by = 1)) #divider set to 1 to keep all frames
    ) %>% 
      mutate(t = frame * info.short$time.per.frame,
             traj = x)
  }
) %>% 
  do.call(rbind, .) %>% 
  mutate(traj = case_when(
    traj == 1 ~ 'doplong7 (no 8:37)',
    traj == 2 ~ 'doplong8 (with 8:37)'
  )) %>% 
  ggplot(aes(x = t, y = rmsd, color = traj)) +
  geom_line() +
  custom.theme(1) +
  labs(
    x = 't (ns)',
    y = 'RMSD (&angst;)',
  ) +
  scale_color_manual(values = c('#e62e73', '#003e83')) 
###

Figure 32: RMSD on all residues (including 5’ and 3’-ends) without hydrogens across the 10 ns restrained simulation of the long aptamer, where the first frame is used as reference.

In [83]:

rmsd.pairwise.doplong7 <- rmsd(
  a = doplong7.traj$xyz[seq(1, nrow(doplong7.traj$xyz), by = nrow(doplong7.traj$xyz)/1000),], #keep one out of 5 frames
  fit = FALSE
) %>%
  lazy_dt() %>%
  mutate(var1 = 1:nrow(.), .before = 1) %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  mutate(var2 = as.numeric(gsub("V", "", var2))) %>%
  mutate(traj = 'doplong7') %>% 
  as.data.table()

Warning in rmsd(a = doplong7.traj$xyz[seq(1, nrow(doplong7.traj$xyz), by = nrow(doplong7.traj$xyz)/1000), : No indices provided, using the 1431 non NA positions

rmsd.pairwise.doplong8 <- rmsd(
  a = doplong8.traj$xyz[seq(1, nrow(doplong8.traj$xyz), by = nrow(doplong8.traj$xyz)/1000),], #keep one out of 5 frames
  fit = FALSE
) %>%
  lazy_dt() %>%
  mutate(var1 = 1:nrow(.), .before = 1) %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  mutate(var2 = as.numeric(gsub("V", "", var2))) %>%
  mutate(traj = 'doplong8') %>% 
  as.data.table()

Warning in rmsd(a = doplong8.traj$xyz[seq(1, nrow(doplong8.traj$xyz), by = nrow(doplong8.traj$xyz)/1000), : No indices provided, using the 1431 non NA positions

rbind(rmsd.pairwise.doplong7, rmsd.pairwise.doplong8) %>% 
arrow::write_feather(., "data/doplong.rmsd.pairwise.feather")
###

In [84]:

arrow::read_feather('data/doplong.rmsd.pairwise.feather') %>%
  mutate(t1 = var1 * info.short$time.per.frame * nrow(doplong7.traj$xyz)/1000,
         t2 = var2 * info.short$time.per.frame * nrow(doplong7.traj$xyz)/1000) %>%
  ggplot(aes(x = t1, y = t2, fill = value)) +
  facet_wrap(~traj) +
  geom_raster() +
  scale_fill_viridis_c(name = 'RMSD (&angst;)') +
  scale_x_continuous(expand = c(0, 0), limits = c(0, NA), 
                     name = 't (ns)') +
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA), 
                     name = 't (ns)') +
  ggthemes::theme_pander() +
  theme(
    axis.text.x = element_markdown(size = 12 * scaling),
    axis.text.y = element_markdown(size = 12 * scaling),
    axis.title.x = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.title.y = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.line = element_line(linewidth = 0.75 * scaling),
    axis.ticks = element_line(linewidth = 0.75 * scaling, color = 'black'),
    #legend.text = element_markdown(size = 12 * scaling),
    legend.title = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.x = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.y = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_markdown(size = 16 * scaling, face = 'bold'),
    plot.subtitle = element_markdown(size = 14 * scaling),
    plot.caption = element_markdown(hjust = 1, size = 10 * scaling)
  ) +
  theme(legend.position = 'bottom') +
  theme(plot.margin = margin(1, 1, 0, 0, 'lines'))
###

Figure 33: All-to-all RMSD (all residues and ligand atoms except H) for the 10 ns restrained simulation of the long aptamer.

2.7.1.2 Unrestrained microsecond simulations

In absence of restraints, doplong7 settles at RMSD values close to that of the final frame from the restrained 10 ns simulation (Figure 34). The pairwise data also reveals that an event clearly occurs after 56 ns (Figure 35), that is close to the start of the microsecond simulation but past the endpoint of the restrained 10 ns simulation. After this conformational change, the pairwise RMSD remains apparently stable, but other changes still occur (see below).

In presence of the pre-formed A38•T37 (doplong8), the simulation plateaus much faster (Figure 34)., as seen above for the retrained production run, but at about twice the RMSD. A very significant event is visible at 576 ns, resulting in a doubled RMSD vs. the first frame. The RMSD then varies much more significantly than before, with another significant change at 663 ns.

In [85]:

doplong7_longtraj.traj <- list('doplong7' = doplong7_longtraj, 'doplong8' = doplong8_longtraj)

lapply(
  seq_along(doplong7_longtraj.traj),
  function(x) {
    data.frame(
      rmsd = rmsd(
        a = doplong7_longtraj.traj[[x]]$xyz[1,],
        b = doplong7_longtraj.traj[[x]]$xyz[seq(1, nrow(doplong7_longtraj.traj[[x]]$xyz), by = 1), ],
        fit = FALSE
      ),
      frame = (seq(1, nrow(doplong7_longtraj.traj[[x]]$xyz), by = 1)) #divider set to 1 to keep all frames
    ) %>% 
      mutate(t = frame * info.long$time.per.frame,
             traj = x)
  }
) %>% 
  do.call(rbind, .) %>% 
  mutate(traj = case_when(
    traj == 1 ~ 'doplong7 (no 8:37)',
    traj == 2 ~ 'doplong8 (with 8:37)'
  )) %>% 
  ggplot(aes(x = t, y = rmsd, color = traj)) +
  geom_line() +
  custom.theme(1) +
  labs(
    x = 't (ns)',
    y = 'RMSD (&angst;)',
  ) +
  scale_color_manual(values = c('doplong7 (no 8:37)' = '#e62e73', 'doplong8 (with 8:37)' = '#003e83')) +
  geom_segment(
    inherit.aes = FALSE,
    data = data.frame(
      traj = c(rep('doplong7 (no 8:37)', 3), rep('doplong8 (with 8:37)', 2)),
      x = c(56.5, 375, 553, 576, 663),
      y = c(13, 13.5, 14, 14, 15)
    ),
    aes(
    x = x, xend = x,
    y = y, yend = (y - 1),
    color = traj
    ),
    arrow = arrow(type = "closed", length = unit(0.02, "npc")),
    linewidth = 1
  )

Figure 34: RMSD on all residues (including 5’ and 3’-ends) without hydrogens across the microsecond unrestrained simulation of the long aptamer, where the first frame is used as reference. The arrows point to conformational changes that are not all evident from this metric alone.

In [86]:

rmsd.pairwise.doplong7_longtraj <- rmsd(
  a = doplong7_longtraj$xyz[seq(1, nrow(doplong7_longtraj$xyz), by = nrow(doplong7_longtraj$xyz)/1000),], #keep one out of 5 frames
  fit = FALSE
) %>%
  lazy_dt() %>%
  mutate(var1 = 1:nrow(.), .before = 1) %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  mutate(var2 = as.numeric(gsub("V", "", var2))) %>%
  mutate(traj = 'doplong7') %>% 
  as.data.table()

Warning in rmsd(a = doplong7_longtraj$xyz[seq(1, nrow(doplong7_longtraj$xyz), : No indices provided, using the 921 non NA positions

rmsd.pairwise.doplong8_longtraj <- rmsd(
  a = doplong8_longtraj$xyz[seq(1, nrow(doplong8_longtraj$xyz), by = nrow(doplong8_longtraj$xyz)/1000),], #keep one out of 5 frames
  fit = FALSE
) %>%
  lazy_dt() %>%
  mutate(var1 = 1:nrow(.), .before = 1) %>%
  pivot_longer(cols = -var1, names_to = "var2", values_to = "value") %>%
  mutate(var2 = as.numeric(gsub("V", "", var2))) %>%
  mutate(traj = 'doplong8') %>% 
  as.data.table()

Warning in rmsd(a = doplong8_longtraj$xyz[seq(1, nrow(doplong8_longtraj$xyz), : No indices provided, using the 921 non NA positions

rbind(rmsd.pairwise.doplong7_longtraj, rmsd.pairwise.doplong8_longtraj) %>% 
arrow::write_feather(., "data/doplong.longtraj.rmsd.pairwise.feather")
###

In [87]:

arrow::read_feather('data/doplong.longtraj.rmsd.pairwise.feather') %>%
  mutate(t1 = var1 * info.long$time.per.frame * nrow(doplong7_longtraj$xyz)/1000,
         t2 = var2 * info.long$time.per.frame * nrow(doplong7_longtraj$xyz)/1000) %>%
  ggplot(aes(x = t1, y = t2, fill = value)) +
  facet_wrap(~traj) +
  geom_raster() +
  scale_fill_viridis_c(name = 'RMSD (&angst;)') +
  scale_x_continuous(expand = c(0, 0), limits = c(0, NA), 
                     name = 't (ns)') +
  scale_y_continuous(expand = c(0, 0), limits = c(0, NA), 
                     name = 't (ns)') +
  ggthemes::theme_pander() +
  theme(
    axis.text.x = element_markdown(size = 12 * scaling),
    axis.text.y = element_markdown(size = 12 * scaling),
    axis.title.x = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.title.y = element_markdown(size = 14 * scaling, face = 'bold'),
    axis.line = element_line(linewidth = 0.75 * scaling),
    axis.ticks = element_line(linewidth = 0.75 * scaling, color = 'black'),
    #legend.text = element_markdown(size = 12 * scaling),
    legend.title = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.x = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.text.y = element_markdown(size = 14 * scaling, face = 'bold'),
    strip.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    plot.title = element_markdown(size = 16 * scaling, face = 'bold'),
    plot.subtitle = element_markdown(size = 14 * scaling),
    plot.caption = element_markdown(hjust = 1, size = 10 * scaling)
  ) +
  theme(legend.position = 'bottom') +
  theme(plot.margin = margin(1, 1, 0, 0, 'lines')) +
  geom_segment(
    inherit.aes = FALSE,
    data = data.frame(
      t1 = c(56.5, 375, 553, 576, 663), 
      t2 = c(56.5, 375, 553, 576, 663), 
      traj = c(rep('doplong7', 3), rep('doplong8', 2))),
    aes(x = (t1 + 50), y = (t2 + 50), xend = t1, yend = t2),
    arrow = arrow(type = "closed", length = unit(0.02, "npc")),
    linewidth = 1, color = 'white'
  )
###

Figure 35: All-to-all RMSD (all residues and ligand atoms except H) for the unrestrained microsecond simulation. The white arrows point to significant events. The arrows point to conformational changes that are not all evident from this metric alone.

2.7.2 Base pairing

2.7.2.1 A38•T37 formation

Analysis of base pairing along the unrestrained simulation provides a straightforward explanation for the large conformational change of doplong7: the A8 and T37, initially far apart, rapidly base pair (canonical cW-W). This is accompanied by the formation of a base pair between G9 and the Hoogsteen face of G13. The latter is also rapidly formed in doplong8, wherein the A38•T37 bp is initially formed; however the base pairing is ‘inverted’:

doplong7: tW-M (VII in Saenger nomenclature), with G9•N2-G13:O6 and G9•N1-G13-N7 H-bonds
doplong8: a cW+M (VI in Saenger nomenclature), with G9•N2-G13-N7 and G9•N1-G13:O6 H-bonds

Note that this base pair is fairly unstable in doplong8, but A38•T37 remains formed nonetheless (reminder: there is no restraint here).

In [88]:

doplong7h_100frames <- doplong7h_longtraj
doplong7h_100frames$xyz <- doplong7h_100frames$xyz[seq(1, nrow(doplong7h_100frames$xyz), by = nrow(doplong7h_100frames$xyz)/100),]

pair.list.doplong7 <- contactr(
  doplong7h_100frames,
  dist.thresh = 3.1, freq.thresh = 0.2,
  lgd.resno = 45,
  lgd.donors = c('O2', 'O1', 'N1'),
  lgd.acceptors = NA,
  donor.atoms = c('N', 'O', 'S', 'F'),
  acceptor.atoms = c('O', 'N', 'S', 'F', 'Cl', 'Br', 'I')
)

[1] "Looking for contacts within 3.1 angstroms in 100 frames"

`summarise()` has grouped output by 'resid1', 'resno1', 'elety1', 'resid2',
'resno2'. You can override using the `.groups` argument.

pair.dist.doplong7 <- lapply(
  pair.list.doplong7$pair,
  atom.distr,
  pdb.input = doplong7h_longtraj,
  span = 0.01
) %>%
  do.call(rbind, .)

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O1 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O1 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O1 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

doplong8h_100frames <- doplong8h_longtraj
doplong8h_100frames$xyz <- doplong8h_100frames$xyz[seq(1, nrow(doplong8h_100frames$xyz), by = nrow(doplong8h_100frames$xyz)/100),]

pair.list.doplong8 <- contactr(
  doplong8h_100frames,
  dist.thresh = 3.1, freq.thresh = 0.2,
  lgd.resno = 45,
  lgd.donors = c('O2', 'O1', 'N1'),
  lgd.acceptors = NA,
  donor.atoms = c('N', 'O', 'S', 'F'),
  acceptor.atoms = c('O', 'N', 'S', 'F', 'Cl', 'Br', 'I')
)

[1] "Looking for contacts within 3.1 angstroms in 100 frames"

`summarise()` has grouped output by 'resid1', 'resno1', 'elety1', 'resid2',
'resno2'. You can override using the `.groups` argument.

pair.dist.doplong8 <- lapply(
  pair.list.doplong8$pair,
  atom.distr,
  pdb.input = doplong8h_longtraj,
  span = 0.01
) %>%
  do.call(rbind, .)

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N7 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N6 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N4 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O6 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N2 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O1 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4 to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N3 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O4' to O

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element N1 to N

Warning in atom2ele.default(x, elety.custom, rescue, ...): 
    mapped element O2 to O

###

In [89]:

scaling <- 1

pair.dist.doplong <- rbind(
  pair.dist.doplong7 %>% mutate(traj = 'doplong7'),
  pair.dist.doplong8 %>% mutate(traj = 'doplong8')
) %>% 
  filter(
    (str_detect(pair, 'T37') & str_detect(pair, 'A8')) |
      (str_detect(pair, 'G9') & str_detect(pair, 'G13'))
    ) %>% 
  arrange(pair)

pair.levels <- unique(pair.dist.doplong$pair)

pair.dist.doplong %>% 
  ggplot(aes(t, dist.loess, color = factor(pair, levels = pair.levels))) +
  facet_wrap(~traj, ncol = 1, nrow = 2) +
  geom_hline(yintercept = 3.0, linetype = 'dashed') +
  geom_text_repel(
    data = . %>% filter(t == max(t)),
    aes(label = pair),
    xlim = c(1020, 1100),
    fontface = 'bold',
    show.legend = FALSE,
    size = 5*scaling
  ) +
  geom_line(show.legend = FALSE,
            linewidth = 0.75 * scaling) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, NA), expand = c(0,0)) +
  scale_color_d3() +
  labs(x = 'Time (ns)', y = 'Distance (Å)') +
  custom.theme(scaling) +
  theme(plot.margin = margin(1, 150, 1, 1)) +
  coord_cartesian(clip = 'off') +
  geom_segment(
    data = data.frame(traj = 'doplong7'),
    inherit.aes = FALSE,
    x = 56, xend = 56,
    y = 19, yend = 17,
    arrow = arrow(type = "closed", length = unit(0.02, "npc")),
    linewidth = 1, color = '#e62e73'
  )

Figure 36: Length of potential A38•T37, and G9•G13 H-bonds during the simulation. Note that the G9•G13 pairing differs. The pink arrow shows the onset of basepairing for doplong7.

2.7.2.2 Other events

The above does not explain what happens in the second part of doplong8’s simulation, nor is the only event occuring for doplong7.

doplong7: Two discrete events occur at around 375 and 553 ns
- First, two of the three H-bonds from C24•G29 bonds get disrupted
- Then the third gives up as well. The neighboring G25•G28 base pair is also disrupted at this last onset (Figure 40).
doplong8: Two discrete events as well, at around 576 and 663 ns.
- First occurs the formation of the consecutive A15•A35, G16•G34 and G17•G33 base pairs in lieu of G34•T11, which is the expected base pair from NMR data of the short aptamer.
- Second, the C24•G29 and G25•G28 base pairs are lost, similar to doplong7 (Figure 40).
- Note also the transient flipping out of G9 after these events. The G36 base pair remains in contact, although it now adopts a co-planar configuration.

All of these conformational changes occur within the region of the aptamer present in the smaller version, and are very likely to be the consequence of the absence of retraints in this microsecond simulation; since all lost base pairs are present in the restrained simulations.

In [90]:

rbind(
  pair.dist.doplong7 %>% mutate(traj = 'doplong7'),
  pair.dist.doplong8 %>% mutate(traj = 'doplong8')
) %>% 
  arrange(pair) %>% 
  filter(
    (str_detect(pair, 'A15') & str_detect(pair, 'A35')) |
      (str_detect(pair, 'A35') & str_detect(pair, 'A15') | str_detect(pair, 'T11')) |
      (str_detect(pair, 'C24') & str_detect(pair, 'G29')) |
      (str_detect(pair, 'G16') & str_detect(pair, 'G34')) |
      (str_detect(pair, 'G17') & str_detect(pair, 'G33') | str_detect(pair, 'G34')) |
      (str_detect(pair, 'G25') & str_detect(pair, 'G28')) |
      (str_detect(pair, 'G29') & str_detect(pair, 'C24') | str_detect(pair, 'C24')) |
      (str_detect(pair, 'G34') & str_detect(pair, 'T11')) |
      (str_detect(pair, 'G11')) |
      (str_detect(pair, 'A8')) |
      str_detect(pair, 'G9') |
      str_detect(pair, 'T18')  |
      str_detect(pair, 'G36')
  ) %>% 
  filter(!str_detect(pair, 'LDP')) %>%  
  ggplot(aes(t, dist.loess, color = traj)) +
  facet_wrap(~pair, ncol = 3) +
  geom_line(show.legend = TRUE,
            linewidth = 0.75 * scaling) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, NA), expand = c(0,0)) +
  scale_color_manual(
    name = 'trajectory',
    values = c('doplong7' = '#e62e73', 'doplong8' = '#003e83')
    ) +
  scale_linetype_manual(values = c(2:4), name = 'onset') +
  labs(x = 'Time (ns)', y = 'Distance (Å)') +
  custom.theme(scaling) +
  theme(plot.margin = margin(1, 150, 1, 1)) +
  coord_cartesian(clip = 'off') +
  geom_vline(
    data = data.frame(
      traj = c(rep('doplong7', 3), rep('doplong8', 2)),
      x = c(56.5, 375, 553, 576, 663),
      onset = c('1', '2', '3', '1', '2')
    ),
    aes(xintercept = x, color = traj, linetype = onset),
    show.legend = FALSE,
    linewidth = 0.75 * scaling
  ) +
  theme(legend.position = 'bottom') +
  guides(linetype = 'none')

Figure 37: H-bond lengths for selected nucleotides undergoing base pair formation/disruption during the unrestrained microsecond simulation. The vertical lines indicate the onset of formation/disruption and correspond to the arrows in the figures above.

2.7.3 PCA

Principal component analysis allows to extract coordinates characteristic of the conformers prior and after the conformational changes.

In [91]:

pca_doplong7_longtraj <- pca.pdbs(
  doplong7_longtraj,
  use.svd = FALSE,
  rm.gaps = TRUE,
  fit = FALSE #pdb models are already aligned at the import stage
)

In [92]:

pca_doplong8_longtraj <- pca.pdbs(
  doplong8_longtraj,
  use.svd = FALSE,
  rm.gaps = TRUE,
  fit = FALSE #pdb models are already aligned at the import stage
)

In [93]:

scree7 <- data.frame(
  pc = 1:length(pca_doplong7_longtraj$L),
  L = pca_doplong7_longtraj$L
) %>%
  mutate(
    var = L/sum(L) * 100,
    cum.var = cumsum(var)
  ) %>%
  filter(pc <= 15) %>%
  select(-L) %>%
  mutate(
    label = ifelse(
      pc %in% 1:3 |
        pc %in% seq(4, nrow(.), 3) |
        pc == nrow(.),
      signif(cum.var, 3), NA)
  ) %>%
  ggplot(., aes(x = pc, y = var)) +
  geom_text_repel(aes(label = label), size = 5, fontface = 'bold', force = 100) +
  geom_line(linewidth = 0.75) +
  geom_point(size = 2) +
  custom.theme(scaling) +
  labs(
    x = 'Number of Principal Components',
    y = 'Proportion of variance (%)'
  )

scree8 <- data.frame(
  pc = 1:length(pca_doplong8_longtraj$L),
  L = pca_doplong8_longtraj$L
) %>%
  mutate(
    var = L/sum(L) * 100,
    cum.var = cumsum(var)
  ) %>%
  filter(pc <= 15) %>%
  select(-L) %>%
  mutate(
    label = ifelse(
      pc %in% 1:3 |
        pc %in% seq(4, nrow(.), 3) |
        pc == nrow(.),
      signif(cum.var, 3), NA)
  ) %>%
  ggplot(aes(x = pc, y = var)) +
  geom_text_repel(aes(label = label), size = 5, fontface = 'bold', force = 100) +
  geom_line(linewidth = 0.75) +
  geom_point(size = 2) +
  custom.theme(scaling) +
  labs(
    x = 'Number of Principal Components',
    y = 'Proportion of variance (%)'
  )

In [94]:

pca.nb.doplong7_longtraj <- NbClust(
  data.frame(pca_doplong7_longtraj$z) %>% select(all_of(1:4)),
  distance = "euclidean",
  min.nc = 2, max.nc = 5,
  method = "kmeans"
)

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 10 proposed 2 as the best number of clusters 
* 7 proposed 3 as the best number of clusters 
* 4 proposed 4 as the best number of clusters 
* 3 proposed 5 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  2 
 
 
*******************************************************************

In [95]:

pca.nb.doplong8_longtraj <- NbClust(
  data.frame(pca_doplong8_longtraj$z) %>% select(all_of(1:4)),
  distance = "euclidean",
  min.nc = 2, max.nc = 5,
  method = "kmeans"
)

*** : The Hubert index is a graphical method of determining the number of clusters.
                In the plot of Hubert index, we seek a significant knee that corresponds to a 
                significant increase of the value of the measure i.e the significant peak in Hubert
                index second differences plot.

*** : The D index is a graphical method of determining the number of clusters. 
                In the plot of D index, we seek a significant knee (the significant peak in Dindex
                second differences plot) that corresponds to a significant increase of the value of
                the measure. 
 
******************************************************************* 
* Among all indices:                                                
* 12 proposed 2 as the best number of clusters 
* 7 proposed 3 as the best number of clusters 
* 2 proposed 4 as the best number of clusters 
* 3 proposed 5 as the best number of clusters 

                   ***** Conclusion *****                            
 
* According to the majority rule, the best number of clusters is  2 
 
 
*******************************************************************

In [96]:

p.loads.doplong7_longtraj <- data.frame(
  u = c(pca_doplong7_longtraj$U[,1], pca_doplong7_longtraj$U[,2], pca_doplong7_longtraj$U[,3]),
  eleno = rep(doplong7_longtraj$atom$eleno, 3),
  PC = c(rep('PC1', length(pca_doplong7_longtraj$U[,1])),
         rep('PC2', length(pca_doplong7_longtraj$U[,2])),
         rep('PC3', length(pca_doplong7_longtraj$U[,3])))
) %>%
  left_join(
    doplong7_longtraj$atom,
    by = c("eleno" = "eleno")
  ) %>%
  group_by(resno, resid, PC, chain) %>%
  summarise(u = sum(abs(u))) %>%
  mutate(
    resno = if_else(resid == 'LDP', 45, resno),
    resid = gsub('^D', '', resid),
    label = if_else(
      resid == 'LDP',
      #make LDP bold and pink
      paste0('<span style="color:pink"><b>', resid, '</b></span>'),
      paste0('<b>', resid, resno, '</b>')
    )
  ) %>%
  filter(PC %in% paste0('PC', 1:2)) %>%
  ggplot(aes(
    x = factor(
      resno,
      levels = unique(resno),
      labels = unique(label)
    ),
    y = u,
    fill = chain
  )
  ) +
  facet_wrap(~PC, ncol = 1) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c('grey', 'pink')) +
  scale_y_continuous(expand = c(0, 0)) +
  custom.theme(0.75) +
  theme(axis.title.x = element_blank(),
        legend.position = 'none') +
  labs(y = 'sum of absolute loadings')

`summarise()` has grouped output by 'resno', 'resid', 'PC'. You can override
using the `.groups` argument.

p.pca.doplong7_longtraj <- scree7 +
  pca.plotr.traj(pca_doplong7_longtraj, 'PC1', 'PC2', nb = pca.nb.doplong7_longtraj) +
  p.loads.doplong7_longtraj +
  plot_layout(
    design = '
              AB
              CC
              ') &
  plot_annotation(tag_levels = c('A', 'B', 'C', 'D'))

Warning in geom_point(data = scores %>% filter(State %in%
representative_states), : Ignoring unknown parameters: `linewidth`

p.loads.doplong8_longtraj <- data.frame(
  u = c(pca_doplong8_longtraj$U[,1], pca_doplong8_longtraj$U[,2], pca_doplong8_longtraj$U[,3]),
  eleno = rep(doplong8_longtraj$atom$eleno, 3),
  PC = c(rep('PC1', length(pca_doplong8_longtraj$U[,1])),
         rep('PC2', length(pca_doplong8_longtraj$U[,2])),
         rep('PC3', length(pca_doplong8_longtraj$U[,3])))
) %>%
  left_join(
    doplong8_longtraj$atom,
    by = c("eleno" = "eleno")
  ) %>%
  group_by(resno, resid, PC, chain) %>%
  summarise(u = sum(abs(u))) %>%
  mutate(
    resno = if_else(resid == 'LDP', 45, resno),
    resid = gsub('^D', '', resid),
    label = if_else(
      resid == 'LDP',
      #make LDP bold and pink
      paste0('<span style="color:pink"><b>', resid, '</b></span>'),
      paste0('<b>', resid, resno, '</b>')
    )
  ) %>%
  filter(PC %in% paste0('PC', 1:2)) %>%
  ggplot(aes(
    x = factor(
      resno,
      levels = unique(resno),
      labels = unique(label)
    ),
    y = u,
    fill = chain
  )
  ) +
  facet_wrap(~PC, ncol = 1) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c('grey', 'pink')) +
  scale_y_continuous(expand = c(0, 0)) +
  custom.theme(0.75) +
  theme(axis.title.x = element_blank(),
        legend.position = 'none') +
  labs(y = 'sum of absolute loadings')

`summarise()` has grouped output by 'resno', 'resid', 'PC'. You can override
using the `.groups` argument.

p.pca.doplong8_longtraj <- scree8 +
  pca.plotr.traj(pca_doplong8_longtraj, 'PC1', 'PC2', nb = pca.nb.doplong8_longtraj) +
  p.loads.doplong8_longtraj +
  plot_layout(
    design = '
              AB
              CC
              ') &
  plot_annotation(tag_levels = c('A', 'B', 'C', 'D'))

Warning in geom_point(data = scores %>% filter(State %in%
representative_states), : Ignoring unknown parameters: `linewidth`

In [97]:

p.pca.doplong7_longtraj

Warning: Removed 7 rows containing missing values (`geom_text_repel()`).

Figure 38: Principal component analysis on the coordinates of the long aptamer doplong7 (starting without A38•T37 bp) in the unrestrained microsecond trajectory. Scree plot (A) showing the contribution of each principal component on the total variance and the cumulative variance labelled on selected data points. (B) Score plots along the first two principal components, colored by kmeans clusters. (C) Sum of absolute loadings of residues for the first two first principal components

In [98]:

p.pca.doplong8_longtraj

Warning: Removed 7 rows containing missing values (`geom_text_repel()`).

Figure 39: Principal component analysis on the coordinates of the long aptamer doplong8 (starting with A38•T37 bp) in the unrestrained microsecond trajectory. Scree plot (A) showing the contribution of each principal component on the total variance and the cumulative variance labelled on selected data points. (B) Score plots along the first two principal components, colored by kmeans clusters. (C) Sum of absolute loadings of residues for the first two first principal components

In [99]:

#create pdb object
pca.doplong7_longtraj.pca <- doplong7_longtraj
pca.doplong8_longtraj.pca <- doplong8_longtraj

#keep the coordinates of the centroids of the clusters
pca.doplong7_longtraj.pca$xyz <- pca.doplong7_longtraj.pca$xyz[c(750, 24576),]
pca.doplong8_longtraj.pca$xyz <- pca.doplong8_longtraj.pca$xyz[c(2035, 21687),]

#align
pca.doplong7_longtraj.pca$xyz <- fit.xyz(
  fixed = pca.doplong7_longtraj.pca$xyz[1,],
  mobile = pca.doplong7_longtraj.pca$xyz,
  fixed.inds = atom.select(pca.doplong7_longtraj.pca, elety = unique(pca.doplong7_longtraj.pca$atom$elety))$xyz,
  mobile.inds = atom.select(pca.doplong7_longtraj.pca, elety = unique(pca.doplong7_longtraj.pca$atom$elety))$xyz
)
pca.doplong8_longtraj.pca$xyz <- fit.xyz(
  fixed = pca.doplong8_longtraj.pca$xyz[1,],
  mobile = pca.doplong8_longtraj.pca$xyz,
  fixed.inds = atom.select(pca.doplong8_longtraj.pca, elety = unique(pca.doplong8_longtraj.pca$atom$elety))$xyz,
  mobile.inds = atom.select(pca.doplong8_longtraj.pca, elety = unique(pca.doplong8_longtraj.pca$atom$elety))$xyz
)

#write pdb file
if (!file.exists('data/pca.doplong7_longtraj.pca.pdb')) {
  write.pdb(pca.doplong7_longtraj.pca, file = 'data/pca.doplong7_longtraj.pca.pdb')
}
if (!file.exists('data/pca.doplong8_longtraj.pca.pdb')) {
  write.pdb(pca.doplong8_longtraj.pca, file = 'data/pca.doplong8_longtraj.pca.pdb')
}

**pcadoplong7:** The A38•T37 base pair is initially absent but canonically formed in cluster 2. However, the latter loses the C24•G29 G25•G28 base pairs. The two events are not directly linked as they occur around half a microsecond from one another.

Anosova, Irina, Ewa A. Kowal, Matthew R. Dunn, John C. Chaput, Wade D. Van Horn, and Martin Egli. 2015. “The Structural Diversity of Artificial Genetic Polymers.” Nucleic Acids Research 44 (3): 1007–21. https://doi.org/10.1093/nar/gkv1472.

B. J., Grant, Rodrigues A. P. C., ElSawy K. M., McCammon J. A., and Caves L. S. D. 2006. “Bio3D: An r Package for the Comparative Analysis of Protein Structures.” 22.

Berendsen, H. J. C., J. P. M. Postma, W. F. van Gunsteren, A. DiNola, and J. R. Haak. 1984. “Molecular Dynamics with Coupling to an External Bath.” The Journal of Chemical Physics 81 (8): 3684–90. https://doi.org/10.1063/1.448118.

Case, David A., Hasan Metin Aktulga, Kellon Belfon, David S. Cerutti, G. Andrés Cisneros, Vinícius Wilian D. Cruzeiro, Negin Forouzesh, et al. 2023. “AmberTools.” Journal of Chemical Information and Modeling 63 (20): 6183–91. https://doi.org/10.1021/acs.jcim.3c01153.

Charrad, Malika, Nadia Ghazzali, Véronique Boiteau, and Azam Niknafs. 2014. “NbClust: An r Package for Determining the Relevant Number of Clusters in a Data Set” 61. https://www.jstatsoft.org/v61/i06/.

Gesteland, R. F., T. Cech, and J. F. Atkins. 2006. The RNA World: The Nature of Modern RNA Suggests a Prebiotic RNA World. Cold Spring Harbor Monograph Series. Cold Spring Harbor Laboratory Press. https://books.google.fr/books?id=3mREVdXNzFcC.

Götz, Andreas W., Mark J. Williamson, Dong Xu, Duncan Poole, Scott Le Grand, and Ross C. Walker. 2012. “Routine Microsecond Molecular Dynamics Simulations with AMBER on GPUs. 1. Generalized Born.” Journal of Chemical Theory and Computation 8 (5): 1542–55. https://doi.org/10.1021/ct200909j.

Izadi, Saeed, Ramu Anandakrishnan, and Alexey V. Onufriev. 2014. “Building Water Models: A Different Approach.” The Journal of Physical Chemistry Letters 5 (21): 3863–71. https://doi.org/10.1021/jz501780a.

Laskowski, Roman A., and Mark B. Swindells. 2011. “LigPlot+: Multiple LigandProtein Interaction Diagrams for Drug Discovery.” Journal of Chemical Information and Modeling 51 (10): 2778–86. https://doi.org/10.1021/ci200227u.

Le Grand, Scott, Andreas W. Götz, and Ross C. Walker. 2013. “SPFP: Speed Without CompromiseA Mixed Precision Model for GPU Accelerated Molecular Dynamics Simulations.” Computer Physics Communications 184 (2): 374–80. https://doi.org/10.1016/j.cpc.2012.09.022.

Lemieux, S. 2002. “RNA Canonical and Non-Canonical Base Pairing Types: A Recognition Method and Complete Repertoire.” Nucleic Acids Research 30 (19): 4250–63. https://doi.org/10.1093/nar/gkf540.

LEONTIS, NEOCLES B., and ERIC WESTHOF. 2001. “Geometric Nomenclature and Classification of RNA Base Pairs.” RNA 7 (4): 499–512. https://doi.org/10.1017/s1355838201002515.

Li, Zhen, Lin Frank Song, Pengfei Li, and Kenneth M. Merz. 2020. “Systematic Parametrization of Divalent Metal Ions for the OPC3, OPC, TIP3P-FB, and TIP4P-FB Water Models.” Journal of Chemical Theory and Computation 16 (7): 4429–42. https://doi.org/10.1021/acs.jctc.0c00194.

Love, Olivia, Rodrigo Galindo-Murillo, Marie Zgarbová, Jiří Šponer, Petr Jurečka, and Thomas E. Cheatham. 2023. “Assessing the Current State of Amber Force Field Modifications for DNA─2023 Edition.” Journal of Chemical Theory and Computation 19 (13): 4299–4307. https://doi.org/10.1021/acs.jctc.3c00233.

Lu, Xiang-Jun. 2020. “DSSR-Enabled Innovative Schematics of 3D Nucleic Acid Structures with PyMOL.” Nucleic Acids Research, May. https://doi.org/10.1093/nar/gkaa426.

Lu, Xiang-Jun, Harmen J. Bussemaker, and Wilma K. Olson. 2015. “DSSR: An Integrated Software Tool for Dissecting the Spatial Structure of RNA.” Nucleic Acids Research, July, gkv716. https://doi.org/10.1093/nar/gkv716.

Machado, Matías R., and Sergio Pantano. 2020. “Split the Charge Difference in Two! A Rule of Thumb for Adding Proper Amounts of Ions in MD Simulations.” Journal of Chemical Theory and Computation 16 (3): 1367–72. https://doi.org/10.1021/acs.jctc.9b00953.

R Core Team. 2023. R: A Language and Environment for Statistical Computing. Vienna, Austria: R Foundation for Statistical Computing. https://www.R-project.org/.

Richardson, Neal, Ian Cook, Nic Crane, Dewey Dunnington, Romain François, Jonathan Keane, Dragoș Moldovan-Grünfeld, Jeroen Ooms, Jacob Wujciak-Jens, and Apache Arrow. 2024. Arrow: Integration to ’Apache’ ’Arrow’. https://github.com/apache/arrow/.

Romanowska, Julia, Krzysztof S. Nowiński, and Joanna Trylska. 2012. “Determining Geometrically Stable Domains in Molecular Conformation Sets.” Journal of Chemical Theory and Computation 8 (8): 2588–99. https://doi.org/10.1021/ct300206j.

Saenger, Wolfram. 1984. Principles of Nucleic Acid Structure. Springer New York. https://doi.org/10.1007/978-1-4612-5190-3.

Salomon-Ferrer, Romelia, Andreas W. Götz, Duncan Poole, Scott Le Grand, and Ross C. Walker. 2013. “Routine Microsecond Molecular Dynamics Simulations with AMBER on GPUs. 2. Explicit Solvent Particle Mesh Ewald.” Journal of Chemical Theory and Computation 9 (9): 3878–88. https://doi.org/10.1021/ct400314y.

Schmit, Jeremy D., Nilusha L. Kariyawasam, Vince Needham, and Paul E. Smith. 2018. “SLTCAP: A Simple Method for Calculating the Number of Ions Needed for MD Simulation.” Journal of Chemical Theory and Computation 14 (4): 1823–27. https://doi.org/10.1021/acs.jctc.7b01254.

Schrödinger, LLC. 2021. The PyMOL Molecular Graphics System, Version 2.5. http://www.pymol.org/pymol.

Shen, X., B. Gu, S. A. Che, and F. S. Zhang. 2011. “Solvent Effects on the Conformation of DNA Dodecamer Segment: A Simulation Study.” The Journal of Chemical Physics 135 (3). https://doi.org/10.1063/1.3610549.

Sindhikara, Daniel J., Seonah Kim, Arthur F. Voter, and Adrian E. Roitberg. 2009. “Bad Seeds Sprout Perilous Dynamics: Stochastic Thermostat Induced Trajectory Synchronization in Biomolecules.” Journal of Chemical Theory and Computation 5 (6): 1624–31. https://doi.org/10.1021/ct800573m.

Wang, Andrew H. J., Giovanni Ughetto, Gary J. Quigley, and Alexander Rich. 1987. “Interactions Between an Anthracycline Antibiotic and DNA: Molecular Structure of Daunomycin Complexed to d(CpGpTpApCpG) at 1.2-.ANG. Resolution.” Biochemistry 26 (4): 1152–63. https://doi.org/10.1021/bi00378a025.

Wang, Junmei, Romain M. Wolf, James W. Caldwell, Peter A. Kollman, and David A. Case. 2004. “Development and Testing of a General Amber Force Field.” Journal of Computational Chemistry 25 (9): 1157–74. https://doi.org/10.1002/jcc.20035.

Wickham, Hadley. 2023. “Httr2: Perform HTTP Requests and Process the Responses.” https://CRAN.R-project.org/package=httr2.

Wickham, Hadley, Mara Averick, Jennifer Bryan, Winston Chang, Lucy D’Agostino McGowan, Romain François, Garrett Grolemund, et al. 2019. “Welcome to the Tidyverse” 4: 1686. https://doi.org/10.21105/joss.01686.

Zgarbová, Marie, Jiří Šponer, and Petr Jurečka. 2021. “Z-DNA as a Touchstone for Additive Empirical Force Fields and a Refinement of the Alpha/Gamma DNA Torsions for AMBER.” Journal of Chemical Theory and Computation 17 (10): 6292–6301. https://doi.org/10.1021/acs.jctc.1c00697.

Article Notebook