contactr <- function(pdb, dist.thresh = 3.5, freq.thresh = 0.1,
lgd.resno = 38,
lgd.donors = c('O2', 'O1', 'N1'),
lgd.acceptors = NA,
donor.atoms = c('N', 'O', 'S', 'F'),
acceptor.atoms = c('O', 'N', 'S', 'F', 'Cl', 'Br', 'I')) {
donor_atoms <- donor.atoms
acceptor_atoms <- acceptor.atoms
donors <- list(
Gua = c('N2', 'N1'),
Ade = c('N6'),
Thy = c('N3'),
Cyt = c('N4')
)
acceptors <- list(
Gua = c('O6', 'N3', 'N7', "O4'"),
Ade = c('N1', 'N3', 'N7', "O4'"),
Thy = c('O4', 'O2', "O4'"),
Cyt = c('N3', 'O2', "O4'")
)
all.donor <- unique(c(donors$G, donors$A, donors$T, donors$C))
all.acceptor <- unique(c(acceptors$G, acceptors$A, acceptors$T, acceptors$C))
print(paste('Looking for contacts within', dist.thresh, 'angstroms in',
nrow(pdb$xyz), 'frames'))
lapply(
1:nrow(pdb$xyz),
function(i) {
dist <- dist.xyz(a = pdb$xyz[i,], all.pairs = TRUE) %>%
lazy_dt() %>%
mutate(atom1 = 1:nrow(.), .before = 1) %>%
pivot_longer(cols = -atom1, names_to = "atom2", values_to = "value") %>%
mutate(atom2 = as.numeric(atom2)) %>%
# only keeps half of the matrix
filter(atom1 < atom2) %>%
# remove distance above threshold
filter(value < dist.thresh) %>%
left_join(pdb$atom %>%
select(eleno, resno, resid, elety, elesy),
by = c("atom1" = "eleno")) %>%
rename(resno1 = resno, resid1 = resid, elety1 = elety, elesy1 = elesy) %>%
left_join(pdb$atom %>%
select(eleno, resno, resid, elety, elesy),
by = c("atom2" = "eleno")) %>%
rename(resno2 = resno, resid2 = resid, elety2 = elety, elesy2 = elesy) %>%
# removes intra-residue contacts
filter(resno1 != resno2) %>%
# selects atoms capable of forming hydrogen bonds
filter(elesy1 %in% donor_atoms & elesy2 %in% acceptor_atoms |
elesy1 %in% acceptor_atoms & elesy2 %in% donor_atoms) %>%
#remove 'D' from residue name
mutate(
resid1 = ifelse(substr(resid1, 1, 1) == "D", substr(resid1, 2, nchar(resid1)), resid1),
resid2 = ifelse(substr(resid2, 1, 1) == "D", substr(resid2, 2, nchar(resid2)), resid2)
) %>%
# only keep atoms in donor or acceptor groups depending of the residue
filter(
(resid1 == 'G' & elety1 %in% donors$Gua &
((resid2 == 'G' & elety2 %in% acceptors$Gua) |
(resid2 == 'C' & elety2 %in% acceptors$Cyt) |
(resid2 == 'A' & elety2 %in% acceptors$Ade) |
(resid2 == 'T' & elety2 %in% acceptors$Thy)
)) |
(resid2 == 'G' & elety2 %in% donors$Gua &
((resid1 == 'G' & elety1 %in% acceptors$Gua) |
(resid1 == 'C' & elety1 %in% acceptors$Cyt) |
(resid1 == 'A' & elety1 %in% acceptors$Ade) |
(resid1 == 'T' & elety1 %in% acceptors$Thy)
)) |
(resid1 == 'C' & elety1 %in% donors$Cyt &
((resid2 == 'G' & elety2 %in% acceptors$Gua) |
(resid2 == 'C' & elety2 %in% acceptors$Cyt) |
(resid2 == 'A' & elety2 %in% acceptors$Ade) |
(resid2 == 'T' & elety2 %in% acceptors$Thy)
)) |
(resid2 == 'C' & elety2 %in% donors$Cyt &
((resid1 == 'G' & elety1 %in% acceptors$Gua) |
(resid1 == 'C' & elety1 %in% acceptors$Cyt) |
(resid1 == 'A' & elety1 %in% acceptors$Ade) |
(resid1 == 'T' & elety1 %in% acceptors$Thy)
)) |
(resid1 == 'A' & elety1 %in% donors$Ade &
((resid2 == 'G' & elety2 %in% acceptors$Gua) |
(resid2 == 'C' & elety2 %in% acceptors$Cyt) |
(resid2 == 'A' & elety2 %in% acceptors$Ade) |
(resid2 == 'T' & elety2 %in% acceptors$Thy)
)) |
(resid2 == 'A' & elety2 %in% donors$Ade &
((resid1 == 'G' & elety1 %in% acceptors$Gua) |
(resid1 == 'C' & elety1 %in% acceptors$Cyt) |
(resid1 == 'A' & elety1 %in% acceptors$Ade) |
(resid1 == 'T' & elety1 %in% acceptors$Thy)
)) |
(resid1 == 'T' & elety1 %in% donors$Thy &
((resid2 == 'G' & elety2 %in% acceptors$Gua) |
(resid2 == 'C' & elety2 %in% acceptors$Cyt) |
(resid2 == 'A' & elety2 %in% acceptors$Ade) |
(resid2 == 'T' & elety2 %in% acceptors$Thy)
)) |
(resid2 == 'T' & elety2 %in% donors$Thy &
((resid1 == 'G' & elety1 %in% acceptors$Gua) |
(resid1 == 'C' & elety1 %in% acceptors$Cyt) |
(resid1 == 'A' & elety1 %in% acceptors$Ade) |
(resid1 == 'T' & elety1 %in% acceptors$Thy)
)) |
!(resid1 %in% c('G', 'A', 'T', 'C')) |
!(resid2 %in% c('G', 'A', 'T', 'C'))
) %>%
as.data.table()
return(dist)
}
) %>%
rbindlist(.) %>%
group_by(resid1, resno1, elety1,
resid2, resno2, elety2) %>%
# keep a single row per contact, outputs value and frequency within ensemble
summarise(
value = mean(value, na.rm = TRUE),
n = n(),
freq = n/nrow(pdb$xyz)
) %>%
# only keep contacts with a frequency above threshold
filter(freq >= freq.thresh) %>%
ungroup() %>%
# assign donors/acceptors
mutate(
donor.resid = case_when(
resid1 == 'G' & elety1 %in% donors$Gua ~ resid1,
resid1 == 'A' & elety1 %in% donors$Ade ~ resid1,
resid1 == 'T' & elety1 %in% donors$Thy ~ resid1,
resid1 == 'C' & elety1 %in% donors$Cyt ~ resid1,
resno1 == lgd.resno & elety1 %in% lgd.donors ~ resid1,
TRUE ~ resid2
),
donor.resno = case_when(
resid1 == 'G' & elety1 %in% donors$Gua ~ resno1,
resid1 == 'A' & elety1 %in% donors$Ade ~ resno1,
resid1 == 'T' & elety1 %in% donors$Thy ~ resno1,
resid1 == 'C' & elety1 %in% donors$Cyt ~ resno1,
resno1 == lgd.resno & elety1 %in% lgd.donors ~ resno1,
TRUE ~ resno2
),
donor.elety = case_when(
resid1 == 'G' & elety1 %in% donors$Gua ~ elety1,
resid1 == 'A' & elety1 %in% donors$Ade ~ elety1,
resid1 == 'T' & elety1 %in% donors$Thy ~ elety1,
resid1 == 'C' & elety1 %in% donors$Cyt ~ elety1,
resno1 == lgd.resno & elety1 %in% lgd.donors ~ elety1,
TRUE ~ elety2
),
acceptor.resid = case_when(
resid1 == 'G' & elety1 %in% acceptors$Gua ~ resid1,
resid1 == 'A' & elety1 %in% acceptors$Ade ~ resid1,
resid1 == 'T' & elety1 %in% acceptors$Thy ~ resid1,
resid1 == 'C' & elety1 %in% acceptors$Cyt ~ resid1,
resno1 == lgd.resno & elety1 %in% lgd.acceptors ~ resid1,
TRUE ~ resid2
),
acceptor.resno = case_when(
resid1 == 'G' & elety1 %in% acceptors$Gua ~ resno1,
resid1 == 'A' & elety1 %in% acceptors$Ade ~ resno1,
resid1 == 'T' & elety1 %in% acceptors$Thy ~ resno1,
resid1 == 'C' & elety1 %in% acceptors$Cyt ~ resno1,
resno1 == lgd.resno & elety1 %in% lgd.acceptors ~ resno1,
TRUE ~ resno2
),
acceptor.elety = case_when(
resid1 == 'G' & elety1 %in% acceptors$Gua ~ elety1,
resid1 == 'A' & elety1 %in% acceptors$Ade ~ elety1,
resid1 == 'T' & elety1 %in% acceptors$Thy ~ elety1,
resid1 == 'C' & elety1 %in% acceptors$Cyt ~ elety1,
resno1 == lgd.resno & elety1 %in% lgd.acceptors ~ elety1,
TRUE ~ elety2
)
) %>%
# remove useless columns
select(-c(resid1, resno1, elety1, resid2, resno2, elety2)) %>%
# generate pair names
mutate(
pair = paste0(donor.resid, donor.resno, ':', donor.elety, '-',
acceptor.resid, acceptor.resno, ':', acceptor.elety),
.before = 1
) %>%
# arrange pairs
arrange(donor.resno, acceptor.resno)
}Contact analyzer
Discovery
Function to discover contacts between donor and acceptor atoms Includes a ligand residue number and a list of corresponding donor and acceptor atoms
Use a pdb file with hydrogens May be lengthy for larger molecules/larger numbers of frame It is not necessary to include all frames in the discovery step Fewer steps and lower frequency reduces calculation time and false negative results
In [1]:
Atom-atom distance calculation and smoothing
In [2]:
atom.distr <- function(pair, pdb.input, span = 0.005, info = info.long) {
pair.a <- str_split(pair, '-')[[1]][1]
pair.b <- str_split(pair, '-')[[1]][2]
dist <- data.frame(
frame = 1:nrow(pdb.input$xyz),
dist = dist.xyz(
a = com(
pdb.input,
inds = atom.select(
pdb.input,
resno = as.numeric(gsub("\\D", "", str_split(pair.a, ':')[[1]][1])),
elety = str_split(pair.a, ':')[[1]][2]
)
),
b = com(
pdb.input,
inds = atom.select(
pdb.input,
resno = as.numeric(gsub("\\D", "", str_split(pair.b, ':')[[1]][1])),
elety = str_split(pair.b, ':')[[1]][2]
)
),
all.pairs = FALSE
)
) %>%
cbind(
.,
dist.loess = loess(
dist ~ frame,
data = .,
span = span
)$fitted
) %>%
mutate(pair = pair, .before = 1) %>%
mutate(
t = frame * info$time.per.frame,
pair = str_replace(pair, '-', '•')
)
return(dist)
}Bond angle calculation and smoothing
REQUIRES A PDB FILE WITH HYDROGENS!!! ALSO REQUIRES TO CUSTOMISE LIGAND HYDROGEN ATOMS SWITCH
In [3]:
bond.angler <- function(pair, pdb.input, info, span = 0.005, clean = TRUE, rectify = 50) {
# pair <- pair
# print(pair)
print(paste('Processing pair:', pair))
res.1 <- str_split(pair, '-')[[1]][1]
res.3 <- str_split(pair, '-')[[1]][2]
elety.1 <- str_split(res.1, ':')[[1]][2]
elety.3 <- str_split(res.3, ':')[[1]][2]
res.1 <- str_split(res.1, ':')[[1]][1]
res.3 <- str_split(res.3, ':')[[1]][1]
resno.1 <- as.numeric(gsub("\\D", "", res.1))
resno.2 <- resno.1
resno.3 <- as.numeric(gsub("\\D", "", res.3))
res.1 <- gsub("\\d", "", res.1)
res.3 <- gsub("\\d", "", res.3)
elety.2 <- switch(
res.1,
'G' = switch(
elety.1,
'N1' = c('H1'),
'N2' = c('H21', 'H22')
),
'A' = c('H61', 'H62'),
'C' = c('H41', 'H42'),
'T' = c('H3'),
'LDP' = switch(
elety.1,
'N1' = c('HN11', 'HN12', 'HN13'),
'O1' = c('HO1'),
'O2' = c('HO2')
)
)
print(paste('Residues:', res.1, res.1, res.3))
print(paste('Numbers:', resno.1, resno.2, resno.3))
print(paste('Elements:', elety.1, elety.2, elety.3))
# Function that calculates the bond angle between three atoms along all frames
base.function <- function(hydrogen) {
print(paste('Processing hydrogen:', hydrogen))
# Select the atoms
angle.inds <- combine.select(
atom.select(
pdb.input,
resno = resno.1,
elety = elety.1
),
atom.select(
pdb.input,
resno = resno.2,
elety = hydrogen
),
atom.select(
pdb.input,
resno = resno.3,
elety = elety.3
),
operator = 'OR'
)
# Calculate the bond angles
angles <- sapply(
1:nrow(pdb.input$xyz), function(x) {
angle.xyz(
pdb.input$xyz[x,][angle.inds$xyz]
)
}
) %>%
as_tibble() %>%
rename('angle' = 'value') %>%
# add the residue names
mutate(
frame = 1:nrow(.),
pair = pair,
h = hydrogen, #track info of which hydrogen binds, useful if several interconverting
angle = ifelse(angle < rectify, 180 - angle, angle)
) %>%
# add the loess fit
cbind(
.,
angle.loess = loess(
angle ~ frame,
data = .,
span = span
)$fitted
) %>%
mutate(t = frame * info$time.per.frame, .after = 1)
return(angles)
}
# Apply the function to the list of hydrogen from the pair
angle.list <- lapply(
elety.2,
base.function
) %>%
do.call(rbind, .)
print(paste('Length of angle list before cleaning:', nrow(angle.list)))
# If clean is TRUE, keep only the maximum angle for each frame
# and reapply the loess fit
if (clean) {
angle.list <- angle.list %>%
group_by(pair, t) %>%
filter(angle == max(angle)) %>%
ungroup() %>%
select(-angle.loess) %>%
cbind(
.,
angle.loess = loess(
angle ~ t,
data = .,
span = span
)$fitted
) %>%
mutate(pair = paste0(res.1, resno.1, ':', elety.1, '•', res.3, resno.3, ':', elety.3))
print(paste('Length of angle list after cleaning:', nrow(angle.list)))
}
print(paste('Finished angle list nrow', nrow(angle.list)))
return(angle.list)
}Bond formation frequency calculation and plotting
Bond frequency calculation
In [4]:
bond.freqr <- function(dist, angl, dist.cutoff = 3.0, angl.cutoff = 120, dist.lax.cutoff = 3.2, angl.lax.cutoff = 100){
bond.index <- angl %>%
left_join(
dist,
by = c('frame', 'pair', 't')
) %>%
mutate(
bond.type = case_when(
dist < dist.cutoff & angle > angl.cutoff ~ 'canonical',
dist < dist.lax.cutoff & angle > angl.lax.cutoff ~ 'non canonical',
TRUE ~ 'no bond'
),
bond.type = factor(bond.type, levels = c('no bond', 'non canonical', 'canonical')),
h.bond.index = angle/dist
) %>%
select(pair, frame, t, dist, angle, bond.type, h.bond.index, everything())
bond.freq <- bond.index %>%
# group_by(pair) %>%
group_by(frame, t, pair) %>%
# summarise the frequency of formation of canonical and non-canonical bonds
summarise(
canonical = sum(bond.type == 'canonical') ,
non.canonical = sum(bond.type == 'non canonical'),
no.bond = sum(bond.type == 'no bond')
) %>%
group_by(pair) %>%
summarise(
canonical = sum(canonical)/n(),
non.canonical = sum(non.canonical)/n(),
no.bond = sum(no.bond)/n()
) %>%
pivot_longer(-c(pair), names_to = 'bond.type', values_to = 'frequency') %>%
mutate(bond.type = factor(gsub('\\.', ' ', bond.type), levels = c('no bond', 'non canonical', 'canonical')))
return(list(bond.index = bond.index, bond.freq = bond.freq))
}Bond formation frequency plotting
In [5]:
hbond.cumul.plotr <- function(bond.freq = hbond.rstslt, freq.cutoff = 0.25,
facet = FALSE, main.exp = 'rMD', exp.order = c('rMD', 'uMD', 'no LDP'),
highlight = 'LDP', highlight.region = c(11:18, 33:37)){
df <- lapply(
list(
bond.freq$bond.index,
bond.freq$bond.freq
),
function(x){
x %>%
mutate(
pair = if_else(
str_detect(pair, highlight),
paste0("<b><span style='color: hotpink;'>", pair, "</span></b>"),
pair
),
# highlight = if_else(str_detect(pair, highlight), TRUE, FALSE),
highlight = case_when(
str_detect(pair, highlight) ~ '2.highlight',
as.numeric(str_extract(pair, '\\d+')) %in% highlight.region ~ '3.region',
TRUE ~ '1.no highlight'
),
pair = if_else(
as.numeric(str_extract(pair, '\\d+')) %in% highlight.region,
paste0("<b><span style='color: steelblue;'>", pair, "</span></b>"),
pair
)
)
}
)
if (isFALSE(facet)) {
pair.levels <- df[[2]] %>%
filter(bond.type == 'canonical') %>%
group_by(pair) %>%
summarise(freq = sum(frequency)) %>%
filter(freq >= freq.cutoff) %>%
arrange((freq)) %>%
pull(pair)
} else {
pair.levels <- df[[2]] %>%
filter(
exp == main.exp,
bond.type == 'canonical') %>%
group_by(pair) %>%
summarise(freq = sum(frequency)) %>%
filter(freq >= freq.cutoff) %>%
arrange((freq)) %>%
pull(pair)
}
if (isFALSE(facet)) {
(
df[[1]] %>%
filter(pair %in% pair.levels) %>%
ggplot(., aes(x = t, y = factor(pair, levels = pair.levels),
alpha = h.bond.index, fill = highlight)) +
geom_tile(color = NA) +
scale_fill_manual(values = c('grey20', '#ff748c', '#b0c4de')) +
scale_alpha_continuous(range = c(0,1), name = 'H-bond index', limits = c(30, NA)) +
scale_x_continuous(expand = c(0,0), name = 't (ns)') +
scale_y_discrete(expand = c(0,0), name = '') +
custom.theme(1)
) + (
df[[2]] %>%
filter(pair %in% pair.levels) %>%
ggplot(., aes(x = frequency, y = factor(pair, levels = pair.levels),
alpha = bond.type, fill = highlight)) +
geom_col(position = 'stack') +
scale_alpha_manual(values = c(0, 0.35, 1)) +
scale_fill_manual(values = c('grey40', '#ff748c', '#b0c4de')) +
scale_x_continuous(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0), name = '') +
custom.theme(1)
) +
plot_layout(guides = 'auto') &
theme(legend.position = 'none')
} else {
(
df[[1]] %>%
filter(pair %in% pair.levels) %>%
ggplot(., aes(x = t, y = factor(pair, levels = pair.levels),
alpha = h.bond.index, fill = highlight)) +
facet_wrap(~factor(exp, levels = exp.order), nrow = 1) +
geom_tile(color = NA) +
scale_fill_manual(values = c('grey20', '#ff748c', '#b0c4de')) +
scale_alpha_continuous(range = c(0,1), name = 'H-bond index', limits = c(30, NA)) +
scale_x_continuous(expand = c(0,0), name = 't (ns)') +
scale_y_discrete(expand = c(0,0), name = '', drop = FALSE) +
custom.theme(1)
) +
(
df[[2]] %>%
filter(pair %in% pair.levels) %>%
ggplot(., aes(x = frequency, y = factor(pair, levels = pair.levels),
alpha = bond.type, fill = highlight)) +
facet_wrap(~factor(exp, levels = exp.order), nrow = 1) +
geom_col(position = 'stack') +
scale_alpha_manual(values = c(0, 0.35, 1)) +
scale_fill_manual(values = c('grey40', '#ff748c', '#b0c4de')) +
scale_x_continuous(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0), name = '', drop = FALSE) +
custom.theme(1)
) +
plot_layout(guides = 'auto', ncol = 1) &
theme(legend.position = 'none')
}
}Distances to rings
Unrelated to contact finder as the latter only ouputs H-bond contacts. Here pairs to be analyzed are given as input with residue1-residue2 format.
In [6]:
ring.distr <- function(pair, pdb.input = pdb.long, span = 0.005) {
res.a <- str_split(pair, '-')[[1]][1]
res.b <- str_split(pair, '-')[[1]][2]
eletyr <- function(res) {
switch(
res,
'G' = c('N1', 'C2', 'N3', 'C4', 'C5', 'C6', 'N7', 'C8', 'N9'),
'A' = c('N1', 'C2', 'N3', 'C4', 'C5', 'C6', 'N7', 'C8', 'N9'),
'T' = c('N1', 'C2', 'N3', 'C4', 'C5', 'C6'),
'C' = c('N1', 'C2', 'N3', 'C4', 'C5', 'C6'),
'LDP' = c('C1', 'C2', 'C3', 'C4', 'C5', 'C6'),
stop(paste(res, 'not recognized'))
)
}
elety.a <- eletyr(gsub("\\d", "", res.a))
elety.b <- eletyr(gsub("\\d", "", res.b))
dist <- data.frame(
frame = 1:nrow(pdb.input$xyz),
dist = dist.xyz(
a = com(
pdb.input,
inds = atom.select(
pdb.input,
resno = as.numeric(gsub("\\D", "", res.a)),
elety = elety.a
)
),
b = com(
pdb.input,
inds = atom.select(
pdb.input,
resno = as.numeric(gsub("\\D", "", res.b)),
elety = elety.b
)
),
all.pairs = FALSE
)
) %>%
cbind(
.,
dist.loess = loess(
dist ~ frame,
data = .,
span = span
)$fitted
) %>%
mutate(pair = pair, .before = 1)
return(dist)
}