#' Tabulate restraint errors from a batch violation report
#'
#' Reads the whitespace-delimited table in `input.file`, keeps the rows whose
#' mean error and occurrence exceed the given thresholds, derives the
#' `map.atom.*` names used as join keys, and left-joins the bounds found in
#' the restraint table `rst`.
#'
#' Columns of `input.file` are addressed by position: 1-3 and 5-7 describe the
#' two atoms, 9 is the target, 11 to `10 + states` are the per-state values,
#' and the error statistics are taken from the last seven columns.
#'
#' @param input.file Path to the violation report. The first
#'   `states * 4 + 29` lines are skipped and `#` starts a comment.
#' @param rst Path to a headerless 8-column restraint table, read by position
#'   as res.1, res.1.name, map.atom.1, res.2, res.2.name, map.atom.2,
#'   lower.bound, upper.bound.
#' @param states Number of per-state columns in the report (positive whole
#'   number).
#' @param error.thresh Rows are kept when `mean.error > error.thresh`.
#' @param occurence.thresh Rows are kept when
#'   `occurence > occurence.thresh * states`.
#' @param sort `'error'` orders by decreasing mean error, `'residue'` orders by
#'   `res.1` then `res.2`; any other value leaves the row order untouched.
#' @param offset Currently unused. NOTE(review): residue numbers are shifted
#'   by a hard-coded 10 below; confirm whether `offset` was meant to control
#'   that shift.
#' @return A list with `violations` (the filtered, joined table) and `summary`
#'   (labelled atoms plus target, mean.error, std.error and occurence).
rst.error.batchr <- function(input.file, rst = 'data/unambig_8columns.tbl',
                             states = 20,
                             error.thresh = 0.1, occurence.thresh = 0.2,
                             sort = 'error',
                             offset = 0) {
  stopifnot(
    is.numeric(states), length(states) == 1L, !is.na(states),
    states >= 1, states %% 1 == 0
  )

  # Remove the first ';', then the first ':', then the first '>' of each value.
  strip_punct <- function(x) {
    x %>%
      str_remove(';') %>%
      str_remove(':') %>%
      str_remove('>')
  }

  # Collapse DT/DT5/DT3 (and the C, G, A equivalents) to one-letter names;
  # every other residue name is returned unchanged.
  short_base <- function(x) {
    case_when(
      x %in% c('DT', 'DT5', 'DT3') ~ 'T',
      x %in% c('DC', 'DC5', 'DC3') ~ 'C',
      x %in% c('DG', 'DG5', 'DG3') ~ 'G',
      x %in% c('DA', 'DA5', 'DA3') ~ 'A',
      TRUE ~ x
    )
  }

  # Map individual atom names onto the grouped names used as join keys.
  # The original note says this mapping is taken from R/super.mapr.R.
  pseudo_atom <- function(res, atom) {
    case_when(
      res == 'LDP' & atom %in% c('HAL', 'HAM', 'HAN') ~ 'HM',
      res == 'T' & atom %in% c('H71', 'H72', 'H73') ~ 'M7',
      res == 'G' & atom %in% c('H21', 'H22') ~ 'HN2',
      res %in% c('T', 'G') & atom %in% c("H2''", "H2'") ~ "Q2'",
      res == 'G' & atom %in% c("H5''", "H5'") ~ "Q5'",
      TRUE ~ atom
    )
  }

  raw <- read_table(
    input.file,
    col_names = FALSE,
    skip = states * 4 + 29,
    comment = '#'
  )
  # Checked before the position-based renames so that an empty report yields
  # this message rather than a column-lookup failure.
  if (nrow(raw) == 0) {
    stop("No violations found")
  }

  n_col <- ncol(raw)
  # Exact old -> new names for the per-state columns. Prefix matching would
  # also catch e.g. X110 when looking for X11.
  state_lookup <- setNames(
    sprintf('X%d', 10L + seq_len(states)),
    paste0('state.', seq_len(states))
  )

  df <- raw %>%
    mutate(across(everything(), strip_punct)) %>%
    rename(
      atom.1 = X1,
      res.1.name = X2,
      res.1 = X3,
      atom.2 = X5,
      res.2.name = X6,
      res.2 = X7,
      target = X9,
      occurence = all_of(n_col),
      max.error = all_of(n_col - 1L),
      min.error = all_of(n_col - 3L),
      std.error = all_of(n_col - 4L),
      mean.error = all_of(n_col - 6L)
    ) %>%
    select(-X4, -X8, -X10, -all_of(c(n_col - 2L, n_col - 5L))) %>%
    rename(all_of(state_lookup)) %>%
    mutate(
      across(
        c(res.1, res.2, occurence, max.error, min.error, std.error,
          mean.error, target, starts_with('state')),
        as.numeric
      )
    ) %>%
    mutate(
      # Hard-coded residue renumbering (see the note on `offset`).
      res.1 = res.1 + 10,
      res.2 = res.2 + 10
    ) %>%
    filter(
      mean.error > error.thresh,
      occurence > occurence.thresh * states
    ) %>%
    mutate(across(c(atom.1, atom.2), ~ str_remove(.x, "\\*"))) %>%
    mutate(
      res.1.name = short_base(res.1.name),
      res.2.name = short_base(res.2.name),
      map.atom.1 = pseudo_atom(res.1.name, atom.1),
      map.atom.2 = pseudo_atom(res.2.name, atom.2)
    )

  if (nrow(df) == 0) {
    stop(paste("No violations found for error threshold", error.thresh))
  }

  ## Import and cleanup of unambiguous restraints (8 column file)
  rst.df <- read_table(rst, col_names = FALSE) %>%
    rename(
      res.1 = X1,
      res.1.name = X2,
      map.atom.1 = X3,
      res.2 = X4,
      res.2.name = X5,
      map.atom.2 = X6,
      lower.bound = X7,
      upper.bound = X8
    )

  ## Joining
  df <- left_join(
    df,
    rst.df,
    by = join_by(res.1, res.2, map.atom.1, map.atom.2, res.1.name, res.2.name)
  )

  # Any other value of `sort` keeps the current row order.
  if (sort == 'error') {
    df <- df %>% arrange(desc(mean.error))
  } else if (sort == 'residue') {
    df <- df %>% arrange(res.1, res.2)
  }

  # LDP atoms are labelled without a residue number and with the mapped atom
  # name; all other atoms are labelled <name><number>@<atom>.
  df.summary <- df %>%
    mutate(
      atom.1 = if_else(
        res.1.name != 'LDP',
        paste0(res.1.name, res.1, '@', atom.1),
        paste0(res.1.name, '@', map.atom.1)
      ),
      atom.2 = if_else(
        res.2.name != 'LDP',
        paste0(res.2.name, res.2, '@', atom.2),
        paste0(res.2.name, '@', map.atom.2)
      )
    ) %>%
    select(atom.1, atom.2, target, mean.error, std.error, occurence)

  list(
    violations = df,
    summary = df.summary
  )
}
# Restraint errors
Batch calculations for restraint errors.
In [1]:
# Run the batch analysis with an error threshold of 0.15 and an occurrence
# threshold of 0.
rstslt.violations <- rst.error.batchr(
  input.file = 'data/nmr_viol_doprstslt.txt',
  error.thresh = 0.15,
  occurence.thresh = 0
)

# Count the summary rows in which either atom label contains 'LDP28'.
# NOTE(review): rst.error.batchr labels LDP atoms as 'LDP@<atom>' (no residue
# number), so confirm this pattern can match anything.
rstslt.violations$summary %>%
  filter(str_detect(atom.1, 'LDP28') | str_detect(atom.2, 'LDP28')) %>%
  nrow()