Newer
Older
"""
amp_stats.py
------------
Post-MD Analysis with ASE & AMP
Help & Usage: python amp_stats.py -h
- Max Winokan
[ Part of GromacsOnEUREKA
https://gitlab.eps.surrey.ac.uk/mw00368/GromacsOnEUREKA ]
"""
import argparse
import mout # https://github.com/mwinokan/MPyTools
import mcol # https://github.com/mwinokan/MPyTools
import mplot # https://github.com/mwinokan/MPyTools
import asemolplot as amp # https://github.com/mwinokan/AseMolPlot
from ase.io.trajectory import Trajectory
import numpy as np
##########################################################################
argparser = argparse.ArgumentParser(description='Various methods for analysing GROMACS output with ASE & AMP')
argparser.add_argument("-v","--verbosity",metavar="LEVEL",type=int,default=2,help="Verbosity. Default = 2")
argparser.add_argument("-convert", type=str,metavar='PDB',help="Convert a configuration/trajectory (.pdb/.gro from gmx trjconv) to an ASE Trajectory (.traj)")
argparser.add_argument("-dhb","--dna-hydrogen-bonds",type=str,metavar="PDB",help="Plot the hydrogen bond lengths between DNA chains")
argparser.add_argument("-rcom","--residue-centre-of-mass",type=str,metavar="PDB",help="Get, set, and shift residue CoM's")
argparser.add_argument("-ts","--time-step",type=float,metavar='STEP',help="Time step in picoseconds. Mostly for graph axes.")
argparser.add_argument("-ps","--print-script", type=mout.str2bool,nargs='?',const=True,default=False,help="Print the script name in console output.")
argparser.add_argument("-rpf","--respair-file",type=str,metavar="RPF",help="Specify a file containing pairs of hydrogen bonded residues.")
argparser.add_argument("-g","--gui",help="Open an ASE GUI.",action='store_true')
argparser.add_argument("-s","--show",help="Show plots.",action='store_true')
argparser.add_argument("-pa","--plot-alternate",help="Plot alternate h-bond. [-dhb]",action='store_true')
argparser.add_argument("-pt","--plot-total",help="Plot total h-bond. [-dhb]",action='store_true')
argparser.add_argument("-on","--optimise-naughty",help="Intelligently swap bonding with H2 groups. [-dhb]",action='store_true')
# argparser.add_argument("-sh41","--swap-H41",help="Swap H41 and H42",action='store_true')
argparser.add_argument("-nt","--no-tag",help="Don't attempt to tag atoms",action='store_true')
argparser.add_argument("-set",metavar="DAT",type=str,help="Set the CoM's of two residues to values in file.")
argparser.add_argument("-shift",type=float,help="Shift the CoM's of two residues apart by the given value.")
argparser.add_argument("-fmin","--fit-min",type=float,help="Begin all fitting at. [-dhb]")
argparser.add_argument("-fmax","--fit-max",type=float,help="End all fitting at. [-dhb]")
args = argparser.parse_args()
##########################################################################
printScript = args.print_script
# get the input filename
infile = args.convert
if args.output is None:
out_prefix = "amp_out"
mout.warningOut("Defaulted to output keyword 'amp_out'.",printScript=printScript,code=3)
else:
out_prefix = args.output
tagging = not args.no_tag
if infile.endswith(".pdb"):
amp.pdb2traj(infile,out_prefix+".traj",verbosity=2,tagging=tagging,printScript=printScript)
elif infile.endswith(".gro"):
amp.gro2traj(infile,out_prefix+".traj",verbosity=2,tagging=tagging,printScript=printScript)
elif infile.endswith(".xyz"):
amp.xyz2traj(infile,out_prefix+".traj",verbosity=2,printScript=printScript)
else:
mout.errorOut("Unrecognised file type: "+mcol.file+infile,fatal=True)
if args.gui:
import os
os.system("ase gui "+out_prefix+".traj")
if args.optimise_naughty is None: args.optimise_naughty = False
##### <ARGUMENTS> #####
print_match = False
print_bond_candidates = False
show_plots = args.show
plot_alternate = args.plot_alternate
optimise_naughty = args.optimise_naughty
fit_min = args.fit_min
fit_max = args.fit_max
fitOrder=1
##### </ARGUMENTS> #####
if args.output is not None:
datfile = args.output+".dat"
else:
mout.warningOut("Defaulted to datafile 'dhb.dat'.",printScript=printScript)
datfile = "dhb.dat"
output_dat = open(datfile,'w')
output_dat.write('# name, value, (error), unit \n')
output_dat.close()
output_dat = open(datfile,'a')
if infile.endswith(".pdb"):
file_is_gro = False
elif infile.endswith(".gro"):
file_is_gro = True
if args.respair_file is None:
mout.errorOut("RPF required for .gro files, specify with "+mcol.arg+"-rpf <FILE>",fatal=True)
else:
mout.errorOut("Unsupported file type.",fatal=True)
if args.output is None:
out_prefix = "amp_out"
mout.warningOut("Defaulted to output keyword 'amp_out'.",printScript=printScript,code=3)
else:
out_prefix = args.output
mout.headerOut("Looking for DNA hydrogen-bonds in"+mcol.file+" "+infile)
searching = True
# initialise lists:
dt_chainlist=[]
dt_reslist=[]
dt_O4_indexlist=[]
dt_H3_indexlist=[]
dt_N3_indexlist=[] # alternate
da_H62_indexlist=[] # naughty
da_N6_indexlist=[] # alternate
dc_H42_indexlist=[] # naughty
dc_O2_indexlist=[]
dc_N4_indexlist=[] # alternate
dg_H22_indexlist=[] # naughty
dg_N1_indexlist=[] # alternate
dg_N2_indexlist=[] # alternate
# tautomers
dt_is_tautomer=[] # tautomer
da_is_tautomer=[] # tautomer
taglist=[]
"""
Naughty list:
* DA/ADE9 H61 & H62
* DC/CTSN H41 & H42
* DG/GUA9 H21 & H22
"""
if not file_is_gro:
# Parse the PDB to get the properties of relevant atoms
with open(infile,"r") as input_pdb:
for line in input_pdb:
if searching:
if line.startswith("MODEL"):
searching = False
if line.startswith("ENDMDL"):
break
if line.startswith("TER"):
continue
else:
stripped_line = line.strip()
split_line = stripped_line.split()
if len(split_line) == 10:
# res_string = split_line[3][:-1]+split_line[4]
res_string = line[17:22].strip()+split_line[4]
chain_string = split_line[4]
elif len(split_line) == 11:
# res_string = split_line[3][:-1]+split_line[4]
res_string = line[17:22].strip()+split_line[4]
chain_string = split_line[3][-1]
elif len(split_line) == 12:
res_string = split_line[3]+split_line[5]
chain_string = split_line[4]
else:
mout.errorOut("Weird PDB! # Data Columns = "+str(len(split_line)),fatal=True)
atom_index = int(split_line[1])
if ("ADTH" in res_string or "THTH" in res_string) and args.respair_file is None:
mout.errorOut("Use a respair-file with tautomers!",fatal=True)
mout.warningOut("Atom indices don't start with 1!")
index_adj=0
if " DT" in stripped_line or "THMN" in stripped_line or "TCHG" in stripped_line or "TMUL" in stripped_line or "THCH" in stripped_line or "THTH" in stripped_line:
if " O4 " in stripped_line:
dt_reslist.append(res_string)
dt_chainlist.append(chain_string)
dt_O4_indexlist.append(atom_index)
if res_string.startswith("THTH"):
dt_is_tautomer.append(True)
else:
dt_is_tautomer.append(False)
if print_match: print(mcol.bold+stripped_line+mcol.clear)
elif " H3 " in stripped_line:
dt_H3_indexlist.append(atom_index)
if print_match: print(mcol.bold+stripped_line+mcol.clear)
elif " N3 " in stripped_line:
dt_N3_indexlist.append(atom_index)
if print_match: print(mcol.bold+stripped_line+mcol.clear)
elif print_non_match:
print(stripped_line)
elif "DA" in stripped_line or "ADE9" in stripped_line or "ACHG" in stripped_line or "AMUL" in stripped_line or "ADCH" in stripped_line or "ADTH" in stripped_line:
if " H61 " in stripped_line:
da_H61_indexlist.append(atom_index)
if print_match: print(mcol.error+stripped_line+mcol.clear)
elif " H62 " in stripped_line:
da_H62_indexlist.append(atom_index)
if print_match: print(mcol.error+stripped_line+mcol.clear)
da_reslist.append(res_string)
if res_string.startswith("ADTH"):
da_is_tautomer.append(True)
else:
da_is_tautomer.append(False)
da_chainlist.append(chain_string)
da_N1_indexlist.append(atom_index)
if print_match: print(mcol.error+stripped_line+mcol.clear)
elif " N6 " in stripped_line:
da_N6_indexlist.append(atom_index)
if print_match: print(mcol.error+stripped_line+mcol.clear)
elif print_non_match:
print(stripped_line)
elif "DC" in stripped_line or "CTSN" in stripped_line:
if " H41 " in stripped_line:
dc_reslist.append(res_string)
dc_chainlist.append(chain_string)
dc_H41_indexlist.append(atom_index)
if print_match: print(mcol.success+stripped_line+mcol.clear)
elif " H42 " in stripped_line:
dc_H42_indexlist.append(atom_index)
if print_match: print(mcol.success+stripped_line+mcol.clear)
elif " N3 " in stripped_line:
dc_N3_indexlist.append(atom_index)
if print_match: print(mcol.success+stripped_line+mcol.clear)
elif " O2 " in stripped_line:
dc_O2_indexlist.append(atom_index)
if print_match: print(mcol.success+stripped_line+mcol.clear)
elif " N4 " in stripped_line:
dc_N4_indexlist.append(atom_index)
if print_match: print(mcol.success+stripped_line+mcol.clear)
elif print_non_match:
print(stripped_line)
elif "DG" in stripped_line or "GUA9" in stripped_line:
if " O6 " in stripped_line:
dg_reslist.append(res_string)
dg_chainlist.append(chain_string)
dg_O6_indexlist.append(atom_index)
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif " H1 " in stripped_line:
dg_H1_indexlist.append(atom_index)
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif " H21 " in stripped_line:
dg_H21_indexlist.append(atom_index)
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif " H22 " in stripped_line:
dg_H22_indexlist.append(atom_index)
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif " N1 " in stripped_line:
dg_N1_indexlist.append(atom_index)
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif " N2 " in stripped_line:
dg_N2_indexlist.append(atom_index)
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif print_non_match:
print(stripped_line)
if args.no_tag is not None:
tag_string = split_line[2].replace("'","")
try:
tag = int(''.join(filter(lambda i: i.isdigit(), tag_string)))
except:
if tag_warning: mout.warningOut("Untaggable atom '"+tag_string+"'")
tag = 0
taglist.append(tag)
else:
# There is no chain information in a .gro so an RPF is also needed!
line_number = 0
max_line_number = 1000
# Parse the GRO to get the properties of relevant atoms
with open(infile,"r") as input_gro:
for line in input_gro:
if line_number == 1:
max_line_number = int(line.strip().split()[0]) + 2
if line_number == max_line_number:
break
if line_number > 1:
split_line = stripped_line.split()
tag_string = split_line[1].replace("'","")
atom_tag = int(''.join(filter(lambda i: i.isdigit(),tag_string)))
if "DT" in stripped_line or "THMN" in stripped_line:
if print_match: print(mcol.bold+stripped_line+mcol.clear)
elif " N3 " in stripped_line:
if print_match: print(mcol.bold+stripped_line+mcol.clear)
elif print_non_match:
print(stripped_line)
elif "DA" in stripped_line or "ADE9" in stripped_line:
elif " H62 " in stripped_line:
da_H62_indexlist.append(atom_index)
if print_match: print(mcol.error+stripped_line+mcol.clear)
if print_match: print(mcol.error+stripped_line+mcol.clear)
elif " N6 " in stripped_line:
if print_match: print(mcol.error+stripped_line+mcol.clear)
elif print_non_match:
print(stripped_line)
elif "DC" in stripped_line or "CTSN" in stripped_line:
if " H41 " in stripped_line:
elif " H42 " in stripped_line:
dc_H42_indexlist.append(atom_index)
if print_match: print(mcol.success+stripped_line+mcol.clear)
if print_match: print(mcol.success+stripped_line+mcol.clear)
elif " N4 " in stripped_line:
if print_match: print(mcol.success+stripped_line+mcol.clear)
elif print_non_match:
print(stripped_line)
elif "DG" in stripped_line or "GUA9" in stripped_line:
dg_H1_indexlist.append(atom_index)
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear+" "+str(atom_tag)+" "+str(atom_index))
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif " H22 " in stripped_line:
dg_H22_indexlist.append(atom_index)
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif " N1 " in stripped_line:
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif " N2 " in stripped_line:
if print_match: print(mcol.file+mcol.bold+stripped_line+mcol.clear)
elif print_non_match:
print(stripped_line)
mout.out("Found "+mcol.result+str(len(da_chainlist))+mcol.arg+" DA Residues")
mout.out("Found "+mcol.result+str(len(dt_chainlist))+mcol.arg+" DT Residues")
mout.out("Found "+mcol.result+str(len(dg_chainlist))+mcol.arg+" DG Residues")
mout.out("Found "+mcol.result+str(len(dc_chainlist))+mcol.arg+" DC Residues")
no_AT_pairs=False
no_GC_pairs=False
# Do some checks:
if len(da_chainlist) + len(dt_chainlist) + len(dg_chainlist) + len(dc_chainlist) < 2:
mout.errorOut("Less than two bases. Maybe there are no models defined in the PDB?",fatal=True)
if len(da_chainlist) == 0 ^ len(dt_chainlist) == 0:
mout.warningOut("Not enough bases for AT pairs")
no_AT_pairs=True
if len(dg_chainlist) == 0 ^ len(dc_chainlist) == 0:
mout.warningOut("Not enough bases for GC pairs")
no_GC_pairs=True
if no_AT_pairs and no_GC_pairs:
mout.errorOut("No matching base pairs",fatal=True)
atoms = amp.read(infile,index="0",printScript=False,verbosity=verbosity-1)
if args.no_tag is not None:
# set the tags
for index,tag in enumerate(taglist):
if atoms[index].tag == 0:
atoms[index].tag = tag
elif atoms[index].tag != tag:
atoms[index].tag = tag
if tag_warning: mout.warningOut("Existing tag overwritten, index="+str(index)+" old_tag="+str(atoms[index].tag)+" new_tag="+str(tag))
if args.gui:
amp.view(atoms)
if args.respair_file is not None:
# Use respairs defined in a file instead of searching.
# Open the file
respair_file_read = open(args.respair_file,'r').read()
### A-T ###
# initialise lists:
dt_O4_da_H61_respairs=[]
dt_H3_da_N1_respairs=[]
dt_O4_da_H61_bondpairs=[] # normal
dt_H3_da_N1_bondpairs=[] # normal
dt_O4_da_H62_bondpairs=[] # naughty
dt_O4_da_N6_bondpairs=[] # total
dt_N3_da_N1_bondpairs=[] # total
# Loop over all the DT residues
for dt_index,dt_residue in enumerate(dt_reslist):
# Find the number of definitions in the RPF
num_defs = respair_file_read.count(dt_residue+" ")
# Correct number of definitions:
if num_defs == 1:
mout.out("RPF definition found for "+mcol.varName+dt_residue)
# Loop over the RPF again
respair_file = open(args.respair_file)
for num,line in enumerate(respair_file,1):
# Find the correct line in the RPF:
if dt_residue+" " in line:
# Get the the other residue
residues = line.split()
for index,residue in enumerate(residues):
if residue != dt_residue:
paired_res = residue
dt_O4_da_H61_respairs.append([dt_residue,residue])
dt_H3_da_N1_respairs.append([dt_residue,residue])
if not residue.startswith("DA") and not residue.startswith("ADE9") and not residue.startswith("ADTH"):
mout.errorOut("Bad RPF definition in "+mcol.file+args.respair_file+mcol.error+". "+mcol.varName+dt_residue+mcol.error+" cannot be paired with "+mcol.varName+residue,fatal=True)
respair_file.close()
# Get the index of the bonded DA residue
for da_index,da_residue in enumerate(da_reslist):
if paired_res == da_residue:
if not dt_is_tautomer[dt_index]:
dt_O4_da_H61_bondpairs.append([dt_O4_indexlist[dt_index]+index_adj,da_H61_indexlist[da_index]+index_adj])
dt_O4_da_H62_bondpairs.append([dt_O4_indexlist[dt_index]+index_adj,da_H62_indexlist[da_index]+index_adj])
dt_H3_da_N1_bondpairs.append([dt_H3_indexlist[dt_index]+index_adj,da_N1_indexlist[da_index]+index_adj])
da_N6_da_H61_bondpairs.append([da_N6_indexlist[da_index]+index_adj,da_H61_indexlist[da_index]+index_adj]) # alternate
dt_H3_dt_N3_bondpairs.append([dt_H3_indexlist[dt_index]+index_adj,dt_N3_indexlist[dt_index]+index_adj]) # alternate
dt_O4_da_N6_bondpairs.append([dt_O4_indexlist[dt_index]+index_adj,da_N6_indexlist[da_index]+index_adj]) # total
dt_N3_da_N1_bondpairs.append([dt_N3_indexlist[dt_index]+index_adj,da_N1_indexlist[da_index]+index_adj]) # total
elif num_defs > 1:
mout.errorOut(str(num_defs)+" respair definitions for "+mcol.varName+dt_residue+mcol.error+" in "+mcol.file+args.respair_file,fatal=True)
elif num_defs < 1:
mout.errorOut("No respair definition for "+mcol.varName+dt_residue+mcol.error+" in "+mcol.file+args.respair_file,fatal=True)
### G-C ###
# initialise lists:
dc_H41_dg_O6_respairs=[]
dc_N3_dg_H1_respairs=[]
dc_O2_dg_H21_respairs=[]
dc_H41_dg_O6_bondpairs=[] # normal
dc_N3_dg_H1_bondpairs=[] # normal
dc_O2_dg_H21_bondpairs=[] # normal
dc_H42_dg_O6_bondpairs=[] # naughty
dc_O2_dg_H22_bondpairs=[] # naughty
dc_H41_dc_N4_bondpairs=[] # alternate
dg_N1_dg_H1_bondpairs=[] # alternate
dg_N2_dg_H21_bondpairs=[] # alternate
dc_N4_dg_O6_bondpairs=[] # total
dc_N3_dg_N1_bondpairs=[] # total
dc_O2_dg_N2_bondpairs=[] # total
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
# Loop over all the DC residues
for dc_index,dc_residue in enumerate(dc_reslist):
# Find the number of definitions in the RPF
num_defs = respair_file_read.count(dc_residue+" ")
# Correct number of definitions:
if num_defs == 1:
mout.out("RPF definition found for "+mcol.varName+dc_residue)
# Loop over the RPF again
respair_file = open(args.respair_file)
for num,line in enumerate(respair_file,1):
# Find the correct line in the RPF:
if dc_residue+" " in line:
# Get the the other residue
residues = line.split()
for residue in residues:
if residue != dc_residue:
paired_res = residue
dc_H41_dg_O6_respairs.append([dc_residue,residue])
dc_N3_dg_H1_respairs.append([dc_residue,residue])
dc_O2_dg_H21_respairs.append([dc_residue,residue])
if not residue.startswith("DG") and not residue.startswith("GUA9"):
mout.errorOut("Bad RPF definition in "+mcol.file+args.respair_file+mcol.error+". "+mcol.varName+dt_residue+mcol.error+" cannot be paired with "+mcol.varName+residue,fatal=True)
respair_file.close()
# Get the index of the bonded DG residue
for dg_index,dg_residue in enumerate(dg_reslist):
if paired_res == dg_residue:
dc_H41_dg_O6_bondpairs.append([dc_H41_indexlist[dc_index]+index_adj,dg_O6_indexlist[dg_index]+index_adj])
dc_H42_dg_O6_bondpairs.append([dc_H42_indexlist[dc_index]+index_adj,dg_O6_indexlist[dg_index]+index_adj])
dc_N3_dg_H1_bondpairs.append([dc_N3_indexlist[dc_index]+index_adj,dg_H1_indexlist[dg_index]+index_adj])
dc_O2_dg_H21_bondpairs.append([dc_O2_indexlist[dc_index]+index_adj,dg_H21_indexlist[dg_index]+index_adj])
dc_O2_dg_H22_bondpairs.append([dc_O2_indexlist[dc_index]+index_adj,dg_H22_indexlist[dg_index]+index_adj])
dc_H41_dc_N4_bondpairs.append([dc_H41_indexlist[dc_index]+index_adj,dc_N4_indexlist[dc_index]+index_adj]) # alternate
dg_N1_dg_H1_bondpairs.append([dg_N1_indexlist[dg_index]+index_adj,dg_H1_indexlist[dg_index]+index_adj]) # alternate
dg_N2_dg_H21_bondpairs.append([dg_N2_indexlist[dg_index]+index_adj,dg_H21_indexlist[dg_index]+index_adj]) # alternate
dc_N4_dg_O6_bondpairs.append([dc_N4_indexlist[dc_index]+index_adj,dg_O6_indexlist[dg_index]+index_adj]) # total
dc_N3_dg_N1_bondpairs.append([dc_N3_indexlist[dc_index]+index_adj,dg_N1_indexlist[dg_index]+index_adj]) # total
dc_O2_dg_N2_bondpairs.append([dc_O2_indexlist[dc_index]+index_adj,dg_N2_indexlist[dg_index]+index_adj]) # total
mout.errorOut(str(num_defs)+" respair definitions for "+mcol.varName+dc_residue+mcol.error+" in "+mcol.file+args.respair_file,fatal=True)
mout.warningOut("No respair definition for "+mcol.varName+dc_residue+mcol.warning+" in "+mcol.file+args.respair_file)
else:
##################### A-T ##########################################
##################### A-T ##########################################
##################### A-T ##########################################
mout.headerOut("Looking for A-T base pairs:")
dt_da_maxbondnum = min([len(da_chainlist),len(dt_chainlist)])
###### DT:O4 ... DA:H61 ######
# initialise lists
dt_O4_da_H61_bondpairs=[]
dt_O4_da_H61_chainpairs=[]
dt_O4_da_H61_respairs=[]
dt_O4_da_H61_distances=[]
da_N6_da_H61_bondpairs=[] # alternate
dt_O4_da_H62_bondpairs=[] # naughty
dt_O4_da_N6_bondpairs=[] # total
# loop over all DT residues
for dt_index,dt_chain in enumerate(dt_chainlist):
# get the O4 Atom object
dt_O4_atom = atoms[dt_O4_indexlist[dt_index]+index_adj]
# initialise arrays
closest_distance=None
# loop over all DA residues
for da_index,da_H61_index in enumerate(da_H61_indexlist):
# get the H61 Atom object
da_H61_atom = atoms[da_H61_indexlist[da_index]+index_adj]
# find and append the distance between the O4 and H61
this_dist=atoms.get_distance(dt_O4_atom.index,da_H61_atom.index)
# store information about leading candidate
if closest_distance is None or this_dist < closest_distance:
if dt_chainlist[dt_index] != da_chainlist[da_index]:
closest_distance = this_dist
closest_index = da_index
closest_atom_index = da_H61_index
# check if in same chain
if dt_chainlist[dt_index] == da_chainlist[closest_index]:
mout.warningOut("Residues "+dt_reslist[dt_index]+" and "+da_reslist[closest_index]+" are in the same chain!")
# print possible bond pairs
if print_bond_candidates:
print(dt_reslist[dt_index]," bonded to ",da_reslist[closest_index],"? dist(O4...H61) =",closest_distance)
# store the possible bond pairs
dt_O4_da_H61_bondpairs.append([dt_O4_atom.index,closest_atom_index+index_adj])
dt_O4_da_H61_chainpairs.append([dt_chainlist[dt_index],da_chainlist[closest_index]])
dt_O4_da_H61_respairs.append([dt_reslist[dt_index],da_reslist[closest_index]])
dt_O4_da_H61_distances.append(closest_distance)
da_N6_da_H61_bondpairs.append([da_N6_indexlist[closest_index]+index_adj,closest_atom_index+index_adj]) # alternate
dt_O4_da_H62_bondpairs.append([dt_O4_atom.index,da_H62_indexlist[closest_index]+index_adj]) # naughty
dt_O4_da_N6_bondpairs.append([dt_O4_atom.index,da_N6_indexlist[closest_index]+index_adj]) # total
mout.headerOut("Identified "+mcol.result+str(len(dt_O4_da_H61_bondpairs))+mcol.arg+" possible O4...H61 bonds")
if optimise_naughty:
mout.headerOut("Identified "+mcol.result+str(len(dt_O4_da_H62_bondpairs))+mcol.arg+" possible O4...H62 bonds"+mcol.warning+" *")
###### DT:H3 ... DA:N1 ######
# initialise lists
dt_H3_da_N1_bondpairs=[]
dt_H3_da_N1_chainpairs=[]
dt_H3_da_N1_respairs=[]
dt_H3_da_N1_distances=[]
dt_H3_dt_N3_bondpairs=[] # alternate
dt_N3_da_N1_bondpairs=[] # total
# loop over all DT residues
for dt_index,dt_chain in enumerate(dt_chainlist):
# get the H3 Atom object
dt_H3_atom = atoms[dt_H3_indexlist[dt_index]+index_adj]
# initialise arrays
closest_distance=None
# loop over all DA residues
for da_index,da_N1_index in enumerate(da_N1_indexlist):
# get the N1 Atom object
da_N1_atom = atoms[da_N1_indexlist[da_index]+index_adj]
# find and append the distance between the H3 and N1
this_dist=atoms.get_distance(dt_H3_atom.index,da_N1_atom.index)
# store information about leading candidate
if closest_distance is None or this_dist < closest_distance:
if dt_chainlist[dt_index] != da_chainlist[da_index]:
closest_distance = this_dist
closest_index = da_index
closest_atom_index = da_N1_index
# check if in same chain
if dt_chainlist[dt_index] == da_chainlist[closest_index]:
mout.warningOut("Residues "+dt_reslist[dt_index]+" and "+da_reslist[closest_index]+" are in the same chain!")
# print possible bond pairs
if print_bond_candidates:
print(dt_reslist[dt_index]," bonded to ",da_reslist[closest_index],"? dist(H3...N1) =",closest_distance)
# store the possible bond pairs
dt_H3_da_N1_bondpairs.append([dt_H3_atom.index,closest_atom_index+index_adj])
dt_H3_da_N1_chainpairs.append([dt_chainlist[dt_index],da_chainlist[closest_index]])
dt_H3_da_N1_respairs.append([dt_reslist[dt_index],da_reslist[closest_index]])
dt_H3_da_N1_distances.append(closest_distance)
dt_H3_dt_N3_bondpairs.append([dt_H3_atom.index,dt_N3_indexlist[dt_index]+index_adj]) # alternate
dt_N3_da_N1_bondpairs.append([dt_N3_indexlist[dt_index]+index_adj,closest_atom_index+index_adj]) # total
mout.headerOut("Identified "+mcol.result+str(len(dt_H3_da_N1_bondpairs))+mcol.arg+" possible H3...N1 bonds")
###### Check for problems
# i.e. when there are unequal numbers of bonds between A-T's
if len(dt_O4_da_H61_bondpairs) != len(dt_H3_da_N1_bondpairs):
mout.errorOut("Number of bondpairs not equal!",fatal=True)
# i.e. identify when a DNA base wants to bond to more than one complementary base
for index,respair in enumerate(dt_O4_da_H61_respairs):
# get the other residue pair
dt_H3_da_N1_respair = dt_H3_da_N1_respairs[index]
if respair != dt_H3_da_N1_respair:
mout.warningOut("Bad bond match!")
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
###### Remove longest bonds
# when there are too many base-pairs
if len(dt_O4_da_H61_bondpairs) > dt_da_maxbondnum:
# number of bonds to remove
bondtrimnum=-dt_da_maxbondnum+len(dt_O4_da_H61_bondpairs)
# write warning
mout.warningOut("Should be no more than "+str(dt_da_maxbondnum)+"! Will remove longest "+str(bondtrimnum)+" bonds.")
# initialise lists
combined_distances=[]
# sum distances for the two hydrogen bonds
for index,distance in enumerate(dt_O4_da_H61_distances):
combined_distances.append(distance+dt_H3_da_N1_distances[index])
# remove the longest bonds
for index in range(0,bondtrimnum):
# find longest bond
longest_distance=max(combined_distances)
longest_index=combined_distances.index(longest_distance)
# remove the longest bond
del combined_distances[longest_index]
del dt_O4_da_H61_bondpairs[longest_index]
del dt_O4_da_H61_chainpairs[longest_index]
del dt_O4_da_H61_respairs[longest_index]
del dt_O4_da_H61_distances[longest_index]
del dt_H3_da_N1_bondpairs[longest_index]
del dt_H3_da_N1_chainpairs[longest_index]
del dt_H3_da_N1_respairs[longest_index]
del dt_H3_da_N1_distances[longest_index]
del dt_H3_dt_N3_bondpairs[longest_index] # alternate
del da_N6_da_H61_bondpairs[longest_index] # alternate
del dt_O4_da_H62_bondpairs[longest_index] # naughty
del dt_O4_da_N6_bondpairs[longest_distance] # total
del dt_N3_da_N1_bondpairs[longest_distance] # total
##################### G-C ##########################################
##################### G-C ##########################################
##################### G-C ##########################################
mout.headerOut("Looking for G-C base pairs:")
dc_gc_maxbondnum = min([len(dc_chainlist),len(dg_chainlist)])
###### DC:H41 ... DG:O6 ######
# initialise lists
dc_H41_dg_O6_bondpairs=[]
dc_H41_dg_O6_chainpairs=[]
dc_H41_dg_O6_respairs=[]
dc_H41_dg_O6_distances=[]
dc_H41_dc_N4_bondpairs=[] # alternate
dc_H42_dg_O6_bondpairs=[] # naughty
dc_N4_dg_O6_bondpairs=[] # total
# loop over all DC residues
for dc_index,dc_chain in enumerate(dc_chainlist):
# get the O4 Atom object
dc_H41_atom = atoms[dc_H41_indexlist[dc_index]+index_adj]
# initialise arrays
closest_distance=None
# loop over all DG residues
for dg_index,dg_O6_index in enumerate(dg_O6_indexlist):
# get the H61 Atom object
dg_O6_atom = atoms[dg_O6_indexlist[dg_index]+index_adj]
# find the distance between the H41 and O6 atoms
this_dist=atoms.get_distance(dc_H41_atom.index,dg_O6_atom.index)
# store information about leading candidate
if closest_distance is None or this_dist < closest_distance:
if dc_chainlist[dc_index] != dg_chainlist[dg_index]:
closest_distance = this_dist
closest_index = dg_index
closest_atom_index = dg_O6_index
# check if in same chain
if dc_chainlist[dc_index] == dg_chainlist[closest_index]:
mout.warningOut("Residues "+dc_reslist[dc_index]+" and "+dg_reslist[closest_index]+" are in the same chain!")
# print possible bond pairs
if print_bond_candidates:
print(dc_reslist[dc_index]," bonded to ",dg_reslist[closest_index],"? dist(H41...O6) =",closest_distance)
# store the possible bond pairs
dc_H41_dg_O6_bondpairs.append([dc_H41_atom.index,closest_atom_index+index_adj])
dc_H41_dg_O6_chainpairs.append([dc_chainlist[dc_index],dg_chainlist[closest_index]])
dc_H41_dg_O6_respairs.append([dc_reslist[dc_index],dg_reslist[closest_index]])
dc_H41_dg_O6_distances.append(closest_distance)
dc_H41_dc_N4_bondpairs.append([dc_H41_atom.index,dc_N4_indexlist[dc_index]+index_adj]) # alternate
dc_H42_dg_O6_bondpairs.append([dc_H42_indexlist[dc_index]+index_adj,closest_atom_index+index_adj]) # naughty
dc_N4_dg_O6_bondpairs.append([dc_N4_indexlist[dc_index]+index_adj,closest_atom_index+index_adj]) # total
mout.headerOut("Identified "+mcol.result+str(len(dc_H41_dg_O6_bondpairs))+mcol.arg+" possible H41...O6 bonds")
if optimise_naughty:
mout.headerOut("Identified "+mcol.result+str(len(dc_H42_dg_O6_bondpairs))+mcol.arg+" possible H42...O6 bonds"+mcol.warning+" *")
###### DC:N3 ... DG:H1 ######
# initialise lists
dc_N3_dg_H1_bondpairs=[]
dc_N3_dg_H1_chainpairs=[]
dc_N3_dg_H1_respairs=[]
dc_N3_dg_H1_distances=[]
dg_N1_dg_H1_bondpairs=[] # alternate
dc_N3_dg_N1_bondpairs=[] # total
# loop over all DC residues
for dc_index,dc_chain in enumerate(dc_chainlist):
# get the N3 Atom object
dc_N3_atom = atoms[dc_N3_indexlist[dc_index]+index_adj]
# initialise arrays
closest_distance=None
# loop over all DG residues
for dg_index,dg_H1_index in enumerate(dg_H1_indexlist):
# get the H61 Atom object
dg_H1_atom = atoms[dg_H1_indexlist[dg_index]+index_adj]
# find the distance between the N3 and H1 atoms
this_dist=atoms.get_distance(dc_N3_atom.index,dg_H1_atom.index)
# store information about leading candidate
if closest_distance is None or this_dist < closest_distance:
if dc_chainlist[dc_index] != dg_chainlist[dg_index]:
closest_distance = this_dist
closest_index = dg_index
closest_atom_index = dg_H1_index
# check if in same chain
if dc_chainlist[dc_index] == dg_chainlist[closest_index]:
mout.warningOut("Residues "+dc_reslist[dc_index]+" and "+dg_reslist[closest_index]+" are in the same chain!")
# print possible bond pairs
if print_bond_candidates:
print(dc_reslist[dc_index]," bonded to ",dg_reslist[closest_index],"? dist(N3...H1) =",closest_distance)
# store the possible bond pairs
dc_N3_dg_H1_bondpairs.append([dc_N3_atom.index,closest_atom_index+index_adj])
dc_N3_dg_H1_chainpairs.append([dc_chainlist[dc_index],dg_chainlist[closest_index]])
dc_N3_dg_H1_respairs.append([dc_reslist[dc_index],dg_reslist[closest_index]])
dc_N3_dg_H1_distances.append(closest_distance)
dg_N1_dg_H1_bondpairs.append([dg_N1_indexlist[closest_index]+index_adj,closest_atom_index+index_adj]) # alternate
dc_N3_dg_N1_bondpairs.append([dc_N3_atom.index,dg_N1_indexlist[closest_index]+index_adj]) # total
mout.headerOut("Identified "+mcol.result+str(len(dc_N3_dg_H1_bondpairs))+mcol.arg+" possible N3...H1 bonds")
###### DC:O2 ... DG:H21 ######
# initialise lists
dc_O2_dg_H21_bondpairs=[]
dc_O2_dg_H21_chainpairs=[]
dc_O2_dg_H21_respairs=[]
dc_O2_dg_H21_distances=[]
dg_N2_dg_H21_bondpairs=[] # alternate
dc_O2_dg_H22_bondpairs=[] # naughty
dc_O2_dg_N2_bondpairs=[] # total
# loop over all DC residues
for dc_index,dc_chain in enumerate(dc_chainlist):
# get the O2 Atom object
dc_O2_atom = atoms[dc_O2_indexlist[dc_index]+index_adj]
# initialise arrays
closest_distance=None
# loop over all DG residues
for dg_index,dg_H21_index in enumerate(dg_H21_indexlist):
# get the H61 Atom object
dg_H21_atom = atoms[dg_H21_indexlist[dg_index]+index_adj]
# find the distance between the O2 and H21 atoms
this_dist=atoms.get_distance(dc_O2_atom.index,dg_H21_atom.index)
# store information about leading candidate
if closest_distance is None or this_dist < closest_distance:
if dc_chainlist[dc_index] != dg_chainlist[dg_index]:
closest_distance = this_dist
closest_index = dg_index
closest_atom_index = dg_H21_index
# check if in same chain
if dc_chainlist[dc_index] == dg_chainlist[closest_index]:
mout.warningOut("Residues "+dc_reslist[dc_index]+" and "+dg_reslist[closest_index]+" are in the same chain!")
# print possible bond pairs
if print_bond_candidates:
print(dc_reslist[dc_index]," bonded to ",dg_reslist[closest_index],"? dist(O2...H21) =",closest_distance)
# store the possible bond pairs
dc_O2_dg_H21_bondpairs.append([dc_O2_atom.index,closest_atom_index+index_adj])
dc_O2_dg_H21_chainpairs.append([dc_chainlist[dc_index],dg_chainlist[closest_index]])
dc_O2_dg_H21_respairs.append([dc_reslist[dc_index],dg_reslist[closest_index]])
dc_O2_dg_H21_distances.append(closest_distance)
dg_N2_dg_H21_bondpairs.append([dg_N2_indexlist[closest_index]+index_adj,closest_atom_index+index_adj]) # alternate
dc_O2_dg_H22_bondpairs.append([dc_O2_atom.index,dg_H22_indexlist[closest_index]+index_adj]) # naughty
dc_O2_dg_N2_bondpairs.append([dc_O2_atom.index,dg_N2_indexlist[closest_index]+index_adj]) # total
mout.headerOut("Identified "+mcol.result+str(len(dc_O2_dg_H21_bondpairs))+mcol.arg+" possible O2...H21 bonds")
if optimise_naughty:
mout.headerOut("Identified "+mcol.result+str(len(dc_O2_dg_H22_bondpairs))+mcol.arg+" possible O2...H22 bonds"+mcol.warning+" *")
###### Check for problems
# i.e. when there are unequal numbers of bonds between A-T's
if len(dc_H41_dg_O6_bondpairs) != len(dc_N3_dg_H1_bondpairs) or len(dc_N3_dg_H1_bondpairs) != len(dc_O2_dg_H21_bondpairs):
mout.errorOut("Number of bondpairs not equal!",fatal=True)
# i.e. identify when a DNA base wants to bond to more than one complementary base
for index,respair in enumerate(dc_H41_dg_O6_respairs):
# get the other residue pair
dc_N3_dg_H1_respair = dc_N3_dg_H1_respairs[index]
dc_O2_dg_H21_respair = dc_O2_dg_H21_respairs[index]
if respair != dc_N3_dg_H1_respair or dc_N3_dg_H1_respair != dc_O2_dg_H21_respair:
mout.warningOut("Bad bond match!")
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
###### Remove longest bonds
# when there are too many base-pairs
if len(dc_H41_dg_O6_bondpairs) > dc_gc_maxbondnum:
# number of bonds to remove
bondtrimnum=-dc_gc_maxbondnum+len(dc_H41_dg_O6_bondpairs)
# write warning
mout.warningOut("Should be no more than "+str(dc_gc_maxbondnum)+"! Will remove longest "+str(bondtrimnum)+" bonds.")
# initialise lists
combined_distances=[]
# sum distances for the three hydrogen bonds
for index,distance in enumerate(dc_H41_dg_O6_distances):
combined_distances.append(distance+dc_N3_dg_H1_distances[index]+dc_O2_dg_H21_distances[index])
# remove the longest bonds
for index in range(0,bondtrimnum):
# find longest bond
longest_distance=max(combined_distances)
longest_index=combined_distances.index(longest_distance)
# remove the longest bond
del combined_distances[longest_index]
del dc_H41_dg_O6_bondpairs[longest_index]
del dc_H41_dg_O6_chainpairs[longest_index]
del dc_H41_dg_O6_respairs[longest_index]
del dc_H41_dg_O6_distances[longest_index]
del dc_N3_dg_H1_bondpairs[longest_index]
del dc_N3_dg_H1_chainpairs[longest_index]