Newer
Older
#!/bin/bash
source $MWSHPATH/colours.sh
source $MWSHPATH/out.sh
SEARCH_RTP=0
REPLACE=0
FIND=0
FORCE=0
EXPERT=0
VERBOSITY=0
PROGRESS=0
SUMMARY=0
REMOVE_CHAIN=0
TOP_PATH=/opt/proprietary-apps/gromacs/2018/share/gromacs/top/
while test $# -gt 0; do
case "$1" in
echo -e $colBold"Methods for "$colFunc"pdb_mod.sh"$colClear":"
echo
echo -e $colUnderline"Summary of a PDB$colClear:"
echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --summary <PDB>"$colClear
echo
echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --summary <PDB> --filter <CHAIN> [--residue-list-only]"$colClear
echo -e $colUnderline"Find a residue entry in a given force field$colClear:"
echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --search-rtp <RES> --force-field <FIELD> [--missing-atom <ATOM>]"$colClear
echo -e "* Add missing atom argument to highlight possible matches."
echo
echo -e $colUnderline"Find bonded interactions in a given force field$colClear:"
echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --search-rtp '<AT1> <AT2> ...' --force-field <FIELD> [--missing-atom <ATOM>]"$colClear
echo -e "* Add missing atom argument to highlight possible matches."
echo
echo -e $colUnderline"List all instances of an atom name in a PDB$colClear:"
echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --find-atom <ATOM> --input <PDB> "$colClear
echo
echo -e $colUnderline"Replace all entries with a given atom name with another in a PDB$colClear:"
echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --replace <OLD_ATOM> <NEW_ATOM> --input <PDB> [--output <PDB>] [--filter <RES/CHAIN>] "$colClear
echo -e "* Specify an output file with $colArg--output$colClear."
echo -e "* Only replace atoms in a given residue or chain with $colArg--filter$colClear."
echo
echo -e $colUnderline"Remove a chain from a PDB$colClear:"
echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --remove-chain <CHAIN> --input <PDB> [--output <PDB>]"$colClear
echo -e "* Specify an output file with $colArg--output$colClear."
echo
echo -e $colBold"Additional arguments for "$colFunc"pdb_mod.sh"$colClear":"
echo -e $colArg"-v|--verbose"$colClear" more verbose output"
echo -e $colArg"-x|--expert"$colClear" hide warnings"
echo -e $colArg"-f|--force"$colClear" ignore warnings"
# echo -e $colArg"-l|--lognum"$colClear" specify the output log number"
-s|--summary)
SUMMARY=1
;;
-srtp|--search-rtp)
shift
RES=$1
SEARCH_RTP=1
shift
;;
-sffb|--search-ff-bonded)
shift
ATOMTYPES_STRING=$1
SEARCH_FFB=1
shift
;;
-ff|--force-field)
shift
FIELD=$1
shift
;;
-matm|--missing-atom)
shift
MATM=$1
shift
;;
-r|--replace)
shift
MATM=$1
shift
NEWATM=$1
shift
REPLACE=1
;;
-rc|--remove-chain)
REMOVE_CHAIN=1
shift
CHAIN=$1
shift
;;
-fatm|--find-atom)
shift
MATM=$1
shift
FIND=1
;;
-i|--input)
shift
INPUT=$1
shift
;;
-o|--output)
shift
OUTPUT=$1
shift
;;
-filter|--filter)
shift
FILTER=$1
shift
;;
-v)
shift
VERBOSITY=1
;;
-f|--force)
shift
FORCE=1
;;
-x|--expert)
shift
EXPERT=1
;;
warningOut "Unrecognised CLI flag: $colArg$1"
break
;;
esac
done
# LOGFILE=_pdbmod"$LOGNUM".log
if [ $SEARCH_RTP -eq 1 ] ; then
TOP_PATH=/opt/proprietary-apps/gromacs/2018/share/gromacs/top/$FIELD.ff/
if [[ -z $FIELD ]] ; then
warningOut "Using CHARMM36-Custom."
TOP_PATH=$HOME/gmx_ff/charm36-custom.ff/
# Search the residue topologies file of the
# force field for the given residue
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
FOUND=0
for FILE in "$TOP_PATH*.rtp"; do
# echo $FILE
FOUND=$(grep -n "\[ $RES \]" -m1 $FILE | wc -l)
if [ $FOUND -eq 1 ] ; then break ; fi
done
if [ $FOUND -eq 0 ] ; then
errorOut "No $RES found in $FIELD!" 1
fi
# grep -n "\[ $RES \]" -m1 $FILE
LINE_NUM=$(grep -n "\[ $RES \]" $FILE | awk '{print $1}')
LINE_NUM=${LINE_NUM::-2}
# { grep -n -m1 "\[ $RES \]"; grep -n -m1 "\[ "; } << $TOP_PATH/*.rtp
RTP_LEN=$(cat $FILE | wc -l)
let "TAIL = $LINE_NUM - $RTP_LEN"
# tail $TOP_PATH/*.rtp -n $TAIL | grep -n -m1 '^$' $TOP_PATH/*.rtp
# tail $TOP_PATH/*.rtp -n $TAIL | grep -n -m5 "\[" $TOP_PATH/*.rtp
tail $FILE -n $TAIL > __temp__
# echo $RES > __temp__2
headerOut "$colArg$RES$colClear$colBold entry in $colFunc$FIELD"
headerOut "$colFile$FILE$colClear"
headerOut "line $LINE_NUM and onwards:"
while read -r LINE; do
# if [[ $LINE != "[ atoms ]" ]] && [[ $LINE == "[ *" ]] ; then
# exit
# fi
if [[ $LINE == "" ]] ; then exit ; fi
# echo "$LINE" >> __temp__2
if [[ ! -z $MATM ]] ; then
if [ $(echo "$LINE" | grep "${MATM:0:1}" | wc -l) -eq 1 ] ; then
echo "$LINE" | grep -E --color "${MATM:0:1}|${MATM:1:1}|${MATM:2:1}"
else
echo "$LINE"
fi
else
echo "$LINE"
fi
done < __temp__
rm __temp__*
fi
if [ $SEARCH_FFB -eq 1 ] ; then
TOP_PATH=/opt/proprietary-apps/gromacs/2018/share/gromacs/top/$FIELD.ff/
if [[ -z $FIELD ]] ; then
warningOut "Using CHARMM36-Custom."
TOP_PATH=$HOME/gmx_ff/charm36-custom.ff/
fi
NUM_ATOMTYPES=$(echo $ATOMTYPES_STRING | wc -w)
if [ $NUM_ATOMTYPES -eq 2 ] ;
echo "Searching for "
# grep "" $TOP_PATH/ffbonded.itp
fi
if [ $FIND -eq 1 ] ; then
# List all instances of an atom name in a pdb
if [[ -z $INPUT ]] ; then
errorOut "No input file specified."
exit
fi
echo -e "$colBold$colArg$MATM$colClear$colBold appears in "$colClear"$colFile$colBold$INPUT$colClear$colBold in$colClear:"
echo -e "$colUnderline$colVarName""residue$colClear $colUnderline$colVarName""chain$colClear $colResult$colUnderline""instances"$colClear
# nano __temp__
touch __temp__2
while read -r RESCHAIN; do
if [ $(grep "$RESCHAIN" __temp__2 | wc -l) -ne 0 ] ; then continue ; fi
echo $RESCHAIN >> __temp__2
done < __temp__
# nano __temp__2
while read -r RESCHAIN; do
echo -e $colVarName$RESCHAIN $colResult$(grep "$RESCHAIN" __temp__ | wc -l)"x" $colClear
done < __temp__2
rm __temp__*
fi
if [ $REPLACE -eq 1 ] ; then
# Replace all entries with a given atom name with another
if [[ -z $OUTPUT ]] ; then
if [ $EXPERT -eq 0 ] ; then
warningOut "No output file specified. Defaulted to $colFile""mod.pdb"
fi
OUTPUT="mod.pdb"
fi
if [[ -z $INPUT ]] ; then
errorOut "No input file specified."
exit
fi
if [[ $INPUT == $OUTPUT ]] ; then
if [ $FORCE -eq 1 ] ; then
if [ $EXPERT -eq 0 ] ; then
warningOut "Overwriting $colFile"$INPUT"$colWarning!"
fi
else
errorOut "Overwriting is not permitted. Select different input and output files. (ignore with -f)"
fi
if [[ -z $INPUT ]] ; then
errorOut "No input file specified."
exit
fi
if [[ "$OUTPUT" == "__temp__" ]] ; then
errorOut "Choose a different output name."
exit
fi
if [[ -z $FILTER ]] ; then
if [ $EXPERT -eq 0 ] ; then
warningOut "All instances of $colArg$MATM$colClear$colWarning being replaced."
fi
fi
if [ $VERBOSITY -gt 0 ] ; then
if [[ -z $FILTER ]] ; then
echo -ne $colFunc"pdb_mod.sh"$colClear": Renaming $colArg$MATM$colClear to $colArg$NEWATM$colClear... "
else
echo -ne $colFunc"pdb_mod.sh"$colClear": Renaming $colArg$MATM$colClear to $colArg$NEWATM$colClear in RESCHAINs containing $colArg$FILTER$colClear... "
fi
rm __temp__ 2> /dev/null
if [[ ! $INPUT == $OUTPUT ]] ; then
rm $OUTPUT 2> /dev/null
fi
COUNT=0
# IN_LINES=$(cat $INPUT | wc -l)
while read -r LINE; do
# if [ $PROGRESS -eq 1 ] ; then echo ; fi
if [[ $LINE == ATOM*$MATM* ]] ; then
OLSTRING=" "
OLSTRING=" $MATM""${OLSTRING:${#MATM}}"
NEWSTRING=" "
NEWSTRING=" $NEWATM""${NEWSTRING:${#NEWATM}}"
# echo "$LINE"
if [[ -z $FILTER ]] ; then
# echo "$LINE" | sed "s/ $MATM / $NEWATM /" >> __temp__
# echo "$LINE" | sed "s/ $MATM / $NEWATM /" >> $LOGFILE
echo "$LINE" | sed "s/$OLSTRING/$NEWSTRING/" >> __temp__
let "COUNT = COUNT + 1"
else
RESCHAIN=$(echo "$LINE" | awk '{print $4" "$5}')
if [[ $RESCHAIN == *$FILTER* ]] ; then
echo "$LINE" | sed "s/$OLSTRING/$NEWSTRING/" >> __temp__
let "COUNT = COUNT + 1"
else
fi
fi
else
fi
done < $INPUT
# if [ $PROGRESS -eq 1 ] ; then echo ; fi
mv __temp__ $OUTPUT
if [ $COUNT -eq 0 ] ; then
if [ $EXPERT -eq 0 ] ; then
echo ""
warningOut "No replacements made"
fi
else
if [[ -z $FILTER ]] ; then
successOut "$COUNT ($MATM->$NEWATM) replacements made"
else
successOut "$COUNT ($MATM->$NEWATM) replacements made in $FILTER"
fi
if [ $SUMMARY -eq 1 ] ; then
# List chains in the PDB with the number of atoms within
if [[ -z $INPUT ]] ; then
errorOut "No input file specified."
exit
fi
CHAINS=""
RESIDUES=""
NUM_ATOMS=""
NUM_CHAINS=0
LAST_CHAIN=""
LAST_RESIDUE=""
while read -r LINE; do
if [[ $LINE == "ATOM "* ]] || [[ $LINE == "HETATM"* ]] ; then
THIS_CHAIN=$(echo ${LINE:21:1})
if [[ ! -z $FILTER ]] ; then
if [[ $THIS_CHAIN == *$FILTER* ]] ; then
THIS_RESNAMENUM=$(echo ${LINE:17:3})$(echo ${LINE:23:3})
if [[ $THIS_RESNAMENUM != $LAST_RESNAMENUM ]] ; then
RESLIST=$RESLIST" "$(echo ${LINE:17:3})$(echo ${LINE:23:3})
fi
LAST_RESNAMENUM=$THIS_RESNAMENUM
fi
fi
if [[ $THIS_CHAIN != $LAST_CHAIN ]] ; then
# New chain!
LASTFIRST_RESNUM=$FIRST_RESNUM
FIRST_RESNUM=$(echo ${LINE:23:3})
# Store chain name
CHAINS="$CHAINS $THIS_CHAIN"
# Increment chain count
let "NUM_CHAINS = NUM_CHAINS + 1"
# if [[ $NUM_ATOMS != "" ]] ; then
if [ $NUM_CHAINS -gt 1 ] ; then
# Not first chain!
# Store number of atoms in chain
NUM_ATOMS="$NUM_ATOMS $THIS_NUM"
let "LAST_RESNUM = LAST_RESNUM - LASTFIRST_RESNUM + 1"
RESIDUES="$RESIDUES $LAST_RESNUM"
fi
THIS_NUM=0
THIS_RES_NUM=0
fi
let "THIS_NUM = THIS_NUM + 1"
LAST_CHAIN=$THIS_CHAIN
LAST_RESNUM=$(echo ${LINE:23:3})
fi
done < $INPUT
NUM_ATOMS="$NUM_ATOMS $THIS_NUM"
let "LAST_RESNUM = LAST_RESNUM - FIRST_RESNUM + 1"
RESIDUES="$RESIDUES $LAST_RESNUM"
if [ $RESLISTONLY -eq 0 ] ; then
echo -e $colBold$colFile$INPUT$colClear$colBold" contains $colResult$NUM_CHAINS$colClear $colBold""chains$colClear:"
fi
COUNT=1
for CHAIN in $CHAINS; do
if [ $RESLISTONLY -eq 0 ] ; then
echo -ne $colVarType"Chain "$colVarName$CHAIN$colClear" with "$colResult$(echo $NUM_ATOMS | cut -d " " -f $COUNT)$colClear"$colVarType atoms$colClear in "
echo -ne $colResult$(echo $RESIDUES | cut -d " " -f $COUNT)$colClear"$colVarType residues"$colClear
echo ""
fi
let "COUNT = COUNT + 1"
done
if [[ ! -z $FILTER ]] ; then
echo
echo -e $colBold$colVarType"Chain "$colArg$FILTER$colClear$colBold"'s "$colVarType"residues"$colClear":"
echo -e $colResult$RESLIST$colClear
fi
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
fi
if [ $REMOVE_CHAIN -eq 1 ] ; then
# Look for entries starting with "ATOM " or "HETATM"
# and remove if character 22 matches given chain
REMOVED_ATOM_NUM=0
REMOVED_TERM_NUM=0
REMOVED_CONECT_NUM=0
CONECT_REMOVE=""
if [[ -z $INPUT ]] ; then
errorOut "No input file specified."
exit
fi
if [[ -z $OUTPUT ]] ; then
if [ $EXPERT -eq 0 ] ; then
warningOut "No output file specified. Defaulted to $colFile""mod.pdb"
fi
OUTPUT="mod.pdb"
fi
if [[ $INPUT == $OUTPUT ]] ; then
if [ $FORCE -eq 1 ] ; then
if [ $EXPERT -eq 0 ] ; then
warningOut "Overwriting $colFile"$INPUT"$colWarning!"
fi
else
errorOut "Overwriting is not permitted. Select different input and output files."
exit
fi
fi
rm __temp__2 2> /dev/null
if [[ ! $INPUT == $OUTPUT ]] ; then
rm $OUTPUT 2> /dev/null
fi
grep "CONECT" $INPUT > __temp__
while read -r LINE; do
if [[ $LINE == "ATOM "* ]] || [[ $LINE == "HETATM"* ]] ; then
THIS_CHAIN=$(echo ${LINE:21:1})
if [ $VERBOSITY -gt 0 ] ; then
echo -ne "\rProcessing atoms in chain $colArg$THIS_CHAIN$colClear... $colResult$REMOVED_ATOM_NUM$colClear removed so far"
fi
if [[ $THIS_CHAIN == $CHAIN ]] ; then
# echo -e $colBold$colError"$LINE"$colClear
let 'REMOVED_ATOM_NUM = REMOVED_ATOM_NUM + 1'
THIS_ATOM=$(echo "$LINE" | awk '{print $2}')
# remove connections!
if [ $(grep -w $THIS_ATOM __temp__ | wc -l) -gt 0 ] ; then
CONECT_REMOVE="$CONECT_REMOVE $THIS_ATOM"
fi
else
# echo -e $colBold"$LINE"$colClear
echo "$LINE" >> __temp__2
fi
elif [[ $LINE == "TER "* ]] ; then
THIS_CHAIN=$(echo ${LINE:21:1})
if [[ $THIS_CHAIN == $CHAIN ]] ; then
# echo -e $colBold$colError"$LINE"$colClear
let 'REMOVED_TERM_NUM = REMOVED_TERM_NUM + 1'
else
# echo -e $colBold"$LINE"$colClear
echo "$LINE" >> __temp__2
fi
else
# echo "$LINE"
echo "$LINE" >> __temp__2
fi
done < $INPUT
if [ $VERBOSITY -gt 0 ] ; then
echo ""
fi
# echo $CONECT_REMOVE
# CONECT_REMOVE="5737 5739"
for ATOM in $CONECT_REMOVE; do
if [ $VERBOSITY -gt 0 ] ; then
echo -ne "\rRemoving atom $colArg$ATOM$colClear's connections..."
fi
# ATOM=5738
# echo $ATOM
rm __temp__3 2> /dev/null
while read -r LINE; do
if [[ $LINE == "CONECT"*$ATOM* ]] ; then
# echo -e $colError"$LINE"$colClear
let 'REMOVED_CONECT_NUM = REMOVED_CONECT_NUM + 1'
# echo $ATOM "$LINE" >> _remv_
else
echo "$LINE" >> __temp__3
fi
done < __temp__2
cp __temp__3 __temp__2
done
if [ $VERBOSITY -gt 0 ] ; then
echo ""
fi
mv __temp__2 $OUTPUT
successOut "Removed $colResult$REMOVED_ATOM_NUM$colSuccess atoms in chain $colArg$CHAIN"
successOut "Removed $colResult$REMOVED_TERM_NUM$colSuccess termini in chain $colArg$CHAIN"
successOut "Removed $colResult$REMOVED_CONECT_NUM$colSuccess connections in chain $colArg$CHAIN"