#!/bin/bash source $MWSHPATH/colours.sh source $MWSHPATH/out.sh SEARCH_RTP=0 REPLACE=0 FIND=0 FORCE=0 EXPERT=0 VERBOSITY=0 PROGRESS=0 SUMMARY=0 REMOVE_CHAIN=0 INVERT=0 DNA_HYDROGEN=0 RESLISTONLY=0 TOP_PATH=/opt/proprietary-apps/gromacs/2018/share/gromacs/top/ while test $# -gt 0; do case "$1" in -h|-help|--help|-usage|--usage) echo -e $colBold"Methods for "$colFunc"pdb_mod.sh"$colClear":" echo echo -e $colUnderline"Summary of a PDB$colClear:" echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --summary <PDB>"$colClear echo echo -e $colUnderline"Summary of residues in a chain in PDB$colClear:" echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --summary <PDB> --filter <CHAIN> [--residue-list-only]"$colClear echo echo -e $colUnderline"Find a residue entry in a given force field$colClear:" echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --search-rtp <RES> --force-field <FIELD> [--missing-atom <ATOM>]"$colClear echo -e "* Add missing atom argument to highlight possible matches." echo echo -e $colUnderline"Find a residue entry in a given force field$colClear:" echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --search-rtp <RES> --force-field <FIELD> [--missing-atom <ATOM>]"$colClear echo -e "* Add missing atom argument to highlight possible matches." echo echo -e $colUnderline"List all instances of an atom name in a PDB$colClear:" echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --find-atom <ATOM> --input <PDB> "$colClear echo echo -e $colUnderline"Replace all entries with a given atom name with another in a PDB$colClear:" echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --replace <OLD_ATOM> <NEW_ATOM> --input <PDB> [--output <PDB>] [--filter <RES/CHAIN>] "$colClear echo -e "* Specify an output file with $colArg--output$colClear." echo -e "* Only replace atoms in a given residue or chain with $colArg--filter$colClear." echo echo -e $colUnderline"Remove a chain from a PDB$colClear:" echo -e $colFunc"pdb_mod.sh"$colClear$colArg" --remove-chain <CHAIN> --input <PDB> [--output <PDB>]"$colClear echo -e "* Specify an output file with $colArg--output$colClear." echo echo -e $colBold"Additional arguments for "$colFunc"pdb_mod.sh"$colClear":" echo -e $colArg"-v|--verbose"$colClear" more verbose output" echo -e $colArg"-x|--expert"$colClear" hide warnings" echo -e $colArg"-f|--force"$colClear" ignore warnings" # echo -e $colArg"-l|--lognum"$colClear" specify the output log number" exit 5 ;; -s|--summary) SUMMARY=1 shift INPUT=$1 shift ;; -srtp|--search-rtp) shift RES=$1 SEARCH_RTP=1 shift ;; -rlo|--residue-list-only) shift RESLISTONLY=1 ;; -ff|--force-field) shift FIELD=$1 shift ;; -matm|--missing-atom) shift MATM=$1 shift ;; -r|--replace) shift MATM=$1 shift NEWATM=$1 shift REPLACE=1 ;; -rc|--remove-chain) REMOVE_CHAIN=1 shift CHAIN=$1 shift ;; -fatm|--find-atom) shift MATM=$1 shift FIND=1 ;; -i|--input) shift INPUT=$1 shift ;; -o|--output) shift OUTPUT=$1 shift ;; -filter|--filter) shift FILTER=$1 shift ;; -v) shift VERBOSITY=1 ;; -f|--force) shift FORCE=1 ;; -x|--expert) shift EXPERT=1 ;; *) warningOut "Unrecognised CLI flag: $colArg$1" break ;; esac done # LOGFILE=_pdbmod"$LOGNUM".log if [ $SEARCH_RTP -eq 1 ] ; then TOP_PATH=/opt/proprietary-apps/gromacs/2018/share/gromacs/top/$FIELD.ff/ if [[ -z $FIELD ]] ; then warningOut "Using CHARMM36-Custom." TOP_PATH=$HOME/gmx_ff/charm36-custom.ff fi # Search the residue topologies file of the # force field for the given residue FOUND=0 for FILE in "$TOP_PATH*.rtp"; do # echo $FILE FOUND=$(grep -n "\[ $RES \]" -m1 $FILE | wc -l) if [ $FOUND -eq 1 ] ; then break ; fi done if [ $FOUND -eq 0 ] ; then errorOut "No $RES found in $FIELD!" 1 fi # grep -n "\[ $RES \]" -m1 $FILE LINE_NUM=$(grep -n "\[ $RES \]" $FILE | awk '{print $1}') LINE_NUM=${LINE_NUM::-2} # { grep -n -m1 "\[ $RES \]"; grep -n -m1 "\[ "; } << $TOP_PATH/*.rtp RTP_LEN=$(cat $FILE | wc -l) let "TAIL = $LINE_NUM - $RTP_LEN" # tail $TOP_PATH/*.rtp -n $TAIL | grep -n -m1 '^$' $TOP_PATH/*.rtp # tail $TOP_PATH/*.rtp -n $TAIL | grep -n -m5 "\[" $TOP_PATH/*.rtp tail $FILE -n $TAIL > __temp__ # echo $RES > __temp__2 headerOut "$colArg$RES$colClear$colBold entry in $colFunc$FIELD" headerOut "$colFile$FILE$colClear" headerOut "line $LINE_NUM and onwards:" while read -r LINE; do # if [[ $LINE != "[ atoms ]" ]] && [[ $LINE == "[ *" ]] ; then # exit # fi if [[ $LINE == "" ]] ; then exit ; fi # echo "$LINE" >> __temp__2 if [[ ! -z $MATM ]] ; then if [ $(echo "$LINE" | grep "${MATM:0:1}" | wc -l) -eq 1 ] ; then echo "$LINE" | grep -E --color "${MATM:0:1}|${MATM:1:1}|${MATM:2:1}" else echo "$LINE" fi else echo "$LINE" fi done < __temp__ rm __temp__* # if [[ -z $MATM ]] ; then fi if [ $FIND -eq 1 ] ; then # List all instances of an atom name in a pdb if [[ -z $INPUT ]] ; then errorOut "No input file specified." exit fi echo -e "$colBold$colArg$MATM$colClear$colBold appears in "$colClear"$colFile$colBold$INPUT$colClear$colBold in$colClear:" echo -e "$colUnderline$colVarName""residue$colClear $colUnderline$colVarName""chain$colClear $colResult$colUnderline""instances"$colClear grep "ATOM" $INPUT | grep " $MATM " | awk '{print $4" "$5}' > __temp__ # nano __temp__ touch __temp__2 while read -r RESCHAIN; do if [ $(grep "$RESCHAIN" __temp__2 | wc -l) -ne 0 ] ; then continue ; fi echo $RESCHAIN >> __temp__2 done < __temp__ # nano __temp__2 while read -r RESCHAIN; do echo -e $colVarName$RESCHAIN $colResult$(grep "$RESCHAIN" __temp__ | wc -l)"x" $colClear done < __temp__2 rm __temp__* fi if [ $REPLACE -eq 1 ] ; then # Replace all entries with a given atom name with another if [[ -z $OUTPUT ]] ; then if [ $EXPERT -eq 0 ] ; then warningOut "No output file specified. Defaulted to $colFile""mod.pdb" fi OUTPUT="mod.pdb" fi if [[ -z $INPUT ]] ; then errorOut "No input file specified." exit fi if [[ $INPUT == $OUTPUT ]] ; then if [ $FORCE -eq 1 ] ; then if [ $EXPERT -eq 0 ] ; then warningOut "Overwriting $colFile"$INPUT"$colWarning!" fi else errorOut "Overwriting is not permitted. Select different input and output files. (ignore with -f)" exit fi fi if [[ -z $INPUT ]] ; then errorOut "No input file specified." exit fi if [[ "$OUTPUT" == "__temp__" ]] ; then errorOut "Choose a different output name." exit fi if [[ -z $FILTER ]] ; then if [ $EXPERT -eq 0 ] ; then warningOut "All instances of $colArg$MATM$colClear$colWarning being replaced." fi fi if [ $VERBOSITY -gt 0 ] ; then if [[ -z $FILTER ]] ; then echo -ne $colFunc"pdb_mod.sh"$colClear": Renaming $colArg$MATM$colClear to $colArg$NEWATM$colClear... " else echo -ne $colFunc"pdb_mod.sh"$colClear": Renaming $colArg$MATM$colClear to $colArg$NEWATM$colClear in RESCHAINs containing $colArg$FILTER$colClear... " fi fi rm __temp__ 2> /dev/null if [[ ! $INPUT == $OUTPUT ]] ; then rm $OUTPUT 2> /dev/null fi COUNT=0 # IN_LINES=$(cat $INPUT | wc -l) while read -r LINE; do # if [ $PROGRESS -eq 1 ] ; then echo ; fi if [[ $LINE == ATOM*$MATM* ]] ; then OLSTRING=" " OLSTRING=" $MATM""${OLSTRING:${#MATM}}" NEWSTRING=" " NEWSTRING=" $NEWATM""${NEWSTRING:${#NEWATM}}" # echo "$LINE" if [[ -z $FILTER ]] ; then # echo "$LINE" | sed "s/ $MATM / $NEWATM /" >> __temp__ # echo "$LINE" | sed "s/ $MATM / $NEWATM /" >> $LOGFILE echo "$LINE" | sed "s/$OLSTRING/$NEWSTRING/" >> __temp__ # echo "$LINE" | sed "s/$OLSTRING/$NEWSTRING/" >> $LOGFILE let "COUNT = COUNT + 1" else RESCHAIN=$(echo "$LINE" | awk '{print $4" "$5}') if [[ $RESCHAIN == *$FILTER* ]] ; then echo "$LINE" | sed "s/$OLSTRING/$NEWSTRING/" >> __temp__ # echo "$LINE" | sed "s/$OLSTRING/$NEWSTRING/" >> $LOGFILE let "COUNT = COUNT + 1" else echo "$LINE" >> __temp__ fi fi else echo "$LINE" >> __temp__ fi done < $INPUT # if [ $PROGRESS -eq 1 ] ; then echo ; fi mv __temp__ $OUTPUT if [ $COUNT -eq 0 ] ; then if [ $EXPERT -eq 0 ] ; then echo "" warningOut "No replacements made" fi else if [[ -z $FILTER ]] ; then successOut "$COUNT ($MATM->$NEWATM) replacements made" else successOut "$COUNT ($MATM->$NEWATM) replacements made in $FILTER" fi fi rm __temp__* 2> /dev/null exit $COUNT fi if [ $SUMMARY -eq 1 ] ; then # List chains in the PDB with the number of atoms within if [[ -z $INPUT ]] ; then errorOut "No input file specified." exit fi CHAINS="" RESIDUES="" NUM_ATOMS="" NUM_CHAINS=0 LAST_CHAIN="" LAST_RESIDUE="" while read -r LINE; do if [[ $LINE == "ATOM "* ]] || [[ $LINE == "HETATM"* ]] ; then THIS_CHAIN=$(echo ${LINE:21:1}) if [[ ! -z $FILTER ]] ; then if [[ $THIS_CHAIN == *$FILTER* ]] ; then THIS_RESNAMENUM=$(echo ${LINE:17:3})$(echo ${LINE:23:3}) if [[ $THIS_RESNAMENUM != $LAST_RESNAMENUM ]] ; then RESLIST=$RESLIST" "$(echo ${LINE:17:3})$(echo ${LINE:23:3}) fi LAST_RESNAMENUM=$THIS_RESNAMENUM fi fi if [[ $THIS_CHAIN != $LAST_CHAIN ]] ; then # New chain! LASTFIRST_RESNUM=$FIRST_RESNUM FIRST_RESNUM=$(echo ${LINE:23:3}) # Store chain name CHAINS="$CHAINS $THIS_CHAIN" # Increment chain count let "NUM_CHAINS = NUM_CHAINS + 1" # if [[ $NUM_ATOMS != "" ]] ; then if [ $NUM_CHAINS -gt 1 ] ; then # Not first chain! # Store number of atoms in chain NUM_ATOMS="$NUM_ATOMS $THIS_NUM" let "LAST_RESNUM = LAST_RESNUM - LASTFIRST_RESNUM + 1" RESIDUES="$RESIDUES $LAST_RESNUM" fi THIS_NUM=0 THIS_RES_NUM=0 fi let "THIS_NUM = THIS_NUM + 1" LAST_CHAIN=$THIS_CHAIN LAST_RESNUM=$(echo ${LINE:23:3}) fi done < $INPUT NUM_ATOMS="$NUM_ATOMS $THIS_NUM" let "LAST_RESNUM = LAST_RESNUM - FIRST_RESNUM + 1" RESIDUES="$RESIDUES $LAST_RESNUM" if [ $RESLISTONLY -eq 0 ] ; then echo -e $colBold$colFile$INPUT$colClear$colBold" contains $colResult$NUM_CHAINS$colClear $colBold""chains$colClear:" fi COUNT=1 for CHAIN in $CHAINS; do if [ $RESLISTONLY -eq 0 ] ; then echo -ne $colVarType"Chain "$colVarName$CHAIN$colClear" with "$colResult$(echo $NUM_ATOMS | cut -d " " -f $COUNT)$colClear"$colVarType atoms$colClear in " echo -ne $colResult$(echo $RESIDUES | cut -d " " -f $COUNT)$colClear"$colVarType residues"$colClear echo "" fi let "COUNT = COUNT + 1" done if [[ ! -z $FILTER ]] ; then echo echo -e $colBold$colVarType"Chain "$colArg$FILTER$colClear$colBold"'s "$colVarType"residues"$colClear":" echo -e $colResult$RESLIST$colClear fi fi if [ $REMOVE_CHAIN -eq 1 ] ; then # Look for entries starting with "ATOM " or "HETATM" # and remove if character 22 matches given chain REMOVED_ATOM_NUM=0 REMOVED_TERM_NUM=0 REMOVED_CONECT_NUM=0 CONECT_REMOVE="" if [[ -z $INPUT ]] ; then errorOut "No input file specified." exit fi if [[ -z $OUTPUT ]] ; then if [ $EXPERT -eq 0 ] ; then warningOut "No output file specified. Defaulted to $colFile""mod.pdb" fi OUTPUT="mod.pdb" fi if [[ $INPUT == $OUTPUT ]] ; then if [ $FORCE -eq 1 ] ; then if [ $EXPERT -eq 0 ] ; then warningOut "Overwriting $colFile"$INPUT"$colWarning!" fi else errorOut "Overwriting is not permitted. Select different input and output files." exit fi fi rm __temp__2 2> /dev/null if [[ ! $INPUT == $OUTPUT ]] ; then rm $OUTPUT 2> /dev/null fi grep "CONECT" $INPUT > __temp__ while read -r LINE; do if [[ $LINE == "ATOM "* ]] || [[ $LINE == "HETATM"* ]] ; then THIS_CHAIN=$(echo ${LINE:21:1}) if [ $VERBOSITY -gt 0 ] ; then echo -ne "\rProcessing atoms in chain $colArg$THIS_CHAIN$colClear... $colResult$REMOVED_ATOM_NUM$colClear removed so far" fi if [[ $THIS_CHAIN == $CHAIN ]] ; then # echo -e $colBold$colError"$LINE"$colClear let 'REMOVED_ATOM_NUM = REMOVED_ATOM_NUM + 1' THIS_ATOM=$(echo "$LINE" | awk '{print $2}') # remove connections! if [ $(grep -w $THIS_ATOM __temp__ | wc -l) -gt 0 ] ; then CONECT_REMOVE="$CONECT_REMOVE $THIS_ATOM" fi else # echo -e $colBold"$LINE"$colClear echo "$LINE" >> __temp__2 fi elif [[ $LINE == "TER "* ]] ; then THIS_CHAIN=$(echo ${LINE:21:1}) if [[ $THIS_CHAIN == $CHAIN ]] ; then # echo -e $colBold$colError"$LINE"$colClear let 'REMOVED_TERM_NUM = REMOVED_TERM_NUM + 1' else # echo -e $colBold"$LINE"$colClear echo "$LINE" >> __temp__2 fi else # echo "$LINE" echo "$LINE" >> __temp__2 fi done < $INPUT if [ $VERBOSITY -gt 0 ] ; then echo "" fi # echo $CONECT_REMOVE # CONECT_REMOVE="5737 5739" for ATOM in $CONECT_REMOVE; do if [ $VERBOSITY -gt 0 ] ; then echo -ne "\rRemoving atom $colArg$ATOM$colClear's connections..." fi # ATOM=5738 # echo $ATOM rm __temp__3 2> /dev/null while read -r LINE; do if [[ $LINE == "CONECT"*$ATOM* ]] ; then # echo -e $colError"$LINE"$colClear let 'REMOVED_CONECT_NUM = REMOVED_CONECT_NUM + 1' # echo $ATOM "$LINE" >> _remv_ else echo "$LINE" >> __temp__3 fi done < __temp__2 cp __temp__3 __temp__2 done if [ $VERBOSITY -gt 0 ] ; then echo "" fi mv __temp__2 $OUTPUT successOut "Removed $colResult$REMOVED_ATOM_NUM$colSuccess atoms in chain $colArg$CHAIN" successOut "Removed $colResult$REMOVED_TERM_NUM$colSuccess termini in chain $colArg$CHAIN" successOut "Removed $colResult$REMOVED_CONECT_NUM$colSuccess connections in chain $colArg$CHAIN" rm __temp__* fi