def showPDB_interactions(pdbPath, complexName, l_r_pairsres, showPairs=True):
    '''
    Load both partner structures of a complex into PyMOL and display residue
    pairs between them.

    The files <complexName>_l_u.pdb and <complexName>_r_u.pdb are read from
    pdbPath, shown as cartoons and coloured grey ("l" partner) and palegreen
    ("r" partner).

    :param pdbPath: str. Directory that contains both pdb files
    :param complexName: str. Prefix shared by both pdb file names
    :param l_r_pairsres: iterable of ((chainL, resL, resNameL), (chainR, resR, resNameR)).
                         Residue names are one-letter codes
    :param showPairs: bool. If True, a CA-CA distance object named "dist<i>" is
                      created for each pair and the pair is printed. If False,
                      the first two fields of every residue are forwarded to
                      showPDB_patches_all instead
    '''
    lname = complexName + "_l_u"
    rname = complexName + "_r_u"
    lnameFile = os.path.join(pdbPath, lname + ".pdb")
    rnameFile = os.path.join(pdbPath, rname + ".pdb")

    ### Load Structures
    pymol.finish_launching()
    for pdb_file, object_name in ((lnameFile, lname), (rnameFile, rname)):
        pymol.cmd.load(pdb_file, object_name)
        pymol.cmd.show_as("cartoon", object_name)
    for selection, object_name, colour in (("ml", lname, "grey"),
                                           ("mr", rname, "palegreen")):
        pymol.cmd.select(selection, " model " + object_name)
        pymol.cmd.color(colour, selection)

    if not showPairs:
        res_pred_l, res_pred_r = zip(*l_r_pairsres)
        # Only the first two fields of each residue are passed on
        ids_l = [elem[:2] for elem in res_pred_l]
        ids_r = [elem[:2] for elem in res_pred_r]
        showPDB_patches_all(pdbPath, complexName, ids_l, ids_r,
                            res_true_l={}, res_true_r={})
        return

    ca_selection = " model %s and chain %s and resi %s and resn %s and name CA"
    for i, (left, right) in enumerate(l_r_pairsres):
        chainL, resL, codeL = left
        chainR, resR, codeR = right
        # PyMOL "resn" expects three-letter residue names
        resNameL = one_to_three(codeL)
        resNameR = one_to_three(codeR)
        pymol.cmd.distance(
            "dist%d" % i,
            ca_selection % (lname, chainL, resL, resNameL),
            ca_selection % (rname, chainR, resR, resNameR),
        )
        print((chainL, resL, resNameL), (chainR, resR, resNameR))
def get_mutation_dict(mutation):
    """Parse a point-mutation string into its wild type, position and mutant.

    Two notations are accepted: one-letter ("A123G") and three-letter
    ("ALA123GLY"). Surrounding whitespace is ignored. One-letter codes are
    upper-cased and converted to three-letter codes with one_to_three;
    three-letter codes are returned exactly as written.

    Args:
        mutation (str): Mutation string in one of the two notations.

    Returns:
        dict: Keys 'wt', 'resnum' and 'mt'. 'resnum' is kept as a string.

    Raises:
        ValueError: If the string matches neither notation.
        KeyError: If a one-letter code is unknown to one_to_three.
    """
    text = mutation.strip()
    # The second character tells the notations apart: a digit there means the
    # wild type is a single letter. Strings that are too short simply fall
    # through to the three-letter pattern and are rejected below.
    one_letter = len(text) > 1 and text[1].isdigit()
    if one_letter:
        pattern = r"(?P<wt>[a-zA-Z]{1})(?P<resnum>\d+)(?P<mt>[a-zA-Z]{1})"
    else:
        pattern = r"(?P<wt>[a-zA-Z]{3})(?P<resnum>\d+)(?P<mt>[a-zA-Z]{3})"

    found = re.match(pattern, text)
    if found is None:
        # Fail with a clear message instead of an AttributeError on None
        raise ValueError(
            "Cannot parse mutation {!r}: expected e.g. 'A123G' or 'ALA123GLY'".format(mutation))

    mut_dict = found.groupdict()
    if one_letter:
        mut_dict['wt'] = one_to_three(mut_dict['wt'].upper())
        mut_dict['mt'] = one_to_three(mut_dict['mt'].upper())
    return mut_dict
def getStructFromFasta(self, fname, chainType):
    '''
    Builds a Bio.PDB.Structure object from the sequence stored in a fasta file.
    Only the Structure -> Model -> Chain -> Residue hierarchy is created;
    residues carry no atoms, so no coordinates are available.

    :param fname: str. Path to the fasta file
    :param chainType: str. "l" or "r". Its upper-case form is used as chain id
    :return: the Structure, holding one model (id 0) with one chain and one
             residue per sequence letter, numbered from 0
    '''
    # inputNumber is used to report which partner fails if there is an error
    partner_number = "1" if chainType == "l" else "2"
    seq = self.parseFasta(fname, inputNumber=partner_number)
    prefix = self.splitExtendedPrefix(self.getExtendedPrefix(fname))[0]

    struct = Structure(prefix)
    model = Model(0)
    chain = Chain(chainType.upper())
    struct.add(model)
    model.add(chain)

    for position, letter in enumerate(seq):
        try:
            resname = one_to_three(letter)
        except KeyError:
            # Letters without a three-letter code become unknown residues
            resname = "UNK"
        chain.add(Residue((' ', position, ' '), resname, prefix))
    return struct
def BSParser(infile):
    """Parse a tab-separated binding-site file into BindingSite objects.

    Every line is split on tabs and its columns are located through
    BSLineOrder. The BSRes column is a whitespace-separated list of tokens,
    each a one-letter residue code followed by the residue number (for
    example "A123"). Tokens whose residue code cannot be converted are
    reported and skipped; tokens whose number is not an integer are skipped
    silently. A message is printed for every binding site whose PDB file is
    not found, but the site is still returned.

    infile: path of the binding-site file.
    Returns a list with one BindingSite per line of the file.
    """
    # Column positions are the same for every line, so look them up once.
    (idx_pdbid, idx_chain, idx_bsid,
     idx_ligname, idx_ligchain, idx_bsres) = [
        BSLineOrder.index(field)
        for field in ("PDBID", "ChainID", "BSID", "LigName", "LigChain", "BSRes")]

    bslist = []
    # The context manager closes the file even when parsing raises.
    with open(infile) as handle:
        for line in handle:
            content = line.strip().split('\t')
            pdbid = content[idx_pdbid].lower()
            chainid = content[idx_chain]
            bscode = content[idx_bsid]
            ligname = content[idx_ligname]
            ligchain = content[idx_ligchain]
            bsres = content[idx_bsres]
            newbs = BindingSite(pdbid, chainid, bscode, ligchain, ligname)
            for eachres in bsres.split():
                try:
                    resname = one_to_three(eachres[0])
                except Exception:
                    # Single-argument print(...) behaves the same on Python 2 and 3
                    print("wrong bindingsite res: " + eachres)
                    continue
                try:
                    resnum = int(eachres[1:])
                except ValueError:
                    # Non-numeric residue numbers are dropped on purpose
                    continue
                residue = Residue(resnum, resname, chainid)
                try:
                    newbs.appendResidue(residue)
                except Exception as e:
                    # Best effort: report the rejected residue and carry on
                    print(e)
            bslist.append(newbs)
            if not PDBFileExist(pdbid):
                print("Cannot find file " + pdbid)
    return bslist
def _standard_resname(self, res):
    """Return the upper-case three-letter name for a residue code.

    The code is upper-cased and must be an entry of aa3 or of aa1. A
    one-character code is converted with one_to_three; any other accepted
    code is returned upper-cased.

    Raises:
        ValueError: If the code is in neither aa3 nor aa1.
    """
    code = res.upper()
    recognised = code in list(aa3) or code in list(aa1)
    if not recognised:
        # TODO: mutation to selenocysteine (U;SEC) is not working
        raise ValueError("Unrecognised residue {}".format(res))
    return one_to_three(code) if len(code) == 1 else code
def accessibility_class(residue, accessibility):
    """Classify a residue by its relative solvent accessibility.

    The relative accessibility is accessibility divided by the residue's
    maximum accessibility taken from residue_max_acc['Miller'].

    residue: one-letter amino-acid code.
    accessibility: absolute accessibility, anything float() accepts.

    Returns a tuple (relative_accessibility, class) where class is
        2 if relative accessibility >= 0.95,
        1 if 0.95 > relative accessibility >= 0.05,
        0 otherwise,
    or ("NA", "NA") when the relative accessibility cannot be computed
    (unknown residue, non-numeric value, zero maximum, ...).
    """
    scale = 'Miller'  # Miller or Wilke type available
    resmax = residue_max_acc[scale]
    try:
        rel_acc = float(accessibility) / float(resmax[one_to_three(residue)])
    except Exception:
        # Still best effort, but KeyboardInterrupt/SystemExit are no longer
        # swallowed as they were by the previous bare except.
        return ("NA", "NA")
    if rel_acc >= 0.95:
        return (rel_acc, 2)
    if rel_acc >= 0.05:
        return (rel_acc, 1)
    return (rel_acc, 0)
def pdb_atom(self, ind, a, aa, chain, pos, xyz):
    """
    Format one PDB-like ATOM record
    Input:
        ind  : int, zero-based atom index (written as ind + 1)
        a    : str, atom name ('N', 'CA', 'C' or 'CB')
        aa   : char, one-letter amino-acid code
        chain: char, chain id character
        pos  : int, zero-based residue position (written as pos + 1)
        xyz  : sequence holding the x, y and z coordinates
    Output:
        atom: str, the ATOM record; the two columns after the coordinates are
              always 1.0 and the last field is 'C' when the atom name
              contains a 'C', otherwise 'N'
    """
    record_head = 'ATOM {:>6} {:3} {:3} {:1} {:>4} '.format(
        ind + 1, a, one_to_three(aa), chain, pos + 1)
    element = 'C' if 'C' in a else 'N'
    x, y, z = xyz[0], xyz[1], xyz[2]
    record_tail = '{:7.3f} {:7.3f} {:7.3f} {:6.3f} {:6.3f} {}'.format(
        x, y, z, 1.0, 1.0, element)
    return record_head + record_tail
# side_chain_term: build the side-chain energy term as a four-particle
# CustomCompoundBondForce whose energy is minus the log-likelihood of a
# 3-component Gaussian mixture, one mixture per residue type.
#
# Parameters
#   oa            object that provides k_awsem, nres, chain_starts, chain_ends,
#                 res_type, seq and the per-residue particle indices n, ca, c, cb.
#   k             energy scale carrying units; it is converted to kJ/mol and
#                 multiplied by oa.k_awsem to give k_side_chain.
#   gmmFileFolder folder with <RES>_means.txt, <RES>_precisions_chol.txt,
#                 <RES>_log_det.txt and <RES>_weights.txt for every residue
#                 type except GLY (RES is the three-letter code).
#   forceGroup    force group set on the returned force.
# Returns the configured force object.
#
# How it works
#   * For each of the 20 residue types the mixture parameters are read with
#     np.loadtxt. GLY reads no files: it only gets uniform weights (1/3 each)
#     and keeps zeros in every other table.
#   * np.dot(means[j], precisions_chol[j]) is precomputed per component and
#     exposed to the expression as the table "mdpc".
#   * Tables are reshaped to 2D and passed as .T.flatten() to
#     Discrete2DFunction; the expression reads them as table(res, column).
#   * The energy is -k_side_chain*(log(sum_j exp(lgw_j - c)) + c) with
#     c = max_j lgw_j, i.e. the maximum is subtracted before exponentiating
#     and added back afterwards.
#   * r1, r2, r3 are 10*distance from particles 1-3 (N, CA, C) to particle 4
#     (CB); the factor 10 is presumably a nm -> Angstrom conversion - TODO confirm.
#   * A bond is added for every residue that is neither a chain start nor a
#     chain end and whose res_type is not "IGL"; its per-bond parameter "res"
#     is the residue's index in gamma_se_map_1_letter.
# NOTE(review): means_all_res is filled and reshaped but never handed to the force.
# NOTE(review): the default gmmFileFolder is an absolute path under /Users/weilu,
# so callers on other machines presumably have to pass their own folder.
def side_chain_term(oa, k=1*kilocalorie_per_mole, gmmFileFolder="/Users/weilu/opt/parameters/side_chain", forceGroup=25): # add chi forces # The sign of the equilibrium value is opposite and magnitude differs slightly # 251.04 = 60 * 4.184 kJ, converted from default value in LAMMPS AWSEM # multiply interaction strength by overall scaling k = k.value_in_unit(kilojoule_per_mole) k_side_chain = k * oa.k_awsem n_components = 3 means_all_res = np.zeros((20, 3, 3)) precisions_chol_all_res = np.zeros((20, 3, 3, 3)) log_det_all_res = np.zeros((20, 3)) weights_all_res = np.zeros((20, 3)) mean_dot_precisions_chol_all_res = np.zeros((20, 3, 3)) res_type_map_letters = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'] gamma_se_map_1_letter = { 'A': 0, 'R': 1, 'N': 2, 'D': 3, 'C': 4, 'Q': 5, 'E': 6, 'G': 7, 'H': 8, 'I': 9, 'L': 10, 'K': 11, 'M': 12, 'F': 13, 'P': 14, 'S': 15, 'T': 16, 'W': 17, 'Y': 18, 'V': 19} for i, res_type_one_letter in enumerate(res_type_map_letters): res_type = one_to_three(res_type_one_letter) if res_type == "GLY": weights_all_res[i] = np.array([1/3, 1/3, 1/3]) continue means = np.loadtxt(f"{gmmFileFolder}/{res_type}_means.txt") precisions_chol = np.loadtxt(f"{gmmFileFolder}/{res_type}_precisions_chol.txt").reshape(3,3,3) log_det = np.loadtxt(f"{gmmFileFolder}/{res_type}_log_det.txt") weights = np.loadtxt(f"{gmmFileFolder}/{res_type}_weights.txt") means_all_res[i] = means precisions_chol_all_res[i] = precisions_chol log_det_all_res[i] = log_det weights_all_res[i] = weights for j in range(n_components): mean_dot_precisions_chol_all_res[i][j] = np.dot(means[j], precisions_chol[j]) means_all_res = means_all_res.reshape(20, 9) precisions_chol_all_res = precisions_chol_all_res.reshape(20, 27) mean_dot_precisions_chol_all_res = mean_dot_precisions_chol_all_res.reshape(20, 9) log_weights = np.log(weights_all_res) sumexp_line = "+".join([f"exp(log_gaussian_and_weights_{i}-c)" for i in range(n_components)]) const 
= 3 * np.log(2 * np.pi) side_chain = CustomCompoundBondForce(4, f"-{k_side_chain}*(log({sumexp_line})+c);\ c=max(log_gaussian_and_weights_0,max(log_gaussian_and_weights_1,log_gaussian_and_weights_2));\ log_gaussian_and_weights_0=log_gaussian_prob_0+log_weights(res,0);\ log_gaussian_and_weights_1=log_gaussian_prob_1+log_weights(res,1);\ log_gaussian_and_weights_2=log_gaussian_prob_2+log_weights(res,2);\ log_gaussian_prob_0=-.5*({const}+log_prob_0)+log_det(res,0);\ log_gaussian_prob_1=-.5*({const}+log_prob_1)+log_det(res,1);\ log_gaussian_prob_2=-.5*({const}+log_prob_2)+log_det(res,2);\ log_prob_0=((r1*pc(res,0)+r2*pc(res,3)+r3*pc(res,6)-mdpc(res,0))^2+\ (r1*pc(res,1)+r2*pc(res,4)+r3*pc(res,7)-mdpc(res,1))^2+\ (r1*pc(res,2)+r2*pc(res,5)+r3*pc(res,8)-mdpc(res,2))^2);\ log_prob_1=((r1*pc(res,9)+r2*pc(res,12)+r3*pc(res,15)-mdpc(res,3))^2+\ (r1*pc(res,10)+r2*pc(res,13)+r3*pc(res,16)-mdpc(res,4))^2+\ (r1*pc(res,11)+r2*pc(res,14)+r3*pc(res,17)-mdpc(res,5))^2);\ log_prob_2=((r1*pc(res,18)+r2*pc(res,21)+r3*pc(res,24)-mdpc(res,6))^2+\ (r1*pc(res,19)+r2*pc(res,22)+r3*pc(res,25)-mdpc(res,7))^2+\ (r1*pc(res,20)+r2*pc(res,23)+r3*pc(res,26)-mdpc(res,8))^2);\ r1=10*distance(p1,p4);\ r2=10*distance(p2,p4);\ r3=10*distance(p3,p4)") side_chain.addPerBondParameter("res") side_chain.addTabulatedFunction("pc", Discrete2DFunction(20, 27, precisions_chol_all_res.T.flatten())) side_chain.addTabulatedFunction("log_weights", Discrete2DFunction(20, 3, log_weights.T.flatten())) side_chain.addTabulatedFunction("log_det", Discrete2DFunction(20, 3, log_det_all_res.T.flatten())) side_chain.addTabulatedFunction("mdpc", Discrete2DFunction(20, 9, mean_dot_precisions_chol_all_res.T.flatten())) for i in range(oa.nres): if i not in oa.chain_starts and i not in oa.chain_ends and not oa.res_type[i] == "IGL": # print(i) # if i != 1: # continue # print(oa.n[i], oa.ca[i], oa.c[i], oa.cb[i]) # print(i, oa.seq[i], gamma_se_map_1_letter[oa.seq[i]], precisions_chol_all_res[gamma_se_map_1_letter[oa.seq[i]]]) 
side_chain.addBond([oa.n[i], oa.ca[i], oa.c[i], oa.cb[i]], [gamma_se_map_1_letter[oa.seq[i]]]) side_chain.setForceGroup(forceGroup) return side_chain
# quiet the SettingWithCopyWarning when converting dtypes in get_deletions/mutations methods pd.options.mode.chained_assignment = None from Bio.PDB.Polypeptide import one_to_three _aa_property_dict_one = {'Tiny': ['A','C','G','S','T'], 'Small': ['A','C','D','G','N','P','S','T','V'], 'Aliphatic': ['A','I','L','V'], 'Aromatic': ['F','H','W','Y'], 'Non-polar': ['A','C','F','G','I','L','M','P','V','W','Y'], 'Polar': ['D','E','H','K','N','Q','R','S','T'], 'Charged': ['D','E','H','K','R'], 'Basic': ['H','K','R'], 'Acidic': ['D','E']} _aa_property_dict_three = {k: [one_to_three(x) for x in v] for k,v in _aa_property_dict_one.items()} def pairwise_sequence_alignment(a_seq, b_seq, engine, a_seq_id=None, b_seq_id=None, gapopen=10, gapextend=0.5, outfile=None, outdir=None, force_rerun=False): """Run a global pairwise sequence alignment between two sequence strings. Args: a_seq (str, Seq, SeqRecord, SeqProp): Reference sequence b_seq (str, Seq, SeqRecord, SeqProp): Sequence to be aligned to reference engine (str): `biopython` or `needle` - which pairwise alignment program to use a_seq_id (str): Reference sequence ID. If not set, is "a_seq" b_seq_id (str): Sequence to be aligned ID. If not set, is "b_seq" gapopen (int): Only for `needle` - Gap open penalty is the score taken away when a gap is created gapextend (float): Only for `needle` - Gap extension penalty is added to the standard gap penalty for each
log = logging.getLogger(__name__) _aa_property_dict_one = { 'Aliphatic': set(['A', 'I', 'L', 'V']), 'Aromatic': set(['F', 'H', 'W', 'Y']), 'Non-polar': set(['A', 'C', 'F', 'G', 'I', 'L', 'M', 'P', 'V', 'W', 'Y']), 'Polar': set(['D', 'E', 'H', 'K', 'N', 'Q', 'R', 'S', 'T']), 'Charged': set(['D', 'E', 'H', 'K', 'R']), 'Basic': set(['H', 'K', 'R']), 'Acidic': set(['D', 'E']), 'Tiny': set(['A', 'C', 'G', 'S', 'T']), 'Small': set(['A', 'C', 'D', 'G', 'N', 'P', 'S', 'T', 'V']) } _aa_property_dict_three = { k: [one_to_three(x) for x in v] for k, v in _aa_property_dict_one.items() } _aa_flexibility_dict_one = { 'A': -0.605, 'C': -0.692, 'D': -0.279, 'E': -0.16, 'F': -0.719, 'G': -0.537, 'H': -0.662, 'I': -0.682, 'K': -0.043, 'L': -0.631, 'M': -0.626,
log = logging.getLogger(__name__) _aa_property_dict_one = { 'Aliphatic': ['A', 'I', 'L', 'V'], 'Aromatic' : ['F', 'H', 'W', 'Y'], 'Non-polar': ['A', 'C', 'F', 'G', 'I', 'L', 'M', 'P', 'V', 'W', 'Y'], 'Polar' : ['D', 'E', 'H', 'K', 'N', 'Q', 'R', 'S', 'T'], 'Charged' : ['D', 'E', 'H', 'K', 'R'], 'Basic' : ['H', 'K', 'R'], 'Acidic' : ['D', 'E']} # 'Tiny': ['A','C','G','S','T'] # 'Small': ['A','C','D','G','N','P','S','T','V'] _aa_property_dict_three = {k: [one_to_three(x) for x in v] for k, v in _aa_property_dict_one.items()} _aa_flexibility_dict_one = {'A': -0.605, 'C': -0.692, 'D': -0.279, 'E': -0.16, 'F': -0.719, 'G': -0.537, 'H': -0.662, 'I': -0.682, 'K': -0.043, 'L': -0.631, 'M': -0.626, 'N': -0.381, 'P': -0.271,
from scipy import spatial import matplotlib.pyplot as plt import cPickle from Bio.PDB import * from Bio import SeqIO import tempfile from scipy.sparse import lil_matrix from Bio.SubsMat import MatrixInfo from Bio.PDB.Polypeptide import one_to_three import urllib, os, traceback, pdb AA = 'ACDEFGHIKLMNPQRSTVWY-' aaidx = dict(zip(AA, range(len(AA)))) aa3idx = {} for __i__, __a__ in enumerate(AA): try: aa3idx[one_to_three(__a__)] = __i__ except: continue def module_exists(module_name): try: __import__(module_name) except ImportError: return False else: return True def score_match(pair, matrix=MatrixInfo.blosum62): """
def mutate_whole_sequence(res_list, new_sequence):
    """Rename every residue in res_list after the matching letter of new_sequence.

    The residue at position i gets resname one_to_three(new_sequence[i]).
    Residues are modified in place and the same list is returned.
    new_sequence must be at least as long as res_list, otherwise indexing it
    raises IndexError; extra letters are ignored.
    """
    for position, residue in enumerate(res_list):
        residue.resname = one_to_three(new_sequence[position])
    return res_list
from scipy import spatial import matplotlib.pyplot as plt import cPickle from Bio.PDB import * from Bio import SeqIO import tempfile from scipy.sparse import lil_matrix from Bio.SubsMat import MatrixInfo from Bio.PDB.Polypeptide import one_to_three import urllib,os,traceback,pdb AA='ACDEFGHIKLMNPQRSTVWY-' aaidx=dict(zip(AA,range(len(AA)))) aa3idx={} for __i__,__a__ in enumerate(AA): try: aa3idx[one_to_three(__a__)]=__i__ except: continue def module_exists(module_name): try: __import__(module_name) except ImportError: return False else: return True def score_match(pair,matrix=MatrixInfo.blosum62): """ Given a tuple pair of amino acids, it returns the substitution matrix score
Wrapper for stride (predictor of RASA) @author: Afsar with modifications from Basir """ import numpy as np import tempfile import os from Bio.PDB.Polypeptide import one_to_three from codebase.constants import amino_acids to_one_letter_code = {} aa3idx = {} for index, amino_acid in enumerate(amino_acids): try: aa3idx[one_to_three(amino_acid)] = index to_one_letter_code[one_to_three(amino_acid)] = amino_acid except (): continue def get_max_asa(s=None): """ This function returns a dictionary containing the maximum ASA for different residues. when s=single, single letter codes of aa are also added to the dictionary """ max_acc = { "ALA": 106.0, "CYS": 135.0, "ASP": 163.0,
from Bio.PDB.Polypeptide import one_to_three _aa_property_dict_one = { 'Tiny': ['A', 'C', 'G', 'S', 'T'], 'Small': ['A', 'C', 'D', 'G', 'N', 'P', 'S', 'T', 'V'], 'Aliphatic': ['A', 'I', 'L', 'V'], 'Aromatic': ['F', 'H', 'W', 'Y'], 'Non-polar': ['A', 'C', 'F', 'G', 'I', 'L', 'M', 'P', 'V', 'W', 'Y'], 'Polar': ['D', 'E', 'H', 'K', 'N', 'Q', 'R', 'S', 'T'], 'Charged': ['D', 'E', 'H', 'K', 'R'], 'Basic': ['H', 'K', 'R'], 'Acidic': ['D', 'E'] } _aa_property_dict_three = { k: [one_to_three(x) for x in v] for k, v in _aa_property_dict_one.items() } def pairwise_sequence_alignment(a_seq, b_seq, engine, a_seq_id=None, b_seq_id=None, gapopen=10, gapextend=0.5, outfile=None, outdir=None, force_rerun=False): """Run a global pairwise sequence alignment between two sequence strings.