def generate_EMs_from_normalized_AA_PDBs(workingDir):
    """
    Simulate one EM density map for every normalized amino-acid PDB file.

    For every sub-directory of ``<workingDir>/simulated/PDB_normalized``
    whose name is listed in ``aAList``, each ``*.pdb`` file is read, blurred
    with ``StructureBlurrer.gaussian_blur_box`` and written with
    ``write_to_MRC_file`` to ``<workingDir>/simulated/EM/<AA>/<name>.map``.
    The output directory is created when missing. A single-line progress
    indicator is rewritten on stdout after every file.

    Directory entries that do not look like ``<name>.pdb`` are skipped
    instead of aborting the whole run.

    Arguments:
        *workingDir*
            root directory that contains the ``simulated`` tree.
    """
    structureBlurrer = StructureBlurrer()
    emDirectory = workingDir + "/simulated/EM"
    pdbNormalizedDir = workingDir + "/simulated/PDB_normalized"
    numFiles = numOfFilesSubdir(pdbNormalizedDir)
    currFileNum = 0
    for aaDirName in os.listdir(pdbNormalizedDir):
        if aaDirName not in aAList:
            continue
        emAaDir = "{0}/{1}".format(emDirectory, aaDirName)
        if not os.path.exists(emAaDir):
            os.makedirs(emAaDir)
        pdbPath = "{0}/{1}".format(pdbNormalizedDir, aaDirName)
        for pdbFileName in os.listdir(pdbPath):
            pdbFileNameMatch = re.match(r'(\S+)\.pdb', pdbFileName, re.I)
            if pdbFileNameMatch is None:
                # Not a PDB file (e.g. a stray temp file): nothing to simulate.
                continue
            pdbFileNameWoExtension = pdbFileNameMatch.group(1)
            pdbFilePath = "{0}/{1}".format(pdbPath, pdbFileName)
            aaStruture = PDBParser.read_PDB_file(pdbFileNameWoExtension, pdbFilePath)
            # NOTE(review): arguments are presumably resolution 2 and a
            # 50x50x50 box -- confirm against StructureBlurrer.
            aaSimMap = structureBlurrer.gaussian_blur_box(aaStruture, 2, 50, 50, 50)
            aAEmFileName = "{0}/{1}.map".format(emAaDir, pdbFileNameWoExtension)
            aaSimMap.write_to_MRC_file(aAEmFileName)
            currFileNum += 1
            # Guard the percentage so an unexpected zero total cannot crash
            # the run after the map has already been written.
            percentDone = currFileNum * 100. / numFiles if numFiles else 0.0
            sys.stdout.write('\r{:4}/{:4} ({:5.4}%), current file: {}'.format(currFileNum, numFiles, percentDone, aAEmFileName))
            sys.stdout.flush()
    # Terminate the in-place progress line.
    sys.stdout.write('\n')
def blur_model(p, res=4.0, emmap=False):
    """
    Read a PDB model and simulate a density map from it.

    Arguments:
        *p*
            path of the PDB file; the part of its base name before the
            first ``.`` is used as the structure name.
        *res*
            resolution handed to ``gaussian_blur_real_space``. Must not be
            ``None``.
        *emmap*
            value passed through as ``densMap``.

    Returns:
        tuple ``(pName, modelmap, structure_instance)``.

    Exits via ``sys.exit`` when *res* is ``None``. The check is done before
    the model is parsed so no work is wasted on an unusable request.
    """
    if res is None:
        sys.exit('Map resolution required..')
    pName = os.path.basename(p).split('.')[0]
    print('reading the model')
    structure_instance = PDBParser.read_PDB_file(pName, p, hetatm=False, water=False)
    print('filtering the model')
    blurrer = StructureBlurrer()
    modelmap = blurrer.gaussian_blur_real_space(structure_instance, res, sigma_coeff=0.187, densMap=emmap, normalise=True)
    return pName, modelmap, structure_instance
def blur_model(p, res=4.0, emmap=False):
    """
    Read a PDB model and simulate a density map from it.

    Arguments:
        *p*
            path of the PDB file; the part of its base name before the
            first ``.`` is used as the structure name.
        *res*
            resolution handed to ``gaussian_blur_real_space``. Must not be
            ``None``.
        *emmap*
            value passed through as ``densMap``.

    Returns:
        tuple ``(pName, modelmap)``.

    Exits via ``sys.exit`` when *res* is ``None``. The check is done before
    the model is parsed so no work is wasted on an unusable request.
    """
    if res is None:
        sys.exit('Map resolution required..')
    pName = os.path.basename(p).split('.')[0]
    structure_instance = PDBParser.read_PDB_file(pName, p, hetatm=False, water=False)
    blurrer = StructureBlurrer()
    modelmap = blurrer.gaussian_blur_real_space(structure_instance, res, densMap=emmap, normalise=True)
    return pName, modelmap
def test_tempy_sccc(self):
    '''
    Check the TEMPy SCCC score of every rigid-body segment of the bundled
    1ake example against stored reference values. These numbers are the
    baseline for the second chimeraX test.
    '''
    # the sigma factor determines the width of the Gaussian distribution
    # used to describe each atom
    sim_sigma_coeff = 0.187
    data_dir = "./"
    map_path = os.path.join(data_dir, '1akeA_10A.mrc')
    model_path = os.path.join(data_dir, '1ake_mdl1.pdb')
    resolution = 10.0
    rigid_body_path = os.path.join(data_dir, '1ake_mdl1_rigid.txt')

    scorer = ScoringFunctions()
    emmap = MapParser.readMRC(map_path)
    structure_instance = PDBParser.read_PDB_file('pdbfile', model_path, hetatm=False, water=False)

    # one structure instance per segment listed in the rigid body file
    listRB = RBParser.read_FlexEM_RIBFIND_files(rigid_body_path, structure_instance)

    # SCCC score of each rigid body segment, in file order
    sccc_scores = [
        scorer.SCCC(emmap, resolution, sim_sigma_coeff, structure_instance, segment)
        for segment in listRB
    ]

    self.assertTrue(len(listRB) == 6)
    reference_scores = (0.954, 0.427, 0.624, 0.838, 0.971, 0.928)
    for position, reference in enumerate(reference_scores):
        self.assertTrue(abs(round(sccc_scores[position], 4) - reference) < 0.01)
def generate_extra_AA_PDBs(workingDir):
    """
    Create extra AA PDBs from the existing PDBs, by filling up each AA
    directory to 100 instances.

    New PDBs are created by randomly selecting a file from the existing
    list, randomly rotating it around a random axis, and writing the result
    to a new file named ``<original>-extra<N>`` in the same directory.

    Only entries matching ``<name>.pdb`` are used as templates. A directory
    that is below the target count but holds no usable template is reported
    on stderr and skipped (previously this crashed).

    Arguments:
        *workingDir*
            root directory; the AA directories are expected under
            ``<workingDir>/simulated/PDB/<AA>`` for every AA in ``aAList``.
    """
    min_num_of_pdbs_per_aa = 100
    pdbNormalizedDir = workingDir + "/simulated/PDB"
    pdbNamePattern = re.compile(r'(\S+)\.pdb', re.I)

    # first count how much work we have to do (compute totalNumFilesNeeded)
    totalNumFilesDone = 0
    totalNumFilesNeeded = 0
    for curAA in aAList:
        numFiles = len(os.listdir(pdbNormalizedDir + '/' + curAA))
        if numFiles < min_num_of_pdbs_per_aa:
            totalNumFilesNeeded += min_num_of_pdbs_per_aa - numFiles

    # now create the files
    for curAA in aAList:  # loop on all AAs
        current_dir = pdbNormalizedDir + '/' + curAA
        dirFilesList = os.listdir(current_dir)
        numFiles = len(dirFilesList)
        if numFiles >= min_num_of_pdbs_per_aa:
            continue

        # Pair every usable template with its extension-less name once,
        # instead of re-matching the regex for each generated file.
        templates = []
        for fileName in dirFilesList:
            nameMatch = pdbNamePattern.match(fileName)
            if nameMatch is not None:
                templates.append((fileName, nameMatch.group(1)))
        if not templates:
            sys.stderr.write('No PDB templates found for {0} in {1}, skipping\n'.format(curAA, current_dir))
            continue

        while numFiles < min_num_of_pdbs_per_aa:  # until we have enough files
            numFiles += 1
            totalNumFilesDone += 1
            # get the file
            pdbFileName, pdbFileNameWoExtension = random.choice(templates)
            pdbFilePath = "{0}/{1}".format(current_dir, pdbFileName)
            aaStruture = PDBParser.read_PDB_file(pdbFileNameWoExtension, pdbFilePath)
            # randomly rotate and write the result
            randomAxis = [random.random() for _ in range(3)]
            randomAngle = random.random() * 360  # (angle is in degrees)
            aaStruture.rotate_by_axis_angle(randomAxis[0], randomAxis[1], randomAxis[2], randomAngle)
            # numFiles makes the generated name unique within this run
            aaStruture.write_to_PDB("{0}/{1}-extra{2}".format(current_dir, pdbFileNameWoExtension, numFiles))
            # show progress
            sys.stdout.write('\r{:4}/{:4} ({:5.4}%), current AA: {}'.format(totalNumFilesDone, totalNumFilesNeeded, totalNumFilesDone*100./totalNumFilesNeeded, curAA))
            sys.stdout.flush()
    # Terminate the in-place progress line.
    sys.stdout.write('\n')
def loadEnsemble(self,path_dir,file_name_flag,hetatm=False,water=False,verbose=False,pdb=True):
    """
    Load an ensemble of Structure Instance from the directory path_dir.

    Every directory entry whose name contains *file_name_flag* and ends in
    ``.pdb`` is printed; when *pdb* is True it is also parsed.

    Arguments:
        *path_dir*
            directory name
        *file_name_flag*
            name or suffix of the files.
        *hetatm*, *water*
            forwarded to ``PDBParser.read_PDB_file``.
        *verbose*
            when True, print the structure name and path before parsing.
        *pdb*
            when not True, matching files are only printed, not loaded.

    Returns:
        list of ``[name_without_extension, structure_instance]`` pairs.
    """
    loaded = []
    for entry in os.listdir(path_dir):
        is_candidate = file_name_flag in entry and entry[-4:] == '.pdb'
        if not is_candidate:
            continue
        print(entry)
        if pdb == True:
            model_id = entry[:-4]
            model_path = path_dir + '/' + entry
            if verbose == True:
                print("load file: %s %s" % (model_id, model_path))
            parsed = PDBParser.read_PDB_file(str(model_id), str(model_path), hetatm=hetatm, water=water)
            loaded.append([model_id, parsed])
    return loaded
def _ccc(self, mapname, modelname, res):
    """
    Score a model against a map with ``ScoringFunctions.CCC_map``.

    *mapname* and *modelname* are file names relative to ``./``; *res* is
    the resolution used to blur the model and to pick the model contour
    factor. Returns the tuple ``(ccc, ovr)``.
    """
    base_dir = "./"
    emmap1 = MapParser.readMRC(os.path.join(base_dir, mapname))
    model_path = os.path.join(base_dir, modelname)
    structure_instance = PDBParser.read_PDB_file('pdbfile', model_path, hetatm=False, water=False)
    blurrer = StructureBlurrer()

    # map contour: peak density plus 1.5 sigma, or None without a peak
    sigma_factor = 1.5
    zeropeak, _ave, sigma1 = emmap1._peak_density()
    c1 = zeropeak + (sigma_factor * sigma1) if zeropeak is not None else None

    # model contour factor: first resolution bound exceeded wins
    mt = 0.1
    for lower_bound, factor in ((20.0, 2.0), (10.0, 1.0), (6.0, 0.5)):
        if res > lower_bound:
            mt = factor
            break

    emmap2 = blurrer.gaussian_blur_real_space(structure_instance, res, sigma_coeff=0.187, densMap=emmap1, normalise=True)
    # calculate model contour - emmap1 apparently?
    c2 = mt * emmap2.std()

    sc = ScoringFunctions()
    # first call (extra positional 3) supplies the overlap, second the CCC
    _first, ovr = sc.CCC_map(emmap1, emmap2, c1, c2, 3, cmode=False)
    ccc, _second = sc.CCC_map(emmap1, emmap2, c1, c2, cmode=False)
    print("Printing CCC", ccc, ovr, c1, c2)
    return (ccc, ovr)
from TEMPy.StructureParser import PDBParser from TEMPy.EnsembleGeneration import EnsembleGeneration import os path_out='Test_Files' if os.path.exists(path_out)==True: print "%s exists" %path_out else: os.mkdir(path_out) os.chdir(path_out) 'read a PDB files and create a structure instance' structure_instance=PDBParser.read_PDB_file('1J6Z','1J6Z.pdb',hetatm=False,water=False) print structure_instance translation_vector=[4.3, 1.0, -55] rotation_angle= 110 axis=[0.21949010788898163, -0.80559787935161753, -0.55030527207975843] print "rotation: ",rotation_angle print "axix: ",axis print "translation_vector",translation_vector print "generate angular sweep for 1J6Z" #EnsembleGeneration=EnsembleGeneration() list_ensemble=EnsembleGeneration().anglar_sweep(structure_instance,axis, translation_vector, 10, rotation_angle, 'mdl_angular_sweep', atom_com_ind=False) for struct in list_ensemble:
p = tp.args.pdb else: sys.exit('Input a map and model, map resolution (required)') rb_file = tp.args.rigidfile if rb_file is None: sys.exit('Rigid body file missing') # make class instances for density simulation (blurring), scoring and plot scores blurrer = StructureBlurrer() scorer = ScoringFunctions() Plot = Plot() # read map file emmap = MapParser.readMRC(m) # read PDB file structure_instance = PDBParser.read_PDB_file('pdbfile', p, hetatm=False, water=False) # generate atom density and blur to required resolution #sim_map = blurrer.gaussian_blur(structure_instance, r,densMap=emmap,sigma_coeff=sim_sigma_coeff,normalise=True) #sim_map = blurrer.gaussian_blur_real_space(structure_instance, r,densMap=emmap,sigma_coeff=sim_sigma_coeff,normalise=True) SCCC_list_structure_instance = [] # read rigid body file and generate structure instances for each segment listRB = RBParser.read_FlexEM_RIBFIND_files(rb_file, structure_instance) # score each rigid body segment listsc_sccc = [] print 'calculating scores' for RB in listRB: # sccc score score_SCCC = scorer.SCCC(emmap, r, sim_sigma_coeff, structure_instance, RB) SCCC_list_structure_instance.append(score_SCCC)
del emmap2.fullMap del emmap2 return emmap_1, emmap_2 #GET INPUT DATA if flag_example: p = os.path.join(path_example, '1J6Z.pdb') m = os.path.join(path_example, 'emd_5168_monomer.mrc') res = 6.6 Name1 = os.path.basename(m).split('.')[0] Name2 = os.path.basename(p).split('.')[0] emmap1 = MapParser.readMRC(m) structure_instance = PDBParser.read_PDB_file(Name2, p, hetatm=False, water=False) blurrer = StructureBlurrer() emmap2 = blurrer.gaussian_blur(structure_instance, res, densMap=emmap1) c1 = 9.7 c2 = 1.0 elif all(x is None for x in [m, m1, m2]): # for 2 models if None in [p1, p2]: sys.exit( 'Input two maps or a map and model, map resolution(s) (required) and contours (optional)' ) Name1, emmap1, c1 = model_contour(p1, res=4.0, emmap=False, t=0.5) r1 = r2 = r = 4.0 if c2 is None: Name2, emmap2, c2 = model_contour(p2, res=r, emmap=False, t=0.5)
if input_config["--res"] == 0: print 'Input resolution value not specified.\n' usage() #print 'multiple_ga --ipdb <pdbfile> --imap <mrcfile> --res <resolution> --popsize <popsize> --ngen <number_of_generations> --nga <number_of_ga_runs> --gof <score_type> --ncpu <number_of_cpus> --outdir <directory_name> --outfile <prefix_filename> --pdbsnap <1 or 0>' exit(0) if input_config["--outdir"] == 'dummy': print input_config["--ipdb"] input_config["--outdir"] = input_config["--ipdb"].split('.')[0] if input_config["--outfile"] == 'dummy.log': input_config["--outfile"] = input_config["--ipdb"].split('.')[0] + '.log' # Read pdb try: prot = PDBParser.read_PDB_file(input_config["--outdir"], input_config["--ipdb"], hetatm=False, water=False) # Get number of components comps = prot.split_into_chains() ncomp = len(comps) except: print "Error in reading pdb coordinate file\n" usage() exit(0) try: emmap = MapParser.readMRC(input_config["--imap"]) emmap.normalise() except: print "Error in reading map density file\n" usage()
it = 0 dict_reslist = {} #flex_em run iter while iter_num > 0: #if it > 0: # prevratio = avghigh/avglow #if it > 1: score_inc_prev = score_inc[:] ##os.chdir(DATADIR) dict_chains_scores = {} out_iter_pdb = list_to_check[it] lab = list_labels[it] if os.path.isfile(os.path.join(DATADIR, out_iter_pdb)): #read pdb structure_instance = PDBParser.read_PDB_file('pdbfile', os.path.join( DATADIR, out_iter_pdb), hetatm=False, water=False) #get scores dict_ch_scores, dict_chain_res = sc.SMOC(emmap, res_map, structure_instance, win, rfilepath, sim_sigma_coeff) else: print 'PDB file not found:', out_iter_pdb if rigid_out: dict_chain_indices, dict_chain_CA = blurrer.get_coordinates( structure_instance) rigidf = open(rigid_out_prefix + '_' + lab, 'w')
def test_tempy_smoc(self):
    ''' Test the tempy smoc score based on the files provided.
    Use this as a baseline for the second chimeraX test.
    It is taken straight from the score_smoc.py example tutorial.

    Cleaned up relative to the tutorial: plotting leftovers, unused
    counters, the unused z-score / score-ratio bookkeeping and the unused
    (and broken) nested KD-tree helper were removed; the scoring, the
    assertions and the b-factor assignment are unchanged.
    '''
    tp = TempyParser()
    tp.generate_args()
    # the sigma factor determines the width of the Gaussian distribution
    # used to describe each atom
    sim_sigma_coeff = 0.187
    # score window
    win = 9
    # fractions used to pick the low / high reference scores from the
    # sorted score list
    slow = 0.50
    shigh = 0.25  # fraction of structure fitted reasonably well initially

    DATADIR = os.getcwd()
    map_file = os.path.join(DATADIR, '1akeA_10A.mrc')
    res_map = 10.0
    list_to_check = ['1ake_mdl1.pdb']
    rfilepath = None  # no rigid body file is used in this test

    sc = ScoringFunctions()
    emmap = MapParser.readMRC(map_file)

    # residue numbers per chain, taken from the first model (for reference)
    dict_reslist = {}
    for it, out_iter_pdb in enumerate(list_to_check):
        pdb_path = os.path.join(DATADIR, out_iter_pdb)
        if not os.path.isfile(pdb_path):
            # Fail with a clear message instead of a NameError further down.
            self.fail('PDB file not found: {0}'.format(out_iter_pdb))
        # read pdb
        structure_instance = PDBParser.read_PDB_file('pdbfile', pdb_path, hetatm=False, water=False)
        # get scores
        dict_ch_scores, dict_chain_res = sc.SMOC(emmap, res_map, structure_instance, win, rfilepath, sim_sigma_coeff)

        dict_chains_scores = {}
        for ch in dict_ch_scores:
            dict_res_scores = dict_ch_scores[ch]
            # get res number list (for ref)
            if it == 0:
                dict_reslist[ch] = dict_chain_res[ch][:]
            try:
                reslist = dict_reslist[ch]
            except KeyError:
                print('Chain not common:', ch, out_iter_pdb)
                continue
            if len(reslist) == 0:
                print('Chain missing:', out_iter_pdb, ch)
                continue
            if ch not in dict_chains_scores:
                dict_chains_scores[ch] = {}

            scorelist = []
            for res in reslist:
                try:
                    scorelist.append(dict_res_scores[res])
                except KeyError:
                    # residue without a score: borrow the neighbour's score
                    if reslist.index(res) <= 0:
                        scorelist.append(dict_res_scores[reslist[reslist.index(res) + 1]])
                    else:
                        try:
                            scorelist.append(dict_res_scores[reslist[reslist.index(res) - 1]])
                        except IndexError:
                            scorelist.append(0.0)
                # save scores for each chain (one slot per checked model)
                curscore = "{0:.2f}".format(round(scorelist[-1], 2))
                try:
                    dict_chains_scores[ch][res][it] = str(curscore)
                except KeyError:
                    dict_chains_scores[ch][res] = [str(0.0)] * len(list_to_check)
                    dict_chains_scores[ch][res][it] = str(curscore)

            # calculate low and high score bounds
            list_sccc = sorted(scorelist)
            avglow = list_sccc[int(len(list_sccc) * slow)]
            if avglow == 0.0:
                avglow = 0.00001
            avghigh = list_sccc[int(len(list_sccc) * (1 - shigh))]

            # NOTE(review): the assertions are evaluated per chain; the
            # reference values presumably describe a single-chain model.
            self.assertTrue(abs(avghigh - 0.967) < 0.01)
            self.assertTrue(abs(avglow - 0.956) < 0.01)
            self.assertTrue(abs(sum(scorelist) / len(scorelist) - 0.899) < 0.01)

        # include smoc scores as b-factor records
        for x in structure_instance.atomList:
            cur_chain = x.chain
            cur_res = x.get_res_no()
            if cur_chain not in dict_reslist:
                continue
            if cur_chain in dict_chains_scores:
                try:
                    x.temp_fac = dict_chains_scores[cur_chain][cur_res][it]
                except (KeyError, IndexError):
                    # report the atom's own chain, not the last scored one
                    print('Residue missing: ', cur_res, cur_chain, out_iter_pdb)
                    x.temp_fac = 0.0
            else:
                x.temp_fac = 0.0
def test_tempy_nmi(self):
    ''' Test the tempy nmi score based on the files provided.
    Use this as a baseline for the second chimeraX test.

    Two comparisons are checked: map against map (emd_5168 / emd_5170)
    and map against a model blurred to the map resolution
    (emd_5168_monomer / 1J6Z).
    '''
    path_test = "./"
    sc = ScoringFunctions()

    def clamped_nmi(map_a, map_b, contour_a, contour_b):
        # Compute the MI score; negative scores are reported as 0.0.
        # Any exception is printed first and then turned into a test
        # failure (previously the failing assert ran before print_exc, so
        # the traceback was never printed).
        try:
            score = sc.MI(map_a, map_b, contour_a, contour_b, 1, None, None, True)
        except Exception:
            print_exc()
            self.fail('ScoringFunctions.MI raised an exception')
        if score < 0.0:
            score = 0.0
        return score

    # --- map vs map ---
    m = os.path.join(path_test, 'emd_5168.map')
    p = os.path.join(path_test, 'emd_5170.map')
    rez1 = 6.6
    rez2 = 15.0
    _, emmap1, c1 = map_contour(m, t=1.5)
    _, emmap2, c2 = map_contour(p, t=1.5)
    print(rez1, rez2, c1, c2, emmap1.apix, emmap2.apix)

    if not sc.mapComparison(emmap1, emmap2):
        emmap1._crop_box(c1, 0.5)
        emmap2._crop_box(c2, 0.5)
        # low-pass filter the map whose resolution value is clearly the
        # smaller of the two to the other's resolution, then match grids
        if rez1 > 1.25 * rez2:
            emmap_2 = lpfilter(emmap2, rez1)
            emmap1, emmap2 = match_grid(emmap1, emmap_2, c1, c2)
        elif rez2 > 1.25 * rez1:
            emmap_1 = lpfilter(emmap1, rez2)
            emmap1, emmap2 = match_grid(emmap_1, emmap2, c1, c2)
        else:
            emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

    nmi = clamped_nmi(emmap1, emmap2, c1, c2)
    self.assertTrue(abs(round(nmi, 5) - 1.0492) < 0.001)

    # --- Now test with a model and map ---
    p = os.path.join(path_test, '1J6Z.pdb')
    m = os.path.join(path_test, 'emd_5168_monomer.mrc')
    res = 6.6
    Name2 = os.path.basename(p).split('.')[0]
    emmap1 = MapParser.readMRC(m)
    structure_instance = PDBParser.read_PDB_file(Name2, p, hetatm=False, water=False)
    blurrer = StructureBlurrer()
    emmap2 = blurrer.gaussian_blur(structure_instance, res, densMap=emmap1)
    c1 = 9.7
    c2 = 1.0

    nmi = clamped_nmi(emmap1, emmap2, c1, c2)
    self.assertTrue(abs(round(nmi, 5) - 1.0575) < 0.001)
# Script fragment: read a target map and a structure from the command line
# and apply a fixed translation followed by rotations to the structure.
#   sys.argv[1] -- path of the target map (read with MapParser.readMRC)
#   sys.argv[2] -- path of the PDB structure
# NOTE(review): PDBParser is used below but no import of it is visible in
# this excerpt -- confirm it is imported elsewhere in the file.
from TEMPy.MapParser import MapParser
from TEMPy.StructureBlurrer import StructureBlurrer
import numpy as np
import sys

# define point for rotation
# tempy examples use COM from input structure
# rotating against 0 0 0 doesn't seem to work
import TEMPy.Vector as Vector
com = Vector.Vector(90, 90, 90)

# read in map
target_map = MapParser.readMRC(sys.argv[1]) #read target map

# read in structure
structure_instance = PDBParser.read_PDB_file('structure_id', sys.argv[2])

# translate along x, y, z
structure_instance.translate(42, 58, -5)

# rotate along x, y, z
# The angles are stored in radians and converted with np.rad2deg before
# being passed on; each rotation is taken about the point `com`.
# first about the z axis (0, 0, 1) ...
structure_instance.rotate_by_axis_angle(0, 0, 1, np.rad2deg(-2.125868534775962), com=com)
# ... then about the y axis (0, 1, 0)
structure_instance.rotate_by_axis_angle(0, 1, 0, np.rad2deg(-0.0005038746980934731), com=com)
#===============================================================
# This example reads a PDB file and creates a structure instance
#===============================================================
from TEMPy.StructureParser import PDBParser
import os

# Work inside the output directory, creating it on first use.
path_out = 'Test_Files'
if not os.path.exists(path_out):
    os.mkdir(path_out)
else:
    print("%s exists" % path_out)
os.chdir(path_out)

# Generate Structure Instance from PDB File, default hetatm=False and water= False

# fetch a structure PDB file and create a structure instance
structure_instance = PDBParser.fetch_PDB(
    '1A5T',
    '1A5T.pdb',
    hetatm=True,
    water=False,
)
print(structure_instance)

# fetch a structure mmCIF file and create a structure instance
# need last version Biopython (1.40b)
#structure_instance=mmCIFParser.fetch_mmCIF('1A5T','1A5T.cif',hetatm=True,water=True)

# read a PDB files and create a structure instance
structure_instance = PDBParser.read_PDB_file(
    '1J6Z',
    '1J6Z.pdb',
    hetatm=False,
    water=False,
)
print(structure_instance)