def model_contour(p, res=4.0, emmap=False, t=-1.):
    """Blur a model into a density map and optionally derive its contour level.

    Arguments:
        *p*
            Structure instance, passed straight to
            ``StructureBlurrer.gaussian_blur_real_space``.
        *res*
            Resolution handed to the blurrer.
        *emmap*
            Optional template map forwarded as ``densMap``; ``False`` means
            no template is supplied.
        *t*
            Contour multiplier. The sentinel ``-1.0`` disables the contour.

    Return:
        ``(modelmap, contour)`` where ``contour`` is ``t * modelmap.std()``,
        or ``None`` when ``t`` is the sentinel.
    """
    blurrer = StructureBlurrer()
    modelmap = blurrer.gaussian_blur_real_space(p, res, densMap=emmap,
                                                normalise=True)
    contour = None
    if t != -1.0:
        # The level is taken from the simulated model map itself. The previous
        # code used emmap.std(), which raised AttributeError whenever the
        # default emmap=False was combined with a real multiplier.
        contour = t * modelmap.std()
    return modelmap, contour
def generate_EMs_from_normalized_AA_PDBs(workingDir):
    """Simulate one EM map per normalised amino-acid PDB file.

    Walks ``<workingDir>/simulated/PDB_normalized/<AA>/`` for every
    sub-directory whose name is listed in ``aAList``, blurs each PDB file
    into a 50x50x50 box and writes the result to
    ``<workingDir>/simulated/EM/<AA>/<name>.map``. A one-line progress
    counter is kept up to date on stdout.

    Arguments:
        *workingDir*
            Root directory containing the ``simulated`` tree.
    """
    structureBlurrer = StructureBlurrer()
    emDirectory = workingDir + "/simulated/EM"
    pdbNormalizedDir = workingDir + "/simulated/PDB_normalized"
    numFiles = numOfFilesSubdir(pdbNormalizedDir)
    currFileNum = 0
    for aaDirName in os.listdir(pdbNormalizedDir):
        if aaDirName not in aAList:
            continue
        emAaDir = "{0}/{1}".format(emDirectory, aaDirName)
        if not os.path.exists(emAaDir):
            os.makedirs(emAaDir)
        pdbPath = "{0}/{1}".format(pdbNormalizedDir, aaDirName)
        for pdbFileName in os.listdir(pdbPath):
            pdbFileNameMatch = re.match(r'(\S+)\.pdb', pdbFileName, re.I)
            if pdbFileNameMatch is None:
                # Stray entries (README, hidden files, ...) used to crash the
                # whole run with AttributeError on .group(); skip them.
                continue
            pdbFileNameWoExtension = pdbFileNameMatch.group(1)
            pdbFilePath = "{0}/{1}".format(pdbPath, pdbFileName)
            aaStruture = PDBParser.read_PDB_file(pdbFileNameWoExtension,
                                                 pdbFilePath)
            aaSimMap = structureBlurrer.gaussian_blur_box(aaStruture, 2,
                                                          50, 50, 50)
            aAEmFileName = "{0}/{1}.map".format(emAaDir,
                                                pdbFileNameWoExtension)
            aaSimMap.write_to_MRC_file(aAEmFileName)
            currFileNum += 1
            # '\r' rewinds to the line start so the counter updates in place.
            sys.stdout.write(
                '\r{:4}/{:4} ({:5.4}%), current file: {}'.format(
                    currFileNum, numFiles, currFileNum * 100. / numFiles,
                    aAEmFileName))
            sys.stdout.flush()
    # Terminate the progress line (same output as the old "print '\n',",
    # but valid on both Python 2 and Python 3).
    sys.stdout.write('\n')
def blur_model(p, res=4.0, emmap=False):
    """Read a PDB file and blur it into a simulated density map.

    Arguments:
        *p*
            Path of the PDB file. The model name is the file's base name up
            to its first dot.
        *res*
            Resolution handed to the blurrer; must not be ``None``.
        *emmap*
            Optional template map forwarded as ``densMap``.

    Return:
        ``(pName, modelmap, structure_instance)``.

    Exits via ``sys.exit`` when ``res`` is ``None``.
    """
    # Validate before touching the file: the check used to run only after
    # the model had been parsed and the progress messages printed.
    if res is None:
        sys.exit('Map resolution required..')
    pName = os.path.basename(p).split('.')[0]
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print('reading the model')
    structure_instance = PDBParser.read_PDB_file(pName, p, hetatm=False,
                                                 water=False)
    print('filtering the model')
    blurrer = StructureBlurrer()
    modelmap = blurrer.gaussian_blur_real_space(structure_instance, res,
                                                sigma_coeff=0.187,
                                                densMap=emmap,
                                                normalise=True)
    return pName, modelmap, structure_instance
def blur_model(p, res=4.0, emmap=False):
    """Read a PDB file and blur it into a simulated density map.

    Arguments:
        *p*
            Path of the PDB file. The model name is the file's base name up
            to its first dot.
        *res*
            Resolution handed to the blurrer; must not be ``None``.
        *emmap*
            Optional template map forwarded as ``densMap``.

    Return:
        ``(pName, modelmap)``.

    Exits via ``sys.exit`` when ``res`` is ``None``.
    """
    # Fail fast: the resolution used to be checked only after the PDB file
    # had already been parsed.
    if res is None:
        sys.exit('Map resolution required..')
    pName = os.path.basename(p).split('.')[0]
    structure_instance = PDBParser.read_PDB_file(pName, p, hetatm=False,
                                                 water=False)
    blurrer = StructureBlurrer()
    modelmap = blurrer.gaussian_blur_real_space(structure_instance, res,
                                                densMap=emmap,
                                                normalise=True)
    return pName, modelmap
def score(session, atomic_model, map_model, rez):
    """Compute the CCC score between one atomic model and one map.

    The model's atoms are converted to TEMPy atoms (each tagged with its
    running index), blurred to resolution *rez* on the grid of the converted
    map, and the cross-correlation between the two maps is printed and
    returned. *session* is accepted but not used here.
    """
    print("Calculating CCC Score")

    # Convert the inputs to their TEMPy counterparts.
    tempy_atoms = [
        chimera_to_tempy_atom(atom, index)
        for index, atom in enumerate(atomic_model.atoms)
    ]
    model_structure = BioPy_Structure(tempy_atoms)
    target_map = chimera_to_tempy_map(map_model)

    # Simulate the model density on the target grid, then correlate.
    simulated_map = StructureBlurrer().gaussian_blur(model_structure, rez,
                                                     densMap=target_map)
    ccc = ScoringFunctions().CCC(target_map, simulated_map)
    print(ccc)
    return ccc
def build_maps(self, resolution, template_map, sig_coeff=0.356):
    """
    Build one simulated map per component in ``structList``.

    Every map is appended to ``mapList`` and a copy of it to ``initMapList``.

    Arguments:
        *resolution*
            Desired resolution of the density maps in Angstrom units.
        *template_map*
            A map object used as the template for the individual maps.
            Usually the input map used for the assembly fitting.
        *sig_coeff*
            The sigma value (multiplied by the resolution) that controls the
            width of the Gaussian. Default value is 0.356.
    """
    blurrer = StructureBlurrer()
    for component in self.structList:
        component_map = blurrer.gaussian_blur(component, resolution,
                                              template_map, sig_coeff)
        self.mapList.append(component_map)
        # Keep an independent copy of the map as first built.
        self.initMapList.append(component_map.copy())
def _ccc(self, mapname, modelname, res):
    """Score a model against a map; return ``(ccc, overlap)``.

    Both files are read relative to ``./``. The map contour is the zero
    peak plus 1.5 sigma (left as ``None`` when no peak is reported); the
    model contour is a resolution-dependent multiple of the simulated
    map's standard deviation.
    """
    base_dir = "./"
    target_map = MapParser.readMRC(os.path.join(base_dir, mapname))
    model = PDBParser.read_PDB_file('pdbfile',
                                    os.path.join(base_dir, modelname),
                                    hetatm=False, water=False)
    blurrer = StructureBlurrer()

    # Map contour level.
    sigma_factor = 1.5
    zeropeak, ave, sigma1 = target_map._peak_density()
    map_level = None
    if zeropeak is not None:
        map_level = zeropeak + (sigma_factor * sigma1)

    # Model contour multiplier: coarser resolutions get a larger factor.
    for lower_bound, factor in ((20.0, 2.0), (10.0, 1.0), (6.0, 0.5)):
        if res > lower_bound:
            model_factor = factor
            break
    else:
        model_factor = 0.1

    simulated_map = blurrer.gaussian_blur_real_space(model, res,
                                                     sigma_coeff=0.187,
                                                     densMap=target_map,
                                                     normalise=True)
    model_level = model_factor * simulated_map.std()

    sc = ScoringFunctions()
    # The first call (extra positional 3) supplies the overlap value,
    # the second call supplies the CCC itself.
    _, ovr = sc.CCC_map(target_map, simulated_map, map_level, model_level, 3,
                        cmode=False)
    ccc, _ = sc.CCC_map(target_map, simulated_map, map_level, model_level,
                        cmode=False)
    print("Printing CCC", ccc, ovr, map_level, model_level)
    return (ccc, ovr)
from TEMPy.ScoringFunctions import ScoringFunctions from TEMPy.EnsembleGeneration import EnsembleGeneration import os path_out='Test_Files' if os.path.exists(path_out)==True: print "%s exists" %path_out else: os.mkdir(path_out) os.chdir(path_out) structure_instance=PDBParser.read_PDB_file('1J6Z','1J6Z.pdb',hetatm=False,water=False) print structure_instance blurrer = StructureBlurrer() EnsembleGeneration=EnsembleGeneration() scorer = ScoringFunctions() map_target=MapParser.readMRC('emd_5168_monomer.mrc') #read target map map_probe = blurrer.gaussian_blur(structure_instance, 6.6,densMap=map_target)#create a simulated map from the structure instance #Create a Random ensemble of 10 structures randomly within 5 A translation and 60 deg rotation. list_rotate_models=EnsembleGeneration.randomise_structs(structure_instance, 10, 5, 60, v_grain=30, rad=False,write=True) #CCC score from starting fit line='%s %s\n'%('1J6Z',scorer.CCC(map_probe,map_target)) count=0 #loop to score each of the alternative fits in the ensemble for mod in list_rotate_models:
def cluster_fit_ensemble_top_fit(self, ensemble_list, score, rms_cutoff,
                                 res_target_map, sigma_coeff,
                                 number_top_mod=0, write=False,
                                 targetMap=False):
    """
    RMSD clustering of multiple "fits", starting from the best scoring model
    according to a chosen score.

    The fits are ranked with ``rank_fit_ensemble``; then, walking down the
    ranking, every fit that is still unclassified opens a new class and
    every later unclassified fit whose Calpha RMSD to it is below
    *rms_cutoff* joins that class.

    Arguments:
        *ensemble_list*
            Input list of Structure Instances (entries are indexed as
            ``[name, structure]``).
        *targetMap*
            Target Map Instance; a copy is handed to the ranking step.
        *score*
            Scoring function to use. Options are:
                i    'CCC' - Cross-correlation coefficient;
                ii   'LAP' - Laplacian-filtered cross-correlation
                     coefficient: useful for maps with resolutions worse
                     than 10-15 A;
                iii  'MI' - Mutual information score: a good and robust
                     score but relatively slow to calculate;
                iv   'ENV' - Envelope score: the fastest score to calculate
                     due to binarisation of the map;
                v-vii 'NV', 'NV_Sobel', 'NV_Laplace' - Normal vector score:
                     a vector-based surface superimposition score with or
                     without Sobel/Laplace filter;
                viii 'CD' - Chamfer Distance: a score used in computer
                     vision algorithms as a fast similarity metric.
        *rms_cutoff*
            float, the Calpha RMSD cutoff used to cluster the solutions.
            For example 3.5 (for 3.5 A).
        *res_target_map*
            The resolution, in Angstroms, of the target Map.
        *sigma_coeff*
            The sigma value (multiplied by the resolution) that controls
            the width of the Gaussian. Commonly used values:
            0.187R, the Gaussian width of the Fourier transform falling to
            half the maximum at 1/resolution, as used in Situs
            (Wriggers et al, 1999);
            0.225R, the Fourier transform of the distribution falling to
            1/e of its maximum value at wavenumber 1/resolution, the
            default in Chimera (Pettersen et al, 2004);
            0.356R, the Gaussian width at 1/e maximum height equalling the
            resolution, an option in Chimera (Pettersen et al, 2004);
            0.425R, the full width at half maximum being equal to the
            resolution, as used by FlexEM (Topf et al, 2008);
            0.5R, the distance between the two inflection points being the
            same length as the resolution, an option in Chimera
            (Pettersen et al, 2004);
            1R, the sigma value simply equal to the resolution, as used by
            NMFF (Tama et al, 2004).
        *number_top_mod*
            Number of fits to cluster. Default (0) is all.
        *write*
            True will write out a file that contains the list of the
            structure instances representing different fits scored and
            clustered. Note the lrms column is the Calpha RMSD of each fit
            from the first fit in its class.

    Return:
        Whatever ``Cluster._print_results_cluster`` returns for the
        clustered fits.
    """
    cluster = Cluster()
    # Always rank the whole ensemble; truncation to the top fits is done
    # below so that both code paths share one clustering loop.
    list_ordered = cluster.rank_fit_ensemble(ensemble_list, score,
                                             res_target_map, sigma_coeff,
                                             number_top_mod=0, write=False,
                                             targetMap=targetMap.copy())
    if number_top_mod == 0:
        fits = list_ordered
    else:
        fits = list_ordered[:int(number_top_mod)]

    # Each entry is [name, structure, score, lrmsd, class]; class 0 means
    # "not assigned yet".
    fit_class = 0
    for position, ipdb in enumerate(fits):
        # The format strings are now applied with %; the old calls passed
        # the values as extra print() arguments and printed the raw template.
        print("model num %d: %s\n" % (position + 1, ipdb[0]))
        print('next index ' + str(position))
        if ipdb[-1] != 0:
            continue
        fit_class += 1
        reference = ipdb[1]
        for ipdb1 in fits[position:]:
            if ipdb1[-1] != 0:
                continue
            rmsd_val = float(
                reference.RMSD_from_same_structure(ipdb1[1], CA=True))
            ipdb1[3] = rmsd_val
            print("rmsd of %s from best local fit (%s)= %.2f"
                  % (ipdb1[0], ipdb[0], rmsd_val))
            if rmsd_val < rms_cutoff:
                ipdb1[-1] = fit_class
                print('class= ' + str(ipdb1[-1]))
    return cluster._print_results_cluster(fits, fit_class, number_top_mod,
                                          score, write)
def rank_fit_ensemble(self, ensemble_list, score, res_target_map, sigma_coeff,
                      number_top_mod=0, write=False, targetMap=False,
                      cont_targetMap=None):
    """
    Score every fit in an ensemble against the target map and rank the fits
    from best to worst.

    Arguments:
        *ensemble_list*
            Input list of Structure Instances (entries are indexed as
            ``[name, structure]``).
        *targetMap*
            Target Map Instance. Required: the function prints a warning
            and exits when it is left at ``False``.
        *score*
            Scoring function to use. Options are:
                i    'CCC' - Cross-correlation coefficient;
                ii   'LAP' - Laplacian-filtered cross-correlation
                     coefficient: useful for maps with resolutions worse
                     than 10-15 A;
                iii  'MI' - Mutual information score: a good and robust
                     score but relatively slow to calculate;
                iv   'ENV' - Envelope score: the fastest score to calculate
                     due to binarisation of the map;
                v-vii 'NV', 'NV_Sobel', 'NV_Laplace' - Normal vector score:
                     a vector-based surface superimposition score with or
                     without Sobel/Laplace filter;
                viii 'CD' - Chamfer Distance: a score used in computer
                     vision algorithms as a fast similarity metric.
            Any other value prints the list of valid names and exits.
        *res_target_map*
            The resolution, in Angstroms, of the target Map.
        *sigma_coeff*
            The sigma value (multiplied by the resolution) that controls
            the width of the Gaussian. Commonly used values: 0.187R (Situs,
            Wriggers et al, 1999); 0.225R (Chimera default, Pettersen et al,
            2004); 0.356R and 0.5R (Chimera options); 0.425R (FlexEM,
            Topf et al, 2008); 1R (NMFF, Tama et al, 2004).
        *number_top_mod*
            Number of top-ranked fits to keep. Default (0) keeps all.
        *write*
            When exactly ``True`` the ranked list is passed through
            ``Cluster._print_results_cluster2`` and its result returned.
        *cont_targetMap*
            Optional contour level of the target map. When given, the
            thresholded variants of the scores are used instead of
            thresholds derived from the model's mass.

    Return:
        List of ``[name, structure, score, 0, 0]`` entries sorted by
        descending score (the two trailing zeros are the lrmsd and class
        placeholders), or the result of ``_print_results_cluster2`` when
        *write* is True.
    """
    valid_scores = ['CCC', 'LAP', 'MI', 'NV', 'NV_Sobel', 'NV_Laplace',
                    'ENV', 'CD']
    # The three normal-vector variants differ only in the Filter argument.
    nv_filters = {'NV': None, 'NV_Sobel': 'Sobel', 'NV_Laplace': 'Laplace'}

    blurrer = StructureBlurrer()
    scorer = ScoringFunctions()
    cluster = Cluster()

    if targetMap is False:
        print("WARNING:Need target map")
        sys.exit()
    if score not in valid_scores:
        # Both messages are now really formatted; the old code passed the
        # values as extra print() arguments and glued the names together.
        print('Incorrect Scoring Function: %s' % score)
        print('Please select from one of the following scoring functions: %s'
              % ', '.join(valid_scores))
        sys.exit()
    targetMap = targetMap.copy()

    def primary_boundary(mod):
        # Lower density threshold derived from the model's mass.
        return targetMap.get_primary_boundary(mod.get_prot_mass_from_atoms(),
                                              targetMap.min(),
                                              targetMap.max())

    def boundary_pair(mod):
        # (min_thr, max_thr) thresholds used by the surface-based scores.
        min_thr = primary_boundary(mod)
        points = targetMap.get_point_map(min_thr, percentage=0.2)
        max_thr = targetMap.get_second_boundary(min_thr, points, min_thr,
                                                targetMap.max(),
                                                err_percent=1)
        return min_thr, max_thr

    list_to_order = []
    for entry in ensemble_list:
        name_mod = entry[0]
        mod = entry[1]
        if score == 'ENV':
            # The envelope score works on the structure; no simulated map.
            score_mod = scorer.envelope_score(targetMap,
                                              primary_boundary(mod), mod)
        else:
            sim_map = blurrer.gaussian_blur(mod, res_target_map,
                                            densMap=targetMap,
                                            sigma_coeff=sigma_coeff)
            if score == 'CCC':
                if cont_targetMap is not None:
                    score_mod = scorer.CCC_map(
                        sim_map, targetMap, 0.5 * sim_map.fullMap.std(),
                        cont_targetMap, 2, True)[0]
                else:
                    score_mod = scorer.CCC_map(sim_map, targetMap, 0.0, 0.0,
                                               True)[0]
            elif score == 'LAP':
                score_mod = scorer.laplace_CCC(sim_map, targetMap)
            elif score == 'MI':
                if cont_targetMap is not None:
                    score_mod = scorer.MI(sim_map, targetMap,
                                          0.5 * sim_map.fullMap.std(),
                                          cont_targetMap, 1)
                else:
                    score_mod = scorer.MI(sim_map, targetMap)
            elif score in nv_filters:
                if cont_targetMap is not None:
                    min_thr = cont_targetMap - (0.1 * targetMap.std())
                    max_thr = cont_targetMap + (0.1 * targetMap.std())
                else:
                    min_thr, max_thr = boundary_pair(mod)
                score_mod = scorer.normal_vector_score(
                    targetMap, sim_map, min_thr, max_thr,
                    Filter=nv_filters[score])
                # Map the raw value into "higher is better".
                # NOTE(review): applied to both threshold branches here; the
                # flattened original does not show its nesting - confirm.
                score_mod = 1 - (score_mod / 3.14)
            else:  # 'CD'
                if cont_targetMap is not None:
                    score_mod = scorer._surface_distance_score(
                        sim_map, targetMap, 0.5 * sim_map.fullMap.std(),
                        cont_targetMap, 'Minimum')
                else:
                    min_thr, max_thr = boundary_pair(mod)
                    score_mod = scorer.chamfer_distance(sim_map, targetMap,
                                                        min_thr, max_thr,
                                                        kdtree=None)
                # Invert the distance so a larger score means a better fit.
                # NOTE(review): same nesting caveat as for the NV scores.
                score_mod = 1 / score_mod
        # 'name_file', 'structure_instance', 'score', 'lrmsd', 'class'
        list_to_order.append([name_mod, mod, score_mod, 0, 0])

    # Every score is oriented so that higher is better.
    list_ordered = sorted(list_to_order, key=lambda item: item[2],
                          reverse=True)
    if number_top_mod != 0:
        list_ordered = list_ordered[:int(number_top_mod)]
    if write == True:  # noqa: E712 - keep the original strict comparison
        return cluster._print_results_cluster2(list_ordered, write)
    return list_ordered
emmap1 = MapParser.readMRC(m) if r1 is None and r is None: sys.exit('Input a map and model, map resolution (required)') elif r1 is None: r1 = r if all(x is None for x in [p, p1, p2]): sys.exit('Input a map and model, map resolution (required)') elif None in [p1, p2]: p = tp.args.pdb else: sys.exit('Input a map and model, map resolution (required)') rb_file = tp.args.rigidfile if rb_file is None: sys.exit('Rigid body file missing') # make class instances for density simulation (blurring), scoring and plot scores blurrer = StructureBlurrer() scorer = ScoringFunctions() Plot = Plot() # read map file emmap = MapParser.readMRC(m) # read PDB file structure_instance = PDBParser.read_PDB_file('pdbfile', p, hetatm=False, water=False) # generate atom density and blur to required resolution #sim_map = blurrer.gaussian_blur(structure_instance, r,densMap=emmap,sigma_coeff=sim_sigma_coeff,normalise=True) #sim_map = blurrer.gaussian_blur_real_space(structure_instance, r,densMap=emmap,sigma_coeff=sim_sigma_coeff,normalise=True) SCCC_list_structure_instance = []
from TEMPy.Cluster import Cluster import os path_out = 'Test_Files' if os.path.exists(path_out) == True: print "%s exists" % path_out else: os.mkdir(path_out) os.chdir(path_out) structure_instance = PDBParser.read_PDB_file('1J6Z', '1J6Z.pdb', hetatm=False, water=False) blurrer = StructureBlurrer() EnsembleGeneration = EnsembleGeneration() scorer = ScoringFunctions() map_target = MapParser.readMRC('emd_5168_monomer.mrc') #read target map print map_target map_probe = blurrer.gaussian_blur(structure_instance, 6.6, densMap=map_target) list_rotate_models = EnsembleGeneration.randomise_structs(structure_instance, 20, 10, 60, v_grain=30, rad=False, write=False)
def score(session, atomic_model1=None, map_model1=None, atomic_model2=None,
          map_model2=None, rez1=None, rez2=None, c1=None, c2=None):
    """
    Generate the NMI score for 2 maps, 2 models, or 1 map and 1 model.

    Arguments:
        *session*
            Accepted but not used here.
        *atomic_model1*, *map_model1*, *atomic_model2*, *map_model2*
            The inputs. Supported combinations, checked in this order:
            model 1 + map 1, map 1 + map 2, model 1 + model 2.
        *rez1*, *rez2*
            Resolutions. *rez1* is required for map + model, both are
            required for two models.
        *c1*, *c2*
            Optional contour levels; derived from the maps when omitted.

    Return:
        The NMI score, clamped to be non-negative; 0.0 when the scoring
        call raises; ``None`` when the inputs are incomplete.
    """
    sc = ScoringFunctions()
    blurrer = StructureBlurrer()

    if atomic_model1 is not None and map_model1 is not None:
        # 1 map 1 model
        if rez1 is None:
            print("Please provide the resolution for the model.")
            return
        emmap1 = chimera_to_tempy_map(map_model1)
        bms = chimera_to_tempy_model(atomic_model1)
        emmap2 = blurrer.gaussian_blur(bms, rez1, densMap=emmap1)
    elif map_model1 is not None and map_model2 is not None:
        # 2 maps
        emmap1 = chimera_to_tempy_map(map_model1)
        emmap2 = chimera_to_tempy_map(map_model2)
    elif atomic_model1 is not None and atomic_model2 is not None:
        # 2 models
        if None in [rez1, rez2]:
            print("Please provide the resolution for both model")
            return
        bms1 = chimera_to_tempy_model(atomic_model1)
        bms2 = chimera_to_tempy_model(atomic_model2)
        # model_contour returns (map, contour). The old code stored that
        # tuple as if it were a map, and called the path-based blur_model
        # with a structure. The contour is left disabled in the call and
        # the 0.5-sigma model level is derived from the blurred map here.
        emmap1, _ = model_contour(bms1, rez1, emmap=False)
        emmap2, _ = model_contour(bms2, rez2, emmap=False)
        if c1 is None:
            c1 = 0.5 * emmap1.std()
        if c2 is None:
            c2 = 0.5 * emmap2.std()
    else:
        print("Error. Must have 1 model and 1 map, 2 maps or 2 models")
        return

    # Contouring
    if c1 is None:
        c1 = map_contour(emmap1, t=1.5)
    if c2 is None:
        c2 = map_contour(emmap2, t=1.5)

    # Bring the two maps onto a common grid when they do not match.
    if not sc.mapComparison(emmap1, emmap2):
        emmap1._crop_box(c1, 0.5)
        emmap2._crop_box(c2, 0.5)
        # Resolutions are optional for map inputs; only low-pass filter
        # when both are known (None > float raises on Python 3).
        both_known = rez1 is not None and rez2 is not None
        if both_known and rez1 > 1.25 * rez2:
            emmap_2 = lpfilter(emmap2, rez1)
            emmap1, emmap2 = match_grid(emmap1, emmap_2, c1, c2)
        elif both_known and rez2 > 1.25 * rez1:
            emmap_1 = lpfilter(emmap1, rez2)
            emmap1, emmap2 = match_grid(emmap_1, emmap2, c1, c2)
        else:
            emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

    nmi = 0.0
    try:
        nmi = sc.MI(emmap1, emmap2, c1, c2, 1, None, None, True)
        if nmi < 0.0:
            nmi = 0.0
    except Exception:
        # Best effort: report the failure and fall back to 0.0, but no
        # longer swallow KeyboardInterrupt / SystemExit.
        print('Exception for NMI score')
        print_exc()
        nmi = 0.0
    return nmi
def run(self, runs, no_of_gen, pop_size, selection_method, gof, w_gof,
        w_clash, prot, ncomp, emmap, resolution, logfile, gasnap,
        vq_vec_list, mrate, crate, moffset, ncpu=1):
    """
    Main method to initiate GA cycle.

    Arguments:
        *runs*
            Number of GA solutions to generate (must be at least 1).
        *no_of_gen*
            Number of GA generations to generate (must be at least 1).
        *pop_size*
            Number of members in the GA population. Rounded up to a
            multiple of the number of CPUs used; must end up >= 10.
        *selection_method*
            Selection method used to pick members of the population when
            generating a new population. Must be 1 (tournament selection).
        *gof*
            Goodness-of-fit function: 1 for Mutual information score or 2
            for Cross Correlation Coefficient score.
        *w_gof*
            Weighting of the goodness-of-fit contribution to the GA
            fitness score.
        *w_clash*
            Weighting of the clash penalty contribution to the GA fitness
            score.
        *prot*
            Structure instance containing multiple chains, used to build
            the Assembly object.
        *ncomp*
            Number of components in the assembly.
        *emmap*
            Instance of a map object.
        *resolution*
            Resolution of the map.
        *logfile*
            Name of the output logfile (``.log`` is appended).
        *gasnap*
            Prefix for the per-generation PDB snapshots of the best fit;
            the value 'dummy' disables the snapshots.
        *vq_vec_list*
            List of Vector objects representing the initial point
            configuration used to generate the initial population of fits.
        *mrate*
            Mutation rate for the mutation operator.
        *crate*
            Crossover rate for the crossover operator.
        *moffset*
            Translation offset range (in Angstrom) applied to each
            component position in the initial population pool; 0.0 means
            "use the smallest distance between any two vq points".
        *ncpu*
            Number of cpus to use in parallel through Parallel Python.

    Return:
        A tuple of: the Population of the final generation; the structure
        of the fittest member of the final generation; its simulated map;
        a string with the best, minimum and total fitness scores of the
        final generation.

    Raises:
        ValueError: if *runs* or *no_of_gen* is smaller than 1 (this used
        to surface as a NameError at the very end of the run).
    """
    if runs < 1 or no_of_gen < 1:
        raise ValueError('runs and no_of_gen must both be at least 1')

    sel = Selection()
    scorer = ScoringFunctions()
    # NOTE: exit(0) is kept for these input errors to preserve the exit
    # status that existing callers and scripts observe.
    if selection_method == 1:
        sel_type = sel.tournament
        tour_size = 2
    else:
        print('Selection method other than tournament selection is not tested. Please use tournament selection')
        exit(0)
    if gof != 1 and gof != 2:
        print('Please select the gof=1 for MI or gof=2 for CCC')
        exit(0)

    # Build the assembly and the maps of its components.
    assembly = Assembly(prot.split_into_chains())
    assembly.build_maps(resolution, emmap)

    # Template grid for the volume clash score; apix is the sampling used
    # to record the atom positions overlaid on the template map.
    apix = 3.5
    template_grid = emmap._make_clash_map(apix)

    blurrer = StructureBlurrer()
    # Volumes occupied by the components in the assembly.
    cvol = emmap._get_component_volumes(prot, apix, blurrer)

    # "*" is passed to pp.Server as the list of remote ppservers.
    ppservers = ("*", )
    jobserver = pp.Server(ncpus=ncpu, ppservers=ppservers)
    cpu_avil = ncpu

    if pop_size <= cpu_avil:
        cpu_avil = pop_size
    elif pop_size % cpu_avil != 0:
        # Round the population up to a multiple of the CPU count. Floor
        # division keeps the result an int on Python 2 and Python 3.
        pop_size = (pop_size // cpu_avil) * cpu_avil + cpu_avil

    if pop_size < 10:
        print('Populations size < 10. Please increase the size.')
        exit(0)

    # Send the dynamically determined input parameters to the log file.
    with open(logfile + '.log', 'a') as f:
        f.write("Population size : " + str(pop_size) + "\n")
        f.write("Number of GA Generations : " + str(no_of_gen) + "\n")
        f.write("Number of GA runs : " + str(runs) + "\n")
        f.write("Number of CPUs used for the calculation : " + str(cpu_avil) + "\n")
        f.write(
            "------------------------------------------------------------------------------------------\n"
        )
        f.write(
            "GA_run and Generation no., Best fitness score, Weighted MI, Clash penalty, Protrusion penalty, Symmetry score, Worse fitness score, Total population fitness score, Average population fitness score, Std of population fitness score \n"
        )

    # Build maps of subunits, if not already done (a no-op whenever the
    # call above produced maps).
    if not assembly.mapList:
        assembly.build_maps(resolution, emmap)

    if moffset == 0.0:
        # Use the minimum distance among all vq pairs as the maximum
        # translation applied around each vq point for the random fits.
        dist_list = np.zeros(sum(range(1, len(vq_vec_list))))
        p = 0
        for i in range(len(vq_vec_list) - 1):
            for j in range(i + 1, len(vq_vec_list)):
                dist_list[p] = vq_vec_list[i].dist(vq_vec_list[j])
                p = p + 1
        max_change = dist_list.min()
    else:
        max_change = moffset

    # GA run loop
    for grun in range(runs):
        # NOTE(review): quat_vec is not defined in this method; presumably
        # a module-level name - confirm.
        pop = self.get_ga_pool(assembly, pop_size, max_change, vq_vec_list,
                               quat_vec)
        curr_pop = self.score_population(pop, pop_size, gof, w_gof, w_clash,
                                         cpu_avil, jobserver, scorer,
                                         assembly, emmap, ncomp, cvol,
                                         template_grid, apix)

        for x in range(no_of_gen):
            # Mutation rate decreases linearly from mrate + 0.08 towards
            # mrate over the generations.
            mutRate = mrate + (1 - x / float(no_of_gen)) * 0.08
            crossRate = crate

            # Breed and score the new population.
            new_pop = curr_pop.breedNewPop(no_of_gen, x, mutRate, crossRate,
                                           sel_type, tour_size)
            pop_size = new_pop.size()
            new_pop = self.score_population(new_pop, pop_size, gof, w_gof,
                                            w_clash, cpu_avil, jobserver,
                                            scorer, assembly, emmap, ncomp,
                                            cvol, template_grid, apix)

            # Pool parents and offspring and keep the best pop_size members.
            old_new_Pop = Population()
            for pold in curr_pop.pop:
                old_new_Pop.addGenotype(pold.copy())
            for pnew in new_pop.pop:
                old_new_Pop.addGenotype(pnew.copy())
            best = old_new_Pop.pickSetOfBest(pop_size)
            curr_pop = Population()
            for b in best:
                curr_pop.addGenotype(b.copy())

            # Record run/generation number, best scores, minimum and total
            # population fitness. The log is opened per generation and
            # closed right after writing; previously a new handle was
            # opened every generation without a matching close.
            with open(logfile + '.log', 'a') as f:
                f.write("R" + str(grun + 1) + "G" + str(x + 1) + "," +
                        str(curr_pop.getBestScores()) + "," +
                        str(curr_pop.min_fitness()) + "," +
                        str(curr_pop.totalFitnessScore()) + '\n')
                f.write(str(curr_pop) + '\n\n')

            if gasnap != 'dummy':
                # Write out the best assembly model of this generation.
                tmap, tstruct, maplist = move_struct_quat(
                    curr_pop.pickBest(), assembly)
                tstruct.write_to_PDB(gasnap + '_' + str(grun + 1) + '_' +
                                     str(x + 1) + '.pdb')

        # Best model of this GA run.
        newmap, newstruct, maplist = move_struct_quat(curr_pop.pickBest(),
                                                      assembly)
        newstruct.write_to_PDB(logfile + '_' + str(grun + 1) + '.pdb')

    jobserver.destroy()
    # Return final population, best fit, map of best fit and a summary.
    return curr_pop, newstruct, newmap, "Generation" + str(x) + ": " + str(
        curr_pop.getBestScores()) + ", " + str(
            curr_pop.min_fitness()) + ", " + str(
                curr_pop.totalFitnessScore()) + '\n'
# translate along x, y, z structure_instance.translate(42, 58, -5) # rotate along x, y, z structure_instance.rotate_by_axis_angle(0, 0, 1, np.rad2deg(-2.125868534775962), com=com) structure_instance.rotate_by_axis_angle(0, 1, 0, np.rad2deg(-0.0005038746980934731), com=com) structure_instance.rotate_by_axis_angle(1, 0, 0, np.rad2deg(3.1396619777494124), com=com) # save structure structure_instance.write_to_PDB('moved.pdb') # create the map blurrer = StructureBlurrer() sim_map = blurrer.gaussian_blur(structure_instance, 2.49, densMap=target_map) # save map sim_map.write_to_MRC_file('moved.mrc') # Writing out to MRC file
def _sccc_colour(score_sccc):
    """Map an SCCC score to an ``(r, g, b, a)`` colour, 0-255 per channel.

    The ramp is red at 0.0, white at 0.5 and blue at 1.0. The score is
    clamped to ``[0, 1]`` first, so a negative (or > 1) score can never
    produce a channel value outside 0-255.
    """
    clamped = min(1.0, max(0.0, score_sccc))
    if clamped >= 0.5:
        # Upper half of the ramp: red and green fade out, blue stays full.
        fade = 255 - int(math.floor(255 * ((clamped - 0.5) * 2.0)))
        return (fade, fade, 255, 255)
    # Lower half of the ramp: red stays full, green and blue fade in.
    fade = int(math.floor(255 * (clamped * 2.0)))
    return (255, fade, fade, 255)


def score(session, atomic_model, map_model, rigid_filename, rez, sim_sigma=0.187, colour_atoms=True):
    """
    Perform the SCCC score for every rigid body listed in a rigid-body file.

    Args:
        session: the calling session (not used inside this function).
        atomic_model: model whose ``atoms`` are converted to TEMPy atoms and
            whose ``residues`` are recoloured when ``colour_atoms`` is set.
        map_model: map converted with ``chimera_to_tempy_map``.
        rigid_filename: path handed to
            ``RBParser.read_FlexEM_RIBFIND_files``.
        rez: resolution passed to ``ScoringFunctions.SCCC``.
        sim_sigma: sigma coefficient passed to ``ScoringFunctions.SCCC``.
        colour_atoms: when true, colour the atoms and ribbon of every
            residue of each rigid body by that body's score
            (red 0.0 -> white 0.5 -> blue 1.0).

    Returns:
        A list of ``(rigid_body, sccc_score)`` tuples, or ``None`` when the
        model, map or rigid-body file could not be read (the error is
        printed rather than raised).
    """
    print("Calculating SCCC Score")

    scorer = ScoringFunctions()

    try:
        # Each converted atom receives its position in the list as the
        # second argument.
        atomlist = [
            chimera_to_tempy_atom(atom, position)
            for position, atom in enumerate(atomic_model.atoms)
        ]
        bio_atom_structure = BioPy_Structure(atomlist)
        bio_map_structure = chimera_to_tempy_map(map_model)

        # read rigid body file and generate structure instances for each segment
        listRB = RBParser.read_FlexEM_RIBFIND_files(rigid_filename, bio_atom_structure)
    except Exception as e:
        # Deliberate best-effort: report the problem and give up quietly.
        print(e)
        print(
            "Error in reading Model and Map. Make sure you have selected one model and one map, and the rigid file is correct."
        )
        return

    # score each rigid body segment
    listsc_sccc = []
    print('calculating SCCC')

    for RB in listRB:
        score_SCCC = scorer.SCCC(bio_map_structure, rez, sim_sigma, bio_atom_structure, RB, c_mode=False)
        print('>>', score_SCCC)
        listsc_sccc.append((RB, score_SCCC))

        if not colour_atoms:
            continue

        # TODO - maybe a faster way? Also 'all_atoms' mentioned in the API
        # doesnt exist but atoms does!
        # TODO - move this to somewhere better maybe?
        rgba = _sccc_colour(score_SCCC)

        # Every residue of the rigid body gets the same colour, so the
        # order in which the distinct residue numbers are visited is
        # irrelevant.
        for res_no in {rb_atom.res_no for rb_atom in RB.atomList}:
            # NOTE(review): res_no is used directly as an index into
            # atomic_model.residues - confirm the two numberings coincide.
            residue = atomic_model.residues[res_no]
            for res_atom in residue.atoms:
                res_atom.color = list(rgba)
            residue.ribbon_color = list(rgba)

    return listsc_sccc
# use count_neighbors if the corresponding indices are not required #else: # neigh_points = coordtree.query_ball_point(coordtree,distpot) #print len(list_coord1), len(neigh_points) return neigh_points start_pdb = list_to_check[0] iter_num = len(list_to_check) intermed_file = "" slow = 0.50 shigh = 0.25 # fraction of structure fitted reasonably well initially #rigid body file rigidbody_file = None # blurrer = StructureBlurrer() sc = ScoringFunctions() #read map file emmap = MapParser.readMRC(map_file) #----------------------------- #set plotting parameters flagplot = 1 try: import matplotlib except ImportError: flatplot = 0 if flagplot == 1: print 'Setting maptpltlib parameters' try: ##matplotlib.use('Agg')
def test_tempy_nmi(self):
    '''
    Test the tempy nmi score based on the files provided.
    Use this as a baseline for the second chimeraX test.

    Two cases are checked against stored reference values:
    map vs map (emd_5168 / emd_5170) and map vs model
    (emd_5168_monomer / 1J6Z). All input files are read from "./".
    '''
    sc = ScoringFunctions()

    def checked_nmi(map_a, map_b, contour_a, contour_b):
        # Run the MI score; on any error print the traceback first and
        # only then fail the test, so the cause is visible in the output.
        try:
            value = sc.MI(map_a, map_b, contour_a, contour_b, 1, None, None, True)
        except Exception:
            print_exc()
            self.fail('NMI calculation raised an exception')
        # Negative scores are reported as zero.
        if value < 0.0:
            value = 0.0
        return value

    path_test = "./"

    # --- map vs map ---
    m = os.path.join(path_test, 'emd_5168.map')
    p = os.path.join(path_test, 'emd_5170.map')
    rez1 = 6.6
    rez2 = 15.0
    _, emmap1, c1 = map_contour(m, t=1.5)
    _, emmap2, c2 = map_contour(p, t=1.5)

    print(rez1, rez2, c1, c2, emmap1.apix, emmap2.apix)

    if not sc.mapComparison(emmap1, emmap2):
        # The maps are not directly comparable: crop both at their
        # contour, low-pass filter the map with the smaller resolution
        # value when the two differ by more than 25%, then put both on a
        # common grid.
        emmap1._crop_box(c1, 0.5)
        emmap2._crop_box(c2, 0.5)

        if rez1 > 1.25 * rez2:
            emmap_2 = lpfilter(emmap2, rez1)
            emmap1, emmap2 = match_grid(emmap1, emmap_2, c1, c2)
        elif rez2 > 1.25 * rez1:
            emmap_1 = lpfilter(emmap1, rez2)
            emmap1, emmap2 = match_grid(emmap_1, emmap2, c1, c2)
        else:
            emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

    nmi = checked_nmi(emmap1, emmap2, c1, c2)
    self.assertLess(abs(round(nmi, 5) - 1.0492), 0.001)

    # --- map vs model ---
    p = os.path.join(path_test, '1J6Z.pdb')
    m = os.path.join(path_test, 'emd_5168_monomer.mrc')
    res = 6.6
    Name2 = os.path.basename(p).split('.')[0]
    emmap1 = MapParser.readMRC(m)
    structure_instance = PDBParser.read_PDB_file(Name2, p, hetatm=False, water=False)
    blurrer = StructureBlurrer()
    # Simulated map for the model, generated with the experimental map
    # passed as densMap.
    emmap2 = blurrer.gaussian_blur(structure_instance, res, densMap=emmap1)
    c1 = 9.7
    c2 = 1.0

    nmi = checked_nmi(emmap1, emmap2, c1, c2)
    self.assertLess(abs(round(nmi, 5) - 1.0575), 0.001)
''' mask1.write_to_MRC_file(Name1+'-'+Name2+'.mrc') mask2.write_to_MRC_file(Name2+'-'+Name1+'.mrc') # If PDB given write out synthetic map if output_synthetic_map: print 'Output synthetic map from : ', Name2 syn_map = emmap2._interpolate_to_grid1(emmap1.fullMap.shape, emmap1.apix, emmap1.origin, 1, 'zero') syn_map.write_to_MRC_file(Name2+'_syn.mrc') blurrer = StructureBlurrer() dict_chain_scores1 = blurrer._get_map_values(p2inst,mask1,max(r1,r2),win=5) dict_chain_scores2 = blurrer._get_map_values(p2inst,mask2,max(r1,r2),win=5) ''' try: import matplotlib.pyplot as plt except ImportError:flagread = 0 try: plt.style.use('ggplot') except AttributeError: pass from matplotlib import pylab #print dict_str_scores.keys() for ch in dict_chain_scores1: it = 0 #axes = plt.gca() #axes.set_ylim([0.4,1.0]) plt.xlabel = 'Residue_num' plt.ylabel = 'Diff score'
#GET INPUT DATA if flag_example: p = os.path.join(path_example, '1J6Z.pdb') m = os.path.join(path_example, 'emd_5168_monomer.mrc') res = 6.6 Name1 = os.path.basename(m).split('.')[0] Name2 = os.path.basename(p).split('.')[0] emmap1 = MapParser.readMRC(m) structure_instance = PDBParser.read_PDB_file(Name2, p, hetatm=False, water=False) blurrer = StructureBlurrer() emmap2 = blurrer.gaussian_blur(structure_instance, res, densMap=emmap1) c1 = 9.7 c2 = 1.0 elif all(x is None for x in [m, m1, m2]): # for 2 models if None in [p1, p2]: sys.exit( 'Input two maps or a map and model, map resolution(s) (required) and contours (optional)' ) Name1, emmap1, c1 = model_contour(p1, res=4.0, emmap=False, t=0.5) r1 = r2 = r = 4.0 if c2 is None: Name2, emmap2, c2 = model_contour(p2, res=r, emmap=False, t=0.5) else: Name2, emmap2 = blur_model(p2, res=r, emmap=False)
#rb_file2 ="1J6Z_sse.txt" structure_instance = PDBParser.read_PDB_file('3MFP', '3MFP.pdb', hetatm=False, water=False) print structure_instance structure_instance2 = PDBParser.read_PDB_file('1J6Z.pdb', '1J6Z.pdb', hetatm=False, water=False) print structure_instance2 blurrer = StructureBlurrer() scorer = ScoringFunctions() Plot = Plot() emmap = MapParser.readMRC('emd_5168_monomer.mrc') #read target map print emmap sim_map = blurrer.gaussian_blur(structure_instance, 6.6, densMap=emmap, sigma_coeff=sim_sigma_coeff, normalise=True) print 'structure_instance', scorer.CCC(sim_map, emmap) print sim_map sim_map2 = blurrer.gaussian_blur(structure_instance2,
def score_cmd(session, comparators, compared, rez_comparators, rez_compared, contours_comparators, contour_compared):
    """
    Score each comparator against ``compared`` with the NMI score.

    Every input may be an ``AtomicStructure`` (model) or a map. Models are
    turned into simulated maps before scoring.

    Args:
        session: the calling session (not used inside this function).
        comparators: sequence of models and/or maps to score.
        compared: the model or map every comparator is scored against.
        rez_comparators: list of resolutions, indexed like ``comparators``.
        rez_compared: resolution associated with ``compared``.
        contours_comparators: contour per comparator, indexed like
            ``comparators``; a ``None`` entry is replaced by
            ``map_contour(..., t=1.5)``.
        contour_compared: contour for ``compared``; ``None`` is replaced by
            ``map_contour(..., t=1.5)``.

    Returns:
        A list with one score per comparator (negative scores and failed
        calculations are stored as 0.0), or ``None`` as soon as a required
        resolution turns out to be missing (a message is printed).
    """
    sc = ScoringFunctions()
    blurrer = StructureBlurrer()
    compared_is_model = isinstance(compared, AtomicStructure)

    scores = []
    for idx, comparator in enumerate(comparators):
        comparator_is_model = isinstance(comparator, AtomicStructure)

        if comparator_is_model and compared_is_model:
            # Both models
            if None in ([rez_compared] + rez_comparators):
                print("Please provide the resolution for all models")
                return

            bms1 = chimera_to_tempy_model(compared)
            bms2 = chimera_to_tempy_model(comparator)

            # NOTE(review): the helper return values are stored directly as
            # maps here - confirm model_contour / blur_model return a bare
            # map rather than a tuple.
            emmap1 = model_contour(bms1, rez_compared, emmap=False, t=0.5)
            if contours_comparators[idx] is None:
                emmap2 = model_contour(bms2, rez_comparators[idx], emmap=False, t=0.5)
            else:
                emmap2 = blur_model(bms2, rez_comparators[idx], emmap=False)

        elif comparator_is_model:
            # compared is a map, comparator is a model
            if rez_comparators[idx] is None:
                print("Please provide the resolution for the model.")
                return

            emmap1 = chimera_to_tempy_map(compared)
            bms = chimera_to_tempy_model(comparator)
            # NOTE(review): rez_comparators[idx] is validated above but
            # rez_compared is what is used for blurring - confirm which
            # resolution is intended.
            emmap2 = blurrer.gaussian_blur(bms, rez_compared, densMap=emmap1)

        elif compared_is_model:
            # compared is a model, comparator is a map
            if rez_compared is None:
                print("Please provide the resolution for the model.")
                return

            emmap2 = chimera_to_tempy_map(comparator)
            bms = chimera_to_tempy_model(compared)
            emmap1 = blurrer.gaussian_blur(bms, rez_compared, densMap=emmap2)

        else:
            # Both maps
            emmap1 = chimera_to_tempy_map(compared)
            emmap2 = chimera_to_tempy_map(comparator)

        # Contouring: fall back to map_contour when no value was supplied.
        c1 = contour_compared
        if c1 is None:
            c1 = map_contour(emmap1, t=1.5)

        c2 = contours_comparators[idx]
        if c2 is None:
            c2 = map_contour(emmap2, t=1.5)

        # When the two maps are not directly comparable, crop both at their
        # contour, low-pass filter the map with the smaller resolution value
        # if the two differ by more than 25%, then put both on one grid.
        if not sc.mapComparison(emmap1, emmap2):
            emmap1._crop_box(c1, 0.5)
            emmap2._crop_box(c2, 0.5)

            if rez_compared > 1.25 * rez_comparators[idx]:
                emmap_2 = lpfilter(emmap2, rez_compared)
                emmap1, emmap2 = match_grid(emmap1, emmap_2, c1, c2)
            elif rez_comparators[idx] > 1.25 * rez_compared:
                emmap_1 = lpfilter(emmap1, rez_comparators[idx])
                emmap1, emmap2 = match_grid(emmap_1, emmap2, c1, c2)
            else:
                emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

        try:
            nmi = sc.MI(emmap1, emmap2, c1, c2, 1, None, None, True)
            if nmi < 0.0:
                nmi = 0.0
        except Exception:
            # Best-effort: a failed score is reported and recorded as 0.0,
            # but KeyboardInterrupt / SystemExit are no longer swallowed.
            print('Exception for NMI score')
            print_exc()
            nmi = 0.0

        scores.append(nmi)

    return scores