def build_maps(self, resolution, template_map, sig_coeff=0.356):
    """
    Build one simulated map per protein component held in structList.

    Each component is blurred with StructureBlurrer.gaussian_blur; the
    result is appended to self.mapList and a copy of it is appended to
    self.initMapList.

    Arguments:
       *resolution*
           Desired resolution of the density map in Angstrom units.
       *template_map*
           A map object that will be used as the template to build the
           individual component maps. Usually the input map used for the
           assembly fitting.
       *sig_coeff*
           the sigma value (multiplied by the resolution) that controls the
           width of the Gaussian. Default value is 0.356.
    """
    blurrer = StructureBlurrer()
    for component in self.structList:
        component_map = blurrer.gaussian_blur(component, resolution,
                                              template_map, sig_coeff)
        self.mapList.append(component_map)
        # Keep an independent copy alongside the working map.
        self.initMapList.append(component_map.copy())
def score(session, atomic_model, map_model, rez):
    ''' Compute the CCC score between one atomic model and one map.

    The atoms of *atomic_model* are converted to a TEMPy structure, blurred
    to resolution *rez* on the grid of the converted *map_model*, and the
    resulting simulated map is scored against the converted map with
    ScoringFunctions.CCC. The score is printed and returned.
    *session* is accepted but not used here.
    '''
    print("Calculating CCC Score")

    # class instances for density simulation (blurring) and scoring
    blurrer = StructureBlurrer()
    scorer = ScoringFunctions()

    # each atom is converted together with its running index
    tempy_atoms = [
        chimera_to_tempy_atom(atom, index)
        for index, atom in enumerate(atomic_model.atoms)
    ]
    bio_atom_structure = BioPy_Structure(tempy_atoms)
    bio_map_structure = chimera_to_tempy_map(map_model)

    map_probe = blurrer.gaussian_blur(bio_atom_structure, rez,
                                      densMap=bio_map_structure)
    ccc = scorer.CCC(bio_map_structure, map_probe)
    print(ccc)
    return ccc
# Make sure the output directory exists, then work from inside it.
# NOTE(review): the original indentation was lost; os.chdir is assumed to run
# whether or not the directory already existed -- confirm.
if os.path.exists(path_out):
    print("%s exists" % path_out)
else:
    os.mkdir(path_out)
os.chdir(path_out)

structure_instance = PDBParser.read_PDB_file('1J6Z', '1J6Z.pdb',
                                             hetatm=False, water=False)
print(structure_instance)

blurrer = StructureBlurrer()
# NOTE: this rebinds the class name EnsembleGeneration to an instance.
EnsembleGeneration = EnsembleGeneration()
scorer = ScoringFunctions()

# read target map
map_target = MapParser.readMRC('emd_5168_monomer.mrc')
# create a simulated map from the structure instance
map_probe = blurrer.gaussian_blur(structure_instance, 6.6, densMap=map_target)

# Create a Random ensemble of 10 structures randomly within 5 A translation
# and 60 deg rotation.
list_rotate_models = EnsembleGeneration.randomise_structs(
    structure_instance, 10, 5, 60, v_grain=30, rad=False, write=True)

# CCC score from starting fit
line = '%s %s\n' % ('1J6Z', scorer.CCC(map_probe, map_target))

# loop to score each of the alternative fits in the ensemble
# NOTE(review): the ensemble maps are blurred with sigma_coeff=0.187 while the
# starting-fit map above uses gaussian_blur's default -- confirm intended.
count = 0
for mod in list_rotate_models:
    count += 1
    mod_name, mod_structure_instance = mod[0], mod[1]
    map_probe = blurrer.gaussian_blur(mod_structure_instance, 6.6,
                                      densMap=map_target, sigma_coeff=0.187)
    line += '%s %s\n' % (mod_name, scorer.CCC(map_probe, map_target))
def rank_fit_ensemble(self, ensemble_list, score, res_target_map, sigma_coeff, number_top_mod=0,
                      write=False, targetMap=False, cont_targetMap=None):
    """
    Score every fit of an ensemble against a target map with the chosen
    scoring function and return the fits ranked from best to worst.

    Arguments:
        *ensemble_list*
            Input list of fits. Each entry is indexed as
            (name, Structure Instance).
        *score*
            Scoring function to use. See ScoringFunctions class for a list of the available Scoring Function.
            E.g. set score='CCC' to use the Cross-correlation coefficient.

            Score option are:

            i    'CCC' - Cross-correlation coefficient;

            ii    'LAP' - Laplacian-filtered cross-correlation coefficient:  useful for maps with resolutions worse than 10-15 A;

            iii   'MI' - Mutual information score: a good and robust score but relatively slow to calculate;

            iv    'ENV' - Envelope score: the fastest score to calculate due to binarisation of the map.

            v-vii  'NV','NV_Sobel','NV_Laplace'- Normal vector score: a vector-based surface superimposition score with or without Sobel/Laplace filter.

            viii 'CD' - Chamfer Distance: a score used in computer vision algorithms as a fast similarity metric
        *res_target_map*
            the resolution, in Angstroms, of the target Map.
        *sigma_coeff*
            the sigma value (multiplied by the resolution) that controls the width of the Gaussian.
            This argument is required here (it has no default in this method).

            Values used :

            0.187R corresponding with the Gaussian width of the Fourier transform falling to half the maximum at 1/resolution, as used in Situs (Wriggers et al, 1999);

            0.225R which makes the Fourier transform of the distribution fall to 1/e of its maximum value at wavenumber 1/resolution, the default in Chimera (Petterson et al, 2004)

            0.356R corresponding to the Gaussian width at 1/e maximum height equaling the resolution, an option in Chimera (Petterson et al, 2004);

            0.425R the fullwidth half maximum being equal to the resolution, as used by FlexEM (Topf et al, 2008);

            0.5R the distance between the two inflection points being the same length as the resolution, an option in Chimera (Petterson et al, 2004);

            1R where the sigma value simply equal to the resolution, as used by NMFF (Tama et al, 2004).
        *number_top_mod*
            Number of top-ranked fits to keep. Default (0) is all.
        *write*
            True will write out a file that contains the list of the structure instances representing different fits scored and clustered.
            note the lrms column is the Calpha RMSD of each fit from the first fit in its class
        *targetMap*
            Target Map Instance. Required: the method prints a warning and
            exits when it is left at its default (or is None).
        *cont_targetMap*
            Optional contour level of the target map. When given it is
            passed to the CCC, MI, NV* and CD scores in place of the
            thresholds otherwise derived from the model's mass.

    Return:
        A list of ``[name, structure_instance, score, 0, 0]`` entries sorted
        by descending score (truncated to *number_top_mod* entries when that
        is non-zero). When *write* is True the return value is whatever
        ``Cluster._print_results_cluster2`` returns for that list.
    """
    valid_scores = ['CCC', 'LAP', 'MI', 'NV', 'NV_Sobel', 'NV_Laplace', 'ENV', 'CD']
    # Normal-vector variants differ only in the Filter argument.
    nv_filters = {'NV': None, 'NV_Sobel': 'Sobel', 'NV_Laplace': 'Laplace'}

    blurrer = StructureBlurrer()
    scorer = ScoringFunctions()
    cluster = Cluster()

    # Identity checks: a real map object must never be compared with `==`,
    # and None would otherwise only fail later on .copy().
    if targetMap is False or targetMap is None:
        print("WARNING:Need target map")
        sys.exit()
    if score not in valid_scores:
        print('Incorrect Scoring Function: %s' % score)
        print('Please select from one of the following scoring functions: %s'
              % ', '.join(valid_scores))
        sys.exit()

    targetMap = targetMap.copy()
    use_contour = cont_targetMap is not None

    def primary_boundary(mod):
        # Lower density threshold derived from the model's mass.
        return targetMap.get_primary_boundary(
            mod.get_prot_mass_from_atoms(), targetMap.min(), targetMap.max())

    def boundaries(mod):
        # (min_thr, max_thr) pair used when no contour level was supplied.
        min_thr = primary_boundary(mod)
        points = targetMap.get_point_map(min_thr, percentage=0.2)
        max_thr = targetMap.get_second_boundary(
            min_thr, points, min_thr, targetMap.max(), err_percent=1)
        return min_thr, max_thr

    list_to_order = []
    for mod1 in ensemble_list:
        name_mod = mod1[0]
        mod = mod1[1]

        if score == 'ENV':
            # The envelope score works on the structure directly; no
            # simulated map is needed.
            score_mod = scorer.envelope_score(targetMap, primary_boundary(mod), mod)
        else:
            sim_map = blurrer.gaussian_blur(mod, res_target_map,
                                            densMap=targetMap,
                                            sigma_coeff=sigma_coeff)
            if score == 'CCC':
                if use_contour:
                    score_mod = scorer.CCC_map(
                        sim_map, targetMap, 0.5 * sim_map.fullMap.std(),
                        cont_targetMap, 2, True)[0]
                else:
                    score_mod = scorer.CCC_map(sim_map, targetMap, 0.0, 0.0, True)[0]
            elif score == 'LAP':
                score_mod = scorer.laplace_CCC(sim_map, targetMap)
            elif score == 'MI':
                if use_contour:
                    score_mod = scorer.MI(sim_map, targetMap,
                                          0.5 * sim_map.fullMap.std(),
                                          cont_targetMap, 1)
                else:
                    score_mod = scorer.MI(sim_map, targetMap)
            elif score in nv_filters:
                if use_contour:
                    margin = 0.1 * targetMap.std()
                    min_thr = cont_targetMap - margin
                    max_thr = cont_targetMap + margin
                else:
                    min_thr, max_thr = boundaries(mod)
                score_mod = scorer.normal_vector_score(
                    targetMap, sim_map, min_thr, max_thr,
                    Filter=nv_filters[score])
                # Map the raw score onto "higher is better" so that every
                # score can be sorted in descending order (3.14 is the
                # constant the original code used).
                # NOTE(review): assumed to apply to both the contour and the
                # mass-derived branch -- source indentation was lost.
                score_mod = 1 - (score_mod / 3.14)
            else:  # 'CD'
                if use_contour:
                    score_mod = scorer._surface_distance_score(
                        sim_map, targetMap, 0.5 * sim_map.fullMap.std(),
                        cont_targetMap, 'Minimum')
                else:
                    min_thr, max_thr = boundaries(mod)
                    score_mod = scorer.chamfer_distance(
                        sim_map, targetMap, min_thr, max_thr, kdtree=None)
                # Invert the distance so larger means better; a zero distance
                # ranks first instead of raising ZeroDivisionError.
                # NOTE(review): assumed to apply to both branches, as above.
                score_mod = 1 / score_mod if score_mod != 0 else float('inf')

        # 'name_file','structure_instance','score','lrmsd','class'
        list_to_order.append([name_mod, mod, score_mod, 0, 0])

    list_ordered = sorted(list_to_order, key=lambda entry: entry[2], reverse=True)
    if number_top_mod != 0:
        list_ordered = list_ordered[:int(number_top_mod)]

    if write == True:
        return cluster._print_results_cluster2(list_ordered, write)
    return list_ordered
os.mkdir(path_out) os.chdir(path_out) structure_instance = PDBParser.read_PDB_file('1J6Z', '1J6Z.pdb', hetatm=False, water=False) blurrer = StructureBlurrer() EnsembleGeneration = EnsembleGeneration() scorer = ScoringFunctions() map_target = MapParser.readMRC('emd_5168_monomer.mrc') #read target map print map_target map_probe = blurrer.gaussian_blur(structure_instance, 6.6, densMap=map_target) list_rotate_models = EnsembleGeneration.randomise_structs(structure_instance, 20, 10, 60, v_grain=30, rad=False, write=False) Cluster = Cluster() ranked_ensemble = Cluster.cluster_fit_ensemble_top_fit( list_rotate_models, 'CCC', 1.5, 6.6, 0.187,
#GET INPUT DATA if flag_example: p = os.path.join(path_example, '1J6Z.pdb') m = os.path.join(path_example, 'emd_5168_monomer.mrc') res = 6.6 Name1 = os.path.basename(m).split('.')[0] Name2 = os.path.basename(p).split('.')[0] emmap1 = MapParser.readMRC(m) structure_instance = PDBParser.read_PDB_file(Name2, p, hetatm=False, water=False) blurrer = StructureBlurrer() emmap2 = blurrer.gaussian_blur(structure_instance, res, densMap=emmap1) c1 = 9.7 c2 = 1.0 elif all(x is None for x in [m, m1, m2]): # for 2 models if None in [p1, p2]: sys.exit( 'Input two maps or a map and model, map resolution(s) (required) and contours (optional)' ) Name1, emmap1, c1 = model_contour(p1, res=4.0, emmap=False, t=0.5) r1 = r2 = r = 4.0 if c2 is None: Name2, emmap2, c2 = model_contour(p2, res=r, emmap=False, t=0.5) else: Name2, emmap2 = blur_model(p2, res=r, emmap=False) flag_filt = False
structure_instance2 = PDBParser.read_PDB_file('1J6Z.pdb', '1J6Z.pdb', hetatm=False, water=False) print structure_instance2 blurrer = StructureBlurrer() scorer = ScoringFunctions() Plot = Plot() emmap = MapParser.readMRC('emd_5168_monomer.mrc') #read target map print emmap sim_map = blurrer.gaussian_blur(structure_instance, 6.6, densMap=emmap, sigma_coeff=sim_sigma_coeff, normalise=True) print 'structure_instance', scorer.CCC(sim_map, emmap) print sim_map sim_map2 = blurrer.gaussian_blur(structure_instance2, 6.6, densMap=emmap, sigma_coeff=sim_sigma_coeff, normalise=True) print 'structure_instance_same', scorer.CCC(sim_map2, emmap) SCCC_list_structure_instance = [] listRB = RBParser.read_FlexEM_RIBFIND_files(rb_file, structure_instance2) for RB in listRB:
def test_tempy_nmi(self):
    ''' Test the tempy nmi score based on the files provided. Use this as
    a baseline for the second chimeraX test.

    Two cases are checked against stored reference values: map vs map
    (emd_5168.map / emd_5170.map) and map vs model
    (emd_5168_monomer.mrc / 1J6Z.pdb). All files are read from "./".
    '''
    path_test = "./"
    sc = ScoringFunctions()

    def clamped_nmi(map_a, map_b, contour_a, contour_b):
        # Compute the NMI; on any error print the traceback and fail the
        # test. (Previously the assertion fired before print_exc(), so the
        # traceback was never shown.)
        try:
            value = sc.MI(map_a, map_b, contour_a, contour_b, 1, None, None, True)
        except Exception:
            print_exc()
            self.fail("ScoringFunctions.MI raised an exception")
        # Negative scores are reported as 0.0.
        return 0.0 if value < 0.0 else value

    # --- map vs map -----------------------------------------------------
    m = os.path.join(path_test, 'emd_5168.map')
    p = os.path.join(path_test, 'emd_5170.map')
    rez1 = 6.6
    rez2 = 15.0
    Name1, emmap1, c1 = map_contour(m, t=1.5)
    Name2, emmap2, c2 = map_contour(p, t=1.5)
    print(rez1, rez2, c1, c2, emmap1.apix, emmap2.apix)

    if not sc.mapComparison(emmap1, emmap2):
        emmap1._crop_box(c1, 0.5)
        emmap2._crop_box(c2, 0.5)
        # Low-pass filter the map whose resolution is more than 25% finer
        # than the other before matching the grids.
        if rez1 > 1.25 * rez2:
            emmap2 = lpfilter(emmap2, rez1)
        elif rez2 > 1.25 * rez1:
            emmap1 = lpfilter(emmap1, rez2)
        emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

    nmi = clamped_nmi(emmap1, emmap2, c1, c2)
    self.assertLess(abs(round(nmi, 5) - 1.0492), 0.001)

    # --- Now test with a model and map ------------------------------------
    p = os.path.join(path_test, '1J6Z.pdb')
    m = os.path.join(path_test, 'emd_5168_monomer.mrc')
    res = 6.6
    Name1 = os.path.basename(m).split('.')[0]
    Name2 = os.path.basename(p).split('.')[0]
    emmap1 = MapParser.readMRC(m)
    structure_instance = PDBParser.read_PDB_file(Name2, p, hetatm=False,
                                                 water=False)
    blurrer = StructureBlurrer()
    emmap2 = blurrer.gaussian_blur(structure_instance, res, densMap=emmap1)
    c1 = 9.7
    c2 = 1.0

    nmi = clamped_nmi(emmap1, emmap2, c1, c2)
    self.assertLess(abs(round(nmi, 5) - 1.0575), 0.001)
def score_cmd(session, comparators, compared, rez_comparators, rez_compared, contours_comparators, contour_compared):
    """
    Compute the NMI score of *compared* against every entry of *comparators*.

    Each of *compared* and the comparators may be an AtomicStructure (model)
    or a map. Models are turned into simulated maps before scoring.

    Arguments:
        *session*
            Accepted but not used here.
        *comparators*
            Sequence of models/maps to score against *compared*.
        *compared*
            The single model/map every comparator is scored against.
        *rez_comparators*
            Per-comparator resolutions, indexed in step with *comparators*.
            Required for comparators that are models.
        *rez_compared*
            Resolution for *compared*. Required when it is a model.
        *contours_comparators*
            Per-comparator contour levels, indexed in step with
            *comparators*; a None entry is replaced by
            ``map_contour(..., t=1.5)``.
        *contour_compared*
            Contour level for *compared*; None is replaced the same way.

    Return:
        A list with one NMI score per comparator (negative or failed scores
        are reported as 0.0), or None if a required resolution is missing.
    """
    sc = ScoringFunctions()
    blurrer = StructureBlurrer()
    compared_is_model = isinstance(compared, AtomicStructure)

    scores = []
    for idx, comparator in enumerate(comparators):
        emmap1 = None
        emmap2 = None

        if isinstance(comparator, AtomicStructure):
            if compared_is_model:
                # Both models
                if None in ([rez_compared] + rez_comparators):
                    print("Please provide the resolution for all models")
                    return
                bms1 = chimera_to_tempy_model(compared)
                bms2 = chimera_to_tempy_model(comparator)
                emmap1 = model_contour(bms1, rez_compared, emmap=False, t=0.5)
                if contours_comparators[idx] is None:
                    emmap2 = model_contour(bms2, rez_comparators[idx], emmap=False, t=0.5)
                else:
                    emmap2 = blur_model(bms2, rez_comparators[idx], emmap=False)
            else:
                # compared is a map, comparator is a model
                if rez_comparators[idx] is None:
                    print("Please provide the resolution for the model.")
                    return
                emmap1 = chimera_to_tempy_map(compared)
                bms = chimera_to_tempy_model(comparator)
                # Blur with the resolution that was just validated (the
                # model's own); the original passed rez_compared here, which
                # is never checked in this branch.
                emmap2 = blurrer.gaussian_blur(bms, rez_comparators[idx], densMap=emmap1)
        else:
            if compared_is_model:
                # compared is a model, comparator is a map
                if rez_compared is None:
                    print("Please provide the resolution for the model.")
                    return
                emmap2 = chimera_to_tempy_map(comparator)
                bms = chimera_to_tempy_model(compared)
                emmap1 = blurrer.gaussian_blur(bms, rez_compared, densMap=emmap2)
            else:
                # both maps
                emmap1 = chimera_to_tempy_map(compared)
                emmap2 = chimera_to_tempy_map(comparator)

        # Contouring: fall back to a computed contour when none was supplied.
        c1 = contour_compared
        if c1 is None:
            c1 = map_contour(emmap1, t=1.5)
        c2 = contours_comparators[idx]
        if c2 is None:
            c2 = map_contour(emmap2, t=1.5)

        # If the two grids differ, crop both maps, optionally low-pass filter
        # the finer one, then resample onto a common grid.
        if not sc.mapComparison(emmap1, emmap2):
            emmap1._crop_box(c1, 0.5)
            emmap2._crop_box(c2, 0.5)
            rez_comparator = None if rez_comparators is None else rez_comparators[idx]
            # Filtering is only possible when both resolutions are known;
            # comparing against None would raise TypeError.
            if rez_compared is not None and rez_comparator is not None:
                if rez_compared > 1.25 * rez_comparator:
                    emmap2 = lpfilter(emmap2, rez_compared)
                elif rez_comparator > 1.25 * rez_compared:
                    emmap1 = lpfilter(emmap1, rez_comparator)
            emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

        nmi = 0.0
        try:
            nmi = sc.MI(emmap1, emmap2, c1, c2, 1, None, None, True)
            if nmi < 0.0:
                nmi = 0.0
        except Exception:
            # Best effort: report the failure and score this pair as 0.0.
            print('Exception for NMI score')
            print_exc()
            nmi = 0.0

        scores.append(nmi)

    return scores
def score(session, atomic_model1 = None, map_model1 = None, atomic_model2 = None, map_model2 = None, rez1 = None, rez2 = None, c1 = None, c2 = None):
    """ Generate the NMI score for 2 maps, 1 map and 1 model, or 2 models.

    Arguments:
        *session*
            Accepted but not used here.
        *atomic_model1*, *map_model1*, *atomic_model2*, *map_model2*
            The inputs. Accepted combinations, checked in this order:
            atomic_model1 + map_model1, map_model1 + map_model2,
            atomic_model1 + atomic_model2.
        *rez1*, *rez2*
            Resolutions. rez1 is required for the map + model case, both are
            required for the two-model case.
        *c1*, *c2*
            Contour levels; a missing one is replaced by
            ``map_contour(..., t=1.5)``.

    Return:
        The NMI score (negative or failed scores are reported as 0.0), or
        None when the inputs are incomplete.
    """
    sc = ScoringFunctions()
    blurrer = StructureBlurrer()

    # We have choices - 1 map and one model, 2 maps or 2 models
    emmap1 = None
    emmap2 = None

    if atomic_model1 is not None and map_model1 is not None:
        # 1 map 1 model
        if rez1 is None:
            print("Please provide the resolution for the model.")
            return
        emmap1 = chimera_to_tempy_map(map_model1)
        bms = chimera_to_tempy_model(atomic_model1)
        emmap2 = blurrer.gaussian_blur(bms, rez1, densMap=emmap1)
    elif map_model1 is not None and map_model2 is not None:
        # 2 maps
        emmap1 = chimera_to_tempy_map(map_model1)
        emmap2 = chimera_to_tempy_map(map_model2)
    elif atomic_model1 is not None and atomic_model2 is not None:
        # 2 models
        if None in [rez1, rez2]:
            print("Please provide the resolution for both model")
            return
        bms1 = chimera_to_tempy_model(atomic_model1)
        bms2 = chimera_to_tempy_model(atomic_model2)
        emmap1 = model_contour(bms1, rez1, emmap=False, t=0.5)
        if c2 is None:
            emmap2 = model_contour(bms2, rez2, emmap=False, t=0.5)
        else:
            emmap2 = blur_model(bms2, rez2, emmap=False)
    else:
        print("Error. Must have 1 model and 1 map, 2 maps or 2 models")
        return

    # Contouring: fall back to a computed contour when none was supplied.
    if c1 is None:
        c1 = map_contour(emmap1, t=1.5)
    if c2 is None:
        c2 = map_contour(emmap2, t=1.5)

    # If the two grids differ, crop both maps, optionally low-pass filter the
    # finer one, then resample onto a common grid.
    if not sc.mapComparison(emmap1, emmap2):
        emmap1._crop_box(c1, 0.5)
        emmap2._crop_box(c2, 0.5)
        # Resolutions default to None (e.g. two maps); comparing them would
        # raise TypeError, so filtering is skipped unless both are known.
        if rez1 is not None and rez2 is not None:
            if rez1 > 1.25 * rez2:
                emmap2 = lpfilter(emmap2, rez1)
            elif rez2 > 1.25 * rez1:
                emmap1 = lpfilter(emmap1, rez2)
        emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

    nmi = 0.0
    try:
        nmi = sc.MI(emmap1, emmap2, c1, c2, 1, None, None, True)
        if nmi < 0.0:
            nmi = 0.0
    except Exception:
        # Best effort: report the failure and return a score of 0.0.
        print('Exception for NMI score')
        print_exc()
        nmi = 0.0
    return nmi
# translate along x, y, z
structure_instance.translate(42, 58, -5)

# rotate about the z, then y, then x axis; the angles are listed in radians
# and converted to degrees for rotate_by_axis_angle
for axis_x, axis_y, axis_z, angle_rad in (
        (0, 0, 1, -2.125868534775962),
        (0, 1, 0, -0.0005038746980934731),
        (1, 0, 0, 3.1396619777494124)):
    structure_instance.rotate_by_axis_angle(axis_x, axis_y, axis_z,
                                            np.rad2deg(angle_rad), com=com)

# save structure
structure_instance.write_to_PDB('moved.pdb')

# create the map on the grid of target_map
blurrer = StructureBlurrer()
sim_map = blurrer.gaussian_blur(structure_instance, 2.49, densMap=target_map)

# save map: writing out to MRC file
sim_map.write_to_MRC_file('moved.mrc')
else: os.mkdir(path_out) os.chdir(path_out) #read PDB file and create a Structure instance. #note hetatm and water to include structure_instance=PDBParser.read_PDB_file('1J6Z','1J6Z.pdb',hetatm=False,water=False) print "structure_instance:" print structure_instance blurrer = StructureBlurrer() scorer = ScoringFunctions() map_target=MapParser.readMRC('emd_5168_monomer.mrc') #read target map map_probe = blurrer.gaussian_blur(structure_instance, 6.6,densMap=map_target)#create a simulated map from the structure instance map_probe.write_to_MRC_file("map_probe_actin.mrc") #write simulated map to a MRC file format ##SCORING FUNCTION print "Calculate Envelope Score (ENV):" molecualr_weight=structure_instance.get_prot_mass_from_atoms() #Mmolecualr_weight=structure_instance.get_prot_mass_from_res() first_bound=map_target.get_primary_boundary(molecualr_weight, map_target.min(), map_target.max()) #print scorer.envelope_score_APJ(map_target, first_bound, structure_instance,norm=True) print scorer.envelope_score(map_target, first_bound, structure_instance,norm=True) print "Calculate Mutual information Score (MI)" print scorer.MI(map_target,map_probe) print "Calculate Laplacian cross-correlation Score (LAP)"