def helixFromPdbs(self, origin, mrPdb, nativePdb, nativeChain, dsspLog, workdir=None):
    """Wrapper that builds the info and resSeqMap objects needed by scoreOrigin.

    Args:
        origin: Origin passed straight through to ``self.scoreOrigin``.
        mrPdb: Path of the MR pdb; parsed with ``pdb_edit.get_info``.
        nativePdb: Path of the native pdb; parsed with ``pdb_edit.get_info``.
        nativeChain: Chain ID that must be present in the first model of the native.
        dsspLog: Passed straight through to ``self.helixFromContacts``.
        workdir: Working directory handed to ``self.scoreOrigin``. Defaults to the
            current working directory at the time of the call.

    Returns:
        Whatever ``self.helixFromContacts`` returns for the contacts found.

    Raises:
        AssertionError: if ``nativeChain`` is not a chain of the native's first model.
    """
    if workdir is None:
        # A default of os.getcwd() in the signature is evaluated once, when the
        # function is defined, so resolve the directory at call time instead.
        workdir = os.getcwd()

    mrPdbInfo = pdb_edit.get_info(mrPdb)
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    assert nativeChain in nativePdbInfo.models[0].chains, \
        "Chain {0} not found in native pdb {1}".format(nativeChain, nativePdb)

    # Kept as a function-level import as in the original
    # NOTE(review): presumably avoids an import cycle - confirm before moving to module level
    import residue_map
    resSeqMap = residue_map.residueSequenceMap()
    resSeqMap.fromInfo(
        refInfo=nativePdbInfo,
        refChainID=nativeChain,
        targetInfo=mrPdbInfo,
        targetChainID=mrPdbInfo.models[0].chains[0],  # Only 1 chain in model
    )

    data = self.scoreOrigin(origin, mrPdbInfo, nativePdbInfo, resSeqMap, workdir)
    contacts = data.contacts

    return self.helixFromContacts(contacts, dsspLog)
def analyseModels(amoptd):
    """Map model residues onto the native and TM-score every model.

    Reads ``models_dir``, ``native_pdb_info``, ``have_tmscore``, ``tmscore_exe``,
    ``benchmark_dir``, ``native_pdb_std`` and ``fasta`` from ``amoptd``.
    Writes ``ref_model_pdb_info``, ``res_seq_map`` (``None`` when the map could
    not be calculated) and, when the TMscore run succeeds, ``tmComp``.

    Raises:
        IndexError: if ``models_dir`` contains no ``*.pdb`` file.
        RuntimeError: if ``amoptd['have_tmscore']`` is false.
    """
    # Get hold of a full model so we can do the mapping of residues
    model_pdbs = glob.glob(os.path.join(amoptd['models_dir'], "*.pdb"))
    if not model_pdbs:
        # Same exception type as the bare [0] lookup, but the message says what is missing
        raise IndexError("No pdb models found in models_dir: {0}".format(amoptd['models_dir']))
    refModelPdb = model_pdbs[0]

    nativePdbInfo = amoptd['native_pdb_info']
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    amoptd['ref_model_pdb_info'] = refModelPdbInfo

    try:
        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(
            refInfo=refModelPdbInfo,
            refChainID=refModelPdbInfo.models[0].chains[0],  # Only 1 chain in model
            targetInfo=nativePdbInfo,
            targetChainID=nativePdbInfo.models[0].chains[0],
        )
        amoptd['res_seq_map'] = resSeqMap
    except Exception as e:
        # Lazy %-argument, matching the TMscore handler below
        logger.exception("Error calculating resSeqMap: %s", e)
        amoptd['res_seq_map'] = None  # Won't be able to calculate RIO scores

    if amoptd['have_tmscore']:
        try:
            tm = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
            # Calculation of TMscores for all models
            logger.info("Analysing Rosetta models with TMscore")
            model_list = sorted(glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
            structure_list = [amoptd['native_pdb_std']]
            amoptd['tmComp'] = tm.compare_structures(model_list, structure_list, fastas=[amoptd['fasta']])
        except Exception as e:
            # Best effort: a failed TMscore run is logged and 'tmComp' is left unset
            logger.exception("Unable to run TMscores: %s", e)
    else:
        raise RuntimeError("No program to calculate TMSCORES")
def test_resSeqMap4(self):
    """Check that a residue map can be built between the native 1K33 and a model.

    The test passes when no step raises; the standardised native is written to
    ``1K33_std.pdb`` in the current directory and removed again afterwards.
    """
    # See if we can sort out the indexing between the native and model
    nativePdb = os.path.join(self.testfiles_dir, "1K33.pdb")
    modelPdb = os.path.join(self.testfiles_dir, "1K33_S_00000001.pdb")

    nativePdbStd = "1K33_std.pdb"
    try:
        pdb_edit.standardise(nativePdb, nativePdbStd)

        nativeInfo = pdb_edit.get_info(nativePdbStd)
        modelInfo = pdb_edit.get_info(modelPdb)

        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(nativeInfo, 'A', modelInfo, 'A')
    finally:
        # Always remove the scratch file, even when a step above fails, so a
        # broken run does not leave it behind in the working directory
        if os.path.exists(nativePdbStd):
            os.unlink(nativePdbStd)
def test_resSeqMap4(self):
    """Build a residue sequence map between the 1K33 native and one of its models.

    No explicit assertion is made: the test succeeds if every call completes.
    The standardised native is a scratch file (``1K33_std.pdb``) created in the
    current directory and deleted before the test returns.
    """
    # See if we can sort out the indexing between the native and model
    nativePdb = os.path.join(self.testfiles_dir, "1K33.pdb")
    modelPdb = os.path.join(self.testfiles_dir, "1K33_S_00000001.pdb")

    nativePdbStd = "1K33_std.pdb"
    try:
        pdb_edit.standardise(nativePdb, nativePdbStd)

        nativeInfo = pdb_edit.get_info(nativePdbStd)
        modelInfo = pdb_edit.get_info(modelPdb)

        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(nativeInfo, 'A', modelInfo, 'A')
    finally:
        # Clean up in a finally block so a failing step cannot leak the scratch file
        if os.path.exists(nativePdbStd):
            os.unlink(nativePdbStd)
def helixFromPdbs(self, origin, mrPdb, nativePdb, nativeChain, dsspLog, workdir=None):
    """Wrapper that builds the info and resSeqMap objects needed by scoreOrigin.

    Args:
        origin: Origin passed straight through to ``self.scoreOrigin``.
        mrPdb: Path of the MR pdb; parsed with ``pdb_edit.get_info``.
        nativePdb: Path of the native pdb; parsed with ``pdb_edit.get_info``.
        nativeChain: Chain ID that must be present in the first model of the native.
        dsspLog: Passed straight through to ``self.helixFromContacts``.
        workdir: Working directory handed to ``self.scoreOrigin``. Defaults to the
            current working directory at the time of the call.

    Returns:
        Whatever ``self.helixFromContacts`` returns for the contacts found.

    Raises:
        AssertionError: if ``nativeChain`` is not a chain of the native's first model.
    """
    if workdir is None:
        # os.getcwd() as a signature default would be frozen at definition time;
        # look the directory up when the method is actually called
        workdir = os.getcwd()

    mrPdbInfo = pdb_edit.get_info(mrPdb)
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    assert nativeChain in nativePdbInfo.models[0].chains, \
        "Chain {0} not found in native pdb {1}".format(nativeChain, nativePdb)

    # Kept as a function-level import as in the original
    # NOTE(review): presumably avoids an import cycle - confirm before moving to module level
    import residue_map
    resSeqMap = residue_map.residueSequenceMap()
    resSeqMap.fromInfo(
        refInfo=nativePdbInfo,
        refChainID=nativeChain,
        targetInfo=mrPdbInfo,
        targetChainID=mrPdbInfo.models[0].chains[0],  # Only 1 chain in model
    )

    data = self.scoreOrigin(origin, mrPdbInfo, nativePdbInfo, resSeqMap, workdir)
    contacts = data.contacts

    return self.helixFromContacts(contacts, dsspLog)
def analyseModels(amoptd):
    """Map model residues onto the native and compare every model with it.

    Reads ``models_dir``, ``native_pdb_info``, ``have_tmscore`` and
    ``benchmark_dir`` from ``amoptd``, plus ``tmscore_exe``, ``native_pdb_std``
    and ``fasta`` on the TMscore path or ``maxcluster_exe`` on the Maxcluster
    path. Writes ``res_seq_map``, ``ref_model_pdb_info`` and, when the TMscore
    run succeeds, ``tmComp``. On the Maxcluster path the module-level
    ``_MAXCLUSTERER`` is (re)assigned.

    Raises:
        IndexError: if ``models_dir`` contains no ``*.pdb`` file.
    """
    # Get hold of a full model so we can do the mapping of residues
    model_pdbs = glob.glob(os.path.join(amoptd['models_dir'], "*.pdb"))
    if not model_pdbs:
        # Same exception type as the bare [0] lookup, but the message says what is missing
        raise IndexError("No pdb models found in models_dir: {0}".format(amoptd['models_dir']))
    refModelPdb = model_pdbs[0]

    nativePdbInfo = amoptd['native_pdb_info']

    resSeqMap = residue_map.residueSequenceMap()
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    resSeqMap.fromInfo(
        refInfo=refModelPdbInfo,
        refChainID=refModelPdbInfo.models[0].chains[0],  # Only 1 chain in model
        targetInfo=nativePdbInfo,
        targetChainID=nativePdbInfo.models[0].chains[0],
    )
    amoptd['res_seq_map'] = resSeqMap
    amoptd['ref_model_pdb_info'] = refModelPdbInfo

    if amoptd['have_tmscore']:
        try:
            tm = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
            # Calculation of TMscores for all models
            logger.info("Analysing Rosetta models with TMscore")
            model_list = sorted(glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
            structure_list = [amoptd['native_pdb_std']]
            amoptd['tmComp'] = tm.compare_structures(model_list, structure_list, fastas=[amoptd['fasta']])
        except Exception as e:
            # Best effort: a failed TMscore run is logged and 'tmComp' is left unset
            msg = "Unable to run TMscores: {0}".format(e)
            logger.critical(msg)
    else:
        # Setting a module-level variable, so the global keyword is needed to stop
        # it becoming a local variable
        global _MAXCLUSTERER
        _MAXCLUSTERER = maxcluster.Maxcluster(amoptd['maxcluster_exe'])
        logger.info("Analysing Rosetta models with Maxcluster")
        _MAXCLUSTERER.compareDirectory(nativePdbInfo=nativePdbInfo,
                                       resSeqMap=resSeqMap,
                                       modelsDirectory=amoptd['models_dir'],
                                       workdir=fixpath(amoptd['benchmark_dir']))
    return
def analyseModels(amoptd):
    """Map model residues onto the native and TM-score every model.

    Reads ``models_dir``, ``native_pdb_info``, ``have_tmscore``, ``tmscore_exe``,
    ``benchmark_dir``, ``native_pdb_std`` and ``fasta`` from ``amoptd``.
    Writes ``ref_model_pdb_info``, ``res_seq_map`` (``None`` when the map could
    not be calculated) and, when the TMscore run succeeds, ``tmComp``.

    Raises:
        IndexError: if ``models_dir`` contains no ``*.pdb`` file.
        RuntimeError: if ``amoptd['have_tmscore']`` is false.
    """
    # Get hold of a full model so we can do the mapping of residues
    model_pdbs = glob.glob(os.path.join(amoptd['models_dir'], "*.pdb"))
    if not model_pdbs:
        # Keep raising IndexError like the bare [0] lookup did, but name the directory
        raise IndexError("No pdb models found in models_dir: {0}".format(amoptd['models_dir']))
    refModelPdb = model_pdbs[0]

    nativePdbInfo = amoptd['native_pdb_info']
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    amoptd['ref_model_pdb_info'] = refModelPdbInfo

    try:
        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(
            refInfo=refModelPdbInfo,
            refChainID=refModelPdbInfo.models[0].chains[0],  # Only 1 chain in model
            targetInfo=nativePdbInfo,
            targetChainID=nativePdbInfo.models[0].chains[0],
        )
        amoptd['res_seq_map'] = resSeqMap
    except Exception as e:
        # Lazy %-argument, matching the TMscore handler below
        logger.exception("Error calculating resSeqMap: %s", e)
        amoptd['res_seq_map'] = None  # Won't be able to calculate RIO scores

    if amoptd['have_tmscore']:
        try:
            tm = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
            # Calculation of TMscores for all models
            logger.info("Analysing Rosetta models with TMscore")
            model_list = sorted(glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
            structure_list = [amoptd['native_pdb_std']]
            amoptd['tmComp'] = tm.compare_structures(model_list, structure_list, fastas=[amoptd['fasta']])
        except Exception as e:
            # Best effort: a failed TMscore run is logged and 'tmComp' is left unset
            logger.exception("Unable to run TMscores: %s", e)
    else:
        raise RuntimeError("No program to calculate TMSCORES")
def analyseSolution(amoptd, d, mrinfo):
    """Analyse one MR result against the native structure, updating ``d`` in place.

    Args:
        amoptd: Options dictionary. Reads ``benchmark_dir``, ``native_pdb``,
            ``native_pdb_info``, ``ref_model_pdb_info``, ``res_seq_map``,
            ``homologs``, ``ideal_helices``, ``import_ensembles`` and
            ``single_model_mode``.
        d: Result dictionary for a single ensemble. Reads ``ensemble_name``,
            ``MR_program``, the ``*_pdbout`` entries, ``SHELXE_os`` and
            ``native_pdb_num_residues``; receives the placed-atom counts, phase
            errors, ``reforigin_RMSD`` and the RIO/all-atom contact scores.
        mrinfo: Object whose ``MPE``, ``wMPE`` and ``originShift`` attributes are
            read after each ``mrinfo.analyse(pdb)`` call.

    Returns:
        None. Returns early, leaving ``d`` untouched, when the MR program is
        "unknown" or the MR pdb is missing.
    """
    logger.info("Benchmark: analysing result: {0}".format(d['ensemble_name']))

    mrPdb = None
    if d['MR_program'] == "PHASER":
        mrPdb = d['PHASER_pdbout']
    elif d['MR_program'] == "MOLREP":
        mrPdb = d['MOLREP_pdbout']
    elif d['MR_program'] == "unknown":
        return

    if mrPdb is None or not os.path.isfile(mrPdb):
        # logger.critical("Cannot find mrPdb {0} for solution {1}".format(mrPdb,d))
        return

    # debug - copy into work directory as reforigin struggles with long pathnames
    shutil.copy(mrPdb, os.path.join(fixpath(amoptd['benchmark_dir']), os.path.basename(mrPdb)))

    mrPdbInfo = pdb_edit.get_info(mrPdb)

    d['num_placed_chains'] = mrPdbInfo.numChains()
    d['num_placed_atoms'] = mrPdbInfo.numAtoms()
    d['num_placed_CA'] = mrPdbInfo.numCalpha()

    if amoptd['native_pdb']:
        if not d['SHELXE_os']:
            logger.critical("mrPdb {0} has no SHELXE_os origin shift. Calculating...".format(mrPdb))
            mrinfo.analyse(mrPdb)
            mrOrigin = mrinfo.originShift
            d['SHELXE_MPE'] = mrinfo.MPE
            d['SHELXE_wMPE'] = mrinfo.wMPE
        else:
            # The stored shift is negated to give the origin used below
            mrOrigin = [c * -1 for c in d['SHELXE_os']]

        # Move pdb onto new origin
        originPdb = ample_util.filename_append(mrPdb, astr='offset', directory=fixpath(amoptd['benchmark_dir']))
        # print(mrPdb, originPdb, mrOrigin)
        pdb_edit.translate(mrPdb, originPdb, mrOrigin)

        # offset.pdb is the mrModel shifted onto the new origin use csymmatch to wrap onto native
        csymmatch.Csymmatch().wrapModelToNative(
            originPdb,
            amoptd['native_pdb'],
            csymmatchPdb=os.path.join(fixpath(amoptd['benchmark_dir']),
                                      "phaser_{0}_csymmatch.pdb".format(d['ensemble_name'])))
        # can now delete origin pdb
        os.unlink(originPdb)

        # Calculate phase error for the MR PDB
        try:
            mrinfo.analyse(mrPdb)
            d['MR_MPE'] = mrinfo.MPE
            d['MR_wMPE'] = mrinfo.wMPE
        except Exception as e:
            logger.critical("Error analysing mrPdb: {0}\n{1}".format(mrPdb, e))

        # We cannot calculate the Reforigin RMSDs or RIO scores for runs where we don't have a full initial model
        # to compare to the native to allow us to determine which parts of the ensemble correspond to which parts of
        # the native structure.
        if not (amoptd['homologs'] or
                amoptd['ideal_helices'] or
                amoptd['import_ensembles'] or
                amoptd['single_model_mode']):

            # Get reforigin info
            rmsder = reforigin.ReforiginRmsd()
            try:
                rmsder.getRmsd(nativePdbInfo=amoptd['native_pdb_info'],
                               placedPdbInfo=mrPdbInfo,
                               refModelPdbInfo=amoptd['ref_model_pdb_info'],
                               cAlphaOnly=True,
                               workdir=fixpath(amoptd['benchmark_dir']))
                d['reforigin_RMSD'] = rmsder.rmsd
            except Exception as e:
                logger.critical("Error calculating RMSD: {0}".format(e))
                d['reforigin_RMSD'] = 999  # placeholder recorded when the RMSD calculation fails

            # Score the origin with all-atom and rio
            rioData = rio.Rio().scoreOrigin(mrOrigin,
                                            mrPdbInfo=mrPdbInfo,
                                            nativePdbInfo=amoptd['native_pdb_info'],
                                            resSeqMap=amoptd['res_seq_map'],
                                            workdir=fixpath(amoptd['benchmark_dir']))

            # Set attributes
            d['AA_num_contacts'] = rioData.aaNumContacts
            d['RIO_num_contacts'] = rioData.rioNumContacts
            d['RIO_in_register'] = rioData.rioInRegister
            d['RIO_oo_register'] = rioData.rioOoRegister
            d['RIO_backwards'] = rioData.rioBackwards
            d['RIO'] = rioData.rioInRegister + rioData.rioOoRegister
            d['RIO_no_cat'] = rioData.rioNumContacts - (rioData.rioInRegister + rioData.rioOoRegister)
            d['RIO_norm'] = float(d['RIO']) / float(d['native_pdb_num_residues'])
        else:
            d['AA_num_contacts'] = None
            d['RIO_num_contacts'] = None
            d['RIO_in_register'] = None
            d['RIO_oo_register'] = None
            d['RIO_backwards'] = None
            d['RIO'] = None
            d['RIO_no_cat'] = None
            d['RIO_norm'] = None

        # # Now get the helix
        # helixSequence = contacts.Rio().helixFromContacts( contacts=rioData.contacts,
        #                                                   dsspLog=dsspLog )
        # if helixSequence is not None:
        #     ampleResult.rioHelixSequence = helixSequence
        #     ampleResult.rioLenHelix = len( helixSequence )
        #     hfile = os.path.join( workdir, "{0}.helix".format( ampleResult.ensembleName ) )
        #     with open( hfile, 'w' ) as f:
        #         f.write( helixSequence+"\n" )

        #
        # This purely for checking and so we have pdbs to view
        #
        # Wrap shelxe trace onto native using Csymmatch
        if d['SHELXE_pdbout'] is not None and os.path.isfile(fixpath(d['SHELXE_pdbout'])):
            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SHELXE_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    workdir=fixpath(amoptd['benchmark_dir']))

            # NOTE(review): assumed to run only when a SHELXE pdb exists, like the
            # buccaneer/arpwarp sections below - confirm the intended nesting
            if not ('SHELXE_wMPE' in d and d['SHELXE_wMPE']):
                try:
                    mrinfo.analyse(d['SHELXE_pdbout'])
                    d['SHELXE_MPE'] = mrinfo.MPE
                    d['SHELXE_wMPE'] = mrinfo.wMPE
                except Exception as e:
                    logger.critical("Error analysing SHELXE_pdbout: {0}\n{1}".format(d['SHELXE_pdbout'], e))

        # Wrap parse_buccaneer model onto native
        if d['SXRBUCC_pdbout'] and os.path.isfile(fixpath(d['SXRBUCC_pdbout'])):
            # Need to rename Pdb as is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']),
                                        "buccaneer_{0}_csymmatch.pdb".format(d['ensemble_name']))

            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SXRBUCC_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    csymmatchPdb=csymmatchPdb,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRBUCC_pdbout'])
                d['SXRBUCC_MPE'] = mrinfo.MPE
                d['SXRBUCC_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRBUCC_pdbout: {0}\n{1}".format(d['SXRBUCC_pdbout'], e))

        # Wrap arpwarp model onto native
        if d['SXRARP_pdbout'] and os.path.isfile(fixpath(d['SXRARP_pdbout'])):
            # Give the wrapped pdb a name that includes the ensemble
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']),
                                        "arpwarp_{0}_csymmatch.pdb".format(d['ensemble_name']))

            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SXRARP_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    csymmatchPdb=csymmatchPdb,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRARP_pdbout'])
                d['SXRARP_MPE'] = mrinfo.MPE
                d['SXRARP_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRARP_pdbout: {0}\n{1}".format(d['SXRARP_pdbout'], e))

    return
def analysePdb(amoptd):
    """Collect data on the native pdb structure and store it in ``amoptd``.

    Reads ``native_pdb`` and ``work_dir`` from ``amoptd``. Writes the
    ``native_pdb_*`` entries: code, title, resolution, solvent content,
    Matthews coefficient, space group, atom/residue/chain counts, the info
    object for the standardised native, the standardised pdb path, a
    single-chain pdb path and the origin info (``None`` if it could not be
    built).

    Returns:
        None. All results are written into ``amoptd``.
    """
    nativePdb = fixpath(amoptd['native_pdb'])
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # number atoms/residues
    natoms, nresidues = pdb_edit.num_atoms_and_residues(nativePdb)

    # Get information on the origins for this spaceGroup
    try:
        originInfo = pdb_model.OriginInfo(spaceGroupLabel=nativePdbInfo.crystalInfo.spaceGroup)
    except Exception:
        # Best effort: without origin info the space group falls back to "P1" below
        originInfo = None

    # Do this here as a bug in pdbcur can knacker the CRYST1 data
    amoptd['native_pdb_code'] = nativePdbInfo.pdbCode
    amoptd['native_pdb_title'] = nativePdbInfo.title
    amoptd['native_pdb_resolution'] = nativePdbInfo.resolution
    amoptd['native_pdb_solvent_content'] = nativePdbInfo.solventContent
    amoptd['native_pdb_matthews_coefficient'] = nativePdbInfo.matthewsCoefficient
    if not originInfo:
        space_group = "P1"
    else:
        space_group = originInfo.spaceGroup()
    amoptd['native_pdb_space_group'] = space_group
    amoptd['native_pdb_num_atoms'] = natoms
    amoptd['native_pdb_num_residues'] = nresidues

    # First check if the native has > 1 model and extract the first if so
    if len(nativePdbInfo.models) > 1:
        logger.info("nativePdb has > 1 model - using first")
        nativePdb1 = ample_util.filename_append(filename=nativePdb,
                                                astr="model1",
                                                directory=fixpath(amoptd['work_dir']))
        pdb_edit.extract_model(nativePdb, nativePdb1, modelID=nativePdbInfo.models[0].serial)
        nativePdb = nativePdb1

    # Standardise the PDB to rename any non-standard AA, remove solvent etc
    nativePdbStd = ample_util.filename_append(filename=nativePdb,
                                              astr="std",
                                              directory=fixpath(amoptd['work_dir']))
    pdb_edit.standardise(nativePdb, nativePdbStd, del_hetatm=True)
    nativePdb = nativePdbStd

    # Get the new Info about the native
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # For maxcluster comparison of shelxe model we need a single chain from the native so we get this here
    if len(nativePdbInfo.models[0].chains) > 1:
        nativeChain1 = ample_util.filename_append(filename=nativePdbInfo.pdb,
                                                  astr="chain1",
                                                  directory=fixpath(amoptd['work_dir']))
        pdb_edit.to_single_chain(nativePdbInfo.pdb, nativeChain1)
    else:
        nativeChain1 = nativePdbInfo.pdb

    # Additional data
    amoptd['native_pdb_num_chains'] = len(nativePdbInfo.models[0].chains)
    amoptd['native_pdb_info'] = nativePdbInfo
    amoptd['native_pdb_std'] = nativePdbStd
    amoptd['native_pdb_1chain'] = nativeChain1
    amoptd['native_pdb_origin_info'] = originInfo

    return
def analyseSolution(amoptd, d, mrinfo):
    """Analyse one MR result against the native structure, updating ``d`` in place.

    Args:
        amoptd: Options dictionary. Reads ``benchmark_dir``, ``native_pdb``,
            ``native_pdb_info``, ``ref_model_pdb_info``, ``res_seq_map``,
            ``homologs``, ``ideal_helices``, ``import_ensembles`` and
            ``single_model_mode``.
        d: Result dictionary for a single ensemble. Reads ``ensemble_name``,
            ``MR_program``, the ``*_pdbout`` entries, ``SHELXE_os`` and
            ``native_pdb_num_residues``; receives the placed-atom counts, phase
            errors, ``reforigin_RMSD`` and the RIO/all-atom contact scores.
        mrinfo: Object whose ``MPE``, ``wMPE`` and ``originShift`` attributes are
            read after each ``mrinfo.analyse(pdb)`` call.

    Returns:
        None. Returns early, leaving ``d`` untouched, when the MR program is
        "unknown" or the MR pdb is missing.
    """
    logger.info("Benchmark: analysing result: {0}".format(d['ensemble_name']))

    mrPdb = None
    if d['MR_program'] == "PHASER":
        mrPdb = d['PHASER_pdbout']
    elif d['MR_program'] == "MOLREP":
        mrPdb = d['MOLREP_pdbout']
    elif d['MR_program'] == "unknown":
        return

    if mrPdb is None or not os.path.isfile(mrPdb):
        # logger.critical("Cannot find mrPdb {0} for solution {1}".format(mrPdb,d))
        return

    # debug - copy into work directory as reforigin struggles with long pathnames
    shutil.copy(mrPdb, os.path.join(fixpath(amoptd['benchmark_dir']), os.path.basename(mrPdb)))

    mrPdbInfo = pdb_edit.get_info(mrPdb)

    d['num_placed_chains'] = mrPdbInfo.numChains()
    d['num_placed_atoms'] = mrPdbInfo.numAtoms()
    d['num_placed_CA'] = mrPdbInfo.numCalpha()

    if amoptd['native_pdb']:
        if not d['SHELXE_os']:
            logger.critical("mrPdb {0} has no SHELXE_os origin shift. Calculating...".format(mrPdb))
            mrinfo.analyse(mrPdb)
            mrOrigin = mrinfo.originShift
            d['SHELXE_MPE'] = mrinfo.MPE
            d['SHELXE_wMPE'] = mrinfo.wMPE
        else:
            # The stored shift is negated to give the origin used below
            mrOrigin = [c * -1 for c in d['SHELXE_os']]

        # Move pdb onto new origin
        originPdb = ample_util.filename_append(mrPdb, astr='offset', directory=fixpath(amoptd['benchmark_dir']))
        # print(mrPdb, originPdb, mrOrigin)
        pdb_edit.translate(mrPdb, originPdb, mrOrigin)

        # offset.pdb is the mrModel shifted onto the new origin use csymmatch to wrap onto native
        csymmatch.Csymmatch().wrapModelToNative(
            originPdb,
            amoptd['native_pdb'],
            csymmatchPdb=os.path.join(fixpath(amoptd['benchmark_dir']),
                                      "phaser_{0}_csymmatch.pdb".format(d['ensemble_name'])))
        # can now delete origin pdb
        os.unlink(originPdb)

        # Calculate phase error for the MR PDB
        try:
            mrinfo.analyse(mrPdb)
            d['MR_MPE'] = mrinfo.MPE
            d['MR_wMPE'] = mrinfo.wMPE
        except Exception as e:
            logger.critical("Error analysing mrPdb: {0}\n{1}".format(mrPdb, e))

        # We cannot calculate the Reforigin RMSDs or RIO scores for runs where we don't have a full initial model
        # to compare to the native to allow us to determine which parts of the ensemble correspond to which parts of
        # the native structure - or if we were unable to calculate a res_seq_map
        #
        # The res_seq_map test is negated on purpose: scoring needs the map, so it is a
        # *missing* map that must send us to the "no scores" branch below.
        if not (amoptd['homologs'] or
                amoptd['ideal_helices'] or
                amoptd['import_ensembles'] or
                amoptd['single_model_mode'] or
                not amoptd['res_seq_map']):

            # Get reforigin info
            rmsder = reforigin.ReforiginRmsd()
            try:
                rmsder.getRmsd(nativePdbInfo=amoptd['native_pdb_info'],
                               placedPdbInfo=mrPdbInfo,
                               refModelPdbInfo=amoptd['ref_model_pdb_info'],
                               cAlphaOnly=True,
                               workdir=fixpath(amoptd['benchmark_dir']))
                d['reforigin_RMSD'] = rmsder.rmsd
            except Exception as e:
                logger.critical("Error calculating RMSD: {0}".format(e))
                d['reforigin_RMSD'] = 999  # placeholder recorded when the RMSD calculation fails

            # Score the origin with all-atom and rio
            rioData = rio.Rio().scoreOrigin(mrOrigin,
                                            mrPdbInfo=mrPdbInfo,
                                            nativePdbInfo=amoptd['native_pdb_info'],
                                            resSeqMap=amoptd['res_seq_map'],
                                            workdir=fixpath(amoptd['benchmark_dir']))

            # Set attributes
            d['AA_num_contacts'] = rioData.aaNumContacts
            d['RIO_num_contacts'] = rioData.rioNumContacts
            d['RIO_in_register'] = rioData.rioInRegister
            d['RIO_oo_register'] = rioData.rioOoRegister
            d['RIO_backwards'] = rioData.rioBackwards
            d['RIO'] = rioData.rioInRegister + rioData.rioOoRegister
            d['RIO_no_cat'] = rioData.rioNumContacts - (rioData.rioInRegister + rioData.rioOoRegister)
            d['RIO_norm'] = float(d['RIO']) / float(d['native_pdb_num_residues'])
        else:
            d['AA_num_contacts'] = None
            d['RIO_num_contacts'] = None
            d['RIO_in_register'] = None
            d['RIO_oo_register'] = None
            d['RIO_backwards'] = None
            d['RIO'] = None
            d['RIO_no_cat'] = None
            d['RIO_norm'] = None

        # # Now get the helix
        # helixSequence = contacts.Rio().helixFromContacts( contacts=rioData.contacts,
        #                                                   dsspLog=dsspLog )
        # if helixSequence is not None:
        #     ampleResult.rioHelixSequence = helixSequence
        #     ampleResult.rioLenHelix = len( helixSequence )
        #     hfile = os.path.join( workdir, "{0}.helix".format( ampleResult.ensembleName ) )
        #     with open( hfile, 'w' ) as f:
        #         f.write( helixSequence+"\n" )

        #
        # This purely for checking and so we have pdbs to view
        #
        # Wrap shelxe trace onto native using Csymmatch
        if d['SHELXE_pdbout'] is not None and os.path.isfile(fixpath(d['SHELXE_pdbout'])):
            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SHELXE_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    workdir=fixpath(amoptd['benchmark_dir']))

            # NOTE(review): assumed to run only when a SHELXE pdb exists, like the
            # buccaneer/arpwarp sections below - confirm the intended nesting
            if not ('SHELXE_wMPE' in d and d['SHELXE_wMPE']):
                try:
                    mrinfo.analyse(d['SHELXE_pdbout'])
                    d['SHELXE_MPE'] = mrinfo.MPE
                    d['SHELXE_wMPE'] = mrinfo.wMPE
                except Exception as e:
                    logger.critical("Error analysing SHELXE_pdbout: {0}\n{1}".format(d['SHELXE_pdbout'], e))

        # Wrap parse_buccaneer model onto native
        if d['SXRBUCC_pdbout'] and os.path.isfile(fixpath(d['SXRBUCC_pdbout'])):
            # Need to rename Pdb as is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']),
                                        "buccaneer_{0}_csymmatch.pdb".format(d['ensemble_name']))

            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SXRBUCC_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    csymmatchPdb=csymmatchPdb,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRBUCC_pdbout'])
                d['SXRBUCC_MPE'] = mrinfo.MPE
                d['SXRBUCC_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRBUCC_pdbout: {0}\n{1}".format(d['SXRBUCC_pdbout'], e))

        # Wrap arpwarp model onto native
        if d['SXRARP_pdbout'] and os.path.isfile(fixpath(d['SXRARP_pdbout'])):
            # Give the wrapped pdb a name that includes the ensemble
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']),
                                        "arpwarp_{0}_csymmatch.pdb".format(d['ensemble_name']))

            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SXRARP_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    csymmatchPdb=csymmatchPdb,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate phase error
            try:
                mrinfo.analyse(d['SXRARP_pdbout'])
                d['SXRARP_MPE'] = mrinfo.MPE
                d['SXRARP_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRARP_pdbout: {0}\n{1}".format(d['SXRARP_pdbout'], e))

    return
def analysePdb(amoptd):
    """Gather facts about the native pdb and record them in ``amoptd``.

    Reads ``native_pdb`` and ``work_dir``; writes the ``native_pdb_*`` entries
    (header data, space group, atom/residue/chain counts, the info object and
    path of the standardised native, a single-chain pdb path and the origin
    info, which is ``None`` when it could not be built).
    """

    def _derived_path(source, tag):
        # Name for a file derived from ``source``, placed in the work directory
        return ample_util.filename_append(filename=source,
                                          astr=tag,
                                          directory=fixpath(amoptd['work_dir']))

    current_pdb = fixpath(amoptd['native_pdb'])
    info = pdb_edit.get_info(current_pdb)

    # Atom and residue counts come from the native as supplied
    atom_count, residue_count = pdb_edit.num_atoms_and_residues(current_pdb)

    # Origin information for the space group; absence is tolerated
    try:
        origin_info = pdb_model.OriginInfo(spaceGroupLabel=info.crystalInfo.spaceGroup)
    except Exception:
        origin_info = None

    # Copy the header data now, as a bug in pdbcur can knacker the CRYST1 data
    header_fields = (
        ('native_pdb_code', 'pdbCode'),
        ('native_pdb_title', 'title'),
        ('native_pdb_resolution', 'resolution'),
        ('native_pdb_solvent_content', 'solventContent'),
        ('native_pdb_matthews_coefficient', 'matthewsCoefficient'),
    )
    for option_key, attribute in header_fields:
        amoptd[option_key] = getattr(info, attribute)

    amoptd['native_pdb_space_group'] = origin_info.spaceGroup() if origin_info else "P1"
    amoptd['native_pdb_num_atoms'] = atom_count
    amoptd['native_pdb_num_residues'] = residue_count

    # A native with several models is reduced to its first model
    if len(info.models) > 1:
        logger.info("nativePdb has > 1 model - using first")
        first_model_pdb = _derived_path(current_pdb, "model1")
        pdb_edit.extract_model(current_pdb, first_model_pdb, modelID=info.models[0].serial)
        current_pdb = first_model_pdb

    # Standardise the PDB to rename any non-standard AA, remove solvent etc
    standardised_pdb = _derived_path(current_pdb, "std")
    pdb_edit.standardise(current_pdb, standardised_pdb, del_hetatm=True)
    current_pdb = standardised_pdb

    # Everything from here on describes the standardised native
    info = pdb_edit.get_info(current_pdb)
    chains = info.models[0].chains

    # Comparison of the shelxe model needs a single chain from the native
    if len(chains) > 1:
        single_chain_pdb = _derived_path(info.pdb, "chain1")
        pdb_edit.merge_chains(info.pdb, single_chain_pdb)
    else:
        single_chain_pdb = info.pdb

    # Additional data
    amoptd['native_pdb_num_chains'] = len(info.models[0].chains)
    amoptd['native_pdb_info'] = info
    amoptd['native_pdb_std'] = standardised_pdb
    amoptd['native_pdb_1chain'] = single_chain_pdb
    amoptd['native_pdb_origin_info'] = origin_info

    return