def generateMap(mtz, pdb, FP='FP', SIGFP='SIGFP', FREE='FREE', directory=None):
    """Generate a map from an mtz file and a pdb using refmac5"""
    assert os.path.isfile(mtz) and os.path.isfile(pdb), "Cannot find files: {0} {1}".format(mtz, pdb)
    if not directory:
        directory = os.getcwd()

    mapFile = ample_util.filename_append(filename=mtz, astr="map", directory=directory)
    mapFile = os.path.abspath(mapFile)
    mapPdb = ample_util.filename_append(filename=pdb, astr="map", directory=directory)
    cmd = ["refmac5", "HKLIN", mtz, "HKLOUT", mapFile, "XYZIN", pdb, "XYZOUT", mapPdb]

    # FIX FOR DIFFERENT FP etc.
    stdin = """RIDG DIST SIGM 0.02
LABIN FP={0} SIGFP={1} FREE={2}
MAKE HYDR N
WEIGHT MATRIX 0.01
NCYC 0
END
""".format(FP, SIGFP, FREE)

    logfile = os.path.join(directory, "generateMap.log")
    ret = ample_util.run_command(cmd=cmd, logfile=logfile, dolog=True, stdin=stdin)
    assert ret == 0, "generateMap refmac failed - check log: {0}".format(logfile)
    return mapFile
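# A minimal usage sketch for generateMap, assuming the refmac5 binary and the
# CCP4 environment are available; the file names and column labels below are
# illustrative, not taken from the original source.
def _example_generateMap():
    # Run zero-cycle refmac to get map coefficients for the input model
    map_file = generateMap("input.mtz", "model.pdb", FP="FP", SIGFP="SIGFP", FREE="FreeR_flag")
    print("Map coefficients written to: {0}".format(map_file))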
def wrapModelToNative(self, mrPdb, nativePdb, origin=[0.0, 0.0, 0.0], csymmatchPdb=None, workdir=None, cleanup=True):
    """Take a pdb and wrap it onto the nativePdb using csymmatch.
    If origin is not [0.0, 0.0, 0.0] we also move the structure onto the new origin before wrapping"""
    if workdir is None:
        workdir = os.getcwd()
    assert os.path.isfile(mrPdb) and os.path.isfile(nativePdb), "Cannot find: {0} or {1}".format(mrPdb, nativePdb)

    originMrPdb = None
    if origin != [0.0, 0.0, 0.0]:
        ostr = "o{0}_{1}".format(origin, str(uuid.uuid1())).replace(" ", "")
        originMrPdb = ample_util.filename_append(filename=mrPdb, astr=ostr, directory=workdir)
        pdb_edit.translate(inpdb=mrPdb, outpdb=originMrPdb, ftranslate=origin)
        mrPdb = originMrPdb

    if csymmatchPdb is None:
        csymmatchPdb = ample_util.filename_append(filename=mrPdb,
                                                  astr="csymmatch_{0}".format(str(uuid.uuid1())),
                                                  directory=workdir)

    self.run(refPdb=nativePdb, inPdb=mrPdb, outPdb=csymmatchPdb, originHand=False, cleanup=cleanup)
    if not os.path.isfile(csymmatchPdb):
        raise RuntimeError("Error generating csymmatchPdb")
    if cleanup and originMrPdb:
        os.unlink(originMrPdb)
    return csymmatchPdb
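# Hypothetical usage sketch for wrapModelToNative: wrap an MR solution onto
# the native structure after applying an origin shift. The Csymmatch class is
# assumed from the surrounding code; the paths and origin are illustrative.
def _example_wrapModelToNative():
    wrapped = csymmatch.Csymmatch().wrapModelToNative("phaser_solution.pdb",
                                                      "native.pdb",
                                                      origin=[0.5, 0.0, 0.0],
                                                      workdir="benchmark_dir")
    print("Wrapped model: {0}".format(wrapped))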
def scoreOrigin(self, origin=None, mrPdbInfo=None, nativePdbInfo=None, resSeqMap=None, workdir=None):
    # Default the workdir at call time, not at import time
    if workdir is None:
        workdir = os.getcwd()
    self.workdir = workdir

    if not resSeqMap.resSeqMatch():
        # We need to create a copy of the placed pdb with numbering matching the native
        mrPdbRes = ample_util.filename_append(filename=mrPdbInfo.pdb, astr="reseq", directory=self.workdir)
        pdb_edit.match_resseq(targetPdb=mrPdbInfo.pdb, sourcePdb=None, outPdb=mrPdbRes, resMap=resSeqMap)
        mrPdb = mrPdbRes
    else:
        mrPdb = mrPdbInfo.pdb

    # Make a copy of mrPdb with chains renamed to lower case
    ucChains = mrPdbInfo.models[0].chains
    toChains = [c.lower() for c in ucChains]
    mrAaPdb = ample_util.filename_append(filename=mrPdb, astr="ren", directory=self.workdir)
    pdb_edit.rename_chains(inpdb=mrPdb, outpdb=mrAaPdb, fromChain=ucChains, toChain=toChains)

    # The list of chains in the native that we will be checking contacts from
    fromChains = nativePdbInfo.models[0].chains

    mrOriginPdb = mrAaPdb
    if origin != [0.0, 0.0, 0.0]:
        # Move pdb to the new origin
        ostr = "o{0}".format(origin).replace(" ", "")
        mrOriginPdb = ample_util.filename_append(filename=mrAaPdb, astr=ostr, directory=self.workdir)
        pdb_edit.translate(inpdb=mrAaPdb, outpdb=mrOriginPdb, ftranslate=origin)

    # Concatenate into one file
    joinedPdb = ample_util.filename_append(filename=mrOriginPdb, astr="joined", directory=self.workdir)
    pdb_edit.merge(pdb1=nativePdbInfo.pdb, pdb2=mrOriginPdb, pdbout=joinedPdb)

    # Run ncont
    data = RioData()
    data.origin = origin
    data.originPdb = mrOriginPdb
    data.joinedPdb = joinedPdb
    data.fromChains = fromChains
    data.toChains = toChains

    # First get the all-atom score, then the RIO score
    self.calcAllAtom(data)
    self.calcRio(data)

    # Clean up
    os.unlink(mrOriginPdb)
    os.unlink(joinedPdb)
    if os.path.isfile(mrAaPdb):
        os.unlink(mrAaPdb)

    return data
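# Hypothetical usage sketch for scoreOrigin (the Rio class, RioData and the
# pdb-info objects are assumed from the surrounding code; paths and origin
# are illustrative). resSeqMap would come from residue_map.residueSequenceMap().
def _example_scoreOrigin(resSeqMap):
    rioData = Rio().scoreOrigin([0.5, 0.0, 0.0],
                                mrPdbInfo=pdb_edit.get_info("phaser_solution.pdb"),
                                nativePdbInfo=pdb_edit.get_info("native_std.pdb"),
                                resSeqMap=resSeqMap,
                                workdir="rio_work")
    print(rioData.rioInRegister, rioData.rioOoRegister)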
def process_models(self, models, out_dir, strip_oxt=False, prefix="scwrl"):
    logger.info('Adding sidechains with SCWRL to models')
    out_pdbs = []
    for pdb in models:
        out_pdbs.append(self.add_sidechains(pdbin=pdb,
                                            pdbout=ample_util.filename_append(pdb, prefix, directory=out_dir),
                                            strip_oxt=strip_oxt))
    # Use len(out_pdbs) rather than a loop index, which would be undefined for an empty list
    logger.info('Processed {0} models with SCWRL into directory: {1}'.format(len(out_pdbs), out_dir))
    return out_pdbs
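# Hypothetical usage sketch for process_models (assumes an instance of the
# enclosing SCWRL wrapper class, configured with its executable; the file
# names are illustrative).
def _example_process_models(scwrl):
    out_pdbs = scwrl.process_models(["model_1.pdb", "model_2.pdb"], out_dir="scwrl_out")
    print("Added sidechains to {0} models".format(len(out_pdbs)))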
def model_core_from_fasta(models, alignment_file, work_dir=None, case_sensitive=False):
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)

    # Read in the alignment to get the sequences
    align_seq = sequence_util.Sequence(fasta=alignment_file)
    # Should check that all alignments are the same length

    # Get pdb names from the alignment headers
    seq_names = [h[1:].strip() for h in align_seq.headers]

    # Need to check if the alignment file is from gesamt, in which case the names have the
    # chain names appended in brackets
    for i, s in enumerate(seq_names):
        x = re.search(r"\([a-zA-Z]*\)$", s)
        if x:
            seq_names[i] = s.replace(x.group(0), "")

    # Get an array specifying which positions are core. If the positions all align, there
    # will be a capital letter for the residue. Gaps are signified by "-" and non-structurally-
    # aligned residues by lower-case letters.
    GAP = '-'
    # Can't use the case test by default as Theseus ignores lower-case letters in the alignment
    if case_sensitive:
        core = [all(x in pdb_edit.one2three.keys() for x in t) for t in zip(*align_seq.sequences)]
    else:
        core = [all(x != GAP for x in t) for t in zip(*align_seq.sequences)]

    if not any(core):
        raise RuntimeError("Cannot generate core for models: {0}".format(models))

    # For each sequence, get a list of which positions are core
    core_positions = []
    for seq in align_seq.sequences:
        p = []
        count = 0
        for i, pos in enumerate(seq):
            if pos != GAP:
                if core[i]:
                    p.append(count)
                count += 1
        core_positions.append(p)

    # Should check that the sequence lengths match the number of residues in the pdbs

    # Create a dict mapping seq_names to core positions
    core_dict = dict((s, core_positions[i]) for i, s in enumerate(seq_names))

    # Cut the models down to the core
    core_models = []
    for m in models:
        name = os.path.basename(m)
        pdbout = ample_util.filename_append(m, astr='core', directory=work_dir)
        pdb_edit.select_residues(m, pdbout, tokeep_idx=core_dict[name])
        core_models.append(pdbout)
    return core_models
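# Toy illustration of the core mask computed above: a column of the alignment
# is "core" only if no sequence has a gap ('-') at that position. This is
# self-contained and runnable; the sequences are invented.
def _example_core_mask():
    seqs = ["MK-LV",
            "MKALV",
            "MK-LV"]
    GAP = '-'
    # zip(*seqs) iterates the alignment column by column
    core = [all(x != GAP for x in col) for col in zip(*seqs)]
    assert core == [True, True, False, True, True]
    return core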
def model_core_from_theseus(models, alignment_file, var_by_res, work_dir=None):
    """
    Only residues from the first protein are listed in the theseus output, and then not even all of them.

    We assume the output is based on the original alignment, so that each residue in the first protein
    lines up with either another residue in one of the other proteins or a gap.

    SO - we need to go through the theseus data and, for each residue that is core, find the
    corresponding residues in the other proteins.

    We use the resSeq numbers to match the residues across the alignment.
    """
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)

    seqalign = sequence_util.Sequence(fasta=alignment_file)
    # We now need to add the list of pdbs, chains and resSeqs of the other models to the Sequence object
    for m in models:
        seqalign.add_pdb_data(m)

    # Sanity check that the names of the pdb files match those from the fasta header
    # Format is expected to be: '>1ujb.pdb(A)'
    names = [h[1:].split('(')[0] for h in seqalign.headers]
    if not seqalign.pdbs == names:
        raise RuntimeError("headers and names of pdb files do not match!\n{0}\n{1}".format(seqalign.pdbs, names))

    # Get the name of the first pdb that the alignment is based on
    first = seqalign.pdbs[0]

    # Dictionary mapping model pdb to the resSeqs that are core
    model2core = {}
    for p in seqalign.pdbs:
        model2core[p] = []  # initialise

    # Get the list of core resSeqs in the first sequence
    model2core[first] = [x.resSeq for x in var_by_res if x.core]

    # Now go through the first sequence and get the resSeqs of the corresponding core for the other models
    pointer = 0  # Tracks where we are in the first sequence
    for i, resSeq in enumerate(seqalign.resseqs[0]):
        if model2core[first][pointer] == resSeq:
            # Core residue in the first sequence, so append the corresponding resSeqs for the other proteins
            for j, pdb in enumerate(seqalign.pdbs[1:]):
                model2core[pdb].append(seqalign.resseqs[j + 1][i])
            pointer += 1
            if pointer >= len(model2core[first]):
                break

    core_models = []
    for m in models:
        name = os.path.basename(m)
        pdbout = ample_util.filename_append(m, astr='core', directory=work_dir)
        pdb_edit.select_residues(m, pdbout, tokeep=model2core[name])
        core_models.append(pdbout)
    return core_models
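# Toy illustration of the resSeq matching walk used above (hypothetical
# numbers): the core resSeqs of the first model pick out aligned positions,
# and the corresponding resSeqs are harvested for the other models.
def _example_resseq_walk():
    first_resseqs = [10, 11, 12, 13]    # resSeqs of the first model
    other_resseqs = [55, 56, 57, 58]    # resSeqs of a second, aligned model
    core_first = [11, 13]               # core resSeqs in the first model
    core_other = []
    pointer = 0
    for i, resSeq in enumerate(first_resseqs):
        if core_first[pointer] == resSeq:
            core_other.append(other_resseqs[i])
            pointer += 1
            if pointer >= len(core_first):
                break
    assert core_other == [56, 58]
    return core_other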
def truncate_models(self, models, max_cluster_size=200, truncation_method=None,
                    percent_truncation=None, percent_fixed_intervals=None,
                    truncation_pruning=None, residue_scores=None, homologs=False,
                    alignment_file=None, work_dir=None):
    """Generate a set of Truncation objects, referencing a set of truncated models
    generated from the supplied models"""
    truncations = self.calculate_truncations(models=models,
                                             truncation_method=truncation_method,
                                             percent_truncation=percent_truncation,
                                             percent_fixed_intervals=percent_fixed_intervals,
                                             truncation_pruning=truncation_pruning,
                                             residue_scores=residue_scores,
                                             alignment_file=alignment_file,
                                             homologs=homologs)
    if truncations is None or len(truncations) < 1:
        logger.critical("Unable to truncate the ensembles - no viable truncations")
        return []

    # Loop through the Truncation objects, truncating the models based on the truncation data and adding
    # the truncated models to the Truncation.models attribute
    for truncation in truncations:
        truncation.directory = os.path.join(self.work_dir, 'tlevel_{0}'.format(truncation.level))
        os.mkdir(truncation.directory)
        logger.info('Truncating at: %s in directory %s', truncation.level, truncation.directory)
        truncation.models = []
        for infile in self.models:
            pdbout = ample_util.filename_append(infile, str(truncation.level), directory=truncation.directory)
            # Create a new PDB that only contains the residues left after truncation
            pdb_edit.select_residues(pdbin=infile, pdbout=pdbout, tokeep_idx=truncation.residues_idxs)
            truncation.models.append(pdbout)

    self.truncations = truncations
    return truncations
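# Hypothetical usage sketch for truncate_models (assumes a Truncator
# configured with theseus_exe; the method name, percentage and paths are
# illustrative values, not confirmed defaults).
def _example_truncate_models():
    truncator = truncation_util.Truncator(work_dir="truncate_dir")
    truncator.theseus_exe = "/usr/local/bin/theseus"
    truncations = truncator.truncate_models(models=["model_1.pdb", "model_2.pdb"],
                                            truncation_method="percent",
                                            percent_truncation=5)
    for t in truncations:
        print(t.level, len(t.models))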
def generate_ensembles(self, models, alignment_file=None, homolog_aligner=None,
                       percent_fixed_intervals=None, percent_truncation=None,
                       side_chain_treatments=SIDE_CHAIN_TREATMENTS,
                       truncation_method=None, **kwargs):
    if not percent_truncation:
        percent_truncation = self.percent_truncation
    if not truncation_method:
        truncation_method = self.truncation_method

    if not len(models):
        msg = "Cannot find any models for ensembling!"
        raise RuntimeError(msg)
    if not all([os.path.isfile(m) for m in models]):
        msg = "Problem reading models given to Ensembler: {0}".format(models)
        raise RuntimeError(msg)

    logger.info('Ensembling models in directory: %s', self.work_dir)

    # Create the final ensembles directory
    if not os.path.isdir(self.ensembles_directory):
        os.mkdir(self.ensembles_directory)

    # Standardise all the models
    std_models_dir = os.path.join(self.work_dir, "std_models")
    os.mkdir(std_models_dir)
    std_models = []
    for m in models:
        std_model = ample_util.filename_append(m, 'std', std_models_dir)
        pdb_edit.standardise(pdbin=m, pdbout=std_model, del_hetatm=True)
        std_models.append(std_model)

    # Get a structural alignment between the different models
    if not alignment_file:
        if homolog_aligner == 'mustang':
            logger.info("Generating alignment file with mustang_exe: %s", self.mustang_exe)
            alignment_file = align_mustang(std_models, mustang_exe=self.mustang_exe, work_dir=self.work_dir)
        elif homolog_aligner == 'gesamt':
            logger.info("Generating alignment file with gesamt_exe: %s", self.gesamt_exe)
            alignment_file = align_gesamt(std_models, gesamt_exe=self.gesamt_exe, work_dir=self.work_dir)
        else:
            msg = "Unknown homolog_aligner: {0}".format(homolog_aligner)
            raise RuntimeError(msg)
        logger.info("Generated alignment file: %s", alignment_file)
    else:
        logger.info("Using alignment file: %s", alignment_file)

    truncate_dir = os.path.join(self.work_dir, "homolog_truncate")
    if not os.path.isdir(truncate_dir):
        os.mkdir(truncate_dir)

    # Now truncate and create ensembles - as standard ample, but with no subclustering
    self.ensembles = []
    self.truncator = truncation_util.Truncator(work_dir=truncate_dir)
    self.truncator.theseus_exe = self.theseus_exe
    for truncation in self.truncator.truncate_models(models=std_models,
                                                     truncation_method=truncation_method,
                                                     percent_fixed_intervals=percent_fixed_intervals,
                                                     percent_truncation=percent_truncation,
                                                     truncation_pruning=None,
                                                     homologs=True,
                                                     alignment_file=alignment_file):
        ensemble_dir = os.path.join(truncation.directory, "ensemble_{0}".format(truncation.level))
        os.mkdir(ensemble_dir)
        os.chdir(ensemble_dir)

        # Need to create an alignment file for theseus
        basename = "e{0}".format(truncation.level)
        superposed_models = self.superpose_models(truncation.models, basename=basename,
                                                  work_dir=ensemble_dir, homologs=True)
        if not superposed_models:
            logger.critical("Skipping ensemble %s due to error with Theseus", basename)
            continue

        # Create the Ensemble object
        pre_ensemble = _ensembler.Ensemble()
        pre_ensemble.num_residues = truncation.num_residues
        pre_ensemble.truncation_dir = truncation.directory
        pre_ensemble.truncation_level = truncation.level
        pre_ensemble.truncation_method = truncation.method
        pre_ensemble.truncation_percent = truncation.percent
        pre_ensemble.truncation_residues = truncation.residues
        pre_ensemble.truncation_variance = truncation.variances
        pre_ensemble.pdb = superposed_models

        for ensemble in self.edit_side_chains(pre_ensemble, side_chain_treatments, homologs=True):
            self.ensembles.append(ensemble)

    return self.ensembles
def analysePdb(amoptd):
    """Collect data on the native pdb structure"""
    nativePdb = fixpath(amoptd['native_pdb'])
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # Number of atoms/residues
    natoms, nresidues = pdb_edit.num_atoms_and_residues(nativePdb)

    # Get information on the origins for this spaceGroup
    try:
        originInfo = pdb_model.OriginInfo(spaceGroupLabel=nativePdbInfo.crystalInfo.spaceGroup)
    except Exception:
        originInfo = None

    # Do this here as a bug in pdbcur can knacker the CRYST1 data
    amoptd['native_pdb_code'] = nativePdbInfo.pdbCode
    amoptd['native_pdb_title'] = nativePdbInfo.title
    amoptd['native_pdb_resolution'] = nativePdbInfo.resolution
    amoptd['native_pdb_solvent_content'] = nativePdbInfo.solventContent
    amoptd['native_pdb_matthews_coefficient'] = nativePdbInfo.matthewsCoefficient
    if not originInfo:
        space_group = "P1"
    else:
        space_group = originInfo.spaceGroup()
    amoptd['native_pdb_space_group'] = space_group
    amoptd['native_pdb_num_atoms'] = natoms
    amoptd['native_pdb_num_residues'] = nresidues

    # First check if the native has > 1 model and extract the first if so
    if len(nativePdbInfo.models) > 1:
        logger.info("nativePdb has > 1 model - using first")
        nativePdb1 = ample_util.filename_append(filename=nativePdb, astr="model1",
                                                directory=fixpath(amoptd['work_dir']))
        pdb_edit.extract_model(nativePdb, nativePdb1, modelID=nativePdbInfo.models[0].serial)
        nativePdb = nativePdb1

    # Standardise the PDB to rename any non-standard AA, remove solvent etc
    nativePdbStd = ample_util.filename_append(filename=nativePdb, astr="std",
                                              directory=fixpath(amoptd['work_dir']))
    pdb_edit.standardise(nativePdb, nativePdbStd, del_hetatm=True)
    nativePdb = nativePdbStd

    # Get the new info about the native
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # For maxcluster comparison of the shelxe model we need a single chain from the native, so we get this here
    if len(nativePdbInfo.models[0].chains) > 1:
        nativeChain1 = ample_util.filename_append(filename=nativePdbInfo.pdb, astr="chain1",
                                                  directory=fixpath(amoptd['work_dir']))
        pdb_edit.to_single_chain(nativePdbInfo.pdb, nativeChain1)
    else:
        nativeChain1 = nativePdbInfo.pdb

    # Additional data
    amoptd['native_pdb_num_chains'] = len(nativePdbInfo.models[0].chains)
    amoptd['native_pdb_info'] = nativePdbInfo
    amoptd['native_pdb_std'] = nativePdbStd
    amoptd['native_pdb_1chain'] = nativeChain1
    amoptd['native_pdb_origin_info'] = originInfo
    return
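# Hypothetical usage sketch: analysePdb populates the AMPLE options dictionary
# in place. Only the keys read by the function are shown here; the paths are
# illustrative.
def _example_analysePdb():
    amoptd = {'native_pdb': 'native.pdb', 'work_dir': 'ample_work'}
    analysePdb(amoptd)
    print(amoptd['native_pdb_space_group'], amoptd['native_pdb_num_residues'])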
def model_core_from_fasta(models, alignment_file, work_dir=None, case_sensitive=False):
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)

    # Read in the alignment to get the sequences
    align_seq = sequence_util.Sequence(fasta=alignment_file)
    # Should check that all alignments are the same length

    # Get pdb names from the alignment headers
    seq_names = [h[1:].strip() for h in align_seq.headers]

    # Need to check if the alignment file is from gesamt, in which case the names have the
    # chain names appended in brackets
    for i, s in enumerate(seq_names):
        x = re.search(r"\([a-zA-Z]*\)$", s)
        if x:
            seq_names[i] = s.replace(x.group(0), "")

    # Get an array specifying which positions are core. If the positions all align, there
    # will be a capital letter for the residue. Gaps are signified by "-" and non-structurally-
    # aligned residues by lower-case letters.
    GAP = '-'
    # Can't use the case test by default as Theseus ignores lower-case letters in the alignment
    if case_sensitive:
        core = [all(x in ample_util.one2three.keys() for x in t) for t in zip(*align_seq.sequences)]
    else:
        core = [all(x != GAP for x in t) for t in zip(*align_seq.sequences)]

    if not any(core):
        raise RuntimeError("Cannot generate core for models: {0}".format(models))

    # For each sequence, get a list of which positions are core
    core_positions = []
    for seq in align_seq.sequences:
        p = []
        count = 0
        for i, pos in enumerate(seq):
            if pos != GAP:
                if core[i]:
                    p.append(count)
                count += 1
        core_positions.append(p)

    # Should check that the sequence lengths match the number of residues in the pdbs

    # Create a dict mapping seq_names to core positions
    core_dict = dict((s, core_positions[i]) for i, s in enumerate(seq_names))

    # Cut the models down to the core
    core_models = []
    for m in models:
        name = os.path.basename(m)
        pdbout = ample_util.filename_append(m, astr='core', directory=work_dir)
        pdb_edit.select_residues(m, pdbout, tokeep_idx=core_dict[name])
        core_models.append(pdbout)
    return core_models
def findOrigin(self, nativePdbInfo=None, mrPdbInfo=None, resSeqMap=None, origins=None, allAtom=False, workdir=None):
    """Find the origin using the maximum number of contacts as the metric"""
    # Default the workdir at call time, not at import time
    if workdir is None:
        workdir = os.getcwd()
    self.workdir = workdir

    if not resSeqMap.resSeqMatch():
        # We need to create a copy of the placed pdb with numbering matching the native
        mrPdbRes = ample_util.filename_append(filename=mrPdbInfo.pdb, astr="reseq", directory=self.workdir)
        pdb_edit.match_resseq(targetPdb=mrPdbInfo.pdb, sourcePdb=None, outPdb=mrPdbRes, resMap=resSeqMap)
        mrPdb = mrPdbRes
    else:
        mrPdb = mrPdbInfo.pdb

    # Make a copy of mrPdb with chains renamed to lower case
    ucChains = mrPdbInfo.models[0].chains
    toChains = [c.lower() for c in ucChains]
    placedAaPdb = ample_util.filename_append(filename=mrPdb, astr="ren", directory=self.workdir)
    pdb_edit.rename_chains(inpdb=mrPdb, outpdb=placedAaPdb, fromChain=ucChains, toChain=toChains)

    # The list of chains in the native that we will be checking contacts from
    fromChains = nativePdbInfo.models[0].chains

    # Loop over origins, move the placed pdb to the new origin and then run ncont.
    # self.data holds the data for the best origin.
    self.data = None
    for origin in origins:
        placedOriginPdb = placedAaPdb
        if origin != [0.0, 0.0, 0.0]:
            # Move pdb to the new origin
            ostr = "o{0}".format(origin).replace(" ", "")
            placedOriginPdb = ample_util.filename_append(filename=placedAaPdb, astr=ostr, directory=self.workdir)
            pdb_edit.translate(inpdb=placedAaPdb, outpdb=placedOriginPdb, ftranslate=origin)

        # Concatenate into one file
        joinedPdb = ample_util.filename_append(filename=placedOriginPdb, astr="joined", directory=self.workdir)
        pdb_edit.merge(pdb1=nativePdbInfo.pdb, pdb2=placedOriginPdb, pdbout=joinedPdb)

        # Set up the object to hold the data
        data = RioData()
        data.origin = origin
        data.originPdb = placedOriginPdb
        data.joinedPdb = joinedPdb
        data.fromChains = fromChains
        data.toChains = toChains
        data.numGood = 0  # For holding the metric

        # Run ncont
        if allAtom:
            self.calcAllAtom(data)
            data.numGood = data.aaNumContacts
        else:
            self.calcRio(data)
            data.numGood = data.rioInRegister + data.rioOoRegister

        # Save the first origin and only update if we get a better score
        if not self.data or data.numGood > self.data.numGood:
            self.data = data

    # Now calculate the score we didn't calculate in the loop for the best origin
    if allAtom:
        self.calcRio(self.data)
    else:
        self.calcAllAtom(self.data)

    if self.data.numGood > 0:
        # If we got a match, run csymmatch so we can see the result
        csym = csymmatch.Csymmatch()
        csymmatchPdb = ample_util.filename_append(filename=self.data.originPdb,
                                                  astr="csymmatch_best", directory=self.workdir)
        csym.run(refPdb=nativePdbInfo.pdb, inPdb=self.data.originPdb, outPdb=csymmatchPdb, originHand=False)

    return self.data
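# Hypothetical usage sketch for findOrigin: search candidate origins for the
# one giving the most contacts (the Rio class, pdb-info objects and resSeqMap
# are assumed from the surrounding code; the paths and origins are illustrative).
def _example_findOrigin(resSeqMap):
    best = Rio().findOrigin(nativePdbInfo=pdb_edit.get_info("native_std.pdb"),
                            mrPdbInfo=pdb_edit.get_info("phaser_solution.pdb"),
                            resSeqMap=resSeqMap,
                            origins=[[0.0, 0.0, 0.0], [0.5, 0.5, 0.0]],
                            workdir="rio_work")
    print(best.origin, best.numGood)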
def generate_ensembles(self, models, ensembles_directory=None, nproc=None,
                       percent_truncation=None, percent_fixed_intervals=None,
                       side_chain_treatments=SIDE_CHAIN_TREATMENTS,
                       truncation_method=None, truncation_pruning=None,
                       truncation_scorefile=None, truncation_scorefile_header=None):
    """Method to generate ensembles from a single structure based on residue scores"""
    if not truncation_method:
        truncation_method = self.truncation_method
    if not truncation_pruning:
        truncation_pruning = self.truncation_pruning
    if not truncation_scorefile:
        truncation_scorefile = self.truncation_scorefile

    if len(models) > 1:
        msg = "More than 1 structure provided"
        logger.critical(msg)
        raise RuntimeError(msg)

    if len(truncation_scorefile_header) < 2:
        msg = "At least two header options for scorefile are required"
        logger.critical(msg)
        raise RuntimeError(msg)

    # Standardise the structure
    std_models_dir = os.path.join(self.work_dir, "std_models")
    os.mkdir(std_models_dir)
    std_model = ample_util.filename_append(models[0], 'std', std_models_dir)
    pdb_edit.standardise(pdbin=models[0], pdbout=std_model, del_hetatm=True)
    std_models = [std_model]
    logger.info('Standardised input model: %s', std_models[0])

    # Create the final ensembles directory
    if not os.path.isdir(self.ensembles_directory):
        os.mkdir(self.ensembles_directory)

    truncate_dir = os.path.join(self.work_dir, "single_truncate")
    if not os.path.isdir(truncate_dir):
        os.mkdir(truncate_dir)

    # Read all the scores into a per-residue dictionary
    residue_scores = self._read_scorefile(truncation_scorefile)
    residue_key = truncation_scorefile_header.pop(0)
    # Use a list comprehension, not map(), so the headers can be iterated more than once under Python 3
    truncation_scorefile_header = [h.strip() for h in truncation_scorefile_header]
    assert all(h in residue_scores[0] for h in truncation_scorefile_header), \
        "Not all column labels are in your CSV file"

    self.ensembles = []
    for score_key in truncation_scorefile_header:
        zipped_scores = self._generate_residue_scorelist(residue_key, score_key, residue_scores)
        score_truncate_dir = os.path.join(truncate_dir, "{0}".format(score_key))
        if not os.path.isdir(score_truncate_dir):
            os.mkdir(score_truncate_dir)

        self.truncator = truncation_util.Truncator(work_dir=score_truncate_dir)
        self.truncator.theseus_exe = self.theseus_exe
        for truncation in self.truncator.truncate_models(models=std_models,
                                                         truncation_method=truncation_method,
                                                         percent_truncation=percent_truncation,
                                                         percent_fixed_intervals=percent_fixed_intervals,
                                                         truncation_pruning=truncation_pruning,
                                                         residue_scores=zipped_scores):
            pre_ensemble = _ensembler.Ensemble()
            pre_ensemble.num_residues = truncation.num_residues
            pre_ensemble.truncation_dir = truncation.directory
            pre_ensemble.truncation_level = truncation.level
            pre_ensemble.truncation_method = truncation.method
            pre_ensemble.truncation_percent = truncation.percent
            pre_ensemble.truncation_residues = truncation.residues
            pre_ensemble.truncation_variance = truncation.variances
            pre_ensemble.truncation_score_key = score_key.lower()
            pre_ensemble.pdb = truncation.models[0]

            for ensemble in self.edit_side_chains(pre_ensemble, side_chain_treatments, single_structure=True):
                self.ensembles.append(ensemble)

    return self.ensembles
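# Illustrative layout of the score file consumed above (hypothetical column
# names): _read_scorefile is expected to yield one dict per residue keyed by
# the header columns, with the first header entry acting as the residue key.
#
#     residue,concoord,rosetta
#     1,0.123,0.456
#     2,0.234,0.567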
def getRmsd(self, nativePdbInfo=None, placedPdbInfo=None, refModelPdbInfo=None, workdir=None, cAlphaOnly=True):
    """For now just save the lowest rmsd - can look at collecting more nativeInfo later

    Currently we assume we are only given one model and that it has already been standardised.
    """
    if workdir:
        self.workdir = workdir
    if not self.workdir:
        self.workdir = os.getcwd()
    self.cAlphaOnly = cAlphaOnly  # Whether to only compare c-alpha atoms

    # Run a pass to find the chains
    native_chains = nativePdbInfo.models[0].chains
    placed_chains = placedPdbInfo.models[0].chains

    rmsds = {}  # dict of rmsd -> (chainIDnative, chainIDrefined, reforiginLogfile)

    # Match each chain in native against refined and pick the best
    for nativeChainID in native_chains:
        if len(native_chains) == 1:
            # Don't need to do owt as we are just using the native as is
            nativeChainPdb = nativePdbInfo.pdb
        else:
            # Extract the chain from the pdb
            astr = "chain{0}".format(nativeChainID)
            nativeChainPdb = ample_util.filename_append(filename=nativePdbInfo.pdb, astr=astr,
                                                        directory=self.workdir)
            pdb_edit.extract_chain(nativePdbInfo.pdb, nativeChainPdb, chainID=nativeChainID)

        # Calculate the RefSeqMap - need to do this before we reduce to c-alphas.
        # The second chain may be a different composition to the first, so we only generate a traceback if we fail
        # on the first chain. The model only has one chain, so the residueMap has to be the same for all the chains.
        try:
            resSeqMap = residue_map.residueSequenceMap()
            resSeqMap.fromInfo(refInfo=nativePdbInfo,
                               refChainID=nativeChainID,
                               targetInfo=refModelPdbInfo,
                               targetChainID='A')  # The model only has one chain
        except RuntimeError:
            if nativeChainID == native_chains[0]:
                raise
            else:
                # Only compare the first chain
                break

        for placedChainID in placed_chains:
            # Prepare the placed PDB
            placedChainPdb = self.preparePlacedPdb(placedPdb=placedPdbInfo.pdb,
                                                   placedChainID=placedChainID,
                                                   nativeChainID=nativeChainID,
                                                   resSeqMap=resSeqMap)

            # Now create a PDB with the matching atoms from native that are in refined
            nativePdbMatch = ample_util.filename_append(filename=nativeChainPdb, astr="matched",
                                                        directory=self.workdir)
            pdb_edit.keep_matching(refpdb=placedChainPdb, targetpdb=nativeChainPdb,
                                   outpdb=nativePdbMatch, resSeqMap=resSeqMap)

            # Now get the rmsd
            astr = "chain{0}_reforigin".format(nativeChainID)
            reforiginOut = ample_util.filename_append(filename=placedChainPdb, astr=astr,
                                                      directory=self.workdir)
            try:
                rms = self.calculate(refpdb=nativePdbMatch, targetpdb=placedChainPdb, outpdb=reforiginOut)
            except RuntimeError as e:
                logger.critical("GOT REFORIGIN ERROR for {0},{1},{2}\n{3}".format(
                    placedChainPdb, nativeChainPdb, nativeChainID, e))
                rms = 99999
            rmsds[rms] = (nativeChainID, placedChainID, reforiginOut)

            # Clean up
            os.unlink(placedChainPdb)
            os.unlink(nativePdbMatch)

    # Save the lowest rmsd. The original text was cut off at this point; this
    # minimal ending is an assumption based on the docstring and on callers
    # that read self.rmsd after calling getRmsd.
    if rmsds:
        self.rmsd = min(rmsds)
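# Hypothetical usage sketch for getRmsd (the ReforiginRmsd class name is taken
# from the calling code later in this section; the pdb-info objects come from
# pdb_edit.get_info and the paths are illustrative).
def _example_getRmsd():
    rmsder = reforigin.ReforiginRmsd()
    rmsder.getRmsd(nativePdbInfo=pdb_edit.get_info("native_std.pdb"),
                   placedPdbInfo=pdb_edit.get_info("phaser_solution.pdb"),
                   refModelPdbInfo=pdb_edit.get_info("ref_model.pdb"),
                   workdir="benchmark_dir")
    print("Lowest RMSD: {0}".format(rmsder.rmsd))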
def scoreOrigin(self, origin=None, mrPdbInfo=None, nativePdbInfo=None, resSeqMap=None, workdir=None):
    # Default the workdir at call time, not at import time
    if workdir is None:
        workdir = os.getcwd()
    self.workdir = workdir

    if not resSeqMap.resSeqMatch():
        # We need to create a copy of the placed pdb with numbering matching the native
        mrPdbRes = ample_util.filename_append(filename=mrPdbInfo.pdb, astr="reseq", directory=self.workdir)
        pdb_edit.match_resseq(targetPdb=mrPdbInfo.pdb, sourcePdb=None, outPdb=mrPdbRes, resMap=resSeqMap)
        mrPdb = mrPdbRes
    else:
        mrPdb = mrPdbInfo.pdb

    # Make a copy of mrPdb with chains renamed to lower case
    ucChains = mrPdbInfo.models[0].chains
    toChains = [c.lower() for c in ucChains]
    mrAaPdb = ample_util.filename_append(filename=mrPdb, astr="ren", directory=self.workdir)
    pdb_edit.rename_chains(inpdb=mrPdb, outpdb=mrAaPdb, fromChain=ucChains, toChain=toChains)

    # The list of chains in the native that we will be checking contacts from
    fromChains = nativePdbInfo.models[0].chains

    mrOriginPdb = mrAaPdb
    if origin != [0.0, 0.0, 0.0]:
        # Move pdb to the new origin
        ostr = "o{0}".format(origin).replace(" ", "")
        mrOriginPdb = ample_util.filename_append(filename=mrAaPdb, astr=ostr, directory=self.workdir)
        pdb_edit.translate(inpdb=mrAaPdb, outpdb=mrOriginPdb, ftranslate=origin)

    # Concatenate into one file
    joinedPdb = ample_util.filename_append(filename=mrOriginPdb, astr="joined", directory=self.workdir)
    pdb_edit.merge(pdb1=nativePdbInfo.pdb, pdb2=mrOriginPdb, pdbout=joinedPdb)

    # Run ncont
    data = RioData()
    data.origin = origin
    data.originPdb = mrOriginPdb
    data.joinedPdb = joinedPdb
    data.fromChains = fromChains
    data.toChains = toChains

    # First get the all-atom score, then the RIO score
    self.calcAllAtom(data)
    self.calcRio(data)

    # Clean up
    os.unlink(mrOriginPdb)
    os.unlink(joinedPdb)
    if os.path.isfile(mrAaPdb):
        os.unlink(mrAaPdb)

    return data
def findOrigin(self, nativePdbInfo=None, mrPdbInfo=None, resSeqMap=None, origins=None, allAtom=False, workdir=None):
    """Find the origin using the maximum number of contacts as the metric"""
    # Default the workdir at call time, not at import time
    if workdir is None:
        workdir = os.getcwd()
    self.workdir = workdir

    if not resSeqMap.resSeqMatch():
        # We need to create a copy of the placed pdb with numbering matching the native
        mrPdbRes = ample_util.filename_append(filename=mrPdbInfo.pdb, astr="reseq", directory=self.workdir)
        pdb_edit.match_resseq(targetPdb=mrPdbInfo.pdb, sourcePdb=None, outPdb=mrPdbRes, resMap=resSeqMap)
        mrPdb = mrPdbRes
    else:
        mrPdb = mrPdbInfo.pdb

    # Make a copy of mrPdb with chains renamed to lower case
    ucChains = mrPdbInfo.models[0].chains
    toChains = [c.lower() for c in ucChains]
    placedAaPdb = ample_util.filename_append(filename=mrPdb, astr="ren", directory=self.workdir)
    pdb_edit.rename_chains(inpdb=mrPdb, outpdb=placedAaPdb, fromChain=ucChains, toChain=toChains)

    # The list of chains in the native that we will be checking contacts from
    fromChains = nativePdbInfo.models[0].chains

    # Loop over origins, move the placed pdb to the new origin and then run ncont.
    # self.data holds the data for the best origin.
    self.data = None
    for origin in origins:
        placedOriginPdb = placedAaPdb
        if origin != [0.0, 0.0, 0.0]:
            # Move pdb to the new origin
            ostr = "o{0}".format(origin).replace(" ", "")
            placedOriginPdb = ample_util.filename_append(filename=placedAaPdb, astr=ostr, directory=self.workdir)
            pdb_edit.translate(inpdb=placedAaPdb, outpdb=placedOriginPdb, ftranslate=origin)

        # Concatenate into one file
        joinedPdb = ample_util.filename_append(filename=placedOriginPdb, astr="joined", directory=self.workdir)
        pdb_edit.merge(pdb1=nativePdbInfo.pdb, pdb2=placedOriginPdb, pdbout=joinedPdb)

        # Set up the object to hold the data
        data = RioData()
        data.origin = origin
        data.originPdb = placedOriginPdb
        data.joinedPdb = joinedPdb
        data.fromChains = fromChains
        data.toChains = toChains
        data.numGood = 0  # For holding the metric

        # Run ncont
        if allAtom:
            self.calcAllAtom(data)
            data.numGood = data.aaNumContacts
        else:
            self.calcRio(data)
            data.numGood = data.rioInRegister + data.rioOoRegister

        # Save the first origin and only update if we get a better score
        if not self.data or data.numGood > self.data.numGood:
            self.data = data

    # Now calculate the score we didn't calculate in the loop for the best origin
    if allAtom:
        self.calcRio(self.data)
    else:
        self.calcAllAtom(self.data)

    if self.data.numGood > 0:
        # If we got a match, run csymmatch so we can see the result
        csym = csymmatch.Csymmatch()
        csymmatchPdb = ample_util.filename_append(filename=self.data.originPdb,
                                                  astr="csymmatch_best", directory=self.workdir)
        csym.run(refPdb=nativePdbInfo.pdb, inPdb=self.data.originPdb, outPdb=csymmatchPdb, originHand=False)

    return self.data
def analyseSolution(amoptd, d, mrinfo):
    logger.info("Benchmark: analysing result: {0}".format(d['ensemble_name']))

    mrPdb = None
    if d['MR_program'] == "PHASER":
        mrPdb = d['PHASER_pdbout']
        mrMTZ = d['PHASER_mtzout']
    elif d['MR_program'] == "MOLREP":
        mrPdb = d['MOLREP_pdbout']
    elif d['MR_program'] == "unknown":
        return

    if mrPdb is None or not os.path.isfile(mrPdb):
        # logger.critical("Cannot find mrPdb {0} for solution {1}".format(mrPdb, d))
        return

    # debug - copy into the work directory as reforigin struggles with long pathnames
    shutil.copy(mrPdb, os.path.join(fixpath(amoptd['benchmark_dir']), os.path.basename(mrPdb)))

    mrPdbInfo = pdb_edit.get_info(mrPdb)

    d['num_placed_chains'] = mrPdbInfo.numChains()
    d['num_placed_atoms'] = mrPdbInfo.numAtoms()
    d['num_placed_CA'] = mrPdbInfo.numCalpha()

    if amoptd['native_pdb']:
        if not d['SHELXE_os']:
            logger.critical("mrPdb {0} has no SHELXE_os origin shift. Calculating...".format(mrPdb))
            mrinfo.analyse(mrPdb)
            mrOrigin = mrinfo.originShift
            d['SHELXE_MPE'] = mrinfo.MPE
            d['SHELXE_wMPE'] = mrinfo.wMPE
        else:
            mrOrigin = [c * -1 for c in d['SHELXE_os']]

        # Move the pdb onto the new origin
        originPdb = ample_util.filename_append(mrPdb, astr='offset',
                                               directory=fixpath(amoptd['benchmark_dir']))
        pdb_edit.translate(mrPdb, originPdb, mrOrigin)

        # offset.pdb is the mrModel shifted onto the new origin; use csymmatch to wrap it onto the native
        csymmatch.Csymmatch().wrapModelToNative(
            originPdb,
            amoptd['native_pdb'],
            csymmatchPdb=os.path.join(fixpath(amoptd['benchmark_dir']),
                                      "phaser_{0}_csymmatch.pdb".format(d['ensemble_name'])))
        # can now delete the origin pdb
        os.unlink(originPdb)

        # Calculate the phase error for the MR PDB
        try:
            mrinfo.analyse(mrPdb)
            d['MR_MPE'] = mrinfo.MPE
            d['MR_wMPE'] = mrinfo.wMPE
        except Exception as e:
            logger.critical("Error analysing mrPdb: {0}\n{1}".format(mrPdb, e))

        # We cannot calculate the Reforigin RMSDs or RIO scores for runs where we don't have a full initial
        # model to compare to the native to allow us to determine which parts of the ensemble correspond to
        # which parts of the native structure - or if we were unable to calculate a res_seq_map
        if not (amoptd['homologs']
                or amoptd['ideal_helices']
                or amoptd['import_ensembles']
                or amoptd['single_model_mode']
                or amoptd['res_seq_map']):

            # Get reforigin info
            rmsder = reforigin.ReforiginRmsd()
            try:
                rmsder.getRmsd(nativePdbInfo=amoptd['native_pdb_info'],
                               placedPdbInfo=mrPdbInfo,
                               refModelPdbInfo=amoptd['ref_model_pdb_info'],
                               cAlphaOnly=True,
                               workdir=fixpath(amoptd['benchmark_dir']))
                d['reforigin_RMSD'] = rmsder.rmsd
            except Exception as e:
                logger.critical("Error calculating RMSD: {0}".format(e))
                d['reforigin_RMSD'] = 999

            # Score the origin with all-atom and rio
            rioData = rio.Rio().scoreOrigin(mrOrigin,
                                            mrPdbInfo=mrPdbInfo,
                                            nativePdbInfo=amoptd['native_pdb_info'],
                                            resSeqMap=amoptd['res_seq_map'],
                                            workdir=fixpath(amoptd['benchmark_dir']))

            # Set attributes
            d['AA_num_contacts'] = rioData.aaNumContacts
            d['RIO_num_contacts'] = rioData.rioNumContacts
            d['RIO_in_register'] = rioData.rioInRegister
            d['RIO_oo_register'] = rioData.rioOoRegister
            d['RIO_backwards'] = rioData.rioBackwards
            d['RIO'] = rioData.rioInRegister + rioData.rioOoRegister
            d['RIO_no_cat'] = rioData.rioNumContacts - (rioData.rioInRegister + rioData.rioOoRegister)
            d['RIO_norm'] = float(d['RIO']) / float(d['native_pdb_num_residues'])
        else:
            d['AA_num_contacts'] = None
            d['RIO_num_contacts'] = None
            d['RIO_in_register'] = None
            d['RIO_oo_register'] = None
            d['RIO_backwards'] = None
            d['RIO'] = None
            d['RIO_no_cat'] = None
            d['RIO_norm'] = None

        # Now get the helix
        # helixSequence = contacts.Rio().helixFromContacts(contacts=rioData.contacts,
        #                                                  dsspLog=dsspLog)
        # if helixSequence is not None:
        #     ampleResult.rioHelixSequence = helixSequence
        #     ampleResult.rioLenHelix = len(helixSequence)
        #     hfile = os.path.join(workdir, "{0}.helix".format(ampleResult.ensembleName))
        #     with open(hfile, 'w') as f:
        #         f.write(helixSequence + "\n")

        # This is purely for checking and so we have pdbs to view

        # Wrap shelxe trace onto native using Csymmatch
        if d['SHELXE_pdbout'] is not None and os.path.isfile(fixpath(d['SHELXE_pdbout'])):
            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SHELXE_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            if not ('SHELXE_wMPE' in d and d['SHELXE_wMPE']):
                try:
                    mrinfo.analyse(d['SHELXE_pdbout'])
                    d['SHELXE_MPE'] = mrinfo.MPE
                    d['SHELXE_wMPE'] = mrinfo.wMPE
                except Exception as e:
                    logger.critical("Error analysing SHELXE_pdbout: {0}\n{1}".format(d['SHELXE_pdbout'], e))

        # Wrap the buccaneer model onto native
        if d['SXRBUCC_pdbout'] and os.path.isfile(fixpath(d['SXRBUCC_pdbout'])):
            # Need to rename the pdb as it is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']),
                                        "buccaneer_{0}_csymmatch.pdb".format(d['ensemble_name']))
            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SXRBUCC_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    csymmatchPdb=csymmatchPdb,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate the phase error
            try:
                mrinfo.analyse(d['SXRBUCC_pdbout'])
                d['SXRBUCC_MPE'] = mrinfo.MPE
                d['SXRBUCC_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRBUCC_pdbout: {0}\n{1}".format(d['SXRBUCC_pdbout'], e))

        # Wrap the arpwarp model onto native
        if d['SXRARP_pdbout'] and os.path.isfile(fixpath(d['SXRARP_pdbout'])):
            # Need to rename the pdb as it has a generic output name
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']),
                                        "arpwarp_{0}_csymmatch.pdb".format(d['ensemble_name']))
            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SXRARP_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    csymmatchPdb=csymmatchPdb,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate the phase error
            try:
                mrinfo.analyse(d['SXRARP_pdbout'])
                d['SXRARP_MPE'] = mrinfo.MPE
                d['SXRARP_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRARP_pdbout: {0}\n{1}".format(d['SXRARP_pdbout'], e))
    return
def analysePdb(amoptd):
    """Collect data on the native pdb structure"""
    nativePdb = fixpath(amoptd['native_pdb'])
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # Number of atoms/residues
    natoms, nresidues = pdb_edit.num_atoms_and_residues(nativePdb)

    # Get information on the origins for this spaceGroup
    try:
        originInfo = pdb_model.OriginInfo(spaceGroupLabel=nativePdbInfo.crystalInfo.spaceGroup)
    except Exception:
        originInfo = None

    # Do this here as a bug in pdbcur can knacker the CRYST1 data
    amoptd['native_pdb_code'] = nativePdbInfo.pdbCode
    amoptd['native_pdb_title'] = nativePdbInfo.title
    amoptd['native_pdb_resolution'] = nativePdbInfo.resolution
    amoptd['native_pdb_solvent_content'] = nativePdbInfo.solventContent
    amoptd['native_pdb_matthews_coefficient'] = nativePdbInfo.matthewsCoefficient
    if not originInfo:
        space_group = "P1"
    else:
        space_group = originInfo.spaceGroup()
    amoptd['native_pdb_space_group'] = space_group
    amoptd['native_pdb_num_atoms'] = natoms
    amoptd['native_pdb_num_residues'] = nresidues

    # First check if the native has > 1 model and extract the first if so
    if len(nativePdbInfo.models) > 1:
        logger.info("nativePdb has > 1 model - using first")
        nativePdb1 = ample_util.filename_append(filename=nativePdb, astr="model1",
                                                directory=fixpath(amoptd['work_dir']))
        pdb_edit.extract_model(nativePdb, nativePdb1, modelID=nativePdbInfo.models[0].serial)
        nativePdb = nativePdb1

    # Standardise the PDB to rename any non-standard AA, remove solvent etc
    nativePdbStd = ample_util.filename_append(filename=nativePdb, astr="std",
                                              directory=fixpath(amoptd['work_dir']))
    pdb_edit.standardise(nativePdb, nativePdbStd, del_hetatm=True)
    nativePdb = nativePdbStd

    # Get the new info about the native
    nativePdbInfo = pdb_edit.get_info(nativePdb)

    # For comparison of the shelxe model we need a single chain from the native, so we get this here
    if len(nativePdbInfo.models[0].chains) > 1:
        nativeChain1 = ample_util.filename_append(filename=nativePdbInfo.pdb, astr="chain1",
                                                  directory=fixpath(amoptd['work_dir']))
        pdb_edit.merge_chains(nativePdbInfo.pdb, nativeChain1)
    else:
        nativeChain1 = nativePdbInfo.pdb

    # Additional data
    amoptd['native_pdb_num_chains'] = len(nativePdbInfo.models[0].chains)
    amoptd['native_pdb_info'] = nativePdbInfo
    amoptd['native_pdb_std'] = nativePdbStd
    amoptd['native_pdb_1chain'] = nativeChain1
    amoptd['native_pdb_origin_info'] = originInfo
    return
def generate_ensembles(self, models, alignment_file=None, ensembles_directory=None,
                       homolog_aligner=None, nproc=None, percent_truncation=None,
                       side_chain_treatments=SIDE_CHAIN_TREATMENTS,
                       truncation_method=None):
    if not percent_truncation:
        percent_truncation = self.percent_truncation
    if not truncation_method:
        truncation_method = self.truncation_method

    if not len(models):
        msg = "Cannot find any models for ensembling!"
        raise RuntimeError(msg)
    if not all([os.path.isfile(m) for m in models]):
        msg = "Problem reading models given to Ensembler: {0}".format(models)
        raise RuntimeError(msg)

    logger.info('Ensembling models in directory: %s', self.work_dir)

    # Create the final ensembles directory
    if not os.path.isdir(self.ensembles_directory):
        os.mkdir(self.ensembles_directory)

    # Standardise all the models
    std_models_dir = os.path.join(self.work_dir, "std_models")
    os.mkdir(std_models_dir)
    std_models = []
    for m in models:
        std_model = ample_util.filename_append(m, 'std', std_models_dir)
        pdb_edit.standardise(pdbin=m, pdbout=std_model, del_hetatm=True)
        std_models.append(std_model)

    # Get a structural alignment between the different models
    if not alignment_file:
        if homolog_aligner == 'mustang':
            logger.info("Generating alignment file with mustang_exe: %s", self.mustang_exe)
            alignment_file = align_mustang(std_models, mustang_exe=self.mustang_exe, work_dir=self.work_dir)
        elif homolog_aligner == 'gesamt':
            logger.info("Generating alignment file with gesamt_exe: %s", self.gesamt_exe)
            alignment_file = align_gesamt(std_models, gesamt_exe=self.gesamt_exe, work_dir=self.work_dir)
        else:
            msg = "Unknown homolog_aligner: {0}".format(homolog_aligner)
            raise RuntimeError(msg)
        logger.info("Generated alignment file: %s", alignment_file)
    else:
        logger.info("Using alignment file: %s", alignment_file)

    truncate_dir = os.path.join(self.work_dir, "homolog_truncate")
    if not os.path.isdir(truncate_dir):
        os.mkdir(truncate_dir)

    # Now truncate and create ensembles - as standard ample, but with no subclustering
    self.ensembles = []
    self.truncator = truncation_util.Truncator(work_dir=truncate_dir)
    self.truncator.theseus_exe = self.theseus_exe
    for truncation in self.truncator.truncate_models(models=std_models,
                                                     truncation_method=truncation_method,
                                                     percent_truncation=percent_truncation,
                                                     truncation_pruning=None,
                                                     homologs=True,
                                                     alignment_file=alignment_file):
        ensemble_dir = os.path.join(truncation.directory, "ensemble_{0}".format(truncation.level))
        os.mkdir(ensemble_dir)
        os.chdir(ensemble_dir)

        # Need to create an alignment file for theseus
        basename = "e{0}".format(truncation.level)
        superposed_models = self.superpose_models(truncation.models, basename=basename,
                                                  work_dir=ensemble_dir, homologs=True)
        if not superposed_models:
            logger.critical("Skipping ensemble %s due to error with Theseus", basename)
            continue

        # Create the Ensemble object
        pre_ensemble = _ensembler.Ensemble()
        pre_ensemble.num_residues = truncation.num_residues
        pre_ensemble.truncation_dir = truncation.directory
        pre_ensemble.truncation_level = truncation.level
        pre_ensemble.truncation_method = truncation.method
        pre_ensemble.truncation_percent = truncation.percent
        pre_ensemble.truncation_residues = truncation.residues
        pre_ensemble.truncation_variance = truncation.variances
        pre_ensemble.pdb = superposed_models

        for ensemble in self.edit_side_chains(pre_ensemble, side_chain_treatments, homologs=True):
            self.ensembles.append(ensemble)

    return self.ensembles
def analyseSolution(amoptd, d, mrinfo):
    logger.info("Benchmark: analysing result: {0}".format(d['ensemble_name']))

    mrPdb = None
    if d['MR_program'] == "PHASER":
        mrPdb = d['PHASER_pdbout']
        mrMTZ = d['PHASER_mtzout']
    elif d['MR_program'] == "MOLREP":
        mrPdb = d['MOLREP_pdbout']
    elif d['MR_program'] == "unknown":
        return

    if mrPdb is None or not os.path.isfile(mrPdb):
        # logger.critical("Cannot find mrPdb {0} for solution {1}".format(mrPdb, d))
        return

    # debug - copy into the work directory as reforigin struggles with long pathnames
    shutil.copy(mrPdb, os.path.join(fixpath(amoptd['benchmark_dir']), os.path.basename(mrPdb)))

    mrPdbInfo = pdb_edit.get_info(mrPdb)

    d['num_placed_chains'] = mrPdbInfo.numChains()
    d['num_placed_atoms'] = mrPdbInfo.numAtoms()
    d['num_placed_CA'] = mrPdbInfo.numCalpha()

    if amoptd['native_pdb']:
        if not d['SHELXE_os']:
            logger.critical("mrPdb {0} has no SHELXE_os origin shift. Calculating...".format(mrPdb))
            mrinfo.analyse(mrPdb)
            mrOrigin = mrinfo.originShift
            d['SHELXE_MPE'] = mrinfo.MPE
            d['SHELXE_wMPE'] = mrinfo.wMPE
        else:
            mrOrigin = [c * -1 for c in d['SHELXE_os']]

        # Move the pdb onto the new origin
        originPdb = ample_util.filename_append(mrPdb, astr='offset',
                                               directory=fixpath(amoptd['benchmark_dir']))
        pdb_edit.translate(mrPdb, originPdb, mrOrigin)

        # offset.pdb is the mrModel shifted onto the new origin; use csymmatch to wrap it onto the native
        csymmatch.Csymmatch().wrapModelToNative(
            originPdb,
            amoptd['native_pdb'],
            csymmatchPdb=os.path.join(fixpath(amoptd['benchmark_dir']),
                                      "phaser_{0}_csymmatch.pdb".format(d['ensemble_name'])))
        # can now delete the origin pdb
        os.unlink(originPdb)

        # Calculate the phase error for the MR PDB
        try:
            mrinfo.analyse(mrPdb)
            d['MR_MPE'] = mrinfo.MPE
            d['MR_wMPE'] = mrinfo.wMPE
        except Exception as e:
            logger.critical("Error analysing mrPdb: {0}\n{1}".format(mrPdb, e))

        # We cannot calculate the Reforigin RMSDs or RIO scores for runs where we don't have a full initial
        # model to compare to the native to allow us to determine which parts of the ensemble correspond to
        # which parts of the native structure - or if we were unable to calculate a res_seq_map
        if not (amoptd['homologs']
                or amoptd['ideal_helices']
                or amoptd['import_ensembles']
                or amoptd['single_model_mode']
                or amoptd['res_seq_map']):

            # Get reforigin info
            rmsder = reforigin.ReforiginRmsd()
            try:
                rmsder.getRmsd(nativePdbInfo=amoptd['native_pdb_info'],
                               placedPdbInfo=mrPdbInfo,
                               refModelPdbInfo=amoptd['ref_model_pdb_info'],
                               cAlphaOnly=True,
                               workdir=fixpath(amoptd['benchmark_dir']))
                d['reforigin_RMSD'] = rmsder.rmsd
            except Exception as e:
                logger.critical("Error calculating RMSD: {0}".format(e))
                d['reforigin_RMSD'] = 999

            # Score the origin with all-atom and rio
            rioData = rio.Rio().scoreOrigin(mrOrigin,
                                            mrPdbInfo=mrPdbInfo,
                                            nativePdbInfo=amoptd['native_pdb_info'],
                                            resSeqMap=amoptd['res_seq_map'],
                                            workdir=fixpath(amoptd['benchmark_dir']))

            # Set attributes
            d['AA_num_contacts'] = rioData.aaNumContacts
            d['RIO_num_contacts'] = rioData.rioNumContacts
            d['RIO_in_register'] = rioData.rioInRegister
            d['RIO_oo_register'] = rioData.rioOoRegister
            d['RIO_backwards'] = rioData.rioBackwards
            d['RIO'] = rioData.rioInRegister + rioData.rioOoRegister
            d['RIO_no_cat'] = rioData.rioNumContacts - (rioData.rioInRegister + rioData.rioOoRegister)
            d['RIO_norm'] = float(d['RIO']) / float(d['native_pdb_num_residues'])
        else:
            d['AA_num_contacts'] = None
            d['RIO_num_contacts'] = None
            d['RIO_in_register'] = None
            d['RIO_oo_register'] = None
            d['RIO_backwards'] = None
            d['RIO'] = None
            d['RIO_no_cat'] = None
            d['RIO_norm'] = None

        # Now get the helix
        # helixSequence = contacts.Rio().helixFromContacts(contacts=rioData.contacts,
        #                                                  dsspLog=dsspLog)
        # if helixSequence is not None:
        #     ampleResult.rioHelixSequence = helixSequence
        #     ampleResult.rioLenHelix = len(helixSequence)
        #     hfile = os.path.join(workdir, "{0}.helix".format(ampleResult.ensembleName))
        #     with open(hfile, 'w') as f:
        #         f.write(helixSequence + "\n")

        # This is purely for checking and so we have pdbs to view

        # Wrap shelxe trace onto native using Csymmatch
        if d['SHELXE_pdbout'] is not None and os.path.isfile(fixpath(d['SHELXE_pdbout'])):
            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SHELXE_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            if not ('SHELXE_wMPE' in d and d['SHELXE_wMPE']):
                try:
                    mrinfo.analyse(d['SHELXE_pdbout'])
                    d['SHELXE_MPE'] = mrinfo.MPE
                    d['SHELXE_wMPE'] = mrinfo.wMPE
                except Exception as e:
                    logger.critical("Error analysing SHELXE_pdbout: {0}\n{1}".format(d['SHELXE_pdbout'], e))

        # Wrap the buccaneer model onto native
        if d['SXRBUCC_pdbout'] and os.path.isfile(fixpath(d['SXRBUCC_pdbout'])):
            # Need to rename the pdb as it is just called buccSX_output.pdb
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']),
                                        "buccaneer_{0}_csymmatch.pdb".format(d['ensemble_name']))
            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SXRBUCC_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    csymmatchPdb=csymmatchPdb,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate the phase error
            try:
                mrinfo.analyse(d['SXRBUCC_pdbout'])
                d['SXRBUCC_MPE'] = mrinfo.MPE
                d['SXRBUCC_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRBUCC_pdbout: {0}\n{1}".format(d['SXRBUCC_pdbout'], e))

        # Wrap the arpwarp model onto native
        if d['SXRARP_pdbout'] and os.path.isfile(fixpath(d['SXRARP_pdbout'])):
            # Need to rename the pdb as it has a generic output name
            csymmatchPdb = os.path.join(fixpath(amoptd['benchmark_dir']),
                                        "arpwarp_{0}_csymmatch.pdb".format(d['ensemble_name']))
            csymmatch.Csymmatch().wrapModelToNative(fixpath(d['SXRARP_pdbout']),
                                                    amoptd['native_pdb'],
                                                    origin=mrOrigin,
                                                    csymmatchPdb=csymmatchPdb,
                                                    workdir=fixpath(amoptd['benchmark_dir']))
            # Calculate the phase error
            try:
                mrinfo.analyse(d['SXRARP_pdbout'])
                d['SXRARP_MPE'] = mrinfo.MPE
                d['SXRARP_wMPE'] = mrinfo.wMPE
            except Exception as e:
                logger.critical("Error analysing SXRARP_pdbout: {0}\n{1}".format(d['SXRARP_pdbout'], e))
    return