def test_resSeqMap3(self):
    """Check the residue map between chain A of native 2UUI and its model.

    Chain A is extracted from the native PDB into a scratch file in the
    current working directory and a standardised copy is requested next
    to it.  Both scratch files are removed when the test finishes,
    whether it passes or fails, so a failing assertion no longer leaves
    stray PDB files behind.
    """
    # See if we can sort out the indexing between the native and model
    nativePdb = os.path.join(self.testfiles_dir, "2UUI.pdb")
    modelPdb = os.path.join(self.testfiles_dir, "2UUI_S_00000001.pdb")
    chainA = "2UUI_A.pdb"
    chainAstd = "2UUI_A_std.pdb"
    try:
        pdb_edit.extract_chain(nativePdb, chainA, chainID='A')
        pdb_edit.standardise(chainA, chainAstd)

        # NOTE(review): the map is built from chainA, not from the
        # standardised chainAstd - confirm this is intended.
        resSeqMap = residue_map.residueSequenceMap(chainA, modelPdb)

        self.assertEqual(156, resSeqMap._lenMatch())

        nativeMask = [False] * 155 + [True]
        self.assertEqual(resSeqMap.refCAlphaMask, nativeMask)

        self.assertEqual(resSeqMap.ref2target(10), 16)
        self.assertEqual(resSeqMap.target2ref(155), 149)

        # Check ends match up
        m1 = resSeqMap.targetResSeq[resSeqMap.targetOffset]
        n1 = resSeqMap.target2ref(m1)
        self.assertEqual(m1, resSeqMap.ref2target(n1))

        # Named lastRef (not "re") so the regex module name is not shadowed
        lastRef = resSeqMap.refResSeq[resSeqMap.refOffset + resSeqMap.lenMatch - 1]
        self.assertEqual(
            resSeqMap.ref2target(lastRef),
            resSeqMap.targetResSeq[resSeqMap.targetOffset + resSeqMap.lenMatch - 1],
        )
    finally:
        # Always tidy up, but only files that were actually created so a
        # cleanup error cannot mask the real test failure.
        for scratch in (chainA, chainAstd):
            if os.path.exists(scratch):
                os.unlink(scratch)
def test_resSeqMap1(self):
    """Exercise the residue map on hand-built target and reference data.

    The sequences, residue numbers and C-alpha masks are assigned
    directly on a fresh map, ``_calc_map`` is run, and the resulting
    offsets, match length, number conversions and incomparable lists
    are compared against known values.
    """
    # See if we can sort out the indexing between the native and model
    seq_map = residue_map.residueSequenceMap()

    # Target side
    seq_map.targetSequence = [
        'G', 'G',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'F', 'F', 'F', 'F', 'F', 'F',
    ]
    seq_map.targetResSeq = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    seq_map.targetCAlphaMask = [
        False, False, False, False, True, False,
        False, False, False, False, False, False,
        False, False, False, False, False, False,
    ]

    # Reference side
    seq_map.refSequence = [
        'H', 'H', 'H',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'G', 'G', 'G', 'G', 'G', 'G',
    ]
    seq_map.refResSeq = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
    seq_map.refCAlphaMask = [
        False, False, False, False, False, False, True,
        False, False, False, False, False, False,
        False, False, False, False, False, False,
    ]

    seq_map._calc_map()

    self.assertEqual(seq_map.targetOffset, 2)
    self.assertEqual(seq_map.refOffset, 3)
    self.assertEqual(seq_map._lenMatch(), 10)

    # (input residue number, expected converted number)
    for ref_number, expected in ((0, -6), (3, -3)):
        self.assertEqual(seq_map.ref2target(ref_number), expected)
    for target_number, expected in ((1, 7), (12, 18), (6, 12)):
        self.assertEqual(seq_map.target2ref(target_number), expected)

    self.assertEqual(seq_map.targetIncomparable(), [-5, -4, -1, 0, 7, 8, 9, 10, 11, 12])
    self.assertEqual(seq_map.refIncomparable(), [0, 1, 2, 5, 6, 12, 13, 14, 15, 16, 17, 18])

    # Check ends match up: first matched target residue round-trips
    first_target = seq_map.targetResSeq[seq_map.targetOffset]
    self.assertEqual(first_target, seq_map.ref2target(seq_map.target2ref(first_target)))

    # ...and the last matched reference residue maps onto the last matched target
    last_ref = seq_map.refResSeq[seq_map.refOffset + seq_map.lenMatch - 1]
    self.assertEqual(
        seq_map.ref2target(last_ref),
        seq_map.targetResSeq[seq_map.targetOffset + seq_map.lenMatch - 1],
    )
def analyseModels(amoptd):
    """Compare the generated models with the native structure.

    Reads from ``amoptd``: 'models_dir', 'native_pdb_info' and
    'have_tmscore'; when TM-score is available also 'tmscore_exe',
    'benchmark_dir', 'native_pdb_std' and 'fasta'.

    Writes to ``amoptd``:
      * 'ref_model_pdb_info' - info object for the first model found
      * 'res_seq_map' - residue map between that model and the native,
        or None if the map could not be calculated
      * 'tmComp' - result of the TM-score comparison (only set when the
        comparison ran without error)

    Raises:
        IndexError: if 'models_dir' contains no ``*.pdb`` file.
        RuntimeError: if ``amoptd['have_tmscore']`` is false.
    """
    # Get hold of a full model so we can do the mapping of residues
    refModelPdb = glob.glob(os.path.join(amoptd['models_dir'], "*.pdb"))[0]
    nativePdbInfo = amoptd['native_pdb_info']
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    amoptd['ref_model_pdb_info'] = refModelPdbInfo

    try:
        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(
            refInfo=refModelPdbInfo,
            refChainID=refModelPdbInfo.models[0].chains[0],  # Only 1 chain in model
            targetInfo=nativePdbInfo,
            targetChainID=nativePdbInfo.models[0].chains[0],
        )
        amoptd['res_seq_map'] = resSeqMap
    except Exception as e:
        # Deliberately best-effort: log with traceback and carry on.
        # Arguments are passed lazily, matching the TM-score handler below.
        logger.exception("Error calculating resSeqMap: %s", e)
        amoptd['res_seq_map'] = None  # Won't be able to calculate RIO scores

    if amoptd['have_tmscore']:
        try:
            tm = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
            # Calculation of TMscores for all models
            logger.info("Analysing Rosetta models with TMscore")
            # NOTE(review): this pattern is "*pdb" while the one above is
            # "*.pdb" - left unchanged, confirm which is intended.
            model_list = sorted(glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
            structure_list = [amoptd['native_pdb_std']]
            amoptd['tmComp'] = tm.compare_structures(model_list, structure_list, fastas=[amoptd['fasta']])
        except Exception as e:
            logger.exception("Unable to run TMscores: %s", e)
    else:
        raise RuntimeError("No program to calculate TMSCORES")
def test_resSeqMap3(self):
    """Check the residue map between chain A of native 2UUI and its model.

    Chain A is extracted from the native PDB into a scratch file in the
    current working directory and a standardised copy is requested next
    to it.  Both scratch files are removed when the test finishes,
    whether it passes or fails, so a failing assertion no longer leaves
    stray PDB files behind.
    """
    # See if we can sort out the indexing between the native and model
    nativePdb = os.path.join(self.testfiles_dir, "2UUI.pdb")
    modelPdb = os.path.join(self.testfiles_dir, "2UUI_S_00000001.pdb")
    chainA = "2UUI_A.pdb"
    chainAstd = "2UUI_A_std.pdb"
    try:
        pdb_edit.extract_chain(nativePdb, chainA, chainID='A')
        pdb_edit.standardise(chainA, chainAstd)

        # NOTE(review): the map is built from chainA, not from the
        # standardised chainAstd - confirm this is intended.
        resSeqMap = residue_map.residueSequenceMap(chainA, modelPdb)

        self.assertEqual(156, resSeqMap._lenMatch())

        nativeMask = [False] * 155 + [True]
        self.assertEqual(resSeqMap.refCAlphaMask, nativeMask)

        self.assertEqual(resSeqMap.ref2target(10), 16)
        self.assertEqual(resSeqMap.target2ref(155), 149)

        # Check ends match up
        m1 = resSeqMap.targetResSeq[resSeqMap.targetOffset]
        n1 = resSeqMap.target2ref(m1)
        self.assertEqual(m1, resSeqMap.ref2target(n1))

        # Named lastRef (not "re") so the regex module name is not shadowed
        lastRef = resSeqMap.refResSeq[resSeqMap.refOffset + resSeqMap.lenMatch - 1]
        self.assertEqual(
            resSeqMap.ref2target(lastRef),
            resSeqMap.targetResSeq[resSeqMap.targetOffset + resSeqMap.lenMatch - 1],
        )
    finally:
        # Always tidy up, but only files that were actually created so a
        # cleanup error cannot mask the real test failure.
        for scratch in (chainA, chainAstd):
            if os.path.exists(scratch):
                os.unlink(scratch)
def test_resSeqMap2(self):
    """Check the residue map between native 2XOV and its model.

    Asserts the matched length and that the first and last matched
    residues convert consistently between the two numbering schemes.
    """
    # See if we can sort out the indexing between the native and model
    native_path = os.path.join(self.testfiles_dir, "2XOV.pdb")
    model_path = os.path.join(self.testfiles_dir, "2XOV_S_00000001.pdb")

    seq_map = residue_map.residueSequenceMap(native_path, model_path)

    self.assertEqual(181, seq_map._lenMatch())

    # Check ends match up: first matched target residue round-trips
    first_target = seq_map.targetResSeq[seq_map.targetOffset]
    self.assertEqual(first_target, seq_map.ref2target(seq_map.target2ref(first_target)))

    # ...and the last matched reference residue maps onto the last matched target
    last_ref = seq_map.refResSeq[seq_map.refOffset + seq_map.lenMatch - 1]
    self.assertEqual(
        seq_map.ref2target(last_ref),
        seq_map.targetResSeq[seq_map.targetOffset + seq_map.lenMatch - 1],
    )
def test_resSeqMap4(self):
    """Smoke-test building a residue map from info objects for 1K33.

    There are no assertions: the test passes when standardising the
    native, reading both info objects and calling ``fromInfo`` for
    chain 'A' all complete without raising.  The standardised scratch
    file is written to the current working directory and is removed
    whether or not the test succeeds.
    """
    # See if we can sort out the indexing between the native and model
    nativePdb = os.path.join(self.testfiles_dir, "1K33.pdb")
    modelPdb = os.path.join(self.testfiles_dir, "1K33_S_00000001.pdb")

    nativePdbStd = "1K33_std.pdb"
    try:
        pdb_edit.standardise(nativePdb, nativePdbStd)

        nativeInfo = pdb_edit.get_info(nativePdbStd)
        modelInfo = pdb_edit.get_info(modelPdb)

        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(nativeInfo, 'A', modelInfo, 'A')
    finally:
        # Only remove the file if it was created, so a cleanup error
        # cannot hide the exception that actually failed the test.
        if os.path.exists(nativePdbStd):
            os.unlink(nativePdbStd)
def test_resSeqMap4(self):
    """Smoke-test building a residue map from info objects for 1K33.

    There are no assertions: the test passes when standardising the
    native, reading both info objects and calling ``fromInfo`` for
    chain 'A' all complete without raising.  The standardised scratch
    file is written to the current working directory and is removed
    whether or not the test succeeds.
    """
    # See if we can sort out the indexing between the native and model
    nativePdb = os.path.join(self.testfiles_dir, "1K33.pdb")
    modelPdb = os.path.join(self.testfiles_dir, "1K33_S_00000001.pdb")

    nativePdbStd = "1K33_std.pdb"
    try:
        pdb_edit.standardise(nativePdb, nativePdbStd)

        nativeInfo = pdb_edit.get_info(nativePdbStd)
        modelInfo = pdb_edit.get_info(modelPdb)

        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(nativeInfo, 'A', modelInfo, 'A')
    finally:
        # Only remove the file if it was created, so a cleanup error
        # cannot hide the exception that actually failed the test.
        if os.path.exists(nativePdbStd):
            os.unlink(nativePdbStd)
def test_resSeqMap2(self):
    """Check the residue map between native 2XOV and its model.

    Asserts the matched length and that the first and last matched
    residues convert consistently between the two numbering schemes.
    """
    # See if we can sort out the indexing between the native and model
    native_path = os.path.join(self.testfiles_dir, "2XOV.pdb")
    model_path = os.path.join(self.testfiles_dir, "2XOV_S_00000001.pdb")

    seq_map = residue_map.residueSequenceMap(native_path, model_path)

    self.assertEqual(181, seq_map._lenMatch())

    # Check ends match up: first matched target residue round-trips
    first_target = seq_map.targetResSeq[seq_map.targetOffset]
    self.assertEqual(first_target, seq_map.ref2target(seq_map.target2ref(first_target)))

    # ...and the last matched reference residue maps onto the last matched target
    last_ref = seq_map.refResSeq[seq_map.refOffset + seq_map.lenMatch - 1]
    self.assertEqual(
        seq_map.ref2target(last_ref),
        seq_map.targetResSeq[seq_map.targetOffset + seq_map.lenMatch - 1],
    )
def analyseModels(amoptd):
    """Compare the generated models with the native structure.

    A residue map between the first ``*.pdb`` model found in
    ``amoptd['models_dir']`` and the native is stored under
    'res_seq_map', and the model's info object under
    'ref_model_pdb_info'.  The models are then scored with TM-score
    when ``amoptd['have_tmscore']`` is true (result stored under
    'tmComp'; failures are logged as critical and swallowed), otherwise
    with Maxcluster, whose wrapper is kept in the module-level
    ``_MAXCLUSTERER``.
    """
    # setting a module-level variable so need to use global keyword so it
    # doesn't become a local variable
    global _MAXCLUSTERER

    # Get hold of a full model so we can do the mapping of residues
    models_glob = os.path.join(amoptd['models_dir'], "*.pdb")
    first_model = glob.glob(models_glob)[0]
    native_info = amoptd['native_pdb_info']

    seq_map = residue_map.residueSequenceMap()
    model_info = pdb_edit.get_info(first_model)
    seq_map.fromInfo(
        refInfo=model_info,
        refChainID=model_info.models[0].chains[0],  # Only 1 chain in model
        targetInfo=native_info,
        targetChainID=native_info.models[0].chains[0],
    )
    amoptd['res_seq_map'] = seq_map
    amoptd['ref_model_pdb_info'] = model_info

    if not amoptd['have_tmscore']:
        # No TM-score available: fall back to Maxcluster
        clusterer = maxcluster.Maxcluster(amoptd['maxcluster_exe'])
        _MAXCLUSTERER = clusterer
        logger.info("Analysing Rosetta models with Maxcluster")
        clusterer.compareDirectory(
            nativePdbInfo=native_info,
            resSeqMap=seq_map,
            modelsDirectory=amoptd['models_dir'],
            workdir=fixpath(amoptd['benchmark_dir']),
        )
        return

    try:
        scorer = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
        # Calculation of TMscores for all models
        logger.info("Analysing Rosetta models with TMscore")
        all_models = glob.glob(os.path.join(amoptd['models_dir'], "*pdb"))
        all_models = sorted(all_models)
        natives = [amoptd['native_pdb_std']]
        amoptd['tmComp'] = scorer.compare_structures(all_models, natives, fastas=[amoptd['fasta']])
    except Exception as e:
        logger.critical("Unable to run TMscores: {0}".format(e))
def match_resseq(targetPdb=None, outPdb=None, resMap=None, sourcePdb=None):
    """Copy ``targetPdb`` to ``outPdb`` with ATOM residue numbers re-mapped.

    Every ATOM record has its residue number passed through
    ``resMap.ref2target``; records whose number is unchanged are copied
    verbatim, the others are re-serialised with the new number.  All
    other lines (including TER records and atoms of any further chains)
    are copied through untouched.

    Args:
        targetPdb: path of the PDB file to read.
        outPdb: path of the PDB file to write.
        resMap: residue map providing ``ref2target``.  Give either this
            or ``sourcePdb``, not both.
        sourcePdb: path of a PDB used to build the map (as
            ``residueSequenceMap(targetPdb, sourcePdb)``) when no
            ``resMap`` is supplied.

    Raises:
        AssertionError: if neither or both of ``resMap`` / ``sourcePdb``
            are given.
        RuntimeError: if the file contains MODEL or ANISOU records.
    """
    assert sourcePdb or resMap, "Need one of sourcePdb or resMap"
    assert not (sourcePdb and resMap), "Give only one of sourcePdb or resMap"

    if not resMap:
        resMap = residue_map.residueSequenceMap(targetPdb, sourcePdb)

    with open(targetPdb, 'r') as target, open(outPdb, 'w') as out:
        for line in target:
            if line.startswith("MODEL"):
                raise RuntimeError("Multi-model file!")

            if line.startswith("ANISOU"):
                raise RuntimeError("I cannot cope with ANISOU! {0}".format(line))

            if line.startswith("ATOM"):
                atom = pdb_model.PdbAtom(line)

                # Get the matching resSeq for the model
                modelResSeq = resMap.ref2target(atom.resSeq)
                if modelResSeq == atom.resSeq:
                    # Unchanged - keep the original text byte-for-byte
                    out.write(line)
                else:
                    atom.resSeq = modelResSeq
                    out.write(atom.toLine() + "\n")
                continue

            # Output everything else (TER included) as-is
            out.write(line)
def analyseModels(amoptd):
    """Compare the generated models with the native structure.

    Reads from ``amoptd``: 'models_dir', 'native_pdb_info' and
    'have_tmscore'; when TM-score is available also 'tmscore_exe',
    'benchmark_dir', 'native_pdb_std' and 'fasta'.

    Writes to ``amoptd``:
      * 'ref_model_pdb_info' - info object for the first model found
      * 'res_seq_map' - residue map between that model and the native,
        or None if the map could not be calculated
      * 'tmComp' - result of the TM-score comparison (only set when the
        comparison ran without error)

    Raises:
        IndexError: if 'models_dir' contains no ``*.pdb`` file.
        RuntimeError: if ``amoptd['have_tmscore']`` is false.
    """
    # Get hold of a full model so we can do the mapping of residues
    refModelPdb = glob.glob(os.path.join(amoptd['models_dir'], "*.pdb"))[0]
    nativePdbInfo = amoptd['native_pdb_info']
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    amoptd['ref_model_pdb_info'] = refModelPdbInfo

    try:
        resSeqMap = residue_map.residueSequenceMap()
        resSeqMap.fromInfo(
            refInfo=refModelPdbInfo,
            refChainID=refModelPdbInfo.models[0].chains[0],  # Only 1 chain in model
            targetInfo=nativePdbInfo,
            targetChainID=nativePdbInfo.models[0].chains[0],
        )
        amoptd['res_seq_map'] = resSeqMap
    except Exception as e:
        # Deliberately best-effort: log with traceback and carry on.
        # Arguments are passed lazily, matching the TM-score handler below.
        logger.exception("Error calculating resSeqMap: %s", e)
        amoptd['res_seq_map'] = None  # Won't be able to calculate RIO scores

    if amoptd['have_tmscore']:
        try:
            tm = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
            # Calculation of TMscores for all models
            logger.info("Analysing Rosetta models with TMscore")
            # NOTE(review): this pattern is "*pdb" while the one above is
            # "*.pdb" - left unchanged, confirm which is intended.
            model_list = sorted(glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
            structure_list = [amoptd['native_pdb_std']]
            amoptd['tmComp'] = tm.compare_structures(model_list, structure_list, fastas=[amoptd['fasta']])
        except Exception as e:
            logger.exception("Unable to run TMscores: %s", e)
    else:
        raise RuntimeError("No program to calculate TMSCORES")
def getRmsd(self, nativePdbInfo=None, placedPdbInfo=None, refModelPdbInfo=None, workdir=None, cAlphaOnly=True):
    """Find the lowest RMSD between any native chain and any placed chain.

    For now just save the lowest rmsd - can look at collecting more
    info later.  Currently we assume we are only given one model and
    that it has already been standardised.

    Every chain of the native is compared with every chain of the
    placed structure.  The best result is stored on the instance as
    ``rmsd``, ``bestNativeChain``, ``bestPlacedChain`` and
    ``bestReforiginPdb``; the reforigin output files of all other
    comparisons are deleted.

    Args:
        nativePdbInfo: info object for the native (uses ``.pdb`` and
            ``.models[0].chains``).
        placedPdbInfo: info object for the placed structure (same
            attributes).
        refModelPdbInfo: info object for the reference model, used as
            the target of the residue map (chain 'A').
        workdir: working directory; falls back to ``self.workdir`` and
            then to the current directory.
        cAlphaOnly: whether to only compare c-alpha atoms.

    Raises:
        RuntimeError: if the residue map cannot be calculated for the
            first native chain (the original error is re-raised).
        IndexError: if no comparison could be made at all.
    """
    if workdir:
        self.workdir = workdir
    if not self.workdir:
        self.workdir = os.getcwd()

    self.cAlphaOnly = cAlphaOnly  # Whether to only compare c-alpha atoms

    # Run a pass to find the # chains
    native_chains = nativePdbInfo.models[0].chains
    placed_chains = placedPdbInfo.models[0].chains

    # One (rms, nativeChainID, placedChainID, reforiginPdb) tuple per
    # comparison.  A list rather than a dict keyed on rms, so that equal
    # values (e.g. several 99999 failures) cannot overwrite each other
    # and leave their output files behind.
    results = []

    # Match each chain in native against refined and pick the best
    for nativeChainID in native_chains:
        if len(native_chains) == 1:
            # Don't need to do owt as we are just using the native as is
            nativeChainPdb = nativePdbInfo.pdb
        else:
            # Extract the chain from the pdb
            astr = "chain{0}".format(nativeChainID)
            nativeChainPdb = ample_util.filename_append(
                filename=nativePdbInfo.pdb, astr=astr, directory=self.workdir)
            pdb_edit.extract_chain(nativePdbInfo.pdb, nativeChainPdb, chainID=nativeChainID)

        # Calculate the RefSeqMap - need to do this before we reduce to c-alphas
        # The second chain may be a different composition to the first, so we
        # only generate a traceback if we fail on the first chain.  The model
        # only has one chain, so the residueMap has to be the same for all the
        # chains
        try:
            resSeqMap = residue_map.residueSequenceMap()
            resSeqMap.fromInfo(
                refInfo=nativePdbInfo,
                refChainID=nativeChainID,
                targetInfo=refModelPdbInfo,
                targetChainID='A',  # Model only has one chain
            )
        except RuntimeError:
            if nativeChainID == native_chains[0]:
                # Re-raise the real error (message and traceback intact)
                # instead of an anonymous Exception.
                raise
            # Only compare the first chain
            break

        for placedChainID in placed_chains:
            # Prepare the placed PDB
            placedChainPdb = self.preparePlacedPdb(
                placedPdb=placedPdbInfo.pdb,
                placedChainID=placedChainID,
                nativeChainID=nativeChainID,
                resSeqMap=resSeqMap)

            # Now create a PDB with the matching atoms from native that are in refined
            nativePdbMatch = ample_util.filename_append(
                filename=nativeChainPdb, astr="matched", directory=self.workdir)
            pdb_edit.keep_matching(
                refpdb=placedChainPdb,
                targetpdb=nativeChainPdb,
                outpdb=nativePdbMatch,
                resSeqMap=resSeqMap)

            # Now get the rmsd
            astr = "chain{0}_reforigin".format(nativeChainID)
            reforiginOut = ample_util.filename_append(
                filename=placedChainPdb, astr=astr, directory=self.workdir)
            try:
                rms = self.calculate(refpdb=nativePdbMatch, targetpdb=placedChainPdb, outpdb=reforiginOut)
            except RuntimeError as e:
                logger.critical(
                    "GOT REFORIGIN ERROR for {0},{1},{2}\n{3}".format(
                        placedChainPdb, nativeChainPdb, nativeChainID, e))
                rms = 99999  # Sentinel so a failed comparison never wins

            results.append((rms, nativeChainID, placedChainID, reforiginOut))

            # Clean up
            os.unlink(placedChainPdb)
            os.unlink(nativePdbMatch)

    # Now pick the best...  "<=" keeps the most recent of any tied results,
    # which is what overwriting a dict entry used to do.
    best = results[0]
    for candidate in results[1:]:
        if candidate[0] <= best[0]:
            best = candidate

    self.rmsd = best[0]
    self.bestNativeChain = best[1]
    self.bestPlacedChain = best[2]
    self.bestReforiginPdb = best[3]

    # Remove the reforigin output of every other comparison (best effort -
    # a failed run may not have produced a file).
    for candidate in results:
        leftover = candidate[3]
        if candidate is best or leftover == self.bestReforiginPdb:
            continue
        try:
            os.unlink(leftover)
        except OSError:
            pass
def test_resSeqMap1(self):
    """Exercise the residue map on hand-built target and reference data.

    The sequences, residue numbers and C-alpha masks are assigned
    directly on a fresh map, ``_calc_map`` is run, and the resulting
    offsets, match length, number conversions and incomparable lists
    are compared against known values.
    """
    # See if we can sort out the indexing between the native and model
    seq_map = residue_map.residueSequenceMap()

    # Target side
    seq_map.targetSequence = [
        'G', 'G',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'F', 'F', 'F', 'F', 'F', 'F',
    ]
    seq_map.targetResSeq = [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
    seq_map.targetCAlphaMask = [
        False, False, False, False, True, False,
        False, False, False, False, False, False,
        False, False, False, False, False, False,
    ]

    # Reference side
    seq_map.refSequence = [
        'H', 'H', 'H',
        'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'A',
        'G', 'G', 'G', 'G', 'G', 'G',
    ]
    seq_map.refResSeq = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
    seq_map.refCAlphaMask = [
        False, False, False, False, False, False, True,
        False, False, False, False, False, False,
        False, False, False, False, False, False,
    ]

    seq_map._calc_map()

    self.assertEqual(seq_map.targetOffset, 2)
    self.assertEqual(seq_map.refOffset, 3)
    self.assertEqual(seq_map._lenMatch(), 10)

    # (input residue number, expected converted number)
    for ref_number, expected in ((0, -6), (3, -3)):
        self.assertEqual(seq_map.ref2target(ref_number), expected)
    for target_number, expected in ((1, 7), (12, 18), (6, 12)):
        self.assertEqual(seq_map.target2ref(target_number), expected)

    self.assertEqual(seq_map.targetIncomparable(), [-5, -4, -1, 0, 7, 8, 9, 10, 11, 12])
    self.assertEqual(seq_map.refIncomparable(), [0, 1, 2, 5, 6, 12, 13, 14, 15, 16, 17, 18])

    # Check ends match up: first matched target residue round-trips
    first_target = seq_map.targetResSeq[seq_map.targetOffset]
    self.assertEqual(first_target, seq_map.ref2target(seq_map.target2ref(first_target)))

    # ...and the last matched reference residue maps onto the last matched target
    last_ref = seq_map.refResSeq[seq_map.refOffset + seq_map.lenMatch - 1]
    self.assertEqual(
        seq_map.ref2target(last_ref),
        seq_map.targetResSeq[seq_map.targetOffset + seq_map.lenMatch - 1],
    )