Пример #1
0
    def test_resSeqMap3(self):
        """Check the residue map built from chain A of 2UUI and its model.

        Chain A is extracted from the native PDB into a scratch file in the
        current working directory and a standardised copy is written next to
        it. The map itself is built from the extracted chain file and the
        model. Both scratch files are removed when the test finishes, whether
        or not an assertion failed.
        """
        # See if we can sort out the indexing between the native and model
        nativePdb = os.path.join(self.testfiles_dir, "2UUI.pdb")
        modelPdb = os.path.join(self.testfiles_dir, "2UUI_S_00000001.pdb")

        chainA = "2UUI_A.pdb"
        chainAstd = "2UUI_A_std.pdb"
        try:
            pdb_edit.extract_chain(nativePdb, chainA, chainID='A')
            pdb_edit.standardise(chainA, chainAstd)

            resSeqMap = residue_map.residueSequenceMap(chainA, modelPdb)

            self.assertEqual(156, resSeqMap._lenMatch())

            nativeMask = [False] * 155 + [True]
            self.assertEqual(resSeqMap.refCAlphaMask, nativeMask)

            self.assertEqual(resSeqMap.ref2target(10), 16)
            self.assertEqual(resSeqMap.target2ref(155), 149)

            # Check ends match up
            firstTarget = resSeqMap.targetResSeq[resSeqMap.targetOffset]
            firstRef = resSeqMap.target2ref(firstTarget)
            self.assertEqual(firstTarget, resSeqMap.ref2target(firstRef))
            lastRef = resSeqMap.refResSeq[resSeqMap.refOffset + resSeqMap.lenMatch - 1]
            self.assertEqual(
                resSeqMap.ref2target(lastRef),
                resSeqMap.targetResSeq[resSeqMap.targetOffset + resSeqMap.lenMatch - 1],
            )
        finally:
            # A failed assertion must not leave scratch files behind; a file
            # may be missing if the step that creates it was never reached.
            for scratch in (chainA, chainAstd):
                if os.path.exists(scratch):
                    os.unlink(scratch)
Пример #2
0
    def test_resSeqMap1(self):
        """Exercise the map on hand-built sequences instead of PDB files.

        The target and reference attributes are filled in directly and
        _calc_map() is called to derive the offsets that the assertions check.
        """
        # See if we can sort out the indexing between the native and model
        seqMap = residue_map.residueSequenceMap()

        # Target: 18 entries numbered -5..12, mask is True only at index 4
        seqMap.targetSequence = ['G'] * 2 + ['A'] * 10 + ['F'] * 6
        seqMap.targetResSeq = list(range(-5, 13))
        seqMap.targetCAlphaMask = [False] * 4 + [True] + [False] * 13

        # Reference: 19 entries numbered 0..18, mask is True only at index 6
        seqMap.refSequence = ['H'] * 3 + ['A'] * 10 + ['G'] * 6
        seqMap.refResSeq = list(range(19))
        seqMap.refCAlphaMask = [False] * 6 + [True] + [False] * 12

        seqMap._calc_map()

        self.assertEqual(seqMap.targetOffset, 2)
        self.assertEqual(seqMap.refOffset, 3)
        self.assertEqual(seqMap._lenMatch(), 10)

        for refResSeq, expected in ((0, -6), (3, -3)):
            self.assertEqual(seqMap.ref2target(refResSeq), expected)

        for targetResSeq, expected in ((1, 7), (12, 18), (6, 12)):
            self.assertEqual(seqMap.target2ref(targetResSeq), expected)

        self.assertEqual(seqMap.targetIncomparable(), [-5, -4, -1, 0, 7, 8, 9, 10, 11, 12])
        self.assertEqual(seqMap.refIncomparable(), [0, 1, 2, 5, 6, 12, 13, 14, 15, 16, 17, 18])

        # Check ends match up: the first matched target residue must
        # round-trip through the reference numbering...
        firstTarget = seqMap.targetResSeq[seqMap.targetOffset]
        firstRef = seqMap.target2ref(firstTarget)
        self.assertEqual(firstTarget, seqMap.ref2target(firstRef))
        # ...and the last matched reference residue must map onto the last
        # matched target residue.
        lastRef = seqMap.refResSeq[seqMap.refOffset + seqMap.lenMatch - 1]
        self.assertEqual(
            seqMap.ref2target(lastRef),
            seqMap.targetResSeq[seqMap.targetOffset + seqMap.lenMatch - 1],
        )
Пример #3
0
def analyseModels(amoptd):
    """Add residue-mapping and TM-score results for the models to amoptd.

    Stores 'ref_model_pdb_info' and 'res_seq_map' in amoptd ('res_seq_map' is
    set to None if the map cannot be calculated) and, when the TM-score
    comparison succeeds, 'tmComp'.

    Raises:
        RuntimeError: if amoptd['have_tmscore'] is false.
    """
    # Get hold of a full model so we can do the mapping of residues
    modelPattern = os.path.join(amoptd['models_dir'], "*.pdb")
    refModelPdb = glob.glob(modelPattern)[0]

    nativePdbInfo = amoptd['native_pdb_info']
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    amoptd['ref_model_pdb_info'] = refModelPdbInfo

    try:
        seqMap = residue_map.residueSequenceMap()
        refChain = refModelPdbInfo.models[0].chains[0]  # Only 1 chain in model
        targetChain = nativePdbInfo.models[0].chains[0]
        seqMap.fromInfo(
            refInfo=refModelPdbInfo,
            refChainID=refChain,
            targetInfo=nativePdbInfo,
            targetChainID=targetChain,
        )
        amoptd['res_seq_map'] = seqMap
    except Exception as e:
        logger.exception("Error calculating resSeqMap: %s" % e)
        amoptd['res_seq_map'] = None  # Won't be able to calculate RIO scores

    if not amoptd['have_tmscore']:
        raise RuntimeError("No program to calculate TMSCORES")

    try:
        scorer = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
        # Calculation of TMscores for all models
        logger.info("Analysing Rosetta models with TMscore")
        modelPaths = sorted(glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
        amoptd['tmComp'] = scorer.compare_structures(
            modelPaths,
            [amoptd['native_pdb_std']],
            fastas=[amoptd['fasta']],
        )
    except Exception as e:
        # Failure here is logged (with traceback) but not propagated
        logger.exception("Unable to run TMscores: %s", e)
Пример #4
0
    def test_resSeqMap3(self):
        """Check the residue map built from chain A of 2UUI and its model.

        Chain A of the native PDB is written to a scratch file in the current
        working directory, together with a standardised copy. The map is built
        from the extracted chain file and the model. The scratch files are
        deleted in a ``finally`` clause so a failing assertion cannot leave
        them behind.
        """
        # See if we can sort out the indexing between the native and model

        nativePdb = os.path.join(self.testfiles_dir, "2UUI.pdb")
        modelPdb = os.path.join(self.testfiles_dir, "2UUI_S_00000001.pdb")

        chainA = "2UUI_A.pdb"
        chainAstd = "2UUI_A_std.pdb"
        try:
            pdb_edit.extract_chain(nativePdb, chainA, chainID='A')
            pdb_edit.standardise(chainA, chainAstd)

            resSeqMap = residue_map.residueSequenceMap(chainA, modelPdb)

            self.assertEqual(156, resSeqMap._lenMatch())

            nativeMask = [False] * 155 + [True]
            self.assertEqual(resSeqMap.refCAlphaMask, nativeMask)

            self.assertEqual(resSeqMap.ref2target(10), 16)
            self.assertEqual(resSeqMap.target2ref(155), 149)

            # Check ends match up
            m1 = resSeqMap.targetResSeq[resSeqMap.targetOffset]
            n1 = resSeqMap.target2ref(m1)
            self.assertEqual(m1, resSeqMap.ref2target(n1))
            lastRef = resSeqMap.refResSeq[resSeqMap.refOffset + resSeqMap.lenMatch - 1]
            self.assertEqual(
                resSeqMap.ref2target(lastRef),
                resSeqMap.targetResSeq[resSeqMap.targetOffset + resSeqMap.lenMatch - 1],
            )
        finally:
            # Clean up whatever was created, even if an earlier step failed
            # before one of the files was written.
            for scratch in (chainA, chainAstd):
                if os.path.exists(scratch):
                    os.unlink(scratch)
Пример #5
0
 def test_resSeqMap2(self):
     """Build the residue map for 2XOV and its model and check the matched ends."""
     # See if we can sort out the indexing between the native and model
     testDir = self.testfiles_dir
     seqMap = residue_map.residueSequenceMap(
         os.path.join(testDir, "2XOV.pdb"),
         os.path.join(testDir, "2XOV_S_00000001.pdb")
     )

     self.assertEqual(181, seqMap._lenMatch())

     # Check ends match up: the first matched target residue must round-trip
     # through the reference numbering
     firstTarget = seqMap.targetResSeq[seqMap.targetOffset]
     firstRef = seqMap.target2ref(firstTarget)
     self.assertEqual(firstTarget, seqMap.ref2target(firstRef))

     # The last matched reference residue must map to the last matched
     # target residue
     lastRef = seqMap.refResSeq[seqMap.refOffset + seqMap.lenMatch - 1]
     self.assertEqual(
         seqMap.ref2target(lastRef),
         seqMap.targetResSeq[seqMap.targetOffset + seqMap.lenMatch - 1]
     )
Пример #6
0
    def test_resSeqMap4(self):
        """Smoke-test fromInfo() on the standardised 1K33 native and its model.

        There are no assertions: the test passes as long as building the map
        for chain 'A' of both structures does not raise. The standardised
        native is written to the current working directory and is removed
        afterwards even when one of the calls fails.
        """
        # See if we can sort out the indexing between the native and model

        nativePdb = os.path.join(self.testfiles_dir, "1K33.pdb")
        modelPdb = os.path.join(self.testfiles_dir, "1K33_S_00000001.pdb")

        nativePdbStd = "1K33_std.pdb"
        try:
            pdb_edit.standardise(nativePdb, nativePdbStd)

            nativeInfo = pdb_edit.get_info(nativePdbStd)
            modelInfo = pdb_edit.get_info(modelPdb)

            resSeqMap = residue_map.residueSequenceMap()
            resSeqMap.fromInfo(nativeInfo, 'A', modelInfo, 'A')
        finally:
            # Do not leak the scratch file if any step above raised
            if os.path.exists(nativePdbStd):
                os.unlink(nativePdbStd)
Пример #7
0
 def test_resSeqMap4(self):
     """Smoke-test fromInfo() on the standardised 1K33 native and its model.

     The test makes no assertions; it only requires that the map can be
     built for chain 'A' of both structures without raising. The scratch
     file holding the standardised native is deleted in a ``finally``
     clause so it is not left behind when a step fails.
     """
     # See if we can sort out the indexing between the native and model
     nativePdb = os.path.join(self.testfiles_dir, "1K33.pdb")
     modelPdb = os.path.join(self.testfiles_dir, "1K33_S_00000001.pdb")

     nativePdbStd = "1K33_std.pdb"
     try:
         pdb_edit.standardise(nativePdb, nativePdbStd)

         nativeInfo = pdb_edit.get_info(nativePdbStd)
         modelInfo = pdb_edit.get_info(modelPdb)

         resSeqMap = residue_map.residueSequenceMap()
         resSeqMap.fromInfo(nativeInfo, 'A', modelInfo, 'A')
     finally:
         # The file may not exist if standardise() itself failed
         if os.path.exists(nativePdbStd):
             os.unlink(nativePdbStd)
Пример #8
0
    def test_resSeqMap2(self):
        """Map 2XOV onto its model and verify both ends of the matched region."""
        # See if we can sort out the indexing between the native and model
        nativePath = os.path.join(self.testfiles_dir, "2XOV.pdb")
        modelPath = os.path.join(self.testfiles_dir, "2XOV_S_00000001.pdb")
        seqMap = residue_map.residueSequenceMap(nativePath, modelPath)

        self.assertEqual(181, seqMap._lenMatch())

        # Check ends match up
        # Start: target -> ref -> target must give back the same residue number
        startTarget = seqMap.targetResSeq[seqMap.targetOffset]
        startRef = seqMap.target2ref(startTarget)
        self.assertEqual(startTarget, seqMap.ref2target(startRef))

        # End: the last matched ref residue maps to the last matched target residue
        endRef = seqMap.refResSeq[seqMap.refOffset + seqMap.lenMatch - 1]
        self.assertEqual(
            seqMap.ref2target(endRef),
            seqMap.targetResSeq[seqMap.targetOffset + seqMap.lenMatch - 1],
        )
Пример #9
0
def analyseModels(amoptd):
    """Compare the models in amoptd['models_dir'] against the native structure.

    Stores 'res_seq_map' and 'ref_model_pdb_info' in amoptd. If
    amoptd['have_tmscore'] is true the models are compared with TMscore and
    the result is stored under 'tmComp' (a failure is logged, not raised).
    Otherwise a Maxcluster instance is created, kept in the module-level
    _MAXCLUSTERER, and asked to compare the models directory.
    """
    # setting a module-level variable so need to use the global keyword so it
    # doesn't become a local variable
    global _MAXCLUSTERER

    # Get hold of a full model so we can do the mapping of residues
    modelPattern = os.path.join(amoptd['models_dir'], "*.pdb")
    refModelPdb = glob.glob(modelPattern)[0]

    nativePdbInfo = amoptd['native_pdb_info']

    resSeqMap = residue_map.residueSequenceMap()
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    resSeqMap.fromInfo(
        refInfo=refModelPdbInfo,
        refChainID=refModelPdbInfo.models[0].chains[0],  # Only 1 chain in model
        targetInfo=nativePdbInfo,
        targetChainID=nativePdbInfo.models[0].chains[0],
    )
    amoptd['res_seq_map'] = resSeqMap
    amoptd['ref_model_pdb_info'] = refModelPdbInfo

    if amoptd['have_tmscore']:
        try:
            scorer = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
            # Calculation of TMscores for all models
            logger.info("Analysing Rosetta models with TMscore")
            modelPaths = sorted(glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
            amoptd['tmComp'] = scorer.compare_structures(
                modelPaths,
                [amoptd['native_pdb_std']],
                fastas=[amoptd['fasta']],
            )
        except Exception as e:
            logger.critical("Unable to run TMscores: {0}".format(e))
        return

    # No TMscore available: fall back to Maxcluster
    _MAXCLUSTERER = maxcluster.Maxcluster(amoptd['maxcluster_exe'])
    logger.info("Analysing Rosetta models with Maxcluster")
    _MAXCLUSTERER.compareDirectory(
        nativePdbInfo=nativePdbInfo,
        resSeqMap=resSeqMap,
        modelsDirectory=amoptd['models_dir'],
        workdir=fixpath(amoptd['benchmark_dir']),
    )
    return
Пример #10
0
def match_resseq(targetPdb=None, outPdb=None, resMap=None, sourcePdb=None):
    """Write a copy of targetPdb with the residue numbers of ATOM records remapped.

    Exactly one of ``resMap`` and ``sourcePdb`` must be given. When
    ``sourcePdb`` is given, the map is built with
    ``residue_map.residueSequenceMap(targetPdb, sourcePdb)``. The resSeq of
    every ATOM record is passed through ``resMap.ref2target()``; records whose
    number is unchanged, and all non-ATOM records (TER included), are copied
    to ``outPdb`` verbatim.

    Args:
        targetPdb: path of the PDB file to read.
        outPdb: path of the PDB file to write.
        resMap: an existing residue sequence map providing ``ref2target()``.
        sourcePdb: path of the PDB used to build the map when resMap is not given.

    Raises:
        AssertionError: if neither or both of resMap and sourcePdb are supplied.
        RuntimeError: if the file contains a MODEL or ANISOU record.
    """
    assert sourcePdb or resMap, "Need one of sourcePdb or resMap"
    assert not (sourcePdb and resMap), "Cannot use both sourcePdb and resMap"
    if not resMap:
        resMap = residue_map.residueSequenceMap(targetPdb, sourcePdb)

    with open(targetPdb, 'r') as target, open(outPdb, 'w') as out:
        for line in target:

            if line.startswith("MODEL"):
                raise RuntimeError("Multi-model file!")

            if line.startswith("ANISOU"):
                raise RuntimeError(
                    "I cannot cope with ANISOU! {0}".format(line))

            if not line.startswith("ATOM"):
                # NOTE: processing does not stop at TER and chains are not
                # distinguished; everything that is not an ATOM record is
                # simply passed through.
                out.write(line)
                continue

            atom = pdb_model.PdbAtom(line)

            # Get the matching resSeq for the model
            modelResSeq = resMap.ref2target(atom.resSeq)
            if modelResSeq == atom.resSeq:
                # Number unchanged - keep the original text of the record
                out.write(line)
            else:
                atom.resSeq = modelResSeq
                out.write(atom.toLine() + "\n")
Пример #11
0
def analyseModels(amoptd):
    """Fill amoptd with the residue map and TM-score comparison for the models.

    'ref_model_pdb_info' and 'res_seq_map' are written to amoptd; if the map
    cannot be calculated the error is logged and 'res_seq_map' is set to None.
    When the TM-score run succeeds its result is stored under 'tmComp'.

    Raises:
        RuntimeError: if amoptd['have_tmscore'] is false.
    """
    # Get hold of a full model so we can do the mapping of residues
    candidates = glob.glob(os.path.join(amoptd['models_dir'], "*.pdb"))
    refModelPdb = candidates[0]

    nativePdbInfo = amoptd['native_pdb_info']
    refModelPdbInfo = pdb_edit.get_info(refModelPdb)
    amoptd['ref_model_pdb_info'] = refModelPdbInfo

    try:
        seqMap = residue_map.residueSequenceMap()
        mapArgs = dict(
            refInfo=refModelPdbInfo,
            refChainID=refModelPdbInfo.models[0].chains[0],  # Only 1 chain in model
            targetInfo=nativePdbInfo,
            targetChainID=nativePdbInfo.models[0].chains[0],
        )
        seqMap.fromInfo(**mapArgs)
        amoptd['res_seq_map'] = seqMap
    except Exception as e:
        logger.exception("Error calculating resSeqMap: %s" % e)
        amoptd['res_seq_map'] = None  # Won't be able to calculate RIO scores

    if not amoptd['have_tmscore']:
        raise RuntimeError("No program to calculate TMSCORES")

    try:
        tmRunner = tm_util.TMscore(amoptd['tmscore_exe'], wdir=fixpath(amoptd['benchmark_dir']))
        # Calculation of TMscores for all models
        logger.info("Analysing Rosetta models with TMscore")
        allModels = sorted(glob.glob(os.path.join(amoptd['models_dir'], "*pdb")))
        natives = [amoptd['native_pdb_std']]
        amoptd['tmComp'] = tmRunner.compare_structures(allModels, natives, fastas=[amoptd['fasta']])
    except Exception as e:
        # Logged with traceback; the caller is not interrupted
        logger.exception("Unable to run TMscores: %s", e)
Пример #12
0
    def getRmsd(self,
                nativePdbInfo=None,
                placedPdbInfo=None,
                refModelPdbInfo=None,
                workdir=None,
                cAlphaOnly=True):
        """Find the lowest RMSD between any native chain and any placed chain.

        For now just save lowest rmsd - can look at collecting more nativeInfo later

        Currently we assume we are only given one model and that it has already been standardised.

        Every chain of the native is compared with every chain of the placed
        structure. The best result is stored on the instance as ``rmsd``,
        ``bestNativeChain``, ``bestPlacedChain`` and ``bestReforiginPdb``;
        the output files of all other comparisons are deleted.

        Args:
            nativePdbInfo: info object for the native structure (its ``pdb``
                path and ``models[0].chains`` are used).
            placedPdbInfo: info object for the placed structure (same
                attributes are used).
            refModelPdbInfo: info object for the model; used as the target of
                the residue sequence map with chain 'A'.
            workdir: working directory; falls back to ``self.workdir`` and
                then to the current directory.
            cAlphaOnly: stored on the instance as ``self.cAlphaOnly``.

        Raises:
            Exception: if the residue sequence map cannot be calculated for
                the first native chain.
        """

        if workdir:
            self.workdir = workdir
        if not self.workdir:
            self.workdir = os.getcwd()

        self.cAlphaOnly = cAlphaOnly  # Whether to only compare c-alpha atoms

        # Run a pass to find the # chains
        native_chains = nativePdbInfo.models[0].chains
        placed_chains = placedPdbInfo.models[0].chains

        # dict of rmsd -> ( chainIDnative, chainIDrefined, reforiginLogfile )
        # NOTE(review): keyed by the RMSD value, so two comparisons with the
        # same value (e.g. two failures recorded as 99999) overwrite each other.
        rmsds = {}

        # Match each chain in native against refined and pick the best
        for nativeChainID in native_chains:

            if len(native_chains) == 1:
                # Don't need to do owt as we are just using the native as is
                nativeChainPdb = nativePdbInfo.pdb
            else:
                # Extract the chain from the pdb
                astr = "chain{0}".format(nativeChainID)
                nativeChainPdb = ample_util.filename_append(
                    filename=nativePdbInfo.pdb,
                    astr=astr,
                    directory=self.workdir)
                pdb_edit.extract_chain(nativePdbInfo.pdb,
                                       nativeChainPdb,
                                       chainID=nativeChainID)

            # Calculate the RefSeqMap - need to do this before we reduce to c-alphas
            # The second chain may be a different composition to the first, so we only generate a traceback if we fail
            # on the first chain. The model only has one chain, so the residueMap has to be the same for all the chains
            try:
                resSeqMap = residue_map.residueSequenceMap()
                resSeqMap.fromInfo(
                    refInfo=nativePdbInfo,
                    refChainID=nativeChainID,
                    targetInfo=refModelPdbInfo,
                    targetChainID='A'  # Model only has one chain
                )

            except RuntimeError as e:
                if nativeChainID == native_chains[0]:
                    # Keep the exception type callers already see, but carry
                    # the cause in the message instead of raising it blank.
                    raise Exception(
                        "Cannot calculate residue sequence map for native chain {0}: {1}".format(
                            nativeChainID, e))
                # Only compare the first chain
                break

            for placedChainID in placed_chains:

                # Prepare the placed PDB
                placedChainPdb = self.preparePlacedPdb(
                    placedPdb=placedPdbInfo.pdb,
                    placedChainID=placedChainID,
                    nativeChainID=nativeChainID,
                    resSeqMap=resSeqMap)

                # Now create a PDB with the matching atoms from native that are in refined
                nativePdbMatch = ample_util.filename_append(
                    filename=nativeChainPdb,
                    astr="matched",
                    directory=self.workdir)
                pdb_edit.keep_matching(refpdb=placedChainPdb,
                                       targetpdb=nativeChainPdb,
                                       outpdb=nativePdbMatch,
                                       resSeqMap=resSeqMap)

                # Now get the rmsd
                astr = "chain{0}_reforigin".format(nativeChainID)
                reforiginOut = ample_util.filename_append(
                    filename=placedChainPdb, astr=astr, directory=self.workdir)

                try:
                    rms = self.calculate(refpdb=nativePdbMatch,
                                         targetpdb=placedChainPdb,
                                         outpdb=reforiginOut)
                except RuntimeError as e:
                    logger.critical(
                        "GOT REFORIGIN ERROR for {0},{1},{2}\n{3}".format(
                            placedChainPdb, nativeChainPdb, nativeChainID, e))
                    # Sentinel so a failed comparison is never picked as best
                    rms = 99999
                rmsds[rms] = (nativeChainID, placedChainID, reforiginOut)
                # Clean up
                os.unlink(placedChainPdb)
                os.unlink(nativePdbMatch)

        # Now pick the best...
        rmsd = sorted(rmsds)[0]

        self.rmsd = rmsd
        self.bestNativeChain, self.bestPlacedChain, self.bestReforiginPdb = rmsds[rmsd]

        # Remove the output of every comparison that was not the best. This is
        # best-effort (the file may never have been written), so failures are
        # only logged.
        for k in rmsds:
            if k != rmsd:
                try:
                    os.unlink(rmsds[k][2])
                except Exception as e:
                    logger.debug("Could not remove %s: %s", rmsds[k][2], e)
Пример #13
0
    def test_resSeqMap1(self):
        """Check offsets and lookups of a map built from hand-written sequences.

        No PDB files are read: the sequence, residue-number and C-alpha mask
        attributes are assigned directly before _calc_map() is called.
        """
        # See if we can sort out the indexing between the native and model

        seqMap = residue_map.residueSequenceMap()

        # 18 target entries: G G, ten A, six F; numbered -5..12; mask True at index 4
        seqMap.targetSequence = ['G'] * 2 + ['A'] * 10 + ['F'] * 6
        seqMap.targetResSeq = list(range(-5, 13))
        seqMap.targetCAlphaMask = [False] * 4 + [True] + [False] * 13

        # 19 reference entries: H H H, ten A, six G; numbered 0..18; mask True at index 6
        seqMap.refSequence = ['H'] * 3 + ['A'] * 10 + ['G'] * 6
        seqMap.refResSeq = list(range(0, 19))
        seqMap.refCAlphaMask = [False] * 6 + [True] + [False] * 12

        seqMap._calc_map()

        self.assertEqual(seqMap.targetOffset, 2)
        self.assertEqual(seqMap.refOffset, 3)
        self.assertEqual(seqMap._lenMatch(), 10)

        refToTarget = [(0, -6), (3, -3)]
        for refNum, targetNum in refToTarget:
            self.assertEqual(seqMap.ref2target(refNum), targetNum)

        targetToRef = [(1, 7), (12, 18), (6, 12)]
        for targetNum, refNum in targetToRef:
            self.assertEqual(seqMap.target2ref(targetNum), refNum)

        self.assertEqual(seqMap.targetIncomparable(), [-5, -4, -1, 0, 7, 8, 9, 10, 11, 12])
        self.assertEqual(seqMap.refIncomparable(), [0, 1, 2, 5, 6, 12, 13, 14, 15, 16, 17, 18])

        # Check ends match up
        startTarget = seqMap.targetResSeq[seqMap.targetOffset]
        startRef = seqMap.target2ref(startTarget)
        self.assertEqual(startTarget, seqMap.ref2target(startRef))
        endRef = seqMap.refResSeq[seqMap.refOffset + seqMap.lenMatch - 1]
        self.assertEqual(
            seqMap.ref2target(endRef),
            seqMap.targetResSeq[seqMap.targetOffset + seqMap.lenMatch - 1],
        )