示例#1
0
    def run(self, refPdb=None, inPdb=None, outPdb=None, connectivityRadius=None, originHand=True, cleanup=False):
        """Run csymmatch on ``inPdb`` using ``refPdb`` as the reference.

        The program output is written to ``<outPdb>_<uuid>.log``; the path is
        stored in ``self.logfile``.

        Parameters
        ----------
        refPdb : str
          Passed to csymmatch as ``-pdbin-ref``
        inPdb : str
          Passed to csymmatch as ``-pdbin``
        outPdb : str
          Passed to csymmatch as ``-pdbout``
        connectivityRadius : number or str, optional
          When truthy, passed as ``-connectivity-radius``
        originHand : bool
          Add the ``-origin-hand`` flag
        cleanup : bool
          Delete the logfile after a successful run

        Raises
        ------
        RuntimeError
          If csymmatch returns a non-zero exit code
        """
        self._reset()

        self.logfile = outPdb + "_{}.log".format(str(uuid.uuid1()))
        cmd = ['csymmatch', "-pdbin-ref", refPdb, "-pdbin", inPdb, "-pdbout", outPdb]

        if originHand:
            cmd.append("-origin-hand")

        if connectivityRadius:
            # Every argv entry has to be a string: a numeric radius would otherwise
            # break both the process launch and the " ".join(cmd) used below
            cmd += ["-connectivity-radius", str(connectivityRadius)]

        retcode = ample_util.run_command(cmd=cmd, logfile=self.logfile, dolog=False)

        if retcode != 0:
            raise RuntimeError("Error running command: {0}".format(" ".join(cmd)))

        if cleanup:
            os.unlink(self.logfile)
示例#2
0
def align_mustang(models, mustang_exe=None, work_dir=None):
    """Align the models with mustang and return the path to the fasta alignment.

    Parameters
    ----------
    models : list
      Paths of the models, appended to the command line after ``-i``
    mustang_exe : str
      Path to the mustang executable
    work_dir : str, optional
      Directory to run in (created if missing); defaults to the current directory

    Returns
    -------
    str
      Path to ``mustang.afasta`` inside ``work_dir``

    Raises
    ------
    RuntimeError
      If the executable cannot be found, mustang fails, or no alignment file is produced
    """
    if not ample_util.is_exe(mustang_exe):
        msg = "Cannot find mustang executable: {0}".format(mustang_exe)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    work_dir = os.path.abspath(work_dir)
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)

    try:
        logfile = os.path.join(work_dir, 'mustang.log')
        basename = 'mustang'
        cmd = [mustang_exe, '-F', 'fasta', '-o', basename, '-i'] + list(models)
        rtn = ample_util.run_command(cmd, logfile=logfile, directory=work_dir)
        if rtn != 0:
            msg = "Error running mustang. Check logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        alignment_file = os.path.join(work_dir, basename + ".afasta")
        if not os.path.isfile(alignment_file):
            msg = "Could not find alignment file: {0} after running mustang!".format(alignment_file)
            raise RuntimeError(msg)
    finally:
        # always need to go back to original directory - including when we raise
        os.chdir(owd)
    return alignment_file
示例#3
0
def generateMap( mtz, pdb, FP='FP', SIGFP='SIGFP', FREE='FREE', directory=None ):
    """Run refmac5 with zero refinement cycles on an mtz/pdb pair to generate a map.

    Parameters
    ----------
    mtz : str
      Path to the input mtz file (HKLIN)
    pdb : str
      Path to the input pdb file (XYZIN)
    FP, SIGFP, FREE : str
      Column labels substituted into the refmac LABIN keyword
    directory : str, optional
      Directory for the output files and log; defaults to the current directory

    Returns
    -------
    str
      Absolute path of the file written as HKLOUT
    """
    assert os.path.isfile(mtz) and os.path.isfile(pdb), "Cannot find files: {0} {1}".format(mtz, pdb)

    directory = directory or os.getcwd()

    # Output names are derived from the inputs by appending "map"
    mapFile = os.path.abspath(ample_util.filename_append(filename=mtz, astr="map", directory=directory))
    mapPdb = ample_util.filename_append(filename=pdb, astr="map", directory=directory)

    # Keyword input for refmac - one keyword per line, terminated by END
    keywords = [
        "RIDG DIST SIGM 0.02",
        "LABIN FP={0} SIGFP={1} FREE={2}".format(FP, SIGFP, FREE),
        "MAKE HYDR N",
        "WEIGHT MATRIX 0.01",
        "NCYC 0",
        "END",
    ]
    stdin = "\n".join(keywords) + "\n"

    logfile = os.path.join(directory, "generateMap.log")
    ret = ample_util.run_command(
        cmd=["refmac5", "HKLIN", mtz, "HKLOUT", mapFile, "XYZIN", pdb, "XYZOUT", mapPdb],
        logfile=logfile,
        dolog=True,
        stdin=stdin)

    assert ret == 0, "generateMap refmac failed-check log: {0}".format(logfile)

    return mapFile
示例#4
0
 def runNcont( self, pdbin=None, sourceChains=None, targetChains=None, maxDist=1.5, allAtom=False ):
     """Run ncont on ``pdbin`` to list contacts between the source and target chains.

     The log is written next to ``pdbin`` and its path is stored in
     ``self.ncontLog``. With ``allAtom`` every atom is selected, otherwise
     only CA atoms.

     Raises RuntimeError if ncont returns a non-zero exit code.
     """
     # The log suffix records which atom selection was used
     log_suffix = ".ncont_aa.log" if allAtom else ".ncont_rio.log"
     self.ncontLog = pdbin + log_suffix

     # Atom part of the ncont selection string
     atoms = "*" if allAtom else "CA"

     # Need to use list of chains from Native as can't work out negate operator for ncont
     keywords = [
         "source {0}//{1}".format(",".join(sourceChains), atoms),
         "target {0}//{1}".format(",".join(targetChains), atoms),
         "maxdist {0}".format(maxDist),
         "cells 2",
         "sort target inc",
     ]
     stdin = "".join(keyword + "\n" for keyword in keywords)

     cmd = ["ncont", "xyzin", pdbin]
     status = ample_util.run_command(cmd=cmd, logfile=self.ncontLog, directory=os.getcwd(), dolog=True, stdin=stdin)
     if status == 0:
         return
     raise RuntimeError("Error running ncont command: {0}\nCheck log: {1}".format(cmd, self.ncontLog))
示例#5
0
def ccmtzOrigin( nativeMap, mrPdb  ):
    """Use the phenix get_cc_mtz_pdb script to determine the origin of a MR pdb using the supplied map

    Parameters
    ----------
    nativeMap : str
      Map file, passed as the first argument to phenix.get_cc_mtz_pdb
    mrPdb : str
      Path to the Molecular Replacement PDB file

    Returns
    -------
    list
      The three values on the OFFSET line of ``temp_dir/resolve.offset``, each negated
    """
    import tempfile

    tempnam = None
    try:
        # resolve can only handle file names < 75 characters so we need to truncate
        # We copy the file rather than symlink so that this works on windows and then delete afterwards
        if len(os.path.basename(mrPdb)) >= 75:
            # mkstemp creates the file atomically (os.tempnam is race-prone and is
            # not available on Python 3).
            # Need to add .pdb extension or it doesn't work
            handle, tempnam = tempfile.mkstemp(suffix=".pdb")
            os.close(handle)
            assert len(tempnam) < 75
            shutil.copy(mrPdb, tempnam)
            mrPdb = tempnam

        # make sure we can find the program
        get_cc_mtz_pdb = ample_util.find_exe('phenix.get_cc_mtz_pdb')
        cmd = [get_cc_mtz_pdb, nativeMap, mrPdb]
        ret = ample_util.run_command(cmd=cmd, logfile="get_cc_mtz_pdb.log", dolog=False)
        assert ret == 0, "phenix.get_cc_mtz_pdb refmac failed!"

        ofile = "temp_dir/resolve.offset"
        with open(ofile) as o:
            line = o.readline().strip()

        t = line.split()
        assert t[0] == "OFFSET"
        return [float(t[1]) * -1, float(t[2]) * -1, float(t[3]) * -1]
    finally:
        # remove temp file if we created it - also when something above failed
        if tempnam and os.path.exists(tempnam):
            os.unlink(tempnam)
示例#6
0
    def analyse(self, mr_pdb, cleanup=True):
        """Use SHELXE to analyse an MR pdb file to determine the origin shift and phase error

        This function sets the ``wMPE`` attribute, ``MPE`` when the log parser
        provides it, and ``originShift`` when the parser returns a list.
        Note that the process is left in ``self.work_dir`` afterwards.

        Parameters
        ----------
        mr_pdb : str
          Path to the Molecular Replacement PDB file
        cleanup : bool
          Remove the SHELXE input/output files and the logfile afterwards

        Raises
        ------
        RuntimeError
          If SHELXE returns a non-zero exit code

        """

        os.chdir(self.work_dir)
        input_pdb = self.stem + ".pda"
        shutil.copyfile(mr_pdb, os.path.join(self.work_dir, input_pdb))
        cmd = [self.shelxe_exe, input_pdb, '-a0', '-q', '-s0.5', '-o', '-n', '-t0', '-m0', '-x']
        logfile = os.path.abspath('shelxe_{}.log'.format(str(uuid.uuid1())))
        ret = ample_util.run_command(cmd=cmd, logfile=logfile, directory=None, dolog=False, stdin=None)
        if ret != 0:
            raise RuntimeError("Error running shelxe - see log: {0}".format(logfile))
        sp = parse_shelxe.ShelxeLogParser(logfile)
        # Only added in later version of MRBUMP shelxe parser
        if hasattr(sp, 'MPE'):
            self.MPE = sp.MPE
        self.wMPE = sp.wMPE
        if isinstance(sp.originShift, list):
            # The stored shift is the negative of the one the parser reports
            self.originShift = [o * -1 for o in sp.originShift]
        if cleanup:
            for ext in ['.hkl', '.ent', '.pda', '.pdo', '.phs', '.lst', '_trace.ps']:
                try:
                    os.unlink(self.stem + ext)
                except OSError:
                    # Best effort: a file that is missing or cannot be removed is skipped,
                    # but unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed
                    pass
            os.unlink(logfile)
示例#7
0
 def add_sidechains(self, pdbin=None, pdbout=None, sequence=None, hydrogens=False, strip_oxt=False):
     """Add the specified sidechains to the pdb

     Runs ``self.scwrl_exe`` on ``pdbin``. When ``sequence`` is given it is
     written to ``sequence.file`` in ``self.workdir`` and passed with ``-s``.
     With ``strip_oxt`` the program output goes to an intermediate file from
     which OXT atoms are stripped to produce ``pdbout``.

     Returns the absolute path of ``pdbout``.
     Raises RuntimeError if the program returns a non-zero exit code.
     """

     _pdbout = pdbout
     if strip_oxt:
         _pdbout = pdbout + "_OXT"

     cmd = [self.scwrl_exe, "-i", pdbin, "-o", _pdbout]

     # Not needed by default
     if sequence is not None:
         sequenceFile = os.path.join(self.workdir, "sequence.file")
         with open(sequenceFile, 'w') as w:
             # Text mode already translates "\n" to the platform line ending;
             # writing os.linesep here would give "\r\r\n" on Windows
             w.write(sequence + "\n")
         cmd += ["-s", sequenceFile]

     # Don't output hydrogens
     if not hydrogens:
         cmd += ['-h']

     logfile = os.path.abspath("scwrl.log")
     retcode = ample_util.run_command(cmd, logfile=logfile)

     if retcode != 0:
         raise RuntimeError("Error running Scwrl - please check the logfile: {0}".format(logfile))
     os.unlink(logfile)

     if strip_oxt:
         # Remove all OXT atoms
         pdb_edit.strip(_pdbout, pdbout, atom_types=['OXT'])
         os.unlink(_pdbout)

     return os.path.abspath(pdbout)
示例#8
0
 def _sfcif2mtz(self, cifPath, mtzPath ):
     """Run cif2mtz to convert the structure-factor CIF ``cifPath`` into the MTZ file ``mtzPath``.

     The log is written to ``cif2mtz.log`` in the current directory.
     Raises RuntimeError if cif2mtz returns a non-zero exit code.
     """
     logfile = os.path.join(os.getcwd(), "cif2mtz.log")
     # Need empty stdin to trigger eof to get program to run
     status = ample_util.run_command(
         ["cif2mtz", "hklin", cifPath, "hklout", mtzPath],
         stdin="",
         logfile=logfile)
     if status == 0:
         return
     raise RuntimeError("Error running sfcif2mtz. Check the logfile: {0}".format(logfile))
示例#9
0
    def calc_rmsd(self, model1, model2, nresidues=None, logfile='lsqkab.out', purge=False):
        """Run lsqkab on two models and return the value parsed from its log.

        Parameters
        ----------
        model1, model2 : str
          Paths passed to lsqkab as XYZINM and XYZINF respectively
        nresidues : int, optional
          Upper residue number for the fit; taken from ``model1`` when not given
        logfile : str
          File the lsqkab output is written to
        purge : bool
          Delete the logfile and the ``RMSTAB`` file afterwards

        Returns
        -------
        Whatever ``self.parse_lsqkab_output`` extracts from the logfile
        """
        if not nresidues:
            _natoms, nresidues = pdb_edit.num_atoms_and_residues(model1, first=True)

        # lsqkab keyword input: fit and match CA atoms of residues 1..nresidues in chain A
        keywords = [
            "FIT RESIDUE CA 1 TO {0} CHAIN {1}",
            "MATCH 1 to  {0} CHAIN {1}",
            "output  RMS",
            "end",
        ]
        stdin = "\n".join(keywords).format(nresidues, 'A')

        ample_util.run_command(['lsqkab', 'XYZINM', model1, 'XYZINF', model2], logfile=logfile, stdin=stdin)
        rmsd = self.parse_lsqkab_output(logfile)

        if purge:
            for leftover in (logfile, 'RMSTAB'):
                os.unlink(leftover)

        return rmsd
示例#10
0
    def generate_distance_matrix(self,pdb_list):
        """Run fast_protein_cluster on the pdbs and store a symmetric distance matrix.

        Sets ``self.index2pdb`` (the sorted list of pdbs) and
        ``self.distance_matrix`` (a square numpy array).

        Parameters
        ----------
        pdb_list : list
          Paths of the pdb files to compare

        Raises
        ------
        RuntimeError
          If fast_protein_cluster returns a non-zero exit code
        """

        # Index is just the order of the pdb in the file, so the file has to be written
        # in the same (sorted) order that index2pdb uses to map indices back to pdbs
        self.index2pdb = sorted(pdb_list)

        # Create list of pdb files
        fname = os.path.join(os.getcwd(), "files.list")
        with open(fname, 'w') as f:
            f.write("\n".join(self.index2pdb) + "\n")

        # Run fast_protein_cluster - this is just to generate the distance matrix, but there
        # doesn't seem to be a way to stop it clustering as well - not a problem as it just
        # generates more files
        log_name = os.path.abspath("fast_protein_cluster.log")
        matrix_file = "fpc.matrix"
        cmd = [self.executable,
               "--cluster_write_text_matrix",
               matrix_file,
               "-i",
               fname]

        retcode = ample_util.run_command(cmd, logfile=log_name)
        if retcode != 0:
            raise RuntimeError("non-zero return code for fast_protein_cluster in generate_distance_matrix!\nCheck logfile:{0}".format(log_name))

        # Each line of the matrix file is read as: index index distance
        mlen = 0
        data = []
        with open(matrix_file) as f:
            for line in f:
                fields = line.split()
                if not fields:
                    # tolerate blank lines rather than failing on fields[0]
                    continue
                x = int(fields[0])
                y = int(fields[1])
                d = float(fields[2])
                # +1 as we want the length; consider both indices so the matrix
                # is always big enough to hold every entry
                mlen = max(mlen, x + 1, y + 1)
                data.append((x, y, d))

        # use square matrix to make indexing easier as we're unlikely to be very big
        m = numpy.zeros([mlen, mlen])

        # Fill both triangles at once so the matrix is symmetric
        for i, j, d in data:
            m[i][j] = d
            m[j][i] = d

        self.distance_matrix = m
        return
示例#11
0
    def test_gesamt_matrix_generic(self):
        """Check a Q-score from a direct gesamt run against the generated score matrix.

        The score matrix is produced for all test models; gesamt is then run on
        one pair of models and the Q-score from its log is compared with the
        matching matrix entry.
        """
        gesamt_exe = ample_util.find_exe("gesamt" + ample_util.EXE_EXT)
        clusterer = subcluster.GesamtClusterer(executable=gesamt_exe)
        models_glob = os.path.join(self.testfiles_dir, "models", '*.pdb')
        pdb_list = sorted(glob.glob(models_glob))
        clusterer._generate_distance_matrix_generic(pdb_list, purge_all=True)

        # The pair of models that is checked by hand
        index1, index2 = 2, 25
        model_a = pdb_list[index1]
        model_b = pdb_list[index2]

        # Score the pair directly with gesamt and pull the Q-score from its log
        logfile = 'gesamt.log'
        ample_util.run_command([gesamt_exe, model_a, model_b], logfile=logfile)
        qscore = None
        with open(logfile) as log:
            for line in log:
                if line.startswith(' Q-score'):
                    qscore = float(line.split()[2])

        self.assertIsNotNone(qscore, "No q-score found")

        # Load the score matrix as (index, index, score) tuples, skipping blank lines
        with open(subcluster.SCORE_MATRIX_NAME) as fh:
            rows = [line.split() for line in fh if line.strip()]
        matrix = [(int(r[0]), int(r[1]), float(r[2])) for r in rows]

        # Make sure the score matches
        for first, second, score in matrix:
            if first == index1 and second == index2:
                # Gesamt log and out file formats have different precisions
                self.assertAlmostEqual(score, qscore, 3, "Q-scores differ: {0} - {1}".format(score, qscore))

        for leftover in (logfile, subcluster.SCORE_MATRIX_NAME, subcluster.FILE_LIST_NAME):
            os.unlink(leftover)
        return
示例#12
0
    def _cluster(self, models, run_dir=None, score_type='rmsd', score_matrix=None, nproc=1):
        """
        Run spicker to cluster the models

        Parameters
        ----------
        models : list
          Models handed to ``self.create_input_files``
        run_dir : str, optional
          Directory to run in; falls back to ``self.run_dir`` and then to
          ``spicker`` under the current directory. Created if missing.
        score_type : str
          Stored in ``self.score_type`` and passed to ``create_input_files``
        score_matrix : optional
          Passed to ``create_input_files``
        nproc : int
          Value for the OMP_NUM_THREADS environment variable

        The parsed results are stored in ``self.results``.

        Raises
        ------
        RuntimeError
          If spicker returns a non-zero exit code
        """
        owd = os.getcwd()
        if run_dir:
            self.run_dir = os.path.abspath(run_dir)
        if not self.run_dir:
            self.run_dir = os.path.join(owd, 'spicker')
        if not os.path.isdir(self.run_dir):
            os.mkdir(self.run_dir)
        os.chdir(self.run_dir)

        try:
            logger.debug("Running spicker with score_type {0} in directory: {1}".format(score_type, self.run_dir))
            logger.debug("Using executable: {0} on {1} processors".format(self.spicker_exe, nproc))

            self.score_type = score_type
            self.create_input_files(models, score_type=score_type, score_matrix=score_matrix)

            # We need special care if we are running with tm scores as we will be using the OPENMP
            # version of spicker which requires increasing the stack size on linux and setting the
            # OMP_NUM_THREADS environment variable on all platforms
            # The stack size on 64-bit linux seems to be 15Mb, so I guess asking for 50 seems reasonable
            # I'm assuming that the limit is in bytes and specified by an integer so 50Mb -> 50000000
            preexec_fn = None
            env = {'OMP_NUM_THREADS': str(nproc)}
            if sys.platform.lower().startswith('linux'):

                def set_stack():
                    import resource
                    stack_bytes = 50000000  # 50Mb
                    resource.setrlimit(resource.RLIMIT_STACK, (stack_bytes, stack_bytes))

                preexec_fn = set_stack

            logfile = os.path.abspath("spicker.log")
            rtn = ample_util.run_command([self.spicker_exe], logfile=logfile, env=env, preexec_fn=preexec_fn)
            if rtn != 0:
                raise RuntimeError("Error running spicker, check logfile: {0}".format(logfile))

            # Read the log and generate the results
            self.results = self.process_log()
        finally:
            # Always go back to where we started - also when something above raised
            os.chdir(owd)
        return
示例#13
0
 def preparePlacedPdb( self, placedPdb=None, placedChainID=None, nativeChainID=None, resSeqMap=None ):
     """
     Use pdbcur to:
     - extract chain to compare
     - strip down to CA/BB
     - remove any atoms that cannot be compared to the native

     The output file name is derived from ``placedPdb`` with ``chain<ID>``
     appended and is placed in ``self.workdir``; its path is returned.

     Raises RuntimeError if pdbcur returns a non-zero exit code.
     """
     # Build up the pdbcur keyword input, one keyword per line

     # Extract the chain to compare
     keywords = ["lvchain {0}".format(placedChainID)]

     # Rename it to match the native
     if placedChainID != nativeChainID:
         keywords.append("renchain {0} {1}".format(placedChainID, nativeChainID))

     # Remove residues in the model that cannot be compared - one delresidue per entry
     # as the selection syntax for a discrete list is not obvious
     incomparable = resSeqMap.targetIncomparable(bbMask=not self.cAlphaOnly)
     for e in incomparable:
         keywords.append("delresidue {0}".format(e))

     if self.cAlphaOnly:
         # Strip down to CA
         keywords.append('lvatom "CA[C]:*"')
     else:
         # Strip down to backbone atoms
         keywords.append('lvatom "N,CA,C,O,CB[N,C,O]"')

     # Renumber?
     keywords.append("sernum")
     stdin = "\n".join(keywords) + "\n"

     # Name the output file accordingly
     astr = "chain{0}".format(placedChainID)
     placedChainPdb = ample_util.filename_append(filename=placedPdb, astr=astr, directory=self.workdir)

     # Now run pdbcur to do it all. The argument list is built directly: formatting
     # a string and splitting it would break any path containing whitespace
     cmd = ["pdbcur", "xyzin", placedPdb, "xyzout", placedChainPdb]
     logfile = "{0}.log".format(placedChainPdb)
     retcode = ample_util.run_command(cmd=cmd, logfile=logfile, directory=self.workdir, dolog=False, stdin=stdin)
     if retcode != 0:
         raise RuntimeError("Error extracting chain from placed PDB {0} in directory {1}".format(placedPdb, self.workdir))
     os.unlink(logfile)
     return placedChainPdb
示例#14
0
def merge(pdb1=None, pdb2=None, pdbout=None):
    """Run pdb_merge to combine ``pdb1`` and ``pdb2`` into ``pdbout``.

    The log (``<pdbout>.log``) is deleted when the run succeeds.
    Raises RuntimeError if pdb_merge returns a non-zero exit code.
    """
    logfile = pdbout + ".log"

    status = ample_util.run_command(
        cmd=['pdb_merge', 'xyzin1', pdb1, 'xyzin2', pdb2, 'xyzout', pdbout],
        logfile=logfile,
        directory=os.getcwd(),
        dolog=False,
        stdin='nomerge')

    if status != 0:
        raise RuntimeError("Error merging pdbs: {0} {1}".format(pdb1, pdb2))
    os.unlink(logfile)
示例#15
0
def calpha_only(inpdb, outpdb):
    """Strip PDB to c-alphas only

    Runs pdbcur on ``inpdb`` and writes the result to ``outpdb``. The log
    (``<outpdb>.log``) is deleted when the run succeeds.

    Raises RuntimeError if pdbcur returns a non-zero exit code.
    """

    logfile = outpdb + ".log"
    # Build the argument list directly: formatting a string and splitting it
    # would break any path containing whitespace
    cmd = ["pdbcur", "xyzin", inpdb, "xyzout", outpdb]

    stdin = 'lvatom "CA[C]:*"'
    retcode = ample_util.run_command(cmd=cmd,
                                     logfile=logfile,
                                     directory=os.getcwd(),
                                     dolog=False,
                                     stdin=stdin)

    if retcode != 0:
        raise RuntimeError("Error stripping PDB to c-alpha atoms")
    os.unlink(logfile)
示例#16
0
    def analyse(self, mr_pdb):
        """Use SHELXE to analyse an MR pdb file to determine the origin shift and phase error

        This function sets the ``wMPE`` and ``originShift`` attributes, and
        ``MPE`` when the log parser provides it. Note that the process is left
        in ``self.work_dir`` afterwards.

        Parameters
        ----------
        mr_pdb : str
          Path to the Molecular Replacement PDB file

        Raises
        ------
        RuntimeError
          If SHELXE returns a non-zero exit code

        """

        os.chdir(self.work_dir)
        input_pdb = self.stem + ".pda"
        shutil.copyfile(mr_pdb, os.path.join(self.work_dir, input_pdb))

        cmd = [
            self.shelxe_exe, input_pdb, '-a0', '-q', '-s0.5', '-o', '-n',
            '-t0', '-m0', '-x'
        ]
        logfile = os.path.abspath('shelxe_{}.log'.format(str(uuid.uuid1())))
        ret = ample_util.run_command(cmd=cmd,
                                     logfile=logfile,
                                     directory=None,
                                     dolog=False,
                                     stdin=None)
        if ret != 0:
            raise RuntimeError(
                "Error running shelxe - see log: {0}".format(logfile))

        sp = parse_shelxe.ShelxeLogParser(logfile)
        # Only added in later version of MRBUMP shelxe parser
        if hasattr(sp, 'MPE'):
            self.MPE = sp.MPE
        self.wMPE = sp.wMPE
        # The stored shift is the negative of the one the parser reports
        self.originShift = [o * -1 for o in sp.originShift]

        for ext in [
                '.hkl', '.ent', '.pda', '.pdo', '.phs', '.lst', '_trace.ps'
        ]:
            try:
                os.unlink(self.stem + ext)
            except OSError:
                # Best effort: a file that is missing or cannot be removed is skipped,
                # but unrelated errors (e.g. KeyboardInterrupt) are no longer swallowed
                pass
        os.unlink(logfile)
示例#17
0
    def compareSingle(self,
                      nativePdb=None,
                      modelPdb=None,
                      sequenceIndependant=True,
                      rmsd=False,
                      workdir=None):
        """Run maxcluster on a single model against the native and parse the log.

        Parameters
        ----------
        nativePdb : str
          Passed to maxcluster with ``-e``
        modelPdb : str
          Passed to maxcluster with ``-p``
        sequenceIndependant : bool
          Add the ``-in`` flag
        rmsd : bool
          Add the ``-rmsd`` flag and parse the log with ``parseLogSingleRmsd``
          instead of ``parseLogSingleTm``
        workdir : str, optional
          Directory for the logfile; defaults to the current directory

        Returns
        -------
        The value returned by the log parser. A non-zero exit code from
        maxcluster is only reported with a printed message; the log is
        still parsed.
        """

        self.workdir = workdir
        if not self.workdir:
            self.workdir = os.getcwd()

        cmd = [self.maxclusterExe, "-e", nativePdb, "-p", modelPdb]

        if sequenceIndependant:
            cmd.append("-in")

        if rmsd:
            cmd.append("-rmsd")

        logfile = ample_util.filename_append(filename=modelPdb,
                                             astr="maxcluster",
                                             directory=self.workdir)

        # Swap the extension for a log suffix that records the run type
        if rmsd:
            logfile = os.path.splitext(logfile)[0] + "_rmsd.log"
        else:
            logfile = os.path.splitext(logfile)[0] + ".log"
        self.maxclusterLogfile = logfile

        retcode = ample_util.run_command(cmd,
                                         logfile=self.maxclusterLogfile,
                                         dolog=False)

        if retcode != 0:
            msg = "non-zero return code for maxcluster in runMaxcluster!"
            # Call form of print: prints the same text on Python 2 and is valid on Python 3
            print(msg)

        if rmsd:
            data = self.parseLogSingleRmsd()
        else:
            data = self.parseLogSingleTm()

        return data
示例#18
0
    def _generate_pairwise_rmsd_matrix(self, models, purge=False):
        """
        Use gesamt to generate an all-by-all pairwise rmsd matrix of a list of pdb models

        Sets ``self.index2pdb`` to the sorted models and ``self.distance_matrix``
        to a zero-initialised square array that ``_parse_gesamt_rmsd_log`` is
        then called to fill in from the gesamt log.

        Notes:
        The command run has the form::

            gesamt -input-list inp_list.dat -sheaf-x

        where inp_list.dat contains lines such as::

            1ADZ.pdb -s /1/A
            1ADZ.pdb -s /2/A
            1ADZ.pdb -s /3/A

        Raises RuntimeError if gesamt returns a non-zero exit code.
        """

        # Index is just the order of the pdbs
        ordered = sorted(models)
        self.index2pdb = ordered

        # One line per pdb, each selecting model 1 / chain A, then a blank line
        glist = 'gesamt_models.dat'
        entries = ["{0} -s /1/A \n".format(pdb) for pdb in ordered]
        with open(glist, 'w') as w:
            w.writelines(entries)
            w.write('\n')

        nthreads = '-nthreads={0}'.format(self.nproc)
        cmd = [self.executable, '-input-list', glist, '-sheaf-x', nthreads]

        logfile = os.path.abspath('gesamt_archive.log')
        if ample_util.run_command(cmd, logfile) != 0:
            raise RuntimeError("Error running gesamt - check logfile: {0}".format(logfile))

        # Square matrix, one row/column per model, initialised to zero
        num_models = len(ordered)
        self.distance_matrix = numpy.zeros([num_models, num_models])

        # Read in the rmsds calculated
        self._parse_gesamt_rmsd_log(logfile, num_models)

        if purge:
            for leftover in (glist, logfile):
                os.unlink(leftover)
        return
示例#19
0
def align_gesamt(models, gesamt_exe=None, work_dir=None):
    """Align single-chain models with gesamt and return the path to the alignment file.

    Parameters
    ----------
    models : list
      Paths of the models; each must contain exactly one chain
    gesamt_exe : str
      Path to the gesamt executable
    work_dir : str, optional
      Directory to run in (created if missing); defaults to the current directory

    Returns
    -------
    str
      Path to the alignment file (``gesamt.afasta`` in ``work_dir``, or whatever
      ``_gesamt_aln_windows_fix`` returns on Windows)

    Raises
    ------
    RuntimeError
      If the executable cannot be found, a model does not have exactly one
      chain, gesamt fails, or no alignment file is produced
    """
    if not ample_util.is_exe(gesamt_exe):
        msg = "Cannot find gesamt executable: {0}".format(gesamt_exe)
        raise RuntimeError(msg)

    owd = os.getcwd()
    if not work_dir:
        work_dir = owd
    work_dir = os.path.abspath(work_dir)
    if not os.path.isdir(work_dir):
        os.mkdir(work_dir)
    os.chdir(work_dir)

    try:
        # Need to map chain name to pdb
        model2chain = {}
        for m in models:
            seqd = pdb_edit.sequence(m)
            chains = list(seqd.keys())
            if len(chains) != 1:
                # Report the model and the chains found (the chain ids alone do not
                # match the two placeholders in the message)
                msg = "Model {0} does not contain a single chain, got: {1}".format(m, chains)
                raise RuntimeError(msg)
            model2chain[m] = chains[0]

        basename = 'gesamt'
        logfile = os.path.join(work_dir, 'gesamt.log')
        alignment_file = os.path.join(work_dir, basename + ".afasta")

        # Build up command-line
        cmd = [gesamt_exe]
        # We iterate through the models to make sure the order stays the same
        for m in models:
            cmd += [m, '-s', model2chain[m]]
        cmd += ['-o', '{0}.pdb'.format(basename), '-a', alignment_file]

        rtn = ample_util.run_command(cmd, logfile=logfile, directory=work_dir)
        if rtn != 0:
            msg = "Error running gesamt. Check logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        if not os.path.isfile(alignment_file):
            msg = "Gesamt did not generate an alignment file.\nPlease check the logfile: {0}".format(logfile)
            raise RuntimeError(msg)

        if sys.platform.startswith("win"):
            alignment_file = _gesamt_aln_windows_fix(alignment_file)
    finally:
        # always need to go back to original directory - including when we raise
        os.chdir(owd)
    return alignment_file
示例#20
0
    def _generate_pairwise_rmsd_matrix(self, models, purge=False):
        """
        Use gesamt to generate an all-by-all pairwise rmsd matrix of a list of pdb models

        The sorted models are stored in ``self.index2pdb``. A zero-filled square
        array is stored in ``self.distance_matrix`` before
        ``_parse_gesamt_rmsd_log`` is called on the gesamt log.

        Notes:
        gesamt is invoked as ``gesamt -input-list inp_list.dat -sheaf-x`` where
        inp_list.dat contains lines of the form::

            1ADZ.pdb -s /1/A
            1ADZ.pdb -s /2/A
            1ADZ.pdb -s /3/A

        Raises RuntimeError if gesamt returns a non-zero exit code.
        """

        # Index is just the order of the pdbs
        models = sorted(models)
        self.index2pdb = models
        num_models = len(models)

        # Create file with list of pdbs and model/chain, ending with an empty line
        glist = 'gesamt_models.dat'
        listing = "".join("{0} -s /1/A \n".format(m) for m in models) + '\n'
        with open(glist, 'w') as w:
            w.write(listing)

        cmd = [self.executable, '-input-list', glist, '-sheaf-x']
        cmd.append('-nthreads={0}'.format(self.nproc))

        logfile = os.path.abspath('gesamt_archive.log')
        rtn = ample_util.run_command(cmd, logfile)
        if not rtn == 0:
            raise RuntimeError("Error running gesamt - check logfile: {0}".format(logfile))

        # Zero-filled square matrix with one row and column per model
        self.distance_matrix = numpy.zeros([num_models, num_models])

        # Read in the rmsds calculated
        self._parse_gesamt_rmsd_log(logfile, num_models)

        if not purge:
            return
        os.unlink(glist)
        os.unlink(logfile)
        return
示例#21
0
def run_scripts_serial(job_scripts, nproc=None, monitor=None, early_terminate=None, check_success=None):
    """Run the job scripts on this machine and report whether they succeeded.

    Several scripts are handed to a ``JobServer``; a single script is run
    directly from its own directory with the log written to ``<name>.log``
    there. Note that in the single-script case the process is left in the
    script's directory.

    Parameters
    ----------
    job_scripts : list
      Paths of the scripts to run
    nproc, monitor, check_success
      Passed through to ``JobServer.start``
    early_terminate
      Passed to ``JobServer.start`` as a bool

    Returns
    -------
    The result of ``JobServer.start`` for several scripts, True/False for a
    single script depending on its exit code, and False when there is nothing
    to run.
    """
    if len(job_scripts) == 0:
        # Nothing to run, so nothing can have succeeded
        return False

    if len(job_scripts) > 1:
        # Don't need early terminate - check_success if it exists states what's happening
        js = JobServer()
        js.setJobs(job_scripts)
        return js.start(
            nproc=nproc, early_terminate=bool(early_terminate), check_success=check_success, monitor=monitor
        )

    # Resolve the path before changing directory: a relative path would no longer
    # point at the script afterwards, and a bare filename has an empty dirname
    # that os.chdir rejects
    script = os.path.abspath(job_scripts[0])
    name = os.path.splitext(os.path.basename(script))[0]
    logfile = "{0}.log".format(name)
    os.chdir(os.path.dirname(script))
    rtn = ample_util.run_command([script], logfile=logfile)
    return rtn == 0
示例#22
0
    def run_compare_model_list(self,
                               nativePdb=None,
                               models=None,
                               logfile=None):
        """Run maxcluster comparing a list of models against the native.

        The model paths are written, one per line, to ``models.list`` in
        ``self.workdir`` and passed to maxcluster with ``-l``.

        Parameters
        ----------
        nativePdb : str
          Passed to maxcluster with ``-e``
        models : list
          Paths of the models to compare
        logfile : str
          File the maxcluster output is written to

        Raises
        ------
        RuntimeError
          If maxcluster returns a non-zero exit code
        """

        # Generate the list of models
        pdblist = os.path.join(self.workdir, "models.list")
        with open(pdblist, 'w') as f:
            # Text mode already translates "\n" to the platform line ending;
            # joining with os.linesep would give "\r\r\n" on Windows
            f.write("\n".join(models))

        # Run Maxcluster
        cmd = [self.maxclusterExe, "-e", nativePdb, "-l", pdblist]
        retcode = ample_util.run_command(cmd, logfile=logfile, dolog=True)

        if retcode != 0:
            msg = "non-zero return code for maxcluster in runMaxcluster!"
            raise RuntimeError(msg)

        return
示例#23
0
def del_column(file_name, column, overwrite=True):
    """Run mtzutils to exclude ``column`` from an mtz file.

    With ``overwrite`` the result replaces ``file_name`` and that path is
    returned; otherwise the path of the newly written file is returned.

    Raises RuntimeError if mtzutils returns a non-zero exit code.
    """
    mtzDel = ample_util.filename_append(file_name, "d{0}".format(column))

    # Unique log name for each invocation
    log_name = "mtzutils_{}.log".format(str(uuid.uuid1()))
    logfile = os.path.join(os.getcwd(), log_name)

    status = ample_util.run_command(
        ["mtzutils", "hklin1", file_name, "hklout", mtzDel],
        stdin="EXCLUDE 1 {0}".format(column),
        logfile=logfile)
    if status != 0:
        raise RuntimeError(
            "Error running mtzutils. Check the logfile: {0}".format(logfile))
    os.unlink(logfile)

    if not overwrite:
        return mtzDel
    shutil.move(mtzDel, file_name)
    return file_name
示例#24
0
def run_cphasematch(merged_mtz,
                    f_sigf_labels,
                    native_phase_labels,
                    mr_phase_labels,
                    resolution_bins=12,
                    cleanup=True):
    """Run cphasematch on a merged mtz and return the values parsed from its log.

    Parameters
    ----------
    merged_mtz : str
      Path to the mtz file given as ``mtzin``
    f_sigf_labels : sequence
      The first two entries are used as the ``colin-fo`` column labels
    native_phase_labels : sequence
      The first two entries are used as the ``colin-fc-1`` column labels
    mr_phase_labels : sequence
      The first two entries are used as the ``colin-fc-2`` column labels
    resolution_bins : int
      Value for the ``resolution-bins`` keyword
    cleanup : bool
      Delete the logfile after parsing

    Returns
    -------
    tuple
      The four values returned by ``parse_cphasematch_log``

    Raises
    ------
    RuntimeError
      If cphasematch returns a non-zero exit code
    """
    f_label = f_sigf_labels[0]
    sigf_label = f_sigf_labels[1]
    native1 = native_phase_labels[0]
    native2 = native_phase_labels[1]
    mr1 = mr_phase_labels[0]
    mr2 = mr_phase_labels[1]

    # Keyword input read by cphasematch from stdin (leading and trailing newline included)
    template = "\n".join([
        "",
        "mtzin {merged_mtz}",
        "colin-fo /*/*/[{f_label},{sigf_label}]",
        "colin-fc-1 /*/*/[{native_phase_label1},{native_phase_label2}]",
        "colin-fc-2 /*/*/[{mr_phase_label1},{mr_phase_label2}]",
        "resolution-bins {resolution_bins}",
        "",
    ])
    stdin = template.format(merged_mtz=merged_mtz,
                            f_label=f_label,
                            sigf_label=sigf_label,
                            native_phase_label1=native1,
                            native_phase_label2=native2,
                            mr_phase_label1=mr1,
                            mr_phase_label2=mr2,
                            resolution_bins=resolution_bins)

    cmd = ['cphasematch', "-stdin"]
    logfile = os.path.abspath("cphasematch.log")

    if ample_util.run_command(cmd=cmd, stdin=stdin, logfile=logfile) != 0:
        raise RuntimeError(
            "Error running command: {0}\nCheck logfile: {1}".format(
                " ".join(cmd), logfile))

    before, after, hand_change, shift = parse_cphasematch_log(logfile)

    if cleanup:
        os.unlink(logfile)

    return before, after, hand_change, shift
示例#25
0
def extract_model(inpdb, outpdb, modelID):
    """Extract modelID from inpdb into outpdb

    Runs pdbcur with an ``lvmodel`` keyword for ``modelID`` (which must be
    greater than zero). The log (``<outpdb>.log``) is deleted when the run
    succeeds.

    Raises RuntimeError if pdbcur returns a non-zero exit code.
    """

    assert modelID > 0

    logfile = outpdb + ".log"
    # Build the argument list directly: formatting a string and splitting it
    # would break any path containing whitespace
    cmd = ["pdbcur", "xyzin", inpdb, "xyzout", outpdb]

    stdin = "lvmodel /{0}\n".format(modelID)

    retcode = ample_util.run_command(cmd=cmd,
                                     logfile=logfile,
                                     directory=os.getcwd(),
                                     dolog=False,
                                     stdin=stdin)

    if retcode != 0:
        # Actually format the command into the message (previously the unbound
        # .format method itself was passed to the exception)
        raise RuntimeError("Problem extracting model with cmd: {0}".format(" ".join(cmd)))

    os.unlink(logfile)
示例#26
0
    def add_sidechains(self,
                       pdbin=None,
                       pdbout=None,
                       sequence=None,
                       hydrogens=False,
                       strip_oxt=False):
        """Add the specified sidechains to the pdb

        Parameters
        ----------
        pdbin : str
          Input pdb, passed to ``self.scwrl_exe`` with ``-i``
        pdbout : str
          Path of the final output pdb
        sequence : str, optional
          Written to ``sequence.file`` in ``self.workdir`` and passed with ``-s``
        hydrogens : bool
          When False the ``-h`` flag is added so hydrogens are not output
        strip_oxt : bool
          Write the program output to an intermediate file and strip OXT atoms
          from it to produce ``pdbout``

        Returns
        -------
        str
          Absolute path of ``pdbout``

        Raises
        ------
        RuntimeError
          If the program returns a non-zero exit code
        """

        _pdbout = pdbout
        if strip_oxt:
            _pdbout = pdbout + "_OXT"

        cmd = [self.scwrl_exe, "-i", pdbin, "-o", _pdbout]

        # Not needed by default
        if sequence is not None:
            sequenceFile = os.path.join(self.workdir, "sequence.file")
            with open(sequenceFile, 'w') as w:
                # Text mode already translates "\n" to the platform line ending;
                # writing os.linesep here would give "\r\r\n" on Windows
                w.write(sequence + "\n")
            cmd += ["-s", sequenceFile]

        # Don't output hydrogens
        if not hydrogens:
            cmd += ['-h']

        logfile = os.path.abspath("scwrl.log")
        retcode = ample_util.run_command(cmd, logfile=logfile)

        if retcode != 0:
            raise RuntimeError(
                "Error running Scwrl - please check the logfile: {0}".format(
                    logfile))
        os.unlink(logfile)

        if strip_oxt:
            # Remove all OXT atoms
            pdb_edit.strip(_pdbout, pdbout, atom_types=['OXT'])
            os.unlink(_pdbout)

        return os.path.abspath(pdbout)
示例#27
0
    def run(self, refPdb=None, inPdb=None, outPdb=None, connectivityRadius=None, originHand=True, cleanup=False):
        """Run csymmatch on ``inPdb`` using ``refPdb`` as the reference.

        The program output is written to ``<outPdb>_<uuid>.log``; the path is
        stored in ``self.logfile``.

        Parameters
        ----------
        refPdb : str
          Passed to csymmatch as ``-pdbin-ref``
        inPdb : str
          Passed to csymmatch as ``-pdbin``
        outPdb : str
          Passed to csymmatch as ``-pdbout``
        connectivityRadius : number or str, optional
          When truthy, passed as ``-connectivity-radius``
        originHand : bool
          Add the ``-origin-hand`` flag
        cleanup : bool
          Delete the logfile after a successful run

        Raises
        ------
        RuntimeError
          If csymmatch returns a non-zero exit code
        """
        self._reset()

        self.logfile = outPdb + "_{}.log".format(str(uuid.uuid1()))
        cmd = ['csymmatch', "-pdbin-ref", refPdb, "-pdbin", inPdb, "-pdbout", outPdb]

        if originHand:
            cmd += ["-origin-hand"]

        if connectivityRadius:
            # Convert to str: a numeric radius in the argument list would break both
            # the process launch and the " ".join(cmd) in the error message
            cmd += ["-connectivity-radius", str(connectivityRadius)]

        retcode = ample_util.run_command(cmd=cmd, logfile=self.logfile, dolog=False)

        if retcode != 0:
            raise RuntimeError("Error running command: {0}".format(" ".join(cmd)))

        if cleanup:
            os.unlink(self.logfile)
示例#28
0
    def runCompareDirectory(self,
                            nativePdb=None,
                            modelsDirectory=None,
                            logfile=None):
        """Run maxcluster comparing every ``*.pdb`` in a directory against the native.

        The model paths are written, one per line, to ``models.list`` in
        ``self.workdir`` and passed to maxcluster with ``-l``.

        Parameters
        ----------
        nativePdb : str
          Passed to maxcluster with ``-e``
        modelsDirectory : str
          Directory searched for ``*.pdb`` files
        logfile : str
          File the maxcluster output is written to

        Raises
        ------
        RuntimeError
          If no pdb files are found or maxcluster returns a non-zero exit code
        """

        # Find the models before touching the list file so that a failed search
        # does not leave an empty models.list behind
        models = glob.glob(os.path.join(modelsDirectory, '*.pdb'))
        if not models:
            raise RuntimeError(
                "Could not find any pdb files in directory: {0}".format(
                    modelsDirectory))

        # Generate the list of models
        pdblist = os.path.join(self.workdir, "models.list")
        with open(pdblist, 'w') as f:
            # Text mode already translates "\n" to the platform line ending;
            # joining with os.linesep would give "\r\r\n" on Windows
            f.write("\n".join(models))

        cmd = [self.maxclusterExe, "-e", nativePdb, "-l", pdblist]
        retcode = ample_util.run_command(cmd, logfile=logfile, dolog=True)

        if retcode != 0:
            raise RuntimeError(
                "non-zero return code for maxcluster in runMaxcluster!")
示例#29
0
文件: rio.py 项目: hlasimpk/ample
    def runNcont(self,
                 pdbin=None,
                 sourceChains=None,
                 targetChains=None,
                 maxDist=1.5,
                 allAtom=False):
        """Run ncont on ``pdbin`` to list contacts between the source and target chains.

        Parameters
        ----------
        pdbin : str
          Input pdb, passed to ncont as ``xyzin``
        sourceChains, targetChains : list of str
          Chain ids used for the ``source`` and ``target`` selections
        maxDist : float
          Value for the ``maxdist`` keyword
        allAtom : bool
          Select every atom instead of only CA atoms

        The log path is stored in ``self.ncontLog``.

        Raises
        ------
        RuntimeError
          If ncont returns a non-zero exit code
        """

        if allAtom:
            self.ncontLog = pdbin + ".ncont_aa.log"
        else:
            self.ncontLog = pdbin + ".ncont_rio.log"

        cmd = ["ncont", "xyzin", pdbin]

        # Build up stdin
        stdin = ""
        # Need to use list of chains from Native as can't work out negate operator for ncont
        if allAtom:
            stdin += "source {0}//*\n".format(",".join(sourceChains))
            stdin += "target {0}//*\n".format(",".join(targetChains))
        else:
            stdin += "source {0}//CA\n".format(",".join(sourceChains))
            stdin += "target {0}//CA\n".format(",".join(targetChains))
        stdin += "maxdist {0}\n".format(maxDist)
        stdin += "cells 2\n"
        stdin += "sort target inc\n"

        retcode = ample_util.run_command(cmd=cmd,
                                         logfile=self.ncontLog,
                                         directory=os.getcwd(),
                                         dolog=True,
                                         stdin=stdin)

        if retcode != 0:
            # Call form of raise: the "raise Class, message" statement is Python 2 only
            raise RuntimeError(
                "Error running ncont command: {0}\nCheck log: {1}".format(
                    cmd, self.ncontLog))

        return
示例#30
0
def add_rfree(file_name, directory=None, overwrite=True):
    """Run uniqueify on an mtz file to generate the RFREE data column.

    Args:
        file_name: path of the input mtz file.
        directory: forwarded to ``ample_util.filename_append`` when naming the output file.
        overwrite: if True the uniqueify output is moved over ``file_name``.

    Returns:
        ``file_name`` when ``overwrite`` is True, otherwise the path of the
        new file written by uniqueify.

    Raises:
        RuntimeError: if uniqueify exits with a non-zero return code (the
            logfile is kept in that case).
    """
    unique_mtz = ample_util.filename_append(file_name,
                                            "uniqueify",
                                            directory=directory)
    command = ['uniqueify', file_name, unique_mtz]

    # Unique log name so that concurrent runs in the same directory don't clash
    log_name = "uniqueify_{}.log".format(str(uuid.uuid1()))
    logfile = os.path.join(os.getcwd(), log_name)

    if ample_util.run_command(command, logfile=logfile) != 0:
        raise RuntimeError(
            "Error running command: {0}. Check the logfile: {1}".format(
                " ".join(command), logfile))
    os.unlink(logfile)

    if not overwrite:
        return unique_mtz
    shutil.move(unique_mtz, file_name)
    return file_name
示例#31
0
def run_cphasematch(merged_mtz,
                    f_sigf_labels,
                    native_phase_labels,
                    mr_phase_labels,
                    resolution_bins=12,
                    cleanup=True):
    """Run cphasematch on a merged mtz file and return the parsed results.

    Args:
        merged_mtz: path of the mtz file given to cphasematch as ``mtzin``.
        f_sigf_labels: sequence whose first two items are the F and SIGF column labels.
        native_phase_labels: sequence whose first two items form the ``colin-fc-1`` label pair.
        mr_phase_labels: sequence whose first two items form the ``colin-fc-2`` label pair.
        resolution_bins: value written to the ``resolution-bins`` keyword.
        cleanup: if True delete ``cphasematch.log`` after it has been parsed.

    Returns:
        The 4-tuple produced by ``parse_cphasematch_log``:
        ``(before_origin, after_origin, change_of_hand, origin_shift)``.

    Raises:
        RuntimeError: if cphasematch exits with a non-zero return code.
    """
    stdin = """
mtzin {merged_mtz}
colin-fo /*/*/[{f_label},{sigf_label}]
colin-fc-1 /*/*/[{native_phase_label1},{native_phase_label2}]
colin-fc-2 /*/*/[{mr_phase_label1},{mr_phase_label2}]
resolution-bins {resolution_bins}
""".format(merged_mtz=merged_mtz,
           f_label=f_sigf_labels[0],
           sigf_label=f_sigf_labels[1],
           native_phase_label1=native_phase_labels[0],
           native_phase_label2=native_phase_labels[1],
           mr_phase_label1=mr_phase_labels[0],
           mr_phase_label2=mr_phase_labels[1],
           resolution_bins=resolution_bins)

    logfile = os.path.abspath("cphasematch.log")
    cmd = ['cphasematch', "-stdin"]

    if ample_util.run_command(cmd=cmd, stdin=stdin, logfile=logfile) != 0:
        raise RuntimeError("Error running command: {0}\nCheck logfile: {1}".format(" ".join(cmd), logfile))

    # The log must be parsed before it is (optionally) removed
    results = parse_cphasematch_log(logfile)
    before_origin, after_origin, change_of_hand, origin_shift = results

    if cleanup:
        os.unlink(logfile)

    return before_origin, after_origin, change_of_hand, origin_shift
示例#32
0
def generateMap(mtz, pdb, FP='FP', SIGFP='SIGFP', FREE='FREE', directory=None):
    """Generate a map from an mtz file and a pdb by running refmac5 with zero cycles.

    Args:
        mtz: path of the input mtz file (``HKLIN``).
        pdb: path of the input pdb file (``XYZIN``).
        FP: column label used for ``FP`` in the LABIN line.
        SIGFP: column label used for ``SIGFP`` in the LABIN line.
        FREE: column label used for ``FREE`` in the LABIN line.
        directory: directory for the output files and logfile; defaults to
            the current working directory.

    Returns:
        Absolute path of the file refmac5 writes as ``HKLOUT``.

    Raises:
        AssertionError: if either input file is missing or refmac5 returns a
            non-zero exit code.
    """
    inputs_exist = os.path.isfile(mtz) and os.path.isfile(pdb)
    assert inputs_exist, "Cannot find files: {0} {1}".format(mtz, pdb)

    directory = directory or os.getcwd()

    # Output names are the input names with "map" appended, placed in directory
    map_file = os.path.abspath(
        ample_util.filename_append(filename=mtz,
                                   astr="map",
                                   directory=directory))
    map_pdb = ample_util.filename_append(filename=pdb,
                                         astr="map",
                                         directory=directory)

    cmd = ["refmac5"]
    cmd += ["HKLIN", mtz]
    cmd += ["HKLOUT", map_file]
    cmd += ["XYZIN", pdb]
    cmd += ["XYZOUT", map_pdb]

    # FIX FOR DIFFERENT FP etc.
    keywords = """RIDG DIST SIGM 0.02
LABIN FP={0} SIGFP={1} FREE={2}
MAKE HYDR N
WEIGHT MATRIX 0.01
NCYC 0
END
""".format(FP, SIGFP, FREE)

    logfile = os.path.join(directory, "generateMap.log")
    ret = ample_util.run_command(cmd=cmd,
                                 logfile=logfile,
                                 dolog=True,
                                 stdin=keywords)

    assert ret == 0, "generateMap refmac failed-check log: {0}".format(logfile)

    return map_file
示例#33
0
def translate(inpdb=None, outpdb=None, ftranslate=None):
    """Translate a pdb file by a fractional vector using pdbcur.

    Args:
        inpdb: path of the input pdb (``xyzin``).
        outpdb: path of the output pdb (``xyzout``); the logfile is
            ``outpdb + ".log"``.
        ftranslate: vector of fractional coordinates to shift by (the first
            three items are used).

    Raises:
        RuntimeError: if pdbcur exits with a non-zero return code (the
            logfile is kept in that case).
    """
    logfile = outpdb + ".log"

    # NOTE: the command string is split on whitespace, so the paths must not contain spaces
    command_line = "pdbcur xyzin {0} xyzout {1}".format(inpdb, outpdb)
    cmd = command_line.split()

    # Keyword input for pdbcur
    shift_x, shift_y, shift_z = ftranslate[0], ftranslate[1], ftranslate[2]
    stdin = 'translate * frac {0:F} {1:F} {2:F}'.format(shift_x, shift_y, shift_z)

    retcode = ample_util.run_command(cmd=cmd,
                                     logfile=logfile,
                                     directory=os.getcwd(),
                                     dolog=False,
                                     stdin=stdin)

    if retcode != 0:
        raise RuntimeError("Error translating PDB")

    # Success, so the log is no longer needed
    os.unlink(logfile)
示例#34
0
def to_hkl(mtz_file,
           hkl_file=None,
           directory=None,
           F=None,
           SIGF=None,
           FREE=None):
    """Convert an mtz file to SHELX hkl format with mtz2various.

    Args:
        mtz_file: path of the input mtz file.
        hkl_file: path of the output file; defaults to the mtz basename with
            a ``.hkl`` suffix inside ``directory``.
        directory: directory for the default output file; defaults to the
            current working directory.
        F: column label used for ``FP``.
        SIGF: column label used for ``SIGFP``.
        FREE: column label used for ``FREE``.
            If any of the three labels is None, all three are taken from
            ``get_labels(mtz_file)``.

    Returns:
        Path of the hkl file.

    Raises:
        RuntimeError: if mtz2various exits with a non-zero return code.
    """
    if directory is None:
        directory = os.getcwd()

    if hkl_file is None:
        stem, _ = os.path.splitext(os.path.basename(mtz_file))
        hkl_file = os.path.join(directory, stem + ".hkl")

    if any(label is None for label in (F, SIGF, FREE)):
        F, SIGF, FREE = get_labels(mtz_file)

    # Unique log name so that concurrent conversions don't overwrite each other's log
    logfile = "mtz2various_{}.log".format(str(uuid.uuid1()))
    cmd = ['mtz2various', 'HKLIN', mtz_file, 'HKLOUT', hkl_file]
    keywords = """LABIN FP={0} SIGFP={1} FREE={2}
OUTPUT SHELX
FSQUARED
END""".format(F, SIGF, FREE)

    ret = ample_util.run_command(cmd=cmd,
                                 logfile=logfile,
                                 directory=None,
                                 dolog=False,
                                 stdin=keywords)
    if ret != 0:
        raise RuntimeError(
            "Error converting {0} to HKL format - see log: {1}".format(
                mtz_file, logfile))

    os.unlink(logfile)
    return hkl_file
示例#35
0
def backbone(inpath=None, outpath=None):
    """Write a copy of a pdb file containing only backbone atoms, using pdbcur.

    Args:
        inpath: path of the input pdb file.
        outpath: path of the output pdb file; the logfile is the relative
            output path with ``.log`` appended.

    Raises:
        RuntimeError: if pdbcur exits with a non-zero return code (the
            logfile is kept in that case).
    """
    # pdbcur segfaults with long pathnames, so work with relative paths
    rel_in = os.path.relpath(inpath)
    rel_out = os.path.relpath(outpath)

    logfile = rel_out + ".log"
    command_line = "pdbcur xyzin {0} xyzout {1}".format(rel_in, rel_out)
    keywords = 'lvatom "N,CA,C,O,CB[N,C,O]"'

    retcode = ample_util.run_command(cmd=command_line.split(),
                                     logfile=logfile,
                                     directory=os.getcwd(),
                                     dolog=False,
                                     stdin=keywords)

    if retcode != 0:
        raise RuntimeError(
            "Error stripping PDB to backbone atoms. See log:{0}".format(
                logfile))

    os.unlink(logfile)
示例#36
0
    def generate_distance_matrix(self, pdb_list):
        """Run maxcluster on the given models and build a symmetric distance matrix.

        Sets ``self.distance_matrix`` (a ``len(pdb_list)`` square numpy array)
        and ``self.index2pdb`` (matrix index -> pdb path as reported in the
        maxcluster log).

        Args:
            pdb_list: list of paths of the pdb files to compare.

        Raises:
            RuntimeError: if ``pdb_list`` is empty or maxcluster exits with a
                non-zero return code.
        """
        num_models = len(pdb_list)
        if not num_models:
            msg = "generate_distance_matrix got empty pdb_list!"
            logging.critical(msg)
            raise RuntimeError(msg)

        self.index2pdb = [0] * num_models

        # Maxcluster arguments
        # -l [file]   File containing a list of PDB model fragments
        # -L [n]      Log level (default is 4 for single MaxSub, 1 for lists)
        # -d [f]      The distance cut-off for search (default auto-calibrate)
        # -bb         Perform RMSD fit using backbone atoms
        # -C [n]      Cluster method: 0 - No clustering
        # -rmsd ???

        # Create the list of files for maxcluster
        fname = os.path.join(os.getcwd(), FILE_LIST_NAME)
        with open(fname, 'w') as f:
            f.write("\n".join(pdb_list) + "\n")

        log_name = os.path.abspath("maxcluster.log")
        cmd = [
            self.executable, "-l", fname, "-L", "4", "-rmsd", "-d", "1000",
            "-bb", "-C0"
        ]
        retcode = ample_util.run_command(cmd, logfile=log_name)

        if retcode != 0:
            msg = "non-zero return code for maxcluster in generate_distance_matrix!\nSee logfile: {0}".format(
                log_name)
            logging.critical(msg)
            raise RuntimeError(msg)

        # Create a square distance_matrix num_models in size filled with the parity value
        parity = 0.0
        self.distance_matrix = numpy.full([num_models, num_models], parity)

        # Raw strings so the regex escapes are not treated as (invalid) string
        # escapes; both patterns are compiled once rather than per line.
        line_prefix = re.compile(r'INFO  \: Model')
        line_fields = re.compile(
            r'INFO  \: Model\s*(\d*)\s*(.*)\.pdb\s*vs\. Model\s*(\d*)\s*(.*)\.pdb\s*=\s*(\d*\.\d*)'
        )

        # jmht Save output for parsing - might make more sense to use one of the dedicated maxcluster output formats
        with open(log_name, 'r') as max_log:
            for line in max_log:
                if not line_prefix.match(line):
                    continue

                # Split so that we get a list with
                # 0: text before the match
                # 1: model 1 index
                # 2: path to model 1 without .pdb suffix
                # 3: model 2 index
                # 4: path to model 2 without .pdb suffix
                # 5: distance metric
                split = line_fields.split(line)
                idx1 = int(split[1]) - 1
                idx2 = int(split[3]) - 1
                self.distance_matrix[idx1][idx2] = float(split[5])

                pdb1 = split[2] + '.pdb'
                if pdb1 not in self.index2pdb:
                    self.index2pdb[idx1] = pdb1

                pdb2 = split[4] + '.pdb'
                if pdb2 not in self.index2pdb:
                    self.index2pdb[idx2] = pdb2

        # Copy the upper half of the matrix into the lower half - we use a full
        # matrix as it's easier to scan for clusters
        for x in range(num_models):
            for y in range(x + 1, num_models):
                self.distance_matrix[y][x] = self.distance_matrix[x][y]
        return
示例#37
0
   def cluster(self,
               models=None,
               num_clusters=None,
               nproc=1,
               score_type="rmsd",
               cluster_method="kmeans",
               work_dir=None,
               fpc_exe=None,
               max_cluster_size=200,
               benchmark=False
               ):
       """Cluster models with fast_protein_cluster and return the clusters.

       Args:
           models: list of paths of the model files to cluster.
           num_clusters: number of clusters to return (at most 5).
           nproc: passed as ``--nthreads`` when greater than 1.
           score_type: "rmsd" or "tm".
           cluster_method: "kmeans" or "hcomplete".
           work_dir: directory to run in; created if it does not exist.
           fpc_exe: path of the fast_protein_cluster executable.
           max_cluster_size: clusters with more models than this are truncated.
           benchmark: if True a constant seed (``-S 1``) is passed.

       Returns:
           List of ``num_clusters`` ``Cluster`` objects.

       Raises:
           RuntimeError: on invalid arguments, missing input or output files,
               a non-zero return code, or an unexpected number of clusters.
       """
       # FPC default is 5 clusters - we just run with this for the time being
       FPC_NUM_CLUSTERS = 5
       if num_clusters is None or num_clusters > FPC_NUM_CLUSTERS:
           msg = "Cannot work with more than {0} clusters, got: {1}.".format(FPC_NUM_CLUSTERS, num_clusters)
           raise RuntimeError(msg)

       owd = os.getcwd()
       if not os.path.isdir(work_dir):
           os.mkdir(work_dir)
       os.chdir(work_dir)
       try:
           if not len(models) or not all([os.path.isfile(m) for m in models]):
               msg = "Missing models: {0}".format(models)
               raise RuntimeError(msg)

           # Create list of files
           flist = 'files.list'
           with open(flist, 'w') as f:
               for m in models:
                   f.write("{0}\n".format(os.path.abspath(m)))

           if not os.path.isfile(fpc_exe):
               msg = "Cannot find fast_protein_cluster executable: {0}".format(fpc_exe)
               raise RuntimeError(msg)

           # Build up the command-line
           cmd = [fpc_exe]
           if score_type == "rmsd":
               cmd += ['--rmsd']
           elif score_type == "tm":
               cmd += ['--tmscore']
           else:
               msg = "Unrecognised score_type: {0}".format(score_type)
               raise RuntimeError(msg)

           if cluster_method == "kmeans":
               cmd += ['--cluster_kmeans']
           elif cluster_method == "hcomplete":
               cmd += ['--cluster_hcomplete']
           else:
               msg = "Unrecognised cluster_method: {0}".format(cluster_method)
               raise RuntimeError(msg)

           if nproc > 1:
               cmd += ['--nthreads', str(nproc)]

           # Always save the distance matrix
           cmd += ['--write_text_matrix', 'matrix.txt']

           # For benchmark we use a constant seed to make sure we get the same results
           if benchmark:
               cmd += ['-S', '1']

           # Finally the list of files
           cmd += ['-i', flist]

           logfile = os.path.abspath("fast_protein_cluster.log")
           retcode = ample_util.run_command(cmd, logfile=logfile)
           if retcode != 0:
               msg = "non-zero return code for fast_protein_cluster in cluster!\nCheck logfile:{0}".format(logfile)
               raise RuntimeError(msg)

           cluster_list = 'cluster_output.clusters'
           cluster_stats = 'cluster_output.cluster.stats'
           if not os.path.isfile(cluster_list) or not os.path.isfile(cluster_stats):
               msg = "Cannot find files: {0} and {1}".format(cluster_list, cluster_stats)
               raise RuntimeError(msg)

           # Check stats and get centroids
           csizes = []
           centroids = []
           with open(cluster_stats) as f:
               for line in f:
                   if line.startswith("Cluster:"):
                       fields = line.split()
                       csizes.append(int(fields[4]))
                       centroids.append(fields[7])

           if len(csizes) != FPC_NUM_CLUSTERS:
               msg = "Found {0} clusters in {1} but was expecting {2}".format(len(csizes), cluster_stats, FPC_NUM_CLUSTERS)
               raise RuntimeError(msg)

           all_clusters = [[] for _ in range(FPC_NUM_CLUSTERS)]
           # Read in the clusters
           with open(cluster_list) as f:
               for line in f:
                   fields = line.split()
                   model = fields[0]
                   idxCluster = int(fields[1])
                   all_clusters[idxCluster].append(model)

           # NOTE: the sizes in csizes are deliberately not checked against the
           # clusters read in, as there seems to be a bug in fast_protein_cluster
           # with the printing of sizes.

           # make sure all clusters are < max_cluster_size
           for i, c in enumerate(all_clusters):
               if len(c) > max_cluster_size:
                   all_clusters[i] = c[:max_cluster_size]

           # Create the data - we loop through the number of clusters specified by the user
           clusters = []
           for i in range(num_clusters):
               cluster = Cluster()
               cluster.method = cluster_method
               cluster.score_type = score_type
               cluster.index = i + 1
               cluster.centroid = centroids[i]
               cluster.num_clusters = num_clusters
               cluster.models = all_clusters[i]
               # Without this append the method always returned an empty list
               clusters.append(cluster)
           return clusters
       finally:
           # always need to go back to the original directory, even on error
           os.chdir(owd)
示例#38
0
def on_cluster():
    """Return True if running ``qstat`` succeeds, False otherwise.

    Any error raised while trying to run the command is treated the same as
    a non-zero return code.
    """
    try:
        retcode = ample_util.run_command(["qstat"])
    except Exception:
        # Not a bare except, so KeyboardInterrupt/SystemExit still propagate
        retcode = -1
    return retcode == 0
示例#39
0
    def _generate_distance_matrix_generic(self, models, purge=True, purge_all=False, metric='qscore'):
        # Make sure all the files are in the same directory otherwise we wont' work
        mdir = os.path.dirname(models[0])
        if not all([ os.path.dirname(p) == mdir for p in models ]):
            raise RuntimeError("All pdb files are not in the same directory!")

        models = sorted(models)
        self.index2pdb = models
        nmodels = len(models)

        # Create list of pdb files
        fname = os.path.join(os.getcwd(), FILE_LIST_NAME)
        with open(fname, 'w') as f:
            f.write("\n".join(models) + "\n")

        # Make the archive
        logger.debug("Generating gesamt archive from models in directory %s", mdir)
        garchive = 'gesamt.archive'
        if not os.path.isdir(garchive): os.mkdir(garchive)
        logfile = os.path.abspath('gesamt_archive.log')
        cmd = [self.executable, '--make-archive', garchive, '-pdb', mdir]
        #cmd += [ '-nthreads=auto' ]
        cmd += ['-nthreads={0}'.format(self.nproc)]
        # HACK FOR DYLD!!!!
        env = None
        #env = {'DYLD_LIBRARY_PATH' : '/opt/ccp4-devtools/install/lib'}
        rtn = ample_util.run_command(cmd, logfile,env = env)
        if rtn != 0:
            raise RuntimeError("Error running gesamt - check logfile: {0}".format(logfile))

        if purge_all:
            os.unlink(logfile)

        # Now loop through each file creating the matrix
        if metric == 'rmsd':
            parity = 0.0
        elif metric == 'qscore':
            parity = 1
        else:
            raise RuntimeError("Unrecognised metric: {0}".format(metric))

        #m = [[parity for _ in range(nmodels)] for _ in range(nmodels)]
        m = numpy.full([nmodels, nmodels], parity, dtype=numpy.float)
        for i, model in enumerate(models):
            mname = os.path.basename(model)
            gesamt_out = '{0}_gesamt.out'.format(mname)
            logfile = '{0}_gesamt.log'.format(mname)
            cmd = [self.executable, model, '-archive', garchive, '-o', gesamt_out]
            cmd += ['-nthreads={0}'.format(self.nproc)]
            rtn = ample_util.run_command(cmd, logfile)
            if rtn != 0:
                raise RuntimeError("Error running gesamt!")
            else:
                if purge: os.unlink(logfile)

            gdata = self._parse_gesamt_out(gesamt_out)
            assert gdata[0].file_name == mname, gdata[0].file_name + " " + mname
            score_dict = {g.file_name: (g.rmsd, g.q_score) for g in gdata}

            for j in range(i + 1, nmodels):
                # Try and get the rmsd and qscore for this model. If it's missing we assume the model was
                # too divergent for gesamt to find it and we set the rmsd and qscore to fixed values
                model2 = os.path.basename(models[j])
                try:
                    rmsd, qscore = score_dict[model2]
                except KeyError:
                    rmsd = RMSD_MAX
                    qscore = QSCORE_MIN
                if metric == 'rmsd':
                    score = rmsd
                elif metric == 'qscore':
                    score = qscore
                else:
                    raise RuntimeError("Unrecognised metric: {0}".format(metric))
                m[i, j] = score
            if purge_all:
                os.unlink(gesamt_out)

        # Copy upper half of matrix to lower
        i_lower = numpy.tril_indices(nmodels, -1)
        m[i_lower] = m.T[i_lower]  # make the matrix symmetric
        self.distance_matrix = m

        # Remove the gesamt archive
        if purge:
            shutil.rmtree(garchive)

        # Write out the matrix in a form spicker can use
        self.dump_pdb_matrix(SCORE_MATRIX_NAME)
        return
示例#40
0
    def cluster(
        self,
        models=None,
        num_clusters=None,
        nproc=1,
        score_type="rmsd",
        cluster_method="kmeans",
        work_dir=None,
        fpc_exe=None,
        max_cluster_size=200,
        benchmark=False,
    ):
        """Cluster models with fast_protein_cluster and return the clusters.

        Args:
            models: list of paths of the model files to cluster.
            num_clusters: number of clusters to return (at most 5).
            nproc: passed as ``--nthreads`` when greater than 1.
            score_type: "rmsd" or "tm".
            cluster_method: "kmeans" or "hcomplete".
            work_dir: directory to run in; created if it does not exist.
            fpc_exe: path of the fast_protein_cluster executable.
            max_cluster_size: clusters with more models than this are truncated.
            benchmark: if True a constant seed (``-S 1``) is passed.

        Returns:
            List of ``num_clusters`` ``Cluster`` objects.

        Raises:
            RuntimeError: on invalid arguments, missing input or output files,
                a non-zero return code, or an unexpected number of clusters.
        """
        # FPC default is 5 clusters - we just run with this for the time being
        FPC_NUM_CLUSTERS = 5
        if num_clusters is None or num_clusters > FPC_NUM_CLUSTERS:
            msg = "Cannot work with more than {0} clusters, got: {1}.".format(
                FPC_NUM_CLUSTERS, num_clusters)
            raise RuntimeError(msg)

        owd = os.getcwd()
        if not os.path.isdir(work_dir):
            os.mkdir(work_dir)
        os.chdir(work_dir)
        try:
            if not len(models) or not all([os.path.isfile(m) for m in models]):
                msg = "Missing models: {0}".format(models)
                raise RuntimeError(msg)

            # Create list of files
            flist = 'files.list'
            with open(flist, 'w') as f:
                for m in models:
                    f.write("{0}\n".format(os.path.abspath(m)))

            if not os.path.isfile(fpc_exe):
                msg = "Cannot find fast_protein_cluster executable: {0}".format(
                    fpc_exe)
                raise RuntimeError(msg)

            # Build up the command-line
            cmd = [fpc_exe]
            if score_type == "rmsd":
                cmd += ['--rmsd']
            elif score_type == "tm":
                cmd += ['--tmscore']
            else:
                msg = "Unrecognised score_type: {0}".format(score_type)
                raise RuntimeError(msg)

            if cluster_method == "kmeans":
                cmd += ['--cluster_kmeans']
            elif cluster_method == "hcomplete":
                cmd += ['--cluster_hcomplete']
            else:
                msg = "Unrecognised cluster_method: {0}".format(cluster_method)
                raise RuntimeError(msg)

            if nproc > 1:
                cmd += ['--nthreads', str(nproc)]

            # Always save the distance matrix
            cmd += ['--write_text_matrix', 'matrix.txt']

            # For benchmark we use a constant seed to make sure we get the same results
            if benchmark:
                cmd += ['-S', '1']

            # Finally the list of files
            cmd += ['-i', flist]

            logfile = os.path.abspath("fast_protein_cluster.log")
            retcode = ample_util.run_command(cmd, logfile=logfile)
            if retcode != 0:
                msg = "non-zero return code for fast_protein_cluster in cluster!\nCheck logfile:{0}".format(
                    logfile)
                raise RuntimeError(msg)

            cluster_list = 'cluster_output.clusters'
            cluster_stats = 'cluster_output.cluster.stats'
            if not os.path.isfile(cluster_list) or not os.path.isfile(
                    cluster_stats):
                msg = "Cannot find files: {0} and {1}".format(
                    cluster_list, cluster_stats)
                raise RuntimeError(msg)

            # Check stats and get centroids
            csizes = []
            centroids = []
            with open(cluster_stats) as f:
                for line in f:
                    if line.startswith("Cluster:"):
                        fields = line.split()
                        csizes.append(int(fields[4]))
                        centroids.append(fields[7])

            if len(csizes) != FPC_NUM_CLUSTERS:
                msg = "Found {0} clusters in {1} but was expecting {2}".format(
                    len(csizes), cluster_stats, FPC_NUM_CLUSTERS)
                raise RuntimeError(msg)

            all_clusters = [[] for _ in range(FPC_NUM_CLUSTERS)]
            # Read in the clusters
            with open(cluster_list) as f:
                for line in f:
                    fields = line.split()
                    model = fields[0]
                    idxCluster = int(fields[1])
                    all_clusters[idxCluster].append(model)

            # NOTE: the sizes in csizes are deliberately not checked against the
            # clusters read in, as there seems to be a bug in fast_protein_cluster
            # with the printing of sizes.

            # make sure all clusters are < max_cluster_size
            for i, c in enumerate(all_clusters):
                if len(c) > max_cluster_size:
                    all_clusters[i] = c[:max_cluster_size]

            # Create the data - we loop through the number of clusters specified by the user
            clusters = []
            for i in range(num_clusters):
                cluster = Cluster()
                cluster.method = cluster_method
                cluster.score_type = score_type
                cluster.index = i + 1
                cluster.centroid = centroids[i]
                cluster.num_clusters = num_clusters
                cluster.models = all_clusters[i]
                # Without this append the method always returned an empty list
                clusters.append(cluster)
            return clusters
        finally:
            # always need to go back to the original directory, even on error
            os.chdir(owd)
示例#41
0
    def _cluster(self,
                 models,
                 run_dir=None,
                 score_type='rmsd',
                 score_matrix=None,
                 nproc=1):
        """Run spicker to cluster the models.

        The run happens in ``self.run_dir`` (set from ``run_dir`` if given,
        defaulting to a ``spicker`` directory under the current directory),
        and the parsed results are stored in ``self.results``. The original
        working directory is restored on exit, including on error.

        Args:
            models: models forwarded to ``self.create_input_files``.
            run_dir: directory to run spicker in.
            score_type: score type forwarded to ``self.create_input_files``
                and stored in ``self.score_type``.
            score_matrix: forwarded to ``self.create_input_files``.
            nproc: value exported to spicker as ``OMP_NUM_THREADS``.

        Raises:
            RuntimeError: if spicker exits with a non-zero return code.
        """
        owd = os.getcwd()
        if run_dir:
            self.run_dir = os.path.abspath(run_dir)
        if not self.run_dir:
            self.run_dir = os.path.join(owd, 'spicker')
        if not os.path.isdir(self.run_dir):
            os.mkdir(self.run_dir)
        os.chdir(self.run_dir)

        try:
            logger.debug(
                "Running spicker with score_type {0} in directory: {1}".format(
                    score_type, self.run_dir))
            logger.debug("Using executable: {0} on {1} processors".format(
                self.spicker_exe, nproc))

            self.score_type = score_type
            self.create_input_files(models,
                                    score_type=score_type,
                                    score_matrix=score_matrix)

            # We need special care if we are running with tm scores as we will be using the OPENMP
            # version of spicker which requires increasing the stack size on linux and setting the
            # OMP_NUM_THREADS environment variable on all platforms
            # The stack size on 64-bit linux seems to be 15Mb, so I guess asking for 50 seems reasonable
            # I'm assuming that the limit is in bytes and specified by an integer so 50Mb -> 50000000
            preexec_fn = None
            env = {'OMP_NUM_THREADS': str(nproc)}
            if sys.platform.lower().startswith('linux'):

                def set_stack():
                    import resource

                    stack_bytes = 50000000  # 50Mb
                    resource.setrlimit(resource.RLIMIT_STACK,
                                       (stack_bytes, stack_bytes))

                preexec_fn = set_stack

            logfile = os.path.abspath("spicker.log")
            rtn = ample_util.run_command([self.spicker_exe],
                                         logfile=logfile,
                                         env=env,
                                         preexec_fn=preexec_fn)
            if not rtn == 0:
                raise RuntimeError(
                    "Error running spicker, check logfile: {0}".format(logfile))

            # Read the log and generate the results
            self.results = self.process_log()
        finally:
            # Always go back to where we started, even if spicker failed
            os.chdir(owd)
        return
示例#42
0
def merge_mtz(mtz1_path, mtz1_labels, mtz2_path, mtz2_labels):
    """Create an MTZ file with the given columns from two mtz files, using cad.

    A label from the second file that clashes with one already selected is
    renamed by appending the file number (e.g. ``FP`` -> ``FP2``). The merged
    file is written to the current directory as ``<name1>_<name2>.mtz``.

    Args:
        mtz1_path: path of the first mtz file (``hklin1``).
        mtz1_labels: list of column labels to take from the first file.
        mtz2_path: path of the second mtz file (``hklin2``).
        mtz2_labels: list of column labels to take from the second file.

    Returns:
        Tuple of (absolute path of the merged mtz file, list of the column
        labels as named in the merged file).

    Raises:
        AssertionError: if a label list contains duplicates, or a renamed
            label clashes with an existing label.
        RuntimeError: if cad exits with a non-zero return code.
    """
    # Can't have any duplicates in file labels
    assert len(mtz1_labels) == len(set(mtz1_labels)), "Duplicate labels in mtz1_labels"
    assert len(mtz2_labels) == len(set(mtz2_labels)), "Duplicate labels in mtz2_labels"

    name1 = os.path.splitext(os.path.basename(mtz1_path))[0]
    name2 = os.path.splitext(os.path.basename(mtz2_path))[0]
    merged_mtz = os.path.abspath("{0}_{1}.mtz".format(name1, name2))

    cmd = ['cad', 'hklin1', mtz1_path, 'hklin2', mtz2_path, 'hklout', merged_mtz]

    # Work out which labels to extract from which file (LABIN) and which of
    # them clash with an earlier label and so need renaming (LABOUT)
    labels = []
    labin_lines = []
    labout_lines = []
    for fileno, file_labels in enumerate((mtz1_labels, mtz2_labels), start=1):
        selected = []
        renamed = []
        for label_idx, label in enumerate(file_labels, start=1):
            newlabel = label
            if label in labels:
                newlabel = label + str(fileno)
                renamed.append(" E{0}={1}".format(label_idx, newlabel))
            assert newlabel not in labels, "Too many duplicate label names: {0}".format(newlabel)
            labels.append(newlabel)
            selected.append(" E{0}={1}".format(label_idx, label))
        if selected:
            labin_lines.append("LABIN FILE {0}".format(fileno) + "".join(selected))
        if renamed:
            labout_lines.append("LABOUT FILE_NUMBER {0}".format(fileno) + "".join(renamed))

    # Each keyword line is terminated exactly once; previously the terminator
    # of a LABOUT block was written after the next block's header, and the
    # final terminator was keyed on a leaked loop variable.
    stdin = "".join(line + "\n" for line in labin_lines + labout_lines)

    logfile = os.path.abspath("cad.log")
    retcode = ample_util.run_command(cmd=cmd, stdin=stdin, logfile=logfile)
    if retcode != 0:
        raise RuntimeError("Error running command: {0}\nCheck logfile: {1}".format(" ".join(cmd), logfile))
    os.unlink(logfile)

    return merged_mtz, labels
示例#43
0
def worker(inqueue, early_terminate=False, check_success=None):
    """Worker process that runs job scripts from a queue until it is empty.

    Jobs are taken off ``inqueue`` one at a time and run from the directory
    the script lives in. When the queue is found to be empty the process
    exits, with code 0 if every job returned 0 and 1 otherwise. With
    ``early_terminate`` the process exits with code 0 as soon as
    ``check_success`` reports success for a job.

    Parameters
    ----------
    inqueue : :obj:`Queue`
       Queue of paths to the job scripts to run
    early_terminate : bool
       Exit on the first successful job rather than draining the queue
    check_success : callable
       Called with the job path; a true result means the job succeeded.
       Must be callable when ``early_terminate`` is set.

    Warnings
    --------
    This needs to import the main module that it lives in so maybe this should
    live in a separate module?

    """
    if early_terminate:
        assert callable(check_success)

    def process_name():
        return multiprocessing.current_process().name

    all_jobs_ok = True
    while not inqueue.empty():
        # Got a script so run
        job = inqueue.get()
        logger.debug("Worker {0} running job {1}".format(process_name(), job))

        # The script name without its extension names the logfile
        directory, script_name = os.path.split(job)
        jobname, _ = os.path.splitext(script_name)

        # Run from the directory the script lives in
        os.chdir(directory)
        retcode = ample_util.run_command([job],
                                         logfile=jobname + ".log",
                                         dolog=False,
                                         check=True)

        # Can we use the retcode to check?
        # REM - is retcode object
        if retcode != 0:
            logger.warning("WARNING! Worker {0} got retcode {1}".format(
                process_name(), retcode))
            all_jobs_ok = False

        # Now check the result if early terminate
        if early_terminate and check_success(job):
            logger.debug("Worker {0} job succeeded".format(process_name()))
            sys.exit(0)

    logger.debug("worker {0} got empty inqueue".format(process_name()))
    sys.exit(0 if all_jobs_ok else 1)
示例#44
0
    def _generate_distance_matrix_generic(self, models, purge=True, purge_all=False, metric='qscore'):
        # Make sure all the files are in the same directory otherwise we wont' work
        mdir = os.path.dirname(models[0])
        if not all([ os.path.dirname(p) == mdir for p in models ]):
            raise RuntimeError("All pdb files are not in the same directory!")

        models = sorted(models)
        self.index2pdb = models
        nmodels = len(models)

        # Create list of pdb files
        fname = os.path.join(os.getcwd(), FILE_LIST_NAME)
        with open(fname, 'w') as f:
            f.write("\n".join(models) + "\n")

        # Make the archive
        logger.debug("Generating gesamt archive from models in directory %s", mdir)
        garchive = 'gesamt.archive'
        if not os.path.isdir(garchive): os.mkdir(garchive)
        logfile = os.path.abspath('gesamt_archive.log')
        cmd = [self.executable, '--make-archive', garchive, '-pdb', mdir]
        #cmd += [ '-nthreads=auto' ]
        cmd += ['-nthreads={0}'.format(self.nproc)]
        # HACK FOR DYLD!!!!
        env = None
        #env = {'DYLD_LIBRARY_PATH' : '/opt/ccp4-devtools/install/lib'}
        rtn = ample_util.run_command(cmd, logfile,env = env)
        if rtn != 0:
            raise RuntimeError("Error running gesamt - check logfile: {0}".format(logfile))

        if purge_all:
            os.unlink(logfile)

        # Now loop through each file creating the matrix
        if metric == 'rmsd':
            parity = 0.0
        elif metric == 'qscore':
            parity = 1
        else:
            raise RuntimeError("Unrecognised metric: {0}".format(metric))

        #m = [[parity for _ in range(nmodels)] for _ in range(nmodels)]
        m = numpy.full([nmodels, nmodels], parity, dtype=numpy.float)
        for i, model in enumerate(models):
            mname = os.path.basename(model)
            gesamt_out = '{0}_gesamt.out'.format(mname)
            logfile = '{0}_gesamt.log'.format(mname)
            cmd = [self.executable, model, '-archive', garchive, '-o', gesamt_out]
            cmd += ['-nthreads={0}'.format(self.nproc)]
            rtn = ample_util.run_command(cmd, logfile)
            if rtn != 0:
                raise RuntimeError("Error running gesamt!")
            else:
                if purge: os.unlink(logfile)

            gdata = self._parse_gesamt_out(gesamt_out)
            assert gdata[0].file_name == mname, gdata[0].file_name + " " + mname
            score_dict = {g.file_name: (g.rmsd, g.q_score) for g in gdata}

            for j in range(i + 1, nmodels):
                # Try and get the rmsd and qscore for this model. If it's missing we assume the model was
                # too divergent for gesamt to find it and we set the rmsd and qscore to fixed values
                model2 = os.path.basename(models[j])
                try:
                    rmsd, qscore = score_dict[model2]
                except KeyError:
                    rmsd = RMSD_MAX
                    qscore = QSCORE_MIN
                if metric == 'rmsd':
                    score = rmsd
                elif metric == 'qscore':
                    score = qscore
                else:
                    raise RuntimeError("Unrecognised metric: {0}".format(metric))
                m[i, j] = score
            if purge_all:
                os.unlink(gesamt_out)

        # Copy upper half of matrix to lower
        i_lower = numpy.tril_indices(nmodels, -1)
        m[i_lower] = m.T[i_lower]  # make the matrix symmetric
        self.distance_matrix = m

        # Remove the gesamt archive
        if purge:
            shutil.rmtree(garchive)

        # Write out the matrix in a form spicker can use
        self.dump_pdb_matrix(SCORE_MATRIX_NAME)
        return
示例#45
0
def merge_mtz(mtz1_path, mtz1_labels, mtz2_path, mtz2_labels):
    """Merge the selected columns of two MTZ files into a single MTZ file with cad.

    The output file is named ``<stem of mtz1>_<stem of mtz2>.mtz`` and the cad log
    ``cad.log``; both paths are resolved against the current working directory.
    A label that clashes with one already selected is renamed by appending the
    number (1 or 2) of the file it comes from.

    Parameters
    ----------
    mtz1_path : str
        Path to the first MTZ file (passed to cad as ``hklin1``).
    mtz1_labels : :obj:`list`
        Column labels to take from the first file. Must not contain duplicates.
    mtz2_path : str
        Path to the second MTZ file (passed to cad as ``hklin2``).
    mtz2_labels : :obj:`list`
        Column labels to take from the second file. Must not contain duplicates.

    Returns
    -------
    tuple
        ``(merged_mtz, labels)``: the absolute path given to cad as ``hklout`` and
        the list of final (possibly renamed) labels, file 1's labels first.

    Raises
    ------
    AssertionError
        If either label list contains duplicates, or a renamed label still
        clashes with a label that has already been selected.
    RuntimeError
        If cad exits with a non-zero return code.
    """
    # Can't have any duplicates within the labels requested from a single file
    assert len(mtz1_labels) == len(set(mtz1_labels)), "Duplicate labels in mtz1_labels"
    assert len(mtz2_labels) == len(set(mtz2_labels)), "Duplicate labels in mtz2_labels"

    name1 = os.path.splitext(os.path.basename(mtz1_path))[0]
    name2 = os.path.splitext(os.path.basename(mtz2_path))[0]
    merged_mtz = os.path.abspath("{0}_{1}.mtz".format(name1, name2))

    cmd = ['cad', 'hklin1', mtz1_path, 'hklin2', mtz2_path, 'hklout', merged_mtz]

    labels = []  # final label names, in output order
    labin_lines = []  # one "LABIN FILE n ..." line per file that contributes labels
    labout_lines = []  # one "LABOUT FILE_NUMBER n ..." line per file with renamed labels
    for fileno, file_labels in enumerate([mtz1_labels, mtz2_labels], start=1):
        labin = []
        labout = []
        for labelno, label in enumerate(file_labels, start=1):
            # A label that has already been taken gets the file number appended
            newlabel = label + str(fileno) if label in labels else label
            assert newlabel not in labels, "Too many duplicate label names: {0}".format(newlabel)
            labels.append(newlabel)
            labin.append("E{0}={1}".format(labelno, label))
            if newlabel != label:
                labout.append("E{0}={1}".format(labelno, newlabel))
        if labin:
            labin_lines.append(" ".join(["LABIN FILE {0}".format(fileno)] + labin))
        if labout:
            labout_lines.append(" ".join(["LABOUT FILE_NUMBER {0}".format(fileno)] + labout))

    # Every keyword line must be newline-terminated; LABIN lines precede LABOUT lines
    stdin = "".join(line + "\n" for line in labin_lines + labout_lines)

    logfile = os.path.abspath("cad.log")
    retcode = ample_util.run_command(cmd=cmd, stdin=stdin, logfile=logfile)
    if retcode != 0:
        raise RuntimeError("Error running command: {0}\nCheck logfile: {1}".format(" ".join(cmd), logfile))

    return merged_mtz, labels
示例#46
0
    def superpose_models(self, models, work_dir=None, basename='theseus', homologs=False, alignment_file=None):
        """Superpose the given models with theseus and return the path of the ensemble pdb.

        Besides returning the ensemble path, this sets the `theseus_log`, `variance_log`,
        `superposed_models` and `var_by_res` attributes, and (for homologs only)
        `aligned_models`.

        Parameters
        ----------
        models : :obj:`list`
            List of pdb files to be superposed.
        work_dir : str
            The directory to run theseus in and generate all the output files
        basename : str
            The stem that will be used to name all files
        homologs : bool
            True if the pdbs are homologous models as opposed to ab initio ones
        alignment_file : str
            An externally generated alignment file for homologous models in FASTA format.
            If not given for homologs, one is obtained from `self.alignment_file(models)`.

        Returns
        -------
        str
            Path of the pdb file containing the ensemble of superposed models

        Raises
        ------
        RuntimeError
            If theseus exits with a non-zero return code.
        """
        self._set_work_dir(work_dir)

        local_copies = []
        if homologs:
            # Theseus builds the names of the aligned pdb files from the strings in the fasta
            # header, so a model given with a path (e.g. /foo/bar.pdb) would make it try to
            # create "basename_/foo/bar.pdb". The models are therefore copied into the working
            # directory, referred to by bare file name, and the copies removed afterwards.
            if not alignment_file:
                alignment_file = self.alignment_file(models)
            for source_pdb in models:
                local_copies.append(os.path.join(self.work_dir, os.path.basename(source_pdb)))
            for source_pdb, local_pdb in zip(models, local_copies):
                shutil.copy(source_pdb, local_pdb)
            models = local_copies

        # -Z stops theseus lining the models up to the principal axes; -o makes every model
        # line up with the first one
        cmd = [self.theseus_exe, '-a0', '-r', basename, '-Z', '-o', os.path.basename(models[0])]
        if homologs:
            cmd.extend(['-A', alignment_file])
            cmd.extend(os.path.basename(pdb) for pdb in models)
        else:
            # Paths are passed as given (relative paths were tried previously and failed tests)
            cmd.extend(models)

        log_name = "tlog_{0}.log".format(basename)
        self.theseus_log = os.path.join(self.work_dir, log_name)
        retcode = ample_util.run_command(cmd, logfile=self.theseus_log, directory=self.work_dir)
        if retcode != 0:
            msg = "non-zero return code for theseus in superpose_models!\n See log: {0}".format(self.theseus_log)
            raise RuntimeError(msg)

        self.variance_log = os.path.join(self.work_dir, '{0}_variances.txt'.format(basename))
        self.superposed_models = os.path.join(self.work_dir, '{0}_sup.pdb'.format(basename))

        if homologs:
            # Replace each copied input with theseus' aligned output, under the original file
            # name, so that the names match those in the alignment file
            self.aligned_models = []
            for local_pdb in local_copies:
                pdb_name = os.path.basename(local_pdb)
                theseus_output = os.path.join(self.work_dir, "{0}_{1}".format(basename, pdb_name))
                os.unlink(local_pdb)
                os.rename(theseus_output, os.path.join(self.work_dir, pdb_name))
                self.aligned_models.append(pdb_name)

        # Set the variances
        self.var_by_res = self.parse_variances()
        return self.superposed_models
示例#47
0
def on_cluster():
    """Return True if the ``qstat`` command can be run and exits with code 0.

    Any error raised while trying to run ``qstat`` (e.g. the command not being
    available) is treated as failure and yields False.
    """
    try:
        retcode = ample_util.run_command(["qstat"])
    except Exception:
        # Deliberately best-effort, but don't swallow KeyboardInterrupt/SystemExit
        retcode = -1
    return retcode == 0