Example #1
def aniso_correct(solution_id, mtzin, i, sigi, fp, sigfp, logfile):
    from fragon.place import CallbackObject
    input = phaser.InputMR_DAT()
    input.setHKLI(mtzin)
    if i is not None and sigi is not None:
        input.setLABI_I_SIGI(i, sigi)
    else:
        input.setLABI_F_SIGF(fp, sigfp)
    input.setMUTE(True)
    data = phaser.runMR_DAT(input)
    with open(logfile, 'w') as aniso_log:
        print(data.logfile(), file=aniso_log)
    mtzout = solution_id + '.aniso'
    input = phaser.InputANO()
    input.setSPAC_HALL(data.getSpaceGroupHall())
    input.setCELL6(data.getUnitCell())
    input.setREFL_DATA(data.getDATA())
    input.setHKLI(mtzin)
    input.setROOT(mtzout)
    input.setMUTE(True)
    aniso = phaser.runANO(input)
    with open(logfile, 'a') as aniso_log:
        print(aniso.logfile(), file=aniso_log)
    if data.Success():
        success = True
    else:
        log.critical('Job exit status FAILURE')
        log.critical('%s ERROR : %s' % (data.ErrorName(), data.ErrorMessage()))
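A minimal usage sketch for the aniso_correct example above, assuming phaser and a module-level log logger are importable in the calling module; the solution id, MTZ path, column labels and log file name below are placeholders.

# Hypothetical call (placeholder inputs); pass None for i/sigi to fall back to F/SIGF
aniso_correct(
    solution_id='solution_1',    # placeholder; output root becomes solution_1.aniso
    mtzin='input_data.mtz',      # placeholder reflection file
    i='I', sigi='SIGI',          # intensity column labels (or None, None)
    fp='F', sigfp='SIGF',        # amplitude column labels used as fallback
    logfile='aniso.log',         # placeholder log path
)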
Example #2
    def preprocess(self):
        import phaser
        if self.verbose:
            self.logger.debug('RunPhaser::preprocess')
        #try:
        #Read the dataset
        i = phaser.InputMR_DAT()
        i.setHKLI(self.datafile)
        #f = 'F'
        #sigf = 'SIGF'
        i.setLABI_F_SIGF('F', 'SIGF')
        i.setMUTE(True)
        r = phaser.runMR_DAT(i)
        if r.Success():
            for i in range(2):
                print(self.process(r))
Example #3
def prepare_data(mtzin, i, sigi, fp, sigfp, logfile):
  input = phaser.InputMR_DAT()
  input.setHKLI(mtzin)
  if i is not None and sigi is not None:
    input.setLABI_I_SIGI(i, sigi)
  else:
    input.setLABI_F_SIGF(fp, sigfp)
  input.setMUTE(True)
  data = phaser.runMR_DAT(input)
  with open(logfile, 'w') as data_log:
    print(data.logfile(), file=data_log)
  if data.Success():
    return data
  else:
    log.critical('Job exit status FAILURE')
    log.critical('%s ERROR : %s' % (data.ErrorName(), data.ErrorMessage()))
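A minimal usage sketch for prepare_data above, under the same assumptions (phaser and a module-level log logger available); file names and column labels are placeholders. On success the phaser.runMR_DAT result is returned, otherwise the function falls through and returns None.

# Hypothetical call (placeholder inputs); amplitudes are used because i/sigi are None
data = prepare_data('input_data.mtz', None, None, 'F', 'SIGF', 'prepare_data.log')
if data is not None:
    print(data.getSpaceGroupName(), data.getUnitCell())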
Example #4
    def preprocess(self):
        import phaser
        if self.verbose:
            self.logger.debug('RunPhaser::preprocess')
        #try:
        #Read the dataset
        i = phaser.InputMR_DAT()
        i.setHKLI(self.datafile)
        #f = 'F'
        #sigf = 'SIGF'
        i.setLABI_F_SIGF('F', 'SIGF')
        i.setMUTE(True)
        r = phaser.runMR_DAT(i)
        if r.Success():
            for i in range(2):
                print(self.process(r))
Example #5
    def run(self):
        """Function to run rotation search using PHASER"""

        current_work_dir = os.getcwd()
        if os.path.exists(self.work_dir):
            os.chdir(self.work_dir)
        else:
            os.makedirs(self.work_dir)
            os.chdir(self.work_dir)

        i = InputMR_DAT()
        i.setHKLI(self.hklin)

        if self.hires:
            i.setHIRES(self.hires)
        if self.i != "None" and self.sigi != "None":
            i.setLABI_I_SIGI(self.i, self.sigi)
        elif self.f != "None" and self.sigf != "None":
            i.setLABI_F_SIGF(self.f, self.sigf)
        else:
            msg = "No flags for intensities or amplitudes have been provided"
            raise RuntimeError(msg)
        i.setMUTE(True)
        run_mr_data = runMR_DAT(i)

        if run_mr_data.Success():
            i = InputMR_FRF()
            i.setJOBS(1)
            i.setREFL_DATA(run_mr_data.getREFL_DATA())
            i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            i.setCELL6(run_mr_data.getUnitCell())
            i.setROOT("phaser_mr_output")
            i.addENSE_PDB_ID("PDB", self.pdbin, float(self.eid))
            i.setENSE_DISA_CHEC('PDB', True)
            i.setCOMP_BY("SOLVENT")
            i.setCOMP_PERC(self.solvent)
            i.addSEAR_ENSE_NUM('PDB', self.nmol)
            i.setRFAC_USE(False)
            if self.timeout != 0:
                i.setKILL_TIME(self.timeout)
            run_mr_rot = runMR_FRF(i)

            with open(self.logfile, 'w') as f:
                f.write(run_mr_rot.summary())

        os.chdir(current_work_dir)
Example #6
    def run(self):
        """Function to run rotation search using PHASER"""

        current_work_dir = os.getcwd()
        if os.path.exists(self.work_dir):
            os.chdir(self.work_dir)
        else:
            os.makedirs(self.work_dir)
            os.chdir(self.work_dir)

        i = InputMR_DAT()
        i.setHKLI(self.hklin)

        if self.hires:
            i.setHIRES(self.hires)
        if self.i != "None" and self.sigi != "None":
            i.setLABI_I_SIGI(self.i, self.sigi)
        elif self.f != "None" and self.sigf != "None":
            i.setLABI_F_SIGF(self.f, self.sigf)
        else:
            msg = "No flags for intensities or amplitudes have been provided"
            raise RuntimeError(msg)
        i.setMUTE(True)
        run_mr_data = runMR_DAT(i)

        if run_mr_data.Success():
            i = InputMR_FRF()
            i.setJOBS(1)
            i.setREFL_DATA(run_mr_data.getREFL_DATA())
            i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            i.setCELL6(run_mr_data.getUnitCell())
            i.setROOT("phaser_mr_output")
            i.addENSE_PDB_ID("PDB", self.pdbin, 0.7)
            i.setCOMP_BY("SOLVENT")
            i.setCOMP_PERC(self.solvent)
            i.addSEAR_ENSE_NUM('PDB', self.nmol)
            i.setRFAC_USE(False)
            if self.timeout != 0:
                i.setKILL_TIME(self.timeout)
            run_mr_rot = runMR_FRF(i)

            with open(self.logfile, 'w') as f:
                f.write(run_mr_rot.summary())

        os.chdir(current_work_dir)
Example #7
    def run(self,
            models_dir,
            nproc=2,
            shres=3.0,
            pklim=0.5,
            npic=50,
            rotastep=1.0,
            min_solvent_content=20,
            submit_nproc=None,
            submit_qtype=None,
            submit_queue=None,
            monitor=None,
            chunk_size=0,
            **kwargs):
        """Run amore rotation function on a directory of models

        Parameters
        ----------
        models_dir : str
            The directory containing the models to run the rotation search on
        nproc : int, optional
            The number of processors to run the job on
        shres : int, float, optional
            Spherical harmonic resolution [default 3.0]
        pklim : int, float, optional
            Peak limit, output all peaks above <float> [default: 0.5]
        npic : int, optional
            Number of peaks to output from the translation function map for each orientation [default: 50]
        rotastep : int, float, optional
            Size of rotation step [default : 1.0]
        min_solvent_content : int, float, optional
            The minimum solvent content present in the unit cell with the input model [default: 20]
        submit_nproc : int
            The number of processors to use on the head node when creating submission scripts on a cluster [default: 1]
        submit_qtype : str
            The cluster submission queue type - currently support SGE and LSF
        submit_queue : str
            The queue to submit to on the cluster
        monitor
        chunk_size : int, optional
            The number of jobs to submit at the same time

        Returns
        -------
        file
            log file for each model in the models_dir

        """
        self.shres = shres
        self.pklim = pklim
        self.npic = npic
        self.rotastep = rotastep

        self.submit_qtype = submit_qtype
        self.submit_queue = submit_queue

        self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)

        mtz_labels = simbad.util.mtz_util.GetLabels(self.mtz)

        i = InputMR_DAT()
        i.setHKLI(self.mtz)
        i.setLABI_F_SIGF(mtz_labels.f, mtz_labels.sigf)
        i.setMUTE(True)
        run_mr_data = runMR_DAT(i)

        sg = run_mr_data.getSpaceGroupName().replace(" ", "")
        cell = " ".join(map(str, run_mr_data.getUnitCell()))

        sol_calc = simbad.util.matthews_prob.SolventContent(cell, sg)

        dir_name = "simbad-tmp-" + str(uuid.uuid1())
        self.script_log_dir = os.path.join(self.work_dir, dir_name)
        os.mkdir(self.script_log_dir)

        self.hklpck0 = self._generate_hklpck0()

        self.ccp4_scr = os.environ["CCP4_SCR"]
        default_tmp_dir = os.path.join(self.work_dir, 'tmp')
        if self.tmp_dir:
            self.template_tmp_dir = os.path.join(self.tmp_dir,
                                                 dir_name + "-{0}")
        else:
            self.template_tmp_dir = os.path.join(default_tmp_dir,
                                                 dir_name + "-{0}")

        predicted_molecular_weight = 0
        if run_mr_data.Success():
            i = InputCCA()
            i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            i.setCELL6(run_mr_data.getUnitCell())
            i.setMUTE(True)
            run_cca = runCCA(i)

            if run_cca.Success():
                predicted_molecular_weight = run_cca.getAssemblyMW()

        dat_models = []
        for dat_model in self.simbad_dat_files:
            name = os.path.basename(dat_model.replace(".dat", ""))
            pdb_struct = simbad.util.pdb_util.PdbStructure()
            pdb_struct.from_file(dat_model)
            try:
                solvent_content = sol_calc.calculate_from_struct(pdb_struct)
                if solvent_content < min_solvent_content:
                    msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                    logger.debug(msg, name, min_solvent_content)
                    continue
            except ValueError:
                msg = "Skipping %s: Error calculating solvent content"
                logger.debug(msg, name)
                continue
            except IndexError:
                msg = "Skipping %s: Problem with dat file"
                logger.debug(msg, name)
                continue

            x, y, z, intrad = pdb_struct.integration_box
            model_molecular_weight = pdb_struct.molecular_weight
            mw_diff = abs(predicted_molecular_weight - model_molecular_weight)

            info = simbad.core.dat_score.DatModelScore(name, dat_model,
                                                       mw_diff, x, y, z,
                                                       intrad, solvent_content,
                                                       None)
            dat_models.append(info)

        sorted_dat_models = sorted(dat_models,
                                   key=lambda x: float(x.mw_diff),
                                   reverse=False)
        n_files = len(sorted_dat_models)
        chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
        total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(
            n_files, chunk_size)

        if submit_qtype == 'local':
            processes = nproc
        else:
            processes = submit_nproc

        results = []
        iteration_range = range(0, n_files, chunk_size)
        for cycle, i in enumerate(iteration_range):
            logger.info("Working on chunk %d out of %d", cycle + 1,
                        total_chunk_cycles)

            if self.solution:
                logger.info(
                    "Early termination criteria met, skipping chunk %d",
                    cycle + 1)
                continue

            collector = ScriptCollector(None)
            amore_files = []
            with pool.Pool(processes=processes) as p:
                for result in p.map(self, sorted_dat_models[i:i + chunk_size]):
                    if result is not None:
                        collector.add(result[0])
                        amore_files.append(result[1])

            if len(collector.scripts) > 0:
                logger.info("Running AMORE tab/rot functions")
                amore_logs, dat_models = zip(*amore_files)
                simbad.util.submit_chunk(collector, self.script_log_dir, nproc,
                                         'simbad_amore', submit_qtype,
                                         submit_queue, True, monitor,
                                         self.rot_succeeded_log)

                for dat_model, amore_log in zip(dat_models, amore_logs):
                    base = os.path.basename(amore_log)
                    pdb_code = base.replace("amore_", "").replace(".log", "")
                    try:
                        rotsearch_parser = simbad.parsers.rotsearch_parser.AmoreRotsearchParser(
                            amore_log)
                        score = simbad.core.amore_score.AmoreRotationScore(
                            pdb_code, dat_model, rotsearch_parser.alpha,
                            rotsearch_parser.beta, rotsearch_parser.gamma,
                            rotsearch_parser.cc_f, rotsearch_parser.rf_f,
                            rotsearch_parser.cc_i, rotsearch_parser.cc_p,
                            rotsearch_parser.icp,
                            rotsearch_parser.cc_f_z_score,
                            rotsearch_parser.cc_p_z_score,
                            rotsearch_parser.num_of_rot)
                        if rotsearch_parser.cc_f_z_score:
                            results += [score]
                    except IOError:
                        pass

            else:
                logger.critical("No structures to be trialled")

        self._search_results = results
        shutil.rmtree(self.script_log_dir)

        if os.path.isdir(default_tmp_dir):
            shutil.rmtree(default_tmp_dir)
Example #8
    def run(self):
        """Function to run molecular replacement using PHASER

        Returns
        -------
        file
            Output pdb file
        file
            Output log file
        """

        # Make a note of the current working directory
        current_work_dir = os.getcwd()

        # Change to the PHASER working directory
        if os.path.exists(self.work_dir):
            os.chdir(self.work_dir)
        else:
            os.makedirs(self.work_dir)
            os.chdir(self.work_dir)

        # Copy hklin and pdbin to the working directory for efficient running of PHASER
        hklin = os.path.join(self.work_dir, os.path.basename(self.hklin))
        shutil.copyfile(self.hklin, hklin)
        pdbin = os.path.join(self.work_dir, os.path.basename(self.pdbin))
        shutil.copyfile(self.pdbin, pdbin)

        i = InputMR_DAT()
        i.setHKLI(hklin)

        if self.hires:
            i.setHIRES(self.hires)
        if self.autohigh:
            i.setRESO_AUTO_HIGH(self.autohigh)
        if self.i != "None" and self.sigi != "None":
            i.setLABI_I_SIGI(self.i, self.sigi)
        elif self.f != "None" and self.sigf != "None":
            i.setLABI_F_SIGF(self.f, self.sigf)
        else:
            msg = "No flags for intensities or amplitudes have been provided"
            raise RuntimeError(msg)
        i.setSGAL_SELE(SGAlternatives[self.sgalternative].value)
        i.setMUTE(True)
        r = runMR_DAT(i)

        if r.Success():
            i = InputMR_AUTO()
            i.setJOBS(1)
            i.setREFL_DATA(r.getREFL_DATA())
            i.setROOT("phaser_mr_output")
            i.addENSE_PDB_ID("PDB", pdbin, 0.7)
            i.setENSE_DISA_CHEC('PDB', True)
            i.setCOMP_BY("SOLVENT")
            i.setCOMP_PERC(self.solvent)
            # nmol set to one for testing
            i.addSEAR_ENSE_NUM('PDB', 1)
            i.setSGAL_SELE(SGAlternatives[self.sgalternative].value)
            if self.timeout != 0:
                i.setKILL_TIME(self.timeout)
            i.setMUTE(True)
            del(r)
            r = runMR_AUTO(i)

            with open(self.logfile, 'w') as f:
                f.write(r.summary())

            shutil.move(r.getTopPdbFile(), self.pdbout)

            # Output original mtz with a change of basis if needed
            original_space_group, _, _ = mtz_util.crystal_data(self.hklin)
            space_group, _, _ = mtz_util.crystal_data(r.getTopMtzFile())
            if original_space_group != space_group:
                mtz_util.reindex(self.hklin, self.hklout, space_group)
            else:
                shutil.copyfile(self.hklin, self.hklout)

        # Return to original working directory
        os.chdir(current_work_dir)

        # Delete any files copied across
        if os.path.isfile(os.path.join(self.work_dir, os.path.basename(self.hklin))):
            os.remove(os.path.join(self.work_dir, os.path.basename(self.hklin)))
        if os.path.isfile(os.path.join(self.work_dir, os.path.basename(self.pdbin))):
            os.remove(os.path.join(self.work_dir, os.path.basename(self.pdbin)))
Example #9
File: rapd_phaser.py  Project: RAPD/RAPD
def run_phaser_module_OLD(datafile, inp=False):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    """
    # if self.verbose:
    #  self.logger.debug('Utilities::runPhaserModule')

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        res0 = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        if f[-3:] in ('cif', ):
            i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
        # i.addSEAR_ENSE_NUM("junk",5)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            res0 = r1.get_target_resolution('model')
        del(r1)
        return res0

    def run_cca():
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res0)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        if r1.Success():
            z0 = r1.getBestZ()
            sc0 = 1-(1.23/r1.getBestVM())
        del(r1)
        return (z0, sc0)

    def run_ncs():
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print(r1.logfile())
        print(r1.loggraph().size())
        print(r1.loggraph().__dict__.keys())
        #print r1.getCentricE4()
        if r1.Success():
            return r1

    def run_ano():
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print(r1.loggraph().__dict__.keys())
        print(r1.loggraph().size())
        print(r1.logfile())
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """

        if r1.Success():
            print('SUCCESS')
            return r1

    # Setup which modules are run
    matthews = False
    if inp:
        ellg = True
        ncs = False
        if type(inp) == str:
            f = inp
        else:
            np, na, res0, f = inp
            matthews = True
    else:
        ellg = False
        ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(datafile))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if matthews:
            z, solvent_content = run_cca()
        if ncs:
            n = run_ncs()
    if matthews:
        # Assumes ellg is run as well.
        # return (z,sc,res)
        return {"z": z,
                "solvent_content": solvent_content,
                "target_resolution": target_resolution}
    elif ellg:
        # ellg run by itself
        # return target_resolution
        return {"target_resolution": target_resolution}
    else:
        # NCS
        return n
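A hedged sketch of driving run_phaser_module_OLD above (file names are placeholders): a string inp runs only the eLLG module, a (np, na, res0, model) tuple additionally runs the Matthews/CCA module, and omitting inp runs the NCS analysis instead.

# Hypothetical calls with placeholder file names
ellg_only = run_phaser_module_OLD('data.mtz', inp='model.pdb')                      # {'target_resolution': ...}
with_matthews = run_phaser_module_OLD('data.mtz', inp=(250, 0, 2.0, 'model.pdb'))   # adds 'z' and 'solvent_content'
ncs_result = run_phaser_module_OLD('data.mtz')                                      # phaser.runNCS result object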
Example #10
def run_phaser(
    data_file,
    struct_file,
    spacegroup,
    db_settings=False,
    tag=False,
    work_dir=False,
    adf=False,
    name=False,
    ncopy=1,
    cell_analysis=False,
    resolution=False,
    full=False,
):
    """
    Run Phaser and pass the results back to the RAPD Redis DB
    **Requires Phaser src code!**

    data_file - input data as mtz (required)
    struct_file - input search model path in mmCIF or PDB format (required)
    spacegroup - The space group to run MR (required)

    tag - a Redis key where the results are sent (cluster mode)
    db_settings - Redis connection settings for sending results (cluster mode)
    work_dir - working directory (defaults to current working dir)
    name - root name for output files (defaults to spacegroup)
    ncopy - number of molecules to search for
    cell_analysis - internal RAPD signal so all possible SG's are searched
    resolution - high res limit to run MR (float)
    full - signal to run more comprehensive MR
    """

    phaser_log = False
    # Change to work_dir
    if not work_dir:
        work_dir = os.getcwd()
    os.chdir(work_dir)

    if not name:
        name = spacegroup

    # # Handle CIF file input -> PDB
    # if struct_file[-3:] == "cif":
    #     pdb.cif_as_pdb(struct_file)
    #     struct_file = struct_file.replace(".cif", ".pdb")

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r1 = phaser.runMR_DAT(i)
    # Need to determine Phaser version for keyword changes!
    version = re.search(r'Version:\s*([\d.]+)', r1.logfile()).group(1)

    if r1.Success():
        i = phaser.InputMR_AUTO()
        # i.setREFL_DATA(r1.getREFL_DATA())
        # i.setREFL_DATA(r1.DATA_REFL())
        i.setREFL_F_SIGF(r1.getMiller(), r1.getFobs(), r1.getSigFobs())
        i.setCELL6(r1.getUnitCell())
        if struct_file[-3:].lower() == "cif":
            #i.addENSE_CIF_ID('model', cif, 0.7)
            ### Typo in PHASER CODE!!! <<<CIT>>> ###
            i.addENSE_CIT_ID('model', convert_unicode(struct_file), 0.7)
        else:
            i.addENSE_PDB_ID('model', convert_unicode(struct_file), 0.7)
        i.addSEAR_ENSE_NUM("model", ncopy)
        i.setSPAC_NAME(spacegroup)
        if cell_analysis:
            i.setSGAL_SELE("ALL")
            # Set it for worst case in orth
            # number of processes to run in parallel where possible
            i.setJOBS(1)
        else:
            i.setSGAL_SELE("NONE")
        if full:
            # Picks own resolution
            # Round 2, pick best solution as long as less than 10% clashes
            i.setPACK_SELE("PERCENT")
            i.setPACK_CUTO(0.1)
            #command += "PACK CUTOFF 10\n"
        else:
            # For first round and cell analysis
            # Only set the resolution limit in the first round or cell analysis.
            if resolution:
                i.setRESO_HIGH(resolution)
            else:
                i.setRESO_HIGH(6.0)
            # If Phaser version < 2.6.0
            if int(version.split('.')[1]) <= 6:
                i.setSEAR_DEEP(False)
            else:
                i.setSEAR_METH("FAST")

            # Don't seem to work since it picks the high res limit now.
            # Get an error when it prunes all the solutions away and TF has no input.
            # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
            # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
        # Turn off pruning in 2.6.0
        i.setSEAR_PRUN(False)
        # Choose more top peaks to help with getting it correct.
        i.setPURG_ROTA_ENAB(True)
        i.setPURG_ROTA_NUMB(3)
        #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
        i.setPURG_TRAN_ENAB(True)
        i.setPURG_TRAN_NUMB(1)
        #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"

        # Only keep the top after refinement.
        i.setPURG_RNP_ENAB(True)
        i.setPURG_RNP_NUMB(1)
        #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
        i.setROOT(convert_unicode(name))
        # i.setMUTE(False)
        i.setMUTE(True)
        # Delete the setup results
        # del(r)
        # launch the run
        # r = phaser.runMR_AUTO(i)

        try:
            r = phaser.runMR_AUTO(i)
        except RuntimeError as e:
            # print "Hit error"
            # Known CIF error - convert to pdb and retry
            if struct_file[-3:] in ('cif', ):
                # print "Convert to pdb"
                pdb.cif_as_pdb((struct_file, ))
                pdb_file = struct_file.replace(".cif", ".pdb")

                i = phaser.InputMR_AUTO()
                # i.setREFL_DATA(r1.getREFL_DATA())
                # i.setREFL_DATA(r1.DATA_REFL())
                i.setREFL_F_SIGF(r1.getMiller(), r1.getFobs(), r1.getSigFobs())
                i.setCELL6(r1.getUnitCell())
                i.addENSE_PDB_ID('model', convert_unicode(pdb_file), 0.7)
                i.addSEAR_ENSE_NUM("model", ncopy)
                i.setSPAC_NAME(spacegroup)
                if cell_analysis:
                    i.setSGAL_SELE("ALL")
                    # Set it for worst case in orth
                    # number of processes to run in parallel where possible
                    i.setJOBS(1)
                else:
                    i.setSGAL_SELE("NONE")
                if full:
                    # Picks own resolution
                    # Round 2, pick best solution as long as less than 10% clashes
                    i.setPACK_SELE("PERCENT")
                    i.setPACK_CUTO(0.1)
                    #command += "PACK CUTOFF 10\n"
                else:
                    # For first round and cell analysis
                    # Only set the resolution limit in the first round or cell analysis.
                    if resolution:
                        i.setRESO_HIGH(resolution)
                    else:
                        i.setRESO_HIGH(6.0)
                    # If Phaser version < 2.6.0
                    if int(version.split('.')[1]) <= 6:
                        i.setSEAR_DEEP(False)
                    else:
                        i.setSEAR_METH("FAST")

                    # Don't seem to work since it picks the high res limit now.
                    # Get an error when it prunes all the solutions away and TF has no input.
                    # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
                    # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
                # Turn off pruning in 2.6.0
                i.setSEAR_PRUN(False)
                # Choose more top peaks to help with getting it correct.
                i.setPURG_ROTA_ENAB(True)
                i.setPURG_ROTA_NUMB(3)
                #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
                i.setPURG_TRAN_ENAB(True)
                i.setPURG_TRAN_NUMB(1)
                #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"

                # Only keep the top after refinement.
                i.setPURG_RNP_ENAB(True)
                i.setPURG_RNP_NUMB(1)
                #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
                i.setROOT(convert_unicode(name))
                # i.setMUTE(False)
                i.setMUTE(True)
                # Delete the setup results
                # del(r)
                # launch the run
                r = phaser.runMR_AUTO(i)
            else:
                raise e

        if r.Success():
            # print r
            pass
            #if r.foundSolutions():
            #print "Phaser has found MR solutions"
            #print "Top LLG = %f" % r.getTopLLG()
            #print "Top PDB file = %s" % r.getTopPdbFile()
            #else:
            #print "Phaser has not found any MR solutions"
        else:
            print("Job exit status FAILURE")
            print(r.ErrorName(), "ERROR :", r.ErrorMessage())

        # Save log files for debugging
        phaser_log = r.logfile()
        with open('phaser.log', 'w') as log:
            log.write(r.logfile())

        if r.foundSolutions():
            rfz = None
            tfz = None
            tncs = False
            # Parse results
            for p in r.getTopSet().ANNOTATION.split():
                # print p
                # For v 2.8.3
                # RF*0\nTF*0\nLLG=30699\nTFZ==174.8\nPAK=0\nLLG=30699\nTFZ==174.8\n
                if p.count('RFZ'):
                    if p.count('=') in [1]:
                        rfz = float(p.split('=')[-1])
                if p.count('RF*0'):
                    rfz = "NC"
                if p.count('TFZ'):
                    if p.count('=') in [1]:
                        tfz = p.split('=')[-1]
                        if tfz == '*':
                            tfz = 'arbitrary'
                        else:
                            tfz = float(tfz)
                if p.count('TF*0'):
                    tfz = "NC"
            tncs_test = [
                1 for line in r.getTopSet().unparse().splitlines()
                if line.count("+TNCS")
            ]
            tncs = bool(len(tncs_test))
            mtz_file = os.path.join(work_dir, r.getTopMtzFile())
            phaser_result = {
                "ID": name,
                "solution": r.foundSolutions(),
                "pdb_file": os.path.join(work_dir, r.getTopPdbFile()),
                "mtz": mtz_file,
                "gain": float(r.getTopLLG()),
                "rfz": rfz,
                # "tfz": r.getTopTFZ(),
                "tfz": tfz,
                "clash": r.getTopSet().PAK,
                "dir": os.getcwd(),
                "spacegroup":
                r.getTopSet().getSpaceGroupName().replace(' ', ''),
                "tNCS": tncs,
                "nmol": r.getTopSet().NUM,
                "adf": None,
                "peak": None,
            }

            # Calculate 2Fo-Fc & Fo-Fc maps
            # foo.mtz begets foo_2mFo-DFc.ccp4 & foo__mFo-DFc.ccp4
            local_subprocess(command="phenix.mtz2map %s" % mtz_file,
                             logfile='map.log',
                             shell=True)

            # Map files should now exist
            map_2_1 = mtz_file.replace(".mtz", "_2mFo-DFc.ccp4")
            map_1_1 = mtz_file.replace(".mtz", "_mFo-DFc.ccp4")

            # Make sure the maps exist and then package them
            if os.path.exists(map_2_1):
                # Compress the map
                arch_prod_file, arch_prod_hash = archive.compress_file(map_2_1)
                # Remove the map that was compressed
                os.unlink(map_2_1)
                # Store information
                map_for_display = {
                    "path": arch_prod_file,
                    "hash": arch_prod_hash,
                    "description": "map_2_1"
                }
                phaser_result["map_2_1"] = map_for_display

            if os.path.exists(map_1_1):
                # Compress the map
                arch_prod_file, arch_prod_hash = archive.compress_file(map_1_1)
                # Remove the map that was compressed
                os.unlink(map_1_1)
                # Store information
                map_for_display = {
                    "path": arch_prod_file,
                    "hash": arch_prod_hash,
                    "description": "map_1_1"
                }
                phaser_result["map_1_1"] = map_for_display

            # If PDB exists, package that too
            if phaser_result.get("pdb_file", False):
                if os.path.exists(phaser_result.get("pdb_file")):
                    # Compress the file
                    arch_prod_file, arch_prod_hash = archive.compress_file(
                        phaser_result.get("pdb_file"))
                    # Remove the map that was compressed
                    # os.unlink(phaser_result.get("pdb"))
                    # Store information
                    pdb_for_display = {
                        "path":
                        arch_prod_file,
                        "hash":
                        arch_prod_hash,
                        "description":
                        os.path.basename(phaser_result.get("pdb_file"))
                    }
                    phaser_result["pdb"] = pdb_for_display

            # Calc ADF map
            if adf:
                if os.path.exists(phaser_result.get(
                        "pdb_file", False)) and os.path.exists(
                            phaser_result.get("mtz", False)):
                    adf_results = calc_ADF_map(data_file=data_file,
                                               mtz=phaser_result["mtz"],
                                               pdb=phaser_result["pdb_file"])
                    if adf_results.get("adf"):
                        phaser_result.update({
                            "adf":
                            os.path.join(work_dir, adf_results.get("adf"))
                        })
                    if adf_results.get("peak"):
                        phaser_result.update({
                            "peak":
                            os.path.join(work_dir, adf_results.get("peak"))
                        })
                    #phaser_result.update({"adf": adf_results.get("adf", None),
                    #                      "peak": adf_results.get("peak", None),})

            # print "1"
            # print name
            # New procedure for making tar of results
            # Create directory
            # Remove the run # from the name
            # new_name = name[:-2]  #
            new_name = phaser_result.get("ID")  #
            # print new_name
            os.mkdir(new_name)
            # # Go through and copy files to archive directory
            file_types = ("pdb_file", "mtz", "adf", "peak")
            for file_type in file_types:
                # print file_type
                target_file = phaser_result.get(file_type, False)
                # print target_file
                if target_file:
                    if os.path.exists(target_file):
                        # Copy the file to the directory to be archived
                        shutil.copy(target_file, new_name + "/.")
            # # Create the archive
            archive_result = archive.create_archive(new_name)
            archive_result["description"] = '%s_files' % new_name
            phaser_result["tar"] = archive_result

            # print "2"

        else:
            phaser_result = {
                "ID": name,
                "solution": False,
                "message": "No solution",
                "spacegroup": spacegroup
            }
        # Add the phaser log
        if phaser_log:
            phaser_result.update({"logs": {"phaser": phaser_log}})

        # print "3"

        if db_settings and tag:
            print("db_settings and tag")
            # Connect to Redis
            redis = connect_to_redis(db_settings)
            # Key should be deleted once received, but set the key to expire in 24 hours just in case.
            redis.setex(tag, 86400, json.dumps(phaser_result))
            # Do a little sleep to make sure results are in Redis for postprocess_phaser
            time.sleep(0.1)
        else:
            # print "Printing phaser_result"
            # Print the result so it can be seen through the queue by reading stdout
            # print phaser_result
            print(json.dumps(phaser_result))
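A minimal sketch of a standalone invocation of run_phaser above (no Redis, so the result dictionary is printed as JSON); every value below is a placeholder, and the surrounding RAPD helpers (phaser, convert_unicode, archive, local_subprocess, pdb) are assumed to be importable.

# Hypothetical standalone call with placeholder inputs
run_phaser(
    data_file='data.mtz',             # placeholder reflection file
    struct_file='search_model.pdb',   # placeholder search model (PDB or mmCIF)
    spacegroup='P212121',             # placeholder space group for the MR run
    work_dir='/tmp/phaser_run',       # placeholder working directory
    name='P212121_run1',              # placeholder root name for output files
    ncopy=1,
    resolution=6.0,
)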
Example #11
def run_phaser_module_OLD(args):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    """

    # print "run_phaser_module"

    res = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg(run_mr, pdb_file):
        """
        Perform calculations and return target-reso
        Resolution to achieve target eLLG
        """

        target_resolution = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(run_mr.getSpaceGroupHall())
        i0.setCELL6(run_mr.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(run_mr.getDATA())
        i0.addENSE_PDB_ID("test", pdb_file, 0.7)
        r1 = phaser.runMR_ELLG(i0)
        if r1.Success():
            target_resolution = r1.get_target_resolution("test")
        del r1
        return target_resolution

    def run_cca(run_mr, target_resolution, args):
        # print "run_cca"
        z0 = 0
        solvent_content = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(run_mr.getSpaceGroupHall())
        i0.setCELL6(run_mr.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(target_resolution)
        if args.np > 0:
            i0.addCOMP_PROT_NRES_NUM(args.np, 1)
        if args.na > 0:
            i0.addCOMP_NUCL_NRES_NUM(args.na, 1)
        r1 = phaser.runCCA(i0)
        if r1.Success():
            z0 = r1.getBestZ()
            solvent_content = 1 - (1.23 / r1.getBestVM())
        del r1
        return (z0, solvent_content)

    def run_ncs(run_mr):
        # print "run_ncs"
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(run_mr.getSpaceGroupHall())
        i0.setCELL6(run_mr.getUnitCell())
        i0.setREFL_DATA(run_mr.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runNCS(i0)
        # print r1.logfile()
        # print r1.loggraph().size()
        # print r1.loggraph().__dict__.keys()
        if r1.Success():
            return r1

    # def run_ano(run_mr):
    #     print "run_ano"
    #     i0 = phaser.InputANO()
    #     i0.setSPAC_HALL(run_mr.getSpaceGroupHall())
    #     i0.setCELL6(run_mr.getUnitCell())
    #     i0.setREFL_DATA(run_mr.getDATA())
    #     i0.setMUTE(True)
    #     r1 = phaser.runANO(i0)
    #     # print r1.loggraph().__dict__.keys()
    #     # print r1.loggraph().size()
    #     # print r1.logfile()
    #     """
    #     o = phaser.Output()
    #     redirect_str = StringIO()
    #     o.setPackagePhenix(file_object=redirect_str)
    #     r1 = phaser.runANO(i0,o)
    #     """
    #     if r1.Success():
    #         print "SUCCESS"
    #         return r1

    # Setup which modules are run
    # if inp:
    ellg = True
    ncs = False
    target_resolution = args.resolution
    if not (args.np or args.na or args.resolution):
        pass
    # else:
    #     ellg = False
    #     ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(args.data_file)
    i.setLABI_F_SIGF("F", "SIGF")
    i.setMUTE(True)
    run_mr = phaser.runMR_DAT(i)
    if run_mr.Success():
        if ellg:
            target_resolution = run_ellg(run_mr, args.pdb_file)
        if args.matthews:
            z, solvent_content = run_cca(run_mr, target_resolution, args)
        if ncs:
            n = run_ncs(run_mr)
    if args.matthews:
        # Assumes ellg is run as well.
        return {
            "z": z,
            "solvent_content": solvent_content,
            "target_resolution": target_resolution
        }
    elif ellg:
        # ellg run by itself
        return {"target_resolution": target_resolution}
    else:
        # NCS
        return n
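A hedged sketch of calling the args-based run_phaser_module_OLD above; a SimpleNamespace stands in for the parsed command-line arguments, and all attribute values are placeholders.

# Hypothetical call; only the attributes the function reads are provided
from types import SimpleNamespace
args = SimpleNamespace(data_file='data.mtz', pdb_file='model.pdb',
                       np=250, na=0, resolution=0.0, matthews=True)
result = run_phaser_module_OLD(args)  # {'z': ..., 'solvent_content': ..., 'target_resolution': ...}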
Example #12
    def run(self):
        """Function to run molecular replacement using PHASER

        Returns
        -------
        file
            Output pdb file
        file
            Output log file
        """

        # Make a note of the current working directory
        current_work_dir = os.getcwd()

        # Change to the PHASER working directory
        if os.path.exists(self.work_dir):
            os.chdir(self.work_dir)
        else:
            os.makedirs(self.work_dir)
            os.chdir(self.work_dir)

        # Copy hklin and pdbin to the working directory for efficient running of PHASER
        hklin = os.path.join(self.work_dir, os.path.basename(self.hklin))
        shutil.copyfile(self.hklin, hklin)
        pdbin = os.path.join(self.work_dir, os.path.basename(self.pdbin))
        shutil.copyfile(self.pdbin, pdbin)

        i = InputMR_DAT()
        i.setHKLI(hklin)

        if self.hires:
            i.setHIRES(self.hires)
        if self.autohigh:
            i.setRESO_AUTO_HIGH(self.autohigh)
        if self.i != "None" and self.sigi != "None":
            i.setLABI_I_SIGI(self.i, self.sigi)
        elif self.f != "None" and self.sigf != "None":
            i.setLABI_F_SIGF(self.f, self.sigf)
        else:
            msg = "No flags for intensities or amplitudes have been provided"
            raise RuntimeError(msg)
        i.setSGAL_SELE(SGAlternatives[self.sgalternative].value)
        i.setMUTE(True)
        r = runMR_DAT(i)

        if r.Success():
            i = InputMR_AUTO()
            i.setJOBS(1)
            i.setREFL_DATA(r.getREFL_DATA())
            i.setROOT("phaser_mr_output")
            i.addENSE_PDB_ID("PDB", pdbin, 0.7)
            i.setCOMP_BY("SOLVENT")
            i.setCOMP_PERC(self.solvent)
            i.addSEAR_ENSE_NUM('PDB', self.nmol)
            i.setSGAL_SELE(SGAlternatives[self.sgalternative].value)
            if self.timeout != 0:
                i.setKILL_TIME(self.timeout)
            i.setMUTE(True)
            del (r)
            r = runMR_AUTO(i)

            with open(self.logfile, 'w') as f:
                f.write(r.summary())

            shutil.move(r.getTopPdbFile(), self.pdbout)

            # Output original mtz with a change of basis if needed
            space_group, _, _ = mtz_util.crystal_data(r.getTopMtzFile())
            ed = mtz_util.ExperimentalData(self.hklin)
            ed.change_space_group(space_group)
            ed.output_mtz(self.hklout)

        # Return to original working directory
        os.chdir(current_work_dir)

        # Delete any files copied across
        if os.path.isfile(os.path.join(self.work_dir, os.path.basename(self.hklin))):
            os.remove(os.path.join(self.work_dir, os.path.basename(self.hklin)))
        if os.path.isfile(os.path.join(self.work_dir, os.path.basename(self.pdbin))):
            os.remove(os.path.join(self.work_dir, os.path.basename(self.pdbin)))
Example #13
    def run(self,
            models_dir,
            nproc=2,
            min_solvent_content=20,
            submit_qtype=None,
            submit_queue=None,
            monitor=None,
            chunk_size=0,
            **kwargs):
        """Run phaser rotation function on a directory of models
        Parameters
        ----------
        models_dir : str
            The directory containing the models to run the rotation search on
        nproc : int, optional
            The number of processors to run the job on
        min_solvent_content : int, float, optional
            The minimum solvent content present in the unit cell with the input model [default: 20]
        submit_qtype : str
            The cluster submission queue type - currently support SGE and LSF
        submit_queue : str
            The queue to submit to on the cluster
        monitor
        chunk_size : int, optional
            The number of jobs to submit at the same time

        Returns
        -------
        file
            log file for each model in the models_dir
        """
        self.submit_qtype = submit_qtype
        self.submit_queue = submit_queue
        self.f, self.sigf, self.i, self.sigi, _, _, _ = simbad.util.mtz_util.get_labels(self.mtz)

        self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
        n_files = len(self.simbad_dat_files)

        i = InputMR_DAT()
        i.setHKLI(self.mtz)
        i.setMUTE(True)
        run_mr_data = runMR_DAT(i)

        sg = run_mr_data.getSpaceGroupName().replace(" ", "")
        cell = " ".join(map(str, run_mr_data.getUnitCell()))

        chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
        total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(n_files, chunk_size)

        mat_coef = simbad.util.matthews_prob.MatthewsProbability(cell, sg)

        dir_name = "simbad-tmp-" + str(uuid.uuid1())
        script_log_dir = os.path.join(self.work_dir, dir_name)
        os.mkdir(script_log_dir)

        ccp4_scr = os.environ["CCP4_SCR"]
        default_tmp_dir = os.path.join(self.work_dir, 'tmp')
        if self.tmp_dir:
            template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
        else:
            template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")

        predicted_molecular_weight = 0
        if run_mr_data.Success():
            i = InputCCA()
            i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            i.setCELL6(run_mr_data.getUnitCell())
            i.setMUTE(True)
            run_cca = runCCA(i)

            if run_cca.Success():
                predicted_molecular_weight = run_cca.getAssemblyMW()

        dat_models = []
        for dat_model in self.simbad_dat_files:
            name = os.path.basename(dat_model.replace(".dat", ""))
            pdb_struct = simbad.util.pdb_util.PdbStructure()
            pdb_struct.from_file(dat_model)
            solvent_fraction, n_copies = mat_coef.calculate_content_ncopies_from_struct(pdb_struct)
            solvent_content = solvent_fraction * 100
            if solvent_content < min_solvent_content:
                msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                logger.debug(msg, name, min_solvent_content)
                continue
            mw_diff = abs(predicted_molecular_weight - pdb_struct.molecular_weight)

            info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, None, None, None, None,
                                                       solvent_fraction, n_copies)
            dat_models.append(info)

        sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)

        iteration_range = range(0, n_files, chunk_size)
        for cycle, i in enumerate(iteration_range):
            logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)

            template_model = os.path.join("$CCP4_SCR", "{0}.pdb")

            phaser_files = []
            for dat_model in sorted_dat_models[i:i + chunk_size]:
                logger.debug("Generating script to perform PHASER rotation " + "function on %s", dat_model.pdb_code)

                pdb_model = template_model.format(dat_model.pdb_code)
                template_rot_log = os.path.join("$CCP4_SCR", "{0}_rot.log")

                conv_py = "\"from simbad.db import convert_dat_to_pdb; convert_dat_to_pdb('{}', '{}')\""
                conv_py = conv_py.format(dat_model.dat_path, pdb_model)

                rot_log = template_rot_log.format(dat_model.pdb_code)
                tmp_dir = template_tmp_dir.format(dat_model.pdb_code)

                phaser_cmd = [
                    "simbad.rotsearch.phaser_rotation_search",
                    "-hklin",
                    self.mtz,
                    "-f",
                    self.f,
                    "-sigf",
                    self.sigf,
                    "-i",
                    self.i,
                    "-sigi",
                    self.sigi,
                    "-pdbin",
                    pdb_model,
                    "-logfile",
                    rot_log,
                    "-solvent",
                    dat_model.solvent,
                    "-nmol",
                    dat_model.nmol,
                    "-work_dir",
                    tmp_dir,
                ]
                phaser_cmd = " ".join(str(e) for e in phaser_cmd)

                cmd = [
                    [EXPORT, "CCP4_SCR=" + tmp_dir],
                    ["mkdir", "-p", "$CCP4_SCR\n"],
                    [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-c", conv_py, os.linesep],
                    [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-m", phaser_cmd, os.linesep],
                    ["rm", "-rf", "$CCP4_SCR\n"],
                    [EXPORT, "CCP4_SCR=" + ccp4_scr],
                ]
                phaser_script = pyjob.misc.make_script(
                    cmd, directory=script_log_dir, prefix="phaser_", stem=dat_model.pdb_code)
                phaser_log = phaser_script.rsplit(".", 1)[0] + '.log'
                phaser_files += [(phaser_script, phaser_log, dat_model.dat_path)]

            results = []
            if len(phaser_files) > 0:
                logger.info("Running PHASER rotation functions")
                phaser_scripts, phaser_logs, dat_models = zip(*phaser_files)
                simbad.rotsearch.submit_chunk(phaser_scripts, script_log_dir, nproc, 'simbad_phaser', submit_qtype,
                                              submit_queue, monitor, self.rot_succeeded_log)

                for dat_model, phaser_log in zip(dat_models, phaser_logs):
                    base = os.path.basename(phaser_log)
                    pdb_code = base.replace("phaser_", "").replace(".log", "")
                    try:
                        phaser_rotation_parser = simbad.parsers.rotsearch_parser.PhaserRotsearchParser(phaser_log)
                        if phaser_rotation_parser.rfact:
                            phaser_rotation_parser.llg = 100
                            phaser_rotation_parser.rfz = 10
                        score = simbad.core.phaser_score.PhaserRotationScore(
                            pdb_code, dat_model, phaser_rotation_parser.llg, phaser_rotation_parser.rfz)

                        if phaser_rotation_parser.rfz:
                            results += [score]
                    except IOError:
                        pass

            else:
                logger.critical("No structures to be trialled")

            self._search_results = results
            shutil.rmtree(script_log_dir)

            if os.path.isdir(default_tmp_dir):
                shutil.rmtree(default_tmp_dir)
Example #14
    def run(self,
            models_dir,
            nproc=2,
            min_solvent_content=20,
            submit_nproc=None,
            submit_qtype=None,
            submit_queue=None,
            monitor=None,
            chunk_size=0,
            **kwargs):
        """Run phaser rotation function on a directory of models
        Parameters
        ----------
        models_dir : str
            The directory containing the models to run the rotation search on
        nproc : int, optional
            The number of processors to run the job on
        min_solvent_content : int, float, optional
            The minimum solvent content present in the unit cell with the input model [default: 20]
        submit_nproc : int
            The number of processors to use on the head node when creating submission scripts on a cluster [default: 1]
        submit_qtype : str
            The cluster submission queue type - currently support SGE and LSF
        submit_queue : str
            The queue to submit to on the cluster
        monitor
        chunk_size : int, optional
            The number of jobs to submit at the same time

        Returns
        -------
        file
            log file for each model in the models_dir
        """
        self.submit_qtype = submit_qtype
        self.submit_queue = submit_queue
        self.mtz_labels = simbad.util.mtz_util.GetLabels(self.mtz)

        self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)

        i = InputMR_DAT()
        i.setHKLI(self.mtz)
        i.setLABI_F_SIGF(self.mtz_labels.f, self.mtz_labels.sigf)
        i.setMUTE(True)
        run_mr_data = runMR_DAT(i)

        sg = run_mr_data.getSpaceGroupName().replace(" ", "")
        cell = " ".join(map(str, run_mr_data.getUnitCell()))

        mat_coef = simbad.util.matthews_prob.MatthewsProbability(cell, sg)

        dir_name = "simbad-tmp-" + str(uuid.uuid1())
        self.script_log_dir = os.path.join(self.work_dir, dir_name)
        os.mkdir(self.script_log_dir)

        self.ccp4_scr = os.environ["CCP4_SCR"]
        default_tmp_dir = os.path.join(self.work_dir, 'tmp')
        if self.tmp_dir:
            self.template_tmp_dir = os.path.join(self.tmp_dir,
                                                 dir_name + "-{0}")
        else:
            self.template_tmp_dir = os.path.join(default_tmp_dir,
                                                 dir_name + "-{0}")

        predicted_molecular_weight = 0
        if run_mr_data.Success():
            i = InputCCA()
            i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            i.setCELL6(run_mr_data.getUnitCell())
            i.setMUTE(True)
            run_cca = runCCA(i)

            if run_cca.Success():
                predicted_molecular_weight = run_cca.getAssemblyMW()

        dat_models = []
        for dat_model in self.simbad_dat_files:
            name = os.path.basename(dat_model.replace(".dat", ""))
            pdb_struct = simbad.util.pdb_util.PdbStructure()
            pdb_struct.from_file(dat_model)
            solvent_fraction, n_copies = mat_coef.calculate_content_ncopies_from_struct(
                pdb_struct)
            solvent_content = solvent_fraction * 100
            if solvent_content < min_solvent_content:
                msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                logger.debug(msg, name, min_solvent_content)
                continue
            mw_diff = abs(predicted_molecular_weight -
                          pdb_struct.molecular_weight)

            info = simbad.core.dat_score.DatModelScore(name, dat_model,
                                                       mw_diff, None, None,
                                                       None, None,
                                                       solvent_fraction,
                                                       n_copies)
            dat_models.append(info)

        sorted_dat_models = sorted(dat_models,
                                   key=lambda x: float(x.mw_diff),
                                   reverse=False)
        n_files = len(sorted_dat_models)
        chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
        total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(
            n_files, chunk_size)

        results = []
        iteration_range = range(0, n_files, chunk_size)
        for cycle, i in enumerate(iteration_range):
            logger.info("Working on chunk %d out of %d", cycle + 1,
                        total_chunk_cycles)

            if self.solution:
                logger.info(
                    "Early termination criteria met, skipping chunk %d",
                    cycle + 1)
                continue

            self.template_model = os.path.join("$CCP4_SCR", "{0}.pdb")

            if submit_qtype == 'local':
                processes = nproc
            else:
                processes = submit_nproc

            collector = ScriptCollector(None)
            phaser_files = []
            with pool.Pool(processes=processes) as p:
                for result in p.map(self, sorted_dat_models[i:i + chunk_size]):
                    if result is not None:
                        collector.add(result[0])
                        phaser_files.append(result[1])

            if len(phaser_files) > 0:
                logger.info("Running PHASER rotation functions")
                phaser_logs, dat_models = zip(*phaser_files)
                simbad.util.submit_chunk(collector, self.script_log_dir, nproc,
                                         'simbad_phaser', submit_qtype,
                                         submit_queue, True, monitor,
                                         self.rot_succeeded_log)

                for dat_model, phaser_log in zip(dat_models, phaser_logs):
                    base = os.path.basename(phaser_log)
                    pdb_code = base.replace("phaser_", "").replace(".log", "")
                    try:
                        phaser_rotation_parser = simbad.parsers.rotsearch_parser.PhaserRotsearchParser(
                            phaser_log)
                        if phaser_rotation_parser.rfact:
                            phaser_rotation_parser.llg = 100
                            phaser_rotation_parser.rfz = 10
                        score = simbad.core.phaser_score.PhaserRotationScore(
                            pdb_code, dat_model, phaser_rotation_parser.llg,
                            phaser_rotation_parser.rfz)

                        if phaser_rotation_parser.rfz:
                            results += [score]
                    except IOError:
                        pass

            else:
                logger.critical("No structures to be trialled")

        self._search_results = results
        shutil.rmtree(self.script_log_dir)

        if os.path.isdir(default_tmp_dir):
            shutil.rmtree(default_tmp_dir)
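The chunking above is driven by two SIMBAD helpers. A minimal sketch of the arithmetic they are assumed to perform (ceiling division, with chunk_size=0 meaning "run everything in one chunk") is given below; it is an illustration, not the actual simbad.rotsearch implementation:

import math

def get_chunk_size(n_files, chunk_size):
    # Assumed behaviour: 0 means "no chunking", i.e. one chunk covering all files.
    return n_files if chunk_size == 0 else chunk_size

def get_total_chunk_cycles(n_files, chunk_size):
    # Number of passes needed to cover n_files in steps of chunk_size.
    return int(math.ceil(n_files / float(chunk_size)))

# e.g. 250 models in chunks of 100 -> 3 cycles, matching range(0, 250, 100)
assert get_total_chunk_cycles(250, get_chunk_size(250, 100)) == 3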
Example #15
def run_phaser_module_OLD(args):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    """

    # print "run_phaser_module"

    res = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg(run_mr, pdb_file):
        """
        Perform calculations and return target-reso
        Resolution to achieve target eLLG
        """

        target_resolution = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(run_mr.getSpaceGroupHall())
        i0.setCELL6(run_mr.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(run_mr.getDATA())
        i0.addENSE_PDB_ID("test", pdb_file, 0.7)
        r1 = phaser.runMR_ELLG(i0)
        if r1.Success():
            target_resolution = r1.get_target_resolution("test")
        del r1
        return target_resolution

    def run_cca(run_mr, target_resolution, args):
        # print "run_cca"
        z0 = 0
        solvent_content = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(run_mr.getSpaceGroupHall())
        i0.setCELL6(run_mr.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(target_resolution)
        if args.np > 0:
            i0.addCOMP_PROT_NRES_NUM(args.np, 1)
        if args.na > 0:
            i0.addCOMP_NUCL_NRES_NUM(args.na, 1)
        r1 = phaser.runCCA(i0)
        if r1.Success():
            z0 = r1.getBestZ()
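            # Solvent fraction from the Matthews coefficient VM (A^3/Da): Vs = 1 - 1.23/VM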
            solvent_content = 1-(1.23/r1.getBestVM())
        del r1
        return (z0, solvent_content)

    def run_ncs(run_mr):
        # print "run_ncs"
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(run_mr.getSpaceGroupHall())
        i0.setCELL6(run_mr.getUnitCell())
        i0.setREFL_DATA(run_mr.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runNCS(i0)
        # print r1.logfile()
        # print r1.loggraph().size()
        # print r1.loggraph().__dict__.keys()
        if r1.Success():
            return r1

    # def run_ano(run_mr):
    #     print "run_ano"
    #     i0 = phaser.InputANO()
    #     i0.setSPAC_HALL(run_mr.getSpaceGroupHall())
    #     i0.setCELL6(run_mr.getUnitCell())
    #     i0.setREFL_DATA(run_mr.getDATA())
    #     i0.setMUTE(True)
    #     r1 = phaser.runANO(i0)
    #     # print r1.loggraph().__dict__.keys()
    #     # print r1.loggraph().size()
    #     # print r1.logfile()
    #     """
    #     o = phaser.Output()
    #     redirect_str = StringIO()
    #     o.setPackagePhenix(file_object=redirect_str)
    #     r1 = phaser.runANO(i0,o)
    #     """
    #     if r1.Success():
    #         print "SUCCESS"
    #         return r1

    # Setup which modules are run
    # if inp:
    ellg = True
    ncs = False
    target_resolution = args.resolution
    if not (args.np or args.na or args.resolution):
        pass
    # else:
    #     ellg = False
    #     ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(args.data_file)
    i.setLABI_F_SIGF("F", "SIGF")
    i.setMUTE(True)
    run_mr = phaser.runMR_DAT(i)
    if run_mr.Success():
        if ellg:
            target_resolution = run_ellg(run_mr, args.pdb_file)
        if args.matthews:
            z, solvent_content = run_cca(run_mr, target_resolution, args)
        if ncs:
            n = run_ncs(run_mr)
    if args.matthews:
        # Assumes ellg is run as well.
        return {"z": z,
                "solvent_content": solvent_content,
                "target_resolution": target_resolution}
    elif ellg:
        # ellg run by itself
        return {"target_resolution": target_resolution}
    else:
        # NCS
        return n
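run_phaser_module_OLD expects an argparse-style namespace with the attributes used above. A hypothetical call (the file names and residue counts below are made up for illustration):

from argparse import Namespace

args = Namespace(data_file="dataset.mtz",      # hypothetical MTZ path
                 pdb_file="search_model.pdb",  # hypothetical search model
                 np=250,                       # protein residues for the CCA step
                 na=0,                         # nucleic acid residues
                 resolution=0.0,               # let eLLG choose the target resolution
                 matthews=True)                # also run the Matthews/CCA step

result = run_phaser_module_OLD(args)
# -> {"z": ..., "solvent_content": ..., "target_resolution": ...}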
Example #16
File: rapd_phaser.py Project: RAPD/RAPD
def run_phaser(datafile,
               spacegroup,
               output_id,
               db_settings,
               work_dir=False,
               cif=False,
               pdb=False,
               name=False,
               ncopy=1,
               cell_analysis=False,
               resolution=False,
               large_cell=False,
               run_before=False,
               ):
    """
    Run Phaser and pass results back to the RAPD Redis DB
    **Requires Phaser src code!**

    datafile - input data as mtz
    spacegroup - The space group to run MR
    output_id - a Redis key where the results are sent
    db_settings - Redis connection settings for sending results
    work_dir - working directory
    cif - input search model path in mmCIF format (do not use with 'pdb')
    pdb - input search model path in PDB format (do not use with 'cif')
    name - root name for output files
    ncopy - number of molecules to search for
    cell_analysis - internal RAPD signal so all possible SG's are searched
    resolution - high res limit to run MR (float)
    large_cell - optimizes parameters to speed up MR with large unit cell.
    run_before - signal to run more comprehensive MR
    """
    # Change to work_dir
    if not work_dir:
        work_dir = os.getcwd()
    os.chdir(work_dir)

    if not name:
        name = spacegroup

    # Connect to Redis
    redis = connect_to_redis(db_settings)

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(datafile))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        i = phaser.InputMR_AUTO()
        # i.setREFL_DATA(r.getREFL_DATA())
        # i.setREFL_DATA(r.DATA_REFL())
        i.setREFL_F_SIGF(r.getMiller(), r.getFobs(), r.getSigFobs())
        i.setCELL6(r.getUnitCell())
        if cif:
            #i.addENSE_CIF_ID('model', cif, 0.7)
            ### Typo in PHASER CODE!!!###
            i.addENSE_CIT_ID('model', convert_unicode(cif), 0.7)
        if pdb:
            i.addENSE_PDB_ID('model', convert_unicode(pdb), 0.7)
        i.addSEAR_ENSE_NUM("model", ncopy)
        i.setSPAC_NAME(spacegroup)
        if cell_analysis:
            i.setSGAL_SELE("ALL")
            # Set it for worst case in orth
            # number of processes to run in parallel where possible
            i.setJOBS(1)
        else:
            i.setSGAL_SELE("NONE")
        if run_before:
            # Picks own resolution
            # Round 2, pick best solution as long as less that 10% clashes
            i.setPACK_SELE("PERCENT")
            i.setPACK_CUTO(0.1)
            #command += "PACK CUTOFF 10\n"
        else:
            # For first round and cell analysis
            # Only set the resolution limit in the first round or cell analysis.
            if resolution:
                i.setRESO_HIGH(resolution)
            else:
                # Otherwise it runs a second MR at full resolution!!
                # I dont think a second round is run anymore.
                # command += "RESOLUTION SEARCH HIGH OFF\n"
                if large_cell:
                    i.setRESO_HIGH(6.0)
                else:
                    i.setRESO_HIGH(4.5)
            i.setSEAR_DEEP(False)
            # Don"t seem to work since it picks the high res limit now.
            # Get an error when it prunes all the solutions away and TF has no input.
            # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
            # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
        # Turn off pruning in 2.6.0
        i.setSEAR_PRUN(False)
        # Choose more top peaks to help with getting it correct.
        i.setPURG_ROTA_ENAB(True)
        i.setPURG_ROTA_NUMB(3)
        #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
        i.setPURG_TRAN_ENAB(True)
        i.setPURG_TRAN_NUMB(1)
        #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"

        # Only keep the top after refinement.
        i.setPURG_RNP_ENAB(True)
        i.setPURG_RNP_NUMB(1)
        #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
        i.setROOT(convert_unicode(name))
        # i.setMUTE(False)
        i.setMUTE(True)
        # Delete the setup results
        del(r)
        # launch the run
        r = phaser.runMR_AUTO(i)
        if r.Success():
            if r.foundSolutions():
                print "Phaser has found MR solutions"
                #print "Top LLG = %f" % r.getTopLLG()
                #print "Top PDB file = %s" % r.getTopPdbFile()
            else:
                print "Phaser has not found any MR solutions"
        else:
            print "Job exit status FAILURE"
            print r.ErrorName(), "ERROR :", r.ErrorMessage()

        with open('phaser.log', 'w') as log:
            log.write(r.logfile())
            log.close()
        with open('phaser_sum.log', 'w') as log:
            log.write(r.summary())
            log.close()

    if r.foundSolutions():
        rfz = None
        tfz = None
        tncs = False
        # Parse results
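        # Annotation strings look roughly like "RFZ=6.2 TFZ=14.8 PAK=0 LLG=420 ...";
        # RF*0 / TF*0 entries are recorded below as "NC".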
        for p in r.getTopSet().ANNOTATION.split():
            if p.count('RFZ'):
                if p.count('=') == 1:
                    rfz = float(p.split('=')[-1])
            if p.count('RF*0'):
                rfz = "NC"
            if p.count('TFZ'):
                if p.count('=') == 1:
                    tfz = p.split('=')[-1]
                    if tfz == '*':
                        tfz = 'arbitrary'
                    else:
                        tfz = float(tfz)
            if p.count('TF*0'):
                tfz = "NC"
        tncs = any("+TNCS" in line for line in r.getTopSet().unparse().splitlines())
        phaser_result = {"ID": name,
                         "solution": r.foundSolutions(),
                         "pdb": r.getTopPdbFile(),
                         "mtz": r.getTopMtzFile(),
                         "gain": float(r.getTopLLG()),
                         "rfz": rfz,
                         # "tfz": r.getTopTFZ(),
                         "tfz": tfz,
                         "clash": r.getTopSet().PAK,
                         "dir": os.getcwd(),
                         "spacegroup": r.getTopSet().getSpaceGroupName().replace(' ', ''),
                         "tNCS": tncs,
                         "nmol": r.getTopSet().NUM,
                         "adf": None,
                         "peak": None,
                         }
        
        # make tar.bz2 of result files
        # l = ['pdb', 'mtz', 'adf', 'peak']
        # archive = "%s.tar.bz2" % name
        # with tarfile.open(archive, "w:bz2") as tar:
        #     for f in l:
        #         fo = phaser_result.get(f, False)
        #         if fo:
        #             if os.path.exists(fo):
        #                 tar.add(fo)
        #     tar.close()
        # phaser_result['tar'] = os.path.join(work_dir, archive)
        
        # New procedure for making tar of results
        # Create directory
        os.mkdir(name)
        # Go through and copy files to archive directory
        file_types = ("pdb", "mtz", "adf", "peak")
        for file_type in file_types:
            target_file = phaser_result.get(file_type, False)
            if target_file:
                if os.path.exists(target_file):
                    # Copy the file to the directory to be archived
                    shutil.copy(target_file, name+"/.")
        # Create the archive
        archive_result = archive.create_archive(name)
        archive_result["description"] = name
        phaser_result["tar"] = archive_result
        
        phaser_result["pdb_file"] = os.path.join(work_dir, r.getTopPdbFile())
    else:
        phaser_result = {"ID": name,
                         "solution": False,
                         "message": "No solution"}

    # Print the result so it can be seen in the rapd._phaser.log if needed
    print(phaser_result)

    # Key should be deleted once received, but set the key to expire in 24 hours just in case.
    redis.setex(output_id, 86400, json.dumps(phaser_result))
    # Do a little sleep to make sure results are in Redis for postprocess_phaser
    time.sleep(0.1)
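A hypothetical invocation of run_phaser; the paths, Redis settings and output key below are placeholders rather than values from the original project, and the db_settings layout is only an assumption:

db_settings = {"REDIS_HOST": "localhost", "REDIS_PORT": 6379}  # assumed shape of the settings dict

run_phaser(datafile="free.mtz",              # hypothetical MTZ path
           spacegroup="P212121",
           output_id="Phaser:example-run",   # hypothetical Redis key
           db_settings=db_settings,
           work_dir="/tmp/phaser_run",
           pdb="search_model.pdb",           # hypothetical search model
           ncopy=1,
           resolution=3.0)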
Example #17
File: rapd_phaser.py Project: RAPD/RAPD
def run_phaser_module(data_file,
                      result_queue=False,
                      cca=False,
                      tncs=False,
                      ellg=False,
                      mmcif=False,
                      dres=False,
                      np=0,
                      na=0,):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    data_file - input dataset mtz file
    result_queue - pass results to queue
    cca - Run CCA to determine number of molecules in AU, and solvent content (Matthews coefficient calc)
    tncs - Run anisotropy and tNCS correction on CID plots
    ellg - Run analysis to determine the optimum Phaser resolution for MR.
    mmcif - input search model file. Could be an mmCIF or PDB file
    dres - resolution of dataset (ELLG, CCA)
    np - default number of protein residues (CCA)
    na - default number of nucleic acid residues (CCA)
    """

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        new_res = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        if mmcif[-3:] in ('cif', ):
            i0.addENSE_CIT_ID('model', convert_unicode(mmcif), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(mmcif), 0.7)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            # If it worked use the recommended resolution
            new_res = round(r1.get_target_resolution('model'), 1)
        del(r1)
        return new_res

    def run_cca(res):
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        #print dir(r1)
        if r1.Success():
            z0 = r1.getBestZ()
            sc0 = round(1-(1.23/r1.getBestVM()), 2)
        del(r1)
        return (z0, sc0)

    def run_tncs():
        # CAN'T GET READABLE loggraph?!?
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print(dir(r1))
        print(r1.logfile())
        # for l in r1.loggraph():
        #    print(l)
        print(r1.loggraph().size())
        print(r1.output_strings)
        #print(r1.hasTNCS())
        #print(r1.summary())
        print(r1.warnings())
        print(r1.ErrorMessage())
        #print(r1.getCentricE4())
        if r1.Success():
            return(r1.loggraph())

    def run_ano():
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print(r1.loggraph().__dict__.keys())
        print(r1.loggraph().size())
        print(r1.logfile())
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """

        if r1.Success():
            print('SUCCESS')
            return r1

    # MAIN
    # Setup which modules are run
    # Read input MTZ file
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if cca:
            # Assumes ellg is run as well.
            z, solvent_content = run_cca(target_resolution)
        if tncs:
            n = run_tncs()
    if cca:
        out = {"z": z,
               "solvent_content": solvent_content,
               "target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    elif ellg:
        # ellg run by itself
        out = {"target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    else:
        # tNCS
        out = n
        if result_queue:
            result_queue.put(out)
        else:
            return out
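Called directly (without a result queue), run_phaser_module returns a dict; a hedged example with invented file names:

out = run_phaser_module("dataset.mtz",             # hypothetical MTZ path
                        cca=True,
                        ellg=True,
                        mmcif="search_model.cif",  # hypothetical search model
                        np=300,
                        na=0)
print(out["target_resolution"], out["z"], out["solvent_content"])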

    """
Example #18
def run_phaser_module(data_file,
                      result_queue=False,
                      cca=False,
                      tncs=False,
                      ellg=False,
                      struct_file=False,
                      dres=False,
                      np=0,
                      na=0):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    data_file - input dataset mtz file
    result_queue - pass results to queue
    cca - Run CCA to determine number of molecules in AU, and solvent content (Matthews coefficient calc)
    tncs - Run anisotropy and tNCS correction on CID plots
    ellg - Run analysis to determine the optimum Phaser resolution for MR.
    struct_file - input search model file. Could be a PDB or mmCIF file
    dres - resolution of dataset (ELLG, CCA)
    np - default number of protein residues (CCA)
    na - default number of nucleic acid residues (CCA)
    """

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        new_res = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        #  Read in CIF file
        if struct_file[-3:] in ('cif', ):
            i0.addENSE_CIT_ID("model", convert_unicode(struct_file), 0.7)
        # Read in PDB file
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(struct_file), 0.7)
        try:
            r1 = phaser.runMR_ELLG(i0)
        except RuntimeError as e:
            # print "Hit error"
            # Known CIF error - convert to pdb and retry
            if struct_file[-3:] in ('cif', ):
                # print "Convert to pdb"
                pdb.cif_as_pdb((struct_file, ))
                pdb_file = struct_file.replace(".cif", ".pdb")
                i1 = phaser.InputMR_ELLG()
                i1.setSPAC_HALL(r.getSpaceGroupHall())
                i1.setCELL6(r.getUnitCell())
                i1.setMUTE(True)
                i1.setREFL_DATA(r.getDATA())
                i1.addENSE_PDB_ID("model", convert_unicode(pdb_file), 0.7)
                r1 = phaser.runMR_ELLG(i1)
            else:
                raise e

        # print r1.logfile()
        if r1.Success():
            # If it worked use the recommended resolution
            new_res = round(r1.get_target_resolution('model'), 1)
        del (r1)
        return new_res

    def run_cca(res):
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        #print dir(r1)
        if r1.Success():
            z0 = r1.getBestZ()
            try:
                sc0 = round(1 - (1.23 / r1.getBestVM()), 2)
            except ZeroDivisionError:
                sc0 = 0
        del (r1)
        return (z0, sc0)

    def run_tncs():
        # CAN'T GET READABLE loggraph?!?
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print(dir(r1))
        print(r1.logfile())
        # for l in r1.loggraph():
        #    print(l)
        print(r1.loggraph().size())
        print(r1.output_strings)
        #print(r1.hasTNCS())
        #print(r1.summary())
        print(r1.warnings())
        print(r1.ErrorMessage())
        #print(r1.getCentricE4())
        if r1.Success():
            return (r1.loggraph())

    def run_ano():
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print(r1.loggraph().__dict__.keys())
        print(r1.loggraph().size())
        print(r1.logfile())
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """

        if r1.Success():
            print('SUCCESS')
            return r1

    # MAIN
    # Setup which modules are run
    # Read input MTZ file
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if cca:
            # Assumes ellg is run as well.
            z, solvent_content = run_cca(target_resolution)
        if tncs:
            n = run_tncs()
    if cca:
        out = {
            "z": z,
            "solvent_content": solvent_content,
            "target_resolution": target_resolution
        }
        if result_queue:
            result_queue.put(out)
        else:
            return out
    elif ellg:
        # ellg run by itself
        out = {"target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    else:
        # tNCS
        out = n
        if result_queue:
            result_queue.put(out)
        else:
            return out
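The result_queue parameter exists so this module can run in a child process. A minimal sketch of that pattern (the process handling and file names are assumptions, not part of the original code):

import multiprocessing

queue = multiprocessing.Queue()
proc = multiprocessing.Process(target=run_phaser_module,
                               kwargs={"data_file": "dataset.mtz",         # hypothetical MTZ path
                                       "ellg": True,
                                       "struct_file": "search_model.pdb",  # hypothetical search model
                                       "result_queue": queue})
proc.start()
result = queue.get()   # e.g. {"target_resolution": 2.4}
proc.join()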
    """
Example #19
def run_phaser_module_OLD(data_file, inp=False):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.
    """
    # if self.verbose:
    #  self.logger.debug('Utilities::runPhaserModule')

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        res0 = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        if f[-3:] in ('cif', ):
            i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
        # i.addSEAR_ENSE_NUM("junk",5)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            res0 = r1.get_target_resolution('model')
        del (r1)
        return res0

    def run_cca():
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res0)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        if r1.Success():
            z0 = r1.getBestZ()
            sc0 = 1 - (1.23 / r1.getBestVM())
        del (r1)
        return (z0, sc0)

    def run_ncs():
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print(r1.logfile())
        print(r1.loggraph().size())
        print(r1.loggraph().__dict__.keys())
        #print(r1.getCentricE4())
        if r1.Success():
            return (r1)

    def run_ano():
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print(r1.loggraph().__dict__.keys())
        print(r1.loggraph().size())
        print(r1.logfile())
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """

        if r1.Success():
            print('SUCCESS')
            return r1

    # Setup which modules are run
    matthews = False
    if inp:
        ellg = True
        ncs = False
        if type(inp) == str:
            f = inp
        else:
            np, na, res0, f = inp
            matthews = True
    else:
        ellg = False
        ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if matthews:
            z, solvent_content = run_cca()
        if ncs:
            n = run_ncs()
    if matthews:
        # Assumes ellg is run as well.
        # return (z,sc,res)
        return {
            "z": z,
            "solvent_content": solvent_content,
            "target_resolution": target_resolution
        }
    elif ellg:
        # ellg run by itself
        # return target_resolution
        return {"target_resolution": target_resolution}
    else:
        # NCS
        return n
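run_phaser_module_OLD dispatches on the type of inp: a string runs only the eLLG step, a (np, na, resolution, model) tuple also runs the Matthews/CCA step, and omitting inp runs the NCS check. For illustration (file names invented):

# eLLG only: inp is the path to the search model.
ellg_out = run_phaser_module_OLD("dataset.mtz", inp="search_model.pdb")

# eLLG + Matthews/CCA: inp is (n_protein_residues, n_nucleic_residues, resolution, model).
cca_out = run_phaser_module_OLD("dataset.mtz", inp=(250, 0, 2.5, "search_model.pdb"))

# No inp: run the NCS analysis and return the Phaser result object.
ncs_result = run_phaser_module_OLD("dataset.mtz")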
Example #20
    def run(self,
            models_dir,
            nproc=2,
            shres=3.0,
            pklim=0.5,
            npic=50,
            rotastep=1.0,
            min_solvent_content=20,
            submit_qtype=None,
            submit_queue=None,
            monitor=None,
            chunk_size=0,
            **kwargs):
        """Run amore rotation function on a directory of models

        Parameters
        ----------
        models_dir : str
            The directory containing the models to run the rotation search on
        nproc : int, optional
            The number of processors to run the job on
        shres : int, float, optional
            Spherical harmonic resolution [default 3.0]
        pklim : int, float, optional
            Peak limit, output all peaks above <float> [default: 0.5]
        npic : int, optional
            Number of peaks to output from the translation function map for each orientation [default: 50]
        rotastep : int, float, optional
            Size of rotation step [default : 1.0]
        min_solvent_content : int, float, optional
            The minimum solvent content present in the unit cell with the input model [default: 20]
        submit_qtype : str
            The cluster submission queue type - currently support SGE and LSF
        submit_queue : str
            The queue to submit to on the cluster
        monitor
        chunk_size : int, optional
            The number of jobs to submit at the same time

        Returns
        -------
        file
            log file for each model in the models_dir

        """
        self.submit_qtype = submit_qtype
        self.submit_queue = submit_queue

        self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
        n_files = len(self.simbad_dat_files)

        i = InputMR_DAT()
        i.setHKLI(self.mtz)
        i.setMUTE(True)
        run_mr_data = runMR_DAT(i)

        sg = run_mr_data.getSpaceGroupName().replace(" ", "")
        cell = " ".join(map(str, run_mr_data.getUnitCell()))

        chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
        total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(n_files, chunk_size)

        sol_calc = simbad.util.matthews_prob.SolventContent(cell, sg)

        dir_name = "simbad-tmp-" + str(uuid.uuid1())
        script_log_dir = os.path.join(self.work_dir, dir_name)
        os.mkdir(script_log_dir)

        hklpck0 = self._generate_hklpck0()

        ccp4_scr = os.environ["CCP4_SCR"]
        default_tmp_dir = os.path.join(self.work_dir, 'tmp')
        if self.tmp_dir:
            template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
        else:
            template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")

        template_hklpck1 = os.path.join("$CCP4_SCR", "{0}.hkl")
        template_clmn0 = os.path.join("$CCP4_SCR", "{0}_spmipch.clmn")
        template_clmn1 = os.path.join("$CCP4_SCR", "{0}.clmn")
        template_mapout = os.path.join("$CCP4_SCR", "{0}_amore_cross.map")
        template_table1 = os.path.join("$CCP4_SCR", "{0}_sfs.tab")
        template_model = os.path.join("$CCP4_SCR", "{0}.pdb")
        template_rot_log = os.path.join("$CCP4_SCR", "{0}_rot.log")

        predicted_molecular_weight = 0
        if run_mr_data.Success():
            i = InputCCA()
            i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
            i.setCELL6(run_mr_data.getUnitCell())
            i.setMUTE(True)
            run_cca = runCCA(i)

            if run_cca.Success():
                predicted_molecular_weight = run_cca.getAssemblyMW()

        dat_models = []
        for dat_model in self.simbad_dat_files:
            name = os.path.basename(dat_model.replace(".dat", ""))
            pdb_struct = simbad.util.pdb_util.PdbStructure()
            pdb_struct.from_file(dat_model)
            try:
                solvent_content = sol_calc.calculate_from_struct(pdb_struct)
                if solvent_content < min_solvent_content:
                    msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                    logger.debug(msg, name, min_solvent_content)
                    continue
            except ValueError:
                msg = "Skipping %s: Error calculating solvent content"
                logger.debug(msg, name)
                continue

            x, y, z, intrad = pdb_struct.integration_box
            model_molecular_weight = pdb_struct.molecular_weight
            mw_diff = abs(predicted_molecular_weight - model_molecular_weight)

            info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, x, y, z, intrad, solvent_content, None)
            dat_models.append(info)

        sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)

        results = []
        iteration_range = range(0, n_files, chunk_size)
        for cycle, i in enumerate(iteration_range):
            logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)

            amore_files = []
            for dat_model in sorted_dat_models[i:i + chunk_size]:
                logger.debug("Generating script to perform AMORE rotation " + "function on %s", dat_model.pdb_code)

                pdb_model = template_model.format(dat_model.pdb_code)
                table1 = template_table1.format(dat_model.pdb_code)
                hklpck1 = template_hklpck1.format(dat_model.pdb_code)
                clmn0 = template_clmn0.format(dat_model.pdb_code)
                clmn1 = template_clmn1.format(dat_model.pdb_code)
                mapout = template_mapout.format(dat_model.pdb_code)

                conv_py = "\"from simbad.db import convert_dat_to_pdb; convert_dat_to_pdb('{}', '{}')\""
                conv_py = conv_py.format(dat_model.dat_path, pdb_model)

                tab_cmd = [self.amore_exe, "xyzin1", pdb_model, "xyzout1", pdb_model, "table1", table1]
                tab_stdin = self.tabfun_stdin_template.format(
                    x=dat_model.x, y=dat_model.y, z=dat_model.z, a=90, b=90, c=120)

                rot_cmd = [
                    self.amore_exe, 'table1', table1, 'HKLPCK1', hklpck1, 'hklpck0', hklpck0, 'clmn1', clmn1, 'clmn0',
                    clmn0, 'MAPOUT', mapout
                ]
                rot_stdin = self.rotfun_stdin_template.format(
                    shres=shres, intrad=dat_model.intrad, pklim=pklim, npic=npic, step=rotastep)
                rot_log = template_rot_log.format(dat_model.pdb_code)

                tmp_dir = template_tmp_dir.format(dat_model.pdb_code)
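                # Build one shell script per model: point CCP4_SCR at a scratch dir, convert the
                # .dat entry to PDB, then run the AMORE tabfun/rotfun steps with here-document stdin.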
                cmd = [
                    [EXPORT, "CCP4_SCR=" + tmp_dir],
                    ["mkdir", "-p", "$CCP4_SCR\n"],
                    [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-c", conv_py, os.linesep],
                    tab_cmd + ["<< eof >", os.devnull],
                    [tab_stdin],
                    ["eof"],
                    [os.linesep],
                    rot_cmd + ["<< eof >", rot_log],
                    [rot_stdin],
                    ["eof"],
                    [os.linesep],
                    ["grep", "-m 1", "SOLUTIONRCD", rot_log, os.linesep],
                    ["rm", "-rf", "$CCP4_SCR\n"],
                    [EXPORT, "CCP4_SCR=" + ccp4_scr],
                ]
                amore_script = pyjob.misc.make_script(
                    cmd, directory=script_log_dir, prefix="amore_", stem=dat_model.pdb_code)
                amore_log = amore_script.rsplit(".", 1)[0] + '.log'
                amore_files += [(amore_script, tab_stdin, rot_stdin, amore_log, dat_model.dat_path)]

            if len(amore_files) > 0:
                logger.info("Running AMORE tab/rot functions")
                amore_scripts, _, _, amore_logs, dat_models = zip(*amore_files)
                simbad.rotsearch.submit_chunk(amore_scripts, script_log_dir, nproc, 'simbad_amore', submit_qtype,
                                              submit_queue, monitor, self.rot_succeeded_log)

                for dat_model, amore_log in zip(dat_models, amore_logs):
                    base = os.path.basename(amore_log)
                    pdb_code = base.replace("amore_", "").replace(".log", "")
                    try:
                        rotsearch_parser = simbad.parsers.rotsearch_parser.AmoreRotsearchParser(amore_log)
                        score = simbad.core.amore_score.AmoreRotationScore(
                            pdb_code, dat_model, rotsearch_parser.alpha, rotsearch_parser.beta, rotsearch_parser.gamma,
                            rotsearch_parser.cc_f, rotsearch_parser.rf_f, rotsearch_parser.cc_i, rotsearch_parser.cc_p,
                            rotsearch_parser.icp, rotsearch_parser.cc_f_z_score, rotsearch_parser.cc_p_z_score,
                            rotsearch_parser.num_of_rot)
                        if rotsearch_parser.cc_f_z_score:
                            results += [score]
                    except IOError:
                        pass

            else:
                logger.critical("No structures to be trialled")

        self._search_results = results
        shutil.rmtree(script_log_dir)

        if os.path.isdir(default_tmp_dir):
            shutil.rmtree(default_tmp_dir)
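A hypothetical driver for this method; the owning class and its constructor are not shown in this snippet, so rotation_search below is assumed to be an instance of it, and the models directory is illustrative:

rotation_search.run("/data/simbad_db/models",   # hypothetical directory of .dat files
                    nproc=4,
                    min_solvent_content=30,
                    submit_qtype="local")

# run() stores AmoreRotationScore objects on self._search_results (see above).
for score in rotation_search._search_results:
    print(score)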