def aniso_correct(solution_id, mtzin, i, sigi, fp, sigfp, logfile):
    """Run Phaser's anisotropy correction on a reflection file.

    Parameters
    ----------
    solution_id : str
        Root name for the corrected output MTZ ("<solution_id>.aniso").
    mtzin : str
        Path to the input MTZ reflection file.
    i, sigi : str or None
        Intensity and sigma column labels; used when both are given.
    fp, sigfp : str
        Amplitude and sigma column labels; used when i/sigi are absent.
    logfile : str
        Path the Phaser log output is written to (MR_DAT log first,
        ANO log appended).

    Returns
    -------
    bool
        True when the reflection data were read successfully, False
        otherwise (failures are also reported via ``log.critical``).
    """
    # NOTE(review): unused here but kept in case the import has
    # registration side effects in fragon — confirm before removing.
    from fragon.place import CallbackObject
    # Stage 1: read the reflection data (renamed from ``input`` to avoid
    # shadowing the builtin).
    mr_input = phaser.InputMR_DAT()
    mr_input.setHKLI(mtzin)
    # Prefer intensities when both I and SIGI labels are supplied.
    if i is not None and sigi is not None:
        mr_input.setLABI_I_SIGI(i, sigi)
    else:
        mr_input.setLABI_F_SIGF(fp, sigfp)
    mr_input.setMUTE(True)
    data = phaser.runMR_DAT(mr_input)
    with open(logfile, 'w') as aniso_log:
        print(data.logfile(), file=aniso_log)
    # Stage 2: anisotropy correction, written to <solution_id>.aniso.
    mtzout = solution_id + '.aniso'
    ano_input = phaser.InputANO()
    ano_input.setSPAC_HALL(data.getSpaceGroupHall())
    ano_input.setCELL6(data.getUnitCell())
    ano_input.setREFL_DATA(data.getDATA())
    ano_input.setHKLI(mtzin)
    ano_input.setROOT(mtzout)
    ano_input.setMUTE(True)
    aniso = phaser.runANO(ano_input)
    with open(logfile, 'a') as aniso_log:
        print(aniso.logfile(), file=aniso_log)
    if data.Success():
        success = True
    else:
        # BUG FIX: the original set ``success`` only on the success branch
        # and never used or returned it; both branches now set it and the
        # flag is returned so callers can detect failure.
        success = False
        log.critical('Job exit status FAILURE')
        log.critical('%s ERROR : %s' % (data.ErrorName(), data.ErrorMessage()))
    return success
def preprocess(self):
    """Read the reflection data with Phaser and, on success, call
    ``self.process`` twice, printing each result.

    Reads ``self.datafile`` as an MTZ with hard-coded 'F'/'SIGF'
    amplitude labels.  NOTE(review): legacy Python 2 syntax (print
    statement).
    """
    import phaser
    if self.verbose:
        self.logger.debug('RunPhaser::preprocess')
    # Read the dataset (amplitude labels are hard-coded to F/SIGF).
    i = phaser.InputMR_DAT()
    i.setHKLI(self.datafile)
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        # NOTE(review): the loop variable rebinds ``i`` (previously the
        # InputMR_DAT object) — harmless here but worth renaming.
        for i in range(2):
            print self.process(r)
def prepare_data(mtzin, i, sigi, fp, sigfp, logfile):
    """Load reflection data from an MTZ file via Phaser's MR_DAT module.

    Intensity labels (``i``/``sigi``) are used when both are provided,
    otherwise the amplitude labels (``fp``/``sigfp``).  The Phaser log
    is written to ``logfile``.

    Returns the MR_DAT result object on success; on failure the error
    is reported via ``log.critical`` and None is returned implicitly.
    """
    mr_dat = phaser.InputMR_DAT()
    mr_dat.setHKLI(mtzin)
    use_intensities = i is not None and sigi is not None
    if use_intensities:
        mr_dat.setLABI_I_SIGI(i, sigi)
    else:
        mr_dat.setLABI_F_SIGF(fp, sigfp)
    mr_dat.setMUTE(True)
    result = phaser.runMR_DAT(mr_dat)
    with open(logfile, 'w') as data_log:
        print(result.logfile(), file=data_log)
    if result.Success():
        return result
    log.critical('Job exit status FAILURE')
    log.critical('%s ERROR : %s' % (result.ErrorName(), result.ErrorMessage()))
def preprocess(self):
    """Read the reflection data with Phaser and, on success, call
    ``self.process`` twice, printing each result.

    Reads ``self.datafile`` as an MTZ with hard-coded 'F'/'SIGF'
    amplitude labels.  NOTE(review): legacy Python 2 syntax (print
    statement); duplicate of an earlier ``preprocess`` in this file.
    """
    import phaser
    if self.verbose:
        self.logger.debug('RunPhaser::preprocess')
    # Read the dataset (amplitude labels are hard-coded to F/SIGF).
    i = phaser.InputMR_DAT()
    i.setHKLI(self.datafile)
    i.setLABI_F_SIGF('F','SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        # NOTE(review): the loop variable rebinds ``i`` (previously the
        # InputMR_DAT object) — harmless here but worth renaming.
        for i in range(2):
            print self.process(r)
def run(self):
    """Run a PHASER fast rotation-function (MR_FRF) search.

    Reads reflection data from ``self.hklin`` (intensity labels
    preferred over amplitudes), then runs the rotation search against
    ``self.pdbin`` with sequence identity ``self.eid`` and writes the
    summary to ``self.logfile``.  All work happens inside
    ``self.work_dir``; the previous working directory is restored
    before returning.

    Raises
    ------
    RuntimeError
        If neither intensity nor amplitude column labels are set.
    """
    previous_dir = os.getcwd()
    if not os.path.exists(self.work_dir):
        os.makedirs(self.work_dir)
    os.chdir(self.work_dir)

    dat_input = InputMR_DAT()
    dat_input.setHKLI(self.hklin)
    if self.hires:
        dat_input.setHIRES(self.hires)
    # Prefer intensities, fall back to amplitudes; the labels hold the
    # string "None" (not None) when absent.
    have_intensities = self.i != "None" and self.sigi != "None"
    have_amplitudes = self.f != "None" and self.sigf != "None"
    if have_intensities:
        dat_input.setLABI_I_SIGI(self.i, self.sigi)
    elif have_amplitudes:
        dat_input.setLABI_F_SIGF(self.f, self.sigf)
    else:
        raise RuntimeError("No flags for intensities or amplitudes have been provided")
    dat_input.setMUTE(True)

    dat_result = runMR_DAT(dat_input)
    if dat_result.Success():
        frf_input = InputMR_FRF()
        frf_input.setJOBS(1)
        frf_input.setREFL_DATA(dat_result.getREFL_DATA())
        frf_input.setSPAC_HALL(dat_result.getSpaceGroupHall())
        frf_input.setCELL6(dat_result.getUnitCell())
        frf_input.setROOT("phaser_mr_output")
        frf_input.addENSE_PDB_ID("PDB", self.pdbin, float(self.eid))
        frf_input.setENSE_DISA_CHEC('PDB', True)
        frf_input.setCOMP_BY("SOLVENT")
        frf_input.setCOMP_PERC(self.solvent)
        frf_input.addSEAR_ENSE_NUM('PDB', self.nmol)
        frf_input.setRFAC_USE(False)
        if self.timeout != 0:
            frf_input.setKILL_TIME(self.timeout)
        frf_result = runMR_FRF(frf_input)
        with open(self.logfile, 'w') as handle:
            handle.write(frf_result.summary())
    os.chdir(previous_dir)
def run(self):
    """Run PHASER's fast rotation function for this search model.

    Reflection data come from ``self.hklin``; intensity columns are
    used when set, otherwise amplitude columns.  The rotation search
    runs against ``self.pdbin`` with a fixed sequence identity of 0.7
    and its summary is written to ``self.logfile``.  The caller's
    working directory is restored afterwards.

    Raises
    ------
    RuntimeError
        When neither intensity nor amplitude labels are supplied.
    """
    starting_dir = os.getcwd()
    if not os.path.exists(self.work_dir):
        os.makedirs(self.work_dir)
    os.chdir(self.work_dir)

    reflection_input = InputMR_DAT()
    reflection_input.setHKLI(self.hklin)
    if self.hires:
        reflection_input.setHIRES(self.hires)
    # Labels are the string "None" when not provided by the caller.
    if self.i != "None" and self.sigi != "None":
        reflection_input.setLABI_I_SIGI(self.i, self.sigi)
    elif self.f != "None" and self.sigf != "None":
        reflection_input.setLABI_F_SIGF(self.f, self.sigf)
    else:
        raise RuntimeError("No flags for intensities or amplitudes have been provided")
    reflection_input.setMUTE(True)

    reflection_result = runMR_DAT(reflection_input)
    if reflection_result.Success():
        rotation_input = InputMR_FRF()
        rotation_input.setJOBS(1)
        rotation_input.setREFL_DATA(reflection_result.getREFL_DATA())
        rotation_input.setSPAC_HALL(reflection_result.getSpaceGroupHall())
        rotation_input.setCELL6(reflection_result.getUnitCell())
        rotation_input.setROOT("phaser_mr_output")
        rotation_input.addENSE_PDB_ID("PDB", self.pdbin, 0.7)
        rotation_input.setCOMP_BY("SOLVENT")
        rotation_input.setCOMP_PERC(self.solvent)
        rotation_input.addSEAR_ENSE_NUM('PDB', self.nmol)
        rotation_input.setRFAC_USE(False)
        if self.timeout != 0:
            rotation_input.setKILL_TIME(self.timeout)
        rotation_result = runMR_FRF(rotation_input)
        with open(self.logfile, 'w') as handle:
            handle.write(rotation_result.summary())
    os.chdir(starting_dir)
def run(self, models_dir, nproc=2, shres=3.0, pklim=0.5, npic=50,
        rotastep=1.0, min_solvent_content=20, submit_nproc=None,
        submit_qtype=None, submit_queue=None, monitor=None, chunk_size=0,
        **kwargs):
    """Run amore rotation function on a directory of models

    Parameters
    ----------
    models_dir : str
        The directory containing the models to run the rotation search on
    nproc : int, optional
        The number of processors to run the job on
    shres : int, float, optional
        Spherical harmonic resolution [default 3.0]
    pklim : int, float, optional
        Peak limit, output all peaks above <float> [default: 0.5]
    npic : int, optional
        Number of peaks to output from the translation function map for
        each orientation [default: 50]
    rotastep : int, float, optional
        Size of rotation step [default : 1.0]
    min_solvent_content : int, float, optional
        The minimum solvent content present in the unit cell with the
        input model [default: 20]
    submit_nproc : int
        The number of processors to use on the head node when creating
        submission scripts on a cluster
    submit_qtype : str
        The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster
    monitor
    chunk_size : int, optional
        The number of jobs to submit at the same time

    Returns
    -------
    file
        log file for each model in the models_dir
    """
    self.shres = shres
    self.pklim = pklim
    self.npic = npic
    self.rotastep = rotastep
    self.submit_qtype = submit_qtype
    self.submit_queue = submit_queue
    self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
    mtz_labels = simbad.util.mtz_util.GetLabels(self.mtz)

    # Read the reflection data once with Phaser so the space group,
    # cell and predicted assembly MW can be derived below.
    i = InputMR_DAT()
    i.setHKLI(self.mtz)
    i.setLABI_F_SIGF(mtz_labels.f, mtz_labels.sigf)
    i.setMUTE(True)
    run_mr_data = runMR_DAT(i)
    sg = run_mr_data.getSpaceGroupName().replace(" ", "")
    cell = " ".join(map(str, run_mr_data.getUnitCell()))
    sol_calc = simbad.util.matthews_prob.SolventContent(cell, sg)

    # Unique scratch directory for submission scripts and logs.
    dir_name = "simbad-tmp-" + str(uuid.uuid1())
    self.script_log_dir = os.path.join(self.work_dir, dir_name)
    os.mkdir(self.script_log_dir)
    self.hklpck0 = self._generate_hklpck0()
    self.ccp4_scr = os.environ["CCP4_SCR"]
    default_tmp_dir = os.path.join(self.work_dir, 'tmp')
    if self.tmp_dir:
        self.template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
    else:
        self.template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")

    # Predicted molecular weight of the full assembly (0 if CCA fails).
    predicted_molecular_weight = 0
    if run_mr_data.Success():
        i = InputCCA()
        i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
        i.setCELL6(run_mr_data.getUnitCell())
        i.setMUTE(True)
        run_cca = runCCA(i)
        if run_cca.Success():
            predicted_molecular_weight = run_cca.getAssemblyMW()

    # Score every .dat model by |predicted MW - model MW|, skipping
    # models with too-low solvent content or unparseable files.
    dat_models = []
    for dat_model in self.simbad_dat_files:
        name = os.path.basename(dat_model.replace(".dat", ""))
        pdb_struct = simbad.util.pdb_util.PdbStructure()
        pdb_struct.from_file(dat_model)
        try:
            solvent_content = sol_calc.calculate_from_struct(pdb_struct)
            if solvent_content < min_solvent_content:
                msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                logger.debug(msg, name, min_solvent_content)
                continue
        except ValueError:
            msg = "Skipping %s: Error calculating solvent content"
            logger.debug(msg, name)
            continue
        except IndexError:
            msg = "Skipping %s: Problem with dat file"
            logger.debug(msg, name)
            continue
        x, y, z, intrad = pdb_struct.integration_box
        model_molecular_weight = pdb_struct.molecular_weight
        mw_diff = abs(predicted_molecular_weight - model_molecular_weight)
        info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff,
                                                   x, y, z, intrad,
                                                   solvent_content, None)
        dat_models.append(info)

    # Closest-MW models are trialled first.
    sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff),
                               reverse=False)
    n_files = len(sorted_dat_models)
    chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
    total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(
        n_files, chunk_size)
    if submit_qtype == 'local':
        processes = nproc
    else:
        processes = submit_nproc

    results = []
    iteration_range = range(0, n_files, chunk_size)
    for cycle, i in enumerate(iteration_range):
        # NOTE(review): ``i`` is reused here as the chunk start index;
        # earlier in this method it held a Phaser input object.
        logger.info("Working on chunk %d out of %d", cycle + 1,
                    total_chunk_cycles)
        if self.solution:
            logger.info("Early termination criteria met, skipping chunk %d",
                        cycle + 1)
            continue
        collector = ScriptCollector(None)
        amore_files = []
        # List comprehension used purely for its side effects: collect
        # each worker's script and (log, model) pair, dropping Nones.
        # The slice uses the outer ``i`` (chunk start); the comprehension
        # variable ``i`` shadows it only inside the comprehension scope.
        with pool.Pool(processes=processes) as p:
            [(collector.add(i[0]), amore_files.append(i[1]))
             for i in p.map(self, sorted_dat_models[i:i + chunk_size])
             if i is not None]
        if len(collector.scripts) > 0:
            logger.info("Running AMORE tab/rot functions")
            amore_logs, dat_models = zip(*amore_files)
            simbad.util.submit_chunk(collector, self.script_log_dir, nproc,
                                     'simbad_amore', submit_qtype,
                                     submit_queue, True, monitor,
                                     self.rot_succeeded_log)
            # Harvest rotation-search scores from each AMORE log.
            for dat_model, amore_log in zip(dat_models, amore_logs):
                base = os.path.basename(amore_log)
                pdb_code = base.replace("amore_", "").replace(".log", "")
                try:
                    rotsearch_parser = simbad.parsers.rotsearch_parser.AmoreRotsearchParser(
                        amore_log)
                    score = simbad.core.amore_score.AmoreRotationScore(
                        pdb_code, dat_model, rotsearch_parser.alpha,
                        rotsearch_parser.beta, rotsearch_parser.gamma,
                        rotsearch_parser.cc_f, rotsearch_parser.rf_f,
                        rotsearch_parser.cc_i, rotsearch_parser.cc_p,
                        rotsearch_parser.icp,
                        rotsearch_parser.cc_f_z_score,
                        rotsearch_parser.cc_p_z_score,
                        rotsearch_parser.num_of_rot)
                    if rotsearch_parser.cc_f_z_score:
                        results += [score]
                except IOError:
                    pass
        else:
            logger.critical("No structures to be trialled")

    self._search_results = results
    # Clean up scratch directories.
    shutil.rmtree(self.script_log_dir)
    if os.path.isdir(default_tmp_dir):
        shutil.rmtree(default_tmp_dir)
def run(self):
    """Run molecular replacement with PHASER (MR_AUTO).

    Reads reflection data from a local copy of ``self.hklin``
    (intensity labels preferred over amplitudes) and searches with a
    single copy of ``self.pdbin``.  Outputs:

    - ``self.pdbout``  : top placed model from PHASER
    - ``self.logfile`` : PHASER summary
    - ``self.hklout``  : input MTZ, reindexed if PHASER changed the
      space group

    Raises
    ------
    RuntimeError
        If neither intensity nor amplitude labels are provided.
    """
    # Make a note of the current working directory
    current_work_dir = os.getcwd()
    # Change to the PHASER working directory (create it if needed)
    if os.path.exists(self.work_dir):
        os.chdir(self.work_dir)
    else:
        os.makedirs(self.work_dir)
        os.chdir(self.work_dir)
    # Copy hklin and pdbin to the working directory for efficient running of PHASER
    hklin = os.path.join(self.work_dir, os.path.basename(self.hklin))
    shutil.copyfile(self.hklin, hklin)
    pdbin = os.path.join(self.work_dir, os.path.basename(self.pdbin))
    shutil.copyfile(self.pdbin, pdbin)
    i = InputMR_DAT()
    i.setHKLI(hklin)
    if self.hires:
        i.setHIRES(self.hires)
    if self.autohigh:
        i.setRESO_AUTO_HIGH(self.autohigh)
    # Intensities take precedence over amplitudes; the labels are the
    # string "None" (not None) when absent.
    if self.i != "None" and self.sigi != "None":
        i.setLABI_I_SIGI(self.i, self.sigi)
    elif self.f != "None" and self.sigf != "None":
        i.setLABI_F_SIGF(self.f, self.sigf)
    else:
        msg = "No flags for intensities or amplitudes have been provided"
        raise RuntimeError(msg)
    i.setSGAL_SELE(SGAlternatives[self.sgalternative].value)
    i.setMUTE(True)
    r = runMR_DAT(i)
    if r.Success():
        i = InputMR_AUTO()
        i.setJOBS(1)
        i.setREFL_DATA(r.getREFL_DATA())
        i.setROOT("phaser_mr_output")
        i.addENSE_PDB_ID("PDB", pdbin, 0.7)
        i.setENSE_DISA_CHEC('PDB', True)
        i.setCOMP_BY("SOLVENT")
        i.setCOMP_PERC(self.solvent)
        # nmol set to one for testing
        i.addSEAR_ENSE_NUM('PDB', 1)
        i.setSGAL_SELE(SGAlternatives[self.sgalternative].value)
        if self.timeout != 0:
            i.setKILL_TIME(self.timeout)
        i.setMUTE(True)
        # Free the MR_DAT result before rebinding ``r`` to the MR_AUTO run
        del(r)
        r = runMR_AUTO(i)
        with open(self.logfile, 'w') as f:
            f.write(r.summary())
        shutil.move(r.getTopPdbFile(), self.pdbout)
        # Output original mtz with a change of basis if needed
        original_space_group, _, _ = mtz_util.crystal_data(self.hklin)
        space_group, _, _ = mtz_util.crystal_data(r.getTopMtzFile())
        if original_space_group != space_group:
            mtz_util.reindex(self.hklin, self.hklout, space_group)
        else:
            shutil.copyfile(self.hklin, self.hklout)
    # Return to original working directory
    os.chdir(current_work_dir)
    # Delete any files copied across
    if os.path.isfile(os.path.join(self.work_dir, os.path.basename(self.hklin))):
        os.remove(os.path.join(self.work_dir, os.path.basename(self.hklin)))
    if os.path.isfile(os.path.join(self.work_dir, os.path.basename(self.pdbin))):
        os.remove(os.path.join(self.work_dir, os.path.basename(self.pdbin)))
def run_phaser_module_OLD(datafile, inp=False):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.

    datafile - MTZ path; columns are assumed to be labelled F/SIGF.
    inp - False: run only the NCS analysis module.
          str: path to a search model; run only the eLLG module.
          tuple (np, na, res0, f): run eLLG plus the CCA (Matthews)
          module using np protein / na nucleic residue counts.

    NOTE(review): legacy "_OLD" Python 2 code (print statements).
    """
    # if self.verbose:
    #     self.logger.debug('Utilities::runPhaserModule')
    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        # eLLG module: resolution needed to reach the target eLLG for
        # the model.  Reads ``r`` (MR_DAT result) and ``f`` (model path)
        # from the enclosing scope.
        res0 = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        # NOTE(review): ``in ('cif')`` is substring membership against
        # the *string* 'cif' (not a tuple) — works here by accident.
        if f[-3:] in ('cif'):
            i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
        # i.addSEAR_ENSE_NUM("junk",5)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            res0 = r1.get_target_resolution('model')
        del(r1)
        return res0

    def run_cca():
        # CCA (Matthews) module: best Z (number of copies) and solvent
        # content estimate.  Reads ``r``, ``res0``, ``np`` and ``na``
        # from the enclosing scope.
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res0)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        if r1.Success():
            z0 = r1.getBestZ()
            # Solvent fraction derived from the best Matthews coefficient.
            sc0 = 1-(1.23/r1.getBestVM())
        del(r1)
        return (z0, sc0)

    def run_ncs():
        # NCS analysis module; returns the raw Phaser result on success.
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print r1.logfile()
        print r1.loggraph().size()
        print r1.loggraph().__dict__.keys()
        #print r1.getCentricE4()
        if r1.Success():
            return(r1)

    def run_ano():
        # Anomalous-signal module; defined but never invoked below.
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """
        if r1.Success():
            print 'SUCCESS'
            return(r1)

    # Setup which modules are run
    matthews = False
    if inp:
        ellg = True
        ncs = False
        if type(inp) == str:
            f = inp
        else:
            np, na, res0, f = inp
            matthews = True
    else:
        ellg = False
        ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(datafile))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if matthews:
            z, solvent_content = run_cca()
        if ncs:
            # NOTE(review): if runMR_DAT fails, ``n`` is never bound and
            # the NCS return below raises NameError.
            n = run_ncs()
    if matthews:
        # Assumes ellg is run as well.
        # return (z,sc,res)
        return {"z": z,
                "solvent_content": solvent_content,
                "target_resolution": target_resolution}
    elif ellg:
        # ellg run by itself
        # return target_resolution
        return {"target_resolution": target_resolution}
    else:
        # NCS
        return n
def run_phaser(
        data_file,
        struct_file,
        spacegroup,
        db_settings=False,
        tag=False,
        work_dir=False,
        adf=False,
        name=False,
        ncopy=1,
        cell_analysis=False,
        resolution=False,
        full=False,
):
    """
    Run Phaser and passes results back to RAPD Redis DB
    **Requires Phaser src code!**

    data_file - input data as mtz (required)
    struct_file - input search model path in mmCIF or PDB format (required)
    spacegroup - The space group to run MR (required)
    tag - a Redis key where the results are sent (cluster mode)
    db_settings - Redis connection settings for sending results (cluster mode)
    work_dir - working directory (defaults to current working dir)
    adf - calculate an anomalous difference Fourier map for the solution
    name - root name for output files (defaults to spacegroup)
    ncopy - number of molecules to search for
    cell_analysis - internal RAPD signal so all possible SG's are searched
    resolution - high res limit to run MR (float)
    full - signal to run more comprehensive MR

    NOTE(review): legacy Python 2 code (print statements).
    """
    phaser_log = False
    # Change to work_dir
    if not work_dir:
        work_dir = os.getcwd()
    os.chdir(work_dir)

    if not name:
        name = spacegroup

    # # Handle CIF file input -> PDB
    # if struct_file[-3:] == "cif":
    #     pdb.cif_as_pdb(struct_file)
    #     struct_file = struct_file.replace(".cif", ".pdb")

    # Read the dataset (amplitude labels are hard-coded to F/SIGF)
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r1 = phaser.runMR_DAT(i)

    # Need to determine Phaser version for keyword changes!
    version = re.search(r'Version:\s*([\d.]+)', r1.logfile()).group(1)

    if r1.Success():
        i = phaser.InputMR_AUTO()
        # i.setREFL_DATA(r1.getREFL_DATA())
        # i.setREFL_DATA(r1.DATA_REFL())
        i.setREFL_F_SIGF(r1.getMiller(), r1.getFobs(), r1.getSigFobs())
        i.setCELL6(r1.getUnitCell())
        if struct_file[-3:].lower() == "cif":
            #i.addENSE_CIF_ID('model', cif, 0.7)
            ### Typo in PHASER CODE!!! <<<CIT>>> ###
            i.addENSE_CIT_ID('model', convert_unicode(struct_file), 0.7)
        else:
            i.addENSE_PDB_ID('model', convert_unicode(struct_file), 0.7)
        i.addSEAR_ENSE_NUM("model", ncopy)
        i.setSPAC_NAME(spacegroup)
        if cell_analysis:
            i.setSGAL_SELE("ALL")
            # Set it for worst case in orth
            # number of processes to run in parallel where possible
            i.setJOBS(1)
        else:
            i.setSGAL_SELE("NONE")
        if full:
            # Picks own resolution
            # Round 2, pick best solution as long as less that 10% clashes
            i.setPACK_SELE("PERCENT")
            i.setPACK_CUTO(0.1)
            #command += "PACK CUTOFF 10\n"
        else:
            # For first round and cell analysis
            # Only set the resolution limit in the first round or cell analysis.
            if resolution:
                i.setRESO_HIGH(resolution)
            else:
                i.setRESO_HIGH(6.0)
            # If Phaser version < 2.6.0
            if int(version.split('.')[1]) <= 6:
                i.setSEAR_DEEP(False)
            else:
                i.setSEAR_METH("FAST")
            # Don"t seem to work since it picks the high res limit now.
            # Get an error when it prunes all the solutions away and TF has no input.
            # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
            # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
        # Turn off pruning in 2.6.0
        i.setSEAR_PRUN(False)
        # Choose more top peaks to help with getting it correct.
        i.setPURG_ROTA_ENAB(True)
        i.setPURG_ROTA_NUMB(3)
        #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
        i.setPURG_TRAN_ENAB(True)
        i.setPURG_TRAN_NUMB(1)
        #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"
        # Only keep the top after refinement.
        i.setPURG_RNP_ENAB(True)
        i.setPURG_RNP_NUMB(1)
        #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
        i.setROOT(convert_unicode(name))
        # i.setMUTE(False)
        i.setMUTE(True)
        # launch the run
        try:
            r = phaser.runMR_AUTO(i)
        except RuntimeError as e:
            # Known CIF error - convert to pdb and retry
            if struct_file[-3:] in ('cif', ):
                pdb.cif_as_pdb((struct_file, ))
                pdb_file = struct_file.replace(".cif", ".pdb")
                # Rebuild the whole MR_AUTO input from scratch with the
                # converted PDB model (mirrors the setup above).
                i = phaser.InputMR_AUTO()
                # i.setREFL_DATA(r1.getREFL_DATA())
                # i.setREFL_DATA(r1.DATA_REFL())
                i.setREFL_F_SIGF(r1.getMiller(), r1.getFobs(), r1.getSigFobs())
                i.setCELL6(r1.getUnitCell())
                i.addENSE_PDB_ID('model', convert_unicode(pdb_file), 0.7)
                i.addSEAR_ENSE_NUM("model", ncopy)
                i.setSPAC_NAME(spacegroup)
                if cell_analysis:
                    i.setSGAL_SELE("ALL")
                    # Set it for worst case in orth
                    # number of processes to run in parallel where possible
                    i.setJOBS(1)
                else:
                    i.setSGAL_SELE("NONE")
                if full:
                    # Picks own resolution
                    # Round 2, pick best solution as long as less that 10% clashes
                    i.setPACK_SELE("PERCENT")
                    i.setPACK_CUTO(0.1)
                else:
                    # For first round and cell analysis
                    # Only set the resolution limit in the first round or cell analysis.
                    if resolution:
                        i.setRESO_HIGH(resolution)
                    else:
                        i.setRESO_HIGH(6.0)
                    # If Phaser version < 2.6.0
                    if int(version.split('.')[1]) <= 6:
                        i.setSEAR_DEEP(False)
                    else:
                        i.setSEAR_METH("FAST")
                # Turn off pruning in 2.6.0
                i.setSEAR_PRUN(False)
                # Choose more top peaks to help with getting it correct.
                i.setPURG_ROTA_ENAB(True)
                i.setPURG_ROTA_NUMB(3)
                i.setPURG_TRAN_ENAB(True)
                i.setPURG_TRAN_NUMB(1)
                # Only keep the top after refinement.
                i.setPURG_RNP_ENAB(True)
                i.setPURG_RNP_NUMB(1)
                i.setROOT(convert_unicode(name))
                # i.setMUTE(False)
                i.setMUTE(True)
                # launch the run
                r = phaser.runMR_AUTO(i)
            else:
                raise e

        if r.Success():
            pass
            #if r.foundSolutions():
                #print "Phaser has found MR solutions"
                #print "Top LLG = %f" % r.getTopLLG()
                #print "Top PDB file = %s" % r.getTopPdbFile()
            #else:
                #print "Phaser has not found any MR solutions"
        else:
            print "Job exit status FAILURE"
            print r.ErrorName(), "ERROR :", r.ErrorMessage()

        # Save log files for debugging
        phaser_log = r.logfile()
        with open('phaser.log', 'w') as log:
            log.write(r.logfile())
            log.close()

        if r.foundSolutions():
            rfz = None
            tfz = None
            tncs = False
            # Parse results
            for p in r.getTopSet().ANNOTATION.split():
                # For v 2.8.3
                # RF*0\nTF*0\nLLG=30699\nTFZ==174.8\nPAK=0\nLLG=30699\nTFZ==174.8\n
                if p.count('RFZ'):
                    if p.count('=') in [1]:
                        rfz = float(p.split('=')[-1])
                if p.count('RF*0'):
                    rfz = "NC"
                if p.count('TFZ'):
                    if p.count('=') in [1]:
                        tfz = p.split('=')[-1]
                        if tfz == '*':
                            tfz = 'arbitrary'
                        else:
                            tfz = float(tfz)
                if p.count('TF*0'):
                    tfz = "NC"
            # Flag translational NCS if the solution card mentions +TNCS.
            tncs_test = [
                1 for line in r.getTopSet().unparse().splitlines()
                if line.count("+TNCS")
            ]
            tncs = bool(len(tncs_test))
            mtz_file = os.path.join(work_dir, r.getTopMtzFile())
            phaser_result = {
                "ID": name,
                "solution": r.foundSolutions(),
                "pdb_file": os.path.join(work_dir, r.getTopPdbFile()),
                "mtz": mtz_file,
                "gain": float(r.getTopLLG()),
                "rfz": rfz,
                # "tfz": r.getTopTFZ(),
                "tfz": tfz,
                "clash": r.getTopSet().PAK,
                "dir": os.getcwd(),
                "spacegroup": r.getTopSet().getSpaceGroupName().replace(' ', ''),
                "tNCS": tncs,
                "nmol": r.getTopSet().NUM,
                "adf": None,
                "peak": None,
            }
            # Calculate 2Fo-Fc & Fo-Fc maps
            # foo.mtz begets foo_2mFo-DFc.ccp4 & foo__mFo-DFc.ccp4
            local_subprocess(command="phenix.mtz2map %s" % mtz_file,
                             logfile='map.log',
                             shell=True)
            # Map files should now exist
            map_2_1 = mtz_file.replace(".mtz", "_2mFo-DFc.ccp4")
            map_1_1 = mtz_file.replace(".mtz", "_mFo-DFc.ccp4")
            # Make sure the maps exist and then package them
            if os.path.exists(map_2_1):
                # Compress the map
                arch_prod_file, arch_prod_hash = archive.compress_file(map_2_1)
                # Remove the map that was compressed
                os.unlink(map_2_1)
                # Store information
                map_for_display = {
                    "path": arch_prod_file,
                    "hash": arch_prod_hash,
                    "description": "map_2_1"
                }
                phaser_result["map_2_1"] = map_for_display
            if os.path.exists(map_1_1):
                # Compress the map
                arch_prod_file, arch_prod_hash = archive.compress_file(map_1_1)
                # Remove the map that was compressed
                os.unlink(map_1_1)
                # Store information
                map_for_display = {
                    "path": arch_prod_file,
                    "hash": arch_prod_hash,
                    "description": "map_1_1"
                }
                phaser_result["map_1_1"] = map_for_display
            # If PDB exists, package that too
            if phaser_result.get("pdb_file", False):
                if os.path.exists(phaser_result.get("pdb_file")):
                    # Compress the file
                    arch_prod_file, arch_prod_hash = archive.compress_file(
                        phaser_result.get("pdb_file"))
                    # os.unlink(phaser_result.get("pdb"))
                    # Store information
                    pdb_for_display = {
                        "path": arch_prod_file,
                        "hash": arch_prod_hash,
                        "description": os.path.basename(phaser_result.get("pdb_file"))
                    }
                    phaser_result["pdb"] = pdb_for_display
            # Calc ADF map
            if adf:
                if os.path.exists(phaser_result.get(
                        "pdb_file", False)) and os.path.exists(
                            phaser_result.get("mtz", False)):
                    adf_results = calc_ADF_map(data_file=data_file,
                                               mtz=phaser_result["mtz"],
                                               pdb=phaser_result["pdb_file"])
                    if adf_results.get("adf"):
                        phaser_result.update({
                            "adf": os.path.join(work_dir, adf_results.get("adf"))
                        })
                    if adf_results.get("peak"):
                        phaser_result.update({
                            "peak": os.path.join(work_dir, adf_results.get("peak"))
                        })
            #phaser_result.update({"adf": adf_results.get("adf", None),
            #                      "peak": adf_results.get("peak", None),})
            # New procedure for making tar of results
            # Create directory
            # new_name = name[:-2]
            new_name = phaser_result.get("ID")
            os.mkdir(new_name)
            # Go through and copy files to archive directory
            file_types = ("pdb_file", "mtz", "adf", "peak")
            for file_type in file_types:
                target_file = phaser_result.get(file_type, False)
                if target_file:
                    if os.path.exists(target_file):
                        # Copy the file to the directory to be archived
                        shutil.copy(target_file, new_name + "/.")
            # Create the archive
            archive_result = archive.create_archive(new_name)
            archive_result["description"] = '%s_files' % new_name
            phaser_result["tar"] = archive_result
        else:
            phaser_result = {
                "ID": name,
                "solution": False,
                "message": "No solution",
                "spacegroup": spacegroup
            }

        # Add the phaser log
        if phaser_log:
            phaser_result.update({"logs": {"phaser": phaser_log}})

        if db_settings and tag:
            print "db_settings and tag"
            # Connect to Redis
            redis = connect_to_redis(db_settings)
            # Key should be deleted once received, but set the key to
            # expire in 24 hours just in case.
            redis.setex(tag, 86400, json.dumps(phaser_result))
            # Do a little sleep to make sure results are in Redis for
            # postprocess_phaser
            time.sleep(0.1)
        else:
            # Print the result so it can be seen thru the queue by
            # reading stdout
            print json.dumps(phaser_result)
def run_phaser_module_OLD(args): """ Run separate module of Phaser to get results before running full job. Setup so that I can read the data in once and run multiple modules. """ # print "run_phaser_module" res = 0.0 z = 0 solvent_content = 0.0 def run_ellg(run_mr, pdb_file): """ Perform calculations and return target-reso Resolution to achieve target eLLG """ target_resolution = 0.0 i0 = phaser.InputMR_ELLG() i0.setSPAC_HALL(run_mr.getSpaceGroupHall()) i0.setCELL6(run_mr.getUnitCell()) i0.setMUTE(True) i0.setREFL_DATA(run_mr.getDATA()) i0.addENSE_PDB_ID("test", pdb_file, 0.7) r1 = phaser.runMR_ELLG(i0) if r1.Success(): target_resolution = r1.get_target_resolution("test") del r1 return target_resolution def run_cca(run_mr, target_resolution, args): # print "run_cca" z0 = 0 solvent_content = 0.0 i0 = phaser.InputCCA() i0.setSPAC_HALL(run_mr.getSpaceGroupHall()) i0.setCELL6(run_mr.getUnitCell()) i0.setMUTE(True) # Have to set high res limit!! i0.setRESO_HIGH(target_resolution) if args.np > 0: i0.addCOMP_PROT_NRES_NUM(args.np, 1) if args.na > 0: i0.addCOMP_NUCL_NRES_NUM(args.na, 1) r1 = phaser.runCCA(i0) if r1.Success(): z0 = r1.getBestZ() solvent_content = 1 - (1.23 / r1.getBestVM()) del r1 return (z0, solvent_content) def run_ncs(run_mr): # print "run_ncs" i0 = phaser.InputNCS() i0.setSPAC_HALL(run_mr.getSpaceGroupHall()) i0.setCELL6(run_mr.getUnitCell()) i0.setREFL_DATA(run_mr.getDATA()) i0.setMUTE(True) r1 = phaser.runNCS(i0) # print r1.logfile() # print r1.loggraph().size() # print r1.loggraph().__dict__.keys() if r1.Success(): return r1 # def run_ano(run_mr): # print "run_ano" # i0 = phaser.InputANO() # i0.setSPAC_HALL(run_mr.getSpaceGroupHall()) # i0.setCELL6(run_mr.getUnitCell()) # i0.setREFL_DATA(run_mr.getDATA()) # i0.setMUTE(True) # r1 = phaser.runANO(i0) # # print r1.loggraph().__dict__.keys() # # print r1.loggraph().size() # # print r1.logfile() # """ # o = phaser.Output() # redirect_str = StringIO() # o.setPackagePhenix(file_object=redirect_str) # r1 = 
phaser.runANO(i0,o) # """ # if r1.Success(): # print "SUCCESS" # return r1 # Setup which modules are run # if inp: ellg = True ncs = False target_resolution = args.resolution if not (args.np or args.na or args.resolution): pass # else: # ellg = False # ncs = True # Read the dataset i = phaser.InputMR_DAT() i.setHKLI(args.data_file) i.setLABI_F_SIGF("F", "SIGF") i.setMUTE(True) run_mr = phaser.runMR_DAT(i) if run_mr.Success(): if ellg: target_resolution = run_ellg(run_mr, args.pdb_file) if args.matthews: z, solvent_content = run_cca(run_mr, target_resolution, args) if ncs: n = run_ncs(run_mr) if args.matthews: # Assumes ellg is run as well. return { "z": z, "solvent_content": solvent_content, "target_resolution": target_resolution } elif ellg: # ellg run by itself return {"target_resolution": target_resolution} else: # NCS return n
def run(self):
    """Perform molecular replacement with PHASER's MR_AUTO mode.

    Reflection data are read from a local copy of ``self.hklin``
    (intensity labels preferred over amplitudes) and ``self.nmol``
    copies of ``self.pdbin`` are searched for.  Outputs:

    - ``self.pdbout``  : top solution from PHASER
    - ``self.logfile`` : PHASER summary
    - ``self.hklout``  : experimental data re-expressed in the
      solution's space group

    Raises
    ------
    RuntimeError
        When neither intensity nor amplitude labels are available.
    """
    launch_dir = os.getcwd()
    # Work inside self.work_dir, creating it on first use.
    if not os.path.exists(self.work_dir):
        os.makedirs(self.work_dir)
    os.chdir(self.work_dir)

    # Local copies of the inputs make PHASER I/O more efficient.
    local_hklin = os.path.join(self.work_dir, os.path.basename(self.hklin))
    local_pdbin = os.path.join(self.work_dir, os.path.basename(self.pdbin))
    shutil.copyfile(self.hklin, local_hklin)
    shutil.copyfile(self.pdbin, local_pdbin)

    dat_input = InputMR_DAT()
    dat_input.setHKLI(local_hklin)
    if self.hires:
        dat_input.setHIRES(self.hires)
    if self.autohigh:
        dat_input.setRESO_AUTO_HIGH(self.autohigh)
    # Labels hold the string "None" (not None) when absent.
    if self.i != "None" and self.sigi != "None":
        dat_input.setLABI_I_SIGI(self.i, self.sigi)
    elif self.f != "None" and self.sigf != "None":
        dat_input.setLABI_F_SIGF(self.f, self.sigf)
    else:
        raise RuntimeError("No flags for intensities or amplitudes have been provided")
    dat_input.setSGAL_SELE(SGAlternatives[self.sgalternative].value)
    dat_input.setMUTE(True)
    dat_result = runMR_DAT(dat_input)

    if dat_result.Success():
        auto_input = InputMR_AUTO()
        auto_input.setJOBS(1)
        auto_input.setREFL_DATA(dat_result.getREFL_DATA())
        auto_input.setROOT("phaser_mr_output")
        auto_input.addENSE_PDB_ID("PDB", local_pdbin, 0.7)
        auto_input.setCOMP_BY("SOLVENT")
        auto_input.setCOMP_PERC(self.solvent)
        auto_input.addSEAR_ENSE_NUM('PDB', self.nmol)
        auto_input.setSGAL_SELE(SGAlternatives[self.sgalternative].value)
        if self.timeout != 0:
            auto_input.setKILL_TIME(self.timeout)
        auto_input.setMUTE(True)
        # Drop the MR_DAT result before launching the full run.
        del dat_result
        auto_result = runMR_AUTO(auto_input)
        with open(self.logfile, 'w') as handle:
            handle.write(auto_result.summary())
        shutil.move(auto_result.getTopPdbFile(), self.pdbout)
        # Re-express the experimental data in the solution space group.
        space_group, _, _ = mtz_util.crystal_data(auto_result.getTopMtzFile())
        ed = mtz_util.ExperimentalData(self.hklin)
        ed.change_space_group(space_group)
        ed.output_mtz(self.hklout)

    os.chdir(launch_dir)
    # Remove the local copies made above.
    if os.path.isfile(local_hklin):
        os.remove(local_hklin)
    if os.path.isfile(local_pdbin):
        os.remove(local_pdbin)
def run(self, models_dir, nproc=2, min_solvent_content=20, submit_qtype=None,
        submit_queue=None, monitor=None, chunk_size=0, **kwargs):
    """Run phaser rotation function on a directory of models

    Parameters
    ----------
    models_dir : str
        The directory containing the models to run the rotation search on
    nproc : int, optional
        The number of processors to run the job on
    min_solvent_content : int, float, optional
        The minimum solvent content present in the unit cell with the input model [default: 20]
    submit_qtype : str
        The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster
    monitor
    chunk_size : int, optional
        The number of jobs to submit at the same time

    Returns
    -------
    file
        log file for each model in the models_dir
    """
    # NOTE(review): formatting reconstructed from a whitespace-mangled source;
    # statement grouping inferred — verify against the upstream file.
    self.submit_qtype = submit_qtype
    self.submit_queue = submit_queue
    # Column labels of the input MTZ (F/SIGF and I/SIGI) for the per-model scripts
    self.f, self.sigf, self.i, self.sigi, _, _, _ = simbad.util.mtz_util.get_labels(self.mtz)
    self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
    n_files = len(self.simbad_dat_files)

    # Read the reflection data once up front; space group and cell are reused
    # for the Matthews-probability calculation below.
    i = InputMR_DAT()
    i.setHKLI(self.mtz)
    i.setMUTE(True)
    run_mr_data = runMR_DAT(i)

    sg = run_mr_data.getSpaceGroupName().replace(" ", "")
    cell = " ".join(map(str, run_mr_data.getUnitCell()))

    chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
    total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(n_files, chunk_size)

    mat_coef = simbad.util.matthews_prob.MatthewsProbability(cell, sg)

    # Unique scratch area for the generated job scripts and logs
    dir_name = "simbad-tmp-" + str(uuid.uuid1())
    script_log_dir = os.path.join(self.work_dir, dir_name)
    os.mkdir(script_log_dir)

    # Remember the caller's CCP4_SCR so each job script can restore it
    ccp4_scr = os.environ["CCP4_SCR"]
    default_tmp_dir = os.path.join(self.work_dir, 'tmp')
    if self.tmp_dir:
        template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
    else:
        template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")

    # Estimate the assembly molecular weight of the target via phaser's CCA,
    # used to rank candidate models by MW difference.
    predicted_molecular_weight = 0
    if run_mr_data.Success():
        i = InputCCA()
        i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
        i.setCELL6(run_mr_data.getUnitCell())
        i.setMUTE(True)
        run_cca = runCCA(i)
        if run_cca.Success():
            predicted_molecular_weight = run_cca.getAssemblyMW()

    # Filter candidate models by predicted solvent content and score the rest
    # by molecular-weight difference to the target.
    dat_models = []
    for dat_model in self.simbad_dat_files:
        name = os.path.basename(dat_model.replace(".dat", ""))
        pdb_struct = simbad.util.pdb_util.PdbStructure()
        pdb_struct.from_file(dat_model)
        solvent_fraction, n_copies = mat_coef.calculate_content_ncopies_from_struct(pdb_struct)
        solvent_content = solvent_fraction * 100
        if solvent_content < min_solvent_content:
            msg = "Skipping %s: solvent content is predicted to be less than %.2f"
            logger.debug(msg, name, min_solvent_content)
            continue
        mw_diff = abs(predicted_molecular_weight - pdb_struct.molecular_weight)
        info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, None, None, None,
                                                   None, solvent_fraction, n_copies)
        dat_models.append(info)

    # Best (smallest) MW difference first
    sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)

    iteration_range = range(0, n_files, chunk_size)
    for cycle, i in enumerate(iteration_range):
        logger.info("Working on chunk %d out of %d", cycle + 1, total_chunk_cycles)
        template_model = os.path.join("$CCP4_SCR", "{0}.pdb")
        phaser_files = []
        for dat_model in sorted_dat_models[i:i + chunk_size]:
            logger.debug("Generating script to perform PHASER rotation " + "function on %s", dat_model.pdb_code)
            pdb_model = template_model.format(dat_model.pdb_code)
            template_rot_log = os.path.join("$CCP4_SCR", "{0}_rot.log")
            # Inline python snippet that converts the .dat model to PDB inside the job
            conv_py = "\"from simbad.db import convert_dat_to_pdb; convert_dat_to_pdb('{}', '{}')\""
            conv_py = conv_py.format(dat_model.dat_path, pdb_model)
            rot_log = template_rot_log.format(dat_model.pdb_code)
            tmp_dir = template_tmp_dir.format(dat_model.pdb_code)
            phaser_cmd = [
                "simbad.rotsearch.phaser_rotation_search",
                "-hklin", self.mtz,
                "-f", self.f,
                "-sigf", self.sigf,
                "-i", self.i,
                "-sigi", self.sigi,
                "-pdbin", pdb_model,
                "-logfile", rot_log,
                "-solvent", dat_model.solvent,
                "-nmol", dat_model.nmol,
                "-work_dir", tmp_dir,
            ]
            phaser_cmd = " ".join(str(e) for e in phaser_cmd)
            # Each job script: point CCP4_SCR at a private tmp dir, convert the
            # model, run the rotation search, clean up, restore CCP4_SCR.
            cmd = [
                [EXPORT, "CCP4_SCR=" + tmp_dir],
                ["mkdir", "-p", "$CCP4_SCR\n"],
                [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-c", conv_py, os.linesep],
                [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-m", phaser_cmd, os.linesep],
                ["rm", "-rf", "$CCP4_SCR\n"],
                [EXPORT, "CCP4_SCR=" + ccp4_scr],
            ]
            phaser_script = pyjob.misc.make_script(
                cmd, directory=script_log_dir, prefix="phaser_", stem=dat_model.pdb_code)
            phaser_log = phaser_script.rsplit(".", 1)[0] + '.log'
            phaser_files += [(phaser_script, phaser_log, dat_model.dat_path)]

        # NOTE(review): results is re-initialised on every chunk cycle here, so
        # self._search_results below only keeps the final chunk's scores —
        # compare the sibling overload where results is hoisted before the
        # loop; confirm whether this is intentional.
        results = []
        if len(phaser_files) > 0:
            logger.info("Running PHASER rotation functions")
            phaser_scripts, phaser_logs, dat_models = zip(*phaser_files)
            simbad.rotsearch.submit_chunk(phaser_scripts, script_log_dir, nproc, 'simbad_phaser',
                                          submit_qtype, submit_queue, monitor, self.rot_succeeded_log)
            for dat_model, phaser_log in zip(dat_models, phaser_logs):
                base = os.path.basename(phaser_log)
                pdb_code = base.replace("phaser_", "").replace(".log", "")
                try:
                    phaser_rotation_parser = simbad.parsers.rotsearch_parser.PhaserRotsearchParser(phaser_log)
                    # If an R-factor was reported, override with sentinel
                    # scores so the model is kept for downstream ranking
                    if phaser_rotation_parser.rfact:
                        phaser_rotation_parser.llg = 100
                        phaser_rotation_parser.rfz = 10
                    score = simbad.core.phaser_score.PhaserRotationScore(
                        pdb_code, dat_model, phaser_rotation_parser.llg, phaser_rotation_parser.rfz)
                    if phaser_rotation_parser.rfz:
                        results += [score]
                except IOError:
                    # Missing/unreadable log: job failed; skip this model
                    pass
        else:
            logger.critical("No structures to be trialled")

    self._search_results = results
    shutil.rmtree(script_log_dir)
    if os.path.isdir(default_tmp_dir):
        shutil.rmtree(default_tmp_dir)
def run(self, models_dir, nproc=2, min_solvent_content=20, submit_nproc=None,
        submit_qtype=None, submit_queue=None, monitor=None, chunk_size=0, **kwargs):
    """Run phaser rotation function on a directory of models

    Parameters
    ----------
    models_dir : str
        The directory containing the models to run the rotation search on
    nproc : int, optional
        The number of processors to run the job on
    min_solvent_content : int, float, optional
        The minimum solvent content present in the unit cell with the input model [default: 20]
    submit_nproc : int
        The number of processors to use on the head node when creating
        submission scripts on a cluster [default: 1]
    submit_qtype : str
        The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster
    monitor
    chunk_size : int, optional
        The number of jobs to submit at the same time

    Returns
    -------
    file
        log file for each model in the models_dir
    """
    # NOTE(review): formatting reconstructed from a whitespace-mangled source;
    # statement grouping inferred — verify against the upstream file.
    self.submit_qtype = submit_qtype
    self.submit_queue = submit_queue
    self.mtz_labels = simbad.util.mtz_util.GetLabels(self.mtz)
    self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)

    # Read the reflection data once; space group and cell feed the
    # Matthews-probability calculation below.
    i = InputMR_DAT()
    i.setHKLI(self.mtz)
    i.setLABI_F_SIGF(self.mtz_labels.f, self.mtz_labels.sigf)
    i.setMUTE(True)
    run_mr_data = runMR_DAT(i)

    sg = run_mr_data.getSpaceGroupName().replace(" ", "")
    cell = " ".join(map(str, run_mr_data.getUnitCell()))

    mat_coef = simbad.util.matthews_prob.MatthewsProbability(cell, sg)

    # Unique scratch area for generated job scripts/logs; stored on self so
    # the per-model callable (invoked via pool.map(self, ...)) can reach it.
    dir_name = "simbad-tmp-" + str(uuid.uuid1())
    self.script_log_dir = os.path.join(self.work_dir, dir_name)
    os.mkdir(self.script_log_dir)

    self.ccp4_scr = os.environ["CCP4_SCR"]
    default_tmp_dir = os.path.join(self.work_dir, 'tmp')
    if self.tmp_dir:
        self.template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
    else:
        self.template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")

    # Estimate the target's assembly molecular weight via phaser's CCA,
    # used to rank candidate models by MW difference.
    predicted_molecular_weight = 0
    if run_mr_data.Success():
        i = InputCCA()
        i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
        i.setCELL6(run_mr_data.getUnitCell())
        i.setMUTE(True)
        run_cca = runCCA(i)
        if run_cca.Success():
            predicted_molecular_weight = run_cca.getAssemblyMW()

    # Filter candidates by predicted solvent content; score survivors by
    # molecular-weight difference to the target.
    dat_models = []
    for dat_model in self.simbad_dat_files:
        name = os.path.basename(dat_model.replace(".dat", ""))
        pdb_struct = simbad.util.pdb_util.PdbStructure()
        pdb_struct.from_file(dat_model)
        solvent_fraction, n_copies = mat_coef.calculate_content_ncopies_from_struct(
            pdb_struct)
        solvent_content = solvent_fraction * 100
        if solvent_content < min_solvent_content:
            msg = "Skipping %s: solvent content is predicted to be less than %.2f"
            logger.debug(msg, name, min_solvent_content)
            continue
        mw_diff = abs(predicted_molecular_weight - pdb_struct.molecular_weight)
        info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, None, None, None,
                                                   None, solvent_fraction, n_copies)
        dat_models.append(info)

    # Best (smallest) MW difference first
    sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff), reverse=False)
    n_files = len(sorted_dat_models)
    chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
    total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(
        n_files, chunk_size)

    results = []
    iteration_range = range(0, n_files, chunk_size)
    for cycle, i in enumerate(iteration_range):
        logger.info("Working on chunk %d out of %d", cycle + 1,
                    total_chunk_cycles)

        # An earlier chunk may already have produced an acceptable solution
        if self.solution:
            logger.info(
                "Early termination criteria met, skipping chunk %d", cycle + 1)
            continue

        self.template_model = os.path.join("$CCP4_SCR", "{0}.pdb")

        # Script generation runs locally for 'local' qtype, otherwise on the
        # (smaller) head-node allocation
        if submit_qtype == 'local':
            processes = nproc
        else:
            processes = submit_nproc

        collector = ScriptCollector(None)
        phaser_files = []
        # self is callable per-model: returns (script, (log, dat_path)) or None.
        # NOTE(review): the comprehension's loop variable `i` shadows the chunk
        # start index `i` (the slice is evaluated before the shadowing occurs,
        # so behaviour is unaffected) — worth renaming upstream.
        with pool.Pool(processes=processes) as p:
            [(collector.add(i[0]), phaser_files.append(i[1]))
             for i in p.map(self, sorted_dat_models[i:i + chunk_size])
             if i is not None]

        if len(phaser_files) > 0:
            logger.info("Running PHASER rotation functions")
            phaser_logs, dat_models = zip(*phaser_files)
            simbad.util.submit_chunk(collector, self.script_log_dir, nproc, 'simbad_phaser',
                                     submit_qtype, submit_queue, True, monitor,
                                     self.rot_succeeded_log)
            for dat_model, phaser_log in zip(dat_models, phaser_logs):
                base = os.path.basename(phaser_log)
                pdb_code = base.replace("phaser_", "").replace(".log", "")
                try:
                    phaser_rotation_parser = simbad.parsers.rotsearch_parser.PhaserRotsearchParser(
                        phaser_log)
                    # If an R-factor was reported, override with sentinel
                    # scores so the model is kept for downstream ranking
                    if phaser_rotation_parser.rfact:
                        phaser_rotation_parser.llg = 100
                        phaser_rotation_parser.rfz = 10
                    score = simbad.core.phaser_score.PhaserRotationScore(
                        pdb_code, dat_model, phaser_rotation_parser.llg, phaser_rotation_parser.rfz)
                    if phaser_rotation_parser.rfz:
                        results += [score]
                except IOError:
                    # Missing/unreadable log: job failed; skip this model
                    pass
        else:
            logger.critical("No structures to be trialled")

    self._search_results = results
    shutil.rmtree(self.script_log_dir)
    if os.path.isdir(default_tmp_dir):
        shutil.rmtree(default_tmp_dir)
def run_phaser_module_OLD(args): """ Run separate module of Phaser to get results before running full job. Setup so that I can read the data in once and run multiple modules. """ # print "run_phaser_module" res = 0.0 z = 0 solvent_content = 0.0 def run_ellg(run_mr, pdb_file): """ Perform calculations and return target-reso Resolution to achieve target eLLG """ target_resolution = 0.0 i0 = phaser.InputMR_ELLG() i0.setSPAC_HALL(run_mr.getSpaceGroupHall()) i0.setCELL6(run_mr.getUnitCell()) i0.setMUTE(True) i0.setREFL_DATA(run_mr.getDATA()) i0.addENSE_PDB_ID("test", pdb_file, 0.7) r1 = phaser.runMR_ELLG(i0) if r1.Success(): target_resolution = r1.get_target_resolution("test") del r1 return target_resolution def run_cca(run_mr, target_resolution, args): # print "run_cca" z0 = 0 solvent_content = 0.0 i0 = phaser.InputCCA() i0.setSPAC_HALL(run_mr.getSpaceGroupHall()) i0.setCELL6(run_mr.getUnitCell()) i0.setMUTE(True) # Have to set high res limit!! i0.setRESO_HIGH(target_resolution) if args.np > 0: i0.addCOMP_PROT_NRES_NUM(args.np, 1) if args.na > 0: i0.addCOMP_NUCL_NRES_NUM(args.na, 1) r1 = phaser.runCCA(i0) if r1.Success(): z0 = r1.getBestZ() solvent_content = 1-(1.23/r1.getBestVM()) del r1 return (z0, solvent_content) def run_ncs(run_mr): # print "run_ncs" i0 = phaser.InputNCS() i0.setSPAC_HALL(run_mr.getSpaceGroupHall()) i0.setCELL6(run_mr.getUnitCell()) i0.setREFL_DATA(run_mr.getDATA()) i0.setMUTE(True) r1 = phaser.runNCS(i0) # print r1.logfile() # print r1.loggraph().size() # print r1.loggraph().__dict__.keys() if r1.Success(): return r1 # def run_ano(run_mr): # print "run_ano" # i0 = phaser.InputANO() # i0.setSPAC_HALL(run_mr.getSpaceGroupHall()) # i0.setCELL6(run_mr.getUnitCell()) # i0.setREFL_DATA(run_mr.getDATA()) # i0.setMUTE(True) # r1 = phaser.runANO(i0) # # print r1.loggraph().__dict__.keys() # # print r1.loggraph().size() # # print r1.logfile() # """ # o = phaser.Output() # redirect_str = StringIO() # o.setPackagePhenix(file_object=redirect_str) # r1 = 
phaser.runANO(i0,o) # """ # if r1.Success(): # print "SUCCESS" # return r1 # Setup which modules are run # if inp: ellg = True ncs = False target_resolution = args.resolution if not (args.np or args.na or args.resolution): pass # else: # ellg = False # ncs = True # Read the dataset i = phaser.InputMR_DAT() i.setHKLI(args.data_file) i.setLABI_F_SIGF("F", "SIGF") i.setMUTE(True) run_mr = phaser.runMR_DAT(i) if run_mr.Success(): if ellg: target_resolution = run_ellg(run_mr, args.pdb_file) if args.matthews: z, solvent_content = run_cca(run_mr, target_resolution, args) if ncs: n = run_ncs(run_mr) if args.matthews: # Assumes ellg is run as well. return {"z": z, "solvent_content": solvent_content, "target_resolution": target_resolution} elif ellg: # ellg run by itself return {"target_resolution": target_resolution} else: # NCS return n
def run_phaser(datafile,
               spacegroup,
               output_id,
               db_settings,
               work_dir=False,
               cif=False,
               pdb=False,
               name=False,
               ncopy=1,
               cell_analysis=False,
               resolution=False,
               large_cell=False,
               run_before=False,
               ):
    """
    Run Phaser and passes results back to RAPD Redis DB
    **Requires Phaser src code!**

    datafile - input data as mtz
    spacegroup - The space group to run MR
    output_id - a Redis key where the results are sent
    db_settings - Redis connection settings for sending results
    work_dir - working directory
    cif - input search model path in mmCIF format (do not use with 'pdb')
    pdb - input search model path in PDB format (do not use with 'cif')
    name - root name for output files
    ncopy - number of molecules to search for
    cell_analysis - internal RAPD signal so all possible SG's are searched
    resolution - high res limit to run MR (float)
    large_cell - optimizes parameters to speed up MR with large unit cell.
    run_before - signal to run more comprehensive MR
    """
    # NOTE(review): legacy Python 2 code (bare print statements); formatting
    # reconstructed from a whitespace-mangled source — verify upstream.

    # Change to work_dir
    if not work_dir:
        work_dir = os.getcwd()
    os.chdir(work_dir)

    if not name:
        name = spacegroup

    # Connect to Redis
    redis = connect_to_redis(db_settings)

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(datafile))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        # Build the full MR_AUTO job from the parsed data
        i = phaser.InputMR_AUTO()
        # i.setREFL_DATA(r.getREFL_DATA())
        # i.setREFL_DATA(r.DATA_REFL())
        i.setREFL_F_SIGF(r.getMiller(), r.getFobs(), r.getSigFobs())
        i.setCELL6(r.getUnitCell())
        if cif:
            #i.addENSE_CIF_ID('model', cif, 0.7)
            ### Typo in PHASER CODE!!!###
            i.addENSE_CIT_ID('model', convert_unicode(cif), 0.7)
        if pdb:
            i.addENSE_PDB_ID('model', convert_unicode(pdb), 0.7)
        i.addSEAR_ENSE_NUM("model", ncopy)
        i.setSPAC_NAME(spacegroup)
        if cell_analysis:
            i.setSGAL_SELE("ALL")
            # Set it for worst case in orth
            # number of processes to run in parallel where possible
            i.setJOBS(1)
        else:
            i.setSGAL_SELE("NONE")
        if run_before:
            # Picks own resolution
            # Round 2, pick best solution as long as less that 10% clashes
            i.setPACK_SELE("PERCENT")
            i.setPACK_CUTO(0.1)
            #command += "PACK CUTOFF 10\n"
        else:
            # For first round and cell analysis
            # Only set the resolution limit in the first round or cell analysis.
            if resolution:
                i.setRESO_HIGH(resolution)
            else:
                # Otherwise it runs a second MR at full resolution!!
                # I dont think a second round is run anymore.
                # command += "RESOLUTION SEARCH HIGH OFF\n"
                if large_cell:
                    i.setRESO_HIGH(6.0)
                else:
                    i.setRESO_HIGH(4.5)
            i.setSEAR_DEEP(False)
            # Don"t seem to work since it picks the high res limit now.
            # Get an error when it prunes all the solutions away and TF has no input.
            # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
            # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
        # Turn off pruning in 2.6.0
        i.setSEAR_PRUN(False)
        # Choose more top peaks to help with getting it correct.
        i.setPURG_ROTA_ENAB(True)
        i.setPURG_ROTA_NUMB(3)
        #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
        i.setPURG_TRAN_ENAB(True)
        i.setPURG_TRAN_NUMB(1)
        #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"
        # Only keep the top after refinement.
        i.setPURG_RNP_ENAB(True)
        i.setPURG_RNP_NUMB(1)
        #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
        i.setROOT(convert_unicode(name))
        # i.setMUTE(False)
        i.setMUTE(True)
        # Delete the setup results
        del(r)
        # launch the run
        r = phaser.runMR_AUTO(i)
        if r.Success():
            if r.foundSolutions():
                print "Phaser has found MR solutions"
                #print "Top LLG = %f" % r.getTopLLG()
                #print "Top PDB file = %s" % r.getTopPdbFile()
            else:
                print "Phaser has not found any MR solutions"
        else:
            print "Job exit status FAILURE"
            print r.ErrorName(), "ERROR :", r.ErrorMessage()
        # Dump full log and summary regardless of success
        with open('phaser.log', 'w') as log:
            log.write(r.logfile())
            log.close()
        with open('phaser_sum.log', 'w') as log:
            log.write(r.summary())
            log.close()
        if r.foundSolutions():
            rfz = None
            tfz = None
            tncs = False
            # Parse results
            # Pull RFZ/TFZ Z-scores out of the top solution's annotation string
            for p in r.getTopSet().ANNOTATION.split():
                if p.count('RFZ'):
                    if p.count('=') in [1]:
                        rfz = float(p.split('=')[-1])
                if p.count('RF*0'):
                    rfz = "NC"
                if p.count('TFZ'):
                    if p.count('=') in [1]:
                        tfz = p.split('=')[-1]
                        if tfz == '*':
                            tfz = 'arbitrary'
                        else:
                            tfz = float(tfz)
                if p.count('TF*0'):
                    tfz = "NC"
            tncs_test = [1 for line in r.getTopSet().unparse().splitlines()
                         if line.count("+TNCS")]
            tncs = bool(len(tncs_test))
            phaser_result = {"ID": name,
                             "solution": r.foundSolutions(),
                             "pdb": r.getTopPdbFile(),
                             "mtz": r.getTopMtzFile(),
                             "gain": float(r.getTopLLG()),
                             "rfz": rfz,
                             # "tfz": r.getTopTFZ(),
                             "tfz": tfz,
                             "clash": r.getTopSet().PAK,
                             "dir": os.getcwd(),
                             "spacegroup": r.getTopSet().getSpaceGroupName().replace(' ', ''),
                             "tNCS": tncs,
                             "nmol": r.getTopSet().NUM,
                             "adf": None,
                             "peak": None,
                             }
            # make tar.bz2 of result files
            # l = ['pdb', 'mtz', 'adf', 'peak']
            # archive = "%s.tar.bz2" % name
            # with tarfile.open(archive, "w:bz2") as tar:
            #     for f in l:
            #         fo = phaser_result.get(f, False)
            #         if fo:
            #             if os.path.exists(fo):
            #                 tar.add(fo)
            #     tar.close()
            # phaser_result['tar'] = os.path.join(work_dir, archive)

            # New procedure for making tar of results
            # Create directory
            os.mkdir(name)
            # Go through and copy files to archive directory
            file_types = ("pdb", "mtz", "adf", "peak")
            for file_type in file_types:
                target_file = phaser_result.get(file_type, False)
                if target_file:
                    if os.path.exists(target_file):
                        # Copy the file to the directory to be archived
                        shutil.copy(target_file, name+"/.")
            # Create the archive
            archive_result = archive.create_archive(name)
            archive_result["description"] = name
            phaser_result["tar"] = archive_result
            phaser_result["pdb_file"] = os.path.join(work_dir, r.getTopPdbFile())
        else:
            phaser_result = {"ID": name,
                             "solution": False,
                             "message": "No solution"}
        # Print the result so it can be seen in the rapd._phaser.log if needed
        print phaser_result
        # Key should be deleted once received, but set the key to expire in 24 hours just in case.
        redis.setex(output_id, 86400, json.dumps(phaser_result))
        # Do a little sleep to make sure results are in Redis for postprocess_phaser
        time.sleep(0.1)
def run_phaser_module(data_file,
                      result_queue=False,
                      cca=False,
                      tncs=False,
                      ellg=False,
                      mmcif=False,
                      dres=False,
                      np=0,
                      na=0,):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.

    data_file - input dataset mtz file
    result_queue - pass results to queue
    cca - Run CCA to determine number of molecules in AU, and solvent content (Matthew's Coefficient calc)
    tncs - Run Anisotropy and tNCS correction on CID plots
    ellg - Run analysis to determonine optimum Phaser resolution MR.
    mmcif - input mmcif file. Could also be a PDB file
    dres - resolution of dataset (ELLG, CCA)
    np - default number of protein residues (CCA)
    na - default number of nucleic acid residues (CCA)
    """
    # NOTE(review): legacy Python 2 code (bare print statements); formatting
    # reconstructed from a whitespace-mangled source — verify upstream.
    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        # eLLG analysis: returns the recommended high-resolution limit for MR
        new_res = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        # NOTE(review): ('cif') is a plain string, not a tuple, so this is a
        # substring test — it works here by accident; should be ('cif',).
        if mmcif[-3:] in ('cif'):
            i0.addENSE_CIT_ID('model', convert_unicode(mmcif), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(mmcif), 0.7)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            # If it worked use the recommended resolution
            new_res = round(r1.get_target_resolution('model'), 1)
        del(r1)
        return new_res

    def run_cca(res):
        # Matthews-coefficient analysis: best Z (copies in AU) and solvent content
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        #print dir(r1)
        if r1.Success():
            z0 = r1.getBestZ()
            # Solvent fraction from best Matthews coefficient
            sc0 = round(1-(1.23/r1.getBestVM()), 2)
        del(r1)
        return (z0, sc0)

    def run_tncs():
        # CAN'T GET READABLE loggraph?!?
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print dir(r1)
        print r1.logfile()
        # for l in r1.loggraph():
        #     print l
        print r1.loggraph().size()
        print r1.output_strings
        #print r1.hasTNCS()
        #print r1.summary()
        print r1.warnings()
        print r1.ErrorMessage()
        #print r1.getCentricE4()
        if r1.Success():
            return(r1.loggraph())

    def run_ano():
        # Anisotropy analysis (diagnostic output only)
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """
        if r1.Success():
            print 'SUCCESS'
            return(r1)

    # MAIN

    # Setup which modules are run

    # Read input MTZ file
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if cca:
            # Assumes ellg is run as well.
            z, solvent_content = run_cca(target_resolution)
        if tncs:
            n = run_tncs()
    if cca:
        out = {"z": z,
               "solvent_content": solvent_content,
               "target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    elif ellg:
        # ellg run by itself
        out = {"target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    else:
        # tNCS
        out = n
        if result_queue:
            result_queue.put(out)
        else:
            return out

# NOTE(review): unmatched triple-quote preserved from the original source —
# it appears to delimit commented-out legacy code; verify against the full file.
"""
def run_phaser_module(data_file,
                      result_queue=False,
                      cca=False,
                      tncs=False,
                      ellg=False,
                      struct_file=False,
                      dres=False,
                      np=0,
                      na=0):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.

    data_file - input dataset mtz file
    result_queue - pass results to queue
    cca - Run CCA to determine number of molecules in AU, and solvent content (Matthew's Coefficient calc)
    tncs - Run Anisotropy and tNCS correction on CID plots
    ellg - Run analysis to determonine optimum Phaser resolution MR.
    struct_file - input struct_file file. Could be a PDB or mmCIF file
    dres - resolution of dataset (ELLG, CCA)
    np - default number of protein residues (CCA)
    na - default number of nucleic acid residues (CCA)
    """
    # NOTE(review): legacy Python 2 code (bare print statements); formatting
    # reconstructed from a whitespace-mangled source — verify upstream.
    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        # eLLG analysis: returns the recommended high-resolution limit for MR
        new_res = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        # Read in CIF file
        if struct_file[-3:] in ('cif', ):
            i0.addENSE_CIT_ID("model", convert_unicode(struct_file), 0.7)
        # Read in PDB file
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(struct_file), 0.7)
        try:
            r1 = phaser.runMR_ELLG(i0)
        except RuntimeError as e:
            # print "Hit error"
            # Known CIF error - convert to pdb and retry
            if struct_file[-3:] in ('cif', ):
                # print "Convert to pdb"
                pdb.cif_as_pdb((struct_file, ))
                pdb_file = struct_file.replace(".cif", ".pdb")
                # Rebuild the whole input with the converted PDB model
                i1 = phaser.InputMR_ELLG()
                i1.setSPAC_HALL(r.getSpaceGroupHall())
                i1.setCELL6(r.getUnitCell())
                i1.setMUTE(True)
                i1.setREFL_DATA(r.getDATA())
                i1.addENSE_PDB_ID("model", convert_unicode(pdb_file), 0.7)
                r1 = phaser.runMR_ELLG(i1)
            else:
                raise e
        # print r1.logfile()
        if r1.Success():
            # If it worked use the recommended resolution
            new_res = round(r1.get_target_resolution('model'), 1)
        del (r1)
        return new_res

    def run_cca(res):
        # Matthews-coefficient analysis: best Z (copies in AU) and solvent content
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        #print dir(r1)
        if r1.Success():
            z0 = r1.getBestZ()
            # Solvent fraction from best Matthews coefficient; guard against
            # a zero VM from phaser
            try:
                sc0 = round(1 - (1.23 / r1.getBestVM()), 2)
            except ZeroDivisionError:
                sc0 = 0
        del (r1)
        return (z0, sc0)

    def run_tncs():
        # CAN'T GET READABLE loggraph?!?
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print dir(r1)
        print r1.logfile()
        # for l in r1.loggraph():
        #     print l
        print r1.loggraph().size()
        print r1.output_strings
        #print r1.hasTNCS()
        #print r1.summary()
        print r1.warnings()
        print r1.ErrorMessage()
        #print r1.getCentricE4()
        if r1.Success():
            return (r1.loggraph())

    def run_ano():
        # Anisotropy analysis (diagnostic output only)
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """
        if r1.Success():
            print 'SUCCESS'
            return (r1)

    # MAIN

    # Setup which modules are run

    # Read input MTZ file
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if cca:
            # Assumes ellg is run as well.
            z, solvent_content = run_cca(target_resolution)
        if tncs:
            n = run_tncs()
    if cca:
        out = {
            "z": z,
            "solvent_content": solvent_content,
            "target_resolution": target_resolution
        }
        if result_queue:
            result_queue.put(out)
        else:
            return out
    elif ellg:
        # ellg run by itself
        out = {"target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    else:
        # tNCS
        out = n
        if result_queue:
            result_queue.put(out)
        else:
            return out

# NOTE(review): unmatched triple-quote preserved from the original source —
# it appears to delimit commented-out legacy code; verify against the full file.
"""
def run_phaser_module_OLD(data_file, inp=False):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.

    inp - False to run the tNCS analysis only; a str (model path) to run the
    eLLG analysis; or a 4-tuple (np, na, res0, model path) to additionally
    run the Matthews (CCA) analysis.
    """
    # NOTE(review): legacy Python 2 code (bare print statements); formatting
    # reconstructed from a whitespace-mangled source — verify upstream.
    # if self.verbose:
    #     self.logger.debug('Utilities::runPhaserModule')

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        # eLLG analysis: returns the recommended high-resolution limit for MR.
        # Reads `f` (model path) and `r` (MR_DAT result) from the enclosing scope.
        res0 = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        # NOTE(review): ('cif') is a plain string, not a tuple — substring
        # test that works by accident; should be ('cif',).
        if f[-3:] in ('cif'):
            i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
        # i.addSEAR_ENSE_NUM("junk",5)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            res0 = r1.get_target_resolution('model')
        del (r1)
        return res0

    def run_cca():
        # Matthews-coefficient analysis: best Z and solvent content.
        # Reads np, na, res0 (unpacked from `inp`) from the enclosing scope.
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res0)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        if r1.Success():
            z0 = r1.getBestZ()
            # Solvent fraction from best Matthews coefficient
            sc0 = 1 - (1.23 / r1.getBestVM())
        del (r1)
        return (z0, sc0)

    def run_ncs():
        # tNCS analysis; returns the raw result object on success
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print r1.logfile()
        print r1.loggraph().size()
        print r1.loggraph().__dict__.keys()
        #print r1.getCentricE4()
        if r1.Success():
            return (r1)

    def run_ano():
        # Anisotropy analysis (diagnostic output only)
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """
        if r1.Success():
            print 'SUCCESS'
            return (r1)

    # Setup which modules are run
    matthews = False
    if inp:
        ellg = True
        ncs = False
        # A bare string is just the model path; a tuple also carries the
        # composition and resolution needed for the Matthews calculation.
        if type(inp) == str:
            f = inp
        else:
            np, na, res0, f = inp
            matthews = True
    else:
        ellg = False
        ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if matthews:
            z, solvent_content = run_cca()
        if ncs:
            n = run_ncs()
    if matthews:
        # Assumes ellg is run as well.
        # return (z,sc,res)
        return {
            "z": z,
            "solvent_content": solvent_content,
            "target_resolution": target_resolution
        }
    elif ellg:
        # ellg run by itself
        # return target_resolution
        return {"target_resolution": target_resolution}
    else:
        # NCS
        # NOTE(review): `n` is only bound when runMR_DAT succeeded — this
        # branch can raise NameError otherwise; verify.
        return n
def run(self, models_dir, nproc=2, shres=3.0, pklim=0.5, npic=50,
        rotastep=1.0, min_solvent_content=20, submit_qtype=None,
        submit_queue=None, monitor=None, chunk_size=0, **kwargs):
    """Run amore rotation function on a directory of models

    Parameters
    ----------
    models_dir : str
        The directory containing the models to run the rotation search on
    nproc : int, optional
        The number of processors to run the job on
    shres : int, float, optional
        Spherical harmonic resolution [default 3.0]
    pklim : int, float, optional
        Peak limit, output all peaks above <float> [default: 0.5]
    npic : int, optional
        Number of peaks to output from the translation function map for
        each orientation [default: 50]
    rotastep : int, float, optional
        Size of rotation step [default : 1.0]
    min_solvent_content : int, float, optional
        The minimum solvent content present in the unit cell with the
        input model [default: 30]
    submit_qtype : str
        The cluster submission queue type - currently support SGE and LSF
    submit_queue : str
        The queue to submit to on the cluster
    monitor
    chunk_size : int, optional
        The number of jobs to submit at the same time

    Returns
    -------
    file
        log file for each model in the models_dir
    """
    self.submit_qtype = submit_qtype
    self.submit_queue = submit_queue
    self.simbad_dat_files = simbad.db.find_simbad_dat_files(models_dir)
    n_files = len(self.simbad_dat_files)

    # Read the reflection data once; space group and cell are needed for
    # the solvent-content screen and the CCA molecular-weight estimate.
    i = InputMR_DAT()
    i.setHKLI(self.mtz)
    i.setMUTE(True)
    run_mr_data = runMR_DAT(i)

    sg = run_mr_data.getSpaceGroupName().replace(" ", "")
    cell = " ".join(map(str, run_mr_data.getUnitCell()))

    chunk_size = simbad.rotsearch.get_chunk_size(n_files, chunk_size)
    total_chunk_cycles = simbad.rotsearch.get_total_chunk_cycles(n_files, chunk_size)

    sol_calc = simbad.util.matthews_prob.SolventContent(cell, sg)

    # Unique per-run scratch/log directory for the generated scripts.
    dir_name = "simbad-tmp-" + str(uuid.uuid1())
    script_log_dir = os.path.join(self.work_dir, dir_name)
    os.mkdir(script_log_dir)

    hklpck0 = self._generate_hklpck0()

    ccp4_scr = os.environ["CCP4_SCR"]
    default_tmp_dir = os.path.join(self.work_dir, 'tmp')
    if self.tmp_dir:
        template_tmp_dir = os.path.join(self.tmp_dir, dir_name + "-{0}")
    else:
        template_tmp_dir = os.path.join(default_tmp_dir, dir_name + "-{0}")

    # Per-model scratch file templates; "{0}" is filled with the PDB code.
    # "$CCP4_SCR" is expanded by the shell inside the generated script.
    template_hklpck1 = os.path.join("$CCP4_SCR", "{0}.hkl")
    template_clmn0 = os.path.join("$CCP4_SCR", "{0}_spmipch.clmn")
    template_clmn1 = os.path.join("$CCP4_SCR", "{0}.clmn")
    template_mapout = os.path.join("$CCP4_SCR", "{0}_amore_cross.map")
    template_table1 = os.path.join("$CCP4_SCR", "{0}_sfs.tab")
    template_model = os.path.join("$CCP4_SCR", "{0}.pdb")
    template_rot_log = os.path.join("$CCP4_SCR", "{0}_rot.log")

    # Predicted assembly MW from cell content analysis; used to rank models
    # by how closely their MW matches the target.
    predicted_molecular_weight = 0
    if run_mr_data.Success():
        i = InputCCA()
        i.setSPAC_HALL(run_mr_data.getSpaceGroupHall())
        i.setCELL6(run_mr_data.getUnitCell())
        i.setMUTE(True)
        run_cca = runCCA(i)
        if run_cca.Success():
            predicted_molecular_weight = run_cca.getAssemblyMW()

    dat_models = []
    for dat_model in self.simbad_dat_files:
        name = os.path.basename(dat_model.replace(".dat", ""))
        pdb_struct = simbad.util.pdb_util.PdbStructure()
        pdb_struct.from_file(dat_model)
        try:
            solvent_content = sol_calc.calculate_from_struct(pdb_struct)
            if solvent_content < min_solvent_content:
                msg = "Skipping %s: solvent content is predicted to be less than %.2f"
                logger.debug(msg, name, min_solvent_content)
                continue
        except ValueError:
            msg = "Skipping %s: Error calculating solvent content"
            logger.debug(msg, name)
            # BUGFIX: without this `continue` the model that failed the
            # solvent-content calculation fell through and was scored with
            # the previous iteration's solvent_content (or raised NameError
            # on the first iteration).
            continue
        x, y, z, intrad = pdb_struct.integration_box
        model_molecular_weight = pdb_struct.molecular_weight
        mw_diff = abs(predicted_molecular_weight - model_molecular_weight)
        info = simbad.core.dat_score.DatModelScore(name, dat_model, mw_diff, x,
                                                   y, z, intrad,
                                                   solvent_content, None)
        dat_models.append(info)

    # Trial models closest in MW to the predicted assembly first.
    sorted_dat_models = sorted(dat_models, key=lambda x: float(x.mw_diff),
                               reverse=False)

    # BUGFIX: initialise the accumulator once, before the chunk loop.
    # Previously it was reset per chunk, so only the final chunk's scores
    # survived into self._search_results (and it was unbound for n_files==0).
    results = []
    iteration_range = range(0, n_files, chunk_size)
    for cycle, i in enumerate(iteration_range):
        logger.info("Working on chunk %d out of %d", cycle + 1,
                    total_chunk_cycles)
        amore_files = []
        for dat_model in sorted_dat_models[i:i + chunk_size]:
            logger.debug("Generating script to perform AMORE rotation " +
                         "function on %s", dat_model.pdb_code)

            pdb_model = template_model.format(dat_model.pdb_code)
            table1 = template_table1.format(dat_model.pdb_code)
            hklpck1 = template_hklpck1.format(dat_model.pdb_code)
            clmn0 = template_clmn0.format(dat_model.pdb_code)
            clmn1 = template_clmn1.format(dat_model.pdb_code)
            mapout = template_mapout.format(dat_model.pdb_code)

            # One-liner run under ccp4-python to unpack the .dat into a PDB.
            conv_py = "\"from simbad.db import convert_dat_to_pdb; convert_dat_to_pdb('{}', '{}')\""
            conv_py = conv_py.format(dat_model.dat_path, pdb_model)

            tab_cmd = [self.amore_exe, "xyzin1", pdb_model, "xyzout1",
                       pdb_model, "table1", table1]
            tab_stdin = self.tabfun_stdin_template.format(
                x=dat_model.x, y=dat_model.y, z=dat_model.z, a=90, b=90, c=120)

            rot_cmd = [
                self.amore_exe, 'table1', table1, 'HKLPCK1', hklpck1,
                'hklpck0', hklpck0, 'clmn1', clmn1, 'clmn0', clmn0,
                'MAPOUT', mapout
            ]
            rot_stdin = self.rotfun_stdin_template.format(
                shres=shres, intrad=dat_model.intrad, pklim=pklim, npic=npic,
                step=rotastep)
            rot_log = template_rot_log.format(dat_model.pdb_code)

            tmp_dir = template_tmp_dir.format(dat_model.pdb_code)
            # Shell script: redirect scratch, convert model, run AMORE
            # tabfun then rotfun with heredoc stdin, keep only the solution
            # record from the rotation log, then clean up the scratch area.
            cmd = [
                [EXPORT, "CCP4_SCR=" + tmp_dir],
                ["mkdir", "-p", "$CCP4_SCR\n"],
                [CMD_PREFIX, "$CCP4/bin/ccp4-python", "-c", conv_py, os.linesep],
                tab_cmd + ["<< eof >", os.devnull],
                [tab_stdin],
                ["eof"],
                [os.linesep],
                rot_cmd + ["<< eof >", rot_log],
                [rot_stdin],
                ["eof"],
                [os.linesep],
                ["grep", "-m 1", "SOLUTIONRCD", rot_log, os.linesep],
                ["rm", "-rf", "$CCP4_SCR\n"],
                [EXPORT, "CCP4_SCR=" + ccp4_scr],
            ]
            amore_script = pyjob.misc.make_script(
                cmd, directory=script_log_dir, prefix="amore_",
                stem=dat_model.pdb_code)
            amore_log = amore_script.rsplit(".", 1)[0] + '.log'
            amore_files += [(amore_script, tab_stdin, rot_stdin, amore_log,
                             dat_model.dat_path)]

        if len(amore_files) > 0:
            logger.info("Running AMORE tab/rot functions")
            # NOTE: `dat_models` is deliberately rebound here to the tuple of
            # .dat paths for this chunk; the full list is no longer needed.
            amore_scripts, _, _, amore_logs, dat_models = zip(*amore_files)
            simbad.rotsearch.submit_chunk(amore_scripts, script_log_dir,
                                          nproc, 'simbad_amore', submit_qtype,
                                          submit_queue, monitor,
                                          self.rot_succeeded_log)
            for dat_model, amore_log in zip(dat_models, amore_logs):
                base = os.path.basename(amore_log)
                pdb_code = base.replace("amore_", "").replace(".log", "")
                try:
                    rotsearch_parser = simbad.parsers.rotsearch_parser.AmoreRotsearchParser(amore_log)
                    score = simbad.core.amore_score.AmoreRotationScore(
                        pdb_code, dat_model, rotsearch_parser.alpha,
                        rotsearch_parser.beta, rotsearch_parser.gamma,
                        rotsearch_parser.cc_f, rotsearch_parser.rf_f,
                        rotsearch_parser.cc_i, rotsearch_parser.cc_p,
                        rotsearch_parser.icp,
                        rotsearch_parser.cc_f_z_score,
                        rotsearch_parser.cc_p_z_score,
                        rotsearch_parser.num_of_rot)
                    # Only keep models for which a rotation peak was found.
                    if rotsearch_parser.cc_f_z_score:
                        results += [score]
                except IOError:
                    # Log may be missing if the job failed; skip the model.
                    pass
        else:
            logger.critical("No structures to be trialled")

    self._search_results = results
    shutil.rmtree(script_log_dir)
    if os.path.isdir(default_tmp_dir):
        shutil.rmtree(default_tmp_dir)