def get_mtz_info(datafile):
    """Read an MTZ file and return (spacegroup, unit_cell, volume).

    datafile - path to the input MTZ file

    The space group is passed through fix_R3_sg with spaces stripped, the six
    unit-cell parameters are rounded to three decimals, and the volume is the
    unit-cell volume of the first crystal in the file.
    """
    # Convert from unicode before handing the path to iotbx
    mtz_obj = iotbx_mtz.object(convert_unicode(datafile))
    # Derive the space group (R3/R32 naming fixed up, spaces removed)
    sg = fix_R3_sg(mtz_obj.space_group_name().replace(" ", ""))
    # Wrangle the cell parameters from the first crystal
    first_crystal = mtz_obj.crystals()[0]
    cell = [round(dim, 3) for dim in first_crystal.unit_cell_parameters()]
    # The volume
    vol = first_crystal.unit_cell().volume()
    return (sg, cell, vol)
def get_mtz_info(data_file):
    """Extract space group, unit cell, and cell volume from an MTZ file.

    data_file - path to the input MTZ file

    Returns a tuple (sg, cell, vol): sg is the space group string (fixed up
    via fix_R3_sg, spaces removed), cell holds the six cell parameters rounded
    to three decimals, vol is the unit-cell volume.
    """
    # Convert from unicode before passing the path to iotbx
    mtz = iotbx_mtz.object(convert_unicode(data_file))
    crystal = mtz.crystals()[0]
    sg = fix_R3_sg(mtz.space_group_name().replace(" ", ""))
    cell = [round(param, 3) for param in crystal.unit_cell_parameters()]
    vol = crystal.unit_cell().volume()
    return (sg, cell, vol)
def get_res(data_file):
    """Return resolution limit of dataset"""
    mtz_data = iotbx_mtz.object(convert_unicode(data_file))
    # max_min_resolution() returns the resolution extremes; the last entry is
    # the high-resolution limit
    high_res = mtz_data.max_min_resolution()[-1]
    return float(high_res)
def get_res(datafile):
    """Return resolution limit of dataset"""
    # Unicode paths are normalized before being handed to iotbx
    path = convert_unicode(datafile)
    resolution_pair = iotbx_mtz.object(path).max_min_resolution()
    # Final element is the high-resolution limit
    return float(resolution_pair[-1])
def get_spacegroup_info(cif_file):
    """Return the space group recorded in a PDB or mmCIF file.

    cif_file - path to a PDB or mmCIF structure file

    For mmCIF input the first 20480 bytes are scanned for the
    _symmetry.space_group_name_H-M tag; returns the cleaned-up space group
    string, or False if the file is unreadable, flagged as not
    PDB-format-compatible, or the tag is absent. For PDB input the space
    group is derived via iotbx.
    """
    # print "get_spacegroup_info", cif_file, os.getcwd()
    cif_file = convert_unicode(cif_file)
    if cif_file[-3:].lower() == "cif":
        fail = False
        cif_spacegroup = False
        try:
            # BUGFIX: use a context manager so the file handle is always
            # closed (the original leaked the handle from open().read()).
            with open(cif_file, "r") as in_file:
                head = in_file.read(20480)
            for line in head.split('\n'):
                if "_symmetry.space_group_name_H-M" in line:
                    # Strip the tag, surrounding quotes, and internal spaces
                    cif_spacegroup = line[32:].strip()[1:-1].upper().replace(" ", "")
                if "_pdbx_database_status.pdb_format_compatible" in line:
                    # Guard against a bare tag with no value (avoids IndexError)
                    parts = line.split()
                    if len(parts) > 1 and parts[1] == "N":
                        fail = True
        except IOError:
            return False
        if fail:
            return False
        else:
            return cif_spacegroup
    else:
        return str(iotbx_pdb.input(cif_file).crystal_symmetry().space_group_info()).upper().replace(" ", "")
def get_spacegroup_info(struct_file):
    """Return the space group recorded in a PDB or mmCIF file.

    struct_file - path to a PDB or mmCIF structure file

    For mmCIF input the first 20480 bytes are scanned for the
    _symmetry.space_group_name_H-M tag; returns the cleaned-up space group
    string, or False if the file is unreadable, flagged as not
    PDB-format-compatible, or the tag is absent. For PDB input the space
    group is derived via iotbx.
    """
    # print "get_spacegroup_info", struct_file, os.getcwd()
    struct_file = convert_unicode(struct_file)
    if struct_file[-3:].lower() == "cif":
        fail = False
        cif_spacegroup = False
        try:
            # BUGFIX: use a context manager so the file handle is always
            # closed (the original leaked the handle from open().read()).
            with open(struct_file, "r") as in_file:
                head = in_file.read(20480)
            for line in head.split('\n'):
                if "_symmetry.space_group_name_H-M" in line:
                    # Strip the tag, surrounding quotes, and internal spaces
                    cif_spacegroup = line[32:].strip()[1:-1].upper().replace(
                        " ", "")
                    # print cif_spacegroup
                if "_pdbx_database_status.pdb_format_compatible" in line:
                    # Guard against a bare tag with no value (avoids IndexError)
                    parts = line.split()
                    if len(parts) > 1 and parts[1] == "N":
                        fail = True
        except IOError:
            return False
        if fail:
            return False
        else:
            return cif_spacegroup
    else:
        return str(
            iotbx_pdb.input(struct_file).crystal_symmetry().space_group_info()
        ).upper().replace(" ", "")
def wrapper(**kwargs):
    """Launch the wrapped Phaser job either as a detached script or in-process.

    First pass (no 'script' flag): writes a job script and launches it via the
    supplied launcher — through a multiprocessing pool locally, or a Process on
    a cluster. Second pass ('script' set, i.e. inside the re-launched script):
    strips setup-only kwargs and calls the wrapped function directly.

    Returns (proc, 'junk') for the pool path or (proc, pid) for the cluster
    path; otherwise returns the wrapped function's result.
    NOTE(review): `func` is a free variable from the enclosing decorator scope.
    """
    os.chdir(kwargs.get('work_dir', os.getcwd()))
    if not kwargs.get('script', False):
        # Pop out the launcher
        launcher = kwargs.pop('launcher', None)
        # Pop out the batch_queue
        batch_queue = kwargs.pop('batch_queue', None)
        # Pop out the results_queue
        result_queue = kwargs.pop('result_queue', None)
        # Create a unique identifier for Phaser results
        #kwargs['output_id'] = 'Phaser_%d' % random.randint(0, 10000)
        # Grab the RAPD python path (if available)
        rapd_python = kwargs.pop('rapd_python', 'rapd.python')
        # Signal to launch run (the re-launched script takes the else branch)
        kwargs['script'] = True
        if kwargs.get('pool', False):
            # If running on local machine. Launcher will be 'utils.processes.local_subprocess'
            pool = kwargs.pop('pool')
            f = write_script(kwargs)
            proc = pool.apply_async(
                launcher,
                kwds={
                    "command": "%s %s" % (rapd_python, f),
                    "logfile": os.path.join(convert_unicode(kwargs.get('work_dir')), 'rapd_phaser.log'),
                    "result_queue": result_queue,
                })
            #return (proc, 'junk', kwargs['output_id'])
            return (proc, 'junk')
        else:
            # If running on computer cluster. Launcher will be sites.cluster.(site_name).process_cluster
            f = write_script(kwargs)
            # pid_queue lets the child report its process id back to us
            pid_queue = Queue()
            proc = Process(target=launcher,
                           kwargs={
                               "command": "%s %s" % (rapd_python, f),
                               "logfile": os.path.join(kwargs.get('work_dir'), 'rapd_phaser.log'),
                               "pid_queue": pid_queue,
                               "batch_queue": batch_queue,
                           })
            proc.start()
            #return (proc, pid_queue.get(), kwargs['output_id'])
            return (proc, pid_queue.get())
    else:
        # Remove extra input params used to setup job
        l = ['script', 'test']
        for k in l:
            # pop WON'T error out if key not found!
            _ = kwargs.pop(k, None)
        # Just launch job
        return func(**kwargs)
def run_ellg():
    """Run Phaser's MR_ELLG module and return its target resolution.

    Uses the enclosing scope's MR_DAT result ``r`` and search-model path
    ``f``. Returns 0.0 when the eLLG run does not succeed.
    """
    target_res = 0.0
    ellg_inp = phaser.InputMR_ELLG()
    ellg_inp.setSPAC_HALL(r.getSpaceGroupHall())
    ellg_inp.setCELL6(r.getUnitCell())
    ellg_inp.setMUTE(True)
    ellg_inp.setREFL_DATA(r.getDATA())
    # NOTE: ('cif') is just the string 'cif', so this is a substring test
    if f[-3:] in ('cif'):
        ellg_inp.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
    else:
        ellg_inp.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
    # i.addSEAR_ENSE_NUM("junk",5)
    ellg_result = phaser.runMR_ELLG(ellg_inp)
    #print ellg_result.logfile()
    if ellg_result.Success():
        target_res = ellg_result.get_target_resolution('model')
    del ellg_result
    return target_res
def run_ellg():
    """Determine Phaser's recommended MR resolution via the eLLG module.

    Reads reflection data from the enclosing ``r`` (MR_DAT result) and the
    search-model path from ``mmcif``; returns the rounded recommendation,
    or 0.0 on failure.
    """
    recommended = 0.0
    module_input = phaser.InputMR_ELLG()
    module_input.setSPAC_HALL(r.getSpaceGroupHall())
    module_input.setCELL6(r.getUnitCell())
    module_input.setMUTE(True)
    module_input.setREFL_DATA(r.getDATA())
    # NOTE: ('cif') is just the string 'cif', so this is a substring test
    if mmcif[-3:] in ('cif'):
        module_input.addENSE_CIT_ID('model', convert_unicode(mmcif), 0.7)
    else:
        module_input.addENSE_PDB_ID("model", convert_unicode(mmcif), 0.7)
    ellg_run = phaser.runMR_ELLG(module_input)
    #print ellg_run.logfile()
    if ellg_run.Success():
        # If it worked use the recommended resolution
        recommended = round(ellg_run.get_target_resolution('model'), 1)
    del ellg_run
    return recommended
def run_ellg():
    """Run the MR_ELLG analysis and return its suggested resolution.

    Operates on the enclosing scope's MR_DAT result ``r`` and model path
    ``f``; yields 0.0 when the run fails.
    """
    inp = phaser.InputMR_ELLG()
    inp.setSPAC_HALL(r.getSpaceGroupHall())
    inp.setCELL6(r.getUnitCell())
    inp.setMUTE(True)
    inp.setREFL_DATA(r.getDATA())
    # substring check against the plain string 'cif'
    is_cif = f[-3:] in ('cif')
    if is_cif:
        inp.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
    else:
        inp.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
    # i.addSEAR_ENSE_NUM("junk",5)
    run = phaser.runMR_ELLG(inp)
    #print run.logfile()
    suggested = run.get_target_resolution('model') if run.Success() else 0.0
    del run
    return suggested
def run_ellg():
    """Run Phaser's MR_ELLG module; return the recommended high-res limit.

    Uses the enclosing scope's MR_DAT result ``r`` and search-model path
    ``struct_file``. On a known CIF-related RuntimeError the model is
    converted to PDB and the run retried. Returns 0.0 on failure.
    """
    new_res = 0.0
    i0 = phaser.InputMR_ELLG()
    i0.setSPAC_HALL(r.getSpaceGroupHall())
    i0.setCELL6(r.getUnitCell())
    i0.setMUTE(True)
    i0.setREFL_DATA(r.getDATA())
    # Read in CIF file
    if struct_file[-3:] in ('cif', ):
        i0.addENSE_CIT_ID("model", convert_unicode(struct_file), 0.7)
    # Read in PDB file
    else:
        i0.addENSE_PDB_ID("model", convert_unicode(struct_file), 0.7)
    try:
        r1 = phaser.runMR_ELLG(i0)
    except RuntimeError as e:
        # print "Hit error"
        # Known CIF error - convert to pdb and retry
        if struct_file[-3:] in ('cif', ):
            # print "Convert to pdb"
            pdb.cif_as_pdb((struct_file, ))
            pdb_file = struct_file.replace(".cif", ".pdb")
            # Rebuild the input from scratch against the converted PDB
            i1 = phaser.InputMR_ELLG()
            i1.setSPAC_HALL(r.getSpaceGroupHall())
            i1.setCELL6(r.getUnitCell())
            i1.setMUTE(True)
            i1.setREFL_DATA(r.getDATA())
            i1.addENSE_PDB_ID("model", convert_unicode(pdb_file), 0.7)
            r1 = phaser.runMR_ELLG(i1)
        else:
            raise e
    # print r1.logfile()
    if r1.Success():
        # If it worked use the recommended resolution
        new_res = round(r1.get_target_resolution('model'), 1)
    del (r1)
    return new_res
def __init__(self, command, output=None, tprint=False, logger=None):
    """Initialize and immediately start the PDBQuery process.

    command - dict with "directories", "preferences", and "input_data" keys
    output - optional output target, stored as-is
    tprint - terminal-print callable; a no-op stand-in is used when absent
    logger - logging.Logger instance; falls back to the "RAPDLogger" logger
    """
    # If the logging instance is passed in...
    if logger:
        self.logger = logger
    else:
        # Otherwise get the logger Instance
        self.logger = logging.getLogger("RAPDLogger")
        self.logger.debug("__init__")
    # Store tprint for use throughout
    if tprint:
        self.tprint = tprint
    # Dead end if no tprint passed
    else:
        def func(arg=False, level=False, verbosity=False, color=False):
            pass
        self.tprint = func
    # Stopwatch
    self.start_time = time.time()
    # Store inputs
    self.input = command
    self.output = output
    # BUGFIX: the original re-assigned `self.logger = logger` here,
    # clobbering the fallback logger with None when no logger was passed.
    # pprint(command)
    # Params
    self.working_dir = self.input["directories"].get("work", os.getcwd())
    self.test = self.input["preferences"].get("test", False)
    self.sample_type = self.input["preferences"].get("type", "protein")
    self.solvent_content = self.input["preferences"].get(
        "solvent_content", 0.55)
    self.cluster_use = self.input["preferences"].get("cluster", False)
    self.clean = self.input["preferences"].get("clean", True)
    self.gui = self.input["preferences"].get("gui", True)
    # self.controller_address = self.input[0].get("control", False)
    self.verbose = self.input["preferences"].get("verbose", False)
    self.datafile = xutils.convert_unicode(
        self.input["input_data"].get("datafile"))
    Process.__init__(self, name="PDBQuery")
    self.start()
def __init__(self, command, output=None, tprint=False, logger=None):
    """Initialize and immediately start the PDBQuery process.

    command - dict with "directories", "preferences", and "input_data" keys
    output - optional output target, stored as-is
    tprint - terminal-print callable; a no-op stand-in is used when absent
    logger - logging.Logger instance; falls back to the "RAPDLogger" logger
    """
    # If the logging instance is passed in...
    if logger:
        self.logger = logger
    else:
        # Otherwise get the logger Instance
        self.logger = logging.getLogger("RAPDLogger")
        self.logger.debug("__init__")
    # Store tprint for use throughout
    if tprint:
        self.tprint = tprint
    # Dead end if no tprint passed
    else:
        def func(arg=False, level=False, verbosity=False, color=False):
            pass
        self.tprint = func
    # Stopwatch
    self.start_time = time.time()
    # Store inputs
    self.input = command
    self.output = output
    # BUGFIX: the original re-assigned `self.logger = logger` here,
    # clobbering the fallback logger with None when no logger was passed.
    # pprint(command)
    # Params
    self.working_dir = self.input["directories"].get("work", os.getcwd())
    self.test = self.input["preferences"].get("test", False)
    self.sample_type = self.input["preferences"].get("type", "protein")
    self.solvent_content = self.input["preferences"].get("solvent_content", 0.55)
    self.cluster_use = self.input["preferences"].get("cluster", False)
    self.clean = self.input["preferences"].get("clean", True)
    self.gui = self.input["preferences"].get("gui", True)
    # self.controller_address = self.input[0].get("control", False)
    self.verbose = self.input["preferences"].get("verbose", False)
    self.datafile = xutils.convert_unicode(self.input["input_data"].get("datafile"))
    Process.__init__(self, name="PDBQuery")
    self.start()
def wrapper(**kwargs):
    """Launch the wrapped Phaser job either as a detached script or in-process.

    First pass (no 'script' flag): writes a job script, tags it with a random
    output_id, and launches it via the supplied launcher — through a
    multiprocessing pool locally, or a Process on a cluster. Second pass
    ('script' set, i.e. inside the re-launched script): strips setup-only
    kwargs and calls the wrapped function directly.

    Returns (proc, 'junk', output_id) for the pool path or
    (proc, pid, output_id) for the cluster path; otherwise returns the
    wrapped function's result.
    NOTE(review): `func` is a free variable from the enclosing decorator scope.
    """
    os.chdir(kwargs.get('work_dir', os.getcwd()))
    if not kwargs.get('script', False):
        # Pop out the launcher
        launcher = kwargs.pop('launcher', None)
        # Pop out the batch_queue
        batch_queue = kwargs.pop('batch_queue', None)
        # Create a unique identifier for Phaser results
        kwargs['output_id'] = 'Phaser_%d' % random.randint(0, 10000)
        # Signal to launch run (the re-launched script takes the else branch)
        kwargs['script'] = True
        if kwargs.get('pool', False):
            # If running on local machine
            pool = kwargs.pop('pool')
            f = write_script(kwargs)
            new_kwargs = {"command": "rapd2.python %s" % f,
                          "logfile": os.path.join(convert_unicode(kwargs.get('work_dir')), 'rapd_phaser.log'),
                          }
            proc = pool.apply_async(launcher, kwds=new_kwargs,)
            return (proc, 'junk', kwargs['output_id'])
        else:
            # If running on computer cluster
            f = write_script(kwargs)
            # pid_queue lets the child report its process id back to us
            pid_queue = Queue()
            proc = Process(target=launcher,
                           kwargs={"command": "rapd2.python %s" % f,
                                   "pid_queue": pid_queue,
                                   "batch_queue": batch_queue,
                                   "logfile": os.path.join(kwargs.get('work_dir'), 'rapd_phaser.log'),
                                   })
            proc.start()
            return (proc, pid_queue.get(), kwargs['output_id'])
    else:
        # Remove extra input params used to setup job
        l = ['script', 'test']
        for k in l:
            # pop WON'T error out if key not found!
            _ = kwargs.pop(k, None)
        # Just launch job
        return func(**kwargs)
def run_phaser(data_file,
               struct_file,
               spacegroup,
               db_settings=False,
               tag=False,
               work_dir=False,
               adf=False,
               name=False,
               ncopy=1,
               cell_analysis=False,
               resolution=False,
               full=False,
               ):
    """
    Run Phaser and passes results back to RAPD Redis DB
    **Requires Phaser src code!**

    data_file - input data as mtz (required)
    struct_file - input search model path in mmCIF or PDB format (required)
    spacegroup - The space group to run MR (required)
    tag - a Redis key where the results are sent (cluster mode)
    db_settings - Redis connection settings for sending results (cluster mode)
    work_dir - working directory (defaults to current working dir)
    adf - if truthy, also calculate an anomalous difference Fourier map
    name - root name for output files (defaults to spacegroup)
    ncopy - number of molecules to search for
    cell_analysis - internal RAPD signal so all possible SG's are searched
    resolution - high res limit to run MR (float)
    full - signal to run more comprehensive MR

    Results are either sent to Redis (when db_settings and tag are given)
    or printed to stdout as JSON.
    """

    phaser_log = False
    # Change to work_dir
    if not work_dir:
        work_dir = os.getcwd()
    os.chdir(work_dir)

    if not name:
        name = spacegroup

    # # Handle CIF file input -> PDB
    # if struct_file[-3:] == "cif":
    #     pdb.cif_as_pdb(struct_file)
    #     struct_file = struct_file.replace(".cif", ".pdb")

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r1 = phaser.runMR_DAT(i)
    # Need to determine Phaser version for keyword changes!
    # Scraped from the MR_DAT logfile header.
    version = re.search(r'Version:\s*([\d.]+)', r1.logfile()).group(1)

    if r1.Success():
        i = phaser.InputMR_AUTO()
        # i.setREFL_DATA(r1.getREFL_DATA())
        # i.setREFL_DATA(r1.DATA_REFL())
        i.setREFL_F_SIGF(r1.getMiller(), r1.getFobs(), r1.getSigFobs())
        i.setCELL6(r1.getUnitCell())
        if struct_file[-3:].lower() == "cif":
            #i.addENSE_CIF_ID('model', cif, 0.7) ### Typo in PHASER CODE!!! ###
            i.addENSE_CIT_ID('model', convert_unicode(struct_file), 0.7)
        else:
            i.addENSE_PDB_ID('model', convert_unicode(struct_file), 0.7)
        i.addSEAR_ENSE_NUM("model", ncopy)
        i.setSPAC_NAME(spacegroup)
        if cell_analysis:
            i.setSGAL_SELE("ALL")
            # Set it for worst case in orth
            # number of processes to run in parallel where possible
            i.setJOBS(1)
        else:
            i.setSGAL_SELE("NONE")
        if full:
            # Picks own resolution
            # Round 2, pick best solution as long as less that 10% clashes
            i.setPACK_SELE("PERCENT")
            i.setPACK_CUTO(0.1)
            #command += "PACK CUTOFF 10\n"
        else:
            # For first round and cell analysis
            # Only set the resolution limit in the first round or cell analysis.
            if resolution:
                i.setRESO_HIGH(resolution)
            else:
                i.setRESO_HIGH(6.0)
            # If Phaser version < 2.6.0
            if int(version.split('.')[1]) <= 6:
                i.setSEAR_DEEP(False)
            else:
                i.setSEAR_METH("FAST")
            # Don"t seem to work since it picks the high res limit now.
            # Get an error when it prunes all the solutions away and TF has no input.
            # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
            # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
            # Turn off pruning in 2.6.0
            i.setSEAR_PRUN(False)
            # Choose more top peaks to help with getting it correct.
            i.setPURG_ROTA_ENAB(True)
            i.setPURG_ROTA_NUMB(3)
            #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
            i.setPURG_TRAN_ENAB(True)
            i.setPURG_TRAN_NUMB(1)
            #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"
            # Only keep the top after refinement.
            i.setPURG_RNP_ENAB(True)
            i.setPURG_RNP_NUMB(1)
            #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
        i.setROOT(convert_unicode(name))
        # i.setMUTE(False)
        i.setMUTE(True)
        # Delete the setup results
        # del(r)
        # launch the run
        # r = phaser.runMR_AUTO(i)
        try:
            r = phaser.runMR_AUTO(i)
        except RuntimeError as e:
            # print "Hit error"
            # Known CIF error - convert to pdb and retry with the same setup
            if struct_file[-3:] in ('cif', ):
                # print "Convert to pdb"
                pdb.cif_as_pdb((struct_file, ))
                pdb_file = struct_file.replace(".cif", ".pdb")
                i = phaser.InputMR_AUTO()
                # i.setREFL_DATA(r1.getREFL_DATA())
                # i.setREFL_DATA(r1.DATA_REFL())
                i.setREFL_F_SIGF(r1.getMiller(), r1.getFobs(), r1.getSigFobs())
                i.setCELL6(r1.getUnitCell())
                i.addENSE_PDB_ID('model', convert_unicode(pdb_file), 0.7)
                i.addSEAR_ENSE_NUM("model", ncopy)
                i.setSPAC_NAME(spacegroup)
                if cell_analysis:
                    i.setSGAL_SELE("ALL")
                    # Set it for worst case in orth
                    # number of processes to run in parallel where possible
                    i.setJOBS(1)
                else:
                    i.setSGAL_SELE("NONE")
                if full:
                    # Picks own resolution
                    # Round 2, pick best solution as long as less that 10% clashes
                    i.setPACK_SELE("PERCENT")
                    i.setPACK_CUTO(0.1)
                    #command += "PACK CUTOFF 10\n"
                else:
                    # For first round and cell analysis
                    # Only set the resolution limit in the first round or cell analysis.
                    if resolution:
                        i.setRESO_HIGH(resolution)
                    else:
                        i.setRESO_HIGH(6.0)
                    # If Phaser version < 2.6.0
                    if int(version.split('.')[1]) <= 6:
                        i.setSEAR_DEEP(False)
                    else:
                        i.setSEAR_METH("FAST")
                    # Don"t seem to work since it picks the high res limit now.
                    # Get an error when it prunes all the solutions away and TF has no input.
                    # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
                    # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
                    # Turn off pruning in 2.6.0
                    i.setSEAR_PRUN(False)
                    # Choose more top peaks to help with getting it correct.
                    i.setPURG_ROTA_ENAB(True)
                    i.setPURG_ROTA_NUMB(3)
                    #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
                    i.setPURG_TRAN_ENAB(True)
                    i.setPURG_TRAN_NUMB(1)
                    #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"
                    # Only keep the top after refinement.
                    i.setPURG_RNP_ENAB(True)
                    i.setPURG_RNP_NUMB(1)
                    #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
                i.setROOT(convert_unicode(name))
                # i.setMUTE(False)
                i.setMUTE(True)
                # Delete the setup results
                # del(r)
                # launch the run
                r = phaser.runMR_AUTO(i)
            else:
                raise e

        if r.Success():
            # print r
            pass
            #if r.foundSolutions():
                #print "Phaser has found MR solutions"
                #print "Top LLG = %f" % r.getTopLLG()
                #print "Top PDB file = %s" % r.getTopPdbFile()
            #else:
                #print "Phaser has not found any MR solutions"
        else:
            print "Job exit status FAILURE"
            print r.ErrorName(), "ERROR :", r.ErrorMessage()

        # Save log files for debugging
        phaser_log = r.logfile()
        with open('phaser.log', 'w') as log:
            log.write(r.logfile())
            # NOTE(review): close() is redundant inside the with block
            log.close()

        if r.foundSolutions():
            rfz = None
            tfz = None
            tncs = False
            # Parse results
            for p in r.getTopSet().ANNOTATION.split():
                # print p
                # For v 2.8.3
                # RF*0\nTF*0\nLLG=30699\nTFZ==174.8\nPAK=0\nLLG=30699\nTFZ==174.8\n
                if p.count('RFZ'):
                    if p.count('=') in [1]:
                        rfz = float(p.split('=')[-1])
                if p.count('RF*0'):
                    rfz = "NC"
                if p.count('TFZ'):
                    if p.count('=') in [1]:
                        tfz = p.split('=')[-1]
                        if tfz == '*':
                            tfz = 'arbitrary'
                        else:
                            tfz = float(tfz)
                if p.count('TF*0'):
                    tfz = "NC"
            # Flag translational NCS if any "+TNCS" line appears in the summary
            tncs_test = [
                1 for line in r.getTopSet().unparse().splitlines()
                if line.count("+TNCS")
            ]
            tncs = bool(len(tncs_test))
            mtz_file = os.path.join(work_dir, r.getTopMtzFile())
            phaser_result = {
                "ID": name,
                "solution": r.foundSolutions(),
                "pdb_file": os.path.join(work_dir, r.getTopPdbFile()),
                "mtz": mtz_file,
                "gain": float(r.getTopLLG()),
                "rfz": rfz,
                # "tfz": r.getTopTFZ(),
                "tfz": tfz,
                "clash": r.getTopSet().PAK,
                "dir": os.getcwd(),
                "spacegroup": r.getTopSet().getSpaceGroupName().replace(' ', ''),
                "tNCS": tncs,
                "nmol": r.getTopSet().NUM,
                "adf": None,
                "peak": None,
            }

            # Calculate 2Fo-Fc & Fo-Fc maps
            # foo.mtz begets foo_2mFo-DFc.ccp4 & foo__mFo-DFc.ccp4
            local_subprocess(command="phenix.mtz2map %s" % mtz_file,
                             logfile='map.log',
                             shell=True)

            # Map files should now exist
            map_2_1 = mtz_file.replace(".mtz", "_2mFo-DFc.ccp4")
            map_1_1 = mtz_file.replace(".mtz", "_mFo-DFc.ccp4")

            # Make sure the maps exist and then package them
            if os.path.exists(map_2_1):
                # Compress the map
                arch_prod_file, arch_prod_hash = archive.compress_file(map_2_1)
                # Remove the map that was compressed
                os.unlink(map_2_1)
                # Store information
                map_for_display = {
                    "path": arch_prod_file,
                    "hash": arch_prod_hash,
                    "description": "map_2_1"
                }
                phaser_result["map_2_1"] = map_for_display

            if os.path.exists(map_1_1):
                # Compress the map
                arch_prod_file, arch_prod_hash = archive.compress_file(map_1_1)
                # Remove the map that was compressed
                os.unlink(map_1_1)
                # Store information
                map_for_display = {
                    "path": arch_prod_file,
                    "hash": arch_prod_hash,
                    "description": "map_1_1"
                }
                phaser_result["map_1_1"] = map_for_display

            # If PDB exists, package that too
            if phaser_result.get("pdb_file", False):
                if os.path.exists(phaser_result.get("pdb_file")):
                    # Compress the file
                    arch_prod_file, arch_prod_hash = archive.compress_file(
                        phaser_result.get("pdb_file"))
                    # Remove the map that was compressed
                    # os.unlink(phaser_result.get("pdb"))
                    # Store information
                    pdb_for_display = {
                        "path": arch_prod_file,
                        "hash": arch_prod_hash,
                        "description": os.path.basename(phaser_result.get("pdb_file"))
                    }
                    phaser_result["pdb"] = pdb_for_display

            # Calc ADF map
            if adf:
                if os.path.exists(phaser_result.get(
                        "pdb_file", False)) and os.path.exists(
                            phaser_result.get("mtz", False)):
                    adf_results = calc_ADF_map(data_file=data_file,
                                               mtz=phaser_result["mtz"],
                                               pdb=phaser_result["pdb_file"])
                    if adf_results.get("adf"):
                        phaser_result.update({
                            "adf": os.path.join(work_dir, adf_results.get("adf"))
                        })
                    if adf_results.get("peak"):
                        phaser_result.update({
                            "peak": os.path.join(work_dir, adf_results.get("peak"))
                        })

            #phaser_result.update({"adf": adf_results.get("adf", None),
            #                      "peak": adf_results.get("peak", None),})

            # print "1"
            # print name

            # New procedure for making tar of results
            # Create directory
            # Remove the run # from the name
            # new_name = name[:-2]
            new_name = phaser_result.get("ID")
            # print new_name
            os.mkdir(new_name)
            # Go through and copy files to archive directory
            file_types = ("pdb_file", "mtz", "adf", "peak")
            for file_type in file_types:
                # print file_type
                target_file = phaser_result.get(file_type, False)
                # print target_file
                if target_file:
                    if os.path.exists(target_file):
                        # Copy the file to the directory to be archived
                        shutil.copy(target_file, new_name + "/.")
            # Create the archive
            archive_result = archive.create_archive(new_name)
            archive_result["description"] = '%s_files' % new_name
            phaser_result["tar"] = archive_result
            # print "2"
        else:
            phaser_result = {
                "ID": name,
                "solution": False,
                "message": "No solution",
                "spacegroup": spacegroup
            }

        # Add the phaser log
        if phaser_log:
            phaser_result.update({"logs": {"phaser": phaser_log}})
        # print "3"

        if db_settings and tag:
            print "db_settings and tag"
            # Connect to Redis
            redis = connect_to_redis(db_settings)
            # Key should be deleted once received, but set the key to expire in 24 hours just in case.
            redis.setex(tag, 86400, json.dumps(phaser_result))
            # Do a little sleep to make sure results are in Redis for postprocess_phaser
            time.sleep(0.1)
        else:
            # print "Printing phaser_result"
            # Print the result so it can be seen thru the queue by reading stdout
            # print phaser_result
            print json.dumps(phaser_result)
def run_phaser_module_OLD(datafile, inp=False):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.

    datafile - input dataset mtz file
    inp - False (run tNCS/NCS analysis only), a string (search-model path:
          run eLLG only), or a tuple (np, na, res0, f) (also run the
          Matthews/CCA calculation)

    Returns a dict with "z"/"solvent_content"/"target_resolution" when the
    Matthews calc runs, a dict with "target_resolution" for eLLG alone, or
    the NCS run result otherwise.
    """
    # if self.verbose:
    #     self.logger.debug('Utilities::runPhaserModule')

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        # eLLG analysis: returns Phaser's recommended resolution (0.0 on failure)
        res0 = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        # NOTE(review): ('cif') is the plain string 'cif' — substring test
        if f[-3:] in ('cif'):
            i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
        # i.addSEAR_ENSE_NUM("junk",5)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            res0 = r1.get_target_resolution('model')
        del(r1)
        return res0

    def run_cca():
        # Matthews/CCA calculation: returns (best Z, solvent content estimate)
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res0)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        if r1.Success():
            z0 = r1.getBestZ()
            sc0 = 1-(1.23/r1.getBestVM())
        del(r1)
        return (z0, sc0)

    def run_ncs():
        # NCS analysis; returns the run object on success
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print r1.logfile()
        print r1.loggraph().size()
        print r1.loggraph().__dict__.keys()
        #print r1.getCentricE4()
        if r1.Success():
            return(r1)

    def run_ano():
        # Anomalous analysis; debugging prints, returns the run object on success
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """
        if r1.Success():
            print 'SUCCESS'
            return(r1)

    # Setup which modules are run
    matthews = False
    if inp:
        ellg = True
        ncs = False
        if type(inp) == str:
            f = inp
        else:
            # Tuple input also triggers the Matthews calculation
            np, na, res0, f = inp
            matthews = True
    else:
        ellg = False
        ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(datafile))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if matthews:
            z, solvent_content = run_cca()
        if ncs:
            n = run_ncs()
    if matthews:
        # Assumes ellg is run as well.
        # return (z,sc,res)
        return {"z": z,
                "solvent_content": solvent_content,
                "target_resolution": target_resolution}
    elif ellg:
        # ellg run by itself
        # return target_resolution
        return {"target_resolution": target_resolution}
    else:
        # NCS
        return n
def run_phaser_module(data_file,
                      result_queue=False,
                      cca=False,
                      tncs=False,
                      ellg=False,
                      struct_file=False,
                      dres=False,
                      np=0,
                      na=0):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.

    data_file - input dataset mtz file
    result_queue - pass results to queue
    cca - Run CCA to determine number of molecules in AU, and solvent content
          (Matthew's Coefficient calc)
    tncs - Run Anisotropy and tNCS correction on CID plots
    ellg - Run analysis to determonine optimum Phaser resolution MR.
    struct_file - input struct_file file. Could be a PDB or mmCIF file
    dres - resolution of dataset (ELLG, CCA)
    np - default number of protein residues (CCA)
    na - default number of nucleic acid residues (CCA)
    """

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        # eLLG analysis: returns Phaser's recommended resolution, rounded
        # to one decimal (0.0 on failure); retries via PDB on known CIF error
        new_res = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        # Read in CIF file
        if struct_file[-3:] in ('cif', ):
            i0.addENSE_CIT_ID("model", convert_unicode(struct_file), 0.7)
        # Read in PDB file
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(struct_file), 0.7)
        try:
            r1 = phaser.runMR_ELLG(i0)
        except RuntimeError as e:
            # print "Hit error"
            # Known CIF error - convert to pdb and retry
            if struct_file[-3:] in ('cif', ):
                # print "Convert to pdb"
                pdb.cif_as_pdb((struct_file, ))
                pdb_file = struct_file.replace(".cif", ".pdb")
                i1 = phaser.InputMR_ELLG()
                i1.setSPAC_HALL(r.getSpaceGroupHall())
                i1.setCELL6(r.getUnitCell())
                i1.setMUTE(True)
                i1.setREFL_DATA(r.getDATA())
                i1.addENSE_PDB_ID("model", convert_unicode(pdb_file), 0.7)
                r1 = phaser.runMR_ELLG(i1)
            else:
                raise e
        # print r1.logfile()
        if r1.Success():
            # If it worked use the recommended resolution
            new_res = round(r1.get_target_resolution('model'), 1)
        del (r1)
        return new_res

    def run_cca(res):
        # Matthews/CCA calculation at resolution `res`:
        # returns (best Z, solvent content estimate)
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        #print dir(r1)
        if r1.Success():
            z0 = r1.getBestZ()
            try:
                sc0 = round(1 - (1.23 / r1.getBestVM()), 2)
            except ZeroDivisionError:
                sc0 = 0
        del (r1)
        return (z0, sc0)

    def run_tncs():
        # tNCS analysis; returns the run's loggraph on success
        # CAN'T GET READABLE loggraph?!?
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print dir(r1)
        print r1.logfile()
        # for l in r1.loggraph():
        #     print l
        print r1.loggraph().size()
        print r1.output_strings
        #print r1.hasTNCS()
        #print r1.summary()
        print r1.warnings()
        print r1.ErrorMessage()
        #print r1.getCentricE4()
        if r1.Success():
            return (r1.loggraph())

    def run_ano():
        # Anomalous analysis; debugging prints, returns the run object on success
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """
        if r1.Success():
            print 'SUCCESS'
            return (r1)

    # MAIN
    # Setup which modules are run
    # Read input MTZ file
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if cca:
            # Assumes ellg is run as well.
            z, solvent_content = run_cca(target_resolution)
        if tncs:
            n = run_tncs()
    if cca:
        out = {
            "z": z,
            "solvent_content": solvent_content,
            "target_resolution": target_resolution
        }
        if result_queue:
            result_queue.put(out)
        else:
            return out
    elif ellg:
        # ellg run by itself
        out = {"target_resolution": target_resolution}
        if result_queue:
            result_queue.put(out)
        else:
            return out
    else:
        # tNCS
        out = n
        if result_queue:
            result_queue.put(out)
        else:
            return out

# NOTE(review): the triple-quote below opens a module-level string that
# swallows a dead, legacy copy of run_phaser_module which follows in the
# file; it is preserved as-is to keep the file's parse unchanged.
"""
def run_phaser_module(data_file, result_queue=False, cca=False, tncs=False, ellg=False, mmcif=False, dres=False, np=0, na=0,): """ Run separate module of Phaser to get results before running full job. Setup so that I can read the data in once and run multiple modules. data_file - input dataset mtz file result_queue - pass results to queue cca - Run CCA to determine number of molecules in AU, and solvent content (Matthew's Coefficient calc) tncs - Run Anisotropy and tNCS correction on CID plots ellg - Run analysis to determonine optimum Phaser resolution MR. mmcif - input mmcif file. Could also be a PDB file dres - resolution of dataset (ELLG, CCA) np - default number of protein residues (CCA) na - default number of nucleic acid residues (CCA) """ target_resolution = 0.0 z = 0 solvent_content = 0.0 def run_ellg(): new_res = 0.0 i0 = phaser.InputMR_ELLG() i0.setSPAC_HALL(r.getSpaceGroupHall()) i0.setCELL6(r.getUnitCell()) i0.setMUTE(True) i0.setREFL_DATA(r.getDATA()) if mmcif[-3:] in ('cif'): i0.addENSE_CIT_ID('model', convert_unicode(mmcif), 0.7) else: i0.addENSE_PDB_ID("model", convert_unicode(mmcif), 0.7) r1 = phaser.runMR_ELLG(i0) #print r1.logfile() if r1.Success(): # If it worked use the recommended resolution new_res = round(r1.get_target_resolution('model'), 1) del(r1) return new_res def run_cca(res): z0 = 0 sc0 = 0.0 i0 = phaser.InputCCA() i0.setSPAC_HALL(r.getSpaceGroupHall()) i0.setCELL6(r.getUnitCell()) i0.setMUTE(True) # Have to set high res limit!! i0.setRESO_HIGH(res) if np > 0: i0.addCOMP_PROT_NRES_NUM(np, 1) if na > 0: i0.addCOMP_NUCL_NRES_NUM(na, 1) r1 = phaser.runCCA(i0) #print r1.logfile() #print dir(r1) if r1.Success(): z0 = r1.getBestZ() sc0 = round(1-(1.23/r1.getBestVM()), 2) del(r1) return (z0, sc0) def run_tncs(): # CAN'T GET READABLE loggraph?!? 
i0 = phaser.InputNCS() i0.setSPAC_HALL(r.getSpaceGroupHall()) i0.setCELL6(r.getUnitCell()) i0.setREFL_DATA(r.getDATA()) # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF()) # i0.setLABI_F_SIGF(f,sigf) i0.setMUTE(True) # i0.setVERB(True) r1 = phaser.runNCS(i0) print dir(r1) print r1.logfile() # for l in r1.loggraph(): # print l print r1.loggraph().size() print r1.output_strings #print r1.hasTNCS() #print r1.summary() print r1.warnings() print r1.ErrorMessage() #print r1.getCentricE4() if r1.Success(): return(r1.loggraph()) def run_ano(): #from cStringIO import StringIO i0 = phaser.InputANO() i0.setSPAC_HALL(r.getSpaceGroupHall()) i0.setCELL6(r.getUnitCell()) # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF()) # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF()) # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs()) i0.setREFL_DATA(r.getDATA()) i0.setMUTE(True) r1 = phaser.runANO(i0) print r1.loggraph().__dict__.keys() print r1.loggraph().size() print r1.logfile() """ o = phaser.Output() redirect_str = StringIO() o.setPackagePhenix(file_object=redirect_str) r1 = phaser.runANO(i0,o) """ if r1.Success(): print 'SUCCESS' return(r1) # MAIN # Setup which modules are run # Read input MTZ file i = phaser.InputMR_DAT() i.setHKLI(convert_unicode(data_file)) i.setLABI_F_SIGF('F', 'SIGF') i.setMUTE(True) r = phaser.runMR_DAT(i) if r.Success(): if ellg: target_resolution = run_ellg() if cca: # Assumes ellg is run as well. z, solvent_content = run_cca(target_resolution) if tncs: n = run_tncs() if cca: out = {"z": z, "solvent_content": solvent_content, "target_resolution": target_resolution} if result_queue: result_queue.put(out) else: return out elif ellg: # ellg run by itself out = {"target_resolution": target_resolution} if result_queue: result_queue.put(out) else: return out else: # tNCS out = n if result_queue: result_queue.put(out) else: return out """
def run_phaser(datafile,
               spacegroup,
               output_id,
               db_settings,
               work_dir=False,
               cif=False,
               pdb=False,
               name=False,
               ncopy=1,
               cell_analysis=False,
               resolution=False,
               large_cell=False,
               run_before=False,
               ):
    """
    Run Phaser and passes results back to RAPD Redis DB
    **Requires Phaser src code!**

    datafile - input data as mtz
    spacegroup - The space group to run MR
    output_id - a Redis key where the results are sent
    db_settings - Redis connection settings for sending results
    work_dir - working directory
    cif - input search model path in mmCIF format (do not use with 'pdb')
    pdb - input search model path in PDB format (do not use with 'cif')
    name - root name for output files
    ncopy - number of molecules to search for
    cell_analysis - internal RAPD signal so all possible SG's are searched
    resolution - high res limit to run MR (float)
    large_cell - optimizes parameters to speed up MR with large unit cell.
    run_before - signal to run more comprehensive MR

    Side effects: changes CWD to work_dir, writes phaser.log /
    phaser_sum.log / result files there, and publishes a JSON result to
    Redis under `output_id` (expires in 24 h).
    """
    # Change to work_dir
    if not work_dir:
        work_dir = os.getcwd()
    os.chdir(work_dir)

    # Default the output root name to the space group being searched.
    if not name:
        name = spacegroup

    # Connect to Redis
    redis = connect_to_redis(db_settings)

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(datafile))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        # Reuse `i` for the MR_AUTO job, seeded from the MR_DAT result.
        i = phaser.InputMR_AUTO()
        # i.setREFL_DATA(r.getREFL_DATA())
        # i.setREFL_DATA(r.DATA_REFL())
        i.setREFL_F_SIGF(r.getMiller(), r.getFobs(), r.getSigFobs())
        i.setCELL6(r.getUnitCell())
        if cif:
            #i.addENSE_CIF_ID('model', cif, 0.7)
            ### Typo in PHASER CODE!!!###
            i.addENSE_CIT_ID('model', convert_unicode(cif), 0.7)
        if pdb:
            i.addENSE_PDB_ID('model', convert_unicode(pdb), 0.7)
        i.addSEAR_ENSE_NUM("model", ncopy)
        i.setSPAC_NAME(spacegroup)
        if cell_analysis:
            i.setSGAL_SELE("ALL")
            # Set it for worst case in orth
            # number of processes to run in parallel where possible
            i.setJOBS(1)
        else:
            # Only search the requested space group.
            i.setSGAL_SELE("NONE")
        if run_before:
            # Picks own resolution
            # Round 2, pick best solution as long as less that 10% clashes
            i.setPACK_SELE("PERCENT")
            i.setPACK_CUTO(0.1)
            #command += "PACK CUTOFF 10\n"
        else:
            # For first round and cell analysis
            # Only set the resolution limit in the first round or cell analysis.
            if resolution:
                i.setRESO_HIGH(resolution)
            else:
                # Otherwise it runs a second MR at full resolution!!
                # I dont think a second round is run anymore.
                # command += "RESOLUTION SEARCH HIGH OFF\n"
                if large_cell:
                    i.setRESO_HIGH(6.0)
                else:
                    i.setRESO_HIGH(4.5)
                i.setSEAR_DEEP(False)
            # Don"t seem to work since it picks the high res limit now.
            # Get an error when it prunes all the solutions away and TF has no input.
            # command += "PEAKS ROT SELECT SIGMA CUTOFF 4.0\n"
            # command += "PEAKS TRA SELECT SIGMA CUTOFF 6.0\n"
            # Turn off pruning in 2.6.0
            i.setSEAR_PRUN(False)
            # Choose more top peaks to help with getting it correct.
            i.setPURG_ROTA_ENAB(True)
            i.setPURG_ROTA_NUMB(3)
            #command += "PURGE ROT ENABLE ON\nPURGE ROT NUMBER 3\n"
            i.setPURG_TRAN_ENAB(True)
            i.setPURG_TRAN_NUMB(1)
            #command += "PURGE TRA ENABLE ON\nPURGE TRA NUMBER 1\n"
            # Only keep the top after refinement.
            i.setPURG_RNP_ENAB(True)
            i.setPURG_RNP_NUMB(1)
            #command += "PURGE RNP ENABLE ON\nPURGE RNP NUMBER 1\n"
        i.setROOT(convert_unicode(name))
        # i.setMUTE(False)
        i.setMUTE(True)
        # Delete the setup results
        del(r)
        # launch the run
        r = phaser.runMR_AUTO(i)
        if r.Success():
            if r.foundSolutions():
                print "Phaser has found MR solutions"
                #print "Top LLG = %f" % r.getTopLLG()
                #print "Top PDB file = %s" % r.getTopPdbFile()
            else:
                print "Phaser has not found any MR solutions"
        else:
            print "Job exit status FAILURE"
            print r.ErrorName(), "ERROR :", r.ErrorMessage()
        # Persist the full and summary logs next to the results.
        with open('phaser.log', 'w') as log:
            log.write(r.logfile())
            # NOTE(review): redundant — the `with` block closes the file.
            log.close()
        with open('phaser_sum.log', 'w') as log:
            log.write(r.summary())
            log.close()
        if r.foundSolutions():
            rfz = None
            tfz = None
            tncs = False
            # Parse results
            # Pull RFZ/TFZ scores out of the top solution's annotation text.
            for p in r.getTopSet().ANNOTATION.split():
                if p.count('RFZ'):
                    if p.count('=') in [1]:
                        rfz = float(p.split('=')[-1])
                if p.count('RF*0'):
                    rfz = "NC"
                if p.count('TFZ'):
                    if p.count('=') in [1]:
                        tfz = p.split('=')[-1]
                        if tfz == '*':
                            tfz = 'arbitrary'
                        else:
                            tfz = float(tfz)
                if p.count('TF*0'):
                    tfz = "NC"
            # Flag translational NCS if the solution text mentions "+TNCS".
            tncs_test = [1 for line in r.getTopSet().unparse().splitlines()
                         if line.count("+TNCS")]
            tncs = bool(len(tncs_test))
            phaser_result = {"ID": name,
                             "solution": r.foundSolutions(),
                             "pdb": r.getTopPdbFile(),
                             "mtz": r.getTopMtzFile(),
                             "gain": float(r.getTopLLG()),
                             "rfz": rfz,
                             # "tfz": r.getTopTFZ(),
                             "tfz": tfz,
                             "clash": r.getTopSet().PAK,
                             "dir": os.getcwd(),
                             "spacegroup": r.getTopSet().getSpaceGroupName().replace(' ', ''),
                             "tNCS": tncs,
                             "nmol": r.getTopSet().NUM,
                             "adf": None,
                             "peak": None,
                             }
            # make tar.bz2 of result files
            # l = ['pdb', 'mtz', 'adf', 'peak']
            # archive = "%s.tar.bz2" % name
            # with tarfile.open(archive, "w:bz2") as tar:
            #     for f in l:
            #         fo = phaser_result.get(f, False)
            #         if fo:
            #             if os.path.exists(fo):
            #                 tar.add(fo)
            #     tar.close()
            # phaser_result['tar'] = os.path.join(work_dir, archive)

            # New procedure for making tar of results
            # Create directory
            os.mkdir(name)
            # Go through and copy files to archive directory
            file_types = ("pdb", "mtz", "adf", "peak")
            for file_type in file_types:
                target_file = phaser_result.get(file_type, False)
                if target_file:
                    if os.path.exists(target_file):
                        # Copy the file to the directory to be archived
                        shutil.copy(target_file, name+"/.")
            # Create the archive
            archive_result = archive.create_archive(name)
            archive_result["description"] = name
            phaser_result["tar"] = archive_result
            phaser_result["pdb_file"] = os.path.join(work_dir, r.getTopPdbFile())
        else:
            phaser_result = {"ID": name,
                             "solution": False,
                             "message": "No solution"}
        # Print the result so it can be seen in the rapd._phaser.log if needed
        print phaser_result
        # Key should be deleted once received, but set the key to expire in
        # 24 hours just in case.
        redis.setex(output_id, 86400, json.dumps(phaser_result))
        # Do a little sleep to make sure results are in Redis for
        # postprocess_phaser
        time.sleep(0.1)
def __init__(self, site, command, processed_results=False, tprint=False,
             logger=False, verbosity=False):
    """Initialize the plugin.

    site - site definition object (may provide RAPD_PYTHON_PATH)
    command - dict describing the job ("preferences", "directories",
              "input_data", "command")
    processed_results - results object for referencing a dataset in the DB
    tprint - terminal-print callable; a no-op stub is installed if absent
    logger - logging.Logger; falls back to the "RAPDLogger" instance
    verbosity - verbose flag stored on self.verbose
    """
    Thread.__init__(self)

    # If the logging instance is passed in...
    if logger:
        self.logger = logger
    else:
        # Otherwise get the logger Instance
        self.logger = logging.getLogger("RAPDLogger")
        self.logger.debug("__init__")

    # Keep track of start time
    self.start_time = time.time()

    # Store tprint for use throughout
    if tprint:
        self.tprint = tprint
    # Dead end if no tprint passed
    else:
        def func(arg=False, level=False, verbosity=False, color=False):
            """Dummy function"""
            pass
        self.tprint = func

    # Used for sending results back to DB referencing a dataset
    self.processed_results = processed_results

    # Some logging
    self.logger.info(command)

    self.verbose = verbosity

    # Store passed-in variables
    self.site = site
    self.command = command
    self.preferences = self.command.get("preferences", {})

    # Params
    self.working_dir = self.command["directories"].get("work", os.getcwd())
    self.test = self.preferences.get("test", False)
    #self.test = self.preferences.get("test", True)
    # Limit number of runs on cluster
    #self.sample_type = self.preferences.get("type", "protein")
    #self.solvent_content = self.preferences.get("solvent_content", 0.55)
    self.clean = self.preferences.get("clean", True)
    # self.verbose = self.command["preferences"].get("verbose", False)
    self.data_file = xutils.convert_unicode(
        self.command["input_data"].get("data_file"))
    # Used for setting up Redis connection
    self.db_settings = self.command["input_data"].get("db_settings")
    #self.nproc = self.preferences.get("nproc", 1)

    # If no launcher is passed in, use local_subprocess in a
    # multiprocessing.Pool
    self.computer_cluster = xutils.load_cluster_adapter(self)
    if self.computer_cluster:
        self.launcher = self.computer_cluster.process_cluster
        self.batch_queue = self.computer_cluster.check_queue(
            self.command.get('command'))
    else:
        # No cluster: run jobs locally in a multiprocessing pool sized to
        # the "nproc" preference (default: all cores minus one).
        self.launcher = local_subprocess
        self.pool = mp_pool(self.preferences.get("nproc",
                                                 cpu_count() - 1))
        self.manager = mp_manager()

    # Setup a multiprocessing pool if not using a computer cluster.
    #if not self.computer_cluster:
    #    self.pool = mp_pool(self.nproc)

    # Set Python path for subcontractors.rapd_phaser
    self.rapd_python = "rapd.python"
    if self.site:
        if hasattr(self.site, "RAPD_PYTHON_PATH"):
            self.rapd_python = self.site.RAPD_PYTHON_PATH
def run_phaser_module_OLD(data_file, inp=False):
    """
    Run separate module of Phaser to get results before running full job.
    Setup so that I can read the data in once and run multiple modules.

    Deprecated in favor of run_phaser_module (keyword-argument interface).

    data_file - input dataset mtz file
    inp - False (run tNCS analysis), a str (model path; run eLLG only), or a
          4-tuple (np, na, res0, model path; run eLLG + Matthews/CCA).

    NOTE(review): the nested runners read `f`, `np`, `na`, `res0` from this
    scope via closure; those names are only bound in the `if inp:` branches
    below, so the pairing of flags and bindings is load-bearing.
    """
    # if self.verbose:
    #     self.logger.debug('Utilities::runPhaserModule')

    target_resolution = 0.0
    z = 0
    solvent_content = 0.0

    def run_ellg():
        # eLLG analysis: returns recommended target resolution for model `f`.
        res0 = 0.0
        i0 = phaser.InputMR_ELLG()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        i0.setREFL_DATA(r.getDATA())
        # NOTE(review): `in ('cif')` is a substring test, not a tuple
        # membership test — same pitfall as in run_phaser_module.
        if f[-3:] in ('cif'):
            i0.addENSE_CIT_ID('model', convert_unicode(f), 0.7)
        else:
            i0.addENSE_PDB_ID("model", convert_unicode(f), 0.7)
        # i.addSEAR_ENSE_NUM("junk",5)
        r1 = phaser.runMR_ELLG(i0)
        #print r1.logfile()
        if r1.Success():
            res0 = r1.get_target_resolution('model')
        del (r1)
        return res0

    def run_cca():
        # Cell-content analysis at resolution `res0` (from the inp tuple).
        # Returns (best Z, solvent content fraction).
        z0 = 0
        sc0 = 0.0
        i0 = phaser.InputCCA()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setMUTE(True)
        # Have to set high res limit!!
        i0.setRESO_HIGH(res0)
        if np > 0:
            i0.addCOMP_PROT_NRES_NUM(np, 1)
        if na > 0:
            i0.addCOMP_NUCL_NRES_NUM(na, 1)
        r1 = phaser.runCCA(i0)
        #print r1.logfile()
        if r1.Success():
            z0 = r1.getBestZ()
            sc0 = 1 - (1.23 / r1.getBestVM())
        del (r1)
        return (z0, sc0)

    def run_ncs():
        # tNCS analysis; returns the raw result object on success.
        i0 = phaser.InputNCS()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        i0.setREFL_DATA(r.getDATA())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setLABI_F_SIGF(f,sigf)
        i0.setMUTE(True)
        # i0.setVERB(True)
        r1 = phaser.runNCS(i0)
        print r1.logfile()
        print r1.loggraph().size()
        print r1.loggraph().__dict__.keys()
        #print r1.getCentricE4()
        if r1.Success():
            return (r1)

    def run_ano():
        # Anomalous-signal analysis. NOTE(review): never invoked below.
        #from cStringIO import StringIO
        i0 = phaser.InputANO()
        i0.setSPAC_HALL(r.getSpaceGroupHall())
        i0.setCELL6(r.getUnitCell())
        # i0.setREFL(p.getMiller(),p.getF(),p.getSIGF())
        # i0.setREFL_F_SIGF(r.getMiller(),r.getF(),r.getSIGF())
        # i0.setREFL_F_SIGF(p.getMiller(),p.getIobs(),p.getSigIobs())
        i0.setREFL_DATA(r.getDATA())
        i0.setMUTE(True)
        r1 = phaser.runANO(i0)
        print r1.loggraph().__dict__.keys()
        print r1.loggraph().size()
        print r1.logfile()
        """
        o = phaser.Output()
        redirect_str = StringIO()
        o.setPackagePhenix(file_object=redirect_str)
        r1 = phaser.runANO(i0,o)
        """
        if r1.Success():
            print 'SUCCESS'
            return (r1)

    # Setup which modules are run
    matthews = False
    if inp:
        ellg = True
        ncs = False
        if type(inp) == str:
            f = inp
        else:
            # Tuple input carries residue counts and resolution for CCA.
            np, na, res0, f = inp
            matthews = True
    else:
        ellg = False
        ncs = True

    # Read the dataset
    i = phaser.InputMR_DAT()
    i.setHKLI(convert_unicode(data_file))
    i.setLABI_F_SIGF('F', 'SIGF')
    i.setMUTE(True)
    r = phaser.runMR_DAT(i)
    if r.Success():
        if ellg:
            target_resolution = run_ellg()
        if matthews:
            z, solvent_content = run_cca()
        if ncs:
            n = run_ncs()
    if matthews:
        # Assumes ellg is run as well.
        # return (z,sc,res)
        return {
            "z": z,
            "solvent_content": solvent_content,
            "target_resolution": target_resolution
        }
    elif ellg:
        # ellg run by itself
        # return target_resolution
        return {"target_resolution": target_resolution}
    else:
        # NCS
        # NOTE(review): `n` is unbound if runMR_DAT failed — NameError here.
        return n
def __init__(self, command, site=False, processed_results=False,
             tprint=False, logger=False, verbosity=False):
    """Initialize the plugin.

    command - dict describing the job ("preferences", "directories",
              "input_data", "command")
    site - site definition object (may provide RAPD_PYTHON_PATH)
    processed_results - results object for referencing a dataset in the DB
    tprint - terminal-print callable; a no-op stub is installed if absent
    logger - logging.Logger; falls back to the "RAPDLogger" instance
    verbosity - verbose flag stored on self.verbose
    """
    Thread.__init__(self)

    # If the logging instance is passed in...
    if logger:
        self.logger = logger
    else:
        # Otherwise get the logger Instance
        self.logger = logging.getLogger("RAPDLogger")
        self.logger.debug("__init__")

    # Keep track of start time
    self.start_time = time.time()

    # Store tprint for use throughout
    if tprint:
        self.tprint = tprint
    # Dead end if no tprint passed
    else:
        def func(arg=False, level=False, verbosity=False, color=False):
            """Dummy function"""
            pass
        self.tprint = func

    # Used for sending results back to DB referencing a dataset
    self.processed_results = processed_results

    # Some logging
    self.logger.info(command)

    self.verbose = verbosity

    # Store passed-in variables
    self.site = site
    self.command = command
    self.preferences = self.command.get("preferences", {})

    # Params
    self.working_dir = self.command["directories"].get("work", os.getcwd())
    #self.test = self.preferences.get("test", False)
    # NOTE(review): default True here (unlike the sibling plugin's False) —
    # comment says this limits the number of runs on the cluster.
    self.test = self.preferences.get(
        "test", True)  # Limit number of runs on cluster
    #self.sample_type = self.preferences.get("type", "protein")
    #self.solvent_content = self.preferences.get("solvent_content", 0.55)

    # Number of molecules specified
    #self.nmol = self.preferences.get('nmol', False)

    # Input data MTZ file
    self.data_file = xutils.convert_unicode(
        self.command["input_data"].get("data_file"))

    # Input PDB/mmCIF file or PDB code.
    self.struct_file = xutils.convert_unicode(
        self.command["input_data"].get("struct_file"))

    # Save preferences
    self.clean = self.preferences.get("clean", True)
    # Calc ADF for each solution (creates a lot of big map files).
    self.adf = self.preferences.get("adf", False)

    # Check if there is a computer cluster and load adapter.
    self.computer_cluster = xutils.load_cluster_adapter(self)
    if self.computer_cluster:
        self.launcher = self.computer_cluster.process_cluster
        self.batch_queue = self.computer_cluster.check_queue(
            self.command.get('command'))
    else:
        # if NOT using a computer cluster setup a multiprocessing.pool and
        # manager for queues.
        self.launcher = local_subprocess
        self.pool = mp_pool(self.preferences.get("nproc", cpu_count() - 1))
        self.manager = mp_manager()

    # Set Python path for subcontractors.rapd_phaser
    self.rapd_python = "rapd.python"
    if self.site and hasattr(self.site, "RAPD_PYTHON_PATH"):
        self.rapd_python = self.site.RAPD_PYTHON_PATH
def get_pdb_info(struct_file, data_file, dres, matthews=True, chains=True):
    """Get info from a PDB or mmCIF search model.

    struct_file - model file path (.cif is read as mmCIF, anything else as PDB)
    data_file - dataset mtz passed through to run_phaser_module
    dres - dataset resolution passed through to run_phaser_module (CCA)
    matthews - also run Phaser CCA (Matthews) per chain / whole model
    chains - write a per-chain mmCIF file and analyze each unique chain

    Returns a dict keyed by chain id plus 'all' (whole model); each value
    carries file path, residue counts, molecular weights, and — depending
    on `matthews` — NMol/SC/res estimates from Phaser.

    FIX(review): the run_phaser_module calls below previously passed
    `struct_file=...`, but run_phaser_module has no such parameter (the
    model-file argument is named `mmcif`), so every call raised TypeError.
    """
    # Get rid of ligands and water so Phenix won't error.
    np = 0          # running protein-residue count over all chains
    na = 0          # running nucleic-acid-residue count over all chains
    nmol = 1        # fallback number of molecules in AU
    sc = 0.55       # fallback solvent content
    nchains = 0
    res1 = 0.0      # fallback target resolution
    d = {}          # results, keyed by chain id (+ 'all')
    l = []          # chain ids already seen (to skip HETATM duplicates)

    # Read in the file
    struct_file = convert_unicode(struct_file)
    if struct_file[-3:].lower() == 'cif':
        root = iotbx_mmcif.cif_input(
            file_name=struct_file).construct_hierarchy()
    else:
        root = iotbx_pdb.input(struct_file).construct_hierarchy()

    # Go through the chains
    for chain in root.models()[0].chains():
        # Number of protein residues
        np1 = 0
        # Number of nucleic acid residues
        na1 = 0
        # Sometimes Hetatoms are AA with same segid.
        if l.count(chain.id) == 0:
            l.append(chain.id)
            repeat = False
            nchains += 1
        else:
            repeat = True

        # Count the number of AA and NA in pdb file.
        for rg in chain.residue_groups():
            if rg.atoms()[0].parent(
            ).resname in iotbx_pdb.common_residue_names_amino_acid:
                np1 += 1
            if rg.atoms()[0].parent(
            ).resname in iotbx_pdb.common_residue_names_rna_dna:
                na1 += 1
            # Not sure if I get duplicates?
            if rg.atoms()[0].parent().resname in \
                    iotbx_pdb.common_residue_names_ccp4_mon_lib_rna_dna:
                na1 += 1

        # Limit to 10 chains?!?
        if nchains < 10:
            # Do not split up PDB if run from cell analysis
            if chains and not repeat:
                # Save info for each chain.
                if np1 or na1:
                    # Write new pdb files for each chain.
                    temp = iotbx_pdb.hierarchy.new_hierarchy_from_chain(chain)
                    # Long way of making sure that user does not have
                    # directory named '.pdb' or '.cif'
                    n = os.path.join(os.path.dirname(struct_file), "%s_%s.cif" % \
                        (os.path.basename(struct_file)[:os.path.basename(struct_file).find('.')], \
                        chain.id))
                    # Write chain as mmCIF file.
                    temp.write_mmcif_file(file_name=n)
                    d[chain.id] = {
                        'file': n,
                        'NRes': np1 + na1,
                        'MWna': na1 * 330,
                        'MWaa': np1 * 110,
                        'MW': na1 * 330 + np1 * 110
                    }
                    if matthews:
                        # Run Matthews Calc. on chain
                        phaser_return = run_phaser_module(data_file=data_file,
                                                          ellg=True,
                                                          cca=True,
                                                          mmcif=n,
                                                          dres=dres,
                                                          np=np1,
                                                          na=na1)
                        d[chain.id].update({
                            'NMol': phaser_return.get("z", nmol),
                            'SC': phaser_return.get("solvent_content", sc),
                            'res': phaser_return.get("target_resolution", res1)
                        })
                    else:
                        # eLLG only — just a resolution estimate.
                        phaser_return = run_phaser_module(data_file=data_file,
                                                          ellg=True,
                                                          mmcif=n)
                        d[chain.id].update({
                            'res': phaser_return.get("target_resolution", res1)
                        })

        # Add up residue count
        np += np1
        na += na1

    d['all'] = {
        'file': struct_file,
        'NRes': np + na,
        'MWna': na * 330,
        'MWaa': np * 110,
        'MW': na * 330 + np * 110
    }

    # Run on entire PDB
    if matthews:
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          cca=True,
                                          mmcif=struct_file,
                                          dres=dres,
                                          np=np,
                                          na=na)
        d['all'].update({
            'NMol': phaser_return.get("z", nmol),
            'SC': phaser_return.get("solvent_content", sc),
            'res': phaser_return.get("target_resolution", res1)
        })
    else:
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          mmcif=struct_file)
        d['all'].update({'res': phaser_return.get("target_resolution", res1)})

    return d
def get_pdb_info(cif_file, data_file, dres, matthews=True, chains=True):
    """Get info from PDB of mmCIF file.

    NOTE(review): this redefines get_pdb_info — an earlier definition of the
    same name (taking `struct_file`) exists above in this file; being later,
    this version wins at import time. Confirm which one is intended to live.

    cif_file - model file path (.cif read as mmCIF, anything else as PDB)
    data_file - dataset mtz passed through to run_phaser_module
    dres - dataset resolution passed through to run_phaser_module (CCA)
    matthews - also run Phaser CCA (Matthews) per chain / whole model
    chains - write a per-chain mmCIF file and analyze each unique chain

    Returns dict keyed by chain id plus 'all' with file/residue/MW info and,
    depending on `matthews`, NMol/SC/res estimates from Phaser.
    """
    # Get rid of ligands and water so Phenix won't error.
    np = 0          # running protein-residue count over all chains
    na = 0          # running nucleic-acid-residue count over all chains
    nmol = 1        # fallback number of molecules in AU
    sc = 0.55       # fallback solvent content
    nchains = 0
    res1 = 0.0      # fallback target resolution
    d = {}          # results, keyed by chain id (+ 'all')
    l = []          # chain ids already seen (to skip HETATM duplicates)

    # Read in the file
    cif_file = convert_unicode(cif_file)
    if cif_file[-3:].lower() == 'cif':
        root = iotbx_mmcif.cif_input(file_name=cif_file).construct_hierarchy()
    else:
        root = iotbx_pdb.input(cif_file).construct_hierarchy()

    # Go through the chains
    for chain in root.models()[0].chains():
        # Number of protein residues
        np1 = 0
        # Number of nucleic acid residues
        na1 = 0
        # Sometimes Hetatoms are AA with same segid.
        if l.count(chain.id) == 0:
            l.append(chain.id)
            repeat = False
            nchains += 1
        else:
            repeat = True
        # Count the number of AA and NA in pdb file.
        for rg in chain.residue_groups():
            if rg.atoms()[0].parent().resname in iotbx_pdb.common_residue_names_amino_acid:
                np1 += 1
            if rg.atoms()[0].parent().resname in iotbx_pdb.common_residue_names_rna_dna:
                na1 += 1
            # Not sure if I get duplicates?
            if rg.atoms()[0].parent().resname in \
                    iotbx_pdb.common_residue_names_ccp4_mon_lib_rna_dna:
                na1 += 1
        # Limit to 10 chains?!?
        if nchains < 10:
            # Do not split up PDB if run from cell analysis
            if chains and not repeat:
                # Save info for each chain.
                if np1 or na1:
                    # Write new pdb files for each chain.
                    temp = iotbx_pdb.hierarchy.new_hierarchy_from_chain(chain)
                    # Long was of making sure that user does not have directory named '.pdb' or
                    # '.cif'
                    #n = os.path.join(os.path.dirname(cif_file), "%s_%s.pdb" % \
                    n = os.path.join(os.path.dirname(cif_file), "%s_%s.cif" % \
                        (os.path.basename(cif_file)[:os.path.basename(cif_file).find('.')], \
                        chain.id))
                    #temp.write_pdb_file(file_name=n)
                    temp.write_mmcif_file(file_name=n)
                    # MW estimated at 330 Da per NA residue, 110 Da per AA.
                    d[chain.id] = {'file': n,
                                   'NRes': np1+na1,
                                   'MWna': na1*330,
                                   'MWaa': np1*110,
                                   'MW': na1*330+np1*110}
                    if matthews:
                        # Run Matthews Calc. on chain
                        #phaser_return = run_phaser_module((np1, na1, dres, n, data_file))
                        #phaser_return = run_phaser_module(data_file, (np1, na1, dres, n))
                        phaser_return = run_phaser_module(data_file=data_file,
                                                          ellg=True,
                                                          cca=True,
                                                          mmcif=n,
                                                          dres=dres,
                                                          np=np1,
                                                          na=na1)
                        d[chain.id].update({'NMol': phaser_return.get("z", nmol),
                                            'SC': phaser_return.get("solvent_content", sc),
                                            'res': phaser_return.get("target_resolution", res1)})
                    else:
                        #res1 = run_phaser_module(n)
                        # eLLG only — just a resolution estimate.
                        phaser_return = run_phaser_module(data_file=data_file,
                                                          ellg=True,
                                                          mmcif=n)
                        d[chain.id].update({'res': phaser_return.get("target_resolution", res1)})
                    """
                    d[chain.id] = {'file': n,
                                   'NRes': np1+na1,
                                   'MWna': na1*330,
                                   'MWaa': np1*110,
                                   'MW': na1*330+np1*110,
                                   'NMol': phaser_return.get("z", nmol),
                                   'SC': phaser_return.get("solvent_content", sc),
                                   'res': phaser_return.get("target_resolution", res1)}
                    """
        # Add up residue count
        np += np1
        na += na1

    d['all'] = {'file': cif_file,
                'NRes': np+na,
                'MWna': na*330,
                'MWaa': np*110,
                'MW': na*330+np*110}
    # Run on entire PDB
    if matthews:
        #phaser_return = run_phaser_module((np, na, dres, cif_file, data_file))
        #phaser_return = run_phaser_module(data_file, (np, na, dres, cif_file))
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          cca=True,
                                          mmcif=cif_file,
                                          dres=dres,
                                          np=np,
                                          na=na)
        d['all'].update({'NMol': phaser_return.get("z", nmol),
                         'SC': phaser_return.get("solvent_content", sc),
                         'res': phaser_return.get("target_resolution", res1)})
    else:
        #phaser_return = run_phaser_module((np, na, dres, cif_file, data_file))
        #phaser_return = run_phaser_module(data_file, (np, na, dres, cif_file))
        phaser_return = run_phaser_module(data_file=data_file,
                                          ellg=True,
                                          mmcif=cif_file)
        d['all'].update({'res': phaser_return.get("target_resolution", res1)})
    """
    d['all'] = {'file': cif_file,
                'NRes': np+na,
                'MWna': na*330,
                'MWaa': np*110,
                'MW': na*330+np*110,
                'NMol': phaser_return.get("z", nmol),
                'SC': phaser_return.get("solvent_content", sc),
                'res': phaser_return.get("target_resolution", res1)}
    """
    return d
def __init__(self, command, processed_results=False, computer_cluster=False,
             tprint=False, logger=False, verbosity=False):
    """Initialize the plugin.

    command - dict describing the job ("preferences", "directories",
              "input_data", "command")
    processed_results - results object for referencing a dataset in the DB
    computer_cluster - cluster adapter instance; when falsy, jobs run via
                       local_subprocess in a multiprocessing pool
    tprint - terminal-print callable; a no-op stub is installed if absent
    logger - logging.Logger; falls back to the "RAPDLogger" instance
    verbosity - verbose flag stored on self.verbose
    """
    Thread.__init__(self)

    # If the logging instance is passed in...
    if logger:
        self.logger = logger
    else:
        # Otherwise get the logger Instance
        self.logger = logging.getLogger("RAPDLogger")
        self.logger.debug("__init__")

    # Keep track of start time
    self.start_time = time.time()

    # Store tprint for use throughout
    if tprint:
        self.tprint = tprint
    # Dead end if no tprint passed
    else:
        def func(arg=False, level=False, verbosity=False, color=False):
            """Dummy function"""
            pass
        self.tprint = func

    # Used for sending results back to DB referencing a dataset
    self.processed_results = processed_results

    # Some logging
    self.logger.info(command)

    self.verbose = verbosity

    # Store passed-in variables
    self.command = command
    self.preferences = self.command.get("preferences", {})

    # Params
    self.working_dir = self.command["directories"].get("work", os.getcwd())
    self.test = self.preferences.get("test", False)
    self.sample_type = self.preferences.get("type", "protein")
    self.solvent_content = self.preferences.get("solvent_content", 0.55)
    self.clean = self.preferences.get("clean", True)
    # self.verbose = self.command["preferences"].get("verbose", False)
    self.datafile = xutils.convert_unicode(
        self.command["input_data"].get("datafile"))
    # Used for setting up Redis connection
    self.db_settings = self.command["input_data"].get("db_settings")
    self.nproc = self.preferences.get("nproc", 1)

    # Use the passed-in cluster adapter. (A read of the "computer_cluster"
    # preference used to happen here but was immediately overwritten by the
    # parameter — dead assignment removed.)
    self.computer_cluster = computer_cluster
    if self.computer_cluster:
        self.launcher = self.computer_cluster.process_cluster
        self.batch_queue = self.computer_cluster.check_queue(
            self.command.get('command'))
    else:
        # If no launcher is passed in, use local_subprocess in a
        # multiprocessing.Pool sized by the "nproc" preference.
        self.launcher = local_subprocess
        self.pool = mp_pool(self.nproc)