def run_protein_flexibility(pf_obj):
    """Run ``gmx trjconv`` followed by ``gmx rmsf`` for one simulation.

    All outputs are written to a ``flexibility`` sub-directory of
    ``pf_obj.get_path()``, which is created through ``make_directory``.

    ``pf_obj`` supplies the input file names, the output title/suffixes,
    the text piped into each tool's prompt and the ``-b``/``-e`` limits.
    The GROMACS location and the ``-dt`` value for rmsf are read from the
    ``gromacs_path`` and ``time_dt`` objects (via ``.value``).

    Nothing is returned; the tools' stdout is captured and discarded and
    their exit status is not inspected.
    """

    def _echo_into_gmx(prompt_answer, tool_and_args):
        # Every call has the same shape: echo the answer for the
        # interactive group prompt and pipe it into the gmx tool.
        shell_line = ("echo " + prompt_answer + " | " +
                      gromacs_path.value + "./gmx " + tool_and_args)
        Popen(shell_line, shell=True, stdout=PIPE).communicate()

    flex_dir = os.path.join(pf_obj.get_path(), "flexibility")
    make_directory(flex_dir)

    # Inputs: the water-free run input and trajectory.
    nonwater_tpr = os.path.join(pf_obj.get_path(),
                                pf_obj.get_nonwater_tpr())
    nonwater_xtc = os.path.join(pf_obj.get_path(),
                                pf_obj.get_nonwater_xtc())

    # Outputs: <title>.<suffix>.gro and rmsf_<title>.<suffix>.xvg
    traj_gro = os.path.join(
        flex_dir,
        pf_obj.get_title_output() + "." +
        pf_obj.get_output_sufix_trjconv() + ".gro")
    rmsf_xvg = os.path.join(
        flex_dir,
        "rmsf_" + pf_obj.get_title_output() + "." +
        pf_obj.get_output_sufix_rmsf() + ".xvg")

    # Step 1: trjconv writes the .gro that rmsf uses as its -s input.
    _echo_into_gmx(
        pf_obj.get_echo_trjconv(),
        "trjconv -s " + nonwater_tpr +
        " -f " + nonwater_xtc +
        " -o " + traj_gro +
        " -b " + str(pf_obj.get_initial_frame_trjconv()) +
        " -e " + str(pf_obj.get_final_frame_trjconv()))

    # Step 2: rmsf with the -res flag.
    _echo_into_gmx(
        pf_obj.get_echo_rmsf(),
        "rmsf -f " + nonwater_xtc +
        " -s " + traj_gro +
        " -o " + rmsf_xvg +
        " -b " + str(pf_obj.get_initial_frame_rmsf()) +
        " -e " + str(pf_obj.get_final_frame_rmsf()) +
        " -dt " + str(time_dt.value) +
        " -res")
def run_protein_flexibility(pf_obj):
    """Produce the trjconv ``.gro`` and the rmsf ``.xvg`` for *pf_obj*.

    The function creates ``<pf_obj.get_path()>/flexibility`` and runs two
    shell pipelines one after the other: ``gmx trjconv`` first, then
    ``gmx rmsf -res`` using the trjconv output as its ``-s`` argument.
    File names, prompt answers and ``-b``/``-e`` values come from
    *pf_obj*; the GROMACS directory and the rmsf ``-dt`` value come from
    ``gromacs_path.value`` and ``time_dt.value``.

    Returns ``None``. Command output is captured and dropped, and exit
    codes are not checked.
    """
    simulation_dir = pf_obj.get_path()
    output_dir = os.path.join(simulation_dir, "flexibility")
    make_directory(output_dir)

    tpr_in = os.path.join(simulation_dir, pf_obj.get_nonwater_tpr())
    xtc_in = os.path.join(simulation_dir, pf_obj.get_nonwater_xtc())

    gro_name = "".join((pf_obj.get_title_output(), ".",
                        pf_obj.get_output_sufix_trjconv(), ".gro"))
    xvg_name = "".join(("rmsf_", pf_obj.get_title_output(), ".",
                        pf_obj.get_output_sufix_rmsf(), ".xvg"))
    gro_out = os.path.join(output_dir, gro_name)
    xvg_out = os.path.join(output_dir, xvg_name)

    # Each pipeline is a list of tokens separated by exactly one space.
    trjconv_tokens = [
        "echo", pf_obj.get_echo_trjconv(), "|",
        gromacs_path.value + "./gmx", "trjconv",
        "-s", tpr_in,
        "-f", xtc_in,
        "-o", gro_out,
        "-b", str(pf_obj.get_initial_frame_trjconv()),
        "-e", str(pf_obj.get_final_frame_trjconv()),
    ]
    trjconv_proc = Popen(" ".join(trjconv_tokens), shell=True, stdout=PIPE)
    trjconv_proc.communicate()

    rmsf_tokens = [
        "echo", pf_obj.get_echo_rmsf(), "|",
        gromacs_path.value + "./gmx", "rmsf",
        "-f", xtc_in,
        "-s", gro_out,
        "-o", xvg_out,
        "-b", str(pf_obj.get_initial_frame_rmsf()),
        "-e", str(pf_obj.get_final_frame_rmsf()),
        "-dt", str(time_dt.value),
        "-res",
    ]
    rmsf_proc = Popen(" ".join(rmsf_tokens), shell=True, stdout=PIPE)
    rmsf_proc.communicate()
# SQL context layered on the SparkContext `sc` created before this point.
sqlCtx = SQLContext(sc)

# All settings are read from config.ini in the working directory.
config = configparser.ConfigParser()
config.read('config.ini')

# Vina configuration for broadcast
config_vina = config.get('VINA', 'config_file')
vina_path = config.get('VINA', 'vina_program')
pdbqt_ligand_path = config.get('DEFAULT', 'pdbqt_ligand_path')
pdbqt_receptor_path = config.get('DEFAULT', 'pdbqt_receptor_path')
path_save_output = config.get('DEFAULT', 'path_save_structure')
path_save_log = config.get('DEFAULT', 'path_save_log')
path_spark_drugdesign = config.get('DRUGDESIGN', 'path_spark_drugdesign')

# Output locations: pass each through preparing_path, then create it.
path_save_log = preparing_path(path_save_log)
make_directory(path_save_log)
path_save_output = preparing_path(path_save_output)
make_directory(path_save_output)

# Adding Python Source file
sc.addPyFile(os.path.join(path_spark_drugdesign, "docking_description.py"))

# Broadcast: from here on each of these names is a Spark broadcast
# handle, so the plain string is reached through `.value`.
(vina_path,
 pdbqt_ligand_path,
 pdbqt_receptor_path,
 path_save_output,
 path_save_log) = (
    sc.broadcast(plain_value)
    for plain_value in (vina_path,
                        pdbqt_ligand_path,
                        pdbqt_receptor_path,
                        path_save_output,
                        path_save_log)
)

# The Vina configuration file is distributed as a file, not broadcast.
sc.addFile(config_vina)
def _run_shell(command):
    """Run *command* through the shell and wait for it to finish.

    stdout is captured and discarded; the exit status is not checked.
    """
    proc = Popen(command, shell=True, stdout=PIPE)
    proc.communicate()


def _remove_if_file(path):
    """Delete *path* when it exists as a regular file; otherwise do nothing."""
    if os.path.isfile(path):
        os.remove(path)


def run_trajetory(md_obj):
    """Treat one simulation trajectory with GROMACS and start basic analysis.

    Everything is written to ``<md_obj.get_path()>/analysis`` (created via
    ``make_directory``). The steps, each an external ``gmx`` call, are:

    1. trjconv passes: ``-pbc whole``, first-frame extraction,
       ``-pbc nojump``, ``-center``, ``-ur compact -pbc atom`` and
       ``-fit rot+trans``, ending in the ``_fit`` trajectory.
    2. ``convert-tpr`` / ``trjconv`` with the ``non-water`` group to get
       the ``_non-water`` tpr, xtc and pdb files.
    3. ``make_ndx`` + ``select`` + ``trjconv`` to write a ``MODEL`` /
       ``ENDMDL`` delimited pdb of the protein (and ligand, when
       ``search_for_ligand_ndx_file`` reports one) with its water layer.
    4. Removal of the intermediate files, then ``run_basic_analysis``.

    Args:
        md_obj: object providing ``get_path()``,
            ``get_simulation_prefix()``, ``get_prefix_ref()`` and
            ``get_repetion_number()``.

    The settings ``gromacs_path``, ``water_layer_thickness``,
    ``time_dt_pdb`` and ``time_dt`` are read through their ``.value``
    attribute. Returns ``None``; command exit codes are not checked.
    """
    silence = ">/dev/null 2>/dev/null"
    gmx_dir = gromacs_path.value
    # NOTE(review): trjconv/convert-tpr are invoked as "<dir>./gmx" while
    # make_ndx/select use "<dir>gmx"; both spellings are kept as found.
    gmx = gmx_dir + "./gmx"
    gmx_bare = gmx_dir + "gmx"

    sim_path = md_obj.get_path()
    sim_prefix = md_obj.get_simulation_prefix()
    ref_prefix = md_obj.get_prefix_ref()
    repetition = str(md_obj.get_repetion_number())
    thickness = str(water_layer_thickness.value)
    pdb_dt = str(time_dt_pdb.value)

    ana_dir = os.path.join(sim_path, "analysis")
    make_directory(ana_dir)

    def ana_file(tag, extension):
        # <ana_dir>/<prefix_ref><tag><repetition><extension>
        return os.path.join(
            ana_dir, "".join([ref_prefix, tag, repetition, extension]))

    def trjconv(group_answers, *arguments):
        # `group_answers` is echoed into trjconv's interactive prompt.
        _run_shell(" ".join(
            ("echo", group_answers, "|", gmx, "trjconv")
            + arguments + (silence,)))

    # Original file names from the simulation
    reference_xtc = os.path.join(sim_path, sim_prefix + ".xtc")
    reference_tpr = os.path.join(sim_path, sim_prefix + ".tpr")

    # File names after trajectory treatment.
    allatom_xtc = ana_file("_fit.", ".xtc")
    allatom_tpr = reference_tpr
    nonwater_xtc = ana_file("_non-water.", ".xtc")
    nonwater_tpr = ana_file("_non-water.", ".tpr")
    nonwater_pdb = ana_file("_non-water.", ".pdb")
    waterlayer_pdb = ana_file("_water-" + thickness + "A-layer.", ".pdb")

    # Intermediate files, all deleted at the end.
    xtc_whole = ana_file("_whole.", ".xtc")
    gro_first_frame = os.path.join(ana_dir, "0." + repetition + ".gro")
    xtc_nojump = ana_file("_nojump.", ".xtc")
    xtc_center_protein = ana_file("_center.", ".xtc")
    xtc_atoms_box = ana_file("_atom.", ".xtc")
    ndx_water_layer = ana_file("_water-layer.", ".ndx")
    ndx_temporary = ana_file("_temporary_", ".ndx")

    # Trajectory treatment to remove PBC artifacts
    trjconv("System", "-f", reference_xtc, "-s", reference_tpr,
            "-pbc whole", "-o", xtc_whole)

    # Extracting first frame
    trjconv("System", "-f", xtc_whole, "-s", reference_tpr,
            "-e 0.1", "-o", gro_first_frame)

    # Removing jumps
    trjconv("System", "-f", xtc_whole, "-s", gro_first_frame,
            "-pbc nojump", "-o", xtc_nojump)

    # Centering the protein
    # NOTE(review): this reads xtc_whole, so the _nojump trajectory made
    # above is never consumed - confirm which input was intended.
    trjconv("C-alpha System", "-f", xtc_whole, "-s", gro_first_frame,
            "-center", "-o", xtc_center_protein)

    # Putting all atoms in a compact box
    trjconv("System", "-f", xtc_center_protein, "-s", gro_first_frame,
            "-ur compact", "-pbc atom", "-o", xtc_atoms_box)

    # Fitting the protein
    trjconv("C-alpha System", "-f", xtc_atoms_box, "-s", gro_first_frame,
            "-fit rot+trans", "-o", allatom_xtc)

    # Creating water-free trajectory
    _run_shell(" ".join([
        "echo non-water |", gmx, "convert-tpr",
        "-s", reference_tpr, "-o", nonwater_tpr, silence]))
    trjconv("non-water", "-f", allatom_xtc, "-s", gro_first_frame,
            "-o", nonwater_xtc)
    trjconv("system", "-f", nonwater_xtc, "-s", nonwater_tpr,
            "-o", nonwater_pdb, "-dt", pdb_dt)

    # Creating water_layer_thickness - A water-layer pdb trajectory
    t = 0
    frame = 0
    _remove_if_file(waterlayer_pdb)
    _remove_if_file(ndx_water_layer)

    # NOTE(review): the cutoff is the thickness setting prefixed with
    # "0."; presumably an Angstrom -> nm conversion, which would only hold
    # for single-digit values - confirm.
    cutoff = "0." + thickness

    def layer_around(group):
        # SOL residues and ions within the cutoff of `group`, plus the
        # group itself.
        return "".join([
            '(same residue as ((resname SOL and within ', cutoff,
            ' of group "', group, '")))',
            ' or (group "Ion" and within ', cutoff,
            ' of group "', group, '")',
            ' or (group "', group, '")'])

    selection = '"water_layer" ' + layer_around("Protein")

    # Running make_ndx
    _run_shell(" ".join([
        "echo -e", "'chain z'\"\\n\"'q'\"\\n\"", "|",
        gmx_bare, "make_ndx",
        "-f", reference_tpr, "-o", ndx_temporary, silence]))

    # Are there ligands?
    if search_for_ligand_ndx_file(ndx_temporary) is True:
        selection = selection + " or " + layer_around("Other")

    # The selection goes to the shell as one single-quoted argument.
    _run_shell(" ".join([
        gmx_bare, "select",
        "-f", allatom_xtc, "-s", allatom_tpr,
        "-on", ndx_water_layer,
        "-select", "'" + selection + "'",
        "-dt", pdb_dt, silence]))

    # Creating pdb files
    # The per-frame pdb is written to the current working directory.
    frame_pdb = "frame_" + str(frame) + ".pdb"
    trjconv(str(frame), "-f", allatom_xtc, "-s", allatom_tpr,
            "-n", ndx_water_layer, "-o", frame_pdb,
            "-b", str(t), "-e", str(t))
    _run_shell("echo MODEL " + str(frame) + " >> " + waterlayer_pdb)
    _run_shell("grep ATOM " + frame_pdb + " >> " + waterlayer_pdb)
    # PDB models are terminated by an ENDMDL record.
    _run_shell("echo ENDMDL >> " + waterlayer_pdb)

    # Removing temporary files
    _run_shell("rm " + frame_pdb)

    # NOTE(review): no loop encloses the frame export, so only frame 0 is
    # written and these increments have no further effect - confirm
    # whether iterating over all frames was intended.
    frame = frame + 1
    t = t + int(time_dt_pdb.value)

    for leftover in (xtc_whole, xtc_nojump, xtc_center_protein,
                     xtc_atoms_box, ndx_water_layer, gro_first_frame):
        _remove_if_file(leftover)

    # Delete files whose name starts with '#' in the working directory;
    # the backslash stops the shell from reading '#' as a comment.
    _run_shell("rm \\#* 2>/dev/null")

    # Basic Analysis
    basic_an_data = (gromacs_path.value,
                     nonwater_xtc,
                     nonwater_tpr,
                     md_obj.get_simulation_prefix(),
                     ana_dir,
                     time_dt.value)
    run_basic_analysis(basic_an_data)
# NOTE(review): the next five statements look like the closing part of a
# function whose header is not visible here - confirm their enclosing
# scope and indentation before relying on this layout.
key_name = json_name.split('.')[0]
json_data = create_jsondata_from_docking_output_file(log_file)
json_final_data = {key_name: json_data}
json_name = os.path.join(path_to_save, json_name)
create_json_file(json_name, json_final_data)


if __name__ == '__main__':
    # Settings are read from config.ini in the working directory.
    config = configparser.ConfigParser()
    config.read('config.ini')

    sc = SparkContext()
    sqlCtx = SQLContext(sc)

    log_file = config.get('DEFAULT', 'path_save_log')
    path_to_save = config.get('DEFAULT', 'json_log')
    path_spark_drugdesign = config.get('DRUGDESIGN', 'path_spark_drugdesign')

    # Register the helper modules with the Spark context.
    for helper_module in ("vina_utils.py", "json_utils.py", "os_utils.py"):
        sc.addPyFile(os.path.join(path_spark_drugdesign, helper_module))

    # Broadcast
    log_file_b = sc.broadcast(log_file)
    path_to_save_b = sc.broadcast(path_to_save)

    make_directory(path_to_save)

    # One log_to_json call per log file, distributed through an RDD.
    all_log_files = get_files_log(log_file)
    log_filesRDD = sc.parallelize(all_log_files)
    log_filesRDD.foreach(log_to_json)
if __name__ == '__main__': sc = SparkContext() sqlCtx = SQLContext(sc) config = configparser.ConfigParser() config.read('config.ini') pythonsh = config.get('VINA', 'pythonsh') script_receptor4 = config.get('VINA', 'script_receptor4') pdb_path = config.get('DEFAULT', 'pdb_path') pdbqt_receptor_path = config.get('DEFAULT', 'pdbqt_receptor_path') path_spark_drugdesign = config.get('DRUGDESIGN', 'path_spark_drugdesign') make_directory(pdbqt_receptor_path) # Adding Python Source file sc.addPyFile(os.path.join(path_spark_drugdesign, "vina_utils.py")) sc.addPyFile(os.path.join(path_spark_drugdesign, "json_utils.py")) sc.addPyFile(os.path.join(path_spark_drugdesign, "os_utils.py")) # Broadcast pythonsh = sc.broadcast(pythonsh) script_receptor4 = sc.broadcast(script_receptor4) pdbqt_receptor_path = sc.broadcast(pdbqt_receptor_path) def run_prepare_receptor_spark(receptor): receptor_pdbqt = os.path.join(pdbqt_receptor_path.value, get_name_model_pdb(receptor))