def main(residues, folder, top, out_report_name, format_out, nProcessors, output_folder, new_report, trajs_to_select):
    """
        Calculate the distances between pairs of atoms for every trajectory
        found in a simulation folder, distributing the files over a pool of
        worker processes

        :param residues: Pairs of atoms to calculate distances
        :type residues: list
        :param folder: Path the simulation
        :type folder: str
        :param top: Path to the topology
        :type top: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use (None to use the
            value reported by utilities.getCpuCount)
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
        :param trajs_to_select: Number of the reports to read, if don't want
            to select all
        :type trajs_to_select: set
    """
    # Constants
    if output_folder is None:
        report_base = out_report_name
    else:
        report_base = os.path.join(output_folder, out_report_name)
    # the "%d" placeholder is kept in the template and forwarded as is
    outputFilename = "_".join([report_base, "%d"])
    trajName = "*traj*"
    reportName = "*report*_%d"
    # the label is built from the raw selection, before it is parsed
    distances_label = "\t".join(residues)
    residues = parse_selection(residues)

    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating distances with %d processors" % nProcessors)

    epochs = utilities.get_epoch_folders(folder)
    top_obj = None if top is None else utilities.getTopologyObject(top)

    def collect(epoch_name, destination):
        # Thin wrapper so both branches below share the constant arguments
        return analysis_utils.process_folder(epoch_name, folder, trajName, reportName, destination, top_obj, trajs_to_select)

    if epochs:
        files = []
        for epoch in epochs:
            print("Epoch", epoch)
            files.extend(collect(epoch, os.path.join(folder, epoch, outputFilename)))
    else:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = collect(None, os.path.join(folder, outputFilename))

    print("Starting to process files!")
    pool = mp.Pool(nProcessors)
    results = []
    for info in files:
        task_args = (info[0], info[2], residues, info[1], info[4], format_out, new_report, info[3], distances_label)
        results.append(pool.apply_async(process_file, args=task_args))
    pool.close()
    pool.join()
    # get() re-raises any exception that happened inside a worker
    for res in results:
        res.get()
def main(trajs, ref, nResidues, top):
    """
        Compute the per-residue RMSF of a set of trajectories, print the
        residues with the largest values and plot RMSF against residue number

        The deviation of every atom is measured against the structure returned
        by extractAvgPDB (when no reference is given) or by mapReference. The
        squared deviations are averaged over all frames and over the atoms of
        each residue before taking the square root.

        :param trajs: Trajectories to analyse, forwarded to extractAvgPDB or
            mapReference
        :param ref: Reference structure forwarded to mapReference; if None the
            structure produced by extractAvgPDB is used instead
        :param nResidues: Number of residues (sorted by decreasing RMSF) to
            print
        :type nResidues: int
        :param top: Path to the topology, or None if no topology is needed
        :type top: str
    """
    top_content = None if top is None else utilities.getTopologyObject(top)
    if ref is None:
        avgPDB, totPDBs = extractAvgPDB(trajs, top_content)
    else:
        avgPDB, totPDBs = mapReference(ref, trajs, top_content)

    RMSF = {atom: 0.0 for atom in avgPDB}
    residueMapping = {}
    nFrames = 0
    # TODO: Handle multiple chains and insertion residues in PDB
    # NOTE(review): assumes totPDBs yields one PDB object per frame -- confirm
    for PDBobj in totPDBs:
        nFrames += 1
        for atomID, atom in PDBobj.atoms.items():
            RMSF[atomID] += np.sum((atom.getAtomCoords() - avgPDB[atomID])**2)
            residueMapping.setdefault(atom.resnum, set()).add(atomID)

    RMSFresidue = {}
    for residue, atoms in residueMapping.items():
        # Mean squared deviation over the atoms of the residue AND over the
        # frames; without the frame normalisation the value would grow with
        # the length of the trajectories. nFrames is at least 1 here, since
        # residueMapping is only filled while iterating over frames.
        meanSquaredDev = sum(RMSF[atom] for atom in atoms) / (len(atoms) * nFrames)
        RMSFresidue[residue] = np.sqrt(meanSquaredDev)

    print("Residue\tRMSF")
    for res in sorted(RMSFresidue, key=RMSFresidue.get, reverse=True)[:nResidues]:
        print("%s\t%.4f" % (res, RMSFresidue[res]))

    # Materialise both dict views so that plot receives plain sequences
    plt.plot(list(RMSFresidue.keys()), list(RMSFresidue.values()), 'x')
    plt.xlabel("Residue number")
    plt.ylabel("RMSF")
    plt.savefig("RMSF-residue.png")
    plt.show()
def main(adaptive_results_folder, column_to_x="epoch", column_to_y="Binding Energy", column_to_z=None,
         output_selection_folder=None, summary_done=False, processors=4, report_pref="report_",
         trajectory_pref="trajectory_", separator=";", column_file="trajectory", topology=None):
    """
    Show a scatterplot of Adaptive's results using two or three report columns
    (X, Y, and Z as colour if set) and let the user select points by drawing
    on it. When enter is pressed the selected rows are written to a report
    file and their structures are extracted into an output folder.

    :param adaptive_results_folder: Path to Adaptive results.
    :type adaptive_results_folder: str
    :param column_to_x: Column name of the report file that will be used in the X axis.
    :type column_to_x: str
    :param column_to_y: Column name of the report file that will be used in the Y axis.
    :type column_to_y: str
    :param column_to_z: If set, column name of the report file that will be used in the Z axis (colorbar).
    :type column_to_z: str
    :param output_selection_folder: If set, base path of the output folder. By default
        "selected_from_plot" inside the Adaptive's results path is used. A numeric suffix
        ("_0", "_1", ...) is appended and the first name that does not exist yet is created.
    :type output_selection_folder: str
    :param summary_done: If it is set, instead of reading all the reports and creating a new
        summary, the summary csv of a previous run is reused, saving computational time.
    :type summary_done: bool
    :param processors: Number of processors used when extracting the structures.
    :type processors: int
    :param report_pref: PELE's report prefix.
    :type report_pref: str
    :param trajectory_pref: Adaptive's trajectory prefix.
    :type trajectory_pref: str
    :param separator: Separator string that will be used in the CSV files.
    :type separator: str
    :param column_file: Column name of the dataframe that contains the path to the trajectory file.
    :type column_file: str
    :param topology: Path to the topology for the simulation
    :type topology: str
    """
    summary_csv_filename = os.path.join(adaptive_results_folder, "summary.csv")
    if not summary_done:
        concat_reports_in_csv(adaptive_results_path=adaptive_results_folder,
                              output_file_path=summary_csv_filename,
                              report_prefix=report_pref,
                              trajectory_prefix=trajectory_pref,
                              separator_out=separator)
    dataframe = pd.read_csv(summary_csv_filename, sep=separator, engine='python', header=0)

    fig, ax = plt.subplots()
    if column_to_z:
        pts = ax.scatter(dataframe[column_to_x], dataframe[column_to_y], c=dataframe[column_to_z], s=20)
        plt.colorbar(pts)
    else:
        pts = ax.scatter(dataframe[column_to_x], dataframe[column_to_y], s=20)
    selector = SelectFromCollection(ax, pts)

    topology_contents = None if topology is None else adapt_tools.getTopologyObject(topology)

    def accept(event, output_selection_folder=output_selection_folder):
        # Only the enter key confirms the selection; ignore everything else
        if event.key != "enter":
            return
        print("Selected points:")
        df_select = dataframe.loc[selector.ind]
        print(df_select)

        if not output_selection_folder:
            output_selection_folder = os.path.join(adaptive_results_folder, "selected_from_plot")
        # Look for the first "<base>_<n>" that can be created
        counter = 0
        while True:
            candidate = output_selection_folder + "_" + str(counter)
            try:
                os.mkdir(candidate)
            except FileExistsError:
                counter += 1
            else:
                break
        output_selection_folder = candidate

        report_path = os.path.join(output_selection_folder, "selection_report.csv")
        df_select.to_csv(report_path, sep=separator, index=False)
        get_pdbs_from_df_in_xtc(df_select, output_selection_folder, processors=processors,
                                column_file=column_file, topology=topology_contents)
        selector.disconnect()
        ax.set_title("")
        fig.canvas.draw()

    fig.canvas.mpl_connect("key_press_event", accept)
    ax.set_title("Press enter to accept selected points.")
    ax.set_xlabel(column_to_x)
    ax.set_ylabel(column_to_y)
    plt.show()
def main(controlFile, trajName, reportName, folder, top, outputFilename, nProcessors, output_folder, format_str, new_report, trajs_to_select):
    """
        Calculate the corrected rmsd values of conformation taking into
        account molecule symmetries, processing the trajectories of a
        simulation in parallel

        :param controlFile: Control file
        :type controlFile: str
        :param trajName: Prefix of the trajectory files ("_*" is appended), if
            None the pattern "*traj*" is used
        :type trajName: str
        :param reportName: Prefix of the report files ("_%d" is appended), if
            None "report_%d" is used
        :type reportName: str
        :param folder: Path the simulation
        :type folder: str
        :param top: Path to the topology
        :type top: str
        :param outputFilename: Name of the output file
        :type outputFilename: str
        :param nProcessors: Number of processors to use (None to use the
            value reported by utilities.getCpuCount)
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param format_str: String with the format of the report
        :type format_str: str
        :param new_report: Whether to write rmsd to a new report file
        :type new_report: bool
        :param trajs_to_select: Selection of trajectories, forwarded to
            analysis_utils.process_folder
    """
    trajName = "*traj*" if trajName is None else trajName + "_*"
    reportName = "report_%d" if reportName is None else reportName + "_%d"
    if output_folder is not None:
        outputFilename = os.path.join(output_folder, outputFilename)
    # the "%d" placeholder is kept in the template and forwarded as is
    outputFilename = outputFilename + "_%d"

    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating RMSDs with %d processors" % nProcessors)

    epochs = utilities.get_epoch_folders(folder)
    top_obj = utilities.getTopologyObject(top) if top is not None else None

    resname, nativeFilename, symmetries, rmsdColInReport = readControlFile(controlFile)
    nativePDB = atomset.PDB()
    nativePDB.initialise(nativeFilename, resname=resname)

    if epochs:
        files = []
        for epoch in epochs:
            print("Epoch", epoch)
            epoch_output = os.path.join(folder, epoch, outputFilename)
            files.extend(analysis_utils.process_folder(epoch, folder, trajName, reportName, epoch_output, top_obj, trajs_to_select))
    else:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(None, folder, trajName, reportName, os.path.join(folder, outputFilename), top_obj, trajs_to_select)

    pool = mp.Pool(nProcessors)
    results = []
    for info in files:
        task_args = (nativePDB, resname, symmetries, rmsdColInReport, info[0], info[1], info[2], info[3], info[4], format_str, new_report)
        results.append(pool.apply_async(calculate_rmsd_traj, args=task_args))
    pool.close()
    pool.join()
    # get() re-raises any exception that happened inside a worker
    for res in results:
        res.get()
def main(folder_name=".", atom_Ids="", lig_resname="", numtotalSteps=0, enforceSequential_run=0, writeLigandTrajectory=True, setNumber=0, protein_CA=0, non_Repeat=False, nProcessors=None, parallelize=True, topology=None, sidechains=False, sidechain_folder=".", cm=False, use_extra_atoms=False, CM_mode="p-lig", calc_dihedrals=False, dihedrals_projection=False):
    """
        Extract the coordinates from the trajectories of a simulation, then
        optionally repeat the extracted snapshots and gather the trajectories,
        epoch folder by epoch folder

        All the arguments are stored in a ParamsHandler object; those that are
        not described below are only consumed by helpers that are not visible
        from this function.

        :param folder_name: Folder that contains the trajectories
        :type folder_name: str
        :param enforceSequential_run: If true, folder_name itself is processed
            as a single folder instead of looking for epoch folders
        :param writeLigandTrajectory: Whether the folder for the ligand
            trajectories has to be created in every processed folder
        :type writeLigandTrajectory: bool
        :param non_Repeat: If true, the step that repeats the extracted
            snapshots is skipped
        :type non_Repeat: bool
        :param nProcessors: Number of processors to use (None to use the
            value reported by utilities.getCpuCount)
        :type nProcessors: int
        :param parallelize: Whether to use a pool of processes, it is ignored
            when PARALELLIZATION is false
        :type parallelize: bool
        :param topology: Path to the topology
        :type topology: str
    """
    params = ParamsHandler(folder_name, atom_Ids, lig_resname, numtotalSteps, enforceSequential_run, writeLigandTrajectory, setNumber, protein_CA, non_Repeat, nProcessors, parallelize, topology, sidechains, sidechain_folder, cm, use_extra_atoms, CM_mode, calc_dihedrals, dihedrals_projection)
    constants = Constants()

    if params.topology is not None:
        params.topology = utilities.getTopologyObject(params.topology)

    params.lig_resname = parseResname(params.atomIds, params.lig_resname, params.contact_map, params.cm_mode, params.dihedrals)

    folderWithTrajs = params.folder_name

    makeGatheredTrajsFolder(constants)

    if params.enforceSequential_run:
        folders = ["."]
    else:
        folders = utilities.get_epoch_folders(folderWithTrajs)
        if not folders:
            # no epoch folders, process the folder itself
            folders = ["."]

    # if multiprocess is not available, turn off parallelization
    params.parallelize &= PARALELLIZATION

    if params.parallelize:
        if params.nProcessors is None:
            params.nProcessors = utilities.getCpuCount()
        params.nProcessors = max(1, params.nProcessors)
        print("Running extractCoords with %d cores" % (params.nProcessors))
        pool = mp.Pool(params.nProcessors)
    else:
        pool = None

    try:
        params.sidechains = extractSidechainIndexes(params, pool=pool) if params.sidechains else []

        for folder_it in folders:
            pathFolder = os.path.join(folderWithTrajs, folder_it)
            print("Extracting coords from folder %s" % folder_it)
            ligand_trajs_folder = os.path.join(pathFolder, constants.ligandTrajectoryFolder)
            if params.writeLigandTrajectory and not os.path.exists(ligand_trajs_folder):
                os.makedirs(ligand_trajs_folder)
            writeFilenamesExtractedCoordinates(pathFolder, params, constants, pool=pool)
            if not params.non_Repeat:
                print("Repeating snapshots from folder %s" % folder_it)
                # this step is run without the pool, as in the original code
                repeatExtractedSnapshotsInFolder(pathFolder, constants, params.numtotalSteps, pool=None)
            print("Gathering trajs in %s" % constants.gatherTrajsFolder)
            gatherTrajs(constants, folder_it, params.setNumber, params.non_Repeat)
    finally:
        # Release the worker processes, the pool was previously never closed
        if pool is not None:
            pool.close()
            pool.join()
def main(trajectory, snapshot, epoch, outputPath, out_filename, topology, use_pdb=False):
    """
        Write the pathway that leads to a given snapshot, walking back through
        the epochs with the processorMapping.txt files until epoch 0 is reached

        :param trajectory: Number of the trajectory that contains the snapshot
        :type trajectory: int
        :param snapshot: Index of the last snapshot to include
        :type snapshot: int
        :param epoch: Path to the epoch folder of the snapshot (a trailing "/"
            is ignored)
        :type epoch: str
        :param outputPath: Folder where the pathway is written, it is created
            if it does not exist; None to write relative to the current folder
        :type outputPath: str
        :param out_filename: Name of the output file, if it already exists a
            numeric suffix is inserted before the extension
        :type out_filename: str
        :param topology: Path to the topology, or None
        :type topology: str
        :param use_pdb: Not used inside this function
        :type use_pdb: bool

        :raises ValueError: If no trajectory file is found for an epoch
    """
    if outputPath is not None:
        # joining with "" guarantees a trailing separator
        outputPath = os.path.join(outputPath, "")
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
    else:
        outputPath = ""
    if topology is not None:
        topology = utilities.getTopologyObject(topology)
    topology_contents = None
    if os.path.exists(outputPath + out_filename):
        # If the specified name exists, append a number to distinguish the files
        name, ext = os.path.splitext(out_filename)
        out_filename = "".join([name, "_%d", ext])
        i = 1
        while os.path.exists(outputPath + out_filename % i):
            i += 1
        out_filename %= i
    pathway = []
    # Strip out trailing slash if present
    pathPrefix, epoch = os.path.split(epoch.rstrip("/"))
    sys.stderr.write("Creating pathway...\n")
    while True:
        traj_pattern = os.path.join(pathPrefix, epoch, "*traj*_%d.*" % trajectory)
        filename = glob.glob(traj_pattern)
        if not filename:
            raise ValueError("Trajectory %s not found!" % traj_pattern)
        snapshots = utilities.getSnapshots(filename[0])
        # avoid repeating the initial snapshot in every epoch but the first
        initial = 0 if epoch == '0' else 1
        if topology is not None:
            topology_contents = topology.getTopology(int(epoch), trajectory)
        if not isinstance(snapshots[0], basestring):
            # snapshots that are not strings are converted through atomset.PDB
            new_snapshots = []
            for i in range(initial, snapshot + 1):
                PDB = atomset.PDB()
                PDB.initialise(snapshots[i], topology=topology_contents)
                new_snapshots.append(PDB.pdb)
            snapshots = new_snapshots
        else:
            snapshots = snapshots[initial:snapshot + 1]
        # we walk backwards in time, so earlier epochs go to the front
        pathway.insert(0, snapshots)
        if epoch == '0':
            # Once we get to epoch 0, we just need to append the trajectory
            # where the cluster was found and we can break out of the loop
            break
        # Use a context manager so the mapping file is closed on every epoch
        with open(os.path.join(pathPrefix, epoch, "processorMapping.txt")) as mapping_file:
            procMapping = mapping_file.read().rstrip().split(':')
        epoch, trajectory, snapshot = map(int, procMapping[trajectory - 1][1:-1].split(','))
        epoch = str(epoch)
    sys.stderr.write("Writing pathway...\n")
    with open(outputPath + out_filename, "a") as f:
        if topology:
            # Quick fix to avoid problems when visualizing with PyMol
            f.write("ENDMDL\nMODEL 2\n".join(itertools.chain.from_iterable(pathway)))
        else:
            f.write("ENDMDL\n".join(itertools.chain.from_iterable(pathway)))
def main(resname, folder, top, out_report_name, format_out, nProcessors, output_folder, new_report):
    """
        Calculate the relative SASA values of the ligand for every trajectory
        found in a simulation folder, distributing the files over a pool of
        worker processes

        :param resname: Ligand resname
        :type resname: str
        :param folder: Path the simulation
        :type folder: str
        :param top: Path to the topology
        :type top: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use (None to use the
            value reported by utilities.getCpuCount)
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
    """
    # Constants
    if output_folder is None:
        report_base = out_report_name
    else:
        report_base = os.path.join(output_folder, out_report_name)
    # the "%d" placeholder is kept in the template and forwarded as is
    outputFilename = "_".join([report_base, "%d"])
    trajName = "*traj*"
    reportName = "*report*_%d"

    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating SASA with %d processors" % nProcessors)
    pool = mp.Pool(nProcessors)

    epochs = utilities.get_epoch_folders(folder)
    top_obj = utilities.getTopologyObject(top) if top is not None else None

    if epochs:
        files = []
        for epoch in epochs:
            print("Epoch", epoch)
            epoch_output = os.path.join(folder, epoch, outputFilename)
            files.extend(analysis_utils.process_folder(epoch, folder, trajName, reportName, epoch_output, top_obj))
    else:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(None, folder, trajName, reportName, os.path.join(folder, outputFilename), top_obj)

    results = [
        pool.apply_async(process_file, args=(info[0], info[2], resname, info[1], info[4], format_out, new_report, info[3]))
        for info in files
    ]
    # get() blocks until each task is done and re-raises worker exceptions
    for res in results:
        res.get()
    pool.close()
    pool.terminate()