Example #1
def main(residues, folder, top, out_report_name, format_out, nProcessors, output_folder, new_report, trajs_to_select):
    """
        Calculate the distances between pairs of atoms

        :param residues: Pairs of atoms between which to calculate distances
        :type residues: list
        :param folder: Path to the simulation
        :type folder: str
        :param top: Path to the topology
        :type top: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
        :param trajs_to_select: Numbers of the trajectories to read, if not all of them should be selected
        :type trajs_to_select: set
    """
    # Constants
    if output_folder is not None:
        out_report_name = os.path.join(output_folder, out_report_name)
    outputFilename = "_".join([out_report_name, "%d"])
    trajName = "*traj*"
    reportName = "*report*_%d"
    distances_label = "\t".join(residues)
    residues = parse_selection(residues)
    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating distances with %d processors" % nProcessors)
    epochs = utilities.get_epoch_folders(folder)
    if top is not None:
        top_obj = utilities.getTopologyObject(top)
    else:
        top_obj = None
    files = []
    if not epochs:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(None, folder, trajName, reportName, os.path.join(folder, outputFilename), top_obj, trajs_to_select)
    for epoch in epochs:
        print("Epoch", epoch)
        files.extend(analysis_utils.process_folder(epoch, folder, trajName, reportName, os.path.join(folder, epoch, outputFilename), top_obj, trajs_to_select))
    print("Starting to process files!")
    pool = mp.Pool(nProcessors)
    results = [pool.apply_async(process_file, args=(info[0], info[2], residues, info[1], info[4], format_out, new_report, info[3], distances_label)) for info in files]
    pool.close()
    pool.join()
    for res in results:
        res.get()
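
The fan-out at the end of this function, submitting one process_file call per trajectory with pool.apply_async and then calling get() on every result, is also what surfaces exceptions raised inside the workers. A minimal, self-contained sketch of that pattern with a toy task function (not the project's process_file):

import multiprocessing as mp

def toy_task(traj_id):
    # stand-in for process_file: any exception raised here is stored in the
    # AsyncResult and re-raised by .get() in the parent process
    if traj_id < 0:
        raise ValueError("bad trajectory id %d" % traj_id)
    return traj_id * 2

if __name__ == "__main__":
    pool = mp.Pool(2)
    results = [pool.apply_async(toy_task, args=(i,)) for i in range(4)]
    pool.close()
    pool.join()
    # get() returns the worker's return value or re-raises its exception
    print([res.get() for res in results])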
Example #2
def main(trajs, ref, nResidues, top):
    if top is None:
        top_content = None
    else:
        top_content = utilities.getTopologyObject(top)
    if ref is None:
        avgPDB, totPDBs = extractAvgPDB(trajs, top_content)
    else:
        avgPDB, totPDBs = mapReference(ref, trajs, top_content)
    RMSF = {atom: 0.0 for atom in avgPDB}
    residueMapping = {}
    # TODO: Handle multiple chains and insertion residues in PDB
    for PDBobj in totPDBs:
        for atomID, atom in PDBobj.atoms.items():
            RMSF[atomID] += np.sum((atom.getAtomCoords() - avgPDB[atomID])**2)
    # build the residue -> atom IDs mapping from the last processed PDB
    # (all frames are assumed to share the same set of atoms)
    for atomID, atom in PDBobj.atoms.items():
        if atom.resnum not in residueMapping:
            residueMapping[atom.resnum] = {atomID}
        else:
            residueMapping[atom.resnum].add(atomID)
    RMSFresidue = {}
    for residue, atoms in residueMapping.items():
        RMSFresidue[residue] = sum([RMSF[atom] for atom in atoms])
        RMSFresidue[residue] /= len(atoms)
        RMSFresidue[residue] = np.sqrt(RMSFresidue[residue])

    print("Residue\tRMSF")
    for res in sorted(RMSFresidue, key=lambda x: RMSFresidue[x],
                      reverse=True)[:nResidues]:
        print("%s\t%.4f" % (res, RMSFresidue[res]))

    plt.plot(list(RMSFresidue.keys()), list(RMSFresidue.values()), 'x')
    plt.xlabel("Residue number")
    plt.ylabel("RMSF")
    plt.savefig("RMSF-residue.png")
    plt.show()
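
The per-residue RMSF above is obtained by accumulating, per atom, the squared deviations from the average structure and then averaging over the atoms of each residue before taking the square root. A small self-contained sketch of that aggregation on synthetic coordinates (toy atoms and residue mapping, not the project's PDB objects; note that this sketch also averages over frames, the usual RMSF normalisation):

import numpy as np

# synthetic trajectory: 3 frames x 2 atoms x 3 coordinates
frames = np.array([[[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]],
                   [[0.2, 0.0, 0.0], [1.1, 0.0, 0.0]],
                   [[0.4, 0.0, 0.0], [0.9, 0.0, 0.0]]])
avg = frames.mean(axis=0)                      # average structure, shape (2, 3)
sq_dev = ((frames - avg) ** 2).sum(axis=2)     # squared deviation per frame and atom
per_atom = sq_dev.mean(axis=0)                 # average over frames
residue_mapping = {1: [0, 1]}                  # toy residue -> atom indices
for residue, atoms in residue_mapping.items():
    rmsf = np.sqrt(np.mean([per_atom[a] for a in atoms]))
    print("Residue %d\tRMSF %.4f" % (residue, rmsf))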
Example #3
def main(adaptive_results_folder,
         column_to_x="epoch",
         column_to_y="Binding Energy",
         column_to_z=None,
         output_selection_folder=None,
         summary_done=False,
         processors=4,
         report_pref="report_",
         trajectory_pref="trajectory_",
         separator=";",
         column_file="trajectory",
         topology=None):
    """
    Generates a scatter plot of Adaptive's results given two or three columns (X, Y, and Z if set).
    The plot allows selecting points by drawing around them. The corresponding structures are
    extracted and stored in an output folder, and a report file of the selected structures
    is also created.
    :param adaptive_results_folder: Path to Adaptive results.
    :type adaptive_results_folder: str
    :param column_to_x: Column name of the report file that will be used in the X axis.
    :type column_to_x: str
    :param column_to_y: Column name of the report file that will be used in the Y axis.
    :type column_to_y: str
    :param column_to_z: If set, column name of the report file that will be used in the Z axis (colorbar).
    :type column_to_z: str
    :param output_selection_folder: If set, path to the output folder. By default it will be created inside the
    Adaptive results path; if the folder already exists, a numeric suffix is appended to avoid overwriting it.
    :type output_selection_folder: str
    :param summary_done: If set, instead of reading all the reports and building a new summary, the script reuses
    the summary CSV from a previous run, saving computation time.
    :type summary_done: bool
    :param processors: Number of processors to use.
    :type processors: int
    :param report_pref: PELE's report prefix.
    :type report_pref: str
    :param trajectory_pref: Adaptive's trajectory prefix.
    :type trajectory_pref: str
    :param separator: Separator string that will be used in the CSV files.
    :type separator: str
    :param column_file: Column name of the dataframe that contains the path to the trajectory file.
    :type column_file: str
    :param topology: Path to the topology for the simulation
    :type topology: str
    :return:
    """
    summary_csv_filename = os.path.join(adaptive_results_folder, "summary.csv")
    if not summary_done:
        concat_reports_in_csv(adaptive_results_path=adaptive_results_folder,
                              output_file_path=summary_csv_filename,
                              report_prefix=report_pref,
                              trajectory_prefix=trajectory_pref,
                              separator_out=separator)
    dataframe = pd.read_csv(summary_csv_filename,
                            sep=separator,
                            engine='python',
                            header=0)
    fig, ax = plt.subplots()
    if column_to_z:
        pts = ax.scatter(dataframe[column_to_x],
                         dataframe[column_to_y],
                         c=dataframe[column_to_z],
                         s=20)
        plt.colorbar(pts)
    else:
        pts = ax.scatter(dataframe[column_to_x], dataframe[column_to_y], s=20)
    selector = SelectFromCollection(ax, pts)

    if topology is not None:
        topology_contents = adapt_tools.getTopologyObject(topology)
    else:
        topology_contents = None

    def accept(event, output_selection_folder=output_selection_folder):
        if event.key == "enter":
            print("Selected points:")
            df_select = dataframe.loc[selector.ind]
            print(df_select)
            counter = 0
            if not output_selection_folder:
                output_selection_folder = os.path.join(adaptive_results_folder,
                                                       "selected_from_plot")
            while True:
                try:
                    os.mkdir(output_selection_folder + "_" + str(counter))
                    break
                except FileExistsError:
                    counter += 1
            output_selection_folder = output_selection_folder + "_" + str(
                counter)
            df_select.to_csv(os.path.join(output_selection_folder,
                                          "selection_report.csv"),
                             sep=separator,
                             index=False)
            get_pdbs_from_df_in_xtc(df_select,
                                    output_selection_folder,
                                    processors=processors,
                                    column_file=column_file,
                                    topology=topology_contents)
            selector.disconnect()
            ax.set_title("")
            fig.canvas.draw()

    fig.canvas.mpl_connect("key_press_event", accept)
    ax.set_title("Press enter to accept selected points.")
    ax.set_xlabel(column_to_x)
    ax.set_ylabel(column_to_y)
    plt.show()
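
The interactive part of this function relies on a selector attached to the scatter collection plus a key_press_event callback. A minimal, self-contained sketch of that mechanism using matplotlib's LassoSelector (a generic illustration, not the project's SelectFromCollection class):

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.path import Path
from matplotlib.widgets import LassoSelector

rng = np.random.default_rng(0)
data = rng.random((50, 2))
fig, ax = plt.subplots()
pts = ax.scatter(data[:, 0], data[:, 1], s=20)
selected = []

def onselect(verts):
    # indices of the scatter points that fall inside the drawn lasso
    selected[:] = np.nonzero(Path(verts).contains_points(pts.get_offsets()))[0]

lasso = LassoSelector(ax, onselect)

def accept(event):
    if event.key == "enter":
        print("Selected indices:", list(selected))

fig.canvas.mpl_connect("key_press_event", accept)
ax.set_title("Draw a lasso around points, then press enter")
plt.show()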
Example #4
def main(controlFile, trajName, reportName, folder, top, outputFilename, nProcessors, output_folder, format_str, new_report, trajs_to_select):
    """
        Calculate the corrected RMSD values of the conformations, taking
        molecule symmetries into account

        :param controlFile: Control file
        :type controlFile: str
        :param trajName: Name of the trajectory files
        :type trajName: str
        :param reportName: Name of the report files
        :type reportName: str
        :param folder: Path to the simulation
        :type folder: str
        :param top: Path to the topology
        :type top: str
        :param outputFilename: Name of the output file
        :type outputFilename: str
        :param nProcessors: Number of processors to use
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param format_str: String with the format of the report
        :type format_str: str
        :param new_report: Whether to write the RMSD values to a new report file
        :type new_report: bool
        :param trajs_to_select: Numbers of the trajectories to read, if not all of them should be selected
        :type trajs_to_select: set

    """
    if trajName is None:
        trajName = "*traj*"
    else:
        trajName += "_*"
    if reportName is None:
        reportName = "report_%d"
    else:
        reportName += "_%d"
    if output_folder is not None:
        outputFilename = os.path.join(output_folder, outputFilename)
    outputFilename += "_%d"
    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating RMSDs with %d processors" % nProcessors)
    epochs = utilities.get_epoch_folders(folder)
    if top is not None:
        top_obj = utilities.getTopologyObject(top)
    else:
        top_obj = None

    resname, nativeFilename, symmetries, rmsdColInReport = readControlFile(controlFile)

    nativePDB = atomset.PDB()
    nativePDB.initialise(nativeFilename, resname=resname)

    files = []
    if not epochs:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(None, folder, trajName, reportName, os.path.join(folder, outputFilename), top_obj, trajs_to_select)
    for epoch in epochs:
        print("Epoch", epoch)
        files.extend(analysis_utils.process_folder(epoch, folder, trajName, reportName, os.path.join(folder, epoch, outputFilename), top_obj, trajs_to_select))
    pool = mp.Pool(nProcessors)
    results = [pool.apply_async(calculate_rmsd_traj, args=(nativePDB, resname, symmetries, rmsdColInReport, info[0], info[1], info[2], info[3], info[4], format_str, new_report)) for info in files]
    pool.close()
    pool.join()
    for res in results:
        res.get()
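
For reference, the plain (uncorrected) RMSD between a conformation and the native structure is the root mean squared distance over matched atoms; the symmetry correction amounts to choosing, among chemically equivalent atoms, the correspondence that gives the lowest value. A minimal numpy sketch of the plain RMSD on toy coordinates (no alignment and no symmetry handling):

import numpy as np

def rmsd(coords_a, coords_b):
    # root mean squared distance over matched atoms
    diff = coords_a - coords_b
    return np.sqrt((diff * diff).sum(axis=1).mean())

native = np.array([[0.0, 0.0, 0.0], [1.5, 0.0, 0.0], [0.0, 1.5, 0.0]])
conf = np.array([[0.1, 0.0, 0.0], [1.4, 0.1, 0.0], [0.0, 1.6, 0.1]])
print("RMSD: %.4f" % rmsd(native, conf))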
Example #5
def main(folder_name=".",
         atom_Ids="",
         lig_resname="",
         numtotalSteps=0,
         enforceSequential_run=0,
         writeLigandTrajectory=True,
         setNumber=0,
         protein_CA=0,
         non_Repeat=False,
         nProcessors=None,
         parallelize=True,
         topology=None,
         sidechains=False,
         sidechain_folder=".",
         cm=False,
         use_extra_atoms=False,
         CM_mode="p-lig",
         calc_dihedrals=False,
         dihedrals_projection=False):
    params = ParamsHandler(folder_name, atom_Ids, lig_resname, numtotalSteps,
                           enforceSequential_run, writeLigandTrajectory,
                           setNumber, protein_CA, non_Repeat, nProcessors,
                           parallelize, topology, sidechains, sidechain_folder,
                           cm, use_extra_atoms, CM_mode, calc_dihedrals,
                           dihedrals_projection)
    constants = Constants()

    if params.topology is not None:
        params.topology = utilities.getTopologyObject(params.topology)

    params.lig_resname = parseResname(params.atomIds, params.lig_resname,
                                      params.contact_map, params.cm_mode,
                                      params.dihedrals)

    folderWithTrajs = params.folder_name

    makeGatheredTrajsFolder(constants)

    if params.enforceSequential_run:
        folders = ["."]
    else:
        folders = utilities.get_epoch_folders(folderWithTrajs)
        if len(folders) == 0:
            folders = ["."]

    # if multiprocessing is not available, turn off parallelization
    params.parallelize &= PARALELLIZATION

    if params.parallelize:
        if params.nProcessors is None:
            params.nProcessors = utilities.getCpuCount()
        params.nProcessors = max(1, params.nProcessors)

        print("Running extractCoords with %d cores" % (params.nProcessors))
        pool = mp.Pool(params.nProcessors)
    else:
        pool = None

    params.sidechains = extractSidechainIndexes(
        params, pool=pool) if params.sidechains else []

    for folder_it in folders:
        pathFolder = os.path.join(folderWithTrajs, folder_it)
        print("Extracting coords from folder %s" % folder_it)
        ligand_trajs_folder = os.path.join(pathFolder,
                                           constants.ligandTrajectoryFolder)
        if params.writeLigandTrajectory and not os.path.exists(
                ligand_trajs_folder):
            os.makedirs(ligand_trajs_folder)
        writeFilenamesExtractedCoordinates(pathFolder,
                                           params,
                                           constants,
                                           pool=pool)
        if not params.non_Repeat:
            print("Repeating snapshots from folder %s" % folder_it)
            repeatExtractedSnapshotsInFolder(pathFolder,
                                             constants,
                                             params.numtotalSteps,
                                             pool=None)
        print("Gathering trajs in %s" % constants.gatherTrajsFolder)
        gatherTrajs(constants, folder_it, params.setNumber, params.non_Repeat)
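
A hedged usage sketch for this driver, assuming main is the function defined above (module path omitted) and that the folder and topology paths are placeholders for real ones:

# hypothetical invocation: extract the coordinates of ligand residue "LIG"
# from an adaptive simulation rooted at "simulation_output/", using 4 cores
main(folder_name="simulation_output",
     lig_resname="LIG",
     nProcessors=4,
     parallelize=True,
     topology="topology.pdb")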
Example #6
def main(trajectory,
         snapshot,
         epoch,
         outputPath,
         out_filename,
         topology,
         use_pdb=False):
    if outputPath is not None:
        outputPath = os.path.join(outputPath, "")
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
    else:
        outputPath = ""
    if topology is not None:
        topology = utilities.getTopologyObject(topology)
    topology_contents = None
    if os.path.exists(outputPath + out_filename):
        # If the specified name exists, append a number to distinguish the files
        name, ext = os.path.splitext(out_filename)
        out_filename = "".join([name, "_%d", ext])
        i = 1
        while os.path.exists(outputPath + out_filename % i):
            i += 1
        out_filename %= i
    pathway = []
    # Strip out the trailing slash if present
    pathPrefix, epoch = os.path.split(epoch.rstrip("/"))
    sys.stderr.write("Creating pathway...\n")
    while True:
        filename = glob.glob(
            os.path.join(pathPrefix, epoch, "*traj*_%d.*" % trajectory))
        if not filename:
            raise ValueError(
                "Trajectory %s not found!" %
                os.path.join(pathPrefix, epoch, "*traj*_%d.*" % trajectory))
        snapshots = utilities.getSnapshots(filename[0])
        if epoch == '0':
            initial = 0
        else:
            # avoid repeating the initial snapshot
            initial = 1
        if topology is not None:
            topology_contents = topology.getTopology(int(epoch), trajectory)
        # basestring is expected to be defined by the module's Python 2/3 compatibility handling
        if not isinstance(snapshots[0], basestring):
            new_snapshots = []
            for i in range(initial, snapshot + 1):
                PDB = atomset.PDB()
                PDB.initialise(snapshots[i], topology=topology_contents)
                new_snapshots.append(PDB.pdb)
            snapshots = new_snapshots
        else:
            snapshots = snapshots[initial:snapshot + 1]
        pathway.insert(0, snapshots)
        if epoch == '0':
            # Once we get to epoch 0, we just need to append the trajectory
            # where the cluster was found and we can break out of the loop
            break
        procMapping = open(
            os.path.join(pathPrefix, epoch,
                         "processorMapping.txt")).read().rstrip().split(':')
        epoch, trajectory, snapshot = map(
            int, procMapping[trajectory - 1][1:-1].split(','))
        epoch = str(epoch)
    sys.stderr.write("Writing pathway...\n")
    with open(outputPath + out_filename, "a") as f:
        if topology:
            #Quick fix to avoid problems when visualizing with PyMol
            f.write("ENDMDL\nMODEL     2\n".join(
                itertools.chain.from_iterable(pathway)))
        else:
            f.write("ENDMDL\n".join(itertools.chain.from_iterable(pathway)))
Example #7
def main(resname, folder, top, out_report_name, format_out, nProcessors,
         output_folder, new_report):
    """
        Calculate the relative SASA values of the ligand

        :param resname: Ligand resname
        :type resname: str
        :param folder: Path to the simulation
        :type folder: str
        :param top: Path to the topology
        :type top: str
        :param out_report_name: Name of the output file
        :type out_report_name: str
        :param format_out: String with the format of the output
        :type format_out: str
        :param nProcessors: Number of processors to use
        :type nProcessors: int
        :param output_folder: Path where to store the new reports
        :type output_folder: str
        :param new_report: Whether to create new reports
        :type new_report: bool
    """
    # Constants
    if output_folder is not None:
        out_report_name = os.path.join(output_folder, out_report_name)
    outputFilename = "_".join([out_report_name, "%d"])
    trajName = "*traj*"
    reportName = "*report*_%d"
    if nProcessors is None:
        nProcessors = utilities.getCpuCount()
    nProcessors = max(1, nProcessors)
    print("Calculating SASA with %d processors" % nProcessors)
    pool = mp.Pool(nProcessors)
    epochs = utilities.get_epoch_folders(folder)
    if top is not None:
        top_obj = utilities.getTopologyObject(top)
    else:
        top_obj = None
    files = []
    if not epochs:
        # path does not contain an adaptive simulation, we'll try to retrieve
        # trajectories from the specified path
        files = analysis_utils.process_folder(
            None, folder, trajName, reportName,
            os.path.join(folder, outputFilename), top_obj)
    for epoch in epochs:
        print("Epoch", epoch)
        files.extend(
            analysis_utils.process_folder(
                epoch, folder, trajName, reportName,
                os.path.join(folder, epoch, outputFilename), top_obj))
    results = []
    for info in files:
        results.append(
            pool.apply_async(process_file,
                             args=(info[0], info[2], resname, info[1], info[4],
                                   format_out, new_report, info[3])))
    for res in results:
        res.get()
    pool.close()
    pool.join()
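
For context, one common way to compute the per-frame SASA of a ligand is MDTraj's Shrake-Rupley implementation. The sketch below is a generic illustration with hypothetical file and residue names, not necessarily what the project's process_file does; a relative SASA would additionally divide by a reference value such as the SASA of the isolated ligand.

import mdtraj as md

# hypothetical trajectory, topology and ligand residue name
traj = md.load("trajectory_1.xtc", top="topology.pdb")
sasa_per_atom = md.shrake_rupley(traj)                    # (n_frames, n_atoms), in nm^2
ligand_atoms = traj.topology.select("resname LIG")
ligand_sasa = sasa_per_atom[:, ligand_atoms].sum(axis=1)  # total ligand SASA per frame
print(ligand_sasa)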