def process_file(report, outputFilename, format_out, new_report, epoch,
                 energy_column):
    try:
        reportFilename = glob.glob(report)[0]
    except IndexError:
        raise IndexError("File %s not found" % report)

    with open(reportFilename) as f:
        header = f.readline().rstrip()
        if not header.startswith("#"):
            header = ""
        reportFile = utilities.loadtxtfile(f)
    energy_values = reportFile[:, energy_column]
    energy_values = preprocessing.scale(energy_values)

    if not new_report:
        if outputFilename != reportFilename:
            # build on the previously written output file rather than the raw report
            reportFile = utilities.loadtxtfile(outputFilename)
        fixedReport = analysis_utils.extendReportWithRmsd(
            reportFile, energy_values)
    else:
        header = ""
        indexes = np.arange(energy_values.shape[0])
        fixedReport = np.concatenate(
            (indexes[:, None], energy_values[:, None]), axis=1)

    with open(outputFilename, "w") as fw:
        if header:
            fw.write("%s\tEnergy\n" % header)
        else:
            fw.write("# Step\tEnergy\n")
        np.savetxt(fw, fixedReport, fmt=format_out, delimiter="\t")
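# A note on the scaling above: "preprocessing" is presumably
# sklearn.preprocessing, whose scale() standardizes the energy column to zero
# mean and unit variance. A minimal numpy equivalent, for reference (the
# helper name is illustrative):
import numpy as np

def scale_values(values):
    # standardize a 1D array: subtract the mean, divide by the (biased) std
    return (values - values.mean()) / values.std()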
def main(filename, output_folder):
    print("FILENAME", filename)
    templateSummary = "%d/clustering/summary.txt"
    allFolders = os.listdir(".")
    numberOfEpochs = len([
        epoch for epoch in allFolders
        if epoch.isdigit() and os.path.isfile(templateSummary % int(epoch))
    ])

    clustering = utilities.loadtxtfile(templateSummary % (numberOfEpochs - 1))
    clustersThres = list(set(clustering[:, 4]))
    clustersThres.sort(reverse=True)
    spawningPerThres = np.zeros((numberOfEpochs, len(clustersThres)))
    for i in range(numberOfEpochs):
        clustering = utilities.loadtxtfile(templateSummary % i)
        for j, threshold in enumerate(clustersThres):
            spawningPerThres[i, j] = clustering[clustering[:, 4] == threshold,
                                                2].sum()
    line_objects = plt.plot(spawningPerThres)
    plt.legend(line_objects,
               tuple(["Cluster size %d" % x for x in clustersThres]),
               loc="best")
    plt.title("Processors spawned per epoch and cluster size")
    plt.xlabel("Epoch")
    plt.ylabel("Number of spawned processors")
    if output_folder and not os.path.exists(output_folder):
        os.makedirs(output_folder)
    if filename != "":
        plt.savefig(os.path.join(output_folder, "%s_spawning.png" % filename))
    plt.show()
def getRepresentativePDBs(filesWildcard, run):
    files = glob.glob(filesWildcard)
    trajs = [utilities.loadtxtfile(f)[:, 1:] for f in files]
    cl = cluster.Cluster(0, "", "")
    cl.clusterCenters = utilities.loadtxtfile(cl.clusterCentersFile)
    dtrajs = cl.assignNewTrajectories(trajs)
    numClusters = cl.clusterCenters.shape[0]
    centersInfo = getCentersInfo(cl.clusterCenters, trajs, files, dtrajs)

    if not os.path.exists("representative_structures"):
        os.makedirs("representative_structures")
    with open("representative_structures/representative_structures_%d.dat" % run, "w") as fw:
        fw.write("Cluster\tEpoch\tTrajectory\tSnapshot\n")
        for clNum in range(numClusters):
            fw.write("%d\t" % clNum+"\t".join(centersInfo[clNum]["structure"])+"\n")
def calculate_rmsd_traj(nativePDB, resname, symmetries, rmsdColInReport, traj, reportName, top, epoch, outputFilename, fmt_str, new_report):
    top_proc = None
    if top is not None:
        top_proc = utilities.getTopologyFile(top)
    rmsds = utilities.getRMSD(traj, nativePDB, resname, symmetries, topology=top_proc)

    if new_report:
        fixedReport = np.zeros((rmsds.size, 2))
        fixedReport[:, 0] = range(rmsds.size)
        fixedReport[:, 1] = rmsds
        header = ""
    else:
        with open(reportName) as f:
            header = f.readline().rstrip()
            if not header.startswith("#"):
                header = ""
            reportFile = utilities.loadtxtfile(reportName)
        if 0 < rmsdColInReport < reportFile.shape[1]:
            reportFile[:, rmsdColInReport] = rmsds
            fixedReport = reportFile
        else:
            fixedReport = analysis_utils.extendReportWithRmsd(reportFile, rmsds)

    with open(outputFilename, "w") as fw:
        if header:
            fw.write("%s\tRMSD\n" % header)
        else:
            fw.write("# Step\tRMSD\n")
        np.savetxt(fw, fixedReport, fmt=fmt_str)
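# A hypothetical invocation, assuming the usual AdaptivePELE layout of
# numbered epoch folders holding trajectory/report pairs; every path, column
# index and format string here is illustrative, not fixed by the code above.
calculate_rmsd_traj("native.pdb", "LIG", [], -1,
                    "0/trajectory_1.pdb", "0/report_1", None,
                    0, "0/fixedReport_1", "%.4f", new_report=False)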
def get_centers_info(trajectoryFolder, trajectoryBasename, num_clusters,
                     clusterCenters):
    centersInfo = {
        x: {
            "structure": None,
            "minDist": 1e6,
            "center": None
        }
        for x in range(num_clusters)
    }

    trajFiles = glob.glob(os.path.join(trajectoryFolder, trajectoryBasename))
    for traj in trajFiles:
        _, epoch, iTraj = os.path.splitext(traj)[0].split("_", 3)
        trajCoords = utilities.loadtxtfile(traj)
        if len(trajCoords.shape) < 2:
            trajCoords = [trajCoords]
        for snapshot in trajCoords:
            nSnap = snapshot[0]
            snapshotCoords = snapshot[1:]
            dist = np.sqrt(np.sum((clusterCenters - snapshotCoords)**2,
                                  axis=1))
            for clusterInd in range(num_clusters):
                if dist[clusterInd] < centersInfo[clusterInd]['minDist']:
                    centersInfo[clusterInd]['minDist'] = dist[clusterInd]
                    centersInfo[clusterInd]['structure'] = (epoch, int(iTraj),
                                                            nSnap)
                    centersInfo[clusterInd]['center'] = snapshotCoords
    return centersInfo
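# The nested per-cluster loop above can be vectorized with numpy: keep the
# current best distances in an array and update every cluster for which the
# snapshot improves. A sketch under that assumption (minDist stored as an
# array instead of a dict field; the helper name is illustrative):
import numpy as np

def update_best_distances(minDists, dist):
    # boolean mask of clusters whose representative snapshot improves
    improved = dist < minDists
    minDists[improved] = dist[improved]
    return improved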
def main(epoch_num, trajectory, snapshot_num, resname, clustering_object,
         topology):
    calc = RMSDCalculator.RMSDCalculator()
    clustering_object = utilities.readClusteringObject(clustering_object)
    n_clusters = utilities.loadtxtfile(
        os.path.join(str(max(0, epoch_num - 1)), "clustering",
                     "summary.txt")).shape[0]
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    filename = glob.glob(
        os.path.join(str(epoch_num), "*traj*_%d.*" % trajectory))
    if not filename:
        raise ValueError(
            "No file with the specified epoch and trajectory found")
    try:
        snapshots = utilities.getSnapshots(filename[0],
                                           topology=topology)[snapshot_num]
    except IndexError:
        raise IndexError(
            "Snapshot number %d not found in trajectory %d for epoch %d, please check that the arguments provided are correct"
            % (snapshot_num, trajectory, epoch_num))
    pdb = atomset.PDB()
    pdb.initialise(snapshots, resname=resname, topology=topology_contents)
    for i, cluster in enumerate(clustering_object[:n_clusters]):
        dist = calc.computeRMSD(pdb, cluster.pdb)
        if dist < cluster.threshold:
            print("Snapshot belongs to cluster", i)
            return
    print("Snapshot not assigned to any cluster! :(")
def process_file(traj, top_file, resname, report, outputFilename, format_out,
                 new_report, epoch):
    start = time.time()
    sasa_values = calculateSASA(traj, top_file, resname)
    header = ""
    if not new_report:
        try:
            reportFilename = glob.glob(report)[0]
        except IndexError:
            raise IndexError("File %s not found" % report)
        if outputFilename != reportFilename:
            # build on the previously written output file rather than the raw report
            reportFilename = outputFilename

        with open(reportFilename) as f:
            header = f.readline().rstrip()
            if not header.startswith("#"):
                header = ""
            reportFile = utilities.loadtxtfile(f)

        fixedReport = analysis_utils.extendReportWithRmsd(
            reportFile, sasa_values)
    else:
        indexes = np.arange(sasa_values.shape[0])
        fixedReport = np.concatenate((indexes[:, None], sasa_values[:, None]),
                                     axis=1)

    with open(outputFilename, "w") as fw:
        if header:
            fw.write("%s\tSASA\n" % header)
        else:
            fw.write("# Step\tSASA\n")
        np.savetxt(fw, fixedReport, fmt=format_out, delimiter="\t")
    end = time.time()
    print("Took %.2fs to process" % (end - start), traj)
def main(path_files, path_to_save):
    files = glob.glob(os.path.join(path_files, "traj_*.dat"))
    Y = []
    for f in files:
        Y.append(utilities.loadtxtfile(f))

    Y_stack = np.vstack(Y)
    xall = Y_stack[:, 1]
    yall = Y_stack[:, 2]
    zall = Y_stack[:, 3]

    plt.figure(figsize=(8, 5))
    mplt.plot_free_energy(xall, yall, cmap="Spectral")
    plt.xlabel("x")
    plt.ylabel("y")
    if path_to_save is not None:
        plt.savefig(os.path.join(path_to_save, "x-y_decomposition.png"))

    plt.figure(figsize=(8, 5))
    mplt.plot_free_energy(xall, zall, cmap="Spectral")
    plt.xlabel("x")
    plt.ylabel("z")
    if path_to_save is not None:
        plt.savefig(os.path.join(path_to_save, "x-z_decomposition.png"))

    plt.figure(figsize=(8, 5))
    mplt.plot_free_energy(yall, zall, cmap="Spectral")
    plt.xlabel("y")
    plt.ylabel("z")
    if path_to_save is not None:
        plt.savefig(os.path.join(path_to_save, "y-z_decomposition.png"))
    plt.show()
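# The plotting helper "mplt" above is presumably PyEMMA's plotting module
# (import pyemma.plots as mplt); plot_free_energy builds a 2D histogram of
# the samples and renders -log(counts) as a free-energy surface. Typical
# imports for this script would then be:
import glob
import os
import numpy as np
import matplotlib.pyplot as plt
import pyemma.plots as mplt
from AdaptivePELE.utilities import utilities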
def calculateAutoCorrelation(lagtimes, dtrajs, nclusters, nLags):
    C = np.zeros((nclusters, nLags))
    Ci = np.zeros((nclusters, nLags))
    Cf = np.zeros((nclusters, nLags))
    autoCorr = np.zeros((nclusters, nLags))
    N = 0
    M = np.zeros(nLags)
    for trajectory in dtrajs:
        traj = utilities.loadtxtfile(trajectory, dtype=int)
        Nt = traj.size
        N += Nt
        for il, lagtime in enumerate(lagtimes):
            M[il] += Nt - lagtime
            for i in range(Nt - lagtime):
                autoCorr[traj[i], il] += (traj[i] == traj[i + lagtime])
                C[traj[i], il] += 1
                Ci[traj[i], il] += 1
                if i > lagtime:
                    Cf[traj[i], il] += 1
            for j in range(Nt - lagtime, Nt):
                C[traj[j], il] += 1
                Cf[traj[j], il] += 1

    mean = C / float(N)
    var = (N * C - (C**2)) / float(N * (N - 1))
    autoCorr += M * mean**2 - (Ci + Cf) * mean
    autoCorr /= N
    autoCorr /= var
    return autoCorr
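# What calculateAutoCorrelation computes, spelled out: for each state s and
# lag t it estimates the autocorrelation of the indicator function
# 1[x_i == s] over all discretized trajectories,
#
#     r_s(t) = sum_i (1[x_i == s] - mu_s) * (1[x_{i+t} == s] - mu_s) / (N * var_s)
#
# with mu_s = C_s / N the empirical frequency of state s and var_s its sample
# variance. Expanding the product recovers the bookkeeping above: the raw
# coincidence counts (autoCorr), the occupancies of the leading and trailing
# windows (Ci and Cf), and the M * mean**2 cross term. Note the code
# normalizes by the total length N rather than the per-lag pair count M[il].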
def process_file(traj, top_file, resname, report, outputFilename, format_out,
                 new_report, epoch):
    sasa_values = calculateSASA(traj, top_file, resname)
    header = ""
    if not new_report:
        try:
            reportFilename = glob.glob(report)[0]
        except IndexError:
            raise IndexError("File %s not found" % report)

        with open(reportFilename) as f:
            header = f.readline().rstrip()
            if not header.startswith("#"):
                header = ""
            reportFile = utilities.loadtxtfile(f)

        fixedReport = correctRMSD.extendReportWithRmsd(reportFile, sasa_values)
    else:
        fixedReport = sasa_values

    with open(outputFilename, "w") as fw:
        if header:
            fw.write("%s\tSASA\n" % header)
        else:
            fw.write("# SASA\n")
        np.savetxt(fw, fixedReport, fmt=format_out, delimiter="\t")
def getSASAvalues(representative_file, sasa_col, path_to_report):
    clusters_info = np.loadtxt(representative_file, skiprows=1, dtype=int)
    extract_info = getR.getExtractInfo(clusters_info)
    sasa = [0 for _ in clusters_info]
    for trajFile, extraInfo in extract_info.items():
        report_filename = glob.glob(
            os.path.join(path_to_report, "%d", "report*_%d") % trajFile)[0]
        report = utilities.loadtxtfile(report_filename)
        for pair in extraInfo:
            sasa[pair[0]] = report[pair[1], sasa_col]
    return sasa
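# Note the formatting trick above: os.path.join first builds the template
# ".../%d/report*_%d", and the (epoch, trajectory) tuple stored in trajFile
# then fills both placeholders in a single % operation.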
def copyWorkingTrajectories(fileWildcard,
                            length=None,
                            ntrajs=None,
                            bootstrap=True,
                            skipFirstSteps=0):
    """
        Function that copies trajectories that match "fileWildcard" into the current directory.
        It may copy a subset and a part of them (length)

        Warning! If not using bootstrap, it uses all the trajectories

        :param fileWildcard: Wildcard to match original files
        :type fileWildcard: str
        :param length: Trajectory length to consider, if None (default value), the full trajectory will be considered
        :type length: int
        :param ntrajs: Number of trajs to consider. If None (default value), a number equal to the total  will be considered
        :type ntrajs: int
        :param bootstrap: Bootstrap ntrajs from the original (default is True)
        :type bootstrap: bool
        :param skipFirstSteps: Skip first trajectory steps (default value is 0)
        :type skipFirstSteps: int

        :returns: list -- writenFiles, in order to ease a posterior cleanup

    """
    allFiles = glob.glob(fileWildcard)

    if bootstrap is False:
        trajFiles = allFiles
    else:
        if ntrajs is None:
            ntrajs = len(allFiles)
        trajFiles = np.random.choice(allFiles, ntrajs)

    writenFiles = []
    for i, trajFile in enumerate(trajFiles):
        dst = __getDstName(bootstrap, i, trajFile)
        writenFiles.append(dst)
        traj = utilities.loadtxtfile(trajFile)
        if length is None:
            traj_len = len(traj)  # so that later everything is copied
        else:
            traj_len = length
        try:
            trimmedTraj = traj[skipFirstSteps:traj_len + 1, :]
            if len(trimmedTraj) > 0:
                np.savetxt(dst, trimmedTraj, fmt="%.4f", delimiter="\t")
        except Exception:
            sys.exit("There is a problem with %s" % trajFile)
    print("Bootstrapping trajectories", ntrajs, len(trajFiles),
          len(set(trajFiles)))
    # trajFiles.sort()
    # print(trajFiles)
    return writenFiles
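# A minimal usage sketch; the wildcard and sizes are illustrative only:
# bootstrap 50 trajectories of at most 100 steps into the current directory,
# then use the returned list for cleanup.
import os

written = copyWorkingTrajectories("allTrajs/traj_*.dat", length=100, ntrajs=50)
for f in written:
    os.remove(f)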
def main(lagtime,
         clusters_file,
         disctraj,
         trajs,
         n_clusters,
         plots_path,
         save_plot,
         show_plot,
         lagtime_resolution=20):
    lagtimes = list(range(1, lagtime, lagtime_resolution))
    n_lags = len(lagtimes)
    if disctraj is None:
        clusteringObject = cluster.Cluster(n_clusters,
                                           trajs,
                                           "traj*",
                                           alwaysCluster=False)
        if clusters_file is not None:
            # only assign
            utilities.makeFolder(clusteringObject.discretizedFolder)
            clusteringObject.clusterCentersFile = clusters_file
        clusteringObject.clusterTrajectories()
        disctraj = clusteringObject.discretizedFolder
        clusterCenters = clusteringObject.clusterCenters
    else:
        clusterCenters = utilities.loadtxtfile(clusters_file)
    if len(clusterCenters) != n_clusters:
        raise ValueError(
            "Number of clusters specified in the -n parameter does not match the provided clusters"
        )
    print("Calculating autocorrelation...")
    dtrajs = glob.glob(os.path.join(disctraj, "traj*"))
    dtrajs_loaded = [
        utilities.loadtxtfile(dtraj, dtype=int) for dtraj in dtrajs
    ]

    autoCorr = utils.calculateAutoCorrelation(lagtimes, dtrajs_loaded,
                                              n_clusters, n_lags)
    np.save("autoCorr.npy", autoCorr)
    # __cleanupFiles(parameters.trajWildcard, False)

    utilities.write_PDB_clusters(np.vstack(
        (clusterCenters.T, np.abs(autoCorr[:, -1]))).T,
                                 use_beta=True,
                                 title="cluster_autoCorr.pdb")
    print("Clusters over correlation time limit")
    correlation_limit = np.exp(-1)
    states2 = np.where(autoCorr[:, -1] > correlation_limit)[0]
    size2 = states2.size
    if len(states2):
        print(" ".join(map(str, states2)))
    print("Number of clusters:", size2,
          ", %.2f%% of the total" % (100 * size2 / float(n_clusters)))
    print("Clusters with more than 0.1 autocorrelation")
    states1 = np.where(autoCorr[:, -1] > 0.1)[0]
    size1 = states1.size
    if len(states1):
        print(" ".join(map(str, states1)))
    print("Number of clusters:", size1,
          ", %.2f%% of the total" % (100 * size1 / float(n_clusters)))
    if size2 > 0:
        print("Correlation time not achieved at lagtime %d" % lagtime)
    else:
        for i in range(len(lagtimes)):
            states = np.where(autoCorr[:, -i - 1] > correlation_limit)[0]
            if len(states):
                string_states = ", ".join(map(str, states))
                print("Correlation time %d, for states: %s" %
                      (lagtimes[-i - 1], string_states))
                break
                break

    if plots_path is None:
        plots_path = ""
    else:
        utilities.makeFolder(plots_path)
    create_plots(autoCorr,
                 plots_path,
                 save_plot,
                 show_plot,
                 n_clusters,
                 lagtimes,
                 threshold=2.0)
def createPlot(reportName,
               column1,
               column2,
               stepsPerRun,
               printWithLines,
               paletteModifier,
               trajs_range=None,
               label_x=None,
               label_y=None,
               label_colorbar=None,
               fig_size=(6, 6),
               simulation_path=".",
               skip_first_step=False,
               skip_steps=None,
               y_top=None,
               y_bottom=None,
               x_left=None,
               x_right=None):
    """
        Generate a string to be passed to gnuplot

        :param reportName: Name of the files containing the simulation data
        :type reportName: str
        :param column1: Column to plot in the X axis
        :type column1: int
        :param column2: Column to plot in the Y axis
        :type column2: int
        :param stepsPerRun: Number of steps per epoch,
        :type stepsPerRun: int
        :param paletteModifier: Whether to use the epoch as color or a column
        :type paletteModifier: int
        :param trajs_range: Range of trajectories to plot
        :type trajs_range: str
        :param label_x: Label of the x-axis
        :type label_x: str
        :param label_y: Label of the y-axis
        :type label_y: str
        :param label_colorbar: Label of the colorbar
        :type label_colorbar: str
        :param fig_size: Size of the plot figure (default (6in, 6in))
        :type fig_size: tuple
        :param simulation_path: Path to the simulation data
        :type simulation_path: str
        :param skip_first_step: Whether to avoid plotting the first point in each report
        :type skip_first_step: bool
        :param skip_steps: Number of steps to skip in the plot
        :type skip_steps: int
        :param y_bottom: Bottom limit of the y axis
        :type y_bottom: float
        :param y_top: Top limit of the y axis
        :type y_top: float
        :param x_left: Left limit of the x axis
        :type x_bottom: float
        :param x_right: Right limit of the x axis
        :type x_right: float
    """
    epochs = utilities.get_epoch_folders(simulation_path)
    numberOfEpochs = len(epochs)
    if numberOfEpochs == 0:
        raise ValueError("No simulation found in specified path %s" %
                         os.path.abspath(simulation_path))
    cmap_name = "viridis"

    dictionary = {
        'reportName': reportName,
        'col2': column2,
        'numberOfEpochs': numberOfEpochs,
        'col1': column1,
        'withLines': printWithLines,
        'color': paletteModifier
    }
    annotations = []
    artists = []
    trajectory_range = set()
    if trajs_range is not None:
        start, end = map(int, trajs_range.split(":"))
        trajectory_range = set(range(start, end + 1))
    cmin = 1e10
    cmax = -1e10
    data_dict = {}
    max_report = 0
    min_report = 1e10
    for epoch in epochs:
        ep = int(epoch)
        reports = utilities.getReportList(
            os.path.join(simulation_path, epoch, reportName + "*"))
        if not reports:
            raise ValueError(
                "Could not find any reports with the given name!!")
        for report in reports:
            report_num = utilities.getReportNum(report)
            max_report = max(max_report, report_num)
            min_report = min(min_report, report_num)
            if trajs_range is not None and report_num not in trajectory_range:
                continue
            data = utilities.loadtxtfile(report)
            if skip_steps is not None:
                if data.shape[0] <= skip_steps:
                    continue
                data = data[skip_steps:]
            elif skip_first_step:
                data = data[1:]
            if paletteModifier is not None and paletteModifier != -1:
                cmin = min(cmin, data[:, paletteModifier].min())
                cmax = max(cmax, data[:, paletteModifier].max())
            data_dict[(ep, report_num)] = data
    fig, ax = plt.subplots(figsize=fig_size)
    ticks = None
    if paletteModifier == -1:
        cmin = min_report
        cmax = max_report
    if paletteModifier is None:
        cmin = int(epochs[0])
        cmax = int(epochs[-1])
        ticks = range(cmin, cmax + 1)
    sm = plt.cm.ScalarMappable(cmap=plt.get_cmap(cmap_name),
                               norm=plt.Normalize(vmin=cmin, vmax=cmax))
    sm.set_array([])
    dictionary['cmap'] = sm
    if paletteModifier != -1:
        cbar = plt.colorbar(sm, ticks=ticks)
        cbar.ax.zorder = -1
    offset = 0
    if skip_steps is not None:
        offset = skip_steps
    elif skip_first_step:
        # if we skip the first step there is a point that is not shown, but we
        # should count it anyway
        offset = 1
    for el in data_dict:
        addLine(data_dict[el], el[1], el[0], stepsPerRun, dictionary, artists)
        annotations.append([
            "Epoch: %d\nTrajectory: %d\nModel: %d" %
            (el[0], el[1], i + 1 + offset) for i in range(len(data_dict[el]))
        ])
    if label_x is not None:
        plt.xlabel(label_x)
    if label_y is not None:
        plt.ylabel(label_y)
    # the colorbar only exists when paletteModifier != -1
    if paletteModifier != -1:
        if label_colorbar is not None:
            cbar.set_label(label_colorbar)
        elif paletteModifier is None:
            cbar.set_label("Epoch")
    ax.set_ylim(bottom=y_bottom, top=y_top)
    ax.set_xlim(left=x_left, right=x_right)

    annot = ax.annotate("",
                        xy=(0, 0),
                        xytext=(20, 20),
                        textcoords="offset points",
                        bbox=dict(boxstyle="round", fc="w"),
                        arrowprops=dict(arrowstyle="->"))
    annot.set_visible(False)

    def modify_color(color):
        color_offset = 0.5
        color = list(color)
        for i in range(3):
            color[i] = min(color[i] + color_offset, 1)
        return tuple(color)

    def update_annot(ind, color, pos, index):
        """Update the information box of the selected point"""
        annot.xy = pos
        annot.set_text(annotations[index][int(ind["ind"][0])])
        annot.get_bbox_patch().set_facecolor(modify_color(color))
        annot.get_bbox_patch().set_alpha(0.8)
        annot.zorder = 10

    def locate_event(event):
        for j, el in enumerate(artists):
            found, info = el.contains(event)
            if found:
                return j, found, info, el
        return 0, False, None, None

    def extract_data(obj_plot, ind):
        try:
            x, y = obj_plot.get_data()
            x = x[ind["ind"][0]]
            y = y[ind["ind"][0]]
            return (x, y)
        except AttributeError:
            return obj_plot.get_offsets()[ind["ind"][0]]

    def extract_color(obj_plot, ind):
        try:
            return obj_plot.get_markerfacecolor()
        except AttributeError:
            return obj_plot.get_facecolor()[ind["ind"][0]]

    def hover(event):
        """Action to perform when hovering the mouse on a point"""
        vis = annot.get_visible()
        if event.inaxes == ax:
            index, cont, ind, obj = locate_event(event)
            if cont:
                update_annot(ind, extract_color(obj, ind),
                             extract_data(obj, ind), index)
                annot.set_visible(True)
                fig.canvas.draw_idle()
            else:
                if vis:
                    annot.set_visible(False)
                    fig.canvas.draw_idle()

    # Respond to mouse motion
    fig.canvas.mpl_connect("motion_notify_event", hover)
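# A hypothetical call, assuming reports named "report_*" with a metric in
# column 5; the hover callback registered above makes the figure interactive.
createPlot("report", 1, 5, stepsPerRun=4, printWithLines=False,
           paletteModifier=None, label_x="Step", label_y="Metric")
plt.show()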
def extractInfo(inputFile):
    clusterInfo = utilities.loadtxtfile(inputFile)
    return clusterInfo[:, 1]
def main(num_clusters,
         criteria1,
         criteria2,
         ligand_resname,
         output_folder="ClusterCentroids",
         atom_ids="",
         cpus=2,
         topology=None,
         report="report_",
         traj="trajectory_",
         use_pdb=False,
         png=False,
         CA=0,
         sidechains=0,
         restart="all"):
    # Create multiprocess pool
    if cpus > 1:
        pool = mp.Pool(cpus)
    else:
        pool = mp.Pool(1)
    # Extract COM ligand for each snapshot
    if not glob.glob("allTrajs/traj*"):
        extractCoords.main(lig_resname=ligand_resname,
                           non_Repeat=True,
                           atom_Ids=atom_ids,
                           nProcessors=cpus,
                           parallelize=True,
                           topology=topology,
                           protein_CA=CA,
                           sidechains=sidechains)

    print("Clusterize trajectories by RMSD of COM")
    trajectoryFolder = "allTrajs"
    trajectoryBasename = "*traj*"
    stride = 1
    clusterCountsThreshold = 0
    folders = utilities.get_epoch_folders(".")
    folders.sort(key=int)
    if not restart:

        clusteringObject = cluster.Cluster(num_clusters,
                                           trajectoryFolder,
                                           trajectoryBasename,
                                           alwaysCluster=True,
                                           stride=stride)
        clusteringObject.clusterTrajectories()
        clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
        clusterCenters = clusteringObject.clusterCenters
        np.savetxt("clustercenters.dat", clusterCenters)
        dtrajs = clusteringObject.dtrajs

        print("Extract metrics for each snapshot")
        min_metric_trajs = {}
        epochs = [folder for folder in glob.glob("./*/")
                  if folder.strip("./").isdigit()]
        reports = simulationToCsv.gather_reports()
        fields = simulationToCsv.retrieve_fields(reports[0])
        df = simulationToCsv.init_df(fields)
        df = simulationToCsv.fill_data(reports, df, pool)

        print("Update data with metrics and clusters")
        df.index = range(df.shape[0])
        df["Cluster"] = [None] * df.shape[0]
        input_list = [[
            df, Traj, d
        ] for d, Traj in zip(dtrajs, clusteringObject.trajFilenames)]
        results = pool.map(save_to_df, input_list)
        for data in results:
            for df_tmp in data:
                df.update(df_tmp)
        df.to_csv("Simulation.csv", index=False)
    if restart:
        df = pd.read_csv("Simulation.csv")
        clusterCenters = utilities.loadtxtfile("clustercenters.dat")
        print(clusterCenters)
    centersInfo = get_centers_info(trajectoryFolder, trajectoryBasename,
                                   num_clusters, clusterCenters)
    COMArray = [centersInfo[i]['center'] for i in range(num_clusters)]

    print("Retrieve clusters and metric")
    fields1 = []
    fields2 = []
    print(centersInfo)
    for cluster_num in centersInfo:
        epoch_num, traj_num, snap_num = map(
            int, centersInfo[cluster_num]['structure'])
        field1, crit1_name = get_metric(criteria1, epoch_num, traj_num,
                                        snap_num, report)
        field2, crit2_name = get_metric(criteria2, epoch_num, traj_num,
                                        snap_num, report)
        fields1.append(field1)
        fields2.append(field2)

    if output_folder is not None:
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""
    print("Output structures")
    writePDB(
        COMArray,
        outputFolder + "clusters_%d_KMeans_allSnapshots.pdb" % num_clusters)
    writeInitialStructures(fields1,
                           fields2,
                           crit1_name,
                           crit2_name,
                           centersInfo,
                           outputFolder + "cluster_{}_{}_{}_{}_{}.pdb",
                           traj,
                           topology=topology,
                           use_pdb=use_pdb)
    plotClusters(fields1,
                 fields2,
                 crit1_name,
                 crit2_name,
                 outputFolder,
                 png=png)
    assesClusterConvergence(df, num_clusters, traj, topology)
    return
import glob
import numpy as np
from AdaptivePELE.utilities import utilities
import matplotlib.pyplot as plt
plt.style.use("ggplot")

files = glob.glob("dihedrals/allTrajs/traj_0_*")
colors = ["r", "k", "b", "g", "y", "c", "m", "olive", "p"]
n = 9
for j, f in enumerate(files):
    plt.figure()
    data = np.rad2deg(utilities.loadtxtfile(f)[:, 1:])
    # terminal residues lack one of the dihedrals
    for i in range(n - 2):
        plt.scatter(data[:, i + n - 1],
                    data[:, i + 1],
                    color=colors[i],
                    marker=".",
                    label="Residue %d" % (i + 2))
    plt.xlim(-180, 180)
    plt.ylim(-180, 180)
    plt.xlabel(r'$\Phi$ Angle (degrees)')
    plt.ylabel(r'$\Psi$ Angle (degrees)')
    plt.title(f)
    plt.legend()
    plt.show()