Exemplo n.º 1
0
def main(ligand, clusters_file, conf_folder, topology=None):
    """
        Cluster the extracted coordinates and write every snapshot, as a PDB
        file, into the folder of the cluster it was assigned to.

        The coordinate files found in each epoch folder are copied into
        ``allTrajs_nonRepeat`` and clustered. Afterwards every file matching
        ``discretized/*.disctraj`` is read (presumably written by the
        clustering step) and, for each snapshot, the structure is written to
        ``cluster_<n>/allStructures/conf_<epoch>_<traj>_<snapshot>.pdb``.
        The coordinates gathered for each cluster are saved to
        ``cluster_<n>/positions.dat``.

        :param ligand: Not used by this function, kept so existing callers
            keep working
        :param clusters_file: Text file readable by ``np.loadtxt``; its
            number of rows is used as the number of clusters
        :type clusters_file: str
        :param conf_folder: Folder that contains the epoch folders
        :type conf_folder: str
        :param topology: Topology handed to ``utilities.getTopologyFile`` and
            ``utilities.getSnapshots`` (optional)
        :raises IndexError: If no trajectory file is found for a discretized
            trajectory
    """
    trajFolder = "allTrajs_nonRepeat"
    cluster_centers = np.loadtxt(clusters_file)
    for folder in ("discretized", trajFolder):
        if not os.path.exists(folder):
            os.makedirs(folder)
    stride = 1
    clusterCountsThreshold = 0
    trajBasename = "coord*"
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    epoch_folders = utilities.get_epoch_folders(conf_folder)
    numClusters = cluster_centers.shape[0]
    coordinates = [[] for _ in range(numClusters)]
    for it in epoch_folders:
        # os.path.join works whether or not conf_folder ends with a separator
        files = glob.glob(
            os.path.join(conf_folder, "%s" % it, "extractedCoordinates",
                         "coord*"))
        for f in files:
            traj = os.path.splitext(f)[0].split("_")[-1]
            shutil.copy(f,
                        os.path.join(trajFolder,
                                     "coord_%s_%s.dat" % (it, traj)))
    clusteringObject = cluster.Cluster(numClusters,
                                       trajFolder,
                                       trajBasename,
                                       alwaysCluster=False,
                                       stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    for i in range(numClusters):
        # Test the folder that is actually written to: a pre-existing
        # cluster_<i> folder without allStructures must still get it created
        structuresFolder = os.path.join("cluster_%d" % i, "allStructures")
        if not os.path.exists(structuresFolder):
            os.makedirs(structuresFolder)
    for dtraj in glob.glob("discretized/*.disctraj"):
        print(dtraj)
        # atleast_1d / ndmin=2 keep one-snapshot trajectories iterable and
        # indexable by snapshot
        traj = np.atleast_1d(np.loadtxt(dtraj))
        epoch, traj_num = map(int,
                              os.path.splitext(dtraj)[0].split("_", 3)[1:])
        trajPositions = np.loadtxt(os.path.join(
            trajFolder, "coord_%d_%d.dat" % (epoch, traj_num)),
                                   ndmin=2)
        # Prefer "<name>.<ext>" so that trajectory_1 does not also match
        # trajectory_10, trajectory_11, ...; fall back to the loose pattern
        trajBase = os.path.join(conf_folder, "%d" % epoch,
                                "trajectory_%d" % traj_num)
        matches = glob.glob(trajBase + ".*") or glob.glob(trajBase + "*")
        if not matches:
            raise IndexError("No trajectory file found matching %s*" %
                             trajBase)
        trajFile = matches[0]
        snapshots = utilities.getSnapshots(trajFile, topology=topology)
        for nSnap, cluster_num in enumerate(traj):
            cluster_num = int(cluster_num)
            coordinates[cluster_num].append(trajPositions[nSnap])
            filename = "cluster_%d/allStructures/conf_%d_%d_%d.pdb" % (
                cluster_num, epoch, traj_num, nSnap)
            if isinstance(snapshots[nSnap], basestring):
                # Text snapshots are written verbatim
                with open(filename, "w") as fw:
                    fw.write(snapshots[nSnap])
            else:
                utilities.write_mdtraj_object_PDB(snapshots[nSnap], filename,
                                                  topology_contents)
    for cl in range(numClusters):
        np.savetxt("cluster_%d/positions.dat" % cl, coordinates[cl])
Exemplo n.º 2
0
def main(representatives_files, path_structures, output="", clusters=None, trajNames="trajectory", topology=None):
    """
        Write the representative structure of the selected clusters as PDB
        files named ``cluster_<n>.pdb``.

        :param representatives_files: Text file with one header line followed
            by integer rows describing the representative structures
        :type representatives_files: str
        :param path_structures: Folder that contains the epoch folders with
            the trajectory files
        :type path_structures: str
        :param output: Destination folder; when empty a
            ``representative_structures_pdbs`` folder next to
            ``representatives_files`` is used. If the destination already
            exists, a ``_<n>`` suffix with the first free number is appended
        :type output: str
        :param clusters: Row indices to extract, ``None`` or ``['a']`` selects
            every row
        :type clusters: list
        :param trajNames: Base name of the trajectory files
        :type trajNames: str
        :param topology: Topology handed to ``utilities.getTopologyFile`` and
            ``utilities.getSnapshots`` (optional)
        :raises IOError: If the representatives file or a trajectory cannot be
            opened
        :raises ValueError: If no trajectory file matches an entry
    """
    if clusters is None:
        clusters = ['a']
    # Load the representative structures file
    try:
        # ndmin=2 keeps a file with a single representative as one row, so the
        # row selection below and the row-wise consumers keep working
        clusters_info = np.loadtxt(representatives_files, skiprows=1, dtype=int, ndmin=2)
    except IOError:
        raise IOError("Couldn't find a representative file in %s, please check that the path is correct" % representatives_files)
    # Organize to minimise pdb loading
    if clusters != ['a']:
        clusters_info = clusters_info[list(map(int, clusters))]

    extract_info = getExtractInfo(clusters_info)

    # Write appropriate pdbs
    destFolder = output
    if not output:
        destFolder = os.path.join(os.path.dirname(representatives_files), "representative_structures_pdbs")

    if os.path.exists(destFolder):
        # Never overwrite previous results: look for the first free suffix.
        # The folder name is passed as an argument to the format so a literal
        # '%' in the path cannot break the formatting
        it = 1
        while os.path.exists("%s_%d" % (destFolder, it)):
            it += 1
        destFolder = "%s_%d" % (destFolder, it)
    os.makedirs(destFolder)

    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None

    for trajFile, extraInfo in extract_info.items():
        # trajFile supplies the two numbers that fill <first>/<trajNames>_<second>.*
        structurePattern = os.path.join(path_structures, "%d" % trajFile[0], "%s_%d.*" % (trajNames, trajFile[1]))
        try:
            pdbFile = glob.glob(structurePattern)[0]
        except IndexError:
            raise ValueError("Structure %s not found" % structurePattern)
        try:
            snapshots = utilities.getSnapshots(pdbFile, topology=topology)
        except IOError:
            raise IOError("Unable to open %s, please check that the path to structures provided is correct" % pdbFile)
        for pair in extraInfo:
            # pair[0] names the output file, pair[1] indexes the snapshot
            outputFile = os.path.join(destFolder, "cluster_%d.pdb" % pair[0])
            if topology_contents is None:
                with open(outputFile, "w") as fw:
                    fw.write(snapshots[pair[1]])
                    fw.write("\n")
            else:
                utilities.write_mdtraj_object_PDB(snapshots[pair[1]], outputFile, topology=topology_contents)
Exemplo n.º 3
0
def writeInitialStructures(centers_info, filename_template, topology=None):
    """
        Write the structure associated with every cluster center to a file.

        :param centers_info: Mapping from cluster number to a mapping whose
            'structure' entry holds the epoch, trajectory and snapshot numbers
        :type centers_info: dict
        :param filename_template: Template of the output file name, formatted
            with the cluster number
        :type filename_template: str
        :param topology: Topology handed to ``utilities.getTopologyFile`` and
            ``utilities.getSnapshots`` (optional)
        :raises IndexError: If no trajectory file is found for a center
    """
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        # Must be bound even without a topology, it is used below whenever
        # the snapshots are not plain strings
        topology_contents = None
    for cluster_num in centers_info:
        epoch_num, traj_num, snap_num = map(
            int, centers_info[cluster_num]['structure'])
        # Prefer "<name>.<ext>" so that trajectory_1 does not also match
        # trajectory_10, trajectory_11, ...; fall back to the loose pattern
        trajBase = "%d/trajectory_%d" % (epoch_num, traj_num)
        matches = glob.glob(trajBase + ".*") or glob.glob(trajBase + "*")
        if not matches:
            raise IndexError("No trajectory file found matching %s*" %
                             trajBase)
        snapshots = utilities.getSnapshots(matches[0], topology=topology)
        structure = snapshots[snap_num]
        output_file = filename_template % cluster_num
        if isinstance(structure, basestring):
            # Text snapshots are written verbatim
            with open(output_file, "w") as fw:
                fw.write(structure)
        else:
            utilities.write_mdtraj_object_PDB(structure, output_file,
                                              topology_contents)
Exemplo n.º 4
0
def main(n_clusters,
         output_folder,
         SASAColumn,
         norm_energy,
         num_bins,
         percentile,
         plots,
         atom_Ids,
         folder_name,
         traj_basename,
         cluster_energy,
         topology=None):
    """
        Select initial structures by clustering the low-energy snapshots of a
        simulation.

        Every snapshot is described by a row holding: energy, SASA, epoch,
        trajectory, snapshot and the extracted coordinates. The rows are
        binned by SASA, only the snapshots whose energy is below the requested
        percentile of their bin are kept, and the survivors are clustered with
        KMeans. The snapshot closest to each cluster center is written to
        ``initial_<i>.pdb``.

        :param n_clusters: Number of clusters (and of structures written)
        :type n_clusters: int
        :param output_folder: Folder where the results are written, None to
            use the current folder
        :type output_folder: str
        :param SASAColumn: Column of the report files used as SASA
        :type SASAColumn: int
        :param norm_energy: Whether to rescale the energies to [0, 1]
        :type norm_energy: bool
        :param num_bins: Number of left bin edges placed along the SASA range
        :type num_bins: int
        :param percentile: Energy percentile used as threshold in each bin
        :type percentile: float
        :param plots: Whether to save and show a scatter plot of the clusters
        :type plots: bool
        :param atom_Ids: Forwarded to ``extractCoords.main``
        :param folder_name: Folder with the simulation data
        :type folder_name: str
        :param traj_basename: Base name of the trajectory files
        :type traj_basename: str
        :param cluster_energy: Cluster on (energy, SASA) if True, on columns
            5 to 7 (the coordinates) otherwise
        :type cluster_energy: bool
        :param topology: Topology handed to ``utilities.getTopologyFile`` and
            ``utilities.getSnapshots`` (optional)
        :raises IndexError: If no trajectory file is found for a center
    """
    energyColumn = 3

    if output_folder is not None:
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""

    # NOTE(review): ligand_resname is not a parameter of this function, it has
    # to exist at module level -- confirm where it is defined
    extractCoords.main(folder_name,
                       lig_resname=ligand_resname,
                       non_Repeat=True,
                       atom_Ids=atom_Ids)

    epochFolders = utilities.get_epoch_folders(folder_name)
    points = []
    for epoch in epochFolders:
        report_files = glob.glob(os.path.join(epoch, "*report*"))
        report_files.sort(key=lambda x: int(x[x.rfind("_") + 1:]))
        for report_name in report_files:
            traj_num = int(report_name[report_name.rfind("_") + 1:])
            coordinates = np.loadtxt(
                os.path.join(
                    folder_name, "%s/extractedCoordinates/coord_%d.dat" %
                    (epoch, traj_num)))
            report = np.loadtxt(report_name)
            if len(report.shape) < 2:
                # Report with a single line: loadtxt returns 1-D arrays
                points.append([
                    report[energyColumn], report[SASAColumn],
                    int(epoch), traj_num, 0
                ] + coordinates[1:].tolist())
            else:
                epoch_line = np.array([int(epoch)] * report.shape[0])
                traj_line = np.array([traj_num] * report.shape[0])
                snapshot_line = np.array(range(report.shape[0]))
                # The first column of the coordinates file is dropped
                points.extend(
                    np.hstack(
                        (report[:, (energyColumn, SASAColumn)],
                         epoch_line[:, np.newaxis], traj_line[:, np.newaxis],
                         snapshot_line[:, np.newaxis], coordinates[:, 1:])))
    points = np.array(points)
    # Sort by SASA so that every bin is a contiguous slice of rows
    points = points[points[:, 1].argsort()]
    minSASA = points[0, 1]
    maxSASA = points[-1, 1]
    left_bins = np.linspace(minSASA, maxSASA, num=num_bins, endpoint=False)
    indices = np.searchsorted(points[:, 1], left_bins)

    # NOTE(review): only consecutive pairs of left edges are used, so the rows
    # from the last left edge onwards are never selected -- confirm intended
    selected = []
    for i, j in zip(indices[:-1], indices[1:]):
        if i == j:
            # Empty bin, nothing to select and no percentile to compute
            continue
        bin_points = points[i:j]
        threshold = np.percentile(bin_points[:, 0], percentile)
        # Always extend with 1-D rows, whatever the number of matches, so the
        # resulting array is a regular 2-D array
        selected.extend(bin_points[bin_points[:, 0] < threshold])

    points = np.array(selected)
    if norm_energy:
        points[:, 0] -= points[:, 0].min()
        energyMax = points[:, 0].max()
        if energyMax > 0:
            # All energies equal would mean a division by zero
            points[:, 0] /= energyMax

    if cluster_energy:
        print("Clustering using energy and SASA")
        feature_columns = slice(0, 2)
        title = "clusters_%d_energy_SASA.pdb"
    else:
        print("Clustering using ligand coordinates")
        feature_columns = slice(5, 8)
        title = "clusters_%d_energy_SASA_coords.pdb"
    kmeans = KMeans(n_clusters=n_clusters).fit(points[:, feature_columns])
    centers_energy = []
    centers_coords = []
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    for i, center in enumerate(kmeans.cluster_centers_):
        # Use the real snapshot closest to the center of the cluster
        dist = np.linalg.norm(points[:, feature_columns] - center, axis=1)
        closest = dist.argmin()
        epoch, traj, snapshot = points[closest, 2:5]
        centers_energy.append(points[closest, :2])
        centers_coords.append(points[closest, 5:8])
        # Prefer "<name>.<ext>" so that trajectory 1 does not also match
        # trajectory 10, 11, ...; fall back to the loose pattern
        trajBase = "%d/%s_%d" % (epoch, traj_basename, traj)
        matches = glob.glob(trajBase + ".*") or glob.glob(trajBase + "*")
        if not matches:
            raise IndexError("No trajectory file found matching %s*" %
                             trajBase)
        conf = utilities.getSnapshots(matches[0],
                                      topology=topology)[int(snapshot)]
        initial_file = os.path.join(outputFolder, "initial_%d.pdb" % i)
        if isinstance(conf, basestring):
            with open(initial_file, "w") as fw:
                fw.write(conf)
        else:
            utilities.write_mdtraj_object_PDB(conf, initial_file,
                                              topology_contents)
    centers_energy = np.array(centers_energy)
    centers_coords = np.array(centers_coords)
    writePDB(centers_coords, os.path.join(outputFolder, title % n_clusters))
    if plots:
        plt.scatter(points[:, 1], points[:, 0], c=kmeans.labels_, alpha=0.5)
        plt.scatter(centers_energy[:, 1],
                    centers_energy[:, 0],
                    c=list(range(n_clusters)),
                    marker='x',
                    s=56,
                    zorder=1)
        plt.xlabel("SASA")
        if norm_energy:
            plt.ylabel("Energy (normalized)")
            plt.savefig(
                os.path.join(outputFolder, "clusters_energy_normalized.png"))
        else:
            plt.ylabel("Energy (kcal/mol)")
            plt.savefig(
                os.path.join(outputFolder, "clusters_no_normalized.png"))
        plt.show()
Exemplo n.º 5
0
    topology = None
# Write, for every selected name listed in conformation_data.dat, the
# corresponding snapshot as a PDB file in output_folder
filename = "conformation_data.dat"
if not os.path.exists(filename):
    raise IOError(
        "File conformation_data.dat not found, please be sure to run extract_COM_metric.py before this script"
    )
print("Selected names: ", ' '.join(names))
with open(filename) as f:
    # The first line of the file is skipped
    f.readline()
    for line in f:
        line = line.strip().split()
        # Blank lines have no fields to inspect
        if not line or line[0] not in names:
            continue
        epoch, iTraj, nSnap = line[1:4]
        # ndmin=2 keeps reports with a single line indexable by (row, column)
        report = np.loadtxt("%s/report_%s" % (epoch, iTraj), ndmin=2)
        print(line[0], "=>",
              "epoch %s, trajectory %s, snapshot %s" % tuple(line[1:4]),
              "metric", report[int(nSnap), metricCol])
        # glob returns a list: pick one file, preferring "<name>.<ext>" so
        # that trajectory 1 does not also match trajectory 10, 11, ...
        traj_base = "%s/%s_%s" % (epoch, traj_name, iTraj)
        matches = glob.glob(traj_base + ".*") or glob.glob(traj_base + "*")
        if not matches:
            raise IOError("No trajectory file found matching %s*" % traj_base)
        traj_file = matches[0]
        snapshots = utilities.getSnapshots(traj_file, topology=topology)
        conformation = snapshots[int(nSnap)]
        output_file = output_folder + "conf_%s_%s_%s.pdb" % (epoch, iTraj,
                                                             nSnap)
        if isinstance(conformation, basestring):
            # Text snapshots are written verbatim
            with open(output_file, "w") as fw:
                fw.write(conformation)
        else:
            utilities.write_mdtraj_object_PDB(conformation, output_file,
                                              topology_contents)