Пример #1
0
def main(metricCol, lig_resname, nTrajs, filter_val, stride, atomId, saving_frequency, trajectory_name, report_name, topology=None):
    """
    Gather the ligand coordinates and a report metric for the strided
    snapshots of every epoch folder found in the current directory, write them
    as a PDB of points (cluster_metric.pdb) with the metric shifted so that its
    minimum is 0, and store a summary table in conformation_data.dat.

    :param metricCol: Column of the report files that holds the metric
    :param lig_resname: Ligand residue name, forwarded to get_coords
    :param nTrajs: Exclusive upper bound of the trajectory numbers, i.e.
        trajectories 1 to nTrajs-1 are processed
    :param filter_val: If not None, shifted metric values above it are capped
        to it in the written PDB (the table keeps the uncapped values)
    :param stride: Take one snapshot every stride snapshots
    :param atomId: Atom selection, forwarded to get_coords
    :param saving_frequency: Number of report lines per stored snapshot
    :param trajectory_name: Base name of the trajectory files
    :param report_name: Base name of the report files
    :param topology: Topology forwarded to utilities.getSnapshots
    """
    folders = utilities.get_epoch_folders(".")
    data = []
    confData = []
    for epoch in folders:
        print("Processing epoch %s" % epoch)
        for iTraj in range(1, nTrajs):
            report = np.loadtxt("%s/%s_%d" % (epoch, report_name, iTraj))
            if len(report.shape) < 2:
                # a report with a single line is loaded as a 1D array
                report = report[np.newaxis, :]
            traj_file = glob.glob("%s/%s_%d.*" % (epoch, trajectory_name, iTraj))[0]
            snapshots = utilities.getSnapshots(traj_file, topology=topology)
            for i, snapshot in enumerate(itertools.islice(snapshots, 0, None, stride)):
                # line of the report that corresponds to this snapshot
                report_line = i * stride * saving_frequency
                data.append(get_coords(snapshot, atomId, lig_resname) + [report[report_line, metricCol]])
                confData.append((epoch, iTraj, report_line))

    data = np.array(data)
    minInd = np.argmin(data[:, -1])
    minMetric = data[minInd, -1]
    # express the metric relative to its minimum
    data[:, -1] -= minMetric
    if filter_val is not None:
        data_filter = data.copy()
        # cap only the metric (last column); the coordinates stored in the
        # other columns must not be clamped by the filter
        above_filter = data_filter[:, -1] > filter_val
        data_filter[above_filter, -1] = filter_val
        namesPDB = utilities.write_PDB_clusters(data_filter, title="cluster_metric.pdb", use_beta=True)
    else:
        namesPDB = utilities.write_PDB_clusters(data, title="cluster_metric.pdb", use_beta=True)
    print("Min value for metric", minMetric, namesPDB[minInd])

    with open("conformation_data.dat", "w") as fw:
        fw.write("PDB name      Epoch Trajectory   Snapshot   COM x       y       z     Metric\n")
        for j, name in enumerate(namesPDB):
            info = [name.rjust(8)]+[str(x).rjust(10) for x in confData[j]]+[str(np.round(d, 3)).rjust(7) for d in data[j, :-1]] + [str(np.round(data[j, -1], 2)).rjust(10)]
            fw.write("{:s} {:s} {:s} {:s} {:s} {:s} {:s} {:s}\n".format(*tuple(info)))
def writeInitialStructures(field1,
                           field2,
                           crit1,
                           crit2,
                           centers_info,
                           filename_template,
                           traj,
                           topology=None,
                           use_pdb=False):
    """
    Write the structure associated to each cluster center to its own file.

    :param field1: Values of the first criterion, one per cluster
    :param field2: Values of the second criterion, one per cluster
    :param crit1: Label of the first criterion, used in the file name
    :param crit2: Label of the second criterion, used in the file name
    :param centers_info: Mapping from cluster number to a dictionary whose
        'structure' entry holds (epoch, trajectory, snapshot)
    :param filename_template: Template filled with str.format using the
        cluster number, crit1, its value, crit2 and its value
    :param traj: Prefix of the trajectory file names
    :param topology: Topology; when given the trajectory is read as xtc and
        written through splitTrajectory.main
    :param use_pdb: Forwarded to the trajectory readers
    """
    for cluster_num, value1, value2 in zip(centers_info, field1, field2):
        structure = centers_info[cluster_num]['structure']
        epoch_num, traj_num, snap_num = [int(item) for item in structure]
        if topology:
            trajectory = "{}/{}{}.xtc".format(epoch_num, traj, traj_num)
        else:
            trajectory = "{}/{}{}.pdb".format(epoch_num, traj, traj_num)
        snapshots = utilities.getSnapshots(trajectory,
                                           topology=topology,
                                           use_pdb=use_pdb)
        filename = filename_template.format(cluster_num, crit1, value1,
                                            crit2, value2)
        if topology:
            # splitTrajectory numbers the structures starting from 1
            splitTrajectory.main("", [trajectory], topology, [snap_num + 1],
                                 template=filename, use_pdb=use_pdb)
        else:
            with open(filename, "w") as fw:
                fw.write(snapshots[snap_num])
Пример #3
0
def main(epoch_num, trajectory, snapshot_num, resname, clustering_object,
         topology):
    """
    Report to which cluster of a clustering object a snapshot belongs.

    :param epoch_num: Epoch of the snapshot
    :param trajectory: Trajectory number of the snapshot
    :param snapshot_num: Index of the snapshot inside the trajectory
    :param resname: Residue name used to initialise the PDB object
    :param clustering_object: Path of the clustering object to read
    :param topology: Topology file, or None
    :raises ValueError: If no trajectory file matches the epoch and trajectory
    :raises IndexError: If the trajectory does not contain the snapshot
    """
    rmsd_calculator = RMSDCalculator.RMSDCalculator()
    clusters = utilities.readClusteringObject(clustering_object)
    # only the clusters listed in the summary of the previous epoch are tested
    summary_file = os.path.join(str(max(0, epoch_num - 1)), "clustering",
                                "summary.txt")
    n_clusters = utilities.loadtxtfile(summary_file).shape[0]
    topology_contents = None
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    matches = glob.glob(
        os.path.join(str(epoch_num), "*traj*_%d.*" % trajectory))
    if not matches:
        raise ValueError(
            "No file with the specified epoch and trajectory found")
    try:
        selected = utilities.getSnapshots(matches[0],
                                          topology=topology)[snapshot_num]
    except IndexError:
        raise IndexError(
            "Snapshot number %d not found in trajectory %d for epoch %d, please check that the arguments provided are correct"
            % (snapshot_num, trajectory, epoch_num))
    pdb = atomset.PDB()
    pdb.initialise(selected, resname=resname, topology=topology_contents)
    for index, cluster in enumerate(clusters[:n_clusters]):
        if rmsd_calculator.computeRMSD(pdb, cluster.pdb) < cluster.threshold:
            print("Snapshot belongs to cluster", index)
            return
    print("Snapshot not assigned to any cluster! :(")
Пример #4
0
def main(outputDir, files, topology, structs, template=None):
    """
    Split trajectories into one PDB file per snapshot.

    :param outputDir: Folder where the files are written, created if not empty
    :param files: Trajectory files to split
    :param topology: Topology file, or None
    :param structs: 1-based numbers of the snapshots to write, None to write
        all of them
    :param template: Output file name; when not given the files are named
        after the trajectory with a "_%d.pdb" suffix filled with the 0-based
        snapshot index
    :return: True if at least one structure was written
    """
    found = False
    if outputDir:
        utilities.makeFolder(outputDir)
    topology_contents = None
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    if structs is not None:
        structs = set(structs)
    for traj_file in files:
        base_name = os.path.split(traj_file)[-1]
        if template:
            templateName = os.path.join(outputDir, template)
        else:
            templateName = os.path.join(outputDir, base_name[:-4] + "_%d.pdb")
        for i, snapshot in enumerate(
                utilities.getSnapshots(traj_file, topology=topology)):
            if structs is not None and i + 1 not in structs:
                continue
            if not isinstance(snapshot, basestring):
                # non-string snapshots are converted to a PDB string first
                pdb_obj = atomset.PDB()
                pdb_obj.initialise(snapshot, topology=topology_contents)
                snapshot = pdb_obj.get_pdb_string(model_num=i + 1)
            output_name = templateName if template else templateName % i
            with open(output_name, 'w') as of:
                of.write(snapshot)
            found = True
    return found
Пример #5
0
def trajectory_and_snapshot_to_pdb(trajectory_path, snapshot, output_path,
                                   topology_contents):
    """
    Write one snapshot (MODEL) of an Adaptive trajectory as a PDB file.

    The epoch is taken from the name of the folder that contains the
    trajectory and the trajectory number from the file name.

    :param trajectory_path: Path to a trajectory from Adaptive, the parent
        folder must be named with the epoch number.
    :type trajectory_path: str
    :param snapshot: Index (starting from 0) of the model to transform.
    :type snapshot: int
    :param output_path: Path of the new pdb file.
    :type output_path: str
    :param topology_contents: Object providing getTopology(epoch, traj).
    :return: Creates a PDB file.
    """
    # epoch and trajectory number are encoded in the last two path components
    path_parts = trajectory_path.split(os.sep)
    epoch = int(path_parts[-2])
    traj = adapt_tools.getTrajNum(path_parts[-1])
    trajectory = adapt_tools.getSnapshots(trajectory_path)
    try:
        model = trajectory[snapshot]
        PDB = atomset.PDB()
        PDB.initialise(model,
                       topology=topology_contents.getTopology(epoch, traj))
    except IndexError:
        message = ("You are selecting the model {} for a trajectory that has {} models, please, reselect the model index "
                   "(starting from 0).")
        exit(message.format(snapshot, len(trajectory)))
    with open(output_path, "w") as fw:
        # MODEL records are numbered starting from 1
        fw.write("MODEL     %4d\n" % (snapshot + 1))
        fw.write(PDB.pdb)
        for record in ("ENDMDL\n", "END\n"):
            fw.write(record)
def main(trajectory, snapshot, epoch, outputPath, out_filename, topology):
    """
    Rebuild the pathway that leads to a snapshot by walking back through the
    epochs, and write it as a multi-model PDB file.

    In every epoch the processorMapping.txt file is read as a list of
    colon-separated "(epoch,trajectory,snapshot)" entries, the entry of the
    current trajectory gives the structure the trajectory was spawned from.

    :param trajectory: Number of the trajectory that contains the snapshot
    :param snapshot: Index of the last snapshot of the pathway
    :param epoch: Path of the epoch folder that contains the trajectory
    :param outputPath: Folder where the pathway is written (created if
        needed), None to write in the current directory
    :param out_filename: Name of the output file, a numeric suffix is added
        if the file already exists
    :param topology: Topology file, or None
    :raises ValueError: If a trajectory of the pathway is not found
    """
    if outputPath is not None:
        outputPath = os.path.join(outputPath, "")
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
    else:
        outputPath = ""
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    if os.path.exists(outputPath + out_filename):
        # If the specified name exists, append a number to distinguish the files
        name, ext = os.path.splitext(out_filename)
        out_filename = "".join([name, "_%d", ext])
        suffix = 1
        while os.path.exists(outputPath + out_filename % suffix):
            suffix += 1
        out_filename %= suffix
    pathway = []
    # Strip out trailing slash if present
    pathPrefix, epoch = os.path.split(epoch.rstrip("/"))
    sys.stderr.write("Creating pathway...\n")
    while True:
        filename = glob.glob(
            os.path.join(pathPrefix, epoch, "*traj*_%d.*" % trajectory))
        if not filename:
            raise ValueError(
                "Trajectory %s not found!" %
                os.path.join(pathPrefix, epoch, "*traj*_%d.*" % trajectory))
        snapshots = utilities.getSnapshots(filename[0])
        if epoch == '0':
            initial = 0
        else:
            # avoid repeating the initial snapshot
            initial = 1
        if not isinstance(snapshots[0], basestring):
            new_snapshots = []
            for i in range(initial, snapshot + 1):
                PDB = atomset.PDB()
                PDB.initialise(snapshots[i], topology=topology_contents)
                new_snapshots.append(PDB.pdb)
            snapshots = new_snapshots
        else:
            snapshots = snapshots[initial:snapshot + 1]
        # epochs are visited backwards, so earlier pieces go to the front
        pathway.insert(0, snapshots)
        if epoch == '0':
            # Once we get to epoch 0, we just need to append the trajectory
            # where the cluster was found and we can break out of the loop
            break
        # read the mapping inside a context manager so that the file handle
        # is released in every iteration instead of leaking
        mapping_path = os.path.join(pathPrefix, epoch, "processorMapping.txt")
        with open(mapping_path) as mapping_file:
            procMapping = mapping_file.read().rstrip().split(':')
        # entries look like "(epoch,trajectory,snapshot)", drop the brackets
        epoch, trajectory, snapshot = map(
            int, procMapping[trajectory - 1][1:-1].split(','))
        epoch = str(epoch)
    sys.stderr.write("Writing pathway...\n")
    with open(outputPath + out_filename, "a") as f:
        f.write("ENDMDL\n".join(itertools.chain.from_iterable(pathway)))
Пример #7
0
def main(ligand, clusters_file, conf_folder, topology=None):
    """
    Cluster the extracted coordinates of a simulation and write every
    snapshot into the folder of the cluster it was assigned to.

    :param ligand: Not used inside this function
    :param clusters_file: File with the cluster centers, loaded with
        np.loadtxt
    :param conf_folder: Path prefix of the epoch folders
    :param topology: Topology file, or None
    """
    trajFolder = "allTrajs_nonRepeat"
    cluster_centers = np.loadtxt(clusters_file)
    for folder in ("discretized", trajFolder):
        if not os.path.exists(folder):
            os.makedirs(folder)
    stride = 1
    clusterCountsThreshold = 0
    trajBasename = "coord*"
    topology_contents = None
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    epoch_folders = utilities.get_epoch_folders(conf_folder)
    numClusters = cluster_centers.shape[0]
    coordinates = [[] for _ in range(numClusters)]
    # gather the coordinate files of all the epochs in a single folder
    for it in epoch_folders:
        pattern = conf_folder + "%s/extractedCoordinates/coord*" % it
        for coord_file in glob.glob(pattern):
            traj = os.path.splitext(coord_file)[0].split("_")[-1]
            shutil.copy(coord_file,
                        trajFolder + "/coord_%s_%s.dat" % (it, traj))
    clusteringObject = cluster.Cluster(numClusters,
                                       trajFolder,
                                       trajBasename,
                                       alwaysCluster=False,
                                       stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    for i in range(numClusters):
        if not os.path.exists("cluster_%d" % i):
            os.makedirs("cluster_%d/allStructures" % i)
    for dtraj in glob.glob("discretized/*.disctraj"):
        print(dtraj)
        assignments = np.loadtxt(dtraj)
        # epoch and trajectory number are taken from the file name
        name_fields = os.path.splitext(dtraj)[0].split("_", 3)[1:]
        epoch, traj_num = map(int, name_fields)
        trajPositions = np.loadtxt(trajFolder + "/coord_%d_%d.dat" %
                                   (epoch, traj_num))
        trajFile = glob.glob(
            os.path.join(conf_folder + "%d/trajectory_%d*" %
                         (epoch, traj_num)))[0]
        snapshots = utilities.getSnapshots(trajFile, topology=topology)
        for nSnap, cluster_num in enumerate(assignments):
            coordinates[int(cluster_num)].append(trajPositions[nSnap])
            filename = "cluster_%d/allStructures/conf_%d_%d_%d.pdb" % (
                cluster_num, epoch, traj_num, nSnap)
            if not isinstance(snapshots[nSnap], basestring):
                utilities.write_mdtraj_object_PDB(snapshots[nSnap], filename,
                                                  topology_contents)
            else:
                with open(filename, "w") as fw:
                    fw.write(snapshots[nSnap])
    for cl, positions in enumerate(coordinates):
        np.savetxt("cluster_%d/positions.dat" % cl, positions)
Пример #8
0
def main(representatives_files, path_structures, output="", clusters=None, trajNames="trajectory", topology=None):
    """
    Write the representative structure of each cluster as a PDB file.

    :param representatives_files: File with the representative structures,
        its first row is skipped and the rest is read as integers
    :param path_structures: Folder containing the epoch folders
    :param output: Destination folder, by default a
        "representative_structures_pdbs" folder next to representatives_files;
        a numeric suffix is added if the folder already exists
    :param clusters: Clusters to write, None or ['a'] to write all of them
    :param trajNames: Base name of the trajectory files
    :param topology: Topology file, or None
    :raises IOError: If the representatives file or a trajectory cannot be read
    :raises ValueError: If the trajectory of a structure is not found
    """
    if clusters is None:
        clusters = ['a']
    # Load the representative structures file
    try:
        clusters_info = np.loadtxt(representatives_files, skiprows=1, dtype=int)
    except IOError:
        raise IOError("Couldn't find a representative file in %s, please check that the path is correct" % representatives_files)
    # Organize to minimise pdb loading
    if clusters != ['a']:
        clusters_info = clusters_info[[int(cl) for cl in clusters]]

    extract_info = getExtractInfo(clusters_info)

    # Write appropriate pdbs
    destFolder = output
    if not output:
        parent_folder = os.path.split(representatives_files)[0]
        destFolder = os.path.join(parent_folder, "representative_structures_pdbs")

    if os.path.exists(destFolder):
        # do not overwrite a previous run, look for the first free suffix
        destFolder += "_%d"
        it = 1
        while os.path.exists(destFolder % it):
            it += 1
        destFolder %= it
    os.makedirs(destFolder)
    structureFolder = os.path.join(path_structures, "%d", trajNames+"_%d.*")

    topology_contents = None if topology is None else utilities.getTopologyFile(topology)

    for trajFile, extraInfo in extract_info.items():
        try:
            pdbFile = glob.glob(structureFolder % trajFile)[0]
        except IndexError:
            raise ValueError("Structure %s not found" % (structureFolder % trajFile))
        try:
            snapshots = utilities.getSnapshots(pdbFile, topology=topology)
        except IOError:
            raise IOError("Unable to open %s, please check that the path to structures provided is correct" % pdbFile)
        for pair in extraInfo:
            cluster_file = os.path.join(destFolder, "cluster_%d.pdb" % pair[0])
            if topology_contents is not None:
                utilities.write_mdtraj_object_PDB(snapshots[pair[1]], cluster_file, topology=topology_contents)
                continue
            with open(cluster_file, "w") as fw:
                fw.write(snapshots[pair[1]])
                fw.write("\n")
Пример #9
0
def mapReference(ref, trajs, topology_content):
    """
    Build the reference coordinates and load all the snapshots of a set of
    trajectories as PDB objects.

    :param ref: Reference structure used to initialise a PDB object
    :param trajs: Paths of the trajectories, the parent folder of each
        trajectory must be named with the epoch number
    :param topology_content: Topology object, passed as is for the reference
        and queried with getTopology(epoch, traj_num) for each trajectory
    :return: Dictionary mapping the atom ids of the reference to their
        coordinates and list with a PDB object per snapshot
    """
    refPDB = atomset.PDB()
    refPDB.initialise(ref, type="PROTEIN", topology=topology_content)
    avgStruct = {
        atomID: refPDB.atoms[atomID].getAtomCoords()
        for atomID in refPDB.atoms
    }
    snapshotsTot = []
    for traj in trajs:
        # load each trajectory only once (it used to be read twice)
        snapshots = utilities.getSnapshots(traj)
        traj_num = utilities.getTrajNum(traj)
        # the epoch is the name of the folder that contains the trajectory
        epoch = int(os.path.abspath(traj).split(os.sep)[-2])
        for snapshot in snapshots:
            PDB = atomset.PDB()
            PDB.initialise(snapshot,
                           type="PROTEIN",
                           topology=topology_content.getTopology(
                               epoch, traj_num))
            snapshotsTot.append(PDB)

    return avgStruct, snapshotsTot
Пример #10
0
def writeInitialStructures(centers_info, filename_template, topology=None):
    """
    Write the structure associated to each cluster center as a PDB file.

    :param centers_info: Mapping from cluster number to a dictionary whose
        'structure' entry holds (epoch, trajectory, snapshot)
    :param filename_template: Name of the output files, filled with the
        cluster number using the % operator
    :param topology: Topology file, or None
    """
    # always bind topology_contents, otherwise writing a non-string snapshot
    # without topology fails with an unbound local variable
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    for cluster_num in centers_info:
        epoch_num, traj_num, snap_num = map(
            int, centers_info[cluster_num]['structure'])
        trajectory = glob.glob("%d/trajectory_%d*" % (epoch_num, traj_num))[0]
        snapshots = utilities.getSnapshots(trajectory, topology=topology)
        if isinstance(snapshots[0], basestring):
            # string snapshots can be written directly
            with open(filename_template % cluster_num, "w") as fw:
                fw.write(snapshots[snap_num])
        else:
            utilities.write_mdtraj_object_PDB(snapshots[snap_num],
                                              filename_template % cluster_num,
                                              topology_contents)
Пример #11
0
def generateConformations(resname, clAcc, trajectory, topology):
    """
    Generate PDB objects, either from a clustering object or from
    trajectories.

    :param resname: Residue name used to initialise the PDB objects
    :param clAcc: Object whose clusters.clusters are iterated to yield the pdb
        of each cluster; if None the trajectories are read instead
    :param trajectory: Trajectory files read when clAcc is None
    :param topology: Topology file, or None
    :return: Generator of PDB objects
    """
    topology_contents = None if topology is None else utilities.getTopologyFile(topology)
    if clAcc is not None:
        for cluster in clAcc.clusters.clusters:
            yield cluster.pdb
        return
    for traj in trajectory:
        for snapshot in utilities.getSnapshots(traj, topology=topology):
            conformation = atomset.PDB()
            conformation.initialise(snapshot, resname=resname, topology=topology_contents)
            yield conformation
Пример #12
0
def mapReference(ref, trajs, topology, topology_content):
    """
    Build the reference coordinates and load all the snapshots of a set of
    trajectories as PDB objects.

    :param ref: Reference structure used to initialise a PDB object
    :param trajs: Paths of the trajectories
    :param topology: Topology forwarded to utilities.getSnapshots
    :param topology_content: Topology used to initialise the PDB objects
    :return: Dictionary mapping the atom ids of the reference to their
        coordinates and list with a PDB object per snapshot
    """
    reference = atomset.PDB()
    reference.initialise(ref, type="PROTEIN", topology=topology_content)
    avgStruct = {}
    for atomID in reference.atoms:
        avgStruct[atomID] = reference.atoms[atomID].getAtomCoords()
    snapshotsTot = []
    for traj in trajs:
        for snapshot in utilities.getSnapshots(traj, topology=topology):
            conformation = atomset.PDB()
            conformation.initialise(snapshot, type="PROTEIN", topology=topology_content)
            snapshotsTot.append(conformation)
    return avgStruct, snapshotsTot
Пример #13
0
def extractAvgPDB(trajs, topology, topology_content):
    """
    Compute the running average of the atom coordinates over all the
    snapshots of a set of trajectories.

    :param trajs: Paths of the trajectories
    :param topology: Topology forwarded to utilities.getSnapshots
    :param topology_content: Topology used to initialise the PDB objects
    :return: Dictionary mapping atom ids to their average coordinates and list
        with a PDB object per snapshot
    """
    avgStruct = {}
    snapshotsTot = []
    nSnapshots = 0
    for traj in trajs:
        for snapshot in utilities.getSnapshots(traj, topology=topology):
            nSnapshots += 1
            conformation = atomset.PDB()
            conformation.initialise(snapshot, type="PROTEIN", topology=topology_content)
            snapshotsTot.append(conformation)
            for atomID, atom in conformation.atoms.items():
                if atomID not in avgStruct:
                    # first time the atom is seen, start from its coordinates
                    avgStruct[atomID] = atom.getAtomCoords()
                    continue
                # incremental update of the mean
                avgStruct[atomID] += (atom.getAtomCoords() -
                                      avgStruct[atomID]) / nSnapshots
    return avgStruct, snapshotsTot
def write_snapshot(snap_num,
                   trajectory,
                   filename,
                   topology=None,
                   use_pdb=False):
    """
    Write a single snapshot of a trajectory to a file.

    :param snap_num: Index of the snapshot inside the trajectory
    :param trajectory: Path of the trajectory file
    :param filename: Path of the file to write
    :param topology: Topology; when given the snapshot is written through
        splitTrajectory.main, otherwise it is written directly
    :param use_pdb: Forwarded to the trajectory readers
    """
    if topology:
        # splitTrajectory numbers the structures starting from 1
        splitTrajectory.main("", [trajectory], topology, [snap_num + 1],
                             template=filename, use_pdb=use_pdb)
        return
    snapshots = utilities.getSnapshots(trajectory,
                                       topology=topology,
                                       use_pdb=use_pdb)
    with open(filename, "w") as fw:
        fw.write(snapshots[snap_num])
Пример #15
0
def projectTICATrajs(folders,
                     folderPath,
                     ligand_resname,
                     atomId,
                     stride_conformations,
                     nTICs,
                     tica,
                     writeFiles=True,
                     topology=None):
    """
    Project the extracted coordinates of every trajectory with a TICA object
    and pair them with the ligand coordinates of the same snapshots.

    :param folders: Names of the epoch folders
    :param folderPath: Path that contains the epoch folders
    :param ligand_resname: Ligand residue name, forwarded to get_coords
    :param atomId: Atom selection, forwarded to get_coords
    :param stride_conformations: Take one snapshot every
        stride_conformations snapshots
    :param nTICs: Number of TICA coordinates to keep
    :param tica: Object providing the transform method
    :param writeFiles: Whether to write the results in the tica_COM folder
    :param topology: Topology forwarded to utilities.getSnapshots
    :return: List with the ligand coordinates of each trajectory and list with
        the projected coordinates of each trajectory
    """
    def trajectory_number(path):
        # the number sits between the last underscore and the 4-character
        # extension of the file name
        return int(path[path.rfind("_") + 1:-4])

    if writeFiles:
        utilities.makeFolder("tica_COM")
    trajsUniq = []
    projectedUniq = []
    for epoch in folders:
        coord_files = glob.glob(
            os.path.join(folderPath, "%s/extractedCoordinates/coord*" % epoch))
        coord_files.sort(key=trajectory_number)
        for trajName in coord_files:
            trajNum = trajectory_number(trajName)
            trajFile = glob.glob(
                os.path.join(folderPath,
                             "%s/trajectory_%d.*" % (epoch, trajNum)))[0]
            snapshotsPDB = utilities.getSnapshots(trajFile, topology=topology)
            trajCOM = []
            for snapshot in itertools.islice(snapshotsPDB, 0, None,
                                             stride_conformations):
                trajCOM.append(get_coords(snapshot, atomId, ligand_resname))
            trajsUniq.append(trajCOM)
            trajLoad = np.loadtxt(trajName)
            if len(trajLoad.shape) == 1:
                # a single-row file is loaded as a 1D array
                trajLoad = trajLoad[np.newaxis, :]
            # keep all the rows but only the first nTICs columns
            projectedTraj = tica.transform(
                trajLoad[::stride_conformations])[:, :nTICs]
            projectedUniq.append(projectedTraj)
            if not writeFiles:
                continue
            tica_labels = "\t".join(["TICA %d" % tic for tic in range(nTICs)])
            np.savetxt(
                "tica_COM/traj_%s_%d.dat" % (epoch, trajNum),
                np.hstack((np.array(trajCOM), projectedTraj)),
                header="COM coordinates x\ty\tz\t TICA coordinates\t" +
                tica_labels + "\n")
    return trajsUniq, projectedUniq
Пример #16
0
def main(pathway, pdb_with_connects, ligand_chain="L", outfilename=None):
    """
    Renumber the ligand atoms of every model of a pathway following a PDB
    that contains the ligand connectivity, and append the connectivity
    records to each model.

    :param pathway: Path of the pathway PDB file
    :param pdb_with_connects: PDB file with the ligand and its connectivity
    :param ligand_chain: Chain of the ligand
    :param outfilename: Output file, by default the pathway name with the
        "_connected.pdb" suffix; an existing file is replaced
    """
    reference = pm.PDB(pdb_with_connects, chain=ligand_chain)
    ligand_text = "".join(reference.get_atoms_of_chain())
    connects = "".join(reference.read_conect())
    # Temporary file to extract the indexes of the ligand
    tmp_ligand = "ligand_with_connects_tmp.pdb"
    with open(tmp_ligand, "w") as out:
        out.write(ligand_text + connects)
    names_dictionary = pm.PDB(in_pdb=tmp_ligand,
                              chain=ligand_chain).get_names_dictionary()
    os.remove(tmp_ligand)
    pathway_snapshots = utilities.getSnapshots(pathway)
    new_pathway = []
    sys.stderr.write("Rebuilding indexes...\n")
    tmp_snapshot = "snap_tmp.pdb"
    for snap in pathway_snapshots:
        # each model goes through a temporary file to be parsed
        with open(tmp_snapshot, "w") as out:
            out.write(snap)
        model_pdb = pm.PDB(in_pdb=tmp_snapshot, chain=ligand_chain)
        os.remove(tmp_snapshot)
        ligand = model_pdb.get_atoms_of_chain()
        # lines of the model that are not ligand atoms
        complex_no_ligand = sorted(
            set(model_pdb.read_atoms_section()) ^ set(ligand))
        # ligand lines in the order of the names dictionary, with its indexes
        new_ligand = [
            pm.set_index_to_line(line, index)
            for index, name in names_dictionary.items()
            for line in ligand
            if name == pm.get_atom_pdb_name_from_line(line).strip()
        ]
        new_pathway.append("".join(complex_no_ligand) + "".join(new_ligand) +
                           connects)

    sys.stderr.write("Writing connects...\n")
    if not outfilename:
        outfilename = "{}_connected.pdb".format(pathway.split(".pdb")[0])
    if os.path.exists(outfilename):
        os.remove(outfilename)
    for n, model in enumerate(new_pathway):
        with open(outfilename, "a") as output:
            for chunk in ("MODEL {}\n".format(n), model, "ENDMDL\n"):
                output.write(chunk)
Пример #17
0
def writeCentersInfo(centersInfo,
                     folderPath,
                     ligand_resname,
                     nTICs,
                     numClusters,
                     trajsUniq,
                     clustersCentersFolder,
                     nTraj,
                     topology=None):
    """
    Write the structure of every cluster center, the distances of the centers
    and a PDB with the position of each center.

    :param centersInfo: Mapping from cluster number to a dictionary with the
        'structure' (epoch, trajectory, snapshot) and 'minDist' entries
    :param folderPath: Path that contains the epoch folders
    :param ligand_resname: Residue name used to initialise the PDB objects
    :param nTICs: Number of TICA coordinates, used in the file names
    :param numClusters: Number of clusters
    :param trajsUniq: Per-trajectory coordinates, indexed by
        epoch * nTraj + (trajectory - 1) and then by snapshot
    :param clustersCentersFolder: Folder where the files are written, created
        if it does not exist
    :param nTraj: Number of trajectories per epoch used to index trajsUniq
    :param topology: Topology file, or None
    """
    topology_contents = None
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    if not os.path.exists(clustersCentersFolder):
        os.makedirs(clustersCentersFolder)
    COM_list = []
    for clusterNum in centersInfo:
        epoch, trajNum, snap = centersInfo[clusterNum]['structure']
        COM_list.append(trajsUniq[int(epoch) * nTraj + (trajNum - 1)][snap])
        # Accept non-pdb trajectories
        pattern = os.path.join(folderPath,
                               "%s/trajectory_%d.*" % (epoch, trajNum))
        trajFile = glob.glob(pattern)[0]
        snapshots = utilities.getSnapshots(trajFile, topology=topology)
        center = atomset.PDB()
        center.initialise(snapshots[snap],
                          resname=ligand_resname,
                          topology=topology_contents)
        center_file = os.path.join(str(clustersCentersFolder),
                                   "cluster_%d.pdb" % clusterNum)
        center.writePDB(str(center_file))

    distances = []
    for nC in range(numClusters):
        distances.append([nC, centersInfo[nC]['minDist']])
    distances_file = os.path.join(
        clustersCentersFolder,
        "clusterDistances_%dcl_%dTICs.dat" % (numClusters, nTICs))
    np.savetxt(distances_file, distances)
    centers_file = os.path.join(
        clustersCentersFolder,
        "clustersCenters_%dcl_%dTICs.pdb" % (numClusters, nTICs))
    utilities.write_PDB_clusters(COM_list, centers_file)
Пример #18
0
def main(metricCol, lig_resname, nTrajs, stride, atomId, saving_frequency):
    """
    For every epoch folder of the current directory, write a PDB with the
    ligand coordinates and the metric (shifted so that its minimum is 0) of
    the strided snapshots, followed by an atom that marks the box center.

    The box center of the first epoch is the first point collected, for the
    following epochs it is the position with the maximum metric of the
    previous epoch.

    :param metricCol: Column of the report files that holds the metric
    :param lig_resname: Ligand residue name, forwarded to get_coords
    :param nTrajs: Exclusive upper bound of the trajectory numbers, i.e.
        trajectories 1 to nTrajs-1 are processed
    :param stride: Take one snapshot every stride snapshots
    :param atomId: Atom selection, forwarded to get_coords
    :param saving_frequency: Number of report lines per stored snapshot
    """
    templateLine = "HETATM%s    H BOX Z 501    %s%s%s  0.75%s            H  \n"
    box_center = None
    for epoch in utilities.get_epoch_folders("."):
        print("Processing epoch %s" % epoch)
        data = []
        confData = []
        maxEpoch = -1
        maxEpochCoords = None
        for iTraj in range(1, nTrajs):
            report = np.loadtxt("%s/report_%d" % (epoch, iTraj))
            if report.ndim < 2:
                # a report with a single line is loaded as a 1D array
                report = report[np.newaxis, :]
            maxTrajIndex = np.argmax(report[:, metricCol])
            snapshots = utilities.getSnapshots("%s/trajectory_%d.pdb" % (epoch, iTraj))
            for i, snapshot in enumerate(itertools.islice(snapshots, 0, None, stride)):
                report_line = i * stride * saving_frequency
                point = get_coords(snapshot, atomId, lig_resname)
                data.append(point + [report[report_line, metricCol]])
                confData.append((epoch, iTraj, report_line))
            trajMax = report[maxTrajIndex, metricCol]
            if trajMax > maxEpoch:
                # best value of the epoch so far, remember where it was found
                maxEpoch = trajMax
                maxEpochCoords = get_coords(snapshots[maxTrajIndex], atomId, lig_resname)
            if iTraj == 1 and box_center is None:
                box_center = data[0][:3]
        data = np.array(data)
        minMetric = data[np.argmin(data[:, -1]), -1]
        data[:, -1] -= minMetric
        epoch_pdb = "epoch_%s.pdb" % epoch
        utilities.write_PDB_clusters(data, title=epoch_pdb, use_beta=True)
        print("Max value for metric", maxEpoch, maxEpochCoords)
        with open(epoch_pdb, "a") as fa:
            fa.write("TER\n")
            serial = ("%d" % data.shape[0]).rjust(5)
            center = [("%.3f" % box_center[axis]).rjust(8) for axis in range(3)]
            g = ("%.2f" % 0).rjust(6)
            fa.write(templateLine % (serial, center[0], center[1], center[2], g))
        # the next epoch is centered on the best position of this one
        box_center = maxEpochCoords
Пример #19
0
def extractAvgPDB(trajs, topology_content):
    """
    Compute the running average of the atom coordinates over all the
    snapshots of a set of trajectories.

    :param trajs: Paths of the trajectories, the parent folder of each
        trajectory must be named with the epoch number
    :param topology_content: Object queried with getTopology(epoch, traj_num)
        to initialise the PDB objects
    :return: Dictionary mapping atom ids to their average coordinates and list
        with a PDB object per snapshot
    """
    avgStruct = {}
    snapshotsTot = []
    nSnapshots = 0
    for traj in trajs:
        snapshots = utilities.getSnapshots(traj)
        traj_num = utilities.getTrajNum(traj)
        # the epoch is the name of the folder that contains the trajectory
        epoch = int(os.path.abspath(traj).split(os.sep)[-2])
        for snapshot in snapshots:
            nSnapshots += 1
            conformation = atomset.PDB()
            conformation.initialise(snapshot,
                                    type="PROTEIN",
                                    topology=topology_content.getTopology(
                                        epoch, traj_num))
            snapshotsTot.append(conformation)
            for atomID, atom in conformation.atoms.items():
                if atomID not in avgStruct:
                    # first time the atom is seen, start from its coordinates
                    avgStruct[atomID] = atom.getAtomCoords()
                    continue
                # incremental update of the mean
                avgStruct[atomID] += (atom.getAtomCoords() -
                                      avgStruct[atomID]) / nSnapshots
    return avgStruct, snapshotsTot
Пример #20
0
            os.makedirs(outputPath)
    else:
        outputPath = ""

    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None

    sys.stderr.write("Reading clustering object...\n")
    cl = utilities.readClusteringObject(clusteringObject)
    if cl.conformationNetwork is None:
        sys.exit("Clustering object loaded has no conformation network!!")
    conf = cl.conformationNetwork
    filename = glob.glob(epoch+"/*traj*_%d*" % trajectory)
    snapshots = utilities.getSnapshots(filename[0], topology)
    snapshots = snapshots[:snapshot+1]
    if not isinstance(snapshots[0], basestring):
        new_snapshots = []
        for snapshot in snapshots:
            PDB = atomset.PDB()
            PDB.initialise(snapshot, topology=topology_contents)
            new_snapshots.append(PDB.get_pdb_string())
        snapshots = new_snapshots

    procMapping = open(os.path.join(epoch, "processorMapping.txt")).read().rstrip().split(',')
    leaf = procMapping[trajectory-1]
    pathway = conf.createPathwayToCluster(int(leaf))
    cl.writePathwayTrajectory(pathway, outputPath+"pathway.pdb")
    with open(outputPath+"pathway.pdb", "a") as f:
        f.write("ENDMDL\n".join(snapshots))
Пример #21
0
    topology = None
# Write the conformations whose names were selected, using the table stored in
# conformation_data.dat to locate the epoch, trajectory and snapshot of each
filename = "conformation_data.dat"
if not os.path.exists(filename):
    raise IOError(
        "File conformation_data.dat not found, please be sure to run extract_COM_metric.py before this script"
    )
print("Selected names: ", ' '.join(names))
with open(filename) as f:
    # skip the header line
    f.readline()
    for line in f:
        line = line.strip().split()
        # ignore blank lines and conformations that were not selected
        if not line or line[0] not in names:
            continue
        epoch, iTraj, nSnap = line[1:4]
        report = np.loadtxt("%s/report_%s" % (epoch, iTraj))
        if len(report.shape) < 2:
            # a report with a single line is loaded as a 1D array
            report = report[np.newaxis, :]
        print(line[0], "=>",
              "epoch %s, trajectory %s, snapshot %s" % tuple(line[1:4]),
              "metric", report[int(nSnap), metricCol])
        # match the extension explicitly so that e.g. trajectory 1 does not
        # pick up trajectory 10, and pass a single file (glob returns a list)
        traj_files = glob.glob("%s/%s_%s.*" % (epoch, traj_name, iTraj))
        if not traj_files:
            raise IOError("No trajectory file found matching %s/%s_%s.*" %
                          (epoch, traj_name, iTraj))
        snapshots = utilities.getSnapshots(traj_files[0], topology=topology)
        conf_file = output_folder + "conf_%s_%s_%s.pdb" % (epoch, iTraj, nSnap)
        if isinstance(snapshots[int(nSnap)], basestring):
            with open(conf_file, "w") as fw:
                fw.write(snapshots[int(nSnap)])
        else:
            utilities.write_mdtraj_object_PDB(snapshots[int(nSnap)],
                                              conf_file, topology_contents)
Пример #22
0
def main(n_clusters,
         output_folder,
         SASAColumn,
         norm_energy,
         num_bins,
         percentile,
         plots,
         atom_Ids,
         folder_name,
         traj_basename,
         cluster_energy,
         topology=None):
    """
    Select cluster-center structures from the low-energy points of each SASA bin.

    The report of every trajectory is merged with its extracted coordinates,
    the points are binned by SASA, only the points whose energy is strictly
    below the given percentile of their bin are kept, and the survivors are
    clustered with KMeans. For each cluster the point closest to the center
    is written as ``initial_<i>.pdb`` and the coordinates of all those points
    are written with ``writePDB``.

    :param n_clusters: Number of clusters to generate.
    :param output_folder: Folder where the results are written; it is created
        if it does not exist. If None the files are written with no folder
        prefix.
    :param SASAColumn: Index of the report column holding the SASA.
    :param norm_energy: Whether to shift and scale the energy of the selected
        points to the [0, 1] range before clustering.
    :param num_bins: Number of left bin edges used to split the SASA range.
    :param percentile: Energy percentile computed in each SASA bin; only the
        points strictly below it are kept.
    :param plots: Whether to plot (and save) the energy vs SASA scatter.
    :param atom_Ids: Forwarded to ``extractCoords.main``.
    :param folder_name: Folder passed to ``extractCoords.main`` and to
        ``utilities.get_epoch_folders``.
    :param traj_basename: Prefix of the trajectory files, which are looked up
        as ``<epoch>/<traj_basename>_<traj number>.*``.
    :param cluster_energy: If true cluster on (energy, SASA), otherwise on the
        first three extracted coordinate columns.
    :param topology: Topology file handed to ``utilities.getTopologyFile`` and
        ``utilities.getSnapshots`` (optional).
    :raises ValueError: If no point is below the percentile threshold.
    """
    # Index of the report column used as energy
    energyColumn = 3

    if output_folder is not None:
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""

    # NOTE(review): ligand_resname is not a parameter of this function, it has
    # to be defined at module level for this call to work -- confirm
    extractCoords.main(folder_name,
                       lig_resname=ligand_resname,
                       non_Repeat=True,
                       atom_Ids=atom_Ids)

    # NOTE(review): reports and trajectories are looked up relative to the
    # current directory while the coordinates are looked up under folder_name;
    # presumably the script is run from inside folder_name -- confirm
    epochFolders = utilities.get_epoch_folders(folder_name)
    # Columns of points: energy, SASA, epoch, trajectory, snapshot, coordinates
    points = []
    for epoch in epochFolders:
        report_files = glob.glob(os.path.join(epoch, "*report*"))
        report_files.sort(key=lambda x: int(x[x.rfind("_") + 1:]))
        for report_name in report_files:
            traj_num = int(report_name[report_name.rfind("_") + 1:])
            coordinates = np.loadtxt(
                os.path.join(
                    folder_name, "%s/extractedCoordinates/coord_%d.dat" %
                    (epoch, traj_num)))
            report = np.loadtxt(report_name)
            if len(report.shape) < 2:
                # Single-line report: loadtxt returns 1-D arrays
                points.append([
                    report[energyColumn], report[SASAColumn],
                    int(epoch), traj_num, 0
                ] + coordinates[1:].tolist())
            else:
                n_lines = report.shape[0]
                epoch_line = np.full(n_lines, int(epoch))
                traj_line = np.full(n_lines, traj_num)
                snapshot_line = np.arange(n_lines)
                points.extend(
                    np.hstack(
                        (report[:, (energyColumn, SASAColumn)],
                         epoch_line[:, np.newaxis], traj_line[:, np.newaxis],
                         snapshot_line[:, np.newaxis], coordinates[:, 1:])))
    points = np.array(points)
    # Sort by SASA so that every bin is a contiguous slice of points
    points = points[points[:, 1].argsort()]
    minSASA = points[0, 1]
    maxSASA = points[-1, 1]
    left_bins = np.linspace(minSASA, maxSASA, num=num_bins, endpoint=False)
    indices = np.searchsorted(points[:, 1], left_bins)

    # NOTE(review): only the slices between consecutive left edges are
    # processed, so the points after the last left edge are never selected
    new_points = []
    for i, j in zip(indices[:-1], indices[1:]):
        if i == j:
            # Empty bin, there is nothing to select (and no percentile)
            continue
        bin_energies = points[i:j, 0]
        threshold = np.percentile(bin_energies, percentile)
        found = np.where(bin_energies < threshold)[0]
        # Always extend row by row: appending the 2-D result of a single hit
        # would produce a ragged list that cannot be turned into an array
        new_points.extend(points[found + i])

    if not new_points:
        raise ValueError(
            "No points were found below the energy percentile threshold")
    points = np.array(new_points)
    if norm_energy:
        points[:, 0] -= points[:, 0].min()
        energyMax = points[:, 0].max()
        # Avoid filling the energies with NaN when they are all equal
        if energyMax > 0:
            points[:, 0] /= energyMax

    if cluster_energy:
        print("Clustering using energy and SASA")
        features = points[:, :2]
        title = "clusters_%d_energy_SASA.pdb"
    else:
        print("Clustering using ligand coordinates")
        features = points[:, 5:8]
        title = "clusters_%d_energy_SASA_coords.pdb"
    kmeans = KMeans(n_clusters=n_clusters).fit(features)

    centers_energy = []
    centers_coords = []
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        # Keep the name bound so that the PDB-writing call below never fails
        # with an unbound local when no topology is given
        topology_contents = None
    for i, center in enumerate(kmeans.cluster_centers_):
        # The representative of a cluster is the selected point that is
        # closest to its center
        closest = np.linalg.norm(features - center, axis=1).argmin()
        epoch, traj, snapshot = points[closest, 2:5]
        centers_energy.append(points[closest, :2])
        centers_coords.append(points[closest, 5:8])
        # Anchor the pattern on the extension separator so that trajectory 1
        # does not also match trajectory 10, 11, ...
        traj_file = glob.glob("%d/%s_%d.*" % (epoch, traj_basename, traj))[0]
        conf = utilities.getSnapshots(traj_file,
                                      topology=topology)[int(snapshot)]
        initial_name = os.path.join(outputFolder, "initial_%d.pdb" % i)
        if isinstance(conf, basestring):
            with open(initial_name, "w") as fw:
                fw.write(conf)
        else:
            utilities.write_mdtraj_object_PDB(conf, initial_name,
                                              topology_contents)
    centers_energy = np.array(centers_energy)
    centers_coords = np.array(centers_coords)
    writePDB(centers_coords, os.path.join(outputFolder, title % n_clusters))
    if plots:
        plt.scatter(points[:, 1], points[:, 0], c=kmeans.labels_, alpha=0.5)
        plt.scatter(centers_energy[:, 1],
                    centers_energy[:, 0],
                    c=list(range(n_clusters)),
                    marker='x',
                    s=56,
                    zorder=1)
        plt.xlabel("SASA")
        if norm_energy:
            plt.ylabel("Energy (normalized)")
            plt.savefig(
                os.path.join(outputFolder, "clusters_energy_normalized.png"))
        else:
            plt.ylabel("Energy (kcal/mol)")
            plt.savefig(
                os.path.join(outputFolder, "clusters_no_normalized.png"))
        plt.show()
Пример #23
0
def main(file_to_backtrack, results_path, outputPath, out_filename):
    """
    Rebuild the pathway that leads to a selected structure and write it to disk.

    Starting from the sampling trajectory of the selected structure, the
    ``processorMapping.txt`` file of every growing output folder is read from
    the last one to the first one to find the trajectory and snapshot each
    simulation was spawned from. The snapshots of every step are concatenated
    in chronological order and written to a single file.

    :param file_to_backtrack: File of the selected_results folder that you want to backtrack.
    :type file_to_backtrack: str
    :param results_path: Path where the growing simulations are stored.
    :type results_path: str
    :param outputPath: Output folder path, created if it does not exist. If
        None the file is written with no folder prefix.
    :type outputPath: str
    :param out_filename: Output filename prefix. If a file with this name
        already exists in the output folder, ``_<number>`` is inserted before
        the extension to obtain an unused name.
    :type out_filename: str
    :raises ValueError: If no growing output folder is found in results_path.
    :return: None
    """
    if outputPath is not None:
        outputPath = os.path.join(outputPath, "")
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
    else:
        outputPath = ""
    if os.path.exists(outputPath + out_filename):
        # If the specified name exists, append a number to distinguish the
        # files. The candidate is built from its pieces instead of using the
        # name as a format string, so names containing "%" are handled too
        name, ext = os.path.splitext(out_filename)
        i = 1
        while os.path.exists("%s%s_%d%s" % (outputPath, name, i, ext)):
            i += 1
        out_filename = "%s_%d%s" % (name, i, ext)
    sys.stderr.write("Creating pathway...\n")
    # Get information of the input structure
    trajectory, snapshot, growing_id = extract_info_from_selected_file(
        path_to_selected_file=file_to_backtrack)
    # Obtain trajectory from sampling folder
    # NOTE(review): this path is relative to the current directory and not to
    # results_path -- confirm that this is intended
    sampling_file_from = "sampling_result_{}/trajectory_{}.pdb".format(
        growing_id, trajectory)
    sampling_models = utilities.getSnapshots(sampling_file_from)
    # The pathway is collected backwards in time (last step first) and
    # reversed before writing. The first model of the sampling trajectory is
    # left out, only models 1..snapshot are used
    backward_pathway = [sampling_models[1:snapshot + 1]]
    growing_epochs = glob.glob(
        os.path.join(results_path, "{}_growing_output*".format(growing_id)))
    if not growing_epochs:
        raise ValueError("Trajectory %s not found!" % results_path)
    # NOTE(review): assumes the growing output folders are numbered
    # consecutively from 0 to len(growing_epochs) - 1
    for n in reversed(range(len(growing_epochs))):
        # Reading mapping information
        mapping_file = os.path.join(
            results_path, "{}_growing_output{}".format(growing_id, n),
            "processorMapping.txt")
        with open(mapping_file) as mapping:
            procMapping = mapping.read().split(":")
        # Extract the spawning structure of the previous simulation
        cluster_from = procMapping[trajectory - 1].rstrip("\n")
        cluster_from_tup = literal_eval(cluster_from)
        # Update the trajectory and snapshot to be used
        trajectory = cluster_from_tup[1]
        snapshot = cluster_from_tup[2]
        # Extract the filename and the snapshot that has spawn
        filename = os.path.join(
            results_path, "{}_growing_output{}/trajectory_{}.pdb".format(
                growing_id, n, trajectory))
        print(n, filename, trajectory, snapshot, procMapping)
        snapshots = utilities.getSnapshots(filename)
        # The first model is only kept for the very first simulation (n == 0)
        initial = 0 if n == 0 else 1
        # Take all MODELS from the snapshot
        backward_pathway.append(snapshots[initial:snapshot + 1])

    sys.stderr.write("Writing pathway...\n")
    with open(outputPath + out_filename, "a") as f:
        f.write("ENDMDL\n".join(
            itertools.chain.from_iterable(reversed(backward_pathway))))