def main(metricCol, lig_resname, nTrajs, filter_val, stride, atomId, saving_frequency, trajectory_name, report_name, topology=None):
    """
    Gather ligand coordinates and a report metric from every epoch folder of
    the current directory, write them as a PDB of points
    ("cluster_metric.pdb") and as a table ("conformation_data.dat").

    :param metricCol: Column of the report file that holds the metric
    :type metricCol: int
    :param lig_resname: Residue name of the ligand, forwarded to get_coords
    :type lig_resname: str
    :param nTrajs: Exclusive upper bound of the trajectory numbers, i.e.
        trajectories 1 .. nTrajs - 1 are read
    :type nTrajs: int
    :param filter_val: If not None, metric values (relative to the minimum)
        above this value are capped to it in the PDB that is written
    :type filter_val: float or None
    :param stride: Only one every stride snapshots is processed
    :type stride: int
    :param atomId: Atom identifier forwarded to get_coords
    :param saving_frequency: Number of report lines per stored snapshot
    :type saving_frequency: int
    :param trajectory_name: Basename of the trajectory files
    :type trajectory_name: str
    :param report_name: Basename of the report files
    :type report_name: str
    :param topology: Topology argument forwarded to utilities.getSnapshots
    """
    folders = utilities.get_epoch_folders(".")
    data = []
    confData = []
    for epoch in folders:
        print("Processing epoch %s" % epoch)
        for iTraj in range(1, nTrajs):
            report = np.loadtxt("%s/%s_%d" % (epoch, report_name, iTraj))
            if len(report.shape) < 2:
                # a single-line report is loaded as 1D, make it 2D
                report = report[np.newaxis, :]
            traj_file = glob.glob("%s/%s_%d.*" % (epoch, trajectory_name, iTraj))[0]
            snapshots = utilities.getSnapshots(traj_file, topology=topology)
            for i, snapshot in enumerate(itertools.islice(snapshots, 0, None, stride)):
                # map the strided snapshot index back to its report line
                report_line = i * stride * saving_frequency
                data.append(get_coords(snapshot, atomId, lig_resname) + [report[report_line, metricCol]])
                confData.append((epoch, iTraj, report_line))

    data = np.array(data)
    minInd = np.argmin(data[:, -1])
    minMetric = data[minInd, -1]
    # store the metric relative to its minimum
    data[:, -1] -= minMetric
    if filter_val is not None:
        data_filter = data.copy()
        # cap only the metric column; the remaining columns are coordinates
        # and must not be clamped to the filter value
        data_filter[:, -1] = np.minimum(data_filter[:, -1], filter_val)
        namesPDB = utilities.write_PDB_clusters(data_filter, title="cluster_metric.pdb", use_beta=True)
    else:
        namesPDB = utilities.write_PDB_clusters(data, title="cluster_metric.pdb", use_beta=True)
    print("Min value for metric", minMetric, namesPDB[minInd])

    with open("conformation_data.dat", "w") as fw:
        fw.write("PDB name Epoch Trajectory Snapshot COM x y z Metric\n")
        for j, name in enumerate(namesPDB):
            info = [name.rjust(8)]+[str(x).rjust(10) for x in confData[j]]+[str(np.round(d, 3)).rjust(7) for d in data[j, :-1]] + [str(np.round(data[j, -1], 2)).rjust(10)]
            fw.write("{:s} {:s} {:s} {:s} {:s} {:s} {:s} {:s}\n".format(*tuple(info)))
def writeInitialStructures(field1, field2, crit1, crit2, centers_info, filename_template, traj, topology=None, use_pdb=False):
    """
    Write the structure associated with each cluster center to its own file.

    :param field1: Values of the first criterion, one per cluster
    :type field1: iterable
    :param field2: Values of the second criterion, one per cluster
    :type field2: iterable
    :param crit1: Name of the first criterion (used in the filename)
    :param crit2: Name of the second criterion (used in the filename)
    :param centers_info: Mapping from cluster number to a dict whose
        'structure' entry holds (epoch, trajectory, snapshot)
    :type centers_info: dict
    :param filename_template: str.format template receiving
        (cluster_num, crit1, value1, crit2, value2)
    :type filename_template: str
    :param traj: Trajectory basename, directly followed by the trajectory
        number in the filename
    :type traj: str
    :param topology: If set, trajectories are read as "<traj><num>.xtc" and
        extracted through splitTrajectory.main; otherwise "<traj><num>.pdb"
    :param use_pdb: Forwarded to the snapshot readers
    :type use_pdb: bool
    """
    # distinct loop names so the field1/field2 parameters are not shadowed
    for cluster_num, value1, value2 in zip(centers_info, field1, field2):
        epoch_num, traj_num, snap_num = map(int, centers_info[cluster_num]['structure'])
        extension = "xtc" if topology else "pdb"
        trajectory = "{}/{}{}.{}".format(epoch_num, traj, traj_num, extension)
        filename = filename_template.format(cluster_num, crit1, value1, crit2, value2)
        if not topology:
            # only load the trajectory when it is going to be used here;
            # splitTrajectory.main reads the file on its own
            snapshots = utilities.getSnapshots(trajectory, topology=topology, use_pdb=use_pdb)
            with open(filename, "w") as fw:
                fw.write(snapshots[snap_num])
        else:
            # splitTrajectory selects structures with 1-based indices
            splitTrajectory.main("", [trajectory], topology, [snap_num + 1], template=filename, use_pdb=use_pdb)
def main(epoch_num, trajectory, snapshot_num, resname, clustering_object, topology):
    """
    Print the index of the first cluster whose RMSD to the selected snapshot
    is below the cluster threshold, or a message if there is none.

    :param epoch_num: Epoch of the snapshot
    :type epoch_num: int
    :param trajectory: Trajectory number of the snapshot
    :type trajectory: int
    :param snapshot_num: Index of the snapshot within the trajectory
    :type snapshot_num: int
    :param resname: Residue name used to initialise the PDB object
    :type resname: str
    :param clustering_object: Argument passed to
        utilities.readClusteringObject
    :param topology: Topology file, or None
    :raises ValueError: If no trajectory file matches epoch and trajectory
    :raises IndexError: If the snapshot index is out of range
    """
    rmsd_calculator = RMSDCalculator.RMSDCalculator()
    clustering_object = utilities.readClusteringObject(clustering_object)
    # the number of clusters to check is the number of rows in the summary
    # file of the folder epoch_num - 1 (folder 0 for the first epoch)
    summary_file = os.path.join(str(max(0, epoch_num - 1)), "clustering", "summary.txt")
    n_clusters = utilities.loadtxtfile(summary_file).shape[0]
    topology_contents = None if topology is None else utilities.getTopologyFile(topology)

    matches = glob.glob(os.path.join(str(epoch_num), "*traj*_%d.*" % trajectory))
    if not matches:
        raise ValueError("No file with the specified epoch and trajectory found")
    try:
        selected = utilities.getSnapshots(matches[0], topology=topology)[snapshot_num]
    except IndexError:
        raise IndexError("Snapshot number %d not found in trajectory %d for epoch %d, please check that the arguments provided are correct" % (snapshot_num, trajectory, epoch_num))

    pdb = atomset.PDB()
    pdb.initialise(selected, resname=resname, topology=topology_contents)
    for cluster_index, cluster in enumerate(clustering_object[:n_clusters]):
        if rmsd_calculator.computeRMSD(pdb, cluster.pdb) < cluster.threshold:
            print("Snapshot belongs to cluster", cluster_index)
            return
    print("Snapshot not assigned to any cluster! :(")
def main(outputDir, files, topology, structs, template=None):
    """
    Split trajectory files into one PDB file per snapshot.

    :param outputDir: Folder where the files are written (created through
        utilities.makeFolder when not empty)
    :type outputDir: str
    :param files: Trajectory files to split
    :type files: list
    :param topology: Topology file, or None
    :param structs: 1-based indices of the snapshots to write, None to
        write all of them
    :type structs: iterable or None
    :param template: Output filename; when given, every selected snapshot is
        written to this same name. Otherwise files are named
        "<trajectory stem>_<i>.pdb" with i the 0-based snapshot index
    :type template: str or None
    :returns: bool -- Whether any snapshot was written
    """
    found = False
    if outputDir:
        utilities.makeFolder(outputDir)
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    if structs is not None:
        structs = set(structs)
    for f in files:
        name = os.path.split(f)[-1]
        if template:
            templateName = os.path.join(outputDir, template)
        else:
            # strip the real extension instead of assuming it is 3 characters
            templateName = os.path.join(outputDir, os.path.splitext(name)[0] + "_%d.pdb")
        snapshots = utilities.getSnapshots(f, topology=topology)
        for i, snapshot in enumerate(snapshots):
            if structs is not None and i + 1 not in structs:
                continue
            if not isinstance(snapshot, basestring):
                # non-string snapshots are converted to PDB text first
                PDB = atomset.PDB()
                PDB.initialise(snapshot, topology=topology_contents)
                snapshot = PDB.get_pdb_string(model_num=i + 1)
            output_name = templateName if template else templateName % i
            with open(output_name, 'w') as of:
                of.write(snapshot)
            found = True
    return found
def trajectory_and_snapshot_to_pdb(trajectory_path, snapshot, output_path, topology_contents):
    """
    Given an absolute path to a trajectory of Adaptive and a snapshot (MODEL)
    in xtc format, the function transforms it into PDB format.

    :param trajectory_path: Absolute path to a trajectory from Adaptive, in
        xtc format. The parent folder name must be the epoch number.
    :type trajectory_path: str
    :param snapshot: model of a trajectory that you want to transform
        (index starting from 0).
    :type snapshot: int
    :param output_path: output path of the new pdb file.
    :type output_path: str
    :param topology_contents: Object providing getTopology(epoch, traj),
        whose result is passed to the PDB initialisation.
    :return: Creates a PDB file.
    :raises SystemExit: If the snapshot index is not in the trajectory
    """
    # epoch and trajectory number are taken from the trajectory path
    path_parts = trajectory_path.split(os.sep)
    epoch = int(path_parts[-2])
    traj = adapt_tools.getTrajNum(path_parts[-1])
    trajectory = adapt_tools.getSnapshots(trajectory_path)
    # keep the try body minimal so that an IndexError raised while building
    # the PDB object is not misreported as a wrong model index
    try:
        single_model = trajectory[snapshot]
    except IndexError:
        raise SystemExit(
            "You are selecting the model {} for a trajectory that has {} models, please, reselect the model index "
            "(starting from 0).".format(snapshot, len(trajectory)))
    PDB = atomset.PDB()
    PDB.initialise(single_model, topology=topology_contents.getTopology(epoch, traj))
    with open(output_path, "w") as fw:
        fw.write("MODEL %4d\n" % (snapshot + 1))
        fw.write(PDB.pdb)
        fw.write("ENDMDL\n")
        fw.write("END\n")
def main(trajectory, snapshot, epoch, outputPath, out_filename, topology):
    """
    Rebuild the pathway that leads to a snapshot by following the
    processorMapping.txt files back to epoch 0, and write it to a file.

    :param trajectory: Number of the trajectory that contains the snapshot
    :type trajectory: int
    :param snapshot: Index of the snapshot within the trajectory
    :type snapshot: int
    :param epoch: Path to the epoch folder of the snapshot
    :type epoch: str
    :param outputPath: Output folder (created if missing), None for the
        current directory
    :type outputPath: str or None
    :param out_filename: Output filename; a numeric suffix is added if a
        file with that name already exists
    :type out_filename: str
    :param topology: Topology file, or None
    :raises ValueError: If a trajectory file of the pathway is not found
    """
    if outputPath is not None:
        outputPath = os.path.join(outputPath, "")
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
    else:
        outputPath = ""
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    if os.path.exists(outputPath + out_filename):
        # If the specified name exists, append a number to distinguish the files
        name, ext = os.path.splitext(out_filename)
        out_filename = "".join([name, "_%d", ext])
        i = 1
        while os.path.exists(outputPath + out_filename % i):
            i += 1
        out_filename %= i
    pathway = []
    # Strip out trailing slash if present
    pathPrefix, epoch = os.path.split(epoch.rstrip("/"))
    sys.stderr.write("Creating pathway...\n")
    while True:
        pattern = os.path.join(pathPrefix, epoch, "*traj*_%d.*" % trajectory)
        filename = glob.glob(pattern)
        if not filename:
            raise ValueError("Trajectory %s not found!" % pattern)
        snapshots = utilities.getSnapshots(filename[0])
        # avoid repeating the initial snapshot, except in the first epoch
        initial = 0 if epoch == '0' else 1
        if not isinstance(snapshots[0], basestring):
            new_snapshots = []
            for i in range(initial, snapshot + 1):
                PDB = atomset.PDB()
                PDB.initialise(snapshots[i], topology=topology_contents)
                new_snapshots.append(PDB.pdb)
            snapshots = new_snapshots
        else:
            snapshots = snapshots[initial:snapshot + 1]
        # the pathway is walked backwards, so earlier pieces go first
        pathway.insert(0, snapshots)
        if epoch == '0':
            # Once we get to epoch 0, we just need to append the trajectory
            # where the cluster was found and we can break out of the loop
            break
        # read the mapping with a context manager so the file is closed
        with open(os.path.join(pathPrefix, epoch, "processorMapping.txt")) as mapping_file:
            procMapping = mapping_file.read().rstrip().split(':')
        # each entry looks like "(epoch, trajectory, snapshot)"
        epoch, trajectory, snapshot = map(
            int, procMapping[trajectory - 1][1:-1].split(','))
        epoch = str(epoch)
    sys.stderr.write("Writing pathway...\n")
    with open(outputPath + out_filename, "a") as f:
        f.write("ENDMDL\n".join(itertools.chain.from_iterable(pathway)))
def main(ligand, clusters_file, conf_folder, topology=None):
    """
    Cluster the extracted coordinates of a simulation and write, for every
    cluster, the structures assigned to it ("cluster_<n>/allStructures") and
    their coordinates ("cluster_<n>/positions.dat").

    :param ligand: Not used by this function
    :param clusters_file: File with the cluster centers, read with
        np.loadtxt; its number of rows is the number of clusters
    :type clusters_file: str
    :param conf_folder: Folder that contains the epoch folders
    :type conf_folder: str
    :param topology: Topology file, or None
    """
    trajFolder = "allTrajs_nonRepeat"
    cluster_centers = np.loadtxt(clusters_file)
    if not os.path.exists("discretized"):
        os.makedirs("discretized")
    if not os.path.exists(trajFolder):
        os.makedirs(trajFolder)
    stride = 1
    clusterCountsThreshold = 0
    trajBasename = "coord*"
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    epoch_folders = utilities.get_epoch_folders(conf_folder)
    numClusters = cluster_centers.shape[0]
    coordinates = [[] for _ in range(numClusters)]
    # gather the coordinate files of all epochs in a single folder
    for it in epoch_folders:
        # os.path.join so conf_folder works with or without trailing slash
        files = glob.glob(os.path.join(conf_folder, "%s/extractedCoordinates/coord*" % it))
        for f in files:
            traj = os.path.splitext(f)[0].split("_")[-1]
            shutil.copy(f, trajFolder + "/coord_%s_%s.dat" % (it, traj))
    clusteringObject = cluster.Cluster(numClusters, trajFolder, trajBasename,
                                       alwaysCluster=False, stride=stride)
    clusteringObject.clusterTrajectories()
    clusteringObject.eliminateLowPopulatedClusters(clusterCountsThreshold)
    for i in range(numClusters):
        # check the innermost folder, so that it is also created when
        # cluster_<i> exists but allStructures does not
        structures_dir = "cluster_%d/allStructures" % i
        if not os.path.exists(structures_dir):
            os.makedirs(structures_dir)
    dtrajs_files = glob.glob("discretized/*.disctraj")
    for dtraj in dtrajs_files:
        print(dtraj)
        traj = np.loadtxt(dtraj)
        # file names look like <prefix>_<epoch>_<trajectory>.disctraj
        epoch, traj_num = map(int, os.path.splitext(dtraj)[0].split("_", 3)[1:])
        trajPositions = np.loadtxt(trajFolder + "/coord_%d_%d.dat" % (epoch, traj_num))
        # "_%d.*" so that trajectory 1 does not match trajectory_10, etc.
        trajFile = glob.glob(
            os.path.join(conf_folder, "%d/trajectory_%d.*" % (epoch, traj_num)))[0]
        snapshots = utilities.getSnapshots(trajFile, topology=topology)
        for nSnap, cluster_num in enumerate(traj):
            coordinates[int(cluster_num)].append(trajPositions[nSnap])
            filename = "cluster_%d/allStructures/conf_%d_%d_%d.pdb" % (
                cluster_num, epoch, traj_num, nSnap)
            if isinstance(snapshots[nSnap], basestring):
                with open(filename, "w") as fw:
                    fw.write(snapshots[nSnap])
            else:
                utilities.write_mdtraj_object_PDB(snapshots[nSnap], filename,
                                                  topology_contents)
    for cl in range(numClusters):
        np.savetxt("cluster_%d/positions.dat" % cl, coordinates[cl])
def main(representatives_files, path_structures, output="", clusters=None, trajNames="trajectory", topology=None):
    """
    Write a PDB file for each cluster listed in a representatives file.

    :param representatives_files: File with the representative structures
        (first row is skipped, the rest is read as integers)
    :type representatives_files: str
    :param path_structures: Folder that contains the epoch folders
    :type path_structures: str
    :param output: Destination folder; when empty, a folder
        "representative_structures_pdbs" next to the representatives file is
        used. If the destination exists, a numbered sibling is created
    :type output: str
    :param clusters: Rows of the representatives file to extract; None or
        ['a'] extracts all
    :type clusters: list or None
    :param trajNames: Basename of the trajectory files
    :type trajNames: str
    :param topology: Topology file, or None
    :raises IOError: If the representatives file or a trajectory can't be read
    :raises ValueError: If a trajectory file is not found
    """
    selected = ['a'] if clusters is None else clusters
    # Load the representative structures file
    try:
        clusters_info = np.loadtxt(representatives_files, skiprows=1, dtype=int)
    except IOError:
        raise IOError("Couldn't find a representative file in %s, please check that the path is correct" % representatives_files)
    if selected != ['a']:
        clusters_info = clusters_info[[int(c) for c in selected]]
    # Organize to minimise pdb loading
    extract_info = getExtractInfo(clusters_info)

    # Decide where the pdbs are written
    if output:
        destFolder = output
    else:
        parent = os.path.split(representatives_files)[0]
        destFolder = os.path.join(parent, "representative_structures_pdbs")
    if os.path.exists(destFolder):
        # never reuse an existing folder, look for a free numbered name
        numbered = destFolder + "_%d"
        suffix = 1
        while os.path.exists(numbered % suffix):
            suffix += 1
        destFolder = numbered % suffix
    os.makedirs(destFolder)

    structureFolder = os.path.join(path_structures, "%d", trajNames+"_%d.*")
    topology_contents = None if topology is None else utilities.getTopologyFile(topology)

    for trajFile, extraInfo in extract_info.items():
        pattern = structureFolder % trajFile
        candidates = glob.glob(pattern)
        if not candidates:
            raise ValueError("Structure %s not found" % pattern)
        pdbFile = candidates[0]
        try:
            snapshots = utilities.getSnapshots(pdbFile, topology=topology)
        except IOError:
            raise IOError("Unable to open %s, please check that the path to structures provided is correct" % pdbFile)
        for pair in extraInfo:
            # pair[0] is used as cluster number, pair[1] as snapshot index
            destination = os.path.join(destFolder, "cluster_%d.pdb" % pair[0])
            if topology_contents is not None:
                utilities.write_mdtraj_object_PDB(snapshots[pair[1]], destination, topology=topology_contents)
                continue
            with open(destination, "w") as fw:
                fw.write(snapshots[pair[1]])
                fw.write("\n")
def mapReference(ref, trajs, topology_content):
    """
    Build the reference coordinates and the PDB objects of all the snapshots.

    :param ref: Reference structure used to initialise the reference PDB
    :param trajs: Trajectory files; the parent folder name of each file must
        be its epoch number
    :type trajs: list
    :param topology_content: Object providing getTopology(epoch, traj_num);
        it is also passed as-is when initialising the reference
    :returns: dict, list -- Coordinates of the reference protein atoms
        indexed by atom id, and the PDB objects of all snapshots
    """
    refPDB = atomset.PDB()
    refPDB.initialise(ref, type="PROTEIN", topology=topology_content)
    avgStruct = {
        atomID: refPDB.atoms[atomID].getAtomCoords()
        for atomID in refPDB.atoms
    }
    snapshotsTot = []
    for traj in trajs:
        traj_num = utilities.getTrajNum(traj)
        epoch = int(os.path.abspath(traj).split(os.sep)[-2])
        # load each trajectory only once
        snapshots = utilities.getSnapshots(traj)
        for snapshot in snapshots:
            PDB = atomset.PDB()
            PDB.initialise(snapshot, type="PROTEIN",
                           topology=topology_content.getTopology(epoch, traj_num))
            snapshotsTot.append(PDB)
    return avgStruct, snapshotsTot
def writeInitialStructures(centers_info, filename_template, topology=None):
    """
    Write the structure associated with each cluster center to its own file.

    :param centers_info: Mapping from cluster number to a dict whose
        'structure' entry holds (epoch, trajectory, snapshot)
    :type centers_info: dict
    :param filename_template: %-style template receiving the cluster number
    :type filename_template: str
    :param topology: Topology file, or None
    """
    # always bind topology_contents, it is needed below whenever the
    # snapshots are not plain strings
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    for cluster_num in centers_info:
        epoch_num, traj_num, snap_num = map(
            int, centers_info[cluster_num]['structure'])
        # "_%d.*" so that trajectory 1 does not match trajectory_10, etc.
        trajectory = glob.glob("%d/trajectory_%d.*" % (epoch_num, traj_num))[0]
        snapshots = utilities.getSnapshots(trajectory, topology=topology)
        if isinstance(snapshots[0], basestring):
            with open(filename_template % cluster_num, "w") as fw:
                fw.write(snapshots[snap_num])
        else:
            utilities.write_mdtraj_object_PDB(snapshots[snap_num],
                                              filename_template % cluster_num,
                                              topology_contents)
def generateConformations(resname, clAcc, trajectory, topology):
    """
    Generator over conformations, taken either from a clustering object or
    from trajectory files.

    :param resname: Residue name used to initialise the PDB objects
    :type resname: str
    :param clAcc: Object whose clusters.clusters elements expose a pdb
        attribute; when not None, those pdb objects are yielded and the
        trajectories are ignored
    :param trajectory: Trajectory files to read when clAcc is None
    :type trajectory: list
    :param topology: Topology file, or None
    :returns: generator -- PDB objects
    """
    topology_contents = utilities.getTopologyFile(topology) if topology is not None else None
    if clAcc is not None:
        for cluster in clAcc.clusters.clusters:
            yield cluster.pdb
        return
    for traj in trajectory:
        for snapshot in utilities.getSnapshots(traj, topology=topology):
            PDBobj = atomset.PDB()
            PDBobj.initialise(snapshot, resname=resname, topology=topology_contents)
            yield PDBobj
def mapReference(ref, trajs, topology, topology_content):
    """
    Build the reference coordinates and the PDB objects of all the snapshots.

    :param ref: Reference structure used to initialise the reference PDB
    :param trajs: Trajectory files
    :type trajs: list
    :param topology: Topology file passed to the snapshot reader
    :param topology_content: Topology passed to the PDB initialisation
    :returns: dict, list -- Coordinates of the reference protein atoms
        indexed by atom id, and the PDB objects of all snapshots
    """
    reference = atomset.PDB()
    reference.initialise(ref, type="PROTEIN", topology=topology_content)
    avgStruct = {}
    for atomID in reference.atoms:
        avgStruct[atomID] = reference.atoms[atomID].getAtomCoords()

    snapshotsTot = []
    for traj in trajs:
        for snapshot in utilities.getSnapshots(traj, topology=topology):
            structure = atomset.PDB()
            structure.initialise(snapshot, type="PROTEIN", topology=topology_content)
            snapshotsTot.append(structure)
    return avgStruct, snapshotsTot
def extractAvgPDB(trajs, topology, topology_content):
    """
    Compute a running average of the protein atom coordinates over all the
    snapshots of the trajectories.

    :param trajs: Trajectory files
    :type trajs: list
    :param topology: Topology file passed to the snapshot reader
    :param topology_content: Topology passed to the PDB initialisation
    :returns: dict, list -- Averaged coordinates indexed by atom id, and the
        PDB objects of all snapshots
    """
    avgStruct = {}
    snapshotsTot = []
    for traj in trajs:
        for snapshot in utilities.getSnapshots(traj, topology=topology):
            structure = atomset.PDB()
            structure.initialise(snapshot, type="PROTEIN", topology=topology_content)
            snapshotsTot.append(structure)
            # number of snapshots processed so far, this one included
            seen = len(snapshotsTot)
            for atomID, atom in structure.atoms.items():
                if atomID not in avgStruct:
                    avgStruct[atomID] = atom.getAtomCoords()
                    continue
                # incremental update of the mean
                avgStruct[atomID] += (atom.getAtomCoords() - avgStruct[atomID]) / seen
    return avgStruct, snapshotsTot
def write_snapshot(snap_num, trajectory, filename, topology=None, use_pdb=False):
    """
    Write a single snapshot of a trajectory to a file.

    :param snap_num: Index of the snapshot (starting from 0)
    :type snap_num: int
    :param trajectory: Trajectory file
    :type trajectory: str
    :param filename: Name of the file to write
    :type filename: str
    :param topology: If set, the snapshot is extracted through
        splitTrajectory.main; otherwise it is written directly
    :param use_pdb: Forwarded to the snapshot readers
    :type use_pdb: bool
    """
    if topology:
        # splitTrajectory selects structures with 1-based indices
        splitTrajectory.main("", [trajectory], topology, [snap_num + 1], template=filename, use_pdb=use_pdb)
        return
    models = utilities.getSnapshots(trajectory, topology=topology, use_pdb=use_pdb)
    with open(filename, "w") as fw:
        fw.write(models[snap_num])
def projectTICATrajs(folders, folderPath, ligand_resname, atomId, stride_conformations, nTICs, tica, writeFiles=True, topology=None):
    """
    Project the extracted coordinates of every trajectory with a TICA
    object and collect the ligand coordinates of the same snapshots.

    :param folders: Names of the epoch folders
    :type folders: list
    :param folderPath: Folder that contains the epoch folders
    :type folderPath: str
    :param ligand_resname: Residue name of the ligand, forwarded to get_coords
    :type ligand_resname: str
    :param atomId: Atom identifier forwarded to get_coords
    :param stride_conformations: Only one every stride_conformations
        snapshots is used
    :type stride_conformations: int
    :param nTICs: Number of TICA components to keep
    :type nTICs: int
    :param tica: Object providing transform(array)
    :param writeFiles: Whether to write the results to "tica_COM"
    :type writeFiles: bool
    :param topology: Topology file, or None
    :returns: list, list -- Ligand coordinates and projected coordinates,
        one entry per trajectory
    """
    def _traj_number(path):
        # number between the last underscore and the 4-character extension
        return int(path[path.rfind("_") + 1:-4])

    if writeFiles:
        utilities.makeFolder("tica_COM")
    trajsUniq = []
    projectedUniq = []
    for epoch in folders:
        coord_pattern = os.path.join(folderPath, "%s/extractedCoordinates/coord*" % epoch)
        for trajName in sorted(glob.glob(coord_pattern), key=_traj_number):
            trajNum = _traj_number(trajName)
            traj_pattern = os.path.join(folderPath, "%s/trajectory_%d.*" % (epoch, trajNum))
            trajFile = glob.glob(traj_pattern)[0]
            snapshotsPDB = utilities.getSnapshots(trajFile, topology=topology)
            trajCOM = []
            for snapshot in itertools.islice(snapshotsPDB, 0, None, stride_conformations):
                trajCOM.append(get_coords(snapshot, atomId, ligand_resname))
            trajsUniq.append(trajCOM)

            trajLoad = np.loadtxt(trajName)
            if len(trajLoad.shape) == 1:
                # a single-snapshot file is loaded as 1D, make it 2D
                trajLoad = trajLoad[np.newaxis, :]
            # From all the rows, keep only the first nTICs columns
            projectedTraj = tica.transform(trajLoad[::stride_conformations])[:, :nTICs]
            projectedUniq.append(projectedTraj)
            if not writeFiles:
                continue
            tica_labels = "\t".join(["TICA %d" % tic for tic in range(nTICs)])
            np.savetxt(
                "tica_COM/traj_%s_%d.dat" % (epoch, trajNum),
                np.hstack((np.array(trajCOM), projectedTraj)),
                header="COM coordinates x\ty\tz\t TICA coordinates\t" + tica_labels + "\n")
    return trajsUniq, projectedUniq
def main(pathway, pdb_with_connects, ligand_chain="L", outfilename=None):
    """
    Renumber the ligand atoms of every model of a pathway so that they match
    the indexes of a reference PDB, and append the CONECT records of the
    reference to each model.

    :param pathway: Multi-model PDB file with the pathway
    :type pathway: str
    :param pdb_with_connects: PDB file that contains the ligand and its
        CONECT section
    :type pdb_with_connects: str
    :param ligand_chain: Chain of the ligand
    :type ligand_chain: str
    :param outfilename: Output file, by default "<pathway>_connected.pdb".
        An existing file with that name is replaced
    :type outfilename: str or None
    """
    pdb = pm.PDB(pdb_with_connects, chain=ligand_chain)
    ligand = "".join(pdb.get_atoms_of_chain())
    connects = "".join(pdb.read_conect())

    # Temporary file to extract the indexes of the ligand; the finally
    # clause removes it even if the parsing fails
    ligand_tmp = "ligand_with_connects_tmp.pdb"
    try:
        with open(ligand_tmp, "w") as out:
            out.write(ligand + connects)
        pdb = pm.PDB(in_pdb=ligand_tmp, chain=ligand_chain)
        names_dictionary = pdb.get_names_dictionary()
    finally:
        if os.path.exists(ligand_tmp):
            os.remove(ligand_tmp)

    pathway_snapshots = utilities.getSnapshots(pathway)
    new_pathway = []
    sys.stderr.write("Rebuilding indexes...\n")
    snap_tmp = "snap_tmp.pdb"
    for snap in pathway_snapshots:
        try:
            with open(snap_tmp, "w") as out:
                out.write(snap)
            pdb = pm.PDB(in_pdb=snap_tmp, chain=ligand_chain)
        finally:
            if os.path.exists(snap_tmp):
                os.remove(snap_tmp)
        ligand = pdb.get_atoms_of_chain()
        # atom lines of the complex that do not belong to the ligand
        complex_no_ligand = sorted(set(pdb.read_atoms_section()) ^ set(ligand))
        # parse the atom name of every ligand line once per snapshot
        named_lines = [(pm.get_atom_pdb_name_from_line(line).strip(), line)
                       for line in ligand]
        new_ligand = []
        for index, name in names_dictionary.items():
            for pdb_name, line in named_lines:
                if name == pdb_name:
                    new_ligand.append(pm.set_index_to_line(line, index))
        new_complex = "".join(complex_no_ligand) + "".join(new_ligand) + connects
        new_pathway.append(new_complex)

    sys.stderr.write("Writing connects...\n")
    if not outfilename:
        outfilename = "{}_connected.pdb".format(pathway.split(".pdb")[0])
    if os.path.exists(outfilename):
        os.remove(outfilename)
    if new_pathway:
        # open the output once instead of once per model
        with open(outfilename, "w") as output:
            for n, model in enumerate(new_pathway):
                output.write("MODEL {}\n".format(n))
                output.write(model)
                output.write("ENDMDL\n")
def writeCentersInfo(centersInfo, folderPath, ligand_resname, nTICs, numClusters, trajsUniq, clustersCentersFolder, nTraj, topology=None):
    """
    Write the structure of every cluster center, the distances of the
    centers and a PDB with the ligand coordinates of the centers.

    :param centersInfo: Mapping from cluster number to a dict with the
        entries 'structure' (epoch, trajectory, snapshot) and 'minDist'
    :type centersInfo: dict
    :param folderPath: Folder that contains the epoch folders
    :type folderPath: str
    :param ligand_resname: Residue name used to initialise the PDB objects
    :type ligand_resname: str
    :param nTICs: Number of TICA components (used in the filenames)
    :type nTICs: int
    :param numClusters: Number of clusters
    :type numClusters: int
    :param trajsUniq: Ligand coordinates per trajectory, indexed as
        epoch * nTraj + (trajectory - 1)
    :type trajsUniq: list
    :param clustersCentersFolder: Output folder (created if missing)
    :type clustersCentersFolder: str
    :param nTraj: Number of trajectories per epoch
    :type nTraj: int
    :param topology: Topology file, or None
    """
    topology_contents = utilities.getTopologyFile(topology) if topology is not None else None
    if not os.path.exists(clustersCentersFolder):
        os.makedirs(clustersCentersFolder)

    COM_list = []
    for clusterNum in centersInfo:
        epoch, trajNum, snap = centersInfo[clusterNum]['structure']
        flat_index = int(epoch) * nTraj + (trajNum - 1)
        COM_list.append(trajsUniq[flat_index][snap])
        # Accept non-pdb trajectories
        matches = glob.glob(
            os.path.join(folderPath, "%s/trajectory_%d.*" % (epoch, trajNum)))
        snapshots = utilities.getSnapshots(matches[0], topology=topology)
        center_path = os.path.join(str(clustersCentersFolder), "cluster_%d.pdb" % clusterNum)
        pdb_object = atomset.PDB()
        pdb_object.initialise(snapshots[snap], resname=ligand_resname,
                              topology=topology_contents)
        pdb_object.writePDB(str(center_path))

    distances = []
    for nC in range(numClusters):
        distances.append([nC, centersInfo[nC]['minDist']])
    distances_file = os.path.join(
        clustersCentersFolder,
        "clusterDistances_%dcl_%dTICs.dat" % (numClusters, nTICs))
    np.savetxt(distances_file, distances)
    centers_file = os.path.join(
        clustersCentersFolder,
        "clustersCenters_%dcl_%dTICs.pdb" % (numClusters, nTICs))
    utilities.write_PDB_clusters(COM_list, centers_file)
def main(metricCol, lig_resname, nTrajs, stride, atomId, saving_frequency):
    """
    For every epoch folder of the current directory, write a PDB of points
    ("epoch_<n>.pdb") with the ligand coordinates and a report metric, plus
    an extra atom that marks the box center of the epoch.

    The box center of the first epoch is the first point read; for the
    following epochs it is the position with the largest metric of the
    previous epoch.

    :param metricCol: Column of the report file that holds the metric
    :type metricCol: int
    :param lig_resname: Residue name of the ligand, forwarded to get_coords
    :type lig_resname: str
    :param nTrajs: Exclusive upper bound of the trajectory numbers, i.e.
        trajectories 1 .. nTrajs - 1 are read
    :type nTrajs: int
    :param stride: Only one every stride snapshots is processed
    :type stride: int
    :param atomId: Atom identifier forwarded to get_coords
    :param saving_frequency: Number of report lines per stored snapshot
    :type saving_frequency: int
    """
    folders = utilities.get_epoch_folders(".")
    box_center = None
    templateLine = "HETATM%s H BOX Z 501 %s%s%s 0.75%s H \n"
    for epoch in folders:
        print("Processing epoch %s" % epoch)
        data = []
        # start from -inf so that metrics that are all below -1 (e.g.
        # negative energies) are still registered as the epoch maximum
        maxEpoch = -np.inf
        maxEpochCoords = None
        for iTraj in range(1, nTrajs):
            report = np.loadtxt("%s/report_%d" % (epoch, iTraj))
            if len(report.shape) < 2:
                # a single-line report is loaded as 1D, make it 2D
                report = report[np.newaxis, :]
            maxTrajIndex = np.argmax(report[:, metricCol])
            snapshots = utilities.getSnapshots("%s/trajectory_%d.pdb" % (epoch, iTraj))
            for i, snapshot in enumerate(itertools.islice(snapshots, 0, None, stride)):
                # map the strided snapshot index back to its report line
                report_line = i * stride * saving_frequency
                data.append(get_coords(snapshot, atomId, lig_resname) + [report[report_line, metricCol]])
            if report[maxTrajIndex, metricCol] > maxEpoch:
                maxEpoch = report[maxTrajIndex, metricCol]
                # NOTE(review): maxTrajIndex is a report line, it is used as
                # a snapshot index without dividing by saving_frequency --
                # confirm this is intended when saving_frequency != 1
                maxEpochCoords = get_coords(snapshots[maxTrajIndex], atomId, lig_resname)
            if box_center is None and iTraj == 1:
                box_center = data[0][:3]
        data = np.array(data)
        minInd = np.argmin(data[:, -1])
        minMetric = data[minInd, -1]
        # store the metric relative to its minimum
        data[:, -1] -= minMetric
        utilities.write_PDB_clusters(data, title="epoch_%s.pdb" % epoch, use_beta=True)
        print("Max value for metric", maxEpoch, maxEpochCoords)
        with open("epoch_%s.pdb" % epoch, "a") as fa:
            fa.write("TER\n")
            serial = ("%d" % data.shape[0]).rjust(5)
            x = ("%.3f" % box_center[0]).rjust(8)
            y = ("%.3f" % box_center[1]).rjust(8)
            z = ("%.3f" % box_center[2]).rjust(8)
            g = ("%.2f" % 0).rjust(6)
            fa.write(templateLine % (serial, x, y, z, g))
        # the next epoch is centered on the best position of this one
        box_center = maxEpochCoords
def extractAvgPDB(trajs, topology_content):
    """
    Compute a running average of the protein atom coordinates over all the
    snapshots of the trajectories.

    :param trajs: Trajectory files; the parent folder name of each file must
        be its epoch number
    :type trajs: list
    :param topology_content: Object providing getTopology(epoch, traj_num)
    :returns: dict, list -- Averaged coordinates indexed by atom id, and the
        PDB objects of all snapshots
    """
    avgStruct = {}
    snapshotsTot = []
    for traj in trajs:
        models = utilities.getSnapshots(traj)
        traj_num = utilities.getTrajNum(traj)
        epoch = int(os.path.abspath(traj).split(os.sep)[-2])
        for model in models:
            structure = atomset.PDB()
            structure.initialise(model, type="PROTEIN",
                                 topology=topology_content.getTopology(epoch, traj_num))
            snapshotsTot.append(structure)
            # number of snapshots processed so far, this one included
            seen = len(snapshotsTot)
            for atomID, atom in structure.atoms.items():
                if atomID not in avgStruct:
                    avgStruct[atomID] = atom.getAtomCoords()
                    continue
                # incremental update of the mean
                avgStruct[atomID] += (atom.getAtomCoords() - avgStruct[atomID]) / seen
    return avgStruct, snapshotsTot
os.makedirs(outputPath) else: outputPath = "" if topology is not None: topology_contents = utilities.getTopologyFile(topology) else: topology_contents = None sys.stderr.write("Reading clustering object...\n") cl = utilities.readClusteringObject(clusteringObject) if cl.conformationNetwork is None: sys.exit("Clustering object loaded has no conformation network!!") conf = cl.conformationNetwork filename = glob.glob(epoch+"/*traj*_%d*" % trajectory) snapshots = utilities.getSnapshots(filename[0], topology) snapshots = snapshots[:snapshot+1] if not isinstance(snapshots[0], basestring): new_snapshots = [] for snapshot in snapshots: PDB = atomset.PDB() PDB.initialise(snapshot, topology=topology_contents) new_snapshots.append(PDB.get_pdb_string()) snapshots = new_snapshots procMapping = open(os.path.join(epoch, "processorMapping.txt")).read().rstrip().split(',') leaf = procMapping[trajectory-1] pathway = conf.createPathwayToCluster(int(leaf)) cl.writePathwayTrajectory(pathway, outputPath+"pathway.pdb") with open(outputPath+"pathway.pdb", "a") as f: f.write("ENDMDL\n".join(snapshots))
topology = None filename = "conformation_data.dat" if not os.path.exists(filename): raise IOError( "File conformation_data.dat not found, please be sure to run extract_COM_metric.py before this script" ) print("Selected names: ", ' '.join(names)) with open(filename) as f: f.readline() for line in f: line = line.strip().split() if line[0] not in names: continue epoch, iTraj, nSnap = line[1:4] report = np.loadtxt("%s/report_%s" % (epoch, iTraj)) print(line[0], "=>", "epoch %s, trajectory %s, snapshot %s" % tuple(line[1:4]), "metric", report[int(nSnap), metricCol]) traj_file = glob.glob("%s/%s_%s*" % (epoch, traj_name, iTraj)) snapshots = utilities.getSnapshots(traj_file, topology=topology) if isinstance(snapshots[int(nSnap)], basestring): with open( output_folder + "conf_%s_%s_%s.pdb" % (epoch, iTraj, nSnap), "w") as fw: fw.write(snapshots[int(nSnap)]) else: utilities.write_mdtraj_object_PDB( snapshots[int(nSnap)], output_folder + "conf_%s_%s_%s.pdb" % (epoch, iTraj, nSnap), topology_contents)
def main(n_clusters, output_folder, SASAColumn, norm_energy, num_bins, percentile, plots, atom_Ids, folder_name, traj_basename, cluster_energy, topology=None):
    """
    Select the low-energy snapshots of each SASA bin, cluster them with
    KMeans and write the snapshot closest to each center as
    "initial_<i>.pdb".

    :param n_clusters: Number of clusters
    :type n_clusters: int
    :param output_folder: Output folder (created if missing), None for the
        current directory
    :type output_folder: str or None
    :param SASAColumn: Column of the report file that holds the SASA
    :type SASAColumn: int
    :param norm_energy: Whether to rescale the energy of the selected points
    :type norm_energy: bool
    :param num_bins: Number of SASA bins
    :type num_bins: int
    :param percentile: Energy percentile below which the points of a bin are
        kept
    :type percentile: float
    :param plots: Whether to plot the clusters
    :type plots: bool
    :param atom_Ids: Forwarded to extractCoords.main
    :param folder_name: Folder that contains the epoch folders
    :type folder_name: str
    :param traj_basename: Basename of the trajectory files
    :type traj_basename: str
    :param cluster_energy: Cluster on (energy, SASA) if True, on the
        coordinate columns otherwise
    :type cluster_energy: bool
    :param topology: Topology file, or None
    """
    energyColumn = 3
    if output_folder is not None:
        outputFolder = os.path.join(output_folder, "")
        if not os.path.exists(outputFolder):
            os.makedirs(outputFolder)
    else:
        outputFolder = ""
    # NOTE(review): ligand_resname is not a parameter of this function, it
    # is presumably a module-level name -- confirm it is defined
    extractCoords.main(folder_name, lig_resname=ligand_resname, non_Repeat=True, atom_Ids=atom_Ids)
    epochFolders = utilities.get_epoch_folders(folder_name)
    # columns of points: energy, SASA, epoch, trajectory, snapshot, coords
    points = []
    for epoch in epochFolders:
        report_files = glob.glob(os.path.join(epoch, "*report*"))
        report_files.sort(key=lambda x: int(x[x.rfind("_") + 1:]))
        for report_name in report_files:
            traj_num = int(report_name[report_name.rfind("_") + 1:])
            coordinates = np.loadtxt(
                os.path.join(folder_name, "%s/extractedCoordinates/coord_%d.dat" % (epoch, traj_num)))
            report = np.loadtxt(report_name)
            if len(report.shape) < 2:
                # single-snapshot trajectory
                points.append([report[energyColumn], report[SASAColumn], int(epoch), traj_num, 0] + coordinates[1:].tolist())
            else:
                epoch_line = np.array([int(epoch)] * report.shape[0])
                traj_line = np.array([traj_num] * report.shape[0])
                snapshot_line = np.array(range(report.shape[0]))
                points.extend(
                    np.hstack((report[:, (energyColumn, SASAColumn)],
                               epoch_line[:, np.newaxis],
                               traj_line[:, np.newaxis],
                               snapshot_line[:, np.newaxis],
                               coordinates[:, 1:])))
    points = np.array(points)
    # sort by SASA so that every bin is a contiguous slice
    points = points[points[:, 1].argsort()]
    minSASA = points[0, 1]
    maxSASA = points[-1, 1]
    left_bins = np.linspace(minSASA, maxSASA, num=num_bins, endpoint=False)
    indices = np.searchsorted(points[:, 1], left_bins)
    thresholds = np.array([
        np.percentile(points[i:j, 0], percentile)
        for i, j in zip(indices[:-1], indices[1:])
    ])

    new_points = []
    for ij, (i, j) in enumerate(zip(indices[:-1], indices[1:])):
        found = np.where(points[i:j, 0] < thresholds[ij])[0]
        # always extend row by row: appending the 2D selection of a
        # single-hit bin would mix (1, n) blocks with (n,) rows
        new_points.extend(points[found + i])
    points = np.array(new_points)

    if norm_energy:
        energyMin = points.min(axis=0)[0]
        points[:, 0] -= energyMin
        energyMax = points.max(axis=0)[0]
        points[:, 0] /= energyMax
    if cluster_energy:
        print("Clustering using energy and SASA")
        kmeans = KMeans(n_clusters=n_clusters).fit(points[:, :2])
        title = "clusters_%d_energy_SASA.pdb"
    else:
        print("Clustering using ligand coordinates")
        kmeans = KMeans(n_clusters=n_clusters).fit(points[:, 5:8])
        title = "clusters_%d_energy_SASA_coords.pdb"

    centers_energy = []
    centers_coords = []
    # always bind topology_contents, it is needed below whenever the
    # snapshots are not plain strings
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    for i, center in enumerate(kmeans.cluster_centers_):
        if cluster_energy:
            dist = np.linalg.norm((points[:, :2] - center), axis=1)
        else:
            dist = np.linalg.norm((points[:, 5:8] - center), axis=1)
        closest = dist.argmin()
        epoch, traj, snapshot = points[closest, 2:5]
        centers_energy.append(points[closest, :2])
        centers_coords.append(points[closest, 5:8])
        # "_%d.*" so that trajectory 1 does not match trajectory 10, etc.
        traj_file = glob.glob("%d/%s_%d.*" % (epoch, traj_basename, traj))[0]
        conf = utilities.getSnapshots(traj_file, topology=topology)[int(snapshot)]
        initial_file = os.path.join(outputFolder, "initial_%d.pdb" % i)
        if isinstance(conf, basestring):
            with open(initial_file, "w") as fw:
                fw.write(conf)
        else:
            utilities.write_mdtraj_object_PDB(conf, initial_file, topology_contents)
    centers_energy = np.array(centers_energy)
    centers_coords = np.array(centers_coords)
    writePDB(centers_coords, os.path.join(outputFolder, title % n_clusters))

    if plots:
        plt.scatter(points[:, 1], points[:, 0], c=kmeans.labels_, alpha=0.5)
        plt.scatter(centers_energy[:, 1], centers_energy[:, 0],
                    c=list(range(n_clusters)), marker='x', s=56, zorder=1)
        plt.xlabel("SASA")
        if norm_energy:
            plt.ylabel("Energy (normalized)")
            plt.savefig(os.path.join(outputFolder, "clusters_energy_normalized.png"))
        else:
            plt.ylabel("Energy (kcal/mol)")
            plt.savefig(os.path.join(outputFolder, "clusters_no_normalized.png"))
        plt.show()
def main(file_to_backtrack, results_path, outputPath, out_filename):
    """
    Rebuild the pathway that leads to a selected structure of a growing
    simulation, following the processorMapping.txt files of each growing
    step backwards, and write it to a file.

    :param file_to_backtrack: File of the selected_results folder that you
        want to backtrack.
    :type file_to_backtrack: str
    :param results_path: Path where the growing simulations are stored.
    :type results_path: str
    :param outputPath: Output folder path (created if missing), None for the
        current directory.
    :type outputPath: str or None
    :param out_filename: Output filename; a numeric suffix is added if a
        file with that name already exists.
    :type out_filename: str
    :return: None
    :raises ValueError: If no growing output folder is found in results_path
    """
    if outputPath is not None:
        outputPath = os.path.join(outputPath, "")
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
    else:
        outputPath = ""
    if os.path.exists(outputPath + out_filename):
        # If the specified name exists, append a number to distinguish the files
        name, ext = os.path.splitext(out_filename)
        out_filename = "".join([name, "_%d", ext])
        i = 1
        while os.path.exists(outputPath + out_filename % i):
            i += 1
        out_filename %= i
    pathway = []
    sys.stderr.write("Creating pathway...\n")
    # Get information of the input structure
    trajectory, snapshot, growing_id = extract_info_from_selected_file(
        path_to_selected_file=file_to_backtrack)
    # Obtain trajectory from sampling folder
    sampling_file_from = "sampling_result_{}/trajectory_{}.pdb".format(
        growing_id, trajectory)
    sampling_models = utilities.getSnapshots(sampling_file_from)
    # Get all snapshots up to the input snapshot (skipping the first one)
    # and add them to the pathway
    snapshots = sampling_models[1:snapshot + 1]
    pathway.insert(0, snapshots)
    growing_epochs = glob.glob(
        os.path.join(results_path, "{}_growing_output*".format(growing_id)))
    if not growing_epochs:
        raise ValueError("Trajectory %s not found!" % os.path.join(results_path))
    for n in range(len(growing_epochs) - 1, -1, -1):
        # Reading mapping information, closing the file afterwards
        mapping_path = os.path.join(
            results_path, "{}_growing_output{}".format(growing_id, n),
            "processorMapping.txt")
        with open(mapping_path) as mapping_file:
            procMapping = mapping_file.read().split(":")
        # Extract the spawning structure of the previous simulation
        cluster_from = procMapping[trajectory - 1].rstrip("\n")
        cluster_from_tup = literal_eval(cluster_from)
        # Update the trajectory and snapshot to be used
        trajectory = cluster_from_tup[1]
        snapshot = cluster_from_tup[2]
        # Extract the filename and the snapshot that has spawned
        filename = os.path.join(
            results_path,
            "{}_growing_output{}/trajectory_{}.pdb".format(growing_id, n, trajectory))
        print(n, filename, trajectory, snapshot, procMapping)
        snapshots = utilities.getSnapshots(filename)
        # the first model is only kept for the first growing step, to avoid
        # repeating the spawning structure
        initial = 0 if n == 0 else 1
        # Take all MODELS up to the snapshot
        snapshots = snapshots[initial:snapshot + 1]
        pathway.insert(0, snapshots)
    sys.stderr.write("Writing pathway...\n")
    with open(outputPath + out_filename, "a") as f:
        f.write("ENDMDL\n".join(itertools.chain.from_iterable(pathway)))