def checkIntegrityClusteringObject(objectPath):
    """
    Test whether the found clustering object to reload is a valid object

    :param objectPath: Clustering object path
    :type objectPath: str
    :returns: bool -- True if the found clustering object is valid
    """
    try:
        utilities.readClusteringObject(objectPath)
        return True
    except EOFError:
        return False
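# Minimal usage sketch (illustrative; the path below is hypothetical): reload a
# previous clustering only when the pickle on disk is intact, and let the
# caller trigger a re-clustering otherwise.
#
#     objectPath = "output/clustering/object.pkl"
#     if checkIntegrityClusteringObject(objectPath):
#         clusteringMethod = utilities.readClusteringObject(objectPath)
#     else:
#         clusteringMethod = None  # signal upstream that a re-clustering is needed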
def main(epoch_num, trajectory, snapshot_num, resname, clustering_object, topology):
    calc = RMSDCalculator.RMSDCalculator()
    clustering_object = utilities.readClusteringObject(clustering_object)
    n_clusters = utilities.loadtxtfile(
        os.path.join(str(max(0, epoch_num - 1)), "clustering", "summary.txt")).shape[0]
    if topology is not None:
        topology_contents = utilities.getTopologyFile(topology)
    else:
        topology_contents = None
    filename = glob.glob(os.path.join(str(epoch_num), "*traj*_%d.*" % trajectory))
    if not filename:
        raise ValueError("No file with the specified epoch and trajectory found")
    try:
        snapshots = utilities.getSnapshots(filename[0], topology=topology)[snapshot_num]
    except IndexError:
        raise IndexError("Snapshot number %d not found in trajectory %d for epoch %d, "
                         "please check that the arguments provided are correct"
                         % (snapshot_num, trajectory, epoch_num))
    pdb = atomset.PDB()
    pdb.initialise(snapshots, resname=resname, topology=topology_contents)
    for i, cluster in enumerate(clustering_object[:n_clusters]):
        dist = calc.computeRMSD(pdb, cluster.pdb)
        if dist < cluster.threshold:
            print("Snapshot belongs to cluster", i)
            return
    print("Snapshot not assigned to any cluster! :(")
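# Illustrative invocation (all argument values are hypothetical): check whether
# snapshot 5 of trajectory 1 in epoch 2 falls within the threshold of any
# cluster of a pickled clustering object.
#
#     main(epoch_num=2, trajectory=1, snapshot_num=5, resname="LIG",
#          clustering_object="2/clustering/object.pkl", topology=None)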
def main(plots, lagtimes_ITS, nRuns, nStates, plot_ITS, plot_CK, save_plot, path):
    if lagtimes_ITS is None:
        lagtimes_ITS = [1, 50, 100, 200, 400, 600, 800, 1000]
    MSM_object = estimate.MSM()
    MSM_object.lagtimes = lagtimes_ITS
    # the ck_plots folder is only written to from the CK-test branch below
    if plot_CK and save_plot and not os.path.exists(os.path.join(path, "ck_plots")):
        os.makedirs(os.path.join(path, "ck_plots"))
    print("Analysing folder ", path)
    for i in range(nRuns):
        print("Plotting validity checks for run %d" % i)
        MSM = utilities.readClusteringObject(os.path.join(path, "MSM_object_%d.pkl" % i))
        assert len(MSM.dtrajs_full) == len(MSM.dtrajs_active)
        MSM_object.MSM_object = MSM
        MSM_object.dtrajs = MSM.dtrajs_full
        if plot_ITS and (save_plot or plots):
            MSM_object._calculateITS()
            if save_plot:
                plt.savefig(os.path.join(path, "its_%d.png" % i))
        if plot_CK and (save_plot or plots):
            # setting mlags to None chooses automatically the number of lagtimes
            # according to the longest trajectory available
            CK_test = MSM_object.MSM_object.cktest(nStates, mlags=None)
            mplt.plot_cktest(CK_test)
            if save_plot:
                plt.savefig(os.path.join(path, "ck_plots", "ck_%d.png" % i))
        if plots:
            plt.show()
        plt.close('all')
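# Illustrative invocation (hypothetical values): save ITS and Chapman-Kolmogorov
# validity plots for 3 MSM runs with 4 macrostates, without showing them.
#
#     main(plots=False, lagtimes_ITS=None, nRuns=3, nStates=4,
#          plot_ITS=True, plot_CK=True, save_plot=True, path="MSM_0")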
def writeStructures(clusteringObject, listStructures, checker=lambda x: True, outputPath="cluster.pdb"):
    """
    Print all clusters in listStructures that meet the condition specified by the checker

    :param clusteringObject: Clustering object with clusters to print
    :type clusteringObject: :py:class:`.Clustering`
    :param listStructures: Indices of the clusters to write; if None or empty, all clusters are written
    :type listStructures: list
    :param checker: Lambda function that should evaluate to True for the structures of interest
    :type checker: function
    :param outputPath: Output cluster pdb filename
    :type outputPath: str
    """
    clObject = utilities.readClusteringObject(clusteringObject)
    nameStructure = os.path.splitext(outputPath)
    outputName = nameStructure[0] + '_%d' + nameStructure[1]
    path = os.path.split(outputName)
    pathToWrite = path[1]
    if path[0]:
        utilities.makeFolder(path[0])
        pathToWrite = os.path.join(path[0], path[1])
    # If no listStructures is provided, write all clusters
    if listStructures is None or len(listStructures) == 0:
        listStructures = range(len(clObject.clusters.clusters))
    for element in listStructures:
        cluster = clObject.clusters.clusters[element]
        if checker is None or checker(cluster):
            print("Writing", pathToWrite % element)
            cluster.writePDB(pathToWrite % element)
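# Example checkers (illustrative; paths and the population cutoff are
# hypothetical): the checker receives a Cluster object, so any of its
# attributes (e.g. ``elements``, the cluster population) can filter the output.
#
#     # write only well-populated clusters
#     writeStructures("object.pkl", None, checker=lambda c: c.elements > 10,
#                     outputPath="populated/cluster.pdb")
#     # write a fixed list of clusters, unconditionally
#     writeStructures("object.pkl", [0, 3, 7], outputPath="picked/cluster.pdb")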
def getWorkingClusteringObjectAndReclusterIfNecessary(firstRun, outputPathConstants, clusteringBlock, spawningParams, simulationRunner, topologies, processManager):
    """
    It reads the previous clustering method, and, if there are changes,
    it reclusters the previous trajectories. Returns the clustering object to use

    :param firstRun: New epoch to run
    :type firstRun: int
    :param outputPathConstants: Contains outputPath-related constants
    :type outputPathConstants: :py:class:`.OutputPathConstants`
    :param clusteringBlock: Contains the new clustering block
    :type clusteringBlock: json
    :param spawningParams: Spawning params, to know what reportFile and column to read
    :type spawningParams: :py:class:`.SpawningParams`
    :param simulationRunner: Simulation runner, to know the number of working processors
    :type simulationRunner: :py:class:`.SimulationRunner`
    :param topologies: Topology object containing the set of topologies needed for the simulation
    :type topologies: :py:class:`.Topology`
    :param processManager: Object to synchronize the possibly multiple processes
    :type processManager: :py:class:`.ProcessesManager`

    :returns: :py:class:`.Clustering` -- The clustering method to use in the adaptive sampling simulation
    """
    if not processManager.isMaster():
        for ij in range(firstRun):
            topologies.readMappingFromDisk(outputPathConstants.epochOutputPathTempletized % ij, ij)
        return
    lastClusteringEpoch = firstRun - 1
    clusteringObjectPath = outputPathConstants.clusteringOutputObject % (lastClusteringEpoch)
    oldClusteringMethod = utilities.readClusteringObject(clusteringObjectPath)

    clusteringBuilder = clustering.ClusteringBuilder()
    clusteringMethod = clusteringBuilder.buildClustering(clusteringBlock, spawningParams.reportFilename, spawningParams.reportCol)
    clusteringMethod.setProcessors(simulationRunner.getWorkingProcessors())

    if needToRecluster(oldClusteringMethod, clusteringMethod):
        utilities.print_unbuffered("Reclustering!")
        startTime = time.time()
        clusterPreviousEpochs(clusteringMethod, firstRun, outputPathConstants.epochOutputPathTempletized, simulationRunner, topologies, outputPathConstants.allTrajsPath)
        endTime = time.time()
        utilities.print_unbuffered("Reclustering took %s sec" % (endTime - startTime))
    else:
        clusteringMethod = oldClusteringMethod
        clusteringMethod.setCol(spawningParams.reportCol)
    for ij in range(firstRun):
        topologies.readMappingFromDisk(outputPathConstants.epochOutputPathTempletized % ij, ij)

    return clusteringMethod
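# Illustrative call (all objects are hypothetical and would normally be built
# by the adaptive-sampling driver when restarting from epoch ``firstRun``):
#
#     clusteringMethod = getWorkingClusteringObjectAndReclusterIfNecessary(
#         firstRun, outputPathConstants, clusteringBlock, spawningParams,
#         simulationRunner, topologies, processManager)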
def getTopologyObject(topology_file):
    ext = utilities.getFileExtension(topology_file)
    if ext == ".pdb":
        return TopologyCompat(topology_file)
    elif ext == ".pkl":
        return utilities.readClusteringObject(topology_file)
    else:
        raise ValueError("The topology parameter needs to be the path to a pickled Topology object or a pdb!")
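# Usage sketch (hypothetical paths): the file extension decides how the
# topology is loaded, so both plain pdbs and pickled Topology objects work.
#
#     top = getTopologyObject("system.pdb")       # wrapped in a TopologyCompat
#     top = getTopologyObject("topologies.pkl")   # unpickled Topology object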
def main(args):
    # Parameters
    clusteringObj = utilities.readClusteringObject(args.clusteringObj)
    native = args.native
    pathwayFilename = args.pathwayFilename
    ntrajs = args.ntrajs
    threshold = args.threshold
    RMSDCalc = RMSDCalculator.RMSDCalculator(clusteringObj.symmetries)

    # use graph algorithm to establish a path
    initial_cluster = 0
    final_cluster = getOptimalCluster(clusteringObj, native, RMSDCalc)
    distanceMatrix = createNetworkMatrix(clusteringObj, threshold, RMSDCalc)
    predecessors = obtainShortestPath(distanceMatrix)
    pathway = createPathway(initial_cluster, final_cluster, predecessors)
    print("Pathway clusters:")
    print(pathway)

    # write pathway into a single trajectory
    writePathwayTrajectory(clusteringObj, pathway, pathwayFilename, native)

    # create clustering object with only the pathway clusters
    ClPath = clustering.Clustering()
    # list() is needed so the clusters can be indexed under Python 3
    ClPath.clusters.clusters = list(map(lambda x: clusteringObj.clusters.clusters[x], pathway))

    # spawning along the trajectory
    spawningParams = spawning.SpawningParams()
    densityCalculatorBuilder = densitycalculator.DensityCalculatorBuilder()
    densityCalculator = densityCalculatorBuilder.build({})
    spawningPathway = spawning.InverselyProportionalToPopulationCalculator(densityCalculator)
    # Reserve at least 1 processor for each of the extremes of the path
    degeneracies = spawningPathway.calculate(ClPath.clusters.clusters, ntrajs - 2, spawningParams)
    degeneracies[0] += 1
    degeneracies[-1] += 1
    print("degeneracies over pathway:")
    print(degeneracies)
    print("")
import os
import numpy as np
import matplotlib.pyplot as plt
from AdaptivePELE.utilities import utilities
plt.style.use("ggplot")


def reward_new(x, rews):
    return -(x * rews).sum()


def reward(x, rews):
    return -(x[:, np.newaxis] * rews).sum()


# reload the clustering object from the most recent epoch that has one
folders = utilities.get_epoch_folders(".")
for folder in folders[::-1]:
    if os.path.exists(folder + "/clustering/object.pkl"):
        cl_object = utilities.readClusteringObject(folder + "/clustering/object.pkl")
        break

# first_cluster = 0
trajToDivide = 144 * 2
rewardsEvol = []
weightsEvol = []
weightsEvol_new = []
weights = None
weights_new = None
metricInd = 4
labels = ["TE", "RMSD", "BE", "SASA"]
plots = True
for folder in folders[10:]:
    print("")
    print("Epoch", folder)
    summary = np.loadtxt(folder + "/clustering/summary.txt")
    T = 300
    gpmf = -kb * T * np.log(dist / volume)
    gpmf -= gpmf.min()
    if node is not None:
        np.savetxt("GPMF/gmpf_%d.dat" % node, gpmf)
    return gpmf.max()


filename = "differences_400_gpmf.dat"
if os.path.exists(filename):
    differences = np.loadtxt(filename)
    plt.plot(differences)
    plt.show()
    sys.exit()
m1 = utilities.readClusteringObject("MSM_object_0.pkl")
# m2 = utilities.readClusteringObject("/home/jgilaber/3PTB_free_energies/3ptb_PELE_short_steps/run2/400/MSM_0/MSM_object_0.pkl")
vol1 = np.loadtxt("volumeOfClusters_0.dat")
vol2 = np.loadtxt("/home/jgilaber/3PTB_free_energies/3ptb_PELE_short_steps/run2/400/MSM_0/volumeOfClusters_0.dat")
if len(m1.active_set) != 100:
    c1 = m1.count_matrix_full + 1 / float(100)
    # trans = run.buildRevTransitionMatrix(c1)
    # eiv, eic = run.getSortedEigen(trans)
    # pi = run.getStationaryDistr(eic[:,0])
else:
    # pi = np.loadtxt("stationaryDistribution.dat")
    c1 = m1.count_matrix_full
from builtins import range
import networkx as nx
from AdaptivePELE.utilities import utilities
from AdaptivePELE.atomset import RMSDCalculator


def weight(pathway, confs):
    w = 0
    for j in range(1, len(pathway)):
        w += confs[pathway[j - 1]][pathway[j]]['metric']
    return w


metricCol = 4
RMSDCalc = RMSDCalculator.RMSDCalculator()
cl = utilities.readClusteringObject("ClCont.pkl")
nodeFin = cl.getOptimalMetric(column=metricCol)
conf = nx.DiGraph()
nx.read_edgelist("conformationNetwork_4DAJ.edgelist", create_using=conf, data=True, nodetype=int)
# edges_iter is the networkx 1.x API; in networkx >= 2.0 use conf.edges()
for source, target in conf.edges_iter():
    clusterSource = cl.getCluster(source)
    clusterTarget = cl.getCluster(target)
    conf[source][target]['metric'] = RMSDCalc.computeRMSD(clusterSource.pdb, clusterTarget.pdb)
# paths = nx.all_simple_paths(conf, 0, nodeFin)
paths = nx.shortest_simple_paths(conf, 0, nodeFin, weight='metric')
for i, path in enumerate(paths, start=1):
        cluster_center = clustering.getCluster(cluster)
        RMSD_cluster = 0
        n = 0
        for cl in np.where(model.labels_ == i)[0]:
            if cluster == cl:
                continue
            else:
                RMSD_cluster += RMSDCalc.computeRMSD(clustering.getCluster(cl).pdb, cluster_center.pdb)
                n += 1
        RMSD_array.append(RMSD_cluster / float(n))
    return RMSD_array


n_clusters, clustering_object, output = parseArgs()
print("Reading clustering object")
cluster_object = clu.readClusteringObject(clustering_object)
clusters = [cl.pdb.getCOM() for cl in cluster_object.clusterIterator()]
clusters_pop = np.array([cl.elements for cl in cluster_object.clusterIterator()])
clusters_contacts = np.array([cl.contacts for cl in cluster_object.clusterIterator()])
COMArray = np.array(clusters)
print("Number of adaptive clusters", len(clusters))
model = KMeans(n_clusters=n_clusters)
print("Reclustering")
model.fit(clusters)
# RMSDCalc = RMSDCalculator.RMSDCalculator()
# clusters_first = first_cluster_as_representative(model)
# clusters_pop = most_populated_cluster_as_representative(model, clusters_pop, n_clusters)
# distances_first = calculate_intercluster_distance(cluster_object, model, clusters_first, RMSDCalc)
# distances_pop = calculate_intercluster_distance(cluster_object, model, clusters_pop, RMSDCalc)
# print("Total intercluster distance first", sum(distances_first))
def readClustering(clusteringPath, metricCol):
    print("Reading clustering object...")
    clustering = utilities.readClusteringObject(clusteringPath)
    network = clustering.conformationNetwork.network
    metrics = [cl.metrics[metricCol] for cl in clustering.clusterIterator()]
    return clustering, network, metrics
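# Usage sketch (hypothetical path and metric column): load a clustering along
# with its conformation network and one metric value per cluster.
#
#     clustering, network, metrics = readClustering("output/clustering/object.pkl", 4)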
def main(nEigenvectors, nRuns, m, outputFolder, plotEigenvectors, plotGMRQ, plotPMF, clusters, lagtimes, native, save_plots, showPlots, filtered, destFolder, resname, plotTransitions=True):
    minPos = get_min_Pos(native, resname)
    if save_plots and outputFolder is None:
        outputFolder = "plots_MSM"
    eigenPlots = os.path.join(outputFolder, "eigenvector_plots")
    GMRQPlots = os.path.join(outputFolder, "GMRQ_plots")
    PMFPlots = os.path.join(outputFolder, "PMF_plots")
    TransitionPlots = os.path.join(outputFolder, "transitions")
    if save_plots and not os.path.exists(outputFolder):
        os.makedirs(outputFolder)
    if filtered is not None:
        filter_str = "_filtered"
    else:
        filter_str = ""
    if plotEigenvectors and save_plots and not os.path.exists(eigenPlots):
        os.makedirs(eigenPlots)
    if plotGMRQ and save_plots and not os.path.exists(GMRQPlots):
        os.makedirs(GMRQPlots)
    if plotPMF and save_plots and not os.path.exists(PMFPlots):
        os.makedirs(PMFPlots)
    if plotTransitions and save_plots and not os.path.exists(TransitionPlots):
        os.makedirs(TransitionPlots)
    minPos = np.array(minPos)
    GMRQValues = {}
    print("Running from " + destFolder)
    if plotGMRQ:
        GMRQValues = []
    if not os.path.exists(os.path.join(destFolder, "eigenvectors")):
        os.makedirs(os.path.join(destFolder, "eigenvectors"))
    for i in range(nRuns):
        titleVar = "%s, run %d" % (destFolder, i)
        if plotGMRQ or plotEigenvectors:
            msm_object = utilities.readClusteringObject(os.path.join(destFolder, "MSM_object_%d.pkl" % i))
        if plotGMRQ:
            GMRQValues.append(np.sum(msm_object.eigenvalues()[:m]))
        if plotEigenvectors or plotPMF:
            clusters = np.loadtxt(os.path.join(destFolder, "clusterCenters_%d.dat" % i))
            distance = np.linalg.norm(clusters - minPos, axis=1)
            volume = np.loadtxt(os.path.join(destFolder, "volumeOfClusters_%d.dat" % i))
            print("Total volume for system %s, run %d" % (destFolder, i), volume.sum())
            if filtered is not None:
                volume = volume[filtered]
                clusters = clusters[filtered]
                distance = distance[filtered]
        if plotEigenvectors:
            if clusters.size != msm_object.stationary_distribution.size:
                mat = computeDeltaG.reestimate_transition_matrix(msm_object.count_matrix_full)
            else:
                mat = msm_object.transition_matrix
            _, _, L = rdl_decomposition(mat)
            figures = []
            axes = []
            for _ in range((nEigenvectors - 1) // 4 + 1):
                f, axarr = plt.subplots(2, 2, figsize=(12, 12))
                f.suptitle(titleVar)
                figures.append(f)
                axes.append(axarr)
            for j, row in enumerate(L[:nEigenvectors]):
                pdb_filename = os.path.join(destFolder, "eigenvectors", "eigen_%d_run_%d.pdb" % (j + 1, i))
                if j:
                    atomnames = utilities.getAtomNames(utilities.sign(row, tol=1e-3))
                    utilities.write_PDB_clusters(clusters, use_beta=False, elements=atomnames, title=pdb_filename)
                else:
                    utilities.write_PDB_clusters(np.vstack((clusters.T, row)).T, use_beta=True, elements=None, title=pdb_filename)
                if filtered is not None:
                    row = row[filtered]
                np.savetxt(os.path.join(destFolder, "eigenvectors", "eigen_%d_run_%d%s.dat" % (j + 1, i, filter_str)), row)
                axes[j // 4][(j // 2) % 2, j % 2].scatter(distance, row)
                axes[j // 4][(j // 2) % 2, j % 2].set_xlabel("Distance to minimum")
                axes[j // 4][(j // 2) % 2, j % 2].set_ylabel("Eigenvector %d" % (j + 1))
            if save_plots:
                for j, fg in enumerate(figures):
                    fg.savefig(os.path.join(eigenPlots, "eigenvector_%d_run_%d%s.png" % (j + 1, i, filter_str)))
                plt.figure()
                plt.scatter(distance, L[0])
                plt.xlabel("Distance to minimum")
                plt.ylabel("Eigenvector 1")
                plt.savefig(os.path.join(eigenPlots, "eigenvector_1_alone_run_%d%s.png" % (i, filter_str)))
        if plotPMF:
            data = np.loadtxt(os.path.join(destFolder, "pmf_xyzg_%d.dat" % i))
            g = data[:, -1]
            if filtered is not None:
                g = g[filtered]
            print("Clusters with less than 2 PMF:")
            print(" ".join(map(str, np.where(g < 2)[0])))
            print("")
            plt.figure()
            plt.title("%s" % (destFolder))
            plt.scatter(distance, g)
            plt.xlabel("Distance to minima")
            plt.ylabel("PMF")
            if save_plots:
                plt.savefig(os.path.join(PMFPlots, "pmf_run_%d%s.png" % (i, filter_str)))
    if plotGMRQ:
        plt.figure()
        plt.title("%s" % (destFolder))
        plt.xlabel("Number of states")
        plt.ylabel("GMRQ")
        plt.boxplot(GMRQValues)
        if save_plots:
            plt.savefig(os.path.join(GMRQPlots, "GMRQ.png"))
    if plotTransitions:
        sasas = []
        for report_file in glob.glob("*/repor*"):
            sasas.extend(pd.read_csv(report_file, sep=' ', engine='python')["sasaLig"].values)
        sasas = np.array(sasas)
        plt.figure()
        plt.title("%s" % (destFolder))
        plt.xlabel("SASA")
        plt.ylabel("Transition Counts")
        plt.hist(sasas, 50, alpha=0.75)
        if save_plots:
            plt.savefig(os.path.join(TransitionPlots, "transition_hist.png"))
    if showPlots and (plotEigenvectors or plotGMRQ or plotPMF):
        plt.show()
    parser.add_argument("-c", "--cond", type=str, default="min", help="Condition on the metric optimality, options are max or min")
    parser.add_argument("-b", "--bindEn", type=int, default=None, help="Column of the binding energy in the report file")
    args = parser.parse_args()
    return args.clusteringObject, args.suffix, args.metricCol, args.o, args.cond, args.bindEn


if __name__ == "__main__":
    clusteringObject, suffix, metricCol, outputPath, metricOptimization, bindingEnergy = parseArguments()
    if outputPath is not None:
        outputPath = os.path.join(outputPath, "")
        if not os.path.exists(outputPath):
            os.makedirs(outputPath)
    else:
        outputPath = ""
    sys.stderr.write("Reading clustering object...\n")
    cl = utilities.readClusteringObject(clusteringObject)
    if cl.conformationNetwork is None:
        sys.exit("Clustering object loaded has no conformation network!!")
    conf = cl.conformationNetwork
    optimalCluster = cl.getOptimalMetric(metricCol, simulationType=metricOptimization)
    pathway = conf.createPathwayToCluster(optimalCluster)
    if not os.path.exists(outputPath + "conformationNetwork%s.edgelist" % suffix):
        sys.stderr.write("Writing conformation network...\n")
        conf.writeConformationNetwork(outputPath + "conformationNetwork%s.edgelist" % suffix)
    if not os.path.exists(outputPath + "FDT%s.edgelist" % suffix):
        sys.stderr.write("Writing FDT...\n")
        conf.writeFDT(outputPath + "FDT%s.edgelist" % suffix)
    if not os.path.exists(outputPath + "pathwayFDT%s.pdb" % suffix):
        sys.stderr.write("Writing pathway to optimal cluster...\n")
        # cl.writePathwayOptimalCluster(outputPath+"pathwayFDT%s.pdb" % suffix)
        cl.writePathwayTrajectory(pathway, outputPath + "pathwayFDT%s.pdb" % suffix)
def main(path, native, resname, nEigen, path_sasa):
    # Z = np.array([[4380, 153, 15, 2, 0, 0], [211, 4788, 1, 0, 0, 0], [169, 1, 4604, 226, 0, 0],
    #               [3, 13, 158, 4823, 3, 0], [0, 0, 0, 4, 4978, 18], [7, 5, 0, 0, 62, 4926]])
    MSM = utilities.readClusteringObject(os.path.join(path, "MSM_object_0.pkl"))
    Z = MSM.count_matrix_full
    np.set_printoptions(precision=4)
    m = 100
    k = Z.shape[0]
    alpha = 1 / float(k)
    U_counts = Z + alpha
    w = U_counts.sum(axis=1)
    P_rev = utils.buildRevTransitionMatrix(U_counts)
    P = U_counts / w[:, np.newaxis]
    # override: work with the reversible transition matrix estimate
    P = P_rev
    eigvalues, _ = np.linalg.eig(P)
    eigvalues.sort()
    eigvalues = eigvalues[::-1]
    eigvalues_rev, _ = np.linalg.eig(P_rev)
    eigvalues_rev.sort()
    eigvalues_rev = eigvalues_rev[::-1]
    ek = np.zeros(k)
    ek[k - 1] = 1.0
    variance = []
    contribution = []
    nEigs = 10
    for index in range(1, nEigs):
        A = P - eigvalues[index] * np.eye(k)
        q = calculate_q(A, P, k, ek)
        score = (q / (w + 1)) - (q / (w + 1 + m))
        norm_q = q / q.sum()
        contribution.append(score / score.sum())
        variance.append(norm_q)
    sasa = getSASAvalues(os.path.join(path, "representative_structures", "representative_structures_0.dat"), 4, path_sasa)
    pdb_native = atomset.PDB()
    pdb_native.initialise(u"%s" % native, resname=resname)
    minim = pdb_native.getCOM()
    clusters = np.loadtxt(os.path.join(path, "clusterCenters_0.dat"))
    distance = np.linalg.norm(clusters - minim, axis=1)
    variance = np.array(variance[:nEigen])
    variance = variance.sum(axis=0)
    variance /= variance.sum()
    print(variance)
    # states = variance.argsort()[-1:-10:-1]
    # print(" ".join(["structures/cluster_%d.pdb" % st for st in states]))
    f, axarr = plt.subplots(1, 2)
    axarr[0].scatter(distance, variance)
    axarr[0].set_xlabel("Distance to minimum")
    axarr[0].set_ylabel("Variance")
    axarr[1].scatter(sasa, variance)
    axarr[1].set_xlabel("SASA")
    axarr[1].set_ylabel("Variance")
    f.suptitle("Variance for eigenvalues 2-%d" % (nEigen + 1))
    # for ind, var in enumerate(variance[:5]):
    #     f, axarr = plt.subplots(1, 2)
    #     axarr[0].scatter(distance, var)
    #     axarr[0].set_xlabel("Distance to minimum")
    #     axarr[0].set_ylabel("Variance")
    #     axarr[1].scatter(sasa, var)
    #     axarr[1].set_xlabel("SASA")
    #     axarr[1].set_ylabel("Variance")
    #     f.suptitle("Variance for eigenvalue %d" % (ind + 2))
    plt.show()
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument("path", type=str, help="Path to the simulation")
    parser.add_argument("resname", type=str, help="Resname in the pdb")
    parser.add_argument("nEpochs", type=int, help="Number of epochs to cluster")
    parser.add_argument("ntrajs", type=int, help="Number of trajectories per epoch")
    parser.add_argument("--altSel", action="store_true", help="Whether to use alternative selection")
    parser.add_argument("--writeClusters", action="store_true", help="Whether to write pdb structures for the clusters")
    args = parser.parse_args()
    return args.path, args.resname, args.nEpochs, args.ntrajs, args.altSel, args.writeClusters


if __name__ == "__main__":
    simulation_path, resname, nEpochs, ntrajs, altSel, writeClusters = parseArguments()
    contactThresholdDistance = 8
    altSel = False
    top_object = utilities.readClusteringObject("%s/topologies/topologies.pkl" % simulation_path)
    thresholdCalculatorBuilder = thresholdcalculator.ThresholdCalculatorBuilder()
    thresholdCalculator = thresholdCalculatorBuilder.build({
        "thresholdCalculator": {
            "type": "heaviside",
            "params": {
                "values": [2, 3, 4, 5],
                "conditions": [1, 0.75, 0.5]
            }
        }
    })
    thresholdCalculator = thresholdCalculatorBuilder.build({
        "thresholdCalculator": {
            "type": "constant",
            "params": {
        snapshots = utilities.getSnapshots(traj, topology=topology)
        for snapshot in snapshots:
            PDBobj = atomset.PDB()
            PDBobj.initialise(snapshot, resname=resname, topology=topology_contents)
            yield PDBobj
    else:
        for cluster in clAcc.clusters.clusters:
            yield cluster.pdb


if __name__ == "__main__":
    traj_name, clustering, nRes, lig_resname, contactThreshold, top = parseArguments()
    if clustering is None:
        clusterAcc = None
    else:
        clusterAcc = utilities.readClusteringObject(clustering)
    totalAcc = []
    symEval = SymmetryContactMapEvaluator.SymmetryContactMapEvaluator()
    refPDB = None
    for pdb in generateConformations(lig_resname, clusterAcc, traj_name, top):
        if refPDB is None:
            refPDB = pdb
        contactMap, foo = symEval.createContactMap(pdb, lig_resname, contactThreshold)
        if len(totalAcc):
            totalAcc += contactMap.sum(axis=0, dtype=bool).astype(int)
        else:
            totalAcc = contactMap.sum(axis=0, dtype=bool).astype(int)
    proteinList = symEval.proteinList
def main(nEigenvectors, nRuns, m, outputFolder, plotEigenvectors, plotGMRQ, plotPMF, clusters, lagtimes, minPos, save_plots, showPlots, filtered, destFolder, sasa_col, path_to_report):
    if save_plots and outputFolder is None:
        outputFolder = "plots_MSM"
    if outputFolder is not None:
        eigenPlots = os.path.join(outputFolder, "eigenvector_plots")
        GMRQPlots = os.path.join(outputFolder, "GMRQ_plots")
        PMFPlots = os.path.join(outputFolder, "PMF_plots")
    if save_plots and not os.path.exists(outputFolder):
        os.makedirs(outputFolder)
    if filtered is not None:
        filter_str = "_filtered"
    else:
        filter_str = ""
    if plotEigenvectors and save_plots and not os.path.exists(eigenPlots):
        os.makedirs(eigenPlots)
    if plotGMRQ and save_plots and not os.path.exists(GMRQPlots):
        os.makedirs(GMRQPlots)
    if plotPMF and save_plots and not os.path.exists(PMFPlots):
        os.makedirs(PMFPlots)
    minPos = np.array(minPos)
    GMRQValues = {}
    print("Running from", destFolder)
    if plotGMRQ:
        GMRQValues = []
    if not os.path.exists(os.path.join(destFolder, "eigenvectors")):
        os.makedirs(os.path.join(destFolder, "eigenvectors"))
    for i in range(nRuns):
        if sasa_col is not None:
            representatives_files = os.path.join(destFolder, "representative_structures/representative_structures_%d.dat" % i)
            sasa = getSASAvalues(representatives_files, sasa_col, path_to_report)
        titleVar = "%s, run %d" % (destFolder, i)
        if plotGMRQ or plotEigenvectors:
            msm_object = utilities.readClusteringObject(os.path.join(destFolder, "MSM_object_%d.pkl" % i))
        if plotGMRQ:
            GMRQValues.append(np.sum(msm_object.eigenvalues()[:m]))
        if plotEigenvectors or plotPMF:
            clusters = np.loadtxt(os.path.join(destFolder, "clusterCenters_%d.dat" % i))
            distance = np.linalg.norm(clusters - minPos, axis=1)
            volume = np.loadtxt(os.path.join(destFolder, "volumeOfClusters_%d.dat" % i))
            print("Total volume for system %s, run %d" % (destFolder, i), volume.sum())
            if filtered is not None:
                volume = volume[filtered]
                clusters = clusters[filtered]
                distance = distance[filtered]
                if sasa_col is not None:
                    sasa = sasa[filtered]
        if plotEigenvectors:
            if clusters.size != msm_object.stationary_distribution.size:
                mat = computeDeltaG.reestimate_transition_matrix(msm_object.count_matrix_full)
            else:
                mat = msm_object.transition_matrix
            _, _, L = rdl_decomposition(mat)
            figures = []
            axes = []
            for _ in range((nEigenvectors - 1) // 4 + 1):
                f, axarr = plt.subplots(2, 2, figsize=(12, 12))
                f.suptitle(titleVar)
                figures.append(f)
                axes.append(axarr)
            for j, row in enumerate(L[:nEigenvectors]):
                pdb_filename = os.path.join(destFolder, "eigenvectors", "eigen_%d_run_%d.pdb" % (j + 1, i))
                if j:
                    atomnames = utilities.getAtomNames(utilities.sign(row, tol=1e-3))
                    utilities.write_PDB_clusters(clusters, use_beta=False, elements=atomnames, title=pdb_filename)
                else:
                    utilities.write_PDB_clusters(np.vstack((clusters.T, row)).T, use_beta=True, elements=None, title=pdb_filename)
                if filtered is not None:
                    row = row[filtered]
                np.savetxt(os.path.join(destFolder, "eigenvectors", "eigen_%d_run_%d%s.dat" % (j + 1, i, filter_str)), row)
                axes[j // 4][(j // 2) % 2, j % 2].scatter(distance, row)
                axes[j // 4][(j // 2) % 2, j % 2].set_xlabel("Distance to minimum")
                axes[j // 4][(j // 2) % 2, j % 2].set_ylabel("Eigenvector %d" % (j + 1))
            Q = msm_object.count_matrix_full.diagonal() / msm_object.count_matrix_full.sum()
            plt.figure()
            plt.scatter(distance, Q)
            plt.xlabel("Distance to minimum")
            plt.ylabel("Metastability")
            if save_plots:
                plt.savefig(os.path.join(eigenPlots, "Q_run_%d%s.png" % (i, filter_str)))
            if save_plots:
                for j, fg in enumerate(figures):
                    fg.savefig(os.path.join(eigenPlots, "eigenvector_%d_run_%d%s.png" % (j + 1, i, filter_str)))
                plt.figure()
                plt.scatter(distance, L[0])
                plt.xlabel("Distance to minimum")
                plt.ylabel("Eigenvector 1")
                plt.savefig(os.path.join(eigenPlots, "eigenvector_1_alone_run_%d%s.png" % (i, filter_str)))
        if plotPMF:
            data = np.loadtxt(os.path.join(destFolder, "pmf_xyzg_%d.dat" % i))
            g = data[:, -1]
            annotations = ["Cluster %d" % n for n in range(g.size)]
            if filtered is not None:
                g = g[filtered]
                annotations = np.array(annotations)[filtered].tolist()
            print("Clusters with less than 2 PMF:")
            print(" ".join(map(str, np.where(g < 2)[0])))
            print("")
            fig_pmf, axarr = plt.subplots(2, 2, figsize=(12, 12))
            fig_pmf.suptitle(titleVar)
            sc1 = axarr[1, 0].scatter(distance, g)
            sc2 = axarr[0, 1].scatter(distance, volume)
            sc3 = axarr[0, 0].scatter(g, volume)
            axes = [axarr[0, 1], axarr[1, 0], axarr[0, 0]]
            scs = [sc2, sc1, sc3]
            if sasa_col is not None:
                axarr[1, 1].scatter(sasa, g)
            axarr[1, 0].set_xlabel("Distance to minima")
            axarr[1, 0].set_ylabel("PMF")
            axarr[0, 1].set_xlabel("Distance to minima")
            axarr[0, 1].set_ylabel("Volume")
            axarr[0, 0].set_xlabel("PMF")
            axarr[0, 0].set_ylabel("Volume")
            annot1 = axarr[1, 0].annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points",
                                          bbox=dict(boxstyle="round", fc="w"), arrowprops=dict(arrowstyle="->"))
            annot1.set_visible(False)
            annot2 = axarr[0, 1].annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points",
                                          bbox=dict(boxstyle="round", fc="w"), arrowprops=dict(arrowstyle="->"))
            annot2.set_visible(False)
            annot3 = axarr[0, 0].annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points",
                                          bbox=dict(boxstyle="round", fc="w"), arrowprops=dict(arrowstyle="->"))
            annot3.set_visible(False)
            annot_list = [annot2, annot1, annot3]
            if sasa_col is not None:
                axarr[1, 1].set_xlabel("SASA")
                axarr[1, 1].set_ylabel("PMF")
            if save_plots:
                fig_pmf.savefig(os.path.join(PMFPlots, "pmf_run_%d%s.png" % (i, filter_str)))
    if plotGMRQ:
        plt.figure()
        plt.title("%s" % (destFolder))
        plt.xlabel("Number of states")
        plt.ylabel("GMRQ")
        plt.boxplot(GMRQValues)
        if save_plots:
            plt.savefig(os.path.join(GMRQPlots, "GMRQ.png"))
    if showPlots and (plotEigenvectors or plotGMRQ or plotPMF):
        if plotPMF:
            def update_annot(ind, sc, annot):
                """Update the information box of the selected point"""
                pos = sc.get_offsets()[ind["ind"][0]]
                annot.xy = pos
                annot.set_text(annotations[int(ind["ind"][0])])
                # annot.get_bbox_patch().set_facecolor(cmap(norm(z_values[ind["ind"][0]])))

            def hover(event):
                """Action to perform when hovering the mouse on a point"""
                # vis = any([annot.get_visible() for annot in annot_list])
                for i, ax_comp in enumerate(axes):
                    vis = annot_list[i].get_visible()
                    if event.inaxes == ax_comp:
                        for j in range(len(axes)):
                            if j != i:
                                annot_list[j].set_visible(False)
                        cont, ind = scs[i].contains(event)
                        if cont:
                            update_annot(ind, scs[i], annot_list[i])
                            annot_list[i].set_visible(True)
                            fig_pmf.canvas.draw_idle()
                        else:
                            if vis:
                                annot_list[i].set_visible(False)
                                fig_pmf.canvas.draw_idle()

            fig_pmf.canvas.mpl_connect("motion_notify_event", hover)
        plt.show()