def main(argv=None):
    """Refine or extend sampled partition nodes, interactively or in batch.

    Parses the options from *argv* (``sys.argv`` when omitted) and locks all
    nodes matching ``isa_partition and is_sampled``. Then:

    1. For every converged node a 2-means clustering of its trajectory is
       computed; if the two means are more than 2.0 apart the node is
       refined (after asking the user unless ``options.refine_all`` is set).
    2. Every not-converged node is refined, extended or ignored, depending
       on ``options.refine_all`` / ``options.extend_all`` or the user's choice.

    Finally the locked nodes are saved and unlocked and the setup, grompp
    and cleanup tools are run.

    Raises:
        AssertionError: if both ``refine_all`` and ``extend_all`` are set.
    """
    if argv is None:
        argv = sys.argv
    options = options_desc.parse_args(argv)[0]

    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if options.refine_all and options.extend_all:
        raise AssertionError("refine_all and extend_all are mutually exclusive.")

    pool = Pool()
    needy_nodes = pool.where("isa_partition and is_sampled").multilock()

    # 1. Trying to detect fake convergence
    for n in pool.where("state == 'converged'"):
        means = kmeans(n.trajectory, k=2)
        d = (means[0] - means[1]).norm2()
        #TODO decide upon threshold (per coordinate?)
        if d > 2.0 and (options.refine_all or userinput("%s has converged but appears to have a bimodal distribution.\nDo you want to refine?" % n.name, "bool")):
            refine(n, options)

    # 2. Dealing with not-converged nodes
    for n in pool.where("state == 'not-converged'"):
        # ``choice`` must always be bound: with only extend_all set the first
        # test below still evaluates ``choice == "r"``.
        choice = None
        if not (options.refine_all or options.extend_all):
            choice = userchoice("%s has not converged. What do you want to do?" % n.name, ['_refine', '_extend', '_ignore'])

        if options.refine_all or choice == "r":
            refine(n, options)
        elif options.extend_all or choice == "e":
            extend(n)
        # choice == "i": ignore this node, nothing to do

    for n in needy_nodes:
        n.save()
        n.unlock()

    zgf_setup_nodes.main()
    zgf_grompp.main()
    zgf_cleanup.main()
def main():
    """Compute thermodynamic weights for the active partition nodes.

    Aborts via ``sys.exit`` if any partition node is in a state that is not
    tolerated (``converged`` plus, optionally, ``not-converged`` and
    ``mdrun-failed``). Otherwise all partition nodes except failed ones are
    locked, reweighted with the method selected by ``options.method``, and the
    normalized weight is stored in ``n.obs.weight_direct`` before the nodes
    are saved.

    Raises:
        AssertionError: if not all active nodes could be locked.
        Exception: if ``options.method`` is not a known method.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    pool = Pool()

    # Build the query selecting nodes whose state forbids reweighting.
    tolerated_states = ["'converged'"]
    if options.ignore_convergence:
        tolerated_states.append("'not-converged'")
    if options.ignore_failed:
        tolerated_states.append("'mdrun-failed'")
    not_reweightable = "isa_partition and state not in (" + ",".join(tolerated_states) + ")"

    bad_guys = pool.where(not_reweightable)
    if bad_guys:
        print("Pool can not be reweighted due to the following nodes:")
        for bad_guy in bad_guys:
            print("Node %s with state %s." % (bad_guy.name, bad_guy.state))
        sys.exit("Aborting.")

    active_nodes = pool.where("isa_partition and state != 'mdrun-failed'")

    # make sure we lock ALL nodes; the locking must not live inside an
    # ``assert`` because asserts are stripped under ``python -O``
    locked_nodes = active_nodes.multilock()
    if len(locked_nodes) != len(active_nodes):
        raise AssertionError("Could not lock all active nodes.")

    try:
        if options.check_restraint:
            for n in active_nodes:
                check_restraint_energy(n)

        if options.method == "direct":
            reweight_direct(active_nodes, options)
        elif options.method == "entropy":
            reweight_entropy(active_nodes, options)
        elif options.method == "presampling":
            reweight_presampling(active_nodes, options)
        else:
            raise Exception("Method unkown: " + options.method)

        weight_sum = np.sum([n.tmp['weight'] for n in active_nodes])

        print("Thermodynamic weights calculated by method '%s':" % options.method)
        for n in active_nodes:
            n.obs.weight_direct = n.tmp['weight'] / weight_sum
            if options.method == "direct":
                print(" %s with mean_V: %f [kJ/mol], %d refpoints and weight: %f" % (n.name, n.obs.mean_V, n.tmp['n_refpoints'], n.obs.weight_direct))
            else:
                print(" %s with A: %f [kJ/mol] and weight: %f" % (n.name, n.obs.A, n.obs.weight_direct))
        print("The above weighting uses bonded energies='%s' and nonbonded energies='%s'." % (options.e_bonded, options.e_nonbonded))

        for n in active_nodes:
            n.save()
    finally:
        # release the locks even if reweighting failed half-way
        active_nodes.unlock()
def main():
    """Compute thermodynamic weights for all partition nodes.

    Aborts via ``sys.exit`` if a partition node is not ``converged`` (or,
    with ``options.ignore_convergence``, neither ``converged`` nor
    ``not-converged``). Otherwise all partition nodes are locked, their
    restraint energy is checked, they are reweighted with the method selected
    by ``options.method``, and the normalized weight is stored in
    ``n.obs.weight_direct`` before the nodes are saved.

    Raises:
        AssertionError: if not all active nodes could be locked.
        Exception: if ``options.method`` is not a known method.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    pool = Pool()

    #not_reweightable = "state not in ('refined','converged')"
    if options.ignore_convergence:
        not_reweightable = "isa_partition and state not in ('converged','not-converged')"
    else:
        not_reweightable = "isa_partition and state!='converged'"

    bad_guys = pool.where(not_reweightable)
    if bad_guys:
        print("Pool can not be reweighted due to the following nodes:")
        for bad_guy in bad_guys:
            print("Node %s with state %s." % (bad_guy.name, bad_guy.state))
        sys.exit("Aborting.")

    active_nodes = pool.where("isa_partition")

    # make sure we lock ALL nodes; the locking must not live inside an
    # ``assert`` because asserts are stripped under ``python -O``
    locked_nodes = active_nodes.multilock()
    if len(locked_nodes) != len(active_nodes):
        raise AssertionError("Could not lock all active nodes.")

    try:
        for n in active_nodes:
            check_restraint_energy(n)

        # find out about number of energygrps
        mdp_file = gromacs.read_mdp_file(pool.mdp_fn)
        energygrps = [str(egrp) for egrp in re.findall(r'\S+', mdp_file["energygrps"])]
        # Gromacs energies are named differently when there are less than two energygrps :(
        moi_energies = len(energygrps) >= 2

        if options.method == "direct":
            reweight_direct(active_nodes, moi_energies, options.sol_energy, options.save_refpoints)
        elif options.method == "entropy":
            reweight_entropy(active_nodes, moi_energies, options.sol_energy, options.save_refpoints)
        elif options.method == "presampling":
            reweight_presampling(active_nodes, options.presamp_temp, moi_energies, options.sol_energy)
        else:
            raise Exception("Method unkown: " + options.method)

        weight_sum = np.sum([n.tmp['weight'] for n in active_nodes])

        print("Thermodynamic weights calculated by method '%s' (sol-energy=%s):" % (options.method, options.sol_energy))
        for n in active_nodes:
            n.obs.weight_direct = n.tmp['weight'] / weight_sum
            if options.method == "direct":
                print(" %s with mean_V: %f [kJ/mol], %d refpoints and weight: %f" % (n.name, n.obs.mean_V, n.tmp['n_refpoints'], n.obs.weight_direct))
            else:
                print(" %s with A: %f [kJ/mol] and weight: %f" % (n.name, n.obs.A, n.obs.weight_direct))

        for n in active_nodes:
            n.save()
    finally:
        # release the locks even if reweighting failed half-way
        active_nodes.unlock()
def main():
    """Permanently remove the nodes named in ``options.doomed_nodes``.

    The option is a comma-separated list of node names. Each name must match
    exactly one node of the pool and must not be the root node. After the
    user confirmed, the nodes are locked and their directories are deleted,
    the pool is reloaded, alpha is recomputed, and the removal is recorded in
    ``pool.history`` before the pool is saved.

    Raises:
        AssertionError: if not all doomed nodes could be locked.
        Exception: if ``options.methodalphas`` is not a known method.
    """
    options = options_desc.parse_args(sys.argv)[0]

    #TODO put somehow into Options, e.g. min_value=1 or required=True
    if not options.doomed_nodes:
        sys.exit("Option --doomed_nodes is required.")

    pool = Pool()
    old_pool_size = len(pool)
    old_alpha = pool.alpha

    doomed_nodes = NodeList()
    #TODO: maybe this code should go into ZIBMolPy.ui
    for name in options.doomed_nodes.split(","):
        found = [n for n in pool if n.name == name]
        if len(found) != 1:
            sys.exit("Coult not find node '%s'" % (name))
        doomed_nodes.append(found[0])

    for n in doomed_nodes:
        if n == pool.root:
            sys.exit("Node %s is the root. Removal not allowed." % (n.name))
        #if(len(n.children) > 0):
        #    sys.exit("Node %s has children. Removal not allowed."%(n.name)) #TODO why should we forbid this?

    if not userinput("The selected node(s) will be removed permanently. Continue?", "bool"):
        sys.exit("Quit by user.")

    # Lock before deleting. The locking must not live inside an ``assert``:
    # under ``python -O`` it would be skipped and directories would be
    # removed without holding any lock.
    locked_nodes = doomed_nodes.multilock()
    if len(locked_nodes) != len(doomed_nodes):
        raise AssertionError("Could not lock all doomed nodes.")

    for n in doomed_nodes:
        print("Removing directory: " + n.dir)
        shutil.rmtree(n.dir)

    pool.reload_nodes()

    #TODO: this code-block also exists in zgf_create_node
    if len(pool.where("isa_partition")) < 2:
        pool.alpha = None
    elif options.methodalphas == "theta":
        pool.alpha = zgf_create_nodes.calc_alpha_theta(pool)
    elif options.methodalphas == "user":
        pool.alpha = userinput("Please enter a value for alpha", "float")
    else:
        raise Exception("Method unkown: " + options.methodalphas)

    pool.history.append({
        'removed_nodes': [(n.name, n.state) for n in doomed_nodes],
        'size': old_pool_size,
        'alpha': old_alpha,
        'timestamp': datetime.now(),
    })
    pool.save()

    #TODO: deal with analysis dir and dependencies
    zgf_cleanup.main()
def main():
    """Compute thermodynamic weights for the active partition nodes.

    Aborts via ``sys.exit`` if any partition node is in a state that is not
    tolerated (``converged`` plus, optionally, ``not-converged`` and
    ``mdrun-failed``). Otherwise all partition nodes except failed ones are
    locked, reweighted with the method selected by ``options.method``, and the
    normalized weight is stored in ``n.obs.weight_direct`` before the nodes
    are saved.

    Raises:
        AssertionError: if not all active nodes could be locked.
        Exception: if ``options.method`` is not a known method.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    pool = Pool()

    # Build the query selecting nodes whose state forbids reweighting.
    tolerated_states = ["'converged'"]
    if options.ignore_convergence:
        tolerated_states.append("'not-converged'")
    if options.ignore_failed:
        tolerated_states.append("'mdrun-failed'")
    not_reweightable = "isa_partition and state not in (" + ",".join(tolerated_states) + ")"

    bad_guys = pool.where(not_reweightable)
    if bad_guys:
        print("Pool can not be reweighted due to the following nodes:")
        for bad_guy in bad_guys:
            print("Node %s with state %s." % (bad_guy.name, bad_guy.state))
        sys.exit("Aborting.")

    active_nodes = pool.where("isa_partition and state != 'mdrun-failed'")

    # make sure we lock ALL nodes; the locking must not live inside an
    # ``assert`` because asserts are stripped under ``python -O``
    locked_nodes = active_nodes.multilock()
    if len(locked_nodes) != len(active_nodes):
        raise AssertionError("Could not lock all active nodes.")

    try:
        if options.check_restraint:
            for n in active_nodes:
                check_restraint_energy(n)

        if options.method == "direct":
            reweight_direct(active_nodes, options)
        elif options.method == "entropy":
            reweight_entropy(active_nodes, options)
        elif options.method == "presampling":
            reweight_presampling(active_nodes, options)
        else:
            raise Exception("Method unkown: " + options.method)

        weight_sum = np.sum([n.tmp['weight'] for n in active_nodes])

        print("Thermodynamic weights calculated by method '%s':" % options.method)
        for n in active_nodes:
            n.obs.weight_direct = n.tmp['weight'] / weight_sum
            if options.method == "direct":
                print(" %s with mean_V: %f [kJ/mol], %d refpoints and weight: %f" % (n.name, n.obs.mean_V, n.tmp['n_refpoints'], n.obs.weight_direct))
            else:
                print(" %s with A: %f [kJ/mol] and weight: %f" % (n.name, n.obs.A, n.obs.weight_direct))
        print("The above weighting uses bonded energies='%s' and nonbonded energies='%s'." % (options.e_bonded, options.e_nonbonded))

        for n in active_nodes:
            n.save()
    finally:
        # release the locks even if reweighting failed half-way
        active_nodes.unlock()
def main():
    """Reset all nodes in ``options.current_state`` to ``options.recover_state``.

    Runs the cleanup tool first, then locks every node whose state equals
    ``options.current_state``, assigns the new state, saves the node and
    unlocks it again.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    pool = Pool()
    state_query = "state == '%s'" % options.current_state
    stuck_nodes = pool.where(state_query)
    needy_nodes = stuck_nodes.multilock()

    for node in needy_nodes:
        report = "Recovering node %s with state %s to state %s ..." % (node.name, node.state, options.recover_state)
        print(report)
        node.state = options.recover_state
        node.save()
        node.unlock()
def main():
    """Reset all nodes in ``options.current_state`` to ``options.recover_state``.

    Runs the cleanup tool first, then locks every node whose state equals
    ``options.current_state``, assigns the new state, saves the node and
    unlocks it again.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    pool = Pool()
    state_query = "state == '%s'" % options.current_state
    stuck_nodes = pool.where(state_query)
    needy_nodes = stuck_nodes.multilock()

    for node in needy_nodes:
        report = "Recovering node %s with state %s to state %s ..." % (node.name, node.state, options.recover_state)
        print(report)
        node.state = options.recover_state
        node.save()
        node.unlock()
def main(argv=None):
    """Refine or extend sampled partition nodes, interactively or in batch.

    Parses the options from *argv* (``sys.argv`` when omitted) and locks all
    nodes matching ``isa_partition and is_sampled``. Then:

    1. For every converged node a 2-means clustering of its trajectory is
       computed; if the two means are more than 2.0 apart the node is
       refined (after asking the user unless ``options.refine_all`` is set).
    2. Every not-converged node is refined, extended or ignored, depending
       on ``options.refine_all`` / ``options.extend_all`` or the user's choice.

    Finally the locked nodes are saved and unlocked and the setup, grompp
    and cleanup tools are run.

    Raises:
        AssertionError: if both ``refine_all`` and ``extend_all`` are set.
    """
    if argv is None:
        argv = sys.argv
    options = options_desc.parse_args(argv)[0]

    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if options.refine_all and options.extend_all:
        raise AssertionError("refine_all and extend_all are mutually exclusive.")

    pool = Pool()
    needy_nodes = pool.where("isa_partition and is_sampled").multilock()

    # 1. Trying to detect fake convergence
    for n in pool.where("state == 'converged'"):
        means = kmeans(n.trajectory, k=2)
        d = (means[0] - means[1]).norm2()
        #TODO decide upon threshold (per coordinate?)
        if d > 2.0 and (options.refine_all or userinput("%s has converged but appears to have a bimodal distribution.\nDo you want to refine?" % n.name, "bool")):
            refine(n, options)

    # 2. Dealing with not-converged nodes
    for n in pool.where("state == 'not-converged'"):
        # ``choice`` must always be bound: with only extend_all set the first
        # test below still evaluates ``choice == "r"``.
        choice = None
        if not (options.refine_all or options.extend_all):
            choice = userchoice("%s has not converged. What do you want to do?" % n.name, ['_refine', '_extend', '_ignore'])

        if options.refine_all or choice == "r":
            refine(n, options)
        elif options.extend_all or choice == "e":
            extend(n)
        # choice == "i": ignore this node, nothing to do

    for n in needy_nodes:
        n.save()
        n.unlock()

    zgf_setup_nodes.main()
    zgf_grompp.main()
    zgf_cleanup.main()
def main():
    """Build and print a Q matrix on equidistant bins of the root trajectory.

    All partition nodes are locked and must carry ``obs.weight_direct``.
    Bins are obtained from ``q_equidist``; every frame of every node adds its
    normalized frame weight, scaled by the node weight, to the nearest bin.
    ``q_all[i, j]`` is ``sqrt(w_j) / sqrt(w_i)`` for bins closer than
    ``2 * theta`` (and zero otherwise); afterwards the row sum is subtracted
    from the diagonal entry. The matrix is optionally exported with
    ``savemat``.

    Raises:
        AssertionError: if not all active nodes could be locked.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    # NOTE(review): eval() of str(options) is only acceptable because the
    # options come from the local command line -- confirm before reusing.
    print("Options:\n%s\n" % pformat(eval(str(options))))

    pool = Pool()
    parent = pool.root
    active_nodes = pool.where("isa_partition")

    # make sure we lock ALL nodes; the locking must not live inside an
    # ``assert`` because asserts are stripped under ``python -O``
    locked_nodes = active_nodes.multilock()
    if len(locked_nodes) != len(active_nodes):
        raise AssertionError("Could not lock all active nodes.")

    try:
        if active_nodes.where("'weight_direct' not in obs"):
            sys.exit("Q-Matrix calculation not possible: Not all of the nodes have been reweighted.")

        node_weights = np.array([node.obs.weight_direct for node in active_nodes])

        print("### Generate bins: equidist ###")
        result = q_equidist(parent, options.numnodes)
        chosen_idx = result['chosen_idx']
        frames_chosen = result['frames_chosen']
        theta = result['theta']
        chosen_idx.sort()  # makes preview-trajectory easier to understand
        dimension = len(chosen_idx)

        print("chosen_idx")
        print(chosen_idx)

        print("### Generate bin weights ###")
        bin_weights = np.zeros(dimension)
        for (i, n) in enumerate(active_nodes):
            w_denom = np.sum(n.frameweights)
            for t in range(len(n.trajectory)):
                # assign frame t to the closest chosen frame (bin)
                diffs = (frames_chosen - n.trajectory.getframe(t)).norm()
                j = np.argmin(diffs)
                bin_weights[j] = bin_weights[j] + node_weights[i] * n.frameweights[t] / w_denom

        print("bin_weights")
        print(bin_weights)

        print("### Generate q_all (entries only for neighboring bins) ###")
        # plain ``float`` instead of the ``np.float`` alias, which newer
        # NumPy releases no longer provide
        q_all = np.empty((dimension, dimension), dtype=float)
        for i in range(dimension):
            sum_row = 0.0
            diffs = (frames_chosen - frames_chosen.getframe(i)).norm()
            print("diffs")
            print(diffs)
            for j in range(dimension):
                if (diffs[j] < 2.0 * theta) and (bin_weights[i] > 0.0):
                    q_all[i, j] = np.sqrt(bin_weights[j]) / np.sqrt(bin_weights[i])
                    sum_row = sum_row + q_all[i, j]
                else:
                    q_all[i, j] = 0
            q_all[i, i] = q_all[i, i] - sum_row

        print("Q_All")
        print(q_all)

        if options.export_matlab:
            savemat(pool.analysis_dir + "q_all.mat", {"q_all": q_all})
    finally:
        # also reached on sys.exit() above, so no lock is left behind
        active_nodes.unlock()

    zgf_cleanup.main()
def main():
    """Write one trajectory per cluster from the typical frames of its nodes.

    Loads the chi matrix, node names and cluster count from
    ``pool.chi_mat_fn``. A node belongs to every cluster whose chi value
    exceeds ``options.node_threshold``; its typical frames are those whose
    frame weight exceeds ``options.frame_threshold * 2 * mean(frameweights)``.
    Each typical frame is appended to ``cluster<k>.trr`` of every belonging
    cluster. Unless ``options.write_sol`` is set, the written trajectories are
    passed through ``trjconv`` with the ``MOI`` index group. Finally the
    frame counts are verified and a summary is printed.

    Raises:
        AssertionError: if ``trjconv`` fails or a written trajectory has an
            unexpected number of frames.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    pool = Pool()
    npz_file = np.load(pool.chi_mat_fn)
    chi_matrix = npz_file['matrix']
    node_names = npz_file['node_names']
    n_clusters = int(npz_file['n_clusters'])
    active_nodes = [Node(nn) for nn in node_names]

    # create and open dest_files, initialize counters for statistics
    dest_filenames = [pool.analysis_dir + "cluster%d.trr" % (c + 1) for c in range(n_clusters)]
    dest_files = [open(fn, "wb") for fn in dest_filenames]
    dest_frame_counters = np.zeros(n_clusters)

    try:
        # For each active node...
        for (node_idx, n) in enumerate(active_nodes):
            # ... find the clusters to which it belongs (might be more than one)...
            # flatnonzero yields plain integer indices; rows of np.argwhere
            # are 1-element arrays, which are not valid list indices.
            belonging_clusters = np.flatnonzero(chi_matrix[node_idx] > options.node_threshold)

            # ... and find all typical frames of this node.
            #TODO not an optimal solution... discuss
            # per default, we take every frame with above average weight
            frame_threshold = options.frame_threshold * 2 * np.mean(n.frameweights)
            typical_frame_nums = np.flatnonzero(n.frameweights > frame_threshold)

            # Go through the node's trajectory ...
            trr_in = TrrFile(n.trr_fn)
            try:
                curr_frame = trr_in.first_frame
                for frame_num in typical_frame_nums:
                    # ...stop at each typical frame...
                    while frame_num != curr_frame.number:
                        curr_frame = curr_frame.next()
                    #... and copy it into the dest_file of each belonging cluster.
                    for c in belonging_clusters:
                        dest_files[c].write(curr_frame.raw_data)
                        dest_frame_counters[c] += 1
            finally:
                trr_in.close()  # close source file
    finally:
        # close dest_files, also when copying failed
        for f in dest_files:
            f.close()
    del dest_files

    # desolvate cluster-trajectories 'in-place'
    if not options.write_sol:
        for dest_fn in dest_filenames:
            tmp_fn = mktemp(suffix='.trr', dir=pool.analysis_dir)
            os.rename(dest_fn, tmp_fn)  # works as both files are in same dir
            cmd = ["trjconv", "-f", tmp_fn, "-o", dest_fn, "-n", pool.ndx_fn]
            p = Popen(cmd, stdin=PIPE)
            p.communicate(input=b"MOI\n")
            # explicit check: an ``assert`` would vanish under ``python -O``
            if p.wait() != 0:
                raise AssertionError("trjconv failed for %s" % dest_fn)
            os.remove(tmp_fn)

    # register dependencies
    for fn in dest_filenames:
        register_file_dependency(fn, pool.chi_mat_fn)

    # check number of written frames
    sys.stdout.write("Checking lenghts of written trajectories... ")
    for (fn, expected_frames) in zip(dest_filenames, dest_frame_counters):
        f = TrrFile(fn)
        try:
            if f.count_frames() != expected_frames:
                raise AssertionError("Unexpected number of frames in %s" % fn)
        finally:
            f.close()
    print("done.")

    #output statistics
    print("\n### Extraction summary ###\nnode threshold: %1.1f, frame threshold: %1.1f" % (options.node_threshold, options.frame_threshold))
    print("Cluster trajectories were written to %s:" % pool.analysis_dir)
    for (c, f) in enumerate(dest_frame_counters):
        print("cluster%d.trr [%d frames] from node(s):" % (c + 1, f))
        print(list(np.argwhere(chi_matrix[:, c] > options.node_threshold).flat))
def main(argv=None):
    """Create new partition nodes from frames of a parent node.

    Parses the options from *argv* (``sys.argv`` when omitted), selects frames
    of the parent trajectory with the method named by ``options.methodnodes``,
    appends one node in state ``creating-a-partition`` per chosen frame to the
    pool, determines alpha (``options.methodalphas``), records the change in
    ``pool.history``, runs the phi fit (``options.methodphifit``) and finally
    sets the new nodes to state ``created``.

    Raises:
        AssertionError: if ``options.parent_node`` does not name exactly one node.
        Exception: if one of the method options has an unknown value.
    """
    if argv is None:
        argv = sys.argv
    options = options_desc.parse_args(argv)[0]

    # NOTE(review): eval() of str(options) is only acceptable because the
    # options come from the local command line -- confirm before reusing.
    print("Options:\n%s\n" % pformat(eval(str(options))))

    if options.random_seed:
        # using numpy-random because python-random differs beetween 32 and 64 bit
        # np.random.seed only accepts values in [0, 2**32 - 1]; hash() may be
        # negative or larger, so fold it into that range.
        np.random.seed(hash(options.random_seed) & 0xFFFFFFFF)

    pool = Pool()
    old_pool_size = len(pool)
    print("pool %s" % (pool,))

    if options.parent_node == "root":
        parent = pool.root
    else:
        found = [n for n in pool if n.name == options.parent_node]
        # explicit check: an ``assert`` would vanish under ``python -O``
        if len(found) != 1:
            raise AssertionError("Expected exactly one node named '%s', found %d." % (options.parent_node, len(found)))
        parent = found[0]

    print("### Generate nodes: %s ###" % options.methodnodes)
    if options.methodnodes == "kmeans":
        chosen_idx = mknodes_kmeans(parent, options.numnodes)
    elif options.methodnodes == "equidist":
        chosen_idx = mknodes_equidist(parent, options.numnodes)
    elif options.methodnodes == "maxdist":
        chosen_idx = mknodes_maxdist(parent, options.numnodes)
    elif options.methodnodes == "all":
        chosen_idx = mknodes_all(parent)
    else:
        raise Exception("Method unknown: " + options.methodnodes)

    chosen_idx.sort()  # makes preview-trajectory easier to understand
    if options.write_preview:
        write_node_preview(pool, parent, chosen_idx)

    for i in chosen_idx:
        n = Node()
        n.parent_frame_num = i
        n.parent = parent
        n.state = "creating-a-partition"  # will be set to "created" at end of script
        n.extensions_counter = 0
        n.extensions_max = options.ext_max
        n.extensions_length = options.ext_length
        n.sampling_length = options.sampling_length
        n.internals = parent.trajectory.getframe(i)
        pool.append(n)

    print("\n### Obtain alpha: %s ###" % options.methodalphas)
    old_alpha = pool.alpha
    if options.methodalphas == "theta":
        pool.alpha = calc_alpha_theta(pool)
    elif options.methodalphas == "user":
        pool.alpha = userinput("Please enter a value for alpha", "float")
    else:
        raise Exception("Method unknown: " + options.methodalphas)

    pool.history.append({
        'refined_node': (parent.name, parent.state),
        'size': old_pool_size,
        'alpha': old_alpha,
        'timestamp': datetime.now(),
    })
    pool.save()  # alpha might have changed

    print("\n### Obtain phi fit: %s ###" % options.methodphifit)
    if options.methodphifit == "harmonic":
        do_phifit_harmonic(pool)
    elif options.methodphifit == "switch":
        do_phifit_switch(pool)
    elif options.methodphifit == "leastsq":
        do_phifit_leastsq(pool)
    else:
        raise Exception("Method unkown: " + options.methodphifit)

    for n in pool.where("state == 'creating-a-partition'"):
        n.state = "created"
        n.save()
        print("saving " + str(n))

    zgf_cleanup.main()
def main():
    """Run the PCCA+ analysis on the symmetrized S matrix of the pool.

    Steps, as performed below:

    * lock the active partition nodes (optionally excluding failed ones) and
      require ``obs.weight_direct`` on each of them,
    * obtain the S matrix, symmetrize it, store the corrected matrix and the
      corrected node weights (``obs.weight_corrected``), then unlock,
    * sort the eigenvalues in descending order, print the gaps and choose
      the number of clusters (automatically or by asking the user),
    * orthogonalize the eigenvectors, cluster them with ``cluster_by_isa``
      and optionally optimize the chi matrix,
    * print consistency checks, store the chi and Q_c matrices, register the
      file dependencies and optionally print a per-cluster summary.

    Raises:
        AssertionError: if not all active nodes could be locked.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    pool = Pool()
    active_nodes = pool.where("isa_partition")
    if options.ignore_failed:
        active_nodes = pool.where("isa_partition and not state=='mdrun-failed'")

    # make sure we lock ALL nodes; the locking must not live inside an
    # ``assert`` because asserts are stripped under ``python -O``
    locked_nodes = active_nodes.multilock()
    if len(locked_nodes) != len(active_nodes):
        raise AssertionError("Could not lock all active nodes.")

    try:
        if active_nodes.where("'weight_direct' not in obs"):
            sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")

        print("\n### Getting S matrix ...")
        s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat, fast=options.fast_mat)
        register_file_dependency(pool.s_mat_fn, pool.filename)

        node_weights = np.array([node.obs.weight_direct for node in active_nodes])

        print("\n### Symmetrizing S matrix ...")
        (corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))

        # store intermediate results
        register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)
        np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=[n.name for n in active_nodes])

        if options.export_matlab:
            savemat(pool.analysis_dir + "node_weights.mat", {"node_weights": node_weights, "node_weights_corrected": corr_node_weights})
            savemat(pool.analysis_dir + "s_mats.mat", {"s_matrix": s_matrix, "s_matrix_corrected": corr_s_matrix})

        for (n, cw) in zip(active_nodes, corr_node_weights):
            n.obs.weight_corrected = cw

        print("\n### Node weights after symmetrization of S matrix:")
        for n in active_nodes:
            print("%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected)))
            n.save()
    finally:
        # also reached on sys.exit() or errors above, so no lock is left behind
        active_nodes.unlock()

    # calculate and sort eigenvalues in descending order
    (eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
    argsorted_eigvalues = np.argsort(-eigvalues)
    eigvalues = eigvalues[argsorted_eigvalues]
    eigvectors = eigvectors[:, argsorted_eigvalues]

    gaps = np.abs(eigvalues[1:] - eigvalues[:-1])
    gaps = np.append(gaps, 0.0)
    wgaps = gaps * eigvalues

    print("\n### Sorted eigenvalues of symmetrized S matrix:")
    for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues) + 1), eigvalues, gaps, wgaps):
        print("EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap))

    # default number of clusters: position of the largest EV-weighted gap
    n_clusters = np.argmax(wgaps) + 1
    # report each maximum together with its own position (the plain gap was
    # previously reported with the position of the weighted gap)
    print("\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), np.argmax(gaps) + 1))
    print("### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps) + 1))
    sys.stdout.flush()
    if not options.auto_cluster:
        n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
    print("### Using %d clusters for PCCA+ ..." % n_clusters)

    if options.export_matlab:
        savemat(pool.analysis_dir + "evs.mat", {"evs": eigvectors})

    # orthogonalize and normalize eigenvectors
    eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

    # perform PCCA+
    # First two return-values "c_f" and "indicator" are not needed
    (chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]

    if options.optimize_chi:
        print("\n### Optimizing chi matrix ...")

        outliers = 5
        mean_weight = np.mean(corr_node_weights)
        threshold = mean_weight / 100 * outliers
        print("Light-weight node threshold (%d%% of mean corrected node weight): %.4f." % (outliers, threshold))

        # accumulate nodes for optimization
        edges = np.where(np.max(chi_matrix, axis=1) > 0.9999)[0]  # edges of simplex
        heavies = np.where(corr_node_weights > threshold)[0]  # heavy-weight nodes
        filtered_eigvectors = eigvectors[np.union1d(edges, heavies)]

        # perform the actual optimization
        rot_matrix = opt_soft(filtered_eigvectors, rot_matrix, n_clusters)

        chi_matrix = np.dot(eigvectors[:, :n_clusters], rot_matrix)

        # deal with light-weight nodes: shift and scale
        for i in np.where(corr_node_weights <= threshold)[0]:
            if i in edges:
                print("Column %d belongs to (potentially dangerous) light-weight node, but its node is a simplex edge." % (i + 1))
                continue
            print("Column %d is shifted and scaled." % (i + 1))
            col_min = np.min(chi_matrix[i, :])
            chi_matrix[i, :] -= col_min
            chi_matrix[i, :] /= 1 - (n_clusters * col_min)

    qc_matrix = np.dot(np.dot(np.linalg.inv(rot_matrix), np.diag(eigvalues[:n_clusters])), rot_matrix) - np.eye(n_clusters)
    cluster_weights = rot_matrix[0]

    print("\n### Matrix numerics check")
    print("-- Q_c matrix row sums --")
    print(np.sum(qc_matrix, axis=1))
    print("-- cluster weights: first column of rot_matrix --")
    print(cluster_weights)
    print("-- cluster weights: numpy.dot(node_weights, chi_matrix) --")
    print(np.dot(corr_node_weights, chi_matrix))
    print("-- chi matrix column max values --")
    print(np.max(chi_matrix, axis=0))
    print("-- chi matrix row sums --")
    print(np.sum(chi_matrix, axis=1))

    # store final results
    np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes])
    np.savez(pool.qc_mat_fn, matrix=qc_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes], weights=cluster_weights)

    if options.export_matlab:
        savemat(pool.analysis_dir + "chi_mat.mat", {"chi_matrix": chi_matrix})
        savemat(pool.analysis_dir + "qc_mat.mat", {"qc_matrix": qc_matrix, "weights": cluster_weights})

    register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
    register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)
    for fn in (pool.s_mat_fn, pool.s_corr_mat_fn):
        register_file_dependency(pool.chi_mat_fn, fn)
        register_file_dependency(pool.qc_mat_fn, fn)

    # touch analysis directory (triggering update in zgf_browser)
    atime = mtime = time.time()
    os.utime(pool.analysis_dir, (atime, mtime))

    # show summary
    if options.summary:
        print("\n### Preparing cluster summary ...")
        chi_threshold = 1E-3
        from pprint import pformat

        for i in range(n_clusters):
            # flatnonzero yields plain integer indices; rows of np.argwhere
            # are 1-element arrays, which are not valid sequence indices.
            involved_nodes = [active_nodes[int(ni)] for ni in np.flatnonzero(chi_matrix[:, i] > chi_threshold)]
            max_chi_node = active_nodes[np.argmax(chi_matrix[:, i])]
            c_max = []

            for c in pool.converter:
                coord_range = pool.coord_range(c)
                scale = c.plot_scale
                edges = scale(np.linspace(np.min(coord_range), np.max(coord_range), num=50))
                hist_cluster = np.zeros(edges.size - 1)

                for (n, chi) in zip(active_nodes, chi_matrix[:, i]):
                    samples = scale(n.trajectory.getcoord(c))
                    # NOTE(review): ``normed`` is deprecated in NumPy; confirm
                    # the minimum supported version before moving to ``density``.
                    hist_node = np.histogram(samples, bins=edges, weights=n.frameweights, normed=True)[0]
                    hist_cluster += n.obs.weight_corrected * hist_node * chi

                # left edge of the most populated bin (same grid as ``edges``)
                c_max.append(edges[np.argmax(hist_cluster)])

            msg = "### Cluster %d (weight=%.4f, #involved nodes=%d, representative='%s'):" % (i + 1, cluster_weights[i], len(involved_nodes), max_chi_node.name)
            print("\n" + msg)
            print("-- internal coordinates --")
            print("%s" % pformat(["%.2f" % cm for cm in c_max]))
            print("-- involved nodes --")
            print("%s" % pformat([n.name for n in involved_nodes]))
            print("-" * len(msg))
def main():
    """Write one trajectory per cluster from the typical frames of its nodes.

    Loads the chi matrix, node names and cluster count from
    ``pool.chi_mat_fn``. A node belongs to every cluster whose chi value
    exceeds ``options.node_threshold``; its typical frames are those whose
    frame weight exceeds ``options.frame_threshold * 2 * mean(frameweights)``.
    Each typical frame is appended to ``cluster<k>.trr`` of every belonging
    cluster. Unless ``options.write_sol`` is set, the written trajectories are
    passed through ``trjconv`` with the ``MOI`` index group. Finally the
    frame counts are verified and a summary is printed.

    Raises:
        AssertionError: if ``trjconv`` fails or a written trajectory has an
            unexpected number of frames.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    pool = Pool()
    npz_file = np.load(pool.chi_mat_fn)
    chi_matrix = npz_file['matrix']
    node_names = npz_file['node_names']
    n_clusters = int(npz_file['n_clusters'])
    active_nodes = [Node(nn) for nn in node_names]

    # create and open dest_files, initialize counters for statistics
    dest_filenames = [pool.analysis_dir + "cluster%d.trr" % (c + 1) for c in range(n_clusters)]
    dest_files = [open(fn, "wb") for fn in dest_filenames]
    dest_frame_counters = np.zeros(n_clusters)

    try:
        # For each active node...
        for (node_idx, n) in enumerate(active_nodes):
            # ... find the clusters to which it belongs (might be more than one)...
            # flatnonzero yields plain integer indices; rows of np.argwhere
            # are 1-element arrays, which are not valid list indices.
            belonging_clusters = np.flatnonzero(chi_matrix[node_idx] > options.node_threshold)

            # ... and find all typical frames of this node.
            #TODO not an optimal solution... discuss
            # per default, we take every frame with above average weight
            frame_threshold = options.frame_threshold * 2 * np.mean(n.frameweights)
            typical_frame_nums = np.flatnonzero(n.frameweights > frame_threshold)

            # Go through the node's trajectory ...
            trr_in = TrrFile(n.trr_fn)
            try:
                curr_frame = trr_in.first_frame
                for frame_num in typical_frame_nums:
                    # ...stop at each typical frame...
                    while frame_num != curr_frame.number:
                        curr_frame = curr_frame.next()
                    #... and copy it into the dest_file of each belonging cluster.
                    for c in belonging_clusters:
                        dest_files[c].write(curr_frame.raw_data)
                        dest_frame_counters[c] += 1
            finally:
                trr_in.close()  # close source file
    finally:
        # close dest_files, also when copying failed
        for f in dest_files:
            f.close()
    del dest_files

    # desolvate cluster-trajectories 'in-place'
    if not options.write_sol:
        for dest_fn in dest_filenames:
            tmp_fn = mktemp(suffix='.trr', dir=pool.analysis_dir)
            os.rename(dest_fn, tmp_fn)  # works as both files are in same dir
            cmd = ["trjconv", "-f", tmp_fn, "-o", dest_fn, "-n", pool.ndx_fn]
            p = Popen(cmd, stdin=PIPE)
            p.communicate(input=b"MOI\n")
            # explicit check: an ``assert`` would vanish under ``python -O``
            if p.wait() != 0:
                raise AssertionError("trjconv failed for %s" % dest_fn)
            os.remove(tmp_fn)

    # register dependencies
    for fn in dest_filenames:
        register_file_dependency(fn, pool.chi_mat_fn)

    # check number of written frames
    sys.stdout.write("Checking lenghts of written trajectories... ")
    for (fn, expected_frames) in zip(dest_filenames, dest_frame_counters):
        f = TrrFile(fn)
        try:
            if f.count_frames() != expected_frames:
                raise AssertionError("Unexpected number of frames in %s" % fn)
        finally:
            f.close()
    print("done.")

    #output statistics
    print("\n### Extraction summary ###\nnode threshold: %1.1f, frame threshold: %1.1f" % (options.node_threshold, options.frame_threshold))
    print("Cluster trajectories were written to %s:" % pool.analysis_dir)
    for (c, f) in enumerate(dest_frame_counters):
        print("cluster%d.trr [%d frames] from node(s):" % (c + 1, f))
        print(list(np.argwhere(chi_matrix[:, c] > options.node_threshold).flat))
def main():
    """Run the PCCA+ analysis on the symmetrized S (and K) matrices of the pool.

    Steps, as performed below:

    * lock all partition nodes and require ``obs.weight_direct`` on each,
    * obtain the S and K matrices, symmetrize both, store the corrected
      matrices and the corrected node weights (``obs.weight_corrected``),
      then unlock,
    * sort the eigenvalues of the corrected S matrix in descending order,
      print the gaps and choose the number of clusters (automatically or by
      asking the user),
    * orthogonalize the eigenvectors and cluster them with ``cluster_by_isa``,
    * print consistency checks, store the chi and Q_c matrices and register
      the file dependencies.

    Raises:
        AssertionError: if not all active nodes could be locked.
    """
    options = options_desc.parse_args(sys.argv)[0]
    zgf_cleanup.main()

    pool = Pool()
    active_nodes = pool.where("isa_partition")

    # make sure we lock ALL nodes; the locking must not live inside an
    # ``assert`` because asserts are stripped under ``python -O``
    locked_nodes = active_nodes.multilock()
    if len(locked_nodes) != len(active_nodes):
        raise AssertionError("Could not lock all active nodes.")

    try:
        if active_nodes.where("'weight_direct' not in obs"):
            sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")

        print("\n### Getting S matrix ...")
        s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat)
        register_file_dependency(pool.s_mat_fn, pool.filename)

        print("\n### Getting K matrix ...")
        k_matrix = cache_matrix(pool.k_mat_fn, active_nodes, shift=options.lag_time, overwrite=options.overwrite_mat)
        register_file_dependency(pool.k_mat_fn, pool.filename)

        node_weights = np.array([node.obs.weight_direct for node in active_nodes])

        print("\n### Symmetrizing S matrix ...")
        (corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))

        print("\n### Symmetrizing K matrix ...")
        (corr_k_matrix, corr_node_weights) = symmetrize(k_matrix, corr_node_weights)

        # store intermediate results
        register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)
        register_file_dependency(pool.k_corr_mat_fn, pool.k_mat_fn)

        np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=[n.name for n in active_nodes])
        np.savez(pool.k_corr_mat_fn, matrix=corr_k_matrix, node_names=[n.name for n in active_nodes])

        if options.export_matlab:
            savemat(pool.analysis_dir + "node_weights.mat", {"node_weights": node_weights, "node_weights_corrected": corr_node_weights})
            savemat(pool.analysis_dir + "s_mats.mat", {"s_matrix": s_matrix, "s_matrix_corrected": corr_s_matrix})
            savemat(pool.analysis_dir + "k_mats.mat", {"k_matrix": k_matrix, "k_matrix_corrected": corr_k_matrix})

        for (n, cw) in zip(active_nodes, corr_node_weights):
            n.obs.weight_corrected = cw

        print("\n### Node weights after symmetrization of S matrix:")
        for n in active_nodes:
            print("%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected)))
            n.save()
    finally:
        # also reached on sys.exit() or errors above, so no lock is left behind
        active_nodes.unlock()

    # calculate and sort eigenvalues in descending order
    (eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
    argsorted_eigvalues = np.argsort(-eigvalues)
    eigvalues = eigvalues[argsorted_eigvalues]
    eigvectors = eigvectors[:, argsorted_eigvalues]

    gaps = np.abs(eigvalues[1:] - eigvalues[:-1])
    gaps = np.append(gaps, 0.0)
    wgaps = gaps * eigvalues

    print("\n### Sorted eigenvalues of symmetrized S matrix:")
    for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues) + 1), eigvalues, gaps, wgaps):
        print("EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap))

    # default number of clusters: position of the largest EV-weighted gap
    n_clusters = np.argmax(wgaps) + 1
    # report each maximum together with its own position (the plain gap was
    # previously reported with the position of the weighted gap)
    print("\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), np.argmax(gaps) + 1))
    print("### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps) + 1))
    sys.stdout.flush()
    if not options.auto_cluster:
        n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
    print("### Using %d clusters for PCCA+ ..." % n_clusters)

    print("eigenvectors")
    print(eigvectors[:, :n_clusters])

    if options.export_matlab:
        savemat(pool.analysis_dir + "evs.mat", {"evs": eigvectors})

    # orthogonalize and normalize eigenvectors
    eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

    # perform PCCA+
    # First two return-values "c_f" and "indicator" are not needed
    (chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]

    #TODO at the moment, K-matrix is not used
    #xi = [] # calculate eigenvalues of Q_c, xi
    #for eigvec in np.transpose(eigvectors)[: n_clusters]:
    #    num = np.dot( np.dot( np.transpose(eigvec), corr_k_matrix ), eigvec )
    #    denom = np.dot( np.dot( np.transpose(eigvec), corr_s_matrix ), eigvec )
    #    xi.append(num/denom-1)
    #print np.diag(xi) #TODO what does this tell us? Marcus-check

    qc_matrix = np.dot(np.dot(np.linalg.inv(rot_matrix), np.diag(eigvalues[:n_clusters])), rot_matrix) - np.eye(n_clusters)
    cluster_weights = rot_matrix[0]

    print("Q_c matrix:")
    print(qc_matrix)
    print("Q_c matrix row sums:")
    print(np.sum(qc_matrix, axis=1))
    print("cluster weights (calculated twice for checking):")
    print(cluster_weights)
    print(np.dot(corr_node_weights, chi_matrix))
    print("chi matrix column sums:")
    print(np.sum(chi_matrix, axis=0))
    print("chi matrix row sums:")
    print(np.sum(chi_matrix, axis=1))

    # store final results
    np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes])
    np.savez(pool.qc_mat_fn, matrix=qc_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes], weights=cluster_weights)

    if options.export_matlab:
        savemat(pool.analysis_dir + "chi_mat.mat", {"chi_matrix": chi_matrix})
        savemat(pool.analysis_dir + "qc_mat.mat", {"qc_matrix": qc_matrix, "weights": cluster_weights})

    register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
    register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)
    for fn in (pool.s_mat_fn, pool.s_corr_mat_fn, pool.k_mat_fn, pool.k_corr_mat_fn):
        register_file_dependency(pool.chi_mat_fn, fn)
        register_file_dependency(pool.qc_mat_fn, fn)

    zgf_cleanup.main()