Example #1
def main(argv=None):
	if argv is None:
		argv = sys.argv
	options = options_desc.parse_args(argv)[0]

	assert(not(options.refine_all and options.extend_all)) 
	
	pool = Pool()
	needy_nodes = pool.where("isa_partition and is_sampled").multilock()
	
	# 1. Trying to detect fake convergence
	for n in pool.where("state == 'converged'"):
		means = kmeans(n.trajectory, k=2)
		d = (means[0] - means[1]).norm2()
		if(d > 2.0 and (options.refine_all or userinput("%s has converged but appears to have a bimodal distribution.\nDo you want to refine?"%n.name, "bool"))): #TODO decide upon threshold (per coordinate?)
			refine(n, options)
	
	# 2. Dealing with not-converged nodes
	for n in pool.where("state == 'not-converged'"):
		if(not(options.refine_all or options.extend_all)):
			choice = userchoice("%s has not converged. What do you want to do?"%n.name, ['_refine', '_extend', '_ignore'])
		if(options.refine_all or choice=="r"):
			refine(n, options)
		elif(options.extend_all or choice=="e"):
			extend(n)
		elif(choice=="i"):
			continue
	
	for n in needy_nodes:
		n.save()
		n.unlock()
			
	zgf_setup_nodes.main()
	zgf_grompp.main()
	zgf_cleanup.main()	
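
A minimal, self-contained sketch of the k=2 bimodality check from Example #1, with scipy.cluster.vq.kmeans standing in for ZIBMolPy's own kmeans; the helper name looks_bimodal and the threshold of 2.0 (taken from the TODO) are illustrative assumptions, not part of the original API.

import numpy as np
from scipy.cluster.vq import kmeans as scipy_kmeans

def looks_bimodal(samples, threshold=2.0):
    # Fit two cluster centers; a large separation suggests two modes.
    centroids, _ = scipy_kmeans(samples.astype(float), 2)
    if len(centroids) < 2:
        return False  # kmeans may merge centers on unimodal data
    return np.linalg.norm(centroids[0] - centroids[1]) > threshold

# Two well-separated Gaussians should report True.
print(looks_bimodal(np.concatenate([np.random.normal(0, 0.1, 100),
                                    np.random.normal(5, 0.1, 100)])))
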
Example #2
def main():
    options = options_desc.parse_args(sys.argv)[0]

    zgf_cleanup.main()

    pool = Pool()

    not_reweightable = "isa_partition and state not in ('converged'"
    if (options.ignore_convergence):
        not_reweightable += ",'not-converged'"
    if (options.ignore_failed):
        not_reweightable += ",'mdrun-failed'"
    not_reweightable += ")"

    if pool.where(not_reweightable):
        print "Pool can not be reweighted due to the following nodes:"
        for bad_guy in pool.where(not_reweightable):
            print "Node %s with state %s." % (bad_guy.name, bad_guy.state)
        sys.exit("Aborting.")

    active_nodes = pool.where("isa_partition and state != 'mdrun-failed'")
    assert len(active_nodes) == len(active_nodes.multilock())  # make sure we lock ALL nodes

    if (options.check_restraint):
        for n in active_nodes:
            check_restraint_energy(n)

    if (options.method == "direct"):
        reweight_direct(active_nodes, options)
    elif (options.method == "entropy"):
        reweight_entropy(active_nodes, options)
    elif (options.method == "presampling"):
        reweight_presampling(active_nodes, options)
    else:
        raise (Exception("Method unkown: " + options.method))

    weight_sum = np.sum([n.tmp['weight'] for n in active_nodes])

    print "Thermodynamic weights calculated by method '%s':" % options.method
    for n in active_nodes:
        n.obs.weight_direct = n.tmp['weight'] / weight_sum
        if (options.method == "direct"):
            print(
                "  %s with mean_V: %f [kJ/mol], %d refpoints and weight: %f" %
                (n.name, n.obs.mean_V, n.tmp['n_refpoints'],
                 n.obs.weight_direct))
        else:
            print("  %s with A: %f [kJ/mol] and weight: %f" %
                  (n.name, n.obs.A, n.obs.weight_direct))
    print "The above weighting uses bonded energies='%s' and nonbonded energies='%s'." % (
        options.e_bonded, options.e_nonbonded)

    for n in active_nodes:
        n.save()

    active_nodes.unlock()
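
The weights printed above are n.tmp['weight'] normalized by their sum. The exact formula inside reweight_entropy is not shown here, but since it reports a free energy A [kJ/mol] per node, plain Boltzmann weighting is the natural reading; a sketch under that assumption:

import numpy as np

def boltzmann_weights(A, T=300.0):
    # A: free energies in kJ/mol; R = 8.314e-3 kJ/(mol*K).
    # Shifting by min(A) avoids overflow and cancels in the normalization.
    R = 8.314e-3
    w = np.exp(-(np.asarray(A) - np.min(A)) / (R * T))
    return w / np.sum(w)

print(boltzmann_weights([0.0, 2.5, 5.0]))
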
Example #3
def main():
	options = options_desc.parse_args(sys.argv)[0]
	
	zgf_cleanup.main()
	
	pool = Pool()

	#not_reweightable = "state not in ('refined','converged')"
	not_reweightable = "isa_partition and state!='converged'"
	if options.ignore_convergence:
		not_reweightable = "isa_partition and state not in ('converged','not-converged')"

	if pool.where(not_reweightable):
		print "Pool can not be reweighted due to the following nodes:"		
		for bad_guy in pool.where(not_reweightable):
			print "Node %s with state %s."%(bad_guy.name, bad_guy.state)
		sys.exit("Aborting.")
		
	active_nodes = pool.where("isa_partition")
	assert(len(active_nodes) == len(active_nodes.multilock())) # make sure we lock ALL nodes

	for n in active_nodes:
		check_restraint_energy(n)

	# find out about number of energygrps
	mdp_file = gromacs.read_mdp_file(pool.mdp_fn)
	energygrps = [str(egrp) for egrp in re.findall('[\S]+', mdp_file["energygrps"])]
	moi_energies = True	
	if len(energygrps) < 2:
		moi_energies = False # Gromacs energies are named differently when there are less than two energygrps :(

	if(options.method == "direct"): 
		reweight_direct(active_nodes, moi_energies, options.sol_energy, options.save_refpoints)
	elif(options.method == "entropy"):
		reweight_entropy(active_nodes, moi_energies, options.sol_energy, options.save_refpoints)
	elif(options.method == "presampling"):
		reweight_presampling(active_nodes, options.presamp_temp, moi_energies, options.sol_energy)
	else:
		raise(Exception("Method unkown: "+options.method))
	
	weight_sum = np.sum([n.tmp['weight'] for n in active_nodes])
	
	print "Thermodynamic weights calculated by method '%s' (sol-energy=%s):"%(options.method, options.sol_energy)
	for n in active_nodes:
		n.obs.weight_direct = n.tmp['weight'] / weight_sum
		if(options.method == "direct"):
			print("  %s with mean_V: %f [kJ/mol], %d refpoints and weight: %f" % (n.name, n.obs.mean_V, n.tmp['n_refpoints'], n.obs.weight_direct))
		else:
			print("  %s with A: %f [kJ/mol] and weight: %f" % (n.name, n.obs.A, n.obs.weight_direct))

	for n in active_nodes:
		n.save()

	active_nodes.unlock()
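
The energygrps detection above is a plain whitespace split written as a regex; a tiny check with a made-up mdp value:

import re

mdp_value = "Protein SOL"  # hypothetical energygrps entry
energygrps = re.findall(r'\S+', mdp_value)
assert energygrps == mdp_value.split()  # the regex is equivalent to str.split()
moi_energies = len(energygrps) >= 2     # Gromacs names energies differently below two groups
print(moi_energies)
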
Example #4
def main():
	options = options_desc.parse_args(sys.argv)[0]
	
	#TODO put somehow into Options, e.g. min_value=1 or required=True
	if(not options.doomed_nodes):
		sys.exit("Option --doomed_nodes is required.")
		
	pool = Pool()
	old_pool_size = len(pool)
	old_alpha = pool.alpha

	doomed_nodes = NodeList()
	
	#TODO: maybe this code should go into ZIBMolPy.ui 
	for name in options.doomed_nodes.split(","):
		found = [n for n in pool if n.name == name]
		if(len(found) != 1):
			sys.exit("Coult not find node '%s'"%(name))
		doomed_nodes.append(found[0])
	
	for n in doomed_nodes:
		if(n == pool.root):
			sys.exit("Node %s is the root. Removal not allowed."%(n.name))		
		#if(len(n.children) > 0):
		#	sys.exit("Node %s has children. Removal not allowed."%(n.name)) #TODO why should we forbid this?

	if not(userinput("The selected node(s) will be removed permanently. Continue?", "bool")):
		sys.exit("Quit by user.")

	assert(len(doomed_nodes) == len(doomed_nodes.multilock()))
	for n in doomed_nodes:
		print("Removing directory: "+n.dir)
		shutil.rmtree(n.dir)

	pool.reload_nodes()
	
	#TODO: this code-block also exists in zgf_create_node
	if(len(pool.where("isa_partition")) < 2):
		pool.alpha = None
	elif(options.methodalphas == "theta"):
		pool.alpha = zgf_create_nodes.calc_alpha_theta(pool)
	elif(options.methodalphas == "user"):
		pool.alpha = userinput("Please enter a value for alpha", "float")
	else:
		raise(Exception("Method unkown: "+options.methodalphas))

	pool.history.append({'removed_nodes': [(n.name, n.state) for n in doomed_nodes], 'size':old_pool_size, 'alpha':old_alpha, 'timestamp':datetime.now()})
	pool.save()

	#TODO: deal with analysis dir and dependencies
	zgf_cleanup.main()	
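
The assert-on-multilock idiom that recurs throughout these examples can be factored into a helper; a sketch assuming (as the asserts imply) that multilock() returns only the nodes it actually managed to lock:

def lock_all_or_abort(nodes):
    # All-or-nothing: if any node is locked elsewhere, fail loudly.
    locked = nodes.multilock()
    assert len(locked) == len(nodes), "could not lock every node"
    return locked
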
Example #7
def main():
	options = options_desc.parse_args(sys.argv)[0]
	
	zgf_cleanup.main()

	pool = Pool()

	needy_nodes = pool.where("state == '%s'"%options.current_state).multilock()

	for n in needy_nodes:
		print "Recovering node %s with state %s to state %s ..."%(n.name, n.state, options.recover_state)
		n.state = options.recover_state
		n.save()		
		n.unlock()
Example #10
def main():
	options = options_desc.parse_args(sys.argv)[0]
	zgf_cleanup.main()
	
	print("Options:\n%s\n"%pformat(eval(str(options))))
	
	pool = Pool()
	parent = pool.root
	active_nodes = pool.where("isa_partition")
	
	assert(len(active_nodes) == len(active_nodes.multilock())) # make sure we lock ALL nodes

	if active_nodes.where("'weight_direct' not in obs"):
		sys.exit("Q-Matrix calculation not possible: Not all of the nodes have been reweighted.")

	node_weights = np.array([node.obs.weight_direct for node in active_nodes])
	
	print "### Generate bins: equidist ###" 
	result = q_equidist(parent, options.numnodes)
	chosen_idx = result['chosen_idx']
	frames_chosen = result['frames_chosen']
	theta = result['theta']
	chosen_idx.sort() # makes preview-trajectory easier to understand
	dimension = len(chosen_idx)

	print "chosen_idx"
	print chosen_idx

	print "### Generate bin weights ###"
	bin_weights = np.zeros(dimension)
	for (i,n) in enumerate(active_nodes):
		w_denom = np.sum(n.frameweights) 
		for t in range(len(n.trajectory)):
			diffs = (frames_chosen - n.trajectory.getframe(t)).norm()
			j = np.argmin(diffs)
			bin_weights[j] = bin_weights[j] + node_weights[i] * n.frameweights[t] / w_denom
			
	
	print "bin_weights"
	print bin_weights
	
	print "### Generate q_all (entries only for neighboring bins) ###" 
	q_all = np.empty((dimension, dimension), dtype=float)
	for i in range(dimension):
		sum_row = 0.0
		diffs = (frames_chosen - frames_chosen.getframe(i)).norm()
		print "diffs"
		print diffs
		for j in range(dimension):
			if (diffs[j] < 2.0 * theta) and (bin_weights[i] > 0.0):
				q_all[i,j] = np.sqrt(bin_weights[j]) / np.sqrt(bin_weights[i])
				sum_row = sum_row + q_all[i,j]
			else:
				q_all[i,j] = 0
		q_all[i, i] = q_all[i, i] - sum_row
			
	print "Q_All"
	print q_all
	
	if options.export_matlab:
		savemat(pool.analysis_dir+"q_all.mat", {"q_all":q_all})
		
	active_nodes.unlock()
	zgf_cleanup.main()
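
The q_all loop above builds a generator-style rate matrix: off-diagonal entries sqrt(w_j/w_i) between neighboring bins, and a diagonal chosen so each row sums to zero (the original adds the j==i term of 1.0 into sum_row and subtracts it again, which comes to the same thing). A standalone sketch with neighbors given explicitly instead of via the 2*theta distance test:

import numpy as np

def build_q(bin_weights, neighbors):
    d = len(bin_weights)
    q = np.zeros((d, d))
    for i in range(d):
        if bin_weights[i] <= 0.0:
            continue  # empty bin: leave its row at zero
        for j in neighbors[i]:
            if j != i:
                q[i, j] = np.sqrt(bin_weights[j] / bin_weights[i])
        q[i, i] = -np.sum(q[i])  # enforce a zero row sum
    return q

q = build_q(np.array([0.4, 0.4, 0.2]), {0: [1], 1: [0, 2], 2: [1]})
print(np.sum(q, axis=1))  # all rows ~0
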
Example #11
def main():
    options = options_desc.parse_args(sys.argv)[0]

    zgf_cleanup.main()

    pool = Pool()
    npz_file = np.load(pool.chi_mat_fn)
    chi_matrix = npz_file['matrix']
    node_names = npz_file['node_names']
    n_clusters = npz_file['n_clusters']
    active_nodes = [Node(nn) for nn in node_names]

    # create and open dest_files, initialize counters for statistics
    dest_filenames = [
        pool.analysis_dir + "cluster%d.trr" % (c + 1)
        for c in range(n_clusters)
    ]
    dest_files = [open(fn, "wb") for fn in dest_filenames]
    dest_frame_counters = np.zeros(n_clusters)

    # For each active node...
    for (i, n) in enumerate(active_nodes):
        # ... find the clusters to which it belongs (might be more than one)...
        belonging_clusters = np.argwhere(
            chi_matrix[i] > options.node_threshold)

        # ... and find all typical frames of this node.
        #TODO not an optimal solution... discuss
        # per default, we take every frame with above average weight
        frame_threshold = options.frame_threshold * 2 * np.mean(n.frameweights)
        typical_frame_nums = np.argwhere(n.frameweights > frame_threshold)

        # Go through the node's trajectory ...
        trr_in = TrrFile(n.trr_fn)
        curr_frame = trr_in.first_frame
        for fnum in typical_frame_nums:  # renamed from i: don't shadow the node index above
            # ...stop at each typical frame...
            while (fnum != curr_frame.number):
                curr_frame = curr_frame.next()
            assert (curr_frame.number == fnum)
            #... and copy it into the dest_file of each belonging cluster.
            for c in belonging_clusters:
                dest_files[c].write(curr_frame.raw_data)
                dest_frame_counters[c] += 1
        trr_in.close()  # close source file

    # close dest_files
    for f in dest_files:
        f.close()
    del (dest_files)

    # desolvate cluster-trajectories 'in-place'
    if (not options.write_sol):
        for dest_fn in dest_filenames:
            tmp_fn = mktemp(suffix='.trr', dir=pool.analysis_dir)
            os.rename(dest_fn, tmp_fn)  # works as both files are in same dir
            cmd = ["trjconv", "-f", tmp_fn, "-o", dest_fn, "-n", pool.ndx_fn]
            p = Popen(cmd, stdin=PIPE)
            p.communicate(input="MOI\n")
            assert (p.wait() == 0)
            os.remove(tmp_fn)

    # register dependencies
    for fn in dest_filenames:
        register_file_dependency(fn, pool.chi_mat_fn)

    # check number of written frames
    sys.stdout.write("Checking lenghts of written trajectories... ")
    for i in range(n_clusters):
        f = TrrFile(dest_filenames[i])
        assert (f.count_frames() == dest_frame_counters[i])
        f.close()
    print("done.")

    #output statistics
    print "\n### Extraction summary ###\nnode threshold: %1.1f, frame threshold: %1.1f" % (
        options.node_threshold, options.frame_threshold)
    print "Cluster trajectories were written to %s:" % pool.analysis_dir
    for (c, f) in enumerate(dest_frame_counters):
        print "cluster%d.trr [%d frames] from node(s):" % (c + 1, f)
        print list(np.argwhere(chi_matrix[:, c] > options.node_threshold).flat)
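
The frame selection above keeps every frame whose weight exceeds options.frame_threshold * 2 * mean(frameweights); per the comment, the default takes frames of above-average weight, which corresponds to a frame_threshold of 0.5 (an inference, since the option's default is not shown). As a standalone function:

import numpy as np

def typical_frame_nums(frameweights, frame_threshold=0.5):
    # 0.5 * 2 * mean == mean: the assumed default keeps above-average frames.
    cutoff = frame_threshold * 2 * np.mean(frameweights)
    return np.argwhere(frameweights > cutoff).flatten()

print(typical_frame_nums(np.array([0.1, 0.9, 0.5, 1.5])))  # -> [1 3]
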
Example #12
def main(argv=None):
	if argv is None:
		argv = sys.argv
	options = options_desc.parse_args(argv)[0]
	
	print("Options:\n%s\n"%pformat(eval(str(options))))

	if(options.random_seed):
		# using numpy-random because python-random differs between 32- and 64-bit builds
		np.random.seed(hash(options.random_seed))
	
	pool = Pool()
	old_pool_size = len(pool)
	print "pool", pool
	
	if(options.parent_node == "root"):
		parent = pool.root
	else:
		found = [n for n in pool if n.name == options.parent_node]
		assert(len(found) == 1)
		parent = found[0]
	
	
	print "### Generate nodes: %s ###" % options.methodnodes
	if(options.methodnodes == "kmeans"):
		chosen_idx = mknodes_kmeans(parent, options.numnodes)
	elif(options.methodnodes == "equidist"):
		chosen_idx = mknodes_equidist(parent, options.numnodes)
	elif(options.methodnodes == "maxdist"):
		chosen_idx = mknodes_maxdist(parent, options.numnodes)
	elif(options.methodnodes == "all"):
		chosen_idx = mknodes_all(parent)
	else:
		raise(Exception("Method unknown: "+options.methodnodes))

	chosen_idx.sort() # makes preview-trajectory easier to understand 
	if(options.write_preview):
		write_node_preview(pool, parent, chosen_idx)
	
	for i in chosen_idx:
		n = Node()
		n.parent_frame_num = i
		n.parent = parent
		n.state = "creating-a-partition" # will be set to "created" at end of script
		n.extensions_counter = 0
		n.extensions_max = options.ext_max
		n.extensions_length = options.ext_length
		n.sampling_length = options.sampling_length	
		n.internals = parent.trajectory.getframe(i)
		pool.append(n)
		
	print "\n### Obtain alpha: %s ###" % options.methodalphas
	old_alpha = pool.alpha
	if(options.methodalphas == "theta"):
		pool.alpha = calc_alpha_theta(pool)
	elif(options.methodalphas == "user"):
		pool.alpha = userinput("Please enter a value for alpha", "float")
	else:
		raise(Exception("Method unknown: "+options.methodalphas))
	
	pool.history.append({'refined_node': (parent.name, parent.state), 'size':old_pool_size, 'alpha':old_alpha, 'timestamp':datetime.now()})
	
	pool.save() # alpha might have changed
	
	print "\n### Obtain phi fit: %s ###" % options.methodphifit
	if(options.methodphifit == "harmonic"):
		do_phifit_harmonic(pool)
	elif(options.methodphifit == "switch"):
		do_phifit_switch(pool)
	elif(options.methodphifit == "leastsq"):
		do_phifit_leastsq(pool)
	else:
		raise(Exception("Method unkown: "+options.methodphifit))

	for n in pool.where("state == 'creating-a-partition'"):
		n.state = "created"
		n.save()
		print "saving " +str(n)
		
	zgf_cleanup.main()
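
Each of the three if/elif method chains in Example #12 can also be written as a dictionary dispatch; a sketch for the node-generation step, assuming the mknodes_* helpers, parent and options from above are in scope:

methods = {
    "kmeans":   lambda: mknodes_kmeans(parent, options.numnodes),
    "equidist": lambda: mknodes_equidist(parent, options.numnodes),
    "maxdist":  lambda: mknodes_maxdist(parent, options.numnodes),
    "all":      lambda: mknodes_all(parent),
}
if options.methodnodes not in methods:
    raise Exception("Method unknown: " + options.methodnodes)
chosen_idx = methods[options.methodnodes]()
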
Example #13
def main():
	options = options_desc.parse_args(sys.argv)[0]

	zgf_cleanup.main()
	
	pool = Pool()
	active_nodes = pool.where("isa_partition")
	if(options.ignore_failed):
		active_nodes = pool.where("isa_partition and not state=='mdrun-failed'")

	assert(len(active_nodes) == len(active_nodes.multilock())) # make sure we lock ALL nodes

	if active_nodes.where("'weight_direct' not in obs"):
		active_nodes.unlock()
		sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")
	
	print "\n### Getting S matrix ..."
	s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat, fast=options.fast_mat)
	register_file_dependency(pool.s_mat_fn, pool.filename)

	node_weights = np.array([node.obs.weight_direct for node in active_nodes])
	
	print "\n### Symmetrizing S matrix ..."
	(corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))

	# store intermediate results
	register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)

	np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=[n.name for n in active_nodes])
	
	if options.export_matlab:
		savemat(pool.analysis_dir+"node_weights.mat", {"node_weights":node_weights, "node_weights_corrected":corr_node_weights})
		savemat(pool.analysis_dir+"s_mats.mat", {"s_matrix":s_matrix, "s_matrix_corrected":corr_s_matrix})

	for (n, cw) in zip(active_nodes, corr_node_weights):
		n.obs.weight_corrected = cw
		
	print "\n### Node weights after symmetrization of S matrix:"
	for n in active_nodes:
		print "%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected))
		n.save()

	active_nodes.unlock()

	# calculate and sort eigenvalues in descending order
	(eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
	argsorted_eigvalues = np.argsort(-eigvalues)
	eigvalues = eigvalues[argsorted_eigvalues]
	eigvectors = eigvectors[:, argsorted_eigvalues]
	
	gaps = np.abs(eigvalues[1:]-eigvalues[:-1])
	gaps = np.append(gaps, 0.0)
	wgaps = gaps*eigvalues

	print "\n### Sorted eigenvalues of symmetrized S matrix:"
	for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues)+1), eigvalues, gaps, wgaps):
		print "EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap)
	n_clusters = np.argmax(wgaps)+1
	print "\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), n_clusters)
	print "### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps)+1)
	sys.stdout.flush()
	if not options.auto_cluster:
		n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
	print "### Using %d clusters for PCCA+ ..."%n_clusters

	if options.export_matlab:
		savemat(pool.analysis_dir+"evs.mat", {"evs":eigvectors})
	
	# orthogonalize and normalize eigenvectors 
	eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

	# perform PCCA+
	# First two return-values "c_f" and "indicator" are not needed
	(chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]

	if(options.optimize_chi):
		print "\n### Optimizing chi matrix ..."
		
		outliers = 5
		mean_weight = np.mean(corr_node_weights)
		threshold = mean_weight/100*outliers
		print "Light-weight node threshold (%d%% of mean corrected node weight): %.4f."%(outliers, threshold)

		# accumulate nodes for optimization
		edges = np.where(np.max(chi_matrix, axis=1) > 0.9999)[0] # edges of simplex
		heavies = np.where( corr_node_weights > threshold)[0] # heavy-weight nodes
		filtered_eigvectors = eigvectors[ np.union1d(edges, heavies) ]

		# perform the actual optimization
		rot_matrix = opt_soft(filtered_eigvectors, rot_matrix, n_clusters)

		chi_matrix = np.dot(eigvectors[:,:n_clusters], rot_matrix)
		
		# deal with light-weight nodes: shift and scale
		for i in np.where(corr_node_weights <= threshold)[0]:
			if(i in edges):
				print "Column %d belongs to (potentially dangerous) light-weight node, but its node is a simplex edge."%(i+1)
				continue
			print "Column %d is shifted and scaled."%(i+1)
			col_min = np.min( chi_matrix[i,:] )
			chi_matrix[i,:] -= col_min
			chi_matrix[i,:] /= 1-(n_clusters*col_min)
			
	qc_matrix = np.dot( np.dot( np.linalg.inv(rot_matrix), np.diag(eigvalues[range(n_clusters)]) ), rot_matrix ) - np.eye(n_clusters)
	cluster_weights = rot_matrix[0]
	
	print "\n### Matrix numerics check"
	print "-- Q_c matrix row sums --"
	print np.sum(qc_matrix, axis=1)
	print "-- cluster weights: first column of rot_matrix --"
	print cluster_weights
	print "-- cluster weights: numpy.dot(node_weights, chi_matrix) --"
	print np.dot(corr_node_weights, chi_matrix)
	print "-- chi matrix column max values --"
	print np.max(chi_matrix, axis=0)
	print "-- chi matrix row sums --"
	print np.sum(chi_matrix, axis=1)

	# store final results
	np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes])
	np.savez(pool.qc_mat_fn,  matrix=qc_matrix,  n_clusters=n_clusters, node_names=[n.name for n in active_nodes], weights=cluster_weights)

	if options.export_matlab:		
		savemat(pool.analysis_dir+"chi_mat.mat", {"chi_matrix":chi_matrix})
		savemat(pool.analysis_dir+"qc_mat.mat", {"qc_matrix":qc_matrix, "weights":cluster_weights})

	register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
	register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)

	for fn in (pool.s_mat_fn, pool.s_corr_mat_fn):
		register_file_dependency(pool.chi_mat_fn, fn)
		register_file_dependency(pool.qc_mat_fn, fn)

	# touch analysis directory (triggering update in zgf_browser)
	atime = mtime = time.time()
	os.utime(pool.analysis_dir, (atime, mtime))

	# show summary
	if(options.summary):
		print "\n### Preparing cluster summary ..."
		chi_threshold = 1E-3
		from pprint import pformat
	
		for i in range(n_clusters):
			involved_nodes = [active_nodes[ni] for ni in np.argwhere(chi_matrix[:,i] > chi_threshold)]
			max_chi_node = active_nodes[ np.argmax(chi_matrix[:,i]) ]
			c_max = []

		for c in pool.converter:
				coord_range = pool.coord_range(c)
				scale = c.plot_scale
				edges = scale(np.linspace(np.min(coord_range), np.max(coord_range), num=50))
				hist_cluster = np.zeros(edges.size-1)

				for (n, chi) in zip([n for n in active_nodes], chi_matrix[:,i]):
					samples = scale( n.trajectory.getcoord(c) )
					hist_node = np.histogram(samples, bins=edges, weights=n.frameweights, normed=True)[0]
					hist_cluster += n.obs.weight_corrected * hist_node * chi

				c_max.append( scale(np.linspace(np.min(coord_range), np.max(coord_range), num=50))[np.argmax(hist_cluster)] )

			msg = "### Cluster %d (weight=%.4f, #involved nodes=%d, representative='%s'):"%(i+1, cluster_weights[i], len(involved_nodes), max_chi_node.name)
			print "\n"+msg
			print "-- internal coordinates --"
			print "%s"%pformat(["%.2f"%cm for cm in c_max])
			print "-- involved nodes --"
			print "%s"%pformat([n.name for n in involved_nodes])			
			print "-"*len(msg)
Example #16
def main():
	options = options_desc.parse_args(sys.argv)[0]

	zgf_cleanup.main()
	
	pool = Pool()
	active_nodes = pool.where("isa_partition")
	
	assert(len(active_nodes) == len(active_nodes.multilock())) # make sure we lock ALL nodes

	if active_nodes.where("'weight_direct' not in obs"):
		active_nodes.unlock()
		sys.exit("Matrix calculation not possible: Not all of the nodes have been reweighted.")
	
	print "\n### Getting S matrix ..."
	s_matrix = cache_matrix(pool.s_mat_fn, active_nodes, overwrite=options.overwrite_mat)
	register_file_dependency(pool.s_mat_fn, pool.filename)

	print "\n### Getting K matrix ..."
	k_matrix = cache_matrix(pool.k_mat_fn, active_nodes, shift=options.lag_time, overwrite=options.overwrite_mat)
	register_file_dependency(pool.k_mat_fn, pool.filename)	

	node_weights = np.array([node.obs.weight_direct for node in active_nodes])
	
	print "\n### Symmetrizing S matrix ..."
	(corr_s_matrix, corr_node_weights) = symmetrize(s_matrix, node_weights, correct_weights=True, error=float(options.error))
	print "\n### Symmetrizing K matrix ..."
	(corr_k_matrix, corr_node_weights) = symmetrize(k_matrix, corr_node_weights)

	# store intermediate results
	register_file_dependency(pool.s_corr_mat_fn, pool.s_mat_fn)
	register_file_dependency(pool.k_corr_mat_fn, pool.k_mat_fn)
	np.savez(pool.s_corr_mat_fn, matrix=corr_s_matrix, node_names=[n.name for n in active_nodes])
	np.savez(pool.k_corr_mat_fn, matrix=corr_k_matrix, node_names=[n.name for n in active_nodes])
	
	if options.export_matlab:
		savemat(pool.analysis_dir+"node_weights.mat", {"node_weights":node_weights, "node_weights_corrected":corr_node_weights})
		savemat(pool.analysis_dir+"s_mats.mat", {"s_matrix":s_matrix, "s_matrix_corrected":corr_s_matrix})
		savemat(pool.analysis_dir+"k_mats.mat", {"k_matrix":k_matrix, "k_matrix_corrected":corr_k_matrix})
	
	for (n, cw) in zip(active_nodes, corr_node_weights):
		n.obs.weight_corrected = cw
		
	print "\n### Node weights after symmetrization of S matrix:"
	for n in active_nodes:
		print "%s: initial weight: %f, corrected weight: %f, weight change: %f" % (n.name, n.obs.weight_direct, n.obs.weight_corrected, abs(n.obs.weight_direct - n.obs.weight_corrected))
		n.save()

	active_nodes.unlock()

	# calculate and sort eigenvalues in descending order
	(eigvalues, eigvectors) = np.linalg.eig(corr_s_matrix)
	argsorted_eigvalues = np.argsort(-eigvalues)
	eigvalues = eigvalues[argsorted_eigvalues]
	eigvectors = eigvectors[:, argsorted_eigvalues]
	
	gaps = np.abs(eigvalues[1:]-eigvalues[:-1])
	gaps = np.append(gaps, 0.0)
	wgaps = gaps*eigvalues

	print "\n### Sorted eigenvalues of symmetrized S matrix:"
	for (idx, ev, gap, wgap) in zip(range(1, len(eigvalues)+1), eigvalues, gaps, wgaps):
		print "EV%04d: %f, gap to next: %f, EV-weighted gap to next: %f" % (idx, ev, gap, wgap)
	n_clusters = np.argmax(wgaps)+1
	print "\n### Maximum gap %f after top %d eigenvalues." % (np.max(gaps), n_clusters)
	print "### Maximum EV-weighted gap %f after top %d eigenvalues." % (np.max(wgaps), np.argmax(wgaps)+1)
	sys.stdout.flush()
	if not options.auto_cluster:
		n_clusters = userinput("Please enter the number of clusters for PCCA+", "int", "x>0")
	print "### Using %d clusters for PCCA+ ..."%n_clusters

	print "eigenvectors"
	print eigvectors[:, :n_clusters]

	if options.export_matlab:
		savemat(pool.analysis_dir+"evs.mat", {"evs":eigvectors})
	
	# orthogonalize and normalize eigenvectors 
	eigvectors = orthogonalize(eigvalues, eigvectors, corr_node_weights)

	# perform PCCA+
	# First two return-values "c_f" and "indicator" are not needed
	(chi_matrix, rot_matrix) = cluster_by_isa(eigvectors, n_clusters)[2:]
	
	#TODO at the moment, K-matrix is not used
	#xi = [] # calculate eigenvalues of Q_c, xi
	#for eigvec in np.transpose(eigvectors)[: n_clusters]:
	#	num = np.dot( np.dot( np.transpose(eigvec), corr_k_matrix ), eigvec )
	#	denom = np.dot( np.dot( np.transpose(eigvec), corr_s_matrix ), eigvec )
	#	xi.append(num/denom-1)

	#print np.diag(xi) #TODO what does this tell us? Marcus-check

	qc_matrix = np.dot( np.dot( np.linalg.inv(rot_matrix), np.diag(eigvalues[range(n_clusters)]) ), rot_matrix ) - np.eye(n_clusters)

	cluster_weights = rot_matrix[0]

	print "Q_c matrix:"
	print qc_matrix
	print "Q_c matrix row sums:"
	print np.sum(qc_matrix, axis=1)
	print "cluster weights (calculated twice for checking):"
	print cluster_weights
	print np.dot(corr_node_weights, chi_matrix)
	print "chi matrix column sums:"
	print np.sum(chi_matrix, axis=0)
	print "chi matrix row sums:"
	print np.sum(chi_matrix, axis=1)

	# store final results
	np.savez(pool.chi_mat_fn, matrix=chi_matrix, n_clusters=n_clusters, node_names=[n.name for n in active_nodes])
	np.savez(pool.qc_mat_fn,  matrix=qc_matrix,  n_clusters=n_clusters, node_names=[n.name for n in active_nodes], weights=cluster_weights)

	if options.export_matlab:
		savemat(pool.analysis_dir+"chi_mat.mat", {"chi_matrix":chi_matrix})
		savemat(pool.analysis_dir+"qc_mat.mat", {"qc_matrix":qc_matrix, "weights":cluster_weights})

	register_file_dependency(pool.chi_mat_fn, pool.s_corr_mat_fn)
	register_file_dependency(pool.qc_mat_fn, pool.s_corr_mat_fn)
	for fn in (pool.s_mat_fn, pool.s_corr_mat_fn, pool.k_mat_fn, pool.k_corr_mat_fn):
		register_file_dependency(pool.chi_mat_fn, fn)
		register_file_dependency(pool.qc_mat_fn, fn)
		
	zgf_cleanup.main()
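
The matrix printouts in Examples #13 and #16 amount to numeric sanity checks on the PCCA+ output; condensed into one hypothetical helper (chi rows should form a partition of unity, the coarse generator Q_c should have vanishing row sums, and the weighted column sums of chi should reproduce the cluster weights):

import numpy as np

def check_pcca_output(chi_matrix, qc_matrix, node_weights):
    print(np.sum(chi_matrix, axis=1))        # expect ~1 everywhere
    print(np.sum(qc_matrix, axis=1))         # expect ~0 everywhere
    print(np.dot(node_weights, chi_matrix))  # expect the cluster weights
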