Пример #1
0
    def reload_nodes(self):
        if (not path.exists("./nodes/")):
            return

        self._mtime_nodes = path.getmtime("./nodes")

        new_nodes = []
        for node_dir in sorted(glob("./nodes/*")):
            node_name = path.basename(node_dir)
            if (not path.exists(node_dir + "/" + node_name + "_desc.txt")):
                continue  #ignoring not readily created nodes
            found_node = None
            for n in self:
                if (n.name == node_name):
                    found_node = n

            if (found_node == None):
                found_node = Node(node_name)
            new_nodes.append(found_node)

        self[:] = new_nodes
def main():
    options = options_desc.parse_args(sys.argv)[0]

    zgf_cleanup.main()

    pool = Pool()
    npz_file = np.load(pool.chi_mat_fn)
    chi_matrix = npz_file['matrix']
    node_names = npz_file['node_names']
    n_clusters = npz_file['n_clusters']
    active_nodes = [Node(nn) for nn in node_names]

    # create and open dest_files, intialize counters for statistics
    dest_filenames = [
        pool.analysis_dir + "cluster%d.trr" % (c + 1)
        for c in range(n_clusters)
    ]
    dest_files = [open(fn, "wb") for fn in dest_filenames]
    dest_frame_counters = np.zeros(n_clusters)

    # For each active node...
    for (i, n) in enumerate(active_nodes):
        # ... find the clusters to which it belongs (might be more than one)...
        belonging_clusters = np.argwhere(
            chi_matrix[i] > options.node_threshold)

        # ... and find all typical frames of this node.
        #TODO not an optimal solution... discuss
        # per default, we take every frame with above average weight
        frame_threshold = options.frame_threshold * 2 * np.mean(n.frameweights)
        typical_frame_nums = np.argwhere(n.frameweights > frame_threshold)

        # Go through the node's trajectory ...
        trr_in = TrrFile(n.trr_fn)
        curr_frame = trr_in.first_frame
        for i in typical_frame_nums:
            # ...stop at each typical frame...
            while (i != curr_frame.number):
                curr_frame = curr_frame.next()
            assert (curr_frame.number == i)
            #... and copy it into the dest_file of each belonging cluster.
            for c in belonging_clusters:
                dest_files[c].write(curr_frame.raw_data)
                dest_frame_counters[c] += 1
        trr_in.close()  # close source file

    # close dest_files
    for f in dest_files:
        f.close()
    del (dest_files)

    # desolvate cluster-trajectories 'in-place'
    if (not options.write_sol):
        for dest_fn in dest_filenames:
            tmp_fn = mktemp(suffix='.trr', dir=pool.analysis_dir)
            os.rename(dest_fn, tmp_fn)  # works as both files are in same dir
            cmd = ["trjconv", "-f", tmp_fn, "-o", dest_fn, "-n", pool.ndx_fn]
            p = Popen(cmd, stdin=PIPE)
            p.communicate(input="MOI\n")
            assert (p.wait() == 0)
            os.remove(tmp_fn)

    # register dependencies
    for fn in dest_filenames:
        register_file_dependency(fn, pool.chi_mat_fn)

    # check number of written frames
    sys.stdout.write("Checking lenghts of written trajectories... ")
    for i in range(n_clusters):
        f = TrrFile(dest_filenames[i])
        assert (f.count_frames() == dest_frame_counters[i])
        f.close()
    print("done.")

    #output statistics
    print "\n### Extraction summary ###\nnode threshold: %1.1f, frame threshold: %1.1f" % (
        options.node_threshold, options.frame_threshold)
    print "Cluster trajectories were written to %s:" % pool.analysis_dir
    for (c, f) in enumerate(dest_frame_counters):
        print "cluster%d.trr [%d frames] from node(s):" % (c + 1, f)
        print list(np.argwhere(chi_matrix[:, c] > options.node_threshold).flat)
Пример #3
0
def main(argv=None):
    if (argv == None):
        argv = sys.argv
        options = options_desc.parse_args(argv)[0]

    pool = Pool()

    found_parents = [n for n in pool if n.name == options.parent_node]
    assert (len(found_parents) == 1)
    parent = found_parents[0]

    chosen_idx = np.linspace(start=0,
                             stop=parent.trajectory.n_frames - 1,
                             num=options.numnodes).astype(int)

    print "choosen_idx: ", chosen_idx

    for i in chosen_idx:
        n = Node()
        n.parent_frame_num = i
        n.parent = parent
        n.state = "created"
        n.extensions_counter = 0
        n.extensions_max = 0
        n.extensions_length = 0
        n.sampling_length = parent.sampling_length * 3
        n.internals = parent.trajectory.getframe(i)
        pool.append(n)
        n.save()
Пример #4
0
def main():
    options = options_desc.parse_args(sys.argv)[0]

    if options.common_filename:
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    print("Options:\n%s\n" % pformat(eval(str(options))))

    assert path.exists(options.molecule)
    assert path.exists(options.presampling)
    assert path.exists(options.internals)
    assert path.exists(options.grompp)
    assert path.exists(options.topology)

    # TODO: what if there is no index-file? (make_ndx)
    assert path.exists(options.index)
    assert "MOI" in gromacs.read_index_file(options.index), "group MOI should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    # options we cannot fix
    for ref_t in re.findall("[0-9]+", mdp_options["ref_t"]):
        assert int(ref_t) == options.temperature, "temperature in mdp file does not match ZIBgridfree temperature"
        # TODO drop options.temperature and get temperature directly from mdp file... ask again if temperature is above 310K

        # options we can fix
    mdp_options_dirty = False  # if set, a new mdp-file will be written
    required_mdp_options = {"dihre": "yes", "dihre_fc": "1", "disre": "simple", "disre_fc": "1"}
    for (k, v) in required_mdp_options.items():
        if mdp_options.has_key(k):
            assert mdp_options[k] == v  # check, if we would overwrite something
        else:
            mdp_options[k] = v
            mdp_options_dirty = True

    if mdp_options.has_key("energygrps"):
        assert "MOI" in [
            str(egrp) for egrp in re.findall("[\S]+", mdp_options["energygrps"])
        ], "group MOI should be among energygrps in mdp file"
    else:
        mdp_options["energygrps"] = "MOI"
        mdp_options_dirty = True

    a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
    if a and not b:
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif b and not a:
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif b and a:
        assert mdp_options["nstxout"] == mdp_options["nstenergy"], "nstxout should equal nstenergy"

    if int(mdp_options["nsteps"]) > 1e6:
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(userinput(msg, "int", default=int(mdp_options["nsteps"])))

        # create a fixed mdp-file
    if mdp_options_dirty:
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        f = open(out_fn, "w")  # append
        f.write("; Generated by zgf_create_pool\n")
        for i in mdp_options.items():
            f.write("%s = %s\n" % i)
        f.write("; EOF\n")
        f.close()
        options.grompp = out_fn

        # check if subsampling is reasonable
    if os.path.getsize(options.presampling) > 100e6:  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        dt = trr.first_frame.next().t - trr.first_frame.t
        trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if dt < 10:  # picoseconds
            # TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if userinput(msg, "bool"):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
                check_call(cmd)
                options.presampling = out_fn

                # balance linears
    if options.balance_linears:
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if not isinstance(c, LinearCoordinate):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
                # TODO: is this a good way to determine new_weight and new_offset???
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert old_converter.filename.endswith(".int")
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        f = open(options.internals, "w")
        f.write(new_converter.serialize())
        f.close()
        assert len(Converter(options.internals)) == len(new_coord_list)  # try parsing

        # Finally: Create root-node and pool
    pool = Pool()
    if len(pool) != 0:
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = options.temperature
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  # ... now we have to save the pool again.

    if not path.exists("analysis"):
        os.mkdir("analysis")
Пример #5
0
def main():
	options = options_desc.parse_args(sys.argv)[0]

	if(options.common_filename):
		options.molecule = options.common_filename+".pdb"
		options.presampling = options.common_filename+".trr"
		options.internals = options.common_filename+".int"
		options.grompp = options.common_filename+".mdp"
		options.topology = options.common_filename+".top"
		options.index = options.common_filename+".ndx"

	print("Options:\n%s\n"%pformat(eval(str(options))))

	assert(path.exists(options.molecule))
	assert(path.exists(options.presampling))
	assert(path.exists(options.internals))
	assert(path.exists(options.grompp))
	assert(path.exists(options.topology))
		
	#TODO: what if there is no index-file? (make_ndx)
	assert(path.exists(options.index))
	assert('moi' in gromacs.read_index_file(options.index)), "group 'MOI' should be defined in index file"
 
	# checks e.g. if the mdp-file looks good
	mdp_options = gromacs.read_mdp_file(options.grompp)
	
	temperatures = [ref_t for ref_t in re.findall("[0-9]+", mdp_options["ref_t"])]
	assert(len(set(temperatures)) == 1), "temperature definition in mdp file is ambiguous"
	temperature = temperatures[0]

	# get sampling temperature from mdp file
	if(int(temperature) > 310):
		if not(userinput("Your sampling temperature is set to %s K. Continue?"%temperature, "bool")):
			sys.exit("Quit by user.")

	# options we can fix 
 	mdp_options_dirty = False #if set, a new mdp-file will be written

	# the value of the following options need to be fixed
	critical_mdp_options = {"dihre":"yes", "dihre_fc":"1", "disre":"simple", "disre_fc":"1", "gen_temp":temperature}
	for (k,v) in critical_mdp_options.items():
 		if(mdp_options.has_key(k) and mdp_options[k].strip() != v):
			print "Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file."%(mdp_options[k].strip(),k,v)
			sys.exit("Quitting.")
 		else:
 			mdp_options[k] = v
 			mdp_options_dirty = True

	# the value of the following options does not matter, but they should be there
	noncritical_mdp_options = {"tcoupl":"no", "pcoupl":"no", "gen_vel":"no", "gen_seed":"-1"}
	for (k,v) in noncritical_mdp_options.items():
		if not(mdp_options.has_key(k)):
			mdp_options[k] = v
			mdp_options_dirty = True

	a = mdp_options.has_key("energygrps") and "moi" not in [str(egrp) for egrp in re.findall('[\S]+', mdp_options["energygrps"])]
	b = not(mdp_options.has_key("energygrps"))
	if(a or b):
		if not(userinput("'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?", "bool")):
			sys.exit("Quit by user.")

	a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
	if(a and not b):
		mdp_options["nstenergy"] = mdp_options["nstxout"]
		mdp_options_dirty = True
	elif(b and not a):
		mdp_options["nstxout"] = mdp_options["nstenergy"]
		mdp_options_dirty = True
	elif(b and a):
		assert(mdp_options["nstxout"] == mdp_options["nstenergy"]), "nstxout should equal nstenergy"
		
	if(int(mdp_options["nsteps"]) > 1e6):
		msg = "Number of MD-steps?"
		mdp_options["nsteps"] = str( userinput(msg, "int", default=int(mdp_options["nsteps"])) )
	
	# create a fixed mdp-file
	if(mdp_options_dirty):
		print("Creating copy of mdp-file and adding missing options.")
		out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
		f = open(out_fn, "w") # append
		f.write("; Generated by zgf_create_pool\n")
		for i in sorted(mdp_options.items()):
			f.write("%s = %s\n"%i)
		f.write("; EOF\n")
		f.close()
		options.grompp = out_fn
		
	
	# check if subsampling is reasonable
	if(os.path.getsize(options.presampling) > 100e6): # 100MB
		print("Presampling trajectory is large")
		trr = TrrFile(options.presampling)
		dt = trr.first_frame.next().t - trr.first_frame.t
		trr.close()
		print("Presampling timestep is %.2f ps"%dt)
		if(dt < 10): # picoseconds
			#TODO: maybe calculate subsampling factor individually, or ask? 
			msg = "Subsample presampling trajectory by a tenth?"
			if(userinput(msg, "bool")):
				out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
				cmd = ["trjconv", "-f", options.presampling, "-o", out_fn, "-skip", "10"]
				check_call(cmd)
				options.presampling = out_fn
	
			
	# balance linears
	if(options.balance_linears):
		print("Balance Linears")
		old_converter = Converter(options.internals)
		print("Loading presampling....")
		frames = old_converter.read_trajectory(options.presampling)
		new_coord_list = []
		for c in old_converter:
			if(not isinstance(c, LinearCoordinate)):
				new_coord_list.append(c)
				continue # we do not work on other Coordinate-Types
			#TODO: is this a good way to determine new_weight and new_offset??? 
			new_weight = c.weight / sqrt(2*frames.var().getcoord(c))
			new_offset = c.offset + frames.mean().getcoord(c)
			new_coord = LinearCoordinate(*c.atoms, label=c.label, weight=new_weight, offset=new_offset)
			new_coord_list.append(new_coord)
		new_converter = Converter(coord_list=new_coord_list)
	
		assert(old_converter.filename.endswith(".int"))
		options.internals = old_converter.filename[:-4] + "_balanced.int"
		print("Writing balanced Converter to: "+options.internals)
		f = open(options.internals, "w")
		f.write(new_converter.serialize())
		f.close()
		assert(len(Converter(options.internals)) == len(new_coord_list)) #try parsing
	
	# Finally: Create root-node and pool
	pool = Pool()
	if(len(pool) != 0):
		print("ERROR: A pool already exists here.")
		sys.exit(1)
	
	pool.int_fn = options.internals
	pool.mdp_fn = options.grompp
	pool.top_fn = options.topology
	pool.ndx_fn = options.index
	pool.temperature = int(temperature)
	pool.gr_threshold = options.gr_threshold
	pool.gr_chains = options.gr_chains
	pool.alpha = None
	pool.save() # save pool for the first time...

	# ... then we can save the first node...
	node0 = Node()
	node0.state = "refined"	
	node0.save() # also creates the node directory ... needed for symlink
	os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
	os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)
	
	pool.root_name = node0.name
	pool.save() #... now we have to save the pool again.
	
	if(not path.exists("analysis")):
		os.mkdir("analysis")
Пример #6
0
def main(argv=None):
	if(argv==None): 
		argv = sys.argv
	options = options_desc.parse_args(argv)[0]
	
	print("Options:\n%s\n"%pformat(eval(str(options))))

	if(options.random_seed):
		# using numpy-random because python-random differs beetween 32 and 64 bit
		np.random.seed(hash(options.random_seed))
	
	pool = Pool()
	old_pool_size = len(pool)
	print "pool", pool
	
	if(options.parent_node == "root"):
		parent = pool.root
	else:
		found = [n for n in pool if n.name == options.parent_node]
		assert(len(found) == 1)
		parent = found[0]
	
	
	print "### Generate nodes: %s ###" % options.methodnodes
	if(options.methodnodes == "kmeans"):
		chosen_idx = mknodes_kmeans(parent, options.numnodes)
	elif(options.methodnodes == "equidist"):
		chosen_idx = mknodes_equidist(parent, options.numnodes)
	elif(options.methodnodes == "maxdist"):
		chosen_idx = mknodes_maxdist(parent, options.numnodes)
	elif(options.methodnodes == "all"):
		chosen_idx = mknodes_all(parent)
	else:
		raise(Exception("Method unknown: "+options.methodnodes))

	chosen_idx.sort() # makes preview-trajectory easier to understand 
	if(options.write_preview):
		write_node_preview(pool, parent, chosen_idx)
	
	for i in chosen_idx:
		n = Node()
		n.parent_frame_num = i
		n.parent = parent
		n.state = "creating-a-partition" # will be set to "created" at end of script
		n.extensions_counter = 0
		n.extensions_max = options.ext_max
		n.extensions_length = options.ext_length
		n.sampling_length = options.sampling_length	
		n.internals = parent.trajectory.getframe(i)
		pool.append(n)
		
	print "\n### Obtain alpha: %s ###" % options.methodalphas
	old_alpha = pool.alpha
	if(options.methodalphas == "theta"):
		pool.alpha = calc_alpha_theta(pool)
	elif(options.methodalphas == "user"):
		pool.alpha = userinput("Please enter a value for alpha", "float")
	else:
		raise(Exception("Method unknown: "+options.methodalphas))
	
	pool.history.append({'refined_node': (parent.name, parent.state), 'size':old_pool_size, 'alpha':old_alpha, 'timestamp':datetime.now()})
	
	pool.save() # alpha might have changed
	
	print "\n### Obtain phi fit: %s ###" % options.methodphifit
	if(options.methodphifit == "harmonic"):
		do_phifit_harmonic(pool)
	elif(options.methodphifit == "switch"):
		do_phifit_switch(pool)
	elif(options.methodphifit == "leastsq"):
		do_phifit_leastsq(pool)
	else:
		raise(Exception("Method unkown: "+options.methodphifit))

	for n in pool.where("state == 'creating-a-partition'"):
		n.state = "created"
		n.save()
		print "saving " +str(n)
		
	zgf_cleanup.main()
def main(argv=None):
    if argv == None:
        argv = sys.argv
        options = options_desc.parse_args(argv)[0]

    pool = Pool()

    found_parents = [n for n in pool if n.name == options.parent_node]
    assert len(found_parents) == 1
    parent = found_parents[0]

    chosen_idx = np.linspace(start=0, stop=parent.trajectory.n_frames - 1, num=options.numnodes).astype(int)

    print "choosen_idx: ", chosen_idx

    for i in chosen_idx:
        n = Node()
        n.parent_frame_num = i
        n.parent = parent
        n.state = "created"
        n.extensions_counter = 0
        n.extensions_max = 0
        n.extensions_length = 0
        n.sampling_length = parent.sampling_length * 3
        n.internals = parent.trajectory.getframe(i)
        pool.append(n)
        n.save()
Пример #8
0
def main():
    options = options_desc.parse_args(sys.argv)[0]

    if (options.common_filename):
        options.molecule = options.common_filename + ".pdb"
        options.presampling = options.common_filename + ".trr"
        options.internals = options.common_filename + ".int"
        options.grompp = options.common_filename + ".mdp"
        options.topology = options.common_filename + ".top"
        options.index = options.common_filename + ".ndx"

    print("Options:\n%s\n" % pformat(eval(str(options))))

    assert (path.exists(options.molecule))
    assert (path.exists(options.presampling))
    assert (path.exists(options.internals))
    assert (path.exists(options.grompp))
    assert (path.exists(options.topology))

    #TODO: what if there is no index-file? (make_ndx)
    assert (path.exists(options.index))
    assert ('moi' in gromacs.read_index_file(
        options.index)), "group 'MOI' should be defined in index file"

    # checks e.g. if the mdp-file looks good
    mdp_options = gromacs.read_mdp_file(options.grompp)

    temperatures = [
        ref_t for ref_t in re.findall("[0-9]+", mdp_options["ref_t"])
    ]
    assert (len(set(temperatures)) == 1
            ), "temperature definition in mdp file is ambiguous"
    temperature = temperatures[0]

    # get sampling temperature from mdp file
    if (int(temperature) > 310):
        if not (userinput(
                "Your sampling temperature is set to %s K. Continue?" %
                temperature, "bool")):
            sys.exit("Quit by user.")

    # options we can fix
    mdp_options_dirty = False  #if set, a new mdp-file will be written

    # the value of the following options need to be fixed
    critical_mdp_options = {
        "dihre": "yes",
        "dihre_fc": "1",
        "disre": "simple",
        "disre_fc": "1",
        "gen_temp": temperature
    }
    for (k, v) in critical_mdp_options.items():
        if (mdp_options.has_key(k) and mdp_options[k].strip() != v):
            print "Error. I do not want to use '%s' for option '%s' ('%s' required). Please fix your mdp file." % (
                mdp_options[k].strip(), k, v)
            sys.exit("Quitting.")
        else:
            mdp_options[k] = v
            mdp_options_dirty = True

    # the value of the following options does not matter, but they should be there
    noncritical_mdp_options = {
        "tcoupl": "no",
        "pcoupl": "no",
        "gen_vel": "no",
        "gen_seed": "-1"
    }
    for (k, v) in noncritical_mdp_options.items():
        if not (mdp_options.has_key(k)):
            mdp_options[k] = v
            mdp_options_dirty = True

    a = mdp_options.has_key("energygrps") and "moi" not in [
        str(egrp) for egrp in re.findall('[\S]+', mdp_options["energygrps"])
    ]
    b = not (mdp_options.has_key("energygrps"))
    if (a or b):
        if not (userinput(
                "'MOI' is not defined as an energy group in your mdp file. Maybe you have forgotten to define proper 'energygrps'. Continue?",
                "bool")):
            sys.exit("Quit by user.")

    a, b = mdp_options.has_key("nstxout"), mdp_options.has_key("nstenergy")
    if (a and not b):
        mdp_options["nstenergy"] = mdp_options["nstxout"]
        mdp_options_dirty = True
    elif (b and not a):
        mdp_options["nstxout"] = mdp_options["nstenergy"]
        mdp_options_dirty = True
    elif (b and a):
        assert (mdp_options["nstxout"] == mdp_options["nstenergy"]
                ), "nstxout should equal nstenergy"

    if (int(mdp_options["nsteps"]) > 1e6):
        msg = "Number of MD-steps?"
        mdp_options["nsteps"] = str(
            userinput(msg, "int", default=int(mdp_options["nsteps"])))

    # create a fixed mdp-file
    if (mdp_options_dirty):
        print("Creating copy of mdp-file and adding missing options.")
        out_fn = options.grompp.rsplit(".", 1)[0] + "_fixed.mdp"
        f = open(out_fn, "w")  # append
        f.write("; Generated by zgf_create_pool\n")
        for i in sorted(mdp_options.items()):
            f.write("%s = %s\n" % i)
        f.write("; EOF\n")
        f.close()
        options.grompp = out_fn

    # check if subsampling is reasonable
    if (os.path.getsize(options.presampling) > 100e6):  # 100MB
        print("Presampling trajectory is large")
        trr = TrrFile(options.presampling)
        dt = trr.first_frame.next().t - trr.first_frame.t
        trr.close()
        print("Presampling timestep is %.2f ps" % dt)
        if (dt < 10):  # picoseconds
            #TODO: maybe calculate subsampling factor individually, or ask?
            msg = "Subsample presampling trajectory by a tenth?"
            if (userinput(msg, "bool")):
                out_fn = options.presampling.rsplit(".", 1)[0] + "_tenth.trr"
                cmd = [
                    "trjconv", "-f", options.presampling, "-o", out_fn,
                    "-skip", "10"
                ]
                check_call(cmd)
                options.presampling = out_fn

    # balance linears
    if (options.balance_linears):
        print("Balance Linears")
        old_converter = Converter(options.internals)
        print("Loading presampling....")
        frames = old_converter.read_trajectory(options.presampling)
        new_coord_list = []
        for c in old_converter:
            if (not isinstance(c, LinearCoordinate)):
                new_coord_list.append(c)
                continue  # we do not work on other Coordinate-Types
            #TODO: is this a good way to determine new_weight and new_offset???
            new_weight = c.weight / sqrt(2 * frames.var().getcoord(c))
            new_offset = c.offset + frames.mean().getcoord(c)
            new_coord = LinearCoordinate(*c.atoms,
                                         label=c.label,
                                         weight=new_weight,
                                         offset=new_offset)
            new_coord_list.append(new_coord)
        new_converter = Converter(coord_list=new_coord_list)

        assert (old_converter.filename.endswith(".int"))
        options.internals = old_converter.filename[:-4] + "_balanced.int"
        print("Writing balanced Converter to: " + options.internals)
        f = open(options.internals, "w")
        f.write(new_converter.serialize())
        f.close()
        assert (len(Converter(options.internals)) == len(new_coord_list)
                )  #try parsing

    # Finally: Create root-node and pool
    pool = Pool()
    if (len(pool) != 0):
        print("ERROR: A pool already exists here.")
        sys.exit(1)

    pool.int_fn = options.internals
    pool.mdp_fn = options.grompp
    pool.top_fn = options.topology
    pool.ndx_fn = options.index
    pool.temperature = int(temperature)
    pool.gr_threshold = options.gr_threshold
    pool.gr_chains = options.gr_chains
    pool.alpha = None
    pool.save()  # save pool for the first time...

    # ... then we can save the first node...
    node0 = Node()
    node0.state = "refined"
    node0.save()  # also creates the node directory ... needed for symlink
    os.symlink(os.path.relpath(options.presampling, node0.dir), node0.trr_fn)
    os.symlink(os.path.relpath(options.molecule, node0.dir), node0.pdb_fn)

    pool.root_name = node0.name
    pool.save()  #... now we have to save the pool again.

    if (not path.exists("analysis")):
        os.mkdir("analysis")
Пример #9
0
def main():
	options = options_desc.parse_args(sys.argv)[0]

	pool = Pool()
	active_nodes = pool.where("isa_partition")

	if options.transition_level == "clusters":
		npz_file = np.load(pool.chi_mat_fn)
		chi_matrix = npz_file['matrix']
		n_clusters = npz_file['n_clusters']

		default_cluster_threshold = options.coreset_power

		# determine cluster
		#TODO this part is too cryptic
		# amount_phi[j] = amount of basis functions per cluster j
		amount_phi=np.ones(n_clusters,dtype=np.uint64)
		amount_phi=amount_phi*len(chi_matrix)
		amount_phi_total=len(chi_matrix)	

		# sort columns of chi and return new sorted args
		arg_sort_cluster=np.argsort(chi_matrix,axis=0)
		# sort columns of chi and return new sorted chi
		# notice that the last row has to be [1 ... 1]
		sort_cluster=np.sort(chi_matrix,axis=0)
		# show_cluster contains arrays of the type [a b] where a is the row
		# and b the column of the entry from chi matrix  where 
		# chi_sorted(a,b) > default_cluster_threshold
		show_cluster=np.argwhere(sort_cluster > 0.5 )

		# from the above it could be clear
		# that the amount of phi function
		# of cluster i is given by x where x the number so that 
		# [x i] is in show_cluster and for all 
		# [y i] in show_cluster we have x>y 
		# we define amount_phi[i]=x	
		for element in show_cluster:
			index=element[0]
			cluster=element[1]
			if amount_phi[cluster]>index:
				amount_phi[cluster]=index

		# create cluster list which contains arrays
		# each array consinst of a set of numbers corresponding to 
		# the phi function of node_number
		cluster=[]
		for i in range(0,n_clusters):
			cluster_set=[]		
			for j in range(amount_phi[i],amount_phi_total):
				#if (j < amount_phi[i] + 3):
					cluster_set.append(arg_sort_cluster[j][i])	
			cluster.append(cluster_set)

		for i in range(len(cluster)):
			counter = 0
			for node_index in cluster[i]:
				counter += 1
				# and ignore nodes which have a higher chi value then default_cluster_threshold
				if( chi_matrix[node_index][i] > default_cluster_threshold and counter>options.min_nodes):
					continue
				
				node = active_nodes[node_index]
				trajectory= node.trajectory
			
				print "-----"
				print "Generating transition nodes for node %s..."%node.name
			
				neighbour_frames = get_indices_equidist(node, options.num_tnodes)
		
				# create transition node for node_index
				for frame_number in neighbour_frames:
					print "Using frame %d as starting configuration."%frame_number
					n = Node()
					n.parent_frame_num = frame_number
					n.parent = node
					n.state = "created"
					n.extensions_counter = 0
					n.extensions_max = options.num_runs-1
					n.extensions_length = options.sampling_length
					n.sampling_length = options.sampling_length
					n.internals = trajectory.getframe(frame_number)
					n.save_mode = options.save_mode
					pool.append(n)
					n.save()
				print "%d transition nodes generated."%options.num_tnodes
				print "-----"

		zgf_setup_nodes.main()
		zgf_grompp.main()
	
		cluster_dict = {}
		for (ic,c) in enumerate(cluster):
			cluster_dict['cluster_%d'%ic] = c

		# save cluster
		np.savez(pool.analysis_dir+"core_set_cluster.npz", **cluster_dict)

	elif options.transition_level == "nodes":
		for node in active_nodes:
			trajectory= node.trajectory
			
			# TODO duplicate code... use the one above
			print "-----"
			print "Generating transition nodes for node %s..."%node.name

			neighbour_frames = get_indices_equidist(node, options.num_tnodes)

			# create transition point for node_index
			for frame_number in neighbour_frames:
				print "Using frame %d as starting configuration."%frame_number
				n = Node()
				n.parent_frame_num = frame_number
				n.parent = node
				n.state = "created"
				n.extensions_counter = 0
				n.extensions_max = options.num_runs-1
				n.extensions_length = options.sampling_length
				n.sampling_length = options.sampling_length
				n.internals = trajectory.getframe(frame_number)
				n.save_mode = options.save_mode
				pool.append(n)
				n.save()
			print "%d transition nodes generated."%options.num_tnodes
			print "-----"

		zgf_setup_nodes.main()
		zgf_grompp.main()


	instructionFile = pool.analysis_dir+"instruction.txt"

	f = open(instructionFile, "w")
	f.write("{'power': %f, 'tnodes': %d, 'level': '%s', 'min_nodes': %d}"%(options.coreset_power, options.num_tnodes, options.transition_level, options.min_nodes))
	f.close()