Пример #1
0
def run_temp(iter_index, json_file):
    """Run the GROMACS prep and run commands for the temp tasks of an iteration.

    Tasks are the paths matching ``[0-9]*[0-9]`` under
    ``<iter_name>/<temp_name>/``.  The prep command is executed through
    ``exec_hosts`` on ``MachineLocal`` for all tasks.  The run command
    (``gmx_run`` with ``-nt`` and ``-plumed`` appended) is executed on
    ``MachineLocal`` when exactly one task exists, otherwise through
    ``exec_hosts_batch`` on the module-level ``exec_machine``.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file; the keys ``gmx_prep``,
            ``gmx_run`` and ``temp_thread`` are read.
    """
    iter_name = make_iter_name(iter_index)
    work_path = iter_name + "/" + temp_name + "/"

    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    gmx_prep = jdata["gmx_prep"]
    gmx_run = jdata["gmx_run"]
    temp_thread = jdata["temp_thread"]

    gmx_run = gmx_run + (" -nt %d " % temp_thread)
    gmx_run = gmx_run + " -plumed " + temp_plm
    gmx_prep_cmd = cmd_append_log(gmx_prep, "gmx_grompp.log")
    gmx_run_cmd = cmd_append_log(gmx_run, "gmx_mdrun.log")

    all_task = sorted(glob.glob(work_path + "/[0-9]*[0-9]"))

    global exec_machine
    exec_hosts(MachineLocal, gmx_prep_cmd, 1, all_task, None)
    if len(all_task) == 1:
        # A single task does not need the batch dispatcher.
        exec_hosts(MachineLocal, gmx_run_cmd, temp_thread, all_task, None)
    else:
        exec_hosts_batch(exec_machine, gmx_run_cmd, temp_thread, all_task,
                         None)
Пример #2
0
def post_enhc(iter_index, json_file):
    """Post-process the enhanced-sampling tasks of one iteration.

    For every walker an empty ``confs`` directory is (re)created, then the
    trajectory split command (``jdata["gmx_split_traj"]``) is executed for
    all tasks matching ``[0-9]*[0-9]`` under ``<iter_name>/<enhc_name>/``.
    Finally each walker's ``enhc_out_plm`` file is loaded and every column
    except the first is written to ``enhc_out_angle``.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file; the keys
            ``gmx_split_traj`` and ``numb_walkers`` are read.
    """
    iter_name = make_iter_name(iter_index)
    work_path = iter_name + "/" + enhc_name + "/"

    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    gmx_split_cmd = cmd_append_log(jdata["gmx_split_traj"], "gmx_split.log")

    all_task = sorted(glob.glob(work_path + "/[0-9]*[0-9]"))

    numb_walkers = jdata["numb_walkers"]
    walker_paths = [
        work_path + make_walker_name(ii) + "/" for ii in range(numb_walkers)
    ]

    # Reset the per-walker "confs" directory.  Explicit paths are used
    # instead of os.chdir so a failure cannot leave the process in the
    # wrong working directory.  os.mkdir (not makedirs) keeps the original
    # behaviour of failing when the walker directory itself is missing.
    for walker_path in walker_paths:
        confs_dir = os.path.join(walker_path, "confs")
        if os.path.isdir(confs_dir):
            shutil.rmtree(confs_dir)
        os.mkdir(confs_dir)

    exec_hosts(MachineLocal, gmx_split_cmd, 1, all_task, None)

    for walker_path in walker_paths:
        # ndmin=2 keeps a single-row file two-dimensional so the column
        # slice below does not raise IndexError.
        angles = np.loadtxt(walker_path + enhc_out_plm, ndmin=2)
        np.savetxt(walker_path + enhc_out_angle, angles[:, 1:], fmt="%.6f")
Пример #3
0
def run_res(iter_index,
            json_file,
            exec_machine=MachineLocal):
    """Run the restrained-simulation tasks of one iteration.

    Tasks are the paths matching ``[0-9]*[0-9]`` under
    ``<iter_name>/<res_name>/``.  Nothing is done when there are none.

    * With ``jdata["batch_jobs"]`` set, every task is prepared locally and
      the run command is submitted through ``exec_batch_group``.
    * Otherwise tasks that already contain ``confout.gro`` are skipped,
      tasks containing ``state.cpt`` are continued with ``-cpi state.cpt``
      (no prep step), and the remaining tasks are prepared and run from
      scratch.  A single task runs on ``MachineLocal``; several tasks go
      through ``exec_hosts_batch`` on ``exec_machine``.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file.
        exec_machine: Machine used by ``exec_hosts_batch`` for multi-task
            runs in non-batch mode.

    Raises:
        RuntimeError: If the restrained-simulation directory does not exist.
    """
    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    gmx_prep = jdata["gmx_prep"]
    gmx_run = jdata["gmx_run"]
    res_thread = jdata["res_thread"]
    gmx_run = gmx_run + (" -nt %d " % res_thread)
    gmx_run = gmx_run + " -plumed " + res_plm
    gmx_cont_run = gmx_run + " -cpi state.cpt "
    gmx_prep_log = "gmx_grompp.log"
    gmx_run_log = "gmx_mdrun.log"
    gmx_prep_cmd = cmd_append_log(gmx_prep, gmx_prep_log)
    gmx_run_cmd = cmd_append_log(gmx_run, gmx_run_log)
    gmx_cont_run_cmd = cmd_append_log(gmx_cont_run, gmx_run_log)
    res_group_size = jdata['res_group_size']
    batch_jobs = jdata['batch_jobs']
    batch_time_limit = jdata['batch_time_limit']
    batch_modules = jdata['batch_modules']
    batch_sources = jdata['batch_sources']

    iter_name = make_iter_name(iter_index)
    res_path = iter_name + "/" + res_name + "/"

    if not os.path.isdir(res_path):
        raise RuntimeError("do not see any restrained simulation (%s)." % res_path)

    all_task_propose = glob.glob(res_path + "/[0-9]*[0-9]")
    if len(all_task_propose) == 0:
        return
    all_task_propose.sort()

    if batch_jobs:
        # Batch mode submits every proposed task, finished or not.
        exec_hosts(MachineLocal, gmx_prep_cmd, 1, all_task_propose, None)
        exec_batch_group(gmx_run_cmd,
                         res_thread,
                         1,
                         all_task_propose,
                         task_args=None,
                         group_size=res_group_size,
                         time_limit=batch_time_limit,
                         modules=batch_modules,
                         sources=batch_sources)
        return

    # Split unfinished tasks into fresh starts and checkpoint restarts.
    all_task = []
    all_cont_task = []
    for task in all_task_propose:
        if os.path.isfile(os.path.join(task, "confout.gro")):
            continue  # already finished
        if os.path.isfile(os.path.join(task, "state.cpt")):
            all_cont_task.append(task)
        else:
            all_task.append(task)

    if all_task:
        exec_hosts(MachineLocal, gmx_prep_cmd, 1, all_task, None)
        if len(all_task) == 1:
            exec_hosts(MachineLocal, gmx_run_cmd, res_thread, all_task, None)
        else:
            exec_hosts_batch(exec_machine, gmx_run_cmd, res_thread, all_task, None)

    if all_cont_task:
        if len(all_cont_task) == 1:
            exec_hosts(MachineLocal, gmx_cont_run_cmd, res_thread, all_cont_task, None)
        else:
            exec_hosts_batch(exec_machine, gmx_cont_run_cmd, res_thread, all_cont_task, None)
Пример #4
0
def post_res(iter_index, json_file):
    """Collect centers and forces of the restrained simulations into ``data.raw``.

    The force-computation script (``./cmpf.sh`` when
    ``jdata["res_cmpf_error"]`` is truthy, ``./cmpf.py`` otherwise) is run
    for every task matching ``[0-9]*[0-9]`` under
    ``<iter_name>/<res_name>/``.  Each task's ``centers.out`` and
    ``force.out`` are then loaded, stacked row-wise and written side by
    side to ``<res_path>/data.raw``.  When there are no tasks an empty
    ``data.raw`` is written instead.  The min/max/average force norm is
    reported through ``log_task``.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file; ``res_cmpf_error`` is read.
    """
    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    res_cmpf_error = jdata["res_cmpf_error"]

    iter_name = make_iter_name(iter_index)
    res_path = iter_name + "/" + res_name + "/"

    all_task = glob.glob(res_path + "/[0-9]*[0-9]")
    if len(all_task) == 0:
        np.savetxt(res_path + 'data.raw', [], fmt="%.6e")
        return
    all_task.sort()

    cmpf_cmd = "./cmpf.sh" if res_cmpf_error else "./cmpf.py"
    cmpf_cmd = cmd_append_log(cmpf_cmd, "cmpf.log")

    exec_hosts(MachineLocal, cmpf_cmd, 1, all_task, None)

    # Files are read through explicit paths rather than os.chdir, so a
    # failed load or assertion cannot leave the process inside a task
    # directory.  Chunks are gathered in lists and concatenated once to
    # avoid re-copying the accumulated array on every iteration.
    center_chunks = []
    force_chunks = []
    ndim = 0
    for work_path in all_task:
        this_centers = np.loadtxt(os.path.join(work_path, 'centers.out'))
        this_force = np.loadtxt(os.path.join(work_path, 'force.out'))
        ndim = this_force.size
        assert (ndim == this_centers.size
                ), "center size is diff to force size in " + work_path
        center_chunks.append(np.ravel(this_centers))
        force_chunks.append(np.ravel(this_force))

    centers = np.reshape(np.concatenate(center_chunks), [-1, ndim])
    force = np.reshape(np.concatenate(force_chunks), [-1, ndim])
    data = np.concatenate((centers, force), axis=1)
    np.savetxt(res_path + 'data.raw', data, fmt="%.6e")

    norm_force = np.linalg.norm(force, axis=1)
    log_task("min|f| = %e  max|f| = %e  avg|f| = %e" %
             (np.min(norm_force), np.max(norm_force), np.average(norm_force)))
Пример #5
0
def run_enhc(iter_index, json_file):
    """Run the GROMACS prep and run commands for the enhanced-sampling tasks.

    Tasks are the paths matching ``[0-9]*[0-9]`` under
    ``<iter_name>/<enhc_name>/``.  The PLUMED input appended to the run
    command is ``enhc_plm`` when walker 0 contains at least one ``*.pb``
    file and ``enhc_bf_plm`` otherwise.  Prep always runs through
    ``exec_hosts`` on ``MachineLocal``; the run step goes through
    ``exec_batch`` when ``jdata["batch_jobs"]`` is set, on ``MachineLocal``
    for a single task, and through ``exec_hosts_batch`` on the module-level
    ``exec_machine`` otherwise.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file.
    """
    iter_name = make_iter_name(iter_index)
    work_path = iter_name + "/" + enhc_name + "/"

    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    gmx_prep = jdata["gmx_prep"]
    gmx_run = jdata["gmx_run"]
    enhc_thread = jdata["bias_thread"]
    gmx_run = gmx_run + (" -nt %d " % enhc_thread)

    # assuming at least one walker: walker 0 decides which PLUMED file is
    # used for all tasks.
    graph_files = glob.glob(work_path + (make_walker_name(0)) + "/*.pb")
    plumed_file = enhc_plm if len(graph_files) != 0 else enhc_bf_plm
    gmx_run = gmx_run + " -plumed " + plumed_file

    gmx_prep_cmd = cmd_append_log(gmx_prep, "gmx_grompp.log")
    gmx_run_cmd = cmd_append_log(gmx_run, "gmx_mdrun.log")
    batch_jobs = jdata['batch_jobs']
    batch_time_limit = jdata['batch_time_limit']
    batch_modules = jdata['batch_modules']
    batch_sources = jdata['batch_sources']

    all_task = sorted(glob.glob(work_path + "/[0-9]*[0-9]"))

    global exec_machine
    exec_hosts(MachineLocal, gmx_prep_cmd, 1, all_task, None)
    if batch_jobs:
        exec_batch(gmx_run_cmd,
                   enhc_thread,
                   1,
                   all_task,
                   task_args=None,
                   time_limit=batch_time_limit,
                   modules=batch_modules,
                   sources=batch_sources)
    elif len(all_task) == 1:
        exec_hosts(MachineLocal, gmx_run_cmd, enhc_thread, all_task, None)
    else:
        exec_hosts_batch(exec_machine, gmx_run_cmd, enhc_thread, all_task,
                         None)
Пример #6
0
def post_temp(iter_index, json_file):
    """Post-process the temp tasks and decide each walker's next temperature.

    For every walker an empty ``confs`` directory is (re)created and the
    trajectory split command is run for all tasks matching
    ``[0-9]*[0-9]`` under ``<iter_name>/<temp_name>/``.  Then, per walker,
    every column but the first of ``temp_out_plm`` is written to
    ``temp_out_angle``.  If the walker directory holds ``*.pb`` files,
    ``make_next_temp`` supplies the next temperature and ``model.devi`` /
    ``dec_list`` are written; otherwise the next temperature equals the
    value read from ``cur.temp``.  The result is stored in ``next.temp``.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file.

    Returns:
        ``False`` when every walker has both its current and next
        temperature equal to ``jdata["max_temp"]`` (the iteration stops),
        ``True`` otherwise.
    """
    iter_name = make_iter_name(iter_index)
    work_path = iter_name + "/" + temp_name + "/"

    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    gmx_split = jdata["gmx_split_traj"]
    gmx_split_cmd = cmd_append_log(gmx_split, "gmx_split.log")
    temp_test_intvl = jdata['temp_test_intvl']
    dev_trust_lvl = jdata['dev_trust_lvl']
    temp_incr_ratio = jdata['temp_incr_ratio']
    temp_decr_ratio = jdata['temp_decr_ratio']
    temp_incr_lvl = jdata['temp_incr_lvl']
    temp_decr_lvl = jdata['temp_decr_lvl']
    phys_temp = jdata['phys_temp']
    max_temp = jdata['max_temp']

    all_task = sorted(glob.glob(work_path + "/[0-9]*[0-9]"))

    numb_walkers = jdata["numb_walkers"]
    walker_paths = [
        work_path + make_walker_name(ii) + "/" for ii in range(numb_walkers)
    ]

    # Reset the per-walker "confs" directory through explicit paths, so a
    # failure cannot leave the process in the wrong working directory.
    # os.mkdir (not makedirs) still fails when the walker directory itself
    # is missing.
    for walker_path in walker_paths:
        confs_dir = os.path.join(walker_path, "confs")
        if os.path.isdir(confs_dir):
            shutil.rmtree(confs_dir)
        os.mkdir(confs_dir)

    exec_hosts(MachineLocal, gmx_split_cmd, 1, all_task, None)

    walker_stop_flags = []
    for walker_path in walker_paths:
        # ndmin=2 keeps a single-row file two-dimensional so the column
        # slice below does not raise IndexError.
        angles = np.loadtxt(walker_path + temp_out_plm, ndmin=2)
        angles = angles[:, 1:]
        np.savetxt(walker_path + temp_out_angle, angles, fmt="%.6f")
        graph_files = glob.glob(walker_path + "/*.pb")
        cur_temp = np.loadtxt(os.path.join(walker_path, 'cur.temp'))
        if len(graph_files) != 0:
            next_temp, dec_list, model_dev = make_next_temp(
                graph_files,
                cur_temp,
                angles,
                temp_test_intvl,
                dev_trust_lvl,
                temp_incr_ratio,
                temp_decr_ratio,
                temp_incr_lvl,
                temp_decr_lvl,
                phys_temp,
                max_temp)
            # Store the deviation as an extra column next to the angles.
            model_dev = np.concatenate(
                (angles, np.reshape(model_dev, [-1, 1])), axis=1)
            np.savetxt(os.path.join(walker_path, 'model.devi'), model_dev)
            np.savetxt(os.path.join(walker_path, 'dec_list'), dec_list)
        else:
            next_temp = cur_temp
        np.savetxt(os.path.join(walker_path, 'next.temp'), [next_temp])

        walker_stop_flags.append(
            bool(cur_temp == max_temp and next_temp == max_temp))

    # return if the iteration continues
    return not all(walker_stop_flags)
Пример #7
0
def run_enhc(iter_index, json_file):
    """Run the GROMACS prep and run commands for the enhanced-sampling tasks.

    Tasks are the directories matching ``[0-9]*[0-9]`` under
    ``<iter_name>/<enhc_name>/``.  When ``jdata["gmx_posre"]`` is truthy
    (default ``False``) the prep command gets
    ``-f grompp_restraint.mdp -r conf_init.gro`` appended.  The PLUMED
    input appended to the run command is ``enhc_plm`` when walker 0
    contains at least one ``*.pb`` file and ``enhc_bf_plm`` otherwise.
    Prep always runs through ``exec_hosts`` on ``MachineLocal``; the run
    step goes through ``exec_batch`` when ``jdata["batch_jobs"]`` is set,
    on ``MachineLocal`` for a single task, and through
    ``exec_hosts_batch`` on the module-level ``exec_machine`` otherwise.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file.
    """
    iter_name = make_iter_name(iter_index)
    work_path = iter_name + "/" + enhc_name + "/"

    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    bPosre = jdata.get("gmx_posre", False)
    gmx_prep = jdata["gmx_prep"]
    if bPosre:
        gmx_prep += " -f grompp_restraint.mdp -r conf_init.gro"
    gmx_run = jdata["gmx_run"]
    enhc_thread = jdata["bias_thread"]
    gmx_run = gmx_run + (" -nt %d " % enhc_thread)

    # assuming at least one walker: walker 0 decides which PLUMED file is
    # used for all tasks.
    graph_files = glob.glob(work_path + (make_walker_name(0)) + "/*.pb")
    plumed_file = enhc_plm if len(graph_files) != 0 else enhc_bf_plm
    gmx_run = gmx_run + " -plumed " + plumed_file

    gmx_prep_cmd = cmd_append_log(gmx_prep, "gmx_grompp.log")
    gmx_run_cmd = cmd_append_log(gmx_run, "gmx_mdrun.log")
    batch_jobs = jdata['batch_jobs']
    batch_time_limit = jdata['batch_time_limit']
    batch_modules = jdata['batch_modules']
    batch_sources = jdata['batch_sources']

    # Only directories count as tasks; plain files matching the pattern
    # are ignored.
    all_task = sorted(
        path for path in glob.glob(work_path + "/[0-9]*[0-9]")
        if os.path.isdir(path))

    global exec_machine
    exec_hosts(MachineLocal, gmx_prep_cmd, 1, all_task, None)
    if batch_jobs:
        exec_batch(gmx_run_cmd,
                   enhc_thread,
                   1,
                   all_task,
                   task_args=None,
                   time_limit=batch_time_limit,
                   modules=batch_modules,
                   sources=batch_sources)
    elif len(all_task) == 1:
        exec_hosts(MachineLocal, gmx_run_cmd, enhc_thread, all_task, None)
    else:
        exec_hosts_batch(exec_machine, gmx_run_cmd, enhc_thread, all_task,
                         None)
Пример #8
0
def run_train(iter_index, json_file, exec_machine=MachineLocal):
    """Train and freeze the models of one iteration.

    If ``data/data.new.raw`` in the training directory is empty, no
    training is done; every ``*.pb`` file of the previous iteration's
    training directory is symlinked into the current one instead.

    Otherwise ``../main.py`` is run in each of the ``numb_model`` task
    directories (``000``, ``001``, ...), followed by ``../freeze.py``, and
    ``graph.NNN.pb`` symlinks to ``NNN/graph.pb`` are created.  Iterations
    with ``iter_index >= jdata["res_iter"]`` use the ``res_*``
    hyper-parameters and add ``--restart --use-mix --old-ratio``.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file.
        exec_machine: Machine used by ``exec_hosts_batch`` when several
            models are trained without batch jobs.
    """
    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    numb_model = jdata["numb_model"]
    train_thread = jdata["train_thread"]
    res_iter = jdata["res_iter"]

    iter_name = make_iter_name(iter_index)
    train_path = iter_name + "/" + train_name + "/"
    base_path = os.getcwd() + "/"

    # check if new data is empty
    new_data_file = os.path.join(train_path, 'data/data.new.raw')
    if os.stat(new_data_file).st_size == 0:
        prev_train_path = base_path + make_iter_name(
            iter_index - 1) + "/" + train_name + "/"
        for prev_model in glob.glob(prev_train_path + "*.pb"):
            os.symlink(prev_model,
                       os.path.join(train_path, os.path.basename(prev_model)))
        return

    neurons = jdata["neurons"]
    batch_size = jdata["batch_size"]
    if iter_index < res_iter:
        numb_epoches = jdata["numb_epoches"]
        starter_lr = jdata["starter_lr"]
        decay_steps = jdata["decay_steps"]
        decay_rate = jdata["decay_rate"]
        cmdl_args = ""
    else:
        numb_epoches = jdata["res_numb_epoches"]
        starter_lr = jdata["res_starter_lr"]
        decay_steps = jdata["res_decay_steps"]
        decay_rate = jdata["res_decay_rate"]
        old_ratio = jdata["res_olddata_ratio"]
        cmdl_args = " --restart --use-mix --old-ratio %f " % old_ratio

    if jdata["resnet"]:
        cmdl_args += " --resnet "
    cmdl_args += " -n " + "".join("%d " % nn for nn in neurons)
    cmdl_args += " -b " + str(batch_size)
    cmdl_args += " -e " + str(numb_epoches)
    cmdl_args += " -l " + str(starter_lr)
    cmdl_args += " --decay-steps " + str(decay_steps)
    cmdl_args += " --decay-rate " + str(decay_rate)

    train_cmd = "../main.py -t %d" % train_thread
    train_cmd += cmdl_args
    train_cmd = cmd_append_log(train_cmd, "train.log")
    freez_cmd = "../freeze.py -o graph.pb"
    freez_cmd = cmd_append_log(freez_cmd, "freeze.log")
    task_dirs = [("%03d" % ii) for ii in range(numb_model)]

    batch_jobs = jdata['batch_jobs']
    batch_time_limit = jdata['batch_time_limit']
    batch_modules = jdata['batch_modules']
    batch_sources = jdata['batch_sources']

    os.chdir(train_path)
    try:
        if batch_jobs:
            exec_batch(train_cmd,
                       train_thread,
                       1,
                       task_dirs,
                       task_args=None,
                       time_limit=batch_time_limit,
                       modules=batch_modules,
                       sources=batch_sources)
        elif len(task_dirs) == 1:
            exec_hosts(MachineLocal, train_cmd, train_thread, task_dirs, None)
        else:
            exec_hosts_batch(exec_machine, train_cmd, train_thread, task_dirs,
                             None)

        exec_hosts(MachineLocal, freez_cmd, 1, task_dirs, None)
        for ii in range(numb_model):
            os.symlink("%03d/graph.pb" % ii, "graph.%03d.pb" % ii)
    finally:
        # Always restore the working directory, even if training or
        # freezing raised.
        os.chdir(base_path)
Пример #9
0
def make_res(iter_index, json_file):
    """Create the restrained-simulation task directories of one iteration.

    For every walker of the enhanced-sampling stage:

    1. Frames are selected.  If the walker directory holds ``*.pb`` files,
       ``test.std.py`` is run there and the indices are read from
       ``sel.out``; otherwise every ``conf*gro`` frame is selected.
    2. When more than one frame is selected they are clustered, either by
       the external ``cluster_cv.py`` script (``shell_clustering`` truthy)
       or by ``sel_from_cluster`` (``shell_clustering == False``), in which
       case at most the last ``max_sel`` entries are kept.
    3. One directory ``<walker>.<frame>`` per remaining frame is created
       under ``<iter_name>/<res_name>/``, filled from the templates,
       linked to the frame's ``conf%d.gro`` and configured through
       ``general_mkres.sh``, ``make_grompp_res`` and a ``STRIDE=``
       replacement in ``res_plm``.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file.

    Returns:
        ``True`` if at least one walker's flag is still set.  A flag is
        cleared only when the walker reaches the task-creation step with
        zero conformations.
    """
    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    numb_walkers = jdata["numb_walkers"]
    template_dir = jdata["template_dir"]
    nsteps = jdata["res_nsteps"]
    frame_freq = jdata["res_frame_freq"]
    sel_threshold = jdata["sel_threshold"]
    max_sel = jdata["max_sel"]
    cluster_threshold = jdata["cluster_threshold"]

    base_path = os.getcwd() + "/"
    iter_name = make_iter_name(iter_index)
    enhc_path = iter_name + "/" + enhc_name + "/"
    # Resolve the enhanced-sampling directory to an absolute path.
    os.chdir(enhc_path)
    enhc_path = os.getcwd() + "/"
    os.chdir(base_path)
    templ_mol_path = template_dir + "/" + mol_name + "/"
    templ_res_path = template_dir + "/" + res_name + "/"
    res_path = iter_name + "/" + res_name + "/"
    create_path(res_path)

    ret_list = [True for ii in range(numb_walkers)]

    # sel angles
    ## check if we have graph in enhc
    for walker_idx in range(numb_walkers):
        walker_path = enhc_path + walker_format % walker_idx + "/"
        graph_files = glob.glob(walker_path + "/*.pb")
        if len(graph_files) != 0:
            os.chdir(walker_path)
            try:
                sel_cmd = "python3 test.std.py -m *.pb -t %f -d %s --output sel.out --output-angle sel.angle.out" % (
                    sel_threshold, enhc_out_angle)
                sel_cmd = cmd_append_log(sel_cmd, "sel.log")
                log_task("select with threshold %f" % sel_threshold)
                log_task(sel_cmd)
                sp.check_call(sel_cmd, shell=True)
            finally:
                # Restore the working directory even if selection failed.
                os.chdir(base_path)
            sel_idx = []
            sel_angles = np.array([])
            with open(walker_path + "sel.out") as sel_fp:
                for line in sel_fp:
                    sel_idx += [int(x) for x in line.split()]
            if len(sel_idx) != 0:
                sel_angles = np.reshape(
                    np.loadtxt(walker_path + 'sel.angle.out'), [-1, cv_dim])
            else:
                # NOTE(review): this walker is skipped while its ret_list
                # entry stays True - confirm that this is intended.
                np.savetxt(walker_path + 'num_of_cluster.dat', [0], fmt='%d')
                np.savetxt(walker_path + 'cls.sel.out', [], fmt='%d')
                continue

        else:
            # No model available: select every frame.
            sel_idx = range(
                len(glob.glob(walker_path + enhc_out_conf + "conf*gro")))
            sel_angles = np.loadtxt(walker_path + enhc_out_angle)
            sel_angles = np.reshape(sel_angles, [-1, cv_dim])
            np.savetxt(walker_path + 'sel.out', sel_idx, fmt='%d')
            np.savetxt(walker_path + 'sel.angle.out', sel_angles, fmt='%.6f')
        conf_start = 0
        conf_every = 1

        # The builtin int replaces the np.int alias, which was removed in
        # NumPy 1.24.
        sel_idx = np.array(sel_idx, dtype=int)
        assert (len(sel_idx) == sel_angles.shape[0])
        if shell_clustering and len(sel_idx) > 1:
            cmd_sel_from_cluster = (
                base_path +
                "template/tools/cluster_cv.py -i %s -c %s -t %f --output-idx %s  --output-cv %s"
                % (walker_path + 'sel.out', walker_path + 'sel.angle.out',
                   cluster_threshold, walker_path + 'cls.sel.out',
                   walker_path + 'cls.sel.angle.out'))
            sp.check_call(cmd_sel_from_cluster, shell=True)
            # ndmin=1 keeps a single index one-dimensional so len() and
            # the fancy indexing below keep working.
            sel_idx = np.loadtxt(walker_path + 'cls.sel.out',
                                 dtype=int,
                                 ndmin=1)
        elif shell_clustering == False and len(sel_idx) > 1:
            cls_sel = sel_from_cluster(sel_angles, cluster_threshold)
            np.savetxt(walker_path + 'num_of_cluster.dat', [len(set(cls_sel))],
                       fmt='%d')
            if len(cls_sel) > max_sel:
                cls_sel = cls_sel[-max_sel:]
            sel_idx = sel_idx[cls_sel]
            np.savetxt(walker_path + 'cls.sel.angle.0.out',
                       sel_angles[cls_sel],
                       fmt='%.6f')
        elif len(sel_idx) == 1:
            np.savetxt(walker_path + 'num_of_cluster.dat', [1], fmt='%d')
        res_angles = np.loadtxt(walker_path + enhc_out_angle)
        res_angles = np.reshape(res_angles, [-1, cv_dim])
        res_angles = res_angles[sel_idx]
        np.savetxt(walker_path + 'cls.sel.out', sel_idx, fmt='%d')
        np.savetxt(walker_path + 'cls.sel.angle.out', res_angles, fmt='%.6f')
        res_confs = [
            walker_path + enhc_out_conf + ("conf%d.gro" % ii) for ii in sel_idx
        ]

        assert (len(res_confs) == res_angles.shape[0]
                ), "number of enhc out conf does not match out angle"
        assert (len(sel_idx) == res_angles.shape[0]
                ), "number of enhc out conf does not numb sel"
        nconf = len(res_confs)
        if nconf == 0:
            ret_list[walker_idx] = False
            continue

        sel_list = ",".join(str(sel_idx[ii]) for ii in range(nconf))
        log_task("selected %d confs, indexes: %s" % (nconf, sel_list))

        for ii in range(conf_start, nconf, conf_every):
            work_path = res_path + ((walker_format + ".%06d") %
                                    (walker_idx, sel_idx[ii])) + "/"
            os.makedirs(work_path)
            copy_file_list(mol_files, templ_mol_path, work_path)
            copy_file_list(res_files, templ_res_path, work_path)
            conf_file = walker_path + enhc_out_conf + ("conf%d.gro" %
                                                       sel_idx[ii])
            if os.path.exists(work_path + "conf.gro"):
                os.remove(work_path + "conf.gro")
            conf_file = os.path.abspath(conf_file)
            # The link target is expressed relative to the task directory,
            # so the link is created from inside that directory.
            tmp_cwd = os.getcwd()
            os.chdir(work_path)
            try:
                os.symlink(os.path.relpath(conf_file), "conf.gro")
            finally:
                os.chdir(tmp_cwd)

        task_dirs = []
        task_args = []
        for ii in range(conf_start, nconf, conf_every):
            dir_str = ((walker_format + ".%06d") % (walker_idx, sel_idx[ii]))
            arg_str = np.array2string(res_angles[ii],
                                      formatter={
                                          'float_kind': lambda x: "%.6f" % x
                                      }).replace("[",
                                                 "").replace("]", "").replace(
                                                     "\n", " ")
            task_dirs.append(dir_str)
            task_args.append(arg_str)
            log_task(task_dirs[-1] + ": " + task_args[-1])

        os.chdir(res_path)
        try:
            exec_hosts(MachineLocal, "./general_mkres.sh", 1, task_dirs,
                       task_args)
        finally:
            os.chdir(base_path)

        for ii in range(conf_start, nconf, conf_every):
            work_path = res_path + ((walker_format + ".%06d") %
                                    (walker_idx, sel_idx[ii])) + "/"
            mol_conf_file = work_path + "grompp.mdp"
            make_grompp_res(mol_conf_file, nsteps, frame_freq)
            replace(work_path + res_plm, "STRIDE=[^ ]* ",
                    "STRIDE=%d " % frame_freq)

    return any(ret_list)
Пример #10
0
def run_train(iter_index,
              json_file,
              exec_machine=MachineLocal,
              data_dir="data",
              data_name="data",
              sits_iter=False):
    """Train and freeze the models of one iteration.

    The training directory is ``<iter_name>/<train_name>``; when
    ``jdata["sits_settings"]`` is present and ``sits_iter`` is truthy the
    iteration name is prefixed with ``sits``.

    If ``<data_dir>/<data_name>.new.raw`` is missing or empty and
    ``sits_iter`` is falsy, no training is done; every ``*.pb`` file of
    the previous iteration's training directory is symlinked into the
    current one instead.

    Otherwise ``../main.py`` is run in each of the ``numb_model`` task
    directories (``000``, ``001``, ...), ``../freeze.py`` is run task by
    task, and ``graph.NNN.pb`` symlinks to ``NNN/graph.pb`` are created.
    Iterations with ``iter_index >= jdata["res_iter"]`` use the ``res_*``
    hyper-parameters and add ``--restart --use-mix --old-ratio``.

    Args:
        iter_index: Iteration index handed to ``make_iter_name``.
        json_file: Path of the JSON settings file.
        exec_machine: Machine used by ``exec_hosts_batch`` when several
            models are trained without batch jobs.
        data_dir: Directory, relative to the training path, holding the data.
        data_name: Base name of the data file (``<data_name>.new.raw``).
        sits_iter: Whether this call belongs to a SITS iteration.
    """
    # Context manager so the settings file is closed right after parsing.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    cmd_env = jdata.get("cmd_sources", [])
    sits_param = jdata.get("sits_settings", None)

    numb_model = jdata["numb_model"]
    train_thread = jdata["train_thread"]
    res_iter = jdata["res_iter"]

    iter_name = make_iter_name(iter_index)
    if sits_param is not None and sits_iter:
        iter_name = join("sits", make_iter_name(iter_index))
    train_path = join(iter_name, train_name)
    base_path = os.getcwd() + "/"

    # check if new data is empty (a missing file counts as empty)
    new_data_file = os.path.join(train_path, data_dir, data_name + '.new.raw')
    if os.path.exists(new_data_file):
        filesize = os.stat(new_data_file).st_size
    else:
        filesize = 0
    # Logical "and" replaces the bitwise "&" originally applied to booleans.
    if filesize == 0 and not sits_iter:
        prev_train_path = join(base_path, make_iter_name(iter_index - 1),
                               train_name) + "/"
        for prev_model in glob.glob(join(prev_train_path, "*.pb")):
            os.symlink(prev_model,
                       join(train_path, os.path.basename(prev_model)))
        return

    neurons = jdata["neurons"]
    batch_size = jdata["batch_size"]
    if iter_index < res_iter:
        numb_epoches = jdata["numb_epoches"]
        starter_lr = jdata["starter_lr"]
        decay_steps = jdata["decay_steps"]
        decay_rate = jdata["decay_rate"]
        cmdl_args = ""
    else:
        numb_epoches = jdata["res_numb_epoches"]
        starter_lr = jdata["res_starter_lr"]
        decay_steps = jdata["res_decay_steps"]
        decay_rate = jdata["res_decay_rate"]
        old_ratio = jdata["res_olddata_ratio"]
        cmdl_args = " --restart --use-mix --old-ratio %f " % old_ratio

    if jdata["resnet"]:
        cmdl_args += " --resnet "
    cmdl_args += " -n " + "".join("%d " % nn for nn in neurons)
    cmdl_args += " -b " + str(batch_size)
    cmdl_args += " -e " + str(numb_epoches)
    cmdl_args += " -l " + str(starter_lr)
    cmdl_args += " --decay-steps " + str(decay_steps)
    cmdl_args += " --decay-rate " + str(decay_rate)

    train_cmd = "../main.py -t %d" % train_thread
    train_cmd += cmdl_args
    train_cmd = cmd_append_log(train_cmd, "train.log", env=cmd_env)
    freez_cmd = "../freeze.py -o graph.pb"
    freez_cmd = cmd_append_log(freez_cmd, "freeze.log", env=cmd_env)
    task_dirs = [("%03d" % ii) for ii in range(numb_model)]

    batch_jobs = jdata['batch_jobs']
    batch_time_limit = jdata['batch_time_limit']
    batch_modules = jdata['batch_modules']
    batch_sources = jdata['batch_sources']

    os.chdir(train_path)
    try:
        if batch_jobs:
            exec_batch(train_cmd,
                       train_thread,
                       1,
                       task_dirs,
                       task_args=None,
                       time_limit=batch_time_limit,
                       modules=batch_modules,
                       sources=batch_sources)
        elif len(task_dirs) == 1:
            exec_hosts(MachineLocal, train_cmd, train_thread, task_dirs, None)
        else:
            exec_hosts_batch(exec_machine, train_cmd, train_thread, task_dirs,
                             None)

        # Freeze one task per exec_hosts call.
        for task_dir in task_dirs:
            exec_hosts(MachineLocal, freez_cmd, 1, [task_dir], None)
        for ii in range(numb_model):
            os.symlink("%03d/graph.pb" % ii, "graph.%03d.pb" % ii)
    finally:
        # Always restore the working directory, even if training or
        # freezing raised.
        os.chdir(base_path)
Пример #11
0
def make_train_eff(sits_iter_index, json_file):
    """Recompute force data for earlier iterations and set up SITS training.

    Three stages are performed, all relative to the current directory:

    1. For every earlier SITS iteration ``j`` the RiD iterations in
       ``range(rid_iter_begin, rid_iter_end)`` (read from
       ``sits/<iter j>/rid_iter_begin.dat`` and ``rid_iter_end.dat``) are
       revisited: ``cmpf_wtij.py`` is run in every task directory of
       ``<iter>/<res_name>/`` and the resulting ``centers.out`` and
       ``force.out`` files are gathered into
       ``<iter>/<res_name>/data%03d.raw``.
    2. For every RiD iteration below the last ``rid_iter_end`` the raw data
       is accumulated under ``<iter>/<train_name>/data/`` (``.old.raw`` is
       the previous iteration's accumulated file, ``.new.raw`` this
       iteration's, and ``.raw`` their concatenation).
    3. A training directory ``sits/<iter sits_iter_index>/<train_name>`` is
       created from the template, with one sub-directory per model that is
       seeded with the checkpoints of the last RiD iteration.

    Args:
        sits_iter_index: Index of the SITS iteration being prepared.  When
            it is ``<= 0`` there is no earlier iteration to draw from and
            the function only logs a message and returns.
        json_file: Path of the JSON parameter file.
    """
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    cmd_env = jdata.get("cmd_sources", [])
    template_dir = jdata["template_dir"]
    numb_model = jdata["numb_model"]

    if sits_iter_index <= 0:
        # Nothing below can run without an earlier SITS iteration: the data
        # loops are empty and the training path would never be assigned
        # (this used to surface as a NameError in the per-model loop).
        log_task("no previous sits iteration, skip making train data")
        return

    sits_iter_name = join("sits", make_iter_name(sits_iter_index))
    data_dir = "data"
    data_name = "data%03d" % (sits_iter_index + 1)
    base_path = os.getcwd() + "/"

    # Stage 1: recompute the forces of all earlier RiD iterations.
    for j in range(sits_iter_index):
        sits_iterj_name = join("sits", make_iter_name(j))
        sits_rid_iter = np.array([
            np.loadtxt(join(sits_iterj_name, "rid_iter_begin.dat")),
            np.loadtxt(join(sits_iterj_name, "rid_iter_end.dat"))
        ]).astype(int)
        for iter_index in range(sits_rid_iter[0], sits_rid_iter[1]):
            iter_name = make_iter_name(iter_index)
            res_path = iter_name + "/" + res_name + "/"
            raw_file = res_path + data_name + '.raw'

            all_task = glob.glob(res_path + "/[0-9]*[0-9]")
            if len(all_task) == 0:
                # No restrained simulations: leave an empty data file.
                np.savetxt(raw_file, [], fmt="%.6e")
                continue
            all_task.sort()
            cmpf_cmd = "python cmpf_wtij.py -d %s -i %s -j %s" % (join(
                base_path,
                "sits"), make_iter_name(sits_iter_index), make_iter_name(j))
            cmpf_cmd = cmd_append_log(cmpf_cmd, "cmpf.log", env=cmd_env)

            exec_hosts(MachineLocal, cmpf_cmd, 1, all_task, None)

            centers = []
            force = []
            ndim = 0
            for work_path in all_task:
                # Read through the joined path instead of chdir-ing, so a
                # failing load cannot leave the process in a task directory.
                this_centers = np.loadtxt(join(work_path, 'centers.out'))
                centers = np.append(centers, this_centers)
                this_force = np.loadtxt(join(work_path, 'force.out'))
                force = np.append(force, this_force)
                ndim = this_force.size
                assert (ndim == this_centers.size
                        ), "center size is diff to force size in " + work_path

            centers = np.reshape(centers, [-1, ndim])
            force = np.reshape(force, [-1, ndim])
            data = np.concatenate((centers, force), axis=1)
            np.savetxt(raw_file, data, fmt="%.6e")

            norm_force = np.linalg.norm(force, axis=1)
            log_task("min|f| = %e  max|f| = %e  avg|f| = %e" %
                     (np.min(norm_force), np.max(norm_force),
                      np.average(norm_force)))

    # Stage 2: accumulate the raw data iteration by iteration.  The upper
    # bound is the rid_iter_end of the last SITS iteration visited above.
    iter_end = int(sits_rid_iter[1])
    for iter_index in range(iter_end):
        iter_name = make_iter_name(iter_index)
        train_path = join(iter_name, train_name)
        data_path = join(train_path, data_dir)

        data_file = join(data_path, data_name + ".raw")
        data_old_file = join(data_path, data_name + ".old.raw")
        data_new_file = join(data_path, data_name + ".new.raw")

        if not os.path.exists(data_path):
            os.makedirs(data_path)

        log_task("collect data upto %d" % (iter_index))
        this_raw = join(base_path, make_iter_name(iter_index), res_name,
                        data_name + ".raw")
        if iter_index == 0:
            # First iteration: the accumulated file is just a link to the
            # new data and the "old" data is an empty file.
            os.chdir(data_path)
            os.symlink(os.path.relpath(this_raw),
                       os.path.basename(data_new_file))
            os.symlink(os.path.basename(data_new_file),
                       os.path.basename(data_file))
            os.chdir(base_path)
            open(data_old_file, "w").close()
        else:
            prev_data_file = join(base_path, make_iter_name(iter_index - 1),
                                  train_name, data_dir, data_name + ".raw")
            os.chdir(data_path)
            os.symlink(os.path.relpath(prev_data_file),
                       os.path.basename(data_old_file))
            os.symlink(os.path.relpath(this_raw),
                       os.path.basename(data_new_file))
            os.chdir(base_path)
            # accumulated = old followed by new
            with open(data_file, "wb") as fo:
                with open(data_old_file,
                          "rb") as f0, open(data_new_file, "rb") as f1:
                    shutil.copyfileobj(f0, fo)
                    shutil.copyfileobj(f1, fo)

    # Stage 3: create the SITS training directory.
    train_path = join(sits_iter_name, train_name)
    data_path = join(train_path, data_dir)
    templ_train_path = join(template_dir, train_name)
    data_file_sits = join(data_path, data_name + ".raw")

    log_task("create train dirs")
    create_path(train_path)
    create_path(data_path)
    # data_file is the accumulated file of the last RiD iteration handled
    # in stage 2.
    shutil.copy(data_file, data_file_sits)
    copy_file_list(train_files, templ_train_path, train_path)
    # Raw strings: "\." is a regex escape, not a Python string escape.
    replace(join(train_path, "model.py"), r"\./data", "./" + data_dir)
    replace(join(train_path, "model.py"), r"data\.", data_name + ".")
    replace(join(train_path, "main.py"), r"\./data", "./" + data_dir)
    replace(join(train_path, "main.py"), r"data\.raw", data_name + ".raw")

    prev_train_path = os.path.abspath(
        make_iter_name(iter_end - 1) + "/" + train_name + "/") + "/"
    for ii in range(numb_model):
        work_path = join(train_path, ("%03d" % ii))
        old_model_path = join(work_path, "old_model")

        create_path(work_path)
        os.symlink(join("..", data_dir), join(work_path, data_dir))

        # Seed the model with the checkpoints of the last RiD iteration:
        # linked into old_model/ and copied into the work directory.
        prev_work_path = prev_train_path + ("%03d/" % ii)
        prev_model_files = glob.glob(
            join(prev_work_path,
                 "model.ckpt.*")) + [join(prev_work_path, "checkpoint")]
        create_path(old_model_path)
        os.chdir(old_model_path)
        for mfile in prev_model_files:
            os.symlink(os.path.relpath(mfile), os.path.basename(mfile))
        os.chdir(base_path)
        for mfile in prev_model_files:
            shutil.copy(mfile, work_path)
Пример #12
0
def post_res(iter_index, json_file, data_name="data"):
    """Compute mean forces for one iteration and collect them in a raw file.

    A force-computation command is run in every task directory of
    ``<iter>/<res_name>/`` and the ``centers.out`` / ``force.out`` files it
    leaves behind are gathered, one row per task, into
    ``<iter>/<res_name>/<data_name>.raw`` (centers followed by forces).
    When there are no task directories an empty raw file is written.

    The command is ``cmpf_wtij.py`` when the config has ``sits_settings``,
    otherwise ``cmpf.sh`` or ``cmpf.py`` depending on ``res_cmpf_error``.

    Args:
        iter_index: Index of the iteration to post-process.
        json_file: Path of the JSON parameter file.
        data_name: Base name (without ``.raw``) of the output file.
    """
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    cmd_env = jdata.get("cmd_sources", [])
    sits_param = jdata.get("sits_settings", None)
    res_cmpf_error = jdata["res_cmpf_error"]

    iter_name = make_iter_name(iter_index)
    res_path = iter_name + "/" + res_name + "/"
    base_path = os.getcwd() + "/"
    raw_file = res_path + data_name + '.raw'

    # Only directories count as tasks; stray files matching the pattern
    # are ignored.
    all_task = sorted(path
                      for path in glob.glob(res_path + "/[0-9]*[0-9]")
                      if os.path.isdir(path))
    if not all_task:
        np.savetxt(raw_file, [], fmt="%.6e")
        return

    if sits_param is not None:
        cmpf_cmd = "python cmpf_wtij.py -d %s -i %s" % (join(
            base_path, "sits"), ".")
    elif res_cmpf_error:
        cmpf_cmd = "bash cmpf.sh"
    else:
        cmpf_cmd = "python cmpf.py"
    cmpf_cmd = cmd_append_log(cmpf_cmd, "cmpf.log", env=cmd_env)

    exec_hosts(MachineLocal, cmpf_cmd, 1, all_task, None)

    centers = []
    force = []
    ndim = 0
    for work_path in all_task:
        # Read through the joined path instead of chdir-ing, so a failing
        # load cannot leave the process inside a task directory.
        this_centers = np.loadtxt(join(work_path, 'centers.out'))
        centers = np.append(centers, this_centers)
        this_force = np.loadtxt(join(work_path, 'force.out'))
        force = np.append(force, this_force)
        ndim = this_force.size
        assert (ndim == this_centers.size
                ), "center size is diff to force size in " + work_path

    # One row per task: ndim center values followed by ndim force values.
    centers = np.reshape(centers, [-1, ndim])
    force = np.reshape(force, [-1, ndim])
    data = np.concatenate((centers, force), axis=1)
    np.savetxt(raw_file, data, fmt="%.6e")

    norm_force = np.linalg.norm(force, axis=1)
    log_task("min|f| = %e  max|f| = %e  avg|f| = %e" %
             (np.min(norm_force), np.max(norm_force), np.average(norm_force)))
    print('save cmpf done!')
Пример #13
0
def run_res(iter_index, json_file, exec_machine=MachineLocal):
    """Run grompp and mdrun for the restrained simulations of one iteration.

    Task directories are the sub-directories of ``<iter>/<res_name>/``
    whose names match ``[0-9]*[0-9]``.  Nothing is done when there are none.

    * With ``batch_jobs`` enabled every task is prepared locally and then
      submitted through ``exec_batch_group``.
    * Otherwise tasks that already contain ``confout.gro`` are skipped,
      tasks that contain ``state.cpt`` are continued with ``-cpi
      state.cpt`` (without re-running grompp), and the remaining tasks are
      prepared and run from scratch.  A single task runs on
      ``MachineLocal``; several tasks go through ``exec_hosts_batch`` on
      ``exec_machine``.

    Args:
        iter_index: Index of the iteration to run.
        json_file: Path of the JSON parameter file.
        exec_machine: Machine used by ``exec_hosts_batch`` in non-batch
            mode when more than one task has to run.

    Raises:
        RuntimeError: If ``<iter>/<res_name>/`` does not exist.
    """
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    cmd_env = jdata.get("cmd_sources", [])
    sits_param = jdata.get("sits_settings", None)
    gmx_prep = jdata["gmx_prep"]
    bPosre = jdata.get("gmx_posre", False)
    if sits_param is not None:
        # SITS runs use their own mdp file (with position restraints if
        # requested) and, for custom energy groups, an index file.
        if bPosre:
            gmx_prep += " -f grompp_sits_restraint.mdp -r conf_init.gro"
        else:
            gmx_prep += " -f grompp_sits.mdp"
        if sits_param.get("sits_energrp", None) not in ["Protein", "MOL"]:
            gmx_prep += " -n index.ndx"
    gmx_run = jdata["gmx_run"]
    res_thread = jdata["res_thread"]
    gmx_run = gmx_run + (" -nt %d " % res_thread) + " -plumed " + res_plm
    gmx_cont_run = gmx_run + " -cpi state.cpt "
    gmx_run_log = "gmx_mdrun.log"
    gmx_prep_cmd = cmd_append_log(gmx_prep, "gmx_grompp.log", env=cmd_env)
    gmx_run_cmd = cmd_append_log(gmx_run, gmx_run_log, env=cmd_env)
    gmx_cont_run_cmd = cmd_append_log(gmx_cont_run, gmx_run_log, env=cmd_env)
    res_group_size = jdata['res_group_size']
    batch_jobs = jdata['batch_jobs']
    batch_time_limit = jdata['batch_time_limit']
    batch_modules = jdata['batch_modules']
    batch_sources = jdata['batch_sources']

    iter_name = make_iter_name(iter_index)
    res_path = iter_name + "/" + res_name + "/"

    if not os.path.isdir(res_path):
        raise RuntimeError("do not see any restrained simulation (%s)." %
                           res_path)

    all_task_propose = sorted(
        path for path in glob.glob(res_path + "/[0-9]*[0-9]")
        if os.path.isdir(path))
    if not all_task_propose:
        return

    if batch_jobs:
        exec_hosts(MachineLocal, gmx_prep_cmd, 1, all_task_propose, None)
        exec_batch_group(gmx_run_cmd,
                         res_thread,
                         1,
                         all_task_propose,
                         task_args=None,
                         group_size=res_group_size,
                         time_limit=batch_time_limit,
                         modules=batch_modules,
                         sources=batch_sources)
        return

    # Split unfinished tasks into fresh starts and checkpoint restarts.
    all_task = []
    all_cont_task = []
    for task in all_task_propose:
        if os.path.isfile(os.path.join(task, "confout.gro")):
            continue  # already finished
        if os.path.isfile(os.path.join(task, "state.cpt")):
            all_cont_task.append(task)
        else:
            all_task.append(task)

    if all_task:
        exec_hosts(MachineLocal, gmx_prep_cmd, 1, all_task, None)
        if len(all_task) == 1:
            exec_hosts(MachineLocal, gmx_run_cmd, res_thread, all_task, None)
        else:
            exec_hosts_batch(exec_machine, gmx_run_cmd, res_thread, all_task,
                             None)
    if len(all_cont_task) == 1:
        exec_hosts(MachineLocal, gmx_cont_run_cmd, res_thread,
                   all_cont_task, None)
    elif len(all_cont_task) > 1:
        exec_hosts_batch(exec_machine, gmx_cont_run_cmd, res_thread,
                         all_cont_task, None)
Пример #14
0
def make_res(iter_index, json_file):
    """Select conformations from each walker and set up restrained-MD tasks.

    For every walker directory under ``<iter>/<enhc_name>/``:

    1. Candidate frames are chosen.  If the walker directory holds ``*.pb``
       graph files, ``test.std.py`` is run and its ``sel.out`` /
       ``sel.angle.out`` are read; otherwise every ``conf*gro`` frame is a
       candidate and, for walker 0, the cluster threshold is tuned until
       the number of clusters falls inside ``init_numb_cluster`` and saved
       to ``cluster_threshold.dat``.
    2. The candidates are clustered (through ``cluster_cv.py`` when
       ``shell_clustering`` is set, else through ``sel_from_cluster``) and
       at most ``max_sel`` of them are kept in the latter case.
    3. One task directory ``<iter>/<res_name>/<walker>.<frame>/`` is
       created per kept frame from the templates, linked to the frame's
       ``.gro`` file, initialised by ``general_mkres.sh`` and its mdp /
       plumed files are updated with ``res_nsteps`` and ``res_frame_freq``.

    Args:
        iter_index: Index of the iteration to prepare.
        json_file: Path of the JSON parameter file.

    Returns:
        bool: ``False`` only if every walker ended with zero conformations
        after clustering (or there are no walkers); ``True`` otherwise.
        Walkers skipped because ``test.std.py`` selected nothing still
        count as ``True``.
    """
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    sits_param = jdata.get("sits_settings", None)
    if sits_param is not None:
        sits_param["nst-sits-enerd-out"] = jdata["res_frame_freq"]
    bPosre_res = jdata.get("gmx_posre_res", False)
    numb_walkers = jdata["numb_walkers"]
    template_dir = jdata["template_dir"]
    nsteps = jdata["res_nsteps"]
    frame_freq = jdata["res_frame_freq"]
    sel_threshold = jdata["sel_threshold"]
    max_sel = jdata["max_sel"]
    # Accepted [min, max] number of clusters when tuning the threshold.
    init_numb_cluster = [22, 30]
    sits_logs = ("log_nk.dat", "log_norm.dat")

    base_path = os.getcwd()
    iter_name = make_iter_name(iter_index)
    enhc_path = join(base_path, iter_name, enhc_name)

    templ_mol_path = join(template_dir, mol_name)
    templ_res_path = join(template_dir, res_name)
    res_path = join(iter_name, res_name) + "/"
    create_path(res_path)

    task_name_fmt = walker_format + ".%06d"
    ret_list = [True for ii in range(numb_walkers)]

    for walker_idx in range(numb_walkers):
        walker_path = join(enhc_path, walker_format % walker_idx) + "/"
        if sits_param is not None:
            # Relative to base_path, which is the cwd at this point.
            for log_name in sits_logs:
                if os.path.exists(join("sits", log_name)):
                    shutil.copyfile(join("sits", log_name),
                                    join(walker_path, log_name))
        os.chdir(walker_path)

        graph_files = glob.glob("*.pb")
        if len(graph_files) != 0:
            # Models exist: select frames by model deviation.
            cluster_threshold = np.loadtxt(
                join(base_path, "cluster_threshold.dat"))
            sel_cmd = "python3 test.std.py -m *.pb -t %f -d %s --output sel.out --output-angle sel.angle.out" % \
                      (sel_threshold, enhc_out_angle)
            sel_cmd = cmd_append_log(sel_cmd, "sel.log")
            log_task("select with threshold %f" % sel_threshold)
            log_task(sel_cmd)
            sp.check_call(sel_cmd, shell=True)
            sel_idx = []
            with open("sel.out") as sel_fp:
                for line in sel_fp:
                    sel_idx += [int(x) for x in line.split()]
            if len(sel_idx) == 0:
                np.savetxt('num_of_cluster.dat', [0], fmt='%d')
                np.savetxt('cls.sel.out', [], fmt='%d')
                # NOTE(review): ret_list stays True for this walker, unlike
                # the nconf == 0 case below - confirm this is intended.
                # Go back before skipping, otherwise the next walker's
                # relative "sits" lookups run inside this walker directory
                # and the caller may be left in the wrong directory.
                os.chdir(base_path)
                continue
            sel_angles = np.reshape(np.loadtxt('sel.angle.out'),
                                    [-1, cv_dim])
        else:
            # No model yet: every output frame is a candidate.
            cluster_threshold = jdata["cluster_threshold"]
            conf_files = glob.glob(join(enhc_out_conf, "conf*gro"))
            sel_idx = range(len(conf_files))
            sel_angles = np.loadtxt(enhc_out_angle)
            sel_angles = np.reshape(sel_angles, [-1, cv_dim])
            np.savetxt('sel.out', sel_idx, fmt='%d')
            np.savetxt('sel.angle.out', sel_angles, fmt='%.6f')
            if walker_idx == 0:
                # Tune the threshold in 5% steps until the cluster count
                # is inside init_numb_cluster, then store it for the other
                # walkers and later iterations.
                cls_sel = sel_from_cluster(sel_angles, cluster_threshold)
                test_numb_cluster = len(set(cls_sel))
                print(test_numb_cluster)
                for test_iter in range(500):
                    if test_numb_cluster < init_numb_cluster[0]:
                        cluster_threshold = cluster_threshold * 0.95
                    elif test_numb_cluster > init_numb_cluster[1]:
                        cluster_threshold = cluster_threshold * 1.05
                    else:
                        print(cluster_threshold)
                        np.savetxt(join(base_path, 'cluster_threshold.dat'),
                                   [cluster_threshold],
                                   fmt='%f')
                        np.savetxt('cluster_threshold.dat',
                                   [cluster_threshold],
                                   fmt='%f')
                        break
                    cls_sel = sel_from_cluster(sel_angles, cluster_threshold)
                    test_numb_cluster = len(set(cls_sel))
                    print(cluster_threshold)
                    print(test_numb_cluster)
            else:
                cluster_threshold = np.loadtxt(
                    join(base_path, "cluster_threshold.dat"))

        conf_start = 0
        conf_every = 1

        # np.int was only an alias of the builtin int and no longer exists
        # in NumPy >= 1.24.
        sel_idx = np.array(sel_idx, dtype=int)
        assert (len(sel_idx) == sel_angles.shape[0])
        if shell_clustering and len(sel_idx) > 1:
            cmd_sel_from_cluster = (
                base_path +
                "/template/tools/cluster_cv.py -i %s -c %s -t %f --output-idx %s  --output-cv %s"
                % ('sel.out', 'sel.angle.out', cluster_threshold,
                   'cls.sel.out', 'cls.sel.angle.out'))
            sp.check_call(cmd_sel_from_cluster, shell=True)
            sel_idx = np.loadtxt('cls.sel.out', dtype=int)
        elif shell_clustering == False and len(sel_idx) > 1:
            cls_sel = sel_from_cluster(sel_angles, cluster_threshold)
            np.savetxt('num_of_cluster.dat', [len(set(cls_sel))], fmt='%d')
            np.savetxt('cluster_threshold.dat', [cluster_threshold], fmt='%f')
            if len(cls_sel) > max_sel:
                # keep only the last max_sel entries
                cls_sel = cls_sel[-max_sel:]
            sel_idx = sel_idx[cls_sel]
            np.savetxt('cls.sel.angle.0.out', sel_angles[cls_sel], fmt='%.6f')
        elif len(sel_idx) == 1:
            np.savetxt('num_of_cluster.dat', [1], fmt='%d')

        res_angles = np.loadtxt(enhc_out_angle)
        res_angles = np.reshape(res_angles, [-1, cv_dim])
        res_angles = res_angles[sel_idx]
        np.savetxt('cls.sel.out', sel_idx, fmt='%d')
        np.savetxt('cls.sel.angle.out', res_angles, fmt='%.6f')
        res_confs = [
            walker_path + enhc_out_conf + ("conf%d.gro" % ii)
            for ii in sel_idx
        ]

        assert (len(res_confs) == res_angles.shape[0]
                ), "number of enhc out conf does not match out angle"
        assert (len(sel_idx) == res_angles.shape[0]
                ), "number of enhc out conf does not numb sel"
        nconf = len(res_confs)
        if nconf == 0:
            ret_list[walker_idx] = False
            # Restore the working directory before moving on (see above).
            os.chdir(base_path)
            continue

        sel_list = ",".join(str(sel_idx[ii]) for ii in range(nconf))
        log_task("selected %d confs, indexes: %s" % (nconf, sel_list))
        os.chdir(base_path)

        # Create one task directory per selected conformation.
        for ii in range(conf_start, nconf, conf_every):
            work_path = join(res_path,
                             task_name_fmt % (walker_idx, sel_idx[ii])) + "/"
            os.makedirs(work_path)
            copy_file_list(mol_files, templ_mol_path, work_path)
            copy_file_list(res_files, templ_res_path, work_path)
            if sits_param is not None:
                for log_name in sits_logs:
                    if os.path.exists(join("sits", log_name)):
                        shutil.copyfile(join("sits", log_name),
                                        join(work_path, log_name))
                mol_conf_file = join(work_path, "grompp_sits.mdp")
                if bPosre_res:
                    mol_conf_file = join(work_path,
                                         "grompp_sits_restraint.mdp")
                make_grompp_sits(mol_conf_file,
                                 sits_param,
                                 sits_iter=False,
                                 iter_index=iter_index)
            # Replace any conf.gro / conf_init.gro copied from the template
            # by relative links to the walker's files.
            conf_file = os.path.abspath(walker_path + enhc_out_conf +
                                        ("conf%d.gro" % sel_idx[ii]))
            conf_init_file = os.path.abspath(walker_path + "conf_init.gro")
            for link_name in ("conf.gro", "conf_init.gro"):
                if os.path.exists(work_path + link_name):
                    os.remove(work_path + link_name)
            tmp_cwd = os.getcwd()
            os.chdir(work_path)
            os.symlink(os.path.relpath(conf_file), "conf.gro")
            os.symlink(os.path.relpath(conf_init_file), "conf_init.gro")
            os.chdir(tmp_cwd)

        # Run general_mkres.sh in every task directory, passing the CV
        # values of the selected frame as arguments.
        task_dirs = []
        task_args = []
        for ii in range(conf_start, nconf, conf_every):
            dir_str = task_name_fmt % (walker_idx, sel_idx[ii])
            arg_str = np.array2string(res_angles[ii],
                                      formatter={
                                          'float_kind': lambda x: "%.6f" % x
                                      }).replace("[",
                                                 "").replace("]", "").replace(
                                                     "\n", " ")
            task_dirs.append(dir_str)
            task_args.append(arg_str)
            log_task(task_dirs[-1] + ": " + task_args[-1])
        os.chdir(base_path)
        os.chdir(res_path)
        exec_hosts(MachineLocal, "./general_mkres.sh", 1, task_dirs, task_args)
        os.chdir(base_path)

        # Set run length and output stride in the generated input files.
        for ii in range(conf_start, nconf, conf_every):
            work_path = res_path + (task_name_fmt %
                                    (walker_idx, sel_idx[ii])) + "/"
            for mol_conf_file in glob.glob(work_path + "*.mdp"):
                make_grompp_res(mol_conf_file, nsteps, frame_freq)
            replace(work_path + res_plm, "STRIDE=[^ ]* ",
                    "STRIDE=%d " % frame_freq)

        os.chdir(base_path)

    return any(ret_list)
Пример #15
0
def train_ori(iter_index,
              json_file,
              exec_machine=MachineLocal,
              data_dir="data",
              data_name="data000"):
    """Collect ``force_000.out`` data and train models in ``03.train_ori``.

    Steps, all relative to the current directory:

    1. Gather ``centers.out`` and ``force_000.out`` from every task
       directory of ``<iter>/<res_name>/`` into
       ``<iter>/<res_name>/<data_name>.raw`` (an empty file if there are
       no task directories).
    2. Create ``<iter>/03.train_ori`` from the training template and build
       its accumulated data file from the previous iteration's data plus
       this iteration's new data.
    3. Create one sub-directory per model, seeded with the previous
       iteration's checkpoints when ``iter_index >= 1``.
    4. If the new data file is empty, link the previous iteration's
       ``*.pb`` graphs; otherwise run training and freezing and link
       ``graph.NNN.pb`` for every model.

    Args:
        iter_index: Index of the iteration to train.
        json_file: Path of the JSON parameter file.
        exec_machine: Machine used by ``exec_hosts_batch`` when several
            models are trained without batch jobs.
        data_dir: Name of the data sub-directory inside the train path.
        data_name: Base name (without ``.raw``) of the data files.
    """
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    cmd_env = jdata.get("cmd_sources", [])

    train_ori_name = "03.train_ori"
    iter_name = make_iter_name(iter_index)
    res_path = iter_name + "/" + res_name + "/"
    base_path = os.getcwd() + "/"

    # Step 1: gather centers and forces of this iteration.
    all_task = glob.glob(res_path + "/[0-9]*[0-9]")
    if len(all_task) == 0:
        np.savetxt(res_path + data_name + '.raw', [], fmt="%.6e")
    else:
        all_task.sort()
        centers = []
        force = []
        ndim = 0

        for work_path in all_task:
            # Read through the joined path instead of chdir-ing, so a
            # failing load cannot leave the process in a task directory.
            this_centers = np.loadtxt(join(work_path, 'centers.out'))
            centers = np.append(centers, this_centers)
            this_force = np.loadtxt(join(work_path, 'force_000.out'))
            force = np.append(force, this_force)
            ndim = this_force.size
            assert (ndim == this_centers.size
                    ), "center size is diff to force size in " + work_path

        centers = np.reshape(centers, [-1, ndim])
        force = np.reshape(force, [-1, ndim])
        data = np.concatenate((centers, force), axis=1)
        np.savetxt(res_path + data_name + '.raw', data, fmt="%.6e")

        norm_force = np.linalg.norm(force, axis=1)
        log_task(
            "min|f| = %e  max|f| = %e  avg|f| = %e" %
            (np.min(norm_force), np.max(norm_force), np.average(norm_force)))

    template_dir = jdata["template_dir"]
    numb_model = jdata["numb_model"]
    res_iter = jdata["res_iter"]

    # Step 2: create the training directory from the template.
    train_path = join(iter_name, train_ori_name)
    data_path = join(train_path, data_dir)

    data_file = join(data_path, data_name + ".raw")
    data_old_file = join(data_path, data_name + ".old.raw")
    data_new_file = join(data_path, data_name + ".new.raw")
    templ_train_path = join(template_dir, train_name)

    create_path(train_path)
    os.makedirs(data_path)
    copy_file_list(train_files, templ_train_path, train_path)
    # Raw strings: "\." is a regex escape, not a Python string escape.
    replace(join(train_path, "model.py"), r"\./data", "./" + data_dir)
    replace(join(train_path, "model.py"), r"data\.", data_name + ".")
    replace(join(train_path, "main.py"), r"\./data", "./" + data_dir)
    replace(join(train_path, "main.py"), r"data\.raw", data_name + ".raw")

    log_task("collect data upto %d" % (iter_index))
    this_raw = join(base_path, make_iter_name(iter_index), res_name,
                    data_name + ".raw")
    if iter_index == 0:
        # First iteration: the accumulated file is just a link to the new
        # data and the "old" data is an empty file.
        os.chdir(data_path)
        os.symlink(os.path.relpath(this_raw), os.path.basename(data_new_file))
        os.symlink(os.path.basename(data_new_file),
                   os.path.basename(data_file))
        os.chdir(base_path)
        open(data_old_file, "w").close()
    else:
        prev_data_file = join(base_path, make_iter_name(iter_index - 1),
                              train_ori_name, data_dir, data_name + ".raw")
        os.chdir(data_path)
        os.symlink(os.path.relpath(prev_data_file),
                   os.path.basename(data_old_file))
        os.symlink(os.path.relpath(this_raw), os.path.basename(data_new_file))
        os.chdir(base_path)
        # accumulated = old followed by new
        with open(data_file, "wb") as fo:
            with open(data_old_file, "rb") as f0, open(data_new_file,
                                                       "rb") as f1:
                shutil.copyfileobj(f0, fo)
                shutil.copyfileobj(f1, fo)

    # Step 3: one working directory per model.
    log_task("create train dirs")
    for ii in range(numb_model):
        work_path = join(train_path, ("%03d" % ii))
        old_model_path = join(work_path, "old_model")

        create_path(work_path)
        os.symlink(join("..", data_dir), join(work_path, data_dir))
        if iter_index >= 1:
            # Seed with the previous iteration's checkpoints: linked into
            # old_model/ and copied into the work directory.
            prev_train_path = os.path.abspath(
                make_iter_name(iter_index - 1) + "/" + train_ori_name +
                "/") + "/"
            prev_work_path = prev_train_path + ("%03d/" % ii)
            prev_model_files = glob.glob(
                join(prev_work_path,
                     "model.ckpt.*")) + [join(prev_work_path, "checkpoint")]
            create_path(old_model_path)
            os.chdir(old_model_path)
            for mfile in prev_model_files:
                os.symlink(os.path.relpath(mfile), os.path.basename(mfile))
            os.chdir(base_path)
            for mfile in prev_model_files:
                shutil.copy(mfile, work_path)

    train_thread = jdata["train_thread"]

    # Step 4: train, unless this iteration produced no new data.
    filesize = os.stat(data_new_file).st_size if os.path.exists(
        data_new_file) else 0
    if filesize == 0:
        # Nothing new to learn from: reuse the previous iteration's graphs.
        prev_train_path = join(base_path, make_iter_name(iter_index - 1),
                               train_ori_name) + "/"
        for prev_model in glob.glob(join(prev_train_path, "*.pb")):
            os.symlink(prev_model,
                       join(train_path, os.path.basename(prev_model)))
    else:
        neurons = jdata["neurons"]
        batch_size = jdata["batch_size"]
        if iter_index < res_iter:
            numb_epoches = jdata["numb_epoches"]
            starter_lr = jdata["starter_lr"]
            decay_steps = jdata["decay_steps"]
            decay_rate = jdata["decay_rate"]
            cmdl_args = ""
        else:
            # From res_iter on, restart from the old model and mix in old
            # data with the configured ratio.
            numb_epoches = jdata["res_numb_epoches"]
            starter_lr = jdata["res_starter_lr"]
            decay_steps = jdata["res_decay_steps"]
            decay_rate = jdata["res_decay_rate"]
            old_ratio = jdata["res_olddata_ratio"]
            cmdl_args = " --restart --use-mix --old-ratio %f " % old_ratio

        if jdata["resnet"]:
            cmdl_args += " --resnet "
        cmdl_args += " -n "
        for nn in neurons:
            cmdl_args += "%d " % nn
        cmdl_args += " -b " + str(batch_size)
        cmdl_args += " -e " + str(numb_epoches)
        cmdl_args += " -l " + str(starter_lr)
        cmdl_args += " --decay-steps " + str(decay_steps)
        cmdl_args += " --decay-rate " + str(decay_rate)

        train_cmd = "../main.py -t %d" % train_thread
        train_cmd += cmdl_args
        train_cmd = cmd_append_log(train_cmd, "train.log", env=cmd_env)
        freez_cmd = "../freeze.py -o graph.pb"
        freez_cmd = cmd_append_log(freez_cmd, "freeze.log", env=cmd_env)
        task_dirs = [("%03d" % ii) for ii in range(numb_model)]

        batch_jobs = jdata['batch_jobs']
        batch_time_limit = jdata['batch_time_limit']
        batch_modules = jdata['batch_modules']
        batch_sources = jdata['batch_sources']

        os.chdir(train_path)
        if batch_jobs:
            exec_batch(train_cmd,
                       train_thread,
                       1,
                       task_dirs,
                       task_args=None,
                       time_limit=batch_time_limit,
                       modules=batch_modules,
                       sources=batch_sources)
        else:
            if len(task_dirs) == 1:
                exec_hosts(MachineLocal, train_cmd, train_thread, task_dirs,
                           None)
            else:
                exec_hosts_batch(exec_machine, train_cmd, train_thread,
                                 task_dirs, None)

        # Freeze the models one task at a time, then expose each graph as
        # graph.NNN.pb in the train directory.
        for task_dir in task_dirs:
            exec_hosts(MachineLocal, freez_cmd, 1, [task_dir], None)
        for ii in range(numb_model):
            os.symlink("%03d/graph.pb" % ii, "graph.%03d.pb" % ii)
        os.chdir(base_path)