def make_stat_scf(systems_train, systems_test=None, *,
                  train_dump="data_train", test_dump="data_test",
                  group_data=False, workdir='.', outlog="log.data",
                  **stat_args):
    """Build a PythonTask that runs ``deepks.scf.stats.print_stats``.

    Systems are given in the same convention as ``run_scf``; both lists are
    resolved to absolute paths.  When no test systems are supplied, the last
    training system doubles as the test set.  Extra keyword arguments are
    forwarded to ``print_stats``.
    """
    from deepks.scf.stats import print_stats
    # resolve every system to an absolute path (same convention as run_scf)
    train_sys = [os.path.abspath(p) for p in load_sys_paths(systems_train)]
    test_sys = [os.path.abspath(p) for p in load_sys_paths(systems_test)]
    if not test_sys:
        # no explicit test set: reuse the last training system
        test_sys.append(train_sys[-1])
    # merge the fixed arguments over any caller-provided extras
    call_kwargs = dict(stat_args,
                       systems=train_sys,
                       test_sys=test_sys,
                       dump_dir=train_dump,
                       test_dump=test_dump,
                       group=group_data)
    return PythonTask(print_stats, call_kwargs=call_kwargs,
                      outlog=outlog, errlog="err", workdir=workdir)
def make_test_train(data_paths, model_file="model.pth", *,
                    output_prefix="test", group_results=True,
                    workdir='.', outlog="log.test", **test_args):
    """Build a PythonTask that evaluates a model via ``deepks.model.test``.

    The data paths, model file, output prefix and grouping flag are wired
    into the test entry point; any extra keyword arguments are passed
    through unchanged.
    """
    from deepks.model.test import main as test_func
    # fixed arguments take precedence over caller-provided extras
    run_kwargs = {**test_args,
                  "data_paths": data_paths,
                  "model_file": model_file,
                  "output_prefix": output_prefix,
                  "group": group_results}
    return PythonTask(test_func, call_kwargs=run_kwargs,
                      outlog=outlog, errlog="err", workdir=workdir)
from deepks.task.task import PythonTask
from deepks.task.workflow import Sequence, Iteration

# iteration / dataset-size parameters
niter = 5
nmol = 1500    # NOTE(review): nmol and ntest are not referenced in this chunk -- confirm use elsewhere
ntrain = 1000
ntest = 500

# keyword arguments for the SCF / training entry points, from shared YAML files
train_input = load_yaml('share/train_input.yaml')
scf_input = load_yaml('share/scf_input.yaml')

# the first ntrain molecules are used as training data
train_idx = np.arange(ntrain)

# 00.scf: run scf_main, taking model.pth from the previous step and
# mol_files.raw from the shared folder
task_scf = PythonTask(scf_main, call_kwargs=scf_input,
                      outlog='log.scf', workdir='00.scf',
                      link_prev_files=['model.pth'],
                      share_folder='share', link_share_files=['mol_files.raw'])
# 01.data: collect grouped data for the selected training indices,
# taking model.pth and the results dir from the previous step
task_data = PythonTask(collect_data_grouped, call_args=[train_idx],
                       outlog='log.data', workdir='01.data',
                       link_prev_files=['model.pth', "results"],
                       share_folder='share', link_share_files=['e_ref.npy'])
# training task restarting from old_model.pth; the remaining keyword
# arguments of this constructor continue beyond this chunk
task_train = PythonTask(train_main, call_args=["old_model.pth"], call_kwargs=train_input,
# Continuation of a task constructor opened before this chunk: remaining
# keyword arguments (dispatcher, resources, log, and file-staging lists).
dispatcher=disp, resources=scf_res, outlog="log.scf",
    link_prev_files=['model.pth'],   # take the model produced by the previous step
    forward_files=['model.pth'],     # send the model with the dispatched job
    backward_files=['results/*'])    # retrieve the results directory afterwards

# split the index file into training and test indices
all_idx = np.loadtxt('share/index.raw', dtype=int)
train_idx = all_idx[:ntrain]
test_idx = all_idx[ntrain:]

# post-processing task: run collect_data over the results directory
# against the shared reference energies
post_scf = PythonTask(collect_data, call_args=[train_idx, test_idx],
                      call_kwargs={"sys_dir": "results", "ene_ref": "e_ref.npy"},
                      outlog='log.data',
                      share_folder='share', link_share_files=['e_ref.npy'])
# remove scheduler log files left behind by the batch jobs
clean_scf = ShellTask("rm slurm-*.out")
scf_flow = Sequence([run_scf, post_scf, clean_scf], workdir='01.scf')

# Group them together: one iteration = training flow followed by SCF flow,
# repeated niter times with progress tracked in RECORD
per_iter = Sequence([train_flow, scf_flow])
iterate = Iteration(per_iter, niter, init_folder='share/init', record_file='RECORD')
# Stage the generated system into train/ and test/ locations (both copies
# of the same results/system.raw here) and record their absolute paths in
# the .raw files consumed by the training task below.
shutil.copy('results/system.raw', 'train')
shutil.copy('results/system.raw', 'test')
Path('train_paths.raw').write_text(str(Path('train').absolute()))
Path('test_paths.raw').write_text(str(Path('test').absolute()))

# iteration / dataset-size parameters
niter = 5
nmol = 1500
ntrain = 1000
ntest = 500    # NOTE(review): niter and ntest are not referenced in this chunk -- confirm use elsewhere

# keyword arguments for the training / SCF entry points, from shared YAML files
train_input = load_yaml('share/train_input.yaml')
scf_input = load_yaml('share/scf_input.yaml')

# 00.train: run train_main on the path lists written above
task_train = PythonTask(train_main, call_kwargs=train_input,
                        outlog='log.train', workdir='00.train',
                        link_prev_files=['train_paths.raw', 'test_paths.raw'])
# 01.scf: run scf_main with the freshly produced model.pth and the shared
# molecule list
task_scf = PythonTask(scf_main, call_kwargs=scf_input,
                      outlog='log.scf', workdir='01.scf',
                      link_prev_files=['model.pth'],
                      share_folder='share', link_share_files=['mol_files.raw'])
# 02.data: run collect_data over the results directory against the shared
# reference energies
task_data = PythonTask(collect_data, call_args=[nmol, ntrain],
                       outlog='log.data', workdir='02.data',
                       link_prev_files=['results'],
                       share_folder='share', link_share_files=['e_ref.npy'])
# run the three stages in order
seq = Sequence([task_train, task_scf, task_data])
# Tail of a task constructor opened before this chunk: closes a
# per-system list comprehension and supplies the remaining keyword args.
for i in range(nsys) ], workdir='00.scf', outlog='log.scf',
    resources=scf_res, link_prev_files=['model.pth'])

# labeling
# NOTE(review): the f-strings below contain no placeholders and the loop
# variable `i` is unused, so every iteration uses the same file names --
# presumably per-system names like f"results.{i}" were intended; verify
# against the original script before relying on this.
task_data = PythonTask(
    lambda: [collect_data_grouped(train_idx=train_idx, append=True,
                                  ene_ref=f"e_ref.npy", force_ref=f"f_ref.npy",
                                  sys_dir=f"results")
             for i in range(nsys)],
    outlog='log.data', workdir='01.data',
    link_prev_files=['model.pth'] + [f"results" for i in range(nsys)],
    share_folder='share',
    link_share_files=[f'e_ref.npy' for i in range(nsys)]
                     + [f'f_ref.npy' for i in range(nsys)])

# training: shell command restarting training from the previous model
train_cmd = " ".join([
    "python -u /path/to/source/deepks/train/main.py",
    "train_input.yaml",
    "--restart old_model.pth"])
# resource request for the training job
train_res = {"time_limit": "24:00:00", "mem_limit": 32, "numb_gpu": 1}
# NOTE(review): this chunk references `paths` and `nsel`, which are not
# defined here -- it reads like the body of a data-selection routine
# collapsed to top level; confirm against the full file.
old_trn = np.loadtxt("train_paths.raw", dtype=str)
old_tst = np.loadtxt("test_paths.raw", dtype=str)
# per-model predictions (column 1 of each .out file), stacked along the
# last axis so each row has one value per model in `paths`;
# trn_res is loaded but not used further in this chunk
trn_res = np.stack(
    [np.loadtxt(f"{m}/test/train.all.out")[:, 1] for m in paths], -1)
tst_res = np.stack(
    [np.loadtxt(f"{m}/test/test.all.out")[:, 1] for m in paths], -1)
# spread across models, used as the selection score
tst_std = np.std(tst_res, axis=-1)
# indices sorted by descending std: highest-disagreement systems first
order = np.argsort(tst_std)[::-1]
sel = order[:nsel]            # nsel highest-std systems -> moved into training
rst = np.sort(order[nsel:])   # remainder stays in the test pool, original order
new_trn = np.concatenate([old_trn, old_tst[sel]])
new_tst = old_tst[rst]
np.savetxt("new_train_paths.raw", new_trn, fmt="%s")
np.savetxt("new_test_paths.raw", new_tst, fmt="%s")

# selection task wrapping the routine above
task_select = PythonTask(select_data, call_args=[nsel])
# combine them together into an niter-cycle iteration tracked by RECORD
iterate = Iteration([task_train, task_test, task_select], niter,
                    init_folder='share/init', record_file='RECORD')
# resume from the RECORD file when a previous run exists, else start fresh
if os.path.exists('RECORD'):
    iterate.restart()
else:
    iterate.run()