def train_cli(args=None):
    """CLI entry point for ``deepks train``.

    Parses command-line options (optionally merged on top of a yaml input
    file) and forwards the resulting keyword arguments to
    ``deepks.model.train.main``.  CLI options override yaml values.

    Parameters
    ----------
    args : list of str, optional
        Argument tokens to parse; ``None`` means ``sys.argv[1:]``.
    """
    p = argparse.ArgumentParser(
        prog="deepks train",
        description="Train a model according to given input.",
        # SUPPRESS: options not given on the CLI do not appear in the
        # namespace at all, so they cannot clobber yaml-provided values
        argument_default=argparse.SUPPRESS)
    p.add_argument('input', type=str, nargs="?",
                   help='the input yaml file for args')
    p.add_argument('-r', '--restart',
                   help='the restart file to load model from, would ignore model_args if given')
    p.add_argument('-d', '--train-paths', nargs="*",
                   help='paths to the folders of training data')
    p.add_argument('-t', '--test-paths', nargs="*",
                   help='paths to the folders of testing data')
    p.add_argument('-o', '--ckpt-file',
                   help='file to save the model parameters, default: model.pth')
    p.add_argument('-S', '--seed', type=int,
                   help='use specified seed in initialization and training')
    p.add_argument("-D", "--device",
                   help="device name used in training the model")

    namespace = p.parse_args(args)
    argdict = vars(namespace)
    if "input" in argdict:
        # yaml file provides the base settings; explicit CLI flags win
        merged = load_yaml(argdict.pop("input"))
        merged.update(argdict)
        argdict = merged

    from deepks.model.train import main
    main(**argdict)
def scf_cli(args=None):
    """CLI entry point for ``deepks scf``.

    Parses command-line options, collects every ``--scf-*`` flag into a
    nested ``scf_args`` dict, optionally merges everything on top of a yaml
    input file (CLI values win), and forwards the result to
    ``deepks.scf.run.main``.

    Parameters
    ----------
    args : list of str, optional
        Argument tokens to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        prog="deepks scf",
        description="Calculate and save SCF results using given model.",
        # SUPPRESS: absent options do not appear in the namespace, so they
        # cannot overwrite values coming from the yaml file
        argument_default=argparse.SUPPRESS)
    parser.add_argument("input", nargs="?",
                        help='the input yaml file for args')
    parser.add_argument("-s", "--systems", nargs="*",
                        help="input molecule systems, can be xyz files or folders with npy data")
    parser.add_argument("-m", "--model-file",
                        help="file of the trained model")
    parser.add_argument("-d", "--dump-dir",
                        help="dir of dumped files")
    parser.add_argument("-v", "--verbose", type=int, choices=range(0, 6),
                        help="output level of calculation information")
    parser.add_argument("-F", "--dump-fields", nargs="*",
                        help="fields to be dumped into the folder")
    parser.add_argument("-B", "--basis",
                        help="basis set used to solve the model")
    parser.add_argument("-P", "--proj_basis",
                        help="basis set used to project dm, must match with model")
    parser.add_argument("-D", "--device",
                        help="device name used in nn model inference")
    group0 = parser.add_mutually_exclusive_group()
    group0.add_argument("-G", "--group", action='store_true', dest="group",
                        help="group results for all systems, only works for same number of atoms")
    group0.add_argument("-NG", "--no-group", action='store_false', dest="group",
                        help="Do not group results for different systems (default behavior)")
    parser.add_argument("-X", "--scf-xc",
                        help="base xc functional used in scf equation, default is HF")
    parser.add_argument("--scf-conv-tol", type=float,
                        help="converge threshold of scf iteration")
    parser.add_argument("--scf-conv-tol-grad", type=float,
                        help="gradient converge threshold of scf iteration")
    parser.add_argument("--scf-max-cycle", type=int,
                        help="max number of scf iteration cycles")
    parser.add_argument("--scf-diis-space", type=int,
                        help="subspace dimension used in diis mixing")
    parser.add_argument("--scf-level-shift", type=float,
                        help="level shift used in scf calculation")

    args = parser.parse_args(args)

    # Pull every "scf_*" attribute out of the namespace into its own dict,
    # stripping the "scf_" prefix.  Iterate over a copy because delattr
    # mutates the underlying namespace dict.
    scf_args = {}
    for k, v in vars(args).copy().items():
        if k.startswith("scf_"):
            scf_args[k[4:]] = v
            delattr(args, k)

    if hasattr(args, "input"):
        argdict = load_yaml(args.input)
        del args.input
        argdict.update(vars(args))
        # FIX: use setdefault so a yaml file without an explicit
        # "scf_args" section no longer raises KeyError
        argdict.setdefault("scf_args", {}).update(scf_args)
    else:
        argdict = vars(args)
        argdict["scf_args"] = scf_args

    from deepks.scf.run import main
    main(**argdict)
def check_arg_dict(data, default, strict=True):
    """Validate user-supplied arguments against a default dict.

    Parameters
    ----------
    data : dict, str, or None
        User arguments; a str is treated as a yaml file path and loaded,
        ``None`` means no user arguments.
    default : dict
        The full set of recognized keys with their default values.
    strict : bool
        If True, keys not present in *default* are discarded; otherwise
        they are kept in the result.  Either way a warning is printed to
        stderr listing the unrecognized keys.

    Returns
    -------
    dict
        *default* overlaid with the accepted user arguments.
    """
    if data is None:
        data = {}
    if isinstance(data, str):
        data = load_yaml(data)
    allowed = {k: v for k, v in data.items() if k in default}
    outside = {k: v for k, v in data.items() if k not in default}
    if outside:
        # FIX: in the original, the conditional expression bound to the whole
        # string concatenation, so with strict=False only "but kept" was
        # printed.  Also fixed "ars" -> "args" and the missing space.
        suffix = "and would be discarded" if strict else "but kept"
        print(f"following args are not in the default list: "
              f"{list(outside.keys())} {suffix}",
              file=sys.stderr)
    if strict:
        return {**default, **allowed}
    else:
        return {**default, **data}
def stats_cli(args=None):
    """CLI entry point for ``deepks stats``.

    Parses command-line options, optionally pulling ``systems``,
    ``dump_dir`` and ``group`` defaults from the SCF yaml input file
    (CLI flags win), then calls ``deepks.scf.stats.print_stats``.

    Parameters
    ----------
    args : list of str, optional
        Argument tokens to parse; ``None`` means ``sys.argv[1:]``.
    """
    p = argparse.ArgumentParser(
        prog="deepks stats",
        description="Print the stats of SCF results.",
        # SUPPRESS keeps unset options out of the namespace entirely
        argument_default=argparse.SUPPRESS)
    p.add_argument("input", nargs="?",
                   help='the input yaml file used for SCF calculation')
    p.add_argument("-s", "--systems", nargs="*",
                   help='system paths used as training set (i.e. calculate shift)')
    p.add_argument("-d", "--dump-dir",
                   help="directory used to save SCF results of training systems")
    p.add_argument("-ts", "--test-sys", nargs="*",
                   help='system paths used as testing set (i.e. not calculate shift)')
    p.add_argument("-td", "--test-dump",
                   help="directory used to save SCF results of testing systems")
    p.add_argument("-G", "--group", action='store_true',
                   help="if set, assume results are grouped")
    p.add_argument("-NC", action="store_false", dest="with_conv",
                   help="do not print convergence results")
    p.add_argument("-NE", action="store_false", dest="with_e",
                   help="do not print energy results")
    p.add_argument("-NF", action="store_false", dest="with_f",
                   help="do not print force results")
    p.add_argument("--e-name",
                   help="name of the energy file (no extension)")
    p.add_argument("--f-name",
                   help="name of the force file (no extension)")

    namespace = p.parse_args(args)
    cli_args = vars(namespace)
    if "input" in cli_args:
        rawdict = load_yaml(cli_args.pop("input"))
        # only these three fields of the SCF input are relevant here
        argdict = {fd: rawdict[fd]
                   for fd in ("systems", "dump_dir", "group")
                   if fd in rawdict}
        argdict.update(cli_args)
    else:
        argdict = cli_args

    from deepks.scf.stats import print_stats
    print_stats(**argdict)
def iter_cli(args=None):
    """CLI entry point for ``deepks iterate``.

    Merges one or more yaml argument files (later files take priority via
    ``deep_update``), overlays explicit CLI flags on top, and forwards
    everything to ``deepks.iterate.iterate.main``.

    Parameters
    ----------
    args : list of str, optional
        Argument tokens to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(
        prog="deepks iterate",
        description="Run the iteration procedure to train a SCF model.",
        # SUPPRESS: absent CLI flags never overwrite yaml-provided values
        argument_default=argparse.SUPPRESS)
    parser.add_argument("argfile", nargs="*", default=[],
                        help='the input yaml file for args, '
                             'if more than one, the latter has higher priority')
    parser.add_argument("-s", "--systems-train", nargs="*",
                        help='systems for training, '
                             'can be xyz files or folders with npy data')
    # FIX: help text previously said "systems for training" (copy-paste error)
    parser.add_argument("-t", "--systems-test", nargs="*",
                        help='systems for testing, '
                             'can be xyz files or folders with npy data')
    parser.add_argument("-n", "--n-iter", type=int,
                        help='the number of iterations to run')
    parser.add_argument("--workdir",
                        help='working directory, default is current directory')
    parser.add_argument("--share-folder",
                        help='folder to store share files, default is "share"')
    parser.add_argument("--cleanup", action="store_true", dest="cleanup",
                        help='if set, clean up files used for job dispatching')
    parser.add_argument("--no-strict", action="store_false", dest="strict",
                        help='if set, allow other arguments to be passed to task')
    # allow cli specified argument files
    sub_names = ["scf-input", "scf-machine", "train-input",
                 "train-machine", "init-model", "init-scf", "init-train"]
    for name in sub_names:
        # FIX: "subsitude" -> "substitute" in help text
        parser.add_argument(f"--{name}",
                            help='if specified, substitute the original arguments with given file')

    args = parser.parse_args(args)
    # merge yaml files first; later files override earlier ones
    argdict = {}
    for fl in args.argfile:
        argdict = deep_update(argdict, load_yaml(fl))
    del args.argfile
    # explicit CLI flags have the final say
    argdict.update(vars(args))

    from deepks.iterate.iterate import main
    main(**argdict)
def test_cli(args=None): parser = argparse.ArgumentParser( prog="deepks test", description="Test a model with given data (Not SCF).", argument_default=argparse.SUPPRESS) parser.add_argument("input", nargs="?", help='the input yaml file used for training') parser.add_argument("-d", "--data-paths", type=str, nargs='+', help="the paths to data folders containing .npy files for test") parser.add_argument("-m", "--model-file", type=str, nargs='+', help="the dumped model file to test") parser.add_argument("-o", "--output-prefix", type=str, help=r"the prefix of output file, would wite into file %%prefix.%%sysidx.out") parser.add_argument("-E", "--e-name", type=str, help="the name of energy file to be read (no .npy extension)") parser.add_argument("-D", "--d-name", type=str, nargs="+", help="the name of descriptor file(s) to be read (no .npy extension)") parser.add_argument("-G", "--group", action='store_true', help="group test results for all systems") args = parser.parse_args(args) if hasattr(args, "input"): rawdict = load_yaml(args.input) del args.input argdict = {} if "ckpt_file" in rawdict["train_args"]: argdict["model_file"] = rawdict["train_args"]["ckpt_file"] if "e_name" in rawdict["data_args"]: argdict["e_name"] = rawdict["data_args"]["e_name"] if "d_name" in rawdict["data_args"]: argdict["d_name"] = rawdict["data_args"]["d_name"] if "test_paths" in rawdict: argdict["data_paths"] = rawdict["test_paths"] argdict.update(vars(args)) else: argdict = vars(args) from deepks.model.test import main main(**argdict)
parser.add_argument("--scf-input", help="yaml file to specify scf arguments") args = parser.parse_args() if args.verbose: print(f"starting calculation with OMP threads: {lib.num_threads()}", f"and max memory: {lib.param.MAX_MEMORY}") if args.dump_dir is not None: os.makedirs(args.dump_dir, exist_ok = True) for fn in args.files: tic = time.time() mol = gto.M(atom=fn, basis=args.basis, verbose=args.verbose, charge=args.charge, parse_arg=False) model = args.model_file scfargs = {} if args.scf_input is not None: argdict = load_yaml(args.scf_input) if "scf_args" in argdict: scfargs = argdict["scf_args"] if model is None and "model" in argdict: model = argdict["model"] else: scfargs = argdict hess = calc_hessian(mol, model, args.delta, args.proj_basis, **scfargs) if not args.unit.upper().startswith(("B", "AU")): hess /= BOHR**2 if args.dump_dir is None: dump_dir = os.path.dirname(fn) else: dump_dir = args.dump_dir dump = os.path.join(dump_dir, os.path.splitext(os.path.basename(fn))[0]) np.save(dump+".hessian.npy", hess)
# sys.path.append('/path/to/source') import deepks from deepks.model.train import main as train_main from deepks.model.test import main as train_test from deepks.scf.run import main as scf_main from deepks.scf.stats import collect_data_grouped from deepks.utils import load_yaml from deepks.task.task import PythonTask from deepks.task.workflow import Sequence, Iteration niter = 5 nmol = 1500 ntrain = 1000 ntest = 500 train_input = load_yaml('share/train_input.yaml') scf_input = load_yaml('share/scf_input.yaml') train_idx = np.arange(ntrain) task_scf = PythonTask(scf_main, call_kwargs=scf_input, outlog='log.scf', workdir='00.scf', link_prev_files=['model.pth'], share_folder='share', link_share_files=['mol_files.raw']) task_data = PythonTask(collect_data_grouped, call_args=[train_idx], outlog='log.data', workdir='01.data',
parser.add_argument("--conv-input", help="yaml file to specify convergence arguments") args = parser.parse_args() if args.verbose: print(f"starting calculation with OMP threads: {lib.num_threads()}", f"and max memory: {lib.param.MAX_MEMORY}") if args.dump_dir is not None: os.makedirs(args.dump_dir, exist_ok = True) for fn in args.files: tic = time.time() mol = gto.M(atom=fn, basis=args.basis, verbose=args.verbose, charge=args.charge, parse_arg=False) model = args.model_file scf_args = {} if args.scf_input is not None: argdict = load_yaml(args.scf_input) if "scf_args" in argdict: scf_args = argdict["scf_args"] if model is None and "model" in argdict: model = argdict["model"] else: scf_args = argdict conv_args = load_yaml(args.conv_input) if args.conv_input is not None else {} mol_eq = run_optim(mol, model, args.proj_basis, scf_args, conv_args) suffix = args.suffix if args.dump_dir is None: dump_dir = os.path.dirname(fn) if not suffix: suffix = "eq" else: dump_dir = args.dump_dir