Exemplo n.º 1
0
def make_stat_scf(systems_train,
                  systems_test=None,
                  *,
                  train_dump="data_train",
                  test_dump="data_test",
                  group_data=False,
                  workdir='.',
                  outlog="log.data",
                  **stat_args):
    """Build a PythonTask that runs ``print_stats`` on SCF train/test data.

    Any extra keyword arguments are forwarded to ``print_stats``.
    """
    # resolve system paths following the same convention as run_scf
    systems_train = list(map(os.path.abspath, load_sys_paths(systems_train)))
    systems_test = list(map(os.path.abspath, load_sys_paths(systems_test)))
    # fall back to the last training system when no test set is given
    if not systems_test:
        systems_test.append(systems_train[-1])
        # if len(systems_train) > 1:
        #     del systems_train[-1]
    # import lazily so the stats module is only loaded when a task is made
    from deepks.scf.stats import print_stats
    call_kwargs = dict(stat_args,
                       systems=systems_train,
                       test_sys=systems_test,
                       dump_dir=train_dump,
                       test_dump=test_dump,
                       group=group_data)
    # wrap the stats call into a schedulable task
    return PythonTask(print_stats,
                      call_kwargs=call_kwargs,
                      outlog=outlog,
                      errlog="err",
                      workdir=workdir)
Exemplo n.º 2
0
def collect_systems(systems, folder=None):
    """Give every system a unique basename, optionally symlinking them.

    Duplicate basenames are disambiguated by folding parent directory
    names into the basename, joined by ".".  If `folder` is None the
    resolved basenames are returned; otherwise each system is linked
    into `folder` and the link targets are returned.
    """
    sys_list = [os.path.abspath(s) for s in load_sys_paths(systems)]
    pieces = [os.path.split(s.rstrip(os.path.sep)) for s in sys_list]
    parents = [p for p, _ in pieces]
    bases = [b for _, b in pieces]
    candidates = range(len(sys_list))
    while True:
        # group the candidate indices by their current basename
        by_base = {}
        for i in candidates:
            by_base.setdefault(bases[i], []).append(i)
        clashes = [grp for grp in by_base.values() if len(grp) > 1]
        if not clashes:
            break
        candidates = [i for grp in clashes for i in grp]
        if all(parents[i] in ("/", "") for i in candidates):
            # nothing left to fold in -- the paths really are identical
            print("System list have duplicated terms, index:", candidates, file=sys.stderr)
            break
        for i in candidates:
            if parents[i] in ("/", ""):
                continue
            # pop one path component off the parent into the basename
            parents[i], tail = os.path.split(parents[i])
            bases[i] = f"{tail}.{bases[i]}"
    if folder is None:
        return bases
    targets = [os.path.join(folder, b) for b in bases]
    for src, dst in zip(sys_list, targets):
        link_file(src, dst, use_abs=True)
    return targets
Exemplo n.º 3
0
def make_scf_task(*,
                  workdir=".",
                  arg_file="scf_input.yaml",
                  source_arg=None,
                  model_file="model.pth",
                  source_model=None,
                  systems="systems.raw",
                  link_systems=True,
                  dump_dir="results",
                  share_folder="share",
                  outlog="log.scf",
                  group_data=None,
                  dispatcher=None,
                  resources=None,
                  python="python",
                  **task_args):
    """Assemble a BatchTask that runs the SCF command line.

    Optional pieces (argument file, model, systems, dump dir, grouping
    flag) are appended to the command, and the matching link / forward /
    backward file lists are populated accordingly.
    """
    # collect command fragments; joined with spaces at the end
    parts = [SCF_CMD.format(python=python)]
    link_share = task_args.pop("link_share_files", [])
    link_prev = task_args.pop("link_prev_files", [])
    link_abs = task_args.pop("link_abs_files", [])
    forward_files = task_args.pop("forward_files", [])
    backward_files = task_args.pop("backward_files", [])
    sys_name = None
    # optional: argument file
    if arg_file:
        parts.append(arg_file)
        if source_arg is not None:
            link_share.append((source_arg, arg_file))
        forward_files.append(arg_file)
    # optional: model file ("NONE" is passed through but not linked/forwarded)
    if model_file:
        parts.append(f"-m {model_file}")
        if model_file.upper() != "NONE":
            if source_model is not None:
                link_prev.append((source_model, model_file))
            forward_files.append(model_file)
    # optional: systems, either linked into a local folder or passed as-is
    if systems:
        sys_paths = [os.path.abspath(p) for p in load_sys_paths(systems)]
        sys_base = [get_sys_name(p) for p in sys_paths]
        sys_name = [os.path.basename(p) for p in sys_base]
        if link_systems:
            target_dir = "systems"
            for base in sys_base:
                for src in glob(f"{base}*"):
                    dst = os.path.join(target_dir, os.path.basename(src))
                    link_abs.append((src, dst))
            forward_files.append(target_dir)
            sys_str = os.path.join(target_dir, "*")
        else:  # cannot forward files here
            sys_str = " ".join(sys_paths)
        parts.append(f"-s {sys_str}")
    # optional: dump directory; pull back per-system results when known
    if dump_dir:
        parts.append(f"-d {dump_dir}")
        if sys_name:
            backward_files.extend(os.path.join(dump_dir, nm) for nm in sys_name)
        else:  # backward whole folder, may cause problem
            backward_files.append(dump_dir)
    if group_data is not None:
        parts.append("-G" if group_data else "-NG")
    # make task
    return BatchTask(" ".join(parts),
                     workdir=workdir,
                     dispatcher=dispatcher,
                     resources=resources,
                     outlog=outlog,
                     share_folder=share_folder,
                     link_share_files=link_share,
                     link_prev_files=link_prev,
                     link_abs_files=link_abs,
                     forward_files=forward_files,
                     backward_files=backward_files,
                     **task_args)
Exemplo n.º 4
0
def make_run_scf(systems_train,
                 systems_test=None,
                 *,
                 train_dump="data_train",
                 test_dump="data_test",
                 no_model=False,
                 group_data=None,
                 workdir='.',
                 share_folder='share',
                 outlog="log.scf",
                 source_arg="scf_input.yaml",
                 source_model="model.pth",
                 dispatcher=None,
                 resources=None,
                 sub_size=1,
                 group_size=1,
                 ingroup_parallel=1,
                 sub_res=None,
                 python='python',
                 **task_args):
    """Create a GroupBatchTask of SCF sub-tasks over train/test systems.

    Systems are split into sub-tasks of roughly `sub_size` systems each;
    every sub-task shares the linked argument file and (unless `no_model`)
    the linked model file from the parent work directory.
    """
    # if no test systems, use last one in train systems
    systems_train = [os.path.abspath(p) for p in load_sys_paths(systems_train)]
    systems_test = [os.path.abspath(p) for p in load_sys_paths(systems_test)]
    if not systems_test:
        systems_test.append(systems_train[-1])
        # if len(systems_train) > 1:
        #     del systems_train[-1]
    check_system_names(systems_train)
    check_system_names(systems_test)
    # split systems into interleaved groups of about sub_size each
    ntask_trn = int(np.ceil(len(systems_train) / sub_size))
    ntask_tst = int(np.ceil(len(systems_test) / sub_size))
    train_sets = [systems_train[i::ntask_trn] for i in range(ntask_trn)]
    test_sets = [systems_test[i::ntask_tst] for i in range(ntask_tst)]
    # common settings for all sub-tasks
    model_file = "NONE" if no_model else "../model.pth"
    nd = max(len(str(ntask_trn + ntask_tst)), 2)  # zero-pad width
    sub_res = {**DEFAULT_SCF_SUB_RES, **(sub_res if sub_res is not None else {})}

    def _sub_task(sset, tag, idx, dump):
        # one SCF sub-task writing into the shared parent dump dir
        return make_scf_task(systems=sset,
                             workdir=f"task.{tag}.{idx:0{nd}}",
                             arg_file="../scf_input.yaml",
                             source_arg=None,
                             model_file=model_file,
                             source_model=None,
                             dump_dir=f"../{dump}",
                             group_data=group_data,
                             link_systems=True,
                             resources=sub_res,
                             python=python)

    trn_tasks = [_sub_task(sset, "trn", i, train_dump)
                 for i, sset in enumerate(train_sets)]
    tst_tasks = [_sub_task(sset, "tst", i, test_dump)
                 for i, sset in enumerate(test_sets)]
    # set up optional args
    link_share = task_args.pop("link_share_files", [])
    link_share.append((source_arg, "scf_input.yaml"))
    link_prev = task_args.pop("link_prev_files", [])
    if not no_model:
        link_prev.append((source_model, "model.pth"))
    if resources is None:
        resources = {}
    resources = {**DEFAULT_SCF_RES, "numb_node": ingroup_parallel, **resources}
    # make task
    return GroupBatchTask(trn_tasks + tst_tasks,
                          group_size=group_size,
                          ingroup_parallel=ingroup_parallel,
                          dispatcher=dispatcher,
                          resources=resources,
                          outlog=outlog,
                          errlog="err",
                          share_folder=share_folder,
                          link_share_files=link_share,
                          link_prev_files=link_prev)
Exemplo n.º 5
0
def main(systems,
         model_file="model.pth",
         basis='ccpvdz',
         proj_basis=None,
         penalty_terms=None,
         device=None,
         dump_dir=".",
         dump_fields=DEFAULT_FNAMES,
         group=False,
         mol_args=None,
         scf_args=None,
         verbose=0):
    if model_file is None or model_file.upper() == "NONE":
        model = None
        default_scf_args = DEFAULT_HF_ARGS
    else:
        model = CorrNet.load(model_file).double()
        default_scf_args = DEFAULT_SCF_ARGS

    # check arguments
    penalty_terms = check_list(penalty_terms)
    if mol_args is None: mol_args = {}
    if scf_args is None: scf_args = {}
    scf_args = {**default_scf_args, **scf_args}
    fields = select_fields(dump_fields)
    # check label names from label fields and penalties
    label_names = get_required_labels(fields["scf"] + fields["grad"],
                                      penalty_terms)

    if verbose:
        print(f"starting calculation with OMP threads: {lib.num_threads()}",
              f"and max memory: {lib.param.MAX_MEMORY}")
        if verbose > 1:
            print(f"basis: {basis}")
            print(f"specified scf args:\n  {scf_args}")

    meta = old_meta = None
    res_list = []
    systems = load_sys_paths(systems)

    for fl in systems:
        fl = fl.rstrip(os.path.sep)
        for atom, attrs, labels in system_iter(fl, label_names):
            mol_input = {
                **mol_args, "verbose": verbose,
                "atom": atom,
                "basis": basis,
                **attrs
            }
            mol = build_mol(**mol_input)
            penalties = [build_penalty(pd, labels) for pd in penalty_terms]
            try:
                meta, result = solve_mol(mol,
                                         model,
                                         fields,
                                         labels,
                                         proj_basis=proj_basis,
                                         penalties=penalties,
                                         device=device,
                                         verbose=verbose,
                                         **scf_args)
            except Exception as e:
                print(fl, 'failed! error:', e, file=sys.stderr)
                # continue
                raise
            if group and old_meta is not None and np.any(meta != old_meta):
                break
            res_list.append(result)

        if not group:
            sub_dir = os.path.join(dump_dir,
                                   get_sys_name(os.path.basename(fl)))
            dump_meta(sub_dir, meta)
            dump_data(sub_dir, **collect_fields(fields, meta, res_list))
            res_list = []
        elif old_meta is not None and np.any(meta != old_meta):
            print(fl,
                  'meta does not match! saving previous results only.',
                  file=sys.stderr)
            break
        old_meta = meta
        if verbose:
            print(fl, 'finished')

    if group:
        dump_meta(dump_dir, meta)
        dump_data(dump_dir, **collect_fields(fields, meta, res_list))
        if verbose:
            print('group finished')