Пример #1
0
def build_deepmd(path, nsw, outcar, deepmd):
    """
    Convert an OUTCAR into a deepmd/npy training set (and optional test set).

    The frames to keep for training are chosen through the module-level
    ``args`` namespace (defined elsewhere in this file):

    * ``args.idx``      -- text file of frame indices, used as they are.
    * ``args.vaspidx``  -- text file of VASP step numbers.  If the second
      line of ``outcar`` contains ``nsw_sel`` (file written by
      merge_out.py) the step numbers listed on that line are matched
      against the file; otherwise the frame index is the step number
      minus one.  When both options are given this one wins.

    All remaining frames form the test set, which is only written when
    ``args.test`` is set.

    Parameters
    ----------
    path : str
        Directory that ``deepmd`` is joined to.
    nsw : int
        Number of steps; only used to choose the set size when
        ``args.batchsize`` is not given.
    outcar : str
        Path of the OUTCAR file.
    deepmd : str
        Name of the output folder, relative to ``path``.

    Raises
    ------
    ValueError
        If neither ``args.idx`` nor ``args.vaspidx`` is provided.
    """
    ls = LabeledSystem(outcar, fmt='outcar')

    idx = None
    if args.idx:
        print("index file provided")
        idx = np.loadtxt(args.idx).astype(int)

    # Deliberately not an ``elif``: a VASP index file overrides ``args.idx``.
    if args.vaspidx:
        print("vasp index file provided")
        # loadtxt returns floats (and a 0-d array for a single value);
        # frame indices have to be a 1-d integer array.
        vaspidx = np.atleast_1d(np.loadtxt(args.vaspidx)).astype(int)
        with open(outcar) as fp:
            fp.readline()  # the first line is skipped
            nsw_sel = fp.readline()
        if 'nsw_sel' in nsw_sel:
            print('file generated by merge_out.py')
            wanted = set(vaspidx.tolist())
            # split() without argument tolerates repeated blanks
            steps = [int(tok) for tok in nsw_sel.split('=')[1].split()]
            idx = [i for i, step in enumerate(steps) if step in wanted]
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1

    if idx is None:
        raise ValueError(
            "no frame selection given: provide args.idx or args.vaspidx")

    idx = np.atleast_1d(np.asarray(idx, dtype=int))
    train_frames = set(idx.tolist())
    idx2 = [i for i in range(len(ls)) if i not in train_frames]
    ls2 = ls.sub_system(idx2)
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)
    if args.batchsize:
        set_size = args.batchsize
    elif nsw <= 4:
        # not expected in practice (the original author notes nsw > 100)
        set_size = 1
        print("{0} has only {1}".format(path, nsw))
    else:
        # best_size is defined elsewhere; per the original note it avoids
        # a tiny trailing set (e.g. 82 frames split as 20, 20, 20, 20, 2)
        set_size, _ = best_size(nsw)
    ls.to_deepmd_npy(deepmd, set_size=set_size)

    if args.test:
        # dump the test frames to a scratch folder as a single set and
        # move that set next to the training data as set.001
        ls2.to_deepmd_npy('test_tmp', set_size=100000)
        shutil.copytree('test_tmp/set.000', os.path.join(deepmd, 'set.001'))
        shutil.rmtree('test_tmp')
Пример #2
0
 def to_system(self, data, **kwargs):
     """
     Turn a dpdata-style ``data`` dict into a list of single-frame systems.

     A ``LabeledSystem`` is built when ``data`` has a ``'forces'`` entry,
     a plain ``System`` otherwise.  Returns an empty list for zero frames,
     the system itself wrapped in a list for one frame, and one sub-system
     per frame in every other case.  ``kwargs`` is accepted but not used.
     """
     from dpdata import System, LabeledSystem
     system_cls = LabeledSystem if 'forces' in data else System
     system = system_cls(data=data)

     nframes = len(system)
     if nframes == 0:
         return []
     if nframes == 1:
         return [system]
     return [system.sub_system([frame]) for frame in range(nframes)]
Пример #3
0
def build_deepmd_frames(path, outcar, deepmd):
    """
    Filter the frames of a trajectory and write them as deepmd/npy data.

    Behaviour is driven by the module-level ``args`` namespace (defined
    elsewhere in this file):

    * ``args.format``      -- dpdata format used to read ``outcar``.
    * ``args.exclude``     -- frame indices to drop first.
    * ``args.force_limit`` -- keep only frames whose force components all
      lie between min and max of this sequence.
    * ``args.idx``         -- text file of training frame indices.
    * ``args.vaspidx``     -- text file of VASP step numbers, used only when
      ``args.idx`` is not given.  If the second line of ``outcar`` contains
      ``nsw_sel`` (file written by merge_out.py) the step numbers listed
      there are matched against the file; otherwise the frame index is the
      step number minus one.
    * ``args.train_test_ratio`` -- random train/test split, used when
      neither index file is given.
    * ``args.savetest``    -- write the non-training frames as ``set.001``.

    NOTE: the training indices are applied after the exclude / force-limit
    filters, i.e. they refer to positions in the already filtered system.

    Parameters
    ----------
    path : str
        Directory that ``deepmd`` is joined to.
    outcar : str
        Path of the trajectory file.
    deepmd : str
        Name of the output folder, relative to ``path``.
    """
    try:
        ls = LabeledSystem(outcar, fmt=args.format)
    except Exception:
        # Fall back to an unlabeled system.  ``Exception`` instead of a bare
        # except so that Ctrl-C / SystemExit are not swallowed.
        ls = System(outcar, fmt=args.format)

    if args.exclude:
        oldsize = len(ls)
        excluded = set(args.exclude)
        idx_new = [i for i in range(oldsize) if i not in excluded]
        ls = ls.sub_system(idx_new)
        newsize = len(ls)
        print('{0}/{1} is selected'.format(newsize, oldsize))

    if args.force_limit:
        fmin = min(args.force_limit)
        fmax = max(args.force_limit)
        print("force limit imposed, force in between {0}, {1}".format(
            fmin, fmax))
        idx_new = []
        exclude = []
        for i in range(len(ls)):
            forces = ls[i].data['forces']
            if forces.min() >= fmin and forces.max() <= fmax:
                idx_new.append(i)
            else:
                exclude.append(i)
        print('excluded frames', exclude)
        print('{0} / {1} is selected'.format(len(idx_new), len(ls)))
        ls = ls.sub_system(idx_new)

    if args.idx:
        print("index file provided")
        idx = np.loadtxt(args.idx).astype(int)
    elif args.vaspidx:
        print("vasp index file provided")
        # loadtxt returns floats (and a 0-d array for a single value);
        # frame indices have to be a 1-d integer array.
        vaspidx = np.atleast_1d(np.loadtxt(args.vaspidx)).astype(int)
        with open(outcar) as fp:
            fp.readline()  # the first line is skipped
            nsw_sel = fp.readline()
        if 'nsw_sel' in nsw_sel:
            print('file generated by merge_out.py')
            wanted = set(vaspidx.tolist())
            # split() without argument tolerates repeated blanks
            steps = [int(tok) for tok in nsw_sel.split('=')[1].split()]
            idx = [i for i, step in enumerate(steps) if step in wanted]
        else:
            print('OUTCAR file generated by VASP')
            idx = vaspidx - 1
    else:
        print("split train and test by ratio {0} : {1}".format(
            args.train_test_ratio, 1))
        train_size = round(
            len(ls) * (args.train_test_ratio) / (args.train_test_ratio + 1))
        idx = np.random.choice(range(len(ls)), train_size, replace=False)
        idx.sort()

    idx = np.atleast_1d(np.asarray(idx, dtype=int))
    train_frames = set(idx.tolist())
    idx2 = [i for i in range(len(ls)) if i not in train_frames]  # test
    ls2 = ls.sub_system(idx2)  # test
    ls = ls.sub_system(idx)

    deepmd = os.path.join(path, deepmd)

    # a *large* set_size keeps everything in one set (per the original note
    # the default is 5000)
    ls.to_deepmd_npy(deepmd, set_size=1000000)
    if len(ls2) == 0:
        print('test set has no data')
    elif args.savetest:
        # dump the test frames to a scratch folder as a single set and
        # move that set next to the training data as set.001
        ls2.to_deepmd_npy('test_tmp', set_size=1000000)
        shutil.copytree('test_tmp/set.000', os.path.join(deepmd, 'set.001'))
        shutil.rmtree('test_tmp')
Пример #4
0
# Scratch notes: drop a single frame from a deepmd/npy data set and keep the
# matching fparam rows.  This is a mix of Python statements and shell
# commands and is not runnable as one script.

# (disabled) collect and plot the maximum force of every frame
#max_f = []
#for f in force:
#    max_f.append(np.max(np.max(f)))
#print(max_f)    
#plt.plot(max_f)
# Two data-set locations on the author's machine; only tmp2 is used below.
tmp  = '/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/exsolution_pert/u.project.ESS.lstixrud.jd848.pv+hf.dp-train.lmp_run.6k.rp5.160-cpu.pert.10k_good_p3.recal/deepmd_all'

tmp2 = '/Users/jiedeng/GD/papers/pv3_crystallization/post_nn/exsolution_pert/u.home.j.jd848.project-lstixrud.metad.3rd.recal/deepmd/'
# NOTE(review): LabeledSystem is used here before the import further down;
# presumably it was already imported in the interactive session -- confirm.
ls = LabeledSystem(tmp2,fmt='deepmd/npy')

# --- first pass: remove frame 14 from deepmd_ttr2 ---
from dpdata import LabeledSystem
import numpy as np
ls = LabeledSystem('deepmd_ttr2',fmt='deepmd/npy')
idx = list(range(len(ls)))
idx.remove(14)
ls2 = ls.sub_system(idx)
ls2.to_deepmd_npy('test')
# NOTE(review): idx is built from the whole system but applied to the fparam
# of set.001 only -- verify that the row counts agree.
fparam = np.load('deepmd_ttr2/set.001/fparam.npy')
np.save('test/set.000/fparam.npy',fparam[idx])

# The next two lines are shell commands, not Python: copy the data set and
# delete set.000 from the copy.
cp -r deepmd_ttr2 deepmd_ttr2_tmp
rm -r deepmd_ttr2_tmp/set.000

# --- second pass: remove frame 10 from the copy made above ---
from dpdata import LabeledSystem
import numpy as np
ls = LabeledSystem('deepmd_ttr2_tmp',fmt='deepmd/npy')
idx = list(range(len(ls)))
idx.remove(10)
ls2 = ls.sub_system(idx)
ls2.to_deepmd_npy('test')
# NOTE(review): set.000 was removed from this copy by the shell command
# above, so this path may not exist; the loaded fparam is also never saved
# in the visible part of the notes.
fparam = np.load('deepmd_ttr2_tmp/set.000/fparam.npy')
Пример #5
0
from glob import glob
from tqdm import tqdm
"""
process multi systems
"""
# Collect the OUTCARs of the selected iterations into one MultiSystems and
# dump it in deepmd raw and npy format.  MultiSystems, LabeledSystem and
# check_cluster are expected to be in scope already.
fs = glob('iter.0000[4-7]*/02.fp/task*/OUTCAR')
maxf = 3.0  # frames whose largest force component exceeds this are dropped
ms = MultiSystems()
ic = 0  # number of systems accepted
vacuum_size = 13
for f in tqdm(fs):
    # skip tasks that check_cluster flags for the matching POSCAR
    if check_cluster(f.replace('OUTCAR', 'POSCAR'), vacuum_size, fmt='POSCAR'):
        print(f)
        continue
    try:
        ls = LabeledSystem(f)
    except Exception:
        # unreadable OUTCAR: report and move on.  ``Exception`` rather than
        # a bare except so that Ctrl-C still stops this long loop.
        print(f)
        continue
    if len(ls) == 0:
        continue
    # only the first frame is inspected
    # NOTE(review): only max() is compared, so large negative components
    # pass the filter -- confirm this is intended.
    if ls.sub_system([0]).data['forces'].max() > maxf:
        continue
    ic += 1
    ms.append(ls)

print(len(fs))
print(ic)
ms.to_deepmd_raw('deepmd-f%s' % maxf)
ms.to_deepmd_npy('deepmd-f%s' % maxf)
Пример #6
0
from glob import glob
from dpdata import LabeledSystem
from monty.serialization import dumpfn, loadfn
from tqdm import tqdm

# Build one pymatgen ComputedStructureEntry from the last frame of every
# OUTCAR and dump the collection as JSON.
fs = glob('usefull-[1-3]/sys-*/OUTCAR')
entries = []
for f in tqdm(fs):
    ls = LabeledSystem(f)
    # The conversion returns a list; the single-frame sub-system yields one
    # entry.  (The original converted twice and threw the first result away.)
    entry = ls.sub_system([-1]).to_pymatgen_ComputedStructureEntry()[0]
    entries.append(entry)
dumpfn(entries, 'all-vasp-entries.json')