def encode_method_test():
    """Compare the two grid encoders, then encode a shuffled train/test split.

    Part one loads the ``Pd_CH_s_fcc_stand.cif`` sample from the ``data``
    directory, builds a grid with ``resolutionX/Y/Z = 100`` and encodes
    sample index 1 with both ``AtomGrid.grid_encode`` and
    ``AtomGrid.grid_encode1``.  Both encodings, their element-wise
    difference and the sum of that difference are printed so the two
    methods can be compared by eye.

    Part two builds a second grid with ``resolutionX/Y/Z = 60``, shuffles
    all sample indices, splits them 70/30 and hands each part to
    ``grid_encode`` together with the file names ``trainX.npy`` and
    ``testX.npy``.

    The function takes no arguments and returns nothing.
    """
    # Raw string: "\P" is not a valid escape sequence, so the plain literal
    # triggers a SyntaxWarning on recent Python versions.  The runtime value
    # is unchanged.
    extractor = VASP_DataExtract(vasp_dir=r"data\Pd_CH_s_fcc_stand.cif")
    space = extractor.get_output_as_atom3Dspace()
    # Return value is not used here; the call is kept in case it has side
    # effects on the extractor -- TODO confirm and drop if it is pure.
    extractor.get_energy_info()
    coord, _energy, atom_case = space.generate_data()

    # ---- Part one: compare grid_encode against grid_encode1 ----
    compare_grid = AtomGrid(coord, atom_case)
    compare_grid.get_grid_border()
    compare_grid.make_grid(minX=-0.5, maxX=7, minY=-3, maxY=7.5,
                           minZ=-0.5, maxZ=9.5,
                           resolutionX=100, resolutionY=100, resolutionZ=100)
    train_index = [1]
    en2 = compare_grid.grid_encode(train_index)[0]
    en1 = compare_grid.grid_encode1(train_index)[0]
    # grid_encode (en2) is much faster than grid_encode1 (en1); also check
    # whether the two encodings agree.
    print(en1)
    print("_______________________")
    print(en2)
    print(en1 - en2)
    print(np.sum(en1 - en2))

    # ---- Part two: encode a shuffled train/test split ----
    # Step 1: build the grid object from the coordinates and atom cases.
    grid = AtomGrid(coord, atom_case)
    # Step 2: get the border information, then choose the grid bounds and
    # resolution by hand based on it.
    grid.get_grid_border()
    grid.make_grid(minX=-6, maxX=15, minY=-6, maxY=15, minZ=-6, maxZ=15,
                   resolutionX=60, resolutionY=60, resolutionZ=60)
    # Step 3: shuffle the sample indices, split them into train/test parts
    # and encode each part under its own file name.
    all_index = list(range(coord.shape[0]))
    np.random.shuffle(all_index)
    print(all_index)
    sample_num = len(all_index)
    train_test_ratio = 0.7
    split_at = int(sample_num * train_test_ratio)
    train_index = all_index[:split_at]
    test_index = all_index[split_at:]
    grid.grid_encode(train_index, "trainX.npy")
    grid.grid_encode(test_index, "testX.npy")
def make_dataset(vasp_dir, atom_case, save=False, max_dataset_number=-1):
    """Build grid-encoded train/test data from one VASP directory.

    The samples are extracted with ``VASP_DataExtract``, encoded on a fixed
    grid (bounds -6..15 on every axis, ``resolutionX/Y/Z = 60``), shuffled
    and split 70/30 into train and test parts.

    Args:
        vasp_dir: Path handed to ``VASP_DataExtract``.  Its last component
            (after splitting on forward and back slashes) is used as the
            prefix of the output file names.
        atom_case: Atom cases passed to ``AtomGrid``.  Must contain every
            atom case found in the extracted data.
        save: When it compares unequal to ``False`` the datasets are
            written to ``<name>_trainX.npy``, ``<name>_testX.npy``,
            ``<name>_trainY.npy`` and ``<name>_testY.npy`` and nothing is
            returned.  Otherwise the arrays are returned.
        max_dataset_number: Upper bound on the number of samples used; the
            first ``max_dataset_number`` samples are kept before shuffling.
            A negative value (the default ``-1``) or ``None`` means "use
            every sample".

    Returns:
        ``(trainX, trainY, testX, testY)`` when ``save`` is ``False``;
        ``None`` when the datasets are saved instead.

    Raises:
        ValueError: If the data contains an atom case missing from
            ``atom_case``.
    """
    extractor = VASP_DataExtract(vasp_dir=vasp_dir)
    vasp_dir_name = vasp_dir.split("/")[-1].split('\\')[-1]
    space = extractor.get_output_as_atom3Dspace()
    # Return value is not used here; the call is kept in case it has side
    # effects on the extractor -- TODO confirm and drop if it is pure.
    extractor.get_energy_info()
    coord, energy, found_atom_case = space.generate_data()

    for found in found_atom_case:
        if found not in atom_case:
            # Report the entry that is actually missing; a one-element tuple
            # keeps the formatting safe whatever type ``found`` has.
            raise ValueError(
                "Input Atom Case Didn't Contain %s, make sure all atom cases are in atom_case"
                % (found,))

    # Step 1: build the grid object from the coordinates and atom cases.
    grid = AtomGrid(coord, atom_case)
    # Step 2: get the border information, then choose the grid bounds and
    # resolution by hand based on it.
    grid.get_grid_border()
    grid.make_grid(minX=-6, maxX=15, minY=-6, maxY=15, minZ=-6, maxZ=15,
                   resolutionX=60, resolutionY=60, resolutionZ=60)

    # Step 3: choose the sample indices, shuffle them and split train/test.
    all_index = list(range(coord.shape[0]))
    # A negative limit is the "no limit" sentinel.  Slicing with the default
    # -1 would silently drop the last sample, so only slice for real limits.
    if max_dataset_number is not None and max_dataset_number >= 0:
        all_index = all_index[:max_dataset_number]
    np.random.shuffle(all_index)
    print(all_index)
    sample_num = len(all_index)
    train_test_ratio = 0.7
    split_at = int(sample_num * train_test_ratio)
    train_index = all_index[:split_at]
    test_index = all_index[split_at:]

    # Kept as an explicit comparison rather than a truthiness test so that
    # values such as None keep selecting the save branch, as before.
    save_to_disk = save != False  # noqa: E712

    trainX = testX = None
    if save_to_disk:
        grid.grid_encode(train_index, "%s_trainX.npy" % vasp_dir_name)
        grid.grid_encode(test_index, "%s_testX.npy" % vasp_dir_name)
    else:
        trainX = grid.grid_encode(train_index, save=False)
        testX = grid.grid_encode(test_index, save=False)

    # Targets are the energies of the selected samples, in shuffled order so
    # they line up with the encoded inputs.
    trainY = np.array([energy[i] for i in train_index])
    testY = np.array([energy[i] for i in test_index])

    if save_to_disk:
        np.save("%s_trainY.npy" % vasp_dir_name, trainY)
        np.save("%s_testY.npy" % vasp_dir_name, testY)
        return None
    return trainX, trainY, testX, testY