def evaluate(self, test_structures, ref_energies, ref_forces, ref_stresses):
    """
    Build reference and predicted data frames for a set of structures
    using the fitted ``self.model``.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        ref_energies ([float]): List of DFT-calculated total energies, one
            per structure.
        ref_forces ([np.array]): List of DFT-calculated (m, 3) force
            arrays, one per structure with m atoms; m may differ between
            structures.
        ref_stresses (list): List of DFT-calculated (6, ) virial stresses,
            one per structure.

    Returns:
        (df_orig, df_predict): the frame produced by ``convert_docs`` from
            the reference pool, and the frame produced from the
            structures alone whose ``'y_orig'`` column is overwritten with
            the ``'n'`` column multiplied by the model output.
    """
    # Reference frame: structures paired with the supplied DFT data.
    reference_pool = pool_from(test_structures, ref_energies, ref_forces,
                               ref_stresses)
    _, reference_frame = convert_docs(reference_pool)

    # Prediction frame: same structures, no reference data attached.
    bare_pool = pool_from(test_structures)
    _, predicted_frame = convert_docs(bare_pool)

    model_output = self.model.predict(inputs=test_structures, override=True)
    # NOTE(review): presumably the model output is per-atom and 'n' is the
    # atom count, so the product is a total -- confirm against train().
    predicted_frame['y_orig'] = predicted_frame['n'] * model_output

    return reference_frame, predicted_frame
def train(self, train_structures, energies, forces, stresses=None, **kwargs):
    """
    Fit ``self.model`` to the supplied training data.

    Args:
        train_structures ([Structure]): List of Pymatgen Structure objects.
        energies ([float]): List of total energies, one per structure.
        forces ([np.array]): List of (m, 3) force arrays, one per structure
            with m atoms; m may differ between structures.
        stresses (list): List of (6, ) virial stresses, one per structure.
        kwargs: Forwarded unchanged to ``self.model.fit``.
    """
    _, frame = convert_docs(
        pool_from(train_structures, energies, forces, stresses))

    # The fit target is the 'y_orig' column divided by the 'n' column.
    targets = frame['y_orig'] / frame['n']
    self.model.fit(inputs=train_structures, outputs=targets, **kwargs)

    # Only the first symbol of the first structure is recorded.
    first_symbol = train_structures[0].symbol_set[0]
    self.specie = Element(first_symbol)
def evaluate(self, test_structures, ref_energies=None, ref_forces=None,
             ref_stresses=None, predict_energies=True, predict_forces=True,
             predict_stress=False):
    """
    Evaluate energies, forces and stresses of structures with trained
    interatomic potentials by running the external ``quip`` executable.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        ref_energies ([float]): List of DFT-calculated total energies of
            each structure in structures list.
        ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
            each structure with m atoms in structures list. m can be
            varied with each single structure case.
        ref_stresses (list): List of DFT-calculated (6, ) virial stresses
            of each structure in structures list.
        predict_energies (bool): Whether to predict energies of
            configurations (adds ``energy=T`` to the command).
        predict_forces (bool): Whether to predict forces of
            configurations (adds ``forces=T`` to the command).
        predict_stress (bool): Whether to predict virial stress of
            configurations (adds ``virial=T`` to the command).

    Returns:
        (df_orig, df_predict): the second items returned by
            ``self.read_cfgs`` for the reference file and for the file
            holding the ``quip`` output.

    Raises:
        RuntimeError: If ``quip`` cannot be found, or if it exits with a
            non-zero return code.
    """
    if not which('quip'):
        # Adjacent literals are concatenated so the exception carries one
        # readable message instead of a tuple of fragments.
        raise RuntimeError(
            "quip has not been found.\n"
            "Please refer to https://github.com/libAtoms/QUIP for "
            "further detail.")

    xml_file = 'predict.xml'
    original_file = 'original.xyz'
    predict_file = 'predict.xyz'
    predict_pool = pool_from(test_structures, ref_energies, ref_forces,
                             ref_stresses)

    with ScratchDir('.'):
        self.write_param(xml_file)
        original_file = self.write_cfgs(original_file,
                                        cfg_pool=predict_pool)
        _, df_orig = self.read_cfgs(original_file)

        exe_command = [
            "quip",
            "atoms_filename={}".format(original_file),
            "param_filename={}".format(xml_file),
        ]
        if predict_energies:
            exe_command.append("energy=T")
        if predict_forces:
            exe_command.append("forces=T")
        if predict_stress:
            exe_command.append("virial=T")

        # quip's stdout is the prediction file; close the handle before
        # the file is read back.
        with open(predict_file, 'w') as predict_handle:
            p = subprocess.Popen(exe_command, stdout=predict_handle)
            p.communicate()

        rc = p.returncode
        if rc != 0:
            # Fail loudly rather than parsing a partial output file.
            raise RuntimeError('QUIP exited with return code %d' % rc)

        _, df_predict = self.read_cfgs(predict_file, predict=True)

    return df_orig, df_predict
def test_pool_from(self):
    """
    Check that ``pool_from`` reproduces the energy, forces and virial
    stress outputs of the fixture pool, entry by entry.
    """
    rebuilt_pool = pool_from(self.test_structures, self.test_energies,
                             self.test_forces, self.test_stresses)
    compared_keys = ('energy', 'forces', 'virial_stress')
    for rebuilt, expected in zip(rebuilt_pool, self.test_pool):
        for key in compared_keys:
            self.assertEqual(rebuilt['outputs'][key],
                             expected['outputs'][key])
def evaluate(self, test_structures, ref_energies, ref_forces, ref_stresses):
    """
    Evaluate energies, forces and stresses of structures with trained
    interatomic potentials by running the external ``nnp-predict``
    executable once per structure.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        ref_energies ([float]): List of DFT-calculated total energies of
            each structure in structures list.
        ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
            each structure with m atoms in structures list. m can be
            varied with each single structure case.
        ref_stresses (list): List of DFT-calculated (6, ) virial stresses
            of each structure in structures list.

    Returns:
        (df_orig, df_predict): the second item returned by
            ``self.read_cfgs`` for the full reference file, and the
            concatenation (index reset) of the frames read from the
            prediction file after each run.

    Raises:
        RuntimeError: If ``nnp-predict`` cannot be found, or if any run
            exits with a non-zero return code.
    """
    if not which('nnp-predict'):
        raise RuntimeError("NNP Predictor has not been found.")

    original_file = 'input.data'
    predict_file = 'output.data'
    predict_pool = pool_from(test_structures, ref_energies, ref_forces,
                             ref_stresses)

    with ScratchDir('.'):
        _, _ = self.write_param()
        original_file = self.write_cfgs(original_file,
                                        cfg_pool=predict_pool)
        _, df_orig = self.read_cfgs(original_file)
        input_filename = self.write_input()

        dfs = []
        for data in predict_pool:
            # The input file is rewritten with a single configuration
            # before every run.
            self.write_cfgs(original_file, cfg_pool=[data])
            p = subprocess.Popen(['nnp-predict', input_filename],
                                 stdout=subprocess.PIPE)
            stdout = p.communicate()[0]
            rc = p.returncode
            if rc != 0:
                error_msg = 'RuNNer exited with return code %d' % rc
                msg = stdout.decode("utf-8").split('\n')[:-1]
                # Report everything from the first ERROR line onwards,
                # else the last line; empty output adds nothing instead
                # of raising IndexError and masking the real failure.
                first_error = next(
                    (i for i, text in enumerate(msg)
                     if text.startswith('ERROR')), None)
                if first_error is not None:
                    error_msg += ', '.join(msg[first_error:])
                elif msg:
                    error_msg += msg[-1]
                raise RuntimeError(error_msg)
            _, df = self.read_cfgs(predict_file)
            dfs.append(df)

        df_predict = pd.concat(dfs, ignore_index=True)

    return df_orig, df_predict
def train(self, train_structures, energies=None, forces=None, stresses=None,
          **kwargs):
    """
    Train the potential by running the external ``nnp-scaling`` and
    ``nnp-train`` executables, then load the resulting weights and scaler.

    Args:
        train_structures ([Structure]): List of Pymatgen Structure objects.
        energies ([float]): List of total energies of each structure in
            structures list.
        forces ([np.array]): List of (m, 3) forces array of each structure
            with m atoms in structures list. m can be varied with each
            single structure case.
        stresses (list): List of (6, ) virial stresses of each structure
            in structures list.
        kwargs: Parameters in write_input method.

    Returns:
        int: Return code of ``nnp-train`` (0 whenever this returns).

    Raises:
        RuntimeError: If ``nnp-train`` cannot be found or exits with a
            non-zero return code.
    """
    if not which('nnp-train'):
        raise RuntimeError("NNP Trainer has not been found.")

    train_pool = pool_from(train_structures, energies, forces, stresses)
    atoms_filename = 'input.data'

    with ScratchDir('.'):
        self.write_cfgs(filename=atoms_filename, cfg_pool=train_pool)
        output = 'training_output'
        input_filename = self.write_input(**kwargs)

        # NOTE(review): the nnp-scaling return code is not inspected.
        p_scaling = subprocess.Popen(['nnp-scaling', input_filename])
        p_scaling.communicate()

        # Training output goes to a log file; close the handle before the
        # log is read back below.
        with open(output, 'w') as log:
            p_train = subprocess.Popen(['nnp-train', input_filename],
                                       stdout=log)
            p_train.communicate()
        rc = p_train.returncode

        if rc != 0:
            error_msg = 'RuNNer exited with return code %d' % rc
            # stdout was redirected, so communicate() returned None; the
            # diagnostics have to come from the log file.
            with open(output) as log:
                msg = log.read().split('\n')[:-1]
            first_error = next(
                (i for i, text in enumerate(msg)
                 if text.startswith('ERROR')), None)
            if first_error is not None:
                error_msg += ', '.join(msg[first_error:])
            elif msg:
                error_msg += msg[-1]
            raise RuntimeError(error_msg)

        with zopen(output) as f:
            error_lines = f.read()

        # Raw strings avoid invalid-escape warnings; the patterns are
        # unchanged.
        energy_rmse_pattern = re.compile(
            r'ENERGY\s*\S*\s*(\S*)\s*(\S*).*?\n')
        forces_rmse_pattern = re.compile(
            r'FORCES\s*\S*\s*(\S*)\s*(\S*).*?\n')

        # Each match is a (first, second) capture pair; transposing
        # yields one array per capture. The builtin float replaces the
        # removed np.float alias.
        self.train_energy_rmse, self.validation_energy_rmse = np.array(
            energy_rmse_pattern.findall(error_lines), dtype=float).T
        self.train_forces_rmse, self.validation_forces_rmse = np.array(
            forces_rmse_pattern.findall(error_lines), dtype=float).T

        weights_filename_pattern = 'weights*{}.out'.format(
            self.param.get('epochs'))
        weights_filename = glob.glob(weights_filename_pattern)[0]
        self.suffix = weights_filename.split('.')[1]
        self.load_weights(weights_filename)
        self.load_scaler('scaling.data')

    return rc
def evaluate(self, test_structures, ref_energies=None, ref_forces=None,
             ref_stresses=None, **kwargs):
    """
    Evaluate energies, forces and stresses of structures with trained
    interatomic potentials by running the external ``mlp run`` command.

    Args:
        test_structures ([Structure]): List of Pymatgen Structure objects.
        ref_energies ([float]): List of DFT-calculated total energies of
            each structure in structures list.
        ref_forces ([np.array]): List of DFT-calculated (m, 3) forces of
            each structure with m atoms in structures list. m can be
            varied with each single structure case.
        ref_stresses (list): List of DFT-calculated (6, ) virial stresses
            of each structure in structures list.
        kwargs: Parameters of write_param method.

    Returns:
        (df_orig, df_predict): the second items returned by
            ``self.read_cfgs`` for the reference file and for the
            prediction file.

    Raises:
        RuntimeError: If ``mlp`` cannot be found or exits with a non-zero
            return code.
    """
    if not which('mlp'):
        # Adjacent literals are concatenated so the exception carries one
        # readable message instead of a tuple of fragments.
        raise RuntimeError(
            "mlp has not been found.\n"
            "Please refer to http://gitlab.skoltech.ru/shapeev/mlip "
            "for further detail.")

    fitted_mtp = 'fitted.mtp'
    original_file = 'original.cfgs'
    predict_file = 'predict.cfgs'
    predict_pool = pool_from(test_structures, ref_energies, ref_forces,
                             ref_stresses)

    # The element symbol is taken from the first pool entry only.
    dataset = predict_pool[0]
    if isinstance(dataset['structure'], dict):
        structure = Structure.from_dict(dataset['structure'])
    else:
        structure = dataset['structure']
    symbol = structure.symbol_set[0]

    with ScratchDir('.'):
        self.write_param(fitted_mtp=fitted_mtp, Abinitio=0, Driver=1,
                         Write_cfgs=predict_file,
                         Database_filename=original_file, **kwargs)
        original_file = self.write_cfg(original_file,
                                       cfg_pool=predict_pool)
        _, df_orig = self.read_cfgs(original_file, symbol=symbol)

        p = subprocess.Popen([
            'mlp', 'run', 'mlip.ini',
            '--filename={}'.format(original_file)
        ], stdout=subprocess.PIPE)
        stdout = p.communicate()[0]
        rc = p.returncode
        if rc != 0:
            error_msg = 'MLP exited with return code %d' % rc
            msg = stdout.decode("utf-8").split('\n')[:-1]
            # Report everything from the first ERROR line onwards, else
            # the last line; empty output adds nothing instead of raising
            # IndexError and masking the real failure.
            first_error = next(
                (i for i, text in enumerate(msg)
                 if text.startswith('ERROR')), None)
            if first_error is not None:
                error_msg += ', '.join(msg[first_error:])
            elif msg:
                error_msg += msg[-1]
            raise RuntimeError(error_msg)

        # Fall back to the '_0'-suffixed name when the plain prediction
        # file was not produced.
        if not os.path.exists(predict_file):
            predict_file = '_'.join([predict_file, '0'])
        _, df_predict = self.read_cfgs(predict_file, symbol=symbol)

    return df_orig, df_predict
def train(self, train_structures, energies=None, forces=None, stresses=None,
          unfitted_mtp=None, max_dist=5, radial_basis_size=8, max_iter=500,
          energy_weight=1, force_weight=1e-2, stress_weight=0):
    """
    Train the potential by running the external ``mlp train`` command and
    load the fitted parameters into ``self.param``.

    Args:
        train_structures ([Structure]): List of Pymatgen Structure objects.
        energies ([float]): List of total energies of each structure in
            structures list.
        forces ([np.array]): List of (m, 3) forces array of each structure
            with m atoms in structures list. m can be varied with each
            single structure case.
        stresses (list): List of (6, ) virial stresses of each structure
            in structures list.
        unfitted_mtp (str): File name of the initial mtp template, looked
            up in the ``params`` directory under ``module_dir``. Required:
            a falsy value raises RuntimeError.
        max_dist (float): The actual radial cutoff; first value
            substituted into the template.
        radial_basis_size (int): Relevant to number of radial basis
            function; second value substituted into the template.
        max_iter (int): The number of maximum iteration.
        energy_weight (float): The weight of energy.
        force_weight (float): The weight of forces.
        stress_weight (float): The weight of stresses.

    Returns:
        int: Return code of ``mlp train`` (0 whenever this returns).

    Raises:
        RuntimeError: If ``mlp`` cannot be found, if ``unfitted_mtp`` is
            not given, or if ``mlp`` exits with a non-zero return code.
    """
    if not which('mlp'):
        # Adjacent literals are concatenated so the exception carries one
        # readable message instead of a tuple of fragments.
        raise RuntimeError(
            "mlp has not been found.\n"
            "Please refer to http://gitlab.skoltech.ru/shapeev/mlip "
            "for further detail.")

    train_pool = pool_from(train_structures, energies, forces, stresses)
    atoms_filename = 'train.cfgs'

    # Validate before any scratch files are written.
    if not unfitted_mtp:
        raise RuntimeError("No specific potentials file provided.")

    def load_config(filename):
        """
        Parse a fitted mtp file: all but the last two lines are stored
        verbatim under 'safe'; each of the last two lines is read as
        ``key = value`` with braces turned into JSON list brackets.
        """
        param = OrderedDict()
        with open(filename, 'r') as f:
            lines = f.readlines()
        param['safe'] = [line.rstrip() for line in lines[:-2]]
        for line in lines[-2:]:
            fields = line.rstrip().split(' = ')
            param[fields[0]] = json.loads(
                fields[1].replace('{', '[').replace('}', ']'))
        return param

    with ScratchDir('.'):
        atoms_filename = self.write_cfg(filename=atoms_filename,
                                        cfg_pool=train_pool)

        # Copy the template next to the data and fill in its two
        # %-placeholders.
        MTP_file_path = os.path.join(module_dir, 'params', unfitted_mtp)
        shutil.copyfile(MTP_file_path,
                        os.path.join(os.getcwd(), unfitted_mtp))
        with open(unfitted_mtp) as f:
            template = f.read()
        with open(unfitted_mtp, 'w') as f:
            f.write(template % (max_dist, radial_basis_size))

        # '<root>_fitted<ext>': unchanged for 'name.ext'; unlike
        # split('.'), also handles names with no dot or several dots.
        root, ext = os.path.splitext(unfitted_mtp)
        save_fitted_mtp = root + '_fitted' + ext

        p = subprocess.Popen([
            'mlp', 'train', unfitted_mtp, atoms_filename,
            '--max-iter={}'.format(max_iter),
            '--trained-pot-name={}'.format(save_fitted_mtp),
            '--curr-pot-name={}'.format(unfitted_mtp),
            '--energy-weight={}'.format(energy_weight),
            '--force-weight={}'.format(force_weight),
            '--stress-weight={}'.format(stress_weight),
            '--init-params=same', '--auto-min-dist'
        ], stdout=subprocess.PIPE)
        stdout = p.communicate()[0]
        rc = p.returncode
        if rc != 0:
            error_msg = 'MLP exited with return code %d' % rc
            msg = stdout.decode("utf-8").split('\n')[:-1]
            # Report everything from the first ERROR line onwards, else
            # the last line; empty output adds nothing instead of raising
            # IndexError and masking the real failure.
            first_error = next(
                (i for i, text in enumerate(msg)
                 if text.startswith('ERROR')), None)
            if first_error is not None:
                error_msg += ', '.join(msg[first_error:])
            elif msg:
                error_msg += msg[-1]
            raise RuntimeError(error_msg)

        # Read while still inside the scratch context, where the fitted
        # file was written.
        self.param = load_config(save_fitted_mtp)

    return rc
def train(self, train_structures, energies=None, forces=None, stresses=None,
          default_sigma=[0.0005, 0.1, 0.05, 0.01], use_energies=True,
          use_forces=True, use_stress=False, **kwargs):
    """
    Train the potential with gaussian process regression by running the
    external ``gap_fit`` executable, then load the fitted XML and sparse
    parameters into ``self.param``.

    Args:
        train_structures ([Structure]): List of Pymatgen Structure objects.
        energies ([float]): List of total energies of each structure in
            structures list.
        forces ([np.array]): List of (m, 3) forces array of each structure
            with m atoms in structures list. m can be varied with each
            single structure case.
        stresses (list): List of (6, ) virial stresses of each structure
            in structures list.
        default_sigma (list): Error criteria in energies, forces, stress
            and hessian. Should have 4 numbers. Only read, never mutated.
        use_energies (bool): Whether to use dft total energies for
            training. Default to True.
        use_forces (bool): Whether to use dft atomic forces for training.
            Default to True.
        use_stress (bool): Whether to use dft virial stress for training.
            Default to False.

        kwargs: Any option below that is absent (or None) falls back to
            the value stored in ``soap_params``; the defaults listed are
            the documented ones. Explicit falsy values such as 0 or 0.0
            are honoured.
            l_max (int): Parameter to configure GAP. The band limit of
                spherical harmonics basis function. Default to 12.
            n_max (int): Parameter to configure GAP. The number of radial
                basis function. Default to 10.
            atom_sigma (float): Parameter to configure GAP. The width of
                gaussian atomic density. Default to 0.5.
            zeta (float): Present when covariance function type is dot
                product. Default to 4.
            cutoff (float): Parameter to configure GAP. The cutoff radius.
                Default to 4.0.
            cutoff_transition_width (float): Parameter to configure GAP.
                The transition width of cutoff radial. Default to 0.5.
            delta (float): Parameter to configure Sparsification. The
                signal variance of noise. Default to 1.
            f0 (float): Parameter to configure Sparsification. The signal
                mean of noise. Default to 0.0.
            n_sparse (int): Parameter to configure Sparsification. Number
                of sparse points.
            covariance_type (str): Parameter to configure Sparsification.
                The type of covariance function. Default to dot_product.
            sparse_method (str): Method to perform clustering in
                sparsification. Default to 'cur_points'.
            sparse_jitter (float): Intrinsic error of atomic/bond energy,
                used to regularise the sparse covariance matrix.
                Default to 1e-8.
            e0 (float): Atomic energy value to be subtracted from energies
                before fitting. Default to 0.0.
            e0_offset (float): Offset of baseline. If zero, the offset is
                the average atomic energy of the input data or the e0
                specified manually. Default to 0.0.

    Returns:
        int: Return code of ``gap_fit`` (0 whenever this returns).

    Raises:
        RuntimeError: If ``gap_fit`` cannot be found or exits with a
            non-zero return code.
        ValueError: If ``default_sigma`` does not hold exactly 4 values.
    """
    if not which('gap_fit'):
        # Adjacent literals are concatenated so the exception carries one
        # readable message instead of a tuple of fragments.
        raise RuntimeError(
            "gap_fit has not been found.\n"
            "Please refer to https://github.com/libAtoms/QUIP for "
            "further detail.")

    atoms_filename = 'train.xyz'
    xml_filename = 'train.xml'
    train_pool = pool_from(train_structures, energies, forces, stresses)

    gap_configure_params = [
        'l_max', 'n_max', 'atom_sigma', 'zeta', 'cutoff',
        'cutoff_transition_width', 'delta', 'f0', 'n_sparse',
        'covariance_type', 'sparse_method'
    ]
    preprocess_params = ['sparse_jitter', 'e0', 'e0_offset']

    if len(default_sigma) != 4:
        raise ValueError(
            "The default sigma is supposed to have 4 numbers.")

    def resolve(param_name):
        """Return the caller's value, or the soap_params fallback."""
        value = kwargs.get(param_name)
        # Compare against None, not truthiness, so that an explicit 0,
        # 0.0 or '' from the caller is not silently discarded.
        if value is None:
            value = soap_params.get(param_name)
        return value

    exe_command = ["gap_fit", 'at_file={}'.format(atoms_filename)]

    gap_command = ['soap']
    gap_command.extend('{}={}'.format(name, resolve(name))
                       for name in gap_configure_params)
    exe_command.append("gap={" + ' '.join(gap_command) + "}")

    exe_command.extend('{}={}'.format(name, resolve(name))
                       for name in preprocess_params)

    sigma_text = ' '.join(str(value) for value in default_sigma)
    exe_command.append("default_sigma={%s}" % sigma_text)

    if use_energies:
        exe_command.append('energy_parameter_name=dft_energy')
    if use_forces:
        exe_command.append('force_parameter_name=dft_force')
    if use_stress:
        exe_command.append('virial_parameter_name=dft_virial')
    exe_command.append('gp_file={}'.format(xml_filename))

    def get_xml(xml_file):
        """
        Parse the fitted XML and load the sparse-point file named by the
        first ``gpCoordinates`` element's ``sparseX_filename`` attribute.
        """
        tree = ET.parse(xml_file)
        root = tree.getroot()
        potential_label = root.tag
        gpcoordinates = list(root.iter('gpCoordinates'))[0]
        param_file = gpcoordinates.get('sparseX_filename')
        param = np.loadtxt(param_file)
        return tree, param, potential_label

    with ScratchDir('.'):
        self.write_cfgs(filename=atoms_filename, cfg_pool=train_pool)

        p = subprocess.Popen(exe_command, stdout=subprocess.PIPE)
        stdout = p.communicate()[0]
        rc = p.returncode
        if rc != 0:
            error_msg = 'QUIP exited with return code %d' % rc
            msg = stdout.decode("utf-8").split('\n')[:-1]
            # Report everything from the first ERROR line onwards, else
            # the last line; empty output adds nothing instead of raising
            # IndexError and masking the real failure.
            first_error = next(
                (i for i, text in enumerate(msg)
                 if text.startswith('ERROR')), None)
            if first_error is not None:
                error_msg += ', '.join(msg[first_error:])
            elif msg:
                error_msg += msg[-1]
            raise RuntimeError(error_msg)

        # Read while still inside the scratch context, where gap_fit
        # wrote its output files.
        tree, param, potential_label = get_xml(xml_filename)
        self.param['xml'] = tree
        self.param['param'] = param
        self.param['potential_label'] = potential_label

    return rc