def generate_mb_envs_pos(positions0, species_1, cutoffs, cell, delt, d1,
                         mask=None):
    """Build atomic environments for a structure and two displaced copies.

    Two deep copies of ``positions0`` are made in which coordinate
    ``d1 - 1`` of atom 0 is overwritten (set, not incremented) with
    ``+delt`` and ``-delt`` respectively. A ``Structure`` is built for the
    original and each copy, and an ``AtomicEnvironment`` is created for
    every atom of every structure.

    Args:
        positions0: Atomic positions of the reference structure.
        species_1: Species passed to each ``Structure``.
        cutoffs: Cutoffs passed to each ``AtomicEnvironment``.
        cell: Cell passed to each ``Structure``.
        delt: Value written into the selected coordinate of atom 0.
        d1: 1-based index of the coordinate of atom 0 to overwrite.
        mask: Forwarded as ``cutoffs_mask`` to ``AtomicEnvironment``.

    Returns:
        list: ``[env_0, env_p, env_m]`` -- per-atom environment lists for
        the reference, ``+delt`` and ``-delt`` structures.
    """
    axis = d1 - 1

    shifted_plus = deepcopy(positions0)
    shifted_plus[0][axis] = delt

    shifted_minus = deepcopy(positions0)
    shifted_minus[0][axis] = -delt

    structures = [Structure(cell, species_1, pos)
                  for pos in (positions0, shifted_plus, shifted_minus)]

    # One list per structure; environments are created atom by atom,
    # visiting the three structures in order for each atom.
    env_groups = [[], [], []]
    for atom in range(len(positions0)):
        for group, structure in zip(env_groups, structures):
            group.append(AtomicEnvironment(structure, atom, cutoffs,
                                           cutoffs_mask=mask))

    return env_groups
def GenGrid_serial(self, GP): ''' generate grid data of mean prediction and L^{-1}k* for each triplet implemented in a parallelized style ''' # ------ get 3body kernel info ------ kernel, efk, cutoffs, hyps, hyps_mask = get_3bkernel(GP) # ------ construct grids ------ nop = self.grid_num[0] noa = self.grid_num[2] bond_lengths = np.linspace(self.l_bounds[0], self.u_bounds[0], nop) cos_angles = np.linspace(self.l_bounds[2], self.u_bounds[2], noa) bond_means = np.zeros([nop, nop, noa]) bond_vars = np.zeros([nop, nop, noa, len(GP.alpha)]) env12 = AtomicEnvironment(self.bond_struc, 0, self.cutoffs) if self.update: if 'kv3' in os.listdir(): os.rmdir('kv3') os.mkdir('kv3') size = len(GP.training_data) ds = [1, 2, 3] k_v = np.zeros(3) k12_v_all = np.zeros([len(bond_lengths), len(bond_lengths), len(cos_angles), size*3]) for b1, r1 in enumerate(bond_lengths): for b2, r2 in enumerate(bond_lengths): for a12, cos_angle12 in enumerate(cos_angles): x2 = r2 * cos_angle12 y2 = r2 * np.sqrt(1-cos_angle12**2) r12 = np.linalg.norm(np.array([x2-r1, y2, 0])) env12.bond_array_3 = np.array([[r1, 1, 0, 0], [r2, 0, 0, 0]]) env12.cross_bond_dists = np.array([[0, r12], [r12, 0]]) for isample, sample in enumerate(GP.training_data): for d in ds: k_v[d-1] = kernel(env12, sample, 1, d, hyps, cutoffs) k12_v_all[b1, b2, a12, isample*3:isample*3+3] = k_v for b1, r1 in enumerate(bond_lengths): for b2, r2 in enumerate(bond_lengths): for a12, cos_angle in enumerate(cos_angles): k12_v = k12_v_all[b1, b2, a12, :] bond_means[b1, b2, a12] = np.matmul(k12_v, GP.alpha) if not self.mean_only: bond_vars[b1, b2, a12, :] = solve_triangular(GP.l_mat, k12_v, lower=True) # # ------ save mean and var to file ------- np.save('grid3_mean', bond_means) np.save('grid3_var', bond_vars) return bond_means, bond_vars
def test_env_methods(structure, mask, cutoff, result):
    """Check string form and dict round-trip of an AtomicEnvironment.

    A cutoffs mask is generated from ``cutoff`` only when ``mask`` is
    exactly ``True``; otherwise no mask is used. ``result[0]`` is the
    neighbour count expected in the string representation.
    """
    cutoffs_mask = generate_mask(cutoff) if mask is True else None

    env_test = AtomicEnvironment(structure, atom=0, cutoffs=cutoff,
                                 cutoffs_mask=cutoffs_mask)

    expected_repr = (f'Atomic Env. of Type 1 surrounded by {result[0]} atoms'
                     ' of Types [1, 2, 3]')
    assert str(env_test) == expected_repr

    # Serialisation must yield a dict carrying the essential keys.
    the_dict = env_test.as_dict()
    assert isinstance(the_dict, dict)
    for required_key in ('positions', 'cell', 'atom', 'cutoffs', 'species'):
        assert required_key in the_dict.keys()

    # Round-trip and compare the bond arrays that exist for this cutoff set.
    remade_env = AtomicEnvironment.from_dict(the_dict)
    assert isinstance(remade_env, AtomicEnvironment)
    assert np.array_equal(remade_env.bond_array_2, env_test.bond_array_2)

    n_cutoffs = len(cutoff)
    if n_cutoffs > 1:
        assert np.array_equal(remade_env.bond_array_3, env_test.bond_array_3)
    if n_cutoffs > 2:
        assert np.array_equal(remade_env.q_array, env_test.q_array)
def test_backwards_compatibility(structure, mask, cutoff, result):
    """
    Exercise the branch of the environment's as_dict() method that handles
    older pickled environments lacking a ``cutoffs_mask`` attribute.

    This test can be deleted if backwards compatibility is dropped for the
    sake of code cleanup. (This test executes in about 5 milliseconds).

    :return:
    """
    cutoffs_mask = generate_mask(cutoff) if mask is True else None

    original_env = AtomicEnvironment(structure, atom=0, cutoffs=cutoff,
                                     cutoffs_mask=cutoffs_mask)
    env_test = deepcopy(original_env)

    # Serialise once with the attribute present, then once after removing
    # it to mimic an old environment object.
    pre_test_dict = env_test.as_dict()
    del env_test.cutoffs_mask
    test_dict = env_test.as_dict()

    assert pre_test_dict["cutoffs_mask"] == test_dict["cutoffs_mask"]

    new_env = AtomicEnvironment.from_dict(test_dict)
    assert isinstance(new_env, AtomicEnvironment)
    assert str(new_env) == str(env_test)
def update_db(self, struc: Structure, forces: List,
              custom_range: List[int] = (), energy: float = None):
    """Add force and/or energy data from a structure to the training set.

    When ``forces`` is given, the local environments of the selected atoms
    are appended to the training data together with their force labels.
    When ``energy`` is given, the environments of all atoms are stored as
    one training structure together with the energy label.

    Args:
        struc (Structure): Input structure whose local environments are
            added to the training set of the GP.
        forces (np.ndarray): Forces on atoms in the structure, or None to
            skip the force update.
        custom_range (List[int]): Indices of atoms whose local environments
            are added to the force training set. All atoms are used when
            this is empty.
        energy (float): Energy of the structure, or None to skip the
            energy update.
    """
    n_atoms = len(struc.positions)

    # An empty custom_range means "use every atom".
    selected_atoms = custom_range or list(range(n_atoms))

    # Force labels: one environment and one force vector per selected atom.
    if forces is not None:
        for index in selected_atoms:
            environment = AtomicEnvironment(struc, index, self.cutoffs,
                                            cutoffs_mask=self.hyps_mask)
            label = np.array(forces[index])

            self.training_data.append(environment)
            self.training_labels.append(label)

        # Flattened array of all force labels collected so far.
        self.training_labels_np = np.hstack(self.training_labels)

    # Energy label: the whole structure as a list of environments.
    if energy is not None:
        all_environments = [
            AtomicEnvironment(struc, index, self.cutoffs,
                              cutoffs_mask=self.hyps_mask)
            for index in range(n_atoms)
        ]

        self.energy_labels.append(energy)
        self.training_structures.append(all_environments)
        self.energy_labels_np = np.array(self.energy_labels)

    # Combined label vector: forces first, then energies.
    self.all_labels = np.concatenate((self.training_labels_np,
                                      self.energy_labels_np))
    self.sync_data()
def test_auto_sweep():
    """Test that the number of neighbors inside the local environment is
    correctly computed."""

    # An arbitrary non-cubic structure.
    cell = np.array([[1.3, 0.5, 0.8],
                     [-1.2, 1, 0.73],
                     [-0.8, 0.1, 0.9]])
    positions = np.array([[1.2, 0.7, 2.3],
                          [3.1, 2.5, 8.9],
                          [-1.8, -5.8, 3.0],
                          [0.2, 1.1, 2.1],
                          [3.2, 1.1, 3.3]])
    species = np.array([1, 2, 3, 4, 5])
    arbitrary_structure = Structure(cell, species, positions)

    # Environment of atom 0 with the automatically chosen sweep value.
    cutoffs = np.array([4., 3.])
    arbitrary_environment = AtomicEnvironment(arbitrary_structure, 0, cutoffs)
    sweep_val = arbitrary_environment.sweep_val

    def neighbor_count():
        return len(arbitrary_environment.etypes)

    def recount_with_sweep(start, stop):
        # Override the sweep array, recompute, and count neighbors again.
        arbitrary_environment.sweep_array = np.arange(start, stop, 1)
        arbitrary_environment.compute_env()
        return neighbor_count()

    n_neighbors_1 = neighbor_count()

    # A smaller sweep must miss some neighbors.
    n_neighbors_2 = recount_with_sweep(-sweep_val + 1, sweep_val)
    assert (n_neighbors_1 > n_neighbors_2)

    # A larger sweep must not find any additional neighbors.
    n_neighbors_3 = recount_with_sweep(-sweep_val - 1, sweep_val + 2)
    assert (n_neighbors_1 == n_neighbors_3)
def adjust_cutoffs(self, new_cutoffs: Union[list, tuple, 'np.ndarray'],
                   reset_L_alpha=True, train=True):
    """
    Rebuild every atomic environment in the training data with new cutoffs.

    Useful if you want to gauge the impact of cutoffs given a certain
    training set! Unless you know *exactly* what you are doing for some
    development or test purpose, it is **highly** suggested that you call
    set_L_alpha and re-optimize your hyperparameters afterwards as is
    default here.

    :param new_cutoffs: cutoffs used to rebuild the environments; also
        stored on the model as a numpy array
    :param reset_L_alpha: if true, delete ``l_mat`` and ``ky_mat`` and call
        ``set_L_alpha``
    :param train: if true, call ``train`` afterwards
    :return:
    """
    # NOTE(review): environments are rebuilt without a cutoffs_mask
    # argument -- confirm this is intended.
    self.training_data = [
        AtomicEnvironment(env.structure, env.atom, new_cutoffs)
        for env in self.training_data
    ]

    # Keep the module-level registries consistent with this model.
    _global_training_data[self.name] = self.training_data
    _global_training_labels[self.name] = self.training_labels_np

    self.cutoffs = np.array(new_cutoffs)

    if reset_L_alpha:
        # Drop the stale matrices before recomputing them.
        del self.l_mat
        del self.ky_mat
        self.set_L_alpha()

    if train:
        self.train()
def predict_on_atom_en(
        param: Tuple[Structure, int, GaussianProcess]
) -> ('np.ndarray', 'np.ndarray', float):
    """
    Predict forces, std. dev. uncertainties and local energy for one atom
    of a structure; the chemical environment is built here.

    All arguments are passed in as a single tuple so the function can be
    used with other functions that expect one argument.

    :param param: tuple of FLARE Structure, atom index, and Gaussian
        Process object
    :type param: Tuple(Structure, integer, GaussianProcess)
    :return: 3-element force array, associated uncertainties, and local
        energy
    :rtype: (np.ndarray, np.ndarray, float)
    """
    structure, atom, gp = param

    chemenv = AtomicEnvironment(structure, atom, gp.cutoffs)

    # One (mean, variance) prediction per force component 1, 2, 3.
    predictions = [gp.predict(chemenv, component) for component in (1, 2, 3)]
    comps = [float(mean) for mean, _ in predictions]
    stds = [np.sqrt(np.abs(variance)) for _, variance in predictions]

    local_energy = gp.predict_local_energy(chemenv)

    return np.array(comps), np.array(stds), local_energy
def update_db(self, struc: Structure, forces: list,
              custom_range: List[int] = ()):
    """Add environments and forces from a structure to the training set.

    :param struc: structure to add to db
    :type struc: Structure
    :param forces: list of corresponding forces to add to db
    :type forces: list<float>
    :param custom_range: Indices to use in lieu of the whole structure;
        every atom is used when this is empty
    :type custom_range: List[int]
    """
    if custom_range:
        selected_atoms = custom_range
    else:
        selected_atoms = list(range(len(struc.positions)))

    for index in selected_atoms:
        environment = AtomicEnvironment(struc, index, self.cutoffs)
        label = np.array(forces[index])

        self.training_data.append(environment)
        self.training_labels.append(label)

    # Refresh the numpy form of the training labels.
    self.training_labels_np = self.force_list_to_np(self.training_labels)
def methanol_gp():
    """Return a "2+3_mc" GaussianProcess loaded with methanol environments.

    Every line of ``methanol_envs.json`` in ``TEST_FILE_DIR`` is parsed as
    JSON and added to the GP as one environment with its ``"forces"``
    entry; ``set_L_alpha`` is called once after all have been added.
    """
    hyperparameters = np.array([
        3.75996759e-06,
        1.53990678e-02,
        2.50624782e-05,
        5.07884426e-01,
        1.70172923e-03,
    ])
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=hyperparameters,
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    envs_file = path.join(TEST_FILE_DIR, "methanol_envs.json")
    with open(envs_file, "r") as f:
        env_records = [loads(line) for line in f.readlines()]

    for record in env_records:
        force = record["forces"]
        environment = AtomicEnvironment.from_dict(record)
        the_gp.add_one_env(environment, force)

    the_gp.set_L_alpha()

    return the_gp
def update_db(self, struc: Structure, forces: List,
              custom_range: List[int] = ()):
    """Add local environments and force labels from a structure to the GP
    training set, then publish the data to the module-level registries.

    Args:
        struc (Structure): Input structure whose local environments are
            added to the training set of the GP.
        forces (np.ndarray): Forces on atoms in the structure.
        custom_range (List[int]): Indices of atoms whose local environments
            are added. All atoms are used when this is empty.
    """
    if custom_range:
        selected_atoms = custom_range
    else:
        selected_atoms = list(range(len(struc.positions)))

    for index in selected_atoms:
        environment = AtomicEnvironment(struc, index, self.cutoffs)
        label = np.array(forces[index])

        self.training_data.append(environment)
        self.training_labels.append(label)

    # Flattened array of all force labels collected so far.
    self.training_labels_np = np.hstack(self.training_labels)

    # Register the updated data under this model's name.
    _global_training_data[self.name] = self.training_data
    _global_training_labels[self.name] = self.training_labels_np
def from_dict(dictionary):
    """Create GP object from dictionary representation.

    The constructor arguments, training data, labels and likelihood values
    are read from ``dictionary``. The covariance-related matrices are then
    restored in one of two ways:

    * more than 5000 training points: ``ky_mat`` is loaded with ``np.load``
      from the path stored under ``'ky_mat_file'`` and ``compute_matrices``
      is called. If that fails for any reason, the matrices are set to
      None and a warning is issued (best effort, no exception is raised);
    * otherwise: ``ky_mat_inv``, ``ky_mat``, ``l_mat`` and ``alpha`` are
      taken from the dictionary when present, else set to None.

    Legacy keys ``'par'`` and ``'no_cpus'`` are accepted as fallbacks for
    ``'parallel'`` and ``'n_cpus'``.

    :param dictionary: dictionary representation of the GP
    :return: the reconstructed GaussianProcess
    """
    # Local import so this block does not depend on the file's import list.
    import warnings

    multihyps = dictionary.get('multihyps', False)

    new_gp = GaussianProcess(
        kernel_name=dictionary['kernel_name'],
        cutoffs=np.array(dictionary['cutoffs']),
        hyps=np.array(dictionary['hyps']),
        hyp_labels=dictionary['hyp_labels'],
        parallel=dictionary.get('parallel', False) or
        dictionary.get('par', False),
        per_atom_par=dictionary.get('per_atom_par', True),
        n_cpus=dictionary.get('n_cpus') or dictionary.get('no_cpus'),
        maxiter=dictionary['maxiter'],
        opt_algorithm=dictionary.get('opt_algorithm', 'L-BFGS-B'),
        multihyps=multihyps,
        hyps_mask=dictionary.get('hyps_mask', None),
        name=dictionary.get('name', 'default_gp'),
    )

    # Save time by attempting to load in computed attributes
    new_gp.training_data = [AtomicEnvironment.from_dict(env)
                            for env in dictionary['training_data']]
    new_gp.training_labels = deepcopy(dictionary['training_labels'])
    # The flattened labels are always rebuilt from training_labels, so the
    # stored 'training_labels_np' entry is not needed.
    new_gp.training_labels_np = np.hstack(new_gp.training_labels)

    new_gp.likelihood = dictionary['likelihood']
    new_gp.likelihood_gradient = dictionary['likelihood_gradient']

    _global_training_data[new_gp.name] = new_gp.training_data
    _global_training_labels[new_gp.name] = new_gp.training_labels_np

    # Save time by attempting to load in computed attributes
    if len(new_gp.training_data) > 5000:
        # .get() so that a missing key is reported through the warning
        # below instead of raising a second KeyError inside the handler.
        filename = dictionary.get('ky_mat_file')
        try:
            new_gp.ky_mat = np.load(filename)
            new_gp.compute_matrices()
        except Exception as err:
            # Deliberately best-effort: leave the matrices unset so they
            # can be recomputed later, but tell the user about it.
            new_gp.ky_mat = None
            new_gp.l_mat = None
            new_gp.alpha = None
            new_gp.ky_mat_inv = None
            warnings.warn("the covariance matrices are not loaded "
                          f"because {filename} cannot be loaded ({err!r})")
    else:
        def _array_or_none(key):
            # Convert a stored entry to an array, keeping None for
            # missing or null entries.
            value = dictionary.get(key)
            return np.array(value) if value is not None else None

        new_gp.ky_mat_inv = _array_or_none('ky_mat_inv')
        new_gp.ky_mat = _array_or_none('ky_mat')
        new_gp.l_mat = _array_or_none('l_mat')
        new_gp.alpha = _array_or_none('alpha')

    return new_gp
def predict_on_structure(structure: Structure, gp: GaussianProcess,
                         n_cpus: int = None) -> ('np.ndarray', 'np.ndarray'):
    """
    Predict forces and std. dev. uncertainties for every atom of a
    structure. Results are written into ``structure.forces`` and
    ``structure.stds`` and are also returned.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: not used by this function
    :return: N x 3 numpy array of forces, N x 3 numpy array of
        uncertainties
    :rtype: (np.ndarray, np.ndarray)
    """
    for atom_index in range(structure.nat):
        # Cast the atom to its local environment, then predict each of the
        # three force components (the GP labels them 1, 2, 3).
        environment = AtomicEnvironment(structure, atom_index, gp.cutoffs)

        for axis, component in enumerate((1, 2, 3)):
            mean, variance = gp.predict(environment, component)
            structure.forces[atom_index][axis] = float(mean)
            structure.stds[atom_index][axis] = np.sqrt(np.abs(variance))

    return np.array(structure.forces), np.array(structure.stds)
def predict_on_atom_en(
        param: Tuple[Structure, int, GaussianProcess]
) -> ("np.ndarray", "np.ndarray", float):
    """
    Predict forces, std. dev. uncertainties and local energy for one atom
    of a structure; the chemical environment is built here.

    All arguments are passed in as a single tuple so the function can be
    used with other functions that expect one argument.

    :param param: tuple of FLARE Structure, atom index, and Gaussian
        Process object
    :type param: Tuple(Structure, integer, GaussianProcess)
    :return: 3-element force array, associated uncertainties, and local
        energy
    :rtype: (np.ndarray, np.ndarray, float)
    """
    structure, atom, gp = param

    # The GP's hyperparameter mask doubles as the cutoffs mask.
    chemenv = AtomicEnvironment(structure, atom, gp.cutoffs,
                                cutoffs_mask=gp.hyps_mask)

    force, variance = gp.predict_force_xyz(chemenv)
    local_energy = gp.predict_local_energy(chemenv)

    return force, np.sqrt(np.abs(variance)), local_energy
def calculate_mgp_serial(self, atoms):
    """Predict forces, stresses and uncertainties atom by atom.

    A ``Structure`` is built from ``atoms`` and ``self.mgp_model.predict``
    is called once per atom. Results are stored in ``self.results`` under
    ``'forces'``, ``'stds'``, ``'stresses'`` (per atom) and ``'stress'``
    (sum over atoms). Energies are placeholders (zeros). ``atoms`` also
    receives a ``get_uncertainties`` attribute.

    :param atoms: object providing ``cell``, ``positions``,
        ``get_atomic_numbers()`` and ``len()``
    :return: array of predicted forces with one row per atom
    """
    n_atoms = len(atoms)
    structure = Structure(np.array(atoms.cell),
                          atoms.get_atomic_numbers(),
                          atoms.positions)

    forces = np.zeros((n_atoms, 3))
    stds = np.zeros((n_atoms, 3))
    stress = np.zeros((n_atoms, 6))

    for index in range(n_atoms):
        environment = AtomicEnvironment(structure, index,
                                        self.mgp_model.cutoffs)
        force, variance, virial = self.mgp_model.predict(environment,
                                                         mean_only=False)
        forces[index] = force
        stress[index] = virial
        stds[index] = np.sqrt(np.absolute(variance))

    results = self.results
    results['forces'] = forces
    results['stds'] = stds
    results['stresses'] = stress
    results['stress'] = np.sum(stress, axis=0)

    # TODO: implement energy mapping
    results['local_energies'] = np.zeros(forces.shape)
    results['energy'] = 0

    atoms.get_uncertainties = self.get_uncertainties

    return forces
def predict_on_structure_en(
        structure: Structure, gp: GaussianProcess, n_cpus: int = None
) -> ('np.ndarray', 'np.ndarray', 'np.ndarray'):
    """
    Return the forces/std. dev. uncertainty / local energy associated with
    each individual atom in a structure. Forces and uncertainties are
    stored directly to the structure and are also returned.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: Dummy parameter passed as an argument to allow for
        flexibility when the callable may or may not be parallelized
    :return: N x 3 array of forces, N x 3 array of uncertainties,
        N-length array of energies
    :rtype: (np.ndarray, np.ndarray, np.ndarray)
    """
    # Float array: an integer array (as built from a list of int zeros)
    # would silently truncate the predicted energies on assignment.
    local_energies = np.zeros(structure.nat)

    # Loop through atoms in structure and predict forces, uncertainties,
    # and energies
    for n in range(structure.nat):
        chemenv = AtomicEnvironment(structure, n, gp.cutoffs)

        # Force components are labelled 1, 2, 3 by the GP.
        for i in range(3):
            force, var = gp.predict(chemenv, i + 1)
            structure.forces[n][i] = float(force)
            structure.stds[n][i] = np.sqrt(np.abs(var))

        local_energies[n] = gp.predict_local_energy(chemenv)

    forces = np.array(structure.forces)
    stds = np.array(structure.stds)

    return forces, stds, local_energies
def predict_on_atom_efs(param):
    """Build the chemical environment of one atom and return the result of
    ``gp.predict_efs`` for it (local energy, forces, partial stresses and
    their predictive variances).

    ``param`` is a ``(structure, atom, gp)`` tuple.
    """
    structure, atom, gp = param
    environment = AtomicEnvironment(structure, atom, gp.cutoffs)
    prediction = gp.predict_efs(environment)
    return prediction
def predict_on_structure_en(self):
    """Predict forces, uncertainties and local energies with ``self.gp``.

    For every atom of ``self.structure`` the three force components and
    their standard deviations are written into ``self.structure.forces``
    and ``self.structure.stds``; the local energy goes into
    ``self.local_energies``.
    """
    structure = self.structure
    gp = self.gp

    for atom_index in range(structure.nat):
        environment = AtomicEnvironment(structure, atom_index, gp.cutoffs)

        # The GP labels the force components 1, 2, 3.
        for axis, component in enumerate((1, 2, 3)):
            mean, variance = gp.predict(environment, component)
            structure.forces[atom_index][axis] = float(mean)
            structure.stds[atom_index][axis] = np.sqrt(np.absolute(variance))

        self.local_energies[atom_index] = \
            gp.predict_local_energy(environment)
def predict_on_structure_en(
        structure: Structure, gp: GaussianProcess, n_cpus: int = None,
        write_to_structure: bool = True,
        selective_atoms: List[int] = None,
        skipped_atom_value=0) -> ('np.ndarray', 'np.ndarray', 'np.ndarray'):
    """
    Return the forces/std. dev. uncertainty / local energy associated with
    each individual atom in a structure.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: Dummy parameter passed as an argument to allow for
        flexibility when the callable may or may not be parallelized
    :param write_to_structure: If true (and ``structure.forces`` is not
        None), also write forces and uncertainties to ``structure.forces``
        and ``structure.stds``
    :param selective_atoms: Only predict on these atom indices; when empty
        or None, predict on every atom
    :param skipped_atom_value: Value placed in the returned arrays for
        atoms that are skipped. It is never written to the structure.
    :return: N x 3 array of forces, N x 3 array of uncertainties,
        N-length array of energies
    :rtype: (np.ndarray, np.ndarray, np.ndarray)
    """
    forces = np.zeros((structure.nat, 3))
    stds = np.zeros((structure.nat, 3))
    local_energies = np.zeros(structure.nat)

    if selective_atoms:
        # Pre-fill so skipped atoms carry the requested placeholder.
        forces.fill(skipped_atom_value)
        stds.fill(skipped_atom_value)
        local_energies.fill(skipped_atom_value)
        # A set gives constant-time membership tests in the atom loop.
        selected = set(selective_atoms)
    else:
        selected = None

    # Loop through atoms in structure and predict forces, uncertainties,
    # and energies
    for n in range(structure.nat):
        if selected is not None and n not in selected:
            continue

        chemenv = AtomicEnvironment(structure, n, gp.cutoffs,
                                    cutoffs_mask=gp.hyps_mask)

        # Force components are labelled 1, 2, 3 by the GP.
        for i in range(3):
            force, var = gp.predict(chemenv, i + 1)
            forces[n][i] = float(force)
            stds[n][i] = np.sqrt(np.abs(var))

            if write_to_structure and structure.forces is not None:
                structure.forces[n][i] = float(force)
                structure.stds[n][i] = np.sqrt(np.abs(var))

        local_energies[n] = gp.predict_local_energy(chemenv)

    return forces, stds, local_energies
def another_env(cutoffs, delt):
    """Create test environments from two randomly jittered 4-atom cells.

    The first structure (species ``[1, 1, 1, 1]``) is built three times:
    unperturbed, and with the x coordinate of atom 0 set to ``+delt`` and
    to ``-delt``. The second structure (species ``[1, 2, 2, 1]``) uses a
    fresh random draw and is only built unperturbed.

    Returns:
        tuple: ``(env1_1_0, env1_2_0, env1_3_0, env1_2_1, env1_3_1,
        env1_2_2, env1_3_2, env2_1_0)`` where ``envA_B_C`` is the
        environment of atom ``C`` in variant ``B`` (1 = original,
        2 = ``+delt``, 3 = ``-delt``) of structure ``A``.
    """
    cell = 10.0 * np.eye(3)

    def jittered_positions():
        # Atom 0 at the origin; three atoms randomly jittered and then
        # shifted by one unit along y, x and x+y respectively.
        pos = np.vstack([[0, 0, 0], 0.1*random([3, 3])])
        pos[1, 1] += 1
        pos[2, 0] += 1
        pos[3, :2] += 1
        return pos

    # ----- structure 1: original plus two displaced copies -----
    base_positions = jittered_positions()

    plus_positions = deepcopy(base_positions)
    plus_positions[0][0] = delt

    minus_positions = deepcopy(base_positions)
    minus_positions[0][0] = -delt

    species_1 = [1, 1, 1, 1]
    struc_base = Structure(cell, species_1, base_positions)
    struc_plus = Structure(cell, species_1, plus_positions)
    struc_minus = Structure(cell, species_1, minus_positions)

    env1_1_0 = AtomicEnvironment(struc_base, 0, cutoffs)
    env1_2_0, env1_2_1, env1_2_2 = [
        AtomicEnvironment(struc_plus, atom, cutoffs) for atom in (0, 1, 2)]
    env1_3_0, env1_3_1, env1_3_2 = [
        AtomicEnvironment(struc_minus, atom, cutoffs) for atom in (0, 1, 2)]

    # ----- structure 2: new random draw, different species -----
    species_2 = [1, 2, 2, 1]
    struc_other = Structure(cell, species_2, jittered_positions())
    env2_1_0 = AtomicEnvironment(struc_other, 0, cutoffs)

    return env1_1_0, env1_2_0, env1_3_0, \
        env1_2_1, env1_3_1, env1_2_2, env1_3_2, env2_1_0
def test_pred_on_elements():
    """Train on H only, predict on C only, and check C gets added.

    Also checks that running the trainer does not change the forces stored
    on the input frames, and removes the output files afterwards.
    """
    hyperparameters = np.array([
        3.75996759e-06, 1.53990678e-02, 2.50624782e-05,
        5.07884426e-01, 1.70172923e-03
    ])
    the_gp = GaussianProcess(kernel_name="2+3_mc",
                             hyps=hyperparameters,
                             cutoffs=np.array([7, 3]),
                             hyp_labels=['l2', 's2', 'l3', 's3', 'n0'],
                             maxiter=1,
                             opt_algorithm='L-BFGS-B')

    with open('./test_files/methanol_frames.json', 'r') as f:
        frames = [Structure.from_dict(loads(line)) for line in f.readlines()]

    # Seed environments: the first six records of the environments file.
    with open('./test_files/methanol_envs.json', 'r') as f:
        data_dicts = [loads(line) for line in f.readlines()[:6]]
    seeds = [(AtomicEnvironment.from_dict(record),
              np.array(record['forces']))
             for record in data_dicts]

    # Snapshot of the frames to compare against after the run.
    all_frames = deepcopy(frames)

    # Set to predict only on Carbon after training on H to ensure errors
    # are high and that they get added to the gp
    trainer_options = dict(
        gp=the_gp,
        shuffle_frames=False,
        rel_std_tolerance=0,
        abs_std_tolerance=0,
        abs_force_tolerance=.001,
        skip=5,
        min_atoms_per_train=100,
        pre_train_seed_envs=seeds,
        pre_train_seed_frames=[frames[-1]],
        max_atoms_from_frame=4,
        output_name='meth_test',
        model_format='json',
        atom_checkpoint_interval=50,
        pre_train_atoms_per_element={'H': 1},
        predict_atoms_per_element={'H': 0, 'C': 1, 'O': 0},
    )
    tt = TrajectoryTrainer(frames, **trainer_options)
    tt.run()

    # Ensure forces weren't written directly to structure
    for index, snapshot in enumerate(all_frames):
        assert np.array_equal(snapshot.forces, frames[index].forces)

    # Assert that Carbon atoms were correctly added
    assert the_gp.training_statistics['envs_by_species']['C'] > 2

    # Clean up everything the run wrote to the working directory.
    for pattern in ("meth_test*", "gp_from_aimd*"):
        for leftover in glob(pattern):
            remove(leftover)
def predict_on_structure(structure: Structure, gp: GaussianProcess):
    """Predict forces and uncertainties for every atom of a structure.

    Each force component and its standard deviation is written into
    ``structure.forces`` / ``structure.stds``; both are then returned as
    numpy arrays.

    :param structure: structure to predict on
    :param gp: Gaussian Process model used for the predictions
    :return: (forces, stds) as numpy arrays
    """
    n_components = 3

    for n in range(structure.nat):
        chemenv = AtomicEnvironment(structure, n, gp.cutoffs)

        for i in range(n_components):
            # The GP labels the force components starting from 1.
            mean, variance = gp.predict(chemenv, i + 1)
            structure.forces[n][i] = float(mean)
            structure.stds[n][i] = np.sqrt(np.abs(variance))

    predicted_forces = np.array(structure.forces)
    predicted_stds = np.array(structure.stds)

    return predicted_forces, predicted_stds
def predict_on_structure(
    structure: Structure,
    gp: GaussianProcess,
    n_cpus: int = None,
    write_to_structure: bool = True,
    selective_atoms: List[int] = None,
    skipped_atom_value=0,
) -> ("np.ndarray", "np.ndarray"):
    """
    Predict forces and std. dev. uncertainties for the atoms of a
    structure, optionally restricted to a subset of atoms.

    :param structure: FLARE structure to obtain forces for, with N atoms
    :param gp: Gaussian Process model
    :param n_cpus: not used by this function
    :param write_to_structure: Write results to structure's forces, std
        attributes
    :param selective_atoms: Only predict on these atoms; e.g. [0,1,2] will
        only predict and return for those atoms
    :param skipped_atom_value: What value to use for atoms that are
        skipped. Defaults to 0 but other options could be e.g. NaN. Will
        NOT write this to the structure if write_to_structure is True.
    :return: N x 3 numpy array of forces, N x 3 numpy array of
        uncertainties
    :rtype: (np.ndarray, np.ndarray)
    """
    forces = np.zeros((structure.nat, 3))
    stds = np.zeros((structure.nat, 3))

    # An empty or missing selection means "predict on every atom".
    if selective_atoms:
        for result in (forces, stds):
            result.fill(skipped_atom_value)
    else:
        selective_atoms = []

    for n in range(structure.nat):
        # Skip atoms outside the selection when a selection is active.
        if selective_atoms and n not in selective_atoms:
            continue

        chemenv = AtomicEnvironment(structure, n, gp.cutoffs,
                                    cutoffs_mask=gp.hyps_mask)
        force, var = gp.predict_force_xyz(chemenv)
        std = np.sqrt(np.abs(var))

        forces[n] = force
        stds[n] = std

        if write_to_structure:
            structure.forces[n] = force
            structure.stds[n] = std

    return forces, stds
def test_seed_and_run():
    """Run a seeded TrajectoryTrainer and compare against the saved model.

    After the run, the model pickled to ``meth_test_model.pickle`` must
    give the same predictions as the in-memory GP on the first seed
    environment. Output files are removed at the end.
    """
    hyperparameters = np.array([
        3.75996759e-06,
        1.53990678e-02,
        2.50624782e-05,
        5.07884426e-01,
        1.70172923e-03,
    ])
    the_gp = GaussianProcess(
        kernel_name="2+3_mc",
        hyps=hyperparameters,
        cutoffs=np.array([5, 3]),
        hyp_labels=["l2", "s2", "l3", "s3", "n0"],
        maxiter=1,
        opt_algorithm="L-BFGS-B",
    )

    frames_file = path.join(TEST_FILE_DIR, "methanol_frames.json")
    with open(frames_file, "r") as f:
        frames = [Structure.from_dict(loads(line)) for line in f.readlines()]

    # Seed environments: the first six records of the environments file.
    envs_file = path.join(TEST_FILE_DIR, "methanol_envs.json")
    with open(envs_file, "r") as f:
        data_dicts = [loads(line) for line in f.readlines()[:6]]
    envs = [AtomicEnvironment.from_dict(record) for record in data_dicts]
    forces = [np.array(record["forces"]) for record in data_dicts]
    seeds = list(zip(envs, forces))

    trainer_options = dict(
        gp=the_gp,
        shuffle_frames=True,
        rel_std_tolerance=0,
        abs_std_tolerance=0,
        skip=10,
        pre_train_seed_envs=seeds,
        pre_train_seed_frames=[frames[-1]],
        max_atoms_from_frame=4,
        output_name="meth_test",
        model_format="pickle",
        train_checkpoint_interval=1,
        pre_train_atoms_per_element={"H": 1},
    )
    tt = TrajectoryTrainer(frames, **trainer_options)
    tt.run()

    # Reload the model written by the trainer itself.
    with open("meth_test_model.pickle", "rb") as f:
        new_gp = pickle.load(f)

    test_env = envs[0]
    for component in (1, 2, 3):
        in_memory = the_gp.predict(x_t=test_env, d=component)
        reloaded = new_gp.predict(x_t=test_env, d=component)
        assert np.all(in_memory == reloaded)

    for leftover in glob("meth_test*"):
        remove(leftover)
def predict_on_atom_en_std(param):
    """Predict the local energy and its predictive std for one atom.

    ``param`` is a ``(structure, atom, gp)`` tuple. The environment is
    built with the GP's cutoffs and its ``hyps_mask`` as the cutoffs mask.

    :return: (local energy, standard deviation of the local energy)
    """
    structure, atom, gp = param

    environment = AtomicEnvironment(structure, atom, gp.cutoffs,
                                    cutoffs_mask=gp.hyps_mask)

    energy, energy_variance = gp.predict_local_energy_and_var(environment)

    # abs() guards the square root against a negative variance.
    return energy, np.sqrt(np.abs(energy_variance))
def predict_on_atom_mgp(atom, structure, cutoffs, mgp):
    """Predict force and uncertainty for one atom with a mapped model.

    :param atom: index of the atom in ``structure``
    :param structure: structure containing the atom
    :param cutoffs: cutoffs used to build the atomic environment
    :param mgp: model whose ``predict`` returns ``(force, variance)``
    :return: (force, standard deviation, local energy); the local energy
        is not predicted here and is always 0
    """
    environment = AtomicEnvironment(structure, atom, cutoffs)

    force, variance = mgp.predict(environment)
    uncertainty = np.sqrt(np.absolute(variance))

    # Local energy prediction is not available in this path.
    return force, uncertainty, 0
def get_forces(self, atoms):
    """Predict the force on every atom with ``self.mff_model``.

    A ``Structure`` is built from the cell and positions of ``atoms`` with
    the placeholder species ``'A'`` for every atom, and the model is
    queried once per atom in mean-only mode.

    :param atoms: object providing ``cell``, ``positions`` and ``len()``
    :return: (nat, 3) numpy array of predicted forces
    """
    nat = len(atoms)
    struc_curr = struc.Structure(atoms.cell, ['A'] * nat, atoms.positions)

    forces = np.zeros((nat, 3))

    for n in range(nat):
        chemenv = AtomicEnvironment(struc_curr, n,
                                    self.mff_model.GP.cutoffs)
        force, _ = self.mff_model.predict(chemenv, mean_only=True)
        # Store the prediction; without this the function would always
        # return the initial zeros.
        forces[n] = force

    return forces
def test_env_methods(cutoff):
    """Check string form and dict round-trip of an AtomicEnvironment.

    ``cutoff`` is not used in the body; the environment is always built
    with cutoffs ``[1, 1]``.
    """
    struc_test = Structure(
        np.eye(3),
        [1, 2, 3],
        np.array([[0, 0, 0], [0.5, 0.5, 0.5], [0.1, 0.1, 0.1]]),
    )
    env_test = AtomicEnvironment(struc_test, 0, np.array([1, 1]))

    expected_repr = ('Atomic Env. of Type 1 surrounded by 12 atoms'
                     ' of Types [2, 3]')
    assert str(env_test) == expected_repr

    # Serialisation must yield a dict carrying the essential keys.
    the_dict = env_test.as_dict()
    assert isinstance(the_dict, dict)
    for required_key in ('positions', 'cell', 'atom', 'cutoffs', 'species'):
        assert required_key in the_dict.keys()

    # The round-tripped environment must reproduce all bond arrays.
    remade_env = AtomicEnvironment.from_dict(the_dict)
    assert isinstance(remade_env, AtomicEnvironment)
    assert np.array_equal(remade_env.bond_array_2, env_test.bond_array_2)
    assert np.array_equal(remade_env.bond_array_3, env_test.bond_array_3)
    assert np.array_equal(remade_env.bond_array_mb, env_test.bond_array_mb)
def test_species_count(cutoff):
    """Check that species bookkeeping matches positions and bonds.

    ``cutoff`` is not used in the body; the environment is always built
    with cutoffs ``[1, 1]``.
    """
    struc_test = Structure(
        np.eye(3),
        [1, 2, 3],
        np.array([[0, 0, 0], [0.5, 0.5, 0.5], [0.1, 0.1, 0.1]]),
    )
    env_test = AtomicEnvironment(structure=struc_test, atom=0,
                                 cutoffs=np.array([1, 1]))

    # One coded species per atom position.
    n_positions = len(struc_test.positions)
    assert (n_positions == len(struc_test.coded_species))

    # One environment type per 2-body bond.
    n_bonds = len(env_test.bond_array_2)
    assert (n_bonds == len(env_test.etypes))

    first_type = env_test.etypes[0]
    assert (isinstance(first_type, np.int8))
def predict_on_structure_mgp(self):  # changed
    """
    Assign forces and uncertainties to self.structure using self.mgp.

    Environments are built with the cutoffs of self.gp, predictions come
    from self.mgp, and ``dft_forces`` is set to False on the structure
    afterwards.
    """
    output.write_to_output('\npredict with mapping:\n', self.output_name)

    structure = self.structure
    for atom_index in range(structure.nat):
        environment = AtomicEnvironment(structure, atom_index,
                                        self.gp.cutoffs)
        force, variance = self.mgp.predict(environment)

        # Slice assignment overwrites the existing rows in place.
        structure.forces[atom_index][:] = force
        structure.stds[atom_index][:] = np.sqrt(np.absolute(variance))

    structure.dft_forces = False