def image_pred(self, image, params_dict):
    """Predict the Morse-potential energy and forces for a single image.

    Parameters
    ----------
    image : ase.Atoms-like
        Object exposing ``get_chemical_symbols()``, ``positions`` and ``cell``.
    params_dict : dict
        Per-element Morse parameters: ``params_dict[el]`` maps the keys
        ``"re"``, ``"D"`` and ``"sig"`` to floats.

    Returns
    -------
    tuple
        ``(energy, forces, natoms)`` — total energy (float), force array of
        shape ``(natoms, 3)``, and the atom count.

    Raises
    ------
    ValueError
        If ``self.combo`` is not one of the supported mixing schemes
        (``'mean'`` or ``'yang'``).
    """
    chemical_symbols = np.array(image.get_chemical_symbols())
    # One [re, D, sig] row per atom, looked up by element symbol.
    # (Replaces the original append-then-vstack construction.)
    params = np.array(
        [
            [
                params_dict[element]["re"],
                params_dict[element]["D"],
                params_dict[element]["sig"],
            ]
            for element in chemical_symbols
        ]
    )
    natoms = len(image)
    image_hash = get_hash(image)
    image_neighbors = self.get_neighbors(self.neighborlist, image_hash)
    positions = image.positions
    cell = image.cell
    energy = 0.0
    forces = np.zeros((natoms, 3))
    for a1 in range(natoms):
        re_1 = params[a1][0]
        # NOTE(review): only the central atom's well depth D is abs()'d; the
        # neighbors' D_n values are used as-is -- confirm this is intended.
        D_1 = np.abs(params[a1][1])
        sig_1 = params[a1][2]
        neighbors, offsets = image_neighbors[a1]
        # Displacement vectors to each neighbor, including periodic offsets.
        cells = np.dot(offsets, cell)
        d = positions[neighbors] + cells - positions[a1]
        re_n = params[neighbors][:, 0]
        D_n = params[neighbors][:, 1]
        sig_n = params[neighbors][:, 2]
        # Mixing rules to combine per-element parameters into pair parameters.
        if self.combo == 'mean':
            D = np.sqrt(D_1 * D_n)
            sig = (sig_1 + sig_n) / 2
            re = (re_1 + re_n) / 2
        elif self.combo == 'yang':
            D = (2 * D_1 * D_n) / (D_1 + D_n)
            sig = (sig_1 * sig_n) * (sig_1 + sig_n) / (sig_1 ** 2 + sig_n ** 2)
            re = (re_1 * re_n) * (re_1 + re_n) / (re_1 ** 2 + re_n ** 2)
        else:
            # Previously an unrecognized combo fell through and crashed later
            # with a NameError on D; fail fast with an explicit message.
            raise ValueError(
                "Unsupported parameter combination scheme: {}".format(self.combo)
            )
        r = np.sqrt((d ** 2).sum(1))
        r_star = r / sig
        re_star = re / sig
        C = np.log(2) / (re_star - 1)
        atom_energy = D * (
            np.exp(-2 * C * (r_star - re_star))
            - 2 * np.exp(-C * (r_star - re_star))
        )
        energy += atom_energy.sum()
        # Pairwise force magnitude projected onto each displacement vector.
        f = (
            (2 * D * C / sig)
            * (1 / r)
            * (
                np.exp(-2 * C * (r_star - re_star))
                - np.exp(-C * (r_star - re_star))
            )
        )[:, np.newaxis] * d
        forces[a1] -= f.sum(axis=0)
        # Newton's third law: add the reaction force onto each neighbor.
        for a2, f2 in zip(neighbors, f):
            forces[a2] += f2
    return energy, forces, natoms
def preprocess_data(self):
    """Preprocess hashed images into per-atom-normalized training data.

    Scales fingerprints to [-1, 1] using ``self.fprange``, normalizes
    energies/forces by the atom count, optionally subtracts precomputed
    delta-model contributions, and (when force training) builds dense
    fingerprint-derivative tensors, optionally cached to ``./stored-primes/``.

    Returns
    -------
    tuple
        ``(fingerprint_dataset, energy_dataset, num_of_atoms, fprimes_dataset,
        forces_dataset, index_hashes, scalings, rearange_forces)``.
    """
    # TODO cleanup/optimize
    fingerprint_dataset = []
    fprimes_dataset = []
    energy_dataset = np.array([])
    num_of_atoms = np.array([])
    forces_dataset = []
    index_hashes = []
    # NOTE: the bound method self.fp_length is replaced by its value here;
    # downstream code reads self.fp_length as an int from this point on.
    self.fp_length = self.fp_length()
    rearange_forces = {}
    for index, atoms_object in enumerate(self.atom_images):
        if self.isamp_hash:
            hash_name = get_amp_hash(atoms_object)
        else:
            hash_name = get_hash(atoms_object, self.Gs)
        index_hashes.append(hash_name)
        image_fingerprint = self.descriptor.fingerprints[hash_name]
        n_atoms = float(len(image_fingerprint))
        num_of_atoms = np.append(num_of_atoms, n_atoms)
        fprange = self.fprange
        atom_order = []
        # fingerprint scaling to [-1,1]
        for i, (atom, afp) in enumerate(image_fingerprint):
            _afp = copy.copy(afp)
            fprange_atom = np.array(fprange[atom])
            for _ in range(np.shape(_afp)[0]):
                # Skip (near-)degenerate feature ranges to avoid divide-by-zero.
                if (fprange_atom[_][1] - fprange_atom[_][0]) > (10.0 ** (-8.0)):
                    _afp[_] = -1 + 2.0 * (
                        (_afp[_] - fprange_atom[_][0])
                        / (fprange_atom[_][1] - fprange_atom[_][0])
                    )
            image_fingerprint[i] = (atom, _afp)
            atom_order.append(atom)
        fingerprint_dataset.append(image_fingerprint)
        # Per-atom energy target.
        image_potential_energy = (
            self.hashed_images[hash_name].get_potential_energy(
                apply_constraint=False
            )
            / n_atoms
        )
        energy_dataset = np.append(energy_dataset, image_potential_energy)
        if self.forcetraining:
            image_forces = (
                self.hashed_images[hash_name].get_forces(apply_constraint=False)
                / n_atoms
            )
            # subtract off delta force contributions
            if self.delta:
                delta_forces = self.delta_forces[index] / n_atoms
                image_forces -= delta_forces
            if self.store_primes and os.path.isfile("./stored-primes/" + hash_name):
                # Cached primes exist on disk; skip recomputation.
                # NOTE(review): rearange_forces[index] is NOT populated on this
                # path -- presumably reconstructed elsewhere; verify.
                pass
            else:
                # Build the element-grouped atom ordering and the index map
                # needed to rearrange forces into that ordering.
                prime_mapping = []
                for element in self.elements:
                    indices = [
                        i for i, x in enumerate(atom_order) if x == element
                    ]
                    prime_mapping += indices
                new_order = [atom_order[i] for i in prime_mapping]
                used = set()
                t = np.array([])
                for i, x in enumerate(atom_order):
                    for k, l in enumerate(new_order):
                        if (x == l) and (k not in used):
                            used.add(k)
                            t = np.append(t, k)
                            break
                rearange_forces[index] = t.astype(int)
                image_primes = self.descriptor.fingerprintprimes[hash_name]
                # scaling of fingerprint derivatives to be consistent with
                # fingerprint scaling.
                _image_primes = copy.copy(image_primes)
                # (was: for _, key in enumerate(...) -- index was unused)
                for key in list(image_primes.keys()):
                    base_atom = key[3]
                    fprange_atom = np.array(fprange[base_atom])
                    fprange_dif = fprange_atom[:, 1] - fprange_atom[:, 0]
                    # Degenerate ranges get divisor 2 so the derivative passes
                    # through unscaled (2 * fprime / 2).
                    fprange_dif[fprange_dif < 10.0 ** (-8.0)] = 2
                    fprime = np.array(image_primes[key])
                    fprime = 2 * fprime / fprange_dif
                    _image_primes[key] = fprime
                image_prime_values = list(_image_primes.values())
                image_prime_keys = list(_image_primes.keys())
                fp_length = len(image_fingerprint[0][1])
                num_atoms = len(image_fingerprint)
                if self.specific_atoms:
                    ad_atom_index = get_ad_index(atoms_object)
                total_atoms_num = len(atoms_object)
                # Dense (fingerprints x coordinates) derivative matrix.
                fingerprintprimes = torch.zeros(
                    fp_length * num_atoms, 3 * total_atoms_num
                )
                for idx, fp_key in enumerate(image_prime_keys):
                    image_prime = torch.tensor(image_prime_values[idx])
                    if self.specific_atoms:
                        # Remap to the index within the tracked-atoms subset.
                        base_atom = ad_atom_index.index(fp_key[2])
                    else:
                        base_atom = fp_key[2]
                    wrt_atom = fp_key[0]
                    coord = fp_key[4]
                    fingerprintprimes[
                        base_atom * fp_length : base_atom * fp_length + fp_length,
                        wrt_atom * 3 + coord,
                    ] = image_prime
                # store primes in a sparse matrix format
                if self.store_primes:
                    sp_matrix = sparse.coo_matrix(fingerprintprimes)
                    sparse.save_npz(
                        open("./stored-primes/" + hash_name, "wb"), sp_matrix
                    )
                fprimes_dataset.append(fingerprintprimes)
            forces_dataset.append(torch.from_numpy(image_forces))
    if self.delta:
        # Convert targets and delta energies to a common per-atom, relative
        # (first-image-referenced) scale before subtracting.
        # NOTE: this mutates self.delta_energies in place.
        self.delta_energies /= num_of_atoms
        target_ref_per_atom = energy_dataset[0]
        delta_ref_per_atom = self.delta_energies[0]
        relative_targets = energy_dataset - target_ref_per_atom
        relative_delta = self.delta_energies - delta_ref_per_atom
        energy_dataset = torch.FloatTensor(relative_targets - relative_delta)
        scalings = [target_ref_per_atom, delta_ref_per_atom]
    else:
        energy_dataset = torch.FloatTensor(energy_dataset)
        scalings = [0, 0]
    return (
        fingerprint_dataset,
        energy_dataset,
        num_of_atoms,
        fprimes_dataset,
        forces_dataset,
        index_hashes,
        scalings,
        rearange_forces,
    )
def preprocess_data(self):
    """Preprocess hashed images into scaled training data.

    Scales fingerprints to [-1, 1] using ``self.fprange``, optionally
    subtracts precomputed Lennard-Jones energy/force contributions, builds
    dense fingerprint-derivative tensors when force training (optionally
    cached to ``./stored-primes/``), and applies the configured energy
    scaling scheme ("minmax", "standardize", or None).

    Returns
    -------
    tuple
        ``(fingerprint_dataset, energy_dataset, num_of_atoms, fprimes_dataset,
        forces_dataset, index_hashes, scalings, rearange_forces)``.

    Raises
    ------
    ValueError
        If ``self.scaling_scheme`` is not "minmax", "standardize", or None.
    """
    # TODO cleanup/optimize
    fingerprint_dataset = []
    fprimes_dataset = []
    energy_dataset = np.array([])
    num_of_atoms = np.array([])
    forces_dataset = []
    index_hashes = []
    # NOTE: the bound method self.fp_length is replaced by its value here;
    # downstream code reads self.fp_length as an int from this point on.
    self.fp_length = self.fp_length()
    rearange_forces = {}
    for index, atoms_object in enumerate(self.atom_images):
        if self.isamp_hash:
            hash_name = get_amp_hash(atoms_object)
        else:
            hash_name = get_hash(atoms_object, self.Gs)
        index_hashes.append(hash_name)
        image_fingerprint = self.descriptor.fingerprints[hash_name]
        fprange = self.fprange
        atom_order = []
        # fingerprint scaling to [-1,1]
        for i, (atom, afp) in enumerate(image_fingerprint):
            _afp = copy.copy(afp)
            fprange_atom = fprange[atom]
            for _ in range(np.shape(_afp)[0]):
                # Skip (near-)degenerate feature ranges to avoid divide-by-zero.
                if (fprange_atom[_][1] - fprange_atom[_][0]) > (10.0 ** (-8.0)):
                    _afp[_] = -1 + 2.0 * (
                        (_afp[_] - fprange_atom[_][0])
                        / (fprange_atom[_][1] - fprange_atom[_][0])
                    )
            image_fingerprint[i] = (atom, _afp)
            atom_order.append(atom)
        image_potential_energy = self.hashed_images[hash_name].get_potential_energy(
            apply_constraint=False
        )
        # subtract off lj contribution
        if self.lj:
            lj_energy = self.lj_energies[index]
            image_potential_energy -= lj_energy
        if self.forcetraining:
            image_forces = self.hashed_images[hash_name].get_forces(
                apply_constraint=False
            )
            # subtract off lj force contribution
            if self.lj:
                lj_forces = np.array(self.lj_forces[index])
                image_forces -= lj_forces
            if self.store_primes and os.path.isfile("./stored-primes/" + hash_name):
                # Cached primes exist on disk; skip recomputation.
                # NOTE(review): rearange_forces[index] is NOT populated on this
                # path -- presumably reconstructed elsewhere; verify.
                pass
            else:
                # Build the element-grouped atom ordering and the index map
                # needed to rearrange forces into that ordering.
                prime_mapping = []
                for element in self.elements:
                    indices = [
                        i for i, x in enumerate(atom_order) if x == element
                    ]
                    prime_mapping += indices
                new_order = [atom_order[i] for i in prime_mapping]
                used = set()
                t = np.array([])
                for i, x in enumerate(atom_order):
                    for k, l in enumerate(new_order):
                        if (x == l) and (k not in used):
                            used.add(k)
                            t = np.append(t, k)
                            break
                rearange_forces[index] = t.astype(int)
                image_primes = self.descriptor.fingerprintprimes[hash_name]
                # fingerprint derivative scaling to [0,1]
                _image_primes = copy.copy(image_primes)
                # (was: for _, key in enumerate(...) -- index was unused)
                for key in list(image_primes.keys()):
                    base_atom = key[3]
                    fprange_atom = fprange[base_atom]
                    fprime = image_primes[key]
                    for i in range(len(fprime)):
                        if (fprange_atom[i][1] - fprange_atom[i][0]) > (
                            10.0 ** (-8.0)
                        ):
                            fprime[i] = 2.0 * (
                                fprime[i]
                                / (fprange_atom[i][1] - fprange_atom[i][0])
                            )
                    _image_primes[key] = fprime
                image_prime_values = list(_image_primes.values())
                image_prime_keys = list(_image_primes.keys())
                fp_length = len(image_fingerprint[0][1])
                num_atoms = len(image_fingerprint)
                # Dense (fingerprints x coordinates) derivative matrix.
                fingerprintprimes = torch.zeros(
                    fp_length * num_atoms, 3 * num_atoms
                )
                for idx, fp_key in enumerate(image_prime_keys):
                    image_prime = torch.tensor(image_prime_values[idx])
                    base_atom = fp_key[2]
                    wrt_atom = fp_key[0]
                    coord = fp_key[4]
                    fingerprintprimes[
                        base_atom * fp_length : base_atom * fp_length + fp_length,
                        wrt_atom * 3 + coord,
                    ] = image_prime
                # store primes in a sparse matrix format
                if self.store_primes:
                    sp_matrix = sparse.coo_matrix(fingerprintprimes)
                    sparse.save_npz(
                        open("./stored-primes/" + hash_name, "wb"), sp_matrix
                    )
                fprimes_dataset.append(fingerprintprimes)
            forces_dataset.append(torch.from_numpy(image_forces))
        fingerprint_dataset.append(image_fingerprint)
        energy_dataset = np.append(energy_dataset, image_potential_energy)
        num_of_atoms = np.append(num_of_atoms, float(len(image_fingerprint)))
    energy_dataset = torch.FloatTensor(energy_dataset)
    if self.scaling_scheme == "minmax":
        # Map energies into [-1, 1]; forces scale by the same slope.
        scaling_min = torch.min(energy_dataset)
        scaling_max = torch.max(energy_dataset)
        scaling_slope = (scaling_max - scaling_min) / 2
        scaling_intercept = (scaling_max + scaling_min) / 2
        energy_dataset = (energy_dataset - scaling_intercept) / (scaling_slope)
        if self.forcetraining:
            for idx, force in enumerate(forces_dataset):
                forces_dataset[idx] = force / scaling_slope
        scalings = [scaling_slope, scaling_intercept]
    elif self.scaling_scheme == "standardize":
        # Zero-mean / unit-variance energies; forces scale by the same sd.
        scaling_mean = torch.mean(energy_dataset)
        scaling_sd = torch.std(energy_dataset, dim=0)
        energy_dataset = (energy_dataset - scaling_mean) / scaling_sd
        if self.forcetraining:
            for idx, force in enumerate(forces_dataset):
                forces_dataset[idx] = force / scaling_sd
        scalings = [scaling_sd, scaling_mean]
    elif self.scaling_scheme is None:
        scalings = [1, 0]
    else:
        # Previously an unrecognized scheme left `scalings` unbound, raising
        # UnboundLocalError at the return; fail fast with a clear message.
        raise ValueError(
            "Unsupported scaling_scheme: {}".format(self.scaling_scheme)
        )
    return (
        fingerprint_dataset,
        energy_dataset,
        num_of_atoms,
        fprimes_dataset,
        forces_dataset,
        index_hashes,
        scalings,
        rearange_forces,
    )