def image_pred(self, image, params_dict):
        """Compute the pair-potential energy and forces for a single image.

        Every atom carries three per-element parameters (``re``, ``D``,
        ``sig``) looked up in ``params_dict``. For each atom/neighbor pair
        the parameters are combined according to ``self.combo`` and used in
        the Morse-type expression ``D * (exp(-2*C*x) - 2*exp(-C*x))`` with
        ``x = (r - re) / sig`` and ``C = ln(2) / (re / sig - 1)``.

        Args:
            image: structure exposing ``get_chemical_symbols()``,
                ``positions``, ``cell`` and ``len()`` (presumably an ASE
                ``Atoms`` object -- confirm against callers).
            params_dict: mapping ``element -> {"re": ..., "D": ..., "sig": ...}``.

        Returns:
            tuple: ``(energy, forces, natoms)`` where ``energy`` is the sum of
            the pair terms over every (atom, neighbor) entry of the neighbor
            list, ``forces`` is an ``(natoms, 3)`` array and ``natoms`` is
            ``len(image)``.

        Raises:
            ValueError: if ``self.combo`` is neither ``'mean'`` nor ``'yang'``.
        """
        # Fail loudly on an unknown rule. Without this check the pair
        # parameters would silently fall back to whatever scalars were left
        # over from building the per-atom table.
        if self.combo not in ('mean', 'yang'):
            raise ValueError(
                "Unknown combination rule {!r}; expected 'mean' or "
                "'yang'.".format(self.combo))

        symbols = image.get_chemical_symbols()
        # One (re, D, sig) row per atom, in image order.
        params = np.array([[
            params_dict[symbol]["re"],
            params_dict[symbol]["D"],
            params_dict[symbol]["sig"],
        ] for symbol in symbols]).reshape(-1, 3)
        # Only the magnitude of D is used. Taking it once for the whole table
        # keeps the combined depth of a pair identical whichever of its two
        # atoms is the central one.
        params[:, 1] = np.abs(params[:, 1])
        natoms = len(image)

        image_hash = get_hash(image)
        image_neighbors = self.get_neighbors(self.neighborlist, image_hash)

        positions = image.positions
        cell = image.cell

        energy = 0.0
        forces = np.zeros((natoms, 3))

        for a1 in range(natoms):
            re_1, D_1, sig_1 = params[a1]
            neighbors, offsets = image_neighbors[a1]
            # Displacement vectors from a1 to each neighbor, including the
            # cell translation given by the neighbor's offset.
            d = positions[neighbors] + np.dot(offsets, cell) - positions[a1]

            neighbor_params = params[neighbors]
            re_n = neighbor_params[:, 0]
            D_n = neighbor_params[:, 1]
            sig_n = neighbor_params[:, 2]

            if self.combo == 'mean':
                # Geometric mean for the depth, arithmetic mean otherwise.
                D_pair = np.sqrt(D_1 * D_n)
                sig_pair = (sig_1 + sig_n) / 2
                re_pair = (re_1 + re_n) / 2
            else:  # 'yang'
                D_pair = (2 * D_1 * D_n) / (D_1 + D_n)
                sig_pair = ((sig_1 * sig_n) * (sig_1 + sig_n) /
                            (sig_1**2 + sig_n**2))
                re_pair = ((re_1 * re_n) * (re_1 + re_n) /
                           (re_1**2 + re_n**2))

            r = np.sqrt((d**2).sum(1))
            r_star = r / sig_pair
            re_star = re_pair / sig_pair
            C = np.log(2) / (re_star - 1)
            stretch = C * (r_star - re_star)

            pair_energy = D_pair * (np.exp(-2 * stretch) -
                                    2 * np.exp(-stretch))
            energy += pair_energy.sum()

            # Radial prefactor times the displacement vector of each pair.
            f = ((2 * D_pair * C / sig_pair) * (1 / r) *
                 (np.exp(-2 * stretch) - np.exp(-stretch)))[:, np.newaxis] * d
            forces[a1] -= f.sum(axis=0)
            # Explicit loop: a neighbor index may occur more than once, so a
            # fancy-indexed ``+=`` would drop contributions.
            for a2, f2 in zip(neighbors, f):
                forces[a2] += f2
        return energy, forces, natoms
    def preprocess_data(self):
        """Build the per-image training arrays from the stored descriptors.

        For every entry of ``self.atom_images`` the method:

        * looks up the image hash and its fingerprints,
        * rescales each fingerprint component to ``[-1, 1]`` using
          ``self.fprange`` (components whose range is not larger than 1e-8
          are left untouched),
        * records the per-atom potential energy,
        * when ``self.forcetraining`` is set, records the forces divided by
          the atom count (minus the delta-model forces when ``self.delta``
          is set) and assembles the dense fingerprint-derivative matrix,
          scaled consistently with the fingerprints. The matrix is skipped
          when ``self.store_primes`` is set and a file for the hash already
          exists under ``./stored-primes/``.

        Side effects: ``self.fp_length`` is rebound from a callable to its
        result, ``self.delta_energies`` is divided in place by the atom
        counts when ``self.delta`` is set, and derivative matrices are
        written to ``./stored-primes/<hash>`` when ``self.store_primes`` is
        set. The descriptor's stored fingerprints are not modified.

        Returns:
            tuple: ``(fingerprint_dataset, energy_dataset, num_of_atoms,
            fprimes_dataset, forces_dataset, index_hashes, scalings,
            rearange_forces)``.
        """
        # TODO cleanup/optimize
        fingerprint_dataset = []
        fprimes_dataset = []
        energies = []
        atom_counts = []
        forces_dataset = []
        index_hashes = []
        # NOTE: this replaces the ``fp_length`` callable with its value, so
        # the method can only be called once per instance.
        self.fp_length = self.fp_length()
        rearange_forces = {}
        fprange = self.fprange
        tolerance = 10.0**(-8.0)
        # Per-element divisor for the derivative scaling, filled on demand.
        prime_divisors = {}

        for index, atoms_object in enumerate(self.atom_images):
            if self.isamp_hash:
                hash_name = get_amp_hash(atoms_object)
            else:
                hash_name = get_hash(atoms_object, self.Gs)
            index_hashes.append(hash_name)

            stored_fingerprint = self.descriptor.fingerprints[hash_name]
            n_atoms = float(len(stored_fingerprint))
            atom_counts.append(n_atoms)

            # Fingerprint scaling to [-1, 1]. The scaled values go into a new
            # list so the descriptor's own data is left alone; writing them
            # back would rescale an image whose hash occurs twice.
            image_fingerprint = []
            atom_order = []
            for atom, afp in stored_fingerprint:
                fprange_atom = np.array(fprange[atom])
                scaled = copy.copy(afp)
                for j, value in enumerate(afp):
                    low = fprange_atom[j][0]
                    high = fprange_atom[j][1]
                    if (high - low) > tolerance:
                        scaled[j] = -1 + 2.0 * ((value - low) / (high - low))
                image_fingerprint.append((atom, scaled))
                atom_order.append(atom)
            fingerprint_dataset.append(image_fingerprint)

            hashed_image = self.hashed_images[hash_name]
            energies.append(
                hashed_image.get_potential_energy(apply_constraint=False) /
                n_atoms)

            if not self.forcetraining:
                continue

            image_forces = hashed_image.get_forces(
                apply_constraint=False) / n_atoms
            # subtract off delta force contributions
            if self.delta:
                image_forces -= self.delta_forces[index] / n_atoms

            primes_path = "./stored-primes/" + hash_name
            if not (self.store_primes and os.path.isfile(primes_path)):
                # For every atom (in image order) find its position once the
                # atoms are grouped by element in ``self.elements`` order.
                grouped_order = [
                    symbol for element in self.elements
                    for symbol in atom_order if symbol == element
                ]
                slots = {}
                for position, symbol in enumerate(grouped_order):
                    slots.setdefault(symbol, []).append(position)
                cursors = {
                    symbol: iter(positions)
                    for symbol, positions in slots.items()
                }
                rearange_forces[index] = np.array(
                    [
                        next(cursors[symbol]) for symbol in atom_order
                        if symbol in cursors
                    ],
                    dtype=int,
                )

                image_primes = self.descriptor.fingerprintprimes[hash_name]
                fp_length = len(image_fingerprint[0][1])
                num_atoms = len(image_fingerprint)
                if self.specific_atoms:
                    ad_atom_index = get_ad_index(atoms_object)
                total_atoms_num = len(atoms_object)
                fingerprintprimes = torch.zeros(fp_length * num_atoms,
                                                3 * total_atoms_num)
                # Key fields as used below: [0] column atom index, [2] row
                # atom index, [3] symbol used for the fprange lookup,
                # [4] Cartesian component.
                for fp_key in list(image_primes.keys()):
                    symbol = fp_key[3]
                    if symbol not in prime_divisors:
                        bounds = np.array(fprange[symbol])
                        divisor = bounds[:, 1] - bounds[:, 0]
                        # Components that were not rescaled above keep their
                        # derivative unchanged (2 * x / 2 == x). Same
                        # threshold test as the fingerprint scaling.
                        divisor[~(divisor > tolerance)] = 2
                        prime_divisors[symbol] = divisor
                    # scaling of fingerprint derivatives to be consistent
                    # with fingerprint scaling.
                    image_prime = torch.tensor(
                        2 * np.array(image_primes[fp_key]) /
                        prime_divisors[symbol])
                    if self.specific_atoms:
                        base_atom = ad_atom_index.index(fp_key[2])
                    else:
                        base_atom = fp_key[2]
                    row_start = base_atom * fp_length
                    column = fp_key[0] * 3 + fp_key[4]
                    fingerprintprimes[row_start:row_start + fp_length,
                                      column] = image_prime
                # store primes in a sparse matrix format
                if self.store_primes:
                    sp_matrix = sparse.coo_matrix(fingerprintprimes)
                    # Pass an open handle (not a path) so the file name is
                    # kept as-is, and close it deterministically.
                    with open(primes_path, "wb") as primes_file:
                        sparse.save_npz(primes_file, sp_matrix)
                fprimes_dataset.append(fingerprintprimes)
            forces_dataset.append(torch.from_numpy(image_forces))

        energy_dataset = np.array(energies, dtype=float)
        num_of_atoms = np.array(atom_counts, dtype=float)

        if self.delta:
            # Targets become (reference - delta model), both expressed per
            # atom and relative to the first image.
            self.delta_energies /= num_of_atoms
            target_ref_per_atom = energy_dataset[0]
            delta_ref_per_atom = self.delta_energies[0]
            relative_targets = energy_dataset - target_ref_per_atom
            relative_delta = self.delta_energies - delta_ref_per_atom
            energy_dataset = torch.FloatTensor(relative_targets -
                                               relative_delta)
            scalings = [target_ref_per_atom, delta_ref_per_atom]
        else:
            energy_dataset = torch.FloatTensor(energy_dataset)
            scalings = [0, 0]

        return (
            fingerprint_dataset,
            energy_dataset,
            num_of_atoms,
            fprimes_dataset,
            forces_dataset,
            index_hashes,
            scalings,
            rearange_forces,
        )
Exemplo n.º 3
0
    def preprocess_data(self):
        """Build the per-image training arrays and apply target scaling.

        For every entry of ``self.atom_images`` the method:

        * looks up the image hash and its fingerprints,
        * rescales each fingerprint component to ``[-1, 1]`` using
          ``self.fprange`` (components whose range is not larger than 1e-8
          are left untouched),
        * records the potential energy, minus ``self.lj_energies[index]``
          when ``self.lj`` is set,
        * when ``self.forcetraining`` is set, records the forces (minus
          ``self.lj_forces[index]`` when ``self.lj`` is set) and assembles
          the dense fingerprint-derivative matrix with the matching scale
          factor. The matrix is skipped when ``self.store_primes`` is set
          and a file for the hash already exists under ``./stored-primes/``.

        Energies (and forces, by the same slope) are then normalised
        according to ``self.scaling_scheme``: ``"minmax"``,
        ``"standardize"`` or ``None``.

        Side effects: ``self.fp_length`` is rebound from a callable to its
        result and derivative matrices are written to
        ``./stored-primes/<hash>`` when ``self.store_primes`` is set. The
        descriptor's stored fingerprints and derivatives are not modified.

        Returns:
            tuple: ``(fingerprint_dataset, energy_dataset, num_of_atoms,
            fprimes_dataset, forces_dataset, index_hashes, scalings,
            rearange_forces)`` where ``scalings`` is ``[slope, intercept]``.

        Raises:
            ValueError: if ``self.scaling_scheme`` is not one of the
                supported values.
        """
        # Reject an unsupported scheme before doing any work; otherwise the
        # failure would only surface as an unbound ``scalings`` at return.
        if self.scaling_scheme not in ("minmax", "standardize", None):
            raise ValueError(
                "Unknown scaling_scheme {!r}; expected 'minmax', "
                "'standardize' or None.".format(self.scaling_scheme)
            )

        # TODO cleanup/optimize
        fingerprint_dataset = []
        fprimes_dataset = []
        energies = []
        atom_counts = []
        forces_dataset = []
        index_hashes = []
        # NOTE: this replaces the ``fp_length`` callable with its value, so
        # the method can only be called once per instance.
        self.fp_length = self.fp_length()
        rearange_forces = {}
        fprange = self.fprange
        tolerance = 10.0 ** (-8.0)
        # Per-element (max - min) ranges for derivative scaling, on demand.
        prime_spans = {}

        for index, atoms_object in enumerate(self.atom_images):
            if self.isamp_hash:
                hash_name = get_amp_hash(atoms_object)
            else:
                hash_name = get_hash(atoms_object, self.Gs)
            index_hashes.append(hash_name)

            stored_fingerprint = self.descriptor.fingerprints[hash_name]

            # Fingerprint scaling to [-1, 1]. The scaled values go into a new
            # list so the descriptor's own data is left alone; writing them
            # back would rescale an image whose hash occurs twice.
            image_fingerprint = []
            atom_order = []
            for atom, afp in stored_fingerprint:
                fprange_atom = fprange[atom]
                scaled = copy.copy(afp)
                for j, value in enumerate(afp):
                    low = fprange_atom[j][0]
                    high = fprange_atom[j][1]
                    if (high - low) > tolerance:
                        scaled[j] = -1 + 2.0 * ((value - low) / (high - low))
                image_fingerprint.append((atom, scaled))
                atom_order.append(atom)

            hashed_image = self.hashed_images[hash_name]
            image_potential_energy = hashed_image.get_potential_energy(
                apply_constraint=False
            )
            # subtract off lj contribution
            if self.lj:
                image_potential_energy -= self.lj_energies[index]

            if self.forcetraining:
                image_forces = hashed_image.get_forces(apply_constraint=False)
                # subtract off lj force contribution (out of place, so the
                # array handed back by the calculator is never modified)
                if self.lj:
                    image_forces = image_forces - np.array(self.lj_forces[index])

                primes_path = "./stored-primes/" + hash_name
                if not (self.store_primes and os.path.isfile(primes_path)):
                    # For every atom (in image order) find its position once
                    # the atoms are grouped by element in ``self.elements``
                    # order.
                    grouped_order = [
                        symbol
                        for element in self.elements
                        for symbol in atom_order
                        if symbol == element
                    ]
                    slots = {}
                    for position, symbol in enumerate(grouped_order):
                        slots.setdefault(symbol, []).append(position)
                    cursors = {
                        symbol: iter(positions)
                        for symbol, positions in slots.items()
                    }
                    rearange_forces[index] = np.array(
                        [
                            next(cursors[symbol])
                            for symbol in atom_order
                            if symbol in cursors
                        ],
                        dtype=int,
                    )

                    image_primes = self.descriptor.fingerprintprimes[hash_name]
                    fp_length = len(image_fingerprint[0][1])
                    num_atoms = len(image_fingerprint)
                    fingerprintprimes = torch.zeros(
                        fp_length * num_atoms, 3 * num_atoms
                    )
                    # Key fields as used below: [0] column atom index,
                    # [2] row atom index, [3] symbol used for the fprange
                    # lookup, [4] Cartesian component.
                    for fp_key in list(image_primes.keys()):
                        symbol = fp_key[3]
                        if symbol not in prime_spans:
                            bounds = np.asarray(fprange[symbol], dtype=float)
                            prime_spans[symbol] = bounds[:, 1] - bounds[:, 0]
                        # Work on a fresh array: the stored derivatives must
                        # not be rescaled in place.
                        fprime = np.array(image_primes[fp_key], dtype=float)
                        span = prime_spans[symbol][: fprime.shape[0]]
                        # Derivative scaling matching the fingerprint scaling:
                        # only components that were rescaled get the
                        # 2 / (max - min) factor.
                        scalable = span > tolerance
                        fprime[scalable] = 2.0 * (
                            fprime[scalable] / span[scalable]
                        )
                        row_start = fp_key[2] * fp_length
                        column = fp_key[0] * 3 + fp_key[4]
                        fingerprintprimes[
                            row_start : row_start + fp_length, column
                        ] = torch.tensor(fprime)
                    # store primes in a sparse matrix format
                    if self.store_primes:
                        sp_matrix = sparse.coo_matrix(fingerprintprimes)
                        # Pass an open handle (not a path) so the file name
                        # is kept as-is, and close it deterministically.
                        with open(primes_path, "wb") as primes_file:
                            sparse.save_npz(primes_file, sp_matrix)
                    fprimes_dataset.append(fingerprintprimes)
                forces_dataset.append(torch.from_numpy(image_forces))

            fingerprint_dataset.append(image_fingerprint)
            energies.append(image_potential_energy)
            atom_counts.append(float(len(image_fingerprint)))

        num_of_atoms = np.array(atom_counts, dtype=float)
        energy_dataset = torch.FloatTensor(np.array(energies, dtype=float))

        if self.scaling_scheme == "minmax":
            # Map energies linearly onto [-1, 1].
            scaling_min = torch.min(energy_dataset)
            scaling_max = torch.max(energy_dataset)
            scaling_slope = (scaling_max - scaling_min) / 2
            scaling_intercept = (scaling_max + scaling_min) / 2
            energy_dataset = (energy_dataset - scaling_intercept) / scaling_slope
            if self.forcetraining:
                forces_dataset = [
                    force / scaling_slope for force in forces_dataset
                ]
            scalings = [scaling_slope, scaling_intercept]
        elif self.scaling_scheme == "standardize":
            # Zero mean, unit standard deviation.
            scaling_mean = torch.mean(energy_dataset)
            scaling_sd = torch.std(energy_dataset, dim=0)
            energy_dataset = (energy_dataset - scaling_mean) / scaling_sd
            if self.forcetraining:
                forces_dataset = [force / scaling_sd for force in forces_dataset]
            scalings = [scaling_sd, scaling_mean]
        else:  # scaling_scheme is None: identity scaling
            scalings = [1, 0]

        return (
            fingerprint_dataset,
            energy_dataset,
            num_of_atoms,
            fprimes_dataset,
            forces_dataset,
            index_hashes,
            scalings,
            rearange_forces,
        )