Exemplo n.º 1
0
    def test_number_of_features(self):
        """Verify that get_number_of_features() reports the expected count
        for each combination of G2/G3/G4 symmetry-function parameters.
        """
        species = [1, 8]
        n_elem = len(species)

        # (extra constructor kwargs, expected number of features)
        cases = [
            ({}, n_elem),
            ({"g2_params": [[1, 2], [4, 5]]}, n_elem * (2 + 1)),
            ({"g3_params": [1, 2, 3, 4]}, n_elem * (4 + 1)),
            (
                {"g4_params": [[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]]},
                n_elem + 4 * 3,
            ),
            (
                {
                    "g2_params": [[1, 2], [4, 5]],
                    "g3_params": [1, 2, 3, 4],
                    "g4_params": [[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
                },
                n_elem * (1 + 2 + 4) + 4 * 3,
            ),
        ]
        for extra_kwargs, expected in cases:
            acsf = ACSF(rcut=6.0, species=species, **extra_kwargs)
            self.assertEqual(acsf.get_number_of_features(), expected)
Exemplo n.º 2
0
 def test_properties(self):
     """Check that reassigning the ``species`` property changes both the
     reported feature count and the width of created output.
     """
     acsf = ACSF(
         rcut=6.0,
         species=[1, 8],
         g2_params=[[1, 2]],
         sparse=False,
     )
     n_before = acsf.get_number_of_features()
     out_before = acsf.create(H2O)

     # Switching to a larger species set must alter the descriptor size.
     acsf.species = ["C", "H", "O"]
     n_after = acsf.get_number_of_features()
     out_after = acsf.create(molecule("CH3OH"))

     self.assertTrue(n_before != n_after)
     self.assertTrue(out_before.shape[1] != out_after.shape[1])
Exemplo n.º 3
0
    def test_parallel_sparse(self):
        """Check that sparse output created in parallel equals the result of
        creating each (system, position) descriptor one centre at a time.
        """
        samples = [molecule("CO"), molecule("N2O")]
        desc = ACSF(
            rcut=6.0,
            species=[6, 7, 8],
            g2_params=[[1, 2], [4, 5]],
            g3_params=[1, 2, 3, 4],
            g4_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
            g5_params=[[1, 2, 3], [3, 1, 4], [4, 5, 6], [7, 8, 9]],
            sparse=True,
        )
        n_features = desc.get_number_of_features()

        def reference(pairs):
            # Build the expected dense array one (system, atom) pair at a time.
            ref = np.empty((len(pairs), n_features))
            for row, (i_sys, i_pos) in enumerate(pairs):
                ref[row, :] = desc.create(samples[i_sys], [i_pos]).toarray()
            return ref

        # Multiple systems, serial job
        output = desc.create(
            system=samples,
            positions=[[0], [0, 1]],
            n_jobs=1,
        ).toarray()
        self.assertTrue(
            np.allclose(output, reference([(0, 0), (1, 0), (1, 1)])))

        # Same explicit positions, parallel job
        output = desc.create(
            system=samples,
            positions=[[0], [0, 1]],
            n_jobs=2,
        ).toarray()
        self.assertTrue(
            np.allclose(output, reference([(0, 0), (1, 0), (1, 1)])))

        # No positions specified: every atom of every system is a centre.
        output = desc.create(
            system=samples,
            positions=[None, None],
            n_jobs=2,
        ).toarray()
        all_pairs = [(0, 0), (0, 1), (1, 0), (1, 1), (1, 2)]
        self.assertTrue(np.allclose(output, reference(all_pairs)))
Exemplo n.º 4
0
def createDescriptorsACSF(
    data,
    metadata,
    cutoff_ACSF=default_cutoff_ACSF,
    g2_params=default_g2_params,
    g3_params=default_g3_params,
):
    """Compute ACSF descriptors for every atom of every structure in *data*.

    Args:
        data: Sequence of structures accepted by ``ACSF.create``.
        metadata: Run description dict; must contain 'species', 'n_atoms'
            and 'train_set_size'. Descriptor settings ('cutoff_acsf',
            'g2_params', 'g3_params') are taken from it when present and
            filled in from the keyword arguments otherwise.
        cutoff_ACSF: Fallback cutoff radius when metadata lacks one.
        g2_params: Fallback G2 parameters when metadata lacks them.
        g3_params: Fallback G3 parameters when metadata lacks them.

    Returns:
        Tuple ``(metadata, descriptors)`` where ``descriptors`` holds one
        ``(n_structures, n_features)`` array per atom index.
    """
    # Updating metadata: fill in any missing descriptor settings from the
    # keyword arguments (previously these arguments were silently ignored
    # and a missing key raised KeyError below).
    metadata.setdefault('cutoff_acsf', cutoff_ACSF)
    metadata.setdefault('g2_params', g2_params)
    metadata.setdefault('g3_params', g3_params)

    # Prepping ACSF descriptor structure.
    # BUG FIX: the G3 parameters were previously passed as ``g4_params``;
    # ACSF G3 params are scalars while G4 params are triples.
    acsf = ACSF(species=metadata['species'],
                rcut=metadata['cutoff_acsf'],
                g2_params=metadata['g2_params'],
                g3_params=metadata['g3_params'])
    metadata['n_features'] = acsf.get_number_of_features()

    # Computing descriptors: one (n_structures, n_features) array per atom.
    descriptors = []
    for index_atom in tqdm.tqdm(range(metadata['n_atoms'])):
        descriptors_loc = np.empty((np.shape(data)[0], metadata['n_features']))
        # NOTE(review): rows are filled only up to 'train_set_size', which
        # is assumed to equal np.shape(data)[0] — confirm with callers.
        for index_structure in range(metadata['train_set_size']):
            descriptors_loc[index_structure, :] = acsf.create(
                data[index_structure], positions=[index_atom])
        descriptors.append(descriptors_loc)
    return metadata, descriptors
Exemplo n.º 5
0
def create_data_ACSF(data, metadata):
    """Compute ACSF descriptors per frame, optionally PCA-reduce and scale
    them, and join them onto *data* as a 'descriptor' column.

    Args:
        data: DataFrame with a 'molec' column of structures, indexed by
            frame number.
        metadata: Dict providing 'particles', 'scaler', 'test_size',
            'rcut', 'nmax', 'lmax', 'N_PCA' and 'sigma_SOAP' (only
            'particles', 'scaler' and 'N_PCA' are used here).

    Returns:
        Tuple ``(data_with_descriptors, metadata)``.
    """
    particles, scaler, test_size, rcut, nmax, lmax, N_PCA, sigma_SOAP = [
        metadata[x] for x in [
            'particles', 'scaler', 'test_size', 'rcut', 'nmax', 'lmax',
            'N_PCA', 'sigma_SOAP'
        ]
    ]

    acsf = ACSF(species=["H", "O"],
                rcut=9.0,
                g2_params=[[1, 0], [0.1, 0], [0.01, 0], [0.01, 0], [0.001, 0]],
                g4_params=[[1, 1, 1], [1, 2, 1], [1, 1, -1], [1, 2, -1],
                           [0.1, 1, 1], [0.1, 2, 1], [0.1, 1, -1],
                           [0.1, 2, -1], [0.01, 1, 1], [0.01, 2, 1],
                           [0.01, 1, -1], [0.01, 2, -1]])

    nb_features = acsf.get_number_of_features()
    n_frames = data.index.max() + 1

    # BUG FIX: ``pd.np`` was removed in pandas >= 2.0; use numpy directly.
    descriptors = np.empty((n_frames, len(particles), nb_features))

    for i_time in tqdm.tqdm(range(n_frames)):
        descriptors[i_time] = acsf.create(data['molec'][i_time],
                                          positions=np.arange(len(particles)))

    # Create the train/test split flags if not already present.
    if 'is_train' not in data:
        data['is_train'] = create_is_train(n_frames)

    # NOTE(review): the 2/5 split below hard-codes 2 particles of one kind
    # followed by 5 of another — confirm it matches 'particles'.
    def _apply_pcas(PCAs):
        # Project each particle group with its fitted PCA; same transform
        # as before, deduplicated from the two original branches.
        reduced = np.empty((n_frames, len(particles), N_PCA))
        for pca, sl, count in ((PCAs[0], slice(None, 2), 2),
                               (PCAs[1], slice(2, None), 5)):
            flat = descriptors[:, sl, :].reshape(
                descriptors.shape[0] * count, nb_features)
            reduced[:, sl, :] = pca.transform(flat).reshape(
                descriptors.shape[0], count, N_PCA)
        return reduced

    # Optional dimensionality reduction.
    if N_PCA:
        if 'PCAs' not in metadata:
            # Fit new PCAs on the training frames only.
            PCAs = select_best_params(descriptors[data['is_train'].values],
                                      nb_features, N_PCA)
            descriptors = _apply_pcas(PCAs)
            metadata['old_N_feature'] = nb_features
            metadata['PCAs'] = PCAs
        else:
            descriptors = _apply_pcas(metadata['PCAs'])
        nb_features = N_PCA

    # Scaling: False disables it, None fits new scalers, anything else is
    # reused as previously fitted scalers.
    if scaler is False:
        pass
    elif scaler is None:
        descriptors, scaler = scale_descriptors(data, descriptors)
    else:
        for sc, sl, count in ((scaler[0], slice(0, 2), 2),
                              (scaler[1], slice(2, None), 5)):
            flat = descriptors[:, sl, :].reshape(
                descriptors.shape[0] * count, nb_features)
            descriptors[:, sl, :] = sc.transform(flat).reshape(
                descriptors.shape[0], count, nb_features)

    metadata['scaler'] = scaler
    return data.join(pd.DataFrame({'descriptor': list(descriptors)})), metadata