Example #1
 def test_chalcogenides(self):
     images = database_to_list('data/bajdichWO32018_ads.db')
     images = images_connectivity(images)
     slabs = database_to_list('data/bajdichWO32018_slabs.db')
     slabs_dict = {}
     for slab in slabs:
         slabs_dict[slab.info['id']] = slab
     for i in range(len(images)):
         species = images[i].info['key_value_pairs']['species']
         images[i].subsets['ads_atoms'] = \
             slab_positions2ads_index(images[i], slabs[i], species)
         if 'slab_atoms' not in images[i].subsets:
             images[i].subsets['slab_atoms'] = slab_index(images[i])
         if ('chemisorbed_atoms' not in images[i].subsets
                 or 'site_atoms' not in images[i].subsets
                 or 'ligand_atoms' not in images[i].subsets):
             chemi, site, ligand = info2primary_index(images[i])
             images[i].subsets['chemisorbed_atoms'] = chemi
             images[i].subsets['site_atoms'] = site
             images[i].subsets['ligand_atoms'] = ligand
         attach_cations(images[i], anion_number=8)
     gen = FeatureGenerator(nprocs=1)
     train_fpv = default_fingerprinters(gen, 'chalcogenides')
     matrix = gen.return_vec(images, train_fpv)
     labels = gen.return_names(train_fpv)
     if __name__ == '__main__':
         for i, l in enumerate(labels):
             print(i, l)
     self.assertTrue(len(labels) == np.shape(matrix)[1])
Example #2
 def test_ads_fp_gen(self):
     """Test the feature generation."""
     images = self.setup_atoms()
     images = autogen_info(images)
     print(str(len(images)) + ' training examples.')
     gen = FeatureGenerator()
     train_fpv = [
         gen.mean_chemisorbed_atoms,
         gen.count_chemisorbed_fragment,
         gen.count_ads_atoms,
         gen.count_ads_bonds,
         gen.mean_site,
         gen.sum_site,
         gen.mean_surf_ligands,
         gen.term,
         gen.bulk,
         gen.strain,
         gen.en_difference,
         # gen.ads_av,
         # gen.ads_sum,
     ]
     matrix = gen.return_vec(images, train_fpv)
     labels = gen.return_names(train_fpv)
     print(np.shape(matrix), type(matrix))
     if __name__ == '__main__':
         for i, l in enumerate(labels):
             print(i, l)
     self.assertTrue(len(labels) == np.shape(matrix)[1])
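The return_vec / return_names pair above is the core pattern in every example on this page: return_vec builds the feature matrix and return_names gives the matching column labels. As a minimal sketch, assuming matrix and labels were produced as in the test above (pandas is an assumption here; it is not used in the original tests), the two can be combined for inspection:

    import pandas as pd

    # Pair each feature column with its label so individual
    # fingerprints can be inspected by name.
    df = pd.DataFrame(matrix, columns=labels)
    print(df.describe())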
Example #3
    def test_ase_api(self):
        """Test the ase api."""
        gadb = DataConnection('{}/data/gadb.db'.format(wkdir))
        all_cand = gadb.get_all_relaxed_candidates()

        cf = all_cand[0].get_chemical_formula()

        extend_atoms_class(all_cand[0])
        self.assertTrue(isinstance(all_cand[0], type(all_cand[1])))

        f = FeatureGenerator()
        fp = f.composition_vec(all_cand[0])
        all_cand[0].set_features(fp)

        self.assertTrue(np.allclose(all_cand[0].get_features(), fp))
        self.assertTrue(all_cand[0].get_chemical_formula() == cf)

        extend_atoms_class(all_cand[1])
        self.assertTrue(all_cand[1].get_features() is None)

        g = ase_to_networkx(all_cand[2])
        all_cand[2].set_graph(g)

        self.assertTrue(all_cand[2].get_graph() == g)
        self.assertTrue(all_cand[1].get_graph() is None)
Example #4
def predict_catkit_demo(images):
    """Return a prediction of adsorption energies for structures generated with
    CatKitDemo.

    Parameters
    ----------
    images : list
        List of atoms objects representing adsorbate-surface structures.
    """
    model_ref = {'H': 'H2', 'O': 'H2O, H2', 'C': 'CH4, H2'}

    # Make list of strings showing the references.
    display_ref = []
    for atoms in images:
        try:
            initial_state = [
                model_ref[s] for s in ase.atoms.string2symbols(
                    atoms.info['key_value_pairs']['species'])
            ]
        except KeyError:
            return {}
        display_ref.append('*, ' + ', '.join(list(np.unique(initial_state))))

    images = autogen_info(images)

    gen = FeatureGenerator(nprocs=1)
    train_fpv = default_fingerprinters(gen, 'adsorbates')
    # The explicit list below replaces this default fingerprint set.
    train_fpv = [
        gen.mean_chemisorbed_atoms, gen.count_chemisorbed_fragment,
        gen.count_ads_atoms, gen.count_ads_bonds, gen.ads_av, gen.ads_sum,
        gen.bulk, gen.term, gen.strain, gen.mean_surf_ligands, gen.mean_site,
        gen.median_site, gen.max_site, gen.min_site, gen.sum_site,
        gen.generalized_cn, gen.en_difference_ads, gen.en_difference_chemi,
        gen.en_difference_active, gen.db_size, gen.delta_energy
    ]
    matrix = gen.return_vec(images, train_fpv)

    feature_index = np.load(clean_index_name)
    clean_feature_mean = np.load(clean_mean)

    impute = SimpleImputer(strategy='mean')
    impute.statistics_ = clean_feature_mean
    new_data = impute.transform(matrix[:, feature_index])

    prediction = gp.predict(new_data,
                            get_validation_error=False,
                            get_training_error=False,
                            uncertainty=True)

    output = {
        'mean': list(prediction['prediction']),
        'uncertainty': list(prediction['uncertainty']),
        'references': display_ref
    }
    return output
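A minimal usage sketch for predict_catkit_demo, assuming the module-level objects it references (gp, clean_index_name, clean_mean) are already loaded and that 'structures.db' is a hypothetical ASE database whose rows carry a 'species' key:

    import ase.db

    # Rebuild atoms objects, together with the key_value_pairs entry
    # that predict_catkit_demo expects, from the hypothetical database.
    images = []
    for row in ase.db.connect('structures.db').select():
        atoms = row.toatoms()
        atoms.info['key_value_pairs'] = dict(row.key_value_pairs)
        images.append(atoms)

    result = predict_catkit_demo(images)
    for mean, unc, ref in zip(result['mean'], result['uncertainty'],
                              result['references']):
        print('{:.2f} +/- {:.2f} eV ({})'.format(mean, unc, ref))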
Example #5
    def test_bulk_fp_gen(self):
        """Test the feature generation."""
        images = self.setup_metal()

        gen = FeatureGenerator()
        train_fpv = default_fingerprinters(gen, 'bulk')
        matrix = gen.return_vec(images, train_fpv)
        labels = gen.return_names(train_fpv)
        print(np.shape(matrix), type(matrix))
        self.assertTrue(len(labels) == np.shape(matrix)[1])
Example #6
    def test_bulk_fp_gen(self):
        """Test the feature generation."""
        images = self.setup_metal()

        gen = FeatureGenerator()
        train_fpv = [gen.summation, gen.average, gen.std]
        matrix = gen.return_vec(images, train_fpv)
        labels = gen.return_names(train_fpv)
        print(np.shape(matrix), type(matrix))
        self.assertTrue(len(labels) == np.shape(matrix)[1])
Example #7
 def test_tags(self):
     """Test the feature generation."""
     images = self.setup_atoms()
     images = autogen_info(images)
     print(str(len(images)) + ' training examples.')
     gen = FeatureGenerator(nprocs=1)
     train_fpv = default_fingerprinters(gen, 'adsorbates')
     matrix = gen.return_vec(images, train_fpv)
     labels = gen.return_names(train_fpv)
     print(np.shape(matrix), type(matrix))
     if __name__ == '__main__':
         for i, l in enumerate(labels):
             print(i, l)
     self.assertTrue(len(labels) == np.shape(matrix)[1])
Example #8
 def test_db_ads(self):
     """Test the feature generation."""
     images = database_to_list('data/ads_example.db')
     images = autogen_info(images)
     print(str(len(images)) + ' training examples.')
     gen = FeatureGenerator(nprocs=1)
     train_fpv = default_fingerprinters(gen, 'adsorbates')
     train_fpv += [gen.db_size, gen.ctime, gen.dbid, gen.delta_energy]
     matrix = gen.return_vec(images, train_fpv)
     labels = gen.return_names(train_fpv)
     print(np.shape(matrix), type(matrix))
     if __name__ == '__main__':
         for i, l in enumerate(labels):
             print(i, l)
     self.assertTrue(len(labels) == np.shape(matrix)[1])
Example #9
 def test_db_ads(self):
     """Test the feature generation."""
     images = database_to_list('data/ads_example.db')
     for atoms in images:
         atoms.set_tags(np.zeros(len(atoms)))
     images = autogen_info(images)
     print(str(len(images)) + ' training examples.')
     gen = FeatureGenerator(nprocs=1)
     train_fpv = default_fingerprinters(gen, 'adsorbates')
     # Test db specific functions.
     train_fpv += [gen.db_size, gen.ctime, gen.dbid, gen.delta_energy]
     # Old CatApp AxBy fingerprints.
     train_fpv += [gen.catapp_AB]
     matrix = gen.return_vec(images, train_fpv)
     labels = gen.return_names(train_fpv)
     print(np.shape(matrix), type(matrix))
     if __name__ == '__main__':
         for i, l in enumerate(labels):
             print(i, l)
     self.assertTrue(len(labels) == np.shape(matrix)[1])
Example #10
 def test_constrained_ads(self):
     """Test the feature generation."""
     images = self.setup_atoms()
     for atoms in images:
         c_atoms = [
             a.index for a in atoms if a.z < atoms.cell[2, 2] / 2. + 0.1
         ]
         atoms.set_constraint(FixAtoms(c_atoms))
     images = autogen_info(images)
     print(str(len(images)) + ' training examples.')
     gen = FeatureGenerator(nprocs=1)
     train_fpv = default_fingerprinters(gen, 'adsorbates')
     matrix = gen.return_vec(images, train_fpv)
     labels = gen.return_names(train_fpv)
     print(np.shape(matrix), type(matrix))
     if __name__ == '__main__':
         for i, l in enumerate(labels):
             print(i, l)
     self.assertTrue(len(labels) == np.shape(matrix)[1])
Example #11
    def get_data(self):
        """Generate features from atoms objects."""
        # Connect database generated by a GA search.
        gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

        # Get all relaxed candidates from the db file.
        print('Getting candidates from the database')
        all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

        # Set up the test and training datasets.
        testset = get_unique(atoms=all_cand, size=test_size, key='raw_score')

        trainset = get_train(atoms=all_cand,
                             size=train_size,
                             taken=testset['taken'],
                             key='raw_score')

        # Clear out some old saved data.
        for i in trainset['atoms']:
            del i.info['data']['nnmat']

        # Initiate the fingerprint generators with relevant input variables.
        print('Getting the fingerprints')
        f = FeatureGenerator()

        train_features = f.return_vec(trainset['atoms'],
                                      [f.nearestneighbour_vec])
        test_features = f.return_vec(testset['atoms'],
                                     [f.nearestneighbour_vec])

        train_targets = []
        for a in trainset['atoms']:
            train_targets.append(a.info['key_value_pairs']['raw_score'])
        test_targets = []
        for a in testset['atoms']:
            test_targets.append(a.info['key_value_pairs']['raw_score'])

        return train_features, train_targets, trainset['atoms'], \
            test_features, test_targets, testset['atoms']

    def test_generators(self):
        """Generate features from atoms objects."""
        # Test generic features for Pt then both Pt and Au.
        get_mendeleev_params(atomic_number=78)
        get_mendeleev_params(atomic_number=[78, 79],
                             params=default_params + ['en_ghosh'])

        # Connect database generated by a GA search.
        gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

        # Get all relaxed candidates from the db file.
        print('Getting candidates from the database')
        all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

        # Set up the test and training datasets.
        testset = get_unique(atoms=all_cand, size=test_size, key='raw_score')
        self.assertTrue(len(testset['atoms']) == test_size)
        self.assertTrue(len(testset['taken']) == test_size)

        trainset = get_train(atoms=all_cand,
                             size=train_size,
                             taken=testset['taken'],
                             key='raw_score')
        self.assertTrue(len(trainset['atoms']) == train_size)
        self.assertTrue(len(trainset['target']) == train_size)

        # Initiate the fingerprint generators with relevant input variables.
        print('Getting the fingerprints')
        f = FeatureGenerator(element_parameters='atomic_radius', nprocs=1)
        f.normalize_features(trainset['atoms'], testset['atoms'])

        data = f.return_vec(trainset['atoms'], [f.nearestneighbour_vec])
        n, d = np.shape(data)
        self.assertTrue(n == train_size and d == 4)
        self.assertTrue(len(f.return_names([f.nearestneighbour_vec])) == d)
        print('passed nearestneighbour_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.bond_count_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertTrue(n == train_size and d == 52)
        print('passed bond_count_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.distribution_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertTrue(n == train_size and d == 10)
        print('passed distribution_vec')

        # EXPENSIVE to calculate. Not included in training data.
        train_fp = f.return_vec(testset['atoms'], [f.connections_vec])
        n, d = np.shape(train_fp)
        self.assertTrue(n == test_size and d == 26)
        print('passed connections_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.rdf_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertTrue(n == train_size and d == 20)
        print('passed rdf_vec')

        # Start testing the standard fingerprint vector generators.
        train_fp = f.return_vec(trainset['atoms'], [f.element_mass_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertTrue(n == train_size and d == 1)
        self.assertTrue(len(f.return_names([f.element_mass_vec])) == d)
        print('passed element_mass_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.element_parameter_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        # print(f.return_names([f.element_parameter_vec]))
        self.assertTrue(n == train_size and d == 4)
        self.assertTrue(len(f.return_names([f.element_parameter_vec])) == d)
        print('passed element_parameter_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.composition_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertTrue(n == train_size and d == 2)
        self.assertTrue(len(f.return_names([f.composition_vec])) == d)
        print('passed composition_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.eigenspectrum_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertTrue(n == train_size and d == 147)
        self.assertTrue(len(f.return_names([f.eigenspectrum_vec])) == d)
        print('passed eigenspectrum_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.distance_vec])
        n, d = np.shape(train_fp)
        data = np.concatenate((data, train_fp), axis=1)
        self.assertTrue(n == train_size and d == 2)
        self.assertTrue(len(f.return_names([f.distance_vec])) == d)
        print('passed distance_vec')

        train_fp = f.return_vec(
            trainset['atoms'],
            [f.eigenspectrum_vec, f.element_mass_vec, f.composition_vec])
        n, d = np.shape(train_fp)
        self.assertTrue(n == train_size and d == 150)
        self.assertTrue(
            len(
                f.return_names([
                    f.eigenspectrum_vec, f.element_mass_vec, f.composition_vec
                ])) == d)
        print('passed combined generation')

        train_fp = f.return_vec(trainset['atoms'], [f.neighbor_sum_vec])
        n, d = np.shape(train_fp)
        self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
        # self.assertTrue(len(f.return_names([f.distance_vec])) == d)
        print('passed neighbor_sum_vec')

        train_fp = f.return_vec(trainset['atoms'], [f.neighbor_mean_vec])
        n, d = np.shape(train_fp)
        self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
        # self.assertTrue(len(f.return_names([f.distance_vec])) == d)
        print('passed neighbor_mean_vec')

        f = FeatureGenerator(element_parameters='atomic_radius',
                             max_neighbors='full',
                             nprocs=1)
        f.normalize_features(trainset['atoms'], testset['atoms'])

        train_fp = f.return_vec(trainset['atoms'], [f.neighbor_sum_vec])
        n, d = np.shape(train_fp)
        self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
        print('passed neighbor_sum_vec all neighbors')

        train_fp = f.return_vec(trainset['atoms'], [f.neighbor_mean_vec])
        n, d = np.shape(train_fp)
        self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
        print('passed neighbor_mean_vec all neighbors')

        # Do a basic check for atomic properties.
        no_prop = []
        an_prop = []
        # EXPENSIVE to calculate. Not included in training data.
        for atoms in testset['atoms']:
            no_prop.append(neighbor_features(atoms=atoms))
            an_prop.append(
                neighbor_features(atoms=atoms, property=['atomic_number']))
        self.assertTrue(np.shape(no_prop) == (test_size, 15))
        self.assertTrue(np.shape(an_prop) == (test_size, 30))
        print('passed graph_vec')

        self.__class__.all_cand = all_cand
        self.__class__.data = data
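The features and targets returned by get_data in Example #11 can be fed to any regressor. A minimal sketch using scikit-learn's Ridge as an illustrative baseline (the model choice is an assumption and not part of the original tests; scikit-learn itself already appears above via SimpleImputer):

    from sklearn.linear_model import Ridge

    train_x, train_y, _, test_x, test_y, _ = self.get_data()

    # Fit a simple linear baseline on the nearest-neighbour fingerprints
    # and report the coefficient of determination on the held-out set.
    baseline = Ridge(alpha=1.0).fit(train_x, train_y)
    print('baseline R^2: {:.3f}'.format(baseline.score(test_x, test_y)))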