def test_chalcogenides(self):
    """Test feature generation for the chalcogenide example databases."""
    images = database_to_list('data/bajdichWO32018_ads.db')
    images = images_connectivity(images)
    slabs = database_to_list('data/bajdichWO32018_slabs.db')
    # NOTE(review): this mapping is built but never read below — confirm
    # whether it is needed.
    slabs_dict = {slab.info['id']: slab for slab in slabs}
    for index, atoms in enumerate(images):
        species = atoms.info['key_value_pairs']['species']
        atoms.subsets['ads_atoms'] = \
            slab_positions2ads_index(atoms, slabs[index], species)
        if 'slab_atoms' not in atoms.subsets:
            atoms.subsets['slab_atoms'] = slab_index(atoms)
        needs_primary = ('chemisorbed_atoms' not in atoms.subsets or
                         'site_atoms' not in atoms.subsets or
                         'ligand_atoms' not in atoms.subsets)
        if needs_primary:
            chemi, site, ligand = info2primary_index(atoms)
            atoms.subsets['chemisorbed_atoms'] = chemi
            atoms.subsets['site_atoms'] = site
            atoms.subsets['ligand_atoms'] = ligand
        attach_cations(atoms, anion_number=8)
    generator = FeatureGenerator(nprocs=1)
    fingerprinters = default_fingerprinters(generator, 'chalcogenides')
    matrix = generator.return_vec(images, fingerprinters)
    labels = generator.return_names(fingerprinters)
    if __name__ == '__main__':
        for i, l in enumerate(labels):
            print(i, l)
    self.assertTrue(np.shape(matrix)[1] == len(labels))
def test_ads_fp_gen(self):
    """Test the feature generation."""
    images = autogen_info(self.setup_atoms())
    print(str(len(images)) + ' training examples.')
    generator = FeatureGenerator()
    # Explicit list of adsorbate fingerprinters under test.
    fingerprinters = [
        generator.mean_chemisorbed_atoms,
        generator.count_chemisorbed_fragment,
        generator.count_ads_atoms,
        generator.count_ads_bonds,
        generator.mean_site,
        generator.sum_site,
        generator.mean_surf_ligands,
        generator.term,
        generator.bulk,
        generator.strain,
        generator.en_difference,
        # generator.ads_av,
        # generator.ads_sum,
    ]
    matrix = generator.return_vec(images, fingerprinters)
    labels = generator.return_names(fingerprinters)
    print(np.shape(matrix), type(matrix))
    if __name__ == '__main__':
        for index, label in enumerate(labels):
            print(index, label)
    self.assertTrue(np.shape(matrix)[1] == len(labels))
def test_ase_api(self):
    """Test the ase api."""
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))
    all_cand = gadb.get_all_relaxed_candidates()
    first, second, third = all_cand[0], all_cand[1], all_cand[2]
    formula = first.get_chemical_formula()
    extend_atoms_class(first)
    # Extending one candidate must not change the shared class.
    self.assertTrue(isinstance(first, type(second)))
    generator = FeatureGenerator()
    comp_vec = generator.composition_vec(first)
    first.set_features(comp_vec)
    self.assertTrue(np.allclose(first.get_features(), comp_vec))
    self.assertTrue(first.get_chemical_formula() == formula)
    extend_atoms_class(second)
    self.assertTrue(second.get_features() is None)
    graph = ase_to_networkx(third)
    third.set_graph(graph)
    self.assertTrue(third.get_graph() == graph)
    self.assertTrue(second.get_graph() is None)
def predict_catkit_demo(images):
    """Return a prediction of adsorption energies for structures generated
    with CatKitDemo.

    Parameters
    ----------
    images : list
        List of atoms objects representing adsorbate-surface structures.

    Returns
    -------
    dict
        Keys 'mean', 'uncertainty' and 'references'. An empty dict is
        returned if any image lacks the 'species' key_value_pair.
    """
    model_ref = {'H': 'H2', 'O': 'H2O, H2', 'C': 'CH4, H2'}

    # Make list of strings showing the references.
    display_ref = []
    for atoms in images:
        try:
            initial_state = [
                model_ref[s] for s in ase.atoms.string2symbols(
                    atoms.info['key_value_pairs']['species'])
            ]
        except KeyError:
            # Without species metadata no reference states can be built.
            return {}
        display_ref.append('*, ' + ', '.join(list(np.unique(initial_state))))

    images = autogen_info(images)

    gen = FeatureGenerator(nprocs=1)
    # Explicit fingerprinter list matching the stored model's feature set.
    # (A previous redundant call to default_fingerprinters was dead code,
    # immediately overwritten by this list.)
    train_fpv = [
        gen.mean_chemisorbed_atoms, gen.count_chemisorbed_fragment,
        gen.count_ads_atoms, gen.count_ads_bonds, gen.ads_av, gen.ads_sum,
        gen.bulk, gen.term, gen.strain, gen.mean_surf_ligands, gen.mean_site,
        gen.median_site, gen.max_site, gen.min_site, gen.sum_site,
        gen.generalized_cn, gen.en_difference_ads, gen.en_difference_chemi,
        gen.en_difference_active, gen.db_size, gen.delta_energy
    ]
    matrix = gen.return_vec(images, train_fpv)

    # Select the trained feature subset and impute missing values with the
    # stored training means.
    feature_index = np.load(clean_index_name)
    clean_feature_mean = np.load(clean_mean)
    impute = SimpleImputer(strategy='mean')
    impute.statistics_ = clean_feature_mean
    new_data = impute.transform(matrix[:, feature_index])

    prediction = gp.predict(new_data,
                            get_validation_error=False,
                            get_training_error=False,
                            uncertainty=True)

    output = {
        'mean': list(prediction['prediction']),
        'uncertainty': list(prediction['uncertainty']),
        'references': display_ref
    }
    return output
def test_bulk_fp_gen(self):
    """Test the default bulk feature generation."""
    images = self.setup_metal()
    gen = FeatureGenerator()
    train_fpv = default_fingerprinters(gen, 'bulk')
    matrix = gen.return_vec(images, train_fpv)
    labels = gen.return_names(train_fpv)
    # Fixed: the original nested print() passed None as the second
    # argument; print shape and type together like the sibling tests.
    print(np.shape(matrix), type(matrix))
    self.assertTrue(len(labels) == np.shape(matrix)[1])
def test_bulk_fp_gen(self):
    """Test the summation/average/std bulk feature generation."""
    images = self.setup_metal()
    gen = FeatureGenerator()
    train_fpv = [gen.summation, gen.average, gen.std]
    matrix = gen.return_vec(images, train_fpv)
    labels = gen.return_names(train_fpv)
    # Fixed: the original nested print() passed None as the second
    # argument; print shape and type together like the sibling tests.
    print(np.shape(matrix), type(matrix))
    self.assertTrue(len(labels) == np.shape(matrix)[1])
def test_tags(self):
    """Test the feature generation."""
    images = autogen_info(self.setup_atoms())
    print(str(len(images)) + ' training examples.')
    generator = FeatureGenerator(nprocs=1)
    fingerprinters = default_fingerprinters(generator, 'adsorbates')
    matrix = generator.return_vec(images, fingerprinters)
    labels = generator.return_names(fingerprinters)
    print(np.shape(matrix), type(matrix))
    if __name__ == '__main__':
        for index, label in enumerate(labels):
            print(index, label)
    self.assertTrue(np.shape(matrix)[1] == len(labels))
def test_db_ads(self):
    """Test the feature generation."""
    images = autogen_info(database_to_list('data/ads_example.db'))
    print(str(len(images)) + ' training examples.')
    generator = FeatureGenerator(nprocs=1)
    fingerprinters = default_fingerprinters(generator, 'adsorbates')
    # Database-specific fingerprinters.
    fingerprinters += [generator.db_size, generator.ctime,
                       generator.dbid, generator.delta_energy]
    matrix = generator.return_vec(images, fingerprinters)
    labels = generator.return_names(fingerprinters)
    print(np.shape(matrix), type(matrix))
    if __name__ == '__main__':
        for index, label in enumerate(labels):
            print(index, label)
    self.assertTrue(np.shape(matrix)[1] == len(labels))
def test_db_ads(self):
    """Test the feature generation."""
    images = database_to_list('data/ads_example.db')
    # Zero out all atomic tags before generating information.
    for atoms in images:
        atoms.set_tags(np.zeros(len(atoms)))
    images = autogen_info(images)
    print(str(len(images)) + ' training examples.')
    generator = FeatureGenerator(nprocs=1)
    fingerprinters = default_fingerprinters(generator, 'adsorbates')
    # Test db specific functions.
    fingerprinters += [generator.db_size, generator.ctime,
                       generator.dbid, generator.delta_energy]
    # Old CatApp AxBy fingerprints.
    fingerprinters += [generator.catapp_AB]
    matrix = generator.return_vec(images, fingerprinters)
    labels = generator.return_names(fingerprinters)
    print(np.shape(matrix), type(matrix))
    if __name__ == '__main__':
        for index, label in enumerate(labels):
            print(index, label)
    self.assertTrue(np.shape(matrix)[1] == len(labels))
def test_constrained_ads(self):
    """Test the feature generation."""
    images = self.setup_atoms()
    for atoms in images:
        # Fix every atom in the lower half of the cell (plus tolerance).
        threshold = atoms.cell[2, 2] / 2. + 0.1
        frozen = [atom.index for atom in atoms if atom.z < threshold]
        atoms.set_constraint(FixAtoms(frozen))
    images = autogen_info(images)
    print(str(len(images)) + ' training examples.')
    generator = FeatureGenerator(nprocs=1)
    fingerprinters = default_fingerprinters(generator, 'adsorbates')
    matrix = generator.return_vec(images, fingerprinters)
    labels = generator.return_names(fingerprinters)
    print(np.shape(matrix), type(matrix))
    if __name__ == '__main__':
        for index, label in enumerate(labels):
            print(index, label)
    self.assertTrue(np.shape(matrix)[1] == len(labels))
def get_data(self):
    """Generate features from atoms objects."""
    # Connect database generated by a GA search.
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

    # Get all relaxed candidates from the db file.
    print('Getting candidates from the database')
    all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

    # Setup the test and training datasets.
    testset = get_unique(atoms=all_cand, size=test_size, key='raw_score')
    trainset = get_train(atoms=all_cand, size=train_size,
                         taken=testset['taken'], key='raw_score')

    # Clear out some old saved data.
    for atoms in trainset['atoms']:
        del atoms.info['data']['nnmat']

    # Initiate the fingerprint generators with relevant input variables.
    print('Getting the fingerprints')
    generator = FeatureGenerator()
    train_features = generator.return_vec(
        trainset['atoms'], [generator.nearestneighbour_vec])
    test_features = generator.return_vec(
        testset['atoms'], [generator.nearestneighbour_vec])

    train_targets = [a.info['key_value_pairs']['raw_score']
                     for a in trainset['atoms']]
    test_targets = [a.info['key_value_pairs']['raw_score']
                    for a in testset['atoms']]

    return train_features, train_targets, trainset['atoms'], \
        test_features, test_targets, testset['atoms']
def test_generators(self):
    """Generate features from atoms objects.

    Runs every standard fingerprint generator over the GA database
    candidates and asserts the expected matrix dimensions for each.
    Accumulated features and candidates are stored on the class for
    reuse by later tests.
    """
    # Test generic features for Pt then both Pt and Au.
    get_mendeleev_params(atomic_number=78)
    get_mendeleev_params(atomic_number=[78, 79],
                         params=default_params + ['en_ghosh'])

    # Connect database generated by a GA search.
    gadb = DataConnection('{}/data/gadb.db'.format(wkdir))

    # Get all relaxed candidates from the db file.
    print('Getting candidates from the database')
    all_cand = gadb.get_all_relaxed_candidates(use_extinct=False)

    # Setup the test and training datasets.
    testset = get_unique(atoms=all_cand, size=test_size, key='raw_score')
    self.assertTrue(len(testset['atoms']) == test_size)
    self.assertTrue(len(testset['taken']) == test_size)

    trainset = get_train(atoms=all_cand, size=train_size,
                         taken=testset['taken'], key='raw_score')
    self.assertTrue(len(trainset['atoms']) == train_size)
    self.assertTrue(len(trainset['target']) == train_size)

    # Initiate the fingerprint generators with relevant input variables.
    print('Getting the fingerprints')
    f = FeatureGenerator(element_parameters='atomic_radius', nprocs=1)
    f.normalize_features(trainset['atoms'], testset['atoms'])

    data = f.return_vec(trainset['atoms'], [f.nearestneighbour_vec])
    n, d = np.shape(data)
    self.assertTrue(n == train_size and d == 4)
    self.assertTrue(len(f.return_names([f.nearestneighbour_vec])) == d)
    print('passed nearestneighbour_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.bond_count_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 52)
    print('passed bond_count_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.distribution_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 10)
    print('passed distribution_vec')

    # EXPENSIVE to calculate. Not included in training data.
    train_fp = f.return_vec(testset['atoms'], [f.connections_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == test_size and d == 26)
    print('passed connections_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.rdf_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 20)
    print('passed rdf_vec')

    # Start testing the standard fingerprint vector generators.
    train_fp = f.return_vec(trainset['atoms'], [f.element_mass_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 1)
    self.assertTrue(len(f.return_names([f.element_mass_vec])) == d)
    print('passed element_mass_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.element_parameter_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    # print(f.return_names([f.element_parameter_vec]))
    self.assertTrue(n == train_size and d == 4)
    self.assertTrue(len(f.return_names([f.element_parameter_vec])) == d)
    print('passed element_parameter_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.composition_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 2)
    self.assertTrue(len(f.return_names([f.composition_vec])) == d)
    print('passed composition_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.eigenspectrum_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 147)
    self.assertTrue(len(f.return_names([f.eigenspectrum_vec])) == d)
    print('passed eigenspectrum_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.distance_vec])
    n, d = np.shape(train_fp)
    data = np.concatenate((data, train_fp), axis=1)
    self.assertTrue(n == train_size and d == 2)
    self.assertTrue(len(f.return_names([f.distance_vec])) == d)
    print('passed distance_vec')

    # Several generators combined in one call should concatenate widths.
    train_fp = f.return_vec(
        trainset['atoms'],
        [f.eigenspectrum_vec, f.element_mass_vec, f.composition_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == 150)
    self.assertTrue(
        len(
            f.return_names([
                f.eigenspectrum_vec, f.element_mass_vec, f.composition_vec
            ])) == d)
    print('passed combined generation')

    # Neighbor features produce one column per atom in the structure.
    train_fp = f.return_vec(trainset['atoms'], [f.neighbor_sum_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
    # self.assertTrue(len(f.return_names([f.distance_vec])) == d)
    print('passed neighbor_sum_vec')

    train_fp = f.return_vec(trainset['atoms'], [f.neighbor_mean_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
    # self.assertTrue(len(f.return_names([f.distance_vec])) == d)
    print('passed neighbor_mean_vec')

    # Repeat the neighbor features with all neighbors included.
    f = FeatureGenerator(element_parameters='atomic_radius',
                         max_neighbors='full', nprocs=1)
    f.normalize_features(trainset['atoms'], testset['atoms'])

    train_fp = f.return_vec(trainset['atoms'], [f.neighbor_sum_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
    print('passed neighbor_sum_vec all neighbors')

    train_fp = f.return_vec(trainset['atoms'], [f.neighbor_mean_vec])
    n, d = np.shape(train_fp)
    self.assertTrue(n == train_size and d == len(trainset['atoms'][0]))
    print('passed neighbor_mean_vec all neighbors')

    # Do basic check for atomic properties.
    no_prop = []
    an_prop = []
    # EXPENSIVE to calculate. Not included in training data.
    for atoms in testset['atoms']:
        no_prop.append(neighbor_features(atoms=atoms))
        an_prop.append(
            neighbor_features(atoms=atoms, property=['atomic_number']))
    self.assertTrue(np.shape(no_prop) == (test_size, 15))
    self.assertTrue(np.shape(an_prop) == (test_size, 30))
    print('passed graph_vec')

    # Share the candidates and accumulated features with later tests.
    self.__class__.all_cand = all_cand
    self.__class__.data = data