def test_k3p_uniq_default(self):
    """Default uniqueness filtering keeps 5 of the 10 K3P test structures.

    After filtering, every expected structure name must appear as a
    substring of the first ``source`` entry of at least one retained
    document.
    """
    structures, _ = res2dict(REAL_PATH + "data/K3P_uniq/*.res")
    structures = sorted(structures, key=lambda doc: doc["enthalpy_per_atom"])
    uniq_inds, _, _, _ = get_uniq_cursor(structures)
    filtered_cursor = [structures[ind] for ind in uniq_inds]

    self.assertEqual(len(structures), 10)
    self.assertEqual(len(filtered_cursor), 5)

    expected_names = (
        "K3P-OQMD_4786-CollCode25550",
        "K3P-mode-follow-swap-Na3N-OQMD_21100-CollCode165992",
        "KP-fvsqdf",
        "PK-NNa3-OQMD_21100-CollCode165992",
        "KP-yzcni8",
    )
    # One flag per expected name: True if any retained document matches it.
    found = [
        any(name in doc["source"][0] for doc in filtered_cursor)
        for name in expected_names
    ]

    # Dump the retained sources to ease debugging before the assertion fires.
    if not all(found):
        print([doc["source"][0] for doc in filtered_cursor])

    self.assertTrue(all(found))
def test_double_uniqueness_hierarchy(self):
    """Filter a mixed set of KP and LLZO structures down to three entries.

    The ten lowest-enthalpy documents from the two data directories are
    filtered with projected fingerprints; the three survivors must appear
    in a fixed order, identified by substrings of their first source.
    """
    import glob

    res_files = glob.glob(REAL_PATH + "data/uniqueness_hierarchy/*.res")
    res_files.extend(glob.glob(REAL_PATH + "data/hull-LLZO/*LLZO*.res"))

    docs = [res2dict(fname)[0] for fname in res_files]
    docs = sorted(docs, key=lambda doc: doc["enthalpy_per_atom"])[:10]

    uniq_inds, _, _, _ = get_uniq_cursor(
        docs,
        sim_tol=0.1,
        energy_tol=1e20,
        projected=True,
        dr=0.01,
        gaussian_width=0.1,
    )
    filtered_cursor = [docs[ind] for ind in uniq_inds]

    self.assertEqual(len(uniq_inds), 3)
    self.assertEqual(len(filtered_cursor), 3)
    print([doc["source"] for doc in filtered_cursor])

    # Expected survivors, in the order they must appear after filtering.
    expected_order = (
        "cubic-LLZO-CollCode999999",
        "KP-NaP-OQMD_2817-CollCode14009",
        "KP-NaP-CollCode421420",
    )
    for position, name in enumerate(expected_order):
        self.assertTrue(name in filtered_cursor[position]["source"][0])
def test_icsd_priority(self):
    """A duplicate carrying an ICSD entry is the one retained.

    Ten documents are scraped from the same file, so by default only index
    0 is kept. Once document 6 is given an ICSD-style source and ``icsd``
    field, it must be the sole survivor instead.
    """
    test_docs = []
    for _ in range(10):
        doc, _success = res2dict(REAL_PATH + "data/KP_primitive.res", db=False)
        doc["text_id"] = ["primitive", "cell"]
        test_docs.append(doc)

    uniq_inds, _, _, _ = get_uniq_cursor(test_docs)
    self.assertEqual(uniq_inds, [0])

    # Mark a single duplicate as originating from the ICSD.
    icsd_doc = test_docs[6]
    icsd_doc["source"] = ["KP-CollCode999999.res"]
    icsd_doc["icsd"] = 999999
    icsd_doc["text_id"] = ["keep", "this"]

    uniq_inds, _, _, _ = get_uniq_cursor(test_docs, dr=0.1, gaussian_width=0.1)
    self.assertEqual(uniq_inds, [6])
def test_no_overlap_retains_all_structures(self):
    """With ``sim_tol=0`` no structure is discarded as a duplicate."""
    import glob

    res_files = glob.glob(REAL_PATH + "data/uniqueness_hierarchy/*.res")
    docs = [res2dict(fname)[0] for fname in res_files]

    uniq_inds, _, _, _ = get_uniq_cursor(
        docs,
        sim_tol=0,
        energy_tol=1e20,
        projected=True,
        debug=True,
        dr=0.1,
        gaussian_width=0.1,
    )

    retained = [docs[ind] for ind in uniq_inds]
    self.assertEqual(len(retained), len(docs))
def test_uniq_filter_with_hierarchy_2(self):
    """The ten lowest-enthalpy LLZO structures reduce to one ICSD-labelled entry."""
    docs, _ = res2dict(REAL_PATH + "data/hull-LLZO/*LLZO*.res")
    docs = sorted(docs, key=lambda doc: doc["enthalpy_per_atom"])[:10]

    uniq_inds, _, _, _ = get_uniq_cursor(
        docs,
        sim_tol=0.1,
        energy_tol=1e10,
        projected=True,
        dr=0.01,
        gaussian_width=0.1,
    )
    retained = [docs[ind] for ind in uniq_inds]

    self.assertEqual(len(uniq_inds), 1)
    self.assertEqual(len(retained), 1)

    survivor_source = retained[0]["source"][0]
    self.assertTrue("cubic-LLZO-CollCode999999" in survivor_source)
def test_volume_rescale(self):
    """Rescaled copies of one structure must collapse to a single unique entry.

    Eight independent copies of the scraped document are built, each with
    the first row of ``lattice_abc`` multiplied by a different factor
    between 0.1 and 10, and the uniqueness filter is expected to keep only
    index 0.

    Previously the very same dict was appended eight times and the shared
    lattice array was scaled in place, so every entry aliased one document
    and the filter was only ever comparing a structure with itself.
    """
    import copy

    import numpy as np

    test_doc, success = res2dict(REAL_PATH + "data/KP_primitive.res", db=False)
    self.assertTrue(success)

    # Reference lattice; never mutated so each factor applies to the original.
    base_lattice = np.asarray(test_doc["lattice_abc"], dtype=float)

    test_docs = []
    for val in np.linspace(0.1, 10, 8):
        # Deep copy so that each entry is a distinct document rather than
        # another reference to ``test_doc``.
        scaled_doc = copy.deepcopy(test_doc)

        # Work on a fresh array: ``arr[0] *= val`` modifies in place, which
        # would otherwise compound the factors across iterations.
        scaled_lattice = base_lattice.copy()
        # presumably row 0 holds the cell lengths - TODO confirm
        scaled_lattice[0] *= val
        scaled_doc["lattice_abc"] = scaled_lattice.tolist()

        # NOTE(review): only ``lattice_abc`` is rescaled here (as in the
        # original); if the fingerprint reads another lattice field, that
        # field needs regenerating too - confirm against get_uniq_cursor.
        test_docs.append(scaled_doc)

    uniq_inds, _, _, _ = get_uniq_cursor(test_docs)
    self.assertEqual(uniq_inds, [0])
def test_with_crystals(self):
    """The uniqueness filter accepts ``Crystal`` objects; ``sim_tol=0`` keeps them all."""
    import glob

    from matador.crystal import Crystal

    res_files = glob.glob(REAL_PATH + "data/uniqueness_hierarchy/*.res")
    crystals = [Crystal(res2dict(fname)[0]) for fname in res_files]

    uniq_inds, _, _, _ = get_uniq_cursor(
        crystals,
        sim_tol=0,
        energy_tol=1e20,
        projected=True,
        debug=True,
        dr=0.1,
        gaussian_width=0.1,
    )

    retained = [crystals[ind] for ind in uniq_inds]
    self.assertEqual(len(retained), len(crystals))
def filter_unique_structures(cursor, quiet=False, **kwargs):
    """ Wrapper for `matador.fingerprints.similarity.get_uniq_cursor` that
    displays the results and returns the filtered cursor.

    Parameters:
        cursor (list): structures to filter.

    Keyword arguments:
        quiet (bool): if True, skip displaying the results table.
        **kwargs: forwarded both to `get_uniq_cursor` and to
            `display_results`.

    Returns:
        list: the entries of `cursor` at the unique indices.

    """
    from matador.fingerprints.similarity import get_uniq_cursor

    uniq_inds, dupe_dict, _, _ = get_uniq_cursor(cursor, **kwargs)
    filtered_cursor = [cursor[ind] for ind in uniq_inds]

    if quiet:
        return filtered_cursor

    # Build a display list in which each retained structure is followed by
    # its duplicates, recording display positions for highlighting.
    display_cursor = []
    additions = []
    deletions = []
    for kept_ind, dupe_inds in dupe_dict.items():
        additions.append(len(display_cursor))
        display_cursor.append(cursor[kept_ind])
        if dupe_inds:
            for dupe_ind in dupe_inds:
                deletions.append(len(display_cursor))
                display_cursor.append(cursor[dupe_ind])

    # Nothing reported by the duplicate map: show the filtered cursor instead.
    if not display_cursor:
        display_cursor = filtered_cursor

    display_results(
        display_cursor,
        additions=additions,
        deletions=deletions,
        sort=True,
        use_source=True,
        **kwargs
    )

    print('Filtered {} down to {}'.format(len(cursor), len(uniq_inds)))

    return filtered_cursor
nprocs = int(argv[1]) cursor = [res2dict(res)[0] for res in glob('seed/*.res')] hull = QueryConvexHull(cursor=cursor, no_plot=True, kpoint_tolerance=0.03, summary=True, hull_cutoff=7.5e-2) print('Filtering down to only ternary phases... {}'.format( len(hull.hull_cursor))) hull.hull_cursor = [ doc for doc in hull.hull_cursor if len(doc['stoichiometry']) == 3 ] print('Filtering unique structures... {}'.format(len(hull.hull_cursor))) uniq_list, _, _, _ = list(get_uniq_cursor(hull.hull_cursor[1:-1], debug=False)) cursor = [hull.hull_cursor[1:-1][ind] for ind in uniq_list] print('Final cursor length... {}'.format(len(cursor))) print('over {} stoichiometries...'.format( len(set([get_formula_from_stoich(doc['stoichiometry']) for doc in cursor])))) print([doc['stoichiometry'] for doc in cursor]) ArtificialSelector(gene_pool=cursor, seed='KPSn', hull=hull, debug=False, fitness_metric='hull', nodes=['node1', 'node2', 'node15'], ncores=[16, 16, 20], check_dupes=1,
def main():
    """ Run GA.

    Reads every ``seed/*.res`` file, builds a convex hull from them, keeps
    only ternary phases, removes duplicate structures and hands the
    remaining gene pool to ``ArtificialSelector``.

    The total number of processes is taken from the first command-line
    argument.

    Raises:
        SystemExit: if no process count is given on the command line.

    """
    from glob import glob
    from sys import argv

    from matador.hull import QueryConvexHull
    from matador.fingerprints.similarity import get_uniq_cursor
    from matador.utils.chem_utils import get_formula_from_stoich
    from matador.scrapers.castep_scrapers import res2dict
    from ilustrado.ilustrado import ArtificialSelector

    # specify nprocs at the command-line
    if len(argv) < 2:
        raise SystemExit('usage: {} <nprocs>'.format(argv[0] if argv else 'script'))
    nprocs = int(argv[1])

    cursor = [res2dict(res)[0] for res in glob('seed/*.res')]
    hull = QueryConvexHull(cursor=cursor,
                           no_plot=True,
                           kpoint_tolerance=0.03,
                           summary=True,
                           hull_cutoff=1e-1)

    print('Filtering down to only ternary phases... {}'.format(
        len(hull.hull_cursor)))
    hull.hull_cursor = [
        doc for doc in hull.hull_cursor if len(doc['stoichiometry']) == 3
    ]

    print('Filtering unique structures... {}'.format(len(hull.hull_cursor)))
    # Slice once and reuse: the original re-sliced the hull cursor for every
    # retained index, copying the list each time.
    # NOTE(review): the first and last ternary entries are dropped here, as
    # in the original - confirm this is intended after the ternary filter.
    candidates = hull.hull_cursor[1:-1]
    uniq_list, _, _, _ = get_uniq_cursor(candidates, debug=False)
    cursor = [candidates[ind] for ind in uniq_list]

    print('Final cursor length... {}'.format(len(cursor)))
    formulae = {get_formula_from_stoich(doc['stoichiometry']) for doc in cursor}
    print('over {} stoichiometries...'.format(len(formulae)))
    print([doc['stoichiometry'] for doc in cursor])

    def filter_fn(doc):
        """ Filter out any non-ternary phases. """
        return len(doc['stoichiometry']) == 3

    relaxer_params = {'bnl': True}  # required to use srun instead of mpirun

    ArtificialSelector(
        gene_pool=cursor,
        seed='KPSn',
        hull=hull,
        debug=False,
        fitness_metric='hull',
        # number of cores per individual calculation
        # to use less than one node, decrease this to e.g. 10
        # then increase nprocs to 2*nnodes
        ncores=20,
        check_dupes=1,
        # number of total procs, taken as command-line argument to script
        nprocs=nprocs,
        executable='castep',
        relaxer_params=relaxer_params,
        structure_filter=filter_fn,
        best_from_stoich=True,
        max_num_mutations=3,
        max_num_atoms=50,
        mutation_rate=0.4,
        crossover_rate=0.6,
        num_generations=20,
        population=30,
        num_survivors=20,
        elitism=0.5,
        loglevel='debug')