Python get_uniq_cursor示例，matador.fingerprints.similarity.get_uniq_cursor Python示例

示例#1

0

显示文件

文件： test_similarity.py 项目： ml-evs/matador

    def test_k3p_uniq_default(self):
        """Check the default uniqueness filter on the K3P test structures.

        The ten structures are ordered by enthalpy per atom, filtered with
        default settings, and the five survivors must include every expected
        source name.
        """
        loaded, _ = res2dict(REAL_PATH + "data/K3P_uniq/*.res")
        cursor = sorted(loaded, key=lambda doc: doc["enthalpy_per_atom"])
        uniq_inds, _, _, _ = get_uniq_cursor(cursor)
        kept = [cursor[ind] for ind in uniq_inds]
        self.assertEqual(len(cursor), 10)
        self.assertEqual(len(kept), 5)

        expected_names = [
            "K3P-OQMD_4786-CollCode25550",
            "K3P-mode-follow-swap-Na3N-OQMD_21100-CollCode165992",
            "KP-fvsqdf",
            "PK-NNa3-OQMD_21100-CollCode165992",
            "KP-yzcni8",
        ]
        # One flag per expected name: is it a substring of the first source
        # entry of any retained structure?
        found = [
            any(name in doc["source"][0] for doc in kept)
            for name in expected_names
        ]

        # Show what was actually retained to ease debugging a failure.
        if not all(found):
            print([doc["source"][0] for doc in kept])

        self.assertTrue(all(found))

示例#2

0

显示文件

文件： test_similarity.py 项目： ml-evs/matador

    def test_double_uniqueness_hierarchy(self):
        """Filter a pooled set of structures and check the three survivors.

        Res files from the ``uniqueness_hierarchy`` and ``hull-LLZO`` data
        folders are loaded together, sorted by enthalpy per atom and cut to
        the first ten entries before filtering.
        """
        import glob

        res_files = glob.glob(REAL_PATH + "data/uniqueness_hierarchy/*.res")
        res_files.extend(glob.glob(REAL_PATH + "data/hull-LLZO/*LLZO*.res"))
        docs = [res2dict(fname)[0] for fname in res_files]
        cursor = sorted(docs, key=lambda doc: doc["enthalpy_per_atom"])[:10]

        uniq_inds, _, _, _ = get_uniq_cursor(
            cursor,
            sim_tol=0.1,
            energy_tol=1e20,
            projected=True,
            dr=0.01,
            gaussian_width=0.1,
        )
        kept = [cursor[ind] for ind in uniq_inds]
        self.assertEqual(len(uniq_inds), 3)
        self.assertEqual(len(kept), 3)
        print([doc["source"] for doc in kept])

        # Expected survivors, in the order they must appear.
        expected_names = (
            "cubic-LLZO-CollCode999999",
            "KP-NaP-OQMD_2817-CollCode14009",
            "KP-NaP-CollCode421420",
        )
        for doc, name in zip(kept, expected_names):
            self.assertTrue(name in doc["source"][0])

示例#3

0

显示文件

文件： test_similarity.py 项目： ml-evs/matador

    def test_icsd_priority(self):
        """Check which of ten identical structures is retained.

        Ten freshly loaded copies of the same res file must collapse to index
        0. After copy 6 is given a ``CollCode`` source name and an ``icsd``
        field, index 6 is expected to be the one retained instead.
        """
        test_docs = []
        for _copy in range(10):
            doc, _ = res2dict(REAL_PATH + "data/KP_primitive.res", db=False)
            doc["text_id"] = ["primitive", "cell"]
            test_docs.append(doc)

        uniq_inds, _, _, _ = get_uniq_cursor(test_docs)
        self.assertEqual(uniq_inds, [0])

        # Tag a single copy so that it is distinguishable from the others.
        tagged = test_docs[6]
        tagged["source"] = ["KP-CollCode999999.res"]
        tagged["icsd"] = 999999
        tagged["text_id"] = ["keep", "this"]

        uniq_inds, _, _, _ = get_uniq_cursor(test_docs,
                                             dr=0.1,
                                             gaussian_width=0.1)
        self.assertEqual(uniq_inds, [6])

示例#4

0

显示文件

文件： test_similarity.py 项目： ml-evs/matador

    def test_no_overlap_retains_all_structures(self):
        """With ``sim_tol=0`` the filter is expected to keep every structure."""
        import glob

        res_files = glob.glob(REAL_PATH + "data/uniqueness_hierarchy/*.res")
        cursor = [res2dict(fname)[0] for fname in res_files]
        uniq_inds, _, _, _ = get_uniq_cursor(
            cursor,
            sim_tol=0,
            energy_tol=1e20,
            projected=True,
            debug=True,
            dr=0.1,
            gaussian_width=0.1,
        )
        kept = [cursor[ind] for ind in uniq_inds]
        # Nothing should have been discarded.
        self.assertEqual(len(kept), len(cursor))

示例#5

0

显示文件

文件： test_similarity.py 项目： ml-evs/matador

 def test_uniq_filter_with_hierarchy_2(self):
     """Filter the ten lowest-enthalpy LLZO structures down to a single one.

     The sole survivor must carry the ``cubic-LLZO-CollCode999999`` source
     name.
     """
     loaded, _ = res2dict(REAL_PATH + "data/hull-LLZO/*LLZO*.res")
     cursor = sorted(loaded, key=lambda doc: doc["enthalpy_per_atom"])[:10]
     uniq_inds, _, _, _ = get_uniq_cursor(
         cursor,
         sim_tol=0.1,
         energy_tol=1e10,
         projected=True,
         dr=0.01,
         gaussian_width=0.1,
     )
     kept = [cursor[ind] for ind in uniq_inds]
     self.assertEqual(len(uniq_inds), 1)
     self.assertEqual(len(kept), 1)
     first_source = kept[0]["source"][0]
     self.assertTrue("cubic-LLZO-CollCode999999" in first_source)

示例#6

0

显示文件

文件： test_similarity.py 项目： ml-evs/matador

    def test_volume_rescale(self):
        """Check that rescaled copies of one structure collapse to index 0.

        Eight independent copies of the same document are built, each with the
        first row of ``lattice_abc`` (presumably the cell lengths -- confirm)
        multiplied by a different factor between 0.1 and 10.

        NOTE(review): the previous version appended the *same* dict on every
        iteration and scaled one shared array in place, so all eight entries
        were a single aliased document and the uniqueness assertion was
        trivially true. Whether ``get_uniq_cursor`` reads ``lattice_abc`` at
        all is not visible from here -- confirm the test still exercises the
        intended rescaling path.
        """
        import copy

        import numpy as np

        test_doc, success = res2dict(REAL_PATH + "data/KP_primitive.res",
                                     db=False)
        self.assertTrue(success)
        lattice = np.asarray(test_doc["lattice_abc"])
        test_docs = []
        for val in np.linspace(0.1, 10, 8):
            # Deep-copy so that each entry is a distinct document rather than
            # another reference to ``test_doc``.
            doc = copy.deepcopy(test_doc)
            # Scale a fresh copy of the reference lattice; an in-place ``*=``
            # on the shared array would compound the factors across
            # iterations.
            scaled = lattice.copy()
            scaled[0] *= val
            doc["lattice_abc"] = scaled.tolist()
            test_docs.append(doc)
        uniq_inds, _, _, _ = get_uniq_cursor(test_docs)
        self.assertEqual(uniq_inds, [0])

示例#7

0

显示文件

文件： test_similarity.py 项目： ml-evs/matador

    def test_with_crystals(self):
        """Run the filter on ``Crystal`` objects and expect all to be kept.

        Each res file is loaded and wrapped in a ``Crystal`` before filtering
        with ``sim_tol=0``.
        """
        import glob
        from matador.crystal import Crystal

        res_files = glob.glob(REAL_PATH + "data/uniqueness_hierarchy/*.res")
        cursor = [Crystal(res2dict(fname)[0]) for fname in res_files]
        uniq_inds, _, _, _ = get_uniq_cursor(
            cursor,
            sim_tol=0,
            energy_tol=1e20,
            projected=True,
            debug=True,
            dr=0.1,
            gaussian_width=0.1,
        )
        kept = [cursor[ind] for ind in uniq_inds]
        # Nothing should have been discarded.
        self.assertEqual(len(kept), len(cursor))

示例#8

0

显示文件

def filter_unique_structures(cursor, quiet=False, **kwargs):
    """ Filter a cursor down to its unique structures, optionally displaying
    the outcome.

    Thin wrapper around `matador.fingerprints.similarity.get_uniq_cursor`.

    Parameters:
        cursor (list): structures to filter; it is indexed with the
            indices returned by `get_uniq_cursor`.

    Keyword arguments:
        quiet (bool): when True, the call to `display_results` is skipped.
            The final "Filtered ..." summary line is printed regardless.
        **kwargs: forwarded to `get_uniq_cursor` and, when not quiet, also
            to `display_results`.

    Returns:
        list: the entries of `cursor` at the unique indices.

    """
    from matador.fingerprints.similarity import get_uniq_cursor

    uniq_inds, dupe_dict, _, _ = get_uniq_cursor(cursor, **kwargs)
    filtered_cursor = [cursor[ind] for ind in uniq_inds]

    if not quiet:
        # Interleave each retained structure with its duplicates, recording
        # the row numbers of each kind for display_results.
        shown = []
        kept_rows = []
        dropped_rows = []
        for kept_ind in dupe_dict:
            kept_rows.append(len(shown))
            shown.append(cursor[kept_ind])
            # A falsy entry means there are no duplicates to list.
            for dupe_ind in dupe_dict[kept_ind] or ():
                dropped_rows.append(len(shown))
                shown.append(cursor[dupe_ind])

        display_results(
            # Fall back to the filtered cursor if nothing was collected.
            shown or filtered_cursor,
            additions=kept_rows,
            deletions=dropped_rows,
            sort=True,
            use_source=True,
            **kwargs
        )

    print('Filtered {} down to {}'.format(len(cursor), len(uniq_inds)))

    return filtered_cursor

示例#9

0

显示文件

文件： KSnP.py 项目： ml-evs/ilustrado

# Number of processes, taken from the first command-line argument.
nprocs = int(argv[1])

# Build a hull from every res file found in the seed folder.
cursor = [res2dict(res)[0] for res in glob('seed/*.res')]
hull = QueryConvexHull(
    cursor=cursor,
    no_plot=True,
    kpoint_tolerance=0.03,
    summary=True,
    hull_cutoff=7.5e-2,
)

# Keep only the structures whose stoichiometry has three entries.
print('Filtering down to only ternary phases... {}'.format(
    len(hull.hull_cursor)))
hull.hull_cursor = [
    doc for doc in hull.hull_cursor if len(doc['stoichiometry']) == 3
]

# Remove duplicates among the remaining structures.
# NOTE(review): the first and last entries are dropped by the [1:-1] slice;
# the reason is not visible in this fragment -- confirm it is intended.
print('Filtering unique structures... {}'.format(len(hull.hull_cursor)))
candidates = hull.hull_cursor[1:-1]
uniq_list, _, _, _ = get_uniq_cursor(candidates, debug=False)
cursor = [candidates[ind] for ind in uniq_list]

# Summarise the final gene pool.
print('Final cursor length... {}'.format(len(cursor)))
formulae = {get_formula_from_stoich(doc['stoichiometry']) for doc in cursor}
print('over {} stoichiometries...'.format(len(formulae)))
print([doc['stoichiometry'] for doc in cursor])

ArtificialSelector(gene_pool=cursor,
                   seed='KPSn',
                   hull=hull,
                   debug=False,
                   fitness_metric='hull',
                   nodes=['node1', 'node2', 'node15'],
                   ncores=[16, 16, 20],
                   check_dupes=1,

示例#10

0

显示文件

def main():
    """ Run GA.

    Reads the number of processes from the first command-line argument,
    builds a hull from the res files in ``seed/``, keeps the unique
    structures with three-entry stoichiometries and hands them to
    `ArtificialSelector` as the gene pool.

    """
    from glob import glob
    from sys import argv
    from matador.hull import QueryConvexHull
    from matador.fingerprints.similarity import get_uniq_cursor
    from matador.utils.chem_utils import get_formula_from_stoich
    from matador.scrapers.castep_scrapers import res2dict
    from ilustrado.ilustrado import ArtificialSelector

    def filter_fn(doc):
        """ Filter out any non-ternary phases. """
        return len(doc['stoichiometry']) == 3

    nprocs = int(argv[1])  # specify nprocs at the command-line

    seed_docs = [res2dict(res)[0] for res in glob('seed/*.res')]
    hull = QueryConvexHull(
        cursor=seed_docs,
        no_plot=True,
        kpoint_tolerance=0.03,
        summary=True,
        hull_cutoff=1e-1,
    )

    print('Filtering down to only ternary phases... {}'.format(
        len(hull.hull_cursor)))
    hull.hull_cursor = [
        doc for doc in hull.hull_cursor if len(doc['stoichiometry']) == 3
    ]

    # NOTE(review): the first and last entries are dropped by the [1:-1]
    # slice; the reason is not visible here -- confirm it is intended.
    print('Filtering unique structures... {}'.format(len(hull.hull_cursor)))
    candidates = hull.hull_cursor[1:-1]
    uniq_list, _, _, _ = get_uniq_cursor(candidates, debug=False)
    cursor = [candidates[ind] for ind in uniq_list]

    print('Final cursor length... {}'.format(len(cursor)))
    formulae = {
        get_formula_from_stoich(doc['stoichiometry']) for doc in cursor
    }
    print('over {} stoichiometries...'.format(len(formulae)))
    print([doc['stoichiometry'] for doc in cursor])

    relaxer_params = {'bnl': True}  # required to use srun instead of mpirun
    ArtificialSelector(
        gene_pool=cursor,
        seed='KPSn',
        hull=hull,
        debug=False,
        fitness_metric='hull',
        # number of cores per individual calculation
        # to use less than one node, decrease this to e.g. 10
        # then increase nprocs to 2*nnodes
        ncores=20,
        check_dupes=1,
        # number of total procs, taken as command-line argument to script
        nprocs=nprocs,
        executable='castep',
        relaxer_params=relaxer_params,
        structure_filter=filter_fn,
        best_from_stoich=True,
        max_num_mutations=3,
        max_num_atoms=50,
        mutation_rate=0.4,
        crossover_rate=0.6,
        num_generations=20,
        population=30,
        num_survivors=20,
        elitism=0.5,
        loglevel='debug')