Exemplo n.º 1
0
def get_structures_from_protosearch(formulas,
                                    source='icsd',
                                    db_interface=None):
    """
    Query protosearch for hypothetical structures of the given formulas
    and return them as one table indexed by a unique string id.

    Args:
        formulas ([str]): chemical formulas for which candidate
            structures are generated; one protosearch query is issued
            per formula
        source (str): project name in OQMD to be used as source,
            forwarded to ``create_proto_data_set``. Defaults to 'icsd'.
        db_interface (DbInterface): interface to the OQMD database.
            When None, "oqmd_ver3.db" is fetched with
            ``cache_matrio_data`` and an ``OqmdInterface`` is opened
            on the copy located in ``CAMD_CACHE``.

    Returns:
        (pandas.DataFrame) concatenation of the per-formula results
            with rows containing any missing value removed, an added
            'pmg_structures' column holding the pymatgen structures,
            and the generated unique ids as index

    TODO:
        - For efficiency, n_max_atoms can be handled within OqmdInterface
    """
    if db_interface is None:
        db_filename = "oqmd_ver3.db"
        cache_matrio_data(db_filename)
        db_interface = OqmdInterface(os.path.join(CAMD_CACHE, db_filename))

    # One protosearch data set per requested formula, stacked together
    per_formula_frames = []
    for formula in formulas:
        per_formula_frames.append(
            db_interface.create_proto_data_set(source=source,
                                               chemical_formula=formula)
        )
    candidates = pd.concat(per_formula_frames)

    # Rows with any missing entry are considered bad structures
    candidates.dropna(axis=0, how='any', inplace=True)

    # ASE atoms -> pymatgen structures, in row order
    adaptor = AseAtomsAdaptor()
    pmg_structures = [
        adaptor.get_structure(atoms) for atoms in candidates['atoms']
    ]
    candidates['pmg_structures'] = pmg_structures

    # The random suffix is probably overkill, but the ids must be unique.
    # In spaces with similar stoichiometries the name and element set
    # alone may clash, e.g. IrSb2O2 and Ir2SbO2 may produce the same
    # string despite different substitutions on the same structure.
    # Ideally protosearch would provide the right element order instead.
    structure_uids = []
    for proto_name, structure in zip(candidates['proto_name'],
                                     pmg_structures):
        name_part = proto_name.replace('_', '-')
        element_part = '-'.join(structure.symbol_set)
        random_part = uuid.uuid4().hex[:6]
        structure_uids.append(
            name_part + '-' + element_part + '-' + random_part
        )
    candidates.index = structure_uids
    return candidates
Exemplo n.º 2
0
    def test_create_proto_dataset(self):
        """
        Check the prototype data set created for 'FeO6' with
        max_atoms=7: it must contain five entries, each with seven
        atoms made up of one Fe and six O.
        """
        interface = OqmdInterface()
        dataset = interface.create_proto_data_set(chemical_formula='FeO6',
                                                  max_atoms=7)

        assert len(dataset) == 5

        # NOTE(review): if these are ASE Atoms objects,
        # get_number_of_atoms() is deprecated in newer ASE releases --
        # confirm against the ASE version in use.
        for entry in dataset["atoms"][:5]:
            assert entry.get_number_of_atoms() == 7
            symbols = entry.get_chemical_symbols()
            assert symbols.count('Fe') == 1
            assert symbols.count('O') == 6
Exemplo n.º 3
0
def get_structures_from_protosearch(formulas,
                                    source="icsd",
                                    db_interface=None):
    """
    Query protosearch for hypothetical structures of the given formulas
    and return them as one table indexed by structure name.

    Args:
        formulas ([str]): chemical formulas for which candidate
            structures are generated; one protosearch query is issued
            per formula
        source (str): project name in OQMD to be used as source.
            Defaults to "icsd". Only used to construct the default
            ``OqmdInterface`` when ``db_interface`` is None.
        db_interface (DbInterface): interface to the OQMD database.
            When None, ``OqmdInterface(source)`` is created.

    Returns:
        (pandas.DataFrame) concatenation of the per-formula results
            with rows containing any missing value removed, an added
            "structure" column holding the pymatgen structures, and
            the "structure_name" values (underscores replaced by
            hyphens) as index
    """
    if db_interface is None:
        db_interface = OqmdInterface(source)

    # One protosearch data set per requested formula, stacked together
    per_formula_frames = []
    for formula in formulas:
        per_formula_frames.append(
            db_interface.create_proto_data_set(chemical_formula=formula)
        )
    candidates = pd.concat(per_formula_frames)

    # Rows with any missing entry are considered bad structures
    candidates.dropna(axis=0, how="any", inplace=True)

    # ASE atoms -> pymatgen structures, in row order
    adaptor = AseAtomsAdaptor()
    candidates["structure"] = [
        adaptor.get_structure(atoms) for atoms in candidates["atoms"]
    ]

    # Mc1 does not allow underscores, so the names used as index
    # have them swapped for hyphens
    candidates.index = [
        name.replace('_', '-') for name in candidates["structure_name"]
    ]
    return candidates