Пример #1
0
def get_fps(structure, cutoff=10.0, processes=8):
    """Compute descriptors for every site of ``structure`` in parallel.

    The featurizer objects and the structure-wide Voronoi / Gaussian symmetry
    function fingerprints are built once. One work item per site is then
    handed to ``get_all_site_descrs`` through a process pool.

    Args:
        structure: Iterable of sites; it is also passed to the
            ``featurize_structure`` calls.
        cutoff: Cutoff forwarded to ``VoronoiFingerprintModified``.
        processes: Number of worker processes in the pool.

    Returns:
        A ``np.ndarray`` built from the per-site results, or an empty list
        when an ``AttributeError`` or ``IndexError`` was raised on the way.
    """
    # Local import: only this block is visible, so the dependency is kept here.
    import logging

    all_descrs = []

    try:
        coordination_number_ = CoordinationNumber.from_preset('VoronoiNN')
        voronoi_fps_ = VoronoiFingerprintModified(
            cutoff=cutoff).featurize_structure(structure)
        crystal_nn_fingerprint_ = CrystalNNFingerprint.from_preset('cn')
        op_site_fingerprint_ = OPSiteFingerprint()
        agni_fingerprints_ = AGNIFingerprints()
        gaussian_symm_func_fps_ = GaussianSymmFuncModified(
        ).featurize_structure(structure)
        pymatgen_data_ = PymatgenData()
        magpie_data_ = MagpieData()

        # One argument bundle per site; the worker receives it as a single list.
        data_list = [[
            structure, i, site, coordination_number_, voronoi_fps_,
            crystal_nn_fingerprint_, op_site_fingerprint_, agni_fingerprints_,
            gaussian_symm_func_fps_, pymatgen_data_, magpie_data_
        ] for i, site in enumerate(structure)]

        # The context manager reclaims the worker processes even if map()
        # raises; previously the pool was never closed.
        with multiprocessing.Pool(processes=processes) as pool:
            all_descrs = np.array(pool.map(get_all_site_descrs, data_list))

    except (AttributeError, IndexError) as error:
        # Still best-effort (the empty list is returned), but no longer silent.
        logging.getLogger(__name__).warning(
            "Site descriptor generation failed: %s", error)

    return all_descrs
Пример #2
0
def matminer_wrapper(structure, preset, crystal_site_args, site_stats_args):
    """Featurize ``structure`` with a SiteStatsFingerprint over CrystalNN.

    Args:
        structure: Object passed to ``featurize``.
        preset: Preset name given to ``CrystalNNFingerprint.from_preset``.
        crystal_site_args: Keyword arguments for the CrystalNN preset.
        site_stats_args: Keyword arguments for ``SiteStatsFingerprint``.

    Returns:
        The result of ``featurize``, or ``None`` if featurization raised
        (the exception is announced on stdout and logged).
    """
    site_featurizer = CrystalNNFingerprint.from_preset(
        preset, **crystal_site_args)
    stats_featurizer = SiteStatsFingerprint(site_featurizer,
                                            **site_stats_args)
    try:
        features = stats_featurizer.featurize(structure)
    except Exception as exc:
        print('Exception caught!')
        logger.error(exc)
        return None
    return features
 def featurizer(self):
     """Build the combined featurizer, applying ``self.cutoff`` where supported."""
     radius = self.cutoff
     crystal_nn = CrystalNNFingerprint.from_preset("ops", search_cutoff=radius)
     local_stats = LocalPropertyStatsNew.from_preset("interpretable",
                                                     cutoff=radius)
     # GaussianSymmFunc is constructed with its defaults (no cutoff passed).
     symm_func = GaussianSymmFunc()
     return MultipleFeaturizer([crystal_nn, local_stats, symm_func])
Пример #4
0
def get_structure_fingerprint(
    structure: IStructure,
    preset: str = "CrystalNNFingerprint_ops",
    stats: Optional[Tuple[str, ...]] = ("mean", "std_dev"),
    prototype_match: bool = False,
) -> np.ndarray:
    """Gets the fingerprint for a structure.

    Args:
        structure: A structure.
        preset: The preset to use when calculating the fingerprint. See
            :class:`matminer.featurizers.structure.SiteStatsFingerprint`
            for more details. NOTE: this argument is currently not used; the
            fingerprint is always built from the CrystalNN ``"ops"`` preset
            with ``cation_anion=False`` (see the TODO below).
        stats: The stats to include in fingerprint. See
            :class:`matminer.featurizers.structure.SiteStatsFingerprint`
            for more details.
        prototype_match: If ``True``, the CrystalNN fingerprint is built with
            ``distance_cutoffs=None`` and ``x_diff_weight=None``; otherwise
            the preset's own values for those options are used.

    Returns:
        The structure fingerprint as a :class:`numpy.ndarray`.
    """
    # TODO: Add distance_cutoff option to matminer so we can use the preset
    # arg. Currently SiteStatsFingerprint.from_preset is not used because the
    # distance_cutoffs param must be passed in.
    nn_options = {"cation_anion": False}
    if prototype_match:
        nn_options.update(distance_cutoffs=None, x_diff_weight=None)

    site_featurizer = CrystalNNFingerprint.from_preset("ops", **nn_options)
    ssf = SiteStatsFingerprint(site_featurizer, stats=stats)
    return np.array(ssf.featurize(structure))
Пример #5
0
    def __init__(self, materials, descriptors, **kwargs):
        """
        Set up the site and composition featurizers and register the builder.

        Site descriptors are coordination numbers (unweighted and
        weight-summed) for every near-neighbor class named in
        ``nn_target_classes``, plus a CrystalNN order-parameter fingerprint
        ("csf") that is listed under the "statistics" output piece.
        The composition descriptor is the Magpie element-property preset.

        Args:
            materials (Store): Store of materials documents.
            descriptors (Store): Store of composition, site, and
                                 structure descriptor data such
                                 as tetrahedral order parameter or
                                 fraction of being 8-fold coordinated.
            **kwargs: Forwarded unchanged to the parent constructor.
                ``mat_query`` is not a named parameter of this constructor.
        """

        self.materials = materials
        self.descriptors = descriptors

        # Coordination-number featurizers, two per near-neighbor class.
        self.sds = {}
        for nn_name in nn_target_classes:
            nn_cls = getattr(local_env, nn_name)
            self.sds[f"cn_{nn_name}"] = CoordinationNumber(
                nn_cls(), use_weights="none")
            self.sds[f"cn_wt_{nn_name}"] = CoordinationNumber(
                nn_cls(), use_weights="sum")

        # Snapshot the keys now: "csf" is added afterwards and must not be
        # listed under "site_descriptors".
        self.all_output_pieces = {"site_descriptors": list(self.sds)}
        self.sds["csf"] = CrystalNNFingerprint.from_preset(
            "ops", distance_cutoffs=None, x_diff_weight=None)
        self.all_output_pieces["statistics"] = ["csf"]

        # Composition featurizers.
        self.cds = {"magpie": ElementProperty.from_preset("magpie")}
        self.all_output_pieces["composition_descriptors"] = ["magpie"]

        self.all_output_pieces["meta"] = ["atomate"]

        super().__init__(
            source=materials,
            target=descriptors,
            ufn=self.calc,
            projection=["structure"],
            **kwargs,
        )
Пример #6
0
    def __init__(self, materials, descriptors, mat_query=None, **kwargs):
        """
        Set up the site and composition featurizers and register the builder.

        Site descriptors are coordination numbers (unweighted and
        weight-summed) for every near-neighbor class named in
        ``nn_target_classes``, plus a CrystalNN order-parameter fingerprint
        ("csf") that is listed under the "statistics" output piece.
        The composition descriptor is the Magpie element-property preset.

        Args:
            materials (Store): Store of materials documents.
            descriptors (Store): Store of composition, site, and
                                 structure descriptor data such
                                 as tetrahedral order parameter or
                                 fraction of being 8-fold coordinated.
            mat_query (dict): dictionary to limit materials to be analyzed;
                              a falsy value is stored as an empty dict.
            **kwargs: Forwarded unchanged to the parent constructor.
        """

        self.materials = materials
        self.descriptors = descriptors
        self.mat_query = mat_query or {}

        # Coordination-number featurizers, two per near-neighbor class.
        self.sds = {}
        for nn_name in nn_target_classes:
            nn_cls = getattr(pymatgen.analysis.local_env, nn_name)
            self.sds['cn_{}'.format(nn_name)] = CoordinationNumber(
                nn_cls(), use_weights='none')
            self.sds['cn_wt_{}'.format(nn_name)] = CoordinationNumber(
                nn_cls(), use_weights='sum')

        # Snapshot the keys now: 'csf' is added afterwards and must not be
        # listed under 'site_descriptors'.
        self.all_output_pieces = {'site_descriptors': list(self.sds)}
        self.sds['csf'] = CrystalNNFingerprint.from_preset(
            'ops', distance_cutoffs=None, x_diff_weight=None)
        self.all_output_pieces['statistics'] = ['csf']

        # Composition featurizers.
        self.cds = {"magpie": ElementProperty.from_preset('magpie')}
        self.all_output_pieces['composition_descriptors'] = ['magpie']

        self.all_output_pieces['meta'] = ['atomate']

        super().__init__(sources=[materials], targets=[descriptors], **kwargs)
Пример #7
0
    def test_op_site_fingerprint(self):
        """Check OPSiteFingerprint labels and selected order-parameter values."""

        def value_of(vector, fingerprint, label):
            # Look the feature up by name so the test does not hard-code indices.
            return vector[fingerprint.feature_labels().index(label)]

        opsf = OPSiteFingerprint()
        actual_labels = opsf.feature_labels()
        expected_labels = [
            'sgl_bd CN_1', 'L-shaped CN_2', 'water-like CN_2',
            'bent 120 degrees CN_2', 'bent 150 degrees CN_2',
            'linear CN_2', 'trigonal planar CN_3',
            'trigonal non-coplanar CN_3', 'T-shaped CN_3',
            'square co-planar CN_4', 'tetrahedral CN_4',
            'rectangular see-saw-like CN_4', 'see-saw-like CN_4',
            'trigonal pyramidal CN_4', 'pentagonal planar CN_5',
            'square pyramidal CN_5', 'trigonal bipyramidal CN_5',
            'hexagonal planar CN_6', 'octahedral CN_6',
            'pentagonal pyramidal CN_6', 'hexagonal pyramidal CN_7',
            'pentagonal bipyramidal CN_7', 'body-centered cubic CN_8',
            'hexagonal bipyramidal CN_8', 'q2 CN_9', 'q4 CN_9', 'q6 CN_9',
            'q2 CN_10', 'q4 CN_10', 'q6 CN_10',
            'q2 CN_11', 'q4 CN_11', 'q6 CN_11',
            'cuboctahedral CN_12', 'q2 CN_12', 'q4 CN_12', 'q6 CN_12',
        ]
        for idx, label in enumerate(actual_labels):
            self.assertEqual(label, expected_labels[idx])

        ops = opsf.featurize(self.sc, 0)
        self.assertEqual(len(ops), 37)
        self.assertAlmostEqual(
            value_of(ops, opsf, 'octahedral CN_6'), 0.9995, places=7)

        ops = opsf.featurize(self.cscl, 0)
        self.assertAlmostEqual(
            value_of(ops, opsf, 'body-centered cubic CN_8'), 0.8955, places=7)

        opsf = OPSiteFingerprint(dist_exp=0)
        ops = opsf.featurize(self.cscl, 0)
        self.assertAlmostEqual(
            value_of(ops, opsf, 'body-centered cubic CN_8'), 0.9555, places=7)

        # The following test aims at ensuring the copying of the OP
        # dictionaries works: building a CrystalNNFingerprint afterwards must
        # not add any 'wt' labels to the OPSiteFingerprint.
        opsfp = OPSiteFingerprint()
        cnnfp = CrystalNNFingerprint.from_preset('ops')
        wt_labels = [
            name for name in opsfp.feature_labels()
            if name.split()[0] == 'wt'
        ]
        self.assertEqual(len(wt_labels), 0)
Пример #8
0
 def test_crystal_nn_fingerprint(self):
     """Check CrystalNNFingerprint ('ops' preset) labels and selected values."""

     def value_of(vector, label):
         # Look the feature up by name so the test does not hard-code indices.
         return vector[cnnfp.feature_labels().index(label)]

     cnnfp = CrystalNNFingerprint.from_preset(
         'ops', distance_cutoffs=None, x_diff_weight=None)
     actual_labels = cnnfp.feature_labels()
     expected_labels = [
         'wt CN_1', 'sgl_bd CN_1', 'wt CN_2', 'L-shaped CN_2',
         'water-like CN_2', 'bent 120 degrees CN_2',
         'bent 150 degrees CN_2', 'linear CN_2', 'wt CN_3',
         'trigonal planar CN_3', 'trigonal non-coplanar CN_3',
         'T-shaped CN_3', 'wt CN_4', 'square co-planar CN_4',
         'tetrahedral CN_4', 'rectangular see-saw-like CN_4',
         'see-saw-like CN_4', 'trigonal pyramidal CN_4', 'wt CN_5',
         'pentagonal planar CN_5', 'square pyramidal CN_5',
         'trigonal bipyramidal CN_5', 'wt CN_6', 'hexagonal planar CN_6',
         'octahedral CN_6', 'pentagonal pyramidal CN_6', 'wt CN_7',
         'hexagonal pyramidal CN_7', 'pentagonal bipyramidal CN_7',
         'wt CN_8', 'body-centered cubic CN_8',
         'hexagonal bipyramidal CN_8', 'wt CN_9', 'q2 CN_9', 'q4 CN_9',
         'q6 CN_9', 'wt CN_10', 'q2 CN_10', 'q4 CN_10', 'q6 CN_10',
         'wt CN_11', 'q2 CN_11', 'q4 CN_11', 'q6 CN_11', 'wt CN_12',
         'cuboctahedral CN_12', 'q2 CN_12', 'q4 CN_12', 'q6 CN_12',
         'wt CN_13', 'wt CN_14', 'wt CN_15', 'wt CN_16', 'wt CN_17',
         'wt CN_18', 'wt CN_19', 'wt CN_20', 'wt CN_21', 'wt CN_22',
         'wt CN_23', 'wt CN_24',
     ]
     for idx, label in enumerate(actual_labels):
         self.assertEqual(label, expected_labels[idx])

     ops = cnnfp.featurize(self.sc, 0)
     self.assertEqual(len(ops), 61)
     self.assertAlmostEqual(value_of(ops, 'wt CN_6'), 1, places=7)
     self.assertAlmostEqual(value_of(ops, 'octahedral CN_6'), 1, places=7)

     ops = cnnfp.featurize(self.cscl, 0)
     self.assertAlmostEqual(value_of(ops, 'wt CN_8'), 0.5, places=1)
     self.assertAlmostEqual(
         value_of(ops, 'body-centered cubic CN_8'), 0.5, places=1)
Пример #9
0
    def __init__(self, structure: Structure, outpath: Union[str, Path]):
        """Prepare logging, the output location and the featurizer for a structure.

        Args:
            structure (Structure): Pymatgen Structure object
            outpath (Union[str, Path]): directory to which the features will
                be dumped. It is created (including missing parents) when it
                does not exist. With ``""`` or ``None`` no directory is
                created and the output file name is relative.
        """
        featurizelogger = logging.getLogger("Featurize")
        featurizelogger.setLevel(logging.INFO)
        logging.basicConfig(
            format="%(filename)s: %(message)s",
            level=logging.INFO,
        )

        self.outpath = outpath
        if (outpath != "" and outpath is not None
                and not os.path.exists(outpath)):
            # makedirs also creates missing parents; exist_ok covers another
            # process creating the directory between the check and the call.
            os.makedirs(outpath, exist_ok=True)

        self.logger = featurizelogger
        self.path = None
        self.structure = structure
        self.metal_sites = []
        self.metal_indices = []
        self.features = []

        # self.path is always None here, so the output name is always derived
        # from the formula. A None outpath is treated like "" so that
        # os.path.join does not raise TypeError.
        out_dir = "" if outpath is None else outpath
        self.outname = os.path.join(
            out_dir,
            "".join([self.structure.formula.replace(" ", "_"), ".pkl"]),
        )

        self.featurizer = MultipleFeaturizer([
            CrystalNNFingerprint.from_preset("ops"),
            LocalPropertyStatsNew.from_preset("interpretable"),
            GaussianSymmFunc(),
        ])
Пример #10
0
    def __init__(self, structure, outpath):
        """Prepare logging, the output location and the featurizer for a structure.

        Args:
            structure: object exposing ``formula``, from which the output
                file name is derived
            outpath (str): directory to which the features will be dumped.
                It is created (including missing parents) when it does not
                exist. With ``''`` or ``None`` no directory is created and
                the output file name is relative.
        """
        featurizelogger = logging.getLogger('Featurize')
        featurizelogger.setLevel(logging.INFO)
        logging.basicConfig(
            format='%(filename)s: %(message)s',
            level=logging.INFO,
        )

        self.outpath = outpath
        if (outpath != '' and outpath is not None
                and not os.path.exists(outpath)):
            # makedirs also creates missing parents; exist_ok covers another
            # process creating the directory between the check and the call.
            os.makedirs(outpath, exist_ok=True)

        self.logger = featurizelogger
        self.path = None
        self.structure = structure
        self.metal_sites = []
        self.metal_indices = []
        self.features = []

        # self.path is always None here, so the output name is always derived
        # from the formula. A None outpath is treated like '' so that
        # os.path.exists / os.path.join do not raise TypeError.
        out_dir = '' if outpath is None else outpath
        self.outname = os.path.join(
            out_dir,
            ''.join([self.structure.formula.replace(' ', '_'), '.pkl']),
        )

        self.featurizer = MultipleFeaturizer([
            CrystalNNFingerprint.from_preset('ops'),
            LocalPropertyStatsNew.from_preset('interpretable'),
            GaussianSymmFunc(),
        ])
Пример #11
0
    def test_op_site_fingerprint(self):
        """Verify OPSiteFingerprint feature labels and a few reference values."""
        opsf = OPSiteFingerprint()
        produced = opsf.feature_labels()
        reference = [
            'sgl_bd CN_1', 'L-shaped CN_2', 'water-like CN_2',
            'bent 120 degrees CN_2', 'bent 150 degrees CN_2',
            'linear CN_2', 'trigonal planar CN_3',
            'trigonal non-coplanar CN_3', 'T-shaped CN_3',
            'square co-planar CN_4', 'tetrahedral CN_4',
            'rectangular see-saw-like CN_4', 'see-saw-like CN_4',
            'trigonal pyramidal CN_4', 'pentagonal planar CN_5',
            'square pyramidal CN_5', 'trigonal bipyramidal CN_5',
            'hexagonal planar CN_6', 'octahedral CN_6',
            'pentagonal pyramidal CN_6', 'hexagonal pyramidal CN_7',
            'pentagonal bipyramidal CN_7', 'body-centered cubic CN_8',
            'hexagonal bipyramidal CN_8', 'q2 CN_9', 'q4 CN_9', 'q6 CN_9',
            'q2 CN_10', 'q4 CN_10', 'q6 CN_10',
            'q2 CN_11', 'q4 CN_11', 'q6 CN_11',
            'cuboctahedral CN_12', 'q2 CN_12', 'q4 CN_12', 'q6 CN_12',
        ]
        for position, label in enumerate(produced):
            self.assertEqual(label, reference[position])

        # Simple cubic, site 0.
        ops = opsf.featurize(self.sc, 0)
        self.assertEqual(len(ops), 37)
        octahedral_at = opsf.feature_labels().index('octahedral CN_6')
        self.assertAlmostEqual(ops[octahedral_at], 0.9995, places=7)

        # CsCl, site 0, with the default and then dist_exp=0 settings.
        ops = opsf.featurize(self.cscl, 0)
        bcc_at = opsf.feature_labels().index('body-centered cubic CN_8')
        self.assertAlmostEqual(ops[bcc_at], 0.8955, places=7)

        opsf = OPSiteFingerprint(dist_exp=0)
        ops = opsf.featurize(self.cscl, 0)
        bcc_at = opsf.feature_labels().index('body-centered cubic CN_8')
        self.assertAlmostEqual(ops[bcc_at], 0.9555, places=7)

        # The following test aims at ensuring the copying of the OP
        # dictionaries works: building a CrystalNNFingerprint afterwards must
        # not add any 'wt' labels to the OPSiteFingerprint.
        opsfp = OPSiteFingerprint()
        cnnfp = CrystalNNFingerprint.from_preset('ops')
        weight_labels = [
            entry for entry in opsfp.feature_labels()
            if entry.split()[0] == 'wt'
        ]
        self.assertEqual(len(weight_labels), 0)
from pymatgen.core.structure import Structure

from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
from matminer.featurizers.site import CrystalNNFingerprint
from matminer.featurizers.structure import SiteStatsFingerprint
'''
this module implements a pipeline for generating a mongo database of structures
and their feature vectors from the materials project database. pipeline
operations are implemented as instance methods
'''

# Module-level transformer for converting structures into feature vectors.
# The site featurizer is the CrystalNNFingerprint "ops" preset with
# distance_cutoffs disabled (None), x_diff_weight set to 0.0 and
# porous_adjustment turned off; SiteStatsFingerprint is configured with the
# mean, std_dev, maximum and minimum statistics.
FRAMEWORK_FEATURIZER = SiteStatsFingerprint(
    site_featurizer=CrystalNNFingerprint.from_preset(preset="ops",
                                                     distance_cutoffs=None,
                                                     x_diff_weight=0.0,
                                                     porous_adjustment=False),
    stats=['mean', 'std_dev', 'maximum', 'minimum'])


class GenerateStructureCollection(Pipe):
    '''
    structures are collected from the materials project and deposited in a
    mongodb collection under the document field "structure". these structures
    are featurized using transformers from the matminer library. the feature
    vectors are stored as dictionaries in the "structure_features" field of the
    collection. the data is indexed by the document field "material_id"

    Notes: document schema for the graph collection
        material_id (str) the unique identifier for a material
        structure (dict) representation of a pymatgen Structure
Пример #13
0
from pymatgen import Structure
import joblib
import matplotlib.pyplot as plt
import numpy as np

# Global matplotlib styling: figure size, font family/weight/size, math font,
# and label sizes derived from the base font size.
font = {'family': 'Avenir', 'weight': 'normal', 'size': 26}
math_font = 'stixsans'
plt.rcParams["figure.figsize"] = [6, 7]
plt.rc('font', **font)
plt.rcParams.update({
    'mathtext.fontset': math_font,
    'axes.labelsize': font['size'],
    # Tick and legend text is two points smaller than the axis labels.
    'xtick.labelsize': font['size'] - 2,
    'ytick.labelsize': font['size'] - 2,
    'legend.fontsize': font['size'] - 2,
})

# Structure fingerprint: CrystalNN "ops" site fingerprints with distance
# cutoffs disabled and x_diff_weight 0, combined with four statistics.
ssf = SiteStatsFingerprint(
    CrystalNNFingerprint.from_preset(
        'ops', distance_cutoffs=None, x_diff_weight=0),
    stats=('mean', 'std_dev', 'minimum', 'maximum'),
)
v_new = []
name = np.arange(29)
for folder in ('../FTCP-designed compounds/Case 1/Ef_03/',
               '../FTCP-designed compounds/Case 1/Ef_05/',
               '../FTCP-designed compounds/Case 1/Ef_06/',
               '../FTCP-designed compounds/Case 1/Ef_07/'):
    for j in tqdm(name):
        try:
            new_crystal = Structure.from_file(f"{folder}{j}_fin.cif")
            v_new.append(np.array(ssf.featurize(new_crystal)))
        except FileNotFoundError:
            try:
                new_crystal = Structure.from_file(f"{folder}gen{j}_fin.cif")
                v_new.append(np.array(ssf.featurize(new_crystal)))
Пример #14
0
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """Featurizer preset combining matminer featurizers of five kinds.

    The class attributes list the composition, oxidation-state composition,
    structure, site, DOS and band-structure featurizers. Each ``featurize_*``
    method calls the parent implementation, converts categorical or composite
    outputs into numeric columns, and returns the result of ``clean_df``.
    """

    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        #PartialRadialDistributionFunction(), #Introduces a large amount of features
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        """Store the requested number of jobs on ``self._n_jobs``."""
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        HOMO/LUMO orbital characters are mapped to integers (s=1 ... f=4) and
        HOMO/LUMO element symbols to atomic numbers (-1 for non-string
        entries).
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df["AtomicOrbitals|HOMO_character"] = df["AtomicOrbitals|HOMO_character"].map(
            _orbitals
        )
        df["AtomicOrbitals|LUMO_character"] = df["AtomicOrbitals|LUMO_character"].map(
            _orbitals
        )

        df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )
        df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )

        return clean_df(df)

    def featurize_structure(self, df):
        """Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one column
        per distance (first 50 distances of the first row), the crystal
        system is mapped to an integer code, and the centrosymmetry flag is
        mapped to 0/1.
        """
        df = super().featurize_structure(df)

        dist = df["RadialDistributionFunction|radial distribution function"].iloc[0][
            "distances"
        ][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d
            )
            # apply() runs immediately, so the lambda sees the current ``i``.
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"
            ].apply(lambda x: x["distribution"][i])

        df = df.drop("RadialDistributionFunction|radial distribution function", axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # Correct spelling; without it orthorhombic entries never matched
            # the mapping and became NaN.
            "orthorhombic": 3,
            # Misspelled key kept so any input that matched before still does.
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            # ``x == np.nan`` is always False (NaN never equals itself), which
            # let NaN fall through to the truthy branch and map to 1.
            if pd.isna(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Applies the preset DOS featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The VBM/CBM species columns are one-hot encoded, the VBM/CBM orbital
        characters are mapped to integers, and the ``;``-separated VBM/CBM
        location strings are split into three float columns each.
        """

        df = super().featurize_dos(df)

        hot_encode_columns = ["DOSFeaturizer|vbm_specie_1", "DOSFeaturizer|cbm_specie_1"]

        one_hot = pd.get_dummies(df[hot_encode_columns])
        df = df.drop(hot_encode_columns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}

        df["DOSFeaturizer|vbm_character_1"] = df[
            "DOSFeaturizer|vbm_character_1"
        ].map(_orbitals)
        df["DOSFeaturizer|cbm_character_1"] = df[
            "DOSFeaturizer|cbm_character_1"
        ].map(_orbitals)

        # Splitting one feature into several floating features
        # e.g. number;number;number into three columns
        split_columns = ["DOSFeaturizer|cbm_location_1", "DOSFeaturizer|vbm_location_1"]

        for column in split_columns:
            try:
                new_columns = df[column].str.split(";", n=2, expand=True)
                for i in range(3):
                    # The builtin ``float`` replaces ``np.float``, which was
                    # removed in NumPy 1.24; its AttributeError was swallowed
                    # below, so the split silently never happened.
                    df[column + "_" + str(i)] = np.array(new_columns[i]).astype(float)
            except (KeyError, AttributeError, TypeError, ValueError):
                # Best effort: skip columns that are missing, not strings, or
                # not parseable as three floats.
                continue
        df = df.drop(split_columns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Applies the preset band structure featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        ``is_gap_direct`` is mapped to 0/1 based on its string form; any other
        value maps to ``None``.
        """

        df = super().featurize_bandstructure(df)

        def _int_map(x):
            if str(x) == "False":
                return 0
            elif str(x) == "True":
                return 1

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns whose values are all zero are dropped before cleaning.
        """

        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # Keep only columns that contain at least one non-zero value.
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
Пример #15
0
    def test_crystal_nn_fingerprint(self):
        """Check CrystalNNFingerprint labels and values, with and without chem_info."""

        def value_of(vector, fingerprint, label):
            # Look the feature up by name so the test does not hard-code indices.
            return vector[fingerprint.feature_labels().index(label)]

        # --- 'ops' preset -------------------------------------------------
        cnnfp = CrystalNNFingerprint.from_preset(
            'ops', distance_cutoffs=None, x_diff_weight=None)
        actual_labels = cnnfp.feature_labels()
        expected_labels = [
            'wt CN_1', 'sgl_bd CN_1', 'wt CN_2', 'L-shaped CN_2',
            'water-like CN_2', 'bent 120 degrees CN_2',
            'bent 150 degrees CN_2', 'linear CN_2', 'wt CN_3',
            'trigonal planar CN_3', 'trigonal non-coplanar CN_3',
            'T-shaped CN_3', 'wt CN_4', 'square co-planar CN_4',
            'tetrahedral CN_4', 'rectangular see-saw-like CN_4',
            'see-saw-like CN_4', 'trigonal pyramidal CN_4', 'wt CN_5',
            'pentagonal planar CN_5', 'square pyramidal CN_5',
            'trigonal bipyramidal CN_5', 'wt CN_6', 'hexagonal planar CN_6',
            'octahedral CN_6', 'pentagonal pyramidal CN_6', 'wt CN_7',
            'hexagonal pyramidal CN_7', 'pentagonal bipyramidal CN_7',
            'wt CN_8', 'body-centered cubic CN_8',
            'hexagonal bipyramidal CN_8', 'wt CN_9', 'q2 CN_9', 'q4 CN_9',
            'q6 CN_9', 'wt CN_10', 'q2 CN_10', 'q4 CN_10', 'q6 CN_10',
            'wt CN_11', 'q2 CN_11', 'q4 CN_11', 'q6 CN_11', 'wt CN_12',
            'cuboctahedral CN_12', 'q2 CN_12', 'q4 CN_12', 'q6 CN_12',
            'wt CN_13', 'wt CN_14', 'wt CN_15', 'wt CN_16', 'wt CN_17',
            'wt CN_18', 'wt CN_19', 'wt CN_20', 'wt CN_21', 'wt CN_22',
            'wt CN_23', 'wt CN_24',
        ]
        for idx, label in enumerate(actual_labels):
            self.assertEqual(label, expected_labels[idx])

        ops = cnnfp.featurize(self.sc, 0)
        self.assertEqual(len(ops), 61)
        self.assertAlmostEqual(value_of(ops, cnnfp, 'wt CN_6'), 1, places=7)
        self.assertAlmostEqual(
            value_of(ops, cnnfp, 'octahedral CN_6'), 1, places=7)

        ops = cnnfp.featurize(self.cscl, 0)
        self.assertAlmostEqual(
            value_of(ops, cnnfp, 'wt CN_8'), 0.498099, places=3)
        self.assertAlmostEqual(
            value_of(ops, cnnfp, 'body-centered cubic CN_8'),
            0.47611,
            places=3)

        # --- explicit op_types --------------------------------------------
        op_types = {6: ["wt", "oct_max"], 8: ["wt", "bcc"]}
        cnnfp = CrystalNNFingerprint(
            op_types, distance_cutoffs=None, x_diff_weight=None)
        labels = ['wt CN_6', 'oct_max CN_6', 'wt CN_8', 'bcc CN_8']
        for got, want in zip(cnnfp.feature_labels(), labels):
            self.assertEqual(got, want)
        feats = cnnfp.featurize(self.sc, 0)
        self.assertEqual(len(feats), 4)

        # --- op_types plus chemical information ---------------------------
        chem_info = {
            "mass": {"Al": 26.9, "Cs+": 132.9, "Cl-": 35.4},
            "Pauling scale": {"Al": 1.61, "Cs+": 0.79, "Cl-": 3.16},
        }
        cnnchemfp = CrystalNNFingerprint(
            op_types,
            chem_info=chem_info,
            distance_cutoffs=None,
            x_diff_weight=None)
        labels = labels + ['mass local diff', 'Pauling scale local diff']
        for got, want in zip(cnnchemfp.feature_labels(), labels):
            self.assertEqual(got, want)

        feats = cnnchemfp.featurize(self.sc, 0)
        self.assertEqual(len(feats), 6)
        self.assertAlmostEqual(
            value_of(feats, cnnchemfp, 'wt CN_6'), 1, places=7)
        self.assertAlmostEqual(
            value_of(feats, cnnchemfp, 'oct_max CN_6'), 1, places=7)
        self.assertAlmostEqual(
            value_of(feats, cnnchemfp, 'mass local diff'), 0, places=7)
        self.assertAlmostEqual(
            value_of(feats, cnnchemfp, 'Pauling scale local diff'),
            0,
            places=7)

        feats = cnnchemfp.featurize(self.cscl, 0)
        self.assertAlmostEqual(
            value_of(feats, cnnchemfp, 'bcc CN_8'), 0.4761107, places=3)
        self.assertAlmostEqual(
            value_of(feats, cnnchemfp, 'mass local diff'), 97.5, places=3)
        self.assertAlmostEqual(
            value_of(feats, cnnchemfp, 'Pauling scale local diff'),
            -2.37,
            places=3)
Пример #16
0
    def from_preset(preset, **kwargs):
        """
        Create a SiteStatsFingerprint class according to a preset

        Args:
            preset (str) - Name of preset
            kwargs - Options for SiteStatsFingerprint. They are forwarded for
                the CrystalNNFingerprint_*, OPSiteFingerprint and fallback
                coordination-number presets; the remaining presets fix their
                own ``stats`` and do not use kwargs.

        Returns:
            The configured SiteStatsFingerprint.

        Raises:
            ValueError: if ``preset`` is not one of the named presets and
                building a CoordinationNumber preset from it fails. The
                underlying error is attached as the cause.
        """

        if preset == "CrystalNNFingerprint_cn":
            return SiteStatsFingerprint(
                CrystalNNFingerprint.from_preset("cn", cation_anion=False),
                **kwargs)

        elif preset == "CrystalNNFingerprint_cn_cation_anion":
            return SiteStatsFingerprint(
                CrystalNNFingerprint.from_preset("cn", cation_anion=True),
                **kwargs)

        elif preset == "CrystalNNFingerprint_ops":
            return SiteStatsFingerprint(
                CrystalNNFingerprint.from_preset("ops", cation_anion=False),
                **kwargs)

        elif preset == "CrystalNNFingerprint_ops_cation_anion":
            return SiteStatsFingerprint(
                CrystalNNFingerprint.from_preset("ops", cation_anion=True),
                **kwargs)

        elif preset == "OPSiteFingerprint":
            return SiteStatsFingerprint(OPSiteFingerprint(), **kwargs)

        elif preset == "LocalPropertyDifference_ward-prb-2017":
            return SiteStatsFingerprint(
                LocalPropertyDifference.from_preset("ward-prb-2017"),
                stats=["minimum", "maximum", "range", "mean", "avg_dev"])

        elif preset == "CoordinationNumber_ward-prb-2017":
            return SiteStatsFingerprint(
                CoordinationNumber(nn=VoronoiNN(weight='area'),
                                   use_weights="effective"),
                stats=["minimum", "maximum", "range", "mean", "avg_dev"])

        elif preset == "Composition-dejong2016_AD":
            return SiteStatsFingerprint(
                LocalPropertyDifference(
                    properties=[
                        "Number", "AtomicWeight", "Column", "Row",
                        "CovalentRadius", "Electronegativity"
                    ],
                    signed=False),
                stats=['holder_mean::%d' % d
                       for d in range(0, 4 + 1)] + ['std_dev'],
            )

        elif preset == "Composition-dejong2016_SD":
            return SiteStatsFingerprint(
                LocalPropertyDifference(
                    properties=[
                        "Number", "AtomicWeight", "Column", "Row",
                        "CovalentRadius", "Electronegativity"
                    ],
                    signed=True),
                stats=['holder_mean::%d' % d for d in [1, 2, 4]] + ['std_dev'],
            )

        elif preset == "BondLength-dejong2016":
            return SiteStatsFingerprint(
                AverageBondLength(VoronoiNN()),
                stats=['holder_mean::%d' % d for d in range(-4, 4 + 1)] +
                ['std_dev', 'geom_std_dev'])

        elif preset == "BondAngle-dejong2016":
            return SiteStatsFingerprint(
                AverageBondAngle(VoronoiNN()),
                stats=['holder_mean::%d' % d for d in range(-4, 4 + 1)] +
                ['std_dev', 'geom_std_dev'])

        # TODO: Why assume coordination number? Should this just raise an error? - lw
        # One of the various Coordination Number presets:
        # MinimumVIRENN, MinimumDistanceNN, JmolNN, VoronoiNN, etc.
        try:
            return SiteStatsFingerprint(
                CoordinationNumber.from_preset(preset), **kwargs)
        except Exception as err:
            # ``except Exception`` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate; chaining keeps the real cause visible.
            raise ValueError("Unrecognized preset!") from err
Пример #17
0
import numpy as np
import pymatgen
# from pymatgen import MPRester

from matminer.featurizers.site import CrystalNNFingerprint
from matminer.featurizers.structure import SiteStatsFingerprint
# Formatted fingerprint distances are collected here and printed at the end.
structuraldif = []
# NOTE(review): the API key handed to MPRester is hard-coded; consider reading
# it from the environment instead of keeping it in the source.
with pymatgen.MPRester("iUCzg5aBMJ1w30KT") as mpr:

    # Download the two structures that are going to be compared.
    SnSelow = mpr.get_structure_by_material_id("mp-1984")
    SnSehigh = mpr.get_structure_by_material_id("mp-1411")

    # Featurize both structures with the same site-statistics fingerprint
    # (first the "low" one, then the "high" one) ...
    ssf = SiteStatsFingerprint(CrystalNNFingerprint.from_preset('cn'))
    v_SnSelow, v_SnSehigh = (
        np.array(ssf.featurize(structure))
        for structure in (SnSelow, SnSehigh)
    )
    # ... and scale each feature vector by its own Euclidean norm.
    v_SnSelow = v_SnSelow / np.linalg.norm(v_SnSelow)
    v_SnSehigh = v_SnSehigh / np.linalg.norm(v_SnSehigh)

    # Store the norm of the difference of the two scaled vectors, formatted
    # with four decimal places.
    fish = '{:.4f}'.format(np.linalg.norm(v_SnSehigh - v_SnSelow))
    structuraldif.append(fish)
print(structuraldif)
Пример #18
0
    def test_crystal_nn_fingerprint(self):
        """Check the labels and feature values of CrystalNNFingerprint.

        Three configurations are exercised, each built with
        ``distance_cutoffs=None`` and ``x_diff_weight=None``:

        * the ``'ops'`` preset,
        * a custom ``op_types`` mapping for coordination numbers 6 and 8,
        * the same mapping combined with a ``chem_info`` dictionary.

        Each one is used to featurize site 0 of the ``self.sc`` and/or
        ``self.cscl`` fixtures.
        """

        def value_of(fingerprint, values, label):
            # Entry of `values` located at the position that `label` has in
            # the fingerprint's feature labels.
            return values[fingerprint.feature_labels().index(label)]

        common_kwargs = dict(distance_cutoffs=None, x_diff_weight=None)

        # ---- 'ops' preset -------------------------------------------------
        cnnfp = CrystalNNFingerprint.from_preset('ops', **common_kwargs)
        expected_labels = [
            'wt CN_1', 'sgl_bd CN_1', 'wt CN_2', 'L-shaped CN_2',
            'water-like CN_2', 'bent 120 degrees CN_2',
            'bent 150 degrees CN_2', 'linear CN_2', 'wt CN_3',
            'trigonal planar CN_3', 'trigonal non-coplanar CN_3',
            'T-shaped CN_3', 'wt CN_4', 'square co-planar CN_4',
            'tetrahedral CN_4', 'rectangular see-saw-like CN_4',
            'see-saw-like CN_4', 'trigonal pyramidal CN_4', 'wt CN_5',
            'pentagonal planar CN_5', 'square pyramidal CN_5',
            'trigonal bipyramidal CN_5', 'wt CN_6', 'hexagonal planar CN_6',
            'octahedral CN_6', 'pentagonal pyramidal CN_6', 'wt CN_7',
            'hexagonal pyramidal CN_7', 'pentagonal bipyramidal CN_7',
            'wt CN_8', 'body-centered cubic CN_8',
            'hexagonal bipyramidal CN_8', 'wt CN_9', 'q2 CN_9', 'q4 CN_9',
            'q6 CN_9', 'wt CN_10', 'q2 CN_10', 'q4 CN_10', 'q6 CN_10',
            'wt CN_11', 'q2 CN_11', 'q4 CN_11', 'q6 CN_11', 'wt CN_12',
            'cuboctahedral CN_12', 'q2 CN_12', 'q4 CN_12', 'q6 CN_12',
            'wt CN_13', 'wt CN_14', 'wt CN_15', 'wt CN_16', 'wt CN_17',
            'wt CN_18', 'wt CN_19', 'wt CN_20', 'wt CN_21', 'wt CN_22',
            'wt CN_23', 'wt CN_24'
        ]
        # Every label produced by the preset must match the expected label
        # at the same position.
        for position, produced in enumerate(cnnfp.feature_labels()):
            self.assertEqual(produced, expected_labels[position])

        ops = cnnfp.featurize(self.sc, 0)
        self.assertEqual(len(ops), 61)
        self.assertAlmostEqual(value_of(cnnfp, ops, 'wt CN_6'), 1, places=7)
        self.assertAlmostEqual(
            value_of(cnnfp, ops, 'octahedral CN_6'), 1, places=7)

        ops = cnnfp.featurize(self.cscl, 0)
        self.assertAlmostEqual(
            value_of(cnnfp, ops, 'wt CN_8'), 0.498099, places=3)
        self.assertAlmostEqual(
            value_of(cnnfp, ops, 'body-centered cubic CN_8'),
            0.47611,
            places=3)

        # ---- custom order-parameter types ---------------------------------
        op_types = {6: ["wt", "oct_max"], 8: ["wt", "bcc"]}
        cnnfp = CrystalNNFingerprint(op_types, **common_kwargs)
        labels = ['wt CN_6', 'oct_max CN_6', 'wt CN_8', 'bcc CN_8']
        for produced, wanted in zip(cnnfp.feature_labels(), labels):
            self.assertEqual(produced, wanted)
        feats = cnnfp.featurize(self.sc, 0)
        self.assertEqual(len(feats), 4)

        # ---- custom types plus chemical information -----------------------
        chem_info = {
            "mass": {"Al": 26.9, "Cs+": 132.9, "Cl-": 35.4},
            "Pauling scale": {"Al": 1.61, "Cs+": 0.79, "Cl-": 3.16},
        }
        cnnchemfp = CrystalNNFingerprint(
            op_types, chem_info=chem_info, **common_kwargs)
        labels = labels + ['mass local diff', 'Pauling scale local diff']
        for produced, wanted in zip(cnnchemfp.feature_labels(), labels):
            self.assertEqual(produced, wanted)

        feats = cnnchemfp.featurize(self.sc, 0)
        self.assertEqual(len(feats), 6)
        self.assertAlmostEqual(
            value_of(cnnchemfp, feats, 'wt CN_6'), 1, places=7)
        self.assertAlmostEqual(
            value_of(cnnchemfp, feats, 'oct_max CN_6'), 1, places=7)
        self.assertAlmostEqual(
            value_of(cnnchemfp, feats, 'mass local diff'), 0, places=7)
        self.assertAlmostEqual(
            value_of(cnnchemfp, feats, 'Pauling scale local diff'),
            0,
            places=7)

        feats = cnnchemfp.featurize(self.cscl, 0)
        self.assertAlmostEqual(
            value_of(cnnchemfp, feats, 'bcc CN_8'), 0.4761107, places=3)
        self.assertAlmostEqual(
            value_of(cnnchemfp, feats, 'mass local diff'), 97.5, places=3)
        self.assertAlmostEqual(
            value_of(cnnchemfp, feats, 'Pauling scale local diff'),
            -2.37,
            places=3)
Пример #19
0
class DeBreuck2020Featurizer(modnet.featurizers.MODFeaturizer):
    """ Featurizer presets used for the paper 'Machine learning
    materials properties for small datasets' by Pierre-Paul De Breuck,
    Geoffroy Hautier & Gian-Marco Rignanese, arXiv:2004.14766 (2020).

    Uses most of the featurizers implemented by matminer at the time of
    writing with their default hyperparameters and presets.

    The featurizer instances are grouped in four class-level tuples
    (``composition_featurizers``, ``oxide_composition_featurizers``,
    ``structure_featurizers`` and ``site_featurizers``); the three
    ``featurize_*`` methods delegate to the parent class and then
    post-process the resulting dataframe.

    """
    # The matminer imports are kept inside the class body, so they are only
    # executed when this class is defined.
    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        # CohesiveEnergy, - This descriptor was not used in the paper preset
        # ElectronAffinity, - This descriptor was not used in the paper preset
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        # BagofBonds, - This descriptor was not used in the paper preset
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        # PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxide_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction(),
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
        # BagofBonds(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    def featurize_composition(self, df):
        """ Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The HOMO/LUMO orbital characters ("s", "p", "d", "f") are encoded
        as the integers 1-4, the HOMO/LUMO element symbols are replaced by
        ``Element(symbol).Z`` (``-1`` when the entry is not a string), and
        infinities and NaNs are replaced by 0.

        Args:
            df: the dataframe handed to the parent class'
                ``featurize_composition``.

        Returns:
            The result of ``modnet.featurizers.clean_df`` applied to the
            post-processed dataframe.

        """
        df = super().featurize_composition(df)

        # Integer encoding of the orbital character; values missing from
        # the mapping become NaN and are zeroed by the replace below.
        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df['AtomicOrbitals|HOMO_character'] = df[
            'AtomicOrbitals|HOMO_character'].map(_orbitals)
        df['AtomicOrbitals|LUMO_character'] = df[
            'AtomicOrbitals|LUMO_character'].map(_orbitals)

        # Element symbols -> Z; non-string entries are flagged with -1.
        df['AtomicOrbitals|HOMO_element'] = df[
            'AtomicOrbitals|HOMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)
        df['AtomicOrbitals|LUMO_element'] = df[
            'AtomicOrbitals|LUMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)

        df = df.replace([np.inf, -np.inf, np.nan], 0)

        return modnet.featurizers.clean_df(df)

    def featurize_structure(self, df):
        """ Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one column
        per distance (at most the first 50 distances of the first row), the
        crystal system name is encoded as an integer and the
        centrosymmetry flag is encoded as 0/1.

        Args:
            df: the dataframe handed to the parent class'
                ``featurize_structure``.

        Returns:
            The result of ``modnet.featurizers.clean_df`` applied to the
            post-processed dataframe.

        """
        df = super().featurize_structure(df)

        # The distances of the first row define the new column names; every
        # row is then indexed at the same positions of its 'distribution'.
        dist = df[
            "RadialDistributionFunction|radial distribution function"].iloc[0][
                'distances'][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d)
            # The lambda is consumed by `apply` within this iteration, so
            # it always sees the current value of `i`.
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"].apply(
                    lambda x: x['distribution'][i])

        df = df.drop("RadialDistributionFunction|radial distribution function",
                     axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # pymatgen spells this system "orthorhombic"; with only the
            # misspelled key below such rows were mapped to NaN.
            "orthorhombic": 3,
            # Legacy misspelling, kept so that any input using it still maps.
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7
        }

        def _int_map(x):
            # Encode a flag as 0/1, treating NaN as 0. NaN never compares
            # equal to anything (itself included), so `x == np.nan` can not
            # detect it and the truthy NaN would otherwise be encoded as 1.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"].map(_int_map)

        return modnet.featurizers.clean_df(df)

    def featurize_site(self, df):
        """ Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Args:
            df: the dataframe handed to the parent class'
                ``featurize_site``.

        Returns:
            The result of ``modnet.featurizers.clean_df`` applied to the
            featurized dataframe, after dropping every column whose values
            are all equal to 0.

        """

        # rename some features for backwards compatibility with pretrained models
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # Keep only the columns that contain at least one non-zero entry.
        df = df.loc[:, (df != 0).any(axis=0)]

        return modnet.featurizers.clean_df(df)
Пример #20
0
def featurize_site(df: pd.DataFrame, site_stats=("mean", "std_dev")) -> pd.DataFrame:
    """ Add matminer site features to a dataframe of structures.

    Every site featurizer listed in the body is wrapped in a
    `matminer.featurizers.structure.SiteStatsFingerprint` and applied to
    the `"structure"` column. New columns are prefixed with the name of
    the featurizer that produced them, and columns that contain only
    zeros are dropped at the end.

    Args:
        df (pandas.DataFrame): the input dataframe with `"structure"`
            column containing `pymatgen.Structure` objects. It is copied,
            not modified in place.
        site_stats (Tuple[str]): the matminer site stats to use in the
            `SiteStatsFingerprint` for all features.

    Returns:
        pandas.DataFrame: the decorated DataFrame, passed through
            `clean_df`.

    """

    logging.info("Applying site featurizers...")

    # Work on a copy and tag the original columns, so that they already
    # contain a "|" and are left alone by the prefixing step below.
    df = df.copy()
    df.columns = ["Input data|" + column for column in df.columns]

    # Prefixes that differ from the featurizer class name; they are kept
    # for backwards compatibility with pretrained models.
    legacy_prefixes = {
        "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
        "AGNIFingerprints": "AGNIFingerPrint",
        "BondOrientationalParameter": "BondOrientationParameter",
        "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
    }

    site_fingerprints = (
        AGNIFingerprints(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        OPSiteFingerprint(),
        CrystalNNFingerprint.from_preset("ops"),
        VoronoiFingerprint(),
        GaussianSymmFunc(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        LocalPropertyDifference(),
        BondOrientationalParameter(),
        AverageBondLength(VoronoiNN()),
        AverageBondAngle(VoronoiNN())
    )

    for fingerprint in site_fingerprints:
        stats_featurizer = SiteStatsFingerprint(fingerprint, stats=site_stats)
        df = stats_featurizer.featurize_dataframe(
            df,
            "Input data|structure",
            multiindex=False,
            ignore_errors=True
        )

        class_name = fingerprint.__class__.__name__
        prefix = legacy_prefixes.get(class_name, class_name)
        # A prefix that already contains "|" is used verbatim; any other
        # prefix gets the separator appended.
        if "|" not in prefix:
            prefix += "|"

        # Only the columns added in this iteration lack a "|".
        df.columns = [
            column if "|" in column else f"{prefix}{column}"
            for column in df.columns
        ]

    # Discard the columns whose entries are all equal to zero.
    has_nonzero_entry = (df != 0).any(axis=0)
    df = df.loc[:, has_nonzero_entry]

    return clean_df(df)