Пример #1
0
    def test_miedema_all(self):
        """Check Miedema features computed with ``struct_types='all'``.

        TiZr and Mg10Cu50Ca40 are compared against reference values for the
        ``inter``, ``amor`` and ``ss_min`` formation-enthalpy columns, while
        every one of those values is expected to be NaN for Fe2O3.
        """
        miedema_df = pd.DataFrame({
            "composition": [
                Composition("TiZr"),
                Composition("Mg10Cu50Ca40"),
                Composition("Fe2O3")
            ]
        })
        df_miedema = Miedema(struct_types='all').featurize_dataframe(
            miedema_df, col_id="composition")

        # Reference values; the position in the list is the dataframe row.
        expected = [
            {
                'formation_enthalpy_inter': -0.0034450221522328503,
                'formation_enthalpy_amor': 0.070765883630040161,
                'formation_enthalpy_ss_min': 0.036635997549833224,
            },
            {
                'formation_enthalpy_inter': -0.23512597842733007,
                'formation_enthalpy_amor': -0.16454184827089643,
                'formation_enthalpy_ss_min': -0.052808433113994087,
            },
        ]
        for row, reference in enumerate(expected):
            for column, value in reference.items():
                self.assertAlmostEqual(df_miedema[column][row], value)

        # assertTrue states the intent directly; comparing the boolean
        # returned by math.isnan to True through assertAlmostEqual was a
        # roundabout way of writing the same check.
        for column in expected[0]:
            self.assertTrue(math.isnan(df_miedema[column][2]))
Пример #2
0
    def test_miedema_all(self):
        """Check precheck and featurization with ``struct_types='all'``.

        ``precheck`` must reject Fe2O3, ``precheck_dataframe`` must report
        2/3 for the three compositions, and the ``inter``, ``amor`` and
        ``ss_min`` columns must match the reference values for TiZr and
        Mg10Cu50Ca40 while being NaN for Fe2O3.
        """
        df = pd.DataFrame({
            "composition": [
                Composition("TiZr"),
                Composition("Mg10Cu50Ca40"),
                Composition("Fe2O3")
            ]
        })
        miedema = Miedema(struct_types='all')
        self.assertFalse(miedema.precheck(df["composition"].iloc[-1]))
        self.assertAlmostEqual(miedema.precheck_dataframe(df, "composition"),
                               2 / 3)
        mfps = miedema.featurize_dataframe(df, col_id="composition")

        # Reference values; the position in the list is the dataframe row.
        expected = [
            {
                'Miedema_deltaH_inter': -0.003445022152,
                'Miedema_deltaH_amor': 0.0707658836300,
                'Miedema_deltaH_ss_min': 0.03663599755,
            },
            {
                'Miedema_deltaH_inter': -0.235125978427,
                'Miedema_deltaH_amor': -0.164541848271,
                'Miedema_deltaH_ss_min': -0.05280843311,
            },
        ]
        for row, reference in enumerate(expected):
            for column, value in reference.items():
                self.assertAlmostEqual(mfps[column][row], value)

        # A boolean is asserted with assertTrue rather than being compared
        # to True through assertAlmostEqual.
        for column in expected[0]:
            self.assertTrue(math.isnan(mfps[column][2]))
Пример #3
0
def featurize_composition(df: pd.DataFrame) -> pd.DataFrame:
    """Add matminer composition features to a dataframe of structures.

    A ``"composition"`` column is derived from the ``"structure"`` column
    and passed through a fixed list of matminer composition featurizers.
    Oxidation-state based featurizers are then applied to an
    oxidation-decorated copy of the composition. Column names are flattened
    to ``"<featurizer>|<feature>"`` strings.

    The input dataframe is not modified; the work happens on a copy.

    Args:
        df (pandas.DataFrame): the input dataframe with a `"structure"`
            column containing `pymatgen.Structure` objects.

    Returns:
        pandas.DataFrame: the decorated dataframe, passed through
            ``clean_df``.

    """
    logging.info("Applying composition featurizers...")

    def _composition_of(structure):
        return structure.composition

    def _flatten(columns):
        # Join the levels of a column MultiIndex into "level0|level1".
        return columns.map('|'.join).str.strip('|')

    def _atomic_number(symbol):
        # Anything that is not an element symbol string is encoded as -1.
        return Element(symbol).Z if isinstance(symbol, str) else -1

    df = df.copy()
    df['composition'] = df['structure'].apply(_composition_of)

    composition_featurizer = MultipleFeaturizer([
        ElementProperty.from_preset("magpie"),
        AtomicOrbitals(),
        BandCenter(),
        # ElectronAffinity(), - This descriptor was not used in the paper preset
        Stoichiometry(),
        ValenceOrbital(),
        IonProperty(),
        ElementFraction(),
        TMetalFraction(),
        # CohesiveEnergy(), - This descriptor was not used in the paper preset
        Miedema(),
        YangSolidSolution(),
        AtomicPackingEfficiency(),
    ])
    df = composition_featurizer.featurize_dataframe(
        df, "composition", multiindex=True, ignore_errors=True)
    df.columns = _flatten(df.columns)

    ox_featurizer = MultipleFeaturizer([
        OxidationStates(),
        ElectronegativityDiff(),
    ])

    # After flattening, the composition column carries the "Input Data" prefix.
    oxid_converter = CompositionToOxidComposition()
    df = oxid_converter.featurize_dataframe(df, "Input Data|composition")

    df = ox_featurizer.featurize_dataframe(
        df, "composition_oxid", multiindex=True, ignore_errors=True)
    df = df.rename(columns={'Input Data': ''})
    df.columns = _flatten(df.columns)

    # Encode the orbital character letters as integers.
    _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
    for character_column in ('AtomicOrbitals|HOMO_character',
                             'AtomicOrbitals|LUMO_character'):
        df[character_column] = df[character_column].map(_orbitals)

    # Encode the element symbols as atomic numbers.
    for element_column in ('AtomicOrbitals|HOMO_element',
                           'AtomicOrbitals|LUMO_element'):
        df[element_column] = df[element_column].apply(_atomic_number)

    df = df.replace([np.inf, -np.inf, np.nan], 0)

    return clean_df(df)
 def test_miedema_all(self):
     """Check the TiZr Miedema features obtained with ``struct='all'``."""
     compositions = pd.DataFrame({"composition": [Composition("TiZr")]})
     featurizer = Miedema(struct='all')
     features = featurizer.featurize_dataframe(compositions,
                                               col_id="composition")

     # Reference value for each feature column of the single TiZr row.
     reference = {
         'formation_enthalpy_inter': -0.0034450221522328503,
         'formation_enthalpy_amor': 0.070765883630040161,
         'formation_enthalpy_ss_min': 0.036635997549833224,
     }
     for column, value in reference.items():
         self.assertAlmostEqual(features[column][0], value)
 def test_miedema_ss(self):
     """Check the TiZr solid-solution Miedema features per lattice type."""
     compositions = pd.DataFrame({"composition": [Composition("TiZr")]})
     featurizer = Miedema(struct='ss|ss,none,bcc,fcc,hcp')
     features = featurizer.featurize_dataframe(compositions,
                                               col_id="composition")

     # Reference value for each feature column of the single TiZr row.
     reference = {
         'formation_enthalpy_ss_min': 0.036635997549833224,
         'formation_enthalpy_ss_none': 0.036635997549833224,
         'formation_enthalpy_ss_bcc': 0.083275226530828264,
         'formation_enthalpy_ss_fcc': 0.047000270656721008,
         'formation_enthalpy_ss_hcp': 0.036635997549833224,
     }
     for column, value in reference.items():
         self.assertAlmostEqual(features[column][0], value)
Пример #6
0
    def test_miedema_all(self):
        """Check precheck and featurization with ``struct_types='all'``.

        The checks run on plain compositions and on the ``composition_oxid``
        column produced by ``CompositionToOxidComposition``: ``precheck``
        accepts TiZr and rejects Fe2O3, ``precheck_dataframe`` reports 2/3,
        and the ``inter``, ``amor`` and ``ss_min`` columns match the
        reference values (NaN for Fe2O3).
        """
        df = pd.DataFrame({
            "composition": [
                Composition("TiZr"),
                Composition("Mg10Cu50Ca40"),
                Composition("Fe2O3")
            ]
        })
        miedema = Miedema(struct_types='all')
        self.assertTrue(miedema.precheck(df["composition"].iloc[0]))
        self.assertFalse(miedema.precheck(df["composition"].iloc[-1]))
        self.assertAlmostEqual(miedema.precheck_dataframe(df, "composition"),
                               2 / 3)

        # test precheck for oxidation-state decorated compositions
        df = CompositionToOxidComposition(return_original_on_error=True).\
            featurize_dataframe(df, 'composition')
        self.assertTrue(miedema.precheck(df["composition_oxid"].iloc[0]))
        self.assertFalse(miedema.precheck(df["composition_oxid"].iloc[-1]))
        self.assertAlmostEqual(
            miedema.precheck_dataframe(df, "composition_oxid"), 2 / 3)

        # Reference values; the position in the list is the dataframe row.
        expected = [
            {
                'Miedema_deltaH_inter': -0.003445022152,
                'Miedema_deltaH_amor': 0.0707658836300,
                'Miedema_deltaH_ss_min': 0.03663599755,
            },
            {
                'Miedema_deltaH_inter': -0.235125978427,
                'Miedema_deltaH_amor': -0.164541848271,
                'Miedema_deltaH_ss_min': -0.05280843311,
            },
        ]

        mfps = miedema.featurize_dataframe(df, col_id="composition")
        for row, reference in enumerate(expected):
            for column, value in reference.items():
                self.assertAlmostEqual(mfps[column][row], value)

        # A boolean is asserted with assertTrue rather than being compared
        # to True through assertAlmostEqual.
        for column in expected[0]:
            self.assertTrue(math.isnan(mfps[column][2]))

        # make sure featurization works equally for compositions with or without
        # oxidation states (only the TiZr row is compared here)
        mfps = miedema.featurize_dataframe(df, col_id="composition_oxid")
        for column, value in expected[0].items():
            self.assertAlmostEqual(mfps[column][0], value)
Пример #7
0
    def test_miedema_ss(self):
        """Check solid-solution Miedema features for every lattice type.

        Featurizes TiZr, Mg10Cu50Ca40 and Fe2O3 with ``struct_types='ss'``
        and the ``min``, ``fcc``, ``bcc``, ``hcp`` and ``no_latt`` solid
        solution types. The first two rows are compared to reference values;
        all values of the Fe2O3 row are expected to be NaN.
        """
        miedema_df = pd.DataFrame({
            "composition": [
                Composition("TiZr"),
                Composition("Mg10Cu50Ca40"),
                Composition("Fe2O3")
            ]
        })
        df_miedema = Miedema(struct_types='ss',
                             ss_types=['min', 'fcc', 'bcc', 'hcp',
                                       'no_latt']).featurize_dataframe(
                                           miedema_df, col_id="composition")

        # Reference values; the position in the list is the dataframe row.
        expected = [
            {
                'formation_enthalpy_ss_min': 0.036635997549833224,
                'formation_enthalpy_ss_fcc': 0.047000270656721008,
                'formation_enthalpy_ss_bcc': 0.083275226530828264,
                'formation_enthalpy_ss_hcp': 0.036635997549833224,
                'formation_enthalpy_ss_no_latt': 0.036635997549833224,
            },
            {
                'formation_enthalpy_ss_min': -0.052808433113994087,
                'formation_enthalpy_ss_fcc': 0.030105751741108196,
                'formation_enthalpy_ss_bcc': -0.052808433113994087,
                'formation_enthalpy_ss_hcp': 0.030105751741108196,
                'formation_enthalpy_ss_no_latt': -0.0035781358562771083,
            },
        ]
        for row, reference in enumerate(expected):
            for column, value in reference.items():
                self.assertAlmostEqual(df_miedema[column][row], value)

        # A boolean is asserted with assertTrue rather than being compared
        # to True through assertAlmostEqual.
        for column in expected[0]:
            self.assertTrue(math.isnan(df_miedema[column][2]))
Пример #8
0
    def _generate_pairwise_features(self):
        """Generates features for each bonding pair.

        For each pair label ("bcc_tet1", "bcc_tet2", "tet1_tet2") the column
        "pairwise_composition {}" of ``self.memory`` is featurized with
        ``PairwiseElementProperty`` and ``Miedema``. The new columns get the
        prefix "pairwise_feature {} ", where the braces stand for the pair
        label, and are appended to ``self.memory``.
        """

        # initializes requested matminer featurizers
        feat_element_property = PairwiseElementProperty(
            data_source='deml',
            features=[
                'atom_radius', 'electronegativity', 'first_ioniz', 'col_num',
                'row_num', 'molar_vol', 'heat_fusion', 'melting_point',
                'GGAU_Etot', 'mus_fere', 'FERE correction'
            ],
            stats=['difference', 'mean'])
        feat_miedema = Miedema(struct_types=['inter', 'amor'])

        # generates features for each bonding pair
        for bonding_pair in ['bcc_tet1', 'bcc_tet2', 'tet1_tet2']:
            feature_prefix = 'pairwise_feature {} '.format(bonding_pair)

            # gets the string composition of the bonding pairs; an explicit
            # copy is taken because the column is overwritten further down,
            # and assigning into a slice of self.memory would trigger
            # pandas' SettingWithCopyWarning
            composition_index = 'pairwise_composition {}'.format(bonding_pair)
            composition = self.memory[[composition_index]].copy()

            # adds ElementProperty features
            features = feat_element_property.featurize_dataframe(
                df=composition, col_id=composition_index, inplace=False).drop(
                    labels=composition_index, axis=1).add_prefix(
                        prefix=feature_prefix)
            self.memory = concat([self.memory, features], axis=1)

            # gets the pymatgen.Composition of the bonding pairs
            composition[composition_index] = [
                Composition(i) for i in composition[composition_index]
            ]

            # adds Miedema features
            features = feat_miedema.featurize_dataframe(
                df=composition, col_id=composition_index, inplace=False).drop(
                    labels=composition_index, axis=1).add_prefix(
                        prefix=feature_prefix)
            self.memory = concat([self.memory, features], axis=1)
Пример #9
0
    def test_miedema_ss(self):
        """Check solid-solution Miedema features for every lattice type.

        Featurizes TiZr, Mg10Cu50Ca40 and Fe2O3 with ``struct_types='ss'``
        and the ``min``, ``fcc``, ``bcc``, ``hcp`` and ``no_latt`` solid
        solution types. The first two rows are compared to reference values;
        all values of the Fe2O3 row are expected to be NaN.
        """
        df = pd.DataFrame({
            "composition": [
                Composition("TiZr"),
                Composition("Mg10Cu50Ca40"),
                Composition("Fe2O3")
            ]
        })
        miedema = Miedema(struct_types='ss',
                          ss_types=['min', 'fcc', 'bcc', 'hcp', 'no_latt'])
        mfps = miedema.featurize_dataframe(df, col_id="composition")

        # Reference values; the position in the list is the dataframe row.
        expected = [
            {
                'Miedema_deltaH_ss_min': 0.03663599755,
                'Miedema_deltaH_ss_fcc': 0.04700027066,
                'Miedema_deltaH_ss_bcc': 0.08327522653,
                'Miedema_deltaH_ss_hcp': 0.03663599755,
                'Miedema_deltaH_ss_no_latt': 0.036635998,
            },
            {
                'Miedema_deltaH_ss_min': -0.05280843311,
                'Miedema_deltaH_ss_fcc': 0.03010575174,
                'Miedema_deltaH_ss_bcc': -0.05280843311,
                'Miedema_deltaH_ss_hcp': 0.03010575174,
                'Miedema_deltaH_ss_no_latt': -0.0035781359,
            },
        ]
        for row, reference in enumerate(expected):
            for column, value in reference.items():
                self.assertAlmostEqual(mfps[column][row], value)

        # A boolean is asserted with assertTrue rather than being compared
        # to True through assertAlmostEqual.
        for column in expected[0]:
            self.assertTrue(math.isnan(mfps[column][2]))
Пример #10
0
class DeBreuck2020Featurizer(modnet.featurizers.MODFeaturizer):
    """ Featurizer presets used for the paper 'Machine learning
    materials properties for small datasets' by Pierre-Paul De Breuck,
    Geoffroy Hautier & Gian-Marco Rignanese, arXiv:2004.14766 (2020).

    Uses most of the featurizers implemented by matminer at the time of
    writing with their default hyperparameters and presets.

    The featurizer instances are stored in the class-level tuples
    ``composition_featurizers``, ``oxide_composition_featurizers``,
    ``structure_featurizers`` and ``site_featurizers``. The ``featurize_*``
    methods call the parent implementation and then post-process the
    resulting dataframe.

    """
    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        # CohesiveEnergy, - This descriptor was not used in the paper preset
        # ElectronAffinity, - This descriptor was not used in the paper preset
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        # BagofBonds, - This descriptor was not used in the paper preset
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        # PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxide_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction(),
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
        # BagofBonds(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    def featurize_composition(self, df):
        """ Applies the preset composition featurizers to the input dataframe,
        encodes the non-numeric AtomicOrbitals columns as numbers and cleans
        the output dataframe.

        The HOMO/LUMO character letters are mapped to integers (s=1, p=2,
        d=3, f=4), the HOMO/LUMO element symbols are replaced by atomic
        numbers (-1 for values that are not strings), and infinities and
        NaN are replaced by 0.

        Args:
            df: the dataframe handed to the parent's
                ``featurize_composition``.

        Returns:
            The post-processed dataframe, passed through
            ``modnet.featurizers.clean_df``.

        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df['AtomicOrbitals|HOMO_character'] = df[
            'AtomicOrbitals|HOMO_character'].map(_orbitals)
        df['AtomicOrbitals|LUMO_character'] = df[
            'AtomicOrbitals|LUMO_character'].map(_orbitals)

        df['AtomicOrbitals|HOMO_element'] = df[
            'AtomicOrbitals|HOMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)
        df['AtomicOrbitals|LUMO_element'] = df[
            'AtomicOrbitals|LUMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)

        df = df.replace([np.inf, -np.inf, np.nan], 0)

        return modnet.featurizers.clean_df(df)

    def featurize_structure(self, df):
        """ Applies the preset structural featurizers to the input dataframe,
        expands and encodes some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one column
        per distance (at most the first 50 distances of the first row) and
        then dropped. The crystal system name and the centrosymmetry flag
        are encoded as integers.

        Args:
            df: the dataframe handed to the parent's ``featurize_structure``.

        Returns:
            The post-processed dataframe, passed through
            ``modnet.featurizers.clean_df``.

        """
        df = super().featurize_structure(df)

        # The distance grid is read from the first row only and reused as
        # column labels for every row.
        dist = df[
            "RadialDistributionFunction|radial distribution function"].iloc[0][
                'distances'][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d)
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"].apply(
                    lambda x: x['distribution'][i])

        df = df.drop("RadialDistributionFunction|radial distribution function",
                     axis=1)

        # "orthorombic" is a historical misspelling that can never match the
        # correctly spelled "orthorhombic", so such rows used to map to NaN.
        # The old key is kept and the correct spelling added with the same
        # code.
        # NOTE(review): this changes the encoded value for orthorhombic rows;
        # confirm the impact on models trained with the previous encoding.
        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            "orthorombic": 3,
            "orthorhombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7
        }

        def _int_map(x):
            # NaN never compares equal to anything, itself included, so the
            # former `x == np.nan` test was always False and NaN (which is
            # truthy) was encoded as 1. `x != x` is True only for NaN.
            if x != x:
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"].map(_int_map)

        return modnet.featurizers.clean_df(df)

    def featurize_site(self, df):
        """ Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns in which every value equals 0 are dropped.

        Args:
            df: the dataframe handed to the parent's ``featurize_site``.

        Returns:
            The post-processed dataframe, passed through
            ``modnet.featurizers.clean_df``.

        """

        # rename some features for backwards compatibility with pretrained models
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # keep only the columns that contain at least one non-zero value
        df = df.loc[:, (df != 0).any(axis=0)]

        return modnet.featurizers.clean_df(df)
Пример #11
0
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """Featurizer preset built from matminer featurizers.

    The featurizer instances are stored in the class-level tuples
    ``composition_featurizers``, ``oxid_composition_featurizers``,
    ``structure_featurizers``, ``site_featurizers``, ``dos_featurizers`` and
    ``band_featurizers``. The ``featurize_*`` methods call the parent
    implementation and then post-process the resulting dataframe.
    """

    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        #PartialRadialDistributionFunction(), #Introduces a large amount of features
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        """Store ``n_jobs`` on the instance as ``_n_jobs``.

        The parent initializer is not called here.
        """
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Applies the preset composition featurizers to the input dataframe,
        encodes the non-numeric AtomicOrbitals columns as numbers and cleans
        the output dataframe.

        The HOMO/LUMO character letters are mapped to integers (s=1, p=2,
        d=3, f=4) and the HOMO/LUMO element symbols are replaced by atomic
        numbers (-1 for values that are not strings).
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df["AtomicOrbitals|HOMO_character"] = df["AtomicOrbitals|HOMO_character"].map(
            _orbitals
        )
        df["AtomicOrbitals|LUMO_character"] = df["AtomicOrbitals|LUMO_character"].map(
            _orbitals
        )

        df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )
        df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )

        return clean_df(df)

    def featurize_structure(self, df):
        """Applies the preset structural featurizers to the input dataframe,
        expands and encodes some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one column
        per distance (at most the first 50 distances of the first row) and
        then dropped. The crystal system name and the centrosymmetry flag
        are encoded as integers.
        """
        df = super().featurize_structure(df)

        # The distance grid is read from the first row only and reused as
        # column labels for every row.
        dist = df["RadialDistributionFunction|radial distribution function"].iloc[0][
            "distances"
        ][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d
            )
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"
            ].apply(lambda x: x["distribution"][i])

        df = df.drop("RadialDistributionFunction|radial distribution function", axis=1)

        # "orthorombic" is a historical misspelling that can never match the
        # correctly spelled "orthorhombic", so such rows used to map to NaN.
        # The old key is kept and the correct spelling added with the same
        # code.
        # NOTE(review): this changes the encoded value for orthorhombic rows;
        # confirm the impact on models trained with the previous encoding.
        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            "orthorombic": 3,
            "orthorhombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            # NaN never compares equal to anything, itself included, so the
            # former `x == np.nan` test was always False and NaN (which is
            # truthy) was encoded as 1. `x != x` is True only for NaN.
            if x != x:
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Applies the preset DOS featurizers to the input dataframe,
        encodes some fields and cleans the output dataframe.

        The VBM/CBM specie columns are one-hot encoded, the VBM/CBM
        character letters are mapped to integers, and the ";"-separated
        VBM/CBM location columns are split into three float columns each.
        The original location columns and the ``"dos"`` column are dropped.
        """

        df = super().featurize_dos(df)

        hotencodeColumns = ["DOSFeaturizer|vbm_specie_1", "DOSFeaturizer|cbm_specie_1"]

        one_hot = pd.get_dummies(df[hotencodeColumns])
        df = df.drop(hotencodeColumns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}

        df["DOSFeaturizer|vbm_character_1"] = df[
            "DOSFeaturizer|vbm_character_1"
        ].map(_orbitals)
        df["DOSFeaturizer|cbm_character_1"] = df[
            "DOSFeaturizer|cbm_character_1"
        ].map(_orbitals)

        # Splitting one feature into several floating features
        # e.g. number;number;number into three columns
        splitColumns = ["DOSFeaturizer|cbm_location_1", "DOSFeaturizer|vbm_location_1"]

        for column in splitColumns:
            try:
                newColumns = df[column].str.split(";", n=2, expand=True)
                for i in range(0, 3):
                    # The builtin float replaces the np.float alias, which
                    # was removed from NumPy; the resulting AttributeError
                    # used to be swallowed by a bare except, silently
                    # discarding these columns.
                    df[column + "_" + str(i)] = np.array(newColumns[i]).astype(float)
            except (AttributeError, KeyError, TypeError, ValueError):
                # Best effort: a column that cannot be split or converted
                # is skipped (it is still dropped below).
                continue
        df = df.drop(splitColumns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Applies the preset band structure featurizers to the input dataframe,
        encodes the direct-gap flag as an integer and cleans the output
        dataframe.

        The ``"bandstructure"`` column is dropped.
        """

        df = super().featurize_bandstructure(df)

        def _int_map(x):
            # Compare the string form so that bools and the strings
            # "True"/"False" are both handled; anything else maps to None.
            if str(x) == "False":
                return 0
            elif str(x) == "True":
                return 1
            return None

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns in which every value equals 0 are dropped.
        """

        # featurizer names replaced by shorter/legacy aliases in the parent
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # keep only the columns that contain at least one non-zero value
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
def AddFeatures(df):  # Add features by Matminer
    """Append matminer composition and structure features to ``df``.

    The ``"formula"`` column is converted to a ``"composition"`` column and
    then to an oxidation-decorated ``"composition_oxid"`` column; these feed
    the composition featurizers. The structure and site-statistics
    featurizers read the ``"structure"`` column.

    Args:
        df: dataframe providing the ``"formula"`` and ``"structure"``
            columns.

    Returns:
        The dataframe returned by the last featurizer, carrying all added
        feature columns.
    """
    from pymatgen.analysis.local_env import CrystalNN

    from matminer.featurizers.conversions import (
        CompositionToOxidComposition,
        StrToComposition,
    )
    from matminer.featurizers.composition import (
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElementProperty,
        Miedema,
        OxidationStates,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.site import (
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        CoordinationNumber,
    )
    from matminer.featurizers.structure import (
        ChemicalOrdering,
        DensityFeatures,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        SiteStatsFingerprint,
        StructuralComplexity,
        StructuralHeterogeneity,
    )

    # Conversions and the two featurizers that run without ignore_errors.
    df = StrToComposition().featurize_dataframe(df, "formula")
    magpie = ElementProperty.from_preset(preset_name="magpie")
    df = magpie.featurize_dataframe(df, col_id="composition")
    df = CompositionToOxidComposition().featurize_dataframe(df, "composition")
    df = OxidationStates().featurize_dataframe(df, "composition_oxid")

    # Each factory is called right before its featurizer is applied.
    composition_factories = (
        ElectronAffinity,
        BandCenter,
        CohesiveEnergy,
        Miedema,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    for make_featurizer in composition_factories:
        df = make_featurizer().featurize_dataframe(df,
                                                   "composition_oxid",
                                                   ignore_errors=True)

    # This is the border between compositional features and structural
    # features. Skip the following featurizers to use only compositional
    # features.
    structure_factories = (
        GlobalSymmetryFeatures,
        StructuralComplexity,
        ChemicalOrdering,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        StructuralHeterogeneity,
        lambda: SiteStatsFingerprint(
            AverageBondLength(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(
            AverageBondAngle(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(BondOrientationalParameter()),
        lambda: SiteStatsFingerprint(CoordinationNumber()),
        DensityFeatures,
    )
    for make_featurizer in structure_factories:
        df = make_featurizer().featurize_dataframe(df,
                                                   "structure",
                                                   ignore_errors=True)

    return df