Exemplo n.º 1
0
 def __init__(self, pbar=False):
     """Create the regressor and the tools used to featurize compositions.

     Args:
         pbar: Stored unchanged on the instance as ``self.pbar``.
     """
     self.pbar = pbar

     # Random forest with 500 trees, all available cores, verbose logging.
     self.regressor = RandomForestRegressor(
         n_estimators=500,
         n_jobs=-1,
         verbose=3,
     )

     # Converter kept on the instance for later use.
     self.stc = StrToComposition()

     # One featurizer combining the "magpie" elemental-property preset
     # with the element fractions.
     magpie_properties = ElementProperty.from_preset("magpie")
     element_fractions = ElementFraction()
     self.featurizer = MultipleFeaturizer(
         [magpie_properties, element_fractions]
     )
Exemplo n.º 2
0
    def split(self, X, y=None, groups=None):
        """Yield one train/test split per entry of ``X``.

        Every entry is used once as the single test point. Its training set
        is made of all entries that are *not* within ``self.dist_threshold``
        of it, measured between element-fraction vectors.

        Args:
            X: Sequence of entries, each accepted by ``Composition(...)``.
            y: Unused.
            groups: Unused.

        Yields:
            tuple: ``(train_inds, [i])`` where ``train_inds`` is an array of
                training indices and ``i`` is the index of the test entry.
        """
        # Element-fraction vector of every entry
        compositions = [Composition(entry) for entry in X]
        fraction_vectors = ElementFraction().featurize_many(compositions,
                                                            pbar=False)

        # Neighbor lookup fitted on those vectors
        lookup = NearestNeighbors(**self.nn_kwargs)
        lookup.fit(fraction_vectors)

        # Indices of all entries, from which the close ones get removed
        every_index = np.arange(len(X))

        for test_index, vector in enumerate(fraction_vectors):
            # A single query point is passed, so a single index array comes back
            neighbor_sets = lookup.radius_neighbors(
                [vector], self.dist_threshold, return_distance=False)
            (excluded,) = neighbor_sets

            # Training set: everything except the points that are too close
            yield np.setdiff1d(every_index, excluded), [test_index]
Exemplo n.º 3
0
    def featurize(self, comp):
        """
        Build the feature vector of a composition.

        The result is the concatenation, in this order, of the element
        fractions, the statistics of elemental properties named in
        ``self._element_property_feature_labels``, and the valence-orbital
        features.

        Args:
            comp: Pymatgen composition object

        Returns:
            list: Element-fraction features, then elemental-property
                statistics, then valence-orbital features
        """

        # Part 1: element fractions, straight from the ElementFraction featurizer
        fraction_part = ElementFraction().featurize(comp)

        # Part 2: statistics on elemental properties
        elements, fractions = zip(*comp.element_composition.items())
        property_part = []
        for label in self._element_property_feature_labels:
            # A label reads "<statistic> <property name>"; the property name
            # may itself contain spaces, so only the first space splits it.
            stat, _, attr = label.partition(" ")
            values = [self.data_source.get_elemental_property(el, attr) for el in elements]
            property_part.append(self.pstats.calc_stat(values, stat, fractions))

        # Part 3: valence-orbital statistics from the ValenceOrbital featurizer
        orbital_featurizer = ValenceOrbital(orbitals=("s", "p", "d", "f"), props=("avg", "frac"))
        orbital_part = orbital_featurizer.featurize(comp)

        return fraction_part + property_part + orbital_part
Exemplo n.º 4
0
def featurize_composition(df: pd.DataFrame) -> pd.DataFrame:
    """ Add matminer composition features to a DataFrame of structures.

    A `"composition"` column is derived from every structure and the
    composition featurizers listed in the body are applied to it. The
    oxidation-state featurizers are then applied to the column produced by
    `CompositionToOxidComposition`. Finally, orbital characters and
    HOMO/LUMO elements are converted to integers and infinities/NaNs are
    replaced by 0.

    Args:
        df (pandas.DataFrame): the input dataframe with `"structure"`
            column containing `pymatgen.Structure` objects.

    Returns:
        pandas.DataFrame: the output of `clean_df` applied to the
            decorated copy of the input.

    """
    logging.info("Applying composition featurizers...")

    # Work on a copy so the caller's frame is left alone.
    df = df.copy()
    df['composition'] = df['structure'].apply(lambda structure: structure.composition)

    composition_featurizers = [
        ElementProperty.from_preset("magpie"),
        AtomicOrbitals(),
        BandCenter(),
        # ElectronAffinity() was not used in the paper preset
        Stoichiometry(),
        ValenceOrbital(),
        IonProperty(),
        ElementFraction(),
        TMetalFraction(),
        # CohesiveEnergy() was not used in the paper preset
        Miedema(),
        YangSolidSolution(),
        AtomicPackingEfficiency(),
    ]
    df = MultipleFeaturizer(composition_featurizers).featurize_dataframe(
        df, "composition", multiindex=True, ignore_errors=True
    )
    # Flatten the two-level column index into "<featurizer>|<feature>" names.
    df.columns = df.columns.map('|'.join).str.strip('|')

    oxidation_featurizers = MultipleFeaturizer([OxidationStates(), ElectronegativityDiff()])

    df = CompositionToOxidComposition().featurize_dataframe(df, "Input Data|composition")

    df = oxidation_featurizers.featurize_dataframe(
        df, "composition_oxid", multiindex=True, ignore_errors=True
    )
    df = df.rename(columns={'Input Data': ''})
    df.columns = df.columns.map('|'.join).str.strip('|')

    # Orbital letters become integer codes.
    orbital_codes = {"s": 1, "p": 2, "d": 3, "f": 4}
    for level in ("HOMO", "LUMO"):
        character_column = 'AtomicOrbitals|' + level + '_character'
        df[character_column] = df[character_column].map(orbital_codes)

    def _atomic_number(symbol):
        """Return the atomic number for an element symbol, -1 for non-strings."""
        if isinstance(symbol, str):
            return Element(symbol).Z
        return -1

    for level in ("HOMO", "LUMO"):
        element_column = 'AtomicOrbitals|' + level + '_element'
        df[element_column] = df[element_column].apply(_atomic_number)

    without_invalid = df.replace([np.inf, -np.inf, np.nan], 0)

    return clean_df(without_invalid)
Exemplo n.º 5
0
    def __init__(self, threshold=0.01, n_nearest=(1, 3, 5), max_types=6):
        """
        Set up the featurizer options and lookup tools

        Args:
            threshold (float): Threshold to use for determining whether
                a cluster is efficiently packed.
            n_nearest ({int}): Number of nearest clusters to use when
                considering features
            max_types (int): Maximum number of atom types to consider when
                looking for efficient clusters. The process for finding
                efficient clusters is very expensive for large numbers
                of types
        """

        # User-supplied options
        self.threshold = threshold
        self.n_nearest = n_nearest
        self.max_types = max_types

        # Converter from composition objects to a vector of fractions,
        # and the length of the vector it produces
        self._el_frac = ElementFraction()
        hydrogen_vector = self._el_frac.featurize(Composition('H'))
        self._n_elems = len(hydrogen_vector)

        # Source of the radii
        self._data_source = MagpieData()

        # Ideal radius ratio for each tabulated key (3 through 24)
        self.ideal_ratio = {
            3: 0.154701, 4: 0.224745, 5: 0.361654, 6: 0.414214,
            7: 0.518145, 8: 0.616517, 9: 0.709914, 10: 0.798907,
            11: 0.884003, 12: 0.902113, 13: 0.976006, 14: 1.04733,
            15: 1.11632, 16: 1.18318, 17: 1.2481, 18: 1.31123,
            19: 1.37271, 20: 1.43267, 21: 1.49119, 22: 1.5484,
            23: 1.60436, 24: 1.65915,
        }
Exemplo n.º 6
0
class DeBreuck2020Featurizer(modnet.featurizers.MODFeaturizer):
    """ Featurizer presets used for the paper 'Machine learning
    materials properties for small datasets' by Pierre-Paul De Breuck,
    Geoffroy Hautier & Gian-Marco Rignanese, arXiv:2004.14766 (2020).

    Uses most of the featurizers implemented by matminer at the time of
    writing with their default hyperparameters and presets.

    The matminer classes are imported inside the class body and the
    featurizer instances are created when the class is defined. Each
    `featurize_*` method calls the parent implementation and then
    post-processes the resulting columns.

    """
    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        # CohesiveEnergy, - This descriptor was not used in the paper preset
        # ElectronAffinity, - This descriptor was not used in the paper preset
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        # BagofBonds, - This descriptor was not used in the paper preset
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        # PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxide_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction(),
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
        # BagofBonds(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    def featurize_composition(self, df):
        """ Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        After the parent featurization, the HOMO/LUMO orbital characters
        are mapped to integers (s=1, p=2, d=3, f=4), the HOMO/LUMO element
        symbols are replaced by their atomic number (-1 for non-string
        values), and infinities/NaNs are replaced by 0.

        Args:
            df: dataframe passed on to the parent `featurize_composition`.

        Returns:
            The output of `modnet.featurizers.clean_df` on the
            post-processed dataframe.

        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df['AtomicOrbitals|HOMO_character'] = df[
            'AtomicOrbitals|HOMO_character'].map(_orbitals)
        df['AtomicOrbitals|LUMO_character'] = df[
            'AtomicOrbitals|LUMO_character'].map(_orbitals)

        df['AtomicOrbitals|HOMO_element'] = df[
            'AtomicOrbitals|HOMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)
        df['AtomicOrbitals|LUMO_element'] = df[
            'AtomicOrbitals|LUMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)

        df = df.replace([np.inf, -np.inf, np.nan], 0)

        return modnet.featurizers.clean_df(df)

    def featurize_structure(self, df):
        """ Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one
        scalar column per distance bin (at most the first 50 bins, named
        after the distances found in the first row) and then dropped. The
        crystal system name and the centrosymmetry flag are converted to
        integers.

        Args:
            df: dataframe passed on to the parent `featurize_structure`.

        Returns:
            The output of `modnet.featurizers.clean_df` on the
            post-processed dataframe.

        """
        df = super().featurize_structure(df)

        rdf_column = "RadialDistributionFunction|radial distribution function"

        # The distance bins of the first row name the expanded columns.
        dist = df[rdf_column].iloc[0]['distances'][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d)
            # `i` is bound as a default so the lambda does not depend on the
            # loop variable after this iteration.
            df[_rdf_key] = df[rdf_column].apply(
                lambda x, i=i: x['distribution'][i])

        df = df.drop(rdf_column, axis=1)

        # "orthorhombic" is the spelling reported by pymatgen; the original
        # table only had the misspelled "orthorombic" key, so orthorhombic
        # structures were never matched and mapped to NaN. The misspelled
        # key is kept as a harmless legacy alias.
        # NOTE(review): this changes the feature value for orthorhombic
        # rows - confirm the impact on previously trained models.
        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            "orthorhombic": 3,
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7
        }

        def _int_map(x):
            """Map a truthy value to 1 and a falsy or NaN value to 0."""
            # NaN never compares equal to anything (itself included), so
            # `x == np.nan` was always False and NaN, being truthy, ended
            # up as 1. Test for it explicitly instead.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"].map(_int_map)

        return modnet.featurizers.clean_df(df)

    def featurize_site(self, df):
        """ Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns in which every value equals 0 are removed before cleaning.

        Args:
            df: dataframe passed on to the parent `featurize_site`.

        Returns:
            The output of `modnet.featurizers.clean_df` on the
            post-processed dataframe.

        """

        # rename some features for backwards compatibility with pretrained models
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)

        # Keep only the columns holding at least one non-zero value.
        df = df.loc[:, (df != 0).any(axis=0)]

        return modnet.featurizers.clean_df(df)
Exemplo n.º 7
0
    def test_ape(self):
        """Exercise the AtomicPackingEfficiency featurizer end to end.

        Covers the ideal radius ratio lookup, the cluster lookup tool and
        its cache statistics, the nearest-cluster distances, the
        simultaneous packing efficiency and the dataframe interface.

        The statements must stay in this order: the cache assertions count
        the hits and misses accumulated by the preceding calls.
        """
        f = AtomicPackingEfficiency()
        ef = ElementFraction()
        ef.set_n_jobs(1)

        # Test the APE calculation routines.
        # Inputs 2 and 27 are expected to give the same ratios as the
        # smallest and largest tabulated values.
        self.assertAlmostEqual(1.11632, f.get_ideal_radius_ratio(15))
        self.assertAlmostEqual(0.154701, f.get_ideal_radius_ratio(2))
        self.assertAlmostEqual(1.65915, f.get_ideal_radius_ratio(27))
        self.assertAlmostEqual(15, f.find_ideal_cluster_size(1.116)[0])
        self.assertAlmostEqual(3, f.find_ideal_cluster_size(0.1)[0])
        self.assertAlmostEqual(24, f.find_ideal_cluster_size(2)[0])

        # Test the nearest neighbor lookup tool
        nn_lookup = f.create_cluster_lookup_tool(
            [Element('Cu'), Element('Zr')])

        #  Check that the table gets the correct structures:
        #  each expected cluster must sit at distance 0 from a stored entry,
        #  and the table must hold exactly these 8 entries
        stable_clusters = [
            Composition('CuZr10'),
            Composition('Cu6Zr6'),
            Composition('Cu8Zr5'),
            Composition('Cu13Zr1'),
            Composition('Cu3Zr12'),
            Composition('Cu8Zr8'),
            Composition('Cu12Zr5'),
            Composition('Cu17Zr')
        ]
        ds, _ = nn_lookup.kneighbors(ef.featurize_many(stable_clusters),
                                     n_neighbors=1)
        self.assertArrayAlmostEqual([[0]] * 8, ds)
        self.assertEqual(8, nn_lookup._fit_X.shape[0])

        # Swap the order of the elements, make sure it gets the same list
        nn_lookup_swapped = f.create_cluster_lookup_tool(
            [Element('Zr'), Element('Cu')])
        self.assertArrayAlmostEqual(sorted(nn_lookup._fit_X.tolist()),
                                    sorted(nn_lookup_swapped._fit_X.tolist()))

        # Make sure we had a cache hit: two calls so far, one miss and one hit
        self.assertEqual(1, f._create_cluster_lookup_tool.cache_info().misses)
        self.assertEqual(1, f._create_cluster_lookup_tool.cache_info().hits)

        # Change the tolerance, see if it changes the results properly:
        # only 2 clusters are expected with the tighter threshold
        f.threshold = 0.002
        nn_lookup = f.create_cluster_lookup_tool(
            [Element('Cu'), Element('Zr')])
        self.assertEqual(2, nn_lookup._fit_X.shape[0])
        ds, _ = nn_lookup.kneighbors(ef.featurize_many(
            [Composition('CuZr10'),
             Composition('Cu3Zr12')]),
                                     n_neighbors=1)
        self.assertArrayAlmostEqual([[0]] * 2, ds)

        # Make sure we had a cache miss: the new threshold must not reuse
        # the entry cached for the previous one
        self.assertEqual(2, f._create_cluster_lookup_tool.cache_info().misses)
        self.assertEqual(1, f._create_cluster_lookup_tool.cache_info().hits)

        # Compute the distances from Cu50Zr50
        mean_dists = f.compute_nearest_cluster_distance(Composition('CuZr'))
        self.assertArrayAlmostEqual([0.424264, 0.667602, 0.800561],
                                    mean_dists,
                                    decimal=6)

        # Compute the optimal APE for Cu50Zr50
        self.assertArrayAlmostEqual([0.000233857, 0.003508794],
                                    f.compute_simultaneous_packing_efficiency(
                                        Composition('Cu50Zr50')))

        # Test the dataframe calculator
        df = pd.DataFrame({'comp': [Composition('CuZr')]})
        df = f.featurize_dataframe(df, 'comp')

        # Presumably the input column plus five feature columns; the label
        # checked below carries the threshold set earlier in this test
        self.assertEqual(6, len(df.columns))
        self.assertIn('dist from 5 clusters |APE| < 0.002', df.columns)

        self.assertAlmostEqual(0.003508794,
                               df['mean abs simul. packing efficiency'][0])

        # Make sure it works with composition that do not match any efficient clusters
        feat = f.compute_nearest_cluster_distance(Composition('Al'))
        self.assertArrayAlmostEqual([1] * 3, feat)
Exemplo n.º 8
0
 def test_fraction(self):
     """Check the O and Fe fractions computed for the first row of ``self.df``."""
     fractions = ElementFraction().featurize_dataframe(
         self.df, col_id="composition")
     for symbol, expected in (("O", 0.6), ("Fe", 0.4)):
         self.assertEqual(fractions[symbol][0], expected)
Exemplo n.º 9
0
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """Featurizer preset covering composition, structure, site, density of
    states and band structure featurizers from matminer.

    The matminer classes are imported inside the class body and the
    featurizer instances are created when the class is defined. Each
    `featurize_*` method calls the parent implementation and then
    post-processes the resulting columns.
    """

    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction() is left out: it introduces a
        # large amount of features
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        """Store `n_jobs` on the instance as `_n_jobs`.

        Args:
            n_jobs: value kept as `self._n_jobs`; presumably the number of
                parallel jobs used by the parent class - verify there.
        """
        # NOTE(review): the parent initializer is not called here - confirm
        # this is intentional.
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The HOMO/LUMO orbital characters are mapped to integers (s=1, p=2,
        d=3, f=4) and the HOMO/LUMO element symbols are replaced by their
        atomic number (-1 for non-string values).

        Args:
            df: dataframe passed on to the parent `featurize_composition`.

        Returns:
            The output of `clean_df` on the post-processed dataframe.
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df["AtomicOrbitals|HOMO_character"] = df["AtomicOrbitals|HOMO_character"].map(
            _orbitals
        )
        df["AtomicOrbitals|LUMO_character"] = df["AtomicOrbitals|LUMO_character"].map(
            _orbitals
        )

        df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )
        df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )

        return clean_df(df)

    def featurize_structure(self, df):
        """Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one
        scalar column per distance bin (at most the first 50 bins, named
        after the distances found in the first row) and then dropped. The
        crystal system name and the centrosymmetry flag are converted to
        integers.

        Args:
            df: dataframe passed on to the parent `featurize_structure`.

        Returns:
            The output of `clean_df` on the post-processed dataframe.
        """
        df = super().featurize_structure(df)

        rdf_column = "RadialDistributionFunction|radial distribution function"

        # The distance bins of the first row name the expanded columns.
        dist = df[rdf_column].iloc[0]["distances"][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d
            )
            # `i` is bound as a default so the lambda does not depend on the
            # loop variable after this iteration.
            df[_rdf_key] = df[rdf_column].apply(lambda x, i=i: x["distribution"][i])

        df = df.drop(rdf_column, axis=1)

        # "orthorhombic" is the spelling reported by pymatgen; the original
        # table only had the misspelled "orthorombic" key, so orthorhombic
        # structures were never matched and mapped to NaN. The misspelled
        # key is kept as a harmless legacy alias.
        # NOTE(review): this changes the feature value for orthorhombic
        # rows - confirm the impact on previously trained models.
        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            "orthorhombic": 3,
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            """Map a truthy value to 1 and a falsy or NaN value to 0."""
            # NaN never compares equal to anything (itself included), so
            # `x == np.nan` was always False and NaN, being truthy, ended
            # up as 1. Test for it explicitly instead.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Applies the preset dos featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The VBM/CBM specie columns are one-hot encoded, the VBM/CBM orbital
        characters are mapped to integers, the ";"-separated location
        columns are split into three float columns each, and the "dos"
        column is dropped.

        Args:
            df: dataframe passed on to the parent `featurize_dos`.

        Returns:
            The output of `clean_df` on the post-processed dataframe.
        """

        df = super().featurize_dos(df)

        hotencodeColumns = ["DOSFeaturizer|vbm_specie_1", "DOSFeaturizer|cbm_specie_1"]

        one_hot = pd.get_dummies(df[hotencodeColumns])
        df = df.drop(hotencodeColumns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}

        df["DOSFeaturizer|vbm_character_1"] = df[
            "DOSFeaturizer|vbm_character_1"
        ].map(_orbitals)
        df["DOSFeaturizer|cbm_character_1"] = df[
            "DOSFeaturizer|cbm_character_1"
        ].map(_orbitals)

        # Splitting one feature into several floating features
        # e.g. number;number;number into three columns
        splitColumns = ["DOSFeaturizer|cbm_location_1", "DOSFeaturizer|vbm_location_1"]

        for column in splitColumns:
            try:
                newColumns = df[column].str.split(";", n=2, expand=True)
                for i in range(3):
                    # Builtin `float` replaces `np.float`, which no longer
                    # exists in current NumPy: the resulting AttributeError
                    # was swallowed below, so the split columns were never
                    # created and the data was lost when the source columns
                    # were dropped.
                    df[column + "_" + str(i)] = np.array(newColumns[i]).astype(float)
            except (KeyError, AttributeError, ValueError, TypeError):
                # Best effort, as before: skip a column that is missing,
                # does not hold strings, or cannot be converted to floats.
                continue
        df = df.drop(splitColumns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Applies the preset band structure featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The "is_gap_direct" flag is converted to 0/1 and the
        "bandstructure" column is dropped.

        Args:
            df: dataframe passed on to the parent `featurize_bandstructure`.

        Returns:
            The output of `clean_df` on the post-processed dataframe.
        """

        df = super().featurize_bandstructure(df)

        def _int_map(x):
            """Map values printing as "False"/"True" to 0/1, others to None."""
            if str(x) == "False":
                return 0
            elif str(x) == "True":
                return 1
            return None

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns in which every value equals 0 are removed before cleaning.

        Args:
            df: dataframe passed on to the parent `featurize_site`.

        Returns:
            The output of `clean_df` on the post-processed dataframe.
        """

        # Shorter names passed to the parent featurizer for these classes.
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)

        # Keep only the columns holding at least one non-zero value.
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
Exemplo n.º 10
0
 def feature_labels(self):
     """Return the labels of all features, in the order they are computed.

     Returns:
         list: Element-fraction labels (each suffixed with " fraction"),
             then the elemental-property labels, then the valence-orbital
             labels.
     """
     # Element symbols alone would be ambiguous next to the other features,
     # hence the " fraction" suffix.
     fraction_labels = []
     for symbol in ElementFraction().feature_labels():
         fraction_labels.append(symbol + " fraction")

     orbital_labels = ValenceOrbital().feature_labels()

     return fraction_labels + self._element_property_feature_labels + orbital_labels