示例#1
0
    def test_density_features(self):
        """Compare DensityFeatures output for diamond and NaCl to references."""
        featurizer = DensityFeatures()

        # The first three feature values are compared to two decimal places.
        diamond_feats = featurizer.featurize(self.diamond)
        for idx, expected in enumerate((3.49, 5.71, 0.25)):
            self.assertAlmostEqual(diamond_feats[idx], expected, places=2)

        nacl_feats = featurizer.featurize(self.nacl)
        for idx, expected in enumerate((2.105, 23.046, 0.620)):
            self.assertAlmostEqual(nacl_feats[idx], expected, places=2)
示例#2
0
    def test_density_features(self):
        """Check the three DensityFeatures values for diamond and for NaCl."""
        featurizer = DensityFeatures()

        # Reference values per fixture, in the order featurize() returns them.
        diamond_expected = (3.49, 5.71, 0.25)
        nacl_expected = (2.105, 23.046, 0.620)

        diamond_feats = featurizer.featurize(self.diamond)
        for position, reference in enumerate(diamond_expected):
            self.assertAlmostEqual(diamond_feats[position], reference, places=2)

        nacl_feats = featurizer.featurize(self.nacl)
        for position, reference in enumerate(nacl_expected):
            self.assertAlmostEqual(nacl_feats[position], reference, places=2)
示例#3
0
    def test_density_features(self):
        """Check DensityFeatures values and its precheck on a mixed-site NaCl."""
        featurizer = DensityFeatures()

        # The first three feature values are compared to two decimal places.
        diamond_feats = featurizer.featurize(self.diamond)
        for idx, expected in enumerate((3.49, 5.71, 0.25)):
            self.assertAlmostEqual(diamond_feats[idx], expected, places=2)

        nacl_feats = featurizer.featurize(self.nacl)
        for idx, expected in enumerate((2.105, 23.046, 0.620)):
            self.assertAlmostEqual(nacl_feats[idx], expected, places=2)

        # Swap Cl1- for a mixed Cl/H species on a deep copy so the shared
        # fixture is untouched; precheck is expected to reject the result.
        disordered = copy.deepcopy(self.nacl)
        disordered.replace_species({"Cl1-": "Cl0.99H0.01"})
        self.assertFalse(featurizer.precheck(disordered))

        # Two of the three structures should pass the dataframe-level precheck.
        frame = pd.DataFrame(
            {"structure": [self.diamond, self.nacl, disordered]}
        )
        passing_fraction = featurizer.precheck_dataframe(frame, "structure")
        self.assertAlmostEqual(passing_fraction, 2 / 3)
示例#4
0
    def test_density_features(self):
        """Verify DensityFeatures featurization and precheck behaviour."""
        featurizer = DensityFeatures()

        # Reference values per fixture, in the order featurize() returns them.
        diamond_expected = (3.49, 5.71, 0.25)
        nacl_expected = (2.105, 23.046, 0.620)

        diamond_feats = featurizer.featurize(self.diamond)
        for position, reference in enumerate(diamond_expected):
            self.assertAlmostEqual(diamond_feats[position], reference, places=2)

        nacl_feats = featurizer.featurize(self.nacl)
        for position, reference in enumerate(nacl_expected):
            self.assertAlmostEqual(nacl_feats[position], reference, places=2)

        # Build a variant of NaCl with a mixed Cl/H species (on a deep copy,
        # leaving the fixture intact) and assert that precheck rejects it.
        mixed_site_nacl = copy.deepcopy(self.nacl)
        mixed_site_nacl.replace_species({"Cl1-": "Cl0.99H0.01"})
        self.assertFalse(featurizer.precheck(mixed_site_nacl))

        # precheck_dataframe should report 2 of the 3 structures as passing.
        candidates = [self.diamond, self.nacl, mixed_site_nacl]
        frame = pd.DataFrame({"structure": candidates})
        self.assertAlmostEqual(
            featurizer.precheck_dataframe(frame, "structure"), 2 / 3
        )
示例#5
0
    def _extract_features(self, df_input):
        """
        Extract features using Matminer from the 'structure' column in
            df_input

        The input frame is not modified; all work happens on a copy.

         Args:
             df_input (DataFrame): Pandas DataFrame which contains features
                from Materials Project Database of the input samples. It
                must provide the columns 'theoretical', 'structure',
                'formation_energy_per_atom', 'PU_label' and 'icsd_ids'.

         Returns:
             df_extracted (DataFrame): Pandas DataFrame which contains
                features of input samples extracted using Matminer. The
                'theoretical', 'structure' and 'icsd_ids' columns are
                removed, 'PU_label' is moved to the last position, missing
                values are replaced by the column mean and only rows with
                cohesive_energy > 0.0 are kept.

         Raises:
             KeyError: If one of the required columns is missing.

        """

        # Dropping the 'theoretical' column. drop() returns a new frame, so
        # the caller's DataFrame is left untouched and the method can be
        # called again on the same input.
        df_work = df_input.drop(columns=["theoretical"])

        # Extracting the features
        dfeat = DensityFeatures()
        symmfeat = GlobalSymmetryFeatures()
        mfeat = Meredig()
        cefeat = CohesiveEnergy()

        structures = list(df_work["structure"])

        # Featurize every structure once per featurizer and pick the needed
        # entries afterwards, instead of re-running featurize() per column.
        density_rows = [dfeat.featurize(s) for s in structures]
        df_work["density"] = [row[0] for row in density_rows]
        df_work["vpa"] = [row[1] for row in density_rows]
        df_work["packing fraction"] = [row[2] for row in density_rows]

        df_work["spacegroup_num"] = [
            symmfeat.featurize(s)[0] for s in structures
        ]

        df_work["cohesive_energy"] = [
            cefeat.featurize(
                s.composition,
                formation_energy_per_atom=e_form,
            )[0]
            for s, e_form in zip(
                structures, df_work["formation_energy_per_atom"]
            )
        ]

        # Positions (counted from the end) of the Meredig entries that are
        # kept, keyed by the column each one is stored under.
        meredig_positions = {
            "mean AtomicWeight": -17,
            "range AtomicRadius": -12,
            "mean AtomicRadius": -11,
            "range Electronegativity": -10,
            "mean Electronegativity": -9,
        }
        meredig_rows = [mfeat.featurize(s.composition) for s in structures]
        for column, position in meredig_positions.items():
            df_work[column] = [row[position] for row in meredig_rows]

        # Drop 'structure' column
        df_work = df_work.drop(columns=["structure"])

        # Replace missing values by the column mean, then keep only rows
        # with a positive cohesive energy. numeric_only=True is required
        # because the frame may still hold non-numeric columns, on which
        # DataFrame.mean() raises a TypeError in pandas >= 2.0.
        column_means = df_work.mean(numeric_only=True)
        df_extracted = df_work.fillna(column_means).query(
            "cohesive_energy > 0.0")

        # Move the 'PU_label' column to the end and drop the icsd_ids column
        pu_label = df_extracted["PU_label"]
        df_extracted = df_extracted.drop(columns=["PU_label", "icsd_ids"])
        df_extracted["PU_label"] = pu_label

        return df_extracted