def test_density_features(self):
    """Check the DensityFeatures output for the diamond and NaCl fixtures."""
    featurizer = DensityFeatures()

    # Reference values for each fixture, listed in the order in which
    # featurize() returns them; compared to two decimal places.
    cases = (
        (self.diamond, (3.49, 5.71, 0.25)),
        (self.nacl, (2.105, 23.046, 0.620)),
    )
    for structure, references in cases:
        features = featurizer.featurize(structure)
        for position, reference in enumerate(references):
            self.assertAlmostEqual(features[position], reference, places=2)
def test_density_features(self):
    """Check DensityFeatures values and its precheck on a disordered copy of NaCl."""
    featurizer = DensityFeatures()

    # Reference values for each fixture, listed in the order in which
    # featurize() returns them; compared to two decimal places.
    cases = (
        (self.diamond, (3.49, 5.71, 0.25)),
        (self.nacl, (2.105, 23.046, 0.620)),
    )
    for structure, references in cases:
        features = featurizer.featurize(structure)
        for position, reference in enumerate(references):
            self.assertAlmostEqual(features[position], reference, places=2)

    # Work on a deep copy so the shared NaCl fixture stays untouched; the
    # fractional Cl/H substitution must make the single-structure precheck fail.
    disordered_nacl = copy.deepcopy(self.nacl)
    disordered_nacl.replace_species({"Cl1-": "Cl0.99H0.01"})
    self.assertFalse(featurizer.precheck(disordered_nacl))

    # With two accepted structures and one rejected one, the dataframe-level
    # precheck is expected to report 2/3.
    frame = pd.DataFrame(
        {"structure": [self.diamond, self.nacl, disordered_nacl]}
    )
    passing_fraction = featurizer.precheck_dataframe(frame, "structure")
    self.assertAlmostEqual(passing_fraction, 2 / 3)
def _extract_features(self, df_input):
    """
    Extract features using Matminer from the 'structure' column in df_input

    Note:
        df_input is modified in place: its 'theoretical' and 'structure'
        columns are dropped and the extracted feature columns are added.

    Args:
        df_input (DataFrame): Pandas DataFrame which contains features
            from Materials Project Database of the input samples. The
            columns 'theoretical', 'structure',
            'formation_energy_per_atom', 'PU_label' and 'icsd_ids' are
            required.

    Returns:
        df_extracted (DataFrame): Pandas DataFrame which contains features
            of input samples extracted using Matminer. Only rows with a
            positive cohesive energy are kept, 'icsd_ids' is removed and
            'PU_label' is the last column.
    """
    # Dropping the 'theoretical' column
    df_input.drop(columns=["theoretical"], inplace=True)

    # Extracting the features
    dfeat = DensityFeatures()
    symmfeat = GlobalSymmetryFeatures()
    mfeat = Meredig()
    cefeat = CohesiveEnergy()

    structures = df_input["structure"]

    # Featurize each structure once and read all three values from that
    # single result instead of re-running the featurizer per column.
    density_rows = structures.apply(dfeat.featurize)
    density_columns = ("density", "vpa", "packing fraction")
    for position, column in enumerate(density_columns):
        df_input[column] = density_rows.apply(
            lambda row, i=position: row[i])

    df_input["spacegroup_num"] = structures.apply(
        lambda x: symmfeat.featurize(x)[0])

    df_input["cohesive_energy"] = df_input.apply(
        lambda x: cefeat.featurize(
            x.structure.composition,
            formation_energy_per_atom=x.formation_energy_per_atom,
        )[0],
        axis=1,
    )

    # Same idea for the Meredig featurizer: one call per composition, then
    # pick the entries of interest by their position in the returned list.
    meredig_rows = structures.apply(
        lambda x: mfeat.featurize(x.composition))
    meredig_columns = (
        ("mean AtomicWeight", -17),
        ("range AtomicRadius", -12),
        ("mean AtomicRadius", -11),
        ("range Electronegativity", -10),
        ("mean Electronegativity", -9),
    )
    for column, position in meredig_columns:
        df_input[column] = meredig_rows.apply(
            lambda row, i=position: row[i])

    # Drop 'structure' column
    df_input.drop(columns=["structure"], inplace=True)

    # Replace missing values by the column mean, then keep only the rows
    # with a positive cohesive energy. numeric_only=True is required
    # because non-numeric columns (e.g. 'icsd_ids') are still present and
    # pandas >= 2.0 raises a TypeError when asked to average them.
    column_means = df_input.mean(numeric_only=True)
    df_extracted = df_input.fillna(column_means).query(
        "cohesive_energy > 0.0")

    # Move the 'PU_label' column to the end and drop the icsd_ids column
    pu_label = df_extracted["PU_label"]
    df_extracted = df_extracted.drop(columns=["PU_label", "icsd_ids"])
    df_extracted["PU_label"] = pu_label

    return df_extracted