def generate_data(): df = load_elastic_tensor() df.to_csv('原始elastic数据.csv') print(df.columns) unwanted_columns = [ 'volume', 'nsites', 'compliance_tensor', 'elastic_tensor', 'elastic_tensor_original', 'K_Voigt', 'G_Voigt', 'K_Reuss', 'G_Reuss' ] df = df.drop(unwanted_columns, axis=1) print(df.head()) df.to_csv('扔掉不需要的部分.csv') #首先使用describe获得对于数据的整体把握 print(df.describe()) df.describe().to_csv('general_look.csv') #通过观察数据发现并没有什么异常之处 df = StrToComposition().featurize_dataframe(df, 'formula') print(df.head()) df.to_csv('引入composition.csv') #下一步,我们需要其中一个特征化来增加一系列的特征算符 ep_feat = ElementProperty.from_preset(preset_name='magpie') df = ep_feat.featurize_dataframe( df, col_id='composition') #将composition这一列作为特征化的输入 print(df.head()) print(ep_feat.citations()) df.to_csv('将composition特征化后.csv') #开始引入新的特征化算符吧 df = CompositionToOxidComposition().featurize_dataframe( df, 'composition') #引入了氧化态的相关特征 os_feat = OxidationStates() df = os_feat.featurize_dataframe(df, col_id='composition_oxid') print(df.head()) df.to_csv('引入氧化态之后.csv') #其实除了基于composition的特征之外还有很多其他的,比如基于结构的 df_feat = DensityFeatures() df = df_feat.featurize_dataframe(df, 'structure') print(df.head()) df.to_csv('引入结构中的密度.csv') print(df_feat.feature_labels())
def plot_mean_elastic_tensors(): """ An example of heatmap_df where the input data is real and in dataframe format. We want to look at how average of the elastic constant tensor changes with the density and crystal system. Note that density is not a categorical variable in the final dataframe. Returns: plotly plot in "offline" mode poped in the default browser. """ df = load_elastic_tensor() # data preparation: df['Mean Elastic Constant'] = df['elastic_tensor'].apply(lambda x: np.mean(x)) gs = GlobalSymmetryFeatures(desired_features=['crystal_system']) df = gs.featurize_dataframe(df, col_id='structure') dsf = DensityFeatures(desired_features=['density']) df = dsf.featurize_dataframe(df, col_id='structure') # actual plotting pf = PlotlyFig(fontscale=0.75, filename='static_elastic_constants', colorscale='RdBu') pf.heatmap_df(df[['crystal_system', 'density', 'Mean Elastic Constant']])
def add_cs_features(df,rdf_flag=False): df["composition"] = str_to_composition(df["pretty_formula"]) df["composition_oxid"] = composition_to_oxidcomposition(df["composition"]) df["structure"] = dict_to_object(df["structure"]) vo = ValenceOrbital() df = vo.featurize_dataframe(df,"composition") ox = OxidationStates() df = ox.featurize_dataframe(df, "composition_oxid") # structure features den = DensityFeatures() df = den.featurize_dataframe(df, "structure") if rdf_flag: rdf = RadialDistributionFunction(cutoff=15.0,bin_size=0.2) df = rdf.featurize_dataframe(df, "structure") return df
def plot_mean_elastic_tensors(): """ An example of heatmap_df where the input data is real and in dataframe format. We want to look at how average of the elastic constant tensor changes with the density and crystal system. Note that density is not a categorical variable in the final dataframe. Returns: plotly plot in "offline" mode poped in the default browser. """ df = load_dataset("elastic_tensor_2015") # data preparation: df['Mean Elastic Constant'] = df['elastic_tensor'].apply( lambda x: np.mean(x)) gs = GlobalSymmetryFeatures(desired_features=['crystal_system']) df = gs.featurize_dataframe(df, col_id='structure') dsf = DensityFeatures(desired_features=['density']) df = dsf.featurize_dataframe(df, col_id='structure') # actual plotting pf = PlotlyFig(fontscale=0.75, filename='static_elastic_constants', colorscale='RdBu') pf.heatmap_df(df[['crystal_system', 'density', 'Mean Elastic Constant']])
ep_feat = ElementProperty.from_preset(preset_name="magpie") data_3 = ep_feat.featurize_dataframe(data_3, col_id="composition") from matminer.featurizers.conversions import CompositionToOxidComposition from matminer.featurizers.composition import OxidationStates data_3 = CompositionToOxidComposition().featurize_dataframe( data_3, "composition") os_feat = OxidationStates() data_3 = os_feat.featurize_dataframe(data_3, "composition_oxid") from matminer.featurizers.structure import DensityFeatures df_feat = DensityFeatures() data_3 = df_feat.featurize_dataframe(data_3, "structure") unwanted_columns = [ "elasticity", "material_id", "nsites", "compliance_tensor", "elastic_tensor", "elastic_tensor_original", "K_Voigt", "G_Voigt", "K_Reuss", "G_Reuss", "warnings" ] data_4 = data_3.drop(unwanted_columns, axis=1) # In[ ]: # Additional data cleaning after some trial runs y = data_4['K_VRH'].values excluded = [ "G_VRH", "K_VRH", "elastic_anisotropy", "pretty_formula", "poisson_ratio", "structure", "composition", "composition_oxid", "G_Voigt_Reuss_Hill",
ep_feat = ElementProperty.from_preset(preset_name="magpie") df = ep_feat.featurize_dataframe(df, col_id='composition') from matminer.featurizers.conversions import CompositionToOxidComposition from matminer.featurizers.composition import OxidationStates df = CompositionToOxidComposition().featurize_dataframe(df, "composition") os_feat = OxidationStates() df = os_feat.featurize_dataframe(df, "composition_oxid") from matminer.featurizers.structure import DensityFeatures df_feat = DensityFeatures() df = df_feat.featurize_dataframe(df, col_id='structure') y = df['K_VRH'].values excluded = ["G_VRH", "K_VRH", "elastic_anisotropy", "formula", "material_id", "poisson_ratio", "structure", "composition", "composition_oxid"] X = df.drop(excluded, axis=1) print("There are {} possible descriptors:\n\n{}".format(X.shape[1], X.columns.values)) from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error import numpy as np lr = LinearRegression() lr.fit(X, y) print(lr.score(X, y)) print(np.sqrt(mean_squared_error(y_true=y, y_pred=lr.predict(X))))
def AddFeatures(df): # Add features by Matminer from matminer.featurizers.conversions import StrToComposition df = StrToComposition().featurize_dataframe(df, "formula") from matminer.featurizers.composition import ElementProperty ep_feat = ElementProperty.from_preset(preset_name="magpie") df = ep_feat.featurize_dataframe( df, col_id="composition" ) # input the "composition" column to the featurizer from matminer.featurizers.conversions import CompositionToOxidComposition from matminer.featurizers.composition import OxidationStates df = CompositionToOxidComposition().featurize_dataframe(df, "composition") os_feat = OxidationStates() df = os_feat.featurize_dataframe(df, "composition_oxid") from matminer.featurizers.composition import ElectronAffinity ea_feat = ElectronAffinity() df = ea_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import BandCenter bc_feat = BandCenter() df = bc_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import CohesiveEnergy ce_feat = CohesiveEnergy() df = ce_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import Miedema m_feat = Miedema() df = m_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import TMetalFraction tmf_feat = TMetalFraction() df = tmf_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import ValenceOrbital vo_feat = ValenceOrbital() df = vo_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.composition import YangSolidSolution yss_feat = YangSolidSolution() df = yss_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True) from matminer.featurizers.structure import GlobalSymmetryFeatures # This is the border between compositional features and structural features. Comment out the following featurizers to use only compostional features. gsf_feat = GlobalSymmetryFeatures() df = gsf_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import StructuralComplexity sc_feat = StructuralComplexity() df = sc_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import ChemicalOrdering co_feat = ChemicalOrdering() df = co_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import MaximumPackingEfficiency mpe_feat = MaximumPackingEfficiency() df = mpe_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import MinimumRelativeDistances mrd_feat = MinimumRelativeDistances() df = mrd_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import StructuralHeterogeneity sh_feat = StructuralHeterogeneity() df = sh_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import SiteStatsFingerprint from matminer.featurizers.site import AverageBondLength from pymatgen.analysis.local_env import CrystalNN bl_feat = SiteStatsFingerprint( AverageBondLength(CrystalNN(search_cutoff=20))) df = bl_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.site import AverageBondAngle ba_feat = SiteStatsFingerprint( AverageBondAngle(CrystalNN(search_cutoff=20))) df = ba_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.site import BondOrientationalParameter bop_feat = SiteStatsFingerprint(BondOrientationalParameter()) df = bop_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.site import CoordinationNumber cn_feat = SiteStatsFingerprint(CoordinationNumber()) df = cn_feat.featurize_dataframe(df, "structure", ignore_errors=True) from matminer.featurizers.structure import DensityFeatures df_feat = DensityFeatures() df = df_feat.featurize_dataframe(df, "structure", ignore_errors=True) return (df)