plt.savefig(histplot_fig, dpi=300, bbox_inches='tight')

# ## Scale the data

# In[11]:

# Split the frame into the feature matrix X and the 'Alert' target y.
X = df.drop(['Alert'], axis=1)
y = pd.DataFrame(df['Alert'])

# Min-max scale the features.
# The fitted `scaler` is kept (instead of a throwaway second instance) so the
# same transformation can be re-applied later. fit_transform() returns a bare
# ndarray, so the DataFrame is rebuilt with the ORIGINAL index and column
# labels; without the index, the concat below would misalign rows (and
# introduce NaNs) whenever `df` does not carry a default 0..n-1 index.
scaler = MinMaxScaler()
X = pd.DataFrame(
    scaler.fit_transform(X.values),
    index=X.index,
    columns=X.columns,
)

# In[12]:

# Target column first, then the scaled features; rows are aligned on index.
Xy = pd.concat([y, X], axis=1)

# ## Pearson Coefficient

# In[13]:

# NOTE(review): `fix` is presumably meant to be `fig`; the name is left
# unchanged because later cells may reference it.
fix, ax = plt.subplots(figsize=(22, 22))
heatmap_data = Xy
# One-hot encode the categorical 'Embarked' column.
data = pd.get_dummies(data, columns=['Embarked'])

# Replace missing 'Age' values with the column mean.
# (Debug aid: print(data.isnull().sum()) lists the missing-value counts
# before/after this step.)
# NOTE(review): if `Imputer` is sklearn.preprocessing.Imputer, it was removed
# in scikit-learn 0.22 in favour of sklearn.impute.SimpleImputer -- confirm
# the targeted scikit-learn version.
imp = Imputer(strategy='mean')
data['Age'] = imp.fit_transform(data[['Age']])

# Keep the column labels: the scaler hands back a plain ndarray.
index = data.columns

# Normalization: min-max scale every column, then restore the labels.
data = pd.DataFrame(MinMaxScaler().fit_transform(data), columns=index)

# Separate the target class from the attributes.
y = data['Survived']
X = data.drop(columns=['Survived'])

# Outlier detection with an isolation forest fitted on the attributes.
# (Alternative that was tried: FS = EllipticEnvelope(); FS.fit(X))
FS = IsolationForest()
outliers = FS.fit(X).predict(X)

# Starts empty; presumably filled by the code that follows (not visible here).
drop = []
input_path = 'D:\\全2018_日出足够.xlsx' data_all = pd.read_excel(input_path, index_col='日期') data_all = data_all.dropna() data_ts_df = data_all[[ 'tm_mon', 'tm_mday', 'tm_wday', 'tm_yday', 'tm_week', 'id' ]] data_to_std = data_all.drop( ['tm_mon', 'tm_mday', 'tm_wday', 'tm_yday', 'tm_week', 'id'], axis=1) # 标准化 from sklearn.preprocessing import MinMaxScaler data_to_std2 = MinMaxScaler().fit_transform(data_to_std) data_to_std2 = pd.DataFrame(data_to_std2) data_to_std2 = data_to_std2.set_index(data_to_std.index) data_to_std2.columns = data_to_std.columns print(data_to_std2.shape) print(data_ts_df.shape) data_out = pd.concat([data_ts_df, data_to_std2], join='outer', axis=1) #data_out2.to_csv('test.csv') data_test = data_out[(data_out['tm_mon'] == 1) | (data_out['tm_mon'] == 4) | (data_out['tm_mon'] == 7) | (data_out['tm_mon'] == 10)] data_train = data_out[(data_out['tm_mon'] == 3) | (data_out['tm_mon'] == 2) | (data_out['tm_mon'] == 6) | (data_out['tm_mon'] == 5) | (data_out['tm_mon'] == 9) | (data_out['tm_mon'] == 8) | (data_out['tm_mon'] == 12) | (data_out['tm_mon'] == 11)] # AOD data_aod_test = data_test[['AOD_0']] data_aods_test = data_test[[ 'AOD_1', 'AOD_2', 'AOD_3', 'AOD_4', 'AOD_5', 'AOD_6', 'AOD_7', 'AOD_8',