示例#1
0
# Write the current matplotlib figure to the target held in `histplot_fig`
# at 300 dpi; bbox_inches='tight' trims the surrounding whitespace.
plt.savefig(histplot_fig,dpi=300,bbox_inches='tight')


# ## Scale the data

# In[11]:


# Separate the feature columns from the 'Alert' target column.
X = df.drop(['Alert'], axis=1)
y = pd.DataFrame(df['Alert'])

# Scale Data
# Fit the *named* scaler rather than a second throwaway instance, so that
# `scaler` remains a fitted object that can be reused afterwards (for
# example scaler.transform on new rows or scaler.inverse_transform).
scaler = MinMaxScaler()
# fit_transform returns a bare ndarray. Rebuild the frame with the original
# row index and column labels: a plain pd.DataFrame(array) would receive a
# fresh 0..n-1 index, and any index-aligned operation against `y` (which
# keeps df's index) would then pair up the wrong rows or introduce NaNs
# whenever df's index is not the default range.
X = pd.DataFrame(
    scaler.fit_transform(X.values),
    index=X.index,
    columns=X.columns,
)


# In[12]:


# Place the target frame and the feature frame side by side (target first),
# aligned on their row index.
Xy = pd.concat([y, X], axis='columns')


# ## Pearson Coefficient

# In[13]:


# Create a figure and a single axes, sized 22 x 22 inches.
# NOTE(review): the figure handle is bound to the name `fix`, presumably a
# typo for `fig` - confirm how later code refers to it before renaming.
fix,ax = plt.subplots(figsize=(22,22))
# Second name for the Xy frame (same object, no copy is made).
heatmap_data = Xy
示例#2
0
# One-hot encode the 'Embarked' column.
data = pd.get_dummies(data, columns=['Embarked'])
# Optional check for missing values:
# print(data.isnull().sum())

# Fill the missing 'Age' entries with the mean of the column.
# NOTE(review): `Imputer` was removed from sklearn.preprocessing in
# scikit-learn 0.22 (replacement: sklearn.impute.SimpleImputer) - confirm
# which scikit-learn version this notebook is pinned to.
imp = Imputer(strategy='mean')
data['Age'] = imp.fit_transform(data[['Age']])
# Remember the column labels; the scaler below returns an unlabeled array.
index = data.columns

# Optional re-check that no missing values remain:
# print(data.isnull().sum())

# Min-max normalise every column and wrap the resulting array back into a
# DataFrame that carries the saved column labels.
data = pd.DataFrame(MinMaxScaler().fit_transform(data), columns=index)

# Split into the target column and the remaining attribute columns.
y = data['Survived']
X = data.drop(columns=['Survived'])

#find outliers
# Outlier detection: fit an isolation forest on the attribute matrix, then
# label every row of that same matrix (scikit-learn's predict yields +1 for
# inliers and -1 for outliers).
FS = IsolationForest()
outliers = FS.fit(X).predict(X)

# Alternative detector, currently disabled:
# FS = EllipticEnvelope()
# FS.fit(X)

# Starts out empty; presumably collects the rows to discard - it is filled
# by code outside this view.
drop = list()
# Read the workbook, using the '日期' column as the row index, and discard
# every row that contains a missing value.
input_path = 'D:\\全2018_日出足够.xlsx'
data_all = pd.read_excel(input_path, index_col='日期').dropna()

# Columns that are carried through unscaled; all other columns get scaled.
ts_columns = ['tm_mon', 'tm_mday', 'tm_wday', 'tm_yday', 'tm_week', 'id']
data_ts_df = data_all[ts_columns]
data_to_std = data_all.drop(columns=ts_columns)

# Normalization (min-max scaling)
from sklearn.preprocessing import MinMaxScaler

# The scaler returns a bare array, so re-attach the original row index and
# column labels while building the scaled frame.
data_to_std2 = pd.DataFrame(
    MinMaxScaler().fit_transform(data_to_std),
    index=data_to_std.index,
    columns=data_to_std.columns,
)
print(data_to_std2.shape)
print(data_ts_df.shape)

# Re-join the unscaled columns with the scaled ones, column-wise.
data_out = pd.concat([data_ts_df, data_to_std2], axis=1, join='outer')
# Disabled debug export:
# data_out2.to_csv('test.csv')

# Split by the 'tm_mon' value: rows with 1, 4, 7 or 10 form the test set,
# rows with any of the other eight values 2..12 form the training set.
data_test = data_out[data_out['tm_mon'].isin([1, 4, 7, 10])]
data_train = data_out[data_out['tm_mon'].isin([2, 3, 5, 6, 8, 9, 11, 12])]
# AOD
# Select the 'AOD_0' column of the test split; the double brackets keep the
# result a one-column DataFrame rather than a Series.
data_aod_test = data_test[['AOD_0']]
data_aods_test = data_test[[
    'AOD_1', 'AOD_2', 'AOD_3', 'AOD_4', 'AOD_5', 'AOD_6', 'AOD_7', 'AOD_8',