示例#1
0
google['oil_high'] = oil['High']
google['oil_low'] = oil['Low']
google['eur_myr'] = eur_myr['Unnamed: 1']
google['usd_myr'] = usd_myr['Unnamed: 1']

# In[4]:

date_ori = pd.to_datetime(google.iloc[:, 0]).tolist()
google.head()

# In[5]:

minmax = MinMaxScaler().fit(google.iloc[:, 4].values.reshape((-1, 1)))
df_log = MinMaxScaler().fit_transform(google.iloc[:, 1:].astype('float32'))
df_log = pd.DataFrame(df_log)
df_log.head()

# In[6]:

thought_vector = autoencoder.reducedimension(df_log.values, 4, 0.001, 128, 100)

# In[7]:

thought_vector.shape

# In[8]:

num_layers = 1
size_layer = 128
timestamp = 5
epoch = 500
示例#2
0
from IPython.display import display
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

data_path = 'data/'
df_train = pd.read_csv(data_path + 'titanic_train.csv')
df_test = pd.read_csv(data_path + 'titanic_test.csv')

train_Y = df_train['Survived']
ids = df_test['PassengerId']
df_train = df_train.drop(['PassengerId', 'Survived'], axis=1)
df_test = df_test.drop(['PassengerId'], axis=1)
df = pd.concat([df_train, df_test])
df.head()


# 檢查 DataFrame 空缺值的狀態
def na_check(df_data):
    data_na = (df_data.isnull().sum() / len(df_data)) * 100
    data_na = data_na.drop(
        data_na[data_na == 0].index).sort_values(ascending=False)
    missing_data = pd.DataFrame({'Missing Ratio': data_na})
    display(missing_data.head(10))


na_check(df)

# 以下 In[3]~In[10] 只是鐵達尼預測中的一組特徵工程, 並以此組特徵工程跑參數, 若更換其他特徵工程, In[10]的參數需要重新跑
# Sex : 直接轉男 0 女 1