Example #1
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.metrics import confusion_matrix

# In[43]:

# Assumes train_x, train_y, test_x, test_y were prepared earlier (e.g. via train_test_split).
nb = MultinomialNB()
nb.fit(train_x, train_y)
pred_y = nb.predict(test_x)
print(confusion_matrix(test_y, pred_y))

# In[44]:

sv = LinearSVC()
sv.fit(train_x, train_y)
pred = sv.predict(test_x)
print(confusion_matrix(test_y, pred))

# In[45]:

from sklearn.linear_model import LogisticRegression

# In[46]:

logreg = LogisticRegression()
logreg.fit(train_x, train_y)
pred_lr = logreg.predict(test_x)
print(confusion_matrix(test_y, pred_lr))

# In[ ]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

# # Logistic Regression Classification

# The target variable (or output), y, can take only discrete values for a given set of features (or inputs), X.
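
# As a quick illustration of that idea (a minimal sketch, not part of the
# original notebook): logistic regression squashes a linear score w.x + b
# through the sigmoid and predicts the class whose probability exceeds 0.5.
# The weights below are hypothetical, chosen only for the example.

# In[ ]:

import numpy as np

def sigmoid(z):
    # map any real-valued score to a probability in (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

w, b = np.array([0.8, -0.4]), 0.1  # hypothetical coefficients and intercept
x_example = np.array([1.5, 2.0])   # a single two-feature sample
p = sigmoid(w @ x_example + b)     # P(y = 1 | x)
print(p, int(p >= 0.5))            # predicted probability and class label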

# In[37]:

# Logistic regression

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)

# In[38]:

cm = confusion_matrix(y_test, y_pred)  # sklearn expects (y_true, y_pred)
cm

# In[39]:

a_s = accuracy_score(y_test, y_pred)
a_s

# # Decision Tree Classification

# A decision tree uses a tree representation to solve the problem: each leaf node corresponds to a class label, and attributes are tested at the internal nodes of the tree.
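
# A minimal sketch of that idea (the original notebook cell is missing here);
# it assumes the x_train, x_test, y_train, y_test split used above.

# In[ ]:

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

dt = DecisionTreeClassifier(max_depth=4, random_state=0)  # shallow tree to curb overfitting
dt.fit(x_train, y_train)
y_pred_dt = dt.predict(x_test)
print(confusion_matrix(y_test, y_pred_dt))
print(accuracy_score(y_test, y_pred_dt))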
Example #3
# Assumes `data` is a pandas DataFrame loaded earlier; label-encode the categorical columns.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
for col in ['Education', 'Self_Employed', 'Credit_History', 'Property_Area', 'Dependents']:
    data[col] = le.fit_transform(data[col])
print(data.head())
x = data.drop(['Loan_Status'], axis=1)
y = data['Loan_Status']
import seaborn as sns  # needed for the count plot below
sns.countplot(x='Loan_Status', data=data)  # show the class balance of the target
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
# print(x_train)
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

lr = LogisticRegression(max_iter=1000)  # a higher iteration cap avoids convergence warnings
lr.fit(x_train, y_train)
prediction = lr.predict(x_test)
print(prediction)
print(accuracy_score(y_test, prediction))
# print(y_train.head())
from sklearn.tree import DecisionTreeClassifier
dc = DecisionTreeClassifier()
dc.fit(x_train, y_train)
prediction = dc.predict(x_test)
print(prediction)
print(accuracy_score(y_test, prediction))
Example #4
# divide the data into dependent and independent variables
y = df['expenses']
x = df.drop(['expenses'], axis=1)

#splitting of data
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)
# we could scale down the data, but since a regression model can handle this, no scaling is needed

import numpy as np
from sklearn.linear_model import LinearRegression

lin_reg = LinearRegression()
lin_reg.fit(x_train, y_train)

# predict
y_pred = lin_reg.predict(x_test)

# compute residuals for diagnostic checks
residual = y_test - y_pred

# check the fit of the model (R^2 on the test set)
score = lin_reg.score(x_test, y_test)

# accuracy metrics
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
acc = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# the model gives about 73.90% accuracy (R^2 of roughly 0.739)
Example #5
# The head of this snippet is truncated in the source; the imports and the
# opening of the xgboost parameter dict below are reconstructed.
import math
from datetime import datetime

import numpy as np
import xgboost as xgb

param = {
    # 'nthread': 6,
    # 'tree_method': 'gpu_hist',
    # 'n_gpus': 2,
    'colsample_bytree': 0.7
}


def evalerror(preds, dtrain):
    labels = dtrain.get_label()
    # return a pair metric_name, result
    y = [math.exp(x) - 1 for x in labels[labels > 0]]
    yhat = [math.exp(x) - 1 for x in preds[labels > 0]]
    ssquare = [math.pow((y[i] - yhat[i]) / y[i], 2) for i in range(len(y))]
    return 'rmpse', math.sqrt(np.mean(ssquare))


watchlist = [(dtrain, 'train_rmpse'), (dvalid, 'valid_rmpse')]
clf = xgb.train(param,
                dtrain,
                10000,
                watchlist,
                feval=evalerror,
                verbose_eval=100)
dtest = xgb.DMatrix(test[test['Open'] == 1][features].values)
test['Sales'] = 0
test.loc[test['Open'] == 1,
         'Sales'] = [math.exp(x) - 1 for x in clf.predict(dtest)]
print("-> Write submission file ... ")
print(datetime.now(), datetime.now() - start)  # `start` is set in the truncated head of the script
test[['Id', 'Sales']].to_csv("submission.csv", index=False)
Example #6
def run():
    # Assumes the full script's imports (pandas read_csv, sklearn LabelEncoder,
    # Keras layers, numpy as np, matplotlib.pyplot as plt, math's sqrt, sklearn's
    # mean_squared_error and mean_absolute_error) and its helper functions
    # (difference, create_dataset, series_to_supervised, scale, forecast_lstm,
    # invert_scale, inverse_difference, smape) are in scope.

    hidden_layers = [35, 49, 4]
    batch_size = 219
    dropout = 0.2
    seq_len = 25
    epochs_pre = [625, 115, 933]
    epochs_finetune = 197
    window_size = 0
    features = 8

    series = read_csv('pollution.csv', header=0, index_col=0)
    raw_values = series.values

    # integer encode wind direction
    encoder = LabelEncoder()
    raw_values[:, 4] = encoder.fit_transform(raw_values[:, 4])

    # transform data to be stationary
    diff = difference(raw_values, 1)

    dataset = diff.values
    dataset = create_dataset(dataset, features, window_size)

    # frame as supervised learning
    reframed = series_to_supervised(dataset, features, seq_len, 1)
    drop = [
        i for i in range(seq_len * features + 1, ((seq_len + 1) * features))
    ]
    reframed.drop(reframed.columns[drop], axis=1, inplace=True)
    reframed = reframed.values

    # split into train and test sets
    train_size = 365 * 24 * 4
    train, test = reframed[0:train_size], reframed[train_size:]

    # transform the scale of the data
    scaler, train_scaled, test_scaled = scale(train, test)

    # split into input and outputs
    x_train, y_train = train_scaled[:, 0:-1], train_scaled[:, -1]
    x_test, y_test = test_scaled[:, 0:-1], test_scaled[:, -1]

    # reshape input to be 3D [samples, timesteps, features]
    x_train = x_train.reshape(x_train.shape[0], seq_len, features)
    x_test = x_test.reshape(x_test.shape[0], seq_len, features)

    print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

    print('\nstart pretraining')
    print('===============')

    timesteps = x_train.shape[1]
    input_dim = x_train.shape[2]
    trained_encoder = []
    x_train_temp = x_train
    i = 1
    for hidden, epochs in zip(hidden_layers, epochs_pre):

        print('pretrain Autoencoder: {} ----> Encoder: {} ----> Epochs: {}'.
              format(i, hidden, epochs))
        print(x_train_temp.shape)
        print('=============================================================')

        inputs = Input(batch_shape=(batch_size, timesteps,
                                    x_train_temp.shape[2]))
        encoded = CuDNNLSTM(hidden,
                            batch_input_shape=(batch_size, timesteps,
                                               x_train_temp.shape[2]),
                            stateful=False)(inputs)
        decoded = RepeatVector(timesteps)(encoded)
        decoded = CuDNNLSTM(input_dim, stateful=False,
                            return_sequences=True)(decoded)

        AE = Model(inputs, decoded)

        encoder = Model(inputs, encoded)

        AE.compile(loss='mean_squared_error', optimizer='Adam')

        encoder.compile(loss='mean_squared_error', optimizer='Adam')

        AE.fit(x_train_temp,
               x_train,
               epochs=epochs,
               batch_size=batch_size,
               shuffle=True,
               verbose=1)

        # store trained encoder and its weights
        trained_encoder.append((AE.layers[1], AE.layers[1].get_weights()))

        # update training data
        x_train_temp = encoder.predict(x_train_temp, batch_size=batch_size)

        # reshape encoded input to 3D
        inputs = Input(shape=(x_train_temp.shape[1], ))
        reshape = RepeatVector(timesteps)(inputs)
        Repeat = Model(inputs, reshape)

        x_train_temp = Repeat.predict(x_train_temp, batch_size=batch_size)
        i = i + 1

    # Fine-tuning
    print('\nFine-tuning')
    print('============')

    n_layers = len(trained_encoder)
    # build the fine-tuning model
    model = Sequential()
    for i, encod in enumerate(trained_encoder):
        model.add(encod[0])
        model.layers[-1].set_weights(encod[1])
        model.add(Dropout(dropout))
        if i + 1 != n_layers:
            model.add(RepeatVector(timesteps))

    model.add(Dense(1))

    model.compile(loss='mean_squared_error', optimizer='Adam')

    model.fit(x_train,
              y_train,
              epochs=epochs_finetune,
              batch_size=batch_size,
              verbose=1,
              shuffle=True)

    # save trained model
    model.save('3layer_25.h5')

    # redefine the model in order to test with one sample at a time (batch_size = 1)
    new_model = Sequential()
    new_model.add(
        CuDNNLSTM(hidden_layers[0],
                  batch_input_shape=(1, timesteps, input_dim),
                  stateful=False))
    for layer in model.layers[1:]:
        new_model.add(layer)

    # copy weights
    old_weights = model.get_weights()
    new_model.set_weights(old_weights)

    # forecast the entire training dataset to build up state for forecasting
    print('Forecasting Training Data')
    predictions_train = list()
    for i in range(len(y_train)):
        # make one-step forecast
        X = x_train[i]
        y = y_train[i]
        yhat = forecast_lstm(new_model, 1, X)
        # invert scaling
        yhat = invert_scale(scaler, X, yhat)
        # invert differencing
        yhat = inverse_difference(raw_values, yhat, len(raw_values) - i)
        # store forecast
        predictions_train.append(yhat)
        expected = raw_values[:, 0][i + 1]
        #print('Month=%d, Predicted=%f, Expected=%f' % (i+1, yhat, expected))

    # report performance
    rmse_train = sqrt(
        mean_squared_error(raw_values[:, 0][1:len(train_scaled) + 1],
                           predictions_train))
    print('Train RMSE: %.5f' % rmse_train)
    # #report performance using RMSPE
    # RMSPE_train = RMSPE(raw_values[:,0][1:len(train_scaled)+1],predictions_train)
    # print('Train RMSPE: %.5f' % RMSPE_train)
    MAE_train = mean_absolute_error(raw_values[:, 0][1:len(train_scaled) + 1],
                                    predictions_train)
    print('Train MAE: %.5f' % MAE_train)
    # MAPE_train = MAPE(raw_values[:,0][1:len(train_scaled)+1], predictions_train)
    # print('Train MAPE: %.5f' % MAPE_train)
    SMAPE_train = smape(raw_values[:, 0][1:len(train_scaled) + 1],
                        predictions_train)
    print('Train SMAPE: %.5f' % SMAPE_train)

    # forecast the test data
    print('Forecasting Testing Data')
    predictions_test = list()
    for i in range(len(y_test)):
        # make one-step forecast
        X = x_test[i]
        y = y_test[i]
        yhat = forecast_lstm(new_model, 1, X)
        # invert scaling
        yhat = invert_scale(scaler, X, yhat)
        # invert differencing
        yhat = inverse_difference(raw_values, yhat, len(test_scaled) + 1 - i)
        # store forecast
        predictions_test.append(yhat)
        expected = raw_values[:, 0][len(train) + i + 1]
        #print('Month=%d, Predicted=%f, Expected=%f' % (i+1, yhat, expected))

    # report performance using RMSE
    rmse_test = sqrt(
        mean_squared_error(raw_values[:, 0][-len(test_scaled):],
                           predictions_test))
    print('Test RMSE: %.5f' % rmse_test)
    #report performance using RMSPE
    # RMSPE_test = RMSPE(raw_values[:,0][-len(test_scaled):], predictions_test)
    # print('Test RMSPE: %.5f' % RMSPE_test)
    MAE_test = mean_absolute_error(raw_values[:, 0][-len(test_scaled):],
                                   predictions_test)
    print('Test MAE: %.5f' % MAE_test)
    # MAPE_test = MAPE(raw_values[:,0][-len(test_scaled):], predictions_test)
    # print('Test MAPE: %.5f' % MAPE_test)
    SMAPE_test = smape(raw_values[:, 0][-len(test_scaled):], predictions_test)
    print('Test SMAPE: %.5f' % SMAPE_test)

    #predictions = np.concatenate((predictions_train,predictions_test),axis=0)

    # line plot of observed vs predicted
    fig, ax = plt.subplots(1)
    ax.plot(raw_values[:, 0][-80:], 'mo-', label='original', linewidth=2)
    ax.plot(predictions_test[-80:], 'co-', label='predictions', linewidth=2)
    #ax.axvline(x=len(train_scaled)+1,color='k', linestyle='--')
    ax.legend(loc='upper right')
    ax.set_title(
        'PM2.5 hourly concentration prediction from 28/12/2014 to 31/12/2014')
    ax.set_ylabel('PM2.5 concentration')
    plt.show()
Example #7
# The head of this snippet is truncated; `input_img` and `encoder_output` are
# the input tensor and encoder stack defined earlier in the example.
decoded = Dense(16384, activation='tanh')(encoder_output)

# build the autoencoder model
autoencoder = Model(inputs=input_img, outputs=decoded)

# build the encoder model
encoder = Model(inputs=input_img, outputs=encoder_output)

# compile the model
autoencoder.compile(optimizer='adam', loss='mse')

autoencoder.summary()
# training
autoencoder.fit(X_train, X_train, epochs=40, batch_size=256, shuffle=True)

auto_trainX = encoder.predict(X_train)
auto_testX = encoder.predict(X_test)
print(auto_trainX.shape)
print(auto_testX.shape)

# use a decision tree
from sklearn import tree
print("Training---------- Decision Tree")
clf = tree.DecisionTreeClassifier()
clf.fit(auto_trainX, train_y)
score = clf.score(auto_testX, test_y)
print(score)

# use a BP (MLP) network
from sklearn.neural_network import MLPClassifier
print("Training---------- BP")
bp_c = MLPClassifier(solver='lbfgs', alpha=0.00001, hidden_layer_sizes=(5, 5, 4), activation='relu')
bp_c.fit(auto_trainX, train_y)
Example #8
# The head of this snippet is truncated; `classification`, `x`, `z_mean`,
# `decoder_h`, `decoder_mean`, `latent_dim`, and the data splits are defined
# earlier (this is the tail of a variational-autoencoder example).
classification.fit(x_train,
                   dummy_y,
                   shuffle=False,
                   epochs=30,
                   batch_size=batch_size)

scores = classification.evaluate(x_test, dummy_y_test)
print("\nResult on evaluation set...\n%s: %.2f%%" %
      (classification.metrics_names[1], scores[1] * 100))

# build a model to project inputs on the latent space
encoder = Model(x, z_mean)

# display a 2D plot of the digit classes in the latent space
x_test_encoded = encoder.predict(x_test, batch_size=batch_size)
plt.figure(figsize=(6, 6))
plt.scatter(x_test_encoded[:, 0], x_test_encoded[:, 1], c=y_test)
plt.colorbar()
plt.show()

# build a digit generator that can sample from the learned distribution
decoder_input = Input(shape=(latent_dim, ))
_h_decoded = decoder_h(decoder_input)
_x_decoded_mean = decoder_mean(_h_decoded)
generator = Model(decoder_input, _x_decoded_mean)

# display a 2D manifold of the digits
n = 15  # figure with 15x15 digits
digit_size = 28
figure = np.zeros((digit_size * n, digit_size * n))
Example #9
# The head of this snippet is truncated; the encoder layer is reconstructed
# here, assuming the usual pattern (encoding_dim units, relu activation):
encoded = Dense(encoding_dim, activation='relu',
                activity_regularizer=regularizers.l1(10e-5))(input_dim)
decoded = Dense(ncol, activation='sigmoid')(encoded)
autoencoder = Model(inputs=input_dim, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')
history = autoencoder.fit(S_X_train,
                          S_X_train,
                          epochs=1000,
                          batch_size=15,
                          shuffle=True,
                          validation_data=(S_X_test, S_X_test),
                          verbose=0)

# the encoder extracts the reduced-dimension representation from the autoencoder above
encoder = Model(inputs=input_dim, outputs=encoded)
encoded_input = Input(shape=(encoding_dim, ))
encoded_out = encoder.predict(S_X_test)
encoded_out2 = encoder.predict(S_X_train)
result = encoder.predict(Xs)

# print shapes
print(encoded_out.shape, encoded_out2.shape)

# Plot all losses
print(history.history.keys())

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')