예제 #1
0
                           min_child_weight=min_child_weight,
                           reg_alpha=reg_alpha,
                           reg_lambda=reg_lambda
                           #eval_metric=eval_metric
                           )
# Fit the classifier and report the wall-clock training time in seconds.
t2 = time.time()
classifier.fit(X_train, y_train)
t3 = time.time()
print(t3 - t2)

# Score both splits: keep column 1 of predict_proba
# (presumably the positive-class probability -- confirm against classifier.classes_).
y_pred = classifier.predict_proba(X_test)[:, 1]
y_pred_train = classifier.predict_proba(X_train)[:, 1]

print("gini normalized score (train): ")
gini_score = gini_normalized(y_train, y_pred_train)
print(gini_score)

print("gini normalized score (test): ")
gini_score = gini_normalized(y_test, y_pred)
print(gini_score)

# Dump labels and predictions of the test split as plain-text files.
import numpy as np
np.savetxt("y_test", y_test)
np.savetxt("y_pred", y_pred)

# Dump the *training* split. The files are named after the training data, so
# they must receive y_train / y_pred_train (not the test arrays again).
np.savetxt("y_train", y_train)
np.savetxt("y_pred_train", y_pred_train)

# Mean predicted score on the test split.
print("mean de y pred")
print(np.mean(y_pred))
예제 #2
0
# Fitting the ANN to the Training set.
# Class 0 keeps weight 1.0; class 1 is weighted by `alpha` (defined outside this excerpt).
class_weight = {0: 1., 1: alpha}
classifier.fit(X_train,
               y_train,
               batch_size=batch_size,
               epochs=epochs,
               class_weight=class_weight)

# Part 3 - Making the predictions and evaluating the model

# Predicting the Test set results
y_pred = classifier.predict(X_test)

print("gini normalized score: ")
gini_score = gini_normalized(y_test, y_pred)
print(gini_score)

# Dump test labels and predictions as plain-text files.
import numpy as np
np.savetxt("y_test", y_test)
np.savetxt("y_pred", y_pred)

print("mean de y pred")
print(np.mean(y_pred))

# Load the previously stored results. The context manager guarantees the file
# handle is closed even if reading fails.
with open("results.json", "r") as f:
    results_txt = f.read()
results = json.loads(results_txt)
# Uncomment this line if you want to save the results
# results.append(parameters)
예제 #3
0
# Validation dataset, tied to the training dataset via `reference`.
lgb_eval = lgb.Dataset(X_eval, y_eval, reference=lgb_train)

# Train the first booster, validating on lgb_eval, with an early-stopping
# window of 100 rounds and an evaluation log every 50 rounds.
# NOTE(review): `early_stopping_rounds` / `verbose_eval` keyword arguments are
# version-dependent in LightGBM -- confirm against the installed release.
clf_1 = lgb.train(params_1,
                  lgb_train,
                  num_boost_round=num_boost_round,
                  valid_sets=lgb_eval,
                  early_stopping_rounds=100,
                  verbose_eval=50)

from util import gini_normalized
# Predict raw scores for validation ids
y_eval_pred = clf_1.predict(X_eval, raw_score=True)

print("Gini eval mean on all trees: ")
print(gini_normalized(y_eval, y_eval_pred))

# Raw scores on the training set as well, to compare against the eval score.
y_train_pred = clf_1.predict(X_train, raw_score=True)

print("Gini train mean on all trees: ")
print(gini_normalized(y_train, y_train_pred))

c1 = 0
c2 = 0

# Select the training rows whose raw score is below 0.2 for a second training
# pass. `temp` is the boolean mask; the indices are the positions where the
# mask is true (calling range() on the mask itself would raise TypeError).
temp = y_train_pred < 0.2
indexes_to_retrain = [row for row, is_below in enumerate(temp) if is_below]
X_to_retrain = X_train[indexes_to_retrain]
y_to_retrain = y_train[indexes_to_retrain]

clf_2 = lgb.train(params_1,
예제 #4
0
    # Add the remaining hidden layers. The loop variable is deliberately NOT
    # named `i`: `i` is printed and incremented below as a counter, and reusing
    # it here would overwrite that counter on every pass.
    for layer_idx in range(len(layers)-3):
        classifier.add(Dense(units=layers[layer_idx+2], kernel_initializer = 'uniform', activation = activation_functions[layer_idx+1]))

    # Adding the output layer
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

    # Compiling the ANN
    classifier.compile(optimizer = 'adam', loss = loss, metrics = [])

    # Split the training data with the current index arrays
    # (presumably produced by a K-fold splitter outside this excerpt).
    train_x, train_y = X_train[train_index], y_train[train_index]
    eval_x, eval_y = X_train[test_index], y_train[test_index]
    classifier.fit(train_x, train_y, batch_size = batch_size, epochs = epochs, class_weight=class_weight)

    # Predict on the held-out part and on the test set; the test predictions
    # are collected in `results`.
    res_eval = classifier.predict(eval_x)
    res = classifier.predict(X_test)
    results.append(res)

    # Report the normalized gini on the held-out part, tagged with the counter.
    print('gini_eval', i)
    gini_score = gini_normalized(eval_y, res_eval)
    print(gini_score)
    i+=1

def to_csv(y_pred, ids):
    """Write predictions to 'sumbission_5Kfold_nn.csv' in the current directory.

    The file gets a header row ``id,target`` followed by one row per entry of
    ``y_pred``, pairing ``ids[k]`` with ``y_pred[k]``.

    Args:
        y_pred: sequence of predicted values, one per row.
        ids: sequence of identifiers, indexable by position; must hold at
            least ``len(y_pred)`` entries.

    Raises:
        IndexError: if ``ids`` is shorter than ``y_pred``.
    """
    import csv
    # newline='' is what the csv module requires for files it writes to;
    # without it, platforms that translate '\n' emit a blank line per row.
    with open('sumbission_5Kfold_nn.csv', 'w', newline='') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        spamwriter.writerow(['id', 'target'])
        spamwriter.writerows(
            [ids[pos], pred] for pos, pred in enumerate(y_pred)
        )

# Average the collected test predictions over however many were gathered,
# instead of hard-coding exactly five entries.
submission = sum(results) / len(results)
# Identifiers are taken from the first column of the test dataset.
idx = dataset_test.iloc[:, 0].values
# Column 0 of the averaged predictions is written out next to the ids.
to_csv(submission[:,0],idx)
# Shuffled K-fold splitter with a fixed seed so the folds are reproducible.
kf = KFold(n_splits=K, random_state=42, shuffle=True)
# Training with KFold cross-validation: one random forest per fold, each one
# also predicting on X_test; the per-fold test predictions go into `results`.
results = []
for fold, (train_index, test_index) in enumerate(kf.split(X_train)):
    train_x, train_y = X_train[train_index], y_train[train_index]
    eval_x, eval_y = X_train[test_index], y_train[test_index]
    # max_features='sqrt' is what 'auto' meant for classifiers; the 'auto'
    # spelling is rejected by recent scikit-learn releases.
    classifier = RandomForestClassifier(n_estimators=30, criterion='gini', random_state=1, max_depth=5, max_features='sqrt', class_weight=class_weight)
    classifier.fit(train_x, train_y)
    # NOTE(review): predict() returns class labels; the gini scores below are
    # therefore computed on labels, not probabilities -- confirm this is
    # intended (predict_proba would give a ranking score).
    res_train = classifier.predict(train_x)
    res_eval = classifier.predict(eval_x)
    res = classifier.predict(X_test)
    results.append(res)
    print('round k=', fold)
    print('eval gini score  ', 'train gini score')
    gini_eval = gini_normalized(eval_y, res_eval)
    gini_train = gini_normalized(train_y, res_train)
    print(gini_eval,'  ', gini_train)
    print()


def to_csv(y_pred, ids):
    """Write predictions to 'sumbission_5Kfold_random_forest.csv' in the current directory.

    The file gets a header row ``id,target`` followed by one row per entry of
    ``y_pred``, pairing ``ids[k]`` with ``y_pred[k]``.

    Args:
        y_pred: sequence of predicted values, one per row.
        ids: sequence of identifiers, indexable by position; must hold at
            least ``len(y_pred)`` entries.

    Raises:
        IndexError: if ``ids`` is shorter than ``y_pred``.
    """
    import csv
    # newline='' is what the csv module requires for files it writes to;
    # without it, platforms that translate '\n' emit a blank line per row.
    with open('sumbission_5Kfold_random_forest.csv', 'w', newline='') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        spamwriter.writerow(['id', 'target'])
        spamwriter.writerows(
            [ids[pos], pred] for pos, pred in enumerate(y_pred)
        )

# Average the collected test predictions over however many were gathered,
# instead of hard-coding exactly five entries.
submission = sum(results) / len(results)