Exemplo n.º 1
0
    'reg_lambda': .1,
    'subsample': .9,
    'min_split_gain': .01,
    'min_child_weight': 2,
    'colsample_bytree': .9,  # Subsample ratio of columns when constructing each tree.
    'scale_pos_weight': 9,  # because training data is unbalanced
    'verbose': -1
}

# --- v3 experiment: train on the top-ranked features, score, and submit ---

# The first 800 index entries of the stored importance table become the
# feature list; the training frame additionally keeps the "target" column.
importance_table = pd.read_csv(DATA_FOLDER + '/v3/importances.csv', index_col=0)
features = list(importance_table.head(800).index)
train_features = features + ["target"]

# Load the training data restricted to the selected columns, split it into
# folds and fit on those folds with the `config` defined above.
train = pd.read_pickle(DATA_FOLDER + '/v3/train.pkl')[train_features]
folds = prepare_folds(train)
models, result = train_folds(folds, config)

# Evaluate the fitted models on the test set, using the same feature columns
# (without "target").
test = load_test(DATA_FOLDER + '/v3/test.pkl')[features]
test_target = evaluate(models, test)

# Both metrics are reported here and reused in the submission name below.
scores = (result['auc'], result['f1'])
print("AUC: %.4f, F1: %.4f" % scores)

# Mean importance per feature, highest first.
# (presumably result['importances'] holds one row per feature per fold — TODO confirm)
importance = (
    result['importances']
    .groupby(['feature'])
    .agg({'importance': 'mean'})
    .sort_values(by="importance", ascending=False)
)

# NOTE(review): this writes to the same path that was read at the top of the
# block, so the stored ranking is replaced by one computed from the selected
# features only.
importance.to_csv(DATA_FOLDER + "/v3/importances.csv")

prepare_submission(test_target, "v3_AUC_%.4f_F1_%.4f" % scores)
Exemplo n.º 2
0
}

# --- v4 experiment: v2 data plus mean encoding of integer-typed top features ---

train = pd.read_pickle(DATA_FOLDER + '/v2/train.pkl')

# The first 200 index entries of the stored v2 importance table.
inter_features = list(pd.read_csv(DATA_FOLDER + '/v2/importances.csv', index_col=0).head(200).index)

# Columns whose dtype compares equal to `int` and that are also among the
# selected features; these are treated as categorical below.
categorical = set(train.columns[train.dtypes == int]).intersection(inter_features)

# One single-column group per categorical feature.
# The groups are built in the order of `inter_features` instead of by iterating
# the set: iteration order of a set of strings changes between interpreter
# runs (hash randomization), which made the order of the groups - and
# therefore anything derived from it - irreproducible.
# `dict.fromkeys` drops repeated names while keeping first-seen order.
cats = [[name] for name in dict.fromkeys(inter_features) if name in categorical]

folds = prepare_folds(train)

# Apply mean encoding to the folds for every group in `cats`.
folds = mean_encode_train_fold(folds, cats)

models, result = train_folds(folds, config)

# The test set gets the matching encoding, derived from `train`, before scoring.
test = load_test(DATA_FOLDER + '/v2/test.pkl')
test = mean_encode_test(train, test, cats)

test_target = evaluate(models, test)

print("AUC: %.4f, F1: %.4f" % (result['auc'], result['f1']))

# Mean importance per feature, highest first.
# (presumably result['importances'] holds one row per feature per fold — TODO confirm)
importance = (
    result['importances']
    .groupby(['feature'])
    .agg({'importance': 'mean'})
    .sort_values(by="importance", ascending=False)
)

# Inputs come from the v2 folder; the results of this run are stored under v4.
importance.to_csv(DATA_FOLDER + "/v4/importances.csv")

prepare_submission(test_target, "v4_AUC_%.4f_F1_%.4f" % (result['auc'], result['f1']))