# Script tail: finish building the feed-forward network, train it, score it on
# the validation split, then predict on the test set and write a submission.
# `model`, `n`, `cfg`, `pipe`, `X_train`/`y_train`, `X_val`/`y_val`, `logger`
# and `start` are expected to be defined earlier in the file.

# Hidden stage 1: Dense -> ReLU -> Dropout. The input width is declared here.
model.add(Dense(units=64, input_dim=n))
model.add(Activation('relu'))
model.add(Dropout(cfg["dropout"]))

# Hidden stage 2: same shape. No `input_dim` here: a layer stacked on an
# existing one takes its input size from the layer before it.
model.add(Dense(units=64))
model.add(Activation('relu'))
model.add(Dropout(cfg["dropout"]))

# Single sigmoid output unit.
model.add(Dense(units=1))
model.add(Activation('sigmoid'))

# NOTE(review): 'mse' is combined with a sigmoid output and an accuracy
# metric. If the target is binary, 'binary_crossentropy' is the usual loss;
# left unchanged here because it alters the trained model. Confirm intent.
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

logger.info("Fitting model on X_train...")
model.fit(X_train, y_train, epochs=cfg["epochs"], batch_size=cfg["batch_size"])

logger.info("Predicting on X_val...")
# The output layer has one unit, so flatten the predictions to a 1-D vector
# before handing them to the scorer.
results_val = model.predict(X_val).ravel()
score = gini_normalized(y_val, results_val)
logger.info("normalized gini score on validation set is {}".format(score))

logger.info("Loading and predicting on Test set...")
test = load_file("test")
# Constant column added before the test frame goes through `pipe`.
test["bias"] = 1
X_test = pipe.transform(test)
# Flatten for the same reason as above so the column gets one value per row.
results_test = model.predict(X_test).ravel()
test['target'] = results_test
write_submission_file(test, columns=['target'], name='keras-v1')

logger.info("Finished with time {}".format(datetime.now() - start))
# Script: train a soft-voting ensemble (XGBoost + LightGBM + random forest)
# behind an impute/one-hot pipeline, report a cross-validated normalized gini
# score, then predict on the test set and write a submission.
# `train`, `logger`, `start` and the helper functions are expected to be
# defined earlier in the file.

X = train.drop('target', axis=1)
# Store the helper's result under its own name. Assigning it back to
# `drop_cols` would rebind the helper itself, making it uncallable afterwards
# (and raising UnboundLocalError if this code runs inside a function).
cols_to_drop = drop_cols(X, names=True)
X.drop(cols_to_drop, axis=1, inplace=True)
y = train.target
# Computed after the drop so the result refers to the columns that remain.
cat_columns = get_cat_features_idx(X)

logger.info("Making Ensemble...")
classifiers = [
    ('xgb', XGBClassifier(learning_rate=0.07, reg_alpha=8, reg_lambda=0.75,
                          max_depth=4, n_estimators=800, gamma=3)),
    ('lgbm', LGBMClassifier(learning_rate=0.018, max_depth=6, num_leaves=11,
                            colsample_bytree=0.85)),
    ('rf', RandomForestClassifier(n_estimators=200, criterion='gini')),
]
model = Pipeline([
    # -1 is the value treated as "missing" by the imputer.
    ('impute', Imputer(missing_values=-1, strategy="most_frequent")),
    ('encode', OneHotEncoder(categorical_features=cat_columns,
                             handle_unknown='ignore')),
    ('ensemble', VotingClassifier(estimators=classifiers, voting='soft')),
])

logger.info("Fitting model on X...")
# Fit on the full training data; this fitted pipeline is the one used for
# the test-set predictions below.
model.fit(X, y)

logger.info("Predicting score (w/Cross-Val) on X...")
# Out-of-fold probabilities for column 1 of predict_proba, 3-fold CV.
# cross_val_predict fits clones of the estimator, so `model` stays fitted on
# all of X.
results = cross_val_predict(model, X, y, cv=3, method='predict_proba')[:, 1]
score = gini_normalized(y, results)
logger.info("normalized gini score on training set is {}".format(score))

logger.info("Loading and predicting on Test set...")
test = load_file("test")
# Remove the same columns that were removed from the training features.
test.drop(cols_to_drop, axis=1, inplace=True)
test['target'] = model.predict_proba(test)[:, 1]
write_submission_file(test, columns=['target'], name='ensemble-v1')

logger.info("Finished with time {}".format(datetime.now() - start))