def test():
    """Predict on the processed test set with the stored predictor and write a submission.

    Reads "test_processed.csv" indexed by "PassengerId", keeps the columns
    named in the module-level ``feature_names``, loads the predictor saved as
    "gbdt.pkl" through ``common.load_predictor`` and hands its predictions,
    together with the row index, to ``common.make_submission`` for the file
    "submit_gbdt.csv".
    """
    frame = pd.read_csv("test_processed.csv", index_col="PassengerId")
    features = frame[feature_names]
    model = common.load_predictor("gbdt.pkl")
    common.make_submission(features.index, model.predict(features), "submit_gbdt.csv")
import numpy as np
import pandas as pd
import common


def read_result(filename):
    """Read one submission CSV and return its first data column as -1/+1 votes.

    The file is indexed by its "PassengerId" column. Every 0 in the first
    remaining column is turned into -1 so that votes from several models can
    be summed; all other values are left untouched.
    """
    frame = pd.read_csv(filename, index_col="PassengerId")
    votes = frame.iloc[:, 0]
    # Return a new Series rather than assigning through a boolean mask on a
    # column that was sliced out of the frame.
    return votes.mask(votes == 0, -1)


methods = ["lr", "svc", "rf", "gbdt", "knn"]
results = pd.concat(
    [read_result("submit_%s.csv" % m) for m in methods],
    axis=1,
    keys=methods,
)

# Per-row sum of the -1/+1 votes over all models.
vote_sum = results.sum(axis=1)

# A row is unanimous when the absolute vote sum equals the number of models.
# Deriving the threshold from ``methods`` keeps this check correct when the
# list of models changes (it used to be hard-coded as 5 / -5).
consensus = vote_sum.abs() == len(methods)

consensus_result = vote_sum[consensus]
# ``~`` is the operator for inverting a boolean mask; NumPy rejects unary
# minus on boolean arrays.
nonconsensus_result = vote_sum[~consensus]

# Where the models disagree, deliberately bet against the majority.
neg_nonconsensus_result = -1 * nonconsensus_result

# Unanimous rows come first, then the flipped non-unanimous rows; a positive
# sum maps to label 1, anything else to 0.
guess = pd.concat([consensus_result, neg_nonconsensus_result])
guess = (guess > 0).astype(int)

common.make_submission(guess.index, guess, "submit_guess.csv")
import numpy as np
import pandas as pd
import common


def read_result(filename):
    """Load a submission CSV and return its first data column as -1/+1 votes.

    The file is indexed by "PassengerId"; zeros in the first remaining column
    become -1, every other value is kept.
    """
    frame = pd.read_csv(filename, index_col="PassengerId")
    votes = frame.iloc[:, 0]
    return votes.mask(votes == 0, -1)


methods = ["lr", "svc", "rf", "gbdt", "knn"]

# One column of votes per method, labelled with the method name.
results = pd.concat(
    [read_result("submit_%s.csv" % m) for m in methods],
    axis=1,
    keys=methods,
)

# Majority vote: a positive vote sum maps to label 1, anything else to 0.
majority = results.sum(axis=1).gt(0).astype(int)

common.make_submission(majority.index, majority, "submit_average.csv")
def read_result(filename):
    """Read one submission CSV and return its first data column as -1/+1 votes.

    The file is indexed by its "PassengerId" column. Every 0 in the first
    remaining column is turned into -1 so that votes from several models can
    be summed; all other values are left untouched.
    """
    frame = pd.read_csv(filename, index_col="PassengerId")
    votes = frame.iloc[:, 0]
    # Return a new Series rather than assigning through a boolean mask on a
    # column that was sliced out of the frame.
    return votes.mask(votes == 0, -1)


methods = ["lr", "svc", "rf", "gbdt", "knn"]
results = pd.concat(
    [read_result("submit_%s.csv" % m) for m in methods],
    axis=1,
    keys=methods,
)

# Per-row sum of the -1/+1 votes over all models.
vote_sum = results.sum(axis=1)

# A row is unanimous when the absolute vote sum equals the number of models.
# Deriving the threshold from ``methods`` keeps this check correct when the
# list of models changes (it used to be hard-coded as 5 / -5).
consensus = vote_sum.abs() == len(methods)

consensus_result = vote_sum[consensus]
# ``~`` is the operator for inverting a boolean mask; NumPy rejects unary
# minus on boolean arrays.
nonconsensus_result = vote_sum[~consensus]

# Where the models disagree, deliberately bet against the majority.
neg_nonconsensus_result = -1 * nonconsensus_result

# Unanimous rows come first, then the flipped non-unanimous rows; a positive
# sum maps to label 1, anything else to 0.
guess = pd.concat([consensus_result, neg_nonconsensus_result])
guess = (guess > 0).astype(int)

common.make_submission(guess.index, guess, "submit_guess.csv")
# NOTE(review): the two statements below are the tail of a function whose
# `def` line is not part of this chunk; it is presumably `predict_features`,
# which is called further down with three matching arguments — confirm.
    # One prediction array per base estimator: estimators flagged with
    # `need_scale` are given the scaled matrix, all others the unscaled one.
    basepredicts = [
        estimator.estimator.predict(scaledX)
        if estimator.need_scale
        else estimator.estimator.predict(X)
        for estimator in base_estimators
    ]
    # Transpose so that each estimator becomes a column (named by
    # `estimator.name`) and the rows carry the index of X.
    return pd.DataFrame(
        np.asarray(basepredicts).T,
        index=X.index,
        columns=[estimator.name for estimator in base_estimators]
    )


# ***************************** fit advanced features to validation target
# Base-estimator predictions on the validation set serve as the input
# features of a second-level logistic regression.
validate_basepredicts = predict_features(base_estimators, Xvalidate, Xvalidate_scaled)

# Cross-validated search over the regularization strength (Cs=30, cv=10).
lrcv = LogisticRegressionCV(Cs=30, cv=10)
lrcv.fit(validate_basepredicts, yvalidate)
# NOTE(review): the results of the next two calls are not stored; they look
# like leftovers from interactive inspection.
lrcv.score(validate_basepredicts, yvalidate)
common.make_coefs_frame(validate_basepredicts.columns, lrcv.coef_.ravel())

# fit again with whole data
# NOTE(review): the refit uses the same validation predictions and targets
# as the cross-validated fit above, with C fixed to the first entry of
# `lrcv.C_`.
basepredict_lr = LogisticRegression(C=lrcv.C_[0])
basepredict_lr.fit(validate_basepredicts, yvalidate)
# Results discarded here as well (see note above).
basepredict_lr.score(validate_basepredicts, yvalidate)
common.make_coefs_frame(validate_basepredicts.columns, basepredict_lr.coef_.ravel())

# ***************************** test
# Build the same base-prediction features for the test set and let the
# second-level model produce the final predictions.
test_df = pd.read_csv("test_processed.csv", index_col="PassengerId")
Xtest = test_df[feature_names]
# `scaler` is defined outside this chunk.
Xtest_scaled = scaler.transform(Xtest)
test_basepredict = predict_features(base_estimators, Xtest, Xtest_scaled)
final_predictions = basepredict_lr.predict(test_basepredict)
common.make_submission(Xtest.index, final_predictions, "submit_reweight_learners.csv")
# NOTE(review): the `return` below is the last statement of a function whose
# `def` line and earlier body are not part of this chunk; it is presumably
# `predict_features`, which is called further down — confirm.
    # Transpose `basepredicts` so that each base estimator becomes a column
    # (named by `estimator.name`) and the rows carry the index of X.
    return pd.DataFrame(np.asarray(basepredicts).T, index = X.index, columns = [estimator.name for estimator in base_estimators])


# ***************************** fit advanced features to validation target
# Base-estimator predictions on the validation set serve as the input
# features of a second-level logistic regression.
validate_basepredicts = predict_features(base_estimators,Xvalidate,Xvalidate_scaled)

# Cross-validated search over the regularization strength (Cs=30, cv=10).
lrcv = LogisticRegressionCV(Cs=30,cv=10)
lrcv.fit(validate_basepredicts,yvalidate)
# NOTE(review): the results of the next two calls are not stored; they look
# like leftovers from interactive inspection.
lrcv.score(validate_basepredicts,yvalidate)
common.make_coefs_frame(validate_basepredicts.columns,lrcv.coef_.ravel())

# fit again with whole data
# NOTE(review): the refit uses the same validation predictions and targets
# as the cross-validated fit above, with C fixed to the first entry of
# `lrcv.C_`.
basepredict_lr = LogisticRegression(C = lrcv.C_[0])
basepredict_lr.fit(validate_basepredicts,yvalidate)
# Results discarded here as well (see note above).
basepredict_lr.score(validate_basepredicts,yvalidate)
common.make_coefs_frame(validate_basepredicts.columns,basepredict_lr.coef_.ravel())

# ***************************** test
# Build the same base-prediction features for the test set and let the
# second-level model produce the final predictions.
test_df = pd.read_csv("test_processed.csv",index_col="PassengerId")
Xtest = test_df[feature_names]
# `scaler` is defined outside this chunk.
Xtest_scaled = scaler.transform(Xtest)
test_basepredict = predict_features(base_estimators,Xtest,Xtest_scaled)
final_predictions = basepredict_lr.predict(test_basepredict)
common.make_submission(Xtest.index,final_predictions,"submit_reweight_learners.csv")