Example No. 1
import pandas as pd
import common  # project-local helpers for loading models and writing submissions

def test():
    # "feature_names" is the feature list defined elsewhere in the original script
    test_df = pd.read_csv("test_processed.csv", index_col="PassengerId")
    Xtest = test_df[feature_names]

    # load the previously trained GBDT model and write its predictions as a submission
    gbdt = common.load_predictor("gbdt.pkl")
    predictions = gbdt.predict(Xtest)
    common.make_submission(Xtest.index, predictions, "submit_gbdt.csv")
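These snippets depend on a small project-local common module that is never shown. What follows is only a guess at the two helpers used above, inferred from how they are called; the joblib-based loading and the submission layout are assumptions, not the author's code.

# common.py -- hypothetical sketch, not part of the original examples
import joblib
import pandas as pd

def load_predictor(path):
    # assumed: the trained estimator was saved earlier with joblib.dump(model, path)
    return joblib.load(path)

def make_submission(index, predictions, filename):
    # assumed: Kaggle Titanic layout, PassengerId index plus a 0/1 Survived column
    submission = pd.DataFrame({"Survived": predictions},
                              index=pd.Index(index, name="PassengerId"))
    submission.to_csv(filename)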
Example No. 2
import numpy as np
import pandas as pd
import common

def read_result(filename):
    # read one model's submission and recode the 0/1 predictions as -1/+1 votes
    results = pd.read_csv(filename, index_col="PassengerId")
    results = results.iloc[:, 0]
    results[results == 0] = -1
    return results

methods = ["lr","svc","rf","gbdt","knn"]
results = [read_result(filename) for filename in ("submit_%s.csv"%m for m in methods)]
results = pd.concat(results,axis=1, keys=methods)

# a passenger is a "consensus" case when all five models agree (row sum is +5 or -5)
consensus = results.sum(axis=1)
consensus = (consensus == 5) | (consensus == -5)

consensus.value_counts()  # inspect how many passengers got a unanimous vote

# consensus rows keep the unanimous vote
consensus_result = results.loc[consensus, :].sum(axis=1)

# note: invert a boolean mask with ~, not unary minus
nonconsensus_result = results.loc[~consensus, :].sum(axis=1)

# flip the sign so split decisions are guessed against the majority
neg_nonconsensus_result = -1 * nonconsensus_result

guess = pd.concat([consensus_result,neg_nonconsensus_result])

guess = (guess > 0).astype(int)

common.make_submission(guess.index,guess,"submit_guess.csv")
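To see what the consensus/anti-consensus split produces, here is a tiny self-contained run on made-up votes; the three passengers and their ±1 votes below are fabricated purely for illustration.

import pandas as pd

# toy votes: five ±1 predictions for three hypothetical passengers
votes = pd.DataFrame(
    {"lr": [1, 1, -1], "svc": [1, -1, -1], "rf": [1, 1, -1], "gbdt": [1, -1, -1], "knn": [1, 1, -1]},
    index=pd.Index([892, 893, 894], name="PassengerId"),
)

row_sum = votes.sum(axis=1)
consensus = (row_sum == 5) | (row_sum == -5)
print(consensus.to_dict())  # {892: True, 893: False, 894: True}

# unanimous rows keep their vote; split rows are guessed against their majority
guess = pd.concat([row_sum[consensus], -row_sum[~consensus]])
print((guess > 0).astype(int).sort_index().to_dict())  # {892: 1, 893: 0, 894: 0}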
Example No. 3

import numpy as np
import pandas as pd
import common

def read_result(filename):
    results = pd.read_csv(filename,index_col="PassengerId")
    results = results.iloc[:,0]
    results[results == 0] = -1
    return results

methods = ["lr","svc","rf","gbdt","knn"]
results = [read_result(filename) for filename in ("submit_%s.csv"%m for m in methods)]
results = pd.concat(results,axis=1, keys=methods)

# with five ±1 votes, the sum is positive exactly when a majority predicted survival
majority = results.sum(axis=1)
majority = (majority > 0).astype(int)

common.make_submission(majority.index, majority, "submit_average.csv")
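As a quick sanity check that the ±1 sum rule really is an ordinary majority vote: with an odd number of voters, the row sum is positive exactly when more than half of the models predicted 1. The two rows below are toy values, not real submissions.

import pandas as pd

# five ±1 votes for two hypothetical passengers
votes = pd.DataFrame({"lr": [1, -1], "svc": [1, -1], "rf": [1, 1], "gbdt": [-1, -1], "knn": [1, -1]})

sum_rule = (votes.sum(axis=1) > 0).astype(int)
count_rule = ((votes == 1).sum(axis=1) > len(votes.columns) / 2).astype(int)
print(sum_rule.equals(count_rule))  # True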
Example No. 4
import pandas as pd
import common

def read_result(filename):
    results = pd.read_csv(filename, index_col="PassengerId")
    results = results.iloc[:, 0]
    results[results == 0] = -1
    return results


methods = ["lr", "svc", "rf", "gbdt", "knn"]
results = [
    read_result(filename)
    for filename in ("submit_%s.csv" % m for m in methods)
]
results = pd.concat(results, axis=1, keys=methods)

consensus = results.sum(axis=1)
consensus = (consensus == 5) | (consensus == -5)

consensus.value_counts()

consensus_result = results.loc[consensus, :].sum(axis=1)

nonconsensus_result = results.loc[~consensus, :].sum(axis=1)

neg_nonconsensus_result = -1 * nonconsensus_result

guess = pd.concat([consensus_result, neg_nonconsensus_result])

guess = (guess > 0).astype(int)

common.make_submission(guess.index, guess, "submit_guess.csv")
Example No. 5
def predict_features(base_estimators, X, scaledX):  # signature reconstructed from the calls below
    # use each already-fitted base learner to predict, feeding it scaled or raw features as required
    basepredicts = [
        estimator.estimator.predict(scaledX) if estimator.need_scale else estimator.estimator.predict(X)
        for estimator in base_estimators
    ]
    return pd.DataFrame(
        np.asarray(basepredicts).T, index=X.index, columns=[estimator.name for estimator in base_estimators]
    )


# ***************************** fit advanced features to validation target
validate_basepredicts = predict_features(base_estimators, Xvalidate, Xvalidate_scaled)
lrcv = LogisticRegressionCV(Cs=30, cv=10)
lrcv.fit(validate_basepredicts, yvalidate)
lrcv.score(validate_basepredicts, yvalidate)
common.make_coefs_frame(validate_basepredicts.columns, lrcv.coef_.ravel())

# refit a plain LogisticRegression with the selected C on the full validation set
basepredict_lr = LogisticRegression(C=lrcv.C_[0])
basepredict_lr.fit(validate_basepredicts, yvalidate)
basepredict_lr.score(validate_basepredicts, yvalidate)
common.make_coefs_frame(validate_basepredicts.columns, basepredict_lr.coef_.ravel())

# ***************************** test
test_df = pd.read_csv("test_processed.csv", index_col="PassengerId")
Xtest = test_df[feature_names]
Xtest_scaled = scaler.transform(Xtest)

test_basepredict = predict_features(base_estimators, Xtest, Xtest_scaled)
final_predictions = basepredict_lr.predict(test_basepredict)
common.make_submission(Xtest.index, final_predictions, "submit_reweight_learners.csv")
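The stacking step above assumes that each entry in base_estimators exposes .name, .estimator and .need_scale, and that common.make_coefs_frame pairs column names with the learned coefficients. Neither is shown in the excerpt, so the sketch below is only one plausible way to set them up; the namedtuple layout, the particular models, and the make_coefs_frame body are assumptions.

from collections import namedtuple

import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# hypothetical container matching the attributes used by predict_features
BaseLearner = namedtuple("BaseLearner", ["name", "estimator", "need_scale"])

# each estimator would be fitted on the training split before being passed to predict_features
base_estimators = [
    BaseLearner("lr", LogisticRegression(), True),             # linear model: use scaled features
    BaseLearner("svc", SVC(), True),
    BaseLearner("gbdt", GradientBoostingClassifier(), False),  # trees: raw features are fine
]

def make_coefs_frame(columns, coefs):
    # assumed helper: one row per base learner with its stacking weight
    return pd.DataFrame({"feature": columns, "coef": coefs}).sort_values("coef", ascending=False)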
Example No. 6
    # tail of the predict_features helper shown in full in Example No. 5
    return pd.DataFrame(np.asarray(basepredicts).T,
                        index=X.index,
                        columns=[estimator.name for estimator in base_estimators])

# ***************************** fit advanced features to validation target 
validate_basepredicts = predict_features(base_estimators,Xvalidate,Xvalidate_scaled)
lrcv = LogisticRegressionCV(Cs=30,cv=10)
lrcv.fit(validate_basepredicts,yvalidate)
lrcv.score(validate_basepredicts,yvalidate)
common.make_coefs_frame(validate_basepredicts.columns,lrcv.coef_.ravel())

# refit a plain LogisticRegression with the selected C on the full validation set
basepredict_lr = LogisticRegression(C=lrcv.C_[0])
basepredict_lr.fit(validate_basepredicts,yvalidate)
basepredict_lr.score(validate_basepredicts,yvalidate)
common.make_coefs_frame(validate_basepredicts.columns,basepredict_lr.coef_.ravel())

# ***************************** test
test_df = pd.read_csv("test_processed.csv",index_col="PassengerId")
Xtest = test_df[feature_names]
Xtest_scaled = scaler.transform(Xtest)

test_basepredict = predict_features(base_estimators,Xtest,Xtest_scaled)
final_predictions = basepredict_lr.predict(test_basepredict)
common.make_submission(Xtest.index,final_predictions,"submit_reweight_learners.csv")