Exemplo n.º 1
0
from scipy.stats import chi2
from scipy.stats import entropy
import numpy as np
from collections import Counter
import math
from petfinder.get_explore import read_data, Columns
from petfinder.preprocessing import prepare_data
from petfinder.feature_engineering import finalize_data, add_features, filter_by_varth
from petfinder.tools import detect_outliers, plog

if __name__ == "__main__":
    # Imported locally because the module's visible import block does not
    # bring in `sys` or `pandas`, both of which this entry point uses.
    import sys

    import pandas as pd

    # Emit U+2702 as raw UTF-8 bytes, bypassing the text layer of stdout.
    sys.stdout.buffer.write(chr(9986).encode('utf8'))

    # Widen pandas' console rendering.
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    train, test = read_data()
    train, test = prepare_data(train, test)
    train, test = add_features(train, test)

    # Columns eligible for zero-filling; built once instead of being
    # re-concatenated for every column of every frame.
    image_num_cols = (
        Columns.img_num_cols_1.value
        + Columns.img_num_cols_2.value
        + Columns.img_num_cols_3.value
    )

    # Replace missing values with 0 in the listed columns of both datasets.
    for label, frame in (("train", train), ("test", test)):
        ccols = frame.columns.values.tolist()
        for c in ccols:
            # Membership is tested first so the NaN scan only runs on
            # columns that are candidates for filling.
            if c in image_num_cols and frame[c].isna().any():
                plog("filling " + c + " with 0 on " + label + " dataset")
                # Assign the filled column back: calling
                # fillna(inplace=True) on a selected column is chained
                # assignment and does not update the frame under pandas
                # Copy-on-Write.
                frame[c] = frame[c].fillna(0)
Exemplo n.º 2
0
from mord import OrdinalRidge
from petfinder.get_explore import read_data
from petfinder.preprocessing import prepare_data
import pandas as pd

if __name__ == "__main__":
    # Load the raw frames and turn them into features, targets and test ids.
    raw_train, raw_test = read_data()
    x_train, y_train, x_test, id_test = prepare_data(raw_train, raw_test)

    # Fit the ordinal ridge model and predict on the test features.
    model = OrdinalRidge()
    model.fit(x_train, y_train)
    predictions = model.predict(x_test)

    # Report the model's score on the data it was trained on.
    train_score = model.score(x_train, y_train)
    print(train_score)

    # Show the predictions next to their pet identifiers.
    pet_ids = id_test.PetID.values.ravel()
    submission = pd.DataFrame({'PetID': pet_ids, 'AdoptionSpeed': predictions})
    print(submission)
Exemplo n.º 3
0
        'PetID': test_id.values.ravel(),
        'AdoptionSpeed': pred
    })

    return prediction_df


if __name__ == "__main__":
    # Widen pandas' console rendering.
    for option, value in (('display.max_columns', 500), ('display.width', 1500)):
        pd.set_option(option, value)

    # Random 5x3 sample frame; its only consumer here was a `predict(df)`
    # call that is currently disabled.
    sample_values = np.random.randn(5, 3)
    df = pd.DataFrame(sample_values, columns=['perf', 'B', 'C'])

    train, test = read_data()
    train_x, train_y, test_x, test_id = prepare_data(train, test)

    # Remove the "RescuerID" column from both feature frames in place.
    # NOTE: a `randomsearchpipeline` run on the same inputs, plus several
    # debug prints, used to live here and are disabled.
    for frame in (train_x, test_x):
        frame.drop(columns=["RescuerID"], inplace=True)

    # Flatten the targets to 1-D before running the search.
    targets = train_y.values.ravel()
    clf, mean = iterate_by_randomsearch(train_x, targets)

    print(clf)
    print(mean)