Пример #1
0
def load_data(data_type, shuffle=True):
    """Load a CSV data set and return its log-transformed feature matrix.

    The file ``<data_type>.csv`` is read from ``load_params().data_dir``;
    ``"cv"`` reuses the training file.

    :param data_type: ``"train"``, ``"test"`` or ``"cv"``
    :param shuffle: if true, rows of the train/cv data are randomly
        permuted before the target is split off (ignored for ``"test"``)
    :return: for ``"train"``/``"cv"`` a tuple ``(cols, X, y)`` where ``cols``
        are the column names after ``init_transforms``, ``X`` is
        ``log(features + 1)`` and ``y`` is the ``revenue`` column;
        for ``"test"`` only ``X``
    :raises ValueError: if ``data_type`` is not one of the accepted values
    """
    # Explicit check instead of ``assert`` so it survives ``python -O``.
    if data_type not in ("train", "test", "cv"):
        raise ValueError(
            "data_type must be 'train', 'test' or 'cv', got %r" % (data_type,))

    # Cross-validation is run on the training file.
    if data_type == "cv":
        data_type = "train"
    path = os.path.join(load_params().data_dir, data_type + ".csv")
    data = read_csv(path)

    if data_type == "test":
        # No target column to split off; shuffling is not applied here.
        features = init_transforms(data)
        # ``.values`` replaces ``as_matrix()``, which was removed in pandas 1.0.
        return np.log(features.values + 1)

    if shuffle:
        data = data.reindex(np.random.permutation(data.index))
    y = data.revenue.values
    features = init_transforms(data.drop(["revenue"], axis=1))
    cols = features.columns.values
    return cols, np.log(features.values + 1), y
Пример #2
0
def model_apply(data_type, clf_info, processor):
    """Build the feature matrix and either cross-validate or predict.

    The raw data returned by ``read_maindata`` is passed through
    ``processor.transform`` and concatenated column-wise (feedback info
    first) with the output of ``SubjectFeedbackInfo(...).transform()``.

    :param data_type: ``"train"`` or ``"test"``
    :param clf_info: for ``"train"`` a ``(clf, cv_params)`` pair; for
        ``"test"`` the classifier itself (it must offer ``predict_proba``)
    :param processor: object whose ``transform(X)`` produces the features
        derived from the main data
    :return: for ``"train"`` the result of
        ``cross_validation(X, y, clf, cv_params)``; for ``"test"`` column 1
        of ``clf.predict_proba(X)``
    :raises ValueError: if ``data_type`` is neither ``"train"`` nor ``"test"``
    """
    # Fail fast: previously an unknown value surfaced later as a NameError.
    if data_type not in ("train", "test"):
        raise ValueError(
            "data_type must be 'train' or 'test', got %r" % (data_type,))

    params = load_params()
    if data_type == "train":
        print("Training")
        (X, y) = read_maindata(params, type="train")
        (clf, cv_params) = clf_info
    else:
        print("Testing")
        X = read_maindata(params, type="test")
        clf = clf_info

    print(processor)
    Xeeg = processor.transform(X)
    Xfbinfo = SubjectFeedbackInfo(data_type, params).transform()
    # NOTE: EOG features (EOGInfo) are currently not part of the matrix.
    X = np.concatenate((Xfbinfo, Xeeg), axis=1)
    del Xeeg, Xfbinfo  # release the intermediate arrays early
    print(X.shape)

    # A leftover debugging ``exit()`` used to terminate the process here,
    # which made the two returns below unreachable.
    if data_type == "train":
        return cross_validation(X, y, clf, cv_params)
    return clf.predict_proba(X)[:, 1]
Пример #3
0
def model_apply(data_type, clf_info, processor):
    """Build the feature matrix and either cross-validate or predict.

    The raw data returned by ``read_maindata`` is passed through
    ``processor.transform`` and concatenated column-wise (feedback info
    first) with the output of ``SubjectFeedbackInfo(...).transform()``.

    :param data_type: ``"train"`` or ``"test"``
    :param clf_info: for ``"train"`` a ``(clf, cv_params)`` pair; for
        ``"test"`` the classifier itself (it must offer ``predict_proba``)
    :param processor: object whose ``transform(X)`` produces the features
        derived from the main data
    :return: for ``"train"`` the result of
        ``cross_validation(X, y, clf, cv_params)``; for ``"test"`` column 1
        of ``clf.predict_proba(X)``
    :raises ValueError: if ``data_type`` is neither ``"train"`` nor ``"test"``
    """
    # Fail fast: previously an unknown value surfaced later as a NameError.
    if data_type not in ("train", "test"):
        raise ValueError(
            "data_type must be 'train' or 'test', got %r" % (data_type,))

    params = load_params()
    if data_type == "train":
        print("Training")
        (X, y) = read_maindata(params, type="train")
        (clf, cv_params) = clf_info
    else:
        print("Testing")
        X = read_maindata(params, type="test")
        clf = clf_info

    print(processor)
    Xeeg = processor.transform(X)
    Xfbinfo = SubjectFeedbackInfo(data_type, params).transform()
    # NOTE: EOG features (EOGInfo) are currently not part of the matrix.
    X = np.concatenate((Xfbinfo, Xeeg), axis=1)
    del Xeeg, Xfbinfo  # release the intermediate arrays early
    print(X.shape)

    # A leftover debugging ``exit()`` used to terminate the process here,
    # which made the two returns below unreachable.
    if data_type == "train":
        return cross_validation(X, y, clf, cv_params)
    return clf.predict_proba(X)[:, 1]
Пример #4
0
def main(mode):
    """
    Run the pipeline either to write a submission file or to cross-validate
    every classifier/processor combination.

    In "submission" mode the first processor and the first classifier entry
    from the generators are used: the model is trained, predictions for the
    test data are stored in the "Prediction" column of the sample submission,
    and the frame is written to ``submission_<estimator name>``.

    :param mode: "submission" or "cross_validation"
    :return: None
    :raises ValueError: if ``mode`` is not one of the two supported values
    """
    if mode == "submission":
        params = load_params()
        ytest = read_csv(os.path.join(params.data_dir, "SampleSubmission.csv"))
        # Only the first candidate of each generator is used for a submission.
        processor = next(proc_generator())
        clf_info = next(clf_generator())
        # The training result is expected to expose ``best_estimator_``
        # (it is read below) and is handed back in as the test classifier.
        fitted = model_apply("train", clf_info, processor)
        ytest["Prediction"] = model_apply("test", fitted, processor)
        # Name the file after the estimator class (text before the first "(").
        estimator_name = str(fitted.best_estimator_).split("(")[0]
        ytest.to_csv("submission_%s" % estimator_name, index=False)
    elif mode == "cross_validation":
        for clf_info in clf_generator():
            for processor in proc_generator():
                model_apply("train", clf_info, processor)
    else:
        # Raising a bare string is a TypeError; raise a real exception.
        raise ValueError(
            "Available modes are `submission` and `cross_validation`")
Пример #5
0
def main(mode):
    """
    Run the pipeline either to write a submission file or to cross-validate
    every classifier/processor combination.

    In "submission" mode the first processor and the first classifier entry
    from the generators are used: the model is trained, predictions for the
    test data are stored in the "Prediction" column of the sample submission,
    and the frame is written to ``submission_<estimator name>``.

    :param mode: "submission" or "cross_validation"
    :return: None
    :raises ValueError: if ``mode`` is not one of the two supported values
    """
    if mode == "submission":
        params = load_params()
        ytest = read_csv(os.path.join(params.data_dir, "SampleSubmission.csv"))
        # Only the first candidate of each generator is used for a submission.
        processor = next(proc_generator())
        clf_info = next(clf_generator())
        # The training result is expected to expose ``best_estimator_``
        # (it is read below) and is handed back in as the test classifier.
        fitted = model_apply("train", clf_info, processor)
        ytest["Prediction"] = model_apply("test", fitted, processor)
        # Name the file after the estimator class (text before the first "(").
        estimator_name = str(fitted.best_estimator_).split("(")[0]
        ytest.to_csv("submission_%s" % estimator_name, index=False)
    elif mode == "cross_validation":
        for clf_info in clf_generator():
            for processor in proc_generator():
                model_apply("train", clf_info, processor)
    else:
        # Raising a bare string is a TypeError; raise a real exception.
        raise ValueError(
            "Available modes are `submission` and `cross_validation`")
Пример #6
0
def make_submission(name, preds):
    """Write ``preds`` to a submission CSV based on the sample submission.

    The template ``sampleSubmission.csv`` is read from
    ``load_params().data_dir``, its ``Prediction`` column is set to
    ``preds``, and the frame is saved to ``name`` without the index.

    :param name: path of the CSV file to write
    :param preds: values assigned to the ``Prediction`` column
    :return: None
    """
    template_path = os.path.join(load_params().data_dir, 'sampleSubmission.csv')
    submission = read_csv(template_path)
    submission['Prediction'] = preds
    submission.to_csv(name, index=False)