def load_data(data_type, shuffle=True):
    """Load one of the CSV data sets and return its log-transformed features.

    ``"cv"`` is an alias for ``"train"``: both read ``train.csv`` from the
    directory given by ``load_params().data_dir``.

    :param data_type: ``"train"``, ``"cv"`` or ``"test"``.
    :param shuffle: when true, randomly permute the rows of the training data
        before the target column is split off; ignored for ``"test"``.
    :return: for ``"train"``/``"cv"`` a tuple ``(cols, X, y)`` where ``cols``
        are the column names left after ``init_transforms``, ``X`` is
        ``log(features + 1)`` and ``y`` is the untransformed ``revenue``
        column; for ``"test"`` only ``X``.
    :raises ValueError: if ``data_type`` is not one of the accepted names.
    """
    # Explicit raise rather than ``assert`` so the check survives ``python -O``.
    if data_type not in ("train", "test", "cv"):
        raise ValueError(
            "data_type must be 'train', 'test' or 'cv', got %r" % (data_type,)
        )
    if data_type == "cv":
        data_type = "train"

    path = os.path.join(load_params().data_dir, data_type + ".csv")
    data = read_csv(path)

    if data_type == "test":
        # ``.values`` instead of ``as_matrix()``: same array, but available on
        # every pandas version (``as_matrix`` was removed in pandas 1.0).
        return np.log(init_transforms(data).values + 1)

    if shuffle:
        data = data.reindex(np.random.permutation(data.index))
    # Pull the target out before the feature transforms are applied.
    y = data.revenue.values
    data = init_transforms(data.drop(["revenue"], axis=1))
    cols = data.columns.values
    return cols, np.log(data.values + 1), y
def model_apply(data_type, clf_info, processor):
    """Build the feature matrix for one data split, then train or predict.

    The features are the output of ``SubjectFeedbackInfo(...).transform()``
    concatenated column-wise with ``processor.transform(X)``.

    :param data_type: ``"train"`` or ``"test"``.
    :param clf_info: for ``"train"`` a ``(clf, cv_params)`` pair; for
        ``"test"`` the classifier itself.
    :param processor: object whose ``transform`` method is applied to the data
        returned by ``read_maindata``.
    :return: for ``"train"`` the result of
        ``cross_validation(X, y, clf, cv_params)``; for ``"test"`` the second
        column of ``clf.predict_proba(X)``.
    :raises ValueError: if ``data_type`` is neither ``"train"`` nor ``"test"``.
    """
    # Reject unknown splits up front instead of failing later on an unbound X.
    if data_type not in ("train", "test"):
        raise ValueError(
            "data_type must be 'train' or 'test', got %r" % (data_type,)
        )

    params = load_params()
    # Single-argument print(...) produces the same output on Python 2 and 3.
    if data_type == "train":
        print("Training")
        X, y = read_maindata(params, type="train")
        clf, cv_params = clf_info
    else:
        print("Testing")
        X = read_maindata(params, type="test")
        clf = clf_info

    print(processor)
    Xeeg = processor.transform(X)
    Xfbinfo = SubjectFeedbackInfo(data_type, params).transform()
    X = np.concatenate((Xfbinfo, Xeeg), axis=1)
    # Drop the intermediate arrays once they are merged into X.
    del Xeeg, Xfbinfo
    print(X.shape)

    if data_type == "train":
        return cross_validation(X, y, clf, cv_params)
    return clf.predict_proba(X)[:, 1]
def model_apply(data_type, clf_info, processor):
    """Build the feature matrix for one data split, then train or predict.

    The features are the output of ``SubjectFeedbackInfo(...).transform()``
    concatenated column-wise with ``processor.transform(X)``.

    :param data_type: ``"train"`` or ``"test"``.
    :param clf_info: for ``"train"`` a ``(clf, cv_params)`` pair; for
        ``"test"`` the classifier itself.
    :param processor: object whose ``transform`` method is applied to the data
        returned by ``read_maindata``.
    :return: for ``"train"`` the result of
        ``cross_validation(X, y, clf, cv_params)``; for ``"test"`` the second
        column of ``clf.predict_proba(X)``.
    :raises ValueError: if ``data_type`` is neither ``"train"`` nor ``"test"``.
    """
    # Reject unknown splits up front instead of failing later on an unbound X.
    if data_type not in ("train", "test"):
        raise ValueError(
            "data_type must be 'train' or 'test', got %r" % (data_type,)
        )

    params = load_params()
    # Single-argument print(...) produces the same output on Python 2 and 3.
    if data_type == "train":
        print("Training")
        X, y = read_maindata(params, type="train")
        clf, cv_params = clf_info
    else:
        print("Testing")
        X = read_maindata(params, type="test")
        clf = clf_info

    print(processor)
    Xeeg = processor.transform(X)
    Xfbinfo = SubjectFeedbackInfo(data_type, params).transform()
    X = np.concatenate((Xfbinfo, Xeeg), axis=1)
    # Drop the intermediate arrays once they are merged into X.
    del Xeeg, Xfbinfo
    print(X.shape)

    if data_type == "train":
        return cross_validation(X, y, clf, cv_params)
    return clf.predict_proba(X)[:, 1]
def main(mode):
    """Run the pipeline to write a submission file or to cross-validate models.

    In ``"submission"`` mode the first processor and first classifier spec
    from the generators are used: the model is trained, predictions for the
    test split are written into the ``Prediction`` column of
    ``SampleSubmission.csv`` and saved as ``submission_<EstimatorName>``.
    In ``"cross_validation"`` mode every classifier/processor combination is
    passed to ``model_apply("train", ...)``.

    :param mode: "submission" or "cross_validation"
    :return: None
    :raises ValueError: if ``mode`` is not one of the two supported values.
    """
    if mode == "submission":
        params = load_params()
        ytest = read_csv(os.path.join(params.data_dir, "SampleSubmission.csv"))
        # Builtin next() works on both Python 2.6+ and Python 3 iterators.
        processor = next(proc_generator())
        clf_info = next(clf_generator())
        # The training result must expose ``best_estimator_`` (read below).
        clf = model_apply("train", clf_info, processor)
        ytest["Prediction"] = model_apply("test", clf, processor)
        # Name the output after the estimator class: text before the first "(".
        estimator_name = str(clf.best_estimator_).split("(")[0]
        ytest.to_csv("submission_%s" % estimator_name, index=False)
    elif mode == "cross_validation":
        for clf in clf_generator():
            for processor in proc_generator():
                model_apply("train", clf, processor)
    else:
        # Raise a real exception; raising a plain string is itself a TypeError.
        raise ValueError(
            "Available modes are `submission` and `cross_validation`"
        )
def main(mode):
    """Run the pipeline to write a submission file or to cross-validate models.

    In ``"submission"`` mode the first processor and first classifier spec
    from the generators are used: the model is trained, predictions for the
    test split are written into the ``Prediction`` column of
    ``SampleSubmission.csv`` and saved as ``submission_<EstimatorName>``.
    In ``"cross_validation"`` mode every classifier/processor combination is
    passed to ``model_apply("train", ...)``.

    :param mode: "submission" or "cross_validation"
    :return: None
    :raises ValueError: if ``mode`` is not one of the two supported values.
    """
    if mode == "submission":
        params = load_params()
        ytest = read_csv(os.path.join(params.data_dir, "SampleSubmission.csv"))
        # Builtin next() works on both Python 2.6+ and Python 3 iterators.
        processor = next(proc_generator())
        clf_info = next(clf_generator())
        # The training result must expose ``best_estimator_`` (read below).
        clf = model_apply("train", clf_info, processor)
        ytest["Prediction"] = model_apply("test", clf, processor)
        # Name the output after the estimator class: text before the first "(".
        estimator_name = str(clf.best_estimator_).split("(")[0]
        ytest.to_csv("submission_%s" % estimator_name, index=False)
    elif mode == "cross_validation":
        for clf in clf_generator():
            for processor in proc_generator():
                model_apply("train", clf, processor)
    else:
        # Raise a real exception; raising a plain string is itself a TypeError.
        raise ValueError(
            "Available modes are `submission` and `cross_validation`"
        )
def make_submission(name, preds):
    """Fill the sample submission with predictions and save it as CSV.

    :param name: path of the CSV file to write.
    :param preds: values stored in the ``Prediction`` column.
    :return: None
    """
    # NOTE(review): ``main`` reads "SampleSubmission.csv" (capital S); confirm
    # which spelling matches the file on disk for case-sensitive filesystems.
    template_path = os.path.join(load_params().data_dir, 'sampleSubmission.csv')
    submission = read_csv(template_path)
    submission['Prediction'] = preds
    submission.to_csv(name, index=False)