import os sys.path.append(os.path.abspath("../preprocessing/")) import features_preprocessing as fp import submission_preprocessing as sp import submission_postprocess as postpro from sklearn import svm from sklearn import cross_validation from sklearn import metrics from configuration import CONFIG import pandas as pd import time t1 = time.time() print('preprocessing...') preprocessing = fp.feature_preprocessing() preprocessing.full_preprocess(used_columns=['WEEK_DAY', 'YEAR', 'YEAR_DAY', 'ASS_ID', 'TIME', 'CSPL_CALLS']) sub_p = sp.submission_preprocessing() sub_p.full_preprocess(used_columns=['WEEK_DAY', 'YEAR', 'YEAR_DAY', 'ASS_ID', 'TIME', 'CSPL_CALLS']) submission_data = sub_p.data print(submission_data.columns) data = preprocessing.data prediction = [] print('data loaded, beginning prediction...') for i in range(submission_data.shape[0]):
# Ridge-regression baseline: fit on a 60/40 train/test split of the
# preprocessed features and print the predictions and hold-out score.
import sys
import os

# Make the sibling preprocessing package importable when this script is run
# from its own directory (the path is relative to the current working dir).
sys.path.append(os.path.abspath("../preprocessing/"))

import features_preprocessing as fp
from sklearn import linear_model
try:
    # scikit-learn >= 0.18 exposes train_test_split here; the legacy
    # `cross_validation` module was removed in 0.20.
    from sklearn import model_selection as cross_validation
except ImportError:
    # Older scikit-learn releases only ship the legacy module.
    from sklearn import cross_validation
from sklearn import metrics
import numpy as np

# Run the preprocessing pipeline with its default arguments and grab the frame.
preprocessing = fp.feature_preprocessing()
preprocessing.full_preprocess()
data = preprocessing.data

# Target is the CSPL_CALLS column; every remaining column is a feature.
Y = data['CSPL_CALLS']
X = data.drop(['CSPL_CALLS'], axis=1)

# Fixed random_state keeps the 60/40 split reproducible between runs.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, Y, test_size=0.4, random_state=0)
# Call form of print works on both Python 2 and 3 for a single argument.
print(X_train)

# Fit once (the original fitted the same data twice, which only wasted time).
clf = linear_model.Ridge(alpha=.5)
clf.fit(X_train, y_train)

y_predict = clf.predict(X_test)
print(y_predict)
# Regressor .score() reports the coefficient of determination on the hold-out set.
print(clf.score(X_test, y_test))
import linex as ln import numpy as np from sklearn import cross_validation from sklearn.cross_validation import cross_val_score from sklearn.ensemble import RandomForestRegressor from sklearn.ensemble import AdaBoostRegressor import pandas as pd from sklearn import linear_model import datetime as dt from linex import loss_linex from sklearn.feature_selection import RFE from tqdm import tqdm #Loading training dataset and submission dataset preprocessing = fp.feature_preprocessing('train_2011_2012_2013.csv', ';') submission = pd.read_csv('submission.txt', sep = '\t') submission['ASS_ID'] = submission['ASS_ASSIGNMENT'].apply(lambda x: int(CONFIG.ass_assign[x])) print('Ready for training...') #Training model on 27 assignments for id in tqdm([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,18,19,20,21,22,23,24,25,26]): print("Working on assignment : ",id) preprocessing_id = fp.feature_preprocessing() preprocessing_id.data = preprocessing.data.copy() preprocessing_id.full_preprocess(id) data = preprocessing_id.data print(id,' Data loaded') Y = data['CSPL_RECEIVED_CALLS']