def separate_label_and_data():
    """Split the module-level ``train`` DataFrame into features and label.

    Returns:
        (X, y): X is ``train`` without the 'ACTION' column; y is the list
        of 'ACTION' values.
    """
    util.print_debug_msg('separating label and data')
    y = list(train['ACTION'])
    # BUG FIX: the original did `X = train; del X['ACTION']`, which only
    # rebinds a name and therefore deleted the column from the global
    # `train` itself — the opposite of the stated intent to "preserve its
    # originality".  `drop` returns a new DataFrame and leaves `train`
    # untouched.
    X = train.drop('ACTION', axis=1)
    return X, y
def group_data(data, degree=3, hash=hash):
    """Hash every ``degree``-sized combination of columns of ``data``.

    For each combination of column indices, each row's selected values are
    hashed as a tuple.  Returns an array with one column per combination
    and the same number of rows as ``data``.

    NOTE: the ``hash`` parameter deliberately defaults to the builtin so
    callers can supply an alternative hash function.
    """
    util.print_debug_msg('Grouping Data')
    _, n_cols = data.shape
    hashed_columns = [
        [hash(tuple(row)) for row in data[:, cols]]
        for cols in combinations(range(n_cols), degree)
    ]
    return array(hashed_columns).T
def grid_search(X_train, y_train): util.print_debug_msg('Starting grid search') parameters = {'C':[0.5,1,1.5,2,2.5,3,3.5,4],'penalty':['l1','l2']} clf = GridSearchCV(lg, parameters, scoring="roc_auc", n_jobs=2) util.print_debug_msg('Now fitting in grid search') clf.fit(X_train, y_train) print 'cv_scores: ', clf.cv_scores_ print 'best_estimator: ', clf.best_estimator_ print 'best_params: ', clf.best_params_
def normalize_features(X, y=None):
    """One-hot encode X (and optionally y) with a shared encoder.

    When y is given, the encoder is fitted on X and y stacked so both
    transforms share one category space, and (X, y) transformed are
    returned.  Otherwise the encoded X alone is returned.
    """
    util.print_debug_msg('Selected Features. Now One Hot Encoding')
    enc = OneHotEncoder()
    # BUG FIX: `y != None` is an elementwise comparison for numpy arrays
    # (ambiguous truth value / surprising results); identity comparison is
    # the correct presence test.
    if y is not None:
        enc.fit(np.vstack((X, y)))
        X = enc.transform(X)
        y = enc.transform(y)
        return X, y
    return enc.fit_transform(X)
def pre_process_data(X):
    """Expand X with hashed 2- and 3-column combinations, then one-hot encode.

    NOTE(review): this function is redefined later in the file; at runtime
    only the last definition is in effect.

    Returns a dense ndarray of the one-hot-encoded combined features.
    """
    util.print_debug_msg('Pre processing Data')
    # FIX: `X.ix[:]` is deprecated (and removed in modern pandas);
    # `X.values` yields the same ndarray of all rows.
    x1 = array(X.values)
    x2 = group_data(x1, degree=2)
    x3 = group_data(x1, degree=3)
    x_all = np.hstack((x1, x2, x3))
    enc = OneHotEncoder()
    enc.fit(x_all)
    x_transformed = enc.transform(x_all).toarray()
    return x_transformed
def pre_process_data(X):
    """Expand X with hashed 2- and 3-column combinations (no encoding).

    NOTE(review): this redefines `pre_process_data` defined earlier in the
    file and is the definition actually used at runtime; unlike the earlier
    one it returns the raw combined features without one-hot encoding.
    """
    util.print_debug_msg('Pre processing Data')
    # FIX: `X.ix[:]` is deprecated (and removed in modern pandas);
    # `X.values` yields the same ndarray of all rows.
    x1 = array(X.values)
    x2 = group_data(x1, degree=2)
    x3 = group_data(x1, degree=3)
    # Dead feature-selection and encoding experiments removed (one of them
    # referenced a non-existent name `X_selected`); all columns are kept.
    return np.hstack((x1, x2, x3))
def train_predict(X_train, y_train, X_test):
    """Fit a linear-kernel SVR on the training data and score X_test.

    NOTE: one of several same-named `train_predict` definitions in this
    file; at runtime only the last one defined is active.
    """
    util.print_debug_msg('Training SVM Classifier')
    regressor = svm.SVR(kernel='linear', verbose=True)
    regressor.fit(X_train, y_train)
    util.print_debug_msg('Predicting SVM Classifier')
    predictions = regressor.predict(X_test)
    return predictions
def train_predict(X_train, y_train, X_test):
    """Fit a Bernoulli naive-Bayes model and return P(class=1) for X_test.

    CONSISTENCY FIX: the sibling kNN/LG variants return
    `predict_proba(...)[:, 1]` — probability scores suited to the ROC-AUC
    evaluation used elsewhere in this file — whereas the original here
    returned hard 0/1 labels via `predict`.
    """
    util.print_debug_msg('Training NB Classifier')
    nb = BernoulliNB()
    nb.fit(X_train, y_train)
    util.print_debug_msg('Predicting NB Classifier')
    return nb.predict_proba(X_test)[:, 1]
def train_predict(X_train, y_train, X_test):
    """Fit a 10-nearest-neighbour classifier; return P(class=1) per test row."""
    util.print_debug_msg('Training kNN Classifier')
    model = KNeighborsClassifier(n_neighbors=10)
    model.fit(X_train, y_train)
    util.print_debug_msg('Predicting kNN Classifier')
    probabilities = model.predict_proba(X_test)
    return probabilities[:, 1]
def train_predict(X_train, y_train, X_test):
    """Fit the module-level `lg` logistic model; return P(class=1) per row."""
    util.print_debug_msg('Training LG Classifier')
    lg.fit(X_train, y_train)
    util.print_debug_msg('Predicting LG Classifier')
    scores = lg.predict_proba(X_test)
    return scores[:, 1]