def fit_logistic_or_with_crossvalidation(X, y):
    """Fit an ordinal model to the dataset with hyperparameter cross-validation.

    Ordinal Ridge (regression) variant. Parameters and returns as per the
    other training functions.
    """
    basemod = mord.OrdinalRidge()
    cv = 5
    param_grid = {
        'fit_intercept': [True, False],
        'alpha': [0.2, 0.4, 0.6, 0.8, 1.0, 2.0, 3.0],
        'normalize': [True, False]}
    return fit_classifier_with_crossvalidation(
        X, y, basemod, cv, param_grid, verbose=False)
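# A minimal usage sketch, assuming fit_classifier_with_crossvalidation is
# defined elsewhere in this module and returns the best cross-validated
# estimator; the synthetic data below is illustrative only.
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.randn(100, 4)
y_demo = np.digitize(X_demo[:, 0], bins=[-1.0, 0.0, 1.0])  # ordinal levels 0..3
best_or = fit_logistic_or_with_crossvalidation(X_demo, y_demo)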
def train_ordinal_logistic(train_features, train_labels, skip_grid_search,
                           evaluation, num_jobs, loss, alpha, cost,
                           ordinal_algorithm):
    """Return the trained ordinal logistic model.

    loss, alpha and cost are ignored if grid search is requested.
    alpha: used only for se, it, at, and ridge, and only if grid search is
        not requested.
    cost: used only for lad and if grid search is not requested.
    loss: used only for lad and if grid search is not requested.
    """
    # Grid search was requested: find the parameters that achieve the
    # highest average score.
    if not skip_grid_search:
        penalty_weights = 'dummy'
        clf = grid_search.grid_search(evaluation, train_features,
                                      train_labels, penalty_weights,
                                      ordinal_algorithm, num_jobs)
        params = clf.best_params_
        if 'loss' in params:
            loss = params['loss']
        if 'alpha' in params:
            alpha = params['alpha']
        if 'cost' in params:
            cost = params['cost']

    # Now perform the training on the full train data.
    if ordinal_algorithm == 'logisticse':
        model = mord.LogisticSE(alpha=alpha, max_iter=20000)
    elif ordinal_algorithm == 'logisticit':
        model = mord.LogisticIT(alpha=alpha, max_iter=20000)
    elif ordinal_algorithm == 'logisticat':
        model = mord.LogisticAT(alpha=alpha, max_iter=20000)
    elif ordinal_algorithm == 'ordinalridge':
        model = mord.OrdinalRidge(alpha=alpha)
    elif ordinal_algorithm == 'lad':
        model = mord.LAD(C=cost, loss=loss, max_iter=10000)
    model = model.fit(train_features, train_labels)
    return model
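# Minimal usage sketch for train_ordinal_logistic, skipping the external
# grid-search helper; the synthetic data and parameter values below are
# illustrative assumptions, not values from the original code.
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.randn(200, 5)
y_demo = np.digitize(X_demo[:, 0], bins=[-1.0, 0.0, 1.0])  # ordinal levels 0..3
demo_model = train_ordinal_logistic(
    train_features=X_demo, train_labels=y_demo,
    skip_grid_search=True,  # avoids the external grid_search module
    evaluation=None, num_jobs=1,
    loss='l1', alpha=1.0, cost=1.0,
    ordinal_algorithm='logisticat')
print(demo_model.predict(X_demo[:5]))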
def run_classification(X_train, X_test, y_train, y_test, how='rfc',
                       random_state=0, n_jobs=2, cv=False, stand=False,
                       verbose=True, full_output=False, **classpar):
    """Train and evaluate one of several (ordinal) classifiers.

    `how` selects the model; keyword arguments in **classpar override the
    per-model defaults defined below.
    """
    if stand:
        # Fit the scaler on the training data only and reuse it on the test
        # data; fitting a second scaler on X_test would leak test statistics.
        scaler = StandardScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
    if how == 'or1':
        pars = {'alpha': 1e0, 'verbose': 1, 'max_iter': 1e5}
        for par in pars:
            if par not in classpar:
                classpar.update({par: pars.get(par)})
        clasif = mord.LogisticAT(**classpar)
    elif how == 'or2':
        pars = {'alpha': 1e0, 'verbose': 1, 'max_iter': 1e5}
        for par in pars:
            if par not in classpar:
                classpar.update({par: pars.get(par)})
        clasif = mord.LogisticIT(**classpar)
    elif how == 'or3':
        pars = {'alpha': 1e0, 'fit_intercept': True, 'normalize': False,
                'copy_X': True, 'max_iter': None, 'tol': 0.001,
                'solver': 'auto'}
        for par in pars:
            if par not in classpar:
                classpar.update({par: pars.get(par)})
        clasif = mord.OrdinalRidge(random_state=random_state, **classpar)
    elif how == 'or4':
        pars = {'epsilon': 0.0, 'tol': 0.0001, 'C': 1.0, 'loss': 'l1',
                'fit_intercept': True, 'intercept_scaling': 1.0,
                'dual': True, 'verbose': 0, 'max_iter': 10000}
        for par in pars:
            if par not in classpar:
                classpar.update({par: pars.get(par)})
        clasif = mord.LAD(random_state=random_state, **classpar)
    elif how == 'prank':
        pars = {'n_iter': 1000, 'shuffle': True}
        for par in pars:
            if par not in classpar:
                classpar.update({par: pars.get(par)})
        clasif = ranking.PRank(random_state=random_state, **classpar)
    elif how == 'kprank':
        pars = {'n_iter': 200, 'shuffle': True, 'kernel': 'rbf',
                'gamma': 1e2, 'degree': 3, 'coef0': 1}
        for par in pars:
            if par not in classpar:
                classpar.update({par: pars.get(par)})
        clasif = ranking.KernelPRank(random_state=random_state, **classpar)
    elif how == 'rfc':
        pars = {'n_estimators': 1000, 'criterion': 'gini', 'max_depth': None,
                'min_samples_split': 2, 'min_samples_leaf': 1,
                'min_weight_fraction_leaf': 0.0, 'max_features': 'auto',
                'max_leaf_nodes': None, 'min_impurity_split': 1e-07,
                'bootstrap': True, 'oob_score': True, 'verbose': 0,
                'warm_start': False, 'class_weight': None}
        for par in pars:
            if par not in classpar:
                classpar.update({par: pars.get(par)})
        clasif = RFC(random_state=random_state, n_jobs=n_jobs, **classpar)
    elif how == 'svc':
        pars = {'C': 1.0, 'kernel': 'rbf', 'degree': 3, 'gamma': 'auto',
                'coef0': 0.0, 'shrinking': True, 'probability': False,
                'tol': 0.001, 'cache_size': 200, 'class_weight': None,
                'verbose': False, 'max_iter': -1,
                'decision_function_shape': None}
        for par in pars:
            if par not in classpar:
                classpar.update({par: pars.get(par)})
        clasif = SVC(random_state=random_state, **classpar)
    else:
        print('Classifier not yet supported')
        return
    if cv:
        crosv = ShuffleSplit(n_splits=5, test_size=0.3,
                             random_state=random_state)
        # y_pred = cross_val_predict(clasif, X_train, y_train, cv=5,
        #                            n_jobs=n_jobs, verbose=1)
        # f1 = f1_score(y_test, y_pred, average='weighted')
        # ck = cohen_kappa_score(y_test, y_pred)
        # rec = recall_score(y_test, y_pred, average='weighted')
        # if verbose:
        #     print('\nF1={:.2f}, Recall={:.2f}, Cohen Kappa={:.2f}'.format(
        #         f1, rec, ck))
        # return f1, rec, ck
        f1_cv_scores = cross_val_score(clasif, X_train, y_train, cv=crosv,
                                       scoring='f1_weighted', verbose=1,
                                       n_jobs=n_jobs)
        mean_cv_f1 = np.mean(f1_cv_scores)
        if verbose:
            print(f1_cv_scores)
            print('Mean F1 score={:.3f}'.format(mean_cv_f1))
        return mean_cv_f1, f1_cv_scores
    else:
        if verbose:
            print(clasif.fit(X_train, y_train.astype(int)))
        else:
            clasif.fit(X_train, y_train.astype(int))
        y_pred = clasif.predict(X_test)
        if verbose:
            print('\n', imbmet.classification_report_imbalanced(y_test, y_pred))
        if verbose and hasattr(clasif, 'feature_importances_'):
            print('Feature importances:')
            print(clasif.feature_importances_)
        ck = cohen_kappa_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')
        if verbose:
            print('\nF1={:.2f}, Recall={:.2f}, Cohen Kappa={:.2f}'.format(
                f1, rec, ck))
        if full_output:
            return clasif, f1, rec, ck
        else:
            return f1, rec, ck
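# Minimal usage sketch for run_classification; assumes the imports it relies
# on (StandardScaler, mord, RFC, SVC, ranking, imbmet, the sklearn metric and
# model-selection helpers, numpy as np) are already in scope elsewhere in
# this module. The synthetic split below is illustrative only.
import numpy as np
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X_all = rng.randn(300, 4)
y_all = np.digitize(X_all[:, 0], bins=[-0.5, 0.5])  # three ordinal levels
X_tr, X_te, y_tr, y_te = train_test_split(X_all, y_all, test_size=0.3,
                                          random_state=0)
f1, rec, ck = run_classification(X_tr, X_te, y_tr, y_te, how='or1',
                                 stand=True, verbose=False)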
# In[13]:

# train_x = StandardScaler().fit_transform(train_x)
# test_x = StandardScaler().fit_transform(test_x)
# train_x = preprocessing.scale(train_x)
# test_x = preprocessing.scale(test_x)

# In[14]:

import mord as m

# In[15]:

model = m.OrdinalRidge(alpha=0.1, max_iter=10000)

# In[16]:

train_x[:1]

# In[17]:

model.fit(train_x, train_y)

# In[18]:

model.score(test_x, test_y)

# In[19]:
import argparse
import pickle

import numpy as np
import pandas as pd
import mord
from sklearn.model_selection import train_test_split

# Command-line arguments: training CSV path and output model path.
parser = argparse.ArgumentParser()
parser.add_argument('training_csv')
parser.add_argument('out_model')
args = parser.parse_args()

df = pd.read_csv(args.training_csv, header=None)

# Getting features and labels: column 0 is the label, columns 1-25 the features.
y = df[0].values
features_df = df.loc[:, 1:25].fillna(0)
X = features_df.values

# Generating the train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Training the model
reg = mord.OrdinalRidge(alpha=1.0, fit_intercept=True, normalize=False,
                        copy_X=True, max_iter=None, tol=0.001, solver='auto')
reg.fit(X_train, y_train)

# Showing some results and saving the model
y_pred = reg.predict(X_test)
b = 1  # offset so the lowest label maps to bin 0 for np.bincount
print(np.bincount(y_pred.astype(np.int32) + b))
filename = args.out_model
with open(filename, 'wb') as f:
    pickle.dump(reg, f)
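# Loading the saved model back for scoring; a minimal sketch, assuming the
# same 25-column feature layout that was used at training time.
with open(filename, 'rb') as f:
    restored = pickle.load(f)
print(restored.predict(X_test[:5]))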
import matplotlib.pyplot as plt
import mord
from sklearn import linear_model, metrics

# Drop the column at index 1 twice, removing two adjacent feature columns.
X = X.drop(X.columns[1], axis=1)
X = X.drop(X.columns[1], axis=1)

clf1 = linear_model.LogisticRegression(solver='lbfgs',
                                       multi_class='multinomial')
clf1.fit(X, y)
print('Mean Absolute Error of LogisticRegression: %s' %
      metrics.mean_absolute_error(clf1.predict(X), y))

clf2 = mord.LogisticAT(alpha=1.)
clf2.fit(X, y)
print('Mean Absolute Error of LogisticAT: %s' %
      metrics.mean_absolute_error(clf2.predict(X), y))

clf3 = mord.LogisticIT(alpha=1.)
clf3.fit(X, y)
print('Mean Absolute Error of LogisticIT: %s' %
      metrics.mean_absolute_error(clf3.predict(X), y))

clf4 = mord.OrdinalRidge(alpha=1.)
clf4.fit(X, y)
print('Mean Absolute Error of OrdinalRidge: %s' %
      metrics.mean_absolute_error(clf4.predict(X), y))

y_pred = clf4.predict(X)
plt.scatter(X, y, color='black')
plt.plot(X, y_pred, color='blue', linewidth=3)
plt.show()
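# The threshold-based models also expose class probabilities; a small sketch
# reusing clf2 (LogisticAT) from above.
proba = clf2.predict_proba(X)
print(proba[:3])  # one row per sample, one column per ordinal level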