示例#1
0
def fit_logistic_or_with_crossvalidation(X, y):
    """Fit an ordinal model with hyperparameter cross-validation.

    Ordinal Ridge (regression) variant.

    Parameters & returns as per other training functions.
    """
    grid = {
        'fit_intercept': [True, False],
        'alpha': [0.2, 0.4, 0.6, 0.8, 1.0, 2.0, 3.0],
        'normalize': [True, False],
    }
    # Delegate the 5-fold search to the shared cross-validation helper.
    return fit_classifier_with_crossvalidation(
        X, y, mord.OrdinalRidge(), 5, grid, verbose=False)
示例#2
0
def train_ordinal_logistic(train_features, train_labels, skip_grid_search,
                           evaluation, num_jobs, loss, alpha, cost,
                           ordinal_algorithm):
    """Train and return an ordinal logistic model.

    loss, alpha and cost are ignored if grid search is requested.
    alpha: used only for se, it, at, and ridge and if grid search is not requested
    cost: used only for lad and if grid search is not requested
    loss: used only for lad and if grid search is not requested

    Raises ValueError if ordinal_algorithm is not one of the supported names.
    """
    # Grid search requested: find the parameters achieving the highest
    # average score and use them instead of the caller-supplied ones.
    if not skip_grid_search:
        penalty_weights = 'dummy'
        clf = grid_search.grid_search(evaluation, train_features, train_labels,
                                      penalty_weights, ordinal_algorithm,
                                      num_jobs)
        params = clf.best_params_
        # BUG FIX: the original tested "'penalty' in params" before reading
        # params['loss'], so a grid-searched loss was never picked up (or
        # raised KeyError if 'penalty' was present without 'loss').
        if 'loss' in params:
            loss = params['loss']
        if 'alpha' in params:
            alpha = params['alpha']
        if 'cost' in params:
            cost = params['cost']

    # Now perform the training on full train data.
    if ordinal_algorithm == 'logisticse':
        model = mord.LogisticSE(alpha=alpha, max_iter=20000)
    elif ordinal_algorithm == 'logisticit':
        model = mord.LogisticIT(alpha=alpha, max_iter=20000)
    elif ordinal_algorithm == 'logisticat':
        model = mord.LogisticAT(alpha=alpha, max_iter=20000)
    elif ordinal_algorithm == 'ordinalridge':
        model = mord.OrdinalRidge(alpha=alpha)
    elif ordinal_algorithm == 'lad':
        model = mord.LAD(C=cost, loss=loss, max_iter=10000)
    else:
        # Fail fast: the original fell through to an UnboundLocalError on
        # model.fit for an unrecognized algorithm name.
        raise ValueError('unknown ordinal algorithm: %r' % ordinal_algorithm)

    return model.fit(train_features, train_labels)
示例#3
0
def run_classification(X_train,
                       X_test,
                       y_train,
                       y_test,
                       how='rfc',
                       random_state=0,
                       n_jobs=2,
                       cv=False,
                       stand=False,
                       verbose=True,
                       full_output=False,
                       **classpar):
    """

    """
    if stand:
        X_train = StandardScaler().fit_transform(X_train)
        X_test = StandardScaler().fit_transform(X_test)

    if how == 'or1':
        pars = {'alpha': 1e0, 'verbose': 1, 'max_iter': 1e5}
        for par in pars:
            if par not in classpar: classpar.update({par: pars.get(par)})
        clasif = mord.LogisticAT(**classpar)
    elif how == 'or2':
        pars = {'alpha': 1e0, 'verbose': 1, 'max_iter': 1e5}
        for par in pars:
            if par not in classpar: classpar.update({par: pars.get(par)})
        clasif = mord.LogisticIT(**classpar)
    elif how == 'or3':
        pars = {
            'alpha': 1e0,
            'fit_intercept': True,
            'normalize': False,
            'copy_X': True,
            'max_iter': None,
            'tol': 0.001,
            'solver': 'auto'
        }
        for par in pars:
            if par not in classpar: classpar.update({par: pars.get(par)})
        clasif = mord.OrdinalRidge(random_state=random_state, **classpar)
    elif how == 'or4':
        pars = {
            'epsilon': 0.0,
            'tol': 0.0001,
            'C': 1.0,
            'loss': 'l1',
            'fit_intercept': True,
            'intercept_scaling': 1.0,
            'dual': True,
            'verbose': 0,
            'max_iter': 10000
        }
        for par in pars:
            if par not in classpar: classpar.update({par: pars.get(par)})
        clasif = mord.LAD(random_state=random_state, **classpar)
    elif how == 'prank':
        pars = {'n_iter': 1000, 'shuffle': True}
        for par in pars:
            if par not in classpar: classpar.update({par: pars.get(par)})
        clasif = ranking.PRank(random_state=random_state, **classpar)
    elif how == 'kprank':
        pars = {
            'n_iter': 200,
            'shuffle': True,
            'kernel': 'rbf',
            'gamma': 1e2,
            'degree': 3,
            'coef0': 1
        }
        for par in pars:
            if par not in classpar: classpar.update({par: pars.get(par)})
        clasif = ranking.KernelPRank(random_state=random_state, **classpar)
    elif how == 'rfc':
        pars = {
            'n_estimators': 1000,
            'criterion': 'gini',
            'max_depth': None,
            'min_samples_split': 2,
            'min_samples_leaf': 1,
            'min_weight_fraction_leaf': 0.0,
            'max_features': 'auto',
            'max_leaf_nodes': None,
            'min_impurity_split': 1e-07,
            'bootstrap': True,
            'oob_score': True,
            'verbose': 0,
            'warm_start': False,
            'class_weight': None
        }
        for par in pars:
            if par not in classpar: classpar.update({par: pars.get(par)})
        clasif = RFC(random_state=random_state, n_jobs=n_jobs, **classpar)
    elif how == 'svc':
        pars = {
            'C': 1.0,
            'kernel': 'rbf',
            'degree': 3,
            'gamma': 'auto',
            'coef0': 0.0,
            'shrinking': True,
            'probability': False,
            'tol': 0.001,
            'cache_size': 200,
            'class_weight': None,
            'verbose': False,
            'max_iter': -1,
            'decision_function_shape': None
        }
        for par in pars:
            if par not in classpar: classpar.update({par: pars.get(par)})
        clasif = SVC(random_state=random_state, **classpar)
    else:
        print 'Classifier not yet supported'
        return

    if cv:
        crosv = ShuffleSplit(n_splits=5,
                             test_size=0.3,
                             random_state=random_state)
        #         y_pred = cross_val_predict(clasif, X_train, y_train, cv=5, n_jobs=n_jobs,
        #                                    verbose=1)

        #         f1 = f1_score(y_test, y_pred, average='weighted')
        #         ck = cohen_kappa_score(y_test, y_pred)
        #         rec = recall_score(y_test, y_pred, average='weighted')

        #         if verbose:
        #             print '\nF1={:.2f}, Recall={:.2f}, Cohen Kappa={:.2f}'.format(f1, rec, ck)

        #         return f1, rec, ck

        f1_cv_scores = cross_val_score(clasif,
                                       X_train,
                                       y_train,
                                       cv=crosv,
                                       scoring='f1_weighted',
                                       verbose=1,
                                       n_jobs=n_jobs)
        mean_cv_f1 = np.mean(f1_cv_scores)
        if verbose:
            print f1_cv_scores
            print 'Mean F1 score={:.3f}'.format(mean_cv_f1)
        return mean_cv_f1, f1_cv_scores

    else:
        if verbose:
            print clasif.fit(X_train, y_train.astype(int))
        else:
            clasif.fit(X_train, y_train.astype(int))

        y_pred = clasif.predict(X_test)

        if verbose:
            print '\n', imbmet.classification_report_imbalanced(y_test, y_pred)
            if verbose and hasattr(clasif, 'feature_importances_'):
                print 'Feature importances:'
                print clasif.feature_importances_

        ck = cohen_kappa_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred, average='weighted')
        f1 = f1_score(y_test, y_pred, average='weighted')

        if verbose:
            print '\nF1={:.2f}, Recall={:.2f}, Cohen Kappa={:.2f}'.format(
                f1, rec, ck)

        if full_output:
            return clasif, f1, rec, ck
        else:
            return f1, rec, ck
示例#4
0
# In[13]:

# Optional feature standardization (left disabled).
# NOTE(review): if re-enabled, the scaler should be fit on train_x only and
# reused to transform test_x; fitting separately on test_x leaks statistics.
#train_x = StandardScaler().fit_transform(train_x)
#test_x = StandardScaler().fit_transform(test_x)

# train_x=preprocessing.scale(train_x)
# test_x=preprocessing.scale(test_x)

# In[14]:

import mord as m

# In[15]:

# Ordinal ridge regression; alpha is the regularization strength.
model = m.OrdinalRidge(alpha=0.1, max_iter=10000)

# In[16]:

# Peek at the first training sample (notebook-cell expression, no effect
# outside a notebook).
train_x[:1]

# In[17]:

# Fit on the training split.
model.fit(train_x, train_y)

# In[18]:

# Score on the held-out split (metric is the estimator's default score —
# presumably Ridge-style R^2; confirm in the mord docs).
model.score(test_x, test_y)

# In[19]:
# Command-line flow: train an OrdinalRidge model from a CSV and pickle it.
# `parser` is defined elsewhere in the file (not visible here).
args = parser.parse_args()

# CSV has no header row; column 0 is the label, columns 1..25 are features.
df = pd.read_csv(args.training_csv, header=None)

# Getting features and labels
y = df[0].values

# Missing feature values are replaced with 0.
features_df = df.loc[:, 1:25].fillna(0)
X = features_df.loc[:, 1:25].values

# Generating train/test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Training the model
reg = mord.OrdinalRidge(alpha=1.0,
                        fit_intercept=True,
                        normalize=False,
                        copy_X=True,
                        max_iter=None,
                        tol=0.001,
                        solver='auto')
reg.fit(X_train, y_train)

# Showing some results and saving model
y_pred = reg.predict(X_test)
# Offset before bincount — presumably the labels can be as low as -1, so the
# shift keeps indices non-negative; TODO confirm against the label encoding.
b = 1
print(np.bincount(y_pred.astype(np.int32) + b))

filename = args.out_model
# NOTE(review): the file handle is never closed; prefer `with open(...)`.
pickle.dump(reg, open(filename, 'wb'))
示例#6
0
# Drop the current second column twice, i.e. original columns 1 and 2.
X = X.drop(X.columns[1], axis=1)
X = X.drop(X.columns[1], axis=1)

# Baseline: plain multinomial logistic regression (ignores label ordering).
clf1 = linear_model.LogisticRegression(
    solver='lbfgs',
    multi_class='multinomial')
clf1.fit(X, y)

print('Mean Absolute Error of LogisticRegression: %s' %
      metrics.mean_absolute_error(clf1.predict(X), y))

# Ordinal models from mord, all with the same regularization strength.
# Note: MAE here is measured on the training data itself.
clf2 = mord.LogisticAT(alpha=1.)
clf2.fit(X, y)
print('Mean Absolute Error of LogisticAT %s' %
      metrics.mean_absolute_error(clf2.predict(X), y))


clf3 = mord.LogisticIT(alpha=1.)
clf3.fit(X, y)
print('Mean Absolute Error of LogisticIT %s' %
      metrics.mean_absolute_error(clf3.predict(X), y))

clf4 = mord.OrdinalRidge(alpha=1.)
clf4.fit(X, y)
# BUG FIX: the original message said 'LogisticSE', but clf4 is OrdinalRidge.
print('Mean Absolute Error of OrdinalRidge %s' %
      metrics.mean_absolute_error(clf4.predict(X), y))

# Visualize the OrdinalRidge fit against the training data.
y_pred = clf4.predict(X)
plt.scatter(X, y,  color='black')
plt.plot(X, y_pred, color='blue', linewidth=3)
plt.show()