Example No. 1
# assumed imports for a self-contained snippet (the original omits them):
import numpy as np
from numpy.testing import assert_almost_equal
from sklearn.metrics import roc_auc_score as sklearn_roc_auc_score
from cuml.metrics import roc_auc_score


def test_roc_auc_score():
    y_true = np.array([0, 0, 1, 1])
    y_pred = np.array([0.1, 0.4, 0.35, 0.8])
    assert_almost_equal(roc_auc_score(y_true, y_pred),
                        sklearn_roc_auc_score(y_true, y_pred))

    y_true = np.array([0, 0, 1, 1, 0])
    y_pred = np.array([0.8, 0.4, 0.4, 0.8, 0.8])
    assert_almost_equal(roc_auc_score(y_true, y_pred),
                        sklearn_roc_auc_score(y_true, y_pred))
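The first case above is the classic four-point example: the positives score {0.35, 0.8} against the negatives {0.1, 0.4}, and 3 of the 4 positive/negative pairs are ordered correctly, so the AUC is 0.75. A quick NumPy check of that pairwise definition (not part of the original test; there are no pos/neg ties here, so no tie correction is needed):

import numpy as np

y_true = np.array([0, 0, 1, 1])
y_pred = np.array([0.1, 0.4, 0.35, 0.8])
pos, neg = y_pred[y_true == 1], y_pred[y_true == 0]
print((pos[:, None] > neg[None, :]).mean())  # 0.75, matching roc_auc_score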
Example No. 2
def get_loss(self, y, y_pred):
    if self.metric == 'error':
        return 1 - accuracy_score(y, y_pred)
    elif self.metric == 'auc':
        # TODO: warn if y_pred contains only hard 0/1 labels; AUC expects probabilities/scores
        return 1 - roc_auc_score(y, y_pred)
    else:
        raise NotImplementedError(f"metric '{self.metric}' is not implemented")
Example No. 3
def mlp(path):

    X, y, Xt, yt, cols = get_data(fillna=0, norm=True, dropids=True)
    X = cp.asarray(X)
    y = cp.asarray(y)
    Xt = cp.asarray(Xt)
    yt = cp.asarray(yt)
    params = {
        'learning_rate_init': 0.005,
        'verbose': True,
        'hidden_layer_sizes': (64, 64, 64),
        'max_iter': 100,
        'batch_size': 4096,
        'shuffle': True,
        'alpha': 0.0,
        'model_path': f'{path}/cache/mlp.pth',
    }

    clf = MLPClassifier(**params)
    clf.fit(X, y, Xt, yt)

    # positive-class probability on the validation set
    yp = clf.predict_proba(Xt)[:, 1]
    score = roc_auc_score(yt, yp)
    print('AUC: %.4f' % score)

    cp.save('mlp.npy', yp)
    #return yp
    print(cols)
    return score
Example No. 4
def xgb(path):

    X, y, Xt, yt, cols = get_data()
    params = {
        'n_estimators': 100,
        'eta': 0.1,
        'early_stopping_rounds': 10,
        'max_depth': 7,
        'colsample_bytree': 1.0,
        'subsample': 0.5,
        'verbosity': 1,
        'objective': 'binary:logistic',
        'eval_metric': 'auc',
        'validation_fraction': 0,
    }

    clf = XGBClassifier(**params)
    clf.fit(X, y, Xt, yt)
    clf.clf.save_model(f'{path}/cache/xgb.json')

    yp = clf.predict_proba(Xt)
    #return yp
    print(cols)
    cp.save('xgb_va.npy', cp.asarray(yp))
    yx = clf.predict_proba(X)
    cp.save('xgb_tr.npy', cp.asarray(yx))
    return roc_auc_score(yt, yp)
Example No. 5
# assumed imports for a self-contained snippet (the original omits them):
import numpy as np
import pytest
from cuml.metrics import roc_auc_score


def test_roc_auc_score_at_limits():
    # np.float was removed from NumPy; use np.float64
    y_true = np.array([0., 0., 0.], dtype=np.float64)
    y_pred = np.array([0., 0.5, 1.], dtype=np.float64)

    err_msg = ("roc_auc_score cannot be used when "
               "only one class present in y_true. ROC AUC score "
               "is not defined in that case.")

    with pytest.raises(ValueError, match=err_msg):
        roc_auc_score(y_true, y_pred)

    y_true = np.array([0., 0.5, 1.0], dtype=np.float64)
    y_pred = np.array([0., 0.5, 1.], dtype=np.float64)

    err_msg = "Continuous format of y_true  is not supported."

    with pytest.raises(ValueError, match=err_msg):
        roc_auc_score(y_true, y_pred)
Example No. 6
def test_roc_auc_score_random(n_samples, dtype):

    y_true, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 2, n_samples).astype(dtype))

    y_pred, _, _, _ = generate_random_labels(
        lambda rng: rng.randint(0, 1000, n_samples).astype(dtype))

    auc = roc_auc_score(y_true, y_pred)
    skl_auc = sklearn_roc_auc_score(y_true, y_pred)
    assert_almost_equal(auc, skl_auc)
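Here n_samples and dtype arrive via pytest parametrization, and generate_random_labels is a utility from the surrounding test suite. A rough stand-in for running the body outside that suite (hypothetical helper; only the first element of the real utility's 4-tuple is used above, and what the other three values are is not shown here):

import numpy as np

def generate_random_labels(make_labels, seed=42):
    # minimal stub: build labels from a seeded RNG; pad the tuple with placeholders
    rng = np.random.RandomState(seed)
    labels = make_labels(rng)
    return labels, None, None, None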
Example No. 7
def _calc_score_cuml(y_true, y_preds, y_proba=None, metrics=('accuracy',), task=const.TASK_BINARY, pos_label=1,
                     classes=None, average=None):
    if y_proba is None:
        y_proba = y_preds
    if len(y_proba.shape) == 2 and y_proba.shape[-1] == 1:
        y_proba = y_proba.reshape(-1)
    if len(y_preds.shape) == 2 and y_preds.shape[-1] == 1:
        y_preds = y_preds.reshape(-1)

    y_true = _to_dtype(y_true, 'float64')
    y_preds = _to_dtype(y_preds, 'float64')
    y_proba = _to_dtype(y_proba, 'float64')

    if task == const.TASK_REGRESSION:
        if isinstance(y_true, cudf.Series):
            y_true = y_true.values
        if isinstance(y_preds, cudf.Series):
            y_preds = y_preds.values
        if isinstance(y_proba, cudf.Series):
            y_proba = y_proba.values

    scores = {}
    for metric in metrics:
        if callable(metric):
            scores[metric.__name__] = metric(y_true, y_preds)
        else:
            metric_lower = metric.lower()
            if metric_lower == 'auc':
                if len(y_proba.shape) == 2:
                    # if task == const.TASK_MULTICLASS:
                    #     s = cu_metrics.roc_auc_score(y_true, y_proba, multi_class='ovo', labels=classes)
                    # else:
                    #     s = cu_metrics.roc_auc_score(y_true, y_proba[:, 1])
                    s = cu_metrics.roc_auc_score(y_true, y_proba[:, 1])
                else:
                    s = cu_metrics.roc_auc_score(y_true, y_proba)
            elif metric_lower == 'accuracy':
                if y_preds is None:
                    s = 0
                else:
                    s = cu_metrics.accuracy_score(y_true, y_preds)
            # elif metric_lower == 'recall':
            #     s = cu_metrics.recall_score(y_true, y_preds, **recall_options)
            # elif metric_lower == 'precision':
            #     s = cu_metrics.precision_score(y_true, y_preds, **recall_options)
            # elif metric_lower == 'f1':
            #     s = cu_metrics.f1_score(y_true, y_preds, **recall_options)
            elif metric_lower == 'mse':
                s = cu_metrics.mean_squared_error(y_true, y_preds)
            elif metric_lower == 'mae':
                s = cu_metrics.mean_absolute_error(y_true, y_preds)
            elif metric_lower == 'msle':
                s = cu_metrics.mean_squared_log_error(y_true, y_preds)
            elif metric_lower in {'rmse', 'rootmeansquarederror', 'root_mean_squared_error'}:
                s = cu_metrics.mean_squared_error(y_true, y_preds, squared=False)
            elif metric_lower == 'r2':
                s = cu_metrics.r2_score(y_true, y_preds)
            elif metric_lower in {'logloss', 'log_loss'}:
                # s = cu_metrics.log_loss(y_true, y_proba, labels=classes)
                s = cu_metrics.log_loss(y_true, y_proba)
            else:
                logger.warning(f'unknown metric: {metric}')
                continue
            if isinstance(s, cp.ndarray):
                s = float(cp.asnumpy(s))
            scores[metric] = s
    return scores
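For context, a hypothetical call of the scorer above for a binary task (the names preds and proba and the input arrays are assumptions; const comes from the surrounding module):

# preds holds hard 0/1 labels, proba holds positive-class probabilities
scores = _calc_score_cuml(y_true, preds, y_proba=proba,
                          metrics=('auc', 'accuracy', 'logloss'),
                          task=const.TASK_BINARY)
print(scores)  # {'auc': ..., 'accuracy': ..., 'logloss': ...}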
Example No. 8
# assumed imports (sklearn) for a self-contained snippet; the original omits them
from sklearn.metrics import roc_auc_score, average_precision_score


def metrics(y_true, y_score):
    auc = roc_auc_score(y_true=y_true, y_score=y_score)
    ap = average_precision_score(y_true, y_score)
    return [auc, ap]
Example No. 9
import cupy as cp
from cuml.metrics import roc_auc_score

y1 = cp.load('backup/2021-01-03-20-58-05-DGX-S/mlp.npy')
y2 = cp.load('backup/2021-01-03-21-39-45-DGX-S/mlp.npy')
y3 = cp.load('backup/2021-01-03-21-56-52-DGX-S/mlp.npy')
yt = cp.load('yt.npy')
y1 = (y1 + y2 + y3) / 3

y2 = cp.load('xgb_va.npy')
print('AUC: %.4f' % roc_auc_score(yt, y1))

y1 = y1.argsort().argsort() / y1.shape[0]
y2 = y2.argsort().argsort() / y2.shape[0]

for i in range(11):
    w = i * 0.1
    yp = y1 * w + y2 * (1 - w)
    print(i, 'AUC: %.4f' % roc_auc_score(yt, yp))
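Since ROC AUC depends only on the ordering of the scores, the argsort().argsort() rank transform above leaves each model's individual AUC unchanged while putting both models on a common [0, 1) scale before the weighted blend. A minimal check of that invariance with made-up data (assumes a working cuml install):

import cupy as cp
from cuml.metrics import roc_auc_score

yt = cp.array([0, 1, 0, 1, 1], dtype=cp.float32)
ys = cp.array([0.10, 0.70, 0.30, 0.90, 0.55], dtype=cp.float32)
ranked = ys.argsort().argsort() / ys.shape[0]  # same ordering, new scale
assert float(roc_auc_score(yt, ys)) == float(roc_auc_score(yt, ranked))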
Example No. 10
    def fit(self, train_dl, valid_dl=None):

        lr = self.params['learning_rate_init']
        epochs = self.params['max_iter']
        opt_type = self.params['solver']
        wd = self.params['alpha']
        momentum = self.params['momentum']
        beta1, beta2 = self.params['beta_1'], self.params['beta_2']
        eps = self.params['epsilon']

        if opt_type == 'sgd':
            opt = torch.optim.SGD(self.model.parameters(), lr=lr, weight_decay=wd,
                                  momentum=momentum, nesterov=self.params['nesterovs_momentum'])
        elif opt_type == 'adam':
            opt = torch.optim.Adam(self.model.parameters(), lr=lr, weight_decay=wd,
                                   betas=(beta1, beta2), eps=eps)
        else:
            opt = torch.optim.LBFGS(self.model.parameters(), lr=lr)

        best_score = 1e9  # validation loss; lower is better
        not_improved_iter = 0
        tol = self.params['tol']
        n_iter_no_change = self.params['n_iter_no_change']

        best = -1  # best validation AUC; higher is better
        best_iter = 0
        for epoch in range(epochs):
            train_loss = 0

            start = time.time()
            ys, yps = [], []
            for batch in train_dl:

                xb, yb = batch
                xb, yb = xb.cuda(), yb.cuda()
                pred = self.model(xb)
                loss = self.loss_func(pred, yb)
                ys.append(yb.detach().cpu().numpy())
                yps.append(pred.detach().cpu().numpy())

                def closure():
                    # LBFGS may call this several times per step, so it must
                    # re-evaluate the loss and gradients, not return a stale value
                    opt.zero_grad()
                    out = self.model(xb)
                    l = self.loss_func(out, yb)
                    l.backward()
                    return l

                train_loss += loss.cpu().detach().numpy()
                if opt_type != 'lbfgs':
                    loss.backward()
                    opt.step()
                else:
                    opt.step(closure)
                opt.zero_grad()

            ys = np.concatenate(ys)
            yps = np.vstack(yps)
            yps = cp.asarray(yps)
            yps = softmax(yps)[:, 1].ravel()  # positive-class probability
            #print(ys[:10], yps[:10])
            auc = roc_auc_score(ys, yps)

            if valid_dl is None:
                duration = time.time() - start
                print('Epoch %d Training Loss:%.4f Time: %.2f seconds' % (epoch,
                            train_loss / train_dl.total_batches, duration))
                continue

            valid_loss = 0
            ys, yps = [], []
            for batch in valid_dl:
                xb, yb = batch
                xb, yb = xb.cuda(), yb.cuda()
                pred = self.model(xb)
                loss = self.loss_func(pred, yb)
                valid_loss += loss.cpu().detach().numpy()
                ys.append(yb.detach().cpu().numpy())
                yps.append(pred.detach().cpu().numpy())

            ys = np.concatenate(ys)
            yps = np.vstack(yps)
            yps = cp.asarray(yps)
            yps = softmax(yps)[:, 1].ravel()
            #print(ys[:10], yps[:10])
            auc_va = roc_auc_score(ys, yps)

            if best < auc_va:
                best = auc_va
                best_iter = epoch
                torch.save(self.model.state_dict(), self.params['model_path'])
                torch.save(self.model.state_dict(), './mlp.pth')  # extra local copy

            valid_loss /= valid_dl.total_batches
            duration = time.time() - start
            if self.params['verbose']:
                print('Epoch %d Training Loss:%.4f AUC:%.4f Valid Loss:%.4f AUC:%.4f '
                      'Best:%.4f Best Iter: %d Time: %.2f seconds' % (epoch,
                            train_loss / train_dl.total_batches, auc, valid_loss,
                            auc_va, best, best_iter, duration))

            if valid_loss < best_score - tol:
                best_score = valid_loss
                not_improved_iter = 0  # reset so n_iter_no_change counts consecutive stalls
            else:
                not_improved_iter += 1

            if self.params['early_stopping'] and not_improved_iter > n_iter_no_change:
                break