def test_variance_equal(self):
    """Constant weights must reproduce the unweighted AUC and variance."""
    weights = numpy.ones(shape=self.y_test.shape) * numpy.pi
    auc, variance = compare_auc_delong_xu.delong_roc_variance(
        self.y_test, self.predictions, sample_weight=weights)
    auc_no_weights, variance_no_weights = compare_auc_delong_xu.delong_roc_variance(
        self.y_test, self.predictions)
    numpy.testing.assert_allclose(auc_no_weights, auc)
    numpy.testing.assert_allclose(variance_no_weights, variance)
def test_variance_positive(self):
    """Scaling all sample weights by a positive constant must leave both the
    AUC and the DeLong variance unchanged."""
    N = 7
    weights = numpy.linspace(0, 10, num=N)
    auc, variance = compare_auc_delong_xu.delong_roc_variance(
        self.y_test[:N], self.predictions[:N], sample_weight=weights)
    k = numpy.pi
    auc_scaled, variance_scaled = compare_auc_delong_xu.delong_roc_variance(
        self.y_test[:N], self.predictions[:N], sample_weight=weights * k)
    numpy.testing.assert_allclose(auc, auc_scaled)
    numpy.testing.assert_allclose(variance, variance_scaled)
def test_weights_positive(self):
    """Non-constant positive weights must match sklearn's weighted AUC."""
    weights = numpy.linspace(0, 3, num=len(self.y_test))
    auc, variance = compare_auc_delong_xu.delong_roc_variance(
        self.y_test, self.predictions, sample_weight=weights)
    true_auc = sklearn.metrics.roc_auc_score(
        self.y_test, self.predictions, sample_weight=weights)
    numpy.testing.assert_allclose(true_auc, auc)
def test_weights_positive_small_N(self):
    """Non-constant positive weights must match sklearn's weighted AUC on a
    small subsample."""
    weights = numpy.linspace(0, 10, num=self.y_test.shape[0])
    N = 7
    auc, variance = compare_auc_delong_xu.delong_roc_variance(
        self.y_test[:N], self.predictions[:N], sample_weight=weights[:N])
    true_auc = sklearn.metrics.roc_auc_score(
        self.y_test[:N], self.predictions[:N], sample_weight=weights[:N])
    numpy.testing.assert_allclose(true_auc, auc)
def test_weights_equal_big(self):
    """A constant weight greater than one must reproduce the sklearn AUC on a
    small subsample."""
    weights = numpy.ones(shape=self.y_test.shape) * 2.13
    N = 7
    auc, variance = compare_auc_delong_xu.delong_roc_variance(
        self.y_test[:N], self.predictions[:N], sample_weight=weights[:N])
    true_auc = sklearn.metrics.roc_auc_score(
        self.y_test[:N], self.predictions[:N], sample_weight=weights[:N])
    numpy.testing.assert_allclose(true_auc, auc)
def test_variance():
    """Smoke test on iris: the AUC matches sklearn, and the variance matches a
    previously recorded reference value."""
    data = sklearn.datasets.load_iris()
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
        data.data, data.target == 1, test_size=0.8, random_state=42)
    predictions = sklearn.linear_model.LogisticRegression().fit(
        x_train, y_train).predict_proba(x_test)[:, 1]
    auc, variance = compare_auc_delong_xu.delong_roc_variance(
        y_test, predictions)
    true_auc = sklearn.metrics.roc_auc_score(y_test, predictions)
    numpy.testing.assert_allclose(true_auc, auc)
    numpy.testing.assert_allclose(0.0014569635512, variance)
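# A minimal sketch (not part of the original tests) of how the (auc, variance)
# pair returned by delong_roc_variance is typically used downstream: a
# normal-approximation confidence interval for the AUC. It assumes only numpy,
# scipy.stats, and the delong_roc_variance call exercised above; the helper
# name delong_confidence_interval is hypothetical.
import numpy
import scipy.stats
import compare_auc_delong_xu

def delong_confidence_interval(y_true, y_scores, alpha=0.95):
    """Normal-approximation CI for the AUC based on the DeLong variance."""
    auc, variance = compare_auc_delong_xu.delong_roc_variance(y_true, y_scores)
    std = numpy.sqrt(variance)
    lower, upper = scipy.stats.norm.interval(alpha, loc=auc, scale=std)
    # The AUC lives in [0, 1], so clip the normal interval to that range.
    return auc, (max(lower, 0.0), min(upper, 1.0))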
def test_variance(self):
    """Monte Carlo check: the mean DeLong variance estimate should match the
    empirical variance of the AUC across trials to within 10%."""
    sample_size_x = 7
    sample_size_y = 14
    n_trials = 50000
    aucs = numpy.empty(n_trials)
    variances = numpy.empty(n_trials)
    numpy.random.seed(1234235)
    labels = numpy.concatenate(
        [numpy.ones(sample_size_x), numpy.zeros(sample_size_y)])
    for trial in range(n_trials):
        scores = numpy.concatenate([
            self.x_distr.rvs(sample_size_x),
            self.y_distr.rvs(sample_size_y)])
        aucs[trial] = sklearn.metrics.roc_auc_score(labels, scores)
        auc_delong, variances[trial] = compare_auc_delong_xu.delong_roc_variance(
            labels, scores)
        numpy.testing.assert_allclose(aucs[trial], auc_delong)
    numpy.testing.assert_allclose(variances.mean(), aucs.var(), rtol=0.1)
import numpy
import scipy.stats
import sklearn.metrics

import compare_auc_delong_xu

# Monte Carlo estimate of how well the DeLong variance tracks the true
# sampling variance of the AUC for small samples: positives and negatives
# are drawn from two unit-variance normals one unit apart.
x_distr = scipy.stats.norm(0.5, 1)
y_distr = scipy.stats.norm(-0.5, 1)
sample_size_x = 7
sample_size_y = 14
n_trials = 1000000
aucs = numpy.empty(n_trials)
variances = numpy.empty(n_trials)
numpy.random.seed(1234235)
labels = numpy.concatenate(
    [numpy.ones(sample_size_x), numpy.zeros(sample_size_y)])
for trial in range(n_trials):
    scores = numpy.concatenate(
        [x_distr.rvs(sample_size_x), y_distr.rvs(sample_size_y)])
    aucs[trial] = sklearn.metrics.roc_auc_score(labels, scores)
    auc_delong, variances[trial] = compare_auc_delong_xu.delong_roc_variance(
        labels, scores)
print(variances.mean(), aucs.var())
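# The simulation above checks that the mean DeLong variance tracks the
# empirical variance of the AUC across trials. A minimal sketch of one common
# downstream use, assuming only the calls already shown in this script: a
# two-sided z-test of H0: AUC = 0.5 built from the DeLong standard error.
# The helper name auc_z_test is hypothetical.
def auc_z_test(y_true, y_scores, auc_null=0.5):
    auc, variance = compare_auc_delong_xu.delong_roc_variance(y_true, y_scores)
    z = (auc - auc_null) / numpy.sqrt(variance)
    p_value = 2 * scipy.stats.norm.sf(abs(z))  # two-sided p-value
    return auc, z, p_value

# For example, on a single simulated draw:
# scores = numpy.concatenate([x_distr.rvs(sample_size_x), y_distr.rvs(sample_size_y)])
# print(auc_z_test(labels, scores))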
import copy
import time

import numpy as np
import torch

import compare_auc_delong_xu

# Note: `device`, `batch_size`, and `dataset_sizes` are assumed to be defined
# at module level, as in the original script.


def train_model(model, dataloaders, criterion, optimizer, scheduler,
                competition_tasks, num_epochs=25, max_fpr=None,
                u_approach=None, is_inception=False, checkpoint=200):
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_auc = 0.0
    missing = 0
    losses = {'train': [], 'val': []}
    accuracy = {'train': [], 'val': []}
    variances = {'train': [], 'val': []}

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_auc = 0.0
            running_var = 0.0

            # Iterate over data.
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the parameter gradients.
                optimizer.zero_grad()

                # Forward pass; track history only in train.
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        if u_approach == "ignore":
                            # Keep only the certain labels: uncertain ones are
                            # marked -1 and must not contribute to the loss.
                            # (The original used labels.lt(0), which selected
                            # the uncertain labels instead of dropping them.)
                            mask = labels.ge(0)
                            loss = torch.sum(loss.masked_select(mask))

                    # Backward + optimize only if in training phase.
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        # scheduler.batch_step()  # USE WITH DENSENET and other schedulers

                # Statistics.
                running_loss += loss.item() * inputs.size(0)

                # Select the subset of 5 pathologies of interest.
                labels_sub = labels[:, competition_tasks].cpu().squeeze().numpy()
                preds_sub = outputs[:, competition_tasks].detach().cpu().squeeze().numpy()
                if u_approach == "ignore":
                    # Mask out the negative (uncertain) values.
                    mask_sub = (labels_sub > -1)
                    for j in range(labels_sub.shape[1]):
                        label = labels_sub[:, j]
                        pred = preds_sub[:, j]
                        m = mask_sub[:, j]
                        label = label[m]
                        pred = pred[m]
                        try:
                            tmp = compare_auc_delong_xu.delong_roc_variance(label, pred)
                            running_auc += tmp[0]
                            running_var += np.nansum(tmp[1])
                        except Exception:  # e.g. a batch with only one class present
                            missing += 1
                            continue
                else:
                    for j in range(labels_sub.shape[1]):
                        label = labels_sub[:, j]
                        pred = preds_sub[:, j]
                        tmp = compare_auc_delong_xu.delong_roc_variance(label, pred)
                        running_auc += tmp[0]
                        running_var += np.nansum(tmp[1])

                # Print every `checkpoint` mini-batches. (The original hardcoded
                # 200 here, silently ignoring the `checkpoint` parameter.)
                if (i + 1) % checkpoint == 0:
                    # print('Missed {}'.format(missing))
                    print(f'{phase} Loss: {running_loss / (i + 1)} '
                          f'DeLong AUC: {running_auc / (labels_sub.shape[1] * (i + 1) * batch_size)} '
                          f'Variance: {running_var / (labels_sub.shape[1] * (i + 1) * batch_size)}')
                    losses[phase].append(running_loss / ((i + 1) * batch_size))
                    accuracy[phase].append(
                        running_auc / (labels_sub.shape[1] * (i + 1) * batch_size))
                    variances[phase].append(
                        running_var / (labels_sub.shape[1] * (i + 1) * batch_size))

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_auc = running_auc / (dataset_sizes[phase] * labels_sub.shape[1])
            epoch_var = running_var / (dataset_sizes[phase] * labels_sub.shape[1])
            print(f'{phase} Epoch Loss: {epoch_loss} Epoch AUC: {epoch_auc} '
                  f'Epoch Variance: {epoch_var}')

            # With a small validation set we would otherwise record no values, so:
            if phase == 'val':
                losses[phase].append(epoch_loss)
                accuracy[phase].append(epoch_auc)
                variances[phase].append(epoch_var)

            # Deep copy the model whenever validation AUC improves.
            if phase == 'val' and epoch_auc > best_auc:
                best_auc = epoch_auc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60}m {time_elapsed % 60}s')
    print(f'Best val AUC: {best_auc}')
    print(f'Missed {missing} examples.')

    # Load best model weights.
    model.load_state_dict(best_model_wts)
    metrics = (losses, accuracy, variances)
    # for phase in ['train', 'val']:
    #     with open(f'metrics/{filename}_{phase}.txt', 'w+') as f:
    #         for idx in range(len(losses[phase])):
    #             f.write(f'{losses[phase][idx]} {accuracy[phase][idx]} {variances[phase][idx]}\n')
    return model, metrics
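# A minimal sketch of how train_model might be invoked. Everything below except
# train_model itself is a placeholder: the DenseNet backbone, the task indices,
# and the dummy tensors are hypothetical stand-ins chosen only to illustrate the
# expected argument shapes, not the original training configuration.
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader, TensorDataset

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dummy 14-label multi-label data, just to make the sketch executable.
x = torch.randn(32, 3, 224, 224)
y = torch.randint(-1, 2, (32, 14)).float()  # labels in {-1, 0, 1}; -1 = uncertain
loader = DataLoader(TensorDataset(x, y), batch_size=8)
dataloaders = {'train': loader, 'val': loader}
batch_size = 8
dataset_sizes = {'train': 32, 'val': 32}

model = torchvision.models.densenet121(num_classes=14).to(device)
# reduction='none' keeps per-element losses so the "ignore" branch can mask
# out uncertain (-1) labels before summing.
criterion = nn.BCEWithLogitsLoss(reduction='none')
optimizer = optim.Adam(model.parameters(), lr=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
competition_tasks = [2, 5, 6, 8, 10]  # hypothetical indices of the 5 pathologies

model, (losses, accuracy, variances) = train_model(
    model, dataloaders, criterion, optimizer, scheduler,
    competition_tasks, num_epochs=1, u_approach="ignore")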
def test_variance_const(self):
    """Unweighted AUC matches sklearn, and the variance matches a previously
    recorded reference value."""
    auc, variance = compare_auc_delong_xu.delong_roc_variance(
        self.y_test, self.predictions)
    numpy.testing.assert_allclose(self.sklearn_auc, auc)
    numpy.testing.assert_allclose(0.0015359814789736538, variance)
def test_weights_equal_small(self):
    """A constant weight smaller than one must still reproduce the unweighted
    sklearn AUC."""
    weights = numpy.ones(shape=self.y_test.shape) * 0.214124
    auc, variance = compare_auc_delong_xu.delong_roc_variance(
        self.y_test, self.predictions, sample_weight=weights)
    numpy.testing.assert_allclose(self.sklearn_auc, auc)
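# A small self-contained sketch (not one of the tests above) demonstrating the
# invariance the weighted tests rely on: scaling every sample weight by the
# same positive constant leaves both the AUC and the DeLong variance unchanged.
# It assumes only the delong_roc_variance signature exercised throughout this
# file; the toy data below is made up for illustration.
import numpy
import compare_auc_delong_xu

y_true = numpy.array([0, 0, 1, 1, 0, 1])
scores = numpy.array([0.1, 0.4, 0.35, 0.8, 0.2, 0.7])
weights = numpy.array([1.0, 2.0, 1.0, 0.5, 1.0, 2.0])

auc_a, var_a = compare_auc_delong_xu.delong_roc_variance(
    y_true, scores, sample_weight=weights)
auc_b, var_b = compare_auc_delong_xu.delong_roc_variance(
    y_true, scores, sample_weight=weights * 7.3)
numpy.testing.assert_allclose(auc_a, auc_b)
numpy.testing.assert_allclose(var_a, var_b)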