Exemplo n.º 1
0
def main(debug: bool,
         data: str,
         column: str,
         extension: str,
         classes: int,
         augmentation: bool,
         network: str,
         depth: int,
         wildcat: bool,
         pretrained: bool,
         optim: str,
         resume: bool,
         name: str,
         lr: float,
         batchsize: int,
         workers: int,
         patience: int,
         balanced: bool,
         maps: int,
         alpha: float,
         variable_type: str,
         error_range: int,
         ID: str):
    """Train (or resume training) a model and write its checkpoints and configs.

    Every epoch runs one training pass and one validation pass through
    ``engine``, saves ``current.pt``, updates ``best.pt`` when the validation
    loss improves, appends to ``summary.txt`` and redraws ``graphs.png``.
    When training stops, ``configs.json`` is written to the model directory.

    Args:
        debug: Use the model name ``'debug'``, forward the flag to the loaders
            and stop after a single epoch.
        data: Dataset identifier forwarded to ``Loader``; also part of the
            model directory name.
        column: Target column forwarded to ``Loader``.
        extension: File extension forwarded to ``Loader``.
        classes: Number of classes, forwarded to ``Loader``, ``Architecture``
            and ``engine``.
        augmentation: Forwarded to the training ``Loader`` only.
        network: Backbone name forwarded to ``Architecture``.
        depth: Backbone depth forwarded to ``Architecture``.
        wildcat: Forwarded to ``Architecture``; also adds ``maps``/``alpha``
            to the model directory name.
        pretrained: Forwarded to ``Architecture``.
        optim: ``'sgd'`` or ``'adam'``.
        resume: Continue from the ``best.pt`` of the single model directory
            matching ``*{name}`` instead of starting from scratch.
        name: Suffix used to locate the model directory when resuming.
        lr: Learning rate.
        batchsize: Batch size for both loaders.
        workers: Number of ``DataLoader`` worker processes.
        patience: Training stops once more than ``patience`` epochs have
            passed since the best validation loss (hard cap of 150 epochs).
        balanced: For binary targets, weight the loss with
            ``train_dataset.pos_weight``.
        maps: Wildcat parameter forwarded to ``Architecture``.
        alpha: Wildcat parameter forwarded to ``Architecture``.
        variable_type: One of ``'binary'``, ``'categorical'``, ``'continous'``
            (spelling as expected by the rest of the code).
        error_range: Forwarded to ``engine``; stored in the configs only for
            ``'continous'`` targets.
        ID: Explicit model name; the value ``'placeholder'`` requests a random
            word (falling back to a timestamp).

    Raises:
        ValueError: If ``resume`` is set and ``name`` does not match exactly
            one model directory, or if ``variable_type``/``optim`` is not
            supported.
    """

    # ------------------------------------------------------
    print('Initializing model...', end='')
    if resume:
        # Path.glob yields lazily, so the matches must be materialized before
        # they can be counted or indexed.
        matches = list(wsl_model_dir.glob(f'*{name}'))
        if len(matches) != 1:
            raise ValueError(f'Expected exactly one model directory matching *{name}, '
                             f'found {len(matches)}')
        full_mname = matches[0]
        mname = str(full_mname).split('_')[-1]
        # Keep writing checkpoints/plots into the directory being resumed.
        model_dir = full_mname
    else:
        if debug:
            mname = 'debug'

        elif ID == 'placeholder':
            try:
                # Get a random word to use as a more readable name
                response = requests.get("https://random-word-api.herokuapp.com/word")
                assert response.status_code == 200
                mname = response.json()[0]
            except Exception:
                # Deliberate best effort: any failure (network, bad status,
                # unexpected payload) falls back to the date and time.
                mname = datetime.datetime.now().strftime('%d_%m_%H_%M_%S')

        else:
            mname = ID

        full_mname = (data + '_' + column + '_' +
                      f'lr{lr}_bs{batchsize}_{optim}' +
                      ('_pre' if pretrained else '') +
                      ('_bal' if balanced else '') + '_' +
                      f'{network}{depth}' +
                      (f'_wildcat_maps{maps}_alpha{alpha}' if wildcat else '') + '_' +
                      mname)

        model_dir = wsl_model_dir / full_mname
    print('done')
    print('Model Name:', mname)

    # ------------------------------------------------------
    print('Initializing loaders...', end='', flush=True)
    print('train...', end='', flush=True)
    train_dataset = Loader(data,
                           split='train',
                           extension=extension,
                           classes=classes,
                           column=column,
                           variable_type=variable_type,
                           augmentation=augmentation,
                           debug=debug)
    train_loader = DataLoader(  # type: ignore
        train_dataset, batch_size=batchsize, num_workers=workers, shuffle=True
    )

    print('test...', end='', flush=True)
    test_dataset = Loader(data,
                          split='valid',
                          extension=extension,
                          classes=classes,
                          column=column,
                          variable_type=variable_type,
                          debug=debug)
    test_loader = DataLoader(  # type: ignore
        test_dataset, batch_size=batchsize, num_workers=workers, shuffle=True
    )
    print('done')

    # ------------------------------------------------------
    print('Initializing optim/criterion...', end='')
    if resume:
        # The checkpoint stores the model, optimizer and criterion objects
        # together with the training history.
        checkpoint = torch.load(model_dir / 'best.pt',
                                map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        if variable_type == 'continous':
            criterion = nn.MSELoss()
        elif variable_type == 'categorical':
            criterion = nn.CrossEntropyLoss()
        elif variable_type == 'binary':
            if balanced:
                criterion = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor(train_dataset.pos_weight))
            else:
                criterion = nn.BCEWithLogitsLoss()
        else:
            raise ValueError('Variable type should be one of binary/categorical/continous.')
        criterion = criterion.cuda()

        model = Architecture(network, depth, wildcat, classes, maps, alpha, pretrained)
        model = nn.DataParallel(model).cuda()

        if optim == 'sgd':
            optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.1, weight_decay=1e-4)
        elif optim == 'adam':
            optimizer = torch.optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999))
        else:
            raise ValueError(f'{optim} is not supported')

        checkpoint = {
            'model': model,
            'optimizer': optimizer,
            'criterion': criterion,
            'epoch': 0,
            # Initial "best" loss; any epoch with a lower validation loss
            # replaces it.
            'loss': 100,
            'train_loss_all': [],
            'test_loss_all': [],
            'train_rmetric_all': [],
            'test_rmetric_all': []
        }

    best_epoch = checkpoint['epoch']
    best_loss = checkpoint['loss']
    print('done')
    # ------------------------------------------------------

    # Early stopping on validation loss, with a hard cap of 150 epochs.
    while (checkpoint['epoch'] - best_epoch <= patience) and checkpoint['epoch'] < 150:
        start = time.time()
        checkpoint['epoch'] += 1
        print('Epoch:', checkpoint['epoch'], '-Training')
        checkpoint['model'].train()
        checkpoint['loss'], rmetric, summary_train = engine(train_loader, checkpoint, batchsize,
                                                            classes, variable_type, error_range, is_train=True)
        checkpoint['train_loss_all'].append(checkpoint['loss'])
        checkpoint['train_rmetric_all'].append(rmetric)

        print('Epoch:', checkpoint['epoch'], '-Testing')
        checkpoint['model'].eval()
        # checkpoint['loss'] now holds the validation loss, which is what the
        # best-model comparison below uses.
        checkpoint['loss'], rmetric, summary_test = engine(test_loader, checkpoint, batchsize,
                                                           classes, variable_type, error_range, is_train=False)
        checkpoint['test_loss_all'].append(checkpoint['loss'])
        checkpoint['test_rmetric_all'].append(rmetric)

        os.makedirs(model_dir, exist_ok=True)
        torch.save(checkpoint, model_dir / 'current.pt')

        if best_loss > checkpoint['loss']:
            print('Best model updated')
            best_loss = checkpoint['loss']
            best_epoch = checkpoint['epoch']
            torch.save(checkpoint, model_dir / 'best.pt')
        else:
            print('Best model unchanged- Epoch:', best_epoch, 'Loss:', best_loss)

        with open(model_dir / 'summary.txt', 'a+') as file:
            epoch = checkpoint['epoch']
            file.write(f'Epoch: {epoch} \n Train:{summary_train} \n Test:{summary_test}')

        # Redraw the loss / metric curves from the full history every epoch.
        plt.figure(figsize=(12, 18))
        plt.subplot(2, 1, 1)
        plt.plot(checkpoint['train_loss_all'], label='Train loss')
        plt.plot(checkpoint['test_loss_all'], label='Valid loss')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.subplot(2, 1, 2)
        plt.plot(checkpoint['train_rmetric_all'], label='Train rmetric')
        plt.plot(checkpoint['test_rmetric_all'], label='Test rmetric')
        plt.legend()
        plt.xlabel('Epoch')
        plt.ylabel('rmetric')
        plt.savefig(model_dir / 'graphs.png', dpi=300)
        plt.close()

        print('Time taken:', int(time.time() - start), 'secs')

        if debug:
            print('Breaking early since we are in debug mode')
            print('You can find the trained model at -', model_dir)
            break

    configs = {
        'name': mname,
        'time': datetime.datetime.now().strftime('%d_%m_%H_%M_%S'),
        'data': data,
        'column': column,
        'extension': extension,
        'classes': classes,
        'network': network,
        'depth': depth,
        'wildcat': wildcat,
        'pretrained': pretrained,
        'optim': optim,
        'learning_rate': lr,
        'batchsize': batchsize,
        'balanced': balanced,
        'maps': maps if wildcat else None,
        'alpha': alpha if wildcat else None,
        'variable_type': variable_type,
        'error_range': error_range if variable_type == 'continous' else None,
        'best_epoch': best_epoch,
        'best_loss': best_loss,
        # Epochs are 1-based, the history list is 0-based.
        'rmetric': checkpoint['test_rmetric_all'][best_epoch - 1],
    }
    with open(model_dir / 'configs.json', 'w') as fp:
        json.dump(configs, fp)
    return
Exemplo n.º 2
0
Arquivo: ood.py Projeto: jjwang01/wsl
def main(out_data: str = 'chexpert'):
    """Compute Mahalanobis-style out-of-distribution scores for trained models.

    For every model directory under ``wsl_model_dir`` that is not a debug run
    and has a ``configs.json``, this builds loaders for the model's own
    train/valid splits and for the valid split of ``out_data``, loads
    ``best.pt``, estimates a per-layer feature mean and precision matrix from
    training batches, and then scores the valid and out-of-distribution
    loaders for a list of input-perturbation magnitudes.

    The visible code stops after printing 'merge mahalanobis scores...'; the
    computed score dictionaries are not consumed within this block.

    Args:
        out_data: Dataset identifier passed to ``Loader`` for the
            out-of-distribution split.
    """
    models = wsl_model_dir.glob('*')
    # all_configs = []

    for idx, path in enumerate(models):
        if 'debug' in str(path):  # Debugging model
            continue
        elif not (path / 'configs.json').exists():  # Model not completed
            continue
        else:
            with open(path / 'configs.json') as f:
                configs = json.load(f)
                # print(configs)
        print(f'Model {idx} : {path}')

        # ------------------------------------------------------
        # In-distribution loaders use the dataset the model was trained on.
        train_dataset = Loader(data=configs['data'],
                               split='train',
                               extension=configs['extension'],
                               classes=configs['classes'],
                               column=configs['column'],
                               regression=configs['regression'])
        train_loader = DataLoader(  # type: ignore
            train_dataset,
            batch_size=configs['batchsize'],
            num_workers=4,
            pin_memory=True,
            shuffle=True)

        valid_dataset = Loader(data=configs['data'],
                               split='valid',
                               extension=configs['extension'],
                               classes=configs['classes'],
                               column=configs['column'],
                               regression=configs['regression'])
        valid_loader = DataLoader(  # type: ignore
            valid_dataset,
            batch_size=configs['batchsize'],
            num_workers=4,
            pin_memory=True,
            shuffle=True)

        # Out-of-distribution loader: same settings, different dataset.
        out_dataset = Loader(data=out_data,
                             split='valid',
                             extension=configs['extension'],
                             classes=configs['classes'],
                             column=configs['column'],
                             regression=configs['regression'])
        out_loader = DataLoader(  # type: ignore
            out_dataset,
            batch_size=configs['batchsize'],
            num_workers=4,
            pin_memory=True,
            shuffle=True)

        checkpoint = torch.load(
            path / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
        # Unwrap the stored wrapper (e.g. DataParallel) to reach the model.
        checkpoint['model'] = checkpoint['model'].module
        checkpoint['model'].network = configs['network']
        checkpoint['model'].get_map = False
        checkpoint['model'].eval()
        # sigmoid = torch.nn.Sigmoid()
        # Despite its name this is a plain empirical covariance estimator; it
        # is reused (re-fitted) for every layer below.
        group_lasso = EmpiricalCovariance(assume_centered=False)
        # Maps layer module -> layer name. It starts empty and is handed to
        # register_forward_hooks below; presumably that call fills it in
        # place — TODO confirm against the model implementation.
        layer_names = {}

        # ------------------------------------------------------
        def get_mean_precision(loader):
            """Return per-layer feature mean and precision matrix for ``loader``.

            Features are the hooked layer outputs averaged over all trailing
            dimensions, i.e. one value per (sample, channel). Only the first
            22 batches (``idx`` 0..21) are used.
            """

            print('building hook function...')
            features = {}

            def hook(layer, inp, out):
                # Accumulate the pooled features of each layer across batches.
                name = layer_names[layer]
                if name not in features:
                    features[name] = out.detach().data.view(
                        out.size(0), out.size(1), -1).mean(dim=-1)
                else:
                    features[name] = torch.cat(
                        (features[name], out.detach().data.view(
                            out.size(0), out.size(1), -1).mean(dim=-1)),
                        dim=0)

            handles = checkpoint['model'].register_forward_hooks(
                checkpoint['model'], hook, layer_names)

            start = time.time()
            with torch.set_grad_enabled(False):
                for idx, data in enumerate(loader):
                    # NOTE(review): images are read from data[0] here but from
                    # data[1] in get_mahalanobis_score — confirm which index
                    # the Loader actually uses for images.
                    imgs = data[0].cuda().float()
                    _ = data[1]
                    # Forward pass only for its side effect on the hooks.
                    _ = checkpoint['model'](imgs)
                    speed = configs['batchsize'] * idx // (time.time() - start)
                    print('Iter:',
                          idx,
                          'Speed:',
                          int(speed),
                          'img/s',
                          end='\r',
                          flush=True)
                    if idx > 20:
                        break
            print('Total time:', time.time() - start, 'secs')

            print('calculating sample mean...')
            mean = {}
            precision = {}
            for key, value in features.items():
                mean[key] = value.mean(dim=0)
                # Centre the features in place before fitting the covariance.
                features[key] -= mean[key]
                group_lasso.fit(features[key].cpu().numpy())
                precision[key] = torch.from_numpy(
                    group_lasso.precision_).float().cuda()

            for handle in handles:
                handle.remove()
            return mean, precision

        train_mean, train_precision = get_mean_precision(train_loader)

        # ------------------------------------------------------
        def get_mahalanobis_score(loader: Any, features: Any,
                                  magnitude: float):
            """Return per-layer scores for ``loader`` after input perturbation.

            For each hooked layer, every batch is forwarded once to obtain the
            gradient of the mean score with respect to the input, the input is
            shifted by ``magnitude`` along the sign of that gradient, and the
            score of the perturbed input is recorded.

            NOTE(review): ``features`` is never read in this function; the
            layers are taken from the enclosing ``layer_names``.
            """

            scores = {}
            gaussian = {}
            for layer, name in layer_names.items():
                checkpoint['optimizer'].zero_grad()

                def hook(layer, inp, out):
                    # -0.5 * (f - mu)^T P (f - mu) per sample, using the
                    # training mean and precision of this layer.
                    zero_feat = out.view(out.size(0), out.size(1),
                                         -1).mean(dim=-1) - train_mean[name]
                    gaussian[name] = -0.5 * torch.mm(
                        torch.mm(zero_feat, train_precision[name]),
                        zero_feat.t()).diag()

                handle = layer.register_forward_hook(hook)

                start = time.time()
                for idx, data in enumerate(loader):
                    with torch.set_grad_enabled(True):
                        # NOTE(review): get_mean_precision reads images from
                        # data[0]; this reads data[1] — confirm.
                        imgs = data[1].cuda().float()
                        imgs.requires_grad = True
                        _ = checkpoint['model'](imgs)

                        loss = gaussian[name].mean()
                        loss.backward()

                        # Map the input gradient to its sign: {False, True}
                        # -> {0, 1} -> {-1, +1}.
                        gradient = torch.ge(imgs.grad.data, 0)
                        gradient = (gradient.float() - 0.5) * 2

                    with torch.set_grad_enabled(False):
                        # noisy_imgs = imgs - magnitude * sign(gradient)
                        noisy_imgs = torch.add(imgs.data,
                                               gradient,
                                               alpha=-magnitude)
                        # This forward pass re-triggers the hook, so
                        # gaussian[name] now holds the perturbed-input score.
                        _ = checkpoint['model'](noisy_imgs)
                        if name not in scores:
                            scores[name] = gaussian[name].detach().data
                        else:
                            scores[name] = torch.cat(
                                (scores[name], gaussian[name].detach().data),
                                dim=0)
                        print(scores[name].mean())

                    checkpoint['optimizer'].zero_grad()
                    speed = configs['batchsize'] * idx // (time.time() - start)
                    print(name,
                          'Iter:',
                          idx,
                          'Speed:',
                          int(speed),
                          'img/s',
                          end='\r',
                          flush=True)

                # Remove the hook before moving on to the next layer.
                handle.remove()
                print()
            return scores

        print('get mahalanobis scores...')
        magnitudes = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]
        # Both dictionaries are keyed by magnitude, then by layer name.
        maha_valid_scores = {}
        maha_out_scores = {}
        for magnitude in magnitudes:
            print('Noise:', magnitude)
            print('Data - Assumed negative class:', configs['data'])
            maha_valid_scores[magnitude] = get_mahalanobis_score(
                valid_loader, layer_names, magnitude)
            print('Data - Assumed positive class:', out_data)
            maha_out_scores[magnitude] = get_mahalanobis_score(
                out_loader, layer_names, magnitude)
            print()

        print('merge mahalanobis scores...')
Exemplo n.º 3
0
def main(name: str, start: int, plot: bool):
    """Score saliency methods against ground-truth maps for RSNA models.

    For each selected model directory, every positive test image is run
    through the wildcat map (if the model has one) and the ``BackProp``
    saliency methods. Each saliency map is compared with the ground-truth map
    via ``aupr`` and the per-method mean scores are written back into the
    model's ``configs.json``.

    Args:
        name: ``'all'`` for every ``rsna*`` model directory, otherwise a
            substring of the model directory name.
        start: Index of the first matching model directory to process; at
            most 50 directories are processed from there.
        plot: Save (and show) a grid of the saliency maps for every image.
    """

    if name == 'all':
        model_dirs = wsl_model_dir.glob('rsna*')
    else:
        if 'rsna' in name:
            model_dirs = wsl_model_dir.glob(f'*{name}*')
        else:
            model_dirs = wsl_model_dir.glob(f'rsna*{name}*')

    model_dirs = list(model_dirs)
    num_model_dirs = 50
    print(f'Number of potential model directory matches = {len(model_dirs)}, but doing top {num_model_dirs} models for now.')
    model_dirs = model_dirs[start:start + num_model_dirs]

    if plot:
        ncolors = 256
        color_array = plt.get_cmap('viridis')(range(ncolors))
        # change alpha values
        color_array[:, -1] = np.linspace(1.0, 0.0, ncolors)

    for m_idx, model_dir in enumerate(model_dirs):

        if 'debug' in str(model_dir):  # Debugging model
            print('Debugging model')
            continue

        elif not (model_dir / 'configs.json').exists():  # Model not completed
            print('Model not completed')
            continue

        else:
            with open(model_dir / 'configs.json') as f:
                configs = json.load(f)
            dataset = Loader(data=configs['data'],
                             split='test',
                             extension=configs['extension'],
                             classes=configs['classes'],
                             column=configs['column'],
                             variable_type=configs['variable_type'])
            print('Number of images -', len(dataset))

        print(f'Model {m_idx} : {model_dir}')

        # Resolve the task per model. Without this, an unknown dataset would
        # either hit an unbound `task` or silently reuse the previous model's
        # task, and `ground_map` could not be built.
        task = known_tasks[configs['data']] if configs['data'] in known_tasks else None
        if task not in ('detect', 'segment'):
            print('Ground truth not available.')
            continue

        checkpoint = torch.load(model_dir / 'best.pt', map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
        checkpoint['model'].gradient = None
        checkpoint['model'].eval()

        # currently hardcoded, should ideally be inferred from image
        org_size = (1024, 1024)
        new_size = (224, 224)

        all_scores = defaultdict(list)

        GD = BackProp(checkpoint['model'])
        GBP = BackProp(checkpoint['model'], True)

        start_time = time.time()

        for idx, data in enumerate(dataset):
            checkpoint['model'].zero_grad()
            # Use a dedicated local so the `name` argument is not clobbered.
            img_name, img, label = data
            label = label.squeeze().cuda()

            # Only positive images have a ground-truth region to compare with.
            if label != 1:
                continue

            # Make the ground map
            if task == 'detect':
                ground_map = box_to_map(dataset.df[dataset.df.Id == img_name].box.to_list(), np.zeros(org_size))
            else:  # task == 'segment'
                ground_map = np.zeros(org_size)
                eps = dataset.df[dataset.df.Id == img_name].EncodedPixels.to_list()
                for ep in eps:
                    ground_map += rle2mask(ep, np.zeros(org_size)).T

            # Overlapping annotations can sum above 1, hence the clip.
            ground_map = cv2.resize(ground_map, new_size, interpolation=cv2.INTER_NEAREST).clip(0, 1)

            # Make the saliency map
            checkpoint['model'].get_map = True
            if configs['wildcat']:
                _, _, wild, handle = checkpoint['model'](img.unsqueeze(dim=0).cuda().float())
                handle.remove()
                wild = torch.max(wild, dim=1)[0]
                wild = wild.squeeze().cpu().data.numpy()
                # Min-max normalize to [0, 1].
                wild = (wild - wild.min()) / (wild.max() - wild.min())
                wild = cv2.resize(wild, new_size, interpolation=cv2.INTER_NEAREST)
            else:
                wild = np.zeros_like(ground_map)

            # A fresh input tensor is built for every method on purpose: the
            # saliency helpers may attach gradients to their input.
            gcam = GD.generate_cam(img.unsqueeze(dim=0).cuda().float()).squeeze()

            checkpoint['model'].get_map = False
            grad = GD.generate_gradients(img.unsqueeze(dim=0).cuda().float())
            ig = GD.generate_integrated_gradients(img.unsqueeze(dim=0).cuda().float(), 25)

            sg = GD.generate_smooth_grad(img.unsqueeze(dim=0).cuda().float(), 5, 0.1, 0)
            sig = GD.generate_smooth_grad(img.unsqueeze(dim=0).cuda().float(), 5, 0.1, 10)

            gbp = GBP.generate_gradients(img.unsqueeze(dim=0).cuda().float())
            ggcam = np.multiply(gcam, gbp)

            all_scores['WILD'].append(aupr(ground_map.flatten(), wild.flatten()))
            all_scores['GRAD'].append(aupr(ground_map.flatten(), grad.flatten()))
            all_scores['SG'].append(aupr(ground_map.flatten(), sg.flatten()))
            all_scores['IG'].append(aupr(ground_map.flatten(), ig.flatten()))
            all_scores['SIG'].append(aupr(ground_map.flatten(), sig.flatten()))
            all_scores['GBP'].append(aupr(ground_map.flatten(), gbp.flatten()))
            all_scores['GCAM'].append(aupr(ground_map.flatten(), gcam.flatten()))
            all_scores['GGCAM'].append(aupr(ground_map.flatten(), ggcam.flatten()))

            if plot:
                row, col = 2, 5
                map_names = [['XRAY', 'WILD', 'GRAD', 'GCAM', 'GGCAM'], ['MASK', 'GBP', 'SG', 'IG', 'SIG']]
                maps = [[img, wild, grad, gcam, ggcam], [ground_map, gbp, sg, ig, sig]]
                x = LinearSegmentedColormap.from_list(name='rainbow', colors=color_array)
                plt.register_cmap(cmap=x)

                fig, ax = plt.subplots(row, col, figsize=(18, 8))
                for i in range(row):
                    for j in range(col):
                        ax[i, j].imshow(np.transpose(img, (1, 2, 0)))
                        # The first panel shows the bare image without overlay.
                        if not (i == 0 and j == 0):
                            ax[i, j].imshow(maps[i][j], alpha=0.8, cmap='rainbow')
                        ax[i, j].text(0, 220, map_names[i][j], fontsize='x-large', color='white', weight='bold', bbox=dict(fill=True, linewidth=0))
                        ax[i, j].axis('off')
                plt.subplots_adjust(wspace=0.05, hspace=0.05)
                plt.savefig(f'{wsl_plot_dir}/saliency_{img_name}.png', dpi=300, bbox_inches='tight')
                plt.show()
                plt.close()

            # Running mean of every score (in percent) and seconds per image.
            print_str = f'{idx}: | '
            for key, value in all_scores.items():
                print_str += f'{key}-{int(np.mean(value) * 100)} | '
            print_str += str(round((time.time() - start_time) / (idx + 1), 2)) + ' s/img'
            print(print_str, end='\r')

        for key in all_scores.keys():
            configs[key] = np.mean(all_scores[key])
            print(key, ' ', configs[key])

        with open(model_dir / 'configs.json', 'w') as fp:
            json.dump(configs, fp)
Exemplo n.º 4
0
def main(name: str, task: str):
    """Score wildcat maps against ground-truth maps and store the result.

    For each matching, completed, non-debug model whose path contains
    ``'wild'``, the model's predicted map for every validation image is
    compared with the ground-truth map via ``roc_auc_score`` for every
    non-zero label. The mean score is written to ``configs.json`` under
    ``'wild_metric'``.

    Args:
        name: ``'all'`` for every model directory, otherwise a substring of
            the model directory name.
        task: Fallback task (``'detect'`` or ``'segment'``) for datasets that
            are not listed in ``known_tasks``.
    """

    if name == 'all':
        models = wsl_model_dir.glob('*')
    else:
        models = wsl_model_dir.glob(f'*{name}*')

    models = list(models)
    models.reverse()
    print('Number of potential model matches =', len(models))
    all_configs = []

    for m, path in enumerate(models):

        if 'debug' in str(path):  # Debugging model
            print('Debugging model')
            continue
        elif 'wild' not in str(path):
            continue
        elif not (path / 'configs.json').exists():  # Model not completed
            print('Model not completed')
            continue
        else:
            with open(path / 'configs.json') as f:
                configs = json.load(f)
                print(configs)
            dataset = Loader(data=configs['data'],
                             split='valid',
                             extension=configs['extension'],
                             classes=configs['classes'],
                             column=configs['column'],
                             regression=configs['regression'])

        print(f'Model {m} : {path}')
        # Resolve the task per model without overwriting the `task` argument,
        # so a known dataset does not leak its task into later models.
        if configs['data'] in known_tasks:
            model_task = known_tasks[configs['data']]
        else:
            model_task = task

        # No ground-truth map can be built for any other task, so skip the
        # model instead of failing on an undefined ground map later on.
        if model_task not in ('detect', 'segment'):
            print('Ground truth not available.')
            continue

        checkpoint = torch.load(
            path / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
        checkpoint['model'] = checkpoint['model'].module
        checkpoint['model'].get_map = True
        checkpoint['model'].eval()

        org_size = (1024, 1024)
        new_size = (224, 224)
        sigmoid = torch.nn.Sigmoid().cuda()
        all_scores = []

        with torch.set_grad_enabled(False):
            for idx, data in enumerate(dataset):
                img, _ = data
                img_name = dataset.names[idx]
                labels = dataset.labels[idx]

                predicted_map = checkpoint['model'](
                    img.unsqueeze(dim=0).cuda().float()).squeeze(dim=0)
                predicted_map = sigmoid(
                    predicted_map.sum(dim=0)).cpu().data.numpy()

                score = []
                for i, class_label in enumerate(labels):
                    # Only positive labels have a ground-truth region.
                    if class_label == 0:
                        continue

                    if model_task == 'detect':
                        ground_map = box_to_map(
                            dataset.df[dataset.df.Id == img_name].box.to_list(),
                            np.zeros(org_size))
                    else:  # model_task == 'segment'
                        ground_map = np.zeros(org_size)
                        eps = dataset.df[dataset.df.Id ==
                                         img_name].EncodedPixels.to_list()
                        for ep in eps:
                            ground_map += rle2mask(ep, np.zeros(org_size)).T

                    # plt.imshow(ground_map)
                    # Overlapping annotations can sum above 1, hence the clip.
                    ground_map = cv2.resize(
                        ground_map, new_size,
                        interpolation=cv2.INTER_NEAREST).clip(0, 1)
                    re_pred_map = cv2.resize(predicted_map[i],
                                             new_size,
                                             interpolation=cv2.INTER_AREA)
                    score.append(
                        roc_auc_score(ground_map.flatten(),
                                      re_pred_map.flatten()))

                all_scores += score
                if (len(all_scores) + 1) % 32 == 0:
                    print('Idx:', idx, 'Mean:', np.mean(all_scores), end='\r')

        # np.mean([]) is NaN, which would be written as invalid JSON.
        if not all_scores:
            print('No scores computed; configs.json left unchanged')
            continue

        configs['wild_metric'] = np.mean(all_scores)
        print('Wild Metric:', configs['wild_metric'])
        with open(path / 'configs.json', 'w') as fp:
            json.dump(configs, fp)
Exemplo n.º 5
0
def main(name: str, start: int, plot: bool):
    """Score wildcat or gradient-based saliency maps against ground truth.

    For each selected model directory, every positive test image is compared
    with its ground-truth map via ``aupr``. Wildcat models are scored on their
    own map (``'WILD'``); all other models are scored on the ``BackProp``
    saliency methods. The per-method mean scores are written back into the
    model's ``configs.json``.

    Args:
        name: ``'all'`` for every model directory, otherwise a substring of
            the model directory name.
        start: Index of the first matching model directory to process.
        plot: Save (and show) a figure of the maps for every image.
    """

    if name == 'all':
        model_dirs = wsl_model_dir.glob('*')
    else:
        model_dirs = wsl_model_dir.glob(f'*{name}*')

    model_dirs = list(model_dirs)
    model_dirs = model_dirs[start:]

    print('Number of potential model directory matches =', len(model_dirs))

    if plot:
        ncolors = 256
        color_array = plt.get_cmap('viridis')(range(ncolors))
        # change alpha values
        color_array[:, -1] = np.linspace(1.0, 0.0, ncolors)

    for m_idx, model_dir in enumerate(model_dirs):

        if 'debug' in str(model_dir):  # Debugging model
            print('Debugging model')
            continue

        elif not (model_dir / 'configs.json').exists():  # Model not completed
            print('Model not completed')
            continue

        else:
            with open(model_dir / 'configs.json') as f:
                configs = json.load(f)
            dataset = Loader(data=configs['data'],
                             split='test',
                             extension=configs['extension'],
                             classes=configs['classes'],
                             column=configs['column'],
                             regression=configs['regression'])
            print('Number of images -', len(dataset))

        print(f'Model {m_idx} : {model_dir}')

        # Resolve the task per model. Without this, an unknown dataset would
        # either hit an unbound `task` or silently reuse the previous model's
        # task, and `ground_map` could not be built.
        task = known_tasks[configs['data']] if configs['data'] in known_tasks else None
        if task not in ('detect', 'segment'):
            print('Ground truth not available.')
            continue

        checkpoint = torch.load(
            model_dir / 'best.pt',
            map_location='cuda:0' if torch.cuda.is_available() else 'cpu')
        checkpoint['model'] = checkpoint['model'].module
        checkpoint['model'].gradient = None
        checkpoint['model'].eval()

        org_size = (1024, 1024)
        new_size = (224, 224)

        all_scores = defaultdict(list)

        GD = BackProp(checkpoint['model'])
        GBP = BackProp(checkpoint['model'], True)

        start_time = time.time()

        for idx, data in enumerate(dataset):
            print(
                f'{idx} speed-{(time.time() - start_time) // (idx + 1)} s/img',
                end='\r')
            checkpoint['model'].zero_grad()
            # Use a dedicated local so the `name` argument is not clobbered.
            img_name, img, label = data
            label = label.squeeze().cuda()

            # Only positive images have a ground-truth region to compare with.
            if label != 1:
                continue

            # Make the ground map
            if task == 'detect':
                ground_map = box_to_map(
                    dataset.df[dataset.df.Id == img_name].box.to_list(),
                    np.zeros(org_size))
            else:  # task == 'segment'
                ground_map = np.zeros(org_size)
                eps = dataset.df[dataset.df.Id == img_name].EncodedPixels.to_list()
                for ep in eps:
                    ground_map += rle2mask(ep, np.zeros(org_size)).T

            # Overlapping annotations can sum above 1, hence the clip.
            ground_map = cv2.resize(ground_map,
                                    new_size,
                                    interpolation=cv2.INTER_NEAREST).clip(
                                        0, 1)

            # Make the saliency map
            if configs['wildcat']:
                checkpoint['model'].get_map = True
                with torch.set_grad_enabled(False):
                    wild, _, handle = checkpoint['model'](
                        img.unsqueeze(dim=0).cuda().float())
                    # NOTE(review): torch hook handles expose `remove()`;
                    # `remove_hook` is kept as-is in case the model returns a
                    # project-specific handle — confirm.
                    handle.remove_hook()
                    wild = wild.squeeze().cpu().data.numpy()
                # Min-max normalize to [0, 1].
                wild = (wild - wild.min()) / (wild.max() - wild.min())
                wild = cv2.resize(wild,
                                  new_size,
                                  interpolation=cv2.INTER_NEAREST)

                all_scores['WILD'].append(
                    aupr(ground_map.flatten(), wild.flatten()))

                if plot:
                    plt.figure(figsize=(4, 12))
                    x = LinearSegmentedColormap.from_list(name='rainbow',
                                                          colors=color_array)
                    plt.register_cmap(cmap=x)
                    plt.subplot(1, 3, 1)
                    plt.imshow(np.transpose(img, (1, 2, 0)))
                    plt.subplot(1, 3, 2)
                    plt.imshow(ground_map, alpha=0.8, cmap='rainbow')
                    # NOTE(review): the wild map is drawn into panel 2 again,
                    # leaving panel 3 empty — possibly meant to be (1, 3, 3).
                    plt.subplot(1, 3, 2)
                    plt.imshow(wild, alpha=0.8, cmap='rainbow')
                    plt.savefig(f'{wsl_plot_dir}/wild_{img_name}.png',
                                dpi=300,
                                bbox_inches='tight')
                    plt.show()
                    plt.close()

            else:
                # A fresh input tensor is built for every method on purpose:
                # the saliency helpers may attach gradients to their input.
                checkpoint['model'].get_map = False
                grad = GD.generate_gradients(
                    img.unsqueeze(dim=0).cuda().float())
                ig = GD.generate_integrated_gradients(
                    img.unsqueeze(dim=0).cuda().float(), 100)

                sg = GD.generate_smooth_grad(
                    img.unsqueeze(dim=0).cuda().float(), 5, 0.3, 0)
                # NOTE(review): called with the same arguments as `sg`, so the
                # two only differ if the helper is stochastic — confirm the
                # intended last argument.
                sig = GD.generate_smooth_grad(
                    img.unsqueeze(dim=0).cuda().float(), 5, 0.3, 0)

                gbp = GBP.generate_gradients(
                    img.unsqueeze(dim=0).cuda().float())

                checkpoint['model'].get_map = True
                gcam = GD.generate_cam(
                    img.unsqueeze(dim=0).cuda().float()).squeeze()
                ggcam = np.multiply(gcam, gbp)

                all_scores['GRAD'].append(
                    aupr(ground_map.flatten(), grad.flatten()))
                all_scores['SG'].append(
                    aupr(ground_map.flatten(), sg.flatten()))
                all_scores['IG'].append(
                    aupr(ground_map.flatten(), ig.flatten()))
                all_scores['SIG'].append(
                    aupr(ground_map.flatten(), sig.flatten()))
                all_scores['GBP'].append(
                    aupr(ground_map.flatten(), gbp.flatten()))
                all_scores['GCAM'].append(
                    aupr(ground_map.flatten(), gcam.flatten()))
                all_scores['GGCAM'].append(
                    aupr(ground_map.flatten(), ggcam.flatten()))

                if plot:
                    row, col = range(2), range(4)
                    map_names = [['MASK', 'GRAD', 'SG', 'IG'],
                                 ['SIG', 'GCAM', 'GBP', 'GGCAM']]
                    maps = [[ground_map, grad, sg, ig],
                            [sig, gcam, gbp, ggcam]]
                    x = LinearSegmentedColormap.from_list(name='rainbow',
                                                          colors=color_array)
                    plt.register_cmap(cmap=x)

                    fig, ax = plt.subplots(2, 4, figsize=(18, 8))
                    for i in row:
                        for j in col:
                            ax[i, j].imshow(np.transpose(img, (1, 2, 0)))
                            ax[i, j].imshow(maps[i][j],
                                            alpha=0.8,
                                            cmap='rainbow')
                            ax[i, j].text(0,
                                          220,
                                          map_names[i][j],
                                          fontsize='x-large',
                                          color='white',
                                          weight='bold',
                                          bbox=dict(fill=True, linewidth=0))
                            ax[i, j].axis('off')
                    plt.subplots_adjust(wspace=0.05, hspace=0.05)
                    plt.savefig(f'{wsl_plot_dir}/saliency_{img_name}.png',
                                dpi=300,
                                bbox_inches='tight')
                    plt.show()
                    plt.close()

            del data

        for key in all_scores.keys():
            configs[key] = np.mean(all_scores[key])
            print(key, ' ', configs[key])

        # Write back into the directory the configs were read from.
        with open(model_dir / 'configs.json', 'w') as fp:
            json.dump(configs, fp)
Exemplo n.º 6
0
Arquivo: ood.py Projeto: Sharut/wsl
def main(model: str = 'cancer_dmist2_cancer_lr0.0001_bs8_adam_densenet121_romanticization',
         data: str = 'cancer_dmist4', debug: bool = False):
    """Evaluate out-of-distribution detection with per-layer Mahalanobis scores.

    The trained model stored under ``wsl_model_dir / model`` is loaded, the
    per-layer mean and precision matrix of its spatially averaged activations
    are estimated on the training split, and every validation sample of the
    in-distribution dataset (``configs['data']``) and of the
    out-of-distribution dataset (``data``) is scored against those statistics.
    A logistic regression on the per-layer scores is then evaluated with ROC
    AUC for a range of input-perturbation magnitudes; results are printed.

    Args:
        model: Directory name of the trained model inside ``wsl_model_dir``.
        data: Name of the dataset treated as out-of-distribution (label 1).
        debug: Forwarded unchanged to every ``Loader``.

    Returns:
        None. Returns early (after printing) when ``configs.json`` is missing.

    Raises:
        AssertionError: If the model directory does not exist.
        ValueError: If a loader yields no batches while scoring a layer.
    """
    path = wsl_model_dir / model
    print(f'Model: {path}')
    assert path.exists()

    # A missing configs.json means training never finished for this model.
    config_path = path / 'configs.json'
    if not config_path.exists():
        print('Incomplete model')
        return
    with open(config_path) as f:
        configs = json.load(f)

    # Single source of truth for tensor placement, so a CPU-only machine works
    # with the same code path as a GPU one.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # ------------------------------------------------------
    def make_loader(data_name, split, extension):
        """Build a dataset and its DataLoader with the trained model's settings."""
        dataset = Loader(data=data_name,
                         split=split,
                         extension=extension,
                         classes=configs['classes'],
                         column=configs['column'],
                         variable_type=configs['variable_type'],
                         debug=debug)
        loader = DataLoader(  # type: ignore
            dataset, batch_size=configs['batchsize'], num_workers=4,
            pin_memory=True, shuffle=True)
        return dataset, loader

    _, train_loader = make_loader(configs['data'], 'train', configs['extension'])
    valid_dataset, valid_loader = make_loader(configs['data'], 'valid', configs['extension'])
    out_dataset, out_loader = make_loader(data, 'valid', known_extensions[data])

    print('Length of datasets: In', len(valid_dataset), ' Out', len(out_dataset))

    checkpoint = torch.load(path / 'best.pt', map_location=device)
    # The checkpoint stores a wrapped model; unwrap it via `.module`.
    net = checkpoint['model'].module
    checkpoint['model'] = net
    net.network = configs['network']
    net.get_map = False
    net.eval()
    cov_estimator = EmpiricalCovariance(assume_centered=False)
    # layer -> name mapping; presumably filled in by `register_forward_hooks`
    # below (it is passed in empty and read afterwards) -- TODO confirm.
    layer_names = {}

    # ------------------------------------------------------
    def get_mean_precision(loader):
        """Return per-layer (mean, precision) dicts of pooled activations."""
        print('building hook function...')
        # name -> list of (batch, channels) tensors, concatenated once at the end
        # instead of re-concatenating the growing tensor on every batch.
        features = {}

        def hook(layer, inp, out):
            name = layer_names[layer]
            # Average over every dimension after (batch, channel).
            pooled = out.detach().view(out.size(0), out.size(1), -1).mean(dim=-1)
            features.setdefault(name, []).append(pooled)

        handles = net.register_forward_hooks(net, hook, layer_names)

        start = time.time()
        try:
            with torch.no_grad():
                for idx, batch in enumerate(loader):
                    imgs = batch[1].to(device).float()
                    _ = net(imgs)
                    speed = configs['batchsize'] * idx // (time.time() - start)
                    print('Iter:', idx, 'Speed:', int(speed), 'img/s', end='\r', flush=True)
        finally:
            # Always detach the hooks, even if a forward pass fails.
            for handle in handles:
                handle.remove()
        print('Total time:', time.time() - start, 'secs')

        print('calculating sample mean...')
        mean = {}
        precision = {}
        for key, chunks in features.items():
            value = torch.cat(chunks, dim=0)
            mean[key] = value.mean(dim=0)
            centered = value - mean[key]
            cov_estimator.fit(centered.cpu().numpy())
            precision[key] = torch.from_numpy(cov_estimator.precision_).float().to(device)

        return mean, precision

    train_mean, train_precision = get_mean_precision(train_loader)

    # ------------------------------------------------------
    def get_mahalanobis_score(loader: Any, features: Any, magnitude: float):
        """Return {layer name: numpy array of scores} for every sample in loader.

        ``features`` is accepted for interface compatibility but is not used;
        the layers scored are those in ``layer_names`` whose name contains
        none of 'pool', 'relu' or 'bn'.
        """
        scores = {}
        gaussian = {}
        for layer, name in layer_names.items():
            if any(tag in name for tag in ('pool', 'relu', 'bn')):
                continue
            checkpoint['optimizer'].zero_grad()

            # The closure reads `name` from the enclosing loop; this is safe
            # because the hook is removed before the loop advances.
            def hook(layer, inp, out):
                zero_feat = out.view(out.size(0), out.size(1), -1).mean(dim=-1) - train_mean[name]
                gaussian[name] = -0.5 * torch.mm(torch.mm(zero_feat, train_precision[name]), zero_feat.t()).diag()

            handle = layer.register_forward_hook(hook)
            chunks = []
            try:
                start = time.time()
                for idx, batch in enumerate(loader):
                    with torch.set_grad_enabled(True):
                        imgs = batch[1].to(device).float()
                        imgs.requires_grad = True
                        _ = net(imgs)

                        loss = gaussian[name].mean()
                        loss.backward()

                        # Sign of the input gradient as +1 / -1.
                        gradient = torch.ge(imgs.grad.data, 0)
                        gradient = (gradient.float() - 0.5) * 2

                    with torch.set_grad_enabled(False):
                        # Second forward pass on the perturbed input; the hook
                        # overwrites gaussian[name] with the perturbed scores.
                        noisy_imgs = torch.add(imgs.data, gradient, alpha=-magnitude)
                        _ = net(noisy_imgs)
                        chunks.append(gaussian[name].detach())

                    checkpoint['optimizer'].zero_grad()
                    speed = configs['batchsize'] * idx // (time.time() - start)
                    print(name, 'Iter:', idx, 'Speed:', int(speed), 'img/s', end='\r', flush=True)
            finally:
                handle.remove()

            if not chunks:
                raise ValueError(f'loader produced no batches while scoring layer {name!r}')
            scores[name] = torch.cat(chunks, dim=0).cpu().numpy()
        return scores

    def dict_to_numpy(scores):
        """Stack per-layer score vectors into a (samples, layers) array."""
        return np.stack([value.tolist() for value in scores.values()]).T

    print('get mahalanobis scores...')
    magnitudes = [0.0, 0.001, 0.005, 0.01, 0.05, 0.1]
    for magnitude in magnitudes:
        print('Noise:', magnitude)
        print('Data - Assumed negative class:', configs['data'])
        in_scores = dict_to_numpy(get_mahalanobis_score(valid_loader, layer_names, magnitude))
        print('Data - Assumed positive class:', data)
        out_scores = dict_to_numpy(get_mahalanobis_score(out_loader, layer_names, magnitude))

        X = np.concatenate((in_scores, out_scores), axis=0)
        Y = np.asarray([0] * len(in_scores) + [1] * len(out_scores))
        print(X.shape, Y.shape)
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=1)

        clf = LogisticRegressionCV(n_jobs=-1).fit(X_train, Y_train)
        Y_pred = clf.predict_proba(X_test)[:, 1]
        performance = roc_auc_score(Y_test, Y_pred)
        print('Performance:', performance)
        print()
Exemplo n.º 7
0
def main(name: str,
         task: str,
         store: bool):
    """Score saliency methods against ground-truth maps for matching models.

    Every model directory in ``wsl_model_dir`` matching ``name`` is visited.
    Debug models, models whose path lacks 'vgg19', wildcat models and models
    without ``configs.json`` are skipped. For the remaining ones, several
    saliency maps are generated for each test sample whose labels are all
    non-zero and, for ``task == 'detect'``, each map is scored with ``aupr``
    against the bounding-box ground truth. Per-method scores are printed and
    saved as ``.npy`` files, and the mean pooled over all methods is stored
    in the model's config under ``'wild'``.

    Args:
        name: Substring of the model directory name, or 'all' for every model.
        task: 'detect' scores against bounding boxes; 'segment' only builds
            the mask (no scoring is implemented); anything else prints a
            notice for every label.
        store: When true, write the summary table to ``wsl_summary_dir``.
    """
    pattern = '*' if name == 'all' else f'*{name}*'
    models = list(wsl_model_dir.glob(pattern))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    def to_batch(img):
        """Return a fresh float batch of one image on the evaluation device."""
        # A new tensor per call: the attribution methods may attach gradients
        # to their input, so the inputs are deliberately not shared.
        return img.unsqueeze(dim=0).to(device).float()

    print('Number of potential model matches =', len(models))
    all_configs = []
    for path in models:

        if 'debug' in str(path):  # Debugging models
            print('Debugging model')
            continue
        elif 'vgg19' not in str(path):  # Only vgg19 models are evaluated
            print('Model is not vgg19')
            continue
        elif 'wildcat' in str(path):  # Wildcat models are excluded
            print('Model is wildcat')
            continue
        elif not (path / 'configs.json').exists():  # Model not completed
            print('Model not completed')
            continue

        with open(path / 'configs.json') as f:
            configs = json.load(f)
            print(configs)
        dataset = Loader(data=configs['data'],
                         split='test',
                         extension=configs['extension'],
                         classes=configs['classes'],
                         column=configs['column'],
                         regression=configs['regression'])

        # Best effort: a missing or unreadable checkpoint skips this model, but
        # the reason is reported and KeyboardInterrupt is no longer swallowed.
        try:
            checkpoint = torch.load(path / 'best.pt', map_location=device)
        except Exception as err:
            print(f'Could not load checkpoint for {path}: {err}')
            continue

        # The checkpoint stores a wrapped model; unwrap it via `.module`.
        net = checkpoint['model'].module
        checkpoint['model'] = net
        net.get_map = True
        net.eval()
        org_size = (1024, 1024)
        new_size = (224, 224)
        all_scores = defaultdict(list)

        VBP = vanilla_backprop.VanillaBackprop(net)
        IG = integrated_gradients.IntegratedGradients(net)
        GBP = guided_backprop.GuidedBackprop(net)
        GCAM = gradcam.GradCam(net, target_layer=34)
        print(len(dataset))
        for idx, sample in tqdm(enumerate(dataset)):
            img, _ = sample
            img_name = dataset.names[idx]
            labels = dataset.labels[idx]

            # Only samples whose labels are all non-zero are evaluated.
            if any(label == 0 for label in labels):
                continue
            saliency_label = torch.tensor(1).to(device)

            vanilla_grads = VBP.generate_gradients(to_batch(img), saliency_label)
            grayscale_vanilla_grads = vanilla_backprop.convert_to_grayscale(vanilla_grads)

            integrated_grads = IG.generate_integrated_gradients(to_batch(img), saliency_label, 100)
            grayscale_integrated_grads = integrated_gradients.convert_to_grayscale(integrated_grads)

            guided_grads = GBP.generate_gradients(to_batch(img), saliency_label)
            grayscale_guided_grads = guided_backprop.convert_to_grayscale(guided_grads)

            smooth_grad_mask = smooth_grad.generate_smooth_grad(VBP, to_batch(img), saliency_label, 5, 0.3)
            grayscale_smooth_grad = smooth_grad.convert_to_grayscale(smooth_grad_mask)

            smooth_grad_mask = smooth_grad.generate_smooth_grad(IG, to_batch(img), saliency_label, 5, 0.3)
            grayscale_smooth_ig = smooth_grad.convert_to_grayscale(smooth_grad_mask)

            cam = GCAM.generate_cam(to_batch(img), saliency_label)

            cam_gb = guided_gradcam.guided_grad_cam(cam, guided_grads)
            grayscale_cam_gb = guided_gradcam.convert_to_grayscale(cam_gb)

            # One set of scores is appended per label, so samples with several
            # labels contribute several identical entries.
            for label in labels:
                if label == 0:
                    continue

                if task == 'detect':
                    ground_map = box_to_map(dataset.df[dataset.df.Id == img_name].box.to_list(),
                                            np.zeros(org_size))
                    ground_map = cv2.resize(ground_map, new_size, interpolation=cv2.INTER_NEAREST).clip(0, 1)

                    # NOTE(review): aupr is called as (saliency, ground truth)
                    # here; confirm this matches aupr's expected argument order.
                    all_scores['GRAD'].append(aupr(grayscale_vanilla_grads, ground_map))
                    all_scores['SG'].append(aupr(grayscale_smooth_grad, ground_map))
                    all_scores['IG'].append(aupr(grayscale_integrated_grads, ground_map))
                    all_scores['SIG'].append(aupr(grayscale_smooth_ig, ground_map))
                    all_scores['GBP'].append(aupr(grayscale_guided_grads, ground_map))
                    all_scores['GCAM'].append(aupr(cam, ground_map))
                    all_scores['GGCAM'].append(aupr(grayscale_cam_gb, ground_map))

                elif task == 'segment':
                    # NOTE(review): the mask is built but never scored, so no
                    # results are produced for this task.
                    ground_map = np.zeros(org_size)
                    eps = dataset.df[dataset.df.Id == img_name].EncodedPixels.to_list()
                    for ep in eps:
                        ground_map += rle2mask(ep, np.zeros(org_size)).T
                else:
                    print('Ground truth not available.')

        for key in all_scores.keys():
            print(key, ' ', np.mean(all_scores[key]))
            np.save('/data/2015P002510/nishanth/WSL/wsl/wsl/AUPRC_scores/{}_{}.npy'.format(key, 'vgg_test'), all_scores[key])

        # np.mean() cannot average a dict of lists (it raises TypeError), so
        # pool every collected score first. NaN marks "nothing was scored".
        # NOTE(review): pooled mean across methods is assumed to be the intent.
        pooled = [score for values in all_scores.values() for score in values]
        configs['wild'] = float(np.mean(pooled)) if pooled else float('nan')

        all_configs.append(configs)

    df = pd.DataFrame.from_dict(all_configs)
    print(df)
    timestamp = datetime.datetime.now().strftime('%H_%d_%m')
    if store:
        df.to_csv(wsl_summary_dir / f'wild_{timestamp}')