示例#1
0
        current_testset = model.training_parameters['set']
        if testset and current_testset != testset:
            continue
        else:
            testset = current_testset

        if n_by_rep[rep] > max_per_rep:
            continue

        if args.when == 'min-loss':
            epoch = model.training_parameters.get('early-min-loss', 'last')

        if args.when == 'last' or epoch == 'last':
            epoch = max(model.testing)

        recorders = LossRecorder.loadall(os.path.join(mdir, 'samples', '{:04d}'.format(epoch)),
                                         map_location=args.device)
        current_y_true = recorders[testset]._tensors['y_true']

        if y_true is not None and (y_true != current_y_true).any():
            logging.debug('{} has a diffrent shuffle, can not use!'.format(name))
            continue
        else:
            y_true = current_y_true

        sets = [*recorders.keys()]

        # exclude rotated set
        oodsets = [_ for _ in sets if (not _.startswith(testset) and _ not in args.sets_to_exclude)]
        # sets = [kept_testset, 'lsunr']  # + sets
        sets = [testset] + oodsets
示例#2
0
    testset = model.training_parameters['set']
    allsets = [testset]
    allsets.extend(get_same_size_by_name(testset))

    transformer = model.training_parameters['transformer']

    for s in allsets:
        logging.info('Working on {}'.format(s))

        _, dset = get_dataset(s, transformer=transformer, splits=['test'])

        dataloader = torch.utils.data.DataLoader(dset,
                                                 batch_size=args.batch_size,
                                                 shuffle=False)

        recorder = LossRecorder(args.batch_size)

        t0 = time.time()

        n = min(args.num_batch, len(dataloader))

        samples = {'x_': [], 'x': [], 'y': [], 'losses': []}

        for i, (x, y) in enumerate(dataloader):

            if i >= args.num_batch:
                break
            if i:
                ti = time.time()
                t_per_i = (ti - t0) / i
                eta = (n - i) * t_per_i
示例#3
0
    if net.type == 'cvae':
        a.soft = 'iws'
    elif net.type == 'vib':
        a.hard = 'odin*'
    else:
        logging.error('Type %s of model not supported', net.type)
        sys.exit(1)

if a.elbo:
    a.soft = 'total'

if a.baseline:
    a.soft = 'logits'
    a.hard = None

recorders = LossRecorder.loadall(dir_path, testset, *a.oodsets, device='cpu')

oodsets = [s for s in a.oodsets if s in recorders]

losses = recorders[testset]._tensors

# for k in losses:
#     print(k, *losses[k].shape)

for s in [testset] + oodsets:

    losses = recorders[s]._tensors

    sign_for_ood = 1
    sign_for_mis = 1
            '$ rsync -avP --files-from=/tmp/files remote:dir/joint-vae .')

        sys.exit()

    fprs = []
    confs = []

    for mdir in loaded_files:

        model = M.load(mdir, load_net=False)

        print('__', model.job_number, rmodels[mdir]['ind'], ':', *rmodels[mdir]
              ['oods'])  # , model.training_parameters.get('early-min-loss'))

        record_dir = os.path.join(mdir, 'samples', 'last')
        recorders = LossRecorder.loadall(record_dir)

        is_testset = True

        classes_ = get_classes_by_name(rmodels[mdir]['ind'])  # + ['OOD']
        confusion_matrix = {}

        print(' ', f'{" ":{str_col_width}}',
              ' '.join(f'{_:{str_col_width}}' for _ in classes_))

        thresholds = (-np.inf, np.inf)
        for dset in [rmodels[mdir]['ind']] + rmodels[mdir]['oods']:

            rec = recorders[dset]
            iws = rec._tensors['iws'].max(axis=0)[0].cpu()
            if is_testset:
示例#5
0
def do_what_you_gotta_do(dir_name,
                         result_dir,
                         n_images=10,
                         png=True,
                         tex=('means',),
                         prec=2,
                         tpr=0.95):
    """Summarise the recorded losses of an iterated model as images and tables.

    The model found in ``dir_name`` is loaded without its state, together
    with its loss recorders and sample files.  For the training set (split
    by whether the first step's prediction is right) and for every other
    recorded set, the function computes per-step averages of the recorded
    quantities, the fraction of samples under a threshold chosen on the
    training set, and how many distinct classes the steps predicted.

    Args:
        dir_name: directory the model, recorders and samples are loaded from.
        result_dir: directory receiving ``arch.tex``, the ``samples`` tree and
            the ``*.tex`` tables.  It must already exist.
        n_images: maximum number of sample images exported per category.
        png: when truthy, write ``arch.tex`` and the sample images.
        tex: when truthy, write the ``mse``, per-quantity and ``disagree``
            tables.  Only its truthiness is used.
        prec: unused, kept for interface compatibility.
        tpr: fraction of training-set samples that must lie at or under the
            threshold of each quantity.

    Returns:
        A dict keyed by category title (the two ``classif_titles`` entries and
        each other set name), or ``None`` when ``dir_name`` is not a model.
    """

    try:
        model = IteratedModels.load(dir_name, load_state=False)
    except FileNotFoundError:
        log.error('{} not a model'.format(dir_name))
        return

    recorders = LossRecorder.loadall(dir_name, map_location='cpu')
    samples_files = LossRecorder.loadall(dir_name,
                                         file_name='sample-{w}.pth',
                                         output='path',
                                         map_location='cpu')
    samples = {_: torch.load(samples_files[_]) for _ in samples_files}

    dset = model.training_parameters['set']
    oodsets = list(recorders)
    oodsets.remove(dset)

    # The training set MUST come first: the thresholds computed on it are
    # reused for every following set.
    sets = [dset] + oodsets

    samples_idx = {}
    samples_i = {}
    y_pred_ = {}

    output = {}

    k_with_y = {_: _ for _ in ('kl', 'zdist', 'iws', 'loss')}
    k_with_y_moving = {_ + '_': _ for _ in k_with_y}

    k_without_y = {'mse': 'mse'}

    k_all = dict(**k_without_y, **k_with_y, **k_with_y_moving)

    # iws is the only quantity for which larger means better, hence the
    # sign flip so that every quantity can be thresholded with `<=`.
    signs = {_: 1 for _ in k_all}
    signs['iws'] = -1
    signs['iws_'] = -1

    def which_y(t, k, dim=0):
        """Return the signed-minimum of `t` along `dim` for quantity `k`."""
        s = signs[k]
        return s * (s * t).min(dim=dim)[0]

    def signed_scores(t, k, y_first):
        """Return the sign-corrected per-step score of quantity `k`."""
        if k in k_with_y:
            # Value taken at the class predicted by the first step.
            index_y = torch.ones_like(t[k_all[k]], dtype=int) * y_first
            scores = t[k_all[k]].gather(1, index_y)[:, 0]
        elif k in k_with_y_moving:
            # Best value over the class dimension, step by step.
            scores = which_y(t[k_all[k]], k, dim=1)
        else:
            scores = t[k]
        # Out of place so that the recorder's tensors are left untouched.
        return scores * signs[k]

    pr = {}
    thr = {}

    disagreement = {}

    # Row indices of the 'mse' tensor (0, 1, 3, 6, ...); they depend only on
    # the number of steps, so they are computed once.
    i_mse = [0]
    for j in range(1, len(model)):
        i_mse.append(i_mse[-1] + j)

    if png:
        # arch.tex only depends on the model and on the list of sets.
        with open(os.path.join(result_dir, 'arch.tex'), 'w') as f:
            f.write('\\def\\niter{{{}}}\n'.format(len(model)))
            f.write('\\def\\trainset{{{}}}\n'.format(dset))
            _sets = ','.join(oodsets)
            f.write('\\def\\oodsets{{{}}}\n'.format(_sets))
            _sets = ','.join(
                [classif_titles[True], classif_titles[False], *oodsets])
            f.write('\\def\\allsets{{{}}}\n'.format(_sets))

    for s in sets:

        log.debug('Working on %s', s)

        rec = recorders[s]
        t = rec._tensors
        kl = t['kl']

        beta = np.prod(get_shape_by_name(s)[0]) / 1e-3
        t['loss'] = t['kl'] + beta * t['mse'][i_mse].unsqueeze(-2)

        y_pred = kl.argmin(1)
        y_pred_[s] = y_pred

        # Number of distinct classes predicted across the steps, per sample.
        disagreement[s] = torch.zeros_like(y_pred[0])

        for i in range(len(disagreement[s])):
            disagreement[s][i] = len(y_pred[:, i].unique())

        if s == dset:
            y_true = t['y_true']

            for i in range(y_pred.shape[0]):
                print('Acc of step {}: {:.2%}'.format(
                    i, (y_true == y_pred[i]).float().mean()))

            i_true = y_true == y_pred[0]

            pr['correct'] = {}
            pr['incorrect'] = {}

            for k in k_all:

                scores = signed_scores(t, k, y_pred[0])

                # Clamp so that tpr == 1.0 selects the largest score instead
                # of indexing one past the end.
                i_tpr = min(int(len(y_true) * tpr), scores.shape[-1] - 1)
                thr[k] = scores.sort()[0][..., i_tpr]

                for w in ('correct', 'incorrect'):
                    pr[w][k] = torch.zeros(len(model))

                for i, w in zip((i_true, ~i_true), ['correct', 'incorrect']):
                    for m in range(len(model)):
                        mean = scores[m][i].mean()
                        pr[w][k][m] = (scores[m][i] <=
                                       thr[k][m]).sum() / i.sum()
                        print('*** {} {} {} {:.1%} {:.3e}'.format(
                            w, k, m, pr[w][k][m], mean))

        else:
            # No ground truth is used for the other sets: every sample is kept.
            i_true = torch.ones_like(y_pred[0], dtype=bool)
            pr[s] = {}

            for k in k_all:

                scores = signed_scores(t, k, y_pred[0])

                pr[s][k] = torch.zeros(len(model))
                for m in range(len(model)):
                    pr[s][k][m] = (scores[m] <= thr[k][m]).sum() / len(
                        y_pred[0])

        w = (True, False) if s == dset else (True, )

        title = classif_titles if s == dset else {True: s}

        i_ = {'all': i_true + True, True: i_true, False: ~i_true}

        for _ in w:
            disagreement[title[_]] = disagreement[s][i_[_]]

        print(s)

        batch_size = recorders[s].batch_size
        num_batch = len(recorders[s])
        len_samples = len(samples[s]['y'])
        samples_per_batch = len_samples // num_batch

        # Mask of the recorded entries for which a sample was saved: the
        # first `samples_per_batch` entries of every batch.
        samples_idx[s] = torch.tensor(
            [_ % batch_size < samples_per_batch for _ in range(len(i_true))])

        samples_i[s] = {
            True: i_true[samples_idx[s]],
            False: ~i_true[samples_idx[s]]
        }

        y_pred = y_pred_[s]
        x = {
            _: samples[s]['x'][samples_i[s][_]][:n_images]
            for _ in (True, False)
        }
        x_ = {
            _: samples[s]['x_'][:, 0, samples_i[s][_]][:, :n_images]
            for _ in (True, False)
        }

        y_ = {
            _: y_pred[:, samples_idx[s]][:, samples_i[s][_]][:, :n_images]
            for _ in (True, False)
        }

        y = {
            _: samples[s]['y'][samples_i[s][_]][:n_images]
            for _ in (True, False)
        }

        # Row 0 is the input, rows 1.. are the outputs of the steps.
        for _ in w:
            x[_] = torch.cat([x[_].unsqueeze(0), x_[_]])
            y[_] = torch.cat([y[_].unsqueeze(0), y_[_]])

        classes = {
            _: get_classes_by_name(s) if not _ else get_classes_by_name(dset)
            for _ in range(len(model) + 1)
        }

        # The moving variants are derived first: the loop after this one
        # overwrites the tensors they are computed from.
        for k in k_with_y_moving:
            t[k] = which_y(t[k_all[k]], k, dim=1)

        for k in k_with_y:
            index = torch.ones_like(t[k_all[k]], dtype=int) * y_pred[0]
            t[k] = t[k_all[k]].gather(1, index)[:, 0, ]

        averaged = {
            title[_]: {k: t[k][..., i_[_]].mean(-1)
                       for k in k_all}
            for _ in title
        }

        for _ in title:
            output[title[_]] = averaged[title[_]]
            output[title[_]].update({'x': x[_], 'y': y[_], 'c': classes})
            output[title[_]]['disagree'] = torch.zeros(len(model))
            for m in range(len(model)):
                output[title[_]]['disagree'][m] = (
                    disagreement[title[_]] == m + 1).float().mean()

        for _ in title:
            # Re-index the flat mse vector by (source, target) pairs.
            mse = {}
            n = 0
            for i in range(len(model) + 1):
                for j in range(i):
                    mse[(j, i)] = output[title[_]]['mse'][n]
                    n += 1

            output[title[_]]['mse'] = mse

        if png:
            for _ in w:
                image_dir = os.path.join(result_dir, 'samples', title[_])
                os.makedirs(image_dir, exist_ok=True)
                # Fewer than n_images samples may have been kept.
                n_kept = min(n_images, x[_].shape[1])
                for i in range(n_kept):
                    tex_file = os.path.join(image_dir,
                                            'image_{}.tex'.format(i))
                    with open(tex_file, 'w') as f:

                        for k in range(len(model) + 1):

                            image = x[_][k][i]
                            image_name = 'x_{}_{}.png'.format(i, k)
                            save_image(image,
                                       os.path.join(image_dir, image_name))
                        f.write(r'\def\yin{{{}}}'.format(
                            classes[0][y[_][0][i]]).replace('_', '-'))
                        f.write(r'\def\yout{')
                        out_classes = [
                            classes[k][y[_][k][i]]
                            for k in range(1,
                                           len(model) + 1)
                        ]
                        f.write('\"' +
                                '\",\"'.join(out_classes[k].replace('_', '-')
                                             for k in range(len(model))))
                        f.write('\"}\n')
                        f.write(r'\def\n{{{}}}'.format(len(model)))
                        f.write('\n')
                        f.write(r'\def\rotatedlabel{}'.format(
                            '{90}' if s.endswith('90') else '{0}'))
                        f.write('\n')

    if tex:
        # --- MSE tex file ---
        first_row = True

        max_mse = max(max(output[_]['mse'].values()) for _ in output)
        min_mse = min(min(output[_]['mse'].values()) for _ in output)

        min_mse_exp = np.floor(np.log10(min_mse))
        mse_factor = int(np.ceil(-min_mse_exp / 3) * 3 - 3)
        max_mse_exp = int(np.floor(np.log10(max_mse)))

        swidth = mse_factor + max_mse_exp + 1
        stable_mse = 's{}.3'.format(swidth)
        stable_pr = 's2.1'

        col_format = [
            stable_mse, stable_pr
        ] + [stable_mse, stable_pr, stable_mse] * (len(model) - 1)

        tab = TexTab('l', *col_format, float_format='{:.1f}')

        for m in range(len(model)):
            c = 3 * m + 1 + (m == 0)
            tab.add_col_sep(c, ' (')
            tab.add_col_sep(c + 1, '\\%) ')

        for _ in output:
            header = _.capitalize() if _.endswith('correct') else tex_command(
                'makecommand', _)
            tab.append_cell(header, row='header' + _)
            subheader = tex_command('acron', 'mse') + ' avec'
            tab.append_cell(subheader, row='subheader' + _)
            tab.append_cell('', row=_)

            for j in range(1, len(model) + 1):
                tab.append_cell('Out {}'.format(j),
                                width=2 + (j > 1),
                                multicol_format='c',
                                row='header' + _)
                tab.append_cell('In',
                                width=2,
                                multicol_format='c',
                                row='subheader' + _)
                if j > 1:
                    tab.append_cell('Out {}'.format(j - 1),
                                    multicol_format='c',
                                    row='subheader' + _)
                tab.append_cell(output[_]['mse'][(0, j)] * 10**mse_factor,
                                row=_,
                                formatter='{:.3f}')
                tab.append_cell(100 * pr[_]['mse'][j - 1],
                                row=_,
                                formatter='{:.1f}')
                if j > 1:
                    tab.append_cell(output[_]['mse'][(j - 1, j)] *
                                    10**mse_factor,
                                    row=_,
                                    formatter='{:.3f}')

            if first_row:
                first_row = False
            else:
                tab.add_midrule('header' + _)

            for j in range(1, len(model) + 1):
                start = 1 if j == 1 else 3 * j - 3
                tab.add_midrule(_, start=start, end=start + 1 + (j > 1))

        with open(os.path.join(result_dir, 'mse.tex'), 'w') as f:
            tab.render(f)
            f.write('\\def\\msefactor{{{}}}'.format(
                '{:1.0f}'.format(-mse_factor)))

        # --- ZDisT / KL tex files, one per quantity ---
        for k in [*k_with_y, *k_with_y_moving]:

            first_row = True

            max_k = max(max(output[_][k].abs()) for _ in output)
            min_k = min(min(output[_][k].abs()) for _ in output)
            min_k_exp = np.floor(np.log10(min_k))

            if min_k_exp <= -3:
                min_k_exp -= 3

            k_factor = int(np.ceil(-min_k_exp / 3) * 3)

            max_k_exp = int(np.floor(np.log10(max_k)))

            swidth = k_factor + max_k_exp + 1

            col_format = ['l'] + ['s{}.3'.format(swidth), 's2.1'] * len(model)
            tab = TexTab(*col_format)
            for m in range(len(model)):
                tab.add_col_sep(2 + 2 * m, ' (')
                tab.add_col_sep(3 + 2 * m, '\\%)' + ' ' * (m < len(model) - 1))

            tab.append_cell('', row='header')
            for j in range(len(model)):
                tab.append_cell('M{}'.format(j + 1),
                                width=2,
                                multicol_format='c',
                                row='header')

            for _ in output:
                tab.append_cell(_.capitalize() if _.endswith('correct') else
                                tex_command('makecommand', _),
                                row=_)

                for j in range(len(model)):

                    tab.append_cell(output[_][k][j] * 10**k_factor,
                                    row=_,
                                    formatter='{:.3f}')
                    tab.append_cell(100 * pr[_][k][j],
                                    row=_,
                                    formatter='{:.1f}')

                if first_row:
                    first_row = False
                    tab.add_midrule(_)

            with open(os.path.join(result_dir, '{}.tex'.format(k)), 'w') as f:
                tab.render(f)
                f.write('\\def\\{}factor{{{}}}\n'.format(
                    k, '{:1.0f}'.format(-k_factor)))

        # --- Agreement ---
        col_format = ['l'] + ['s2.1'] * len(model)
        tab = TexTab(*col_format)
        tab.append_cell(r'$|\mathcal Y|$', row='header')
        for m in range(len(model)):
            tab.append_cell(m + 1, multicol_format='c', row='header')

        for _ in output:
            tab.append_cell(_, row=_)
            for m in range(len(model)):
                tab.append_cell(100 * output[_]['disagree'][m],
                                row=_,
                                formatter='{:.1f}')

        tab.add_midrule(next(iter(output)))

        with open(os.path.join(result_dir, 'disagree.tex'), 'w') as f:
            tab.render(f)

    return output
示例#6
0
def loss_comparisons(net,
                     root=os.path.join(DEFAULT_RESULTS_DIR, '%j', 'losses'),
                     plot=False,
                     **kw):
    """Write (and optionally plot) the distributions of a net's recorded losses.

    The recorders found in ``<net.saved_dir>/samples/last`` are loaded for the
    training set and for every key of ``net.ood_results``.  For each of the
    ``total``, ``cross_x`` and ``kl`` losses, histogram and box-plot data are
    produced per set (plus the ``correct`` and ``missed`` subsets of the
    training set) and per predicted class.  The number of predictions per
    class and per set is written to ``predicted-classes-per-set.tab``.

    Args:
        net: the evaluated network.
        root: output directory template, resolved with
            ``job_to_str(net.job_number, root)`` and created if missing.
        plot: ``True`` or ``'all'`` to draw every graph, or a string starting
            with ``'hist'`` or ``'boxp'`` to draw only that kind; falsy to
            draw nothing.
        **kw: forwarded to ``losses_distribution_graphs``.

    Returns:
        None.  Nothing is done (beyond a warning) when the sample directory
        does not exist.
    """

    # Equality rather than identity on purpose: 1 is accepted like True.
    if plot == True:  # noqa: E712
        plot = 'all'

    sample_directory = os.path.join(net.saved_dir, 'samples', 'last')
    root = job_to_str(net.job_number, root)

    if not os.path.exists(sample_directory):
        logging.warning(f'Net #{net.job_number} has no recorded loss')
        return
    os.makedirs(root, exist_ok=True)

    testset = net.training_parameters['set']
    datasets = [testset] + list(net.ood_results.keys())

    losses = {}
    logits = {}
    y_pred = {}

    recorders = LossRecorder.loadall(sample_directory, *datasets)

    for s in recorders:
        r = recorders[s]
        r.to('cpu')
        losses[s] = r._tensors
        # 'logits' is removed from the losses so that it is not plotted.
        logits[s] = losses[s].pop('logits').T
        y_pred[s] = net.predict_after_evaluate(logits[s], losses[s])

    y_true = losses[testset].pop('y_true')
    i_miss = np.where(y_true != y_pred[testset])[0]
    i_true = np.where(y_true == y_pred[testset])[0]

    # Two-dimensional losses are reduced to their value at the predicted class.
    for s in losses:
        for k in losses[s]:
            if len(losses[s][k].shape) == 2:
                losses[s][k] = losses[s][k].gather(
                    0, y_pred[s].unsqueeze(0)).squeeze()

    # Pseudo-sets holding the well and badly classified training-set samples.
    for w, i in zip(('correct', 'missed'), (i_true, i_miss)):

        losses[w] = {k: losses[testset][k][i] for k in losses[testset]}

    def write_graphs(distributions, stem):
        """Emit the 'hist' and 'boxp' outputs of `distributions` under `stem`."""
        for graph in ('hist', 'boxp'):
            tab_path = os.path.join(root, stem + f'-{graph}.tab')
            a = None
            if plot and (plot == 'all' or plot.startswith(graph)):
                a = plt.figure(stem + str(net.job_number)).subplots(1)

            losses_distribution_graphs(distributions,
                                       tab_path,
                                       sys.stdout,
                                       a,
                                       graph=graph,
                                       **kw)

    for k in ('total', 'cross_x', 'kl'):
        logging.info('Distribution of %s', k)
        write_graphs({s: losses[s][k] for s in losses},
                     f'losses-{k}-per-set')

    for k in ('total', 'cross_x', 'kl'):
        logging.info('Distribution of %s per class', k)
        losses_per_class = {}
        for c in range(net.num_labels):
            pred_is_c = torch.where(y_pred[testset] == c)[0]
            losses_per_class[f'{c}'] = losses[testset][k][pred_is_c]
        # The stem used to end with '.tab', which produced files named
        # '...-per-class.tab-hist.tab'; it now mirrors the per-set naming.
        write_graphs(losses_per_class, f'losses-{k}-per-class')

    # Vectorised count of the predictions of each class, per set.
    n_pred = {
        s: [int((y_pred[s] == c).sum()) for c in range(net.num_labels)]
        for s in y_pred
    }

    tab_path = os.path.join(root, 'predicted-classes-per-set.tab')
    with open(tab_path, 'w') as tab_file:

        tab_file.write(' '.join([f'{s:6}' for s in n_pred]) + '\n')
        for c in range(net.num_labels):
            tab_file.write(
                ' '.join([f'{n_pred[s][c]:6}' for s in n_pred]) + '\n')
示例#7
0
    for _ in mdirs:

        model = M.load(_, load_state=True)
        model.to(device)

        dset = rmodels[_]['set']

        all_sets = tl.get_same_size_by_name(dset)
        all_sets.append(dset)

        job = rmodels[_]['job']

        num_batch = args.num_batch
        batch_size = args.batch_size

        recorders = {_: LossRecorder(batch_size) for _ in all_sets}

        _s = '*** Computing accuracy for {} on model # {} with {} images on {}'
        print(
            _s.format(dset, job, num_batch * batch_size,
                      next(model.parameters()).device))

        sample_dir = os.path.join('/tmp/reload/samples', str(job))
        if not os.path.exists(sample_dir):
            os.makedirs(sample_dir)
        with turnoff_debug():
            with torch.no_grad():

                acc = model.accuracy(
                    batch_size=args.batch_size,
                    num_batch=args.num_batch,