current_testset = model.training_parameters['set']

if testset and current_testset != testset:
    continue
else:
    testset = current_testset

if n_by_rep[rep] > max_per_rep:
    continue

if args.when == 'min-loss':
    epoch = model.training_parameters.get('early-min-loss', 'last')
if args.when == 'last' or epoch == 'last':
    epoch = max(model.testing)

recorders = LossRecorder.loadall(os.path.join(mdir, 'samples', '{:04d}'.format(epoch)),
                                 map_location=args.device)

current_y_true = recorders[testset]._tensors['y_true']

if y_true is not None and (y_true != current_y_true).any():
    logging.debug('{} has a different shuffle, cannot use!'.format(name))
    continue
else:
    y_true = current_y_true

sets = [*recorders.keys()]

# exclude rotated versions of the test set and any explicitly excluded sets
oodsets = [_ for _ in sets if (not _.startswith(testset) and _ not in args.sets_to_exclude)]

# sets = [kept_testset, 'lsunr']  # + sets
sets = [testset] + oodsets
testset = model.training_parameters['set']
allsets = [testset]
allsets.extend(get_same_size_by_name(testset))

transformer = model.training_parameters['transformer']

for s in allsets:

    logging.info('Working on {}'.format(s))

    _, dset = get_dataset(s, transformer=transformer, splits=['test'])
    dataloader = torch.utils.data.DataLoader(dset, batch_size=args.batch_size, shuffle=False)

    recorder = LossRecorder(args.batch_size)

    t0 = time.time()
    n = min(args.num_batch, len(dataloader))

    samples = {'x_': [], 'x': [], 'y': [], 'losses': []}

    for i, (x, y) in enumerate(dataloader):

        if i >= args.num_batch:
            break

        if i:
            ti = time.time()
            t_per_i = (ti - t0) / i
            eta = (n - i) * t_per_i
if net.type == 'cvae':
    a.soft = 'iws'
elif net.type == 'vib':
    a.hard = 'odin*'
else:
    logging.error('Type %s of model not supported', net.type)
    sys.exit(1)

if a.elbo:
    a.soft = 'total'

if a.baseline:
    a.soft = 'logits'
    a.hard = None

recorders = LossRecorder.loadall(dir_path, testset, *a.oodsets, device='cpu')
oodsets = [s for s in a.oodsets if s in recorders]

losses = recorders[testset]._tensors
# for k in losses:
#     print(k, *losses[k].shape)

for s in [testset] + oodsets:

    losses = recorders[s]._tensors

    sign_for_ood = 1
    sign_for_mis = 1
      '$ rsync -avP --files-from=/tmp/files remote:dir/joint-vae .')
sys.exit()

fprs = []
confs = []

for mdir in loaded_files:

    model = M.load(mdir, load_net=False)
    print('__', model.job_number, rmodels[mdir]['ind'], ':',
          *rmodels[mdir]['oods'])  # , model.training_parameters.get('early-min-loss'))

    record_dir = os.path.join(mdir, 'samples', 'last')
    recorders = LossRecorder.loadall(record_dir)

    is_testset = True

    classes_ = get_classes_by_name(rmodels[mdir]['ind'])  # + ['OOD']
    confusion_matrix = {}

    print(' ', f'{" ":{str_col_width}}', ' '.join(f'{_:{str_col_width}}' for _ in classes_))

    thresholds = (-np.inf, np.inf)

    for dset in [rmodels[mdir]['ind']] + rmodels[mdir]['oods']:

        rec = recorders[dset]
        iws = rec._tensors['iws'].max(axis=0)[0].cpu()

        if is_testset:
def do_what_you_gotta_do(dir_name, result_dir, n_images=10, png=True, tex=['means'], prec=2, tpr=0.95):

    try:
        model = IteratedModels.load(dir_name, load_state=False)
    except FileNotFoundError:
        log.error('{} not a model'.format(dir_name))
        return

    testset = model.training_parameters['set']
    allsets = [testset]
    allsets.extend(get_same_size_by_name(testset))

    recorders = LossRecorder.loadall(dir_name, map_location='cpu')
    samples_files = LossRecorder.loadall(dir_name, file_name='sample-{w}.pth',
                                         output='path', map_location='cpu')
    samples = {_: torch.load(samples_files[_]) for _ in samples_files}

    dset = model.training_parameters['set']
    oodsets = list(recorders)
    oodsets.remove(dset)

    sets = [dset] + oodsets

    samples_idx = {}
    samples_i = {}
    y_pred_ = {}

    output = {}

    # loss keys that carry a class dimension, their "moving" variants, and the class-free ones
    k_with_y = {_: _ for _ in ('kl', 'zdist', 'iws', 'loss')}
    k_with_y_moving = {_ + '_': _ for _ in k_with_y}
    k_without_y = {'mse': 'mse'}

    k_all = dict(**k_without_y, **k_with_y, **k_with_y_moving)

    signs = {_: 1 for _ in k_all}
    signs['iws'] = -1
    signs['iws_'] = -1

    def which_y(t, k, dim=0):
        s = signs[k]
        return s * (s * t).min(dim=dim)[0]

    pr = {}
    disagreement = {}

    for s in sets:

        log.debug('Working on %s', s)

        rec = recorders[s]
        t = rec._tensors
        kl = t['kl']

        i_mse = [0]
        for j in range(1, len(model)):
            i_mse.append(i_mse[-1] + j)

        beta = np.prod(get_shape_by_name(s)[0]) / 1e-3
        t['loss'] = t['kl'] + beta * t['mse'][i_mse].unsqueeze(-2)

        y_pred = kl.argmin(1)
        y_pred_[s] = y_pred

        # number of distinct predictions across the iteration steps, per sample
        disagreement[s] = torch.zeros_like(y_pred[0])
        for i in range(len(disagreement[s])):
            disagreement[s][i] = len(y_pred[:, i].unique())

        if s == dset:

            y_true = t['y_true']
            for i in range(y_pred.shape[0]):
                print('Acc of step {}: {:.2%}'.format(i, (y_true == y_pred[i]).float().mean()))

            i_true = y_true == y_pred[0]

            t_y = {}
            thr = {}

            pr['correct'] = {}
            pr['incorrect'] = {}

            for k in k_all:

                thr[k] = {}

                if k in k_with_y:
                    index_y = torch.ones_like(t[k_all[k]], dtype=int) * y_pred[0]
                    t_y[k] = t[k_all[k]].gather(1, index_y)[:, 0]
                elif k in k_with_y_moving:
                    t_y[k] = which_y(t[k_all[k]], k, dim=1)
                else:
                    t_y[k] = t[k]

                t_y[k] *= signs[k]

                # threshold at the requested TPR, computed on the in-distribution test set
                i_tpr = int(len(y_true) * tpr)
                thr[k] = t_y[k].sort()[0][..., i_tpr]

                for w in ('correct', 'incorrect'):
                    pr[w][k] = torch.zeros(len(model))

                for i, w in zip((i_true, ~i_true), ['correct', 'incorrect']):
                    for m in range(len(model)):
                        mean = t_y[k][m][i].mean()
                        pr[w][k][m] = (t_y[k][m][i] <= thr[k][m]).sum() / i.sum()
                        print('*** {} {} {} {:.1%} {:.3e}'.format(w, k, m, pr[w][k][m], mean))

        else:
            i_true = torch.ones_like(y_pred[0], dtype=bool)
            pr[s] = {}

            for k in k_all:

                if k in k_with_y:
                    index_y = torch.ones_like(t[k_all[k]], dtype=int) * y_pred[0]
                    t_y[k] = t[k_all[k]].gather(1, index_y)[:, 0]
                elif k in k_with_y_moving:
                    t_y[k] = which_y(t[k_all[k]], k, dim=1)
                else:
                    t_y[k] = t[k]

                t_y[k] *= signs[k]

                pr[s][k] = torch.zeros(len(model))
                for m in range(len(model)):
                    pr[s][k][m] = (t_y[k][m] <= thr[k][m]).sum() / len(y_pred[0])

        w = (True, False) if s == dset else (True,)
        title = classif_titles if s == dset else {True: s}

        # 'all' is an all-True mask
        i_ = {'all': i_true + True, True: i_true, False: ~i_true}

        for _ in w:
            disagreement[title[_]] = disagreement[s][i_[_]]

        print(s)

        batch_size = recorders[s].batch_size
        num_batch = len(recorders[s])
        len_samples = len(samples[s]['y'])
        samples_per_batch = len_samples // num_batch

        samples_idx[s] = torch.tensor([_ % batch_size < samples_per_batch for _ in range(len(i_true))])
        samples_i[s] = {True: i_true[samples_idx[s]], False: ~i_true[samples_idx[s]]}

        y_pred = y_pred_[s]

        x = {_: samples[s]['x'][samples_i[s][_]][:n_images] for _ in (True, False)}
        x_ = {_: samples[s]['x_'][:, 0, samples_i[s][_]][:, :n_images] for _ in (True, False)}
        y_ = {_: y_pred[:, samples_idx[s]][:, samples_i[s][_]][:, :n_images] for _ in (True, False)}
        y = {_: samples[s]['y'][samples_i[s][_]][:n_images] for _ in (True, False)}

        if s != dset:
            pass
            # y = {_: -1 * torch.ones_like(y[_]) for _ in y}

        w = (True, False) if s == dset else (True,)

        for _ in w:
            x[_] = torch.cat([x[_].unsqueeze(0), x_[_]])
            y[_] = torch.cat([y[_].unsqueeze(0), y_[_]])

        classes = {_: get_classes_by_name(s) if not _ else get_classes_by_name(dset)
                   for _ in range(len(model) + 1)}

        for k in k_with_y_moving:
            t[k] = which_y(t[k_all[k]], k, dim=1)

        for k in k_with_y:
            index = torch.ones_like(t[k_all[k]], dtype=int) * y_pred[0]
            t[k] = t[k_all[k]].gather(1, index)[:, 0, ]

        averaged = {title[_]: {k: t[k][..., i_[_]].mean(-1) for k in k_all} for _ in title}

        for _ in title:
            output[title[_]] = averaged[title[_]]
            output[title[_]].update({'x': x[_], 'y': y[_], 'c': classes})
            output[title[_]]['disagree'] = torch.zeros(len(model))
            for m in range(len(model)):
                output[title[_]]['disagree'][m] = (disagreement[title[_]] == m + 1).float().mean()

        # re-key the flat list of pairwise MSEs by (input step, output step)
        for _ in title:
            mse = {}
            n = 0
            for i in range(len(model) + 1):
                for j in range(i):
                    mse[(j, i)] = output[title[_]]['mse'][n]
                    n += 1
            output[title[_]]['mse'] = mse

        if png:
            with open(os.path.join(result_dir, 'arch.tex'), 'w') as f:
                f.write('\\def\\niter{{{}}}\n'.format(len(model)))
                f.write('\\def\\trainset{{{}}}\n'.format(dset))
                _sets = ','.join(oodsets)
                f.write('\\def\\oodsets{{{}}}\n'.format(_sets))
                _sets = ','.join([classif_titles[True], classif_titles[False], *oodsets])
                f.write('\\def\\allsets{{{}}}\n'.format(_sets))

            for _ in w:

                image_dir = os.path.join(result_dir, 'samples', title[_])
                if not os.path.exists(image_dir):
                    os.makedirs(image_dir)

                for i in range(n_images):

                    tex_file = os.path.join(image_dir, 'image_{}.tex'.format(i))
                    with open(tex_file, 'w') as f:
                        for k in range(len(model) + 1):
                            image = x[_][k][i]
                            image_name = 'x_{}_{}.png'.format(i, k)
                            save_image(image, os.path.join(image_dir, image_name))
                        f.write(r'\def\yin{{{}}}'.format(classes[0][y[_][0][i]]).replace('_', '-'))
                        f.write(r'\def\yout{')
                        out_classes = [classes[k][y[_][k][i]] for k in range(1, len(model) + 1)]
                        f.write('\"' + '\",\"'.join(out_classes[k].replace('_', '-')
                                                    for k in range(len(model))))
                        f.write('\"}\n')
                        f.write(r'\def\n{{{}}}'.format(len(model)))
                        f.write('\n')
                        f.write(r'\def\rotatedlabel{}'.format('{90}' if s.endswith('90') else '{0}'))
                        f.write('\n')

    if tex:

        """ MSE tex file """
        # tab_width = len(model) * 3

        first_row = True

        max_mse = max([max(output[_]['mse'].values()) for _ in output])
        min_mse = min([min(output[_]['mse'].values()) for _ in output])

        min_mse_exp = np.floor(np.log10(min_mse))
        mse_factor = int(np.ceil(-min_mse_exp / 3) * 3 - 3)
        max_mse_exp = int(np.floor(np.log10(max_mse)))

        swidth = mse_factor + max_mse_exp + 1
        stable_mse = 's{}.3'.format(swidth)
        stable_pr = 's2.1'

        col_format = [stable_mse, stable_pr] + [stable_mse, stable_pr, stable_mse] * (len(model) - 1)

        tab = TexTab('l', *col_format, float_format='{:.1f}')
        for m in range(len(model)):
            c = 3 * m + 1 + (m == 0)
            tab.add_col_sep(c, ' (')
            tab.add_col_sep(c + 1, '\\%) ')

        for _ in output:
            header = _.capitalize() if _.endswith('correct') else tex_command('makecommand', _)
            tab.append_cell(header, row='header' + _)
            subheader = tex_command('acron', 'mse') + ' avec'
            tab.append_cell(subheader, row='subheader' + _)
            tab.append_cell('', row=_)

            for j in range(1, len(model) + 1):
                tab.append_cell('Out {}'.format(j), width=2 + (j > 1),
                                multicol_format='c', row='header' + _)
                tab.append_cell('In', width=2, multicol_format='c', row='subheader' + _)
                if j > 1:
                    tab.append_cell('Out {}'.format(j - 1), multicol_format='c', row='subheader' + _)
                tab.append_cell(output[_]['mse'][(0, j)] * 10 ** mse_factor, row=_, formatter='{:.3f}')
                tab.append_cell(100 * pr[_]['mse'][j - 1], row=_, formatter='{:.1f}')
                if j > 1:
                    tab.append_cell(output[_]['mse'][(j - 1, j)] * 10 ** mse_factor,
                                    row=_, formatter='{:.3f}')

            if first_row:
                first_row = False
            else:
                tab.add_midrule('header' + _)

            for j in range(1, len(model) + 1):
                start = 1 if j == 1 else 3 * j - 3
                tab.add_midrule(_, start=start, end=start + 1 + (j > 1))

        with open(os.path.join(result_dir, 'mse.tex'), 'w') as f:
            tab.render(f)
            f.write('\\def\\msefactor{{{}}}'.format('{:1.0f}'.format(-mse_factor)))

        """ ZDist / KL tex file """

        for k in [*k_with_y, *k_with_y_moving]:

            first_row = True

            max_k = max([max(output[_][k].abs()) for _ in output])
            min_k = min([min(output[_][k].abs()) for _ in output])

            min_k_exp = np.floor(np.log10(min_k))
            # print('*** MIN / MAX {} = {:.2e} ({}) / {:.2e}'.format(k, min_k, min_k_exp, max_k))
            if min_k_exp <= -3:
                min_k_exp -= 3

            k_factor = int(np.ceil(-min_k_exp / 3) * 3)
            max_k_exp = int(np.floor(np.log10(max_k)))

            swidth = k_factor + max_k_exp + 1
            col_format = ['l'] + ['s{}.3'.format(swidth), 's2.1'] * len(model)

            tab = TexTab(*col_format)
            for m in range(len(model)):
                tab.add_col_sep(2 + 2 * m, ' (')
                tab.add_col_sep(3 + 2 * m, '\\%)' + ' ' * (m < len(model) - 1))

            tab.append_cell('', row='header')
            for j in range(len(model)):
                tab.append_cell('M{}'.format(j + 1), width=2, multicol_format='c', row='header')

            for _ in output:
                tab.append_cell(_.capitalize() if _.endswith('correct')
                                else tex_command('makecommand', _), row=_)
                for j in range(len(model)):
                    tab.append_cell(output[_][k][j] * 10 ** k_factor, row=_, formatter='{:.3f}')
                    tab.append_cell(100 * pr[_][k][j], row=_, formatter='{:.1f}')
                if first_row:
                    first_row = False
                    tab.add_midrule(_)

            with open(os.path.join(result_dir, '{}.tex'.format(k)), 'w') as f:
                tab.render(f)
                f.write('\\def\\{}factor{{{}}}\n'.format(k, '{:1.0f}'.format(-k_factor)))

        """ Agreement """

        col_format = ['l'] + ['s2.1'] * len(model)
        tab = TexTab(*col_format)

        tab.append_cell(r'$|\mathcal Y|$', row='header')
        for m in range(len(model)):
            tab.append_cell(m + 1, multicol_format='c', row='header')

        for _ in output:
            tab.append_cell(_, row=_)
            for m in range(len(model)):
                tab.append_cell(100 * output[_]['disagree'][m], row=_, formatter='{:.1f}')

        tab.add_midrule(next(iter(output)))

        with open(os.path.join(result_dir, 'disagree.tex'), 'w') as f:
            tab.render(f)

    return output
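
# Usage sketch (not part of the original source): do_what_you_gotta_do is meant to be
# called once per iterated-model directory. With png/tex enabled it writes the sample
# PNGs and the per-key .tex tables into result_dir, and it returns the averaged metrics
# per set. The paths below are illustrative only.
#
#     out = do_what_you_gotta_do('jobs/012345', 'results/012345', n_images=8, tpr=0.95)
#     for name, metrics in out.items():
#         # one entry per OOD set plus the correct/incorrect splits of the test set
#         print(name, metrics['disagree'])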
def loss_comparisons(net, root=os.path.join(DEFAULT_RESULTS_DIR, '%j', 'losses'), plot=False, **kw):

    if plot == True:
        plot = 'all'

    sample_directory = os.path.join(net.saved_dir, 'samples', 'last')
    root = job_to_str(net.job_number, root)

    if not os.path.exists(sample_directory):
        logging.warning(f'Net #{net.job_number} has no recorded loss')
        return

    if not os.path.exists(root):
        os.makedirs(root)

    testset = net.training_parameters['set']
    datasets = [testset] + list(net.ood_results.keys())

    losses = {}
    logits = {}
    y_pred = {}

    recorders = LossRecorder.loadall(sample_directory, *datasets)

    for s in recorders:
        r = recorders[s]
        r.to('cpu')
        losses[s] = r._tensors
        logits[s] = losses[s].pop('logits').T
        y_pred[s] = net.predict_after_evaluate(logits[s], losses[s])

    y_true = losses[testset].pop('y_true')
    i_miss = np.where(y_true != y_pred[testset])[0]
    i_true = np.where(y_true == y_pred[testset])[0]

    # keep, for each sample, only the loss of the predicted class
    for s in losses:
        for k in losses[s]:
            if len(losses[s][k].shape) == 2:
                losses[s][k] = losses[s][k].gather(0, y_pred[s].unsqueeze(0)).squeeze()

    for w, i in zip(('correct', 'missed'), (i_true, i_miss)):
        losses[w] = {k: losses[testset][k][i] for k in losses[testset]}

    for k in ('total', 'cross_x', 'kl'):
        logging.info('Distribution of %s', k)
        for graph in ('hist', 'boxp'):
            f_ = f'losses-{k}-per-set'
            f = os.path.join(root, f_ + f'-{graph}.tab')
            a = None
            if plot and (plot == 'all' or plot.startswith(graph)):
                a = plt.figure(f_ + str(net.job_number)).subplots(1)
            losses_distribution_graphs({s: losses[s][k] for s in losses},
                                       f, sys.stdout, a, graph=graph, **kw)

    for k in ('total', 'cross_x', 'kl'):  # losses[testset]:
        logging.info('Distribution of %s per class', k)
        losses_per_class = {}
        for c in range(net.num_labels):
            pred_is_c = torch.where(y_pred[testset] == c)[0]
            losses_per_class[f'{c}'] = losses[testset][k][pred_is_c]
        for graph in ('hist', 'boxp'):
            f_ = f'losses-{k}-per-class'
            f = os.path.join(root, f_ + f'-{graph}.tab')
            a = None
            if plot and (plot == 'all' or plot.startswith(graph)):
                a = plt.figure(f_ + str(net.job_number)).subplots(1)
            losses_distribution_graphs(losses_per_class, f, sys.stdout, a, graph=graph, **kw)

    n_pred = {}
    for s in y_pred:
        n_pred[s] = [sum(y_pred[s] == c) for c in range(net.num_labels)]

    f = os.path.join(root, 'predicted-classes-per-set.tab')
    with open(f, 'w') as f:
        f.write(' '.join([f'{s:6}' for s in n_pred]) + '\n')
        for c in range(net.num_labels):
            f.write(' '.join([f'{n_pred[s][c]:6}' for s in n_pred]) + '\n')
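
# Usage sketch (not part of the original source): loss_comparisons expects a network that
# has already been evaluated, with per-sample losses recorded under
# <net.saved_dir>/samples/last; the '%j' in the default root is expanded to the job
# number by job_to_str. The loader call and paths below are illustrative only.
#
#     net = M.load('jobs/012345')           # hypothetical model directory
#     loss_comparisons(net, plot='hist')    # write the .tab files and draw histograms
#     loss_comparisons(net, plot=False)     # write the .tab files only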
for _ in mdirs:

    model = M.load(_, load_state=True)
    model.to(device)

    dset = rmodels[_]['set']
    all_sets = tl.get_same_size_by_name(dset)
    all_sets.append(dset)

    job = rmodels[_]['job']

    num_batch = args.num_batch
    batch_size = args.batch_size

    recorders = {_: LossRecorder(batch_size) for _ in all_sets}

    _s = '*** Computing accuracy for {} on model # {} with {} images on {}'
    print(_s.format(dset, job, num_batch * batch_size, next(model.parameters()).device))

    sample_dir = os.path.join('/tmp/reload/samples', str(job))
    if not os.path.exists(sample_dir):
        os.makedirs(sample_dir)

    with turnoff_debug():
        with torch.no_grad():
            acc = model.accuracy(batch_size=args.batch_size,
                                 num_batch=args.num_batch,