Exemplo n.º 1
0
def is_derailed(model, load_model_for_check=False):
    """Return True if *model* is marked (or detected) as derailed.

    -- model: a dict with a 'dir' key, a directory path (str), or a
       model object with a 'saved_dir' attribute

    -- load_model_for_check: when True and no marker file is found,
       actually load the model and run a dummy evaluation to detect
       derailment (a ValueError during load/eval counts as derailed)

    Returns a bool.
    """
    # Resolve the model directory from the three accepted spellings.
    if isinstance(model, dict):
        directory = model['dir']
    elif isinstance(model, str):
        directory = model
    else:
        directory = model.saved_dir

    # Fast path: an explicit 'derailed' marker file in the directory.
    if os.path.exists(os.path.join(directory, 'derailed')):
        return True

    if load_model_for_check:
        # Import deferred: the heavy cvae module is only needed when we
        # actually load the network, not for the marker-file check above.
        from cvae import ClassificationVariationalNetwork
        try:
            loaded = ClassificationVariationalNetwork.load(directory)
            if torch.cuda.is_available():
                loaded.to('cuda')
            x = torch.zeros(1, *loaded.input_shape, device=loaded.device)
            loaded.evaluate(x)
        except ValueError:
            return True

    return False
Exemplo n.º 2
0
def collect_models(directory,
                   wanted_epoch='last',
                   load_state=True, tpr_for_max=0.95, **default_load_parameters):
    """Load the model saved in *directory* and summarize it as a dict.

    -- directory: path of a saved model

    -- wanted_epoch: 'last' or a specific epoch (a specific epoch
       requires load_state=False)

    -- load_state: whether to load the network weights

    -- tpr_for_max: TPR at which max rates are computed

    Returns the dict built by make_dict_from_model, or None when the
    directory is not (or cannot be loaded as) a model.
    """
    from cvae import ClassificationVariationalNetwork

    # Dump/backup directories are never models.
    if 'dump' in directory:
        return None

    # Inspecting a specific epoch is only supported without loading state.
    assert wanted_epoch == 'last' or not load_state

    try:
        logging.debug(f'Loading net in: {directory}')
        model = ClassificationVariationalNetwork.load(directory,
                                                      load_state=load_state,
                                                      **default_load_parameters)
        return make_dict_from_model(model, directory, tpr=tpr_for_max, wanted_epoch=wanted_epoch)

    except (FileNotFoundError, PermissionError, NoModelError) as e:
        # Not a model directory (or unreadable): skip, but leave a trace
        # instead of swallowing the error silently.
        logging.debug(f'Skipping {directory}: {e}')
        return None

    except RuntimeError as e:
        logging.warning(f'Load error in {directory} see log file')
        logging.debug(f'Load error: {e}')
        return None
Exemplo n.º 3
0
def needed_remote_files(*mdirs, epoch='last', which_rec='all', state=False):
    r"""List missing recorder files to be fetched from a remote.

    -- mdirs: list of model directories

    -- epoch: 'last', 'min-loss' or an int

    -- which_rec: 'none', 'ind' or 'all', optionally followed by
       '-<set>' suffixes naming record sets to exclude
       (e.g. 'all-svhn' means all sets except svhn)

    -- state: whether to include state.pth (only valid with epoch='last')

    Yields (model_dir, missing_file_path) pairs.
    """
    assert not state or epoch == 'last'

    from cvae import ClassificationVariationalNetwork as M

    for d in mdirs:

        m = M.load(d, load_net=False)

        # Resolve the symbolic epoch to a concrete number for this model.
        epoch_ = epoch
        if epoch_ == 'min-loss':
            epoch_ = m.training_parameters.get('early-min-loss', 'last')
        if epoch_ == 'last':
            epoch_ = max(m.testing)

        # Sample directories are named with zero-padded epoch numbers.
        if isinstance(epoch_, int):
            epoch_ = '{:04d}'.format(epoch_)

        testset = m.training_parameters['set']

        sets = []

        # Split 'all-<set>-<set>...' into the mode and the excluded sets.
        which_rec_, *recs_to_exclude = which_rec.split('-')

        if which_rec_ in ('all', 'ind'):
            sets.append(testset)
            if which_rec_ == 'all':
                sets += get_same_size_by_name(testset)
                # Drop every excluded set (the old remove-loop only
                # removed the first occurrence of each name).
                sets = [s for s in sets if s not in recs_to_exclude]

        for s in sets:
            sdir = os.path.join(d, 'samples', epoch_, 'record-{}.pth'.format(s))
            if not os.path.exists(sdir):
                yield d, sdir

        if state:
            sdir = os.path.join(d, 'state.pth')
            if not os.path.exists(sdir):
                yield d, sdir
Exemplo n.º 4
0
def gather_registered_models(mdict, filter, tpr_for_max=0.95, wanted_epoch='last', **kw):
    """Load every registered model in *mdict* that passes *filter*.

    -- mdict: mapping of model directory -> model description

    -- filter: an object with a .filter(description) predicate, or None
       to keep everything

    Extra keyword arguments are forwarded to the model loader.

    Returns a list of dicts built by make_dict_from_model.
    """
    from cvae import ClassificationVariationalNetwork

    selected = []
    for directory, description in mdict.items():
        if filter is not None and not filter.filter(description):
            continue
        model = ClassificationVariationalNetwork.load(directory, **kw)
        selected.append(make_dict_from_model(model, directory,
                                             tpr=tpr_for_max,
                                             wanted_epoch=wanted_epoch))

    return selected
Exemplo n.º 5
0
    def load(cls, dir_name, *a, **kw):
        """Build an instance from the models listed in dir_name/params.json.

        Extra positional/keyword arguments are forwarded to M.load for
        each sub-model.  Previously saved test/ood results are restored
        when their json files exist.
        """
        params = save_load.load_json(dir_name, 'params.json')
        # params maps stringified indices '0', '1', ... to model dirs.
        sub_dirs = [params[str(i)] for i in range(len(params))]

        instance = cls(*(M.load(d, *a, **kw) for d in sub_dirs))

        # Restore cached results; missing files are simply skipped.
        for attr, fname in (('testing', 'test.json'),
                            ('ood_results', 'ood.json')):
            try:
                setattr(instance, attr,
                        save_load.load_json(dir_name, fname,
                                            presumed_type=int))
            except FileNotFoundError:
                pass

        instance.saved_dir = dir_name

        return instance
Exemplo n.º 6
0
    logging.info('{} model{} over {}'.format(len(mdirs),
                                             's' if len(mdirs) > 1 else '',
                                             total_models))

    if removed:
        # Some files are missing locally: tell the user how to fetch them,
        # write a helper script, then abort.
        logging.error('Exiting, load files')
        logging.error(
            'E.g: %s',
            '$ rsync -avP --files-from=/tmp/files remote:dir/joint-vae .')
        logging.error(' Or: %s', '$ . /tmp/rsync-files remote:dir/joint-vae')
        with open('/tmp/rsync-files', 'w') as f:
            f.write('#!/bin/bash\n')
            f.write('rsync -avP --files-from=/tmp/files $1 .\n')
        sys.exit(1)

    models = [M.load(d, load_state=True) for d in mdirs]
    model = IteratedModels(*models)

    device = args.device

    model.to(device)

    # Log the device the parameters actually ended up on (fix: the
    # previous code logged the parameter tensor itself, not its device).
    logging.debug('Model sent to {} (device wanted: {})'.format(
        next(model.parameters()).device, device))

    testset = model.training_parameters['set']
    allsets = [testset]
    allsets.extend(get_same_size_by_name(testset))

    transformer = model.training_parameters['transformer']
Exemplo n.º 7
0
    # Counters of models seen per input representation (color space).
    n_by_rep = dict(hsv=0, rgb=0)

    # Per-model accumulators, keyed later by model name.
    as_in = {'ind': {}, 'correct': {}}
    pr = {'ind': {}, 'correct': {}}

    y_classif = {}
    classif_acc = {}
    agreement = {}
    distribution = {}

    y_true = None

    for mdir in mdirs:

        # Only metadata is needed here (load_net=False skips the weights).
        model = M.load(mdir, load_net=False)
        rep = model.architecture['representation']
        name = rep.upper() + str(n_by_rep[rep])
        # NOTE(review): the assignment above is dead -- 'name' is
        # immediately overwritten with the job number below. Confirm
        # which naming scheme is intended.
        name = str(model.job_number)

        n_by_rep[rep] += 1
        current_testset = model.training_parameters['set']
        # Keep only models trained on the same set as the first one seen
        # (assumes 'testset' is initialized above this chunk -- confirm).
        if testset and current_testset != testset:
            continue
        else:
            testset = current_testset

        # Cap the number of models kept per representation.
        if n_by_rep[rep] > max_per_rep:
            continue

        if args.when == 'min-loss':
Exemplo n.º 8
0
# Scratch/debug script for loading and inspecting a single model.
from cvae import ClassificationVariationalNetwork as M
from utils.save_load import available_results, make_dict_from_model, LossRecorder, find_by_job_number, load_json
import logging

logging.getLogger().setLevel(logging.DEBUG)

# Successive reassignments: only the last 'mdir' is effective; the
# earlier lines are kept as a menu of previously inspected jobs.
mdir = '/tmp/000033'
mdir = '/tmp/000186'
mdir = '/tmp/151320'
mdir = '/tmp/151024'
mdir = '/tmp/151409'
mdir = '/tmp/151020'
mdir = '/tmp/148722'

print('Loading')
model = M.load(mdir, load_state=False)

# Fresh, untrained network for comparison (3x32x32 inputs, 10 classes).
new = M((3, 32, 32), 10, type_of_net='vib')

print('Loaded')
# Disabled experiment kept as a no-op string literal:
"""
acc = {}
acc = {_: model.accuracy(wygiwyu=True, wanted_epoch=_) for _ in (0, 10, 200, 'last')}

print(acc)
"""
# model.trained = 2000

# model.testing[2000].pop('iws')
# model.ood_results.pop(2000)
Exemplo n.º 9
0
# CLI options ('args' is an argparse parser created above this chunk).
args.add_argument('--soft', choices=['kl', 'iws'], default='default')
args.add_argument('--hard', choices=['kl', 'iws'])
args.add_argument('--entropy', '-H', action='store_true')
args.add_argument('--elbo', action='store_true')
args.add_argument('--baseline', action='store_true')
# '--2s' is not a valid Python attribute name, hence the explicit dest.
args.add_argument('--2s', action='store_true', dest='two_sided')
args.add_argument('--print', action='store_true')

a = args.parse_args()
j = a.j
# TPRs are given on the command line as percentages; convert to fractions.
ood_tpr = a.ood_tpr / 100
mis_tpr = a.mis_tpr / 100

# Notebook-style caching: reuse an already-loaded 'net' when possible.
reload = False
if a.direct_load:
    net = Model.load(a.direct_load, load_state=False)
    print(net.job_number, 'loaded')
try:
    # 'net' may not exist on a fresh run; NameError then forces a reload.
    reload = net.job_number != j and not a.direct_load
except NameError:
    reload = True

if reload:
    net = find_by_job_number(j, load_state=False)['net']

dir_path = os.path.join(net.saved_dir, 'samples', 'last')
testset = net.training_parameters['set']

if net.type == 'vib':
    pass  # a.plot = False
Exemplo n.º 10
0
from matplotlib import pyplot as plt

logging.getLogger().setLevel(logging.WARNING)

# Toggle kept for interactive use: only the last assignment is effective.
compute = False
compute = True

if compute:
    # Successive reassignments: only the last job number is used.
    j = 107495
    j = 108183
    j = 37

    load_dir = find_by_job_number('./jobs', j, load_net=False)[j]['dir']

    print('Load net', end='')
    net = ClassificationVariationalNetwork.load(load_dir, load_state=True)
    print(' to gpu')
    net.to('cuda')
    net.latent_sampling = 16

    print('Getting sets')
    trainset_name = net.training['set']
    trainset, testset = dl.get_dataset(trainset_name,
                                       transformer=net.training['transformer'])
    # OOD sets: test splits of the datasets with the same image size.
    oodsets = [dl.get_dataset(n)[1] for n in testset.same_size]
    oodset = oodsets[0]

    batch_size = 200
    n_batch = 1000 // batch_size

    loader = torch.utils.data.DataLoader(testset,
Exemplo n.º 11
0
    # (Inside a try started above this chunk.) Probe that every wanted
    # job is in the cached 'jobs' dict; KeyError/NameError triggers reload.
    for job_number in job_numbers:
        jobs[job_number]

except (NameError, KeyError):
    print('Loading jobs')
    reload = True
    recompute = True

if reload:
    jobs = find_by_job_number(search_dir, *job_numbers, load_state=False)
    #, json_file='networks-lss.json')

    # Drop jobs whose network cannot be loaded.
    to_be_removed = []
    for job_number in jobs:
        try:
            jobs[job_number]['net'] = ClassificationVariationalNetwork.load(
                jobs[job_number]['dir'])
        except RuntimeError:
            print(f'Error loading {job_number}')
            to_be_removed.append(job_number)

    for job_number in to_be_removed:
        jobs.pop(job_number)

# Figure containers keyed later per job/set, filled below this chunk.
fgrid = {}
fexamples = {}
food = {}
foodexamples = {}
fmuvar = {}
fhist = {}
fx_ = {}
Exemplo n.º 12
0
        # Datasets with the same image size serve as OOD sets.
        oodsets = test_dataset.same_size

        for o in oodsets:
            _, ood_dataset = tl.get_dataset(o, transformer=transformer)
            x[o], y[o] = tl.get_batch(ood_dataset,
                                      device=device,
                                      batch_size=max(z_sample, N))

        # Default tile width: share the total width between all sets.
        if not L:
            L = args.total_width // (1 + len(x))

        for n in list_of_nets:

            logging.info('loading state of %s', n['job'])

            model = Net.load(n['dir'])
            model.to(device)
            logging.info('done')
            logging.info('Compute max batch size')

            # Cap the batch size by what fits in memory (at most m).
            batch_size = min(
                m, model.compute_max_batch_size(batch_size=m, which='test'))
            logging.info(f'done ({batch_size})')

            for s in x:

                logging.info('sampling %s', s)

                if N:
                    list_of_images = sample(model,
                                            x[s][:N],
Exemplo n.º 13
0
    # Architectures present per dataset among the kept models.
    for s in archs:
        archs[s] = {
            n['model']['arch']
            for n in models_to_be_kept if n['model']['set'] == s
        }

    for m_ in models_to_be_kept:
        m = m_['model']
        epoch = m_['epoch']
        plan = m_['plan']

        if plan['recorders'] or plan['compute']:
            print('Computing rates of job {} of type {} at epoch {}'.format(
                m['job'], m['type'], epoch))
            logging.debug('Plan for {}; {}'.format(m['job'], plan))
            # Weights are only needed when rates must be (re)computed.
            model = CVNet.load(m['dir'], load_state=plan['compute'])
            if plan['compute']:
                device = args.device or 'cuda'
            else:
                device = args.device or 'cpu'

            logging.debug('Will work on {}'.format(device))
            model.to(device)
            with torch.no_grad():
                print('OOD')
                model.ood_detection_rates(
                    epoch=epoch,
                    from_where=where,
                    sample_dirs=[
                        os.path.join(m['dir'], 'samples',
                                     # NOTE(review): '{:4d}' pads with
                                     # spaces; epoch dirs elsewhere use
                                     # '{:04d}' (zero-padded) -- likely a
                                     # bug, confirm.
                                     '{:4d}'.format(epoch))
Exemplo n.º 14
0
                # Report how training will proceed for the resumed model.
                if done_epochs == 0:
                    verb = 'will start from scratch.'
                elif done_epochs < args.epochs:
                    verb = f'will resume from {done_epochs}.'
                else:
                    verb = 'is already done.'
                log.info(f'Training {verb}')
            except (NoModelError):
                log.error(f'model #{job_TBR_num} not found!')
                sys.exit(1)

        else:
            try:
                resumed_from = resume
                log.info('Loading network in %s', resume)
                # NOTE(review): loads from args.resume but logs 'resume' --
                # presumably the same value; confirm against caller.
                jvae = CVNet.load(args.resume, load_state=True)
                log.debug('Network loaded in {}'.format(resumed_from))
                done_epochs = jvae.trained
                # Same verb-selection logic as the branch above
                # (duplicated; a candidate for a small helper).
                if done_epochs == 0:
                    verb = 'will start from scratch.'
                elif done_epochs < args.epochs:
                    verb = f'will resume from {done_epochs}.'
                else:
                    verb = 'is already done.'
                log.info(f'Training {verb}')

            except (FileNotFoundError, NameError):
                log.error(f'network not found in {resume}')
                sys.exit(1)

    else: