Example #1
def run_learning(task_data, meta_model, prm, verbose=1):

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create model for task:
    task_model = get_model(prm)

    #  Load initial point from meta-parameters:
    task_model.load_state_dict(meta_model.state_dict())

    # The data-sets of the new task:
    train_loader = task_data['train']
    test_loader = task_data['test']
    n_train_samples = len(train_loader.dataset)
    n_batches = len(train_loader)

    #  Get task optimizer: in meta-testing, use SGD with step-size alpha
    task_optimizer = SGD(task_model.parameters(), lr=prm.alpha)

    # -------------------------------------------------------------------------------------------
    #  Learning  function
    # -------------------------------------------------------------------------------------------

    def run_meta_test_learning(task_model, train_loader):

        task_model.train()
        train_loader_iter = iter(train_loader)

        # Gradient steps (training) loop
        for i_grad_step in range(prm.n_meta_test_grad_steps):
            # get batch:
            batch_data = data_gen.get_next_batch_cyclic(
                train_loader_iter, train_loader)
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Calculate empirical loss:
            outputs = task_model(inputs)
            task_objective = loss_criterion(outputs, targets)

            # Take gradient step with the task weights:
            grad_step(task_objective, task_optimizer)

        # end gradient step loop

        return task_model

    # -------------------------------------------------------------------------------------------
    #  Test evaluation function
    # --------------------------------------------------------------------------------------------
    def run_test(model, test_loader):
        model.eval()
        test_loss = 0
        n_correct = 0
        for batch_data in test_loader:
            inputs, targets = data_gen.get_batch_vars(batch_data,
                                                      prm,
                                                      is_test=True)
            outputs = model(inputs)
            test_loss += loss_criterion(outputs,
                                        targets)  # accumulate the batch's mean loss
            n_correct += count_correct(outputs, targets)

        n_test_samples = len(test_loader.dataset)
        n_test_batches = len(test_loader)
        test_loss = test_loss.item() / n_test_batches  # average of the accumulated batch losses
        test_acc = n_correct / n_test_samples
        print('\nTest set: Average loss: {:.4}, Accuracy: {:.3} ( {}/{})\n'.
              format(test_loss, test_acc, n_correct, n_test_samples))
        return test_acc

    # -----------------------------------------------------------------------------------------------------------#
    # Update Log file
    if verbose == 1:
        write_to_log(
            'Total number of steps: {}'.format(n_batches * prm.num_epochs),
            prm)

    # -------------------------------------------------------------------------------------------
    #  Run training and evaluation
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    task_model = run_meta_test_learning(task_model, train_loader)

    # Test:
    test_acc = run_test(task_model, test_loader)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           verbose=verbose)

    test_err = 1 - test_acc
    return test_err, task_model
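
A minimal usage sketch of the routine above (an illustration, not repository code): adapt the meta-model to a single new task and read back its test error. The loader names are placeholders, meta_model is a model returned by meta-training, and prm must carry the fields used above (loss_type, alpha, n_meta_test_grad_steps, num_epochs).

# Illustration only (assumed names): one meta-test task.
new_task_data = {'train': train_loader_new_task, 'test': test_loader_new_task}
test_err, adapted_model = run_learning(new_task_data, meta_model, prm, verbose=1)
print('Meta-test error on the new task: {:.3f}'.format(test_err))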
Example #2
            '---- Infinite train tasks - New training tasks '
            'are drawn from tasks distribution in each iteration...', prm)

        # Meta-training to learn meta-model (theta params):
        meta_model = meta_train_MAML_infinite_tasks.run_meta_learning(
            prm, task_generator)

    # save learned meta-model:
    save_model_state(meta_model, save_path)
    write_to_log('Trained meta-model saved in ' + save_path, prm)

elif prm.mode == 'LoadMetaModel':

    # Load a previously trained prior.
    # First, create the model:
    meta_model = get_model(prm)
    # Then load the weights:
    load_model_state(meta_model, prm.load_model_path)
    write_to_log('Pre-trained meta-model loaded from ' + prm.load_model_path,
                 prm)
else:
    raise ValueError('Invalid mode')

# -------------------------------------------------------------------------------------------
# Generate the data sets of the test tasks:
# -------------------------------------------------------------------------------------------

n_test_tasks = prm.n_test_tasks

limit_train_samples_in_test_tasks = prm.limit_train_samples_in_test_tasks
if limit_train_samples_in_test_tasks == 0:
Example #3
def run_meta_learning(train_data_loaders, prm):

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    n_tasks = len(train_data_loaders)

    # Create a 'dummy' model to generate the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create optimizer for meta-params (theta)
    meta_params = list(model.parameters())

    meta_optimizer = optim_func(meta_params, **optim_args)

    # number of sample-batches in each task:
    n_batch_list = [len(data_loader['train']) for data_loader in train_data_loaders]

    n_batches_per_task = np.max(n_batch_list)
    # note: if some tasks have fewer batches than others, their data may be sampled more than once per epoch

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------
    def run_train_epoch(i_epoch):

        # For each task, prepare an iterator to generate training batches:
        train_iterators = [iter(train_data_loaders[ii]['train']) for ii in range(n_tasks)]

        # The task order to take batches from:
        task_order = []
        task_ids_list = list(range(n_tasks))
        for i_batch in range(n_batches_per_task):
            random.shuffle(task_ids_list)
            task_order += task_ids_list

        # each meta-batch includes several tasks
        # we take a grad step with theta after each meta-batch
        meta_batch_starts = list(range(0, len(task_order), prm.meta_batch_size))
        n_meta_batches = len(meta_batch_starts)

        # ----------- meta-batches loop (batches of tasks) -----------------------------------#
        for i_meta_batch in range(n_meta_batches):

            meta_batch_start = meta_batch_starts[i_meta_batch]
            task_ids_in_meta_batch = task_order[meta_batch_start: (meta_batch_start + prm.meta_batch_size)]
            n_tasks_in_batch = len(task_ids_in_meta_batch)  # may be smaller than prm.meta_batch_size for the last meta-batch
            # note: it is OK if some task appears several times in the meta-batch

            mb_data_loaders = [train_data_loaders[task_id] for task_id in task_ids_in_meta_batch]
            mb_iterators = [train_iterators[task_id] for task_id in task_ids_in_meta_batch]

            # Get objective based on tasks in meta-batch:
            total_objective, info = meta_step(prm, model, mb_data_loaders, mb_iterators, loss_criterion)

            # Take gradient step with the meta-parameters (theta) based on validation data:
            grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 200
            if i_meta_batch % log_interval == 0:
                batch_acc = info['correct_count'] / info['sample_count']
                print(cmn.status_string(i_epoch, num_epochs, i_meta_batch, n_meta_batches, batch_acc, get_value(total_objective)))
        # end  meta-batches loop

    # end run_epoch()

    # -----------------------------------------------------------------------------------------------------------#
    # Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training set: {0} tasks'.format(len(train_data_loaders)), prm)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    num_epochs = int(np.ceil(prm.n_meta_train_iterations / np.ceil(n_tasks / prm.meta_batch_size)))

    # Training loop:
    for i_epoch in range(num_epochs):
        run_train_epoch(i_epoch)

    stop_time = timeit.default_timer()

    # Update Log file:
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return learned meta-parameters:
    return model
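
The epoch loop above delegates the actual MAML objective to meta_step(prm, model, mb_data_loaders, mb_iterators, loss_criterion), whose implementation is not shown here. The following is a minimal, self-contained sketch of what a MAML-style per-task objective can look like, under the assumption of a functional forward pass via torch.func.functional_call (PyTorch 2.x); alpha, n_inner_steps and maml_task_objective are illustrative names, not the repository's API.

import torch
from torch.func import functional_call

def maml_task_objective(model, loss_criterion, train_batch, val_batch,
                        alpha=0.01, n_inner_steps=1):
    # Fast weights start at the current meta-parameters (theta):
    fast_weights = {name: p for name, p in model.named_parameters()}
    x_tr, y_tr = train_batch
    x_val, y_val = val_batch
    for _ in range(n_inner_steps):
        # Inner-loop adaptation on the task's training batch:
        train_loss = loss_criterion(
            functional_call(model, fast_weights, (x_tr,)), y_tr)
        grads = torch.autograd.grad(train_loss, list(fast_weights.values()),
                                    create_graph=True)  # keep the graph for the outer gradient
        fast_weights = {name: p - alpha * g
                        for (name, p), g in zip(fast_weights.items(), grads)}
    # Outer objective: loss of the adapted weights on the validation batch;
    # it remains differentiable w.r.t. the original meta-parameters.
    return loss_criterion(functional_call(model, fast_weights, (x_val,)), y_val)

Summing such per-task objectives over the tasks of a meta-batch and passing the total to grad_step is what run_train_epoch above presumably does through the repository's own meta_step.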
Example #4
    avg_param_vec = None
    for i_task in range(n_train_tasks):
        print('Learning train-task {} out of {}'.format(
            i_task + 1, n_train_tasks))
        data_loader = train_data_loaders[i_task]
        test_err, curr_model = learn_single_standard.run_learning(data_loader,
                                                                  prm,
                                                                  verbose=0)
        if i_task == 0:
            avg_param_vec = parameters_to_vector(
                curr_model.parameters()) * (1 / n_train_tasks)
        else:
            avg_param_vec += parameters_to_vector(
                curr_model.parameters()) * (1 / n_train_tasks)

    avg_model = deterministic_models.get_model(prm)
    vector_to_parameters(avg_param_vec, avg_model.parameters())

    # create the prior model:
    prior_model = stochastic_models.get_model(prm)
    prior_layers_list = [
        layer for layer in prior_model.modules()
        if isinstance(layer, StochasticLayer)
    ]
    avg_model_layers_list = [
        layer for layer in avg_model.modules()
        if isinstance(layer, torch.nn.Conv2d)
        or isinstance(layer, torch.nn.Linear)
    ]
    assert len(avg_model_layers_list) == len(
        prior_layers_list), "layer lists have different lengths"
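
A small self-contained illustration (not repository code) of the flatten-average-unflatten mechanism the loop above relies on, using torch.nn.utils.parameters_to_vector and vector_to_parameters on stand-in models:

import torch
from torch.nn.utils import parameters_to_vector, vector_to_parameters

# Stand-in per-task models; in the snippet above these are the models returned
# by learn_single_standard.run_learning for each training task.
models = [torch.nn.Linear(4, 2) for _ in range(3)]
avg_vec = sum(parameters_to_vector(m.parameters()) for m in models) / len(models)
avg_model = torch.nn.Linear(4, 2)                      # same architecture as the per-task models
vector_to_parameters(avg_vec, avg_model.parameters())  # writes the averaged values back in place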
Example #5
def run_learning(data_loader, prm, verbose=1, initial_model=None):

    # Unpack parameters:
    optim_func, optim_args, lr_schedule = \
        prm.optim_func, prm.optim_args, prm.lr_schedule

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # The data-sets:
    train_loader = data_loader['train']
    test_loader = data_loader['test']

    n_batches = len(train_loader)

    # Create model:
    if hasattr(prm, 'func_model') and prm.func_model:
        import Models.deterministic_models as func_models
        model = func_models.get_model(prm)
    else:
        model = get_model(prm)

    # Load initial weights:
    if initial_model:
        model.load_state_dict(initial_model.state_dict())

    # Gather modules list:
    modules_list = list(model.named_children())
    if hasattr(model, 'net'):
        # extract the modules from 'net' field:
        modules_list += list(model.net.named_children())
        modules_list = [m for m in modules_list if m[0] != 'net']

    # Determine which parameters are optimized and which are frozen:
    if hasattr(prm, 'freeze_list'):
        freeze_list = prm.freeze_list
        optimized_modules = [
            named_module[1] for named_module in modules_list
            if not named_module[0] in freeze_list
        ]
        optimized_params = sum(
            [list(mo.parameters()) for mo in optimized_modules], [])
    elif hasattr(prm, 'not_freeze_list'):
        not_freeze_list = prm.not_freeze_list
        optimized_modules = [
            named_module[1] for named_module in modules_list
            if named_module[0] in not_freeze_list
        ]
        optimized_params = sum(
            [list(mo.parameters()) for mo in optimized_modules], [])
    else:
        optimized_params = model.parameters()

    #  Get optimizer:
    optimizer = optim_func(optimized_params, **optim_args)

    # -------------------------------------------------------------------------------------------
    #  Training epoch  function
    # -------------------------------------------------------------------------------------------

    def run_train_epoch(i_epoch):
        log_interval = 500

        model.train()
        for batch_idx, batch_data in enumerate(train_loader):

            # get batch:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            # Calculate loss:
            outputs = model(inputs)
            loss = loss_criterion(outputs, targets)

            # Take gradient step:
            grad_step(loss, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                print(
                    cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                      n_batches, batch_acc, get_value(loss)))

    # -----------------------------------------------------------------------------------------------------------#
    # Update Log file
    # -----------------------------------------------------------------------------------------------------------#
    update_file = verbose != 0
    cmn.write_to_log(cmn.get_model_string(model), prm, update_file=update_file)
    cmn.write_to_log('Total number of steps: {}'.format(n_batches *
                                                        prm.num_epochs),
                     prm,
                     update_file=update_file)
    cmn.write_to_log('Number of training samples: {}'.format(
        data_loader['n_train_samples']),
                     prm,
                     update_file=update_file)

    # -------------------------------------------------------------------------------------------
    #  Run epochs
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_epoch in range(prm.num_epochs):
        run_train_epoch(i_epoch)

    # Test:
    test_acc = run_test(model, test_loader, loss_criterion, prm)

    stop_time = timeit.default_timer()
    cmn.write_final_result(test_acc,
                           stop_time - start_time,
                           prm,
                           verbose=verbose,
                           result_name='Standard')

    test_err = 1 - test_acc
    return test_err, model
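
The freeze_list / not_freeze_list branches above decide which modules' parameters reach the optimizer. A hypothetical configuration sketch (the module name fc_out, and the pretrained_model and data_loader objects, are assumptions, not repository defaults): fine-tune only the last module of a pre-loaded model with run_learning() above.

# Hypothetical fine-tuning setup: keep every module frozen except 'fc_out'
# (assumed to be the classifier head's name in model.named_children()).
prm.not_freeze_list = ['fc_out']
test_err, tuned_model = run_learning(data_loader, prm, verbose=1,
                                     initial_model=pretrained_model)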
def run_meta_learning(prm, task_generator):

    # -------------------------------------------------------------------------------------------
    #  Setting-up
    # -------------------------------------------------------------------------------------------
    # Unpack parameters:
    optim_func, optim_args, lr_schedule =\
        prm.optim_func, prm.optim_args, prm.lr_schedule

    n_iterations = prm.n_meta_train_iterations

    # Loss criterion
    loss_criterion = get_loss_criterion(prm.loss_type)

    # Create a 'dummy' model to generate the set of parameters of the shared initial point (theta):
    model = get_model(prm)
    model.train()

    # Create optimizer for meta-params (theta)
    meta_params = list(model.parameters())

    meta_optimizer = optim_func(meta_params, **optim_args)

    meta_batch_size = prm.meta_batch_size

    # -------------------------------------------------------------------------------------------
    #  Meta-iteration function
    # -------------------------------------------------------------------------------------------
    def run_meta_iteration(i_iter):
        # In each meta-iteration we draw a meta-batch of several tasks
        # Then we take a grad step with theta.

        # Generate the data sets of the training-tasks for meta-batch:
        mb_data_loaders = task_generator.create_meta_batch(
            prm, meta_batch_size, meta_split='meta_train')

        # For each task, prepare an iterator to generate training batches:
        mb_iterators = [
            iter(mb_data_loaders[ii]['train']) for ii in range(meta_batch_size)
        ]

        # Get objective based on tasks in meta-batch:
        total_objective, info = meta_step(prm, model, mb_data_loaders,
                                          mb_iterators, loss_criterion)

        # Take gradient step with the meta-parameters (theta) based on validation data:
        grad_step(total_objective, meta_optimizer, lr_schedule, prm.lr, i_iter)

        # Print status:
        log_interval = 5
        if i_iter % log_interval == 0:
            batch_acc = info['correct_count'] / info['sample_count']
            print(
                cmn.status_string(i_iter, n_iterations, 1, 1, batch_acc,
                                  total_objective.item()))

    # end run_meta_iteration()

    # -----------------------------------------------------------------------------------------------------------#
    # Main script
    # -----------------------------------------------------------------------------------------------------------#

    # Update Log file
    write_to_log(cmn.get_model_string(model), prm)
    write_to_log('---- Meta-Training with infinite tasks...', prm)

    # -------------------------------------------------------------------------------------------
    #  Run meta-training iterations
    # -------------------------------------------------------------------------------------------
    start_time = timeit.default_timer()

    # Training loop:
    for i_iter in range(n_iterations):
        run_meta_iteration(i_iter)

    stop_time = timeit.default_timer()

    # Update Log file:
    cmn.write_final_result(0.0, stop_time - start_time, prm)

    # Return learned meta-parameters:
    return model
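
To show how the pieces fit together, here is an end-to-end sketch (an assumption about the surrounding script, not repository code): meta-train theta with the infinite-tasks loop above, draw fresh test tasks from the same generator, then adapt and evaluate on each of them with the meta-test run_learning() from Example #1. The 'meta_test' split name is an assumption.

# End-to-end illustration (assumed glue code):
meta_model = run_meta_learning(prm, task_generator)
test_tasks_data = task_generator.create_meta_batch(prm, prm.n_test_tasks,
                                                   meta_split='meta_test')
test_errs = [run_learning(task_data, meta_model, prm, verbose=0)[0]
             for task_data in test_tasks_data]
print('Average meta-test error over {} tasks: {:.3f}'.format(
    len(test_errs), sum(test_errs) / len(test_errs)))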