Example #1
    def run_train_epoch(i_epoch, log_mat):

        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):

            # get batch data:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)

            batch_size = inputs.shape[0]

            # Monte-Carlo iterations:
            avg_empiric_loss = torch.zeros(1, device=prm.device)
            n_MC = prm.n_MC

            for i_MC in range(n_MC):

                # calculate objective:
                outputs = post_model(inputs)
                avg_empiric_loss_curr = (1 / batch_size) * loss_criterion(
                    outputs, targets)
                avg_empiric_loss += (1 / n_MC) * avg_empiric_loss_curr

            # complexity/prior term:
            if prior_model:
                complexity_term = get_task_complexity(prm, prior_model,
                                                      post_model,
                                                      n_train_samples,
                                                      avg_empiric_loss)
            else:
                complexity_term = torch.zeros(1, device=prm.device)

            # Total objective:
            objective = avg_empiric_loss + complexity_term

            # Take gradient step:
            grad_step(objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            log_interval = 1000
            if batch_idx % log_interval == 0:
                batch_acc = correct_rate(outputs, targets)
                print(
                    cmn.status_string(i_epoch, prm.num_epochs, batch_idx,
                                      n_batches, batch_acc, objective.item()) +
                    ' Loss: {:.4}\t Comp.: {:.4}'.format(
                        avg_empiric_loss.item(), complexity_term.item()))

        # End batch loop

        # save results for epochs-figure:
        if figure_flag and (i_epoch % prm.log_figure['interval_epochs'] == 0):
            save_result_for_figure(post_model, prior_model, data_loader, prm,
                                   log_mat, i_epoch)
Example #2
def eval_bound(post_model, prior_model, data_loader, prm, avg_empiric_loss=None, dvrg_val=None):


    n_train_samples = data_loader['n_train_samples']

    if avg_empiric_loss is None:
        _, avg_empiric_loss = run_eval_Bayes(post_model, data_loader['train'], prm)


    #  complexity/prior term:
    complexity_term = get_task_complexity(
        prm, prior_model, post_model, n_train_samples, avg_empiric_loss, dvrg=dvrg_val)

    # Total objective:
    bound_val = avg_empiric_loss + complexity_term.item()
    return bound_val
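eval_bound composes the bound as the averaged empirical loss plus the complexity term returned by get_task_complexity. The exact complexity depends on prm.complexity_type; as a hedged illustration only, a McAllester-style PAC-Bayes term has the following shape (the kl, delta, and loss values below are made-up placeholders, not the repository's computation):

import math
import torch

n_train_samples = 1000
delta = 0.035                      # confidence parameter (placeholder)
kl = torch.tensor(12.5)            # KL(posterior || prior), placeholder value
avg_empiric_loss = torch.tensor(0.42)

# McAllester-style complexity term: sqrt((KL + ln(n/delta)) / (2(n-1)))
complexity_term = torch.sqrt((kl + math.log(n_train_samples / delta))
                             / (2 * (n_train_samples - 1)))

bound_val = avg_empiric_loss + complexity_term.item()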
Example #3
def get_objective(prior_model, prm, mb_data_loaders, mb_iterators,
                  mb_posteriors_models, loss_criterion, n_train_tasks):
    '''  Calculate objective based on tasks in meta-batch '''
    # note: it is OK if some tasks appear several times in the meta-batch

    n_tasks_in_mb = len(mb_data_loaders)

    correct_count = 0
    sample_count = 0

    # Hyper-prior term:
    hyper_dvrg = get_hyper_divergnce(prm, prior_model)
    meta_complex_term = get_meta_complexity_term(hyper_dvrg, prm,
                                                 n_train_tasks)

    avg_empiric_loss_per_task = torch.zeros(n_tasks_in_mb, device=prm.device)
    complexity_per_task = torch.zeros(n_tasks_in_mb, device=prm.device)
    n_samples_per_task = torch.zeros(
        n_tasks_in_mb, device=prm.device
    )  # how many samples there are in total in each task (not just in the batch)

    # ----------- loop over tasks in meta-batch -----------------------------------#
    for i_task in range(n_tasks_in_mb):

        n_samples = mb_data_loaders[i_task]['n_train_samples']
        n_samples_per_task[i_task] = n_samples

        # get sample-batch data from current task to calculate the empirical loss estimate:
        batch_data = data_gen.get_next_batch_cyclic(
            mb_iterators[i_task], mb_data_loaders[i_task]['train'])

        # get batch variables:
        inputs, targets = data_gen.get_batch_vars(batch_data, prm)
        batch_size = inputs.shape[0]

        # The posterior model corresponding to the task in the batch:
        post_model = mb_posteriors_models[i_task]
        post_model.train()

        # Monte-Carlo iterations:
        n_MC = prm.n_MC

        avg_empiric_loss = 0.0
        complexity = 0.0

        # Monte-Carlo loop
        for i_MC in range(n_MC):

            # Debug
            # print(targets[0].data[0])  # print first image label
            # import matplotlib.pyplot as plt
            # plt.imshow(inputs[0].cpu().data[0].numpy())  # show first image
            # plt.show()

            # Empirical Loss on current task:
            outputs = post_model(inputs)
            avg_empiric_loss_curr = (1 / batch_size) * loss_criterion(
                outputs, targets)

            correct_count += count_correct(outputs, targets)  # for print
            sample_count += inputs.size(0)

            # Intra-task complexity of current task:
            # curr_complexity = get_task_complexity(prm, prior_model, post_model,
            #     n_samples, avg_empiric_loss_curr, hyper_dvrg, n_train_tasks=n_train_tasks, noised_prior=True)

            avg_empiric_loss += (1 / n_MC) * avg_empiric_loss_curr
            # complexity +=  (1 / n_MC) * curr_complexity
        # end Monte-Carlo loop

        complexity = get_task_complexity(prm,
                                         prior_model,
                                         post_model,
                                         n_samples,
                                         avg_empiric_loss,
                                         hyper_dvrg,
                                         n_train_tasks=n_train_tasks,
                                         noised_prior=True)
        avg_empiric_loss_per_task[i_task] = avg_empiric_loss
        complexity_per_task[i_task] = complexity
    # end loop over tasks in meta-batch

    # Approximated total objective:
    if prm.complexity_type == 'Variational_Bayes':
        # note: avg_empiric_loss_per_task is estimated as an average over batch samples,
        #  but its weight in the objective should reflect the total number of samples in the task
        total_objective =\
            (avg_empiric_loss_per_task * n_samples_per_task + complexity_per_task).mean() * n_train_tasks + meta_complex_term
        # total_objective = ( avg_empiric_loss_per_task * n_samples_per_task + complexity_per_task).mean() + meta_complex_term

    else:
        total_objective =\
            avg_empiric_loss_per_task.mean() + complexity_per_task.mean() + meta_complex_term

    info = {
        'sample_count': sample_count,
        'correct_count': correct_count,
        'avg_empirical_loss': avg_empiric_loss_per_task.mean().item(),
        'avg_intra_task_comp': complexity_per_task.mean().item(),
        'meta_comp': meta_complex_term.item()
    }
    return total_objective, info
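The aggregation at the end is the meta-learning objective: per-task empirical losses (re-weighted by each task's total sample count under the 'Variational_Bayes' option) plus per-task complexities, averaged over the meta-batch, plus the meta-level complexity term. A self-contained sketch of just that aggregation step, with dummy tensors standing in for the quantities computed above:

import torch

n_tasks_in_mb, n_train_tasks = 4, 16
avg_empiric_loss_per_task = torch.rand(n_tasks_in_mb)
complexity_per_task = torch.rand(n_tasks_in_mb)
n_samples_per_task = torch.full((n_tasks_in_mb,), 200.0)
meta_complex_term = torch.tensor(0.3)

complexity_type = 'Variational_Bayes'
if complexity_type == 'Variational_Bayes':
    # empirical losses are batch averages, so re-weight by the task's total sample count
    total_objective = (avg_empiric_loss_per_task * n_samples_per_task
                       + complexity_per_task).mean() * n_train_tasks + meta_complex_term
else:
    total_objective = (avg_empiric_loss_per_task.mean()
                       + complexity_per_task.mean() + meta_complex_term)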
Example #4
def run_test(task,
             prior_policy,
             post_policy,
             baseline,
             args,
             env_name,
             env_kwargs,
             batch_size,
             observation_space,
             action_space,
             n_train_tasks,
             num_test_batches=10):
    optim_func, optim_args, lr_schedule =\
        args.optim_func, args.optim_args, args.lr_schedule
    #  Get optimizer:
    optimizer = optim_func(post_policy.parameters(), **optim_args)

    # *******************************************************************
    # Train: post_policy
    # *******************************************************************
    for batch in range(num_test_batches):
        # Hyper-prior term:
        # compute the divergence between the hyper-prior and the hyper-posterior
        hyper_dvrg = get_hyper_divergnce(kappa_prior=args.kappa_prior,
                                         kappa_post=args.kappa_post,
                                         divergence_type=args.divergence_type,
                                         device=args.device,
                                         prior_model=prior_policy)
        # compute the corresponding meta term from the hyper-divergence hyper_dvrg (arguments could also be passed positionally)
        meta_complex_term = get_meta_complexity_term(
            hyper_kl=hyper_dvrg,
            delta=args.delta,
            complexity_type=args.complexity_type,
            n_train_tasks=n_train_tasks)

        sampler = SampleTest(env_name,
                             env_kwargs,
                             batch_size=batch_size,
                             observation_space=observation_space,
                             action_space=action_space,
                             policy=post_policy,
                             baseline=baseline,
                             seed=args.seed,
                             prior_policy=prior_policy,
                             task=task)
        # calculate the empirical error for the current task
        loss_per_task, avg_reward, last_reward, train_episodes = sampler.sample(
        )

        complexity = get_task_complexity(delta=args.delta,
                                         complexity_type=args.complexity_type,
                                         device=args.device,
                                         divergence_type=args.divergence_type,
                                         kappa_post=args.kappa_post,
                                         prior_model=prior_policy,
                                         post_model=post_policy,
                                         n_samples=batch_size,
                                         avg_empiric_loss=loss_per_task,
                                         hyper_dvrg=hyper_dvrg,
                                         n_train_tasks=n_train_tasks,
                                         noised_prior=True)

        if args.complexity_type == 'Variational_Bayes':
            # note: the empirical loss (loss_per_task) is estimated as an average over batch samples,
            #  but its weight in the objective should reflect the total number of samples in the task
            n_train_samples = 1
            total_objective = loss_per_task * (n_train_samples) + complexity
        else:
            # this term is analogous to the PAC-Bayes objective
            total_objective = loss_per_task + complexity

        # Take gradient step with the shared prior and all tasks' posteriors:
        grad_step(total_objective, optimizer, lr_schedule, args.lr)

    # *******************************************************************
    # Test: post_policy
    # *******************************************************************

    # test_acc, test_loss = run_eval_max_posterior(post_model, test_loader, prm)

    sampler = SampleTest(env_name,
                         env_kwargs,
                         batch_size=batch_size,
                         observation_space=observation_space,
                         action_space=action_space,
                         policy=post_policy,
                         baseline=baseline,
                         seed=args.seed,
                         task=task)
    # calculate the empirical error for the current task
    test_loss_per_task, test_avg_reward, test_last_reward, train_episodes = sampler.sample(
    )

    Data_post_Trajectory = train_episodes[0].observations.numpy()
    task = task[0]
    task = task['goal']
    plt.plot(Data_post_Trajectory[:, 0, 0], Data_post_Trajectory[:, 0, 1])
    plt.plot(task[0], task[1], 'g^')
    plt.savefig('Trajectories.pdf')
    plt.show()
    return test_loss_per_task, test_avg_reward, test_last_reward
def main(args, prior_policy=None, init_from_prior=True):

    # *******************************************************************
    # config log filename
    #    'r': read;  'w': write
    # *******************************************************************
    with open(args.config, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    if args.output_folder is not None:
        # create the output folder if it does not exist
        if not os.path.exists(args.output_folder):
            os.makedirs(args.output_folder)
        # output folder path and file names
        policy_filename = os.path.join(args.output_folder,
                                       'policy_2d_PAC_Bayes.th')
        config_filename = os.path.join(args.output_folder,
                                       'config_2d_PAC_Bayes.json')

        # with open(config_filename, 'w') as f:
        #     config.update(vars(args))
        #     json.dump(config, f, indent=2)

    if args.seed is not None:
        torch.manual_seed(args.seed)
        torch.cuda.manual_seed_all(args.seed)

    env = gym.make(config['env-name'], **config['env-kwargs'])
    # not yet tested
    env.seed(args.seed)
    env.close()
    """
    ************************************************************
    新增加的参数:用于获取环境的动作观测空间大小,一便生成随机贝叶斯网络
    output_size = reduce(mul, env.action_space.shape, 1)
    input_size = reduce(mul, env.observation_space.shape, 1)
    ************************************************************
    """
    observation_space = env.observation_space
    action_space = env.action_space
    args.output_size = reduce(mul, env.action_space.shape, 1)
    args.input_size = reduce(mul, env.observation_space.shape, 1)
    """
    ************************************************************
    新增加的模型:随机网络
    device = ('cuda' if (torch.cuda.is_available()
                   and args.use_cuda) else 'cpu')
    log_var_init = {'mean': -10, 'std': 0.1}
    ************************************************************
    """
    if prior_policy and init_from_prior:
        # init from prior model:
        # deepcopy creates an independent copy; a shallow copy would still change along with the original object
        prior_policy = deepcopy(prior_policy).to(args.device)
    else:
        # otherwise build a fresh model
        prior_policy = get_policy_for_env(args.device,
                                          args.log_var_init,
                                          env,
                                          hidden_sizes=config['hidden-sizes'])

    # the data can be used without being copied
    # prior_policy.share_memory()
    """
    ************************************************************
    策略 prior model 与 post model 以及对应的参数 param
        prior_policy  posteriors_policies
        prior_params  all_post_param
        all_params
    ************************************************************
    """
    num_tasks = config['meta-batch-size']
    batch_size = config['fast-batch-size']

    # Unpack parameters:
    # extract the optimizer function, optimizer arguments, learning-rate schedule, etc.
    optim_func, optim_args, lr_schedule =\
        args.optim_func, args.optim_args, args.lr_schedule

    posteriors_policies = [
        get_policy_for_env(args.device,
                           args.log_var_init,
                           env,
                           hidden_sizes=config['hidden-sizes'])
        for _ in range(num_tasks)
    ]
    all_post_param = sum([
        list(posterior_policy.parameters())
        for posterior_policy in posteriors_policies
    ], [])

    # Create optimizer for all parameters (posteriors + prior)
    # create an optimizer over all parameters, including the prior and all posteriors
    prior_params = list(prior_policy.parameters())
    all_params = all_post_param + prior_params
    all_optimizer = optim_func(all_params, **optim_args)
    """生成固定的 tasks
        随机数问题尚未解决,可重复性不行
    """
    # Baseline
    baseline = LinearFeatureBaseline(get_input_size(env))

    # sample 'meta-batch-size' tasks
    # for task in enumerate(tasks):
    tasks = env.unwrapped.sample_tasks(num_tasks)

    # meta-batch-size: number of tasks in each batch of tasks
    # Since the PAC-Bayes approach is used here, the task types and their number are fixed;
    # i.e., in the 2D navigation task the goal is fixed and each run trains on different trajectories.
    # tasks = sampler.sample_tasks(num_tasks=config['meta-batch-size'])

    avg_empiric_loss_per_task = torch.zeros(num_tasks, device=args.device)
    avg_reward_per_task = torch.zeros(num_tasks, device=args.device)
    complexity_per_task = torch.zeros(num_tasks, device=args.device)
    # handles the case where different tasks have different numbers of training samples
    n_samples_per_task = torch.zeros(num_tasks, device=args.device)

    Info_avg_reward = []
    Info_total_objective = []
    Info_last_reward = []
    Info_train_trajectories = []

    # train for 'num-batches' batches
    for batch in range(config['num-batches']):
        print(batch)

        # params_show_train = prior_policy.state_dict()

        # Hyper-prior term:
        # compute the divergence between the hyper-prior and the hyper-posterior
        hyper_dvrg = get_hyper_divergnce(kappa_prior=args.kappa_prior,
                                         kappa_post=args.kappa_post,
                                         divergence_type=args.divergence_type,
                                         device=args.device,
                                         prior_model=prior_policy)
        # compute the corresponding meta term from the hyper-divergence hyper_dvrg (arguments could also be passed positionally)
        meta_complex_term = get_meta_complexity_term(
            hyper_kl=hyper_dvrg,
            delta=args.delta,
            complexity_type=args.complexity_type,
            n_train_tasks=num_tasks)

        for i_task in range(num_tasks):
            sampler = SampleTest(config['env-name'],
                                 env_kwargs=config['env-kwargs'],
                                 batch_size=batch_size,
                                 observation_space=observation_space,
                                 action_space=action_space,
                                 policy=posteriors_policies[i_task],
                                 baseline=baseline,
                                 seed=args.seed,
                                 prior_policy=prior_policy,
                                 task=tasks[i_task])
            # calculate the empirical error for the current task
            loss_per_task, avg_reward, last_reward, train_episodes = sampler.sample(
            )

            complexity = get_task_complexity(
                delta=args.delta,
                complexity_type=args.complexity_type,
                device=args.device,
                divergence_type=args.divergence_type,
                kappa_post=args.kappa_post,
                prior_model=prior_policy,
                post_model=posteriors_policies[i_task],
                n_samples=batch_size,
                avg_empiric_loss=loss_per_task,
                hyper_dvrg=hyper_dvrg,
                n_train_tasks=num_tasks,
                noised_prior=True)

            avg_empiric_loss_per_task[i_task] = loss_per_task
            avg_reward_per_task[i_task] = avg_reward
            complexity_per_task[i_task] = complexity
            n_samples_per_task[i_task] = batch_size

        # Approximated total objective:
        if args.complexity_type == 'Variational_Bayes':
            # note: avg_empiric_loss_per_task is estimated as an average over batch samples,
            #  but its weight in the objective should reflect the total number of samples in the task
            total_objective = \
                (avg_empiric_loss_per_task * n_samples_per_task + complexity_per_task).mean() * num_tasks \
                + meta_complex_term
            # total_objective = ( avg_empiric_loss_per_task * n_samples_per_task
            # + complexity_per_task).mean() + meta_complex_term

        else:
            total_objective = \
                avg_empiric_loss_per_task.mean() + complexity_per_task.mean() + meta_complex_term

        # Take gradient step with the shared prior and all tasks' posteriors:
        grad_step(total_objective, all_optimizer, lr_schedule, args.lr)

        Info_avg_reward.append(avg_reward_per_task.mean())
        Info_total_objective.append(total_objective)
        Info_last_reward.append(last_reward)

    # *******************************************************************
    # Save policy
    # *******************************************************************
    # save the model parameters to the .th file at policy_filename
    if args.output_folder is not None:
        with open(policy_filename, 'wb') as f:
            # save the network parameters; f is the opened file
            torch.save(prior_policy.state_dict(), f)

    # *******************************************************************
    # Test
    # learned policy   : prior_policy
    # saved parameters : 'policy_2d_PAC_Bayes.th'
    # *******************************************************************
    env_name = config['env-name']
    env_kwargs = config['env-kwargs']
    test_num = 10

    Info_test_loss = []
    Info_test_avg_reward = []
    Info_test_last_reward = []

    for test_batch in range(test_num):
        # sample a new task, train a posterior on it, and evaluate the error
        test_task = env.unwrapped.sample_tasks(1)
        post_policy = get_policy_for_env(args.device,
                                         args.log_var_init,
                                         env,
                                         hidden_sizes=config['hidden-sizes'])
        post_policy.load_state_dict(prior_policy.state_dict())

        # based on the prior_policy, train post_policy; then test learned post_policy
        test_loss_per_task, test_avg_reward, test_last_reward = run_test(
            task=test_task,
            prior_policy=prior_policy,
            post_policy=post_policy,
            baseline=baseline,
            args=args,
            env_name=env_name,
            env_kwargs=env_kwargs,
            batch_size=batch_size,
            observation_space=observation_space,
            action_space=action_space,
            n_train_tasks=num_tasks)

        Info_test_loss.append(test_loss_per_task)
        Info_test_avg_reward.append(test_avg_reward)
        Info_test_last_reward.append(test_last_reward)
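Note the optimizer setup in main(): the shared prior and all per-task posteriors are updated jointly by concatenating their parameter lists into a single optimizer. A minimal stand-alone illustration of that pattern (the tiny linear policies and the Adam settings below are placeholders, not the repository's configuration):

import torch
import torch.nn as nn

num_tasks = 4
prior_policy = nn.Linear(2, 2)
posteriors_policies = [nn.Linear(2, 2) for _ in range(num_tasks)]

# concatenate the parameters of every posterior, then append the prior's
all_post_param = sum([list(p.parameters()) for p in posteriors_policies], [])
all_params = all_post_param + list(prior_policy.parameters())

# one optimizer updates the shared prior and all posteriors together
all_optimizer = torch.optim.Adam(all_params, lr=1e-3)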
Example #6
    def run_train_epoch(i_epoch):
        log_interval = 500

        post_model.train()

        for batch_idx, batch_data in enumerate(train_loader):

            # get batch data:
            inputs, targets = data_gen.get_batch_vars(batch_data, prm)
            batch_size = inputs.shape[0]

            correct_count = 0
            sample_count = 0

            # Monte-Carlo iterations:
            n_MC = prm.n_MC
            avg_empiric_loss = 0
            complexity_term = 0

            for i_MC in range(n_MC):

                # Calculate empirical loss:
                outputs = post_model(inputs)
                avg_empiric_loss_curr = (1 / batch_size) * loss_criterion(
                    outputs, targets)

                # complexity_curr = get_task_complexity(prm, prior_model, post_model,
                #                                            n_train_samples, avg_empiric_loss_curr)

                avg_empiric_loss += (1 / n_MC) * avg_empiric_loss_curr
                # complexity_term += (1 / n_MC) * complexity_curr

                correct_count += count_correct(outputs, targets)
                sample_count += inputs.size(0)
            # end monte-carlo loop

            complexity_term = get_task_complexity(prm, prior_model, post_model,
                                                  n_train_samples,
                                                  avg_empiric_loss)

            # Approximated total objective (for current batch):
            if prm.complexity_type == 'Variational_Bayes':
                # note: avg_empiric_loss is estimated as an average over batch samples,
                #  but its weight in the objective should reflect the total number of samples in the task
                total_objective = avg_empiric_loss * (
                    n_train_samples) + complexity_term
            else:
                total_objective = avg_empiric_loss + complexity_term

            # Take gradient step with the posterior:
            grad_step(total_objective, optimizer, lr_schedule, prm.lr, i_epoch)

            # Print status:
            if batch_idx % log_interval == 0:
                batch_acc = correct_count / sample_count
                print(
                    cmn.status_string(i_epoch, prm.n_meta_test_epochs,
                                      batch_idx, n_batches, batch_acc,
                                      total_objective.item()) +
                    ' Empiric Loss: {:.4}\t Intra-Comp. {:.4}'.format(
                        avg_empiric_loss.item(), complexity_term.item()))