# Held-out GP dataset for evaluation: each sample is a function drawn from the
# GP prior, evaluated at 200 input points.
test_dataset = MultiGPData(mean, kernel, num_samples=args.batch_size, amplitude_range=x_range, num_points=200)
test_data_loader = DataLoader(test_dataset, batch_size=1, shuffle=True)

# Grab one batch to seed the models. `next(iter(...))` replaces the former
# `for data_init in data_loader: break` idiom (same result, no loop).
data_init = next(iter(data_loader))
x_init, y_init = data_init
# Keep only the first function of the batch and split it into context points
# (conditioning set) and target points; the returned target halves are unused.
x_init, y_init, _, _ = context_target_split(x_init[0:1], y_init[0:1], args.num_context, args.num_target)
print('dataset created', x_init.size())

# create model
# Gaussian observation likelihood for the exact-GP deep-kernel model.
likelihood = gpytorch.likelihoods.GaussianLikelihood().to(device)
# Targets are squeezed from (1, N, 1) down to a 1-D vector of length N, as
# required by the exact-GP training interface. -- assumes y_init has that
# shape after context_target_split; TODO confirm.
model_dkl = GPRegressionModel(x_init, y_init.squeeze(0).squeeze(-1), likelihood,
                          args.h_dim_dkl, args.z_dim_dkl, name_id='DKL').to(device)
# Choose between the attentive NP (with self-attention) and the vanilla NP.
if anp:
    model_np = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim_np, args.z_dim_np, args.h_dim_np, args.a_dim_np,
                                      use_self_att=True, fixed_sigma=None).to(device)
else:
    model_np = NeuralProcess(args.x_dim, args.y_dim, args.r_dim_np, args.z_dim_np, args.h_dim_np, fixed_sigma=None).to(device)

# All DKL sub-modules are optimized jointly under a single learning rate.
optimizer_dkl = torch.optim.Adam([
    {'params': model_dkl.feature_extractor.parameters()},
    {'params': model_dkl.covar_module.parameters()},
    {'params': model_dkl.mean_module.parameters()},
    {'params': model_dkl.likelihood.parameters()}], lr=0.01)
trainer_dkl = DKMTrainer(device, model_dkl, optimizer_dkl, args, print_freq=args.print_freq)

# NP trainer: both sampling ranges are degenerate (min == max), so every
# update uses exactly num_context context and num_target extra target points.
optimizer_np = torch.optim.Adam(model_np.parameters(), lr=learning_rate)
np_trainer = NeuralProcessTrainer(device, model_np, optimizer_np,
                                  num_context_range=(args.num_context,args.num_context),
                                  num_extra_target_range=(args.num_target,args.num_target),
                                  print_freq=args.print_freq)
# --- Example #2 (scraped-snippet separator; original page showed vote count "0") ---
def test():
    """Visualize weight initializations and the policies they induce.

    Builds an AttentiveNeuralProcess, then for the default initialization and
    for each initializer in ``InitFunc`` plots (a) the per-row mean of every
    weight matrix and (b) policies sampled with z ~ N(0, 1), saving each
    figure to disk.
    """
    def plot_weights_and_policy(policy, id):
        '''
        Plots the weights in the different layers (top subplot) and policies
        sampled from the prior (bottom subplot), then saves the figure.
        '''

        named_parameters = policy.named_parameters()
        fig = plt.figure(figsize=(16, 10))
        #fig.suptitle(id , fontsize=18)
        fig.tight_layout()

        ax_w = fig.add_subplot(211)
        ax_w.set_title(id + " Weights initialization", fontsize=18)

        for n, p in named_parameters:
            # Random color per parameter so layers are distinguishable.
            color = '#%06X' % randint(0, 0xFFFFFF)
            if 'weight' in n and 'layer_norm' not in n:
                try:
                    weights = p.mean(dim=1)
                except Exception:
                    # Parameter has no dim 1 (e.g. a 1-D weight): report and
                    # skip it. The previous bare `except:` fell through and
                    # plotted the PREVIOUS layer's `weights` again (or raised
                    # NameError on the first failure).
                    print(n)
                    continue
                ax_w.plot(np.arange(len(weights)), weights.detach().numpy(),
                          alpha=0.5,  color=color, label=n.replace('.weight', ''))
        ax_w.legend(loc="upper right")
        ax_w.set_xlabel("Weights")
        ax_w.set_ylabel("initialization")
        ax_w.set_ylim(-2, 2)

        ax_policy = fig.add_subplot(212)
        max_std = 0
        min_std = 0
        for z_sample in z_samples:
            mu, sigma = policy.xz_to_y(x, z_sample)
            ax_policy.plot(x[0, :, 0].numpy(), mu[0, :, 0].detach().numpy(), color='b')
            # Track the widest +/- 1 sigma band so the y-limits cover all samples.
            std_h = mu + sigma
            max_std = max(max_std, std_h.max().detach())
            std_l = mu - sigma
            min_std = min(min_std, std_l.min().detach())
            ax_policy.fill_between(x[0, :, 0].detach(), std_l[0, :, 0].detach(), std_h[0, :, 0].detach(), alpha=0.01, color='b')

        ax_policy.set_xlabel('x')
        ax_policy.set_ylabel('y')
        ax_policy.set_ylim(min(min_std, -1), max(max_std, 1))
        ax_policy.set_title('Policies sampled with z ~ N(0,1)')
        plt.grid(True)
        plt.show()
        fig.savefig('/home/francesco/PycharmProjects/MasterThesis/plots/NP&ANP/1D/weights/'+id+'64')

    z_dim = 128
    dims = 128
    num_points = 100

    # Evaluation grid of shape (1, num_points, 1) over [-1, 1].
    x = torch.linspace(-1, 1, num_points).unsqueeze(1).unsqueeze(0)

    # 16 latent samples, each broadcast along the grid: (1, num_points, 2*z_dim).
    z_samples =[]
    for n in range(16):
        z_samples.append(torch.randn((1, z_dim*2)).unsqueeze(1).repeat(1, num_points, 1))
    neural_process = AttentiveNeuralProcess(1, 1, dims, z_dim, dims, z_dim)
    plot_weights_and_policy(neural_process, ' default')
    for init_func in [InitFunc.init_xavier, InitFunc.init_normal, InitFunc.init_zero, InitFunc.init_kaiming, InitFunc.init_sparse]:  #
        init_policy = neural_process.apply(init_func)
        plot_weights_and_policy(init_policy, init_func.__name__)
# --- Example #3 (scraped-snippet separator; original page showed vote count "0") ---
    # NOTE(review): this block is the middle of a larger function (scraped
    # excerpt) -- `mdl`, `use_attention`, `use_self_att`, the hyper-parameters
    # and `first` are defined/consumed outside this view.
    color = 'b'
    if use_attention:
        # Cross-attention variant plots in red; adding self-attention -> green.
        mdl = 'cross attention ' + mdl
        color = 'r'
        if use_self_att:
            mdl = 'self & ' + mdl
            color = 'g'

    # Run identifier: model tag + timestamp + hyper-parameter summary.
    id = mdl + time.ctime() + '{}e_{}b_{}c{}t_{}lr_{}r_{}z_{}a'.format(
        epochs, batch_size, num_context, num_target, l, r_dim, z_dim, a_dim)
    # create and train np
    if use_attention:
        neuralprocess = AttentiveNeuralProcess(
            x_dim,
            y_dim,
            r_dim,
            z_dim,
            h_dim,
            a_dim,
            use_self_att=use_self_att).to(device)
        first = False
    else:
        neuralprocess = NeuralProcess(x_dim, y_dim, r_dim, z_dim,
                                      h_dim).to(device)

    t0 = time.time()
    optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=learning_rate)
    # NOTE(review): the call below is truncated by the scraper (no closing
    # parenthesis visible); the remaining arguments live outside this excerpt.
    np_trainer = NeuralProcessTrainer(device,
                                      neuralprocess,
                                      optimizer,
                                      num_context_range=num_context,
                                      num_extra_target_range=num_target,
# --- Example #4 (scraped-snippet separator; original page showed vote count "0") ---
    # Tail of a plotting helper whose definition is outside this excerpt:
    # persist the figure at 250 dpi, then close it to release the memory.
    fig.savefig(parent_dir+folder_name+name, dpi=250)
    plt.close(fig)


def sample_context(x, y, num_context=100):
    """Draw a random context set from the point sets (x, y).

    Samples ``num_context`` distinct indices uniformly at random along the
    points axis (axis 1) and returns the matching slices of ``x`` and ``y``.
    Raises ValueError (from numpy) if ``num_context`` exceeds the number of
    available points, since sampling is without replacement.
    """
    total_points = x.shape[1]
    # Distinct indices for the context set (no replacement).
    chosen = np.random.choice(total_points, size=num_context, replace=False)
    return x[:, chosen, :], y[:, chosen, :]

# Pick the NP architecture from the flag; both variants share the x/y/r/z/h
# dimensionalities, the attentive one adds attention dims and self-attention.
if use_attention:
    neuralprocess = AttentiveNeuralProcess(args.x_dim, args.y_dim, args.r_dim, args.z_dim,
                                            args.h_dim, args.a_dim, use_self_att=True).to(device)
else:
    neuralprocess = NeuralProcess(args.x_dim, args.y_dim, args.r_dim, args.z_dim, args.h_dim).to(device)

optimizer = torch.optim.Adam(neuralprocess.parameters(), lr=3e-4)
# RL trainer draws 400-500 context and 400-500 extra target points per update.
np_trainer = NeuralProcessTrainerRL(device, neuralprocess, optimizer,
                                    num_context_range=(400, 500),
                                    num_extra_target_range= (400, 500),
                                    print_freq=2)


def get_dataset(i_iter):
    # Path of the pickled replay memory for iteration `i_iter` of `env_name`.
    file_name = memory_dir + str(i_iter) + '^iter_' + env_name

    # SECURITY NOTE(review): pickle.load on a file -- only safe for trusted,
    # locally produced memories.
    with open(file_name, 'rb') as file_m:
        memory_iter = pickle.load(file_m)  # memory_iter.memory to access list of transitions
    # NOTE(review): the lines below were spliced in from a different scraped
    # snippet -- the closing parenthesis and `else` have no visible opening
    # statement in this excerpt (presumably a ZFilter(...) running-state setup).
    )  # running list of states that allows to access precise mean and std
else:
    running_state = None
# running_reward = ZFilter((1,), demean=False, clip=10)
"""seeding"""
# Seed every RNG source (numpy, torch, the gym env) for reproducibility.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
env.seed(args.seed)
max_episode_len = env._max_episode_steps
'''create neural process'''
ep_frq = 50  # trainer print frequency
if args.use_attentive_np:
    policy_np = AttentiveNeuralProcess(state_dim,
                                       action_dim,
                                       args.r_dim,
                                       args.z_dim,
                                       args.h_dim,
                                       args.a_dim,
                                       use_self_att=False).to(device_np)
else:
    policy_np = NeuralProcess(state_dim, action_dim, args.r_dim, args.z_dim,
                              args.h_dim).to(device_np)
optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
# Context/target set sizes range from 1 up to half an episode length.
np_trainer = NeuralProcessTrainerRL(device_np,
                                    policy_np,
                                    optimizer, (1, max_episode_len // 2),
                                    print_freq=ep_frq)
'''create MKI model'''

# NOTE(review): the MeanInterpolator call below is truncated by the scraper
# (no closing parenthesis), and the indented print() line appears spliced in
# from a different excerpt.
mi_model = MeanInterpolator(state_dim,
                            args.h_mi_dim,
    print('Use NP anyways')

# Ablated NP policy (variant without the components the full NP has; exact
# ablation defined in NeuralProcessAblated, outside this excerpt).
policy_np = NeuralProcessAblated(state_dim, action_dim, args.r_dim,
                                 args.h_dim).to(args.device_np)

optimizer = torch.optim.Adam(policy_np.parameters(), lr=3e-4)
np_trainer = TrainerAblated(args.device_np,
                            policy_np,
                            optimizer,
                            print_freq=50)

# Value-function approximator: output dimension 1 (scalar value per state).
if args.v_use_attentive_np:
    value_np = AttentiveNeuralProcess(state_dim,
                                      1,
                                      args.v_r_dim,
                                      args.v_z_dim,
                                      args.v_h_dim,
                                      args.z_dim,
                                      use_self_att=False).to(args.device_np)
else:
    value_np = NeuralProcess(state_dim, 1, args.v_r_dim, args.v_z_dim,
                             args.v_h_dim).to(args.device_np)
value_optimizer = torch.optim.Adam(value_np.parameters(), lr=3e-4)
# Degenerate ranges (min == max): fixed-size context and target sets.
value_np_trainer = NeuralProcessTrainerRL(
    args.device_np,
    value_np,
    value_optimizer,
    num_context_range=(num_context_points, num_context_points),
    num_extra_target_range=(args.num_testing_points, args.num_testing_points),
    print_freq=50)
"""create replay memory"""