Example No. 1
    baseline_mtl = {}
    # Visit the tasks in a random order: e maps the shuffled index to the
    # corresponding environment from e_unshuffled.
    task_order = np.random.permutation(num_tasks)
    for task_id in range(num_tasks):
        e[task_id] = e_unshuffled[task_order[task_id]]
        baseline_mtl[task_id] = MLPBaseline(e[task_id].spec,
                                            reg_coef=1e-3,
                                            batch_size=64,
                                            epochs=2,
                                            learn_rate=1e-3,
                                            use_gpu=True)

    policy_mtl = LinearPolicyLPGFTW(e[0].spec, k=1, max_k=5, seed=SEED)
    agent_mtl = NPGFTW(e,
                       policy_mtl,
                       baseline_mtl,
                       normalized_step_size=1,
                       seed=SEED,
                       save_logs=True,
                       new_col_mode='max_k')

    for task_id in range(num_tasks):
        ts = timer.time()
        train_agent(job_name=job_name_lpgftw_seed,
                    agent=agent_mtl,
                    seed=SEED,
                    niter=50,
                    gamma=0.995,
                    gae_lambda=0.97,
                    num_cpu=num_cpu,
                    sample_mode='trajectories',
                    num_traj=10,
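The task-shuffling idiom at the top of this example can be easy to misread. Below is a small self-contained sketch (not part of the original script; the environment names are placeholders) showing how np.random.permutation is used: task_order maps each shuffled position to an original task index, so e[i] receives the environment originally stored at e_unshuffled[task_order[i]].

import numpy as np

np.random.seed(0)
num_tasks = 4
e_unshuffled = {i: 'env_{}'.format(i) for i in range(num_tasks)}

# Visit the environments in a random order.
task_order = np.random.permutation(num_tasks)
e = {i: e_unshuffled[task_order[i]] for i in range(num_tasks)}
print(task_order)  # e.g. [2 0 1 3]
print(e)           # environments keyed by their shuffled position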
Example No. 2
        f = open(
            job_name_lpgftw_seed + '/iterations/task_{}/'.format(t) +
            'policy_0.pickle', 'rb')
        policy_mtl = pickle.load(f)
        f.close()
        f = open(
            job_name_lpgftw_seed + '/iterations/task_{}/'.format(t) +
            'baseline_0.pickle', 'rb')
        baseline_mtl[t] = pickle.load(f)
        f.close()

        # If theta was pickled as a list, reset it to an empty tensor before testing.
        if isinstance(policy_mtl.model.theta, list):
            policy_mtl.model.theta = torch.autograd.Variable(torch.zeros(0))

        agent_mtl = NPGFTW(e,
                           policy_mtl,
                           baseline_mtl,
                           normalized_step_size=0.1,
                           seed=SEED,
                           save_logs=False,
                           new_col_mode='performance')

        mean_test_perf = agent_mtl.test_tasks(test_rollouts=10,
                                              num_cpu=num_cpu,
                                              task_ids=np.array([t]))

        forward_transfer_results = {
            **forward_transfer_results,
            **mean_test_perf
        }

    result_file = open(job_name_lpgftw_seed + '/start_results.txt', 'w')
    result_file.write(str(forward_transfer_results))
    result_file.close()
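The loading code above opens each pickle file and closes it by hand. Below is a minimal sketch (not from the original repository; load_task_artifacts is a hypothetical helper) of the same load step written with context managers, assuming the directory layout used above, so the files are closed even if unpickling fails.

import pickle

def load_task_artifacts(job_dir, t):
    # Load the pickled policy and baseline saved for task t under
    # <job_dir>/iterations/task_<t>/, mirroring the paths in the snippet above.
    base = '{}/iterations/task_{}/'.format(job_dir, t)
    with open(base + 'policy_0.pickle', 'rb') as f:
        policy = pickle.load(f)
    with open(base + 'baseline_0.pickle', 'rb') as f:
        baseline = pickle.load(f)
    return policy, baseline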
Example No. 3
        agent_stl[task_id].hess = hess_stl[task_id]

    k = 5                             # number of latent policy components
    n = policy_stl[0].n               # observation dimension
    m = policy_stl[0].m               # action dimension
    d = (n + 1) * m                   # size of one flattened linear policy (weights + bias)
    A = np.zeros((d * k, d * k))
    b = np.zeros((d * k, 1))
    S = np.zeros((k, num_tasks))      # per-task coefficient vectors (columns)
    L = np.zeros((d, k))              # shared latent policy basis
    Theta = np.zeros((d, num_tasks))  # per-task policy parameters (columns)
    policy_mtl = LinearPolicyLPGFTW(e[0].spec, k=k, max_k=k, seed=SEED)
    agent_mtl = NPGFTW(e,
                       policy_mtl,
                       baseline_stl,
                       normalized_step_size=1,
                       seed=SEED,
                       save_logs=True,
                       new_col_mode='max_k')

    lasso_solver = Lasso(alpha=1e-5, fit_intercept=False)
    forward_transfer_results = {}
    for task_id in range(num_tasks):

        theta = policy_stl[task_id].trainable_params
        # Stack the weight matrix and bias column into a single (m x (n + 1)) matrix.
        theta = torch.cat((theta[0], torch.unsqueeze(theta[1], 1)), 1)
        # Flatten to a (d, 1) column; reshape order matches the hessian layout in npg_cg.py.
        theta = theta.reshape((-1, 1)).data.numpy()
        agent_mtl.theta[task_id] = theta
        Theta[:, task_id] = theta.squeeze()
        D = -agent_stl[task_id].hess
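The loop above collects each single-task policy vector into a column of Theta and takes D as the negated single-task Hessian, with a Lasso solver already constructed. Below is a rough standalone sketch (not the authors' code; the helper name sparse_code_task and the matrix-square-root reduction are assumptions) of the ELLA-style sparse-coding step this setup points toward: approximate theta_t by L @ s_t under the curvature-weighted norm defined by D, with an L1 penalty keeping s_t sparse.

import numpy as np
from scipy.linalg import sqrtm
from sklearn.linear_model import Lasso

def sparse_code_task(theta_t, L, D, alpha=1e-5):
    # Minimize ||theta_t - L s||_D^2 + alpha * ||s||_1 over s by rewriting the
    # D-weighted least squares as an ordinary Lasso on transformed inputs
    # (up to sklearn's 1/(2n) scaling of the squared-error term).
    D_sqrt = np.real(sqrtm(D))  # D is assumed symmetric positive semi-definite
    solver = Lasso(alpha=alpha, fit_intercept=False)
    solver.fit(D_sqrt @ L, D_sqrt @ theta_t.ravel())
    return solver.coef_  # sparse coefficients s_t of length k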