Example #1
def year_pass(k, v):

    # Load the trained policy for this task.
    Q = utils.load_object(etr_path + v["policy"])
    task = v["task"]

    task.starting_day_index = 0
    task.reset()
    num_days = task.n_days

    # Evaluate every day of the year, in parallel when n_jobs > 1.
    if n_jobs == 1:
        outputs = [day_pass(k, v, d) for d in range(num_days)]
    elif n_jobs > 1:
        outputs = Parallel(n_jobs=n_jobs,
                           max_nbytes=None)(delayed(day_pass)(k, v, d)
                                            for d in range(num_days))

    # Aggregate the per-day outputs into year-long arrays.
    days = []
    actions = np.zeros((num_days, len(task.prices[0])))
    rewards = np.zeros((num_days, len(task.prices[0])))
    state_value_list = []

    for (d, r, a, svl) in outputs:

        days.append(d)
        rewards[d, :] = r
        actions[d, :] = a

        state_value_list.extend(svl)

    print("Days:", len(days))
    print("Rewards sum:", np.sum(rewards))
    print("State values list length:", len(state_value_list))

    # Persist the collected state values and the per-day actions/rewards.
    utils.save_object(state_value_list, save_dataset_path + k)
    utils.save_object([days, actions, rewards], save_actions_path + k)
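
The utils.save_object / utils.load_object helpers that these snippets rely on are not shown on this page; a minimal sketch, assuming they are plain pickle wrappers over a file path, could look like this:

import pickle


def save_object(obj, path):
    # Assumed behaviour of utils.save_object: serialize obj to the given path.
    with open(path, "wb") as f:
        pickle.dump(obj, f)


def load_object(path):
    # Assumed behaviour of utils.load_object: read back an object written by save_object.
    with open(path, "rb") as f:
        return pickle.load(f)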
Example #2
                 lambda_=lambda_,
                 n_weights=n_weights,
                 train_freq=train_freq,
                 eval_freq=eval_freq,
                 random_episodes=random_episodes,
                 eval_states=eval_states,
                 mean_episodes=mean_episodes,
                 preprocess=rbf,
                 sigma_reg=sigma_reg,
                 cholesky_clip=cholesky_clip,
                 time_coherent=time_coherent,
                 n_source=n_source,
                 source_file=source_file,
                 seed=seed,
                 render=render,
                 verbose=verbose)


# Fixed seeds keep the runs reproducible; one seed per run.
seeds = [
    9, 44, 404, 240, 259, 141, 371, 794, 41, 507, 819, 959, 829, 558, 638, 127,
    672, 4, 635, 687
]
seeds = seeds[:n_runs]
# Launch one run per (mdp, seed) pair, in parallel when n_jobs > 1.
if n_jobs == 1:
    results = [run(mdp, seed) for (mdp, seed) in zip(mdps, seeds)]
elif n_jobs > 1:
    results = Parallel(n_jobs=n_jobs)(delayed(run)(mdp, seed)
                                      for (mdp, seed) in zip(mdps, seeds))

utils.save_object(results, file_name)
Example #3
                     lambda_=lambda_,
                     n_weights=n_weights,
                     train_freq=train_freq,
                     eval_freq=eval_freq,
                     random_episodes=random_episodes,
                     eval_states=eval_states,
                     mean_episodes=mean_episodes,
                     preprocess=rbf,
                     sigma_reg=sigma_reg,
                     cholesky_clip=cholesky_clip,
                     time_coherent=time_coherent,
                     n_source=i,
                     source_file=source_file,
                     seed=seed,
                     render=render,
                     verbose=verbose)

    seeds = [
        9, 44, 404, 240, 259, 141, 371, 794, 41, 507, 819, 959, 829, 558, 638,
        127, 672, 4, 635, 687
    ]
    seeds = seeds[:n_runs]
    # Dispatch the runs for the current number of source tasks, in parallel when n_jobs > 1.
    if n_jobs == 1:
        results = [run(mdp, seed) for (mdp, seed) in zip(mdps, seeds)]
    elif n_jobs > 1:
        results = Parallel(n_jobs=n_jobs)(delayed(run)(mdp, seed)
                                          for (mdp, seed) in zip(mdps, seeds))
    # Record the results for this number of source tasks.
    scores.append([i, results])

utils.save_object(scores, file_name)
Example #4
def transfer(dataset_path, mdp, save_path, iterations, year, seed=0):
    """Fit a Q-function to the pre-collected dataset for one year of data."""

    np.random.seed(seed)

    data = utils.load_object(dataset_path)
    data = np.array(data)

    state_dim = mdp.state_dim
    n_actions = mdp.action_space.n
    mdp.starting_day_index = 0
    mdp.reset()
    day_length = len(mdp.prices[0])

    # Build the Q-network and initialize its weights.
    Q = MLPQFunction(state_dim, n_actions, layers=layers)
    Q.init_weights()

    # Adam optimizer state: first and second moment estimates and step counter.
    m_t = 0
    v_t = 0
    t = 0

    utils.save_object([], save_path)

    losses = [[], [], []]

    for i in range(iterations):

        # Sample a time of day, take one datapoint per day at that time,
        # and keep a random batch of them.
        time = int(np.random.uniform(low=0, high=day_length))
        datapoints = np.arange(0, len(data) - day_length, day_length)
        datapoints += time
        datapoints = data[datapoints]
        np.random.shuffle(datapoints)
        datapoints = datapoints[:batch_size]

        # One gradient step per action on the sampled batch.
        for a in range(n_actions):
            with torch.autograd.set_detect_anomaly(True):
                train_loss, grad = compute_gradient_single_action(
                    Q, datapoints, a)

            losses[a].append(train_loss)

            print(
                "Y: {0}, I: {1:5d}, Time: {2:4d}, A: {3:1d}, Grad: {4:8.6f}, Train Loss: {5:8.6f}"
                .format(year, i, time, a, np.linalg.norm(grad), train_loss))

            Q._w, t, m_t, v_t = utils.adam(Q._w,
                                           grad,
                                           t,
                                           m_t,
                                           v_t,
                                           alpha=alpha)

        # Periodically checkpoint the weights and plot the resulting actions.
        if save_freq > 0 and i % save_freq == 0:
            past_Qs = utils.load_object(save_path)
            past_Qs.append(np.array(Q._w))
            utils.save_object(past_Qs, save_path)
            plot_actions(dataset_path, Q._w, i, mdp, n_actions_plot,
                         path + "/plot-" + year + "-" + str(i))

    print(
        "Model selected index: {0:4d}, Train Loss: [{1:8.6f}, {2:8.6f}, {3:8.6f}]"
        .format(i, losses[0][i], losses[1][i], losses[2][i]))

    return [mdp.get_info(), np.array(Q._w), losses]
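
utils.adam above is a project helper rather than a library call; judging from the unpacking Q._w, t, m_t, v_t = utils.adam(...), it performs one Adam step and returns the updated weights together with the optimizer state. A sketch of an update with that signature (an assumption, not the project's actual implementation):

import numpy as np


def adam(w, grad, t, m_t, v_t, alpha=0.001, beta_1=0.9, beta_2=0.999, eps=1e-8):
    # One Adam step over a flat weight vector; returns the same tuple that the
    # callers above unpack. Hyperparameter defaults follow the original Adam paper.
    t += 1
    m_t = beta_1 * m_t + (1 - beta_1) * grad       # biased first-moment estimate
    v_t = beta_2 * v_t + (1 - beta_2) * grad ** 2  # biased second-moment estimate
    m_hat = m_t / (1 - beta_1 ** t)                # bias correction
    v_hat = v_t / (1 - beta_2 ** t)
    w = w - alpha * m_hat / (np.sqrt(v_hat) + eps)  # descend along the loss gradient
    return w, t, m_t, v_t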
Example #5
            Q._w, t, m_t, v_t = utils.adam(Q._w,
                                           grad,
                                           t,
                                           m_t,
                                           v_t,
                                           alpha=alpha)

        if save_freq > 0 and i % save_freq == 0:
            past_Qs = utils.load_object(save_path)
            past_Qs.append(np.array(Q._w))
            utils.save_object(past_Qs, save_path)
            plot_actions(dataset_path, Q._w, i, mdp, n_actions_plot,
                         path + "/plot-" + year + "-" + str(i))

    print(
        "Model selected index: {0:4d}, Train Loss: [{1:8.6f}, {2:8.6f}, {3:8.6f}]"
        .format(i, losses[0][i], losses[1][i], losses[2][i]))

    return [mdp.get_info(), np.array(Q._w), losses]


# Run the transfer procedure on every source task, saving partial results after each one.
results = []

for k, v in dataset.items():
    print(k)
    results.append(
        [transfer(v['data'], v['mdp'], v['save_path'], iterations, k)])
    utils.save_object(results, sources_file_name)

utils.save_object(tasks, tasks_file_name)
Example #6
            actions[di, task.current_timestep] = a - 1  # [0, 2] -> [-1, 1]
            rewards[di, task.current_timestep] = r

        print("{0:s} - Day: {1:4d}, Cumulative reward: {2:8.6f}".format(
            k, di, np.sum(rewards)))

    return [days, actions, rewards, state_value_list]


def make_Q(weights, task):

    # task params
    state_dim = task.state_dim
    action_dim = 1
    n_actions = task.action_space.n

    return MLPQFunction(state_dim,
                        n_actions,
                        layers=layers,
                        initial_params=weights)


# Rebuild a Q-function from each stored weight vector and replay its year.
for k, v in w_dict.items():
    print(k)
    Q = make_Q(v["weights"], v["task"])
    v["task"].starting_day_index = 0
    v["task"].reset()
    output = year_pass(Q, v["task"])
    print(len(output))
    utils.save_object(output, save_actions_path + k)
Example #7
        s = task.reset()
        s = [s]

        print("Day index:", di)

        days.append(task.selected_day)

        # Follow the greedy policy until the day ends.
        done = False
        while not done:

            a = np.argmax(Q._q_values(s))
            s, r, done, _ = task.step(a)
            s = [s]

            actions[di, task.current_timestep] = a - 1  # [0, 2] -> [-1, 1]
            rewards[di, task.current_timestep] = r

        print("Cumulative reward:", np.sum(rewards))

    return [days, actions, rewards]


# Replay each year with its trained policy and store the chosen actions.
for k, v in etrs.items():
    print(k)
    Q = utils.load_object(etr_path + v["policy"])
    task = v["task"]
    task.starting_day_index = 0
    task.reset()
    output = year_pass(Q, task)
    utils.save_object(output, "visualize-actions/" + k)
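
The etrs dictionary is not defined in this fragment; from the way the loop reads it, each value needs a "policy" filename (loaded from etr_path) and a "task" environment. A hypothetical entry, with illustrative names only:

example_entry = {
    "policy": "etr-2017",  # illustrative filename under etr_path, loaded with utils.load_object
    "task": None,          # placeholder for an environment exposing reset(), step() and selected_day
}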