示例#1
0
def plot_distribution_at_time_all_processes(
    process1_traces: np.ndarray,
    process2_traces: np.ndarray,
    process3_traces: np.ndarray,
) -> None:
    """Plot the terminal histograms of three processes on a single chart.

    Each argument holds the traces of one process. Every set of traces is
    passed through ``get_terminal_histogram`` and the resulting (x, y)
    pairs are drawn as red, blue and green curves respectively.

    The trace count and the number of time steps shown in the title are
    taken from ``process1_traces`` only.
    """
    from rl.gen_utils.plot_funcs import plot_list_of_curves

    num_traces = len(process1_traces)
    # One less than the number of points in the first trace.
    time_steps = len(process1_traces[0]) - 1

    # Collect the histogram of each process in argument order.
    x_curves = []
    y_curves = []
    for traces in (process1_traces, process2_traces, process3_traces):
        x_pts, y_pts = get_terminal_histogram(traces)
        x_curves.append(x_pts)
        y_curves.append(y_pts)

    curve_labels = [
        r"Process 1 ($\alpha_1=0.25$)",
        r"Process 2 ($\alpha_2=0.75$)",
        r"Process 3 ($\alpha_3=1.0$)",
    ]
    chart_title = \
        f"Terminal Price Counts (T={time_steps:d}, Traces={num_traces:d})"

    plot_list_of_curves(
        x_curves,
        y_curves,
        ["r", "b", "g"],
        curve_labels,
        "Terminal Stock Price",
        "Counts",
        chart_title,
    )
示例#2
0
def plot_single_trace_all_processes(process1_trace: np.ndarray,
                                    process2_trace: np.ndarray,
                                    process3_trace: np.ndarray) -> None:
    """Plot one trace from each of three processes against the step index.

    All three curves share the x-axis ``range(len(process1_trace))``; the
    traces are drawn in red, blue and green respectively.
    """
    from rl.gen_utils.plot_funcs import plot_list_of_curves

    # The length of the first trace defines the x-axis for every curve.
    step_index = range(len(process1_trace))
    curve_labels = [
        r"Process 1 ($\alpha_1=0.25$)",
        r"Process 2 ($\alpha_2=0.75$)",
        r"Process 3 ($\alpha_3=1.0$)",
    ]

    plot_list_of_curves(
        [step_index, step_index, step_index],
        [process1_trace, process2_trace, process3_trace],
        ["r", "b", "g"],
        curve_labels,
        "Time Steps",
        "Stock Price",
        "Single-Trace Simulation for Each Process",
    )
示例#3
0
    # Step through the linear model's updates, recording its RMSE on
    # test_data after each one.
    print("Linear Model SGD")
    print("----------------")
    linear_model_rmse_seq: List[float] = []
    linear_updates = islice(
        get_linear_model().iterate_updates(training_data_gen),
        training_iterations
    )
    # The counter starts at 1 and is named `iteration` so that the builtin
    # iter() is not shadowed.
    for iteration, lfa in enumerate(linear_updates, start=1):
        this_rmse: float = lfa.rmse(test_data)
        linear_model_rmse_seq.append(this_rmse)
        print(f"Iteration {iteration:d}: RMSE = {this_rmse:.3f}")

    # Same procedure for the DNN model.
    print("DNN Model SGD")
    print("-------------")
    dnn_model_rmse_seq: List[float] = []
    dnn_updates = islice(
        get_dnn_model().iterate_updates(training_data_gen),
        training_iterations
    )
    for iteration, dfa in enumerate(dnn_updates, start=1):
        # Already annotated above; re-annotating would be a redefinition.
        this_rmse = dfa.rmse(test_data)
        dnn_model_rmse_seq.append(this_rmse)
        print(f"Iteration {iteration:d}: RMSE = {this_rmse:.3f}")

    # Plot both RMSE sequences against the 0-based update index.
    x_vals = range(training_iterations)
    plot_list_of_curves(
        list_of_x_vals=[x_vals, x_vals],
        list_of_y_vals=[linear_model_rmse_seq, dnn_model_rmse_seq],
        list_of_colors=["b", "r"],
        list_of_curve_labels=["Linear Model", "Deep Neural Network Model"],
        x_label="Iterations of Gradient Descent",
        y_label="Root Mean Square Error",
        title="RMSE across Iterations of Gradient Descent")
示例#4
0
def get_unit_sigmoid_func(alpha: float) -> Callable[[float], float]:
    """Return the function x -> 1 / (1 + (1 / x - 1) ** alpha).

    Wherever the input equals 0 it is replaced by ``VSML`` before the
    reciprocal is taken. The returned callable also accepts ``alpha`` as an
    optional second argument, defaulting to the value given here.
    """
    def unit_sigmoid(x, alpha=alpha):
        # Substitute VSML for exact zeros so 1 / x is never 1 / 0.
        safe_x = np.where(x == 0, VSML, x)
        base = 1 / safe_x - 1
        return 1. / (1 + base ** alpha)

    return unit_sigmoid


if __name__ == '__main__':
    from rl.gen_utils.plot_funcs import plot_list_of_curves

    # The exponents, colors and legend labels are shared by both plots.
    alpha = [2.0, 1.0, 0.5]
    colors = ["r", "b", "g"]
    labels = [(r"$\alpha$ = %.1f" % a) for a in alpha]

    # First plot: one logistic curve per alpha, sampled from -3.0 in steps
    # of 0.01 up to (but excluding) 3.01.
    logistics = [get_logistic_func(a) for a in alpha]
    x_vals = np.arange(-3.0, 3.01, 0.01)
    y_vals = [f(x_vals) for f in logistics]
    plot_list_of_curves([x_vals] * len(logistics),
                        y_vals,
                        colors,
                        labels,
                        title="Logistic Functions")

    # Second plot: one unit-sigmoid curve per alpha, sampled from 0.0 in
    # steps of 0.01 up to (but excluding) 1.01.
    unit_sigmoids = [get_unit_sigmoid_func(a) for a in alpha]
    x_vals = np.arange(0.0, 1.01, 0.01)
    y_vals = [f(x_vals) for f in unit_sigmoids]
    # Size the x-list from the curves actually plotted here, not from the
    # logistic list of the previous plot.
    plot_list_of_curves([x_vals] * len(unit_sigmoids),
                        y_vals,
                        colors,
                        labels,
                        title="Unit-Sigmoid Functions")
示例#5
0
    # Build the OptimalExerciseBinTree from the spot price, payoff, expiry,
    # rate, volatility and number of steps.
    opt_ex_bin_tree: OptimalExerciseBinTree = OptimalExerciseBinTree(
        spot_price=spot_price_val,
        payoff=opt_payoff,
        expiry=expiry_val,
        rate=rate_val,
        vol=vol_val,
        num_steps=num_steps_val,
    )

    # get_opt_vf_and_policy() yields pairs; zip(*...) splits them into one
    # tuple of first elements and one tuple of second elements.
    vf_seq, policy_seq = zip(*opt_ex_bin_tree.get_opt_vf_and_policy())
    ex_boundary: Sequence[Tuple[
        float, float]] = opt_ex_bin_tree.option_exercise_boundary(
            policy_seq, is_call)
    # Split the boundary pairs into x (time) and y (price) coordinates.
    time_pts, ex_bound_pts = zip(*ex_boundary)
    # The same text is used as both the curve label and the plot title.
    label = ("Call" if is_call else "Put") + " Option Exercise Boundary"
    plot_list_of_curves(
        list_of_x_vals=[time_pts],
        list_of_y_vals=[ex_bound_pts],
        list_of_colors=["b"],
        list_of_curve_labels=[label],
        x_label="Time",
        y_label="Underlying Price",
        title=label,
    )

    european: float = opt_ex_bin_tree.european_price(is_call, strike)
    print(f"European Price = {european:.3f}")

    # NOTE(review): vf_seq[0][0] is presumably the value at the root of the
    # tree at time 0 -- confirm against get_opt_vf_and_policy.
    am_price: float = vf_seq[0][0]
    print(f"American Price = {am_price:.3f}")
    # Exercise boundary from the binomial tree.
    # NOTE(review): the literal False presumably selects the put boundary --
    # confirm against option_exercise_boundary.
    bin_tree_ex_boundary: Sequence[Tuple[float, float]] = \
        opt_ex_bin_tree.option_exercise_boundary(policy_seq, False)
    bin_tree_x, bin_tree_y = zip(*bin_tree_ex_boundary)

    # Put exercise boundaries computed from the flspi and fdql functions,
    # each with its own number of steps.
    lspi_x, lspi_y = put_option_exercise_boundary(func=flspi,
                                                  expiry=expiry_val,
                                                  num_steps=num_steps_lspi,
                                                  strike=strike_val)
    dql_x, dql_y = put_option_exercise_boundary(func=fdql,
                                                expiry=expiry_val,
                                                num_steps=num_steps_dql,
                                                strike=strike_val)
    # Draw the three boundaries on one chart.
    plot_list_of_curves(list_of_x_vals=[lspi_x, dql_x, bin_tree_x],
                        list_of_y_vals=[lspi_y, dql_y, bin_tree_y],
                        list_of_colors=["b", "r", "g"],
                        list_of_curve_labels=["LSPI", "DQL", "Binary Tree"],
                        x_label="Time",
                        y_label="Underlying Price",
                        title="LSPI, DQL, Binary Tree Exercise Boundaries")

    # Simulated data from scoring_sim_data, built from the same expiry, spot
    # price, rate and volatility values.
    scoring_data: np.ndarray = scoring_sim_data(expiry=expiry_val,
                                                num_steps=num_steps_scoring,
                                                num_paths=num_scoring_paths,
                                                spot_price=spot_price_val,
                                                rate=rate_val,
                                                vol=vol_val)

    # NOTE(review): european_price and bin_tree_price are not assigned in
    # the lines shown here -- confirm they are defined earlier in the file.
    print(f"European Put Price = {european_price:.3f}")
    print(f"Binary Tree Price = {bin_tree_price:.3f}")

    lspi_opt_price: float = option_price(
示例#7
0
    return([x for x, _ in pairs], [y for _, y in pairs])

if __name__ == '__main__':

    game_size = 100
    # NOTE(review): presumably maps the square where a snake or ladder
    # starts to the square it leads to -- confirm against
    # SnackLaddersMPFinite.
    snakes_ladders_map = {
        3: 39, 7: 48, 12: 51, 20: 41, 25: 57, 28: 35, 31: 6, 38: 1,
        45: 74, 49: 8, 53: 17, 60: 85, 67: 90, 69: 92, 70: 34, 76: 37,
        77: 83, 82: 63, 88: 50, 94: 42, 98: 54,
    }
    sl_mp = SnackLaddersMPFinite(snakes_ladders_map, game_size)

    # Every trace starts from PlayerState(1).
    start_d = Categorical({PlayerState(1): 1})
    process_traces = sl_mp.traces(start_d)

    # Plot a handful of traces; one color is listed per plotted trace.
    trace_colors = ['b', 'g', 'r', 'y', 'black']
    list_traces = [generate_trace(process_traces) for _ in range(5)]
    plot_list_of_curves(
        [range(1, len(trace) + 1) for trace in list_traces],
        list_traces,
        trace_colors,
        [f'Trace {i}' for i in range(len(list_traces))],
        "Number of time steps",
        "Square on the board",
        "Snakes and Ladders game traces",
    )

    # Distribution of the number of time steps. The trace count is named
    # once so the call and the title cannot drift apart.
    num_traces = 10000
    hist = generate_time_steps_distribution(process_traces, num_traces)
    plt.bar(hist[0], hist[1], width=1)
    plt.xlabel("Number of steps")
    plt.ylabel("Counts")
    plt.title(f"Number of time steps; Traces = {num_traces} ")
    plt.show()
示例#8
0
#     plot_list_of_curves(
#         [x, x, x, x, x],
#         [y0, y1, y2, y3, y4],
#         ["r", "b", "g", "k", "y"],
#         ["True", "REINFORCE", "Actor-Critic", "Actor-Critic with Advantage",
#          "Actor-Critic with TD Error"],
#         "Iteration",
#         "Action",
#         "Action for Initial Wealth at Time 0"
#     )

    # Four curves are plotted, so exactly four colors are supplied; the
    # list previously carried a fifth, unused entry.
    plot_list_of_curves(
        [x, x, x, x],
        [y0, y1, y2, y4],
        ["r", "b", "g", "k"],
        ["True", "REINFORCE", "Actor-Critic", "Actor-Critic with TD Error"],
        "Iteration",
        "Action",
        "Action for Initial Wealth at Time 0"
    )

    print("Policy Gradient Solution")
    print("------------------------")
    print()

    # Materialize the first 10000 * steps policies from the iterator.
    opt_policies: Sequence[FunctionApprox[NonTerminal[AssetAllocState]]] = \
        list(itertools.islice(actor_critic_error_policies, 10000 * steps))
    for t in range(steps):
        # Average, over all collected policies, of each policy's output for
        # the state (init_wealth, t).
        opt_alloc: float = np.mean([p(NonTerminal((init_wealth, t)))
                                   for p in opt_policies])
        print(f"Time {t:d}: Optimal Risky Allocation = {opt_alloc:.3f}")
示例#9
0
            SnakesAndLadders(start=list_start_pos[i], end=list_end_pos[i])
        ]
    grid_size = 100
    # The code below runs questions 1 and 2.

    # Build the game from the grid and print it under a "Transition Map"
    # heading.
    game = SnakesAndLaddersGame(grid_size=grid_size, grid=grid)
    print("Transition Map")
    print("--------------")
    print(game)

    # Use process_traces to obtain a distribution of the number of time
    # steps needed to finish the game.
    # NOTE(review): the meaning of the two 10000 arguments is not visible
    # here -- confirm against process_traces.
    array_length = process_traces(10000, 10000, game)
    x, y = get_terminal_histogram(array_length)

    plot_list_of_curves([x], [y], ["r"], [r"Snakes and Ladders Game"],
                        "Time Steps to finish the game", "Counts",
                        "Distribution of the time steps to finish the game")

    # QUESTION 4
    # Print the value-function vector computed with gamma=1.
    game_reward = SnakesAndLaddersRewards(grid_size=grid_size, grid=grid)
    expected_steps = game_reward.get_value_function_vec(gamma=1)
    print(expected_steps)

    # QUESTION 5
    rewards = process1_reward_traces(start_price=100,
                                     level_param=100,
                                     alpha1=0.25,
                                     time_steps=100,
                                     num_traces=100)
    value_function = compute_value_function(start_price=100,
                                            level_param=100,
示例#10
0
            γ=gamma
        ),
        num_transitions
    ))
# Evaluate the TD function approximation on nt_states.
td_vf: np.ndarray = td_func.evaluate(nt_states)

# Feature functions for LSTD, produced by laguerre_state_features from
# num_polynomials.
num_polynomials: int = 5
features: Sequence[Callable[[NonTerminal[int]], float]] = \
    laguerre_state_features(num_polynomials)
# LSTD is fed the first num_transitions items taken from transitions.
lstd_transitions: Iterable[TransitionStep[int]] = \
    itertools.islice(transitions, num_transitions)
epsilon: float = 1e-4

# Fit the LSTD linear approximation and evaluate it on the same states.
lstd_func: LinearFunctionApprox[NonTerminal[int]] = \
    least_squares_td(
        transitions=lstd_transitions,
        feature_functions=features,
        γ=gamma,
        ε=epsilon
    )
lstd_vf: np.ndarray = lstd_func.evaluate(nt_states)

# x-axis: the underlying state held by each entry of nt_states.
x_vals: Sequence[int] = [s.state for s in nt_states]

# Compare the true, TD and LSTD value functions on one chart.
plot_list_of_curves([x_vals, x_vals, x_vals], [true_vf, td_vf, lstd_vf], [
    "b", "g", "r"
], ["True Value Function", "Tabular TD Value Function", "LSTD Value Function"],
                    x_label="States",
                    y_label="Value Function",
                    title="Tabular TD and LSTD versus True Value Function")
示例#11
0
    # `intervals` evenly spaced times i * horizon / intervals for
    # i = 0 .. intervals - 1; the horizon itself is not included.
    time_steps = [i * horizon / intervals for i in range(intervals)]

    # Quantities from mp evaluated at each time step.
    optimal_consumption_rate: Sequence[float] = [
        mp.fractional_consumption_rate(i) for i in time_steps
    ]
    expected_portfolio_return: float = mp.portfolio_return()
    expected_wealth_growth: Sequence[float] = [
        mp.wealth_growth_rate(i) for i in time_steps
    ]

    # The portfolio return is a single number, so it is repeated
    # `intervals` times to be drawn as a flat line over the same x-axis.
    plot_list_of_curves(
        [time_steps] * 3, [
            optimal_consumption_rate, expected_wealth_growth,
            [expected_portfolio_return] * intervals
        ], ["b", "g", "r"], [
            "Fractional Consumption Rate", "Expected Wealth Growth Rate",
            "Expected Portfolio Annual Return = %.1f%%" %
            (expected_portfolio_return * 100)
        ],
        x_label="Time in years",
        y_label="Annual Rate",
        title="Fractional Consumption and Expected Wealth Growth")

    # Append the horizon so expected wealth is also evaluated at the end.
    extended_time_steps = time_steps + [horizon]
    expected_wealth: Sequence[float] = [
        mp.expected_wealth(i) for i in extended_time_steps
    ]

    plot_list_of_curves([extended_time_steps], [expected_wealth], ["b"],
                        ["Expected Wealth"],
                        x_label="Time in Years",
                        y_label="Wealth",
示例#12
0
    from rl.gen_utils.plot_funcs import plot_list_of_curves
    from rl.markov_process import NonTerminal

    villagers: int = 20
    vampire_mdp: VampireMDP = VampireMDP(villagers)
    # Value function and policy from vi_vf_and_policy(), printed for
    # inspection.
    true_vf, true_policy = vampire_mdp.vi_vf_and_policy()
    pprint(true_vf)
    print(true_policy)
    # Value function and policy from lspi_vf_and_policy(), printed likewise.
    lspi_vf, lspi_policy = vampire_mdp.lspi_vf_and_policy()
    pprint(lspi_vf)
    print(lspi_policy)

    # States 1 .. villagers. The value functions are indexed by
    # NonTerminal(s), while action_for is indexed by the raw state s.
    states = range(1, villagers + 1)
    true_vf_vals = [true_vf[NonTerminal(s)] for s in states]
    lspi_vf_vals = [lspi_vf[NonTerminal(s)] for s in states]
    true_policy_actions = [true_policy.action_for[s] for s in states]
    lspi_policy_actions = [lspi_policy.action_for[s] for s in states]

    # First chart: value functions; second chart: policy actions.
    plot_list_of_curves(
        [states, states], [true_vf_vals, lspi_vf_vals], ["r", "b"],
        ["True Optimal VF", "LSPI-Estimated Optimal VF"],
        x_label="States",
        y_label="Optimal Values",
        title="True Optimal VF versus LSPI-Estimated Optimal VF")
    plot_list_of_curves(
        [states, states], [true_policy_actions, lspi_policy_actions],
        ["r", "b"], ["True Optimal Policy", "LSPI-Estimated Optimal Policy"],
        x_label="States",
        y_label="Optimal Actions",
        title="True Optimal Policy versus LSPI-Estimated Optimal Policy")
示例#13
0
    decay_eg_cum_regret = decay_eg.get_expected_cum_regret(mu_star)

    # Epsilon-greedy with epsilon=eps and an epsilon_half_life of 1e8.
    eg = EpsilonGreedy(arm_distributions=arm_distrs,
                       time_steps=steps,
                       num_episodes=episodes,
                       epsilon=eps,
                       epsilon_half_life=1e8,
                       count_init=ci,
                       mean_init=mi)
    eg_cum_regret = eg.get_expected_cum_regret(mu_star)

    # Same settings with epsilon=0.0, plotted as the "Greedy" curve.
    greedy = EpsilonGreedy(arm_distributions=arm_distrs,
                           time_steps=steps,
                           num_episodes=episodes,
                           epsilon=0.0,
                           epsilon_half_life=1e8,
                           count_init=ci,
                           mean_init=mi)
    greedy_cum_regret = greedy.get_expected_cum_regret(mu_star)

    # The labels are raw strings: "\e" is not a valid escape sequence, so
    # the non-raw form triggers an invalid-escape warning. The resulting
    # text is unchanged.
    plot_list_of_curves(
        [range(1, steps + 1),
         range(1, steps + 1),
         range(1, steps + 1)],
        [greedy_cum_regret, eg_cum_regret, decay_eg_cum_regret],
        ["r", "b", "g"],
        ["Greedy", r"$\epsilon$-Greedy", r"Decaying $\epsilon$-Greedy"],
        x_label="Time Steps",
        y_label="Expected Total Regret",
        title="Total Regret")
示例#14
0
    # Each series is averaged over consecutive windows of plot_period
    # entries, skipping the first `start` windows.
    plot_period: int = 200
    start: int = 50
    # x-series: the starting index of each window. The same list is reused
    # four times, once per plotted curve.
    x_vals = [[
        i * plot_period for i in range(start, int(num_episodes / plot_period))
    ]] * 4
    # y-series: the mean of each window, for each of the four series.
    y_vals = []
    for y in [y0, y1, y2, y4]:
        y_vals.append([
            np.mean(y[i * plot_period:(i + 1) * plot_period])
            for i in range(start, int(num_episodes / plot_period))
        ])
    # Dump the plotted data to stdout.
    print(x_vals)
    print(y_vals)

    plot_list_of_curves(
        x_vals, y_vals, ["k--", "r-x", "g-.", "b-"],
        ["True", "REINFORCE", "Actor-Critic", "Actor-Critic with TD Error"],
        "Iteration", "Action", "Action for Initial Wealth at Time 0")

    print("Policy Gradient Solution")
    print("------------------------")
    print()

    # Materialize the first 10000 * steps policies from the iterator.
    opt_policies: Sequence[FunctionApprox[NonTerminal[AssetAllocState]]] = \
        list(itertools.islice(actor_critic_error_policies, 10000 * steps))
    for t in range(steps):
        # Average, over all collected policies, of each policy's output for
        # the state (init_wealth, t).
        opt_alloc: float = np.mean(
            [p(NonTerminal((init_wealth, t))) for p in opt_policies])
        print(f"Time {t:d}: Optimal Risky Allocation = {opt_alloc:.3f}")
        # Blank line after every time step's result.
        print()
示例#15
0
文件: prob2.py 项目: lkourti/RL-book
def plot_finish_time_distr(finish_times: np.ndarray, num_traces: int):
    """Plot the finish-time histogram as a single blue curve.

    ``finish_times`` is passed to ``get_finish_time_histogram`` to obtain
    the x and y values; ``num_traces`` only appears in the curve's legend
    label.
    """
    time_axis, counts = get_finish_time_histogram(finish_times)
    legend_text = f"Finish Time Distribution (traces={num_traces:d})"
    plot_list_of_curves(
        [time_axis],
        [counts],
        ["b"],
        [legend_text],
        "Finish Time",
        "Counts",
        r"Finish Time Distribution",
    )