Exemplo n.º 1
0
# Build the inventory MRP from the user-supplied parameters.
si_mrp = SimpleInventoryMRPFinite(
    capacity=user_capacity,
    poisson_lambda=user_poisson_lambda,
    holding_cost=user_holding_cost,
    stockout_cost=user_stockout_cost,
)

# Print the value-function vector the MRP itself reports for user_gamma.
print("-----MRP Value Function-----:\n")
print(si_mrp.get_value_function_vec(user_gamma))

# Give every non-terminal state the same starting probability, then ask the
# MRP for a simulated stream of transitions (Iterable[TransitionStep[S]])
# drawn from that start distribution.
non_terminal_states = si_mrp.non_terminal_states
start_distribution = {
    nt_state: 1 / len(non_terminal_states)
    for nt_state in non_terminal_states
}
transitions = si_mrp.simulate_reward(Categorical(start_distribution))


def count_to_weight_func(n: int):
    """Return the reciprocal of the count ``n``.

    This script hands the function to ``td_prediction`` and
    ``mc_prediction`` as their second argument.

    Raises:
        ZeroDivisionError: if ``n`` is 0.
    """
    reciprocal = 1 / n
    return reciprocal


# TD section: print a header, run td_prediction on the simulated transitions
# with the 1/n weight function and user_gamma, then print the result
# evaluated on the non-terminal states.
print("-----TD Value Function-----:\n")
td_pred = td_prediction(transitions, count_to_weight_func, user_gamma)
print(td_pred.evaluate(non_terminal_states))

# MC section: same arguments, passed to mc_prediction instead.
# NOTE(review): `transitions` was already handed to td_prediction above. If
# simulate_reward returns a one-shot iterator, mc_prediction would receive it
# partially or fully consumed -- confirm, and build a fresh stream if so.
print("-----MC Value Function-----:\n")
mc_pred = mc_prediction(transitions, count_to_weight_func, user_gamma)
print(mc_pred.evaluate(non_terminal_states))

# Header for the next set of results; presumably the computation follows.
print("-----TD NBOOTSTRAP Value Function-----:\n")
Exemplo n.º 2
0
    # Build the inventory MRP from the user-supplied parameters.
    si_mrp = SimpleInventoryMRPFinite(
        capacity=user_capacity,
        poisson_lambda=user_poisson_lambda,
        holding_cost=user_holding_cost,
        stockout_cost=user_stockout_cost,
    )
    print("Value Function")
    print("--------------")
    si_mrp.display_value_function(gamma=user_gamma)
    print()

    states: List[InventoryState] = si_mrp.non_terminal_states
    # Each non-terminal state is given the same weight (1); any normalisation
    # is left to Categorical.
    start_state_distrib: Categorical[InventoryState] = Categorical(
        {state: 1 for state in states}
    )
    # Two simulated data sources drawn from the same start distribution.
    # NOTE(review): the names suggest whole episodes vs. a single stream of
    # transitions -- confirm against the MRP class.
    simulation_episodes = si_mrp.reward_traces(start_state_distrib)
    simulation_transitions = si_mrp.simulate_reward(start_state_distrib)
    # Tabular approximation that starts every state at 0.
    approx_0 = Tabular({state: 0 for state in states})
    value_mc = mc_prediction_scratch(
        traces=simulation_episodes,
        states=states,
        γ=user_gamma,
        tolerance=1e-6,
        num_episodes=10000,
    )
    print("Value Function with our implementation of MC")
    print(value_mc)
    
    value_mc_other = mc_prediction(
                traces = simulation_episodes,
                approx_0 = approx_0,
                γ = user_gamma