def solve_forest_example(forest_states_size=50, r1=50, r2=25, fire_prob=0.1,
                         num_simulations=50, discount=0.9):
    """Benchmark value iteration and policy iteration on a forest MDP.

    The problem is produced by ``forest(S=..., r1=..., r2=..., p=...)`` and
    each solver is then run through ``solve_mdp.test_algorithm``.

    Args:
        forest_states_size: Forwarded to ``forest`` as ``S``.
        r1: Forwarded to ``forest`` as ``r1``.
        r2: Forwarded to ``forest`` as ``r2``.
        fire_prob: Forwarded to ``forest`` as ``p``.
        num_simulations: Forwarded to ``solve_mdp.test_algorithm`` as
            ``num_sim``.
        discount: Forwarded to ``solve_mdp.test_algorithm`` as ``discount``.

    Returns:
        ``pd.concat`` of the ValueIteration result followed by the
        PolicyIteration result (presumably DataFrames; confirm against
        ``solve_mdp.test_algorithm``).
    """
    transition_model, reward_model = forest(
        S=forest_states_size,
        r1=r1,
        r2=r2,
        p=fire_prob,
    )

    # Both solvers are evaluated with exactly the same settings.
    solver_options = {"discount": discount, "num_sim": num_simulations}

    value_iteration_stats = solve_mdp.test_algorithm(
        ValueIteration, transition_model, reward_model, **solver_options
    )
    policy_iteration_stats = solve_mdp.test_algorithm(
        PolicyIteration, transition_model, reward_model, **solver_options
    )

    # Value-iteration rows come first, then policy-iteration rows.
    return pd.concat([value_iteration_stats, policy_iteration_stats])
def solve_ctr_mdp(transitions, rewards, num_simulations=1000, discount=0.99):
    """Benchmark value iteration and policy iteration on a supplied MDP.

    Args:
        transitions: Transition model handed unchanged to
            ``solve_mdp.test_algorithm``.
        rewards: Reward model handed unchanged to
            ``solve_mdp.test_algorithm``.
        num_simulations: Forwarded to ``solve_mdp.test_algorithm`` as
            ``num_sim``.
        discount: Forwarded to ``solve_mdp.test_algorithm`` as ``discount``.

    Returns:
        ``pd.concat`` of the ValueIteration result followed by the
        PolicyIteration result (presumably DataFrames; confirm against
        ``solve_mdp.test_algorithm``).
    """
    # Both solvers are evaluated with exactly the same settings.
    solver_options = {"discount": discount, "num_sim": num_simulations}

    value_iteration_stats = solve_mdp.test_algorithm(
        ValueIteration, transitions, rewards, **solver_options
    )
    policy_iteration_stats = solve_mdp.test_algorithm(
        PolicyIteration, transitions, rewards, **solver_options
    )

    # Value-iteration rows come first, then policy-iteration rows.
    return pd.concat([value_iteration_stats, policy_iteration_stats])