def test_qlearning_discounted_reward(discount_factor_range=(0.1, 0.3, 0.5, 0.9, 0.99), num_sim=50):
    """Sweep Q-learning over several discount factors on a deterministic forest MDP.

    For each discount factor, runs ``num_sim`` Q-learning simulations with
    ``max_iter`` ranging over [10000, 10000 + num_sim) and collects the
    per-run result series into one DataFrame.

    Args:
        discount_factor_range: Discount factors to sweep.
        num_sim: Number of simulations (distinct ``max_iter`` values) per factor.

    Returns:
        pd.DataFrame: One row per simulation, concatenated over all factors,
        with a ``discount_factor`` entry identifying the sweep value of each row.
    """
    dfs = []
    for factor in discount_factor_range:
        series = []
        for n in range(10000, 10000 + num_sim):
            # p=0.0 -> no fires, so the discount factor is the only varying input.
            P, R = forest(S=50, p=0.0, r1=50, r2=25)
            mdp = solve_mdp.solve_mdp_by_qlearning(P, R, discount=factor, max_iter=n)
            # Tag each run with its discount factor so rows stay distinguishable
            # after the final concat (mirrors test_qlearning_deterministic,
            # which tags rows with fire_probability). pd.concat is used because
            # Series.append was removed in pandas 2.0.
            mdp = pd.concat([mdp, pd.Series(factor, index=["discount_factor"])])
            series.append(mdp)
        df = pd.concat(series, axis=1).T
        dfs.append(df)
    return pd.concat(dfs)
def test_qlearning_algorithm(
    forest_states_size=50,
    fire_prob=0.01,
    r1=50,
    r2=25,
    discount=0.9,
    num_sim_range=(10000, 10050),
    verbose=False,
):
    """Run Q-learning on one forest MDP across a range of iteration budgets.

    Builds a single forest MDP from the given parameters, then solves it once
    per ``max_iter`` value in ``num_sim_range`` (half-open interval).

    NOTE(review): a later definition in this file reuses this function's name
    (taking explicit transitions/rewards) and shadows this one at module
    scope — confirm which version is meant to be collected.

    Args:
        forest_states_size: Number of states ``S`` in the forest MDP.
        fire_prob: Probability ``p`` of a fire each step.
        r1: Reward parameter ``r1`` of the forest MDP.
        r2: Reward parameter ``r2`` of the forest MDP.
        discount: Discount factor passed to the Q-learning solver.
        num_sim_range: (start, stop) range of ``max_iter`` values to try.
        verbose: Forwarded to the solver.

    Returns:
        pd.DataFrame: One row per ``max_iter`` value.
    """
    P, R = forest(S=forest_states_size, r1=r1, r2=r2, p=fire_prob)
    start, stop = num_sim_range
    runs = [
        solve_mdp.solve_mdp_by_qlearning(
            P, R, discount=discount, max_iter=n, verbose=verbose
        )
        for n in range(start, stop)
    ]
    return pd.concat(runs, axis=1).T
def test_qlearning_deterministic(fireprob_range=(0.0, 0.1, 0.2, 0.5, 1.0), num_sim=50):
    """Sweep Q-learning over several fire probabilities on the forest MDP.

    For each fire probability, runs ``num_sim`` Q-learning simulations with
    ``max_iter`` ranging over [10000, 10000 + num_sim); each result row is
    tagged with its ``fire_probability`` before all rows are concatenated.

    Args:
        fireprob_range: Fire probabilities ``p`` to sweep.
        num_sim: Number of simulations (distinct ``max_iter`` values) per probability.

    Returns:
        pd.DataFrame: One row per simulation, concatenated over all
        probabilities, with a ``fire_probability`` entry per row.
    """
    dfs = []
    for factor in fireprob_range:
        series = []
        for n in range(10000, 10000 + num_sim):
            P, R = forest(S=50, p=factor, r1=50, r2=25)
            vi = solve_mdp.solve_mdp_by_qlearning(P, R, max_iter=n)
            # Series.append was deprecated in pandas 1.4 and removed in 2.0;
            # pd.concat is the supported replacement and produces the same result.
            vi = pd.concat([vi, pd.Series(factor, index=["fire_probability"])])
            series.append(vi)
        df = pd.concat(series, axis=1).T
        dfs.append(df)
    return pd.concat(dfs)
def test_qlearning_algorithm(transitions, rewards, discount=0.9, num_sim_range=(10000, 10050), verbose=False):
    """Solve a caller-supplied MDP with Q-learning across iteration budgets.

    Runs the solver once per ``max_iter`` value in the half-open interval
    defined by ``num_sim_range`` and stacks the per-run result series into
    a DataFrame (one row per run).

    Args:
        transitions: Transition probability structure ``P`` for the MDP.
        rewards: Reward structure ``R`` for the MDP.
        discount: Discount factor passed to the solver.
        num_sim_range: (start, stop) range of ``max_iter`` values to try.
        verbose: Forwarded to the solver.

    Returns:
        pd.DataFrame: One row per ``max_iter`` value.
    """
    start, stop = num_sim_range
    results = [
        solve_mdp.solve_mdp_by_qlearning(
            transitions, rewards, discount=discount, max_iter=n, verbose=verbose
        )
        for n in range(start, stop)
    ]
    return pd.concat(results, axis=1).T