# NOTE(review): the physical line below holds many statements collapsed
# together. It opens with the tail (`raise ValueError` / `return ret`) of a
# definition whose header precedes this chunk, so the code is left untouched.
# As rendered here, everything after "# noinspection" is read as a comment.
#
# Visible statements, in order:
#   * pf_as_policy_type(i) passes policy_func(i) to
#     get_sampling_func_from_prob_dict and returns the result.
#   * this_qf and this_vf come from adp_pg_obj.get_act_value_func and
#     adp_pg_obj.get_value_func, both given pf_as_policy_type; this_vf is
#     printed for the arguments 1, 2 and 3.
#   * true_vf_for_pol is mdp_ref_obj1.get_value_func_dict applied to a Policy
#     built from policy_func over {1, 2, 3}; it is printed under the
#     "Printing DP vf for a policy" heading.
#   * opt_det_polf is adp_pg_obj.get_optimal_det_policy_func(); opt_polf(s)
#     returns the one-entry mapping {opt_det_polf(s): 1.0}. The default
#     argument captures the opt_det_polf object that exists when the def runs.
#   * opt_vf is adp_pg_obj.get_value_func applied to
#     adp_pg_obj.get_policy_as_policy_type().
raise ValueError return ret def pf_as_policy_type(i: int) -> Callable[[int], Sequence[str]]: return get_sampling_func_from_prob_dict(policy_func(i)) this_qf = adp_pg_obj.get_act_value_func(pf_as_policy_type) this_vf = adp_pg_obj.get_value_func(pf_as_policy_type) print("Printing vf for a policy") print(this_vf(1)) print(this_vf(2)) print(this_vf(3)) print("Printing DP vf for a policy") from processes.policy import Policy true_vf_for_pol = mdp_ref_obj1.get_value_func_dict( Policy({s: policy_func(s) for s in {1, 2, 3}})) print(true_vf_for_pol) opt_det_polf = adp_pg_obj.get_optimal_det_policy_func() # noinspection PyShadowingNames def opt_polf(s: S, opt_det_polf=opt_det_polf) -> Mapping[A, float]: return {opt_det_polf(s): 1.0} print("Printing Opt Policy") print(opt_polf(1)) print(opt_polf(2)) print(opt_polf(3)) opt_vf = adp_pg_obj.get_value_func(adp_pg_obj.get_policy_as_policy_type())
# Disabled experiment, kept verbatim because its opening lines sit before
# this chunk:
#     {s: policy_func(s) for s in {1, 2, 3}}
# ))
# print(true_vf_for_pol)
#
#
# this_qf = adp_pg_obj.get_act_value_func_fa(policy_func)
# this_vf = adp_pg_obj.get_value_func_fa(policy_func)
# print("Printing vf for a policy")
# print(this_vf(1))
# print(this_vf(2))
# print(this_vf(3))

# Reference results from the refined-MDP object: its optimal policy (found
# with tolerance tol_val) and the value-function dict of that policy.
tol_val = 1e-6
true_opt = mdp_ref_obj1.get_optimal_policy(tol=tol_val)
print("Printing DP Opt Policy")
print(true_opt)
true_vf = mdp_ref_obj1.get_value_func_dict(true_opt)
print("Printing DP Opt VF")
print(true_vf)

# Deterministic policy function reported by the ADP policy-gradient object.
opt_det_polf = adp_pg_obj.get_optimal_det_policy_func()


# noinspection PyShadowingNames
def opt_polf(s: S, opt_det_polf=opt_det_polf) -> Mapping[A, float]:
    """Return a one-entry mapping from ``opt_det_polf(s)`` to ``1.0``.

    The ``opt_det_polf`` default captures the function object that exists
    when this ``def`` runs, so later rebinding of the outer name does not
    affect this function.
    """
    chosen_action = opt_det_polf(s)
    return {chosen_action: 1.0}


print("Printing Opt Policy")
for demo_state in (1, 2, 3):
    print(opt_polf(demo_state))

opt_vf = adp_pg_obj.get_value_func(adp_pg_obj.get_policy_as_policy_type())