def __init__(self):
    """Set up the scenario configuration.

    Stores the car limit, the environment parameters, the comparison
    settings (discount, policy type, evaluation accuracy) and the
    parameters for the 3D value graph and the grid view.
    """
    super().__init__()

    # The problem statement specifies a maximum of 20 cars.
    self._max_cars: int = 20
    car_limit = self._max_cars

    self._environment_parameters = EnvironmentParameters(
        max_cars=car_limit,
        # Toggle this to switch the book's extra rules (the challenge) on or off.
        extra_rules=True,
    )

    deterministic_policy = common.PolicyParameters(
        policy_type=common.PolicyType.TABULAR_DETERMINISTIC,
    )
    # theta is the accuracy of policy_evaluation.
    evaluation_parameters = common.AlgorithmParameters(theta=0.1)
    self._comparison_settings = common.Settings(
        gamma=0.9,
        policy_parameters=deterministic_policy,
        algorithm_parameters=evaluation_parameters,
        display_every_step=True,
    )

    # Both axes of the value graph span 0..car_limit.
    self._graph3d_values = common.Graph3DValues(
        x_label="Cars at 1st location",
        y_label="Cars at 2nd location",
        z_label="V(s)",
        x_min=0,
        x_max=car_limit,
        y_min=0,
        y_max=car_limit,
    )

    self._grid_view_parameters = common.GridViewParameters(
        grid_view_type=common.GridViewType.JACKS,
        show_result=True,
        show_policy=True,
    )
class Settings(common.Settings):
    """Overrides of ``common.Settings`` for this scenario.

    Uses gamma = 1.0, a single run of 500_000 training episodes, an episode
    print frequency of 10_000 and a tabular deterministic policy.
    """

    gamma: float = 1.0
    runs: int = 1
    training_episodes: int = 500_000
    episode_print_frequency: int = 10_000
    # NOTE(review): this default instance is created once at class-definition
    # time; confirm how the base class treats such defaults.
    policy_parameters: common.PolicyParameters = common.PolicyParameters(policy_type=common.PolicyType.TABULAR_DETERMINISTIC)
class Settings(common.Settings):
    """Overrides of ``common.Settings`` for this scenario.

    Uses 100 runs of 100 training episodes each, policy type TABULAR_NONE
    and a default-constructed ``AlgorithmParameters``.
    """

    runs: int = 100
    # Multiprocessing is left disabled here; the previously tried setting was
    # runs_multiprocessing = common.ParallelContextType.FORK_GLOBAL.
    training_episodes: int = 100
    policy_parameters: common.PolicyParameters = common.PolicyParameters(
        policy_type=common.PolicyType.TABULAR_NONE,
    )
    # Annotated with the common base type, but built from the unqualified
    # AlgorithmParameters name that is in scope in this module.
    algorithm_parameters: common.AlgorithmParameters = AlgorithmParameters()
def __init__(self, environment: TabularEnvironment, policy_parameters: common.PolicyParameters):
    """Initialise the policy and the greedy policy it wraps.

    Args:
        environment: tabular environment, forwarded to the base initialiser.
        policy_parameters: policy parameters, forwarded to the base
            initialiser; ``epsilon`` is read back from the stored copy.
    """
    super().__init__(environment, policy_parameters)

    # Presumably self._policy_parameters / self._environment are set by the
    # base initialiser above - verify against the parent class.
    self.epsilon: float = self._policy_parameters.epsilon

    # The inner greedy policy is a tabular deterministic policy created with
    # store_matrix=False.
    self.greedy_policy: Deterministic = Deterministic(
        self._environment,
        common.PolicyParameters(
            policy_type=common.PolicyType.TABULAR_DETERMINISTIC,
            store_matrix=False,
        ),
    )
class Settings(common.Settings):
    """Overrides of ``common.Settings`` for this scenario.

    Uses gamma = 1.0, a tabular deterministic policy and leaves
    ``display_every_step`` switched off.
    """

    # 0.99999 was noted as an alternative value.
    gamma: float = 1.0
    policy_parameters: common.PolicyParameters = common.PolicyParameters(policy_type=common.PolicyType.TABULAR_DETERMINISTIC)
    display_every_step: bool = False