Example #1
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.num_tilings = check_attribute_else_default(experiment_parameters, 'num_tilings', 32)
        self.tiling_length = check_attribute_else_default(experiment_parameters, 'tiling_length', 10)
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'learning_rate', 0.001)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        self.summary = {}

        """ Parameters for the Environment """
        self.config.max_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['max_actions']
        self.config.norm_state = True

        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.lr = self.learning_rate / self.num_tilings
        self.config.num_tilings = self.num_tilings
        self.config.tiling_length = self.tiling_length
        self.config.scaling_factor = 1/2
        self.config.scaling_offset = 1

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
        self.fa = TileCoderFA(config=self.config)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)
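
Every snippet in these examples reads its hyperparameters through check_attribute_else_default, which is not shown. A minimal sketch of the assumed behaviour (read an attribute, fall back to and store a default, optionally validate against choices) is given below; the real helper may differ in details such as error reporting.

# Hypothetical sketch of the config helper assumed throughout these examples.
def check_attribute_else_default(obj, attr_name, default_value, choices=None):
    # Store the default on the object if the attribute is missing.
    if not hasattr(obj, attr_name):
        setattr(obj, attr_name, default_value)
    value = getattr(obj, attr_name)
    # Optionally restrict the value to a fixed set of choices, as in Example #1.
    if choices is not None:
        assert value in choices, "{} must be one of {}".format(attr_name, choices)
    return value
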
Example #2
    def __init__(self, config, return_function):
        """ Parameters:
        Name:               Type:           Default:            Description: (Omitted when self-explanatory)
        buff_sz             int             10                  buffer size
        batch_sz            int             1
        env_state_dims      list            [2,2]               dimensions of the observations to be stored in the buffer
        obs_dtype           np.type         np.uint8            the data type of the observations
        """
        assert isinstance(config, Config)
        self.config = config
        self.buff_sz = check_attribute_else_default(self.config, 'buff_sz', 10)
        self.batch_sz = check_attribute_else_default(self.config, 'batch_sz', 1)
        self.env_state_dims = list(
            check_attribute_else_default(self.config, 'env_state_dims', [2, 2]))
        self.obs_dtype = check_attribute_else_default(self.config, 'obs_dtype', np.uint8)
        """ Parameters for Return Function """
        assert isinstance(return_function, TDZeroReturnFunction)
        self.return_function = return_function
        """ Parameters to keep track of the current state of the buffer """
        self.current_index = 0
        self.full_buffer = False
        """ Circular Buffers """
        self.state = CircularBuffer(self.buff_sz, shape=tuple(self.env_state_dims),
                                    dtype=self.obs_dtype)
        self.reward = CircularBuffer(self.buff_sz, shape=(), dtype=np.int32)
        self.terminate = CircularBuffer(self.buff_sz, shape=(), dtype=np.bool)
        self.timeout = CircularBuffer(self.buff_sz, shape=(), dtype=np.bool)
        self.estimated_return = CircularBuffer(self.buff_sz, shape=(), dtype=np.float64)
        self.up_to_date = CircularBuffer(self.buff_sz, shape=(), dtype=np.bool)
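
Example #2 keeps each field of a transition in its own CircularBuffer, a class that is not shown here. A minimal sketch of the assumed fixed-capacity, overwrite-oldest behaviour (the real class likely also supports sampling):

import numpy as np

# Hypothetical sketch of the CircularBuffer assumed by Example #2.
class CircularBuffer:
    def __init__(self, maxlen, shape=(), dtype=np.float64):
        self.data = np.zeros((maxlen,) + shape, dtype=dtype)
        self.maxlen = maxlen
        self.index = 0   # position of the next write
        self.size = 0    # number of valid entries

    def append(self, item):
        # Overwrite the oldest entry once the buffer is full.
        self.data[self.index] = item
        self.index = (self.index + 1) % self.maxlen
        self.size = min(self.size + 1, self.maxlen)

    def __getitem__(self, i):
        # Index 0 refers to the oldest stored entry.
        assert 0 <= i < self.size
        return self.data[(self.index - self.size + i) % self.maxlen]
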
Example #3
    def __init__(self, config=None, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_actions                 int             1000            The max number of actions executed before forcing
                                                                    a time out
        save_summary                bool            False           Whether to save a summary of the environment
        """
        self.max_actions = check_attribute_else_default(config,
                                                        'max_actions',
                                                        default_value=1000)
        self.save_summary = check_attribute_else_default(config,
                                                         'save_summary',
                                                         default_value=False)
        self.summary = summary
        if self.save_summary:
            assert isinstance(self.summary, dict)
            check_dict_else_default(self.summary, "steps_per_episode", [])

        " Inner state of the environment "
        self.step_count = 0
        self.current_state = self.reset()
        self.actions = np.array(
            [0, 1, 2], dtype=int)  # 0 = backward, 1 = coast, 2 = forward
        self.high = np.array([0.5, 0.07], dtype=np.float32)
        self.low = np.array([-1.2, -0.07], dtype=np.float32)
        self.action_dictionary = {
            0: -1,  # accelerate backwards
            1: 0,   # coast
            2: 1,   # accelerate forwards
        }
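
The summary-handling code in these environments and agents relies on check_dict_else_default, also not shown. The assumed behaviour is simply to insert a default value under a key when that key is absent:

# Hypothetical sketch of the dictionary counterpart of check_attribute_else_default.
def check_dict_else_default(dictionary, key, default_value):
    if key not in dictionary:
        dictionary[key] = default_value
    return dictionary[key]
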
Example #4
    def __init__(self,
                 environment,
                 function_approximator,
                 behaviour_policy,
                 er_buffer,
                 config=None,
                 summary=None,
                 reshape=True):
        """
        Summary Name: return_per_episode
        """
        self.config = config or Config()
        assert isinstance(self.config, Config)
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        save_summary            bool            False               save the summary of the agent (return per episode)
        er_start_size           int             0                   number of steps sampled before training starts
        er_init_steps_count     int             0                   number of initial steps taken so far
        fixed_tpolicy           bool            False               whether the policy is fixed (e.g., a function of
                                                                    the state) or changes over time 
                                                                    (e.g., epsilon-greedy or a function of the q-values)
        """
        self.save_summary = check_attribute_else_default(
            self.config, 'save_summary', False)
        self.er_start_size = check_attribute_else_default(
            self.config, 'er_start_size', 0)
        check_attribute_else_default(self.config, 'er_init_steps_count', 0)
        self.fixed_tpolicy = check_attribute_else_default(
            self.config, 'fixed_tpolicy', False)

        if self.save_summary:
            assert isinstance(summary, dict)
            self.summary = summary
            check_dict_else_default(self.summary, 'return_per_episode', [])

        " Other Parameters "
        # Behaviour
        self.bpolicy = behaviour_policy
        # Experience Replay Buffer
        self.er_buffer = er_buffer
        # Function Approximator: used to approximate the Q-Values
        self.fa = function_approximator
        # Environment that the agent is interacting with
        self.env = environment
        # Summaries
        self.cumulative_reward = 0
        # Whether to reshape the mountain car observations
        self.reshape = reshape
Example #5
    def __init__(self, config=None):
        """
        Parameters in config:
        Name:               Type:           Default:            Description: (Omitted when self-explanatory)
        num_actions         int             3                   Number of actions available to the agent
        epsilon             float           0.1                 Epsilon before annealing
        """
        self.config = config or Config()
        assert isinstance(self.config, Config)
        self.num_actions = check_attribute_else_default(self.config, 'num_actions', 3)
        self.epsilon = check_attribute_else_default(self.config, 'epsilon', 0.1)
        self.p_random = (self.epsilon / self.num_actions)
        self.p_optimal = self.p_random + (1 - self.epsilon)
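
Example #5 pre-computes the two probabilities of an epsilon-greedy policy: every action gets epsilon / num_actions, and the greedy action additionally receives the remaining 1 - epsilon probability mass. A sketch of how these probabilities could be used to sample an action from a vector of Q-values (the function below is illustrative, not part of the source):

import numpy as np

# Illustrative epsilon-greedy sampling using the probabilities from Example #5.
def choose_action(q_values, epsilon=0.1):
    num_actions = len(q_values)
    p_random = epsilon / num_actions
    p_optimal = p_random + (1 - epsilon)
    probabilities = np.full(num_actions, p_random)
    probabilities[np.argmax(q_values)] = p_optimal
    # (num_actions - 1) * p_random + p_optimal = 1, so this is a valid distribution.
    return np.random.choice(num_actions, p=probabilities)
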
Example #6
    def __init__(self,
                 environment,
                 function_approximator,
                 config=None,
                 summary=None):
        self.config = config or Config()
        assert isinstance(self.config, Config)
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        store_summary           bool            False               store the summary of the agent (return per episode)
        """
        self.store_summary = check_attribute_else_default(
            self.config, 'store_summary', False)
        if self.store_summary:
            assert isinstance(summary, dict)
            self.summary = summary
            check_dict_else_default(self.summary, 'return_per_episode', [])

        " Other Parameters "
        # Function Approximator: used to approximate the Q-Values
        self.fa = function_approximator
        # Environment that the agent is interacting with
        self.env = environment
        # Summaries
        self.cumulative_reward = 0
Example #7
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.tnet_update_freq = check_attribute_else_default(
            experiment_parameters, 'tnet_update_freq', 1)
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 10000)
        self.learning_rate = check_attribute_else_default(
            experiment_parameters, 'lr', 0.001)
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.lr = self.learning_rate
        self.config.batch_size = 32
        # DQN parameters
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.fa = VanillaDQN(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)
Example #8
    def __init__(self, tpolicy, config=None):

        assert isinstance(config, Config)
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        gamma                   float           1.0                 the discount factor
        onpolicy                bool            True                whether to compute the on-policy return or the
                                                                    off-policy return, i.e., whether or not to apply
                                                                    the importance sampling ratio.
        """
        self.gamma = check_attribute_else_default(config, 'gamma', 1.0)
        self.onpolicy = check_attribute_else_default(config, 'onpolicy', True)
        """
        Other Parameters:
        tpolicy - The target policy
        """
        self.tpolicy = tpolicy
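
Example #8 only shows the constructor of the return function. Given its parameters, the quantity it presumably computes is the one-step (TD(0)) target rho * (R + gamma * v(S')), where rho is the importance sampling ratio pi(a|s) / mu(a|s) in the off-policy case and 1 in the on-policy case. A standalone sketch of that computation (the function name and arguments are assumptions, not taken from the source):

# Illustrative one-step estimated return consistent with the parameters above.
def one_step_estimated_return(reward, next_state_value, gamma=1.0,
                              onpolicy=True, rho=1.0, terminate=False):
    # rho = pi(a|s) / mu(a|s); only applied when computing the off-policy return.
    ratio = 1.0 if onpolicy else rho
    # Bootstrap from the next state's value unless the episode terminated.
    bootstrap = 0.0 if terminate else gamma * next_state_value
    return ratio * (reward + bootstrap)
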
Example #9
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 20000)
        self.tnet_update_freq = check_attribute_else_default(experiment_parameters, 'tnet_update_freq', 10)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose
        # parameters specific to the parameter sweep
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'lr', 0.001)
        self.dropout_probability = check_attribute_else_default(experiment_parameters, 'dropout_probability', 0.1)

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

        """ Parameters for the Environment """
            # Same for every experiment
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0

        """ Parameters for the Function Approximator """
            # Same for every experiment
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32
            # Selected after finding the best parameter combinations for DQN with a given buffer size
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq
            # These are the parameters that we are sweeping over
        self.config.lr = self.learning_rate
        self.config.dropout_probability = self.dropout_probability

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
        self.fa = DropoutNeuralNetwork(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)
Example #10
    def __init__(self, config=None, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_actions                 int             500             The max number of actions executed before forcing
                                                                    a time out
        save_summary                bool            False           Whether to save a summary of the environment
        """
        self.max_actions = check_attribute_else_default(config, 'max_actions', default_value=500)
        self.save_summary = check_attribute_else_default(config, 'save_summary', default_value=False)
        self.summary = summary
        if self.save_summary:
            assert isinstance(self.summary, dict)
            check_dict_else_default(self.summary, "steps_per_episode", [])

        " Inner state of the environment "
        self.step_count = 0
        self.openai_env = gym.make('Acrobot-v1')
        self.actions = np.array([0, 1, 2], dtype=np.int8)
        self.high = np.array([np.pi * 2, np.pi * 2, 12.56637096, 28.27433395], np.float64)
        self.low = np.array([0.0, 0.0, -12.56637096, -28.27433395], dtype=np.float64)
        self.current_state = self.reset()
Example #11
    def __init__(self, config, summary=None):
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_actions                 int             1000            The max number of actions executed before forcing
                                                                    a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        store_summary               bool            False           Whether to store the summary of the environment
        """
        self.norm_state = check_attribute_else_default(config, 'norm_state', True)
        self.max_actions = check_attribute_else_default(config, 'max_actions', 1000)
        self.store_summary = check_attribute_else_default(config, 'store_summary', False)
        self.summary = summary
        if self.store_summary:
            assert isinstance(self.summary, dict)
            check_dict_else_default(self.summary, "steps_per_episode", [])

        self.num_actions = 3
        self.state_dims = 4

        " Inner state of the environment "
        self.step_count = 0
        self.current_state = np.float64(np.random.uniform(low=-0.5, high=0.5, size=(4,)))
        self.MAX_VEL_1 = 4 * np.pi
        self.MAX_VEL_2 = 9 * np.pi
        self.MAX_THETA_1 = np.pi
        self.MAX_THETA_2 = np.pi
        self.m1 = 1.0
        self.m2 = 1.0
        self.l1 = 1.0
        self.l2 = 1.0
        self.lc1 = 0.5
        self.lc2 = 0.5
        self.I1 = 1.0
        self.I2 = 1.0
        self.g = 9.8
        self.dt = 0.05
        self.acrobotGoalPosition = 1.0
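
Example #11 declares norm_state in its docstring, but the normalization itself happens outside the constructor. Given the bounds it defines, a plausible sketch of mapping the four-dimensional acrobot state into [-1, 1] is shown below; the ordering [theta_1, theta_2, theta_dot_1, theta_dot_2] is an assumption.

import numpy as np

# Assumed normalization consistent with the bounds defined in Example #11.
def normalize_acrobot_state(state):
    bounds = np.array([np.pi,        # MAX_THETA_1
                       np.pi,        # MAX_THETA_2
                       4 * np.pi,    # MAX_VEL_1
                       9 * np.pi],   # MAX_VEL_2
                      dtype=np.float64)
    return np.clip(np.asarray(state, dtype=np.float64) / bounds, -1.0, 1.0)
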
Example #12
    def __init__(self, config, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        # environment parameters
        max_episode_length          int             500000          The max number of actions executed before forcing
                                                                    a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        # summary parameters
        store_summary               bool            False           Whether to store the summary of the environment
        number_of_steps             int             500000          Total number of environment steps
        """
        check_attribute_else_default(config, 'current_step', 0)
        self.config = config

        # environment related variables
        self.max_episode_length = check_attribute_else_default(config, 'max_episode_length', default_value=500000)
        self.norm_state = check_attribute_else_default(config, 'norm_state', default_value=True)

        # summary related variables
        self.store_summary = check_attribute_else_default(config, 'store_summary', default_value=False)
        self.number_of_steps = check_attribute_else_default(config, 'number_of_steps', default_value=500000)
        self.summary = summary
        if self.store_summary:
            assert isinstance(self.summary, dict)
            self.reward_per_step = np.zeros(self.number_of_steps, dtype=np.float64)
            check_dict_else_default(self.summary, "steps_per_episode", [])
            check_dict_else_default(self.summary, "reward_per_step", self.reward_per_step)

        # internal state of the environment
        self.episode_step_count = 0
        position = -0.6 + np.random.random() * 0.2
        velocity = 0.0
        self.current_state = np.array((position, velocity), dtype=np.float64)
        self.actions = np.array([0, 1, 2], dtype=int)  # 0 = backward, 1 = coast, 2 = forward
        self.high = np.array([0.5, 0.07], dtype=np.float64)
        self.low = np.array([-1.2, -0.07], dtype=np.float64)
        self.action_dictionary = {0: -1,    # accelerate backwards
                                   1: 0,    # coast
                                   2: 1}    # accelerate forwards
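
Example #12 sets up the state, bounds, and action mapping of mountain car, but the transition dynamics live in the step code, which is not shown. For reference, the classic mountain car update consistent with these bounds and the -1/0/1 throttle mapping is sketched below (a standalone sketch, not the file's own step method):

import numpy as np

# Classic mountain car dynamics, written as a standalone sketch.
def mountain_car_step(position, velocity, action):
    # action is the mapped throttle from action_dictionary: -1, 0, or 1
    velocity += 0.001 * action - 0.0025 * np.cos(3 * position)
    velocity = np.clip(velocity, -0.07, 0.07)
    position += velocity
    position = np.clip(position, -1.2, 0.5)
    if position <= -1.2:
        velocity = 0.0           # inelastic collision with the left wall
    terminal = position >= 0.5   # the goal is at the top of the right hill
    reward = -1.0
    return position, velocity, reward, terminal
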
Example #13
    def __init__(self, config=None, name="default", SEED=None):

        assert isinstance(config, Config)
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        dim_out                 list            [10,10,10]          the output dimensions of each layer, i.e. neurons
        obs_dims                list            [2]                 the dimensions of the observations seen by the agent
        num_actions             int             3                   the number of actions available to the agent
        gate_fun                tf gate fun     tf.nn.relu          the gate function used across the whole network
        full_layers             int             3                   number of fully connected layers
        xavier_init             bool            True                whether to use a variant of Xavier initialization;
                                                                    otherwise, weight matrices and biases are
                                                                    initialized according to N(0, 0.1)
        """
        self.dim_out = check_attribute_else_default(config, 'dim_out',
                                                    [10, 10, 10])
        self.obs_dims = check_attribute_else_default(config, 'obs_dims', [2])
        self.num_actions = check_attribute_else_default(
            config, 'num_actions', 3)
        self.gate_fun = check_attribute_else_default(config, 'gate_fun',
                                                     tf.nn.relu)
        self.full_layers = check_attribute_else_default(
            config, 'full_layers', 3)
        self.xavier_init = check_attribute_else_default(
            config, 'xavier_init', True)
        """
        Other Parameters:
        name - name of the network. Should be a string.
        """
        self.name = name
        tf.get_collection(self.name)

        " Dimensions "
        dim_in = [np.prod(self.obs_dims)] + self.dim_out[:-1]
        row_and_action_number = 2
        " Placehodler "
        self.x_frames = tf.placeholder(tf.float32,
                                       shape=(None, dim_in[0]))  # input frames
        self.x_actions = tf.placeholder(
            tf.int32, shape=(None, row_and_action_number))  # input actions
        self.y = tf.placeholder(tf.float32, shape=None)  # target
        " Variables for Training "
        self.train_vars = []

        " Fully Connected Layers "
        current_y_hat = self.x_frames
        for j in range(self.full_layers):
            # layer n + m: fully connected
            W, b, z_hat, y_hat = fully_connected_av(
                self.name,
                "full_" + str(j + 1),
                current_y_hat,
                dim_in[j],
                self.dim_out[j],
                tf.random_normal_initializer(stddev=1.0 / np.sqrt(dim_in[j]),
                                             seed=SEED),
                self.gate_fun,
                xavier_init=self.xavier_init)
            current_y_hat = y_hat
            tf.add_to_collection(self.name, W)
            tf.add_to_collection(self.name, b)
            self.train_vars.extend([W, b])
        """ Output layer """
        # output layer: fully connected
        W, b, z_hat, self.y_hat = fully_connected_av(
            self.name,
            "output_layer",
            current_y_hat,
            self.dim_out[-1],
            self.num_actions,
            tf.random_normal_initializer(stddev=1.0 /
                                         np.sqrt(self.dim_out[-1]),
                                         seed=SEED),
            linear_transfer,
            xavier_init=self.xavier_init)
        tf.add_to_collection(self.name, W)
        tf.add_to_collection(self.name, b)
        self.train_vars.extend([W, b])
        self.train_vars = [self.train_vars]

        # Obtaining y_hat for the selected actions
        y_hat = tf.gather_nd(self.y_hat, self.x_actions)
        y = self.y
        # Temporal Difference Error
        self.td_error = tf.subtract(y, y_hat)
        # Loss
        self.train_loss = tf.reduce_mean(tf.pow(self.td_error, 2))
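
Example #13 builds a TensorFlow 1.x graph whose Q-value estimates are read from self.y_hat. A sketch of how an agent might query the network for a greedy action (the session handling and reshaping below are assumptions about how the graph is used):

import numpy as np

# Illustrative greedy-action query against the graph built in Example #13.
def greedy_action(session, network, observation):
    # The placeholder expects a batch of flattened observations.
    q_values = session.run(network.y_hat,
                           feed_dict={network.x_frames: observation.reshape(1, -1)})
    return int(np.argmax(q_values[0]))
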
Example #14
    def __init__(self,
                 optimizer,
                 target_network,
                 update_network,
                 er_buffer,
                 config=None,
                 tf_session=None,
                 summary=None):
        """
        Summary Names:
            cumulative_loss
            training_steps
        """

        assert isinstance(config, Config)
        self.config = config
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        alpha                   float           0.00025             step size parameter
        obs_dims                list            [4,84,84]           the dimensions of the observations
        tnetwork_update_freq    int             10,000              number of updates before updating the target network
        update_count            int             0                   number of updates performed
        save_summary            bool            False               indicates whether to save a summary of training
        """
        self.alpha = check_attribute_else_default(self.config, 'alpha',
                                                  0.00025)
        self.obs_dims = check_attribute_else_default(self.config, 'obs_dims',
                                                     [4, 84, 84])
        self.tnetwork_update_freq = check_attribute_else_default(
            self.config, 'tnetwork_update_freq', 10000)
        self.save_summary = check_attribute_else_default(
            self.config, 'save_summary', False)
        check_attribute_else_default(self.config, 'update_count', 0)
        self.summary = summary
        if self.save_summary:
            assert isinstance(self.summary, dict)
            check_dict_else_default(self.summary, 'cumulative_loss', [])
            check_dict_else_default(self.summary, 'training_steps', [])
            self.training_steps = 0
            self.cumulative_loss = 0
        """ Other Parameters """
        " Experience Replay Buffer and Return Function "
        self.er_buffer = er_buffer

        " Neural Network Models "
        self.target_network = target_network  # Target Network
        self.update_network = update_network  # Update Network

        " Training and Learning Evaluation: Tensorflow and variables initializer "
        self.optimizer = optimizer(self.alpha)
        self.sess = tf_session or tf.Session()

        " Train step "
        self.train_step = self.optimizer.minimize(
            self.update_network.train_loss,
            var_list=self.update_network.train_vars[0])

        " Initializing variables in the graph"
        for var in tf.global_variables():
            self.sess.run(var.initializer)

        " Copy Weights to Target Network Operator "
        unetwork_vars = tf.get_collection(self.update_network.name)
        tnetwork_vars = tf.get_collection(self.target_network.name)
        copy_ops = [
            target_var.assign(update_var)
            for target_var, update_var in zip(tnetwork_vars, unetwork_vars)
        ]
        self.copy_to_target = tf.group(*copy_ops)
        self.sess.run(self.copy_to_target)
Example #15
    def __init__(self, config, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_episode_length          int             500000          The max number of steps executed in an episode
                                                                    before forcing a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        display                     bool            False           Whether to display the screen of the game
        init_lives                  int             3               Number of lives at the start of the game
        store_summary               bool            False           Whether to store the summary of the environment
        number_of_steps             int             500000          Total number of environment steps
        """
        check_attribute_else_default(config, 'current_step', 0)
        self.config = config

        # environment parameters
        self.max_episode_length = check_attribute_else_default(
            config, 'max_episode_length', default_value=500000)
        self.norm_state = check_attribute_else_default(config,
                                                       'norm_state',
                                                       default_value=True)
        self.display = check_attribute_else_default(config,
                                                    'display',
                                                    default_value=False)
        self.init_lives = check_attribute_else_default(config,
                                                       'init_lives',
                                                       default_value=3)

        # summary parameters
        self.store_summary = check_attribute_else_default(config,
                                                          'store_summary',
                                                          default_value=False)
        self.summary = summary
        self.number_of_steps = check_attribute_else_default(
            config, 'number_of_steps', 500000)

        if self.store_summary:
            assert isinstance(self.summary, dict)
            self.reward_per_step = np.zeros(self.number_of_steps,
                                            dtype=np.float64)
            check_dict_else_default(self.summary, "steps_per_episode", [])
            check_dict_else_default(self.summary, "reward_per_step",
                                    self.reward_per_step)

        # setting up original catcher environment with the specified parameters
        self.catcherOb = Catcher(init_lives=self.init_lives)
        if not self.display:
            # do not open a pygame window
            os.putenv('SDL_VIDEODRIVER', 'fbcon')
            os.environ["SDL_VIDEODRIVER"] = "dummy"
        if self.norm_state:
            self.pOb = PLE(self.catcherOb,
                           fps=30,
                           state_preprocessor=get_ob_normalize,
                           display_screen=self.display)
        else:
            self.pOb = PLE(self.catcherOb,
                           fps=30,
                           state_preprocessor=get_ob,
                           display_screen=self.display)
        self.pOb.init()

        # environment internal state
        self.actions = [
            97, None, 100
        ]  # self.pOb.getActionSet() (left = 97, do nothing = None, right = 100)
        self.num_action = 3
        self.num_state = 4
        self.episode_step_count = 0
        self.pOb.reset_game()
        self.current_state = self.pOb.getGameState()
Example #16
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 20000)
        self.method = check_attribute_else_default(experiment_parameters,
                                                   'method', 'DQN')
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        parameters_dictionary = BEST_PARAMETERS_DICTIONARY[
            self.environment_name][self.method][self.buffer_size]
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32

        # Parameters for any type of agent
        self.config.buffer_size = self.buffer_size
        self.config.lr = parameters_dictionary['LearningRate']
        self.config.tnet_update_freq = parameters_dictionary['Freq']

        if self.method in ['DRE', 'DRE_LB', 'DRG', 'DRG_LB']:
            self.config.beta = parameters_dictionary['Beta']
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.use_gamma = False
            self.config.beta_lb = False
            if self.method in ['DRG', 'DRG_LB']:
                self.config.use_gamma = True
            if self.method in ['DRE_LB', 'DRG_LB']:
                self.config.beta_lb = True
            self.fa = DistRegNeuralNetwork(config=self.config,
                                           summary=self.summary)

        elif self.method in ['L1A', 'L1W', 'L2A', 'L2W']:
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.reg_method = 'l1'
            if self.method in ['L2A', 'L2W']:
                self.config.reg_method = 'l2'
            self.config.weights_reg = False
            if self.method in ['L1W', 'L2W']:
                self.config.weights_reg = True
            self.fa = RegularizedNeuralNetwork(config=self.config,
                                               summary=self.summary)

        elif self.method in ['DQN']:
            self.fa = VanillaDQN(config=self.config, summary=self.summary)

        elif self.method in ['Dropout']:
            self.config.dropout_probability = parameters_dictionary[
                'DropoutProbability']
            self.fa = DropoutNeuralNetwork(config=self.config,
                                           summary=self.summary)
        else:
            raise ValueError(
                "No configuration available for the given method.")

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)
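
These Experiment classes only show construction; the training loop is driven elsewhere in the codebase. A hedged usage sketch for Example #16 follows, with hypothetical names: the class name Experiment, the argparse-style namespace fields, and the results directory are all assumptions, and the sketch still requires the module's own dictionaries (e.g., ENVIRONMENT_DICTIONARY) to be importable.

from types import SimpleNamespace

# Hypothetical driver for Example #16; only construction is shown because the
# training entry point is not part of these snippets.
experiment_parameters = SimpleNamespace(buffer_size=20000, method='DQN',
                                        env='mountain_car', verbose=True)
experiment = Experiment(experiment_parameters, run_results_dir='./results')
# After construction, experiment.rl_agent wires together the environment and
# the function approximator selected by 'method'.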