Example #1
    def __init__(self,
                 environment,
                 function_approximator,
                 config=None,
                 summary=None):
        self.config = config or Config()
        assert isinstance(self.config, Config)
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        store_summary           bool            False               store the summary of the agent (return per episode)
        """
        self.store_summary = check_attribute_else_default(
            self.config, 'store_summary', False)
        if self.store_summary:
            assert isinstance(summary, dict)
            self.summary = summary
            check_dict_else_default(self.summary, 'return_per_episode', [])

        " Other Parameters "
        # Function Approximator: used to approximate the Q-Values
        self.fa = function_approximator
        # Environment that the agent is interacting with
        self.env = environment
        # Summaries
        self.cumulative_reward = 0
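
All of these examples lean on the same small config helpers, whose implementation is not shown. The sketch below captures their assumed behavior (get an attribute or dict entry, creating it with a default when missing); it is enough to follow the examples but is not the project's actual code.

def check_attribute_else_default(config, attr_name, default_value):
    # Assumed behavior: return config.attr_name, creating it with the default if missing.
    if not hasattr(config, attr_name):
        setattr(config, attr_name, default_value)
    return getattr(config, attr_name)

# check_attribute (used in the later examples) is assumed to follow the same pattern.
check_attribute = check_attribute_else_default

def check_dict_else_default(summary, key, default_value):
    # Assumed behavior: ensure summary[key] exists, creating it with the default if missing.
    if key not in summary:
        summary[key] = default_value
    return summary[key]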
    def __init__(self, config=None, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_actions                 int             1000            The max number of actions executed before forcing
                                                                    a time out
        save_summary                bool            False           Whether to save a summary of the environment
        """
        self.max_actions = check_attribute_else_default(config,
                                                        'max_actions',
                                                        default_value=1000)
        self.save_summary = check_attribute_else_default(config,
                                                         'save_summary',
                                                         default_value=False)
        self.summary = summary
        if self.save_summary:
            assert isinstance(self.summary, dict)
            check_dict_else_default(self.summary, "steps_per_episode", [])

        " Inner state of the environment "
        self.step_count = 0
        self.current_state = self.reset()
        self.actions = np.array(
            [0, 1, 2], dtype=int)  # 0 = backward, 1 = coast, 2 = forward
        self.high = np.array([0.5, 0.07], dtype=np.float32)
        self.low = np.array([-1.2, -0.07], dtype=np.float32)
        self.action_dictionary = {
            0: -1,  # accelerate backwards
            1: 0,   # coast
            2: 1,   # accelerate forwards
        }
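
The bounds and action mapping above match the classic mountain-car task. For reference, a single transition under the standard Sutton and Barto dynamics looks roughly like the sketch below; the environment's own step method is not shown, so treat this as an assumption rather than the project's implementation.

import numpy as np

def mountain_car_step(position, velocity, action):
    # action: 0 = backward, 1 = coast, 2 = forward, mapped to a throttle in {-1, 0, 1}
    throttle = {0: -1, 1: 0, 2: 1}[action]
    velocity += 0.001 * throttle - 0.0025 * np.cos(3 * position)
    velocity = np.clip(velocity, -0.07, 0.07)
    position += velocity
    position = np.clip(position, -1.2, 0.5)
    if position == -1.2:
        velocity = 0.0  # the left wall stops the car
    return position, velocity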
Example #3
    def __init__(self, config, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        # environment parameters
        max_episode_length          int             500000          The max number of actions executed before forcing
                                                                    a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        # summary parameters
        store_summary               bool            False           Whether to store the summary of the environment
        number_of_steps             int             500000          Total number of environment steps
        """
        check_attribute(config, 'current_step', 0)
        self.config = config

        # environment related variables
        self.max_episode_length = check_attribute(config,
                                                  'max_episode_length',
                                                  default_value=500000)
        self.norm_state = check_attribute(config,
                                          'norm_state',
                                          default_value=True)

        # summary related variables
        self.store_summary = check_attribute(config,
                                             'store_summary',
                                             default_value=False)
        self.number_of_steps = check_attribute(config,
                                               'number_of_steps',
                                               default_value=500000)
        self.summary = summary
        if self.store_summary:
            assert isinstance(self.summary, dict)
            self.reward_per_step = np.zeros(self.number_of_steps,
                                            dtype=np.float64)
            check_dict_else_default(self.summary, "steps_per_episode", [])
            check_dict_else_default(self.summary, "reward_per_step",
                                    self.reward_per_step)

        # internal state of the environment
        self.episode_step_count = 0
        position = -0.6 + np.random.random() * 0.2
        velocity = 0.0
        self.current_state = np.array((position, velocity), dtype=np.float64)
        self.actions = np.array(
            [0, 1, 2], dtype=int)  # 0 = backward, 1 = coast, 2 = forward
        self.high = np.array([0.5, 0.07], dtype=np.float64)
        self.low = np.array([-1.2, -0.07], dtype=np.float64)
        self.action_dictionary = {
            0: -1,  # accelerate backwards
            1: 0,   # coast
            2: 1,   # accelerate forwards
        }
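
norm_state promises states normalized to [-1, 1]. A minimal way to do that with the self.low and self.high bounds above is sketched here; the exact formula used by the environment is an assumption.

import numpy as np

def normalize_state(state, low, high):
    # Linearly map each component from [low, high] to [-1, 1].
    return 2.0 * (state - low) / (high - low) - 1.0

low = np.array([-1.2, -0.07])
high = np.array([0.5, 0.07])
print(normalize_state(np.array([-0.35, 0.0]), low, high))  # [0. 0.], the centre of the box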
Example #4
    def __init__(self, config, summary=None):
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_episode_length          int             200000          The max number of steps executed in an episode
                                                                    before forcing a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        store_summary               bool            False           Whether to store the summary of the environment
        number_of_steps             int             200000          Total number of environment steps
        """
        check_attribute(config, 'current_step', 0)
        self.config = config

        # environment parameters
        self.max_episode_length = check_attribute(config, 'max_episode_length',
                                                  200000)
        self.norm_state = check_attribute(config, 'norm_state', True)

        # summary parameters
        self.store_summary = check_attribute(config,
                                             'store_summary',
                                             default_value=False)
        self.summary = summary
        self.number_of_steps = check_attribute(config, 'number_of_steps',
                                               200000)

        if self.store_summary:
            assert isinstance(self.summary, dict)
            self.reward_per_step = np.zeros(self.number_of_steps,
                                            dtype=np.float64)
            check_dict_else_default(self.summary, "steps_per_episode", [])
            check_dict_else_default(self.summary, "reward_per_step",
                                    self.reward_per_step)

        self.num_action = 4
        self.num_state = 2
        """ Inner state of the environment """
        self.episode_step_count = 0
        self.state = np.float64(
            np.random.uniform(low=0.0, high=0.1, size=(2, )))
        self.puddle1 = Puddle(0.45, 0.75, 0.10, 0.75, 0.1, 0.35)
        self.puddle2 = Puddle(0.45, 0.80, 0.45, 0.40, 0.1, 0.4)

        self.pworld_min_x = 0.0
        self.pworld_max_x = 1.0
        self.pworld_min_y = 0.0
        self.pworld_max_y = 1.0

        self.goalDimension = 0.05
        self.defDisplacement = 0.05

        self.goalXCoor = self.pworld_max_x - self.goalDimension
        self.goalYCoor = self.pworld_max_y - self.goalDimension
Example #5
    def __init__(self,
                 environment,
                 function_approximator,
                 behaviour_policy,
                 er_buffer,
                 config=None,
                 summary=None,
                 reshape=True):
        """
        Summary Name: return_per_episode
        """
        self.config = config or Config()
        assert isinstance(self.config, Config)
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        save_summary            bool            False               save the summary of the agent (return per episode)
        er_start_size           int             0                   number of steps sampled before training starts
        er_init_steps_count     int             0                   number of initial steps taken so far
        fixed_tpolicy           bool            False               whether the policy is fixed (e.g., a function of
                                                                    the state) or changes over time 
                                                                    (e.g., epsilon-greedy or a function of the q-values)
        """
        self.save_summary = check_attribute_else_default(
            self.config, 'save_summary', False)
        self.er_start_size = check_attribute_else_default(
            self.config, 'er_start_size', 0)
        check_attribute_else_default(self.config, 'er_init_steps_count', 0)
        self.fixed_tpolicy = check_attribute_else_default(
            self.config, 'fixed_tpolicy', False)

        if self.save_summary:
            assert isinstance(summary, dict)
            self.summary = summary
            check_dict_else_default(self.summary, 'return_per_episode', [])

        " Other Parameters "
        # Behaviour
        self.bpolicy = behaviour_policy
        # Experience Replay Buffer
        self.er_buffer = er_buffer
        # Function Approximator: used to approximate the Q-Values
        self.fa = function_approximator
        # Environment that the agent is interacting with
        self.env = environment
        # Summaries
        self.cumulative_reward = 0
        # Whether to reshape the mountain car observations
        self.reshape = reshape
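
The agent expects an er_buffer and only starts training once er_start_size steps have been collected. The buffer's API is not shown; the sketch below is one common shape for it (uniform sampling from a bounded deque) and is an assumption, not the project's class.

import random
from collections import deque

class SimpleReplayBuffer:
    # An assumed minimal experience replay buffer with uniform sampling.
    def __init__(self, capacity=100000):
        self.buffer = deque(maxlen=capacity)

    def store(self, state, action, reward, next_state, terminal):
        self.buffer.append((state, action, reward, next_state, terminal))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)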
    def __init__(self, config=None, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_actions                 int             1000            The max number of actions executed before forcing
                                                                    a time out
        save_summary                bool            False           Whether to save a summary of the environment
        """
        self.max_actions = check_attribute_else_default(config, 'max_actions', default_value=500)
        self.save_summary = check_attribute_else_default(config, 'save_summary', default_value=False)
        self.summary = summary
        if self.save_summary:
            assert isinstance(self.summary, dict)
            check_dict_else_default(self.summary, "steps_per_episode", [])

        " Inner state of the environment "
        self.step_count = 0
        self.openai_env = gym.make('Acrobot-v1')
        self.actions = np.array([0, 1, 2], dtype=np.int8)
        self.high = np.array([np.pi * 2, np.pi * 2, 12.56637096, 28.27433395], dtype=np.float64)
        self.low = np.array([0.0, 0.0, -12.56637096, -28.27433395], dtype=np.float64)
        self.current_state = self.reset()
    def __init__(self, config, summary=None):
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_actions                 int             1000            The max number of actions executed before forcing
                                                                    a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        store_summary               bool            False           Whether to store the summary of the environment
        """
        self.norm_state = check_attribute_else_default(config, 'norm_state', True)
        self.max_actions = check_attribute_else_default(config, 'max_actions', 1000)
        self.store_summary = check_attribute_else_default(config, 'store_summary', False)
        self.summary = summary
        if self.store_summary:
            assert isinstance(self.summary, dict)
            check_dict_else_default(self.summary, "steps_per_episode", [])

        self.num_actions = 3
        self.state_dims = 4

        " Inner state of the environment "
        self.step_count = 0
        self.current_state = np.float64(np.random.uniform(low=-0.5, high=0.5, size=(4,)))
        self.MAX_VEL_1 = 4 * np.pi
        self.MAX_VEL_2 = 9 * np.pi
        self.MAX_THETA_1 = np.pi
        self.MAX_THETA_2 = np.pi
        self.m1 = 1.0
        self.m2 = 1.0
        self.l1 = 1.0
        self.l2 = 1.0
        self.lc1 = 0.5
        self.lc2 = 0.5
        self.I1 = 1.0
        self.I2 = 1.0
        self.g = 9.8
        self.dt = 0.05
        self.acrobotGoalPosition = 1.0
Example #8
    def __init__(self, config, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_episode_length          int             500000          The max number of steps executed in an episode
                                                                    before forcing a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        display                     bool            False           Whether to display the screen of the game
        init_lives                  int             3               Number of lives at the start of the game
        store_summary               bool            False           Whether to store the summary of the environment
        number_of_steps             int             500000          Total number of environment steps
        """
        check_attribute(config, 'current_step', 0)
        self.config = config

        # environment parameters
        self.max_episode_length = check_attribute(config,
                                                  'max_episode_length',
                                                  default_value=500000)
        self.norm_state = check_attribute(config,
                                          'norm_state',
                                          default_value=True)
        self.display = False
        self.init_lives = 3
        # self.display = check_attribute(config, 'display', default_value=False)
        # self.init_lives = check_attribute(config, 'init_lives', default_value=3)

        # summary parameters
        self.store_summary = check_attribute(config,
                                             'store_summary',
                                             default_value=False)
        self.summary = summary
        self.number_of_steps = check_attribute(config, 'number_of_steps',
                                               500000)

        if self.store_summary:
            assert isinstance(self.summary, dict)
            self.reward_per_step = np.zeros(self.number_of_steps,
                                            dtype=np.float64)
            check_dict_else_default(self.summary, "steps_per_episode", [])
            check_dict_else_default(self.summary, "reward_per_step",
                                    self.reward_per_step)

        # setting up original catcher environment with the specified parameters
        self.catcherOb = Catcher(init_lives=self.init_lives)
        if not self.display:
            # do not open a pygame window
            os.putenv('SDL_VIDEODRIVER', 'fbcon')
            os.environ["SDL_VIDEODRIVER"] = "dummy"
        if self.norm_state:
            self.pOb = PLE(self.catcherOb,
                           fps=30,
                           state_preprocessor=get_ob_normalize,
                           display_screen=self.display)
        else:
            self.pOb = PLE(self.catcherOb,
                           fps=30,
                           state_preprocessor=get_ob,
                           display_screen=self.display)
        self.pOb.init()

        # environment internal state
        self.actions = [
            97, None, 100
        ]  # self.pOb.getActionSet() (left = 97, do nothing = None, right = 100)
        self.num_action = 3
        self.num_state = 4
        self.episode_step_count = 0
        self.pOb.reset_game()
        self.current_state = self.pOb.getGameState()
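
get_ob and get_ob_normalize are passed to PLE as state preprocessors but are not shown here. In PLE's Catcher the raw game state is a dict with player_x, player_vel, fruit_x and fruit_y, so a preprocessor presumably flattens it into the 4-dimensional vector the code expects; the sketch below is an assumption along those lines.

import numpy as np

def get_ob(state):
    # Assumed preprocessor: flatten the Catcher game-state dict into a fixed-order vector.
    return np.array([state['player_x'], state['player_vel'],
                     state['fruit_x'], state['fruit_y']], dtype=np.float64)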
    def __init__(self,
                 optimizer,
                 target_network,
                 update_network,
                 er_buffer,
                 config=None,
                 tf_session=None,
                 summary=None):
        """
        Summary Names:
            cumulative_loss
            training_steps
        """

        assert isinstance(config, Config)
        self.config = config
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        alpha                   float           0.00025             step size parameter
        obs_dims                list            [4,84,84]           the dimensions of the observations
        tnetwork_update_freq    int             10,000              number of updates before updating the target network
        update_count            int             0                   number of updates performed
        save_summary            bool            False               indicates whether to save a summary of training
        """
        self.alpha = check_attribute_else_default(self.config, 'alpha',
                                                  0.00025)
        self.obs_dims = check_attribute_else_default(self.config, 'obs_dims',
                                                     [4, 84, 84])
        self.tnetwork_update_freq = check_attribute_else_default(
            self.config, 'tnetwork_update_freq', 10000)
        self.save_summary = check_attribute_else_default(
            self.config, 'save_summary', False)
        check_attribute_else_default(self.config, 'update_count', 0)
        self.summary = summary
        if self.save_summary:
            assert isinstance(self.summary, dict)
            check_dict_else_default(self.summary, 'cumulative_loss', [])
            check_dict_else_default(self.summary, 'training_steps', [])
            self.training_steps = 0
            self.cumulative_loss = 0
        """ Other Parameters """
        " Experience Replay Buffer and Return Function "
        self.er_buffer = er_buffer

        " Neural Network Models "
        self.target_network = target_network  # Target Network
        self.update_network = update_network  # Update Network

        " Training and Learning Evaluation: Tensorflow and variables initializer "
        self.optimizer = optimizer(self.alpha)
        self.sess = tf_session or tf.Session()

        " Train step "
        self.train_step = self.optimizer.minimize(
            self.update_network.train_loss,
            var_list=self.update_network.train_vars[0])

        " Initializing variables in the graph"
        for var in tf.global_variables():
            self.sess.run(var.initializer)

        " Copy Weights to Target Network Operator "
        unetwork_vars = tf.get_collection(self.update_network.name)
        tnetwork_vars = tf.get_collection(self.target_network.name)
        copy_ops = [
            target_var.assign(update_var)
            for target_var, update_var in zip(tnetwork_vars, unetwork_vars)
        ]
        self.copy_to_target = tf.group(*copy_ops)
        self.sess.run(self.copy_to_target)
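
tnetwork_update_freq and update_count imply a periodic hard sync of the target network during training. The method that performs it is not shown; a typical pattern, assumed here rather than taken from the project, would be:

def maybe_sync_target(model):
    # Assumed usage: after each gradient update, bump the counter and copy the
    # update network's weights to the target network every tnetwork_update_freq updates.
    model.config.update_count += 1
    if model.config.update_count % model.config.tnetwork_update_freq == 0:
        model.sess.run(model.copy_to_target)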