Example #1
    def __init__(self, task):
        '''Initialize policy and other agent parameters.

        :param task: Should be able to access the following (OpenAI Gym spaces):
            task.observation_space  # i.e. state space
            task.action_space
        '''
        # init statistics writing
        self.stats_dir = util.get_param('out')
        if not os.path.exists(self.stats_dir):
            os.makedirs(self.stats_dir)

        self.stats_filename = os.path.join(
            self.stats_dir,
            'stats_{}.csv'.format(util.get_timestamp())
        )
        self.stats_columns = ['episode', 'total_reward']
        print('Saving statistics {} to {}'.format(self.stats_columns, self.stats_filename))

        # init models writing
        self.models_dir = util.get_param('models')
        self.actor_best_model_file = os.path.join(self.models_dir, 'actor_best.pth')
        self.critic_best_model_file = os.path.join(self.models_dir, 'critic_best.pth')
        if not os.path.exists(self.models_dir):
            os.makedirs(self.models_dir)
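Nearly every snippet below relies on a small util module providing get_param() and get_timestamp(). Their implementations are not shown anywhere in these examples; the following is only a hypothetical sketch of what such helpers could look like (the QUAD_ environment-variable lookup and the default directory are assumptions, not the project's actual code):

import os
import datetime


def get_param(name, default='.'):
    # Hypothetical: read a directory setting such as 'out' or 'models' from the
    # environment, falling back to a default path.
    return os.environ.get('QUAD_{}'.format(name.upper()), default)


def get_timestamp():
    # Hypothetical: filesystem-safe timestamp matching the
    # 'YYYY-MM-DD_HH-MM-SS' strings used in the stats file names.
    return datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')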
Example #2
    def __init__(self, task):
        # Task (environment) information
        self.task = task  # should contain observation_space and action_space

        self.action_space = 3
        self.acts = np.zeros(shape=self.task.action_space.shape)

        self.Q = defaultdict(lambda: np.zeros(self.action_space))

        # Episode variables
        self.reset_episode_vars()
        self.episode_num = 1
        self.step_count = 20

        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'),
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        print("Saving stats to {}".format(self.stats_filename))  # [debug]

        # Save Q stats
        self.q_stats_filename = os.path.join(
            util.get_param('out'),
            "q_stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        print("Saving q stats to {}".format(self.q_stats_filename))  # [debug]

        # Save S-A stats
        self.sa_stats_filename = os.path.join(
            util.get_param('out'), "state_action_{}.csv".format(
                util.get_timestamp()))  # path to CSV file
        print("Saving states actions to {}".format(
            self.sa_stats_filename))  # [debug]
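Many of these constructors end with self.reset_episode_vars(); the method body is never shown, so the following is only an assumed minimal version, using the per-episode fields that do appear in the examples (last_state, last_action, total_reward, count):

    def reset_episode_vars(self):
        # Assumed helper: clear per-episode bookkeeping before a new episode starts.
        self.last_state = None
        self.last_action = None
        self.total_reward = 0.0
        self.count = 0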
    def __init__(self, task):
        # Task State Action
        self.task = task  # should contain observation_space and action_space

        self.state_size = 7
        self.action_size = 1

        # Actor (Policy) Model
        self.acts = np.zeros(shape=self.task.action_space.shape)
        self.actor_local = Actor(self.state_size, self.action_size)
        self.actor_target = Actor(self.state_size, self.action_size)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(self.action_size)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 128
        self.memory = ReplayBuffer(self.buffer_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.005  # for soft update of target parameters
        self.count = 0

        self.reset_episode_vars()

        self.epsilon = 1
        self.episode_num = 1

        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'),
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        print("Saving stats to {}".format(self.stats_filename))  # [debug]

        # Save Q stats
        self.q_stats_filename = os.path.join(
            util.get_param('out'),
            "q_stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        print("Saving q stats to {}".format(self.q_stats_filename))  # [debug]

        # Save S-A stats
        self.sa_stats_filename = os.path.join(
            util.get_param('out'), "state_action_{}.csv".format(
                util.get_timestamp()))  # path to CSV file
        print("Saving states actions to {}".format(
            self.sa_stats_filename))  # [debug]
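The tau values set in these DDPG constructors ("for soft update of target parameters") are normally applied by a soft-update helper after each learning step. The examples never show that helper, so this is a sketch under the assumption of the Keras-style get_weights()/set_weights() interface used above:

    def soft_update(self, local_model, target_model):
        # Polyak averaging of target parameters:
        #   theta_target <- tau * theta_local + (1 - tau) * theta_target
        local_weights = local_model.get_weights()
        target_weights = target_model.get_weights()
        new_weights = [self.tau * lw + (1.0 - self.tau) * tw
                       for lw, tw in zip(local_weights, target_weights)]
        target_model.set_weights(new_weights)

It would typically be called as self.soft_update(self.critic_local.model, self.critic_target.model) and likewise for the actor.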
    def __init__(self, task):
        # Task (environment) information
        self.task = task  # should contain observation_space and action_space
        self.state_size = 3
        self.action_size = 3
        print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
            self.task.observation_space.shape, self.task.action_space.shape,
            self.state_size, self.action_size))

        # Parameters
        self.actor_weights = os.path.join(util.get_param('out'), "actor_weights.h5")
        self.critic_weights = os.path.join(util.get_param('out'), "critic_weights.h5")

        # Actor (Policy) Model
        self.action_low = self.preprocess_state(self.task.action_space.low)
        self.action_high = self.preprocess_state(self.task.action_space.high)
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize local model parameters with loaded weights
        if os.path.isfile(self.critic_weights):
            self.critic_local.model.load_weights(self.critic_weights)
        if os.path.isfile(self.actor_weights):
            self.actor_local.model.load_weights(self.actor_weights)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(self.action_size)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters

        # Episode variables
        self.reset_episode_vars()

        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'),
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode', 'total_reward']  # specify columns to save
        self.episode_num = 1
        print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]
Example #5
 def setup_weights(self):
     # save weights
     self.load_weights = True
     self.save_weights_every = 50
     self.model_dir = util.get_param('out')
     self.model_name = "ddpg"
     self.model_ext = ".h5"
     if self.load_weights or self.save_weights_every:
         self.actor_filename = os.path.join(self.model_dir,
                 "{}_actor{}".format(self.model_name, self.model_ext))
         self.critic_filename = os.path.join(self.model_dir,
                 "{}_critic{}".format(self.model_name, self.model_ext))
         print("Actor filename :", self.actor_filename)
         print("Critic filename:", self.critic_filename)
     if self.load_weights and os.path.isfile(self.actor_filename):
         try:
             self.actor_local.model.load_weights(self.actor_filename)
             self.critic_local.model.load_weights(self.critic_filename)
             print("Model weights loaded from file!")
         except Exception as e:
             print("Unable to load model weights from file!")
             print("{}: {}".format(e.__class__.__name__, str(e)))
     else:
          self.critic_target.model.set_weights(
              self.critic_local.model.get_weights())
          self.actor_target.model.set_weights(
              self.actor_local.model.get_weights())
    def __init__(self, task):

        self.task = task
        self.state_size = 3
        self.state_range = self.task.observation_space.high - self.task.observation_space.low
        self.action_size = 3
        self.action_range = (self.task.action_space.high -
                             self.task.action_space.low)[0:self.action_size]
        self.action_low = self.task.action_space.low[0:self.action_size]
        self.action_high = self.task.action_space.high[0:self.action_size]
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.noise = OUNoise(self.action_size)
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)
        self.gamma = 0.0
        self.tau = 0.001
        self.reset_episode_vars()
        self.stats_filename = os.path.join(util.get_param('out'),
                                           type(task).__name__ + ".csv")
        self.stats_columns = ['Episode', 'Total_reward']
        self.episode_num = 1
        print("Save stats ... {} to {}".format(self.stats_columns,
                                               self.stats_filename))
    def __init__(self):
        cube_size = 300.0  # env is cube_size x cube_size x cube_size
        self.observation_space = spaces.Box(
            np.array([
                -cube_size / 2, -cube_size / 2, 0.0, -1.0, -1.0, -1.0, -1.0,
                -np.inf, -np.inf, -np.inf
            ]),
            np.array([
                cube_size / 2, cube_size / 2, cube_size, 1.0, 1.0, 1.0, 1.0,
                np.inf, np.inf, np.inf
            ]))

        max_force = 25
        max_torque = 0
        self.action_space = spaces.Box(
            np.array([
                -max_force, -max_force, -max_force, -max_torque, -max_torque,
                -max_torque
            ]),
            np.array([
                max_force, max_force, max_force, max_torque, max_torque,
                max_torque
            ]))

        self.phase = -1

        self.desc = [
            "Determine Fg = m*g", "Determine Z-Drag", "Determine X-Drag",
            "DONE"
        ]

        self.base_filename = util.get_param('out')

        self.Fg = 19.62
Example #8
    def __init__(self, task):
        # Task (environment) information
        self.task = task  # should contain observation_space and action_space
        # self.state_size = np.prod(self.task.observation_space.shape)
        self.state_size = 3
        self.state_range = self.task.observation_space.high - self.task.observation_space.low

        self.action_size = 3
        # self.action_size = np.prod(self.task.action_space.shape)
        self.action_range = self.task.action_space.high - self.task.action_space.low

        # Policy parameters
        self.w = np.random.normal(
            size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
            scale=(self.action_range[:3] / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

        # Score tracker and learning parameters
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Episode variables
        self.reset_episode_vars()

        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'),
            '{}_{}_stats_{}.csv'.format(self.task, self, util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode', 'total_reward']  # specify columns to save
        self.episode_num = 1
        print('Saving stats {} to {}'.format(self.stats_columns, self.stats_filename))  # [debug]
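The weight matrix w initialized here defines a simple linear policy, and best_w, best_score, and noise_scale suggest random hill climbing. The act/learn methods are not shown; the following is only a sketch of how those attributes are commonly used together:

    def act(self, state):
        # Deterministic linear policy: each action dimension is a weighted sum
        # of the state vector, using the w matrix initialized above.
        return np.dot(state, self.w)

    def learn(self, score):
        # Random hill climbing over w: keep the best weights seen so far,
        # shrink the perturbation when improving and grow it otherwise.
        if score > self.best_score:
            self.best_score = score
            self.best_w = self.w
            self.noise_scale = max(0.5 * self.noise_scale, 0.01)
        elif self.best_w is not None:
            self.w = self.best_w
            self.noise_scale = min(2.0 * self.noise_scale, 3.2)
        self.w = self.w + self.noise_scale * np.random.normal(size=self.w.shape)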
Example #9
    def __init__(self, task):

        #---------------------------------------
        # Saving data

        self.stats_filename = os.path.join(
            util.get_param('out') + '/task04/',
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode',
                              'total_reward']  # specify columns to save
        self.episode_num = 1
        print("Saving stats {} to {}".format(self.stats_columns,
                                             self.stats_filename))

        # task_takeoff = deepcopy(task)
        # task_hover = deepcopy(task)
        # task_land = deepcopy(task)
        self.task = task
        self.task_takeoff = takeoff_b.TakeoffB()
        self.task_hover = hover_b.HoverB()
        self.task_land = land_b.LandB()
        self.o_task01_agent = task01_ddpg_agent_b.Task01_DDPG(
            self.task_takeoff)
        self.o_task02_agent = task02_ddpg_agent_b.Task02_DDPG(self.task_hover)
        self.o_task03_agent = task03_ddpg_agent_b.Task03_DDPG(self.task_land)

        # Current agent
        self.o_current_agent = self.o_task01_agent

        self.mode = 0
        self.episode_num = 0

        self.total_reward = 0.0
Example #10
 def save_episode_stats(self):
     self.stats_filename = os.path.join(
         util.get_param('out'), "stats_{}.csv".format(util.get_timestamp()))
     self.stats_columns = ['episode', 'total_reward']
     self.episode_num = 1
     print("### Saving stats {} to {}".format(self.stats_columns,
                                              self.stats_filename))
Example #11
    def __init__(self, task):
        # Task (environment) information
        self.task = task  # should contain observation_space and action_space
        self.state_size = np.prod(self.task.observation_space.shape)
        self.state_range = self.task.observation_space.high - self.task.observation_space.low
        self.action_size = np.prod(self.task.action_space.shape)
        self.action_range = self.task.action_space.high - self.task.action_space.low

        # Policy parameters
        self.w = np.random.normal(
            size=(
                self.state_size, self.action_size
            ),  # weights for simple linear policy: state_space x action_space
            scale=(self.action_range / (2 * self.state_size)).reshape(
                1, -1))  # start producing actions in a decent range

        # Score tracker and learning parameters
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Episode variables
        self.reset_episode_vars()

        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'), "dummy_stats_{}.csv".format(
                util.get_timestamp()))  # path to CSV file
        self.episode_num = 1
Example #12
    def __init__(self, task):

        self.task = task

        # Constrain State and Action matrices
        self.state_size = 6
        self.action_size = 3
        # For debugging:
        print(
            "Constrained State {} and Action {}; Original State {} and Action {}"
            .format(self.state_size, self.action_size,
                    self.task.observation_space.shape,
                    self.task.action_space.shape))

        # Score tracker and learning parameters
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Save episode statistics for analysis
        self.stats_filename = os.path.join(
            util.get_param('out'), "stats_{}.csv".format(util.get_timestamp()))
        self.stats_columns = ['episode', 'total_reward']
        self.episode_num = 1
        print("Save Stats {} to {}".format(self.stats_columns,
                                           self.stats_filename))

        # Actor Model
        self.action_low = self.task.action_space.low[0:3]
        self.action_high = self.task.action_space.high[0:3]
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize model parameters with local parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Process noise
        self.noise = OUNoise(self.action_size)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(size=self.buffer_size)

        # Algorithm Parameters
        self.gamma = 0.99  # discount
        self.tau = 0.001  # soft update of targets

        # Episode vars
        self.reset_episode_vars()
Example #13
    def __init__(self, task):
        
        self.task = task  # should contain observation_space and action_space
        self.state_size = np.prod(self.task.observation_space.shape)
        self.state_range = self.task.observation_space.high - self.task.observation_space.low
        self.action_size = np.prod(self.task.action_space.shape)
        self.action_range = self.task.action_space.high - self.task.action_space.low
        
        # Constrain state and action spaces
        
        # Actor (Policy) Model
        self.action_low = self.task.action_space.low
        self.action_high = self.task.action_space.high
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        
        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)
        
        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())
        
        # Noise process
        self.noise = OUNoise(self.action_size)
        
        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 248
        self.memory = ReplayBuffer(self.buffer_size)
        
        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters
        
        # Policy parameters
        self.w = np.random.normal(
            size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
            scale=(self.action_range / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

        # Score tracker and learning parameters
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Episode variables
        self.reset_episode_vars()
        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'),
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode', 'total_reward']  # specify columns to save
        self.episode_num = 1
        print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]

        # Episode variables
        self.reset_episode_vars()
Example #14
 def __init__(self):
     # Save episode stats
     self.stats_filename = os.path.join(
         util.get_param('out'),
         "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
     self.stats_columns = [
         'episode', 'height', 'target_distance', 'total_reward'
     ]  # specify columns to save
     self.episode_num = 1
     print("Saving stats {} to {}".format(self.stats_columns,
                                          self.stats_filename))  # [debug]
    def __init__(self, task):

        self.task = task
        self.state_size = 3  # position only
        self.state_range = self.task.observation_space.high - self.task.observation_space.low
        self.action_size = 3  # force only
        self.action_range = (self.task.action_space.high -
                             self.task.action_space.low)[0:self.action_size]

        # Actor (Policy) model
        self.action_low = self.task.action_space.low[0:self.action_size]
        self.action_high = self.task.action_space.high[0:self.action_size]
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(self.action_size)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)

        # Algorithm parameters
        self.gamma = 0.5  # Discount factor
        self.tau = 0.001  # for soft update of target parameters

        self.reset_episode_vars()

        # Save episodes stats
        self.stats_filename = os.path.join(
            util.get_param('out'), "stats_{}.csv".format(util.get_timestamp()))
        self.stats_columns = ['episode', 'total_reward']
        self.episode_num = 1

        print("Saving stats {} to {}".format(self.stats_columns,
                                             self.stats_filename))
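Every DDPG-style constructor above builds an OUNoise(action_size) exploration process, but the class itself never appears in these examples. A self-contained sketch of a standard Ornstein-Uhlenbeck process with that single-argument constructor (the mu/theta/sigma defaults are assumptions):

import numpy as np


class OUNoise:
    # Assumed Ornstein-Uhlenbeck process for temporally correlated exploration noise.
    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Restart the process at its mean.
        self.state = np.copy(self.mu)

    def sample(self):
        # Drift toward the mean plus Gaussian diffusion.
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state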
Example #16
    def __init__(self, task):
        self.task = task
        #constrain state and action spaces
        self.state_size = 1
        self.state_low = self.task.observation_space.low[2]
        self.state_high = self.task.observation_space.high[2]
        self.state_range = self.state_high - self.state_low
        #only limit to z direction
        self.action_range = (self.task.action_space.high -
                             self.task.action_space.low)[2]
        self.action_low = self.task.action_space.low[2]
        self.action_high = self.task.action_space.high[2]

        stepping = (self.action_high - 10.0) / 16.0
        self.discrete_actions = np.arange(10.0, self.action_high + 0.1,
                                          stepping)
        self.action_size = len(self.discrete_actions)
        print('discrete action:', self.discrete_actions, ', action size: ',
              self.action_size)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)

        # Algorithm parameters
        self.gamma = 0.9  # discount factor
        self.learning_rate = 0.001

        self.model = self.build_model()

        #save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'), "stats_{}.csv".format(util.get_timestamp()))
        self.stats_columns = ['episode', 'total_reward']
        self.episode_num = 1
        print("saving stats {} to {}".format(self.stats_columns,
                                             self.stats_filename))

        self.epilson = 1.0
        self.epilson_decay = 0.96
        self.epilson_min = 0.05

        self.learning = True
        self.reset_episode_vars()
        self.best_reward = -99999
    def __init__(self, task):
        self.task = task  # should contain observation_space and action_space
        self.state_shape = (9, )
        self.action_shape = (1, )
        self.nb_actions = np.prod(self.action_shape)
        self.action_range = self.task.action_space.high[
            2] - self.task.action_space.low[2]

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 128
        self.memory = ReplayBuffer(self.buffer_size, self.action_shape,
                                   self.state_shape)

        # Noise process
        self.noise = OUNoise(self.nb_actions)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.005  # 0.005
        self.actor_lr = 0.0001  #0.0001
        self.critic_lr = 0.001

        #initialize
        self.a2c = A2C(self.state_shape,
                       self.action_shape,
                       actor_lr=self.actor_lr,
                       critic_lr=self.critic_lr,
                       gamma=self.gamma)
        self.initialize()

        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'),
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode',
                              'total_reward']  # specify columns to save
        self.episode_num = 1
        print("Saving stats {} to {}".format(self.stats_columns,
                                             self.stats_filename))  # [debug]
        #initial episode vars
        self.last_state = None
        self.last_action = None
        self.total_reward = 0.0
        self.count = 0
        self.acts = np.zeros(shape=self.task.action_space.shape)
Example #18
    def __init__(self, task, action_min, action_max, state_min, state_max):
        # Task (environment) information
        self.task = task  # should contain observation_space and action_space
        
        self.min_action = action_min # define minimum and maximum action
        self.max_action = action_max
        
        self.min_stat = state_min # define minimum and maximum state
        self.max_stat = state_max
        
        self.learn_when_done = False # defines if the agent shall only learn at the end of each episode

        # Constrain state and action spaces
        self.state_size = self.max_stat - self.min_stat + 1  # number of retained state dimensions
        self.action_size = self.max_action - self.min_action + 1  # number of retained action dimensions
        print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
            self.task.observation_space.shape, self.task.action_space.shape,
            self.state_size, self.action_size))
        
        # calc state space minimum and range
        self.state_low = self.task.observation_space.low[self.min_stat:self.max_stat+1]
        self.state_range = self.task.observation_space.high[self.min_stat:self.max_stat+1] - self.state_low
        # self.action_size = np.prod(self.task.action_space.shape)
        
        # calc action space minimum, maximum and range
        self.action_low = self.task.action_space.low[self.min_action:self.max_action+1]
        self.action_high = self.task.action_space.high[self.min_action:self.max_action+1]
        self.action_range = self.action_high-self.action_low

        # Replay memory
        self.epsilon = 0.0
        self.batch_size = 64
        self.buffer_size = 100000
        self.memory = ReplayBuffer(self.buffer_size)

        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'),
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode', 'total_reward', 'learning_rate']  # specify columns to save
        self.episode_num = 1
        print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]        

        # Episode variables
        self.reset_episode_vars()
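ReplayBuffer is constructed throughout, usually as ReplayBuffer(buffer_size) (one snippet also passes action and state shapes, so signatures vary by author). A minimal sketch matching the single-argument form, assuming a deque of named experience tuples:

import random
from collections import deque, namedtuple


class ReplayBuffer:
    # Assumed fixed-size buffer of experience tuples.
    Experience = namedtuple('Experience',
                            ['state', 'action', 'reward', 'next_state', 'done'])

    def __init__(self, size=100000):
        self.memory = deque(maxlen=size)

    def add(self, state, action, reward, next_state, done):
        self.memory.append(self.Experience(state, action, reward, next_state, done))

    def sample(self, batch_size=64):
        return random.sample(self.memory, k=batch_size)

    def __len__(self):
        return len(self.memory)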
Example #19
    def __init__(self, task):
        self.task = task
        self.state_size = 3
        self.action_size = 3

        #set action space limits
        self.action_low = self.task.action_space.low[0:3]
        self.action_high = self.task.action_space.high[0:3]
        print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
            self.task.observation_space.shape, self.task.action_space.shape,
            self.state_size, self.action_size))

        action = [self.action_size, self.action_low, self.action_high]

        #Initialize network
        #Actor
        self.actor_local = Actor(self.state_size, action)
        self.actor_target = Actor(self.state_size, action)
        #Critic
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)
        self.setup_weights()

        #noise
        self.noise = OUNoise(self.action_size)

        #Replay buffer
        self.buffer_size = 100000
        self.batch_size = 128 
        self.memory = ReplayBuffer(self.buffer_size)

        #Hyper params
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters

        # log file
        self.stats = os.path.join(util.get_param('out'), "stats_{}.csv".format(
          util.get_timestamp()))
        self.episode_no = 1
        self.stats_columns = ['episodes', 'total_reward']
        print("Saving stats {} to {}".format(self.stats_columns, self.stats))


        # Episode variables
        self.reset_episode_vars()
Example #20
 def __init__(self, task):
     self.task = task
     self.state_size = 3
     self.action_range = (self.task.action_space.high -
                          self.task.action_space.low)[FORCE_Z]
     self.action_low = self.task.action_space.low[FORCE_Z]
     self.action_high = self.task.action_space.high[FORCE_Z]
     self.action_map = np.arange(self.action_high, 15.0, -4.0)[::-1]
     self.action_size = len(self.action_map)
     self.memory = deque(maxlen=MEMORY_SIZE)
     self.model = self._build_model()
     self.epsilon = 3.0
     self.reset_episode_vars()
      self.stats_filename = os.path.join(util.get_param('out'),
                                         type(task).__name__ + ".csv")
     self.stats_columns = ['episode', 'total_reward', 'epsilon']
     self.episode_num = 1
     print("Saving stats {} to {}".format(self.stats_columns,
                                          self.stats_filename))
Example #21
    def __init__(self, task):

        self.task = task  # should contain observation_space and action_space
        self.state_size = np.prod(self.task.observation_space.shape)
        self.state_range = self.task.observation_space.high - self.task.observation_space.low
        self.action_size = np.prod(self.task.action_space.shape)
        self.action_range = self.task.action_space.high - self.task.action_space.low

        self.action_low = self.task.action_space.low[0:6]
        self.action_high = self.task.action_space.high[0:6]
        print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
            self.task.observation_space.shape, self.task.action_space.shape,
            self.state_size, self.action_size))

        # Policy parameters
        self.w = np.random.normal(
            size=(
                self.state_size, self.action_size
            ),  # weights for simple linear policy: state_space x action_space
            scale=(self.action_range / (2 * self.state_size)).reshape(
                1, -1))  # start producing actions in a decent range

        # Score tracker and learning parameters
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Episode variables
        self.reset_episode_vars()
        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'), "Hoverstats_{}.csv".format(
                util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode',
                              'total_reward']  # specify columns to save
        self.episode_num = 1
        print("Saving stats {} to {}".format(self.stats_columns,
                                             self.stats_filename))  # [debug]

        # Episode variables
        self.reset_episode_vars()
Example #22
    def __init__(self, task):
        # Task (environment) information
        self.task = task  # should contain observation_space and action_space
        self.state_size = 3  # position only
        self.action_size = 3  # force only

        # Actor Model
        self.action_low = self.task.action_space.low[0:3]
        self.action_high = self.task.action_space.high[0:3]
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        #  Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(self.action_size)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)

        # Algorithm parameters
        self.gamma = 0.9
        self.tau = 0.001

        # Save episode stats
        self.stats_filename = os.path.join(util.get_param('out'), "stats_{}.csv".format(util.get_timestamp()))
        self.stats_columns = ['episode', 'total_reward']  # specify columns to save
        self.episode_num = 1
        print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))

        # Episode variables
        self.reset_episode_vars()
Example #23
    def __init__(self, task):
        # Task (environment) information
        self.task = task
        self.state_size = 3
        self.action_size = 3
        print("Original spaces:{}, {}\nConstrained spaces:{},{}".format(
            self.task.observation_space.shape, self.task.action_space.shape,
            self.state_size, self.action_size))

        self.action_low = self.task.action_space.low
        self.action_high = self.task.action_space.high

        # Actor(policy) model
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        # Critic (Q-value) model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)
        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())
        # Noise process
        self.noise = OUNoise(self.action_size)
        # Replay Buffer
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)
        # Policy parameters
        self.gamma = 0.99
        self.tau = 0.001

        # Save episode stats
        self.stats_filename = os.path.join(util.get_param('out'), "stats_{}.csv".format(util.get_timestamp()))
        self.stats_columns = ["episode", "total_reward"]
        self.episode_num = 1

        # Episode variables
        self.reset_episode_vars()
Example #24
    def step(self, state, reward, done):
        state = self.preprocess_state(state)
        action = self.act(state)
        if self.last_state is not None and self.last_action is not None:
            self.add_memory(self.last_state, self.last_action, reward, state,
                            done)
        if len(self.memory) > BATCH_SIZE:
            self.replay(BATCH_SIZE)
        self.last_state = state
        self.last_action = action
        self.total_reward += reward

        if done:
            print("Score ... {:.2f}, Epsilon ... {:.2f}".format(
                self.total_reward, self.epsilon))
            self.write_stats(
                [self.episode_num, self.total_reward, self.epsilon])
            self.episode_num += 1
            if self.episode_num % 250 == 0:
                filename = os.path.join(util.get_param('out'),
                                        "dqn_weights.h5")
                self.save_weights(filename)
            self.reset_episode_vars()
        return self.postprocess_action(action)
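This step() method calls add_memory(), whose body is not shown. Assuming the deque-based memory used by the DQN-style constructor in Example #20, a minimal version would be:

    def add_memory(self, state, action, reward, next_state, done):
        # Assumed helper: append one (s, a, r, s', done) transition to the
        # fixed-size replay deque created in __init__.
        self.memory.append((state, action, reward, next_state, done))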
Example #25
    def __init__(self, task):

        # Task (environment) information
        self.task = task  # should contain observation_space and action_space
        #self.state_size = np.prod(self.task.observation_space.shape)
        # self.task.observation_space.high = self.task.observation_space.high[2:3]
        # self.task.observation_space.low =  self.task.observation_space.low[2:3]
        self.state_range = self.task.observation_space.high - self.task.observation_space.low
        #self.action_size = np.prod(self.task.action_space.shape)
        self.action_range = self.task.action_space.high - self.task.action_space.low
        self.task.observation_space.high = self.task.observation_space.high[
            2:3]
        self.task.observation_space.low = self.task.observation_space.low[2:3]

        #self.state_range = self.state_range[2:3]
        #self.action_range = self.action_range[2:3]

        # Constrain state and action spaces
        self.state_size = 1  # position only
        self.action_size = 1  # force only
        self.action_low = self.task.action_space.low[2:3]
        self.action_high = self.task.action_space.high[2:3]
        print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
            self.task.observation_space.shape, self.task.action_space.shape,
            self.state_size, self.action_size))

        # # Policy parameters
        # self.w = np.random.normal(
        #     size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
        #     scale=(self.action_range / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

        # Score tracker and learning parameters
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Actor (Policy) Model
        #self.action_low = self.task.action_space.low
        #self.action_high = self.task.action_space.high
        self.state_range = self.state_range[2:3]
        self.action_range = self.action_range[2:3]
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(self.action_size)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters

        # Episode variables
        #self.reset_episode_vars()

        #---------------------------------------
        # Saving data

        self.stats_filename = os.path.join(
            util.get_param('out') + '/task01/',
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode',
                              'total_reward']  # specify columns to save
        self.episode_num = 1
        print("Saving stats {} to {}".format(self.stats_columns,
                                             self.stats_filename))

        # Load/save parameters
        self.load_weights = True  # try to load weights from previously saved models
        self.save_weights_every = 1  # save weights every n episodes, None to disable
        self.model_dir = util.get_param(
            'out'
        ) + '/task01'  # you can use a separate subdirectory for each task and/or neural net architecture
        self.model_name = "my-model_" + util.get_timestamp()
        self.model_ext = ".h5"
        # if self.load_weights or self.save_weights_every:
        #     self.actor_filename_local = os.path.join(self.model_dir,
        #         "{}_actor_local{}".format(self.model_name, self.model_ext))
        #     self.critic_filename_local = os.path.join(self.model_dir,
        #         "{}_critic_local{}".format(self.model_name, self.model_ext))
        #     self.actor_filename_target = os.path.join(self.model_dir,
        #         "{}_actor_target{}".format(self.model_name, self.model_ext))
        #     self.critic_filename_target = os.path.join(self.model_dir,
        #         "{}_critic_target{}".format(self.model_name, self.model_ext))
        #     print("Actor local filename :", self.actor_filename_local)  # [debug]
        #     print("Critic local filename:", self.critic_filename_local)  # [debug]
        #     print("Actor target filename :", self.actor_filename_target)  # [debug]
        #     print("Critic target filename:", self.critic_filename_target)  # [debug]

        # Load pre-trained model weights, if available
        #if self.load_weights and os.path.isfile(self.actor_filename_local):
        if self.load_weights:
            try:

                date_of_file = '2018-02-20_11-28-13'
                #date_of_file = '2018-02-20_11-22-27'
                self.actor_filename_local = os.path.join(
                    self.model_dir,
                    'my-model_{}_actor_local.h5'.format(date_of_file))
                self.critic_filename_local = os.path.join(
                    self.model_dir,
                    'my-model_{}_critic_local.h5'.format(date_of_file))
                self.actor_filename_target = os.path.join(
                    self.model_dir,
                    'my-model_{}_actor_target.h5'.format(date_of_file))
                self.critic_filename_target = os.path.join(
                    self.model_dir,
                    'my-model_{}_critic_target.h5'.format(date_of_file))

                self.actor_local.model.load_weights(self.actor_filename_local)
                self.critic_local.model.load_weights(
                    self.critic_filename_local)
                self.actor_target.model.load_weights(
                    self.actor_filename_target)
                self.critic_target.model.load_weights(
                    self.critic_filename_target)
                print("Model weights loaded from file: {}, {}, {}, {}".format(
                    self.actor_filename_local, self.critic_filename_local,
                    self.actor_filename_target,
                    self.critic_filename_target))  # [debug]
            except Exception as e:
                print("Unable to load model weights from file: {}, {}, {}, {}".
                      format(self.actor_filename_local,
                             self.critic_filename_local,
                             self.actor_filename_target,
                             self.critic_filename_target))
                print("{}: {}".format(e.__class__.__name__, str(e)))

        # Set the name of the weight files to this current time stamp, even if loaded from another timestamp.
        self.actor_filename_local = os.path.join(
            self.model_dir, "{}_actor_local{}".format(self.model_name,
                                                      self.model_ext))
        self.critic_filename_local = os.path.join(
            self.model_dir, "{}_critic_local{}".format(self.model_name,
                                                       self.model_ext))
        self.actor_filename_target = os.path.join(
            self.model_dir, "{}_actor_target{}".format(self.model_name,
                                                       self.model_ext))
        self.critic_filename_target = os.path.join(
            self.model_dir,
            "{}_critic_target{}".format(self.model_name, self.model_ext))

        if self.save_weights_every:
            print("Saving model weights",
                  "every {} episodes".format(self.save_weights_every)
                  if self.save_weights_every else "disabled")  # [debug]

        # Episode variables
        self.episode = 0
        self.reset_episode_vars()
Example #26
    def __init__(self, task):
        # Task (environment) information
        self.task = task  # should contain observation_space and action_space

        # Constrain state and action spaces
        self.state_size = 1  # position only
        self.state_range = self.task.observation_space.high[
            2] - self.task.observation_space.low[2]
        self.action_size = 1  # force only
        self.action_range = self.task.action_space.high[
            2] - self.task.action_space.low[2]

        print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
            self.task.observation_space.shape, self.task.action_space.shape,
            self.state_size, self.action_size))

        # Actor (Policy) Model
        self.action_low = self.task.action_space.low[2]
        self.action_high = self.task.action_space.high[2]
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(self.action_size)
        #print('Noise generated')

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)
        print('Replay Buffer initialized')

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters

        # Score tracker and learning parameters
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Episode variables
        self.reset_episode_vars()

        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'),
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        self.episode_num = 1
Example #27
    def __init__(self, task):
        # Current environment information
        self.task = task
        self.state_size = np.prod(self.task.observation_space.shape)
        self.state_low = self.task.observation_space.low
        self.state_high = self.task.observation_space.high
        self.state_range = self.state_high - self.state_low
        self.action_size = 3
        self.action_low = self.task.action_space.low[0:3]
        self.action_high = self.task.action_space.high[0:3]
        self.last_state = None
        self.last_action = None
        self.count = 0

        # Set logging directory and items
        self.stats_folder = util.get_param('out')
        self.stats_filename = os.path.join(self.stats_folder,
                                           "stats.csv")  # path to CSV file
        self.actor_local_weights = os.path.join(self.stats_folder,
                                                "actor_local_weights.hdf5")
        self.actor_target_weights = os.path.join(self.stats_folder,
                                                 "actor_target_weights.hdf5")
        self.critic_local_weights = os.path.join(self.stats_folder,
                                                 "critic_local_weights.hdf5")
        self.critic_target_weights = os.path.join(
            self.stats_folder, "critic_target_weights.hdf5")
        self.stats_columns = ['episode',
                              'total_reward']  # specify columns to save
        self.replay_buffer_pickle = os.path.join(self.stats_folder,
                                                 "replay_buffer.pickle")
        self.OU_noise_pickle = os.path.join(self.stats_folder,
                                            "OU_noise.pickle")

        # Initialise stats logging
        self.total_reward = 0.0
        try:
            df_stats = pd.read_csv(
                self.stats_filename)  # If stats already exists, load it
            self.episode_num = df_stats.tail(1)['episode'].item() + 1
            print("save file found")
        except Exception:
            self.total_reward = 0.0
            self.episode_num = 1
            print("no save file found")
        print("Saving {} to {}. Starting at episode {}".format(
            self.stats_columns, self.stats_folder,
            self.episode_num))  # [debug]

        # Actor (Policy) initialisation
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)
        try:
            self.actor_local.model.load_weights(self.actor_local_weights)
            self.actor_target.model.load_weights(self.actor_target_weights)
            print("saved actor weights loaded")
        except Exception:
            self.actor_target.model.set_weights(
                self.actor_local.model.get_weights())
            print("new actor weights initialised")
        # Critic (Value) initialisation
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)
        try:
            self.critic_local.model.load_weights(self.critic_local_weights)
            self.critic_target.model.load_weights(self.critic_target_weights)
            print("saved critic weights loaded")
        except Exception:
            self.critic_target.model.set_weights(
                self.critic_local.model.get_weights())
            print("new critic weights initialised")
        # Set replay buffer
        self.buffer_size = 100000
        self.batch_size = 64
        if os.path.exists(self.replay_buffer_pickle):
            with open(self.replay_buffer_pickle, 'rb') as handle:
                self.memory = pickle.load(handle)
            print("loading ReplayBuffer from pickle")
        else:
            self.memory = ReplayBuffer(self.buffer_size)

        # Algorithm parameters
        self.gamma = 0.99
        self.tau = 0.0001

        # Set noise process
        if os.path.exists(self.OU_noise_pickle):
            with open(self.OU_noise_pickle, 'rb') as handle:
                self.noise = pickle.load(handle)
            print("loading OU_Noise from pickle")
        else:
            self.noise = OUNoise(self.action_size)

        # Reset variables for new episode
        self.reset_episode_vars()
Example #28
    def __init__(self, task):

        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'),
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode',
                              'total_reward']  # specify columns to save
        self.episode_num = 1
        print("Saving stats {} to {}".format(self.stats_columns,
                                             self.stats_filename))  # [debug]

        # Save model weights to a file
        # Load/save parameters
        self.load_weights = True  # try to load weights from previously saved models
        self.save_weights_every = 1  # save weights every n episodes, None to disable
        self.model_dir = util.get_param(
            'out'
        )  # you can use a separate subdirectory for each task and/or neural net architecture
        self.model_name = "land"
        self.model_ext = ".h5"

        if self.load_weights or self.save_weights_every:
            self.actor_filename = os.path.join(
                self.model_dir, "{}_actor{}".format(self.model_name,
                                                    self.model_ext))
            self.critic_filename = os.path.join(
                self.model_dir, "{}_critic{}".format(self.model_name,
                                                     self.model_ext))
            print("Actor filename :", self.actor_filename)  #[debug]
            print("Critic filename:", self.critic_filename)  # [debug]

        # Task (environment) information
        self.task = task  # should contain observation_space and action_space
        #self.state_size = np.prod(self.task.observation_space.shape)
        self.state_size = 1
        self.state_range = self.task.observation_space.high - self.task.observation_space.low
        #self.action_size = np.prod(self.task.action_space.shape)
        self.action_size = 1
        self.action_range = self.task.action_space.high - self.task.action_space.low

        # Policy parameters
        #        self.w = np.random.normal(
        #            size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
        #            scale=(self.action_range / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

        # Score tracker and learning parameters
        self.best_w = None
        self.best_score = -np.inf
        self.noise_scale = 0.1

        # Episode variables
        self.episode = 0
        self.reset_episode_vars()

        # Actor (Policy) Model
        #        self.action_low = self.task.action_space.low
        #        self.action_high = self.task.action_space.high
        self.action_low = self.task.action_space.low[2:3]
        self.action_high = self.task.action_space.high[2:3]
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Load pre-trained model weights, if available
        if self.load_weights and os.path.isfile(self.actor_filename):
            try:
                self.actor_local.model.load_weights(self.actor_filename)
                self.critic_local.model.load_weights(self.critic_filename)
                print("Model weights loaded from file!")  # [debug]
            except Exception as e:
                print("Unable to load model weights from file!")
                print("{}: {}".format(e.__class__.__name__, str(e)))

        if self.save_weights_every:
            print("Saving model weights",
                  "every {} episodes".format(self.save_weights_every)
                  if self.save_weights_every else "disabled")  #[debug]

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(self.action_size)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.001  # for soft update of target parameters
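
# The `tau` parameter defined above drives DDPG's soft target-network updates.
# A minimal sketch of such an update helper is shown below, assuming the Keras
# `get_weights`/`set_weights` interface the examples use; the name `soft_update`
# is illustrative and not part of the original agents.
def soft_update(local_model, target_model, tau):
    """Blend local weights into the target: theta_target = tau*theta_local + (1 - tau)*theta_target."""
    new_weights = [tau * local_w + (1.0 - tau) * target_w
                   for local_w, target_w in zip(local_model.get_weights(),
                                                target_model.get_weights())]
    target_model.set_weights(new_weights)

# Hypothetical usage inside a learning step:
#     soft_update(self.actor_local.model, self.actor_target.model, self.tau)
#     soft_update(self.critic_local.model, self.critic_target.model, self.tau)
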
    def __init__(self, task):
        print('start DDPG')
        self.task = task
        self.state_size = 1
        self.action_size = 1
        self.space_low = self.task.observation_space.low[2:3]

        self.stats_filename = os.path.join(
            util.get_param('out'),
            "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
        self.stats_columns = ['episode', 'total_reward']  # specify columns to save

        # Episode variables
        self.reset_episode_vars()
        self.actor_learning_rate = 0.0001
        self.tau = 0.99  # soft update rate for target network parameters
        self.mini_batch_size = 64
        self.buffer_size = 100000
        self.critic_learning_rate = 0.001
        self.gamma = 0.88  # discount factor
        self.episode = 0

        # Load/save parameters
        self.load_weights = False  # try to load weights from previously saved models
        self.save_weights_every = 50  # save weights every n episodes, None to disable
        self.model_dir = util.get_param(
            'out')  # you can use a separate subdirectory for each task and/or neural net architecture
        self.model_name = "my-model4"  #my-model3
        self.model_ext = ".h5"
        if self.load_weights or self.save_weights_every:
            self.actor_filename = os.path.join(self.model_dir,
                                               "{}_actor{}".format(self.model_name, self.model_ext))
            self.critic_filename = os.path.join(self.model_dir,
                                                "{}_critic{}".format(self.model_name, self.model_ext))
            print("Actor filename :", self.actor_filename)  # [debug]
            print("Critic filename:", self.critic_filename)  # [debug]

        self.memory = ReplayBuffer(self.buffer_size)

        self.action_low = self.task.action_space.low[2:3]
        self.action_high = self.task.action_space.high[2:3]
        self.actor_local = Actor(self.state_size, self.action_size, self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size, self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        if self.load_weights and os.path.isfile(self.actor_filename):
            try:
                self.actor_local.model.load_weights(self.actor_filename)
                self.critic_local.model.load_weights(self.critic_filename)
                print("Model weights loaded from file!")  # [debug]
            except Exception as e:
                print("Unable to load model weights from file!")
                print("{}: {}".format(e.__class__.__name__, str(e)))

        if self.save_weights_every:
            print("Saving model weights every {} episodes".format(
                self.save_weights_every))

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())

        self.actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(self.action_size))
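
# Both `OUNoise` and `OrnsteinUhlenbeckActionNoise` above refer to an
# Ornstein-Uhlenbeck process, which produces temporally correlated exploration
# noise for the continuous actions. A minimal sketch of such a class follows;
# the parameter defaults are illustrative assumptions, not the values used by
# the agents above.
import numpy as np

class OUNoiseSketch:
    """Ornstein-Uhlenbeck process: dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)."""

    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.reset()

    def reset(self):
        """Restart the process at its long-run mean."""
        self.state = np.copy(self.mu)

    def sample(self):
        """Advance the process one step and return the new noise sample."""
        x = self.state
        dx = (self.theta * (self.mu - x) * self.dt
              + self.sigma * np.sqrt(self.dt) * np.random.randn(len(x)))
        self.state = x + dx
        return self.state
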
    def __init__(self, task):
        # Task (environment) information
        self.task = task  # should contain observation_space and action_space

        # Load/save parameters
        self.load_weights = True  # try to load weights from previously saved models
        self.save_weights_every = 5  # save weights every n episodes, None to disable
        self.model_dir = util.get_param(
            'out'
        )  # you can use a separate subdirectory for each task and/or neural net architecture
        self.model_name = "ddpg_takeoff"
        self.model_ext = ".h5"
        if self.load_weights or self.save_weights_every:
            self.actor_filename = os.path.join(
                self.model_dir, "{}_actor{}".format(self.model_name,
                                                    self.model_ext))
            self.critic_filename = os.path.join(
                self.model_dir, "{}_critic{}".format(self.model_name,
                                                     self.model_ext))
            print("Actor filename :", self.actor_filename)  # [debug]
            print("Critic filename:", self.critic_filename)  # [debug]

        # Constrain state and action spaces
        self.state_size = 1  # position only
        self.state_range = self.task.observation_space.high[
            2] - self.task.observation_space.low[2]
        self.action_size = 1  # force only
        self.action_range = self.task.action_space.high[
            2] - self.task.action_space.low[2]

        print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
            self.task.observation_space.shape, self.task.action_space.shape,
            self.state_size, self.action_size))

        # Actor (Policy) Model
        self.action_low = self.task.action_space.low[2]
        self.action_high = self.task.action_space.high[2]
        self.actor_local = Actor(self.state_size, self.action_size,
                                 self.action_low, self.action_high)
        self.actor_target = Actor(self.state_size, self.action_size,
                                  self.action_low, self.action_high)

        # Critic (Value) Model
        self.critic_local = Critic(self.state_size, self.action_size)
        self.critic_target = Critic(self.state_size, self.action_size)

        # Load pre-trained model weights, if available
        if self.load_weights and os.path.isfile(self.actor_filename):
            try:
                self.actor_local.model.load_weights(self.actor_filename)
                self.critic_local.model.load_weights(self.critic_filename)
                print("Model weights loaded from file!")  # [debug]
            except Exception as e:
                print("Unable to load model weights from file!")
                print("{}: {}".format(e.__class__.__name__, str(e)))

        if self.save_weights_every:
            print("Saving model weights every {} episodes".format(
                self.save_weights_every))  # [debug]

        # Initialize target model parameters with local model parameters
        self.critic_target.model.set_weights(
            self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(
            self.actor_local.model.get_weights())

        # Noise process
        self.noise = OUNoise(self.action_size)

        # Replay memory
        self.buffer_size = 100000
        self.batch_size = 64
        self.memory = ReplayBuffer(self.buffer_size)
        print('Replay Buffer initialized')

        # Algorithm parameters
        self.gamma = 0.99  # discount factor
        self.tau = 0.01  # for soft update of target parameters

        # Episode variables
        self.episode_num = 0
        self.reset_episode_vars()
        # Save episode stats
        self.stats_filename = os.path.join(
            util.get_param('out'), "ddpg_takeoff_stats_{}.csv".format(
                util.get_timestamp()))  # path to CSV file