def __init__(self, task):
    '''Initialize policy and other agent parameters.

    :param task: Should be able to access the following (OpenAI Gym spaces):
        task.observation_space  # i.e. state space
        task.action_space
    '''
    # init statistics writing
    self.stats_dir = util.get_param('out')
    if not os.path.exists(self.stats_dir):
        os.makedirs(self.stats_dir)
    self.stats_filename = os.path.join(
        self.stats_dir, 'stats_{}.csv'.format(util.get_timestamp()))
    self.stats_columns = ['episode', 'total_reward']
    print('Saving statistics {} to {}'.format(self.stats_columns, self.stats_filename))

    # init models writing
    self.models_dir = util.get_param('models')
    self.actor_best_model_file = self.models_dir + '/actor_best.pth'
    self.critic_best_model_file = self.models_dir + '/critic_best.pth'
    if not os.path.exists(self.models_dir):
        os.makedirs(self.models_dir)
def __init__(self, task):
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    self.action_space = 3
    self.acts = np.zeros(shape=self.task.action_space.shape)
    self.Q = defaultdict(lambda: np.zeros(self.action_space))

    # Episode variables
    self.reset_episode_vars()
    self.episode_num = 1
    self.step_count = 20

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    print("Saving stats to {}".format(self.stats_filename))  # [debug]

    # Save Q stats
    self.q_stats_filename = os.path.join(
        util.get_param('out'),
        "q_stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    print("Saving q stats to {}".format(self.q_stats_filename))  # [debug]

    # Save S-A stats
    self.sa_stats_filename = os.path.join(
        util.get_param('out'),
        "state_action_{}.csv".format(util.get_timestamp()))  # path to CSV file
    print("Saving states actions to {}".format(self.sa_stats_filename))  # [debug]
def __init__(self, task):
    # Task State Action
    self.task = task  # should contain observation_space and action_space
    self.state_size = 7
    self.action_size = 1

    # Actor (Policy) Model
    self.acts = np.zeros(shape=self.task.action_space.shape)
    self.actor_local = Actor(self.state_size, self.action_size)
    self.actor_target = Actor(self.state_size, self.action_size)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 128
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.005   # for soft update of target parameters
    self.count = 0
    self.reset_episode_vars()
    self.epsilon = 1
    self.episode_num = 1

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    print("Saving stats to {}".format(self.stats_filename))  # [debug]

    # Save Q stats
    self.q_stats_filename = os.path.join(
        util.get_param('out'),
        "q_stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    print("Saving q stats to {}".format(self.q_stats_filename))  # [debug]

    # Save S-A stats
    self.sa_stats_filename = os.path.join(
        util.get_param('out'),
        "state_action_{}.csv".format(util.get_timestamp()))  # path to CSV file
    print("Saving states actions to {}".format(self.sa_stats_filename))  # [debug]
def __init__(self, task):
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    self.state_size = 3
    self.action_size = 3
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Parameters
    self.actor_weights = os.path.join(util.get_param('out'), "actor_weights.h5")
    self.critic_weights = os.path.join(util.get_param('out'), "critic_weights.h5")

    # Actor (Policy) Model
    self.action_low = self.preprocess_state(self.task.action_space.low)
    self.action_high = self.preprocess_state(self.task.action_space.high)
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize local model parameters with loaded weights
    if os.path.isfile(self.critic_weights):
        self.critic_local.model.load_weights(self.critic_weights)
    if os.path.isfile(self.actor_weights):
        self.actor_local.model.load_weights(self.actor_weights)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters

    # Episode variables
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]
def setup_weights(self):
    # Weight load/save parameters
    self.load_weights = True
    self.save_weights_every = 50
    self.model_dir = util.get_param('out')
    self.model_name = "ddpg"
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        self.actor_filename = os.path.join(
            self.model_dir, "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir, "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename :", self.actor_filename)
        print("Critic filename:", self.critic_filename)
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))
    else:
        # No saved weights: initialize target model parameters from local models
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())
def __init__(self, task):
    self.task = task
    self.state_size = 3
    self.state_range = self.task.observation_space.high - self.task.observation_space.low
    self.action_size = 3
    self.action_range = (self.task.action_space.high -
                         self.task.action_space.low)[0:self.action_size]
    self.action_low = self.task.action_space.low[0:self.action_size]
    self.action_high = self.task.action_space.high[0:self.action_size]

    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())

    self.noise = OUNoise(self.action_size)
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)
    self.gamma = 0.0
    self.tau = 0.001

    self.reset_episode_vars()

    self.stats_filename = os.path.join(util.get_param('out'), task.__name__ + ".csv")
    self.stats_columns = ['Episode', 'Total_reward']
    self.episode_num = 1
    print("Save stats ... {} to {}".format(self.stats_columns, self.stats_filename))
def __init__(self):
    cube_size = 300.0  # env is cube_size x cube_size x cube_size
    self.observation_space = spaces.Box(
        np.array([-cube_size / 2, -cube_size / 2, 0.0,
                  -1.0, -1.0, -1.0, -1.0,
                  -np.inf, -np.inf, -np.inf]),
        np.array([cube_size / 2, cube_size / 2, cube_size,
                  1.0, 1.0, 1.0, 1.0,
                  np.inf, np.inf, np.inf]))
    max_force = 25
    max_torque = 0
    self.action_space = spaces.Box(
        np.array([-max_force, -max_force, -max_force,
                  -max_torque, -max_torque, -max_torque]),
        np.array([max_force, max_force, max_force,
                  max_torque, max_torque, max_torque]))
    self.phase = -1
    self.desc = [
        "Determine Fg = m*g",
        "Determine Z-Drag",
        "Determine X-Drag",
        "DONE"
    ]
    self.base_filename = util.get_param('out')
    self.Fg = 19.62
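# Sketch (added, hedged): the hard-coded self.Fg = 19.62 above is consistent with
# Fg = m*g for a ~2.0 kg vehicle at g = 9.81 m/s^2 (2.0 * 9.81 = 19.62). The
# helper below only illustrates that arithmetic; the function name and the
# assumed mass are not part of the original code.
def estimate_gravity_force(mass_kg=2.0, g=9.81):
    """Weight force the 'Determine Fg = m*g' calibration phase would expect."""
    return mass_kg * g  # 2.0 * 9.81 == 19.62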
def __init__(self, task):
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    # self.state_size = np.prod(self.task.observation_space.shape)
    self.state_size = 3
    self.state_range = self.task.observation_space.high - self.task.observation_space.low
    self.action_size = 3
    # self.action_size = np.prod(self.task.action_space.shape)
    self.action_range = self.task.action_space.high - self.task.action_space.low

    # Policy parameters
    self.w = np.random.normal(
        size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
        scale=(self.action_range[:3] / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        '{}_{}_stats_{}.csv'.format(self.task, self, util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print('Saving stats {} to {}'.format(self.stats_columns, self.stats_filename))  # [debug]
def __init__(self, task):
    # ---------------------------------------
    # Saving data
    self.stats_filename = os.path.join(
        util.get_param('out') + '/task04/',
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))

    # task_takeoff = deepcopy(task)
    # task_hover = deepcopy(task)
    # task_land = deepcopy(task)
    self.task = task
    self.task_takeoff = takeoff_b.TakeoffB()
    self.task_hover = hover_b.HoverB()
    self.task_land = land_b.LandB()
    self.o_task01_agent = task01_ddpg_agent_b.Task01_DDPG(self.task_takeoff)
    self.o_task02_agent = task02_ddpg_agent_b.Task02_DDPG(self.task_hover)
    self.o_task03_agent = task03_ddpg_agent_b.Task03_DDPG(self.task_land)

    # Current agent
    self.o_current_agent = self.o_task01_agent
    self.mode = 0
    self.episode_num = 0
    self.total_reward = 0.0
def save_episode_stats(self):
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.stats_columns = ['episode', 'total_reward']
    self.episode_num = 1
    print("### Saving stats {} to {}".format(self.stats_columns, self.stats_filename))
def __init__(self, task):
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    self.state_size = np.prod(self.task.observation_space.shape)
    self.state_range = self.task.observation_space.high - self.task.observation_space.low
    self.action_size = np.prod(self.task.action_space.shape)
    self.action_range = self.task.action_space.high - self.task.action_space.low

    # Policy parameters
    self.w = np.random.normal(
        size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
        scale=(self.action_range / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "dummy_stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.episode_num = 1
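# Sketch (added, hedged): the linear-policy snippets above only initialize self.w,
# self.best_w, self.best_score and self.noise_scale; the act/learn pair they imply
# is usually a dot-product policy plus naive random hill climbing. The method names
# (*_sketch) and the exact hill-climbing constants below are assumptions, not part
# of the original code.
def act_sketch(self, state):
    # Simple linear policy: action = state . w, with state shaped (1, state_size)
    return np.dot(state, self.w)

def learn_sketch(self, total_reward):
    # Keep the best weights seen so far; perturb them with Gaussian noise.
    if total_reward > self.best_score:
        self.best_score = total_reward
        self.best_w = self.w
        self.noise_scale = max(0.5 * self.noise_scale, 0.01)  # found better weights: tighten search
    else:
        self.noise_scale = min(2.0 * self.noise_scale, 3.2)   # no improvement: widen search
    self.w = self.best_w + self.noise_scale * np.random.normal(size=self.w.shape)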
def __init__(self, task):
    self.task = task

    # Constrain State and Action matrices
    self.state_size = 6
    self.action_size = 3
    # For debugging:
    print("Constrained State {} and Action {}; Original State {} and Action {}".format(
        self.state_size, self.action_size,
        self.task.observation_space.shape, self.task.action_space.shape))

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Save episode statistics for analysis
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.stats_columns = ['episode', 'total_reward']
    self.episode_num = 1
    print("Save Stats {} to {}".format(self.stats_columns, self.stats_filename))

    # Actor Model
    self.action_low = self.task.action_space.low[0:3]
    self.action_high = self.task.action_space.high[0:3]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Process noise
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(size=self.buffer_size)

    # Algorithm Parameters
    self.gamma = 0.99  # discount
    self.tau = 0.001   # soft update of targets

    # Episode vars
    self.reset_episode_vars()
def __init__(self, task):
    self.task = task  # should contain observation_space and action_space
    self.state_size = np.prod(self.task.observation_space.shape)
    self.state_range = self.task.observation_space.high - self.task.observation_space.low
    self.action_size = np.prod(self.task.action_space.shape)
    self.action_range = self.task.action_space.high - self.task.action_space.low

    # Actor (Policy) Model
    self.action_low = self.task.action_space.low
    self.action_high = self.task.action_space.high
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 248
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters

    # Policy parameters
    self.w = np.random.normal(
        size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
        scale=(self.action_range / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]
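# Sketch (added, hedged): the DDPG snippets above keep local/target copies of the
# actor and critic and a small tau "for soft update of target parameters". The
# update they refer to is typically the Polyak average below; the helper name is
# an assumption and the models are assumed to expose Keras-style
# get_weights()/set_weights().
def soft_update_sketch(local_model, target_model, tau):
    """Blend local weights into target weights: target <- tau*local + (1-tau)*target."""
    new_weights = [tau * lw + (1.0 - tau) * tw
                   for lw, tw in zip(local_model.get_weights(),
                                     target_model.get_weights())]
    target_model.set_weights(new_weights)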
def __init__(self):
    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'height', 'target_distance', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]
def __init__(self, task):
    self.task = task
    self.state_size = 3   # position only
    self.state_range = self.task.observation_space.high - self.task.observation_space.low
    self.action_size = 3  # force only
    self.action_range = (self.task.action_space.high -
                         self.task.action_space.low)[0:self.action_size]

    # Actor (Policy) model
    self.action_low = self.task.action_space.low[0:self.action_size]
    self.action_high = self.task.action_space.high[0:self.action_size]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.5   # discount factor
    self.tau = 0.001   # for soft update of target parameters

    self.reset_episode_vars()

    # Save episodes stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.stats_columns = ['episode', 'total_reward']
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))
def __init__(self, task):
    self.task = task

    # Constrain state and action spaces (limit to the z direction only)
    self.state_size = 1
    self.state_low = self.task.observation_space.low[2]
    self.state_high = self.task.observation_space.high[2]
    self.state_range = self.state_high - self.state_low
    self.action_range = (self.task.action_space.high - self.task.action_space.low)[2]
    self.action_low = self.task.action_space.low[2]
    self.action_high = self.task.action_space.high[2]

    # Discretize the z-force into evenly spaced actions
    stepping = (self.action_high - 10.0) / 16.0
    self.discrete_actions = np.arange(10.0, self.action_high + 0.1, stepping)
    self.action_size = len(self.discrete_actions)
    print('discrete action:', self.discrete_actions,
          ', action size: ', self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.9  # discount factor
    self.learning_rate = 0.001
    self.model = self.build_model()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.stats_columns = ['episode', 'total_reward']
    self.episode_num = 1
    print("saving stats {} to {}".format(self.stats_columns, self.stats_filename))

    # Exploration parameters
    self.epsilon = 1.0
    self.epsilon_decay = 0.96
    self.epsilon_min = 0.05
    self.learning = True
    self.reset_episode_vars()
    self.best_reward = -99999
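# Sketch (added, hedged): with the z-force discretized into self.discrete_actions,
# action selection is typically epsilon-greedy over the Q-network built above.
# The method name and the exact shape handling are assumptions; only the
# attributes referenced here come from the original snippet.
def act_sketch(self, state):
    if np.random.rand() <= self.epsilon:
        index = np.random.randint(self.action_size)   # explore: random discrete action
    else:
        q_values = self.model.predict(state)           # exploit: pick best predicted Q-value
        index = np.argmax(q_values[0])
    return self.discrete_actions[index]                # map index back to a z-force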
def __init__(self, task):
    self.task = task  # should contain observation_space and action_space
    self.state_shape = (9,)
    self.action_shape = (1,)
    self.nb_actions = np.prod(self.action_shape)
    self.action_range = self.task.action_space.high[2] - self.task.action_space.low[2]

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 128
    self.memory = ReplayBuffer(self.buffer_size, self.action_shape, self.state_shape)

    # Noise process
    self.noise = OUNoise(self.nb_actions)

    # Algorithm parameters
    self.gamma = 0.99       # discount factor
    self.tau = 0.005        # 0.005
    self.actor_lr = 0.0001  # 0.0001
    self.critic_lr = 0.001

    # Initialize
    self.a2c = A2C(self.state_shape, self.action_shape,
                   actor_lr=self.actor_lr, critic_lr=self.critic_lr,
                   gamma=self.gamma)
    self.initialize()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]

    # Initial episode vars
    self.last_state = None
    self.last_action = None
    self.total_reward = 0.0
    self.count = 0
    self.acts = np.zeros(shape=self.task.action_space.shape)
def __init__(self, task, action_min, action_max, state_min, state_max):
    # Task (environment) information
    self.task = task              # should contain observation_space and action_space
    self.min_action = action_min  # define minimum and maximum action
    self.max_action = action_max
    self.min_stat = state_min     # define minimum and maximum state
    self.max_stat = state_max
    self.learn_when_done = False  # defines if the agent shall only learn at the end of each episode

    # Constrain state and action spaces
    self.state_size = self.max_stat - self.min_stat + 1       # position only
    self.action_size = self.max_action - self.min_action + 1  # force only
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # calc state space minimum and range
    self.state_low = self.task.observation_space.low[self.min_stat:self.max_stat + 1]
    self.state_range = self.task.observation_space.high[self.min_stat:self.max_stat + 1] - self.state_low
    # self.action_size = np.prod(self.task.action_space.shape)

    # calc action space minimum, maximum and range
    self.action_low = self.task.action_space.low[self.min_action:self.max_action + 1]
    self.action_high = self.task.action_space.high[self.min_action:self.max_action + 1]
    self.action_range = self.action_high - self.action_low

    # Replay memory
    self.epsilon = 0.0
    self.batch_size = 64
    self.buffer_size = 100000
    self.memory = ReplayBuffer(self.buffer_size)

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward', 'learning_rate']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]

    # Episode variables
    self.reset_episode_vars()
def __init__(self, task):
    self.task = task
    self.state_size = 3
    self.action_size = 3

    # Set action space limits
    self.action_low = self.task.action_space.low[0:3]
    self.action_high = self.task.action_space.high[0:3]
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))
    action = [self.action_size, self.action_low, self.action_high]

    # Initialize network
    # Actor
    self.actor_local = Actor(self.state_size, action)
    self.actor_target = Actor(self.state_size, action)
    # Critic
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)
    self.setup_weights()

    # Noise
    self.noise = OUNoise(self.action_size)

    # Replay buffer
    self.buffer_size = 100000
    self.batch_size = 128
    self.memory = ReplayBuffer(self.buffer_size)

    # Hyper params
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters

    # Log file
    self.stats = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.episode_no = 1
    self.stats_columns = ['episodes', 'total_reward']
    print("Saving stats {} to {}".format(self.stats_columns, self.stats))

    # Episode variables
    self.reset_episode_vars()
def __init__(self, task):
    self.task = task
    self.state_size = 3
    self.action_range = (self.task.action_space.high -
                         self.task.action_space.low)[FORCE_Z]
    self.action_low = self.task.action_space.low[FORCE_Z]
    self.action_high = self.task.action_space.high[FORCE_Z]
    self.action_map = np.arange(self.action_high, 15.0, -4.0)[::-1]
    self.action_size = len(self.action_map)
    self.memory = deque(maxlen=MEMORY_SIZE)
    self.model = self._build_model()
    self.epsilon = 3.0
    self.reset_episode_vars()

    self.stats_filename = os.path.join(util.get_param('out'), task.__name__ + ".csv")
    self.stats_columns = ['episode', 'total_reward', 'epsilon']
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))
def __init__(self, task):
    self.task = task  # should contain observation_space and action_space
    self.state_size = np.prod(self.task.observation_space.shape)
    self.state_range = self.task.observation_space.high - self.task.observation_space.low
    self.action_size = np.prod(self.task.action_space.shape)
    self.action_range = self.task.action_space.high - self.task.action_space.low
    self.action_low = self.task.action_space.low[0:6]
    self.action_high = self.task.action_space.high[0:6]
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Policy parameters
    self.w = np.random.normal(
        size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
        scale=(self.action_range / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "Hoverstats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]
def __init__(self, task):
    # Task (environment) information
    self.task = task      # should contain observation_space and action_space
    self.state_size = 3   # position only
    self.action_size = 3  # force only

    # Actor Model
    self.action_low = self.task.action_space.low[0:3]
    self.action_high = self.task.action_space.high[0:3]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.9
    self.tau = 0.001

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))

    # Episode variables
    self.reset_episode_vars()
def __init__(self, task):
    # Task (environment) information
    self.task = task
    self.state_size = 3
    self.action_size = 3
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))
    self.action_low = self.task.action_space.low
    self.action_high = self.task.action_space.high

    # Actor (policy) model
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Q-value) model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay Buffer
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Policy parameters
    self.gamma = 0.99
    self.tau = 0.001

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))
    self.stats_columns = ["episode", "total_reward"]
    self.episode_num = 1

    # Episode variables
    self.reset_episode_vars()
def step(self, state, reward, done):
    state = self.preprocess_state(state)
    action = self.act(state)

    if self.last_state is not None and self.last_action is not None:
        self.add_memory(self.last_state, self.last_action, reward, state, done)
    if len(self.memory) > BATCH_SIZE:
        self.replay(BATCH_SIZE)

    self.last_state = state
    self.last_action = action
    self.total_reward += reward

    if done:
        print("Score ... {:.2f}, Epsilon ... {:.2f}".format(
            self.total_reward, self.epsilon))
        self.write_stats([self.episode_num, self.total_reward, self.epsilon])
        self.episode_num += 1
        if self.episode_num % 250 == 0:
            filename = os.path.join(util.get_param('out'), "dqn_weights.h5")
            self.save_weights(filename)
        self.reset_episode_vars()

    return self.postprocess_action(action)
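# Sketch (added, hedged): step() above calls self.replay(BATCH_SIZE), but that
# method is not shown. A typical DQN-style replay consistent with the calls made
# here might look like the following; the method name, the epsilon attributes,
# and the module-level `import random` / `import numpy as np` are all assumptions
# about the unseen implementation, not part of the original code.
def replay_sketch(self, batch_size):
    minibatch = random.sample(self.memory, batch_size)
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            # Bootstrap with the current Q-network's estimate for the next state
            target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
        target_f = self.model.predict(state)
        target_f[0][action] = target
        self.model.fit(state, target_f, epochs=1, verbose=0)
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay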
def __init__(self, task):
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    # self.state_size = np.prod(self.task.observation_space.shape)
    # self.task.observation_space.high = self.task.observation_space.high[2:3]
    # self.task.observation_space.low = self.task.observation_space.low[2:3]
    self.state_range = self.task.observation_space.high - self.task.observation_space.low
    # self.action_size = np.prod(self.task.action_space.shape)
    self.action_range = self.task.action_space.high - self.task.action_space.low
    self.task.observation_space.high = self.task.observation_space.high[2:3]
    self.task.observation_space.low = self.task.observation_space.low[2:3]
    # self.state_range = self.state_range[2:3]
    # self.action_range = self.action_range[2:3]

    # Constrain state and action spaces
    self.state_size = 1   # position only
    self.action_size = 1  # force only
    self.action_low = self.task.action_space.low[2:3]
    self.action_high = self.task.action_space.high[2:3]
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # # Policy parameters
    # self.w = np.random.normal(
    #     size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
    #     scale=(self.action_range / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Actor (Policy) Model
    # self.action_low = self.task.action_space.low
    # self.action_high = self.task.action_space.high
    self.state_range = self.state_range[2:3]
    self.action_range = self.action_range[2:3]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters

    # Episode variables
    # self.reset_episode_vars()

    # ---------------------------------------
    # Saving data
    self.stats_filename = os.path.join(
        util.get_param('out') + '/task01/',
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))

    # Load/save parameters
    self.load_weights = True     # try to load weights from previously saved models
    self.save_weights_every = 1  # save weights every n episodes, None to disable
    self.model_dir = util.get_param('out') + '/task01'  # you can use a separate subdirectory for each task and/or neural net architecture
    self.model_name = "my-model_" + util.get_timestamp()
    self.model_ext = ".h5"
    # if self.load_weights or self.save_weights_every:
    #     self.actor_filename_local = os.path.join(
    #         self.model_dir, "{}_actor_local{}".format(self.model_name, self.model_ext))
    #     self.critic_filename_local = os.path.join(
    #         self.model_dir, "{}_critic_local{}".format(self.model_name, self.model_ext))
    #     self.actor_filename_target = os.path.join(
    #         self.model_dir, "{}_actor_target{}".format(self.model_name, self.model_ext))
    #     self.critic_filename_target = os.path.join(
    #         self.model_dir, "{}_critic_target{}".format(self.model_name, self.model_ext))
    #     print("Actor local filename :", self.actor_filename_local)     # [debug]
    #     print("Critic local filename:", self.critic_filename_local)    # [debug]
    #     print("Actor target filename :", self.actor_filename_target)   # [debug]
    #     print("Critic target filename:", self.critic_filename_target)  # [debug]

    # Load pre-trained model weights, if available
    # if self.load_weights and os.path.isfile(self.actor_filename_local):
    if self.load_weights:
        try:
            date_of_file = '2018-02-20_11-28-13'
            # date_of_file = '2018-02-20_11-22-27'
            self.actor_filename_local = os.path.join(
                self.model_dir, 'my-model_{}_actor_local.h5'.format(date_of_file))
            self.critic_filename_local = os.path.join(
                self.model_dir, 'my-model_{}_critic_local.h5'.format(date_of_file))
            self.actor_filename_target = os.path.join(
                self.model_dir, 'my-model_{}_actor_target.h5'.format(date_of_file))
            self.critic_filename_target = os.path.join(
                self.model_dir, 'my-model_{}_critic_target.h5'.format(date_of_file))
            self.actor_local.model.load_weights(self.actor_filename_local)
            self.critic_local.model.load_weights(self.critic_filename_local)
            self.actor_target.model.load_weights(self.actor_filename_target)
            self.critic_target.model.load_weights(self.critic_filename_target)
            print("Model weights loaded from file: {}, {}, {}, {}".format(
                self.actor_filename_local, self.critic_filename_local,
                self.actor_filename_target, self.critic_filename_target))  # [debug]
        except Exception as e:
            print("Unable to load model weights from file: {}, {}, {}, {}".format(
                self.actor_filename_local, self.critic_filename_local,
                self.actor_filename_target, self.critic_filename_target))
            print("{}: {}".format(e.__class__.__name__, str(e)))

    # Set the name of the weight files to this current time stamp, even if loaded from another timestamp.
    self.actor_filename_local = os.path.join(
        self.model_dir, "{}_actor_local{}".format(self.model_name, self.model_ext))
    self.critic_filename_local = os.path.join(
        self.model_dir, "{}_critic_local{}".format(self.model_name, self.model_ext))
    self.actor_filename_target = os.path.join(
        self.model_dir, "{}_actor_target{}".format(self.model_name, self.model_ext))
    self.critic_filename_target = os.path.join(
        self.model_dir, "{}_critic_target{}".format(self.model_name, self.model_ext))
    if self.save_weights_every:
        print("Saving model weights",
              "every {} episodes".format(self.save_weights_every)
              if self.save_weights_every else "disabled")  # [debug]

    # Episode variables
    self.episode = 0
    self.reset_episode_vars()
def __init__(self, task):
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space

    # Constrain state and action spaces
    self.state_size = 1   # position only
    self.state_range = (self.task.observation_space.high[2] -
                        self.task.observation_space.low[2])
    self.action_size = 1  # force only
    self.action_range = (self.task.action_space.high[2] -
                         self.task.action_space.low[2])
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Actor (Policy) Model
    self.action_low = self.task.action_space.low[2]
    self.action_high = self.task.action_space.high[2]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)
    # print('Noise generated')

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)
    print('Replay Buffer initialized')

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.episode_num = 1
def __init__(self, task):
    # Current environment information
    self.task = task
    self.state_size = np.prod(self.task.observation_space.shape)
    self.state_low = self.task.observation_space.low
    self.state_high = self.task.observation_space.high
    self.state_range = self.state_high - self.state_low
    self.action_size = 3
    self.action_low = self.task.action_space.low[0:3]
    self.action_high = self.task.action_space.high[0:3]
    self.last_state = None
    self.last_action = None
    self.count = 0

    # Set logging directory and items
    self.stats_folder = util.get_param('out')
    self.stats_filename = os.path.join(self.stats_folder, "stats.csv")  # path to CSV file
    self.actor_local_weights = os.path.join(self.stats_folder, "actor_local_weights.hdf5")
    self.actor_target_weights = os.path.join(self.stats_folder, "actor_target_weights.hdf5")
    self.critic_local_weights = os.path.join(self.stats_folder, "critic_local_weights.hdf5")
    self.critic_target_weights = os.path.join(self.stats_folder, "critic_target_weights.hdf5")
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.replay_buffer_pickle = os.path.join(self.stats_folder, "replay_buffer.pickle")
    self.OU_noise_pickle = os.path.join(self.stats_folder, "OU_noise.pickle")

    # Initialise stats logging
    self.total_reward = 0.0
    try:
        df_stats = pd.read_csv(self.stats_filename)  # If stats already exists, load it
        self.episode_num = df_stats.tail(1)['episode'].item() + 1
        print("save file found")
    except:
        self.total_reward = 0.0
        self.episode_num = 1
        print("no save file found")
    print("Saving {} to {}. Starting at episode {}".format(
        self.stats_columns, self.stats_folder, self.episode_num))  # [debug]

    # Actor (Policy) initialisation
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)
    try:
        self.actor_local.model.load_weights(self.actor_local_weights)
        self.actor_target.model.load_weights(self.actor_target_weights)
        print("saved actor weights loaded")
    except:
        self.actor_target.model.set_weights(self.actor_local.model.get_weights())
        print("new actor weights initialised")

    # Critic (Value) initialisation
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)
    try:
        self.critic_local.model.load_weights(self.critic_local_weights)
        self.critic_target.model.load_weights(self.critic_target_weights)
        print("saved critic weights loaded")
    except:
        self.critic_target.model.set_weights(self.critic_local.model.get_weights())
        print("new critic weights initialised")

    # Set replay buffer
    self.buffer_size = 100000
    self.batch_size = 64
    if os.path.exists(self.replay_buffer_pickle):
        with open(self.replay_buffer_pickle, 'rb') as handle:
            self.memory = pickle.load(handle)
        print("loading ReplayBuffer from pickle")
    else:
        self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99
    self.tau = 0.0001

    # Set noise process
    if os.path.exists(self.OU_noise_pickle):
        with open(self.OU_noise_pickle, 'rb') as handle:
            self.noise = pickle.load(handle)
        print("loading OU_Noise from pickle")
    else:
        self.noise = OUNoise(self.action_size)

    # Reset variables for new episode
    self.reset_episode_vars()
def __init__(self, task):
    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save
    self.episode_num = 1
    print("Saving stats {} to {}".format(self.stats_columns, self.stats_filename))  # [debug]

    # Save model weights to a file
    # Load/save parameters
    self.load_weights = True     # try to load weights from previously saved models
    self.save_weights_every = 1  # save weights every n episodes, None to disable
    self.model_dir = util.get_param('out')  # you can use a separate subdirectory for each task and/or neural net architecture
    self.model_name = "land"
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        self.actor_filename = os.path.join(
            self.model_dir, "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir, "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename :", self.actor_filename)   # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    # Task (environment) information
    self.task = task  # should contain observation_space and action_space
    # self.state_size = np.prod(self.task.observation_space.shape)
    self.state_size = 1
    self.state_range = self.task.observation_space.high - self.task.observation_space.low
    # self.action_size = np.prod(self.task.action_space.shape)
    self.action_size = 1
    self.action_range = self.task.action_space.high - self.task.action_space.low

    # Policy parameters
    # self.w = np.random.normal(
    #     size=(self.state_size, self.action_size),  # weights for simple linear policy: state_space x action_space
    #     scale=(self.action_range / (2 * self.state_size)).reshape(1, -1))  # start producing actions in a decent range

    # Score tracker and learning parameters
    self.best_w = None
    self.best_score = -np.inf
    self.noise_scale = 0.1

    # Episode variables
    self.episode = 0
    self.reset_episode_vars()

    # Actor (Policy) Model
    # self.action_low = self.task.action_space.low
    # self.action_high = self.task.action_space.high
    self.action_low = self.task.action_space.low[2:3]
    self.action_high = self.task.action_space.high[2:3]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Load pre-trained model weights, if available
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))
    if self.save_weights_every:
        print("Saving model weights",
              "every {} episodes".format(self.save_weights_every)
              if self.save_weights_every else "disabled")  # [debug]

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.001   # for soft update of target parameters
def __init__(self, task):
    print('start DDPG')
    self.task = task
    self.state_size = 1
    self.action_size = 1
    self.space_low = self.task.observation_space.low[2:3]

    self.stats_filename = os.path.join(
        util.get_param('out'),
        "stats_{}.csv".format(util.get_timestamp()))  # path to CSV file
    self.stats_columns = ['episode', 'total_reward']  # specify columns to save

    # Episode variables
    self.reset_episode_vars()

    self.actor_learning_rate = 0.0001
    self.tau = 0.99
    self.mini_batch_size = 64
    self.buffer_size = 100000
    self.critic_learning_rate = 0.001
    self.gamma = 0.88
    self.episode = 0

    # Load/save parameters
    self.load_weights = False     # try to load weights from previously saved models
    self.save_weights_every = 50  # save weights every n episodes, None to disable
    self.model_dir = util.get_param('out')  # you can use a separate subdirectory for each task and/or neural net architecture
    self.model_name = "my-model4"  # my-model3
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        self.actor_filename = os.path.join(
            self.model_dir, "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir, "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename :", self.actor_filename)   # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    self.memory = ReplayBuffer(self.buffer_size)

    self.action_low = self.task.action_space.low[2:3]
    self.action_high = self.task.action_space.high[2:3]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))
    if self.save_weights_every:
        print("Saving model weights",
              "every {} episodes".format(self.save_weights_every)
              if self.save_weights_every else "disabled")

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    self.actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(self.action_size))
def __init__(self, task):
    # Task (environment) information
    self.task = task  # should contain observation_space and action_space

    # Load/save parameters
    self.load_weights = True     # try to load weights from previously saved models
    self.save_weights_every = 5  # save weights every n episodes, None to disable
    self.model_dir = util.get_param('out')  # you can use a separate subdirectory for each task and/or neural net architecture
    self.model_name = "ddpg_takeoff"
    self.model_ext = ".h5"
    if self.load_weights or self.save_weights_every:
        self.actor_filename = os.path.join(
            self.model_dir, "{}_actor{}".format(self.model_name, self.model_ext))
        self.critic_filename = os.path.join(
            self.model_dir, "{}_critic{}".format(self.model_name, self.model_ext))
        print("Actor filename :", self.actor_filename)   # [debug]
        print("Critic filename:", self.critic_filename)  # [debug]

    # Constrain state and action spaces
    self.state_size = 1   # position only
    self.state_range = (self.task.observation_space.high[2] -
                        self.task.observation_space.low[2])
    self.action_size = 1  # force only
    self.action_range = (self.task.action_space.high[2] -
                         self.task.action_space.low[2])
    print("Original spaces: {}, {}\nConstrained spaces: {}, {}".format(
        self.task.observation_space.shape, self.task.action_space.shape,
        self.state_size, self.action_size))

    # Actor (Policy) Model
    self.action_low = self.task.action_space.low[2]
    self.action_high = self.task.action_space.high[2]
    self.actor_local = Actor(self.state_size, self.action_size,
                             self.action_low, self.action_high)
    self.actor_target = Actor(self.state_size, self.action_size,
                              self.action_low, self.action_high)

    # Critic (Value) Model
    self.critic_local = Critic(self.state_size, self.action_size)
    self.critic_target = Critic(self.state_size, self.action_size)

    # Load pre-trained model weights, if available
    if self.load_weights and os.path.isfile(self.actor_filename):
        try:
            self.actor_local.model.load_weights(self.actor_filename)
            self.critic_local.model.load_weights(self.critic_filename)
            print("Model weights loaded from file!")  # [debug]
        except Exception as e:
            print("Unable to load model weights from file!")
            print("{}: {}".format(e.__class__.__name__, str(e)))
    if self.save_weights_every:
        print("Saving model weights",
              "every {} episodes".format(self.save_weights_every)
              if self.save_weights_every else "disabled")  # [debug]

    # Initialize target model parameters with local model parameters
    self.critic_target.model.set_weights(self.critic_local.model.get_weights())
    self.actor_target.model.set_weights(self.actor_local.model.get_weights())

    # Noise process
    self.noise = OUNoise(self.action_size)

    # Replay memory
    self.buffer_size = 100000
    self.batch_size = 64
    self.memory = ReplayBuffer(self.buffer_size)
    print('Replay Buffer initialized')

    # Algorithm parameters
    self.gamma = 0.99  # discount factor
    self.tau = 0.01    # for soft update of target parameters

    # Episode variables
    self.episode_num = 0
    self.reset_episode_vars()

    # Save episode stats
    self.stats_filename = os.path.join(
        util.get_param('out'),
        "ddpg_takeoff_stats_{}.csv".format(util.get_timestamp()))  # path to CSV file