def __init__(self, experiment_parameters, run_results_dir):
    self.run_results_dir = run_results_dir
    self.tnet_update_freq = check_attribute(experiment_parameters, 'tnet_update_freq', 1)
    self.buffer_size = check_attribute(experiment_parameters, 'buffer_size', 10000)
    self.learning_rate = check_attribute(experiment_parameters, 'lr', 0.001)
    self.environment_name = check_attribute(experiment_parameters, 'env', 'mountain_car',
                                            choices=['mountain_car', 'catcher', 'puddle_world'])
    self.ppa = check_attribute(experiment_parameters, 'ppa', 0.1)
    self.gated = check_attribute(experiment_parameters, 'gated', False)
    self.gate_function = check_attribute(experiment_parameters, 'gate_function', 'tanh')
    self.verbose = experiment_parameters.verbose

    self.config = Config()
    self.config.store_summary = True
    # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
    self.summary = {}
    self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

    """ Parameters for the Environment """
    self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
    self.config.norm_state = True
    self.config.current_step = 0

    """ Parameters for the Function Approximator """
    self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
    self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
    self.config.gamma = 1.0
    self.config.epsilon = 0.1
    self.config.optim = "adam"
    self.config.lr = self.learning_rate
    self.config.batch_size = 32
    self.config.ppa = self.ppa
    self.config.h1_dims = 32
    self.config.h2_dims = 256
    self.config.gate_function = self.gate_function
    # DQN parameters
    self.config.buffer_size = self.buffer_size
    self.config.tnet_update_freq = self.tnet_update_freq
    self.config.input_dims = self.config.state_dims
    if self.gated:
        self.config.gated = True
    else:
        self.config.gated = False

    self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config,
                                                                      summary=self.summary)
    self.fa = ActionDQN(config=self.config, summary=self.summary)
    self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                          summary=self.summary)
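
# The snippets in this section rely on a small helper -- called check_attribute in the
# snippet above and check_attribute_else_default elsewhere -- that reads an attribute from
# the experiment-parameters namespace and falls back to a default, optionally validating
# it against a list of choices. The project's own implementation is not shown here; the
# following is a minimal sketch of the assumed behavior, with the signature inferred from
# the call sites.
def check_attribute_else_default(parameters, attribute_name, default_value, choices=None):
    value = getattr(parameters, attribute_name, default_value)
    if value is None:
        value = default_value
    if choices is not None and value not in choices:
        raise ValueError(f"{attribute_name} must be one of {choices}, got {value!r}")
    return value

# Assumption: check_attribute (used above) behaves the same way.
check_attribute = check_attribute_else_default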

def __init__(self, experiment_parameters, run_results_dir):
    self.run_results_dir = run_results_dir
    self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 20000)
    self.tnet_update_freq = check_attribute_else_default(experiment_parameters, 'tnet_update_freq', 10)
    self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                         choices=['mountain_car', 'catcher'])
    self.verbose = experiment_parameters.verbose

    # parameters specific to the parameter sweep
    self.learning_rate = check_attribute_else_default(experiment_parameters, 'lr', 0.001)
    self.l1_reg = check_attribute_else_default(experiment_parameters, 'l1_reg', True)
    self.weights_reg = check_attribute_else_default(experiment_parameters, 'weights_reg', True)
    self.reg_factor = check_attribute_else_default(experiment_parameters, 'reg_factor', 0.1)

    self.config = Config()
    self.config.store_summary = True
    # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
    self.summary = {}
    self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

    """ Parameters for the Environment """
    # Same for every experiment
    self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
    self.config.norm_state = True
    self.config.current_step = 0

    """ Parameters for the Function Approximator """
    # Same for every experiment
    self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
    self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
    self.config.gamma = 1.0
    self.config.epsilon = 0.1
    self.config.optim = "adam"
    self.config.batch_size = 32
    # Selected after finding the best parameter combinations for DQN with a given buffer size
    self.config.buffer_size = self.buffer_size
    self.config.tnet_update_freq = self.tnet_update_freq
    # These are the parameters that we are sweeping over
    self.config.lr = self.learning_rate
    self.config.reg_method = 'l1' if self.l1_reg else 'l2'
    self.config.weights_reg = self.weights_reg
    self.config.reg_factor = self.reg_factor

    self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config,
                                                                      summary=self.summary)
    self.fa = RegularizedNeuralNetwork(config=self.config, summary=self.summary)
    self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                          summary=self.summary)
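
# The __init__ above reads its sweep parameters ('lr', 'l1_reg', 'weights_reg',
# 'reg_factor') off the experiment-parameters namespace. A minimal, hypothetical sweep
# driver is sketched below; it assumes this __init__ belongs to an Experiment class with a
# run() method like the ones shown later in this section, and the value grids, the
# SimpleNamespace container, and the directory layout are illustrative assumptions.
import os
from types import SimpleNamespace

def run_regularization_sweep(results_root, env='mountain_car', runs_per_setting=1):
    for lr in (0.01, 0.001, 0.0001):            # hypothetical grid
        for reg_factor in (0.1, 0.01, 0.001):   # hypothetical grid
            params = SimpleNamespace(env=env, lr=lr, reg_factor=reg_factor,
                                     l1_reg=True, weights_reg=True,
                                     buffer_size=20000, tnet_update_freq=10,
                                     verbose=False)
            for run in range(runs_per_setting):
                run_dir = os.path.join(results_root, f'lr{lr}_reg{reg_factor}', f'run{run}')
                os.makedirs(run_dir, exist_ok=True)
                Experiment(params, run_dir).run()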

class Experiment:

    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.num_tilings = check_attribute_else_default(experiment_parameters, 'num_tilings', 32)
        self.tiling_length = check_attribute_else_default(experiment_parameters, 'tiling_length', 10)
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'learning_rate', 0.001)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        self.summary = {}

        """ Parameters for the Environment """
        self.config.max_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['max_actions']
        self.config.norm_state = True

        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.lr = self.learning_rate / self.num_tilings  # step size scaled by the number of tilings
        self.config.num_tilings = self.num_tilings
        self.config.tiling_length = self.tiling_length
        self.config.scaling_factor = 1/2
        self.config.scaling_offset = 1

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config,
                                                                          summary=self.summary)
        self.fa = TileCoderFA(config=self.config)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)

    def run(self):
        for i in range(ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_episodes']):
            episode_number = i + 1
            self.rl_agent.train(1)
            if self.verbose and (((i + 1) % 10 == 0) or i == 0):
                print("Episode Number:", episode_number)
                print('\tThe cumulative reward was:', self.summary['return_per_episode'][-1])
            if self.environment_name == 'catcher':
                assert isinstance(self.env, Catcher3)
                if self.env.timeout:
                    break
        self.save_run_summary()
        # self.save_tilecoder()

    def save_tilecoder(self):
        tilecoder_path = os.path.join(self.run_results_dir, 'tilecoder.p')
        with open(tilecoder_path, mode='wb') as tilecoder_file:
            pickle.dump(self.rl_agent.fa, tilecoder_file)

    def save_run_summary(self):
        total_reward = np.sum(self.summary['reward_per_step'])
        tr_path = os.path.join(self.run_results_dir, 'total_reward.p')
        with open(tr_path, mode='wb') as tr_file:
            pickle.dump(total_reward, tr_file)
        config_path = os.path.join(self.run_results_dir, 'config.p')
        with open(config_path, mode='wb') as config_file:
            pickle.dump(self.config, config_file)

class Experiment:

    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.tnet_update_freq = check_attribute_else_default(experiment_parameters, 'tnet_update_freq', 1)
        self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 10000)
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'lr', 0.001)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher', 'puddle_world'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0

        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.lr = self.learning_rate
        self.config.batch_size = 32
        # DQN parameters
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config,
                                                                          summary=self.summary)
        self.fa = VanillaDQN(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)

    def run(self):
        prev_idx = 0
        current_episode_number = 1
        while self.config.current_step != self.config.number_of_steps:
            self.rl_agent.train(1)
            if self.verbose and ((current_episode_number % 10 == 0) or (current_episode_number - 1 == 0)):
                print("Episode Number:", current_episode_number)
                print('\tThe cumulative reward was:', self.summary['return_per_episode'][-1])
                print('\tThe cumulative loss was:',
                      np.round(np.sum(self.summary['loss_per_step'][prev_idx:]), 2))
                print('\tCurrent environment steps:', self.config.current_step)
            prev_idx = self.config.current_step
            current_episode_number += 1
        if self.verbose:
            print("Number of episodes completed:", len(self.summary['return_per_episode']))
            print("The total cumulative reward was:", np.sum(self.summary['reward_per_step']))
            print("Current environment steps:", self.config.current_step)
        self.save_network_params()
        self.save_run_summary()

    def save_network_params(self):
        params_path = os.path.join(self.run_results_dir, 'final_network_weights.pt')
        torch.save(self.fa.net.state_dict(), params_path)

    def save_run_summary(self):
        summary_path = os.path.join(self.run_results_dir, 'summary.p')
        with open(summary_path, mode='wb') as summary_file:
            pickle.dump(self.summary, summary_file)
        config_path = os.path.join(self.run_results_dir, 'config.p')
        with open(config_path, mode='wb') as config_file:
            pickle.dump(self.config, config_file)
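
# A typical entry point for one of these Experiment classes only needs to provide a
# namespace exposing the attributes read in __init__ (plus 'verbose') and a directory for
# the run's results. The sketch below is a hypothetical driver for the VanillaDQN
# experiment above; the argument names mirror the attributes it reads, but the script
# layout and the results-directory naming are assumptions.
import argparse
import os

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--env', default='mountain_car',
                        choices=['mountain_car', 'catcher', 'puddle_world'])
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--buffer_size', type=int, default=10000)
    parser.add_argument('--tnet_update_freq', type=int, default=1)
    parser.add_argument('--run_number', type=int, default=1)
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()

    run_results_dir = os.path.join('results', args.env, f'run_{args.run_number}')
    os.makedirs(run_results_dir, exist_ok=True)
    Experiment(args, run_results_dir).run()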

def __init__(self, experiment_parameters, run_results_dir):
    self.run_results_dir = run_results_dir
    self.learning_rate = check_attribute_else_default(experiment_parameters, 'lr', 0.001)
    self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 20000)
    self.tnet_update_freq = check_attribute_else_default(experiment_parameters, 'tnet_update_freq', 10)
    self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                         choices=['mountain_car', 'catcher'])
    self.verbose = experiment_parameters.verbose

    # parameters specific to distributional regularizers
    self.beta = check_attribute_else_default(experiment_parameters, 'beta', 0.1)
    self.reg_factor = check_attribute_else_default(experiment_parameters, 'reg_factor', 0.1)
    self.use_gamma = check_attribute_else_default(experiment_parameters, 'use_gamma', False)
    self.beta_lb = check_attribute_else_default(experiment_parameters, 'beta_lb', False)

    self.config = Config()
    self.config.store_summary = True
    # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
    self.summary = {}
    self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

    """ Parameters for the Environment """
    self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
    self.config.norm_state = True
    self.config.current_step = 0

    """ Parameters for the Function Approximator """
    self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
    self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
    self.config.gamma = 1.0
    self.config.epsilon = 0.1
    self.config.optim = "adam"
    self.config.batch_size = 32
    # DQN parameters
    self.config.lr = self.learning_rate
    self.config.buffer_size = self.buffer_size
    self.config.tnet_update_freq = self.tnet_update_freq
    # parameters specific to distributional regularizers
    self.config.beta = self.beta
    self.config.reg_factor = self.reg_factor
    self.config.use_gamma = self.use_gamma
    self.config.beta_lb = self.beta_lb

    self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config,
                                                                      summary=self.summary)
    self.fa = DistRegNeuralNetwork(config=self.config, summary=self.summary)
    self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                          summary=self.summary)
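
# For reference, the four distributional-regularizer variants used in the best-parameters
# Experiment below set the 'use_gamma' and 'beta_lb' flags as follows. The mapping is read
# directly from that method dispatch; the dictionary itself is only an illustration.
DIST_REG_VARIANTS = {
    'DRE':    dict(use_gamma=False, beta_lb=False),
    'DRE_LB': dict(use_gamma=False, beta_lb=True),
    'DRG':    dict(use_gamma=True,  beta_lb=False),
    'DRG_LB': dict(use_gamma=True,  beta_lb=True),
}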

class Experiment:

    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 20000)
        self.method = check_attribute_else_default(experiment_parameters, 'method', 'DQN')
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher', 'puddle_world'])
        parameters_dictionary = BEST_PARAMETERS_DICTIONARY[self.environment_name][self.method][self.buffer_size]
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0

        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32

        # Parameters for any type of agent
        self.config.buffer_size = self.buffer_size
        self.config.lr = parameters_dictionary['LearningRate']
        self.config.tnet_update_freq = parameters_dictionary['Freq']

        if self.method in ['DRE', 'DRE_LB', 'DRG', 'DRG_LB']:
            self.config.beta = parameters_dictionary['Beta']
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.use_gamma = False
            self.config.beta_lb = False
            if self.method in ['DRG', 'DRG_LB']:
                self.config.use_gamma = True
            if self.method in ['DRE_LB', 'DRG_LB']:
                self.config.beta_lb = True
            self.fa = DistRegNeuralNetwork(config=self.config, summary=self.summary)
        elif self.method in ['L1A', 'L1W', 'L2A', 'L2W']:
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.reg_method = 'l1'
            if self.method in ['L2A', 'L2W']:
                self.config.reg_method = 'l2'
            self.config.weights_reg = False
            if self.method in ['L1W', 'L2W']:
                self.config.weights_reg = True
            self.fa = RegularizedNeuralNetwork(config=self.config, summary=self.summary)
        elif self.method in ['DQN']:
            self.fa = VanillaDQN(config=self.config, summary=self.summary)
        elif self.method in ['Dropout']:
            self.config.dropout_probability = parameters_dictionary['DropoutProbability']
            self.fa = DropoutNeuralNetwork(config=self.config, summary=self.summary)
        else:
            raise ValueError("No configuration available for the given method.")

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config,
                                                                          summary=self.summary)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)

    def run(self):
        prev_idx = 0
        current_episode_number = 1
        assert hasattr(self.config, 'current_step')
        while self.config.current_step != self.config.number_of_steps:
            self.rl_agent.train(1)
            if self.verbose and ((current_episode_number % 10 == 0) or (current_episode_number - 1 == 0)):
                print("Episode Number:", current_episode_number)
                print('\tThe cumulative reward was:', self.summary['return_per_episode'][-1])
                print('\tThe cumulative loss was:',
                      np.round(np.sum(self.summary['loss_per_step'][prev_idx:]), 2))
                print('\tCurrent environment steps:', self.config.current_step)
            prev_idx = self.config.current_step
            current_episode_number += 1
        if self.verbose:
            print("Number of episodes completed:", len(self.summary['return_per_episode']))
            print("The total cumulative reward was:", np.sum(self.summary['reward_per_step']))
            print("Current environment steps:", self.config.current_step)
        self.save_network_params()
        self.save_run_summary()

    def save_network_params(self):
        params_path = os.path.join(self.run_results_dir, 'final_network_weights.pt')
        torch.save(self.fa.net.state_dict(), params_path)

    def save_run_summary(self):
        summary_path = os.path.join(self.run_results_dir, 'summary.p')
        with open(summary_path, mode='wb') as summary_file:
            pickle.dump(self.summary, summary_file)
        config_path = os.path.join(self.run_results_dir, 'config.p')
        with open(config_path, mode='wb') as config_file:
            pickle.dump(self.config, config_file)
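
# After a run, save_network_params and save_run_summary leave final_network_weights.pt,
# summary.p, and config.p in run_results_dir. The sketch below shows one way to load them
# back for offline analysis; it assumes only what those methods write. Unpickling config.p
# requires the project's Config class to be importable, and the weights file is a plain
# state_dict, so rebuilding the network is only needed if you want to run it.
import os
import pickle
import numpy as np
import torch

def load_run(run_results_dir):
    with open(os.path.join(run_results_dir, 'summary.p'), mode='rb') as summary_file:
        summary = pickle.load(summary_file)
    with open(os.path.join(run_results_dir, 'config.p'), mode='rb') as config_file:
        config = pickle.load(config_file)
    state_dict = torch.load(os.path.join(run_results_dir, 'final_network_weights.pt'),
                            map_location='cpu')
    return summary, config, state_dict

# Example (hypothetical path):
# summary, config, weights = load_run('results/mountain_car/run_1')
# print(np.sum(summary['reward_per_step']), len(summary['return_per_episode']))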