def agent_init(self, task_spec_string):
    """
    This function is called once at the beginning of an experiment.

    Arguments: task_spec_string - A string defining the task.  This string
                                  is decoded using
                                  TaskSpecVRLGLUE3.TaskSpecParser
    """
    # DO SOME SANITY CHECKING ON THE TASKSPEC
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
    if TaskSpec.valid:
        assert ((len(TaskSpec.getIntObservations()) == 0) !=
                (len(TaskSpec.getDoubleObservations()) == 0)), \
            "expecting continuous or discrete observations, not both"
        assert len(TaskSpec.getDoubleActions()) == 0, \
            "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number, not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number, not a special value"
        self.num_actions = TaskSpec.getIntActions()[0][1] + 1
    else:
        print "INVALID TASK SPEC"

    self.data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                         height=CROPPED_HEIGHT,
                                         max_steps=self.max_history,
                                         phi_length=self.phi_length)

    # Just needs to be big enough to create phi's.
    self.test_data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                              height=CROPPED_HEIGHT,
                                              max_steps=10,
                                              phi_length=self.phi_length)
    self.epsilon = 1.
    self.epsilon_rate = .9 / self.max_history

    self.testing = False

    if self.nn_file is None:
        self.network = self._init_network()
    else:
        handle = open(self.nn_file, 'r')
        self.network = cPickle.load(handle)

    self._open_results_file()
    self._open_learning_file()

    self.step_counter = 0
    self.episode_counter = 0
    self.batch_counter = 0

    self.holdout_data = None

    # In order to add an element to the data set we need the
    # previous state and action and the current reward.  These
    # will be used to store states and actions.
    self.last_img = None
    self.last_action = None
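The line `self.epsilon_rate = .9 / self.max_history` implies a linear anneal of epsilon from 1.0 down to 0.1 over `max_history` steps, presumably applied once per training step elsewhere in the agent. A minimal standalone sketch of that schedule (the function name and defaults are hypothetical, not from the source):

def annealed_epsilon(step, epsilon_start=1.0, epsilon_min=0.1,
                     decay_steps=1000000):
    """Linearly anneal epsilon from epsilon_start down to epsilon_min
    over decay_steps steps, then hold it at epsilon_min."""
    rate = (epsilon_start - epsilon_min) / decay_steps
    return max(epsilon_min, epsilon_start - rate * step)

# Example: halfway through the schedule epsilon is 0.55.
assert abs(annealed_epsilon(500000) - 0.55) < 1e-9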
def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
             replay_memory_size, exp_pref, replay_start_size,
             update_frequency):
    self.network = q_network
    self.epsilon_start = epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.replay_memory_size = replay_memory_size
    self.exp_pref = exp_pref
    self.replay_start_size = replay_start_size
    self.update_frequency = update_frequency

    self.phi_length = self.network.num_frames
    self.image_width = self.network.input_width
    self.image_height = self.network.input_height

    # CREATE A FOLDER TO HOLD RESULTS
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    self.exp_dir = self.exp_pref + time_str + \
                   "{}".format(self.network.lr).replace(".", "p") + "_" \
                   + "{}".format(self.network.discount).replace(".", "p")

    try:
        os.stat(self.exp_dir)
    except OSError:
        os.makedirs(self.exp_dir)

    self.num_actions = self.network.num_actions

    self.testing = False
    self.total_reward = 0
    self.episode_counter = 0

    self.data_set = ale_data_set.DataSet(self.network.state_count,
                                         max_steps=self.replay_memory_size,
                                         phi_length=self.phi_length)
    self.test_data_set = ale_data_set.DataSet(self.network.state_count,
                                              max_steps=self.replay_memory_size,
                                              phi_length=self.phi_length)

    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    self.holdout_data = None
def __init__(self, sarsa_network, args, epsilon_min, epsilon_decay,
             exp_pref, logger, rng):
    self.network = sarsa_network
    self.epsilon_start = args.epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.exp_pref = exp_pref
    self.rng = rng
    self.phi_length = self.network.num_frames
    self.image_height = self.network.input_height
    self.image_width = self.network.input_width
    self.data_set = ale_data_set.DataSet(self.image_height,
                                         self.image_width,
                                         self.phi_length, rng)
    self._game_name = args.game
    self.logger = logger

    # Create a folder to save the network.
    self.model_dir = "./%s/%s_sarsa" % (args.saved_model_dir, args.game)
    if not os.path.isdir(self.model_dir):
        os.makedirs(self.model_dir)

    # CREATE A FOLDER TO HOLD RESULTS
    # time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    # self.exp_dir = self.exp_pref + time_str + \
    #     "{}".format(self.network.lr).replace(".", "p") + "_" \
    #     + "{}".format(self.network.discount).replace(".", "p")
    #
    # try:
    #     os.stat(self.exp_dir)
    # except OSError:
    #     os.makedirs(self.exp_dir)

    self.num_actions = self.network.num_actions

    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    # self._open_results_file()
    # self._open_learning_file()

    self.episode_counter = 0
    self.last_img = None
    self.last_action = None
def __init__(self, sarsa_network, epsilon_start, epsilon_min,
             epsilon_decay, exp_pref, rng):
    self.network = sarsa_network
    self.epsilon_start = epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.exp_pref = exp_pref
    self.rng = rng
    self.phi_length = self.network.num_frames
    self.image_height = self.network.input_height
    self.image_width = self.network.input_width
    self.data_set = ale_data_set.DataSet(self.image_height,
                                         self.image_width,
                                         self.phi_length, rng)

    # CREATE A FOLDER TO HOLD RESULTS
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    self.exp_dir = self.exp_pref + time_str + \
                   "{}".format(self.network.lr).replace(".", "p") + "_" \
                   + "{}".format(self.network.discount).replace(".", "p")

    try:
        os.stat(self.exp_dir)
    except OSError:
        os.makedirs(self.exp_dir)

    self.num_actions = self.network.num_actions

    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    self._open_results_file()
    self._open_learning_file()

    self.episode_counter = 0
    self.last_img = None
    self.last_action = None
def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
             replay_memory_size, exp_pref, replay_start_size,
             update_frequency, rng):
    self.network = q_network
    self.epsilon_start = epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.replay_memory_size = replay_memory_size
    self.exp_pref = exp_pref
    self.replay_start_size = replay_start_size
    self.update_frequency = update_frequency
    self.rng = rng

    self.phi_length = self.network.num_frames
    self.image_width = self.network.input_width
    self.image_height = self.network.input_height

    # CREATE A FOLDER TO HOLD RESULTS
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    self.exp_dir = self.exp_pref + time_str + \
                   "{}".format(self.network.lr).replace(".", "p") + "_" \
                   + "{}".format(self.network.discount).replace(".", "p")

    try:
        os.stat(self.exp_dir)
    except OSError:
        os.makedirs(self.exp_dir)

    self.save_info_file()

    self.num_actions = self.network.num_actions

    self.data_set = ale_data_set.DataSet(width=self.image_width,
                                         height=self.image_height,
                                         rng=rng,
                                         max_steps=self.replay_memory_size,
                                         phi_length=self.phi_length)

    # Just needs to be big enough to create phi's.
    self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                              height=self.image_height,
                                              rng=rng,
                                              max_steps=self.phi_length * 2,
                                              phi_length=self.phi_length)
    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    self.testing = False

    self._open_results_file()
    self._open_learning_file()

    self.episode_counter = 0
    self.batch_counter = 0

    self.holdout_data = None
    self.holdout_ram = None

    # In order to add an element to the data set we need the
    # previous state and action and the current reward.  These
    # will be used to store states and actions.
    self.last_img = None
    self.last_action = None
    self.last_ram = None
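Most of these constructors build the results directory the same way. A small self-contained sketch of just that naming logic, pulled out as a hypothetical helper (not part of the original class), showing how '.' is replaced by 'p' to keep the values filesystem-safe:

import time

def experiment_dir(exp_pref, lr, discount):
    """Build a results directory name such as
    'results/breakout_07-15-12-30_0p00025_0p99'."""
    time_str = time.strftime("_%m-%d-%H-%M_", time.gmtime())
    return (exp_pref + time_str +
            "{}".format(lr).replace(".", "p") + "_" +
            "{}".format(discount).replace(".", "p"))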
def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
             replay_memory_size, experiment_directory, replay_start_size,
             update_frequency, rng, recording=True):
    self.results_file = self.learning_file = None
    self.best_epoch_reward = None
    self.network = q_network
    self.epsilon_start = epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.replay_memory_size = replay_memory_size
    self.replay_start_size = replay_start_size
    self.update_frequency = update_frequency
    self.rng = rng

    self.phi_length = self.network.num_frames
    self.image_width = self.network.input_width
    self.image_height = self.network.input_height

    self.recording = recording
    self.exp_dir = experiment_directory
    if self.recording:
        try:
            os.stat(self.exp_dir)
        except OSError:
            os.makedirs(self.exp_dir)
        self.record_parameters()

    self.num_actions = self.network.num_actions

    self.data_set = ale_data_set.DataSet(width=self.image_width,
                                         height=self.image_height,
                                         rng=rng,
                                         max_steps=self.replay_memory_size,
                                         phi_length=self.phi_length)

    # Just needs to be big enough to create phi's.
    self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                              height=self.image_height,
                                              rng=rng,
                                              max_steps=self.phi_length * 2,
                                              phi_length=self.phi_length)
    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    self.testing = False

    self._open_results_file()
    self._open_learning_file()

    self.episode_counter = 0
    self.batch_counter = 0

    self.holdout_data = None

    # In order to add an element to the data set we need the
    # previous state and action and the current reward.  These
    # will be used to store states and actions.
    self.last_img = None
    self.last_action = None
def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
             replay_memory_size, exp_pref, replay_start_size,
             update_frequency, rng, max_epochs, use_human_net=False,
             use_human_exp_replay=False, human_net=None,
             human_exp_replay=None):
    self.network = q_network
    self.epsilon_start = epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.replay_memory_size = replay_memory_size
    self.exp_pref = exp_pref
    self.replay_start_size = replay_start_size
    self.update_frequency = update_frequency
    self.rng = rng

    self.phi_length = self.network.num_frames
    self.image_width = self.network.input_width
    self.image_height = self.network.input_height
    self.num_actions = self.network.num_actions

    # Daniel: some stuff I added.
    self.max_epochs = max_epochs
    self.use_human_net = use_human_net
    if self.use_human_net:
        self.human_net = human_net
    self.use_human_exp_replay = use_human_exp_replay
    if self.use_human_exp_replay:
        self.human_exp_replay = human_exp_replay
    # A human network and human experience replay are mutually exclusive.
    assert not (self.use_human_net and self.use_human_exp_replay)
    self.actions_train_ep = [0 for i in range(self.num_actions)]
    self.actions_test_ep = [0 for i in range(self.num_actions)]

    # CREATE A FOLDER TO HOLD RESULTS
    time_str = time.strftime("_%m-%d-%H-%M_", time.localtime())
    self.exp_dir = self.exp_pref + time_str + \
                   "{}".format(self.network.lr).replace(".", "p") + "_" \
                   + "{}".format(self.network.discount).replace(".", "p")

    try:
        os.stat(self.exp_dir)
    except OSError:
        os.makedirs(self.exp_dir)

    self.data_set = ale_data_set.DataSet(width=self.image_width,
                                         height=self.image_height,
                                         rng=rng,
                                         max_steps=self.replay_memory_size,
                                         phi_length=self.phi_length)

    # Just needs to be big enough to create phi's.
    self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                              height=self.image_height,
                                              rng=rng,
                                              max_steps=self.phi_length * 2,
                                              phi_length=self.phi_length)
    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    self.testing = False

    self._open_results_file()
    self._open_learning_file()
    self._open_actions_file()

    self.episode_counter = 0
    self.batch_counter = 0

    self.holdout_data = None

    # In order to add an element to the data set we need the
    # previous state and action and the current reward.  These
    # will be used to store states and actions.
    self.last_img = None
    self.last_action = None

    # Exponential moving average of runtime performance.
    self.steps_sec_ema = 0.
def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
             replay_memory_size, exp_pref, replay_start_size,
             update_frequency, rng):
    self.network = q_network
    self.epsilon_start = epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.replay_memory_size = replay_memory_size
    self.exp_pref = exp_pref
    self.replay_start_size = replay_start_size
    self.update_frequency = update_frequency
    self.random_state = rng

    ## Remember the dimensionality of the input space.
    self.phi_length = self.network.num_frames
    self.image_width = self.network.input_width
    self.image_height = self.network.input_height

    ## The output layer size of the q-value network approximator.
    self.num_actions = self.network.num_actions

    ## Allocate experience replay datasets: a large one for training ...
    self.dataset_training = ale_data_set.DataSet(
        width=self.image_width,
        height=self.image_height,
        rng=self.random_state,
        max_steps=self.replay_memory_size,
        phi_length=self.phi_length)

    ## ... and a small one for testing.
    ## Since during the testing phase no learning takes place, we just need
    ## this dataset to be big enough to hold the current phi
    ## (state x phi_length).  Thus "max_steps" is set to double the size
    ## of phi.
    self.dataset_testing = ale_data_set.DataSet(
        width=self.image_width,
        height=self.image_height,
        rng=self.random_state,
        max_steps=self.phi_length * 2,
        phi_length=self.phi_length)

    ## The epsilon-probability changes across the epochs!
    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    # CREATE A FOLDER TO HOLD THE RESULTS
    time_str = time.strftime("_%m-%d-%H-%M", time.gmtime())
    self.exp_dir = self.exp_pref + time_str + "_" \
                   + "{}".format(self.network.lr).replace(".", "p") + "_" \
                   + "{}".format(self.network.discount).replace(".", "p")

    try:
        os.stat(self.exp_dir)
    except OSError:
        os.makedirs(self.exp_dir)

    self._open_results_file()
    self._open_learning_file()

    self.holdout_observations = None

    ## Logging:
    logging.info("NeuralAgent: actions %d, phi %d" % (self.num_actions,
                                                      self.phi_length))
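The comment above explains why `max_steps = self.phi_length * 2` suffices for the test dataset: a phi is just the last `phi_length` frames stacked into one state. A hedged illustration of that stacking (names are hypothetical; presumably the real `ale_data_set.DataSet.phi` does the equivalent internally):

import numpy as np

def make_phi(frames, phi_length=4):
    """Stack the most recent phi_length grayscale frames into a single
    (phi_length, height, width) state array."""
    assert len(frames) >= phi_length, "need at least phi_length frames"
    return np.stack(frames[-phi_length:], axis=0)

# Example: four 84x84 frames produce one (4, 84, 84) state.
frames = [np.zeros((84, 84), dtype=np.uint8) for _ in range(4)]
print(make_phi(frames).shape)  # (4, 84, 84)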
def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
             replay_memory_size, exp_dir, replay_start_size,
             update_frequency, rng):
    self.network = q_network
    self.epsilon_start = epsilon_start
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.replay_memory_size = replay_memory_size
    self.exp_dir = exp_dir
    self.replay_start_size = replay_start_size
    self.update_frequency = update_frequency
    self.rng = rng
    self.logger = logging.getLogger("DeepLogger")
    self.episode_no = 0

    self.phi_length = self.network.num_frames
    self.image_width = self.network.input_width
    self.image_height = self.network.input_height
    self.num_actions = self.network.num_actions

    self.data_set = ale_data_set.DataSet(width=self.image_width,
                                         height=self.image_height,
                                         rng=rng,
                                         max_steps=self.replay_memory_size,
                                         phi_length=self.phi_length)

    # Just needs to be big enough to create phi's.
    self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                              height=self.image_height,
                                              rng=rng,
                                              max_steps=self.phi_length * 2,
                                              phi_length=self.phi_length)
    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    self.testing = False

    self._open_results_file()
    self._open_learning_file()

    # Despite the name, this does not count all episodes; it appears to be
    # incremented only during testing.
    self.episode_counter = 0
    self.batch_counter = 0

    self.holdout_data = None

    # In order to add an element to the data set we need the
    # previous state and action and the current reward.  These
    # will be used to store states and actions.
    self.last_img = None
    self.last_action = None

    # Exponential moving average of runtime performance.
    self.steps_sec_ema = 0
def agent_init(self, task_spec_string):
    """
    This function is called once at the beginning of an experiment.

    Arguments: task_spec_string - A string defining the task.  This string
                                  is decoded using
                                  TaskSpecVRLGLUE3.TaskSpecParser
    """
    # DO SOME SANITY CHECKING ON THE TASKSPEC
    TaskSpec = TaskSpecVRLGLUE3.TaskSpecParser(task_spec_string)
    if TaskSpec.valid:
        assert ((len(TaskSpec.getIntObservations()) == 0) !=
                (len(TaskSpec.getDoubleObservations()) == 0)), \
            "expecting continuous or discrete observations, not both"
        assert len(TaskSpec.getDoubleActions()) == 0, \
            "expecting no continuous actions"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][0]), \
            "expecting min action to be a number, not a special value"
        assert not TaskSpec.isSpecial(TaskSpec.getIntActions()[0][1]), \
            "expecting max action to be a number, not a special value"
        self.num_actions = TaskSpec.getIntActions()[0][1] + 1
    else:
        print "INVALID TASK SPEC"

    self.data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                         height=CROPPED_HEIGHT,
                                         max_steps=self.max_history,
                                         phi_length=self.phi_length)

    # Just needs to be big enough to create phi's.
    self.test_data_set = ale_data_set.DataSet(width=CROPPED_WIDTH,
                                              height=CROPPED_HEIGHT,
                                              max_steps=10,
                                              phi_length=self.phi_length)
    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = .9 / self.epsilon_decay
    else:
        self.epsilon_rate = 0

    self.testing = False

    if self.nn_file is None:
        self.network = self._init_network()
    else:
        handle = open(self.nn_file, 'r')
        self.network = cPickle.load(handle)

    # If a trained network has been specified, use it to initialize weights.
    if self.nn_trained_share is None:
        print "No sharing between networks"
    else:
        print "Sharing between networks ", self.nn_trained_share
        print "Layers", self.share_layers
        handle = open(self.nn_trained_share, 'r')
        trained_network = cPickle.load(handle)

        if self.share_layers.find('1') != -1:
            # Sharing weights of the first convolutional layer.
            print "Sharing weights for Convolution Layer 1"
            self.network.q_layers[2].W.set_value(
                trained_network.q_layers[2].W.get_value())
            self.network.q_layers[2].b.set_value(
                trained_network.q_layers[2].b.get_value())
            self.network.q_layers[2].bias_params[0].set_value(
                trained_network.q_layers[2].bias_params[0].get_value())

            if self.flip == 1:
                print "Flipping weights in the first convolutional layer"
                W_old = trained_network.q_layers[2].W.get_value()
                for i in xrange(4):
                    for j in xrange(16):
                        temp = W_old[i, :, :, j]
                        W_old[i, :, :, j] = temp[::-1].T
                self.network.q_layers[2].W.set_value(W_old)

        # Sharing weights of the second convolutional layer.
        if self.share_layers.find('2') != -1:
            print "Sharing weights for Convolution Layer 2"
            self.network.q_layers[3].W.set_value(
                trained_network.q_layers[3].W.get_value())
            self.network.q_layers[3].b.set_value(
                trained_network.q_layers[3].b.get_value())
            self.network.q_layers[3].bias_params[0].set_value(
                trained_network.q_layers[3].bias_params[0].get_value())

        # Sharing weights of the fully connected layer.
        if self.share_layers.find('3') != -1:
            print "Sharing weights for FC layer"
            self.network.q_layers[5].W.set_value(
                trained_network.q_layers[5].W.get_value())
            self.network.q_layers[5].b.set_value(
                trained_network.q_layers[5].b.get_value())
            self.network.q_layers[5].bias_params[0].set_value(
                trained_network.q_layers[5].bias_params[0].get_value())

    self._open_results_file()
    self._open_learning_file()

    self.step_counter = 0
    self.episode_counter = 0
    self.batch_counter = 0

    self.holdout_data = None

    # In order to add an element to the data set we need the
    # previous state and action and the current reward.  These
    # will be used to store states and actions.
    self.last_img = None
    self.last_action = None
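The per-layer sharing above repeats the same three `set_value(get_value())` copies for each layer. A hypothetical helper (not in the source) that captures the pattern for any pair of Theano-style layers exposing `W`, `b`, and `bias_params` shared variables:

def copy_layer_weights(dst_layer, src_layer):
    """Copy W, b, and the first bias parameter of src_layer into
    dst_layer via the get_value/set_value interface of Theano
    shared variables."""
    dst_layer.W.set_value(src_layer.W.get_value())
    dst_layer.b.set_value(src_layer.b.get_value())
    dst_layer.bias_params[0].set_value(src_layer.bias_params[0].get_value())

# Usage (within agent_init above, hypothetically):
#   copy_layer_weights(self.network.q_layers[2], trained_network.q_layers[2])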
def __init__(self, q_network, epsilon_start, epsilon_min, epsilon_decay,
             replay_memory_size, exp_pref, update_frequency,
             replay_start_size, rng, transitions_sequence_length,
             transition_range, penalty_method, weight_min, weight_max,
             weight_decay_length, beta, two_train=False, late2=True,
             close2=True, verbose=False, double=False, save_pkl=True):
    self.double_dqn = double
    self.network = q_network
    self.num_actions = q_network.num_actions
    self.epsilon_start = epsilon_start
    self.update_frequency = update_frequency
    self.beta = beta
    self.epsilon_min = epsilon_min
    self.epsilon_decay = epsilon_decay
    self.replay_memory_size = replay_memory_size

    self.exp_dir = exp_pref + '_' + str(weight_max) + '_' + str(weight_min)
    if late2:
        self.exp_dir += '_l2'
    if close2:
        self.exp_dir += '_close2'
    else:
        self.exp_dir += '_len' + str(transitions_sequence_length) + \
                        '_r' + str(transition_range)
    if two_train:
        self.exp_dir += '_TTR'

    self.replay_start_size = replay_start_size
    self.rng = rng
    self.transition_len = transitions_sequence_length
    self.two_train = two_train

    self.verbose = verbose
    if verbose > 0:
        print "Using verbose", verbose
        self.exp_dir += '_vb' + str(verbose)

    self.phi_length = self.network.num_frames
    self.image_width = self.network.input_width
    self.image_height = self.network.input_height
    self.penalty_method = penalty_method
    self.batch_size = self.network.batch_size
    self.discount = self.network.discount
    self.transition_range = transition_range
    self.late2 = late2
    self.close2 = close2
    self.same_update = False
    self.save_pkl = save_pkl

    self.start_index = 0
    self.terminal_index = None

    self.weight_max = weight_max
    self.weight_min = weight_min
    self.weight = self.weight_max
    self.weight_decay_length = weight_decay_length
    self.weight_decay = (self.weight_max -
                         self.weight_min) / self.weight_decay_length

    self.batchnum = 0
    self.epi_len = 0
    self.batch_count = 0

    self.epi_state = None
    self.epi_actions = None
    self.epi_rewards = None
    self.epi_terminals = None
    self.Q_tilde = None
    self.y_ = None

    try:
        os.stat(self.exp_dir)
    except OSError:
        os.makedirs(self.exp_dir)

    self.data_set = ale_data_set.DataSet(
        width=self.image_width,
        height=self.image_height,
        rng=rng,
        max_steps=self.replay_memory_size,
        phi_length=self.phi_length,
        discount=self.discount,
        batch_size=self.batch_size,
        transitions_len=self.transition_len)

    # Just needs to be big enough to create phi's.
    self.test_data_set = ale_data_set.DataSet(width=self.image_width,
                                              height=self.image_height,
                                              rng=rng,
                                              max_steps=self.phi_length * 2,
                                              phi_length=self.phi_length)
    self.epsilon = self.epsilon_start
    if self.epsilon_decay != 0:
        self.epsilon_rate = ((self.epsilon_start - self.epsilon_min) /
                             self.epsilon_decay)
    else:
        self.epsilon_rate = 0

    self.testing = False

    self._open_results_file()
    self._open_learning_file()
    self._open_recording_file()

    self.step_counter = 0
    self.episode_reward = 0
    self.start_time = None
    self.loss_averages = None
    self.total_reward = 0
    self.episode_counter = 0
    self.batch_counter = 0

    self.holdout_data = None

    # In order to add an element to the data set we need the
    # previous state and action and the current reward.  These
    # will be used to store states and actions.
    self.last_img = None
    self.last_action = None

    # Exponential moving average of runtime performance.
    self.steps_sec_ema = 0.

    self.program_start_time = None
    self.last_count_time = None
    self.epoch_time = None
    self.total_time = None