def __init__(self, config: BasicConfigSAC, environment: Env, log_path: str = None, logging: bool = True):
    """Wire up a SAC training session: monitor, replay memory, agent, learner.

    Args:
        config: Bundled SAC configuration (learner, policy, memory size,
            episode horizon, warm-up steps).
        environment: The environment instance to train on.
        log_path: Log directory/signature; auto-generated from the
            environment when None.
        logging: Forwarded to the Monitor to enable/disable logging.
    """
    # Derive a default experiment signature when no explicit path is given.
    log_path = generate_experiment_signature(
        environment) if log_path is None else log_path
    self.config = config
    self.monitor = Monitor(log_path, config, logging=logging)
    self.env = environment
    # Frequently used settings, cached as attributes for convenience.
    self.batch_size = config.learner.batch_size
    self.episode_horizon = config.episode_horizon
    self.steps_before_learn = config.steps_before_learn
    # Experience replay buffer shared by rollouts and learning updates.
    self.memory_buffer = MemoryBuffer(max_memory_size=config.memory_size)
    self.agent = AgentSAC(environment, config.policy)
    # NOTE(review): 'enviroment' (sic) matches LearnerSAC's keyword
    # parameter name; do not "fix" the spelling here alone.
    self.learner = LearnerSAC(config=config.learner,
                              agent=self.agent,
                              enviroment=self.env,
                              monitor=self.monitor)
def __init__(self, in_features, n_hidden, out_features):
    """Build a fully-connected network with hidden widths `n_hidden`.

    Layers are kept both in a plain list (`self.layers`) and registered as
    child modules (`layer0`, `layer1`, ...) so their parameters are tracked.
    """
    super().__init__()
    self.in_features = in_features
    self.out_features = out_features
    # ReLU between layers; identity on the output.
    self.f_internal = torch.relu
    self.f_output = lambda x: x
    # Consecutive widths: input -> hidden sizes -> output.
    widths = [in_features] + list(n_hidden) + [out_features]
    self.layers = []
    for idx, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
        linear = torch.nn.Linear(fan_in, fan_out)
        self.layers.append(linear)
        self.add_module(f'layer{idx}', linear)
    self.memory_buffer = MemoryBuffer()
    self.use_memory_buffer = False
def __init__(self, memory, start_address):
    """Assemble a five-stage pipeline (fetch/decode/execute/memory/write-back)
    and the buffers that connect consecutive stages.

    Args:
        memory: Instruction memory read by the FetchStage.
        start_address: Address of the first instruction (initial PC).
    """
    self.memory = memory
    self.start_address = start_address
    self.register_file = RegisterFile()
    # defaultdict factory: unmapped data-memory addresses read as -777
    # (sentinel value — presumably to make uninitialized reads obvious).
    self.data_memory_key_fn = lambda: -777
    self.data_memory = defaultdict(self.data_memory_key_fn)
    self.cycle_count = 0
    self.instr_count = 0
    self.PC = 0
    self.fetch_input_buffer = FetchInputBuffer({
        'PC': self.start_address,
        'instr_count': self.instr_count,
    })
    self.fetcher_buffer = FetcherBuffer()
    self.fetch_stage = FetchStage(self.memory, self.fetch_input_buffer,
                                  self.fetcher_buffer)
    self.decoder_buffer = DecoderBuffer()
    self.decode_stage = DecodeStage(self.fetcher_buffer, self.decoder_buffer,
                                    self.register_file)
    self.executer_buffer = ExecuterBuffer()
    self.execute_stage = ExecuteStage(self.decoder_buffer,
                                      self.executer_buffer)
    self.memory_buffer = MemoryBuffer()
    self.memory_stage = MemoryStage(self.executer_buffer, self.memory_buffer,
                                    self.data_memory)
    self.write_back_stage = WriteBackStage(self.memory_buffer,
                                           self.register_file)
def __init__(self, action_dim, state_dim, params):
    """Set up a (dueling) DDQN agent, its replay buffer and the save path.

    Args:
        action_dim: Size of the action space.
        state_dim: Shape/size of the state space.
        params: Dict with keys 'with_per', 'epsilon', 'epsilon_decay',
            'dueling' and 'save_interval'.
    """
    # Environment and DDQN parameters
    self.with_per = params["with_per"]  # prioritized experience replay flag
    self.action_dim = action_dim
    self.state_dim = state_dim
    self.lr = 2.5e-4
    self.gamma = 0.95
    self.epsilon = params["epsilon"]
    self.epsilon_decay = params["epsilon_decay"]
    self.epsilon_minimum = 0.05
    self.buffer_size = 10000
    self.tau = 1.0
    self.agent = Agent(self.state_dim, action_dim, self.lr, self.tau,
                       params["dueling"])
    # Memory Buffer for Experience Replay
    self.buffer = MemoryBuffer(self.buffer_size, self.with_per)

    exp_dir = 'test/models/'
    # exist_ok avoids the check-then-create race of the original code.
    os.makedirs(exp_dir, exist_ok=True)
    # os.path.join fixes the doubled separator ('test/models//lala.h5').
    self.export_path = os.path.join(exp_dir, 'lala.h5')
    self.save_interval = params["save_interval"]
def test_write_back_R(self):
    """Write-back of an R-type ADD: rd gets the buffered value, buffer drains."""
    self.set_up_write_back_stage('R ADD R1 R2 R3')
    # Capture the value staged for rd BEFORE write_back() consumes the buffer.
    expected_reg_value = self.memory_buffer.rd[1]
    self.write_back_stage.write_back()
    # The stage must leave an empty memory buffer behind ...
    self.assertEqual(self.write_back_stage.memory_buffer, MemoryBuffer())
    # ... commit the staged value into the destination register ...
    self.assertEqual(self.write_back_stage.register_file[self.instr.rd],
                     expected_reg_value)
    # ... and clear the destination register's pending/dirty state.
    self.assertTrue(self.register_file.isClean(self.instr.rd))
def test_do_operand_forwarding_MEM(self):
    """Forward rt from the MEM-stage buffer only when register numbers match."""
    # rt register 1 is unresolved in EX; MEM holds value 3 for register 1.
    self.processor.executer_buffer = ExecuterBuffer({'rt': [1, None]})
    self.processor.memory_buffer = MemoryBuffer({'rt': [1, 3]})
    self.processor.do_operand_forwarding()
    self.assertEqual(self.processor.executer_buffer.rt, [1, 3])
    # Different register (2): nothing to forward, value stays unresolved.
    self.processor.executer_buffer = ExecuterBuffer({'rt': [2, None]})
    self.processor.do_operand_forwarding()
    self.assertEqual(self.processor.executer_buffer.rt, [2, None])
def __init__(self, load_policy=False, learning_rate=0.001, dim_a=3,
             fc_layers_neurons=100, loss_function_type='mean_squared',
             policy_loc='./racing_car_m2/network', image_size=64,
             action_upper_limits='1,1', action_lower_limits='-1,-1', e='1',
             show_ae_output=True, show_state=True, resize_observation=True,
             ae_training_threshold=0.0011, ae_evaluation_frequency=40):
    """Image-based agent: base policy plus autoencoder training controls.

    Args beyond the base Agent:
        image_size: Side length of the (square) input image.
        ae_training_threshold: Threshold compared against the AE loss
            history (used elsewhere — confirm exact semantics in trainer).
        ae_evaluation_frequency: Cadence for evaluating the AE loss.
        show_state / show_ae_output: Enable the live image plots.
        resize_observation: Whether incoming observations are resized.
    """
    self.image_size = image_size
    super(Agent, self).__init__(dim_a=dim_a, policy_loc=policy_loc,
                                action_upper_limits=action_upper_limits,
                                action_lower_limits=action_lower_limits, e=e,
                                load_policy=load_policy,
                                loss_function_type=loss_function_type,
                                learning_rate=learning_rate,
                                fc_layers_neurons=fc_layers_neurons)

    # High-dimensional state initialization
    self.resize_observation = resize_observation
    self.show_state = show_state
    self.show_ae_output = show_ae_output

    # Autoencoder training control variables
    self.ae_training = True
    self.ae_loss_history = MemoryBuffer(
        min_size=50, max_size=50)  # reuse memory buffer for the ae loss history
    # NOTE(review): attribute keeps the original 'trainig' typo — external
    # readers may depend on this exact name, so it is preserved.
    self.ae_trainig_threshold = ae_training_threshold
    self.ae_evaluation_frequency = ae_evaluation_frequency
    self.mean_ae_loss = 1e7  # sentinel: no AE loss measured yet

    if self.show_state:
        self.state_plot = FastImagePlot(1,
                                        np.zeros([image_size, image_size]),
                                        image_size, 'Image State', vmax=0.5)

    if self.show_ae_output:
        self.ae_output_plot = FastImagePlot(2,
                                            np.zeros(
                                                [image_size, image_size]),
                                            image_size, 'Autoencoder Output',
                                            vmax=0.5)
def test_memory_buffer_size(self):
    """Smoke test: print dtypes and reported size (MB) of a large buffer."""
    # Infoset width is built from component sizes — TODO confirm breakdown.
    info_set_size = 1 + 2 + 5 + 24
    item_size = 64
    max_size = int(1e6)
    mb = MemoryBuffer(info_set_size, item_size, max_size=max_size)
    print(mb._infosets.dtype)
    print(mb._items.dtype)
    print(mb._weights.dtype)
    print("Memory buffer size (max_size={}): {} mb".format(
        max_size, mb.size_mb()))
def __init__(self, n_state, n_action, a_bound, discount=0.99, tau=0.05,
             actor_lr=0.001, critic_lr=0.001, policy_freq=2,
             exp_noise_std=0.1, noise_decay=0.9995, noise_decay_steps=1000,
             smooth_noise_std=0.1, clip=0.2, buffer_size=20000,
             save_interval=5000, assess_interval=20, logger=None,
             checkpoint_queen=None):
    """TD3-style setup: one actor, twin critics, delayed policy updates
    (policy_freq) and target-policy smoothing noise.

    NOTE(review): logger is dereferenced immediately below, so despite the
    None default a real logger must be supplied — confirm with callers.
    """
    #self.__dict__.update(locals())
    self.logger = logger
    self.logger.save_config(locals())
    self.n_action = n_action
    self.n_state = n_state
    self.a_bound = a_bound  # action bound/scale for the actor's outputs
    # Exploration noise and its decay schedule.
    self.noise_std = exp_noise_std
    self.noise_decay = noise_decay
    self.noise_decay_steps = noise_decay_steps
    self.policy_freq = policy_freq  # presumably actor update cadence — confirm
    # Target-policy smoothing: noise std and clipping range.
    self.smooth_noise_std = smooth_noise_std
    self.clip = clip
    self.discount = discount
    self.pointer = 0
    # Replay buffer with prioritized sampling enabled.
    self.buffer = MemoryBuffer(buffer_size, with_per=True)
    self.save_interval = save_interval
    self.assess_interval = assess_interval
    self.actor = Actor(self.n_state, self.n_action, gamma=discount,
                       lr=actor_lr, tau=tau)
    self.critic1 = Critic(self.n_state, self.n_action, gamma=discount,
                          lr=critic_lr, tau=tau)
    self.critic2 = Critic(self.n_state, self.n_action, gamma=discount,
                          lr=critic_lr, tau=tau)
    self.merge = self._merge_summary()
    self.ckpt_queen = checkpoint_queen
    self.prefix = self.__class__.__name__
def test_do_operand_forwarding(self):
    """Decode-stage rs is forwarded from the EX result, or from MEM if EX is empty."""
    # EX has register 2 = 7 -> forwarded into the decode buffer's rs slot.
    self.processor.decoder_buffer = DecoderBuffer({'rs': [2, None]})
    self.processor.executer_buffer = ExecuterBuffer({'rt': [2, 7]})
    self.processor.do_operand_forwarding()
    self.assertEqual(self.processor.decoder_buffer.rs, [2, 7])
    # With EX empty, the MEM-stage rd value (register 2 = 9) is used instead.
    self.processor.decoder_buffer = DecoderBuffer({'rs': [2, None]})
    self.processor.executer_buffer = ExecuterBuffer()
    self.processor.memory_buffer = MemoryBuffer({'rd': [2, 9]})
    self.processor.do_operand_forwarding()
    self.assertEqual(self.processor.decoder_buffer.rs, [2, 9])
def __init__(self, state_dim, action_dim, batchSize=64, lr=.0001, tau=.05,
             gamma=.95, epsilon=1, eps_dec=.99, learnInterval=1, isDual=False,
             isDueling=False, isPER=False, filename='model', mem_size=1000000,
             layerCount=2, layerUnits=64, usePruning=False):
    """Set up a DQN agent: online + target networks and a replay buffer.

    Args:
        state_dim / action_dim: Environment dimensions.
        isDual: Use a separate target network with soft updates (tau);
            otherwise tau is forced to 1.0 (hard copy).
        isDueling: Dueling network architecture for the Q-networks.
        isPER: Prioritized experience replay in the memory buffer.
        usePruning: Forwarded to the network builder.
    """
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.isDueling = isDueling
    self.isDual = isDual
    self.isPER = isPER
    self.lr = lr
    self.gamma = gamma
    self.epsilon = epsilon
    self.epsilon_decay = eps_dec
    self.batchSize = batchSize
    self.filename = filename
    self.learnInterval = learnInterval
    # Initialize Deep Q-Network
    self.model = generateDQN(action_dim, lr, state_dim, isDueling, layerCount,
                             layerUnits, usePruning)
    # Build target Q-Network
    self.target_model = generateDQN(action_dim, lr, state_dim, isDueling,
                                    layerCount, layerUnits, usePruning)
    self.layerCount = layerCount
    self.layerUnits = layerUnits
    # Start the target network identical to the online network.
    self.target_model.set_weights(self.model.get_weights())
    self.memory = MemoryBuffer(mem_size, isPER)
    self.epsilonInitial = epsilon
    self.minEpsilon = .1
    self.usePruning = usePruning
    # Soft updates only make sense with a distinct target network.
    if isDual:
        self.tau = tau
    else:
        self.tau = 1.0
    # Buffer position at the time of the last learning step.
    self.lastLearnIndex = self.memory.totalMemCount
def test_resample(self):
    """resample() must draw exactly n entries from several saved buffers."""
    # Start from a clean directory so stale buffers can't leak in.
    if os.path.exists("./memory/memory_buffer_test/"):
        shutil.rmtree("./memory/memory_buffer_test/")

    # Make a few saved memory buffers.
    info_set_size = 1 + 1 + 16
    item_size = 6
    max_size = int(1e4)
    mb = MemoryBuffer(info_set_size, item_size, max_size=max_size)

    # Three saves under the same name, with distinct weights (0, 1, 2).
    buf1_size = 100
    for i in range(buf1_size):
        mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 0)
    mb.save("./memory/memory_buffer_test/", "advt_mem_0")
    mb.clear()

    buf2_size = 200
    for i in range(buf2_size):
        mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 1)
    mb.save("./memory/memory_buffer_test/", "advt_mem_0")
    mb.clear()

    buf3_size = 300
    for i in range(buf3_size):
        mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 2)
    mb.save("./memory/memory_buffer_test/", "advt_mem_0")
    mb.clear()

    # Make a dataset using the saved buffers.
    # n = (buf1_size + buf2_size) // 10
    n = 1000
    dataset = MemoryBufferDataset("./memory/memory_buffer_test/",
                                  "advt_mem_0", n)

    # min_size = min(n, buf1_size + buf2_size + buf3_size)
    # print(min_size)
    for _ in range(1):
        dataset.resample()
        self.assertEqual(len(dataset), n)
        self.assertEqual(len(dataset._infosets), n)
        self.assertEqual(len(dataset._items), n)
        self.assertEqual(len(dataset._weights), n)
        # print(dataset._weights)

    # Test iteration over the dataset.
    for inputs in dataset:
        print(inputs.keys())
    print(dataset._weights)
def get_stage_output(memory, register_file, pc, instr_count, stage_name):
    """Run the pipeline front-to-back up to `stage_name` and return that
    stage's output buffer.

    All the stages before stage_name will be executed.

    Arguments:
    - `memory`: instruction memory for the FetchStage.
    - `register_file`: register file shared by decode (and later stages).
    - `pc`: initial program counter.
    - `instr_count`: initial instruction count.
    - `stage_name`: one of 'fetch', 'decode', 'execute', 'memory'.

    Returns the requested stage's buffer, or None for any other
    stage_name (original behavior, preserved).

    TODO: Maybe just take the stages as input later.
    """
    fetch_input_buffer = FetchInputBuffer({
        'PC': pc,
        'instr_count': instr_count,
    })
    fetcher_buffer = FetcherBuffer()
    fetch_stage = FetchStage(memory, fetch_input_buffer, fetcher_buffer)
    fetch_stage.fetch_instruction()
    if stage_name == 'fetch':
        return fetch_stage.fetcher_buffer

    decode_stage = DecodeStage(fetch_stage.fetcher_buffer, DecoderBuffer(),
                               register_file)
    decode_stage.decode_instruction()
    if stage_name == 'decode':
        return decode_stage.decoder_buffer

    execute_stage = ExecuteStage(decode_stage.decoder_buffer,
                                 ExecuterBuffer())
    execute_stage.execute()
    if stage_name == 'execute':
        return execute_stage.executer_buffer

    # defaultdict factory (named def instead of an assigned lambda, PEP 8
    # E731): unmapped data-memory addresses read as the sentinel -1.
    def data_memory_key_fn():
        return -1

    data_memory = defaultdict(data_memory_key_fn)
    memory_stage = MemoryStage(execute_stage.executer_buffer, MemoryBuffer(),
                               data_memory)
    memory_stage.do_memory_operation()
    if stage_name == 'memory':
        return memory_stage.memory_buffer
def test_memory_buffer_save(self):
    """Each save() writes the manifest and a new, sequentially numbered .pth."""
    # Make sure the folder doesn't exist so the manifest has to be created.
    if os.path.exists("./memory/memory_buffer_test/"):
        shutil.rmtree("./memory/memory_buffer_test/")

    info_set_size = 1 + 2 + 5 + 24
    item_size = 64
    max_size = int(1e6)
    mb = MemoryBuffer(info_set_size, item_size, max_size=max_size)

    # First save creates both the manifest and buffer file 00000.
    mb.save("./memory/memory_buffer_test/", "test_buffer")
    self.assertTrue(
        os.path.exists(
            "./memory/memory_buffer_test/manifest_test_buffer.csv"))
    self.assertTrue(
        os.path.exists(
            "./memory/memory_buffer_test/test_buffer_00000.pth"))

    # Now save again — the index must advance to 00001.
    mb.save("./memory/memory_buffer_test/", "test_buffer")
    self.assertTrue(
        os.path.exists(
            "./memory/memory_buffer_test/test_buffer_00001.pth"))
def test_memory_buffer_autosave(self): print("\n ================= AUTOSAVE TEST ====================") # Make sure the folder doesn't exist so the manifest has to be created. if os.path.exists("./memory/memory_buffer_test/"): shutil.rmtree("./memory/memory_buffer_test/") info_set_size = 1 + 1 + 24 item_size = 64 max_size = int(1e3) # Add autosave params. mb = MemoryBuffer(info_set_size, item_size, max_size=max_size, autosave_params=("./memory/memory_buffer_test/", "test_buffer")) for _ in range(max_size): mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 1234) self.assertTrue(mb.full()) # This should trigger the save and reset. mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 1234)
def traverse_worker(worker_id, traverse_player_idx, strategies, save_lock,
                    opt, t, eval_mode, info_queue):
    """
    A worker that traverses the game tree K times, saving things to memory
    buffers. Each worker maintains its own memory buffers and saves them
    after finishing.

    If eval_mode is set to True, no memory buffers are created.
    """
    # assert(strategies[0]._network.device == torch.device("cpu"))
    # assert(strategies[1]._network.device == torch.device("cpu"))
    # In eval mode no experience is collected, so skip buffer allocation.
    # (Plain if/else replaces the original `... if eval_mode == False else
    # None` conditional expressions — same result, idiomatic truthiness.)
    advt_mem = None
    strt_mem = None
    if not eval_mode:
        advt_mem = MemoryBuffer(
            Constants.INFO_SET_SIZE,
            Constants.NUM_ACTIONS,
            max_size=opt.SINGLE_PROC_MEM_BUFFER_MAX_SIZE,
            autosave_params=(opt.MEMORY_FOLDER,
                             opt.ADVT_BUFFER_FMT.format(traverse_player_idx)),
            save_lock=save_lock)
        strt_mem = MemoryBuffer(
            Constants.INFO_SET_SIZE,
            Constants.NUM_ACTIONS,
            max_size=opt.SINGLE_PROC_MEM_BUFFER_MAX_SIZE,
            autosave_params=(opt.MEMORY_FOLDER, opt.STRT_BUFFER_FMT),
            save_lock=save_lock)

    # Split the total traversal budget evenly across workers.
    if eval_mode:
        num_traversals_per_worker = int(opt.NUM_TRAVERSALS_EVAL /
                                        opt.NUM_TRAVERSE_WORKERS)
    else:
        num_traversals_per_worker = int(opt.NUM_TRAVERSALS_PER_ITER /
                                        opt.NUM_TRAVERSE_WORKERS)

    t0 = time.time()
    for k in range(num_traversals_per_worker):
        # Mutable cell so traverse() can report its recursion depth.
        ctr = [0]

        # Generate a random initialization, alternating the SB player each time.
        sb_player_idx = k % 2
        round_state = create_new_round(sb_player_idx)
        precomputed_ev = make_precomputed_ev(round_state)

        info = traverse(round_state, make_actions, make_infoset,
                        traverse_player_idx, sb_player_idx, strategies,
                        advt_mem, strt_mem, t, precomputed_ev,
                        recursion_ctr=ctr)

        # `not eval_mode` first: short-circuits before touching the buffers
        # (which are None in eval mode) and before the modulo.
        if not eval_mode and (k % opt.TRAVERSE_DEBUG_PRINT_HZ) == 0:
            elapsed = time.time() - t0
            print(
                "[WORKER #{}] done with {}/{} traversals | recursion depth={} | advt={} strt={} | elapsed={} sec"
                .format(worker_id, k, num_traversals_per_worker, ctr[0],
                        advt_mem.size(), strt_mem.size(), elapsed))

    # Save all the buffers one last time.
    print("[WORKER #{}] Final autosave ...".format(worker_id))
    if advt_mem is not None:
        advt_mem.autosave()
    if strt_mem is not None:
        strt_mem.autosave()
tf.compat.v1.keras.backend.set_session(sess) # 设置gym有关参数 env = make_atari('PongNoFrameskip-v4') env = wrap_deepmind(env, scale=False, frame_stack=True) num_actions = env.action_space.n dqn = DeepQNetwork(input_shape=(WIDTH, HEIGHT, NUM_FRAMES), num_actions=num_actions, name='dqn', learning_rate=LR) target_dqn = DeepQNetwork(input_shape=(WIDTH, HEIGHT, NUM_FRAMES), num_actions=num_actions, name='target_dqn', learning_rate=LR) buf = MemoryBuffer(memory_size=BUFFER_SIZE) total_episode_rewards = [] step = 0 for episode in range(MAX_EPISODE + 1): frame = env.reset() # LazyFrames state = np.array(frame) # narray (84, 84, 4) done = False cur_episode_reward = 0 while not done: # 如果done则结束episode if step % C == 0: target_dqn.copy_from(dqn) # 复制参数 if epsilon_greedy(step): action = env.action_space.sample() else: action = dqn.get_action(state / 255.0)
# NOTE(review): Python 2 script (print statement, xrange) — kept as-is.
comarg.add_argument("output_folder", help="Where to write results to.")
comarg.add_argument("--num_episodes", type=int, default=10,
                    help="Number of episodes to test.")
comarg.add_argument("--random_seed", type=int,
                    help="Random seed for repeatable experiments.")
args = parser.parse_args()

if args.random_seed:
    random.seed(args.random_seed)

# Environment, network and frame buffer for evaluation.
env = GymEnvironment(args.env_id, args)
net = DeepQNetwork(env.numActions(), args)
buf = MemoryBuffer(args)

if args.load_weights:
    print "Loading weights from %s" % args.load_weights
    net.load_weights(args.load_weights)

# Record results with the gym monitor.
env.gym.monitor.start(args.output_folder, force=True)

avg_reward = 0
num_episodes = args.num_episodes
for i_episode in xrange(num_episodes):
    env.restart()
    observation = env.getScreen()
    buf.reset()
    i_total_reward = 0
    for t in xrange(10000):
        buf.add(observation)
# NOTE(review): fragment — these are the trailing arguments of a
# constructor call whose opening is outside this chunk; confirm against
# the full file before editing.
    version,
    train_ae=config_graph.getboolean('train_autoencoder'),
    load_policy=config_exp_setup.getboolean('load_graph'),
    learning_rate=float(config_graph['learning_rate']),
    dim_a=config_graph.getint('dim_a'),
    fc_layers_neurons=config_graph.getint('fc_layers_neurons'),
    loss_function_type=config_graph['loss_function_type'],
    policy_loc=config_graph['policy_loc'] + exp_num + '_',
    action_upper_limits=config_graph['action_upper_limits'],
    action_lower_limits=config_graph['action_lower_limits'],
    e=config_graph['e'],
    config_graph=config_graph,
    config_general=config_general)

# Create memory buffer
buffer = MemoryBuffer(min_size=config_buffer.getint('min_size'),
                      max_size=config_buffer.getint('max_size'))

# Create feedback object (human teleoperation keys from config)
env.render()
human_feedback = Feedback(env,
                          key_type=config_feedback['key_type'],
                          h_up=config_feedback['h_up'],
                          h_down=config_feedback['h_down'],
                          h_right=config_feedback['h_right'],
                          h_left=config_feedback['h_left'],
                          h_null=config_feedback['h_null'])

# Create saving directory if it does not exist
if save_results:
    if not os.path.exists(eval_save_path + eval_save_folder):
        os.makedirs(eval_save_path + eval_save_folder)
def test_write_back_I(self):
    """Write-back of an I-type LW: buffer drains and rt is marked clean."""
    self.set_up_write_back_stage('I LW R2 R5 4')
    self.write_back_stage.write_back()
    # The stage should leave an empty memory buffer behind ...
    self.assertEqual(self.write_back_stage.memory_buffer, MemoryBuffer())
    # ... and clear the pending state on the load's destination register.
    self.assertTrue(self.register_file.isClean(self.instr.rt))