Example #1
    def __init__(self,
                 config: BasicConfigSAC,
                 environment: Env,
                 log_path: str = None,
                 logging: bool = True):
        """
        TODO: Write docstring
        """
        log_path = generate_experiment_signature(
            environment) if log_path is None else log_path
        self.config = config
        self.monitor = Monitor(log_path, config, logging=logging)
        self.env = environment

        self.batch_size = config.learner.batch_size
        self.episode_horizon = config.episode_horizon
        self.steps_before_learn = config.steps_before_learn

        self.memory_buffer = MemoryBuffer(max_memory_size=config.memory_size)

        self.agent = AgentSAC(environment, config.policy)

        self.learner = LearnerSAC(config=config.learner,
                                  agent=self.agent,
                                  enviroment=self.env,
                                  monitor=self.monitor)
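
The trainer above only needs a bounded replay memory that supports appends and uniform batch sampling. A minimal sketch of that pattern using the standard library; the class and method names below are illustrative, not the actual MemoryBuffer API:

import random
from collections import deque

class SimpleReplayBuffer:
    """Bounded FIFO transition store with uniform batch sampling (illustrative only)."""

    def __init__(self, max_memory_size):
        self._storage = deque(maxlen=max_memory_size)

    def add(self, transition):
        # Oldest entries are evicted automatically once maxlen is reached.
        self._storage.append(transition)

    def sample(self, batch_size):
        # Uniform sampling without replacement.
        return random.sample(self._storage, min(batch_size, len(self._storage)))

    def __len__(self):
        return len(self._storage)
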
Example #2
    def __init__(self, in_features, n_hidden, out_features):
        super().__init__()

        self.in_features = in_features
        self.out_features = out_features

        self.f_internal = torch.relu
        self.f_output = lambda x: x

        # Build a stack of Linear layers: in_features -> n_hidden[...] -> out_features.
        self.layers = []
        for i in range(len(n_hidden) + 1):
            inf = in_features if i == 0 else n_hidden[i - 1]
            outf = out_features if i == len(n_hidden) else n_hidden[i]

            self.layers.append(torch.nn.Linear(inf, outf))
            # Register each layer so its parameters are tracked by the module.
            self.add_module(f'layer{i}', self.layers[i])

        self.memory_buffer = MemoryBuffer()
        self.use_memory_buffer = False
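
For reference, the same layer-sizing logic can be written with torch.nn.ModuleList so each Linear is registered without manual add_module calls. This is an independent sketch, not a drop-in replacement for the class above:

import torch
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, in_features, n_hidden, out_features):
        super().__init__()
        sizes = [in_features] + list(n_hidden) + [out_features]
        # ModuleList registers each Linear layer as a submodule automatically.
        self.layers = nn.ModuleList(
            nn.Linear(sizes[i], sizes[i + 1]) for i in range(len(sizes) - 1))

    def forward(self, x):
        # ReLU between hidden layers, identity on the output layer.
        for layer in self.layers[:-1]:
            x = torch.relu(layer(x))
        return self.layers[-1](x)
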
Example #3
    def __init__(self, memory, start_address):
        self.memory = memory
        self.start_address = start_address
        self.register_file = RegisterFile()
        # Unwritten data-memory addresses read back as the sentinel value -777.
        self.data_memory_key_fn = lambda: -777
        self.data_memory = defaultdict(self.data_memory_key_fn)

        self.cycle_count = 0
        self.instr_count = 0
        self.PC = 0

        self.fetch_input_buffer = FetchInputBuffer({
            'PC': self.start_address,
            'instr_count': self.instr_count,
        })
        self.fetcher_buffer = FetcherBuffer()
        self.fetch_stage = FetchStage(self.memory, self.fetch_input_buffer,
                                      self.fetcher_buffer)

        self.decoder_buffer = DecoderBuffer()
        self.decode_stage = DecodeStage(self.fetcher_buffer,
                                        self.decoder_buffer,
                                        self.register_file)

        self.executer_buffer = ExecuterBuffer()
        self.execute_stage = ExecuteStage(self.decoder_buffer,
                                          self.executer_buffer)
        self.memory_buffer = MemoryBuffer()
        self.memory_stage = MemoryStage(self.executer_buffer,
                                        self.memory_buffer, self.data_memory)
        self.write_back_stage = WriteBackStage(self.memory_buffer,
                                               self.register_file)
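
The data memory above is a defaultdict whose factory returns the sentinel -777 for addresses that were never written; a tiny standalone illustration of that behaviour:

from collections import defaultdict

data_memory = defaultdict(lambda: -777)
data_memory[100] = 42     # explicit store
print(data_memory[100])   # 42
print(data_memory[200])   # -777: reading an unwritten address yields the sentinel
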
Example #4
    def __init__(self, action_dim, state_dim, params):
        """ Initialization
        """
        # session = K.get_session()
        # Environment and DDQN parameters
        self.with_per = params["with_per"]
        self.action_dim = action_dim
        self.state_dim = state_dim

        self.lr = 2.5e-4
        self.gamma = 0.95
        self.epsilon = params["epsilon"]
        self.epsilon_decay = params["epsilon_decay"]
        self.epsilon_minimum = 0.05
        self.buffer_size = 10000
        self.tau = 1.0
        self.agent = Agent(self.state_dim, action_dim, self.lr, self.tau,
                           params["dueling"])
        # Memory Buffer for Experience Replay
        self.buffer = MemoryBuffer(self.buffer_size, self.with_per)

        exp_dir = 'test/models/'
        if not os.path.exists(exp_dir):
            os.makedirs(exp_dir)
        self.export_path = os.path.join(exp_dir, 'lala.h5')
        self.save_interval = params["save_interval"]
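
The epsilon, epsilon_decay and epsilon_minimum fields above support a standard epsilon-greedy schedule. A hedged sketch of how such a schedule is commonly applied; these helper functions are illustrative and not methods of the agent above:

import random

def select_action(q_values, epsilon, n_actions):
    # Explore with probability epsilon, otherwise act greedily on the Q-values.
    if random.random() < epsilon:
        return random.randrange(n_actions)
    return max(range(n_actions), key=lambda a: q_values[a])

def decay_epsilon(epsilon, decay, minimum):
    # Multiplicative decay, clipped at the floor value.
    return max(minimum, epsilon * decay)
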
Example #5
    def test_write_back_R(self):
        self.set_up_write_back_stage('R ADD  R1 R2 R3')
        expected_reg_value = self.memory_buffer.rd[1]
        self.write_back_stage.write_back()
        self.assertEqual(self.write_back_stage.memory_buffer, MemoryBuffer())
        self.assertEqual(self.write_back_stage.register_file[self.instr.rd],
                         expected_reg_value)
        self.assertTrue(self.register_file.isClean(self.instr.rd))
Example #6
    def test_do_operand_forwarding_MEM(self):
        self.processor.executer_buffer = ExecuterBuffer({'rt': [1, None]})
        self.processor.memory_buffer = MemoryBuffer({'rt': [1, 3]})
        self.processor.do_operand_forwarding()
        self.assertEqual(self.processor.executer_buffer.rt, [1, 3])

        self.processor.executer_buffer = ExecuterBuffer({'rt': [2, None]})
        self.processor.do_operand_forwarding()
        self.assertEqual(self.processor.executer_buffer.rt, [2, None])
Example #7
    def __init__(self,
                 load_policy=False,
                 learning_rate=0.001,
                 dim_a=3,
                 fc_layers_neurons=100,
                 loss_function_type='mean_squared',
                 policy_loc='./racing_car_m2/network',
                 image_size=64,
                 action_upper_limits='1,1',
                 action_lower_limits='-1,-1',
                 e='1',
                 show_ae_output=True,
                 show_state=True,
                 resize_observation=True,
                 ae_training_threshold=0.0011,
                 ae_evaluation_frequency=40):

        self.image_size = image_size

        super(Agent, self).__init__(dim_a=dim_a,
                                    policy_loc=policy_loc,
                                    action_upper_limits=action_upper_limits,
                                    action_lower_limits=action_lower_limits,
                                    e=e,
                                    load_policy=load_policy,
                                    loss_function_type=loss_function_type,
                                    learning_rate=learning_rate,
                                    fc_layers_neurons=fc_layers_neurons)

        # High-dimensional state initialization
        self.resize_observation = resize_observation
        self.show_state = show_state
        self.show_ae_output = show_ae_output

        # Autoencoder training control variables
        self.ae_training = True
        self.ae_loss_history = MemoryBuffer(
            min_size=50,
            max_size=50)  # reuse memory buffer for the ae loss history
        self.ae_training_threshold = ae_training_threshold
        self.ae_evaluation_frequency = ae_evaluation_frequency
        self.mean_ae_loss = 1e7

        if self.show_state:
            self.state_plot = FastImagePlot(1,
                                            np.zeros([image_size, image_size]),
                                            image_size,
                                            'Image State',
                                            vmax=0.5)

        if self.show_ae_output:
            self.ae_output_plot = FastImagePlot(2,
                                                np.zeros(
                                                    [image_size, image_size]),
                                                image_size,
                                                'Autoencoder Output',
                                                vmax=0.5)
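
The MemoryBuffer(min_size=50, max_size=50) above is repurposed as a fixed-length window of recent autoencoder losses for the training-threshold check. The same bookkeeping can be sketched with a deque; the names and default threshold are illustrative:

from collections import deque
from statistics import mean

ae_loss_history = deque(maxlen=50)   # keeps only the 50 most recent losses

def keep_training_ae(latest_loss, threshold=0.0011):
    ae_loss_history.append(latest_loss)
    # Only judge convergence once the window is full.
    if len(ae_loss_history) < ae_loss_history.maxlen:
        return True
    return mean(ae_loss_history) > threshold
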
Example #8
    def test_memory_buffer_size(self):
        info_set_size = 1 + 2 + 5 + 24
        item_size = 64
        max_size = int(1e6)
        mb = MemoryBuffer(info_set_size, item_size, max_size=max_size)
        print(mb._infosets.dtype)
        print(mb._items.dtype)
        print(mb._weights.dtype)
        print("Memory buffer size (max_size={}): {} mb".format(
            max_size, mb.size_mb()))
Example #9
    def __init__(self,
                 n_state,
                 n_action,
                 a_bound,
                 discount=0.99,
                 tau=0.05,
                 actor_lr=0.001,
                 critic_lr=0.001,
                 policy_freq=2,
                 exp_noise_std=0.1,
                 noise_decay=0.9995,
                 noise_decay_steps=1000,
                 smooth_noise_std=0.1,
                 clip=0.2,
                 buffer_size=20000,
                 save_interval=5000,
                 assess_interval=20,
                 logger=None,
                 checkpoint_queen=None):
        #self.__dict__.update(locals())
        self.logger = logger
        self.logger.save_config(locals())
        self.n_action = n_action
        self.n_state = n_state
        self.a_bound = a_bound
        self.noise_std = exp_noise_std
        self.noise_decay = noise_decay
        self.noise_decay_steps = noise_decay_steps
        self.policy_freq = policy_freq
        self.smooth_noise_std = smooth_noise_std
        self.clip = clip
        self.discount = discount

        self.pointer = 0
        self.buffer = MemoryBuffer(buffer_size, with_per=True)
        self.save_interval = save_interval
        self.assess_interval = assess_interval
        self.actor = Actor(self.n_state,
                           self.n_action,
                           gamma=discount,
                           lr=actor_lr,
                           tau=tau)
        self.critic1 = Critic(self.n_state,
                              self.n_action,
                              gamma=discount,
                              lr=critic_lr,
                              tau=tau)
        self.critic2 = Critic(self.n_state,
                              self.n_action,
                              gamma=discount,
                              lr=critic_lr,
                              tau=tau)
        self.merge = self._merge_summary()
        self.ckpt_queen = checkpoint_queen
        self.prefix = self.__class__.__name__
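
The tau argument handed to the actor and critics above is the usual Polyak-averaging coefficient for soft target-network updates. A minimal PyTorch sketch of that update, assuming two modules with matching parameter order; this is not the project's Actor/Critic code:

import torch

@torch.no_grad()
def soft_update(target_net, online_net, tau):
    # target <- tau * online + (1 - tau) * target, parameter by parameter.
    for t_param, o_param in zip(target_net.parameters(), online_net.parameters()):
        t_param.mul_(1.0 - tau).add_(tau * o_param)
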
Example #10
    def test_do_operand_forwarding(self):
        self.processor.decoder_buffer = DecoderBuffer({'rs': [2, None]})
        self.processor.executer_buffer = ExecuterBuffer({'rt': [2, 7]})
        self.processor.do_operand_forwarding()
        self.assertEqual(self.processor.decoder_buffer.rs, [2, 7])

        self.processor.decoder_buffer = DecoderBuffer({'rs': [2, None]})
        self.processor.executer_buffer = ExecuterBuffer()
        self.processor.memory_buffer = MemoryBuffer({'rd': [2, 9]})
        self.processor.do_operand_forwarding()
        self.assertEqual(self.processor.decoder_buffer.rs, [2, 9])
Example #11
    def __init__(self,
                 state_dim,
                 action_dim,
                 batchSize=64,
                 lr=.0001,
                 tau=.05,
                 gamma=.95,
                 epsilon=1,
                 eps_dec=.99,
                 learnInterval=1,
                 isDual=False,
                 isDueling=False,
                 isPER=False,
                 filename='model',
                 mem_size=1000000,
                 layerCount=2,
                 layerUnits=64,
                 usePruning=False):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.isDueling = isDueling
        self.isDual = isDual
        self.isPER = isPER
        self.lr = lr
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = eps_dec
        self.batchSize = batchSize
        self.filename = filename
        self.learnInterval = learnInterval
        # Initialize Deep Q-Network
        self.model = generateDQN(action_dim, lr, state_dim, isDueling,
                                 layerCount, layerUnits, usePruning)
        # Build target Q-Network
        self.target_model = generateDQN(action_dim, lr, state_dim, isDueling,
                                        layerCount, layerUnits, usePruning)
        self.layerCount = layerCount
        self.layerUnits = layerUnits
        self.target_model.set_weights(self.model.get_weights())
        self.memory = MemoryBuffer(mem_size, isPER)
        self.epsilonInitial = epsilon
        self.minEpsilon = .1
        self.usePruning = usePruning

        if isDual:
            self.tau = tau
        else:
            self.tau = 1.0

        # Track the point in the replay memory at which learning last occurred
        self.lastLearnIndex = self.memory.totalMemCount
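
The mem_size and totalMemCount bookkeeping above matches a ring buffer that overwrites its oldest entries and remembers how many items were ever added. A numpy sketch of that idea; the field names are assumptions, not the real MemoryBuffer layout:

import numpy as np

class RingBuffer:
    def __init__(self, capacity, item_dim):
        self.capacity = capacity
        self.data = np.zeros((capacity, item_dim), dtype=np.float32)
        self.total_added = 0  # analogous to totalMemCount above

    def add(self, item):
        # Overwrite the oldest slot once the buffer wraps around.
        self.data[self.total_added % self.capacity] = item
        self.total_added += 1

    def sample(self, batch_size):
        filled = min(self.total_added, self.capacity)
        # Uniform sampling over the filled portion of the buffer.
        idx = np.random.randint(0, filled, size=batch_size)
        return self.data[idx]
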
Example #12
    def test_resample(self):
        if os.path.exists("./memory/memory_buffer_test/"):
            shutil.rmtree("./memory/memory_buffer_test/")

        # Make a few saved memory buffers.
        info_set_size = 1 + 1 + 16
        item_size = 6
        max_size = int(1e4)
        mb = MemoryBuffer(info_set_size, item_size, max_size=max_size)

        buf1_size = 100
        for i in range(buf1_size):
            mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 0)
        mb.save("./memory/memory_buffer_test/", "advt_mem_0")
        mb.clear()

        buf2_size = 200
        for i in range(buf2_size):
            mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 1)
        mb.save("./memory/memory_buffer_test/", "advt_mem_0")
        mb.clear()

        buf3_size = 300
        for i in range(buf3_size):
            mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 2)
        mb.save("./memory/memory_buffer_test/", "advt_mem_0")
        mb.clear()

        # Make a dataset using the saved buffers.
        # n = (buf1_size + buf2_size) // 10
        n = 1000
        dataset = MemoryBufferDataset("./memory/memory_buffer_test/",
                                      "advt_mem_0", n)
        # min_size = min(n, buf1_size + buf2_size + buf3_size)
        # print(min_size)

        for _ in range(1):
            dataset.resample()
            self.assertEqual(len(dataset), n)
            self.assertEqual(len(dataset._infosets), n)
            self.assertEqual(len(dataset._items), n)
            self.assertEqual(len(dataset._weights), n)
            # print(dataset._weights)

        # Test iteration over the dataset.
        for inputs in dataset:
            print(inputs.keys())

        print(dataset._weights)
Example #13
    def get_stage_output(memory, register_file, pc, instr_count, stage_name):
        """Return the output buffer of the given stage for these initial conditions.

        All stages up to and including stage_name are executed.

        Arguments:
        - `memory`: instruction memory to fetch from
        - `register_file`: register file used for decode and operand reads
        - `pc`: initial program counter value
        - `instr_count`: initial instruction count
        - `stage_name`: one of 'fetch', 'decode', 'execute' or 'memory'

        TODO: Maybe just take the stages as input later.
        """
        fetch_input_buffer = FetchInputBuffer({
            'PC': pc,
            'instr_count': instr_count,
        })
        fetcher_buffer = FetcherBuffer()
        fetch_stage = FetchStage(memory, fetch_input_buffer, fetcher_buffer)
        fetch_stage.fetch_instruction()

        if stage_name == 'fetch':
            return fetch_stage.fetcher_buffer

        decode_stage = DecodeStage(fetch_stage.fetcher_buffer, DecoderBuffer(),
                                   register_file)
        decode_stage.decode_instruction()

        if stage_name == 'decode':
            return decode_stage.decoder_buffer

        execute_stage = ExecuteStage(decode_stage.decoder_buffer,
                                     ExecuterBuffer())
        execute_stage.execute()
        if stage_name == 'execute':
            return execute_stage.executer_buffer

        data_memory_key_fn = lambda: -1
        data_memory = defaultdict(data_memory_key_fn)

        memory_stage = MemoryStage(execute_stage.executer_buffer,
                                   MemoryBuffer(), data_memory)
        memory_stage.do_memory_operation()

        if stage_name == 'memory':
            return memory_stage.memory_buffer
Example #14
    def test_memory_buffer_save(self):
        # Make sure the folder doesn't exist so the manifest has to be created.
        if os.path.exists("./memory/memory_buffer_test/"):
            shutil.rmtree("./memory/memory_buffer_test/")
        info_set_size = 1 + 2 + 5 + 24
        item_size = 64
        max_size = int(1e6)
        mb = MemoryBuffer(info_set_size, item_size, max_size=max_size)
        mb.save("./memory/memory_buffer_test/", "test_buffer")

        self.assertTrue(
            os.path.exists(
                "./memory/memory_buffer_test/manifest_test_buffer.csv"))
        self.assertTrue(
            os.path.exists(
                "./memory/memory_buffer_test/test_buffer_00000.pth"))

        # Now save again.
        mb.save("./memory/memory_buffer_test/", "test_buffer")
        self.assertTrue(
            os.path.exists(
                "./memory/memory_buffer_test/test_buffer_00001.pth"))
Example #15
    def test_memory_buffer_autosave(self):
        print("\n ================= AUTOSAVE TEST ====================")
        # Make sure the folder doesn't exist so the manifest has to be created.
        if os.path.exists("./memory/memory_buffer_test/"):
            shutil.rmtree("./memory/memory_buffer_test/")
        info_set_size = 1 + 1 + 24
        item_size = 64
        max_size = int(1e3)

        # Add autosave params.
        mb = MemoryBuffer(info_set_size,
                          item_size,
                          max_size=max_size,
                          autosave_params=("./memory/memory_buffer_test/",
                                           "test_buffer"))

        for _ in range(max_size):
            mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 1234)
        self.assertTrue(mb.full())

        # This should trigger the save and reset.
        mb.add(make_dummy_ev_infoset(), torch.zeros(item_size), 1234)
Example #16
def traverse_worker(worker_id, traverse_player_idx, strategies, save_lock, opt,
                    t, eval_mode, info_queue):
    """
  A worker that traverses the game tree K times, saving things to memory buffers. Each worker
  maintains its own memory buffers and saves them after finishing.

  If eval_mode is set to True, no memory buffers are created.
  """
    # assert(strategies[0]._network.device == torch.device("cpu"))
    # assert(strategies[1]._network.device == torch.device("cpu"))

    advt_mem = MemoryBuffer(
        Constants.INFO_SET_SIZE,
        Constants.NUM_ACTIONS,
        max_size=opt.SINGLE_PROC_MEM_BUFFER_MAX_SIZE,
        autosave_params=(opt.MEMORY_FOLDER,
                         opt.ADVT_BUFFER_FMT.format(traverse_player_idx)),
        save_lock=save_lock) if not eval_mode else None

    strt_mem = MemoryBuffer(
        Constants.INFO_SET_SIZE,
        Constants.NUM_ACTIONS,
        max_size=opt.SINGLE_PROC_MEM_BUFFER_MAX_SIZE,
        autosave_params=(opt.MEMORY_FOLDER, opt.STRT_BUFFER_FMT),
        save_lock=save_lock) if not eval_mode else None

    if eval_mode:
        num_traversals_per_worker = int(opt.NUM_TRAVERSALS_EVAL /
                                        opt.NUM_TRAVERSE_WORKERS)
    else:
        num_traversals_per_worker = int(opt.NUM_TRAVERSALS_PER_ITER /
                                        opt.NUM_TRAVERSE_WORKERS)

    t0 = time.time()
    for k in range(num_traversals_per_worker):
        ctr = [0]

        # Generate a random initialization, alternating the SB player each time.
        sb_player_idx = k % 2
        round_state = create_new_round(sb_player_idx)

        precomputed_ev = make_precomputed_ev(round_state)
        info = traverse(round_state,
                        make_actions,
                        make_infoset,
                        traverse_player_idx,
                        sb_player_idx,
                        strategies,
                        advt_mem,
                        strt_mem,
                        t,
                        precomputed_ev,
                        recursion_ctr=ctr)

        if (k % opt.TRAVERSE_DEBUG_PRINT_HZ) == 0 and not eval_mode:
            elapsed = time.time() - t0
            print(
                "[WORKER #{}] done with {}/{} traversals | recursion depth={} | advt={} strt={} | elapsed={} sec"
                .format(worker_id, k, num_traversals_per_worker, ctr[0],
                        advt_mem.size(), strt_mem.size(), elapsed))

    # Save all the buffers one last time.
    print("[WORKER #{}] Final autosave ...".format(worker_id))
    if advt_mem is not None: advt_mem.autosave()
    if strt_mem is not None: strt_mem.autosave()
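
The save_lock passed into traverse_worker serializes buffer saves across worker processes. A minimal sketch of that pattern with the standard multiprocessing module; the worker body and file name are placeholders, not the traversal code above:

import multiprocessing as mp

def worker(worker_id, save_lock, path):
    result = "worker {} finished\n".format(worker_id)
    # Only one process appends to the shared file at a time.
    with save_lock:
        with open(path, "a") as f:
            f.write(result)

if __name__ == "__main__":
    lock = mp.Lock()
    procs = [mp.Process(target=worker, args=(i, lock, "results.txt"))
             for i in range(4)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
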
Example #17
    tf.compat.v1.keras.backend.set_session(sess)

    # Set up gym-related parameters
    env = make_atari('PongNoFrameskip-v4')
    env = wrap_deepmind(env, scale=False, frame_stack=True)
    num_actions = env.action_space.n

    dqn = DeepQNetwork(input_shape=(WIDTH, HEIGHT, NUM_FRAMES),
                       num_actions=num_actions,
                       name='dqn',
                       learning_rate=LR)
    target_dqn = DeepQNetwork(input_shape=(WIDTH, HEIGHT, NUM_FRAMES),
                              num_actions=num_actions,
                              name='target_dqn',
                              learning_rate=LR)
    buf = MemoryBuffer(memory_size=BUFFER_SIZE)

    total_episode_rewards = []
    step = 0
    for episode in range(MAX_EPISODE + 1):
        frame = env.reset()  # LazyFrames
        state = np.array(frame)  # ndarray (84, 84, 4)
        done = False
        cur_episode_reward = 0
        while not done:  # the episode ends once done is True
            if step % C == 0:
                target_dqn.copy_from(dqn)  # copy weights into the target network
            if epsilon_greedy(step):
                action = env.action_space.sample()
            else:
                action = dqn.get_action(state / 255.0)
Example #18
comarg.add_argument("output_folder", help="Where to write results to.")
comarg.add_argument("--num_episodes",
                    type=int,
                    default=10,
                    help="Number of episodes to test.")
comarg.add_argument("--random_seed",
                    type=int,
                    help="Random seed for repeatable experiments.")
args = parser.parse_args()

if args.random_seed:
    random.seed(args.random_seed)

env = GymEnvironment(args.env_id, args)
net = DeepQNetwork(env.numActions(), args)
buf = MemoryBuffer(args)

if args.load_weights:
    print "Loading weights from %s" % args.load_weights
    net.load_weights(args.load_weights)

env.gym.monitor.start(args.output_folder, force=True)
avg_reward = 0
num_episodes = args.num_episodes
for i_episode in range(num_episodes):
    env.restart()
    observation = env.getScreen()
    buf.reset()
    i_total_reward = 0
    for t in range(10000):
        buf.add(observation)
Example #19
    version,
    train_ae=config_graph.getboolean('train_autoencoder'),
    load_policy=config_exp_setup.getboolean('load_graph'),
    learning_rate=float(config_graph['learning_rate']),
    dim_a=config_graph.getint('dim_a'),
    fc_layers_neurons=config_graph.getint('fc_layers_neurons'),
    loss_function_type=config_graph['loss_function_type'],
    policy_loc=config_graph['policy_loc'] + exp_num + '_',
    action_upper_limits=config_graph['action_upper_limits'],
    action_lower_limits=config_graph['action_lower_limits'],
    e=config_graph['e'],
    config_graph=config_graph,
    config_general=config_general)

# Create memory buffer
buffer = MemoryBuffer(min_size=config_buffer.getint('min_size'),
                      max_size=config_buffer.getint('max_size'))

# Create feedback object
env.render()
human_feedback = Feedback(env,
                          key_type=config_feedback['key_type'],
                          h_up=config_feedback['h_up'],
                          h_down=config_feedback['h_down'],
                          h_right=config_feedback['h_right'],
                          h_left=config_feedback['h_left'],
                          h_null=config_feedback['h_null'])

# Create the saving directory if it does not exist
if save_results:
    if not os.path.exists(eval_save_path + eval_save_folder):
        os.makedirs(eval_save_path + eval_save_folder)
Example #20
    def test_write_back_I(self):
        self.set_up_write_back_stage('I LW  R2 R5 4')
        self.write_back_stage.write_back()
        self.assertEqual(self.write_back_stage.memory_buffer, MemoryBuffer())
        self.assertTrue(self.register_file.isClean(self.instr.rt))