def testRunningStat(self):
    for shp in ((), (3,), (3, 4)):
        li = []
        rs = RunningStat(shp)
        for _ in range(5):
            val = np.random.randn(*shp)
            rs.push(val)
            li.append(val)
            m = np.mean(li, axis=0)
            self.assertTrue(np.allclose(rs.mean, m))
            v = np.square(m) if (len(li) == 1) else np.var(
                li, ddof=1, axis=0)
            self.assertTrue(np.allclose(rs.var, v))

def _init(self, num_sgd_iter=1, timesteps_per_batch=1):
    self.update_weights_timer = TimerStat()
    self.sample_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()
    self.num_sgd_iter = num_sgd_iter
    self.timesteps_per_batch = timesteps_per_batch

def _init(self,
          learning_starts=1000,
          buffer_size=10000,
          prioritized_replay=True,
          prioritized_replay_alpha=0.6,
          prioritized_replay_beta=0.4,
          prioritized_replay_eps=1e-6,
          train_batch_size=32,
          sample_batch_size=4):
    self.replay_starts = learning_starts
    self.prioritized_replay_beta = prioritized_replay_beta
    self.prioritized_replay_eps = prioritized_replay_eps
    self.train_batch_size = train_batch_size

    # Stats
    self.update_weights_timer = TimerStat()
    self.sample_timer = TimerStat()
    self.replay_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()

    # Set up replay buffer
    if prioritized_replay:
        self.replay_buffer = PrioritizedReplayBuffer(
            buffer_size, alpha=prioritized_replay_alpha)
    else:
        self.replay_buffer = ReplayBuffer(buffer_size)

    assert buffer_size >= self.replay_starts

def __init__(self,
             workers,
             num_sgd_iter=1,
             train_batch_size=1,
             sgd_minibatch_size=0,
             standardize_fields=frozenset([]),
             aux_loss_every_k=16,
             aux_loss_num_sgd_iter=9,
             aux_loss_start_after_num_steps=0):
    PolicyOptimizer.__init__(self, workers)

    self.update_weights_timer = TimerStat()
    self.standardize_fields = standardize_fields
    self.sample_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()
    self.num_sgd_iter = num_sgd_iter
    self.sgd_minibatch_size = sgd_minibatch_size
    self.train_batch_size = train_batch_size
    self.learner_stats = {}
    self.policies = dict(self.workers.local_worker()
                         .foreach_trainable_policy(lambda p, i: (i, p)))
    logger.debug("Policies to train: {}".format(self.policies))
    self.aux_loss_every_k = aux_loss_every_k
    self.aux_loss_num_sgd_iter = aux_loss_num_sgd_iter
    self.aux_loss_start_after_num_steps = aux_loss_start_after_num_steps
    self.memory = []
    # Assert that the train batch size is divisible by the SGD minibatch size
    # to make populating policy logits simpler.
    assert train_batch_size % sgd_minibatch_size == 0, (
        f"train_batch_size: {train_batch_size}, "
        f"sgd_minibatch_size: {sgd_minibatch_size}")

def _init(self, num_sgd_iter=1, train_batch_size=1):
    self.update_weights_timer = TimerStat()
    self.sample_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()
    self.num_sgd_iter = num_sgd_iter
    self.train_batch_size = train_batch_size
    self.learner_stats = {}

def __init__(self, workers, num_sgd_iter=1, train_batch_size=1):
    PolicyOptimizer.__init__(self, workers)

    self.update_weights_timer = TimerStat()
    self.sample_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()
    self.num_sgd_iter = num_sgd_iter
    self.train_batch_size = train_batch_size
    self.learner_stats = {}

def _init(self,
          learning_starts=1000,
          buffer_size=10000,
          prioritized_replay=True,
          prioritized_replay_alpha=0.6,
          prioritized_replay_beta=0.4,
          schedule_max_timesteps=100000,
          beta_annealing_fraction=0.2,
          final_prioritized_replay_beta=0.4,
          prioritized_replay_eps=1e-6,
          train_batch_size=32,
          sample_batch_size=4):
    self.replay_starts = learning_starts
    # Linearly anneal beta, as in the Rainbow paper.
    self.prioritized_replay_beta = LinearSchedule(
        schedule_timesteps=int(schedule_max_timesteps *
                               beta_annealing_fraction),
        initial_p=prioritized_replay_beta,
        final_p=final_prioritized_replay_beta)
    self.prioritized_replay_eps = prioritized_replay_eps
    self.train_batch_size = train_batch_size

    # Stats
    self.update_weights_timer = TimerStat()
    self.sample_timer = TimerStat()
    self.replay_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()
    self.learner_stats = {}

    # Set up replay buffer
    if prioritized_replay:

        def new_buffer():
            return PrioritizedReplayBuffer(
                buffer_size, alpha=prioritized_replay_alpha)
    else:

        def new_buffer():
            return ReplayBuffer(buffer_size)

    self.replay_buffers = collections.defaultdict(new_buffer)

    assert buffer_size >= self.replay_starts

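# Hedged sketch, not the library's implementation: a minimal LinearSchedule
# with a value(t) accessor, meant only to illustrate how the snippet above
# anneals prioritized_replay_beta linearly from initial_p to final_p over
# schedule_timesteps. The value() method name is an assumption here.
class LinearScheduleSketch:
    def __init__(self, schedule_timesteps, initial_p, final_p):
        self.schedule_timesteps = schedule_timesteps
        self.initial_p = initial_p
        self.final_p = final_p

    def value(self, t):
        # Fraction of the schedule elapsed, capped at 1.0 once it has ended.
        fraction = min(float(t) / self.schedule_timesteps, 1.0)
        return self.initial_p + fraction * (self.final_p - self.initial_p)


# With schedule_max_timesteps=100000 and beta_annealing_fraction=0.2 as above,
# the schedule spans 20000 timesteps, after which beta stays at
# final_prioritized_replay_beta.
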
def __init__(self, workers, train_batch_size=10000, microbatch_size=1000):
    PolicyOptimizer.__init__(self, workers)

    if train_batch_size <= microbatch_size:
        raise ValueError(
            "The microbatch size must be smaller than the train batch "
            "size, got {} vs {}".format(microbatch_size, train_batch_size))

    self.update_weights_timer = TimerStat()
    self.sample_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()
    self.train_batch_size = train_batch_size
    self.microbatch_size = microbatch_size
    self.learner_stats = {}
    self.policies = dict(self.workers.local_worker()
                         .foreach_trainable_policy(lambda p, i: (i, p)))
    logger.debug("Policies to train: {}".format(self.policies))

def __init__(self,
             workers,
             num_sgd_iter=1,
             train_batch_size=1,
             sgd_minibatch_size=0,
             standardize_fields=frozenset([])):
    PolicyOptimizer.__init__(self, workers)

    self.update_weights_timer = TimerStat()
    self.standardize_fields = standardize_fields
    self.sample_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()
    self.num_sgd_iter = num_sgd_iter
    self.sgd_minibatch_size = sgd_minibatch_size
    self.train_batch_size = train_batch_size
    self.learner_stats = {}
    self.policies = dict(self.workers.local_worker()
                         .foreach_trainable_policy(lambda p, i: (i, p)))
    logger.debug("Policies to train: {}".format(self.policies))

def _init(self,
          learning_starts=1000,
          buffer_size=10000,
          prioritized_replay=True,
          prioritized_replay_alpha=0.6,
          prioritized_replay_beta=0.4,
          prioritized_replay_eps=1e-6,
          train_batch_size=32,
          sample_batch_size=4,
          clip_rewards=True):
    self.replay_starts = learning_starts
    self.prioritized_replay_beta = prioritized_replay_beta
    self.prioritized_replay_eps = prioritized_replay_eps
    self.train_batch_size = train_batch_size

    # Stats
    self.update_weights_timer = TimerStat()
    self.sample_timer = TimerStat()
    self.replay_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()

    # Set up replay buffer
    if prioritized_replay:

        def new_buffer():
            return PrioritizedReplayBuffer(
                buffer_size,
                alpha=prioritized_replay_alpha,
                clip_rewards=clip_rewards)
    else:

        def new_buffer():
            return ReplayBuffer(buffer_size, clip_rewards)

    self.replay_buffers = collections.defaultdict(new_buffer)

    assert buffer_size >= self.replay_starts

def testCombiningStat(self):
    # Merging two RunningStats via update() should give the same mean and std
    # as pushing all values into a single RunningStat.
    for shape in [(), (3, ), (3, 4)]:
        li = []
        rs1 = RunningStat(shape)
        rs2 = RunningStat(shape)
        rs = RunningStat(shape)
        for _ in range(5):
            val = np.random.randn(*shape)
            rs1.push(val)
            rs.push(val)
            li.append(val)
        for _ in range(9):
            rs2.push(val)
            rs.push(val)
            li.append(val)
        rs1.update(rs2)
        assert np.allclose(rs.mean, rs1.mean)
        assert np.allclose(rs.std, rs1.std)

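# Hedged sketch, not the library's implementation: a minimal class with the
# interface the two tests above exercise (shape-aware constructor, push,
# update, mean, var, std). push() uses Welford's online update; update()
# merges two stats with the parallel-variance (Chan et al.) formula. With a
# single pushed value, var falls back to mean**2, matching testRunningStat.
import numpy as np


class RunningStatSketch:
    def __init__(self, shape=()):
        self._n = 0
        self._M = np.zeros(shape)  # running mean
        self._S = np.zeros(shape)  # sum of squared deviations from the mean

    def push(self, x):
        x = np.asarray(x)
        assert x.shape == self._M.shape
        self._n += 1
        if self._n == 1:
            self._M[...] = x
        else:
            old_M = self._M.copy()
            self._M[...] = old_M + (x - old_M) / self._n
            self._S[...] = self._S + (x - old_M) * (x - self._M)

    def update(self, other):
        # Merge another stat as if its values had been pushed here directly.
        n1, n2 = self._n, other._n
        n = n1 + n2
        delta = other._M - self._M
        self._S[...] = self._S + other._S + np.square(delta) * n1 * n2 / n
        self._M[...] = self._M + delta * n2 / n
        self._n = n

    @property
    def mean(self):
        return self._M

    @property
    def var(self):
        # Sample variance (ddof=1); mean**2 when only one value was pushed.
        return self._S / (self._n - 1) if self._n > 1 else np.square(self._M)

    @property
    def std(self):
        return np.sqrt(self.var)

    @property
    def shape(self):
        return self._M.shape
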
def __init__(self):
    RunningStat.__init__(self, ())
    self._start_time = None

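# Hedged sketch of how a timer built on a scalar RunningStat (as in the
# __init__ above) might be used: elapsed wall-clock time is pushed into the
# stat, so mean/std over recorded durations come for free. The context-manager
# methods are illustrative assumptions, not a confirmed API, and the base
# class here is the RunningStatSketch defined earlier.
import time


class TimerStatSketch(RunningStatSketch):
    def __init__(self):
        RunningStatSketch.__init__(self, ())
        self._start_time = None

    def __enter__(self):
        self._start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Record the elapsed duration of the timed block.
        self.push(time.time() - self._start_time)
        self._start_time = None
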
def _init(self, batch_size=32):
    self.update_weights_timer = TimerStat()
    self.sample_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()
    self.batch_size = batch_size

def _init(self):
    self.update_weights_timer = TimerStat()
    self.sample_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.throughput = RunningStat()