def __init__(self, local_worker, minibatch_buffer_size, num_sgd_iter,
             learner_queue_size, learner_queue_timeout):
    """Initialize the learner thread.

    Arguments:
        local_worker (RolloutWorker): process local rollout worker holding
            policies this thread will call learn_on_batch() on
        minibatch_buffer_size (int): max number of train batches to store
            in the minibatching buffer
        num_sgd_iter (int): number of passes to learn on per train batch
        learner_queue_size (int): max size of queue of inbound train
            batches to this thread
        learner_queue_timeout (int): raise an exception if the queue has
            been empty for this long, in seconds
    """
    threading.Thread.__init__(self)
    self.learner_queue_size = WindowStat("size", 50)
    self.local_worker = local_worker
    self.inqueue = queue.Queue(maxsize=learner_queue_size)
    self.outqueue = queue.Queue()
    self.minibatch_buffer = MinibatchBuffer(
        inqueue=self.inqueue,
        size=minibatch_buffer_size,
        timeout=learner_queue_timeout,
        num_passes=num_sgd_iter,
        init_num_passes=num_sgd_iter)
    self.queue_timer = TimerStat()
    self.grad_timer = TimerStat()
    self.load_timer = TimerStat()
    self.load_wait_timer = TimerStat()
    self.daemon = True
    self.weights_updated = False
    self.stats = {}
    self.stopped = False
    self.num_steps = 0
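# Consumption sketch (illustrative only; the real work lives in this
# class's step()/run() methods, which are not part of this excerpt): each
# iteration pulls a batch from `self.minibatch_buffer` (which re-serves
# the same batch for `num_sgd_iter` passes) and trains the local worker
# on it via learn_on_batch(), as named in the docstring above.
#
#     batch, _ = self.minibatch_buffer.get()
#     fetches = self.local_worker.learn_on_batch(batch)
#     self.weights_updated = True
#     self.outqueue.put(batch.count)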
def __init__(
        self,
        local_worker: RolloutWorker,
        num_gpus: int = 1,
        lr=None,  # Deprecated.
        train_batch_size: int = 500,
        num_multi_gpu_tower_stacks: int = 1,
        num_sgd_iter: int = 1,
        learner_queue_size: int = 16,
        learner_queue_timeout: int = 300,
        num_data_load_threads: int = 16,
        _fake_gpus: bool = False,
        # Deprecated arg; no longer needed (see deprecation_warning below).
        minibatch_buffer_size=None,
):
    """Initializes a MultiGPULearnerThread instance.

    Args:
        local_worker (RolloutWorker): Local RolloutWorker holding
            policies this thread will call `load_batch_into_buffer` and
            `learn_on_loaded_batch` on.
        num_gpus (int): Number of GPUs to use for data-parallel SGD.
        train_batch_size (int): Size of batches (minibatches if
            `num_sgd_iter` > 1) to learn on.
        num_multi_gpu_tower_stacks (int): Number of buffers to load data
            into in parallel on one device. Each buffer has the size of
            `train_batch_size` and hence increases GPU memory usage
            accordingly.
        num_sgd_iter (int): Number of passes to learn on per train batch
            (minibatch if `num_sgd_iter` > 1).
        learner_queue_size (int): Max size of queue of inbound train
            batches to this thread.
        learner_queue_timeout (int): Raise an exception if the queue has
            been empty for this long, in seconds.
        num_data_load_threads (int): Number of threads to use to load
            data into GPU memory in parallel.
    """
    # Deprecated: No need to specify as we don't need the actual
    # minibatch-buffer anyways.
    if minibatch_buffer_size:
        deprecation_warning(
            old="MultiGPULearnerThread.minibatch_buffer_size",
            error=False,
        )
    super().__init__(
        local_worker=local_worker,
        minibatch_buffer_size=0,
        num_sgd_iter=num_sgd_iter,
        learner_queue_size=learner_queue_size,
        learner_queue_timeout=learner_queue_timeout,
    )
    # Delete reference to parent's minibatch_buffer, which is not needed.
    # Instead, in multi-GPU mode, we pull tower stack indices from the
    # `self.ready_tower_stacks_buffer` buffer, whose size is exactly
    # `num_multi_gpu_tower_stacks`.
    self.minibatch_buffer = None
    self.train_batch_size = train_batch_size

    # TODO: (sven) Allow multi-GPU to work for multi-agent as well.
    self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]
    logger.info("MultiGPULearnerThread devices {}".format(
        self.policy.devices))
    assert self.train_batch_size % len(self.policy.devices) == 0
    assert self.train_batch_size >= len(self.policy.devices), \
        "batch too small"

    if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
        raise NotImplementedError("Multi-gpu mode for multi-agent")

    self.tower_stack_indices = list(range(num_multi_gpu_tower_stacks))

    # Two queues for tower stacks:
    # a) Those that are loaded with data ("ready").
    # b) Those that are ready to be loaded with new data ("idle").
    self.idle_tower_stacks = queue.Queue()
    self.ready_tower_stacks = queue.Queue()
    # In the beginning, all stacks are idle (no loading has taken place
    # yet).
    for idx in self.tower_stack_indices:
        self.idle_tower_stacks.put(idx)
    # Start n threads that are responsible for loading data into the
    # different (idle) stacks.
    for i in range(num_data_load_threads):
        self.loader_thread = _MultiGPULoaderThread(
            self, share_stats=(i == 0))
        self.loader_thread.start()

    # Create a buffer that holds stack indices that are "ready"
    # (loaded with data). Those are stacks that we can call
    # `learn_on_loaded_batch` on.
    self.ready_tower_stacks_buffer = MinibatchBuffer(
        self.ready_tower_stacks, num_multi_gpu_tower_stacks,
        learner_queue_timeout, num_sgd_iter)
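# Data-flow sketch (illustrative; loosely mirrors the loader-thread and
# learner step logic, which is not shown in this excerpt): a stack index
# travels idle -> ready once data is loaded, and back to idle after the
# SGD passes. `batch` is a hypothetical inbound SampleBatch; the policy
# method names come from the docstring above.
#
#     # In a _MultiGPULoaderThread:
#     idx = self.idle_tower_stacks.get()
#     self.policy.load_batch_into_buffer(batch, buffer_index=idx)
#     self.ready_tower_stacks.put(idx)
#
#     # In the learner thread (the buffer re-serves the same index for
#     # `num_sgd_iter` passes and flags when it is released):
#     idx, released = self.ready_tower_stacks_buffer.get()
#     self.policy.learn_on_loaded_batch(buffer_index=idx)
#     if released:
#         self.idle_tower_stacks.put(idx)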
def __init__(
        self,
        local_worker: RolloutWorker,
        num_gpus: int = 1,
        lr=None,  # Deprecated.
        train_batch_size: int = 500,
        num_multi_gpu_tower_stacks: int = 1,
        minibatch_buffer_size: int = 1,
        num_sgd_iter: int = 1,
        learner_queue_size: int = 16,
        learner_queue_timeout: int = 300,
        num_data_load_threads: int = 16,
        _fake_gpus: bool = False):
    """Initializes a MultiGPULearnerThread instance.

    Args:
        local_worker (RolloutWorker): Local RolloutWorker holding
            policies this thread will call load_data() and optimizer()
            on.
        num_gpus (int): Number of GPUs to use for data-parallel SGD.
        train_batch_size (int): Size of batches (minibatches if
            `num_sgd_iter` > 1) to learn on.
        num_multi_gpu_tower_stacks (int): Number of buffers to load data
            into in parallel on one device. Each buffer has the size of
            `train_batch_size` and hence increases GPU memory usage
            accordingly.
        minibatch_buffer_size (int): Max number of train batches to store
            in the minibatch buffer.
        num_sgd_iter (int): Number of passes to learn on per train batch
            (minibatch if `num_sgd_iter` > 1).
        learner_queue_size (int): Max size of queue of inbound train
            batches to this thread.
        learner_queue_timeout (int): Raise an exception if the queue has
            been empty for this long, in seconds.
        num_data_load_threads (int): Number of threads to use to load
            data into GPU memory in parallel.
    """
    LearnerThread.__init__(self, local_worker, minibatch_buffer_size,
                           num_sgd_iter, learner_queue_size,
                           learner_queue_timeout)
    self.train_batch_size = train_batch_size

    # TODO: (sven) Allow multi-GPU to work for multi-agent as well.
    self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]
    logger.info("MultiGPULearnerThread devices {}".format(
        self.policy.devices))
    assert self.train_batch_size % len(self.policy.devices) == 0
    assert self.train_batch_size >= len(self.policy.devices), \
        "batch too small"

    if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
        raise NotImplementedError("Multi-gpu mode for multi-agent")

    self.tower_stack_indices = list(range(num_multi_gpu_tower_stacks))

    self.idle_tower_stacks = queue.Queue()
    self.ready_tower_stacks = queue.Queue()
    for idx in self.tower_stack_indices:
        self.idle_tower_stacks.put(idx)
    for i in range(num_data_load_threads):
        self.loader_thread = _MultiGPULoaderThread(
            self, share_stats=(i == 0))
        self.loader_thread.start()

    self.minibatch_buffer = MinibatchBuffer(
        self.ready_tower_stacks, minibatch_buffer_size,
        learner_queue_timeout, num_sgd_iter)
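# Sizing note (worked example with hypothetical numbers): the two asserts
# above require the train batch to split evenly across devices. With
# num_gpus=2 and train_batch_size=500, each GPU tower receives 250
# samples; train_batch_size=501 would fail the modulo assert, and
# train_batch_size=1 would fail the "batch too small" assert.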
def __init__(self,
             local_worker,
             num_gpus=1,
             lr=0.0005,
             train_batch_size=500,
             num_data_loader_buffers=1,
             minibatch_buffer_size=1,
             num_sgd_iter=1,
             learner_queue_size=16,
             learner_queue_timeout=300,
             num_data_load_threads=16,
             _fake_gpus=False):
    """Initialize a multi-gpu learner thread.

    Arguments:
        local_worker (RolloutWorker): process local rollout worker holding
            policies this thread will call learn_on_batch() on
        num_gpus (int): number of GPUs to use for data-parallel SGD
        lr (float): learning rate
        train_batch_size (int): size of batches to learn on
        num_data_loader_buffers (int): number of buffers to load data into
            in parallel. Each buffer is of the size of train_batch_size
            and increases GPU memory usage proportionally.
        minibatch_buffer_size (int): max number of train batches to store
            in the minibatching buffer
        num_sgd_iter (int): number of passes to learn on per train batch
        learner_queue_size (int): max size of queue of inbound train
            batches to this thread
        learner_queue_timeout (int): raise an exception if the queue has
            been empty for this long, in seconds
        num_data_load_threads (int): number of threads to use to load
            data into GPU memory in parallel
        _fake_gpus (bool): use fake (CPU) devices instead of real GPUs,
            e.g. for debugging on a GPU-less machine
    """
    LearnerThread.__init__(self, local_worker, minibatch_buffer_size,
                           num_sgd_iter, learner_queue_size,
                           learner_queue_timeout)
    self.lr = lr
    self.train_batch_size = train_batch_size
    if not num_gpus:
        self.devices = ["/cpu:0"]
    elif _fake_gpus:
        self.devices = [
            "/cpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
        ]
    else:
        self.devices = [
            "/gpu:{}".format(i) for i in range(int(math.ceil(num_gpus)))
        ]
    logger.info("TFMultiGPULearner devices {}".format(self.devices))
    assert self.train_batch_size % len(self.devices) == 0
    assert self.train_batch_size >= len(self.devices), "batch too small"

    if set(self.local_worker.policy_map.keys()) != {DEFAULT_POLICY_ID}:
        raise NotImplementedError("Multi-gpu mode for multi-agent")

    self.policy = self.local_worker.policy_map[DEFAULT_POLICY_ID]

    # Per-GPU graph copies created below must share vars with the policy.
    # Reuse is set to AUTO_REUSE because Adam nodes are created after
    # all of the device copies are created.
    self.par_opt = []
    with self.local_worker.tf_sess.graph.as_default():
        with self.local_worker.tf_sess.as_default():
            with tf.variable_scope(DEFAULT_POLICY_ID, reuse=tf.AUTO_REUSE):
                if self.policy._state_inputs:
                    rnn_inputs = self.policy._state_inputs + [
                        self.policy._seq_lens
                    ]
                else:
                    rnn_inputs = []
                adam = tf.train.AdamOptimizer(self.lr)
                for _ in range(num_data_loader_buffers):
                    self.par_opt.append(
                        LocalSyncParallelOptimizer(
                            adam,
                            self.devices,
                            [v for _, v in self.policy._loss_inputs],
                            rnn_inputs,
                            999999,  # it will get rounded down
                            self.policy.copy))

            self.sess = self.local_worker.tf_sess
            self.sess.run(tf.global_variables_initializer())

    self.idle_optimizers = queue.Queue()
    self.ready_optimizers = queue.Queue()
    for opt in self.par_opt:
        self.idle_optimizers.put(opt)
    for i in range(num_data_load_threads):
        self.loader_thread = _LoaderThread(self, share_stats=(i == 0))
        self.loader_thread.start()

    self.minibatch_buffer = MinibatchBuffer(
        self.ready_optimizers, minibatch_buffer_size,
        learner_queue_timeout, num_sgd_iter)
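# Optimizer-cycling sketch (illustrative; loosely mirrors the loader and
# learner step logic, which is not shown in this excerpt). The
# LocalSyncParallelOptimizer call forms are assumed from the load_data()/
# optimizer() references in the docstrings above; `inputs` and
# `state_inputs` are hypothetical arrays already split out of an inbound
# batch.
#
#     # In a _LoaderThread: fill an idle per-GPU buffer with data.
#     opt = self.idle_optimizers.get()
#     opt.load_data(self.sess, inputs, state_inputs)
#     self.ready_optimizers.put(opt)
#
#     # In the learner thread: run SGD on a loaded optimizer, then
#     # recycle it for reloading once the minibatch buffer releases it.
#     opt, released = self.minibatch_buffer.get()
#     opt.optimize(self.sess, 0)
#     if released:
#         self.idle_optimizers.put(opt)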