Example #1
    def load_batch_into_buffer(
        self,
        batch: SampleBatch,
        buffer_index: int = 0,
    ) -> int:
        # Set the is_training flag of the batch.
        batch.is_training = True

        # Shortcut for 1 CPU only: Store batch in
        # `self._loaded_single_cpu_batch`.
        if len(self.devices) == 1 and self.devices[0] == "/cpu:0":
            assert buffer_index == 0
            self._loaded_single_cpu_batch = batch
            return len(batch)

        input_dict = self._get_loss_inputs_dict(batch, shuffle=False)
        data_keys = list(self._loss_input_dict_no_rnn.values())
        if self._state_inputs:
            state_keys = self._state_inputs + [self._seq_lens]
        else:
            state_keys = []
        inputs = [input_dict[k] for k in data_keys]
        state_inputs = [input_dict[k] for k in state_keys]

        return self.multi_gpu_tower_stacks[buffer_index].load_data(
            sess=self.get_session(),
            inputs=inputs,
            state_inputs=state_inputs,
        )
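Example #1 is the TF variant of load_batch_into_buffer: the batch is turned into a feed dict, split into data inputs and RNN-state inputs, and loaded into the multi-GPU tower stack for the given buffer index. A minimal usage sketch (not from the source; `policy` is assumed to be an already-built RLlib policy and `train_batch` a SampleBatch of rollout data):

# Hypothetical usage sketch: load the batch once, then run SGD over the
# pre-loaded data via learn_on_loaded_batch().
num_loaded = policy.load_batch_into_buffer(train_batch, buffer_index=0)
results = policy.learn_on_loaded_batch(offset=0, buffer_index=0)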
Example #2
File: torch_policy.py Project: rlan/ray
    def compute_gradients(self,
                          postprocessed_batch: SampleBatch) -> ModelGradients:

        assert len(self.devices) == 1

        # If not done yet, see whether we have to zero-pad this batch.
        if not postprocessed_batch.zero_padded:
            pad_batch_to_sequences_of_same_size(
                batch=postprocessed_batch,
                max_seq_len=self.max_seq_len,
                shuffle=False,
                batch_divisibility_req=self.batch_divisibility_req,
                view_requirements=self.view_requirements,
            )

        postprocessed_batch.is_training = True
        self._lazy_tensor_dict(postprocessed_batch, device=self.devices[0])

        # Do the (maybe parallelized) gradient calculation step.
        tower_outputs = self._multi_gpu_parallel_grad_calc(
            [postprocessed_batch])

        all_grads, grad_info = tower_outputs[0]

        grad_info["allreduce_latency"] /= len(self._optimizers)
        grad_info.update(self.extra_grad_info(postprocessed_batch))

        fetches = self.extra_compute_grad_fetches()

        return all_grads, dict(fetches, **{LEARNER_STATS_KEY: grad_info})
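Before computing gradients, Example #2 zero-pads the flattened batch so that all RNN sequences have the same length. A standalone sketch of that padding idea (plain NumPy; this is not the actual pad_batch_to_sequences_of_same_size helper):

import numpy as np

# Sketch of the zero-padding step: variable-length sequences are right-padded
# with zeros up to max_seq_len so they can be stacked into one dense
# [num_seqs, max_seq_len] array for the RNN forward pass.
def pad_sequences(flat_rows, seq_lens, max_seq_len):
    padded = np.zeros((len(seq_lens), max_seq_len), dtype=np.float32)
    start = 0
    for i, sl in enumerate(seq_lens):
        padded[i, :sl] = flat_rows[start:start + sl]
        start += sl
    return padded

# Example: a flat batch of 9 timesteps split into sequences of lengths 4, 2, 3.
flat = np.arange(9, dtype=np.float32)
print(pad_sequences(flat, seq_lens=[4, 2, 3], max_seq_len=5))
# [[0. 1. 2. 3. 0.]
#  [4. 5. 0. 0. 0.]
#  [6. 7. 8. 0. 0.]]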
Example #3
File: torch_policy.py Project: rlan/ray
    def load_batch_into_buffer(
            self,
            batch: SampleBatch,
            buffer_index: int = 0,
    ) -> int:
        # Set the is_training flag of the batch.
        batch.is_training = True

        # Shortcut for 1 CPU only: Store batch in `self._loaded_batches`.
        if len(self.devices) == 1 and self.devices[0].type == "cpu":
            assert buffer_index == 0
            pad_batch_to_sequences_of_same_size(
                batch=batch,
                max_seq_len=self.max_seq_len,
                shuffle=False,
                batch_divisibility_req=self.batch_divisibility_req,
                view_requirements=self.view_requirements,
            )
            self._lazy_tensor_dict(batch)
            self._loaded_batches[0] = [batch]
            return len(batch)

        # Batch (len=28, seq-lens=[4, 7, 4, 10, 3]):
        # 0123 0123456 0123 0123456789 ABC

        # 1) split into n per-GPU sub batches (n=2).
        # [0123 0123456 012] [3 0123456789 ABC]
        # (len=14 each, seq-lens=[4, 7, 3] and [1, 10, 3])
        slices = batch.timeslices(num_slices=len(self.devices))

        # 2) zero-padding (max-seq-len=10).
        # - [0123000000 0123456000 0120000000]
        # - [3000000000 0123456789 ABC0000000]
        for slice in slices:
            pad_batch_to_sequences_of_same_size(
                batch=slice,
                max_seq_len=self.max_seq_len,
                shuffle=False,
                batch_divisibility_req=self.batch_divisibility_req,
                view_requirements=self.view_requirements,
            )

        # 3) Load splits into the given buffer (consisting of n GPUs).
        slices = [
            slice.to_device(self.devices[i]) for i, slice in enumerate(slices)
        ]
        self._loaded_batches[buffer_index] = slices

        # Return loaded samples per-device.
        return len(slices[0])
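The worked example in the comments above (len=28, seq-lens=[4, 7, 4, 10, 3], two GPUs) can be reproduced with a standalone sketch of the time-slicing step. This is not SampleBatch.timeslices itself, just an illustration of how the flat time axis is cut into (roughly) equal chunks and the per-chunk sequence lengths are recomputed, possibly splitting one sequence across two chunks:

# Sketch of the time-slicing step on sequence lengths only.
def timeslice_seq_lens(seq_lens, num_slices):
    slice_len = sum(seq_lens) // num_slices
    slices, current, filled = [], [], 0
    for sl in seq_lens:
        while sl > 0:
            # The last slice simply takes everything that is still left.
            room = sl if len(slices) == num_slices - 1 else slice_len - filled
            take = min(sl, room)
            current.append(take)
            filled += take
            sl -= take
            if len(slices) < num_slices - 1 and filled == slice_len:
                slices.append(current)
                current, filled = [], 0
    slices.append(current)
    return slices

print(timeslice_seq_lens([4, 7, 4, 10, 3], num_slices=2))
# [[4, 7, 3], [1, 10, 3]]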
Example #4
    def _get_loss_inputs_dict(self, train_batch: SampleBatch, shuffle: bool):
        """Return a feed dict from a batch.

        Args:
            train_batch (SampleBatch): batch of data to derive inputs from.
            shuffle (bool): whether to shuffle batch sequences. Shuffle may
                be done in-place. This only makes sense if you're further
                applying minibatch SGD after getting the outputs.

        Returns:
            Feed dict of data.
        """

        if not isinstance(train_batch,
                          SampleBatch) or not train_batch.zero_padded:
            pad_batch_to_sequences_of_same_size(
                train_batch,
                max_seq_len=self._max_seq_len,
                shuffle=shuffle,
                batch_divisibility_req=self._batch_divisibility_req,
                feature_keys=list(self._loss_input_dict_no_rnn.keys()),
                view_requirements=self.view_requirements,
            )
        else:
            train_batch["seq_lens"] = train_batch.seq_lens

        # Get batch ready for RNNs, if applicable.

        # Mark the batch as "is_training" so the Model can use this
        # information.
        train_batch.is_training = True

        # Build the feed dict from the batch.
        feed_dict = {}
        for key, placeholder in self._loss_input_dict.items():
            feed_dict[placeholder] = train_batch[key]

        state_keys = [
            "state_in_{}".format(i) for i in range(len(self._state_inputs))
        ]
        for key in state_keys:
            feed_dict[self._loss_input_dict[key]] = train_batch[key]
        if state_keys:
            feed_dict[self._seq_lens] = train_batch["seq_lens"]

        return feed_dict
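Example #4 (the TF variant) builds a session.run feed dict by mapping each loss-input placeholder to the matching column of the train batch. A standalone TF1-style sketch of that pattern, using hypothetical placeholders rather than the policy's actual loss inputs:

import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

# Hypothetical loss-input placeholders and a toy loss over them.
obs_ph = tf.compat.v1.placeholder(tf.float32, [None, 4], name="obs")
rew_ph = tf.compat.v1.placeholder(tf.float32, [None], name="rewards")
loss = tf.reduce_mean(tf.reduce_sum(obs_ph, axis=1) - rew_ph)

# A train batch keyed by column name, and the loss-input dict keyed the same way.
train_batch = {"obs": np.random.rand(8, 4).astype(np.float32),
               "rewards": np.random.rand(8).astype(np.float32)}
loss_input_dict = {"obs": obs_ph, "rewards": rew_ph}

# Build the feed dict exactly as above: placeholder -> batch column.
feed_dict = {ph: train_batch[key] for key, ph in loss_input_dict.items()}
with tf.compat.v1.Session() as sess:
    print(sess.run(loss, feed_dict=feed_dict))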
Example #5
    def compute_gradients(self,
                          postprocessed_batch: SampleBatch) -> ModelGradients:

        assert len(self.devices) == 1

        postprocessed_batch.is_training = True
        self._lazy_tensor_dict(postprocessed_batch, device=self.devices[0])

        # Do the (maybe parallelized) gradient calculation step.
        tower_outputs = self._multi_gpu_parallel_grad_calc(
            [postprocessed_batch])

        all_grads, grad_info = tower_outputs[0]

        grad_info["allreduce_latency"] /= len(self._optimizers)
        grad_info.update(self.extra_grad_info(postprocessed_batch))

        fetches = self.extra_compute_grad_fetches()

        return all_grads, dict(fetches, **{LEARNER_STATS_KEY: grad_info})
Example #6
    def compute_gradients(self,
                          postprocessed_batch: SampleBatch) -> ModelGradients:

        if not isinstance(postprocessed_batch, SampleBatch) or \
                not postprocessed_batch.zero_padded:
            pad_batch_to_sequences_of_same_size(
                postprocessed_batch,
                max_seq_len=self.max_seq_len,
                shuffle=False,
                batch_divisibility_req=self.batch_divisibility_req,
                view_requirements=self.view_requirements,
            )

        # Mark the batch as "is_training" so the Model can use this
        # information.
        postprocessed_batch.is_training = True

        # Single device case: Use batch as-is (no slicing).
        if len(self.devices) == 1:
            batches = [self._lazy_tensor_dict(postprocessed_batch)]
        # Multi-GPU case: Slice inputs into n (roughly) equal batches.
        else:
            len_ = len(postprocessed_batch)
            batches = []
            start = 0
            for i, device in enumerate(self.devices):
                shard_len = len_ // (len(self.devices) - i)
                batch = self._lazy_tensor_dict(postprocessed_batch.slice(
                    start, start + shard_len),
                                               device=device)
                batches.append(batch)
                len_ -= shard_len
                start += shard_len

            # Copy weights of main model to all towers.
            state_dict = self.model.state_dict()
            for tower in self.model_gpu_towers:
                tower.load_state_dict(state_dict)

        # Do the (maybe parallelized) gradient calculation step.
        tower_outputs = self._multi_gpu_parallel_grad_calc(batches)

        # Multi device (GPU) case.
        if len(self.devices) > 1:
            # Mean-reduce over GPU-towers.
            all_grads = []
            for i in range(len(tower_outputs[0][0])):
                if tower_outputs[0][0][i] is not None:
                    all_grads.append(
                        torch.mean(torch.stack(
                            [t[0][i].to(self.device) for t in tower_outputs]),
                                   dim=0))
                else:
                    all_grads.append(None)
            # Set main model's grads to mean-reduced values.
            for i, p in enumerate(self.model.parameters()):
                p.grad = all_grads[i]
            # Reduce stats over towers as well.
            from ray.rllib.execution.train_ops import all_tower_reduce
            grad_info = tree.map_structure_with_path(
                lambda p, *t: all_tower_reduce(p, *t),
                *[t[1] for t in tower_outputs])
        # Single device case.
        else:
            all_grads, grad_info = tower_outputs[0]

        grad_info["allreduce_latency"] /= len(self._optimizers)
        grad_info.update(self.extra_grad_info(postprocessed_batch))

        fetches = self.extra_compute_grad_fetches()

        return all_grads, dict(fetches, **{LEARNER_STATS_KEY: grad_info})
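The multi-GPU branch of Example #6 averages the per-tower gradients element-wise before writing them back into the main model's parameters. A standalone sketch of that mean-reduce step with hypothetical tower outputs:

import torch

# Two hypothetical towers, each with two parameters; the second parameter has
# no gradient (None), which must be preserved as None after the reduce.
tower_grads = [
    [torch.tensor([1.0, 2.0]), None],   # grads from tower 0
    [torch.tensor([3.0, 4.0]), None],   # grads from tower 1
]
all_grads = []
for i in range(len(tower_grads[0])):
    if tower_grads[0][i] is not None:
        all_grads.append(
            torch.mean(torch.stack([g[i] for g in tower_grads]), dim=0))
    else:
        all_grads.append(None)
print(all_grads)  # [tensor([2., 3.]), None]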
Example #7
    def compute_gradients(self,
                          postprocessed_batch: SampleBatch) -> ModelGradients:

        if not isinstance(postprocessed_batch, SampleBatch) or \
                not postprocessed_batch.zero_padded:
            pad_batch_to_sequences_of_same_size(
                postprocessed_batch,
                max_seq_len=self.max_seq_len,
                shuffle=False,
                batch_divisibility_req=self.batch_divisibility_req,
                view_requirements=self.view_requirements,
            )
        else:
            postprocessed_batch["seq_lens"] = postprocessed_batch.seq_lens

        # Mark the batch as "is_training" so the Model can use this
        # information.
        postprocessed_batch.is_training = True
        train_batch = self._lazy_tensor_dict(postprocessed_batch)

        # Calculate the actual policy loss.
        loss_out = force_list(
            self._loss(self, self.model, self.dist_class, train_batch))

        # Call Model's custom-loss with Policy loss outputs and train_batch.
        if self.model:
            loss_out = self.model.custom_loss(loss_out, train_batch)

        # Give the Exploration component a chance to modify the loss (or add
        # its own terms).
        if hasattr(self, "exploration"):
            loss_out = self.exploration.get_exploration_loss(
                loss_out, train_batch)

        assert len(loss_out) == len(self._optimizers)

        # assert not any(torch.isnan(l) for l in loss_out)
        fetches = self.extra_compute_grad_fetches()

        # Loop through all optimizers.
        grad_info = {"allreduce_latency": 0.0}

        all_grads = []
        for i, opt in enumerate(self._optimizers):
            # Erase gradients in all vars of this optimizer.
            opt.zero_grad()
            # Recompute gradients of loss over all variables.
            loss_out[i].backward(retain_graph=(i < len(self._optimizers) - 1))
            grad_info.update(self.extra_grad_process(opt, loss_out[i]))

            grads = []
            # Note that return values are just references;
            # calling zero_grad would modify the values.
            for param_group in opt.param_groups:
                for p in param_group["params"]:
                    if p.grad is not None:
                        grads.append(p.grad)
                        all_grads.append(p.grad.data.cpu().numpy())
                    else:
                        all_grads.append(None)

            if self.distributed_world_size:
                start = time.time()
                if torch.cuda.is_available():
                    # Sadly, allreduce_coalesced does not work with CUDA yet.
                    for g in grads:
                        torch.distributed.all_reduce(
                            g, op=torch.distributed.ReduceOp.SUM)
                else:
                    torch.distributed.all_reduce_coalesced(
                        grads, op=torch.distributed.ReduceOp.SUM)

                for param_group in opt.param_groups:
                    for p in param_group["params"]:
                        if p.grad is not None:
                            p.grad /= self.distributed_world_size

                grad_info["allreduce_latency"] += time.time() - start

        grad_info["allreduce_latency"] /= len(self._optimizers)
        grad_info.update(self.extra_grad_info(train_batch))

        return all_grads, dict(fetches, **{LEARNER_STATS_KEY: grad_info})
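The core of Example #7 is the per-optimizer loop: zero the gradients, backprop the corresponding loss term, then collect each parameter's gradient as a NumPy array. A standalone sketch of that loop with a hypothetical one-layer model and a single optimizer (not the policy's actual model or loss):

import torch

# Toy model, optimizer, and loss standing in for the policy's.
model = torch.nn.Linear(4, 2)
opt = torch.optim.SGD(model.parameters(), lr=0.01)

x = torch.randn(8, 4)
loss = model(x).pow(2).mean()

# Erase old gradients, recompute them for this loss, then collect them as
# numpy arrays (None for parameters that received no gradient).
opt.zero_grad()
loss.backward()
all_grads = [
    p.grad.data.cpu().numpy() if p.grad is not None else None
    for p in model.parameters()
]
print([g.shape for g in all_grads if g is not None])  # [(2, 4), (2,)]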