Example #1
0
    def load_chunks(self, dev, dataloader):
        """Stream batches for device ``dev`` into this buffer's queue.

        Iterates the dataloader's chunk iterator for the indices assigned to
        ``dev``, concatenates each group of chunks, splits the result into a
        batch-size-divisible piece plus a remainder (carried forward as
        ``spill``), optionally shuffles, converts to framework tensors and
        enqueues them via ``self.put``.  Any exception raised on this worker
        is forwarded through the queue so the consumer can re-raise it
        instead of hanging on an empty queue.

        Parameters
        ----------
        dev : device identifier understood by ``dataloader._get_device_ctx``
        dataloader : the owning dataloader; supplies data iteration,
            batch size, and tensor-conversion helpers.
        """
        try:
            indices = dataloader._gather_indices_for_dev(dev)
            itr = iter(dataloader.data.to_iter(indices=indices))
            with dataloader._get_device_ctx(dev):
                spill = None
                for chunks in self.batch(itr):
                    if self.stopped:
                        return

                    # `spill` is a DataFrame after the first iteration, and
                    # DataFrame truthiness is ambiguous (raises ValueError),
                    # so test identity against None explicitly.
                    if spill is not None and not spill.empty:
                        chunks.insert(0, spill)

                    chunks = cudf.core.reshape.concat(chunks)
                    chunks.reset_index(drop=True, inplace=True)
                    chunks, spill = self.get_batch_div_chunk(chunks, dataloader.batch_size)
                    if self.shuffle:
                        _shuffle_gdf(chunks)

                    if len(chunks) > 0:
                        chunks = dataloader.make_tensors(chunks, dataloader._use_nnz)
                        # put returns True if buffer is stopped before
                        # packet can be put in queue. Keeps us from
                        # freezing on a put on a full queue
                        if self.put(chunks):
                            return
                    # drop the reference before the next large allocation
                    # to keep peak device memory down
                    chunks = None

                # takes care of the final batch, which is smaller than batch size
                if spill is not None and not spill.empty:
                    spill = dataloader.make_tensors(spill, dataloader._use_nnz)
                    self.put(spill)
        except Exception as e:
            # Forward the failure to the consumer side of the queue so the
            # reader thread can re-raise it rather than block forever.
            self.put(e)
Example #2
0
    def load_chunks(self, dev, dataloader):
        """Stream batches for device ``dev`` into this buffer's queue.

        For each group of chunks assigned to ``dev``: concatenate, split into
        a batch-size-divisible piece plus a remainder (carried forward as
        ``spill``), optionally shuffle, run the dataloader's workflows,
        convert to framework tensors, split into per-batch outputs, and
        enqueue via ``self.put``.  Exceptions raised on this worker are
        forwarded through the queue so the consumer can re-raise them.

        Parameters
        ----------
        dev : device identifier understood by ``dataloader._get_device_ctx``
        dataloader : the owning dataloader; supplies data iteration,
            workflows, batch size, and tensor-conversion helpers.
        """
        try:
            indices = dataloader._gather_indices_for_dev(dev)
            itr = iter(dataloader.data.to_iter(indices=indices))
            with dataloader._get_device_ctx(dev):
                spill = None
                for chunks in self.batch(itr):
                    if self.stopped:
                        return

                    # `spill` is a DataFrame after the first iteration, and
                    # DataFrame truthiness is ambiguous (raises ValueError),
                    # so test identity against None explicitly.
                    if spill is not None and not spill.empty:
                        chunks.insert(0, spill)

                    chunks = cudf.core.reshape.concat(chunks)
                    chunks.reset_index(drop=True, inplace=True)
                    chunks, spill = self.get_batch_div_chunk(
                        chunks, dataloader.batch_size)
                    if self.shuffle:
                        _shuffle_gdf(chunks)

                    num_samples = len(chunks)
                    if num_samples > 0:
                        for workflow in dataloader.workflows:
                            chunks = workflow.apply_ops(chunks)

                        # map from big chunk to framework-specific tensors
                        chunks = dataloader._create_tensors(chunks)

                        # split them into batches and map to
                        # the framework-specific output format
                        chunks = [
                            dataloader._create_batch(x, num_samples)
                            for x in chunks
                        ]
                        chunks = zip(*chunks)
                        chunks = [
                            dataloader._handle_tensors(*tensors)
                            for tensors in chunks
                        ]

                        # put returns True if buffer is stopped before
                        # packet can be put in queue. Keeps us from
                        # freezing on a put on a full queue
                        if self.put(chunks):
                            return
                    # drop the reference before the next large allocation
                    # to keep peak device memory down
                    chunks = None

                # takes care of the final batch, which is smaller than batch size
                if spill is not None and not spill.empty:
                    for workflow in dataloader.workflows:
                        spill = workflow.apply_ops(spill)
                    spill = dataloader._create_tensors(spill)
                    spill = dataloader._handle_tensors(*spill)
                    self.put([spill])
        except Exception as e:
            # Forward the failure to the consumer side of the queue so the
            # reader thread can re-raise it rather than block forever.
            self.put(e)