示例#1
0
文件: core.py 项目: x213212/Hub
        def tf_gen(step=4):
            """Yield the dataset one sample at a time, loading ``step`` samples per batch.

            Each batch ``self[index : index + step]`` is materialised with the
            synchronous dask scheduler. String-typed fields are converted to
            fixed-width integer arrays of code points, truncated or
            space-padded to ``max_text_len`` (taken from the enclosing scope).

            Args:
                step: Number of samples to load per batch.

            Yields:
                dict: Maps each key of ``self[index].keys()`` to that field's
                value for a single sample.
            """
            with dask.config.set(scheduler="sync"):
                total = len(self)
                for index in range(0, total, step):
                    arrs = [self[index : index + step].values()]
                    arrs = [tensor._array for tensor in _flatten(arrs)]
                    arrs = dask.delayed(list, pure=False, nout=len(list(self.keys())))(
                        arrs
                    )
                    arrs = arrs.compute()
                    for ind, arr in enumerate(arrs):
                        if arr.dtype.type is np.str_:
                            # Encode every string as its code points, cut to
                            # at most max_text_len characters.
                            encoded = [
                                [ord(ch) for ch in sample.tolist()[:max_text_len]]
                                for sample in arr
                            ]
                            # Right-pad with 32 (ord(" ")) so all rows share
                            # the same width and stack into one array.
                            arrs[ind] = np.array(
                                [
                                    np.pad(
                                        codes,
                                        (0, max_text_len - len(codes)),
                                        "constant",
                                        constant_values=32,
                                    )
                                    for codes in encoded
                                ]
                            )

                    # Look the keys up once per batch instead of once per sample.
                    keys = list(self[index].keys())
                    # The final batch may hold fewer than ``step`` samples;
                    # iterating a full ``step`` would index past its end.
                    batch_size = min(step, total - index)
                    for i in range(batch_size):
                        yield {key: r[i] for key, r in zip(keys, arrs)}
示例#2
0
文件: core.py 项目: x213212/Hub
    def __getitem__(self, index):
        """Load the sample at ``index``, apply the transform and convert the result.

        The sample is read as the one-element slice ``index : index + 1`` of
        the wrapped dataset, computed with the synchronous dask scheduler,
        and assembled into a ``{key: value}`` dict. That dict goes through
        ``self._do_transform``; a dict or list result has each entry passed
        to ``self._to_tensor``, any other result is returned unchanged.

        NOTE(review): for ``index == -1`` the slice becomes ``-1:0``, which
        is empty under standard slicing rules -- confirm whether negative
        indices are expected here.
        """
        with dask.config.set(scheduler="sync", delayed_pure=True):
            window = self._ds[index : index + 1]
            lazy_fields = [field._array for field in _flatten([window.values()])]
            field_count = len(list(self._ds.keys()))
            loaded = dask.delayed(list, pure=True, nout=field_count)(lazy_fields)
            loaded = loaded.compute()

            # Each loaded field holds the one-element slice; keep element 0.
            names = self._ds[index].keys()
            sample = {name: values[0] for name, values in zip(names, loaded)}

        result = self._do_transform(sample)
        if isinstance(result, dict):
            return {k: self._to_tensor(k, v) for k, v in result.items()}
        if isinstance(result, list):
            # NOTE(review): _to_tensor gets one argument here but two in the
            # dict branch -- verify against its signature.
            return [self._to_tensor(v) for v in result]
        return result
示例#3
0
def test_flatten_array():
    """Check that ``_flatten`` merges a list of lists into one flat list, in order."""
    nested = [[1, 2], [3, 4, 5]]
    assert _flatten(nested) == [1, 2, 3, 4, 5]