Exemplo n.º 1
0
    def test_h5_singletons_reading(self):
        """Write a singleton table five times and read it back via HDF5Pump.

        Every blob handed to the observer must carry a one-row "Tab" whose
        ``a`` column holds the value 2.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def process(self, blob):
                singleton = Table({"a": 2}, h5loc="tab", h5singleton=True)
                return Blob({"Tab": singleton})

        class Observer(Module):
            def process(self, blob):
                print(blob)
                assert "Tab" in blob
                table = blob["Tab"]
                print(table)
                assert 1 == len(table)
                assert 2 == table.a[0]
                return blob

        # Writing stage: five identical singleton tables go into the file.
        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path)
        writer.drain(5)

        # Reading stage: the observer validates each blob produced by the pump.
        reader = Pipeline()
        reader.attach(HDF5Pump, filename=path)
        reader.attach(Observer)
        reader.drain()

        tmp.close()
Exemplo n.º 2
0
    def test_hdf5_readout_split_tables_in_same_group(self):
        """Round-trip two ``split_h5`` tables located under the same group.

        Blob number ``n`` (1-based) stores ``a = 10 * n`` in "/tabs/tab_a" and
        ``b = 100 * n`` in "/tabs/tab_b". On readout every blob must expose
        both tables plus a "GroupInfo" whose group_id equals ``n - 1``.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def configure(self):
                self.count = 0

            def process(self, blob):
                self.count += 1
                blob["TabA"] = Table(
                    {"a": self.count * 10}, h5loc="/tabs/tab_a", split_h5=True
                )
                blob["TabB"] = Table(
                    {"b": self.count * 100}, h5loc="/tabs/tab_b", split_h5=True
                )
                return blob

        class BlobTester(Module):
            def configure(self):
                self.index = 0

            def process(self, blob):
                self.index += 1
                for required in ("GroupInfo", "TabA", "TabB"):
                    assert required in blob
                assert self.index - 1 == blob["GroupInfo"].group_id
                assert self.index * 10 == blob["TabA"]["a"]
                assert self.index * 100 == blob["TabB"]["b"]
                return blob

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path)
        writer.drain(5)

        reader = Pipeline()
        reader.attach(HDF5Pump, filename=path)
        reader.attach(BlobTester)
        reader.drain()

        tmp.close()
Exemplo n.º 3
0
    def test_h5_consistency_for_tables_without_group_id(self):
        """Check the columns of a table that was created without a group_id.

        The pump emits one row per blob (``a`` = 10, 20, ... and ``b`` = 1)
        for five blobs. The file is expected to hold those values together
        with a ``group_id`` column counting 0..4.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def configure(self):
                self.count = 0

            def process(self, blob):
                self.count += 10
                row = Table({"a": self.count, "b": 1}, h5loc="tab")
                return Blob({"tab": row})

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path)
        writer.drain(5)

        # Load the whole table once; the columns are sliced from that copy.
        with tb.File(path) as h5:
            rows = h5.get_node("/tab")[:]

        assert np.allclose([10, 20, 30, 40, 50], rows["a"])
        assert np.allclose([1, 1, 1, 1, 1], rows["b"])
        assert np.allclose([0, 1, 2, 3, 4], rows["group_id"])
        tmp.close()
Exemplo n.º 4
0
    def test_skipped_blob_with_ndarray(self):
        """Check the NDArray index table when a blob is skipped before the sink.

        The pump produces ``np.arange(index + 1)`` for five blobs and a
        ``Skipper`` with ``indices=[2]`` sits between pump and sink. The file
        is expected to contain the arrays with 1, 2, 4 and 5 items back to
        back, with matching ``index``/``n_items`` entries.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def configure(self):
                self.index = 0

            def process(self, blob):
                payload = np.arange(self.index + 1)
                blob["Arr"] = NDArray(payload, h5loc="/arr")
                self.index += 1
                return blob

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(Skipper, indices=[2])
        writer.attach(HDF5Sink, filename=path)
        writer.drain(5)

        with tb.File(path) as h5:
            stored = h5.get_node("/arr")[:]
            index_table = h5.get_node("/arr_indices")[:]

        expected_data = [0, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4]
        assert np.allclose(expected_data, stored)
        assert np.allclose([0, 1, 3, 7], index_table["index"])
        assert np.allclose([1, 2, 4, 5], index_table["n_items"])

        tmp.close()
Exemplo n.º 5
0
    def test_skipped_blob_with_tables(self):
        """Check table rows and group IDs when a blob is skipped before the sink.

        Each blob carries a table with ``index + 1`` rows (``a`` counts up,
        ``i`` repeats the pump index). With ``Skipper(indices=[2])`` in the
        chain, the expected ``i`` values are 0, 1, 3, 4 while the expected
        ``group_id`` column runs 0..3 without a gap.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def configure(self):
                self.index = 0

            def process(self, blob):
                columns = {"a": np.arange(self.index + 1), "i": self.index}
                blob["Tab"] = Table(columns, h5loc="/tab")
                self.index += 1
                return blob

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(Skipper, indices=[2])
        writer.attach(HDF5Sink, filename=path)
        writer.drain(5)

        # Load the whole table once; the columns are sliced from that copy.
        with tb.File(path) as h5:
            rows = h5.get_node("/tab")[:]

        assert np.allclose([0, 1, 1, 3, 3, 3, 3, 4, 4, 4, 4, 4], rows["i"])
        assert np.allclose([0, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4], rows["a"])
        assert np.allclose([0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3], rows["group_id"])

        tmp.close()
Exemplo n.º 6
0
    def test_writing_of_n_dim_arrays(self):
        """Write a 2x2x2 NDArray three times and inspect the stored "/foo" node.

        Two entries of the node (expected values 3 and 4) and the node title
        ("Yep") are verified.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        base = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

        class DummyPump(Module):
            def configure(self):
                self.index = 0

            def process(self, blob):
                # Shift the base array by 10 per blob so the writes differ.
                shifted = base + self.index * 10
                blob["foo"] = NDArray(shifted, h5loc="/foo", title="Yep")
                self.index += 1
                return blob

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path)
        writer.drain(3)

        with tb.File(path) as h5:
            node = h5.get_node("/foo")
            assert 3 == node[0, 1, 0]
            assert 4 == node[0, 1, 1]
            assert "Yep" == node.title

        tmp.close()
Exemplo n.º 7
0
    def test_writing_of_n_dim_arrays_with_defaults(self):
        """Write an NDArray that has neither ``h5loc`` nor ``title`` set.

        The test expects the data under "/misc" with the title
        "Unnamed NDArray", and an index table "/misc_indices" recording three
        writes of two items each.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        base = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

        class DummyPump(Module):
            def process(self, blob):
                blob["foo"] = NDArray(base)
                return blob

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path)
        writer.drain(3)

        with tb.File(path) as h5:
            node = h5.get_node("/misc")
            assert 3 == node[0, 1, 0]
            assert 4 == node[0, 1, 1]
            assert "Unnamed NDArray" == node.title

            index_cols = h5.get_node("/misc_indices").cols
            self.assertTupleEqual((0, 2, 4), tuple(index_cols.index[:]))
            self.assertTupleEqual((2, 2, 2), tuple(index_cols.n_items[:]))

        tmp.close()
Exemplo n.º 8
0
    def test_filtered_writing_of_multiple_keys(self):
        """Restrict an HDF5Sink to the keys "A" and "B" out of three tables.

        After draining, the file must contain "/tab_a" and "/tab_b" but not
        "/tab_c".
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)

        class DummyPump(Module):
            def configure(self):
                self.i = 0

            def process(self, blob):
                # One single-column table per key, all holding the counter.
                blob["A"] = Table({"a": self.i}, name="A", h5loc="tab_a")
                blob["B"] = Table({"b": self.i}, name="B", h5loc="tab_b")
                blob["C"] = Table({"c": self.i}, name="C", h5loc="tab_c")
                self.i += 1
                return blob

        selected = ["A", "B"]

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=tmp.name, keys=selected)
        writer.drain(5)

        with tb.File(tmp.name, "r") as h5:
            assert "/tab_a" in h5
            assert "/tab_b" in h5
            assert "/tab_c" not in h5

        tmp.close()
Exemplo n.º 9
0
    def test_h5_consistency_for_tables_without_group_id_and_multiple_keys(self):
        """Verify columns and group IDs of two tables written from the same blobs.

        The pump emits, for five blobs, one row into "tab1" (``a`` = 10, 20,
        ... and ``b`` = 1) and one row into "tab2" (``c`` = 11, 21, ... and
        ``d`` = 2). Both tables are expected to carry a ``group_id`` column
        counting 0..4.
        """

        class DummyPump(Module):
            def configure(self):
                self.count = 0

            def process(self, blob):
                self.count += 10
                tab1 = Table({"a": self.count, "b": 1}, h5loc="tab1")
                tab2 = Table({"c": self.count + 1, "d": 2}, h5loc="tab2")
                return Blob({"tab1": tab1, "tab2": tab2})

        # The context manager releases the temporary file even when an
        # assertion below fails.
        with tempfile.NamedTemporaryFile(delete=True) as fobj:
            fname = fobj.name

            pipe = Pipeline()
            pipe.attach(DummyPump)
            pipe.attach(HDF5Sink, filename=fname)
            pipe.drain(5)

            with tb.File(fname) as f:
                tab1_rows = f.get_node("/tab1")[:]
                # Read the second table from its own node; taking its
                # group_id from "/tab1" would leave "/tab2" unchecked.
                tab2_rows = f.get_node("/tab2")[:]

            assert np.allclose([10, 20, 30, 40, 50], tab1_rows["a"])
            assert np.allclose([1, 1, 1, 1, 1], tab1_rows["b"])
            assert np.allclose([0, 1, 2, 3, 4], tab1_rows["group_id"])
            assert np.allclose([11, 21, 31, 41, 51], tab2_rows["c"])
            assert np.allclose([2, 2, 2, 2, 2], tab2_rows["d"])
            assert np.allclose([0, 1, 2, 3, 4], tab2_rows["group_id"])
Exemplo n.º 10
0
    def test_pipe(self):
        """Pump ``self.fname`` through an observer and compare per-blob sizes.

        The observer records, for every blob, the length of each entry (the
        "Header" entry is stored as-is under "headers"). The recorded lengths
        of "Hits", "McHits" and "McTracks" are compared with fixed numbers.
        """

        class Observer(Module):
            def configure(self):
                self.dump = defaultdict(list)

            def process(self, blob):
                for key, data in blob.items():
                    if key != "Header":
                        self.dump[key].append(len(data))
                        continue
                    self.dump["headers"].append(data)
                return blob

            def finish(self):
                return self.dump

        pipeline = Pipeline()
        pipeline.attach(HDF5Pump, filename=self.fname)
        pipeline.attach(Observer)
        observed = pipeline.drain()["Observer"]

        expected_hits = [147, 110, 70, 62, 59, 199, 130, 92, 296, 128]
        expected_mc_hits = [315, 164, 100, 111, 123, 527, 359, 117, 984, 263]
        expected_mc_tracks = [1, 1, 1, 1, 1, 3, 2, 1, 2, 1]

        self.assertListEqual(expected_hits, observed["Hits"])
        self.assertListEqual(expected_mc_hits, observed["McHits"])
        self.assertListEqual(expected_mc_tracks, observed["McTracks"])
Exemplo n.º 11
0
    def test_hdf5_readout(self):
        """Round-trip a single-row table and a variable-length table.

        Blob number ``n`` (1-based) stores ``a = 10 * n, b = 1`` in "tab" and
        ``a = np.arange(n)`` in "tab2". On readout every blob must expose both
        tables plus a "GroupInfo" whose group_id equals ``n - 1``.
        """

        class DummyPump(Module):
            def configure(self):
                self.count = 0

            def process(self, blob):
                self.count += 1
                tab = Table({"a": self.count * 10, "b": 1}, h5loc="tab")
                tab2 = Table({"a": np.arange(self.count)}, h5loc="tab2")
                blob["Tab"] = tab
                blob["Tab2"] = tab2
                return blob

        class BlobTester(Module):
            def configure(self):
                self.index = 0

            def process(self, blob):
                self.index += 1
                assert "GroupInfo" in blob
                assert "Tab" in blob
                # Fail with a clear message instead of a KeyError below.
                assert "Tab2" in blob
                print(self.index)
                print(blob["Tab"])
                print(blob["Tab"]["a"])
                assert self.index - 1 == blob["GroupInfo"].group_id
                assert self.index * 10 == blob["Tab"]["a"]
                # A single comparison is enough; the former chained form
                # ``1 == x == 1`` evaluated the same check twice.
                assert 1 == blob["Tab"]["b"]
                assert np.allclose(np.arange(self.index), blob["Tab2"]["a"])
                return blob

        # The context manager releases the temporary file even when one of
        # the pipelines raises.
        with tempfile.NamedTemporaryFile(delete=True) as fobj:
            fname = fobj.name

            pipe = Pipeline()
            pipe.attach(DummyPump)
            pipe.attach(HDF5Sink, filename=fname)
            pipe.drain(5)

            pipe = Pipeline()
            pipe.attach(HDF5Pump, filename=fname)
            pipe.attach(BlobTester)
            pipe.drain()
Exemplo n.º 12
0
    def test_reading_of_n_dim_arrays(self):
        """Round-trip a 2x2x2 NDArray that is shifted by 10 for each blob.

        Three blobs are written; on readout four corner/inner entries of the
        array in blob ``k`` (0-based) must equal the base value plus ``10 * k``.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        base = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

        class DummyPump(Module):
            def configure(self):
                self.index = 0

            def process(self, blob):
                shifted = base + self.index * 10
                blob["Foo"] = NDArray(shifted, h5loc="/foo", title="Yep")
                self.index += 1
                return blob

        class Observer(Module):
            def configure(self):
                self.index = 0

            def process(self, blob):
                assert "Foo" in blob
                foo = blob["Foo"]
                print(self.index)
                offset = self.index * 10
                assert offset + 1 == foo[0, 0, 0]
                assert offset + 8 == foo[1, 1, 1]
                assert offset + 3 == foo[0, 1, 0]
                assert offset + 6 == foo[1, 0, 1]
                self.index += 1
                return blob

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path)
        writer.drain(3)

        reader = Pipeline()
        reader.attach(HDF5Pump, filename=path)
        reader.attach(Observer)
        reader.drain()

        tmp.close()
Exemplo n.º 13
0
    def test_event_info_is_not_empty(self):
        """Every blob read from the sample file must have a non-empty EventInfo."""
        self.fname = data_path("hdf5/test_event_info.h5")

        class Printer(Module):
            def process(self, blob):
                event_info = blob["EventInfo"]
                assert event_info.size != 0
                return blob

        pipeline = Pipeline()
        pipeline.attach(HDF5Pump, filename=self.fname)
        pipeline.attach(Printer)
        pipeline.drain()
Exemplo n.º 14
0
    def test_sparse_ndarray(self):
        """Round-trip an NDArray that is present in only one of ten blobs.

        Only the fifth blob carries "Arr" (``[1, 2, 3]``). On readout the
        fifth blob must yield an array summing to 6, while "Arr" must have
        length 0 in every other blob.
        """

        class Dummy(Module):
            def configure(self):
                self.i = 0

            def process(self, blob):
                self.i += 1

                if self.i == 5:
                    blob["Arr"] = NDArray([1, 2, 3], h5loc="/arr")
                return blob

        class Observer(Module):
            def configure(self):
                self.i = 0

            def process(self, blob):
                self.i += 1

                print(blob)
                if self.i == 5:
                    assert 6 == np.sum(blob["Arr"])
                else:
                    assert len(blob["Arr"]) == 0

                return blob

        # The temporary file used to stay open after the test; the context
        # manager closes (and thereby removes) it, also on failure.
        with tempfile.NamedTemporaryFile(delete=True) as fobj:
            fname = fobj.name

            pipe = Pipeline()
            pipe.attach(Dummy)
            pipe.attach(HDF5Sink, filename=fname)
            pipe.drain(10)

            pipe = Pipeline()
            pipe.attach(HDF5Pump, filename=fname)
            pipe.attach(Observer)
            pipe.drain()
Exemplo n.º 15
0
    def test_sparse_table(self):
        """Round-trip a table that is present in only one of ten blobs.

        Only the fifth blob carries "Tab" (``a = 23``). On readout the fifth
        blob must return that value and "Tab" must be absent from every other
        blob.
        """

        class Dummy(Module):
            def configure(self):
                self.i = 0

            def process(self, blob):
                self.i += 1

                if self.i == 5:
                    blob["Tab"] = Table({"a": 23}, h5loc="/tab")
                return blob

        class Observer(Module):
            def configure(self):
                self.i = 0

            def process(self, blob):
                self.i += 1

                if self.i == 5:
                    assert 23 == blob["Tab"].a[0]
                else:
                    assert "Tab" not in blob

                return blob

        # The temporary file used to stay open after the test; the context
        # manager closes (and thereby removes) it, also on failure.
        with tempfile.NamedTemporaryFile(delete=True) as fobj:
            fname = fobj.name

            pipe = Pipeline()
            pipe.attach(Dummy)
            pipe.attach(HDF5Sink, filename=fname)
            pipe.drain(10)

            pipe = Pipeline()
            pipe.attach(HDF5Pump, filename=fname)
            pipe.attach(Observer)
            pipe.drain()
Exemplo n.º 16
0
    def test_write_table_service(self):
        """Write a table through the "write_table" service during ``prepare``.

        The module calls ``self.services["write_table"]`` with a one-row
        table located at "tab_a"; afterwards "/tab_a" must exist in the file
        given to the HDF5Sink (presumably the sink exposes that service).
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)

        class Foo(Module):
            def prepare(self):
                write_table = self.services["write_table"]
                write_table(Table({"a": 1}, name="A", h5loc="tab_a"))

        pipeline = Pipeline()
        pipeline.attach(Foo)
        pipeline.attach(HDF5Sink, filename=tmp.name)
        pipeline.drain(5)

        with tb.File(tmp.name, "r") as h5:
            assert "/tab_a" in h5

        tmp.close()
Exemplo n.º 17
0
    def test_event_info_has_correct_group_id(self):
        """The first EventInfo row of blob ``k`` (0-based) must have group_id ``k``."""
        self.fname = data_path("hdf5/test_event_info.h5")

        class Printer(Module):
            def configure(self):
                self.index = 0

            def process(self, blob):
                first_row = blob["EventInfo"][0]
                assert first_row.group_id == self.index
                self.index += 1
                return blob

        pipeline = Pipeline()
        pipeline.attach(HDF5Pump, filename=self.fname)
        pipeline.attach(Printer)
        pipeline.drain()
Exemplo n.º 18
0
    def test_custom_random_state(self):
        """Compare three ``np.random.rand()`` draws after ``GlobalRandomState(seed=5)``.

        The draws made by the observer must match the hard-coded reference
        values (checked with ``assertAlmostEqual``).
        """
        # Bound method captured here because ``self`` inside the module
        # refers to the module instance, not to the test case.
        check_almost_equal = self.assertAlmostEqual

        class Observer(Module):
            def configure(self):
                self.i = 0
                self.x = [0.221993171, 0.870732306, 0.206719155]

            def process(self, blob):
                reference = self.x[self.i]
                drawn = np.random.rand()
                check_almost_equal(reference, drawn)
                self.i += 1
                return blob

        pipeline = Pipeline()
        pipeline.attach(GlobalRandomState, seed=5)
        pipeline.attach(Observer)
        pipeline.drain(3)
Exemplo n.º 19
0
    def test_default_random_state(self):
        """Compare three ``np.random.rand()`` draws after a default ``GlobalRandomState``.

        No seed is passed to the module; the draws made by the observer must
        match the hard-coded reference values (checked with
        ``assertAlmostEqual``).
        """
        # Bound method captured here because ``self`` inside the module
        # refers to the module instance, not to the test case.
        check_almost_equal = self.assertAlmostEqual

        class Observer(Module):
            def configure(self):
                self.i = 0
                self.x = [0.3745401188, 0.950714306, 0.7319939418]

            def process(self, blob):
                reference = self.x[self.i]
                drawn = np.random.rand()
                check_almost_equal(reference, drawn)
                self.i += 1
                return blob

        pipeline = Pipeline()
        pipeline.attach(GlobalRandomState)
        pipeline.attach(Observer)
        pipeline.drain(3)
Exemplo n.º 20
0
def main():
    """Parse the command line with docopt and run a CHPump -> MSGDumper pipeline.

    Options read from the module docstring's usage text: ``-l`` (host),
    ``-p`` (port, converted to int), ``-o`` (path) and ``-x`` (prefix).
    """
    from docopt import docopt

    options = docopt(__doc__)

    host = options['-l']
    port = int(options['-p'])
    out_path = options['-o']
    file_prefix = options['-x']

    # Pump settings; the timeout expression evaluates to seven days in seconds.
    pump_settings = dict(
        host=host,
        port=port,
        tags='MSG',
        timeout=7 * 60 * 60 * 24,
        max_queue=500,
    )

    pipeline = Pipeline()
    pipeline.attach(CHPump, **pump_settings)
    pipeline.attach(MSGDumper, prefix=file_prefix, path=out_path)
    pipeline.drain()
Exemplo n.º 21
0
    def test_h5info(self):
        """Check the version attributes stored on the root node of a written file.

        The root attributes ``km3pipe`` and ``pytables`` (decoded from bytes)
        must equal ``version`` and ``tb.__version__``; ``format_version`` must
        equal ``FORMAT_VERSION``.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def process(self, blob):
                return Blob()

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path)
        writer.drain(5)

        with tb.open_file(path, "r") as h5file:
            root_attrs = h5file.root._v_attrs
            assert version == root_attrs.km3pipe.decode()
            assert tb.__version__ == root_attrs.pytables.decode()
            assert FORMAT_VERSION == root_attrs.format_version

        tmp.close()
Exemplo n.º 22
0
    def test_h5_singletons(self):
        """A table flagged ``h5singleton=True`` must be stored with a single row.

        The same one-row table is emitted for five blobs; the "/tab" node in
        the file is expected to contain exactly one entry.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def process(self, blob):
                singleton = Table({"a": 2}, h5loc="tab", h5singleton=True)
                return Blob({"tab": singleton})

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path)
        writer.drain(5)

        with tb.File(path) as h5:
            column_a = h5.get_node("/tab")[:]["a"]

        assert 1 == len(column_a)

        tmp.close()
Exemplo n.º 23
0
    def test_h5_consistency_for_tables_with_custom_group_id(self):
        """A user-supplied group_id must be kept when ``reset_group_id=False``.

        Every blob carries a table with ``group_id = 2``; after five blobs
        the stored column is expected to be five times the value 2.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def process(self, blob):
                return Blob({"tab": Table({"group_id": 2}, h5loc="tab")})

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path, reset_group_id=False)
        writer.drain(5)

        with tb.File(path) as h5:
            stored_ids = h5.get_node("/tab")[:]["group_id"]

        assert np.allclose([2, 2, 2, 2, 2], stored_ids)

        tmp.close()
Exemplo n.º 24
0
    def test_filtered_writing(self):
        """Attach one key-filtered sink per table plus one unfiltered sink.

        Each filtered file must contain only the table of its own key
        ("A", "B" or "C"); the unfiltered file must contain all three.
        """
        filtered_files = [tempfile.NamedTemporaryFile(delete=True) for _ in range(3)]
        unfiltered_file = tempfile.NamedTemporaryFile(delete=True)

        class DummyPump(Module):
            def configure(self):
                self.i = 0

            def process(self, blob):
                # One single-column table per key, all holding the counter.
                blob["A"] = Table({"a": self.i}, name="A", h5loc="tab_a")
                blob["B"] = Table({"b": self.i}, name="B", h5loc="tab_b")
                blob["C"] = Table({"c": self.i}, name="C", h5loc="tab_c")
                self.i += 1
                return blob

        keys = "ABC"
        all_keys = set(keys)

        pipeline = Pipeline()
        pipeline.attach(DummyPump)
        for handle, key in zip(filtered_files, keys):
            pipeline.attach(HDF5Sink, filename=handle.name, keys=[key])
        pipeline.attach(HDF5Sink, filename=unfiltered_file.name)
        pipeline.drain(5)

        for handle, own_key in zip(filtered_files, keys):
            with tb.File(handle.name, "r") as h5:
                assert "/tab_" + own_key.lower() in h5
                for other_key in all_keys - {own_key}:
                    assert "/tab_" + other_key.lower() not in h5

        for key in keys:
            with tb.File(unfiltered_file.name, "r") as h5:
                assert "/tab_" + key.lower() in h5

        for handle in filtered_files:
            handle.close()
        unfiltered_file.close()
Exemplo n.º 25
0
    def test_skipped_blob_with_tables_and_ndarrays_first_and_last(self):
        """Skip the first and the last of five blobs and inspect what was stored.

        Each blob carries an NDArray and a table with ``index + 1`` entries.
        With ``Skipper(indices=[0, 4])`` in the chain, only pump indices 1, 2
        and 3 are expected in the file, with group IDs 0..2 and matching
        NDArray index entries.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def configure(self):
                self.index = 0

            def process(self, blob):
                counting = np.arange(self.index + 1)
                blob["Arr"] = NDArray(counting, h5loc="/arr")
                blob["Tab"] = Table(
                    {"a": np.arange(self.index + 1), "i": self.index}, h5loc="/tab"
                )
                self.index += 1
                return blob

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(Skipper, indices=[0, 4])
        writer.attach(HDF5Sink, filename=path)
        writer.drain(5)

        with tb.File(path) as h5:
            table_rows = h5.get_node("/tab")[:]
            stored_arr = h5.get_node("/arr")[:]
            index_table = h5.get_node("/arr_indices")[:]
            # Read but not inspected further; the lookup itself requires the
            # "/group_info" node to exist.
            group_info = h5.get_node("/group_info")[:]

        assert np.allclose([1, 1, 2, 2, 2, 3, 3, 3, 3], table_rows["i"])
        assert np.allclose([0, 1, 0, 1, 2, 0, 1, 2, 3], table_rows["a"])
        assert np.allclose([0, 0, 1, 1, 1, 2, 2, 2, 2], table_rows["group_id"])

        assert np.allclose([0, 1, 0, 1, 2, 0, 1, 2, 3], stored_arr)
        assert np.allclose([0, 2, 5], index_table["index"])
        assert np.allclose([2, 3, 4], index_table["n_items"])

        tmp.close()
Exemplo n.º 26
0
 def test_pipe(self):
     """Stream ``self.fname`` through HDF5Pump into an HDF5Sink on ``self.out``."""
     pipeline = Pipeline()
     pipeline.attach(HDF5Pump, filename=self.fname)
     pipeline.attach(HDF5Sink, h5file=self.out)
     pipeline.drain()
Exemplo n.º 27
0
        plt.suptitle("FrameIndex {0}, TriggerCounter {1}\n{2} UTC".format(
            e_info.frame_index, e_info.trigger_counter,
            datetime.utcfromtimestamp(e_info.utc_seconds)),
                     fontsize=16)
        fig.text(0.5, 0.01, 'time [ns]', ha='center')
        fig.text(0.08, 0.5, 'z [m]', va='center', rotation='vertical')
        #        plt.tight_layout()

        filename = 'ztplot'
        f = os.path.join(PLOTS_PATH, filename + '.png')
        f_tmp = os.path.join(PLOTS_PATH, filename + '_tmp.png')
        plt.savefig(f_tmp, dpi=120, bbox_inches="tight")
        plt.close('all')
        shutil.move(f_tmp, f)

    def finish(self):
        """Clear the ``run`` flag and stop the worker thread if there is one."""
        self.run = False
        worker = self.thread
        if worker is None:
            return
        worker.stop()


# Assemble and run the pipeline: CHPump -> DAQProcessor -> ZTPlot.
pipe = Pipeline()
pipe.attach(
    CHPump,
    host='127.0.0.1',
    tags='IO_EVT, IO_SUM',
    # The timeout expression evaluates to seven days in seconds.
    timeout=60 * 60 * 24 * 7,
    max_queue=2000,
)
pipe.attach(DAQProcessor)
pipe.attach(ZTPlot)
pipe.drain()
Exemplo n.º 28
0
    def test_shuffle_with_reset_index(self):
        """Read a file back with a fixed shuffle and ``reset_index=True``.

        Five blobs are written, each with a table, a split table and an
        NDArray whose content encodes the original blob number. ``HDF5Pump``
        then gets a shuffle function that overwrites its argument with
        ``[2, 1, 0, 3, 4]``. The group IDs seen by the observer are expected
        to run 0..4 while the payloads follow the shuffled order.
        """
        tmp = tempfile.NamedTemporaryFile(delete=True)
        path = tmp.name

        class DummyPump(Module):
            def configure(self):
                self.i = 0

            def process(self, blob):
                blob["Tab"] = Table({"a": self.i}, h5loc="/tab")
                blob["SplitTab"] = Table(
                    {"b": self.i}, h5loc="/split_tab", split_h5=True
                )
                blob["Arr"] = NDArray(np.arange(self.i + 1), h5loc="/arr")
                self.i += 1
                return blob

        writer = Pipeline()
        writer.attach(DummyPump)
        writer.attach(HDF5Sink, filename=path)
        writer.drain(5)

        shuffled_group_ids = [2, 1, 0, 3, 4]

        def shuffle(x):
            # In-place "shuffle" with a fixed outcome, keeping the test
            # deterministic.
            for position in range(len(x)):
                x[position] = shuffled_group_ids[position]

        recorded_fields = (
            "group_ids_tab",
            "group_ids_split_tab",
            "group_ids_arr",
            "a",
            "b",
            "arr_len",
        )

        class Observer(Module):
            def configure(self):
                self.group_ids_tab = []
                self.group_ids_split_tab = []
                self.group_ids_arr = []
                self.a = []
                self.b = []
                self.arr_len = []

            def process(self, blob):
                group_id_tab = blob["Tab"].group_id[0]
                group_id_split_tab = blob["SplitTab"].group_id[0]
                group_id_arr = blob["Arr"].group_id

                # All containers of a blob must agree with its GroupInfo.
                info_group_id = blob["GroupInfo"].group_id[0]
                assert info_group_id == group_id_tab
                assert info_group_id == group_id_split_tab
                assert info_group_id == group_id_arr

                self.group_ids_tab.append(group_id_tab)
                self.group_ids_split_tab.append(group_id_split_tab)
                self.group_ids_arr.append(group_id_arr)
                self.a.append(blob["Tab"].a[0])
                self.b.append(blob["SplitTab"].b[0])
                self.arr_len.append(len(blob["Arr"]) - 1)
                return blob

            def finish(self):
                return {field: getattr(self, field) for field in recorded_fields}

        reader = Pipeline()
        reader.attach(
            HDF5Pump,
            filename=path,
            shuffle=True,
            shuffle_function=shuffle,
            reset_index=True,
        )
        reader.attach(Observer)
        observed = reader.drain()["Observer"]

        consecutive_ids = [0, 1, 2, 3, 4]
        self.assertListEqual(observed["group_ids_tab"], consecutive_ids)
        self.assertListEqual(observed["group_ids_split_tab"], consecutive_ids)
        self.assertListEqual(observed["group_ids_arr"], consecutive_ids)
        self.assertListEqual(observed["a"], shuffled_group_ids)
        self.assertListEqual(observed["b"], shuffled_group_ids)
        # A small hack: 'arr_len' holds the array length minus one, which
        # equals the original blob number and therefore has to match the
        # shuffled group IDs as well.
        self.assertListEqual(observed["arr_len"], shuffled_group_ids)

        tmp.close()