def test_writing_of_n_dim_arrays_with_defaults(self):
    """NDArrays without explicit h5loc/title land under /misc with defaults."""
    tmpfile = tempfile.NamedTemporaryFile(delete=True)
    cube = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

    class DummyPump(Module):
        def process(self, blob):
            blob["foo"] = NDArray(cube)
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=tmpfile.name)
    pipe.drain(3)

    with tb.File(tmpfile.name) as h5:
        node = h5.get_node("/misc")
        assert 3 == node[0, 1, 0]
        assert 4 == node[0, 1, 1]
        assert "Unnamed NDArray" == node.title
        indices = h5.get_node("/misc_indices")
        self.assertTupleEqual((0, 2, 4), tuple(indices.cols.index[:]))
        self.assertTupleEqual((2, 2, 2), tuple(indices.cols.n_items[:]))

    tmpfile.close()
def test_filtered_writing_of_multiple_keys(self):
    """HDF5Sink writes only the blob keys listed in ``keys``."""
    tmpfile = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Module):
        def configure(self):
            self.i = 0

        def process(self, blob):
            blob["A"] = Table({"a": self.i}, name="A", h5loc="tab_a")
            blob["B"] = Table({"b": self.i}, name="B", h5loc="tab_b")
            blob["C"] = Table({"c": self.i}, name="C", h5loc="tab_c")
            self.i += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=tmpfile.name, keys=["A", "B"])
    pipe.drain(5)

    with tb.File(tmpfile.name, "r") as h5:
        assert "/tab_a" in h5
        assert "/tab_b" in h5
        assert "/tab_c" not in h5

    tmpfile.close()
def test_h5_consistency_for_tables_without_group_id(self):
    """A group_id column is auto-added to tables that don't provide one."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Pump):
        def configure(self):
            self.count = 0

        def process(self, blob):
            self.count += 10
            return Blob({'tab': Table({'a': self.count, 'b': 1}, h5loc='tab')})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.File(fobj.name) as h5:
        data = h5.get_node("/tab")[:]
        assert np.allclose([10, 20, 30, 40, 50], data['a'])
        assert np.allclose([1, 1, 1, 1, 1], data['b'])
        assert np.allclose([0, 1, 2, 3, 4], data['group_id'])

    fobj.close()
def test_h5_consistency_for_tables_without_group_id_and_multiple_keys(self):
    """group_id is auto-added consistently when a blob has several tables.

    Fix: ``group_id_2`` was read from ``/tab1`` instead of ``/tab2``, so the
    second table's group_id column was never actually checked.
    """
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Pump):
        def configure(self):
            self.count = 0

        def process(self, blob):
            self.count += 10
            tab1 = Table({'a': self.count, 'b': 1}, h5loc='tab1')
            tab2 = Table({'c': self.count + 1, 'd': 2}, h5loc='tab2')
            return Blob({'tab1': tab1, 'tab2': tab2})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    with tb.File(fname) as f:
        a = f.get_node("/tab1")[:]['a']
        b = f.get_node("/tab1")[:]['b']
        c = f.get_node("/tab2")[:]['c']
        d = f.get_node("/tab2")[:]['d']
        group_id_1 = f.get_node("/tab1")[:]['group_id']
        # was "/tab1" — must verify the group_id of the *second* table
        group_id_2 = f.get_node("/tab2")[:]['group_id']

        assert np.allclose([10, 20, 30, 40, 50], a)
        assert np.allclose([1, 1, 1, 1, 1], b)
        assert np.allclose([0, 1, 2, 3, 4], group_id_1)
        assert np.allclose([11, 21, 31, 41, 51], c)
        assert np.allclose([2, 2, 2, 2, 2], d)
        assert np.allclose([0, 1, 2, 3, 4], group_id_2)

    fobj.close()
def test_writing_of_n_dim_arrays(self):
    """NDArrays with explicit h5loc/title are written to that location."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    cube = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

    class DummyPump(Pump):
        def configure(self):
            self.index = 0

        def process(self, blob):
            blob['foo'] = NDArray(cube + self.index * 10, h5loc='/foo', title='Yep')
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(3)

    with tb.File(fobj.name) as h5:
        node = h5.get_node("/foo")
        assert 3 == node[0, 1, 0]
        assert 4 == node[0, 1, 1]
        assert "Yep" == node.title

    fobj.close()
def test_writing_of_n_dim_arrays(self):
    """NDArrays with explicit h5loc/title are written to that location."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    cube = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

    class DummyPump(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            blob["foo"] = NDArray(cube + self.index * 10, h5loc="/foo", title="Yep")
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(3)

    with tb.File(fobj.name) as h5:
        node = h5.get_node("/foo")
        assert 3 == node[0, 1, 0]
        assert 4 == node[0, 1, 1]
        assert "Yep" == node.title

    fobj.close()
def test_skipped_blob_with_ndarray(self):
    """Blobs removed by Skipper leave no trace in the ndarray index table."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            blob["Arr"] = NDArray(np.arange(self.index + 1), h5loc="/arr")
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(Skipper, indices=[2])
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.File(fobj.name) as h5:
        data = h5.get_node("/arr")[:]
        index_table = h5.get_node("/arr_indices")[:]
        assert np.allclose([0, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4], data)
        assert np.allclose([0, 1, 3, 7], index_table["index"])
        assert np.allclose([1, 2, 4, 5], index_table["n_items"])

    fobj.close()
def test_writing_of_n_dim_arrays_with_defaults(self):
    """NDArrays without explicit h5loc/title land under /misc with defaults."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    cube = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

    class DummyPump(Pump):
        def process(self, blob):
            blob['foo'] = NDArray(cube)
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(3)

    with tb.File(fobj.name) as h5:
        node = h5.get_node('/misc')
        assert 3 == node[0, 1, 0]
        assert 4 == node[0, 1, 1]
        assert 'Unnamed NDArray' == node.title
        indices = h5.get_node('/misc_indices')
        self.assertTupleEqual((0, 2, 4), tuple(indices.cols.index[:]))
        self.assertTupleEqual((2, 2, 2), tuple(indices.cols.n_items[:]))

    fobj.close()
def test_h5_consistency_for_tables_without_group_id(self):
    """A group_id column is auto-added to tables that don't provide one."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Module):
        def configure(self):
            self.count = 0

        def process(self, blob):
            self.count += 10
            return Blob({"tab": Table({"a": self.count, "b": 1}, h5loc="tab")})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.File(fobj.name) as h5:
        data = h5.get_node("/tab")[:]
        assert np.allclose([10, 20, 30, 40, 50], data["a"])
        assert np.allclose([1, 1, 1, 1, 1], data["b"])
        assert np.allclose([0, 1, 2, 3, 4], data["group_id"])

    fobj.close()
def test_h5_consistency_for_tables_without_group_id_and_multiple_keys(self):
    """group_id is auto-added consistently when a blob has several tables.

    Fix: ``group_id_2`` was read from ``/tab1`` instead of ``/tab2``, so the
    second table's group_id column was never actually checked.
    """
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Module):
        def configure(self):
            self.count = 0

        def process(self, blob):
            self.count += 10
            tab1 = Table({"a": self.count, "b": 1}, h5loc="tab1")
            tab2 = Table({"c": self.count + 1, "d": 2}, h5loc="tab2")
            return Blob({"tab1": tab1, "tab2": tab2})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    with tb.File(fname) as f:
        a = f.get_node("/tab1")[:]["a"]
        b = f.get_node("/tab1")[:]["b"]
        c = f.get_node("/tab2")[:]["c"]
        d = f.get_node("/tab2")[:]["d"]
        group_id_1 = f.get_node("/tab1")[:]["group_id"]
        # was "/tab1" — must verify the group_id of the *second* table
        group_id_2 = f.get_node("/tab2")[:]["group_id"]

        assert np.allclose([10, 20, 30, 40, 50], a)
        assert np.allclose([1, 1, 1, 1, 1], b)
        assert np.allclose([0, 1, 2, 3, 4], group_id_1)
        assert np.allclose([11, 21, 31, 41, 51], c)
        assert np.allclose([2, 2, 2, 2, 2], d)
        assert np.allclose([0, 1, 2, 3, 4], group_id_2)

    fobj.close()
def test_h5_singletons_reading(self):
    """h5singleton tables come back as single-row tables on read-out."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Module):
        def process(self, blob):
            return Blob({"Tab": Table({"a": 2}, h5loc="tab", h5singleton=True)})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    class Observer(Module):
        def process(self, blob):
            print(blob)
            assert "Tab" in blob
            print(blob["Tab"])
            assert len(blob["Tab"]) == 1
            assert blob["Tab"].a[0] == 2
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(Observer)
    pipe.drain()

    fobj.close()
def test_h5_singletons_reading(self):
    """h5singleton tables come back as single-row tables on read-out."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Pump):
        def process(self, blob):
            return Blob({'Tab': Table({'a': 2}, h5loc='tab', h5singleton=True)})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    class Observer(Module):
        def process(self, blob):
            print(blob)
            assert 'Tab' in blob
            print(blob['Tab'])
            assert len(blob['Tab']) == 1
            assert blob['Tab'].a[0] == 2
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(Observer)
    pipe.drain()

    fobj.close()
def test_skipped_blob_with_tables(self):
    """Skipped blobs contribute no rows and group_ids stay contiguous."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            blob["Tab"] = Table(
                {"a": np.arange(self.index + 1), "i": self.index}, h5loc="/tab"
            )
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(Skipper, indices=[2])
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.File(fobj.name) as h5:
        tab = h5.get_node("/tab")[:]
        assert np.allclose([0, 1, 1, 3, 3, 3, 3, 4, 4, 4, 4, 4], tab["i"])
        assert np.allclose([0, 0, 1, 0, 1, 2, 3, 0, 1, 2, 3, 4], tab["a"])
        assert np.allclose([0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3], tab["group_id"])

    fobj.close()
def test_event_info_is_not_empty(self):
    """EventInfo read from the reference file is never empty."""
    self.fname = join(DATA_DIR, 'test_event_info.h5')

    class Printer(Module):
        def process(self, blob):
            assert blob['EventInfo'].size != 0
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=self.fname)
    pipe.attach(Printer)
    pipe.drain()
def test_event_info_is_not_empty(self):
    """EventInfo read from the reference file is never empty."""
    self.fname = data_path("hdf5/test_event_info.h5")

    class Printer(Module):
        def process(self, blob):
            assert blob["EventInfo"].size != 0
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=self.fname)
    pipe.attach(Printer)
    pipe.drain()
def test_hdf5_readout_split_tables_in_same_group(self):
    """Two split tables under the same group read back into separate keys."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Module):
        def configure(self):
            self.count = 0

        def process(self, blob):
            self.count += 1
            blob["TabA"] = Table(
                {"a": self.count * 10}, h5loc="/tabs/tab_a", split_h5=True
            )
            blob["TabB"] = Table(
                {"b": self.count * 100}, h5loc="/tabs/tab_b", split_h5=True
            )
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    class BlobTester(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            self.index += 1
            assert "GroupInfo" in blob
            assert "TabA" in blob
            assert "TabB" in blob
            assert self.index - 1 == blob["GroupInfo"].group_id
            assert self.index * 10 == blob["TabA"]["a"]
            assert self.index * 100 == blob["TabB"]["b"]
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(BlobTester)
    pipe.drain()

    fobj.close()
def test_hdf5_readout_split_tables_in_same_group(self):
    """Two split tables under the same group read back into separate keys."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Pump):
        def configure(self):
            self.count = 0

        def process(self, blob):
            self.count += 1
            blob['TabA'] = Table(
                {'a': self.count * 10}, h5loc='/tabs/tab_a', split_h5=True
            )
            blob['TabB'] = Table(
                {'b': self.count * 100}, h5loc='/tabs/tab_b', split_h5=True
            )
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    class BlobTester(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            self.index += 1
            assert 'GroupInfo' in blob
            assert 'TabA' in blob
            assert 'TabB' in blob
            assert self.index - 1 == blob['GroupInfo'].group_id
            assert self.index * 10 == blob['TabA']['a']
            assert self.index * 100 == blob['TabB']['b']
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(BlobTester)
    pipe.drain()

    fobj.close()
def test_write_table_service(self):
    """The ``write_table`` service persists tables emitted during prepare()."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class Foo(Module):
        def prepare(self):
            self.services["write_table"](Table({"a": 1}, name="A", h5loc="tab_a"))

    pipe = Pipeline()
    pipe.attach(Foo)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.File(fobj.name, "r") as h5:
        assert "/tab_a" in h5

    fobj.close()
def test_multiple_files_readout_using_the_pipeline(self):
    """HDF5Pump chains several files; group_ids restart per file."""

    class Observer(Module):
        def configure(self):
            self._blob_lengths = []
            self._a = []
            self._group_ids = []
            self.index = 0

        def process(self, blob):
            self._blob_lengths.append(len(blob))
            self._a.append(blob['Tab'].a[0])
            self._group_ids.append(blob['GroupInfo'].group_id[0])
            print(blob)
            self.index += 1
            return blob

        def finish(self):
            return {
                'blob_lengths': self._blob_lengths,
                'a': self._a,
                'group_ids': self._group_ids,
            }

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filenames=self.filenames)
    pipe.attach(Observer)
    summary = pipe.drain()['Observer']

    assert 12 == len(summary['blob_lengths'])
    print(summary)
    assert all(x == 2 for x in summary['blob_lengths'])
    self.assertListEqual([0, 1, 2, 1, 2, 3, 4, 2, 3, 4, 5, 6], summary['a'])
    self.assertListEqual(
        [0, 1, 2, 0, 1, 2, 3, 0, 1, 2, 3, 4], summary['group_ids']
    )
def test_event_info_has_correct_group_id(self):
    """group_id in EventInfo counts up from zero over the blobs."""
    self.fname = data_path("hdf5/test_event_info.h5")

    class Printer(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            assert blob["EventInfo"][0].group_id == self.index
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=self.fname)
    pipe.attach(Printer)
    pipe.drain()
def test_pipe(self):
    """Hit/McHit/McTrack counts per blob match the reference file."""

    class Observer(Module):
        def configure(self):
            self.dump = defaultdict(list)

        def process(self, blob):
            for key, data in blob.items():
                if key == "Header":
                    self.dump["headers"].append(data)
                else:
                    self.dump[key].append(len(data))
            return blob

        def finish(self):
            return self.dump

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=self.fname)
    pipe.attach(Observer)
    results = pipe.drain()["Observer"]

    self.assertListEqual(
        [147, 110, 70, 62, 59, 199, 130, 92, 296, 128], results["Hits"]
    )
    self.assertListEqual(
        [315, 164, 100, 111, 123, 527, 359, 117, 984, 263], results["McHits"]
    )
    self.assertListEqual([1, 1, 1, 1, 1, 3, 2, 1, 2, 1], results["McTracks"])
def test_event_info_has_correct_group_id(self):
    """group_id in EventInfo counts up from zero over the blobs."""
    self.fname = join(DATA_DIR, 'test_event_info.h5')

    class Printer(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            assert blob['EventInfo'][0].group_id == self.index
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=self.fname)
    pipe.attach(Printer)
    pipe.drain()
def test_default_random_state(self):
    """GlobalRandomState seeds numpy's RNG with its default seed."""
    assertAlmostEqual = self.assertAlmostEqual

    class Observer(Module):
        def configure(self):
            self.i = 0
            # expected first three draws for the default seed
            self.x = [0.3745401188, 0.950714306, 0.7319939418]

        def process(self, blob):
            assertAlmostEqual(self.x[self.i], np.random.rand())
            self.i += 1
            return blob

    pipe = Pipeline()
    pipe.attach(GlobalRandomState)
    pipe.attach(Observer)
    pipe.drain(3)
def test_default_random_state(self):
    """GlobalRandomState seeds numpy's RNG with its default seed."""
    assertAlmostEqual = self.assertAlmostEqual

    class Observer(Module):
        def configure(self):
            self.i = 0
            # expected first three draws for the default seed
            self.x = [0.3745401188, 0.950714306, 0.7319939418]

        def process(self, blob):
            assertAlmostEqual(self.x[self.i], np.random.rand())
            self.i += 1
            return blob

    pipe = Pipeline()
    pipe.attach(GlobalRandomState)
    pipe.attach(Observer)
    pipe.drain(3)
def test_custom_random_state(self):
    """GlobalRandomState honours a user-supplied seed."""
    assertAlmostEqual = self.assertAlmostEqual

    class Observer(Module):
        def configure(self):
            self.i = 0
            # expected first three draws for seed=5
            self.x = [0.221993171, 0.870732306, 0.206719155]

        def process(self, blob):
            assertAlmostEqual(self.x[self.i], np.random.rand())
            self.i += 1
            return blob

    pipe = Pipeline()
    pipe.attach(GlobalRandomState, seed=5)
    pipe.attach(Observer)
    pipe.drain(3)
def test_custom_random_state(self):
    """GlobalRandomState honours a user-supplied seed."""
    assertAlmostEqual = self.assertAlmostEqual

    class Observer(Module):
        def configure(self):
            self.i = 0
            # expected first three draws for seed=5
            self.x = [0.221993171, 0.870732306, 0.206719155]

        def process(self, blob):
            assertAlmostEqual(self.x[self.i], np.random.rand())
            self.i += 1
            return blob

    pipe = Pipeline()
    pipe.attach(GlobalRandomState, seed=5)
    pipe.attach(Observer)
    pipe.drain(3)
def main():
    """CLI entry point: dump MSG messages from a ligier to files."""
    from docopt import docopt

    args = docopt(__doc__)

    pipe = Pipeline()
    pipe.attach(
        CHPump,
        host=args['-l'],
        port=int(args['-p']),
        tags='MSG',
        timeout=7 * 60 * 60 * 24,
        max_queue=500,
    )
    pipe.attach(MSGDumper, prefix=args['-x'], path=args['-o'])
    pipe.drain()
def main():
    """CLI entry point: dump MSG messages from a ligier into a file."""
    from docopt import docopt

    args = docopt(__doc__)

    pipe = Pipeline()
    pipe.attach(
        CHPump,
        host=args['-l'],
        port=int(args['-p']),
        tags='MSG',
        timeout=7 * 60 * 60 * 24,
        max_queue=500,
    )
    pipe.attach(MSGDumper, filename=args['-f'])
    pipe.drain()
def test_hdf5_readout(self):
    """Round-trip: tables written via HDF5Sink are read back via HDF5Pump.

    Fix: the redundant chained comparison ``assert 1 == blob["Tab"]["b"] == 1``
    is reduced to a single comparison.
    """
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Module):
        def configure(self):
            self.count = 0

        def process(self, blob):
            self.count += 1
            blob["Tab"] = Table({"a": self.count * 10, "b": 1}, h5loc="tab")
            blob["Tab2"] = Table({"a": np.arange(self.count)}, h5loc="tab2")
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    class BlobTester(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            self.index += 1
            assert "GroupInfo" in blob
            assert "Tab" in blob
            print(self.index)
            print(blob["Tab"])
            print(blob["Tab"]["a"])
            assert self.index - 1 == blob["GroupInfo"].group_id
            assert self.index * 10 == blob["Tab"]["a"]
            assert 1 == blob["Tab"]["b"]
            assert np.allclose(np.arange(self.index), blob["Tab2"]["a"])
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(BlobTester)
    pipe.drain()

    fobj.close()
def test_hdf5_readout(self):
    """Round-trip: tables written via HDF5Sink are read back via HDF5Pump.

    Fix: the redundant chained comparison ``assert 1 == blob['Tab']['b'] == 1``
    is reduced to a single comparison.
    """
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Pump):
        def configure(self):
            self.count = 0

        def process(self, blob):
            self.count += 1
            blob['Tab'] = Table({'a': self.count * 10, 'b': 1}, h5loc='tab')
            blob['Tab2'] = Table({'a': np.arange(self.count)}, h5loc='tab2')
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    class BlobTester(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            self.index += 1
            assert 'GroupInfo' in blob
            assert 'Tab' in blob
            print(self.index)
            print(blob['Tab'])
            print(blob['Tab']['a'])
            assert self.index - 1 == blob['GroupInfo'].group_id
            assert self.index * 10 == blob['Tab']['a']
            assert 1 == blob['Tab']['b']
            assert np.allclose(np.arange(self.index), blob['Tab2']['a'])
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(BlobTester)
    pipe.drain()

    fobj.close()
def test_h5info(self):
    """km3pipe/pytables versions and the format version are stored as attrs."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Module):
        def process(self, blob):
            return Blob()

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.open_file(fobj.name, "r") as h5file:
        attrs = h5file.root._v_attrs
        assert version == attrs.km3pipe.decode()
        assert tb.__version__ == attrs.pytables.decode()
        assert FORMAT_VERSION == attrs.format_version

    fobj.close()
def test_reading_of_n_dim_arrays(self):
    """NDArrays written per blob are read back blob-wise with correct values."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name
    cube = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

    class DummyPump(Pump):
        def configure(self):
            self.index = 0

        def process(self, blob):
            blob['Foo'] = NDArray(cube + self.index * 10, h5loc='/foo', title='Yep')
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(3)

    class Observer(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            assert 'Foo' in blob
            foo = blob['Foo']
            print(self.index)
            offset = self.index * 10
            assert offset + 1 == foo[0, 0, 0]
            assert offset + 8 == foo[1, 1, 1]
            assert offset + 3 == foo[0, 1, 0]
            assert offset + 6 == foo[1, 0, 1]
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(Observer)
    pipe.drain()

    fobj.close()
def main():
    """CLI entry point: forward MSG messages from a ligier to log.io."""
    from docopt import docopt

    args = docopt(__doc__)

    pipe = Pipeline()
    pipe.attach(
        CHPump,
        host=args['-l'],
        port=int(args['-p']),
        tags='MSG',
        timeout=7 * 60 * 60 * 24,
        max_queue=500,
    )
    pipe.attach(LogIO, logio_ip=args['-x'], logio_port=int(args['-q']))
    pipe.drain()
def test_h5info(self):
    """km3pipe/pytables versions and the format version are stored as attrs."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Pump):
        def process(self, blob):
            return Blob()

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.open_file(fobj.name, 'r') as h5file:
        attrs = h5file.root._v_attrs
        assert version == attrs.km3pipe.decode()
        assert tb.__version__ == attrs.pytables.decode()
        assert FORMAT_VERSION == attrs.format_version

    fobj.close()
def test_h5_consistency_for_tables_with_custom_group_id(self):
    """With ``reset_group_id=False`` the user-supplied group_id survives."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Module):
        def process(self, blob):
            return Blob({"tab": Table({"group_id": 2}, h5loc="tab")})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name, reset_group_id=False)
    pipe.drain(5)

    with tb.File(fobj.name) as h5:
        group_id = h5.get_node("/tab")[:]["group_id"]
        assert np.allclose([2, 2, 2, 2, 2], group_id)

    fobj.close()
def test_h5_singletons(self):
    """h5singleton tables are written only once despite multiple cycles."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Pump):
        def process(self, blob):
            return Blob({'tab': Table({'a': 2}, h5loc='tab', h5singleton=True)})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.File(fobj.name) as h5:
        assert len(h5.get_node("/tab")[:]['a']) == 1

    fobj.close()
def test_h5_consistency_for_tables_with_custom_group_id(self):
    """A user-supplied group_id column is written out unchanged."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Pump):
        def process(self, blob):
            return Blob({'tab': Table({'group_id': 2}, h5loc='tab')})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.File(fobj.name) as h5:
        group_id = h5.get_node("/tab")[:]['group_id']
        assert np.allclose([2, 2, 2, 2, 2], group_id)

    fobj.close()
def test_h5_singletons(self):
    """h5singleton tables are written only once despite multiple cycles."""
    fobj = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Module):
        def process(self, blob):
            return Blob({"tab": Table({"a": 2}, h5loc="tab", h5singleton=True)})

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fobj.name)
    pipe.drain(5)

    with tb.File(fobj.name) as h5:
        assert len(h5.get_node("/tab")[:]["a"]) == 1

    fobj.close()
def test_filtered_writing(self):
    """Each filtered sink writes only its own key; an unfiltered one writes all."""
    fobjs = [tempfile.NamedTemporaryFile(delete=True) for _ in range(3)]
    fobj_all = tempfile.NamedTemporaryFile(delete=True)

    class DummyPump(Module):
        def configure(self):
            self.i = 0

        def process(self, blob):
            blob["A"] = Table({"a": self.i}, name="A", h5loc="tab_a")
            blob["B"] = Table({"b": self.i}, name="B", h5loc="tab_b")
            blob["C"] = Table({"c": self.i}, name="C", h5loc="tab_c")
            self.i += 1
            return blob

    keys = "ABC"

    pipe = Pipeline()
    pipe.attach(DummyPump)
    for fobj, key in zip(fobjs, keys):
        pipe.attach(HDF5Sink, filename=fobj.name, keys=[key])
    pipe.attach(HDF5Sink, filename=fobj_all.name)
    pipe.drain(5)

    # each filtered sink holds exactly its own table
    for fobj, key in zip(fobjs, keys):
        with tb.File(fobj.name, "r") as h5:
            assert "/tab_" + key.lower() in h5
            for other in set(keys) - set(key):
                assert "/tab_" + other.lower() not in h5

    # the unfiltered sink holds all of them
    for key in keys:
        with tb.File(fobj_all.name, "r") as h5:
            assert "/tab_" + key.lower() in h5

    for fobj in fobjs:
        fobj.close()
    fobj_all.close()
def test_reading_of_n_dim_arrays(self):
    """NDArrays written per blob are read back blob-wise with correct values."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name
    cube = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

    class DummyPump(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            blob["Foo"] = NDArray(cube + self.index * 10, h5loc="/foo", title="Yep")
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(3)

    class Observer(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            assert "Foo" in blob
            foo = blob["Foo"]
            print(self.index)
            offset = self.index * 10
            assert offset + 1 == foo[0, 0, 0]
            assert offset + 8 == foo[1, 1, 1]
            assert offset + 3 == foo[0, 1, 0]
            assert offset + 6 == foo[1, 0, 1]
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(Observer)
    pipe.drain()

    fobj.close()
def test_skipped_blob_with_tables_and_ndarrays_first_and_last(self):
    """Skipping the first and last blob keeps tables and arrays aligned."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Module):
        def configure(self):
            self.index = 0

        def process(self, blob):
            blob["Arr"] = NDArray(np.arange(self.index + 1), h5loc="/arr")
            blob["Tab"] = Table(
                {"a": np.arange(self.index + 1), "i": self.index}, h5loc="/tab"
            )
            self.index += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(Skipper, indices=[0, 4])
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    with tb.File(fname) as h5:
        tab = h5.get_node("/tab")[:]
        arr = h5.get_node("/arr")[:]
        index_table = h5.get_node("/arr_indices")[:]
        group_info = h5.get_node("/group_info")[:]

        assert np.allclose([1, 1, 2, 2, 2, 3, 3, 3, 3], tab["i"])
        assert np.allclose([0, 1, 0, 1, 2, 0, 1, 2, 3], tab["a"])
        assert np.allclose([0, 0, 1, 1, 1, 2, 2, 2, 2], tab["group_id"])
        assert np.allclose([0, 1, 0, 1, 2, 0, 1, 2, 3], arr)
        assert np.allclose([0, 2, 5], index_table["index"])
        assert np.allclose([2, 3, 4], index_table["n_items"])

    fobj.close()
def setUp(self):
    """Create three small HDF5 files with 3, 4 and 5 events each."""

    class DummyPump(Pump):
        def configure(self):
            self.i = self.require('i')

        def process(self, blob):
            blob['Tab'] = Table({'a': self.i}, h5loc='tab')
            self.i += 1
            return blob

    self.filenames = []
    self.fobjs = []
    for i in range(3):
        fobj = tempfile.NamedTemporaryFile(delete=True)
        self.filenames.append(fobj.name)
        self.fobjs.append(fobj)
        pipe = Pipeline()
        pipe.attach(DummyPump, i=i)
        pipe.attach(HDF5Sink, filename=fobj.name)
        pipe.drain(i + 3)
def test_sparse_ndarray(self):
    """NDArrays present in only one blob read back empty everywhere else.

    Fix: the temporary file is now closed at the end (it was leaked before),
    matching every other test in this suite.
    """
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class Dummy(Module):
        def configure(self):
            self.i = 0

        def process(self, blob):
            self.i += 1
            if self.i == 5:
                blob["Arr"] = NDArray([1, 2, 3], h5loc="/arr")
            return blob

    pipe = Pipeline()
    pipe.attach(Dummy)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(10)

    class Observer(Module):
        def configure(self):
            self.i = 0

        def process(self, blob):
            self.i += 1
            print(blob)
            if self.i == 5:
                assert 6 == np.sum(blob["Arr"])
            else:
                assert len(blob["Arr"]) == 0
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(Observer)
    pipe.drain()

    fobj.close()
def test_sparse_table(self):
    """Tables present in only one blob are absent from the other blobs.

    Fix: the temporary file is now closed at the end (it was leaked before),
    matching every other test in this suite.
    """
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class Dummy(Module):
        def configure(self):
            self.i = 0

        def process(self, blob):
            self.i += 1
            if self.i == 5:
                blob["Tab"] = Table({"a": 23}, h5loc="/tab")
            return blob

    pipe = Pipeline()
    pipe.attach(Dummy)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(10)

    class Observer(Module):
        def configure(self):
            self.i = 0

        def process(self, blob):
            self.i += 1
            if self.i == 5:
                assert 23 == blob["Tab"].a[0]
            else:
                assert "Tab" not in blob
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(Observer)
    pipe.drain()

    fobj.close()
def test_sparse_table(self):
    """Tables present in only one blob are absent from the other blobs.

    Fix: the temporary file is now closed at the end (it was leaked before),
    matching every other test in this suite.
    """
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class Dummy(Module):
        def configure(self):
            self.i = 0

        def process(self, blob):
            self.i += 1
            if self.i == 5:
                blob['Tab'] = Table({'a': 23}, h5loc='/tab')
            return blob

    pipe = Pipeline()
    pipe.attach(Dummy)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(10)

    class Observer(Module):
        def configure(self):
            self.i = 0

        def process(self, blob):
            self.i += 1
            if self.i == 5:
                assert 23 == blob['Tab'].a[0]
            else:
                assert 'Tab' not in blob
            return blob

    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=fname)
    pipe.attach(Observer)
    pipe.drain()

    fobj.close()
def test_shuffle_with_reset_index(self):
    """Shuffled read-out with reset_index renumbers group_ids from zero."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Pump):
        def configure(self):
            self.i = 0

        def process(self, blob):
            blob['Tab'] = Table({'a': self.i}, h5loc='/tab')
            blob['SplitTab'] = Table({'b': self.i}, h5loc='/split_tab', split_h5=True)
            blob['Arr'] = NDArray(np.arange(self.i + 1), h5loc='/arr')
            self.i += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    shuffled_group_ids = [2, 1, 0, 3, 4]

    def shuffle(x):
        for i in range(len(x)):
            x[i] = shuffled_group_ids[i]

    class Observer(Module):
        def configure(self):
            self.group_ids_tab = []
            self.group_ids_split_tab = []
            self.group_ids_arr = []
            self.a = []
            self.b = []
            self.arr_len = []

        def process(self, blob):
            group_id = blob['GroupInfo'].group_id[0]
            # every data structure in the blob must agree on the group_id
            assert group_id == blob['Tab'].group_id[0]
            assert group_id == blob['SplitTab'].group_id[0]
            assert group_id == blob['Arr'].group_id
            self.group_ids_tab.append(blob['Tab'].group_id[0])
            self.group_ids_split_tab.append(blob['SplitTab'].group_id[0])
            self.group_ids_arr.append(blob['Arr'].group_id)
            self.a.append(blob['Tab'].a[0])
            self.b.append(blob['SplitTab'].b[0])
            self.arr_len.append(len(blob['Arr']) - 1)
            return blob

        def finish(self):
            return {
                'group_ids_tab': self.group_ids_tab,
                'group_ids_split_tab': self.group_ids_split_tab,
                'group_ids_arr': self.group_ids_arr,
                'a': self.a,
                'b': self.b,
                'arr_len': self.arr_len,
            }

    pipe = Pipeline()
    pipe.attach(
        HDF5Pump,
        filename=fname,
        shuffle=True,
        shuffle_function=shuffle,
        reset_index=True,
    )
    pipe.attach(Observer)
    observed = pipe.drain()['Observer']

    self.assertListEqual(observed['group_ids_tab'], [0, 1, 2, 3, 4])
    self.assertListEqual(observed['group_ids_split_tab'], [0, 1, 2, 3, 4])
    self.assertListEqual(observed['group_ids_arr'], [0, 1, 2, 3, 4])
    self.assertListEqual(observed['a'], shuffled_group_ids)
    self.assertListEqual(observed['b'], shuffled_group_ids)
    # a small hack: we store the length of the array in 'b', which is
    # then equal to the shuffled group IDs (since those were generated
    # using the group_id
    self.assertListEqual(observed['arr_len'], shuffled_group_ids)

    fobj.close()
plt.suptitle("FrameIndex {0}, TriggerCounter {1}\n{2} UTC".format( e_info.frame_index, e_info.trigger_counter, datetime.utcfromtimestamp(e_info.utc_seconds)), fontsize=16) fig.text(0.5, 0.01, 'time [ns]', ha='center') fig.text(0.08, 0.5, 'z [m]', va='center', rotation='vertical') # plt.tight_layout() filename = 'ztplot' f = os.path.join(PLOTS_PATH, filename + '.png') f_tmp = os.path.join(PLOTS_PATH, filename + '_tmp.png') plt.savefig(f_tmp, dpi=120, bbox_inches="tight") plt.close('all') shutil.move(f_tmp, f) def finish(self): self.run = False if self.thread is not None: self.thread.stop() pipe = Pipeline() pipe.attach(CHPump, host='127.0.0.1', tags='IO_EVT, IO_SUM', timeout=60 * 60 * 24 * 7, max_queue=2000) pipe.attach(DAQProcessor) pipe.attach(ZTPlot) pipe.drain()
class TOTHisto(Module):
    """Collect the ToT of every PMT hit and show a histogram at the end."""

    def configure(self):
        self.tots = []

    def process(self, blob):
        for pmt_data in blob['PMTData']:
            self.tots.append(pmt_data.tot)
        return blob

    def finish(self):
        plt.hist(self.tots, 80)
        plt.xlabel("ToT [ns]")
        plt.ylabel('count')
        plt.show()


class PrintCLBHeader(Module):
    """Print the CLB header of every blob."""

    def process(self, blob):
        print(blob['CLBHeader'])
        return blob


pipeline = Pipeline()
pipeline.attach(
    CLBPump, filename='/Users/tamasgal/Data/KM3NeT/du1-clb/DOM2_run23.dat'
)
pipeline.attach(StatusBar)
pipeline.attach(PrintCLBHeader)
pipeline.attach(TOTHisto)
pipeline.drain(30)
"""Open a file with real data and estimate the DOM rates. """ from km3pipe import Module, Pipeline from km3pipe.io.aanet import AanetPump class RateEstimator(Module): def process(self, blob): hits = blob["Hits"] doms = {h.dom_id for h in hits} hit_times = [h.t for h in hits] event_length = max(hit_times) - min(hit_times) n_doms = len(doms) n_hits = len(hits) print("Active DOMs: {0}".format(doms)) print("Number of active DOMs: {0}".format(n_doms)) print("Event length: {0} ns".format(event_length)) print("Number of hits: {0}".format(n_hits)) rate_per_dom = 1e6 / (event_length / (n_hits / n_doms)) print("Estimated rate per DOM: {0} kHz".format(rate_per_dom)) pipe = Pipeline() pipe.attach(AanetPump, filename='KM3NeT_00000007_00001000.root') pipe.attach(RateEstimator) pipe.drain(10)
# Create an in-memory-only file and hand it to the HDF5Sink via the
# ``h5file`` argument; to actually write a file to disk, pass ``filename``
# instead (commented out below).
OUTFILE = tb.open_file(
    # create the file in memory only
    OUT_FNAME,
    'w',
    driver="H5FD_CORE",
    driver_core_backing_store=0,
)

#####################################################
# Setting up the pipeline
# -----------------------
pipe = Pipeline(timeit=True)
pipe.attach(EvtPump, filename=IN_FNAME)
pipe.attach(StatusBar, every=25)
pipe.attach(
    HDF5Sink,
    # filename=OUT_FNAME,
    h5file=OUTFILE,
)

#####################################################
# Draining the pipeline
# ---------------------
pipe.drain(N_EVENTS)
def test_pipe(self):
    """Round-trip the input file through a pump/sink pipeline."""
    pipeline = Pipeline()
    pipeline.attach(HDF5Pump, filename=self.fname)
    pipeline.attach(HDF5Sink, h5file=self.out)
    pipeline.drain()
#!/usr/bin/env python
"""
Usage: kp_jfit_readout.py FILENAME
"""
from docopt import docopt

from km3pipe.io.hdf5 import HDF5Sink
from km3pipe.io.jpp import FitPump
from km3pipe import Pipeline


def print_fits(blob):
    """Print the first ten JFit entries of the blob."""
    fits = blob['JFit']
    print(fits[:10])
    # BUGFIX: a km3pipe module (including function modules) must return
    # the blob — otherwise the HDF5Sink attached after this module
    # receives nothing and writes no data.
    return blob


if __name__ == '__main__':
    args = docopt(__doc__)
    fname = args['FILENAME']

    pipe = Pipeline()
    pipe.attach(FitPump, filename=fname)
    pipe.attach(print_fits)
    pipe.attach(HDF5Sink, filename=fname + '.h5')
    pipe.drain(1)
__author__ = 'tamasgal'

from km3pipe import Module, Pipeline
from km3pipe.pumps import EvtPump


class PrintBlob(Module):
    """Print the keys of every blob passing through the pipeline."""

    def process(self, blob):
        print(blob.keys())
        return blob


pipeline = Pipeline()
pipeline.attach(EvtPump, 'evtpump', filename='files/example_numuNC.evt')
pipeline.attach(PrintBlob, 'printer')
pipeline.drain(1)
def test_shuffle_with_reset_index(self):
    """Shuffled reading with reset_index=True renumbers group IDs 0..n-1."""
    fobj = tempfile.NamedTemporaryFile(delete=True)
    fname = fobj.name

    class DummyPump(Module):
        def configure(self):
            self.i = 0

        def process(self, blob):
            blob["Tab"] = Table({"a": self.i}, h5loc="/tab")
            blob["SplitTab"] = Table(
                {"b": self.i}, h5loc="/split_tab", split_h5=True
            )
            blob["Arr"] = NDArray(np.arange(self.i + 1), h5loc="/arr")
            self.i += 1
            return blob

    pipe = Pipeline()
    pipe.attach(DummyPump)
    pipe.attach(HDF5Sink, filename=fname)
    pipe.drain(5)

    shuffled_group_ids = [2, 1, 0, 3, 4]

    def shuffle(x):
        # Overwrite the index order in place with the fixed permutation.
        for idx in range(len(x)):
            x[idx] = shuffled_group_ids[idx]

    class Observer(Module):
        def configure(self):
            self.group_ids_tab = []
            self.group_ids_split_tab = []
            self.group_ids_arr = []
            self.a = []
            self.b = []
            self.arr_len = []

        def process(self, blob):
            group_id_tab = blob["Tab"].group_id[0]
            group_id_split_tab = blob["SplitTab"].group_id[0]
            group_id_arr = blob["Arr"].group_id
            # Every container in the blob must agree with the GroupInfo.
            assert blob["GroupInfo"].group_id[0] == group_id_tab
            assert blob["GroupInfo"].group_id[0] == group_id_split_tab
            assert blob["GroupInfo"].group_id[0] == group_id_arr
            self.group_ids_tab.append(group_id_tab)
            self.group_ids_split_tab.append(group_id_split_tab)
            self.group_ids_arr.append(group_id_arr)
            self.a.append(blob["Tab"].a[0])
            self.b.append(blob["SplitTab"].b[0])
            self.arr_len.append(len(blob["Arr"]) - 1)
            return blob

        def finish(self):
            return {
                "group_ids_tab": self.group_ids_tab,
                "group_ids_split_tab": self.group_ids_split_tab,
                "group_ids_arr": self.group_ids_arr,
                "a": self.a,
                "b": self.b,
                "arr_len": self.arr_len,
            }

    pipe = Pipeline()
    pipe.attach(
        HDF5Pump,
        filename=fname,
        shuffle=True,
        shuffle_function=shuffle,
        reset_index=True,
    )
    pipe.attach(Observer)
    results = pipe.drain()

    # reset_index renumbers the group IDs, so they come out 0..4 again.
    self.assertListEqual(results["Observer"]["group_ids_tab"], [0, 1, 2, 3, 4])
    self.assertListEqual(
        results["Observer"]["group_ids_split_tab"], [0, 1, 2, 3, 4]
    )
    self.assertListEqual(results["Observer"]["group_ids_arr"], [0, 1, 2, 3, 4])
    # The payload values still reveal the original (shuffled) order.
    self.assertListEqual(results["Observer"]["a"], shuffled_group_ids)
    self.assertListEqual(results["Observer"]["b"], shuffled_group_ids)
    # A small hack: each array was built as np.arange(group_id + 1), so
    # its length minus one equals the original (shuffled) group ID.
    self.assertListEqual(results["Observer"]["arr_len"], shuffled_group_ids)
    fobj.close()
#!/usr/bin/env python
from km3pipe import Module, Pipeline
from km3pipe.io import HDF5Pump


class Printer(Module):
    """Print the DOM IDs of the hit table in each blob."""

    def process(self, blob):
        print(blob['HitTable']['dom_id'])
        return blob


FILENAME = '/Users/tamasgal/Data/KM3NeT/DU-2/KM3NeT_00000007_00001597.root.h5'

pipe = Pipeline()
pipe.attach(HDF5Pump, filename=FILENAME)
pipe.attach(Printer)
pipe.drain(5)
.format(len(self.hits), mean_hits)) class MeanRates(Module): def __init__(self, **context): super(self.__class__, self).__init__(**context) self.rates = {} def process(self, blob): try: summaryslice = blob['DAQSummaryslice'] print(summaryslice.summary_frames) except KeyError: pass return blob def finish(self): pass pipeline = Pipeline() pipeline.attach(DAQPump, 'daq_pump', filename='/Users/tamasgal/Desktop/RUN-PPM_DU-00430-20140730-121124_detx.dat') #pipeline.attach(DAQEventPrinter, 'moo') #pipeline.attach(DAQSummaryslicePrinter, 'summaryslice_printer') #pipeline.attach(MeanRates, 'mean_rates') pipeline.attach(MeanHits, 'mean_hits') pipeline.drain()
# plt.tight_layout() filename = 'ztplot' f = os.path.join(PLOTS_PATH, filename + '.png') f_tmp = os.path.join(PLOTS_PATH, filename + '_tmp.png') plt.savefig(f_tmp, dpi=120, bbox_inches="tight") plt.close('all') shutil.move(f_tmp, f) def finish(self): self.run = False if self.thread is not None: self.thread.stop() pipe = Pipeline() pipe.attach( CHPump, host='192.168.0.110', port=5553, tags='IO_EVT, IO_SUM', timeout=60 * 60 * 24 * 7, max_queue=2000 ) pipe.attach(DAQProcessor) pipe.attach(DOMActivityPlotter) pipe.attach(TriggerRate) pipe.attach(DOMHits) pipe.attach(ZTPlot) pipe.drain()
def test_pipe(self):
    """Drain the HDF5Pump straight into the HDF5Sink."""
    pipe = Pipeline()
    pipe.attach(HDF5Pump, filename=self.fname)
    pipe.attach(HDF5Sink, h5file=self.out)
    pipe.drain()