def setUp(self) -> None:
    """Shuffle dataset "x" of the input file into ``temp_out.h5``.

    The output path is stored on ``self.outfile`` and the shuffle is run
    with a fixed seed of 2.

    NOTE(review): ``self.infile`` is not created in this method; it is
    presumably provided elsewhere in the test class -- confirm.
    """
    self.outfile = "temp_out.h5"
    source_path = self.infile.name
    shuffle2.h5shuffle2(
        source_path,
        output_file=self.outfile,
        datasets=("x",),
        seed=2,
    )
def setUp(self):
    """Create a dummy input file and shuffle it into ``temp_output.h5``.

    The two values returned by ``_make_shuffle_dummy_file`` are kept on
    ``self.x`` and ``self.y``. NumPy's global random generator is seeded
    with 42 before the datasets "x" and "y" are shuffled.
    """
    self.temp_input = "temp_input.h5"
    self.temp_output = "temp_output.h5"

    dummy_x, dummy_y = _make_shuffle_dummy_file(self.temp_input)
    self.x = dummy_x
    self.y = dummy_y

    np.random.seed(42)
    # max_ram=400 -> 2 batches
    shuffle2.h5shuffle2(
        input_file=self.temp_input,
        output_file=self.temp_output,
        datasets=("x", "y"),
        max_ram=400,
    )
def test_run_3_iterations(self):
    """Smoke test: shuffling with ``iterations=3`` must not raise.

    No output content is checked. The output file is deleted afterwards,
    whether or not the shuffle succeeded.
    """
    triple_out = "temp_output_triple.h5"
    try:
        shuffle2.h5shuffle2(
            input_file=self.temp_input,
            output_file=triple_out,
            datasets=("x", "y"),
            iterations=3,
        )
    finally:
        # The file may not exist if the shuffle failed early.
        if os.path.exists(triple_out):
            os.remove(triple_out)
def setUp(self):
    """Build an indexed HDF5 input file and shuffle it into ``temp_out.h5``.

    The input is written to a named temporary file and contains:

    * ``x``: the integers 0..1999, chunked by 11, with the attribute
      ``indexed`` set to 1.
    * ``x_indices``: 100 records of ``(index, n_items)``, chunked by 14,
      where every record has ``n_items == 20`` and ``index`` is the
      running start offset (0, 20, 40, ...).

    The start offsets are also kept on ``self.index``. The shuffle is run
    on dataset "x" with seed 2, ``max_ram=10000`` and two iterations.
    """
    self.infile = tempfile.NamedTemporaryFile()

    # 100 groups of 20 items each; offsets are the cumulative sums shifted
    # by one so that the first group starts at 0.
    items_per_group = np.full(100, 20.0)
    self.index = np.concatenate(
        ([0.], np.cumsum(items_per_group)[:-1])
    )
    index_records = np.array(
        list(zip(self.index, items_per_group)),
        dtype=[("index", "<i8"), ("n_items", "<i8")],
    )

    with h5py.File(self.infile, "w") as h5_file:
        x_dataset = h5_file.create_dataset(
            "x", data=np.arange(2000), chunks=(11,)
        )
        x_dataset.attrs.create("indexed", 1)
        h5_file.create_dataset("x_indices", data=index_records, chunks=(14,))

    self.outfile = "temp_out.h5"
    shuffle2.h5shuffle2(
        self.infile.name,
        output_file=self.outfile,
        datasets=("x",),
        seed=2,
        max_ram=10000,
        iterations=2,
    )