示例#1
0
 def setUp(self) -> None:
     """Shuffle dataset ``x`` of the input file into ``temp_out.h5``.

     Reads the input path from ``self.infile.name``; ``self.infile`` is not
     assigned in this method, so it must already exist when this runs.
     The output path is stored on ``self.outfile``.
     """
     self.outfile = "temp_out.h5"
     # Keyword options for the shuffle; the input path is passed positionally.
     shuffle_options = {
         "output_file": self.outfile,
         "datasets": ("x", ),
         "seed": 2,
     }
     shuffle2.h5shuffle2(self.infile.name, **shuffle_options)
示例#2
0
    def setUp(self):
        """Create a dummy input file and shuffle it into ``temp_output.h5``.

        The values returned by ``_make_shuffle_dummy_file`` are stored on
        ``self.x`` and ``self.y``. NumPy's global random generator is seeded
        with 42 after the dummy file is made and before shuffling.
        """
        self.temp_input, self.temp_output = "temp_input.h5", "temp_output.h5"

        dummy_contents = _make_shuffle_dummy_file(self.temp_input)
        self.x, self.y = dummy_contents

        # Seed only after the dummy file exists, right before the shuffle.
        np.random.seed(42)
        shuffle_options = {
            "input_file": self.temp_input,
            "output_file": self.temp_output,
            "datasets": ("x", "y"),
            "max_ram": 400,  # -> 2 batches
        }
        shuffle2.h5shuffle2(**shuffle_options)
示例#3
0
 def test_run_3_iterations(self):
     """Smoke test: shuffling with ``iterations=3`` must not raise.

     No assertions are made on the output; the produced file is deleted
     afterwards whether or not the shuffle succeeded.
     """
     triple_output = "temp_output_triple.h5"
     shuffle_options = {
         "input_file": self.temp_input,
         "output_file": triple_output,
         "datasets": ("x", "y"),
         "iterations": 3,
     }
     try:
         shuffle2.h5shuffle2(**shuffle_options)
     finally:
         # Remove the output only if the shuffle got far enough to create it.
         output_was_written = os.path.exists(triple_output)
         if output_was_written:
             os.remove(triple_output)
示例#4
0
    def setUp(self):
        """Build an indexed HDF5 input file and shuffle it into ``temp_out.h5``.

        The input is a named temporary file holding:

        * ``x``: the integers 0..1999, chunked by 11, with attribute
          ``indexed`` set to 1.
        * ``x_indices``: 100 ``(index, n_items)`` records, chunked by 14,
          where every record has ``n_items`` of 20 and ``index`` is the
          running start offset (0, 20, 40, ...).

        The start offsets are kept on ``self.index``. The file is then
        shuffled with seed 2, ``max_ram=10000`` and two iterations.
        """
        self.infile = tempfile.NamedTemporaryFile()
        with h5py.File(self.infile, "w") as h5_in:
            x_dataset = h5_in.create_dataset(
                "x", data=np.arange(2000), chunks=(11, ))
            x_dataset.attrs.create("indexed", 1)

            n_items = np.ones(100) * 20
            # Start offset of each record: cumulative sum shifted right by
            # one, with a leading zero for the first record.
            shifted_totals = np.cumsum(n_items)[:-1]
            self.index = np.concatenate([[0.], shifted_totals])

            record_dtype = [("index", "<i8"), ("n_items", "<i8")]
            records = list(zip(self.index, n_items))
            indices = np.array(records, dtype=record_dtype)
            h5_in.create_dataset("x_indices", data=indices, chunks=(14, ))

        self.outfile = "temp_out.h5"
        shuffle_options = {
            "output_file": self.outfile,
            "datasets": ("x", ),
            "seed": 2,
            "max_ram": 10000,
            "iterations": 2,
        }
        shuffle2.h5shuffle2(self.infile.name, **shuffle_options)