def check_numpy_matrix(self, dtype):
    """Round-trip a random numpy matrix of the given dtype.

    Builds a random 2D array, views it as a matrix, writes it and
    reads it back, then checks the result for equality.
    """
    dims = random_numpy_shape(2, max_array_axis_length)
    expected = np.matrix(random_numpy(dims, dtype))
    result = self.write_readback(expected, random_name(), self.options)
    self.assert_equal(result, expected)
def check_numpy_array(self, dtype, dimensions):
    """Round-trip a random numpy array of the given dtype and rank,
    then compare the read-back value with the original."""
    dims = random_numpy_shape(dimensions, max_array_axis_length)
    expected = random_numpy(dims, dtype)
    result = self.write_readback(expected, random_name(), self.options)
    self.assert_equal(result, expected)
def check_all_valid_str_keys(tp, option_keywords):
    """Write a dict-like of type tp and check the stored key names.

    Every key of the written data must appear (escaped) in the HDF5
    group, and neither of the special key/value names from the
    options may appear.
    """
    options = hdf5storage.Options(**option_keywords)
    key_value_names = (options.dict_like_keys_name,
                       options.dict_like_values_name)

    data = random_dict(tp)
    # The special key/value names must not collide with actual keys.
    for k in key_value_names:
        if k in data:
            del data[k]

    # Make a random name.
    name = random_name()

    # Write the data to the proper file with the given name with the
    # provided options and inspect it directly with h5py. The file
    # needs to be deleted after to keep junk from building up.
    # (A bare ``except: raise`` here would be a no-op, so plain
    # try/finally is used.)
    fld = None
    try:
        fld = tempfile.mkstemp()
        os.close(fld[0])
        filename = fld[1]
        hdf5storage.write(data, path=name, filename=filename,
                          options=options)

        with h5py.File(filename, mode='r') as f:
            for k in key_value_names:
                assert escape_path(k) not in f[name]
            for k in data:
                assert escape_path(k) in f[name]
    finally:
        if fld is not None:
            os.remove(fld[1])
def check_numpy_chararray_empty(self, num_chars):
    """Round-trip an empty chararray whose dtype holds num_chars
    bytes per element, then compare the read-back value."""
    expected = np.array([], 'S' + str(num_chars)).view(
        np.chararray).copy()
    result = self.write_readback(expected, random_name(), self.options)
    self.assert_equal(result, expected)
def check_dict_like_key_leading_periods(self, tp):
    """Round-trip a dict-like of type tp containing one key that
    starts with between 1 and 10 '.' characters."""
    data = random_dict(tp)
    dotted_key = ('.' * random.randint(1, 10)
                  + random_str_ascii(max_dict_key_length))
    data[dotted_key] = random_int()
    result = self.write_readback(data, random_name(), self.options)
    self.assert_equal(result, data)
def test_multi_write():
    """Write many variables at once with writes() and read each one
    back individually with read(), comparing the results.
    """
    # Makes a random dict of random paths and variables (random
    # number of randomized paths with random numpy arrays as values).
    data = dict()
    for _ in range(random.randint(min_dict_keys, max_dict_keys)):
        data[random_name()] = random_numpy(
            random_numpy_shape(dict_value_subarray_dimensions,
                               max_dict_value_subarray_axis_length),
            dtype=random.choice(dtypes))

    # Write it in one call and then read it back item by item. The
    # file needs to be deleted after to keep junk from building up.
    fld = None
    try:
        fld = tempfile.mkstemp()
        os.close(fld[0])
        filename = fld[1]
        hdf5storage.writes(mdict=data, filename=filename)
        out = {p: hdf5storage.read(path=p, filename=filename)
               for p in data}
    finally:
        if fld is not None:
            os.remove(fld[1])

    # Compare data and out.
    assert_equal(out, data)
def check_numpy_chararray(self, dimensions):
    """Round-trip a random chararray of the given rank.

    Makes a random bytes array, views it as a chararray, writes it,
    reads it back, and compares.
    """
    dims = random_numpy_shape(dimensions, max_array_axis_length)
    expected = random_numpy(dims, 'S').view(np.chararray).copy()
    result = self.write_readback(expected, random_name(), self.options)
    self.assert_equal(result, expected)
def test_multi_read():
    """Write many variables individually with write() and read them
    all back at once with reads(), comparing the results.
    """
    # Makes a random dict of random paths and variables (random
    # number of randomized paths with random numpy arrays as values).
    data = dict()
    for _ in range(random.randint(min_dict_keys, max_dict_keys)):
        data[random_name()] = random_numpy(
            random_numpy_shape(dict_value_subarray_dimensions,
                               max_dict_value_subarray_axis_length),
            dtype=random.choice(dtypes))
    # Pin the path order once so the write order and the read/compare
    # order are guaranteed to match.
    paths = list(data.keys())

    # Write it item by item and then read it back in one unit. The
    # file needs to be deleted after to keep junk from building up.
    fld = None
    try:
        fld = tempfile.mkstemp()
        os.close(fld[0])
        filename = fld[1]
        for p in paths:
            hdf5storage.write(data=data[p], path=p,
                              filename=filename)
        out = hdf5storage.reads(paths=paths, filename=filename)
    finally:
        if fld is not None:
            os.remove(fld[1])

    # Compare data and out.
    for i, p in enumerate(paths):
        assert_equal(out[i], data[p])
def check_write_filters(filters):
    """Write data with the given HDF5 filter settings and verify, via
    h5py, that the filters were actually applied and the data
    round-trips.

    filters is a dict that may override any of 'compression',
    'shuffle', 'fletcher32', and 'gzip_level'.
    """
    # Read out the filter arguments, falling back to defaults.
    filts = {'compression': 'gzip',
             'shuffle': True,
             'fletcher32': True,
             'gzip_level': 7}
    filts.update(filters)

    # Make some random data. The dtype must be restricted so that it
    # can be read back reliably.
    dims = random.randint(1, 4)
    dts = tuple(set(dtypes) - set(['U', 'S', 'bool', 'complex64',
                                   'complex128']))
    data = random_numpy(shape=random_numpy_shape(
        dims, max_array_axis_length), dtype=random.choice(dts))

    # Make a random name.
    name = random_name()

    # Write the data to the proper file with the given name with the
    # provided filters and read it back. The file needs to be deleted
    # after to keep junk from building up.
    fld = None
    try:
        fld = tempfile.mkstemp()
        os.close(fld[0])
        filename = fld[1]
        hdf5storage.write(data, path=name, filename=filename,
                          store_python_metadata=False,
                          matlab_compatible=False,
                          compress=True, compress_size_threshold=0,
                          compression_algorithm=filts['compression'],
                          gzip_compression_level=filts['gzip_level'],
                          shuffle_filter=filts['shuffle'],
                          compressed_fletcher32_filter=filts[
                              'fletcher32'])

        with h5py.File(filename, mode='r') as f:
            d = f[name]
            fletcher32 = d.fletcher32
            shuffle = d.shuffle
            compression = d.compression
            gzip_level = d.compression_opts
            out = d[...]
    finally:
        if fld is not None:
            os.remove(fld[1])

    # Check the filters.
    assert_equal_nose(fletcher32, filts['fletcher32'])
    assert_equal_nose(shuffle, filts['shuffle'])
    assert_equal_nose(compression, filts['compression'])
    if filts['compression'] == 'gzip':
        assert_equal_nose(gzip_level, filts['gzip_level'])

    # Compare.
    assert_equal(out, data)
def check_numpy_structured_array_empty(self, dimensions):
    """Round-trip a random structured ndarray of the given rank whose
    fields have a zero-sized subarray shape, then compare."""
    dims = random_numpy_shape(dimensions,
                              max_structured_ndarray_axis_length)
    expected = random_structured_numpy_array(dims, (1, 0))
    result = self.write_readback(expected, random_name(), self.options)
    self.assert_equal(result, expected)
def test_datetime_timedelta(self):
    """Round-trip several random datetime.timedelta values."""
    for _ in range(10):
        delta = datetime.timedelta(
            days=random.randint(-20, 20),
            seconds=random.randint(-1000, 1000),
            microseconds=random.randint(-1000**3, 1000**3))
        result = self.write_readback(delta, random_name(),
                                     self.options)
        self.assert_equal(result, delta)
def check_dict_like_key_back_slash(self, tp):
    """Round-trip a dict-like of type tp containing one key with an
    embedded backslash."""
    data = random_dict(tp)
    slashed_key = '\\'.join(
        random_str_ascii(max_dict_key_length) for _ in range(2))
    data[slashed_key] = random_int()
    result = self.write_readback(data, random_name(), self.options)
    self.assert_equal(result, data)
def test_datetime_date(self):
    """Round-trip several random datetime.date values."""
    for _ in range(10):
        # Day capped at 28 so every month/year combination is valid.
        d = datetime.date(
            year=random.randint(datetime.MINYEAR, datetime.MAXYEAR),
            month=random.randint(1, 12),
            day=random.randint(1, 28))
        result = self.write_readback(d, random_name(), self.options)
        self.assert_equal(result, d)
def test_bytes_with_null(self):
    """Round-trip a bytes object containing an embedded null byte."""
    pieces = [random_bytes(random.randint(1, max_string_length))
              for _ in range(2)]
    data = b'\x00'.join(pieces)
    result = self.write_readback(data, random_name(), self.options)
    self.assert_equal(result, data)
def test_numpy_recarray_unicode_fields(self):
    """Round-trip a random 1d structured ndarray whose field names
    contain non-ascii characters, then compare."""
    dims = random_numpy_shape(1, max_structured_ndarray_axis_length)
    expected = random_structured_numpy_array(dims,
                                             nonascii_fields=True)
    result = self.write_readback(expected, random_name(), self.options)
    self.assert_equal(result, expected)
def check_numpy_sized_dtype_nested_0(self, zero_shaped):
    """Round-trip arrays whose dtype is a sized subarray dtype.

    The subarray's second axis has length 2 * zero_shaped, so a
    falsy argument produces a zero-length axis.
    """
    scalar_dtypes = ('uint8', 'uint16', 'uint32', 'uint64',
                     'int8', 'int16', 'int32', 'int64',
                     'float32', 'float64',
                     'complex64', 'complex128')
    for _ in range(10):
        dt = (random.choice(scalar_dtypes), (2, 2 * zero_shaped))
        expected = np.zeros((2, ), dtype=dt)
        result = self.write_readback(expected, random_name(),
                                     self.options)
        self.assert_equal(result, expected)
def test_datetime_time(self):
    """Round-trip several random timezone-aware datetime.time
    values."""
    for _ in range(10):
        t = datetime.time(hour=random.randint(0, 23),
                          minute=random.randint(0, 59),
                          second=random.randint(0, 59),
                          microsecond=random.randint(0, 999999),
                          tzinfo=random_datetime_timezone())
        result = self.write_readback(t, random_name(), self.options)
        self.assert_equal(result, t)
def check_numpy_recarray(self, dimensions):
    """Round-trip a random recarray of the given rank.

    Makes a random structured ndarray, views it as a recarray,
    writes it, reads it back, and compares.
    """
    dims = random_numpy_shape(dimensions,
                              max_structured_ndarray_axis_length)
    expected = random_structured_numpy_array(dims).view(
        np.recarray).copy()
    result = self.write_readback(expected, random_name(), self.options)
    self.assert_equal(result, expected)
def test_str_ascii_encoded_utf8(self):
    """Round-trip a UTF-8 encoded str containing at least one
    character outside plain ASCII letters and digits."""
    ascii_chars = string.ascii_letters + string.digits
    text = 'a'
    # Keep drawing until at least one character falls outside the
    # plain alphanumeric set.
    while all(c in ascii_chars for c in text):
        text = random_str_some_unicode(
            random.randint(1, max_string_length))
    data = text.encode('utf-8')
    result = self.write_readback(data, random_name(), self.options)
    self.assert_equal(result, data)
def check_numpy_sized_dtype_nested_1(self, zero_shaped):
    """Round-trip arrays with a nested structured dtype containing
    sized subarray fields.

    Field 'b' has last axis length 4 * zero_shaped, so a falsy
    argument yields a zero-length axis.
    """
    scalar_dtypes = ('uint8', 'uint16', 'uint32', 'uint64',
                     'int8', 'int16', 'int32', 'int64',
                     'float32', 'float64',
                     'complex64', 'complex128')
    for _ in range(10):
        dt = [('a', random.choice(scalar_dtypes), (1, 2)),
              ('b', random.choice(scalar_dtypes),
               (1, 1, 4 * zero_shaped)),
              ('c', [('a', random.choice(scalar_dtypes)),
                     ('b', random.choice(scalar_dtypes), (1, 2))])]
        expected = np.zeros((random.randrange(1, 4), ), dtype=dt)
        result = self.write_readback(expected, random_name(),
                                     self.options)
        self.assert_equal(result, expected)
def test_datetime_datetime(self):
    """Round-trip several random timezone-aware datetime.datetime
    values."""
    for _ in range(10):
        # Day capped at 28 so every month/year combination is valid.
        dt = datetime.datetime(
            year=random.randint(datetime.MINYEAR, datetime.MAXYEAR),
            month=random.randint(1, 12),
            day=random.randint(1, 28),
            hour=random.randint(0, 23),
            minute=random.randint(0, 59),
            second=random.randint(0, 59),
            microsecond=random.randint(0, 999999),
            tzinfo=random_datetime_timezone())
        result = self.write_readback(dt, random_name(), self.options)
        self.assert_equal(result, dt)
def check_numpy_recarray_field_special_char(self, ch, leading=False):
    """Round-trip a 1d recarray with the character ch in one of its
    field names.

    If leading is True, ch is prepended to the field name; otherwise
    it is inserted after the first character.
    """
    field_names = [random_str_ascii(max_dict_key_length)
                   for _ in range(2)]
    if leading:
        field_names[1] = ch + field_names[1]
    else:
        field_names[1] = (field_names[1][0] + ch
                          + field_names[1][1:])
    dims = random_numpy_shape(1, max_structured_ndarray_axis_length)
    expected = random_structured_numpy_array(
        dims, names=field_names).view(np.recarray).copy()
    result = self.write_readback(expected, random_name(), self.options)
    self.assert_equal(result, expected)
def check_dict_like_other_type_key(self, tp, other_tp):
    """Round-trip a dict-like of type tp containing one key of a
    non-str type named by other_tp ('numpy.bytes_',
    'numpy.unicode_', 'bytes', 'int', or 'float').
    """
    data = random_dict(tp)
    key_gen = random_str_some_unicode(max_dict_key_length)
    if other_tp == 'numpy.bytes_':
        key = np.bytes_(key_gen.encode('UTF-8'))
    elif other_tp == 'numpy.unicode_':
        # np.unicode_ was an alias of np.str_ and was removed in
        # NumPy 2.0, so use np.str_ directly.
        key = np.str_(key_gen)
    elif other_tp == 'bytes':
        key = key_gen.encode('UTF-8')
    elif other_tp == 'int':
        key = random_int()
    elif other_tp == 'float':
        key = random_float()
    data[key] = random_int()
    out = self.write_readback(data, random_name(), self.options)
    self.assert_equal(out, data)
def check_string_type_non_str_key(tp, other_tp, option_keywords):
    """Write a dict-like with one string-typed but non-str key and
    check that the HDF5 group holds exactly the expected key set.

    other_tp names the key type to use: 'numpy.bytes_',
    'numpy.unicode_', or 'bytes'.
    """
    options = hdf5storage.Options(**option_keywords)
    key_value_names = (options.dict_like_keys_name,
                       options.dict_like_values_name)

    data = random_dict(tp)
    # The special key/value names must not collide with actual keys.
    for k in key_value_names:
        if k in data:
            del data[k]
    keys = list(data.keys())

    key_gen = random_str_some_unicode(max_dict_key_length)
    if other_tp == 'numpy.bytes_':
        key = np.bytes_(key_gen.encode('UTF-8'))
    elif other_tp == 'numpy.unicode_':
        # np.unicode_ was an alias of np.str_ and was removed in
        # NumPy 2.0, so use np.str_ directly.
        key = np.str_(key_gen)
    elif other_tp == 'bytes':
        key = key_gen.encode('UTF-8')
    data[key] = random_int()
    keys.append(key_gen)

    # Make a random name.
    name = random_name()

    # Write the data to the proper file with the given name with the
    # provided options. The file needs to be deleted after to keep
    # junk from building up.
    fld = None
    try:
        fld = tempfile.mkstemp()
        os.close(fld[0])
        filename = fld[1]
        hdf5storage.write(data, path=name, filename=filename,
                          options=options)

        with h5py.File(filename, mode='r') as f:
            assert_equal_nose(set(keys), set(f[name].keys()))
    finally:
        if fld is not None:
            os.remove(fld[1])
def check_read_filters(filters):
    """Create a dataset directly with h5py using the given filters
    and check that hdf5storage.read gets the data back intact.

    filters may override 'compression', 'shuffle', 'fletcher32',
    and 'gzip_level'.
    """
    # Read out the filter arguments, falling back to defaults.
    filts = {'compression': 'gzip',
             'shuffle': True,
             'fletcher32': True,
             'gzip_level': 7}
    filts.update(filters)
    if filts['compression'] == 'gzip':
        # h5py spells the gzip level as compression_opts.
        filts['compression_opts'] = filts['gzip_level']
    # gzip_level must always be dropped since it is not a valid
    # create_dataset keyword (even for non-gzip compression).
    del filts['gzip_level']

    # Make some random data.
    dims = random.randint(1, 4)
    data = random_numpy(shape=random_numpy_shape(
        dims, max_array_axis_length),
        dtype=random.choice(tuple(set(dtypes) - set(['U']))))

    # Make a random name.
    name = random_name()

    # Create the dataset with the given filters and read it back
    # with hdf5storage. The file needs to be deleted after to keep
    # junk from building up.
    fld = None
    try:
        fld = tempfile.mkstemp()
        os.close(fld[0])
        filename = fld[1]
        with h5py.File(filename, mode='w') as f:
            f.create_dataset(name, data=data, chunks=True, **filts)

        out = hdf5storage.read(path=name, filename=filename,
                               matlab_compatible=False)
    finally:
        if fld is not None:
            os.remove(fld[1])

    # Compare.
    assert_equal(out, data)
def check_str_key_previously_invalid_char(tp, ch, option_keywords):
    """Write a dict-like with a str key containing the character ch
    and check the stored key names.

    Every key of the written data must appear (escaped) in the HDF5
    group, and neither of the special key/value names may appear.
    """
    options = hdf5storage.Options(**option_keywords)
    key_value_names = (options.dict_like_keys_name,
                       options.dict_like_values_name)

    data = random_dict(tp)
    # The special key/value names must not collide with actual keys.
    for k in key_value_names:
        if k in data:
            del data[k]

    # Add a random str key using the provided character, making sure
    # it does not collide with the special names.
    key = key_value_names[0]
    while key in key_value_names:
        key = ch.join(
            [random_str_ascii(max_dict_key_length) for i in range(2)])
    data[key] = random_int()

    # Make a random name.
    name = random_name()

    # Write the data to the proper file with the given name with the
    # provided options. The file needs to be deleted after to keep
    # junk from building up.
    fld = None
    try:
        fld = tempfile.mkstemp()
        os.close(fld[0])
        filename = fld[1]
        hdf5storage.write(data, path=name, filename=filename,
                          options=options)

        with h5py.File(filename, mode='r') as f:
            for k in key_value_names:
                assert escape_path(k) not in f[name]
            for k in data:
                assert escape_path(k) in f[name]
    finally:
        if fld is not None:
            os.remove(fld[1])
def check_python_collection(self, tp, same_dims):
    """Round-trip a random collection of type tp.

    For set/frozenset, elements come from a random python list and
    same_dims is ignored. Otherwise, same_dims selects whether every
    element shares one shape ('same-dims') or shapes vary
    ('diff-dims'); any other value raises ValueError.
    """
    if tp in (set, frozenset):
        data = tp(random_list(max_list_length,
                              python_or_numpy='python'))
    elif same_dims == 'same-dims':
        shared_shape = random_numpy_shape(random.randrange(2, 4),
                                          random.randrange(1, 4))
        element_dtypes = ('uint8', 'uint16', 'uint32', 'uint64',
                          'int8', 'int16', 'int32', 'int64',
                          'float32', 'float64',
                          'complex64', 'complex128')
        data = tp([random_numpy(shared_shape,
                                random.choice(element_dtypes),
                                allow_nan=True)
                   for _ in range(random.randrange(2, 7))])
    elif same_dims == 'diff-dims':
        data = tp(random_list(max_list_length,
                              python_or_numpy='numpy'))
    else:
        raise ValueError('invalid value of same_dims')
    result = self.write_readback(data, random_name(), self.options)
    self.assert_equal(result, data)
def test_bytearray_empty(self):
    """Round-trip an empty bytearray."""
    data = bytearray(b'')
    result = self.write_readback(data, random_name(), self.options)
    self.assert_equal(result, data)
def test_bytearray(self):
    """Round-trip a random non-empty bytearray."""
    data = bytearray(
        random_bytes(random.randint(1, max_string_length)))
    result = self.write_readback(data, random_name(), self.options)
    self.assert_equal(result, data)
def test_str_empty(self):
    """Round-trip an empty str."""
    data = ''
    result = self.write_readback(data, random_name(), self.options)
    self.assert_equal(result, data)