def anyarray(
    draw,
    min_dims: int = 0,
    max_dims: int = 2,
    include_complex_numbers: bool = True,
    dtype: Optional[np.dtype] = None,
):
    """Draw an array of arbitrary shape that contains only finite values.

    Parameters
    ----------
    draw
        Callable used to draw a value from a strategy (presumably supplied
        by ``hypothesis.strategies.composite``; the decorator is not visible
        here).
    min_dims, max_dims
        Bounds on the number of dimensions, forwarded to ``array_shapes``.
    include_complex_numbers
        When ``dtype`` is not given, controls whether complex dtypes are
        among the candidates. Ignored when ``dtype`` is given.
    dtype
        Passed straight to ``arrays``. When ``None``, a strategy choosing
        among integer, floating and (optionally) complex dtypes is used.

    Returns
    -------
    The drawn array. Draws containing NaN or non-finite entries are
    rejected through ``assume``.
    """
    if dtype is None:
        candidates = [integer_dtypes(), floating_dtypes()]
        if include_complex_numbers:
            candidates.append(complex_number_dtypes())
        dtype = one_of(*candidates)

    shape_strategy = array_shapes(min_dims=min_dims, max_dims=max_dims)
    arr = draw(arrays(dtype=dtype, shape=shape_strategy))

    # Reject (rather than fail on) examples holding NaN or infinities.
    assume(not np.any(np.isnan(arr)))
    assume(np.all(np.isfinite(arr)))
    return arr
def gen_codes_for_fmt(fmt, include_nas=False, valid_only=True):
    """Build a strategy producing arrays of codes for the mapping ``fmt``.

    Parameters
    ----------
    fmt
        Mapping whose keys are the valid codes; the keys must be sortable.
    include_nas
        When ``valid_only`` is true, also allow ``np.nan`` as an element.
    valid_only
        When true, elements are sampled only from the keys of ``fmt``
        (plus NaN if requested). When false, no element strategy is passed
        and ``hnp.arrays`` falls back to its default for the dtype.

    Returns
    -------
    An ``hnp.arrays`` strategy with at most one dimension and at most
    1000 entries per side.
    """
    if valid_only:
        codes = sorted(fmt.keys())
        if include_nas:
            codes = codes + [np.nan]
        elems = hs.sampled_from(codes)
    else:
        elems = None

    # Integer dtypes are only offered when arbitrary (possibly invalid)
    # values without NaNs are requested.
    if include_nas or valid_only:
        dtypes = hnp.floating_dtypes(endianness='=')
    else:
        dtypes = hs.one_of(
            hnp.floating_dtypes(endianness='='),
            hnp.integer_dtypes(endianness='='),
        )

    shapes = hnp.array_shapes(max_dims=1, max_side=1000)
    return hnp.arrays(dtype=dtypes, shape=shapes, elements=elems)
def testPreservesSortOrder(self, data):
    """Check that ``push_apart`` leaves the sort order of its input unchanged.

    A length-10 array of unique floating-point values is drawn, pushed
    apart along the last axis, and the ``argsort`` of input and output are
    required to match.
    """
    float_dtype = data.draw(hpnp.floating_dtypes())
    xs = data.draw(hpnp.arrays(float_dtype, 10, unique=True))

    pushed = push_apart(xs, axis=-1)
    # Record the result so it shows up in the report of a failing example.
    hp.note(pushed)

    order_before = np.argsort(xs, axis=-1)
    order_after = np.argsort(pushed, axis=-1)
    self.assertAllEqual(order_before, order_after)
class TestMax:
    """Property tests for the riptable ``max`` reduction."""

    # TODO: Extend this to also check integer dtypes (dtype=ints_or_floats_dtypes())
    @pytest.mark.xfail(
        reason="This test exposes a known bug around NaN-handling that needs to be fixed."
    )
    @given(
        arr=arrays(
            shape=one_darray_shape_strategy(),
            dtype=floating_dtypes(endianness="=", sizes=(32, 64)),
        )
    )
    @pytest.mark.parametrize("func_type", ['module', 'member'])
    def test_isnan_implies_nan_result(self, arr, func_type):
        """
        Check how :func:`rt.max` handles NaN values.

        One or more NaNs in the input array should result in the function
        returning a NaN. The reduction is invoked either as the module-level
        function or as the array method, depending on ``func_type``.
        """
        # Map each calling convention onto a callable taking the array.
        callers = {
            'module': lambda x: rt.max(x),
            'member': lambda x: x.max(),
        }
        if func_type not in callers:
            raise ValueError(
                f"Unhandled value '{func_type}' specified for the function type."
            )
        test_func = callers[func_type]

        # Wrap as a FastArray so the riptable implementation is the one exercised.
        fast_arr = rt.FA(arr)

        # The shared helper carries the actual assertions.
        NanUnawareTestImpl.test_isnan_implies_nan_result(test_func, fast_arr)
def get_scalar_dtype_strategy(exclude=None):
    """
    Build a `hypothesis` strategy yielding scalar numpy dtypes.

    The strategy is the union of one dtype strategy per family:
    ``"datetime"``, ``"uint"``, ``"int"``, ``"float"``, ``"byte"`` and
    ``"unicode"``.

    Parameters
    ----------
    exclude : str or iterable of str, optional
        Family name(s) to leave out. A single name or any iterable of names
        (list, tuple, set, ...) is accepted; repeated names are harmless.

    Returns
    -------
    The result of ``hyp_st.one_of`` over the remaining family strategies.

    Raises
    ------
    ValueError
        If a name in ``exclude`` is not one of the known families.
    """
    possible_strategies = {
        "datetime": hyp_np.datetime64_dtypes(max_period="ms", min_period="ns"),
        "uint": hyp_np.unsigned_integer_dtypes(),
        "int": hyp_np.integer_dtypes(),
        "float": hyp_np.floating_dtypes(),
        "byte": hyp_np.byte_string_dtypes(),
        "unicode": hyp_np.unicode_string_dtypes(),
    }

    # Normalise ``exclude`` to a tuple of names. A bare string must not be
    # iterated character by character, and a non-iterable is treated as a
    # single (most likely unknown) name so it is reported below.
    if exclude is None:
        excluded = ()
    elif isinstance(exclude, str):
        excluded = (exclude,)
    else:
        try:
            excluded = tuple(exclude)
        except TypeError:
            excluded = (exclude,)

    # Validate against the complete set of names so the error message always
    # lists every valid choice, regardless of what was excluded before it.
    for name in excluded:
        if name not in possible_strategies:
            raise ValueError(
                "Strategy {} unknown. Possible values are {}".format(
                    name, list(possible_strategies)
                )
            )

    remaining = [
        strategy
        for name, strategy in possible_strategies.items()
        if name not in excluded
    ]
    return hyp_st.one_of(*remaining)
class TestNanMin:
    """Property tests for the riptable ``nanmin`` reduction."""

    # TODO: Extend this to also check integer dtypes (dtype=ints_or_floats_dtypes());
    #       need to use rt.isnan instead of np.isnan because it'll recognize the riptable invalid values.
    @hypothesis.settings(suppress_health_check=[HealthCheck.too_slow])
    @given(
        arr=arrays(
            shape=one_darray_shape_strategy(),
            dtype=floating_dtypes(endianness="=", sizes=(32, 64)),
        )
    )
    @pytest.mark.parametrize("func_type", ['module', 'member'])
    def test_nan_awareness(self, arr, func_type):
        """
        Check how :func:`rt.nanmin` handles NaN values by comparing it against :func:`rt.min`.

        The NaN-aware callable, the NaN-unaware callable and the input array
        are handed to ``NanAwareTestImpl.test_nan_awareness``, which performs
        the comparison. ``func_type`` selects whether ``nanmin`` is invoked as
        the module-level function or as the array method.
        """
        # Map each calling convention onto a callable taking the array.
        callers = {
            'module': lambda x: rt.nanmin(x),
            'member': lambda x: x.nanmin(),
        }
        if func_type not in callers:
            raise ValueError(
                f"Unhandled value '{func_type}' specified for the function type."
            )
        test_func = callers[func_type]

        # Reference reduction that does not special-case NaNs.
        def nan_unaware_func(x):
            return rt.min(x)

        # Wrap as a FastArray so the riptable implementation is the one exercised.
        fast_arr = rt.FA(arr)

        NanAwareTestImpl.test_nan_awareness(test_func, nan_unaware_func, fast_arr)
def test_get_gained_points_back(self, x, y, gain):
    """Check that ``smooth_plot`` returns at most ``int(len(x) * gain)`` points.

    ``y`` arrives as a drawable data object and is replaced by a float array
    with the same shape as ``x``. Inputs containing NaN or infinite values
    are skipped by returning early.
    """
    y = y.draw(nph.arrays(nph.floating_dtypes(), x.shape))

    # Non-finite coordinates are outside the scope of this check.
    for values in (x, y):
        if np.any(np.isinf(values)) or np.any(np.isnan(values)):
            return

    smooth_lines = smooth_plot(x, y, gain=gain)
    point_budget = int(len(x) * gain)
    plotted_points = len(smooth_lines[0].get_xdata())
    assert point_budget >= plotted_points
class TestDecodeNdarray(unittest.TestCase):
    """Round-trip property test for ``aio.encode_ndarray`` / ``aio.decode_ndarray``."""

    @given(htn.arrays(htn.floating_dtypes(), htn.array_shapes()))
    def test_arrays(self, arr):
        """Encoding then decoding a float array must preserve type, shape and values."""
        restored = aio.decode_ndarray(aio.encode_ndarray(arr))

        # Dedicated assertion methods report both operands when they fail.
        self.assertIs(arr.dtype.type, restored.dtype.type)
        self.assertEqual(arr.shape, restored.shape)

        # Zero tolerances demand exact equality; NaNs at matching positions
        # are treated as equal.
        self.assertTrue(
            np.allclose(arr, restored, rtol=0, atol=0, equal_nan=True)
        )
def ints_floats_datetimes_and_timedeltas(draw):
    """Draw a native-endian dtype from the integer, float, datetime64 or timedelta64 families.

    ``draw`` is the callable used to draw from a strategy (presumably
    provided by ``hypothesis.strategies.composite``; the decorator is not
    visible here).
    """
    families = (
        unsigned_integer_dtypes,
        integer_dtypes,
        floating_dtypes,
        datetime64_dtypes,
        timedelta64_dtypes,
    )
    # "=" pins every family to the platform's native byte order.
    candidates = tuple(make(endianness="=") for make in families)
    return draw(one_of(candidates))
def ints_floats_complex_or_booleans(draw):
    """Draw a dtype from the native-endian integer and float families or the boolean family.

    ``draw`` is the callable used to draw from a strategy (presumably
    provided by ``hypothesis.strategies.composite``; the decorator is not
    visible here).

    NOTE(review): despite the function name, complex dtypes are currently
    not among the candidates -- confirm whether that is still intended.
    """
    native_numeric = tuple(
        make(endianness="=")
        for make in (unsigned_integer_dtypes, integer_dtypes, floating_dtypes)
    )
    # complex_number_dtypes(endianness="=") is disabled for now.
    candidates = native_numeric + (boolean_dtypes(),)
    return draw(one_of(candidates))
def array_with_two_entries(draw, array_length=10_000):
    """Draw a finite-valued array of shape ``(length, 2)``.

    Parameters
    ----------
    draw
        Callable used to draw from a strategy (presumably provided by
        ``hypothesis.strategies.composite``; the decorator is not visible
        here).
    array_length
        Upper bound (inclusive) on the number of rows; at least one row is
        always drawn.

    Returns
    -------
    The drawn integer or floating-point array. Draws containing NaN or
    non-finite entries are rejected through ``assume``.
    """
    n_rows = draw(integers(1, max_value=array_length))
    dtype_strategy = one_of(integer_dtypes(), floating_dtypes())
    arr = draw(arrays(dtype=dtype_strategy, shape=(n_rows, 2)))

    # Reject (rather than fail on) examples holding NaN or infinities.
    assume(not np.any(np.isnan(arr)))
    assume(np.all(np.isfinite(arr)))
    return arr
class TestEncodeNdarray(unittest.TestCase):
    """Structural property test for the output of ``aio.encode_ndarray``."""

    @given(htn.arrays(htn.floating_dtypes(), htn.array_shapes()))
    def test_encode(self, arr):
        """The encoded form must expose the marker, dtype and data entries."""
        encoded = aio.encode_ndarray(arr)

        # Dedicated assertion methods report the offending container or
        # value when they fail, unlike a bare assertTrue.
        self.assertIn('__ndarray__', encoded)
        self.assertTrue(encoded['__ndarray__'])

        self.assertIn('__dtype__', encoded)
        self.assertIsInstance(encoded['__dtype__'], str)

        self.assertIn('data', encoded)
        self.assertIsInstance(encoded['data'], list)
def ints_or_floats_dtypes(draw):
    """Draw a native-endian integer dtype or a 32/64-bit floating-point dtype.

    ``draw`` is the callable used to draw from a strategy (presumably
    provided by ``hypothesis.strategies.composite``; the decorator is not
    visible here).
    """
    # Endianness needs to be specified for now, otherwise the byte-order may get flipped
    # https://jira/browse/SOQTEST-6478
    native = "="
    unsigned_ints = unsigned_integer_dtypes(endianness=native)
    signed_ints = integer_dtypes(endianness=native)
    # Half floats are not supported, hence only the 32- and 64-bit sizes.
    floats = floating_dtypes(endianness=native, sizes=(32, 64))
    return draw(one_of((unsigned_ints, signed_ints, floats)))
def ints_floats_or_complex_dtypes(draw):
    """Draw a native-endian unsigned integer, signed integer or floating-point dtype.

    ``draw`` is the callable used to draw from a strategy (presumably
    provided by ``hypothesis.strategies.composite``; the decorator is not
    visible here).

    NOTE(review): despite the function name, complex dtypes are currently
    not among the candidates -- confirm whether that is still intended.
    """
    # Endianness needs to be specified for now, otherwise the byte-order may get flipped
    # https://jira/browse/SOQTEST-6478
    native = "="
    unsigned_ints = unsigned_integer_dtypes(endianness=native)
    signed_ints = integer_dtypes(endianness=native)
    floats = floating_dtypes(endianness=native)
    # complex_number_dtypes(endianness="=") is disabled for now.
    return draw(one_of((unsigned_ints, signed_ints, floats)))
class TestSmoothenPlot:
    """Property tests for ``smooth_plot``."""

    @given(
        nph.arrays(
            nph.floating_dtypes(),
            nph.array_shapes(max_dims=1, min_side=4),
            unique=True,
        ),
        st.data(),
        st.floats(1, 10),
    )
    def test_get_gained_points_back(self, x, y, gain):
        """Check that ``smooth_plot`` returns at most ``int(len(x) * gain)`` points.

        ``x`` is a one-dimensional array of at least four unique floats.
        ``y`` arrives as the interactive data object and is replaced by a
        float array with the same shape as ``x``. Inputs containing NaN or
        infinite values are skipped by returning early.
        """
        y = y.draw(nph.arrays(nph.floating_dtypes(), x.shape))

        # Non-finite coordinates are outside the scope of this check.
        for values in (x, y):
            if np.any(np.isinf(values)) or np.any(np.isnan(values)):
                return

        smooth_lines = smooth_plot(x, y, gain=gain)
        point_budget = int(len(x) * gain)
        plotted_points = len(smooth_lines[0].get_xdata())
        assert point_budget >= plotted_points
def test_generate_arbitrary_indices(data):
    """Draw a pandas index from ``pdst.indexes`` and check its dtype and uniqueness.

    Size bounds, the ``unique`` flag, the dtype and whether elements are
    passed explicitly are all drawn from ``data``. When elements are passed,
    the dtype is left for pandas to infer and compared against the dtype
    inferred from a single drawn element; otherwise the index dtype must
    equal the dtype pandas converts the requested dtype to.
    """
    min_size = data.draw(st.integers(0, 10), "min_size")
    max_size = data.draw(st.none() | st.integers(min_size, min_size + 10), "max_size")
    unique = data.draw(st.booleans(), "unique")

    dtype_strategy = st.one_of(
        npst.boolean_dtypes(),
        npst.integer_dtypes(endianness="="),
        npst.floating_dtypes(endianness="="),
        npst.complex_number_dtypes(endianness="="),
        npst.datetime64_dtypes(endianness="="),
        npst.timedelta64_dtypes(endianness="="),
    ).filter(supported_by_pandas)
    dtype = data.draw(dtype_strategy, "dtype")

    pass_elements = data.draw(st.booleans(), "pass_elements")

    # What pandas turns the requested dtype into for an empty index.
    converted_dtype = pandas.Index([], dtype=dtype).dtype

    try:
        sample_element = data.draw(npst.from_dtype(dtype))
        inferred_dtype = pandas.Index([sample_element]).dtype

        if pass_elements:
            # Hand over elements only and let the dtype be inferred.
            elements = npst.from_dtype(dtype)
            dtype = None
        else:
            elements = None

        index_strategy = pdst.indexes(
            elements=elements,
            dtype=dtype,
            min_size=min_size,
            max_size=max_size,
            unique=unique,
        )
        index = data.draw(index_strategy)
    except Exception as e:
        if type(e).__name__ != "OutOfBoundsDatetime":
            raise
        # See https://github.com/HypothesisWorks/hypothesis-python/pull/826
        reject()

    expected_dtype = inferred_dtype if dtype is None else converted_dtype
    assert index.dtype == expected_dtype

    if unique:
        assert len(set(index.values)) == len(index)
def one_of_supported_dtypes(draw):
    """Return a strategy choosing among the dtypes riptable is known to handle.

    NOTE(review): ``draw`` is accepted but never used, so the ``one_of``
    strategy itself is returned rather than a value drawn from it --
    confirm that callers rely on this.
    """
    # dtype size 16-bit is not supported
    supported_int_sizes = (8, 32, 64)
    supported_float_sizes = (32, 64)
    # little endian is not supported
    # NOTE(review): "=" selects native byte order, which may itself be
    # little endian -- confirm the intent of the remark above.
    native = "="

    booleans_ = boolean_dtypes()
    signed_ints = integer_dtypes(endianness=native, sizes=supported_int_sizes)
    unsigned_ints = unsigned_integer_dtypes(endianness=native, sizes=supported_int_sizes)
    floats = floating_dtypes(endianness=native, sizes=supported_float_sizes)
    byte_strings = byte_string_dtypes(endianness=native)
    unicode_strings = unicode_string_dtypes(endianness=native)

    # the following dtypes are not supported
    # complex_number_dtypes(),
    # datetime64_dtypes(),
    # timedelta64_dtypes(),
    return one_of(
        booleans_,
        signed_ints,
        unsigned_ints,
        floats,
        byte_strings,
        unicode_strings,
    )
def numpy_number(draw, min_val, max_val):
    """Draw a numpy scalar of a random numeric dtype within ``[min_val, max_val]``.

    Parameters
    ----------
    draw
        Callable used to draw from a strategy (presumably provided by
        ``hypothesis.strategies.composite``; the decorator is not visible
        here).
    min_val, max_val
        Requested inclusive bounds. They are narrowed to the range the drawn
        dtype can represent. For integer dtypes, fractional bounds are
        rounded inward so every drawn integer lies within the requested
        interval.

    Returns
    -------
    A numpy scalar of the drawn signed integer, unsigned integer or
    floating-point dtype.
    """
    import math

    dtype = draw(
        st.one_of(
            nps.integer_dtypes(),
            nps.unsigned_integer_dtypes(),
            nps.floating_dtypes(),
        )
    )

    if dtype.kind == 'f':
        limits = np.finfo(dtype)
        if min_val < limits.min:
            min_val = limits.min
        if max_val > limits.max:
            max_val = limits.max
        number = draw(
            st.floats(min_val, max_val, allow_nan=False, allow_infinity=False)
        )
    else:
        limits = np.iinfo(dtype)
        # Clamp first so infinite bounds become finite before rounding.
        if min_val < limits.min:
            min_val = limits.min
        if max_val > limits.max:
            max_val = limits.max
        # st.integers needs integral bounds; math.ceil/floor keep Python
        # ints exact and turn fractional floats into the nearest inner int.
        min_val, max_val = math.ceil(min_val), math.floor(max_val)
        number = draw(st.integers(min_val, max_val))

    # Going through a one-element array yields a scalar of exactly ``dtype``.
    return np.array([number], dtype)[0]
def numpy_number(draw, min_val, max_val):
    """Draw a numpy scalar of a random numeric dtype within ``[min_val, max_val]``.

    Parameters
    ----------
    draw
        Callable used to draw from a strategy (presumably provided by
        ``hypothesis.strategies.composite``; the decorator is not visible
        here).
    min_val, max_val
        Requested inclusive bounds. They are narrowed to the range the drawn
        dtype can represent. For integer dtypes, fractional bounds are
        rounded inward so every drawn integer lies within the requested
        interval.

    Returns
    -------
    A numpy scalar of the drawn signed integer, unsigned integer or
    floating-point dtype.
    """
    import math

    dtype = draw(
        st.one_of(
            nps.integer_dtypes(),
            nps.unsigned_integer_dtypes(),
            nps.floating_dtypes(),
        )
    )

    if dtype.kind == 'f':
        limits = np.finfo(dtype)
        if min_val < limits.min:
            min_val = limits.min
        if max_val > limits.max:
            max_val = limits.max
        number = draw(
            st.floats(min_val, max_val, allow_nan=False, allow_infinity=False)
        )
    else:
        limits = np.iinfo(dtype)
        # Clamp first so infinite bounds become finite before rounding.
        if min_val < limits.min:
            min_val = limits.min
        if max_val > limits.max:
            max_val = limits.max
        # math.ceil/floor return exact Python ints. np.ceil/np.floor would
        # route large integer bounds through float64 and could shift them.
        min_val, max_val = math.ceil(min_val), math.floor(max_val)
        number = draw(st.integers(min_val, max_val))

    # Going through a one-element array yields a scalar of exactly ``dtype``.
    return np.array([number], dtype)[0]
def dataframe(draw):
    """Draw a data frame with between 1 and 20 columns of mixed dtypes.

    Each column gets a float, integer or unicode-string dtype and a unique
    name that is either text or an integer. ``draw`` is the callable used to
    draw from a strategy (presumably provided by
    ``hypothesis.strategies.composite``; the decorator is not visible here).
    """
    n_cols = draw(integers(min_value=1, max_value=20))

    column_dtype = one_of(
        np_strategies.floating_dtypes(),
        np_strategies.integer_dtypes(),
        np_strategies.unicode_string_dtypes(),
    )
    dtypes = draw(lists(column_dtype, min_size=n_cols, max_size=n_cols))

    column_name = text() | integers()
    colnames = draw(
        lists(column_name, min_size=n_cols, max_size=n_cols, unique=True)
    )

    columns = []
    for dtype, name in zip(dtypes, colnames):
        columns.append(column(name=name, dtype=dtype))
    return draw(data_frames(columns=columns))
class TestNanSum:
    """Property tests for the riptable ``nansum`` reduction."""

    # TODO: Extend this to also check integer dtypes (dtype=ints_or_floats_dtypes());
    #       need to use rt.isnan instead of np.isnan because it'll recognize the riptable invalid values.
    @pytest.mark.xfail(
        reason="Very small differences between sum and nansum; likely ignorable differences due to rounding, but let's investigate to be sure."
    )
    @given(
        arr=arrays(
            shape=one_darray_shape_strategy(),
            dtype=floating_dtypes(endianness="=", sizes=(32, 64)),
        )
    )
    @pytest.mark.parametrize("func_type", ['module', 'member'])
    def test_nan_awareness(self, arr, func_type):
        """
        Check how :func:`rt.nansum` handles NaN values by comparing it against :func:`rt.sum`.

        The NaN-aware callable, the NaN-unaware callable and the input array
        are handed to ``NanAwareTestImpl.test_nan_awareness``, which performs
        the comparison. ``func_type`` selects whether ``nansum`` is invoked as
        the module-level function or as the array method.
        """
        # Map each calling convention onto a callable taking the array.
        callers = {
            'module': lambda x: rt.nansum(x),
            'member': lambda x: x.nansum(),
        }
        if func_type not in callers:
            raise ValueError(
                f"Unhandled value '{func_type}' specified for the function type."
            )
        test_func = callers[func_type]

        # Reference reduction that does not special-case NaNs.
        def nan_unaware_func(x):
            return rt.sum(x)

        # Wrap as a FastArray so the riptable implementation is the one exercised.
        fast_arr = rt.FA(arr)

        NanAwareTestImpl.test_nan_awareness(test_func, nan_unaware_func, fast_arr)
def test_glorot_normal_statistics(shape, gain):
    """Check the mean, bounds and histogram flatness of an initialised tensor.

    NOTE(review): the test name says "normal" but the initialiser called is
    ``glorot_uniform`` -- confirm which one is meant.
    """
    tensor = glorot_uniform(shape, gain=gain)
    assert isinstance(tensor, Tensor)
    assert np.isclose(np.mean(tensor.data), 0, atol=1e-3)

    # check the bounds of the distribution hold
    receptive_field = tensor[0, 0].size if tensor.ndim > 2 else 1
    fan_in = tensor.shape[1] * receptive_field
    fan_out = tensor.shape[0] * receptive_field
    bound = gain * np.sqrt(6 / (fan_in + fan_out))
    assert tensor.min() >= -bound
    assert tensor.max() <= bound

    # check that the 100-bin histogram is roughly flat
    hist, _ = np.histogram(tensor.data, bins=100)
    spread = max(hist) - min(hist)
    assert spread / np.mean(hist) < 0.1


@given(
    shape=hnp.array_shapes(min_dims=2),
    gain=st.floats(0.1, 10),
    dtype=hnp.floating_dtypes(),
    constant=st.booleans(),
)
def test_glorot_normal(shape, gain, dtype, constant):
    """Check that shape, dtype and ``constant`` are propagated to the result.

    NOTE(review): the test name says "normal" but the initialiser called is
    ``glorot_uniform`` -- confirm which one is meant.
    """
    gain_tensor = Tensor(gain)
    tensor = glorot_uniform(shape, gain=gain_tensor, dtype=dtype, constant=constant)

    assert tensor.shape == shape
    assert tensor.dtype == dtype
    assert tensor.constant == constant
@pytest.mark.parametrize(
    "data",
    [None, np.array(None), np.array([[0], [0, 0]]), np.array(1, dtype="O")],
)
@given(constant=st.booleans(), creator=st.none() | st.just(MatMul()))
def test_input_type_checking(data, constant, creator):
    """``Tensor`` must raise ``TypeError`` for ``None`` and object-typed inputs."""
    tensor_kwargs = {"constant": constant, "_creator": creator}
    with raises(TypeError):
        Tensor(data, **tensor_kwargs)


@given(
    data=hnp.arrays(shape=hnp.array_shapes(), dtype=hnp.floating_dtypes()),
    constant=st.booleans(),
)
def test_copy(data, constant):
    """A copied tensor keeps the dtype, ``constant`` flag and gradient of the source."""
    x = Tensor(data, constant=constant)
    y = x * 2
    y.backward()

    y_copy = y.copy()

    # The source of the copy is the output of an operation.
    assert y.creator is not None
    assert y.dtype == y_copy.dtype
    assert y_copy.constant is constant

    if y.grad is not None:
        assert_array_equal(y.grad, y_copy.grad)
    else:
        assert y_copy.grad is None
from typing import Optional

import hypothesis.extra.numpy as hnp
import hypothesis.strategies as st
import numpy as np
import pytest
from hypothesis import given, settings
from numpy.testing import assert_array_equal

from mygrad import Tensor

# Strategy over every real-valued dtype family: signed ints, unsigned ints, floats.
real_types = (
    hnp.integer_dtypes() | hnp.unsigned_integer_dtypes() | hnp.floating_dtypes()
)


def _tensor_from_pair(pair):
    """Build a ``Tensor`` from an ``(array, constant)`` pair."""
    array, is_constant = pair
    return Tensor(array, constant=is_constant)


@given(
    tensor=st.tuples(
        hnp.arrays(shape=hnp.array_shapes(), dtype=real_types),
        st.booleans(),
    ).map(_tensor_from_pair),
    dest_type=real_types,
    constant=st.booleans() | st.none(),
)
def test_astype(tensor: Tensor, dest_type: type, constant: Optional[bool]):
    """``Tensor.astype`` must yield a creator-less tensor with the expected ``constant`` flag.

    When ``constant`` is ``None`` the flag is inherited from the source
    tensor; otherwise the explicitly passed value is used.
    """
    tensor = tensor * 1  # give tensor a creator
    new_tensor = tensor.astype(dest_type, constant=constant)

    if constant is None:
        expected_constant = tensor.constant
    else:
        expected_constant = constant

    assert new_tensor.constant is expected_constant
    assert tensor.creator is not None
    assert new_tensor.creator is None
assert np.newaxis not in shape if not allow_ellipsis: assert Ellipsis not in shape if 0 in shape: # If there's a zero in the shape, the array will have no elements. array = np.zeros(shape) assert array.size == 0 elif np.prod(shape) <= 10**5: # If it's small enough to instantiate, do so with distinct elements. array = np.arange(np.prod(shape)).reshape(shape) else: # We can't cheat on this one, so just try another. assume(False) view = array[indexer] if not np.isscalar(view): assert min_dims <= view.ndim <= (32 if max_dims is None else max_dims) if view.size: assert np.shares_memory(view, array) # addresses https://github.com/HypothesisWorks/hypothesis/issues/2582 @given( nps.arrays( shape=nps.array_shapes(min_dims=0, min_side=0), dtype=nps.floating_dtypes() ) ) def test_array_owns_memory(x: np.ndarray): assert x.base is None assert x[...].base is x
from functools import partial import numpy as np import pandas as pd import pytest import xarray as xr pytest.importorskip("hypothesis") import hypothesis.extra.numpy as npst # isort:skip import hypothesis.extra.pandas as pdst # isort:skip import hypothesis.strategies as st # isort:skip from hypothesis import given # isort:skip numeric_dtypes = st.one_of(npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes()) numeric_series = numeric_dtypes.flatmap(lambda dt: pdst.series(dtype=dt)) an_array = npst.arrays( dtype=numeric_dtypes, shape=npst.array_shapes(max_dims=2), # can only convert 1D/2D to pandas ) @st.composite def datasets_1d_vars(draw) -> xr.Dataset: """Generate datasets with only 1D variables Suitable for converting to pandas dataframes. """
else: assert 1 <= len(shape) + int(allow_ellipsis) assert np.newaxis not in shape if not allow_ellipsis: assert Ellipsis not in shape if 0 in shape: # If there's a zero in the shape, the array will have no elements. array = np.zeros(shape) assert array.size == 0 elif np.prod(shape) <= 10**5: # If it's small enough to instantiate, do so with distinct elements. array = np.arange(np.prod(shape)).reshape(shape) else: # We can't cheat on this one, so just try another. assume(False) view = array[indexer] if not np.isscalar(view): assert min_dims <= view.ndim <= (32 if max_dims is None else max_dims) if view.size: assert np.shares_memory(view, array) # addresses https://github.com/HypothesisWorks/hypothesis/issues/2582 @given( nps.arrays(shape=nps.array_shapes(min_dims=0, min_side=0), dtype=nps.floating_dtypes())) def test_array_owns_memory(x: np.ndarray): assert x.base is None assert x[...].base is x
import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
from hypothesis import given, settings

import xarray as xr

# Run for a while - arrays are a bigger search space than usual
settings.register_profile("ci", deadline=None)
settings.load_profile("ci")


# Arrays of any unsigned integer, signed integer or floating-point dtype.
an_array = npst.arrays(
    dtype=st.one_of(
        npst.unsigned_integer_dtypes(),
        npst.integer_dtypes(),
        npst.floating_dtypes(),
    ),
    # max_side specified for performance
    shape=npst.array_shapes(max_side=3),
)


@given(st.data(), an_array)
def test_CFMask_coder_roundtrip(data, arr):
    """Encoding then decoding with ``CFMaskCoder`` must reproduce the variable.

    One unique dimension name is drawn per array axis, and the round-tripped
    variable is compared with ``xr.testing.assert_identical``.
    """
    n_dims = arr.ndim
    dim_names = st.lists(st.text(), min_size=n_dims, max_size=n_dims, unique=True)
    names = data.draw(dim_names.map(tuple))

    original = xr.Variable(names, arr)
    coder = xr.coding.variables.CFMaskCoder()

    encoded = coder.encode(original)
    roundtripped = coder.decode(encoded)
    xr.testing.assert_identical(original, roundtripped)
try: slow_result = slow_func(arr, axis=axis) assert False except ValueError: return slow_result = slow_func(arr, axis=axis) assert_array_almost_equal(bn_result, slow_result) @pytest.mark.parametrize("func", (bn.nanmin, bn.nanmax, bn.anynan, bn.allnan), ids=lambda x: x.__name__) @hypothesis.given(array=hy_arrays( dtype=one_of(integer_dtypes(sizes=(32, 64)), floating_dtypes(sizes=(32, 64))), shape=array_shapes(), )) def test_reduce_hypothesis(func, array): _hypothesis_helper(func, array) @pytest.mark.parametrize("func", (bn.nanargmin, bn.nanargmax), ids=lambda x: x.__name__) @hypothesis.given(array=hy_arrays( dtype=one_of(integer_dtypes(sizes=(32, 64)), floating_dtypes(sizes=(32, 64))), shape=array_shapes(), )) def test_reduce_hypothesis_errata(func, array): _hypothesis_helper(func, array, skip_all_nans=True)
pytest.importorskip("hypothesis") import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import given, settings import xarray as xr # Run for a while - arrays are a bigger search space than usual settings.register_profile("ci", deadline=None) settings.load_profile("ci") an_array = npst.arrays( dtype=st.one_of( npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes() ), shape=npst.array_shapes(max_side=3), # max_side specified for performance ) @pytest.mark.slow @given(st.data(), an_array) def test_CFMask_coder_roundtrip(data, arr): names = data.draw( st.lists(st.text(), min_size=arr.ndim, max_size=arr.ndim, unique=True).map( tuple ) ) original = xr.Variable(names, arr) coder = xr.coding.variables.CFMaskCoder()
composite, floats, integers, lists, one_of, text, tuples, ) from superintendent.distributed.dbqueue import DatabaseQueue guaranteed_dtypes = one_of( np_strategies.scalar_dtypes(), np_strategies.unsigned_integer_dtypes(), np_strategies.datetime64_dtypes(), np_strategies.floating_dtypes(), np_strategies.integer_dtypes(), ) @composite def dataframe(draw): n_cols = draw(integers(min_value=1, max_value=20)) dtypes = draw( lists( one_of( np_strategies.floating_dtypes(), np_strategies.integer_dtypes(), np_strategies.unicode_string_dtypes(), ), min_size=n_cols,
floats, integers, lists, one_of, recursive, text, ) import superintendent.prioritisation from superintendent import SemiSupervisor primitive_strategy = text() | integers() | floats(allow_nan=False) | booleans() guaranteed_dtypes = (boolean_dtypes() | integer_dtypes() | floating_dtypes() | unicode_string_dtypes()) container_strategy = dictionaries( text(), primitive_strategy) | lists(primitive_strategy) nested_strategy = recursive( container_strategy, lambda children: lists(children) | dictionaries(text(), children), ) container_strategy = dictionaries( text(), primitive_strategy) | lists(primitive_strategy) nested_strategy = recursive( container_strategy,
# tile integer tests @settings(buffer_size=11000000) @given(data=hnp.arrays(dtype=st.one_of(hnp.integer_dtypes(endianness="="), hnp.unsigned_integer_dtypes(endianness="=")), shape=hnp.array_shapes(min_dims=2, max_dims=2, min_side=10, max_side=50))) def test_write_int_tile(data, tmpdir_factory): filename = str(tmpdir_factory.mktemp("write").join("int_tile_img.tif")) with Tiff(filename, "w") as handle: handle.write(data, method="tile", tile_width=16, tile_length=16) with tifffile.TiffFile(filename) as handle: img = handle.asarray() np.testing.assert_array_equal(data, img) @settings(buffer_size=11000000) @given(data=hnp.arrays(dtype=hnp.floating_dtypes(endianness="="), shape=hnp.array_shapes(min_dims=2, max_dims=2, min_side=10, max_side=50), elements=st.floats(0, 1))) def test_write_float_scanline(data, tmpdir_factory): filename = str(tmpdir_factory.mktemp("write").join("float_img.tif")) with Tiff(filename, "w") as handle: handle.write(data, method="scanline") with tifffile.TiffFile(filename) as handle: img = handle.asarray() np.testing.assert_array_equal(data, img) @settings(buffer_size=11000000) @given(data=hnp.arrays(dtype=hnp.floating_dtypes(endianness="="), shape=hnp.array_shapes(min_dims=2, max_dims=2, min_side=10, max_side=50), elements=st.floats(0, 1))) def test_write_float_tile(data, tmpdir_factory): filename = str(tmpdir_factory.mktemp("write").join("float_tile_img.tif"))