def test_generate_arbitrary_indices(data):
    """Check dtype, size bounds and uniqueness of ``pdst.indexes`` draws.

    Every choice is drawn interactively from ``data``.  Either ``dtype`` or an
    ``elements`` strategy built from it is handed to ``pdst.indexes`` (the
    other one is ``None``), and the dtype of the drawn index is compared with
    the dtype pandas itself produces for the same input.
    """
    import re  # local import: only needed for the version check below

    min_size = data.draw(st.integers(0, 10), "min_size")
    max_size = data.draw(st.none() | st.integers(min_size, min_size + 10), "max_size")
    unique = data.draw(st.booleans(), "unique")
    dtype = data.draw(npst.scalar_dtypes(), "dtype")
    assume(supported_by_pandas(dtype))

    # Pandas bug: https://github.com/pandas-dev/pandas/pull/14916 until 0.20;
    # then int64 indexes are inferred from uint64 values.
    assume(dtype.kind != "u")

    pass_elements = data.draw(st.booleans(), "pass_elements")

    # Dtype pandas assigns when the dtype is given explicitly.
    converted_dtype = pandas.Index([], dtype=dtype).dtype

    try:
        # Dtype pandas infers from a single value of this dtype.
        inferred_dtype = pandas.Index([data.draw(npst.from_dtype(dtype))]).dtype

        if pass_elements:
            elements = npst.from_dtype(dtype)
            dtype = None
        else:
            elements = None

        index = data.draw(
            pdst.indexes(
                elements=elements,
                dtype=dtype,
                min_size=min_size,
                max_size=max_size,
                unique=unique,
            )
        )

    except Exception as e:
        # Matched by name so the pandas exception class need not be imported.
        if type(e).__name__ == "OutOfBoundsDatetime":
            # See https://github.com/HypothesisWorks/hypothesis-python/pull/826
            reject()
        else:
            raise
    if dtype is None:
        # Compare (major, minor) numerically: raw version strings compare
        # lexicographically, which mis-orders e.g. "0.9" and "0.19".
        version = re.match(r"(\d+)\.(\d+)", pandas.__version__)
        if version is None or tuple(map(int, version.groups())) >= (0, 19):
            assert index.dtype == inferred_dtype
    else:
        assert index.dtype == converted_dtype

    if unique:
        assert len(set(index.values)) == len(index)
Пример #2
0
def test_inferred_string_strategies_roundtrip(data, dtype):
    """Store a drawn value in a one-element array of ``dtype`` and read it back.

    Guards against generating too-long or nul-terminated strings, which
    cannot be read back out of an array.
    """
    holder = np.zeros(shape=1, dtype=dtype)
    drawn = data.draw(nps.from_dtype(holder.dtype))
    holder[0] = drawn
    assert holder[0] == drawn
def test_produces_instances(t):
    """Values drawn from ``from_dtype(t)`` must be scalars of dtype ``t``."""
    strategy = from_dtype(t)

    @given(strategy)
    def test_is_t(x):
        # Both the Python-level scalar class and the dtype kind must match.
        assert isinstance(x, t.type)
        assert x.dtype.kind == t.kind

    test_is_t()
Пример #4
0
def test_inferred_string_strategies_roundtrip(data, dtype):
    """Round-trip one drawn value through a single-slot array of ``dtype``.

    The drawn value must survive the store unchanged; too-long or
    nul-terminated strings would not, so they must never be generated.
    """
    slot = np.zeros(shape=1, dtype=dtype)
    example = data.draw(nps.from_dtype(slot.dtype))
    slot[0] = example
    assert slot[0] == example
Пример #5
0
def test_broadcast_shapes_gufunc_args(parsed_sig_and_size, max_dims_extra, dtype, unique, data):
    """Draw from ``gu.gufunc_args`` with broadcasting and validate the result.

    ``parsed_sig_and_size`` unpacks into a parsed signature plus the
    ``min_side`` / ``max_side`` bounds.  A random set of argument positions is
    excluded, then shapes and elements of the drawn arrays are validated.
    """
    full_sig, min_side, max_side = parsed_sig_and_size

    # The textual signature is built from the full parsed form; afterwards
    # only its first component is needed.
    signature = unparse(full_sig)
    arg_specs, _ = full_sig

    last_position = len(arg_specs) - 1
    excluded = data.draw(sets(integers(0, last_position)).map(tuple))

    strategy = gu.gufunc_args(
        signature,
        excluded=excluded,
        min_side=min_side,
        max_side=max_side,
        max_dims_extra=max_dims_extra,
        dtype=dtype,
        elements=from_dtype(np.dtype(dtype)),
        unique=unique,
    )

    drawn = data.draw(strategy)
    shapes = list(map(np.shape, drawn))

    validate_bcast_shapes(shapes, arg_specs, excluded, min_side, max_side, max_dims_extra)
    validate_elements(drawn, dtype=dtype, unique=unique)
Пример #6
0
def test_produces_instances(t):
    """Run an inner property test that ``from_dtype(t)`` yields ``t`` scalars."""
    values_of_t = from_dtype(t)

    @given(values_of_t)
    def test_is_t(x):
        # Check the scalar class first, then the dtype kind.
        assert isinstance(x, t.type)
        assert x.dtype.kind == t.kind

    test_is_t()
Пример #7
0
def pandas_dtype_strategy(
    pandas_dtype: PandasDtype,
    strategy: Optional[SearchStrategy] = None,
    **kwargs,
) -> SearchStrategy:
    # pylint: disable=line-too-long,no-else-raise
    """Build a strategy that generates data for a :class:`pandera.dtypes.PandasDtype`.

    :param pandas_dtype: :class:`pandera.dtypes.PandasDtype` instance.
    :param strategy: an optional hypothesis strategy. When given, its values
        are mapped through the numpy scalar type of ``pandas_dtype`` and no
        new base strategy is created.
    :kwargs: key-word arguments passed into
        `hypothesis.extra.numpy.from_dtype <https://hypothesis.readthedocs.io/en/latest/numpy.html#hypothesis.extra.numpy.from_dtype>`_ .
        For datetime, timedelta, and complex number datatypes, the supported
        subset of these arguments is passed into
        :func:`~pandera.strategies.numpy_time_dtypes` and
        :func:`~pandera.strategies.numpy_complex_dtypes`.
    :returns: ``hypothesis`` strategy
    :raises TypeError: if ``pandas_dtype`` is the Category dtype.
    """

    # hypothesis doesn't support categoricals or objects, so we will need to
    # build a pandera-specific solution.
    if pandas_dtype is PandasDtype.Category:
        raise TypeError(
            "data generation for the Category dtype is currently "
            "unsupported. Consider using a string or int dtype and "
            "Check.isin(values) to ensure a finite set of values."
        )

    # The object type falls back onto generating strings.
    dtype = (
        np.dtype("str")
        if pandas_dtype is PandasDtype.Object
        else pandas_dtype.numpy_dtype
    )

    def pick_kwargs(*names):
        # Keep only the caller's keyword arguments the target accepts.
        return {key: value for key, value in kwargs.items() if key in names}

    if strategy:
        return strategy.map(dtype.type)

    if pandas_dtype.is_datetime or pandas_dtype.is_timedelta:
        return numpy_time_dtypes(dtype, **pick_kwargs("min_value", "max_value"))

    if pandas_dtype.is_complex:
        return numpy_complex_dtypes(
            dtype,
            **pick_kwargs("min_value", "max_value", "allow_infinity", "allow_nan"),
        )

    # NaN and infinity are off by default; explicit kwargs override that.
    from_dtype_kwargs = {"allow_nan": False, "allow_infinity": False}
    from_dtype_kwargs.update(kwargs)
    return npst.from_dtype(dtype, **from_dtype_kwargs)  # type: ignore
Пример #8
0
def test_infer_strategy_from_dtype(dtype, data):
    """A strategy inferred from ``dtype`` can fill an array of that dtype."""
    # Precondition: we really were handed a dtype object.
    assert isinstance(dtype, np.dtype)
    inferred = nps.from_dtype(dtype)
    assert isinstance(inferred, SearchStrategy)
    # Drawing a 10-element array exercises the inferred element strategy.
    array_strategy = nps.arrays(dtype, 10, inferred)
    data.draw(array_strategy)
Пример #9
0
def test_infer_strategy_from_dtype(dtype, data):
    """Infer an element strategy from ``dtype`` and draw an array with it."""
    # The input must already be a dtype object.
    assert isinstance(dtype, np.dtype)
    element_strategy = nps.from_dtype(dtype)
    assert isinstance(element_strategy, SearchStrategy)
    # The inferred strategy must be usable to fill a 10-element array.
    data.draw(nps.arrays(dtype, 10, element_strategy))
Пример #10
0
def test_check_strategy_continuous(pdtype, data):
    """Test built-in check strategies can generate continuous data."""
    # NOTE(review): the value drawn here is overwritten a few lines below
    # before it is ever used; the draw itself still happens.
    value = data.draw(
        npst.from_dtype(
            pdtype.numpy_dtype,
            allow_nan=False,
            allow_infinity=False,
        )
    )
    # NOTE(review): the ``pdtype`` argument is replaced here, so every
    # assertion below runs against ``pa.PandasDtype.Int`` regardless of the
    # dtype the test was called with -- confirm whether this is intentional.
    pdtype = pa.PandasDtype.Int
    value = data.draw(npst.from_dtype(pdtype.numpy_dtype))
    # Each comparison strategy must generate data satisfying its comparison
    # against the drawn reference value.
    assert data.draw(strategies.ne_strategy(pdtype, value=value)) != value
    assert data.draw(strategies.eq_strategy(pdtype, value=value)) == value
    assert data.draw(strategies.gt_strategy(pdtype, min_value=value)) > value
    assert data.draw(strategies.ge_strategy(pdtype, min_value=value)) >= value
    assert data.draw(strategies.lt_strategy(pdtype, max_value=value)) < value
    assert data.draw(strategies.le_strategy(pdtype, max_value=value)) <= value
Пример #11
0
def test_generate_arbitrary_indices(data):
    """Check dtype, size bounds and uniqueness of ``pdst.indexes`` draws.

    Every choice is drawn interactively from ``data``.  Either ``dtype`` or an
    ``elements`` strategy built from it is handed to ``pdst.indexes`` (the
    other one is ``None``), and the dtype of the drawn index is compared with
    the dtype pandas itself produces for the same input.
    """
    import re  # local import: only needed for the version check below

    min_size = data.draw(st.integers(0, 10), 'min_size')
    max_size = data.draw(
        st.none() | st.integers(min_size, min_size + 10), 'max_size')
    unique = data.draw(st.booleans(), 'unique')
    dtype = data.draw(npst.scalar_dtypes(), 'dtype')
    assume(supported_by_pandas(dtype))

    # Pandas bug: https://github.com/pandas-dev/pandas/pull/14916 until 0.20;
    # then int64 indexes are inferred from uint64 values.
    assume(dtype.kind != 'u')

    pass_elements = data.draw(st.booleans(), 'pass_elements')

    # Dtype pandas assigns when the dtype is given explicitly.
    converted_dtype = pandas.Index([], dtype=dtype).dtype

    try:
        # Dtype pandas infers from a single value of this dtype.
        inferred_dtype = pandas.Index(
            [data.draw(npst.from_dtype(dtype))]).dtype

        if pass_elements:
            elements = npst.from_dtype(dtype)
            dtype = None
        else:
            elements = None

        index = data.draw(pdst.indexes(
            elements=elements, dtype=dtype, min_size=min_size,
            max_size=max_size, unique=unique,
        ))

    except Exception as e:
        # Matched by name so the pandas exception class need not be imported.
        if type(e).__name__ == 'OutOfBoundsDatetime':
            # See https://github.com/HypothesisWorks/hypothesis-python/pull/826
            reject()
        else:
            raise
    if dtype is None:
        # Compare (major, minor) numerically: raw version strings compare
        # lexicographically, which mis-orders e.g. '0.9' and '0.19'.
        version = re.match(r'(\d+)\.(\d+)', pandas.__version__)
        if version is None or tuple(map(int, version.groups())) >= (0, 19):
            assert index.dtype == inferred_dtype
    else:
        assert index.dtype == converted_dtype

    if unique:
        assert len(set(index.values)) == len(index)
Пример #12
0
def test_unicode_string_dtypes_need_not_be_utf8():
    """The "U" dtype strategy can produce strings that fail ``str.encode()``."""

    def cannot_encode(string):
        # True exactly when encoding with the default codec fails.
        try:
            string.encode()
        except UnicodeEncodeError:
            return True
        return False

    unicode_strings = nps.from_dtype(np.dtype("U"))
    find_any(unicode_strings, cannot_encode)
Пример #13
0
def test_all_inferred_scalar_strategies_roundtrip(data, dtype):
    """Round-trip one drawn scalar through a single-element array of ``dtype``.

    Only scalars are checked, because record/compound/nested dtypes always
    give an array of np.void objects.  The interest is whether scalar values
    are safe, not known type coercion.
    """
    holder = np.zeros(shape=1, dtype=dtype)
    drawn = data.draw(nps.from_dtype(holder.dtype))
    # Values that do not equal themselves (eg NaN) *should* fail the
    # roundtrip, so they are discarded up front.
    assume(drawn == drawn)
    holder[0] = drawn
    assert holder[0] == drawn
Пример #14
0
def test_all_inferred_scalar_strategies_roundtrip(data, dtype):
    """Check that a drawn scalar survives being stored in an array of ``dtype``.

    Record/compound/nested dtypes are out of scope: they always give an array
    of np.void objects, and known type coercion is not what is tested here.
    """
    slot = np.zeros(shape=1, dtype=dtype)
    example = data.draw(nps.from_dtype(slot.dtype))
    # Skip self-unequal values (eg NaN): for those the roundtrip comparison
    # *should* fail.
    assume(example == example)
    slot[0] = example
    assert slot[0] == example
Пример #15
0
def real_from_dtype(dtype, N=10):
    """Return a strategy for length-``N`` arrays of ``dtype``.

    Lists of exactly ``N`` values drawn from ``from_dtype`` are passed
    through ``np.nan_to_num`` and cast back to ``dtype``.
    """
    target = np.dtype(dtype)

    def clean_up(x):
        cleaned = np.nan_to_num(x).astype(target)
        # hard to always get this it seems
        assert cleaned.dtype == target
        return cleaned

    fixed_length_lists = lists(from_dtype(target), min_size=N, max_size=N)
    return fixed_length_lists.map(clean_up)
Пример #16
0
def elements_and_dtype(elements, dtype, source=None):
    """Validate and normalise an ``elements`` strategy / ``dtype`` pair.

    :param elements: element strategy, or ``None`` to infer one from ``dtype``.
    :param dtype: dtype specification, or ``None`` when only ``elements``
        is given.
    :param source: optional name used to prefix argument names in messages.
    :returns: ``(elements, dtype)``; ``elements`` is always a strategy and
        ``dtype`` is the result of ``try_convert(np.dtype, dtype, "dtype")``.
    :raises InvalidArgument: if neither argument is provided, if ``dtype`` is
        categorical, or (at draw time) if a drawn element cannot be
        converted to ``dtype``.
    """
    prefix = "" if source is None else f"{source}."

    if elements is None:
        with check("dtype is not None"):
            if dtype is None:
                raise InvalidArgument(
                    f"At least one of {prefix}elements or {prefix}dtype must be provided."
                )
    else:
        check_strategy(elements, f"{prefix}elements")

    with check("is_categorical_dtype"):
        if is_categorical_dtype(dtype):
            raise InvalidArgument(
                f"{prefix}dtype is categorical, which is currently unsupported"
            )

    # A Python class other than ``object`` that numpy maps to kind "O".
    is_object_like_class = (
        isinstance(dtype, type)
        and np.dtype(dtype).kind == "O"
        and dtype is not object
    )
    if is_object_like_class:
        note_deprecation(
            f"Passed dtype={dtype!r} is not a valid Pandas dtype.  We'll treat it as "
            "dtype=object for now, but this will be an error in a future version.",
            since="2021-12-31",
            has_codemod=False,
        )

    dtype = try_convert(np.dtype, dtype, "dtype")

    if elements is None:
        elements = npst.from_dtype(dtype)
    elif dtype is not None:
        label = f"draw({prefix}elements)"

        def convert_element(value):
            # Coerce through a one-element array so numpy applies its own
            # casting rules for ``dtype``.
            try:
                return np.array([value], dtype=dtype)[0]
            except TypeError:
                raise InvalidArgument(
                    f"Cannot convert {label}={value!r} of type "
                    f"{type(value).__name__} to dtype {dtype.str}"
                ) from None
            except ValueError:
                raise InvalidArgument(
                    f"Cannot convert {label}={value!r} to type {dtype.str}"
                ) from None

        elements = elements.map(convert_element)
    assert elements is not None

    return elements, dtype
Пример #17
0
def st_2Dint8array(draw):
    """Draw a two-dimensional int8 array with elements bounded to [-1, 1]."""
    int8 = np.dtype('int8')
    shape = draw(hyp_np.array_shapes(min_dims=2, max_dims=2))
    bounded_elements = hyp_np.from_dtype(int8, min_value=-1, max_value=1)
    return draw(hyp_np.arrays(int8, shape, elements=bounded_elements))
Пример #18
0
def test_shapes_tuple_of_arrays(shapes, dtype, unique, data):
    """``gu._tuple_of_arrays`` yields one array per requested shape."""
    strategy = gu._tuple_of_arrays(
        shapes,
        dtype,
        elements=from_dtype(np.dtype(dtype)),
        unique=unique,
    )
    arrays = data.draw(strategy)

    validate_elements(arrays, dtype=dtype, unique=unique)

    # Same count, and each array has exactly the shape requested for it.
    assert len(shapes) == len(arrays)
    for expected, arr in zip(shapes, arrays):
        assert tuple(expected) == np.shape(arr)
Пример #19
0
def finite_arrays(draw,
                  shape,
                  dtype=np.float64,
                  min_value=-1.0e3,
                  max_value=1.0e3):
    """Draw an array of ``shape`` with finite, bounded elements.

    :param draw: draw callable used to sample from the array strategy.
    :param shape: shape passed through to ``nph.arrays``.
    :param dtype: anything ``np.dtype`` accepts.  The default is the
        ``np.float64`` type itself; it used to be a scalar *instance*
        (``np.float64()``), which resolves to the same dtype only through the
        instance's ``.dtype`` attribute.
    :param min_value: lower bound for the elements.
    :param max_value: upper bound for the elements.
    :returns: the drawn array.
    """
    dtype = np.dtype(dtype)
    # NaN and infinities are excluded so every element is finite.
    elts = nph.from_dtype(dtype,
                          min_value=min_value,
                          max_value=max_value,
                          allow_infinity=False,
                          allow_nan=False)
    return draw(nph.arrays(dtype, shape, elements=elts))
Пример #20
0
def test_just_shapes_tuple_of_arrays(shapes, dtype, unique, data):
    """``gu._tuple_of_arrays`` also accepts strategies for its arguments."""
    # Wrap shapes, dtype and unique in ``just`` so they arrive as strategies
    # rather than plain values.
    strategy = gu._tuple_of_arrays(
        just(shapes),
        just(dtype),
        elements=from_dtype(np.dtype(dtype)),
        unique=just(unique),
    )
    arrays = data.draw(strategy)

    validate_elements(arrays, dtype=dtype, unique=unique)

    assert len(shapes) == len(arrays)
    for expected, arr in zip(shapes, arrays):
        assert tuple(expected) == np.shape(arr)
Пример #21
0
def test_decode_broadcast_float():
    """Run ``broadcast_tester`` on ``sp.decode`` with a float first argument.

    Arguments 1 and 2 are excluded, and the ``n`` dimension is given a
    minimum side of 1.
    """
    broadcast_tester(
        sp.decode,
        "(m,n),(n),()->(m)",
        otype=CAT_DTYPE,
        excluded=(1, 2),
        # np.float64 instead of the np.float_ alias, which NumPy 2.0 removed;
        # on NumPy 1.x both names refer to the same type.
        dtype=[np.float64, CAT_DTYPE, np.bool_],
        elements=[floats(), from_dtype(np.dtype(CAT_DTYPE)), booleans()],
        unique=[False, True, False],
        min_side={"n": 1},
        map_=decoder_gen_broadcast,
    )
Пример #22
0
def test_can_cast_for_arrays(data):
    """Arrays can be filled from an element strategy of a different dtype."""
    # Note: this only passes with castable datatypes, certain dtype
    # combinations will result in an error if numpy is not able to cast them.
    element_choices = st.sampled_from(["bool", "<i2", ">i2"])
    desired_choices = st.sampled_from(["<i2", ">i2", "float32", "float64"])
    dt_elements = np.dtype(data.draw(element_choices))
    dt_desired = np.dtype(data.draw(desired_choices))

    array_strategy = nps.arrays(
        dtype=dt_desired,
        elements=nps.from_dtype(dt_elements),
        shape=(1, 2, 3),
    )
    result = data.draw(array_strategy)

    assert isinstance(result, np.ndarray)
    assert result.dtype == dt_desired
Пример #23
0
def test_encode_broadcast_float():
    """Run ``broadcast_tester`` on ``sp.encode`` with ``float`` output type.

    The three lists below are parallel: one entry per argument of the
    signature.
    """
    arg_dtypes = [np.int_, CAT_DTYPE, np.bool_, object, np.bool_]
    arg_elements = [
        integers(0, INT_MAX),
        from_dtype(np.dtype(CAT_DTYPE)),
        booleans(),
        just("float"),
        booleans(),
    ]
    arg_unique = [False, True, False, False, False]

    broadcast_tester(
        sp.encode,
        "(),(n),(),(),()->(n)",
        otype=float,
        excluded=(1, 2, 3, 4),
        dtype=arg_dtypes,
        elements=arg_elements,
        unique=arg_unique,
        min_side={"n": 1},
        map_=encoder_gen,
    )
Пример #24
0
def test_generate_arbitrary_indices(data):
    """Check dtype, size bounds and uniqueness of ``pdst.indexes`` draws.

    Either ``dtype`` or an ``elements`` strategy built from it is passed to
    ``pdst.indexes``; the drawn index must carry the dtype pandas itself
    infers (elements case) or converts to (dtype case).
    """
    min_size = data.draw(st.integers(0, 10), 'min_size')
    max_size_strategy = st.none() | st.integers(min_size, min_size + 10)
    max_size = data.draw(max_size_strategy, 'max_size')
    unique = data.draw(st.booleans(), 'unique')
    dtype = data.draw(npst.scalar_dtypes(), 'dtype')
    assume(supported_by_pandas(dtype))

    pass_elements = data.draw(st.booleans(), 'pass_elements')

    # Dtype pandas assigns when the dtype is given explicitly.
    converted_dtype = pandas.Index([], dtype=dtype).dtype

    try:
        # Dtype pandas infers from a single value of this dtype.
        sample = data.draw(npst.from_dtype(dtype))
        inferred_dtype = pandas.Index([sample]).dtype

        elements = npst.from_dtype(dtype) if pass_elements else None
        if pass_elements:
            dtype = None

        index_strategy = pdst.indexes(
            elements=elements, dtype=dtype, min_size=min_size,
            max_size=max_size, unique=unique,
        )
        index = data.draw(index_strategy)

    except Exception as e:
        # Matched by name so the pandas exception class need not be imported.
        if type(e).__name__ != 'OutOfBoundsDatetime':
            raise
        # See https://github.com/HypothesisWorks/hypothesis-python/pull/826
        reject()

    if dtype is None:
        assert index.dtype == inferred_dtype
    else:
        assert index.dtype == converted_dtype

    if unique:
        assert len(set(index.values)) == len(index)
Пример #25
0
def test_upcast_roundtrip(type_strategy, data: st.DataObject):
    """Casting a tensor to a dtype of at least its itemsize and back keeps it.

    Two types are drawn and ordered by ``itemsize``; a finite tensor of the
    smaller type is cast to the larger one and back, then compared with the
    original.
    """

    def by_itemsize(pair):
        return sorted(pair, key=lambda t: np.dtype(t).itemsize)

    ordered_pairs = st.tuples(type_strategy, type_strategy).map(by_itemsize)
    thin, wide = data.draw(ordered_pairs)

    finite_elements = hnp.from_dtype(thin).filter(np.isfinite)
    tensors = hnp.arrays(
        dtype=thin,
        shape=hnp.array_shapes(),
        elements=finite_elements,
    ).map(Tensor)
    orig_tensor = data.draw(tensors)

    widened = orig_tensor.astype(wide)
    roundtripped_tensor = widened.astype(thin)
    assert_array_equal(orig_tensor, roundtripped_tensor)
Пример #26
0
def elements_and_dtype(elements, dtype, source=None):
    """Validate and normalise an ``elements`` strategy / ``dtype`` pair.

    :param elements: element strategy, or ``None`` to infer one from ``dtype``.
    :param dtype: dtype specification, or ``None`` when only ``elements``
        is given.
    :param source: optional name used to prefix argument names in messages.
    :returns: ``(elements, dtype)``; ``elements`` is always a strategy and
        ``dtype`` is the result of ``try_convert(np.dtype, dtype, "dtype")``.
    :raises InvalidArgument: if neither argument is provided, if ``dtype`` is
        categorical, or (at draw time) if a drawn element cannot be
        converted to ``dtype``.
    """
    prefix = "" if source is None else f"{source}."

    if elements is not None:
        st.check_strategy(elements, f"{prefix}elements")
    else:
        with check("dtype is not None"):
            if dtype is None:
                raise InvalidArgument(
                    f"At least one of {prefix}elements or {prefix}dtype "
                    "must be provided."
                )

    with check("is_categorical_dtype"):
        if is_categorical_dtype(dtype):
            raise InvalidArgument(
                f"{prefix}dtype is categorical, which is currently unsupported"
            )

    dtype = try_convert(np.dtype, dtype, "dtype")

    if elements is None:
        elements = npst.from_dtype(dtype)
    elif dtype is not None:

        def convert_element(value):
            name = f"draw({prefix}elements)"
            # Coerce through a one-element array so numpy applies its own
            # casting rules for ``dtype``.
            try:
                return np.array([value], dtype=dtype)[0]
            except TypeError:
                # ``from None`` keeps the internal numpy error out of the
                # traceback; the message already carries the details.
                raise InvalidArgument(
                    f"Cannot convert {name}={value!r} of type "
                    f"{type(value).__name__} to dtype {dtype.str}"
                ) from None
            except ValueError:
                raise InvalidArgument(
                    f"Cannot convert {name}={value!r} to type {dtype.str}"
                ) from None

        elements = elements.map(convert_element)
    assert elements is not None

    return elements, dtype
Пример #27
0
def elements_and_dtype(elements, dtype, source=None):
    """Check an ``elements`` / ``dtype`` pair and return a usable pair.

    :param elements: element strategy, or ``None`` to infer one from ``dtype``.
    :param dtype: dtype specification, or ``None`` when only ``elements``
        is given.
    :param source: optional name used to prefix argument names in messages.
    :returns: ``(elements, dtype)``; ``elements`` is always a strategy and
        ``dtype`` is the result of ``try_convert(np.dtype, dtype, "dtype")``.
    :raises InvalidArgument: if neither argument is provided, if ``dtype`` is
        categorical, or (at draw time) if a drawn element cannot be
        converted to ``dtype``.
    """
    prefix = "" if source is None else f"{source}."

    if elements is not None:
        st.check_strategy(elements, f"{prefix}elements")
    else:
        with check("dtype is not None"):
            if dtype is None:
                raise InvalidArgument(
                    f"At least one of {prefix}elements or {prefix}dtype "
                    "must be provided."
                )

    with check("is_categorical_dtype"):
        if is_categorical_dtype(dtype):
            raise InvalidArgument(
                f"{prefix}dtype is categorical, which is currently unsupported"
            )

    dtype = try_convert(np.dtype, dtype, "dtype")

    if elements is None:
        elements = npst.from_dtype(dtype)
    elif dtype is not None:

        def convert_element(value):
            name = f"draw({prefix}elements)"
            # A one-element array lets numpy apply its casting rules.
            try:
                return np.array([value], dtype=dtype)[0]
            except TypeError:
                # Suppress the chained numpy error: the message below is the
                # user-facing explanation.
                raise InvalidArgument(
                    f"Cannot convert {name}={value!r} of type "
                    f"{type(value).__name__} to dtype {dtype.str}"
                ) from None
            except ValueError:
                raise InvalidArgument(
                    f"Cannot convert {name}={value!r} to type {dtype.str}"
                ) from None

        elements = elements.map(convert_element)
    assert elements is not None

    return elements, dtype
Пример #28
0
def test_bcast_tuple_of_arrays(args, data):
    """Now testing broadcasting of tuple_of_arrays, kind of crazy since it uses
    gufuncs to test itself. Some awkwardness here since there are a lot of
    corner cases when dealing with object types in the numpy extension.

    For completeness, should probably write a function like this for the other
    functions, but they always just pass dtype, elements, unique to
    `_tuple_of_arrays` anyway, so this should be pretty good.
    """
    shapes, dtype, elements, unique = args

    shapes = shapes.ravel()
    # Need to squeeze out due to weird behaviour of object
    dtype = np.squeeze(dtype, -1)
    elements = np.squeeze(elements, -1)

    # Broadcast dtype against elements, then rebuild ``elements`` as
    # strategies inferred from the (broadcast) dtypes.
    elements_shape = max(dtype.shape, elements.shape)
    dtype_ = np.broadcast_to(dtype, elements_shape)
    if elements_shape == ():
        elements = from_dtype(dtype_.item())
    else:
        elements = [from_dtype(dd) for dd in dtype_]

    # The shapes argument is broadcast to the largest of all argument shapes.
    shapes_shape = max(shapes.shape, dtype.shape, elements_shape, unique.shape)
    shapes = np.broadcast_to(shapes, shapes_shape)

    S = gu._tuple_of_arrays(shapes, dtype, elements=elements, unique=unique)
    X = data.draw(S)

    assert len(shapes) == len(X)
    for spec, drawn in zip(shapes, X):
        assert tuple(spec) == np.shape(drawn)

    # Validate each array against its own dtype/unique entry, or against the
    # single shared value when that argument has only one element.
    for ii, xx in enumerate(X):
        dd = dtype[ii] if dtype.size > 1 else dtype.item()
        uu = unique[ii] if unique.size > 1 else unique.item()
        validate_elements([xx], dtype=dd, unique=uu)
Пример #29
0
def elements_and_dtype(elements, dtype, source=None):
    """Validate and normalise an ``elements`` strategy / ``dtype`` pair.

    :param elements: element strategy, or ``None`` to infer one from ``dtype``.
    :param dtype: dtype specification, or ``None`` when only ``elements``
        is given.
    :param source: optional name used to prefix argument names in messages.
    :returns: ``(elements, dtype)``; ``elements`` is always a strategy and
        ``dtype`` is the result of ``st.try_convert(np.dtype, dtype, 'dtype')``.
    :raises InvalidArgument: if neither argument is provided, if ``dtype`` is
        categorical, or (at draw time) if a drawn element cannot be
        converted to ``dtype``.
    """
    prefix = '' if source is None else f'{source}.'

    if elements is not None:
        st.check_strategy(elements, f'{prefix}elements')
    else:
        with check('dtype is not None'):
            if dtype is None:
                raise InvalidArgument(
                    f'At least one of {prefix}elements or {prefix}dtype '
                    'must be provided.')

    with check('is_categorical_dtype'):
        if is_categorical_dtype(dtype):
            raise InvalidArgument(
                f'{prefix}dtype is categorical, which is currently unsupported'
            )

    dtype = st.try_convert(np.dtype, dtype, 'dtype')

    if elements is None:
        elements = npst.from_dtype(dtype)
    elif dtype is not None:
        def convert_element(value):
            name = f'draw({prefix}elements)'
            # Coerce through a one-element array so numpy applies its own
            # casting rules for ``dtype``.
            try:
                return np.array([value], dtype=dtype)[0]
            except TypeError:
                # ``from None`` keeps the internal numpy error out of the
                # traceback; the message already carries the details.
                raise InvalidArgument(
                    f'Cannot convert {name}={value!r} of type '
                    f'{type(value).__name__} to dtype {dtype.str}'
                ) from None
            except ValueError:
                raise InvalidArgument(
                    f'Cannot convert {name}={value!r} to type {dtype.str}'
                ) from None
        elements = elements.map(convert_element)
    assert elements is not None

    return elements, dtype
Пример #30
0
def elements_and_dtype(elements, dtype, source=None):
    """Check an ``elements`` / ``dtype`` pair and return a usable pair.

    :param elements: element strategy, or ``None`` to infer one from ``dtype``.
    :param dtype: dtype specification, or ``None`` when only ``elements``
        is given.
    :param source: optional name used to prefix argument names in messages.
    :returns: ``(elements, dtype)``; ``elements`` is always a strategy and
        ``dtype`` is the result of ``st.try_convert(np.dtype, dtype, 'dtype')``.
    :raises InvalidArgument: if neither argument is provided, if ``dtype`` is
        categorical, or (at draw time) if a drawn element cannot be
        converted to ``dtype``.
    """
    prefix = '' if source is None else f'{source}.'

    if elements is not None:
        st.check_strategy(elements, f'{prefix}elements')
    else:
        with check('dtype is not None'):
            if dtype is None:
                raise InvalidArgument(
                    f'At least one of {prefix}elements or {prefix}dtype '
                    'must be provided.')

    with check('is_categorical_dtype'):
        if is_categorical_dtype(dtype):
            raise InvalidArgument(
                f'{prefix}dtype is categorical, which is currently unsupported'
            )

    dtype = st.try_convert(np.dtype, dtype, 'dtype')

    if elements is None:
        elements = npst.from_dtype(dtype)
    elif dtype is not None:
        def convert_element(value):
            name = f'draw({prefix}elements)'
            # A one-element array lets numpy apply its casting rules.
            try:
                return np.array([value], dtype=dtype)[0]
            except TypeError:
                # Suppress the chained numpy error: the message below is the
                # user-facing explanation.
                raise InvalidArgument(
                    f'Cannot convert {name}={value!r} of type '
                    f'{type(value).__name__} to dtype {dtype.str}'
                ) from None
            except ValueError:
                raise InvalidArgument(
                    f'Cannot convert {name}={value!r} to type {dtype.str}'
                ) from None
        elements = elements.map(convert_element)
    assert elements is not None

    return elements, dtype
Пример #31
0
def test_shapes_gufunc_args(parsed_sig_and_size, dtype, unique, data):
    """Draw from ``gu.gufunc_args`` and validate shapes and elements.

    ``parsed_sig_and_size`` unpacks into a parsed signature plus the
    ``min_side`` / ``max_side`` bounds.
    """
    parsed_sig, min_side, max_side = parsed_sig_and_size

    # We could also test using elements strategy that then requires casting,
    # but that would be kind of complicated to come up with compatible combos
    # Assumes zero broadcast dims by default
    strategy = gu.gufunc_args(
        unparse(parsed_sig),
        min_side=min_side,
        max_side=max_side,
        dtype=dtype,
        elements=from_dtype(np.dtype(dtype)),
        unique=unique,
    )

    drawn = data.draw(strategy)
    shapes = list(map(np.shape, drawn))

    validate_shapes(shapes, parsed_sig[0], min_side, max_side)
    validate_elements(drawn, dtype=dtype, unique=unique)
Пример #32
0
def elements_and_dtype(elements, dtype, source=None):
    """Validate and normalise an ``elements`` strategy / ``dtype`` pair.

    :param elements: element strategy, or ``None`` to infer one from ``dtype``.
    :param dtype: dtype specification, or ``None`` when only ``elements``
        is given.
    :param source: optional name used to prefix argument names in messages.
    :returns: ``(elements, dtype)``; ``elements`` is always a strategy and
        ``dtype`` is the result of ``try_convert(np.dtype, dtype, "dtype")``.
    :raises InvalidArgument: if neither argument is provided, if ``dtype`` is
        categorical, or (at draw time) if a drawn element cannot be
        converted to ``dtype``.
    """
    prefix = "" if source is None else f"{source}."

    if elements is None:
        with check("dtype is not None"):
            if dtype is None:
                raise InvalidArgument(
                    f"At least one of {prefix}elements or {prefix}dtype must be provided."
                )
    else:
        check_strategy(elements, f"{prefix}elements")

    with check("is_categorical_dtype"):
        if is_categorical_dtype(dtype):
            raise InvalidArgument(
                f"{prefix}dtype is categorical, which is currently unsupported"
            )

    dtype = try_convert(np.dtype, dtype, "dtype")

    if elements is None:
        elements = npst.from_dtype(dtype)
    elif dtype is not None:
        label = f"draw({prefix}elements)"

        def convert_element(value):
            # Coerce through a one-element array so numpy applies its own
            # casting rules for ``dtype``.
            try:
                return np.array([value], dtype=dtype)[0]
            except TypeError:
                raise InvalidArgument(
                    f"Cannot convert {label}={value!r} of type "
                    f"{type(value).__name__} to dtype {dtype.str}"
                ) from None
            except ValueError:
                raise InvalidArgument(
                    f"Cannot convert {label}={value!r} to type {dtype.str}"
                ) from None

        elements = elements.map(convert_element)
    assert elements is not None

    return elements, dtype
Пример #33
0
def column_strategy(draw):
    """Draw a ``pdst.column`` with randomly chosen name, dtype and options.

    An ``elements`` strategy is always supplied when ``pass_dtype`` comes out
    false; otherwise a further draw decides whether to supply it.
    """
    name = draw(st.none() | st.text())
    supported_dtypes = npst.scalar_dtypes().filter(supported_by_pandas)
    dtype = draw(supported_dtypes)

    pass_dtype = not draw(st.booleans())
    # The second boolean is only drawn when ``pass_dtype`` is true.
    pass_elements = (not draw(st.booleans())) if pass_dtype else True
    elements = npst.from_dtype(dtype) if pass_elements else None

    unique = draw(st.booleans())
    use_empty_fill = draw(st.booleans())
    fill = st.nothing() if use_empty_fill else None

    return pdst.column(
        name=name, dtype=dtype, unique=unique, fill=fill, elements=elements)
def column_strategy(draw):
    """Build a ``pdst.column`` from drawn name, dtype, uniqueness and fill.

    Whether an ``elements`` strategy is passed depends on one or two boolean
    draws: it is forced on when ``pass_dtype`` is false.
    """
    name = draw(st.none() | st.text())
    dtype = draw(npst.scalar_dtypes().filter(supported_by_pandas))

    pass_dtype = not draw(st.booleans())
    if not pass_dtype:
        pass_elements = True
    else:
        # Only this branch consumes a second boolean draw.
        pass_elements = not draw(st.booleans())

    elements = None
    if pass_elements:
        elements = npst.from_dtype(dtype)

    unique = draw(st.booleans())
    fill = None
    if draw(st.booleans()):
        fill = st.nothing()

    return pdst.column(
        name=name, dtype=dtype, unique=unique, fill=fill, elements=elements)
Пример #35
0
def test_check_strategy_continuous(data_type, data):
    """Test built-in check strategies can generate continuous data."""
    np_dtype = strategies.to_numpy_dtype(data_type)
    finite_values = npst.from_dtype(
        strategies.to_numpy_dtype(data_type),
        allow_nan=False,
        allow_infinity=False,
    )
    value = data.draw(finite_values)
    # don't overstep bounds of representation
    limits = np.finfo(np_dtype)
    hypothesis.assume(limits.min < value < limits.max)

    def drawn_from(make_strategy, **bound):
        # Draw one value from a check strategy built for ``data_type``.
        return data.draw(make_strategy(data_type, **bound))

    assert drawn_from(strategies.ne_strategy, value=value) != value
    assert drawn_from(strategies.eq_strategy, value=value) == value
    assert drawn_from(strategies.gt_strategy, min_value=value) > value
    assert drawn_from(strategies.ge_strategy, min_value=value) >= value
    assert drawn_from(strategies.lt_strategy, max_value=value) < value
    assert drawn_from(strategies.le_strategy, max_value=value) <= value
Пример #36
0
def parse_basic_arrays(s_name, type_dict, strategy_dict):
    """
    Generate a Hypothesis strategy for an array-typed slot.

    :param s_name: Slot name to be parsed.
    :param type_dict: A dictionary whose values say whether the ROS message type is complex (not basic), what its
                      parent ROS message module is, its type, whether it is an array and, if so, its size.
    :param strategy_dict: A dictionary to be filled with Hypothesis strategies; the new strategy is stored
                          under ``s_name``.
    """
    declared_size = type_dict['array_size']
    # A falsy size leaves both bounds unset; otherwise the array length is
    # fixed, because min_size and max_size receive the same value.
    array_size = int(declared_size) if declared_size else None

    if type_dict['type'] == 'string':
        element_strategy = string()
    else:
        element_strategy = npst.from_dtype(np.dtype(type_dict['type']))

    strategy_dict[s_name] = array(elements=element_strategy,
                                  min_size=array_size,
                                  max_size=array_size)
Пример #37
0
def test_csr_from_coo(data, nrows, ncols, dtype):
    """Build a CSR matrix from random COO triples and check its row layout."""
    dtype = np.dtype(dtype)
    n = nrows * ncols
    # At most 75% of the n cells are filled.
    nnz = data.draw(st.integers(0, int(n * 0.75)))
    _log.debug('testing %d×%d (%d nnz) of type %s', nrows, ncols, nnz, dtype)

    # Draw nnz unique flat cell numbers, then split each into a row
    # (remainder modulo nrows) and a column (quotient by nrows).
    coords = st.integers(0, max(n - 1, 0))
    coords = data.draw(nph.arrays(np.int32, nnz, elements=coords, unique=True))
    rows = np.mod(coords, nrows, dtype=np.int32)
    cols = np.floor_divide(coords, nrows, dtype=np.int32)

    # Values are finite so that the equality comparisons below are meaningful.
    finite = nph.from_dtype(dtype, allow_infinity=False, allow_nan=False)
    vals = data.draw(nph.arrays(dtype, nnz, elements=finite))

    csr = CSR.from_coo(rows, cols, vals, (nrows, ncols))

    rowinds = csr.rowinds()
    assert csr.nrows == nrows
    assert csr.ncols == ncols
    assert csr.nnz == nnz

    for i in range(nrows):
        # [sp, ep) is the extent of row i in the CSR arrays.
        sp = csr.rowptrs[i]
        ep = csr.rowptrs[i + 1]
        assert ep - sp == np.sum(rows == i)
        # Input entries belonging to row i, ordered by column.
        points, = np.nonzero(rows == i)
        assert len(points) == ep - sp
        po = np.argsort(cols[points])
        points = points[po]
        # Column indices and values must match the input once both sides
        # are put in column order.
        assert all(np.sort(csr.colinds[sp:ep]) == cols[points])
        assert all(np.sort(csr.row_cs(i)) == cols[points])
        assert all(csr.values[np.argsort(csr.colinds[sp:ep]) +
                              sp] == vals[points])
        assert all(rowinds[sp:ep] == i)

        # The dense row reconstructed from the input must equal csr.row(i).
        row = np.zeros(ncols, dtype)
        row[cols[points]] = vals[points]
        assert all(csr.row(i) == row)
Пример #38
0

def test_does_not_generate_impossible_conditions():
    """Asking for exactly three boolean index entries must raise NoExamples."""
    with pytest.raises(NoExamples):
        impossible = pdst.indexes(min_size=3, max_size=3, dtype=bool)
        impossible.example()


@given(pdst.indexes(unique=True, dtype=bool))
def test_unique_indexes_of_small_values(ix):
    """A unique boolean index has at most two entries, none repeated."""
    size = len(ix)
    assert size <= 2
    distinct = set(ix)
    assert len(distinct) == size


# Strategy for scalars of NumPy's default integer dtype (``np.dtype(int)``).
# NOTE(review): despite the name, this is only int64 where the platform's
# default integer is 64-bit -- confirm if the exact width matters.
int64s = npst.from_dtype(np.dtype(int))


@given(int64s, int64s | st.none(), st.data())
def test_arbitrary_range_index(i, j, data):
    """Drawing a range index succeeds for arbitrary (ordered) bounds.

    When both bounds are given they are put in ascending order first; a
    missing upper bound is passed through as ``None``.
    """
    lower, upper = (i, j) if j is None else sorted((i, j))
    data.draw(pdst.range_indexes(lower, upper))


@given(pdst.range_indexes())
def test_basic_range_indexes(ix):
    """With default arguments the strategy yields ``pandas.RangeIndex`` objects."""
    expected_type = pandas.RangeIndex
    assert isinstance(ix, expected_type)


@given(st.data())
Пример #39
0
class TestArray2String:
    """Tests for ``np.array2string`` and the ``str``/``repr`` of arrays."""

    def test_basic(self):
        """Basic test of array2string."""
        a = np.arange(3)
        assert_(np.array2string(a) == '[0 1 2]')
        assert_(np.array2string(a, max_line_width=4, legacy='1.13') == '[0 1\n 2]')
        assert_(np.array2string(a, max_line_width=4) == '[0\n 1\n 2]')

    def test_unexpected_kwarg(self):
        """An unknown keyword argument raises a TypeError that names it."""
        # ensure that an appropriate TypeError
        # is raised when array2string receives
        # an unexpected kwarg

        with assert_raises_regex(TypeError, 'nonsense'):
            np.array2string(np.array([1, 2, 3]),
                            nonsense=None)

    def test_format_function(self):
        """Test custom format function for each element in array."""
        def _format_function(x):
            if np.abs(x) < 1:
                return '.'
            elif np.abs(x) < 2:
                return 'o'
            else:
                return 'O'

        x = np.arange(3)
        x_hex = "[0x0 0x1 0x2]"
        x_oct = "[0o0 0o1 0o2]"
        assert_(np.array2string(x, formatter={'all':_format_function}) ==
                "[. o O]")
        assert_(np.array2string(x, formatter={'int_kind':_format_function}) ==
                "[. o O]")
        assert_(np.array2string(x, formatter={'all':lambda x: "%.4f" % x}) ==
                "[0.0000 1.0000 2.0000]")
        # The builtins are passed directly; wrapping them in a lambda adds
        # nothing.
        assert_equal(np.array2string(x, formatter={'int':hex}),
                x_hex)
        assert_equal(np.array2string(x, formatter={'int':oct}),
                x_oct)

        x = np.arange(3.)
        assert_(np.array2string(x, formatter={'float_kind':lambda x: "%.2f" % x}) ==
                "[0.00 1.00 2.00]")
        assert_(np.array2string(x, formatter={'float':lambda x: "%.2f" % x}) ==
                "[0.00 1.00 2.00]")

        s = np.array(['abc', 'def'])
        assert_(np.array2string(s, formatter={'numpystr':lambda s: s*2}) ==
                '[abcabc defdef]')


    def test_structure_format(self):
        """Check the formatting of structured arrays.

        Covers string and float sub-array fields, datetime and timedelta
        fields containing NaT (in legacy '1.13' and current modes), integer
        sub-array fields, and a single structured record.
        """
        dt = np.dtype([('name', np.str_, 16), ('grades', np.float64, (2,))])
        x = np.array([('Sarah', (8.0, 7.0)), ('John', (6.0, 7.0))], dtype=dt)
        assert_equal(np.array2string(x),
                "[('Sarah', [8., 7.]) ('John', [6., 7.])]")

        # Legacy mode is a global print option, so it is reset in ``finally``.
        np.set_printoptions(legacy='1.13')
        try:
            # for issue #5692
            A = np.zeros(shape=10, dtype=[("A", "M8[s]")])
            A[5:].fill(np.datetime64('NaT'))
            assert_equal(
                np.array2string(A),
                textwrap.dedent("""\
                [('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',)
                 ('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',) ('NaT',) ('NaT',)
                 ('NaT',) ('NaT',) ('NaT',)]""")
            )
        finally:
            np.set_printoptions(legacy=False)

        # same again, but with non-legacy behavior
        assert_equal(
            np.array2string(A),
            textwrap.dedent("""\
            [('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',)
             ('1970-01-01T00:00:00',) ('1970-01-01T00:00:00',)
             ('1970-01-01T00:00:00',) (                'NaT',)
             (                'NaT',) (                'NaT',)
             (                'NaT',) (                'NaT',)]""")
        )

        # and again, with timedeltas
        A = np.full(10, 123456, dtype=[("A", "m8[s]")])
        A[5:].fill(np.datetime64('NaT'))
        assert_equal(
            np.array2string(A),
            textwrap.dedent("""\
            [(123456,) (123456,) (123456,) (123456,) (123456,) ( 'NaT',) ( 'NaT',)
             ( 'NaT',) ( 'NaT',) ( 'NaT',)]""")
        )

        # See #8160
        struct_int = np.array([([1, -1],), ([123, 1],)], dtype=[('B', 'i4', 2)])
        assert_equal(np.array2string(struct_int),
                "[([  1,  -1],) ([123,   1],)]")
        struct_2dint = np.array([([[0, 1], [2, 3]],), ([[12, 0], [0, 0]],)],
                dtype=[('B', 'i4', (2, 2))])
        assert_equal(np.array2string(struct_2dint),
                "[([[ 0,  1], [ 2,  3]],) ([[12,  0], [ 0,  0]],)]")

        # See #8172
        array_scalar = np.array(
                (1., 2.1234567890123456789, 3.), dtype=('f8,f8,f8'))
        assert_equal(np.array2string(array_scalar), "(1., 2.12345679, 3.)")

    def test_unstructured_void_repr(self):
        """``repr``/``str`` of 'V8' data are hex-escaped bytes that round-trip."""
        a = np.array([27, 91, 50, 75,  7, 65, 10,  8,
                      27, 91, 51, 49,109, 82,101,100], dtype='u1').view('V8')
        assert_equal(repr(a[0]), r"void(b'\x1B\x5B\x32\x4B\x07\x41\x0A\x08')")
        assert_equal(str(a[0]), r"b'\x1B\x5B\x32\x4B\x07\x41\x0A\x08'")
        assert_equal(repr(a),
            r"array([b'\x1B\x5B\x32\x4B\x07\x41\x0A\x08'," "\n"
            r"       b'\x1B\x5B\x33\x31\x6D\x52\x65\x64'], dtype='|V8')")

        # Evaluating the repr in NumPy's namespace must rebuild equal values.
        assert_equal(eval(repr(a), vars(np)), a)
        assert_equal(eval(repr(a[0]), vars(np)), a[0])

    def test_edgeitems_kwarg(self):
        """The ``edgeitems`` keyword argument is honoured by array2string."""
        # previously the global print options would be taken over the kwarg
        arr = np.zeros(3, int)
        assert_equal(
            np.array2string(arr, edgeitems=1, threshold=0),
            "[0 ... 0]"
        )

    def test_summarize_1d(self):
        """``str``/``repr`` of a 1001-element 1-d array are summarized with ``...``."""
        A = np.arange(1001)
        strA = '[   0    1    2 ...  998  999 1000]'
        assert_equal(str(A), strA)

        reprA = 'array([   0,    1,    2, ...,  998,  999, 1000])'
        assert_equal(repr(A), reprA)

    def test_summarize_2d(self):
        """``str``/``repr`` of a (2, 501) array summarize each row with ``...``."""
        A = np.arange(1002).reshape(2, 501)
        strA = '[[   0    1    2 ...  498  499  500]\n' \
               ' [ 501  502  503 ...  999 1000 1001]]'
        assert_equal(str(A), strA)

        reprA = 'array([[   0,    1,    2, ...,  498,  499,  500],\n' \
                '       [ 501,  502,  503, ...,  999, 1000, 1001]])'
        assert_equal(repr(A), reprA)

    def test_linewidth(self):
        """Check line wrapping for several ``max_line_width`` values.

        Both legacy '1.13' and current behaviour are exercised, for a 1-d
        array and for the same data viewed as a 3-d array.
        """
        a = np.full(6, 1)

        def make_str(a, width, **kw):
            return np.array2string(a, separator="", max_line_width=width, **kw)

        assert_equal(make_str(a, 8, legacy='1.13'), '[111111]')
        assert_equal(make_str(a, 7, legacy='1.13'), '[111111]')
        assert_equal(make_str(a, 5, legacy='1.13'), '[1111\n'
                                                    ' 11]')

        assert_equal(make_str(a, 8), '[111111]')
        assert_equal(make_str(a, 7), '[11111\n'
                                     ' 1]')
        assert_equal(make_str(a, 5), '[111\n'
                                     ' 111]')

        b = a[None,None,:]

        assert_equal(make_str(b, 12, legacy='1.13'), '[[[111111]]]')
        assert_equal(make_str(b,  9, legacy='1.13'), '[[[111111]]]')
        assert_equal(make_str(b,  8, legacy='1.13'), '[[[11111\n'
                                                     '   1]]]')

        assert_equal(make_str(b, 12), '[[[111111]]]')
        assert_equal(make_str(b,  9), '[[[111\n'
                                      '   111]]]')
        assert_equal(make_str(b,  8), '[[[11\n'
                                      '   11\n'
                                      '   11]]]')

    def test_wide_element(self):
        """An element wider than ``max_line_width`` is still printed on one line."""
        a = np.array(['xxxxx'])
        assert_equal(
            np.array2string(a, max_line_width=5),
            "['xxxxx']"
        )
        assert_equal(
            np.array2string(a, max_line_width=5, legacy='1.13'),
            "[ 'xxxxx']"
        )

    def test_multiline_repr(self):
        """Elements whose ``repr`` spans several lines are indented and aligned."""
        class MultiLine:
            def __repr__(self):
                return "Line 1\nLine 2"

        a = np.array([[None, MultiLine()], [MultiLine(), None]])

        assert_equal(
            np.array2string(a),
            '[[None Line 1\n'
            '       Line 2]\n'
            ' [Line 1\n'
            '  Line 2 None]]'
        )
        assert_equal(
            np.array2string(a, max_line_width=5),
            '[[None\n'
            '  Line 1\n'
            '  Line 2]\n'
            ' [Line 1\n'
            '  Line 2\n'
            '  None]]'
        )
        assert_equal(
            repr(a),
            'array([[None, Line 1\n'
            '              Line 2],\n'
            '       [Line 1\n'
            '        Line 2, None]], dtype=object)'
        )

        class MultiLineLong:
            def __repr__(self):
                return "Line 1\nLooooooooooongestLine2\nLongerLine 3"

        a = np.array([[None, MultiLineLong()], [MultiLineLong(), None]])
        assert_equal(
            repr(a),
            'array([[None, Line 1\n'
            '              LooooooooooongestLine2\n'
            '              LongerLine 3          ],\n'
            '       [Line 1\n'
            '        LooooooooooongestLine2\n'
            '        LongerLine 3          , None]], dtype=object)'
        )
        assert_equal(
            np.array_repr(a, 20),
            'array([[None,\n'
            '        Line 1\n'
            '        LooooooooooongestLine2\n'
            '        LongerLine 3          ],\n'
            '       [Line 1\n'
            '        LooooooooooongestLine2\n'
            '        LongerLine 3          ,\n'
            '        None]],\n'
            '      dtype=object)'
        )

    def test_nested_array_repr(self):
        """Arrays stored inside an object array are indented in the outer ``repr``."""
        a = np.empty((2, 2), dtype=object)
        a[0, 0] = np.eye(2)
        a[0, 1] = np.eye(3)
        a[1, 0] = None
        a[1, 1] = np.ones((3, 1))
        assert_equal(
            repr(a),
            'array([[array([[1., 0.],\n'
            '               [0., 1.]]), array([[1., 0., 0.],\n'
            '                                  [0., 1., 0.],\n'
            '                                  [0., 0., 1.]])],\n'
            '       [None, array([[1.],\n'
            '                     [1.],\n'
            '                     [1.]])]], dtype=object)'
        )

    @given(hynp.from_dtype(np.dtype("U")))
    def test_any_text(self, text):
        """Property test: any unicode value survives array creation and wrapping."""
        # This test checks that, given any value that can be represented in an
        # array of dtype("U") (i.e. unicode string), ...
        a = np.array([text, text, text])
        # casting a list of them to an array does not e.g. truncate the value
        assert_equal(a[0], text)
        # and that np.array2string puts a newline in the expected location
        expected_repr = "[{0!r} {0!r}\n {0!r}]".format(text)
        result = np.array2string(a, max_line_width=len(repr(text)) * 2 + 3)
        assert_equal(result, expected_repr)

    @pytest.mark.skipif(not HAS_REFCOUNT, reason="Python lacks refcounts")
    def test_refcount(self):
        """``np.array2string`` must not leave extra references to its input."""
        # make sure we do not hold references to the array due to a recursive
        # closure (gh-10620)
        gc_was_enabled = gc.isenabled()
        gc.disable()
        try:
            a = np.arange(2)
            r1 = sys.getrefcount(a)
            np.array2string(a)
            np.array2string(a)
            r2 = sys.getrefcount(a)
            gc.collect()
        finally:
            # Restore the collector even if a call above raises, and do not
            # switch it on if it was already disabled when the test started.
            if gc_was_enabled:
                gc.enable()
        assert_(r1 == r2)
Пример #40
0
def test_strategies_for_standard_dtypes_have_reusable_values(dtype):
    """The strategy inferred from ``dtype`` reports ``has_reusable_values``."""
    strategy = nps.from_dtype(dtype)
    assert strategy.has_reusable_values
Пример #41
0
def test_inferring_from_time_dtypes_gives_same_dtype(data, dtype):
    """A value drawn from the strategy inferred for ``dtype`` has that dtype."""
    strategy = nps.from_dtype(dtype)
    drawn = data.draw(strategy)
    assert dtype == drawn.dtype
def test_floor_ceil_lossless(data, dtype):
    """Regression test for issue #1667.

    ceil used to convert numpy integers to float and back to int, losing the
    exact value.  Drawing from ``integers(x, x)`` must give back ``x`` itself.
    """
    value = data.draw(from_dtype(dtype))
    # Both bounds are the drawn value, so only that value can be produced.
    pinned = integers(value, value)
    assert data.draw(pinned) == value
Пример #43
0
def test_strategies_for_standard_dtypes_have_reusable_values(dtype):
    """Check ``has_reusable_values`` on the strategy built from ``dtype``."""
    inferred = nps.from_dtype(dtype)
    assert inferred.has_reusable_values
Пример #44
0
def test_byte_string_dtypes_generate_unicode_strings(data):
    # Draw a byte-string dtype, then a value from the strategy inferred for it.
    dt = data.draw(nps.byte_string_dtypes())
    result = data.draw(nps.from_dtype(dt))
    # NOTE(review): the test name says "unicode" but the check is against
    # ``binary_type`` -- presumably the bytes type; confirm and consider
    # renaming the test.
    assert isinstance(result, binary_type)