Example #1
    def test_object_array_of_0d(self):
        # gh-7864
        assert_raises(ValueError, histogram,
                      [np.array([0.4]) for i in range(10)] + [-np.inf])
        assert_raises(ValueError, histogram,
                      [np.array([0.4]) for i in range(10)] + [np.inf])

        # these should not crash
        np.histogram([np.array([0.5]) for i in range(10)] + [.500000000000001])
        np.histogram([np.array([0.5]) for i in range(10)] + [.5])
Example #2
    def test_density(self):
        # Check that the integral of the density equals 1.
        n = 100
        v = np.random.rand(n)
        a, b = histogram(v, density=True)
        area = np.sum(a * np.diff(b))
        assert_almost_equal(area, 1)

        # Check with non-constant bin widths
        v = np.arange(10)
        bins = [0, 1, 3, 6, 10]
        a, b = histogram(v, bins, density=True)
        assert_array_equal(a, .1)
        assert_equal(np.sum(a * np.diff(b)), 1)

        # Test that passing False works too
        a, b = histogram(v, bins, density=False)
        assert_array_equal(a, [1, 2, 3, 4])

        # Variable bin widths are especially useful to deal with
        # infinities.
        v = np.arange(10)
        bins = [0, 1, 3, 6, np.inf]
        a, b = histogram(v, bins, density=True)
        assert_array_equal(a, [.1, .1, .1, 0.])

        # Taken from a bug report from N. Becker on the numpy-discussion
        # mailing list Aug. 6, 2010.
        counts, dmy = np.histogram([1, 2, 3, 4], [0.5, 1.5, np.inf],
                                   density=True)
        assert_equal(counts, [.25, 0])
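With density=True, the returned values are the raw counts divided by the total number of samples and by the bin widths, so the area under the histogram is 1. A minimal sketch of that normalization (variable names are illustrative, not part of the test):

import numpy as np

v = np.random.rand(100)
hist, edges = np.histogram(v, bins=10, density=True)

# density = counts / (number of samples * bin widths)
counts, _ = np.histogram(v, bins=edges)
assert np.allclose(hist, counts / (v.size * np.diff(edges)))
assert np.isclose(np.sum(hist * np.diff(edges)), 1.0)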
Example #3
    def test_unsigned_monotonicity_check(self):
        # Ensure that a ValueError is raised if the bins are not increasing
        # monotonically when they contain unsigned values (see gh-9222)
        arr = np.array([2])
        bins = np.array([1, 3, 1], dtype='uint64')
        with assert_raises(ValueError):
            hist, edges = np.histogram(arr, bins=bins)
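A detail worth noting for gh-9222: subtracting a larger unsigned value from a smaller one wraps around to a huge positive number instead of going negative, so a monotonicity check based on the sign of np.diff(bins) would be fooled. A minimal illustration:

import numpy as np

bins = np.array([1, 3, 1], dtype='uint64')
# The decreasing step wraps around to 2**64 - 2 instead of -2, so both
# differences look "positive" even though the bins are not monotonic.
print(np.diff(bins))  # -> [2, 18446744073709551614]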
Example #4
    def test_bool_conversion(self):
        # gh-12107
        # Reference integer histogram
        a = np.array([1, 1, 0], dtype=np.uint8)
        int_hist, int_edges = np.histogram(a)

        # Should raise a warning on booleans.
        # Ensure that the histograms are equivalent; we need to suppress
        # the warnings to get the actual outputs.
        with suppress_warnings() as sup:
            rec = sup.record(RuntimeWarning, 'Converting input from .*')
            hist, edges = np.histogram([True, True, False])
            # A warning should be issued
            assert_equal(len(rec), 1)
            assert_array_equal(hist, int_hist)
            assert_array_equal(edges, int_edges)
Example #5
    def test_bin_edge_cases(self):
        # Ensure that floating-point computations correctly place edge cases.
        arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
        hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))
        mask = hist > 0
        left_edges = edges[:-1][mask]
        right_edges = edges[1:][mask]
        for x, left, right in zip(arr, left_edges, right_edges):
            assert_(x >= left)
            assert_(x < right)
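The edges returned above are a uniform subdivision of range, with interior bins half-open on the right. One way to map samples back to edge intervals after the fact is np.searchsorted (a sketch only; this is not how histogram computes bin indices internally, which is exactly what the test probes):

import numpy as np

arr = np.array([337, 404, 739, 806, 1007, 1811, 2012])
hist, edges = np.histogram(arr, bins=8296, range=(2, 2280))

# searchsorted(..., side='right') - 1 gives the index i with
# edges[i] <= x < edges[i + 1] for values strictly inside the range.
idx = np.searchsorted(edges, arr, side='right') - 1
assert np.all(edges[idx] <= arr) and np.all(arr < edges[idx + 1])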
Example #6
    def test_simple(self):
        """
        Straightforward testing with a mixture of linspace data (for
        consistency). All test values have been precomputed and the values
        shouldn't change.
        """
        # Some basic sanity checking, with some fixed data.
        # Checking for the correct number of bins
        basic_test = {
            50: {
                'fd': 4,
                'scott': 4,
                'rice': 8,
                'sturges': 7,
                'doane': 8,
                'sqrt': 8,
                'auto': 7
            },
            500: {
                'fd': 8,
                'scott': 8,
                'rice': 16,
                'sturges': 10,
                'doane': 12,
                'sqrt': 23,
                'auto': 10
            },
            5000: {
                'fd': 17,
                'scott': 17,
                'rice': 35,
                'sturges': 14,
                'doane': 17,
                'sqrt': 71,
                'auto': 17
            }
        }

        for testlen, expectedResults in basic_test.items():
            # Create some sort of non-uniform data to test with
            # (2-peak uniform mixture)
            x1 = np.linspace(-10, -1, testlen // 5 * 2)
            x2 = np.linspace(1, 10, testlen // 5 * 3)
            x = np.concatenate((x1, x2))
            for estimator, numbins in expectedResults.items():
                a, b = np.histogram(x, estimator)
                assert_equal(len(a),
                             numbins,
                             err_msg="For the {0} estimator "
                             "with datasize of {1}".format(estimator, testlen))
Example #7
    def do_precision_upper_bound(self, float_small, float_large):
        eps = np.finfo(float_large).eps

        arr = np.array([1.0], float_small)
        range = np.array([0.0, 1.0 - eps], float_large)

        # The test looks at the behavior when the bounds change between dtypes
        if range.astype(float_small)[-1] != 1:
            return

        # previously crashed
        count, x_loc = np.histogram(arr, bins=1, range=range)
        assert_equal(count, [1])

        # gh-10322 means that the type comes from arr - this may change
        assert_equal(x_loc.dtype, float_small)
Example #8
    def test_simple_range(self):
        """
        Straightforward testing with a mixture of linspace data (for
        consistency). Adding in a 3rd mixture that will then be
        completely ignored. All test values have been precomputed and
        they shouldn't change.
        """
        # some basic sanity checking, with some fixed data.
        # Checking for the correct number of bins
        basic_test = {
            50: {
                'fd': 8,
                'scott': 8,
                'rice': 15,
                'sturges': 14,
                'auto': 14
            },
            500: {
                'fd': 15,
                'scott': 16,
                'rice': 32,
                'sturges': 20,
                'auto': 20
            },
            5000: {
                'fd': 33,
                'scott': 33,
                'rice': 69,
                'sturges': 27,
                'auto': 33
            }
        }

        for testlen, expectedResults in basic_test.items():
            # Create some sort of non-uniform data to test with
            # (3-peak uniform mixture)
            x1 = np.linspace(-10, -1, testlen // 5 * 2)
            x2 = np.linspace(1, 10, testlen // 5 * 3)
            x3 = np.linspace(-100, -50, testlen)
            x = np.hstack((x1, x2, x3))
            for estimator, numbins in expectedResults.items():
                a, b = np.histogram(x, estimator, range=(-20, 20))
                msg = "For the {0} estimator".format(estimator)
                msg += " with datasize of {0}".format(testlen)
                assert_equal(len(a), numbins, err_msg=msg)
Example #9
    def test_outlier(self):
        """
        Check the FD, Scott and Doane with outliers.

        The FD estimator computes a smaller bin width since it's less affected by
        outliers. Since the range is so (artificially) large, this means more
        bins, most of which will be empty, but the data of interest usually is
        unaffected. The Scott estimator is more affected and returns fewer bins,
        despite most of the variance being in one area of the data. The Doane
        estimator lies somewhere between the other two.
        """
        xcenter = np.linspace(-10, 10, 50)
        outlier_dataset = np.hstack((np.linspace(-110, -100, 5), xcenter))

        outlier_resultdict = {'fd': 21, 'scott': 5, 'doane': 11}

        for estimator, numbins in outlier_resultdict.items():
            a, b = np.histogram(outlier_dataset, estimator)
            assert_equal(len(a), numbins)
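For reference, the Freedman-Diaconis rule sets the bin width from the interquartile range, h = 2 * IQR * n**(-1/3), while Scott's rule scales with the standard deviation, which the outliers inflate. Reproducing the FD count for the data above by hand (a sketch under those formulas):

import numpy as np

xcenter = np.linspace(-10, 10, 50)
data = np.hstack((np.linspace(-110, -100, 5), xcenter))

# Freedman-Diaconis width: 2 * IQR / n**(1/3); the bin count then follows
# from the full data range, so the outliers mostly add empty bins.
iqr = np.subtract(*np.percentile(data, [75, 25]))
width = 2 * iqr / len(data) ** (1 / 3)
print(int(np.ceil(np.ptp(data) / width)))  # 21, matching the test above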
Example #10
    def test_small(self):
        """
        Smaller datasets have the potential to cause issues with the data
        adaptive methods, especially the FD method. All bin numbers have been
        precalculated.
        """
        small_dat = {
            1: {
                'fd': 1,
                'scott': 1,
                'rice': 1,
                'sturges': 1,
                'doane': 1,
                'sqrt': 1
            },
            2: {
                'fd': 2,
                'scott': 1,
                'rice': 3,
                'sturges': 2,
                'doane': 1,
                'sqrt': 2
            },
            3: {
                'fd': 2,
                'scott': 2,
                'rice': 3,
                'sturges': 3,
                'doane': 3,
                'sqrt': 2
            }
        }

        for testlen, expectedResults in small_dat.items():
            testdat = np.arange(testlen)
            for estimator, expbins in expectedResults.items():
                a, b = np.histogram(testdat, estimator)
                assert_equal(len(a),
                             expbins,
                             err_msg="For the {0} estimator "
                             "with datasize of {1}".format(estimator, testlen))
Example #11
    def test_novariance(self):
        """
        Check that the methods handle data with no variance.
        Primarily for Scott and FD, as the SD and IQR are both 0 in this case.
        """
        novar_dataset = np.ones(100)
        novar_resultdict = {
            'fd': 1,
            'scott': 1,
            'rice': 1,
            'sturges': 1,
            'doane': 1,
            'sqrt': 1,
            'auto': 1
        }

        for estimator, numbins in novar_resultdict.items():
            a, b = np.histogram(novar_dataset, estimator)
            assert_equal(len(a),
                         numbins,
                         err_msg="{0} estimator, "
                         "No Variance test".format(estimator))
Example #12
    def test_last_bin_inclusive_range(self):
        arr = np.array([0., 0., 0., 1., 2., 3., 3., 4., 5.])
        hist, edges = np.histogram(arr, bins=30, range=(-0.5, 5))
        assert_equal(hist[-1], 1)
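The detail exercised here is that the final bin is closed on the right: a value equal to the upper end of the range is still counted, while interior bins stay half-open. A minimal illustration:

import numpy as np

hist, _ = np.histogram([0, 5], bins=5, range=(0, 5))
# 5 equals the last edge but is still counted in the final bin.
assert hist[-1] == 1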
Example #13
    def test_no_side_effects(self):
        # This is a regression test that ensures that values passed to
        # ``histogram`` are unchanged.
        values = np.array([1.3, 2.5, 2.3])
        np.histogram(values, range=[-10, 10], bins=100)
        assert_array_almost_equal(values, [1.3, 2.5, 2.3])