def test_input_wrong_x_y():
    """Test the down sampling with wrong input types for x or/and y"""
    # NOTE: ``np.bool_`` is used instead of the ``np.bool`` alias, which was
    # deprecated in NumPy 1.20 and removed in NumPy 1.24.

    # Scalar x, valid y
    x = 1
    y = np.array([True] * ARRAY_SIZE, dtype=np.bool_)
    with pytest.raises(TypeError):
        lttbc.downsample(x, y, THRESHOLD)

    # Valid x, scalar y
    x = np.array([True] * ARRAY_SIZE, dtype=np.bool_)
    y = 4
    with pytest.raises(TypeError):
        lttbc.downsample(x, y, THRESHOLD)

    # String x, valid y
    x = "wrong"
    y = np.array([True] * ARRAY_SIZE, dtype=np.bool_)
    with pytest.raises(TypeError):
        lttbc.downsample(x, y, THRESHOLD)

    # Valid x, string y
    x = np.array([True] * ARRAY_SIZE, dtype=np.bool_)
    y = "wrong"
    with pytest.raises(TypeError):
        lttbc.downsample(x, y, THRESHOLD)

    # Both inputs are of a wrong type
    x = 1
    y = "wrong"
    with pytest.raises(TypeError):
        lttbc.downsample(x, y, THRESHOLD)
def test_single_dimension_validation():
    """Test that the downsample algorithm rejects arrays with multiple dims"""
    x = np.array(
        [
            [0., 0.],
            [1., 0.8],
            [0.9, 0.8],
            [0.9, 0.7],
            [0.9, 0.6],
            [0.8, 0.5],
            [0.8, 0.5],
            [0.7, 0.5],
            [0.1, 0.],
            [0., 0.],
        ],
        dtype=np.double,
    )
    # Sanity check of the fixture itself: x really is two-dimensional
    assert x.shape == (10, 2)
    assert x.ndim == 2

    # ``np.bool_`` replaces the ``np.bool`` alias (deprecated in NumPy 1.20,
    # removed in NumPy 1.24).
    y = np.array([True] * ARRAY_SIZE, dtype=np.bool_)
    with pytest.raises(ValueError):
        lttbc.downsample(x, y, THRESHOLD)
def test_single_inf():
    """Test single 'inf' input for down sampling

    XXX: Apparently infinite values provide a crappy result...
    """
    x = np.arange(20, dtype='int32')
    # ``np.float64`` replaces the ``np.float`` alias (deprecated in
    # NumPy 1.20, removed in NumPy 1.24).
    y = np.array(
        [
            0.0, 1.0, 2.0, np.inf, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
            10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0,
        ],
        dtype=np.float64,
    )
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2

    nx, ny = lttbc.downsample(x, y, 10)

    assert len(nx) == 10
    assert len(ny) == 10
    assert nx.dtype == np.double
    assert ny.dtype == np.double
    # The call must neither leak references to the inputs nor to the outputs
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2
    assert sys.getrefcount(nx) == 2
    assert sys.getrefcount(ny) == 2

    test_array = np.array(
        [0., 1., np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, 19.],
        dtype=np.float64,
    )
    np.testing.assert_array_almost_equal(ny, test_array)
def test_array_size():
    """Test the input failure for different dimensions of arrays"""
    x = np.arange(ARRAY_SIZE)
    # y is deliberately one element shorter than x
    y = np.random.randint(1000, size=ARRAY_SIZE - 1, dtype='uint64')
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2

    # No ``assert`` around the call: when the expected exception is raised
    # the assertion would never be evaluated, so it only obscured the intent.
    with pytest.raises(ValueError):
        lttbc.downsample(x, y, ARRAY_SIZE)

    # The failed call must not leak references to the inputs
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2
def test_downsample_uint64():
    """Check the basic down sampling path with uint64 y values"""
    xs = np.arange(ARRAY_SIZE, dtype='int32')
    ys = np.random.randint(1000, size=ARRAY_SIZE, dtype='uint64')
    # Baseline reference counts before handing the arrays to the extension
    assert sys.getrefcount(xs) == 2
    assert sys.getrefcount(ys) == 2

    down_x, down_y = lttbc.downsample(xs, ys, THRESHOLD)

    # Both outputs are expected to hold THRESHOLD double values
    assert len(down_x) == len(down_y) == THRESHOLD
    assert down_x.dtype == down_y.dtype == np.double

    # Neither inputs nor outputs may carry extra references afterwards
    assert sys.getrefcount(xs) == 2
    assert sys.getrefcount(ys) == 2
    assert sys.getrefcount(down_x) == 2
    assert sys.getrefcount(down_y) == 2
def test_negative_threshold():
    """Check that a negative threshold does not cause problems"""
    xs = np.arange(ARRAY_SIZE, dtype='int32')
    ys = np.random.randint(1000, size=ARRAY_SIZE, dtype='uint64')
    # Baseline reference counts before handing the arrays to the extension
    assert sys.getrefcount(xs) == 2
    assert sys.getrefcount(ys) == 2

    down_x, down_y = lttbc.downsample(xs, ys, -THRESHOLD)

    # The result is expected to keep the full input length, as doubles
    assert len(down_x) == len(down_y) == ARRAY_SIZE
    assert down_x.dtype == down_y.dtype == np.double

    # Neither inputs nor outputs may carry extra references afterwards
    assert sys.getrefcount(xs) == 2
    assert sys.getrefcount(ys) == 2
    assert sys.getrefcount(down_x) == 2
    assert sys.getrefcount(down_y) == 2

    # The y values are expected to come back unchanged
    np.testing.assert_array_almost_equal(down_y, ys)
def test_nan():
    """Test the down sampling with NaN types"""
    x = np.arange(ARRAY_SIZE, dtype='int32')
    # ``np.float64`` replaces the ``np.float`` alias (deprecated in
    # NumPy 1.20, removed in NumPy 1.24).
    y = np.array([np.nan] * ARRAY_SIZE, dtype=np.float64)
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2

    nx, ny = lttbc.downsample(x, y, THRESHOLD)

    assert len(nx) == THRESHOLD
    assert len(ny) == THRESHOLD
    assert nx.dtype == np.double
    assert ny.dtype == np.double
    # The call must neither leak references to the inputs nor to the outputs
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2
    assert sys.getrefcount(nx) == 2
    assert sys.getrefcount(ny) == 2

    # An all-NaN input is expected to come back as zeros
    test_array = np.array([0.0] * THRESHOLD, dtype=np.float64)
    np.testing.assert_array_almost_equal(ny, test_array)
def test_input_list():
    """Test the down sampling with lists types"""
    x = list(range(ARRAY_SIZE))
    y = [True] * ARRAY_SIZE
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2

    nx, ny = lttbc.downsample(x, y, THRESHOLD)

    assert len(nx) == THRESHOLD
    assert len(ny) == THRESHOLD
    assert nx.dtype == np.double
    assert ny.dtype == np.double
    # The call must neither leak references to the inputs nor to the outputs
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2
    assert sys.getrefcount(nx) == 2
    assert sys.getrefcount(ny) == 2

    # ``np.float64`` / ``np.bool_`` replace the ``np.float`` / ``np.bool``
    # aliases (deprecated in NumPy 1.20, removed in NumPy 1.24).
    test_array = np.array([1.0] * THRESHOLD, dtype=np.float64)
    test_array_bool = np.array([1.0] * THRESHOLD, dtype=np.bool_)
    np.testing.assert_array_almost_equal(ny, test_array)
    np.testing.assert_array_almost_equal(ny, test_array_bool)
def test_downsample_bool():
    """Test the down sampling with boolean types"""
    x = np.arange(ARRAY_SIZE, dtype='int32')
    # ``np.bool_`` / ``np.float64`` replace the ``np.bool`` / ``np.float``
    # aliases (deprecated in NumPy 1.20, removed in NumPy 1.24).
    y = np.array([True] * ARRAY_SIZE, dtype=np.bool_)
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2

    nx, ny = lttbc.downsample(x, y, THRESHOLD)

    assert len(nx) == THRESHOLD
    assert len(ny) == THRESHOLD
    assert nx.dtype == np.double
    assert ny.dtype == np.double
    # The call must neither leak references to the inputs nor to the outputs
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2
    assert sys.getrefcount(nx) == 2
    assert sys.getrefcount(ny) == 2

    test_array = np.array([1.0] * THRESHOLD, dtype=np.float64)
    test_array_bool = np.array([1.0] * THRESHOLD, dtype=np.bool_)
    np.testing.assert_array_almost_equal(ny, test_array)
    np.testing.assert_array_almost_equal(ny, test_array_bool)
def test_threshold_larger():
    """Check that a threshold above the input size does not cause problems"""
    xs = np.arange(ARRAY_SIZE, dtype='int32')
    ys = np.random.randint(1000, size=ARRAY_SIZE, dtype='uint64')
    # Baseline reference counts before handing the arrays to the extension
    assert sys.getrefcount(xs) == 2
    assert sys.getrefcount(ys) == 2

    # Nothing to reduce here: the data is expected back at full length
    down_x, down_y = lttbc.downsample(xs, ys, ARRAY_SIZE + 1)

    assert len(down_x) == len(down_y) == ARRAY_SIZE
    assert down_x.dtype == down_y.dtype == np.double

    # Neither inputs nor outputs may carry extra references afterwards
    assert sys.getrefcount(xs) == 2
    assert sys.getrefcount(ys) == 2
    assert sys.getrefcount(down_x) == 2
    assert sys.getrefcount(down_y) == 2

    # NOTE: Known feature - the outputs are double arrays, so compare by
    # value rather than by dtype.
    np.testing.assert_array_almost_equal(down_x, xs)
    np.testing.assert_array_almost_equal(down_y, ys)
def test_single_nan():
    """Test single 'nan' input for down sampling"""
    x = np.arange(20, dtype='int32')
    # ``np.float64`` replaces the ``np.float`` alias (deprecated in
    # NumPy 1.20, removed in NumPy 1.24).
    y = np.array(
        [
            0.0, 1.0, 2.0, np.nan, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0,
            10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0,
        ],
        dtype=np.float64,
    )
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2

    nx, ny = lttbc.downsample(x, y, 10)

    assert len(nx) == 10
    assert len(ny) == 10
    assert nx.dtype == np.double
    assert ny.dtype == np.double
    # The call must neither leak references to the inputs nor to the outputs
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2
    assert sys.getrefcount(nx) == 2
    assert sys.getrefcount(ny) == 2

    test_array = np.array(
        [0., 0., 4., 5., 7., 10., 12., 14., 16., 19.],
        dtype=np.float64,
    )
    np.testing.assert_array_almost_equal(ny, test_array)
def lttb_downsample(x: np.ndarray, y: np.ndarray, max_pts: int = 5_000) -> Tuple[np.ndarray, np.ndarray]:
    """
    Downsample x, y for visualisation

    Parameters
    ----------
    x : np.ndarray
        x array
    y : np.ndarray
        y array
    max_pts : int, optional
        Maximum number of points after downsampling, by default 5000

    Returns
    -------
    Tuple[np.ndarray, np.ndarray]
        (new_x, new_y), the downsampled x and y arrays, cast back to the
        dtypes of the inputs. When ``max_pts`` is at least the input size the
        original ``x`` and ``y`` objects are returned unchanged.

    Raises
    ------
    ValueError
        If the size of x does not match the size of y
    """
    if x.size != y.size:
        raise ValueError(f"x size {x.size} must equal y size {y.size}")

    # Nothing to reduce: hand back the inputs untouched
    if max_pts >= x.size:
        return x, y

    # Cast to float64 rather than float32: float32 has a 24-bit significand,
    # so integer values above 2**24 (e.g. epoch timestamps or large sample
    # indices) would be rounded before the downsampling even starts.
    nx, ny = lttbc.downsample(
        x.astype(np.float64),
        y.astype(np.float64),
        max_pts,
    )
    # Restore the callers' original dtypes
    return nx.astype(x.dtype), ny.astype(y.dtype)
def test_array_mix_inf_nan():
    """Test mix of problematic input 'inf' and 'nan'"""
    x = np.arange(20, dtype='int32')
    # ``np.float64`` replaces the ``np.float`` alias (deprecated in
    # NumPy 1.20, removed in NumPy 1.24).
    y = np.array(
        [
            0.0, 1.0, 2.0, np.nan, 4.0, 5.0, 6.0, np.nan, np.inf, np.inf,
            10.0, np.nan, 12.0, -np.inf, 14.0, 15.0, 16.0, 17.0, np.nan, 19.0,
        ],
        dtype=np.float64,
    )
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2

    nx, ny = lttbc.downsample(x, y, 10)

    assert len(nx) == 10
    assert len(ny) == 10
    assert nx.dtype == np.double
    assert ny.dtype == np.double
    # The call must neither leak references to the inputs nor to the outputs
    assert sys.getrefcount(x) == 2
    assert sys.getrefcount(y) == 2
    assert sys.getrefcount(nx) == 2
    assert sys.getrefcount(ny) == 2

    test_array = np.array(
        [0., 0., 4., 4., 4., 10., -np.inf, -np.inf, -np.inf, 19.],
        dtype=np.float64,
    )
    np.testing.assert_array_almost_equal(ny, test_array)
def sample():
    """Downsample the enclosing-scope ``x`` and ``y`` to ``LARGE_THRESHOLD``.

    Returns the resulting pair of arrays as a ``(nx, ny)`` tuple.
    """
    reduced_x, reduced_y = lttbc.downsample(x, y, LARGE_THRESHOLD)
    return reduced_x, reduced_y