def test_rank_apply():
    """Check grouped ``rank`` against ranking each group piece by piece.

    A 500-row frame is keyed by two columns of random string labels.  The
    grouped ``value.rank(...)`` result must equal the concatenation of the
    per-group ranks, realigned to the result's index.  The comparison is
    run once with default arguments and once with ``pct=True``.
    """
    n_rows = 500
    first_level = tm.rands_array(10, 100)
    second_level = tm.rands_array(10, 130)
    first_codes = np.random.randint(0, 100, size=n_rows)
    second_codes = np.random.randint(0, 130, size=n_rows)

    df = DataFrame(
        {
            "value": np.random.randn(n_rows),
            "key1": first_level.take(first_codes),
            "key2": second_level.take(second_codes),
        }
    )
    group_keys = ["key1", "key2"]

    # Same check twice: plain ranks, then percentage ranks.
    for rank_kwargs in ({}, {"pct": True}):
        result = df.groupby(group_keys).value.rank(**rank_kwargs)

        per_group = []
        for _, piece in df.groupby(group_keys):
            per_group.append(piece.value.rank(**rank_kwargs))

        # Concatenation follows group order, so realign to the result's
        # index before comparing.
        expected = concat(per_group, axis=0).reindex(result.index)
        tm.assert_series_equal(result, expected)
def test_getitem_negative_out_of_bounds():
    """Reading or writing position -11 of a 10-element Series must raise.

    Both the lookup and the assignment are expected to raise ``IndexError``
    with the same out-of-bounds message.
    """
    values = tm.rands_array(5, 10)
    labels = tm.rands_array(10, 10)
    ser = Series(values, index=labels)

    expected_msg = "index -11 is out of bounds for axis 0 with size 10"

    # Lookup one position before the start of the data.
    with pytest.raises(IndexError, match=expected_msg):
        ser[-11]

    # Assignment to the same position.
    with pytest.raises(IndexError, match=expected_msg):
        ser[-11] = "foo"
def test_long_strings(setup_path):
    """Round-trip a frame of 100-character strings through a store.

    GH6166.  Both the single column ``"a"`` and the index hold ten random
    100-character strings.  The frame is appended with ``"a"`` as a data
    column and must come back unchanged from ``select``.
    """
    column_values = tm.rands_array(100, size=10)
    index_labels = tm.rands_array(100, size=10)
    df = DataFrame({"a": column_values}, index=index_labels)

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["a"])

        roundtripped = store.select("df")
        tm.assert_frame_equal(df, roundtripped)
def test_series_frame_radd_bug(self, fixed_now_ts):
    """Reflected ``+`` with a str works; with a datetime it raises.

    GH#353.  ``"foo_" + obj`` must prefix every element of a string Series
    and of a DataFrame built from it.  Adding a ``datetime`` to the series
    from ``tm.makeTimeSeries()`` must raise ``TypeError`` in either operand
    order.
    """
    prefix = "foo_"

    def add_prefix(text):
        return prefix + text

    vals = Series(tm.rands_array(5, 10))

    # Series: str on the left-hand side.
    tm.assert_series_equal(prefix + vals, vals.map(add_prefix))

    # DataFrame: same operation, column by column.
    frame = pd.DataFrame({"vals": vals})
    tm.assert_frame_equal(
        prefix + frame,
        pd.DataFrame({"vals": vals.map(add_prefix)}),
    )

    ts = tm.makeTimeSeries()
    ts.name = "ts"

    # really raise this time
    fix_now = fixed_now_ts.to_pydatetime()
    accepted_messages = (
        "unsupported operand type",
        # wrong error message, see https://github.com/numpy/numpy/issues/18832
        "Concatenation operation",
    )
    msg = "|".join(accepted_messages)

    with pytest.raises(TypeError, match=msg):
        fix_now + ts

    with pytest.raises(TypeError, match=msg):
        ts + fix_now
def test_compress_group_combinations(self):
    """Smoke-test an outer merge on two high-cardinality key columns.

    Nothing is asserted about the merged frame; the test only requires
    that the merge completes.
    """
    # ~ 40000000 possible unique groups
    base_keys = tm.rands_array(10, 10000)
    forward_keys = np.tile(base_keys, 2)
    reversed_keys = forward_keys[::-1]

    left = DataFrame(
        {
            "key1": forward_keys,
            "key2": reversed_keys,
            "value1": np.random.randn(20000),
        }
    )
    # The right side keeps every other row of the same key arrays.
    right = DataFrame(
        {
            "key1": forward_keys[::2],
            "key2": reversed_keys[::2],
            "value2": np.random.randn(10000),
        }
    )

    # just to hit the label compression code path
    merge(left, right, how="outer")
def test_series_frame_radd_bug(self):
    """Reflected ``+`` with a str works; with a datetime it raises.

    GH#353.  ``"foo_" + obj`` must prefix every element of a string Series
    and of a DataFrame built from it.  Adding the current time as a
    ``datetime`` to the series from ``tm.makeTimeSeries()`` must raise
    ``TypeError`` in either operand order.
    """
    prefix = "foo_"

    def add_prefix(text):
        return prefix + text

    vals = pd.Series(tm.rands_array(5, 10))

    # Series: str on the left-hand side.
    tm.assert_series_equal(prefix + vals, vals.map(add_prefix))

    # DataFrame: same operation, column by column.
    frame = pd.DataFrame({"vals": vals})
    tm.assert_frame_equal(
        prefix + frame,
        pd.DataFrame({"vals": vals.map(add_prefix)}),
    )

    ts = tm.makeTimeSeries()
    ts.name = "ts"

    # really raise this time
    now = pd.Timestamp.now().to_pydatetime()

    with pytest.raises(TypeError):
        now + ts

    with pytest.raises(TypeError):
        ts + now
def test_same_len_hash_collisions(l_exp, l_add):
    """Two random strings of the same length must hash to different values.

    Parameters
    ----------
    l_exp : int
        Exponent offset; the string length is ``2 ** (l_exp + 8) + l_add``.
    l_add : int
        Additive offset applied to the power-of-two length.

    Notes
    -----
    NOTE(review): ``l_exp`` and ``l_add`` are presumably supplied by a
    ``pytest.mark.parametrize`` decorator outside this view -- confirm.
    """
    length = 2 ** (l_exp + 8) + l_add
    s = tm.rands_array(length, 2)

    result = hash_array(s, "utf8")

    # Direct inequality instead of a negated equality.
    assert result[0] != result[1]
def test_very_wide_info_repr(self):
    """Smoke test: ``repr`` of a 10x20 frame with random labels must not raise."""
    data = np.random.randn(10, 20)
    column_labels = tm.rands_array(10, 20)
    wide = DataFrame(data, columns=column_labels)

    # Only the absence of an exception is checked; the text is discarded.
    repr(wide)
def test_rands_array_2d():
    """``tm.rands_array`` honours a 2-D ``size`` and the requested string length."""
    nchars = 7
    shape = (10, 10)

    strings = tm.rands_array(nchars, size=shape)

    assert strings.shape == shape
    # Spot-check a single element for the requested length.
    assert len(strings[1, 1]) == nchars
def test_rands_array_1d():
    """``tm.rands_array`` with an int ``size`` gives a 1-D array of that length."""
    nchars = 5
    count = 10

    strings = tm.rands_array(nchars, size=count)

    assert strings.shape == (count,)
    # Spot-check the first element for the requested length.
    assert len(strings[0]) == nchars