def test_eq():
    """Check element-wise `eq` and `ne` between two string series."""
    lhs = gr.GrizzlySeries(["hello", "world", "strings", "morestrings"])
    rhs = gr.GrizzlySeries(["hel", "world", "string", "morestrings"])

    # Only positions 1 and 3 hold identical strings on both sides.
    equal_mask = list(lhs.eq(rhs).evaluate().values)
    assert equal_mask == [False, True, False, True]

    unequal_mask = list(lhs.ne(rhs).evaluate().values)
    assert unequal_mask == [True, False, True, False]
def test_unsupported_binop_error():
    """
    Check that unsupported binary operations raise the expected errors.

    NOTE(review): a later function in this file appears to reuse this name;
    if both live in the same module, the later definition replaces this one
    and the `TypeError` case below never runs -- confirm and rename one.
    """
    from weld.grizzly.core.error import GrizzlyError

    # Adding a plain Pandas series to a GrizzlySeries is expected to fail.
    with pytest.raises(GrizzlyError):
        grizzly_ints = gr.GrizzlySeries([1, 2, 3])
        pandas_ints = pd.Series([1, 2, 3])
        grizzly_ints.add(pandas_ints)

    # Dividing one string series by another is expected to fail.
    with pytest.raises(TypeError):
        numerator = gr.GrizzlySeries(["hello", "world"])
        denominator = gr.GrizzlySeries(["hello", "world"])
        numerator.divide(denominator)
def test_unsupported_binop_error():
    """
    Check that adding a Pandas series to a GrizzlySeries raises GrizzlyError.

    NOTE(review): an earlier function in this file appears to use this same
    name; if both live in the same module, this definition replaces the
    earlier one at import time -- confirm and rename one of them.
    """
    from weld.grizzly.core.error import GrizzlyError

    with pytest.raises(GrizzlyError):
        series = gr.GrizzlySeries([1, 2, 3])
        # The right-hand operand is deliberately a Pandas series.
        other = pd.Series([1, 2, 3])
        series.add(other)
def _compare_vs_pandas(aggs, data=None):
    """
    Compare the result of aggregations vs. Pandas.

    `aggs` is forwarded unchanged to both `pd.Series.agg` and
    `gr.GrizzlySeries.agg`. When `data` is None, `list(range(-10, 25))` is
    used as the input.

    Returns the code used to compute the result if the result is a
    `GrizzlySeries`, and `None` if the aggregation produced a scalar.
    """
    if data is None:
        data = list(range(-10, 25))
    pandas_result = pd.Series(data).agg(aggs)
    grizzly_result = gr.GrizzlySeries(data).agg(aggs)

    if isinstance(pandas_result, pd.Series):
        assert isinstance(grizzly_result, gr.GrizzlySeries)
        code = grizzly_result.code
        grizzly_result = grizzly_result.to_pandas()
        # Need to reset index since labels in Pandas become the aggregation name.
        # Grizzly doesn't support indices right now.
        assert pandas_result.reset_index(drop=True).equals(grizzly_result)
        return code

    # Scalar aggregation: both libraries must produce an equal plain number.
    scalar_types = (int, float, np.float64, np.int64)
    assert isinstance(pandas_result, scalar_types)
    assert isinstance(grizzly_result, scalar_types)
    assert pandas_result == grizzly_result
    return None
def _test_binop(grizzly_op, pandas_op, name):
    """
    Run a binary operator over every pairing of numeric dtypes and check
    that the Grizzly result equals the Pandas result.

    `grizzly_op` is called with two `GrizzlySeries`, `pandas_op` with two
    `pd.Series`; `name` only appears in the assertion message.
    """
    dtypes = [
        'int8', 'uint8', 'int16', 'uint16', 'int32',
        'uint32', 'int64', 'uint64', 'float32', 'float64',
    ]
    dtype_pairs = [(lhs, rhs) for lhs in dtypes for rhs in dtypes]

    for left, right in dtype_pairs:
        grizzly_left = gr.GrizzlySeries([1, 2, 3], dtype=left)
        grizzly_right = gr.GrizzlySeries([1, 2, 3], dtype=right)
        result = grizzly_op(grizzly_left, grizzly_right).to_pandas()

        pandas_left = pd.Series([1, 2, 3], dtype=left)
        pandas_right = pd.Series([1, 2, 3], dtype=right)
        expect = pandas_op(pandas_left, pandas_right)

        assert result.equals(expect), "{}, {} (op={})".format(left, right, name)
def test_name():
    """Check that a series name survives each kind of derived operation."""
    x = gr.GrizzlySeries([1, 2, 3], name="testname")

    # Each derivation is built and evaluated in turn, one after another.
    derivations = (
        lambda s: s + s,                   # binary operator
        lambda s: s.agg(['sum', 'count']),  # aggregation
        lambda s: s[:2],                   # slice
        lambda s: s[s == 1],               # boolean mask
    )
    for derive in derivations:
        derived = derive(x)
        assert derived.evaluate().name == "testname"
def test_indexing():
    """
    Check scalar, slice, and boolean-mask indexing on an int64 series.

    Results are compared against NumPy arrays rather than Pandas because the
    output doesn't always match Pandas (indexes are not currently supported).
    """
    x = gr.GrizzlySeries(list(range(100)), dtype='int64')

    def matches(selection, expected):
        # Evaluate the selection and compare its values with `expected`.
        return np.array_equal(selection.evaluate().values, expected)

    # Scalar lookups.
    assert x[0] == 0
    assert x[50] == 50

    # Slices.
    assert matches(x[10:50], np.arange(10, 50, dtype='int64'))
    assert matches(x[:50], np.arange(50, dtype='int64'))

    # Boolean masks, including one that selects nothing.
    assert matches(x[x > 50], np.arange(51, 100, dtype='int64'))
    assert matches(x[x == 2], np.array([2], dtype='int64'))
    assert matches(x[x < 0], np.array([], dtype='int64'))
def test_get():
    """
    Check `str.get` at in-range, negative, and out-of-range positions.

    Behavior of get is different in Grizzly -- it currently returns empty
    strings in cases where Pandas returns NaN. This will be changed in a
    later patch.
    """
    inp = ["hello", "world", "test", "me", '']

    # (position passed to get, expected character per input string)
    cases = [
        (3, ['l', 'l', 't', '', '']),
        (-1, ['o', 'd', 't', 'e', '']),
        (-50, ['', '', '', '', '']),
    ]
    for position, expect in cases:
        grizzly_result = gr.GrizzlySeries(inp).str.get(position).str.to_pandas()
        pandas_result = pd.Series(expect)
        assert pandas_result.equals(grizzly_result)
def compare_vs_pandas(func, strings, *args, **kwargs):
    """
    Call the string method named `func` on both a Pandas and a Grizzly
    series built from `strings`, and assert the results are equal.

    Extra positional and keyword arguments are forwarded to the method.
    """
    pandas_method = getattr(pd.Series(strings).str, func)
    grizzly_method = getattr(gr.GrizzlySeries(strings).str, func)

    pandas_result = pandas_method(*args, **kwargs)
    grizzly_result = grizzly_method(*args, **kwargs)

    plain_output = grizzly_result.output_type.elem_type != WeldVec(I8())
    if plain_output:
        converted = grizzly_result.to_pandas()
    else:
        # Perform UTF-8 decoding.
        converted = grizzly_result.str.to_pandas()

    assert pandas_result.equals(converted)
def test_scalar():
    """
    Test adding a Python integer scalar to a series of each numeric dtype,
    ensuring the output matches Pandas.
    """
    types = ['int8', 'uint8', 'int16', 'uint16', 'int32',
             'uint32', 'int64', 'uint64', 'float32', 'float64']
    scalar = 123

    # The scalar operand has no dtype parameter, so one pass over the series
    # dtypes covers every distinct case; a second nested loop over dtypes
    # would only repeat identical checks.
    for dtype in types:
        result = (gr.GrizzlySeries([1, 2, 3], dtype=dtype) + scalar).to_pandas()
        expect = pd.Series([1, 2, 3], dtype=dtype) + scalar
        assert result.equals(expect), "{} (op={})".format(dtype, "scalar")
def test_replace():
    """
    Behavior of replace is different in Grizzly -- it currently only replaces
    the *first* occurrence. This will be changed in a later patch.
    """
    inp = ["abc", "abcdefg", "abcabcabc", "gfedcbaabcabcdef", "", "XYZ"]
    # Build the expectation with Python's str.replace limited to one
    # replacement, mirroring the first-occurrence-only behavior noted above.
    expect = [s.replace("abc", "XYZ", 1) for s in inp]

    grizzly_result = gr.GrizzlySeries(inp).str.replace("abc", "XYZ").str.to_pandas()
    pandas_result = pd.Series(expect)
    assert pandas_result.equals(grizzly_result)