def check_str_query_method(self, parser, engine): tm.skip_if_no_ne(engine) df = DataFrame(randn(10, 1), columns=['b']) df['strings'] = Series(list('aabbccddee')) expect = df[df.strings == 'a'] if parser != 'pandas': col = 'strings' lst = '"a"' lhs = [col] * 2 + [lst] * 2 rhs = lhs[::-1] eq, ne = '==', '!=' ops = 2 * ([eq] + [ne]) for lhs, op, rhs in zip(lhs, ops, rhs): ex = '{lhs} {op} {rhs}'.format(lhs=lhs, op=op, rhs=rhs) assertRaises(NotImplementedError, df.query, ex, engine=engine, parser=parser, local_dict={'strings': df.strings}) else: res = df.query('"a" == strings', engine=engine, parser=parser) assert_frame_equal(res, expect) res = df.query('strings == "a"', engine=engine, parser=parser) assert_frame_equal(res, expect) assert_frame_equal(res, df[df.strings.isin(['a'])]) expect = df[df.strings != 'a'] res = df.query('strings != "a"', engine=engine, parser=parser) assert_frame_equal(res, expect) res = df.query('"a" != strings', engine=engine, parser=parser) assert_frame_equal(res, expect) assert_frame_equal(res, df[~df.strings.isin(['a'])])
def test_query_with_partially_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.arange(10) index = MultiIndex.from_arrays([a, b]) index.names = [None, 'rating'] df = DataFrame(randn(10, 2), index=index) res = df.query('rating == 1', parser=parser, engine=engine) ind = Series(df.index.get_level_values('rating').values, index=index, name='rating') exp = df[ind == 1] assert_frame_equal(res, exp) res = df.query('rating != 1', parser=parser, engine=engine) ind = Series(df.index.get_level_values('rating').values, index=index, name='rating') exp = df[ind != 1] assert_frame_equal(res, exp) res = df.query('ilevel_0 == "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind == "red"] assert_frame_equal(res, exp) res = df.query('ilevel_0 != "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind != "red"] assert_frame_equal(res, exp)
def check_query_with_partially_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) a = np.random.choice(['red', 'green'], size=10) b = np.arange(10) index = MultiIndex.from_arrays([a, b]) index.names = [None, 'rating'] df = DataFrame(randn(10, 2), index=index) res = df.query('rating == 1', parser=parser, engine=engine) ind = Series(df.index.get_level_values('rating').values, index=index, name='rating') exp = df[ind == 1] assert_frame_equal(res, exp) res = df.query('rating != 1', parser=parser, engine=engine) ind = Series(df.index.get_level_values('rating').values, index=index, name='rating') exp = df[ind != 1] assert_frame_equal(res, exp) res = df.query('ilevel_0 == "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind == "red"] assert_frame_equal(res, exp) res = df.query('ilevel_0 != "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind != "red"] assert_frame_equal(res, exp)
def test_query_with_nested_strings(self, parser, engine): tm.skip_if_no_ne(engine) skip_if_no_pandas_parser(parser) raw = """id event timestamp 1 "page 1 load" 1/1/2014 0:00:01 1 "page 1 exit" 1/1/2014 0:00:31 2 "page 2 load" 1/1/2014 0:01:01 2 "page 2 exit" 1/1/2014 0:01:31 3 "page 3 load" 1/1/2014 0:02:01 3 "page 3 exit" 1/1/2014 0:02:31 4 "page 1 load" 2/1/2014 1:00:01 4 "page 1 exit" 2/1/2014 1:00:31 5 "page 2 load" 2/1/2014 1:01:01 5 "page 2 exit" 2/1/2014 1:01:31 6 "page 3 load" 2/1/2014 1:02:01 6 "page 3 exit" 2/1/2014 1:02:31 """ df = pd.read_csv(StringIO(raw), sep=r'\s{2,}', engine='python', parse_dates=['timestamp']) expected = df[df.event == '"page 1 load"'] res = df.query("""'"page 1 load"' in event""", parser=parser, engine=engine) assert_frame_equal(expected, res)
def test_query_with_nested_special_character(self, parser, engine): skip_if_no_pandas_parser(parser) tm.skip_if_no_ne(engine) df = DataFrame({'a': ['a', 'b', 'test & test'], 'b': [1, 2, 3]}) res = df.query('a == "test & test"', parser=parser, engine=engine) expec = df[df.a == 'test & test'] assert_frame_equal(res, expec)
def check_query_string_scalar_variable(self, parser, engine): tm.skip_if_no_ne(engine) df = pd.DataFrame({'Symbol': ['BUD US', 'BUD US', 'IBM US', 'IBM US'], 'Price': [109.70, 109.72, 183.30, 183.35]}) e = df[df.Symbol == 'BUD US'] symb = 'BUD US' # noqa r = df.query('Symbol == @symb', parser=parser, engine=engine) assert_frame_equal(e, r)
def check_query_single_element_booleans(self, parser, engine): tm.skip_if_no_ne(engine) columns = 'bid', 'bidsize', 'ask', 'asksize' data = np.random.randint(2, size=(1, len(columns))).astype(bool) df = DataFrame(data, columns=columns) res = df.query('bid & ask', engine=engine, parser=parser) expected = df[df.bid & df.ask] assert_frame_equal(res, expected)
def check_query_with_nested_special_character(self, parser, engine): skip_if_no_pandas_parser(parser) tm.skip_if_no_ne(engine) df = DataFrame({'a': ['a', 'b', 'test & test'], 'b': [1, 2, 3]}) res = df.query('a == "test & test"', parser=parser, engine=engine) expec = df[df.a == 'test & test'] assert_frame_equal(res, expec)
def test_query_single_element_booleans(self, parser, engine): tm.skip_if_no_ne(engine) columns = 'bid', 'bidsize', 'ask', 'asksize' data = np.random.randint(2, size=(1, len(columns))).astype(bool) df = DataFrame(data, columns=columns) res = df.query('bid & ask', engine=engine, parser=parser) expected = df[df.bid & df.ask] assert_frame_equal(res, expected)
def check_object_array_eq_ne(self, parser, engine): tm.skip_if_no_ne(engine) df = DataFrame({'a': list('aaaabbbbcccc'), 'b': list('aabbccddeeff'), 'c': np.random.randint(5, size=12), 'd': np.random.randint(9, size=12)}) res = df.query('a == b', parser=parser, engine=engine) exp = df[df.a == df.b] assert_frame_equal(res, exp) res = df.query('a != b', parser=parser, engine=engine) exp = df[df.a != df.b] assert_frame_equal(res, exp)
def check_query_lex_compare_strings(self, parser, engine): tm.skip_if_no_ne(engine=engine) import operator as opr a = Series(np.random.choice(list('abcde'), 20)) b = Series(np.arange(a.size)) df = DataFrame({'X': a, 'Y': b}) ops = {'<': opr.lt, '>': opr.gt, '<=': opr.le, '>=': opr.ge} for op, func in ops.items(): res = df.query('X %s "d"' % op, engine=engine, parser=parser) expected = df[func(df.X, 'd')] assert_frame_equal(res, expected)
def test_query_lex_compare_strings(self, parser, engine): tm.skip_if_no_ne(engine=engine) import operator as opr a = Series(np.random.choice(list('abcde'), 20)) b = Series(np.arange(a.size)) df = DataFrame({'X': a, 'Y': b}) ops = {'<': opr.lt, '>': opr.gt, '<=': opr.le, '>=': opr.ge} for op, func in ops.items(): res = df.query('X %s "d"' % op, engine=engine, parser=parser) expected = df[func(df.X, 'd')] assert_frame_equal(res, expected)
def test_query_with_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.random.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b], names=['color', 'food']) df = DataFrame(randn(10, 2), index=index) ind = Series(df.index.get_level_values('color').values, index=index, name='color') # equality res1 = df.query('color == "red"', parser=parser, engine=engine) res2 = df.query('"red" == color', parser=parser, engine=engine) exp = df[ind == 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('color != "red"', parser=parser, engine=engine) res2 = df.query('"red" != color', parser=parser, engine=engine) exp = df[ind != 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('color == ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] == color', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('color != ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] != color', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["red"] in color', parser=parser, engine=engine) res2 = df.query('"red" in color', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["red"] not in color', parser=parser, engine=engine) res2 = df.query('"red" not in color', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp)
def test_str_list_query_method(self, parser, engine): tm.skip_if_no_ne(engine) df = DataFrame(randn(10, 1), columns=['b']) df['strings'] = Series(list('aabbccddee')) expect = df[df.strings.isin(['a', 'b'])] if parser != 'pandas': col = 'strings' lst = '["a", "b"]' lhs = [col] * 2 + [lst] * 2 rhs = lhs[::-1] eq, ne = '==', '!=' ops = 2 * ([eq] + [ne]) for lhs, op, rhs in zip(lhs, ops, rhs): ex = '{lhs} {op} {rhs}'.format(lhs=lhs, op=op, rhs=rhs) with pytest.raises(NotImplementedError): df.query(ex, engine=engine, parser=parser) else: res = df.query('strings == ["a", "b"]', engine=engine, parser=parser) assert_frame_equal(res, expect) res = df.query('["a", "b"] == strings', engine=engine, parser=parser) assert_frame_equal(res, expect) expect = df[~df.strings.isin(['a', 'b'])] res = df.query('strings != ["a", "b"]', engine=engine, parser=parser) assert_frame_equal(res, expect) res = df.query('["a", "b"] != strings', engine=engine, parser=parser) assert_frame_equal(res, expect)
def check_query_with_string_columns(self, parser, engine): tm.skip_if_no_ne(engine) df = DataFrame({'a': list('aaaabbbbcccc'), 'b': list('aabbccddeeff'), 'c': np.random.randint(5, size=12), 'd': np.random.randint(9, size=12)}) if parser == 'pandas': res = df.query('a in b', parser=parser, engine=engine) expec = df[df.a.isin(df.b)] assert_frame_equal(res, expec) res = df.query('a in b and c < d', parser=parser, engine=engine) expec = df[df.a.isin(df.b) & (df.c < df.d)] assert_frame_equal(res, expec) else: with assertRaises(NotImplementedError): df.query('a in b', parser=parser, engine=engine) with assertRaises(NotImplementedError): df.query('a in b and c < d', parser=parser, engine=engine)
def check_query_with_nested_strings(self, parser, engine): tm.skip_if_no_ne(engine) skip_if_no_pandas_parser(parser) raw = """id event timestamp 1 "page 1 load" 1/1/2014 0:00:01 1 "page 1 exit" 1/1/2014 0:00:31 2 "page 2 load" 1/1/2014 0:01:01 2 "page 2 exit" 1/1/2014 0:01:31 3 "page 3 load" 1/1/2014 0:02:01 3 "page 3 exit" 1/1/2014 0:02:31 4 "page 1 load" 2/1/2014 1:00:01 4 "page 1 exit" 2/1/2014 1:00:31 5 "page 2 load" 2/1/2014 1:01:01 5 "page 2 exit" 2/1/2014 1:01:31 6 "page 3 load" 2/1/2014 1:02:01 6 "page 3 exit" 2/1/2014 1:02:31 """ df = pd.read_csv(StringIO(raw), sep=r'\s{2,}', engine='python', parse_dates=['timestamp']) expected = df[df.event == '"page 1 load"'] res = df.query("""'"page 1 load"' in event""", parser=parser, engine=engine) assert_frame_equal(expected, res)
def check_raise_on_panel4d_with_multiindex(self, parser, engine): tm.skip_if_no_ne() p4d = tm.makePanel4D(7) p4d.items = tm.makeCustomIndex(len(p4d.items), nlevels=2) with tm.assertRaises(NotImplementedError): pd.eval('p4d + 1', parser=parser, engine=engine)
def test_query_with_unnamed_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) skip_if_no_pandas_parser(parser) a = np.random.choice(['red', 'green'], size=10) b = np.random.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b]) df = DataFrame(randn(10, 2), index=index) ind = Series(df.index.get_level_values(0).values, index=index) res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) res2 = df.query('"red" == ilevel_0', parser=parser, engine=engine) exp = df[ind == 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_0 != "red"', parser=parser, engine=engine) res2 = df.query('"red" != ilevel_0', parser=parser, engine=engine) exp = df[ind != 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_0 == ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] == ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('ilevel_0 != ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] != ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["red"] in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" in ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["red"] not in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" not in ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # ## LEVEL 1 ind = Series(df.index.get_level_values(1).values, index=index) res1 = df.query('ilevel_1 == "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" == ilevel_1', parser=parser, engine=engine) exp = df[ind == 'eggs'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_1 != "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" != ilevel_1', parser=parser, engine=engine) exp = df[ind != 'eggs'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_1 == ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] == ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('ilevel_1 != ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] != ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["eggs"] in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" in ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["eggs"] not in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" not in ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp)
def test_raise_on_panel_with_multiindex(self, parser, engine): tm.skip_if_no_ne() p = tm.makePanel(7) p.items = tm.makeCustomIndex(len(p.items), nlevels=2) with pytest.raises(NotImplementedError): pd.eval('p + 1', parser=parser, engine=engine)
def setup_class(cls): cls.engine = 'numexpr' cls.parser = 'pandas' tm.skip_if_no_ne(cls.engine)
def setUpClass(cls): super(TestDataFrameQueryNumExprPandas, cls).setUpClass() cls.engine = 'numexpr' cls.parser = 'pandas' tm.skip_if_no_ne(cls.engine)
def check_raise_on_panel_with_multiindex(self, parser, engine): tm.skip_if_no_ne() p = tm.makePanel(7) p.items = tm.makeCustomIndex(len(p.items), nlevels=2) with tm.assertRaises(NotImplementedError): pd.eval('p + 1', parser=parser, engine=engine)
def setUpClass(cls): super(TestDataFrameEvalNumExprPython, cls).setUpClass() cls.engine = 'numexpr' cls.parser = 'python' tm.skip_if_no_ne(cls.engine)
def check_query_with_unnamed_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) a = np.random.choice(['red', 'green'], size=10) b = np.random.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b]) df = DataFrame(randn(10, 2), index=index) ind = Series(df.index.get_level_values(0).values, index=index) res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) res2 = df.query('"red" == ilevel_0', parser=parser, engine=engine) exp = df[ind == 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_0 != "red"', parser=parser, engine=engine) res2 = df.query('"red" != ilevel_0', parser=parser, engine=engine) exp = df[ind != 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_0 == ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] == ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('ilevel_0 != ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] != ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["red"] in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" in ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["red"] not in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" not in ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # ## LEVEL 1 ind = Series(df.index.get_level_values(1).values, index=index) res1 = df.query('ilevel_1 == "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" == ilevel_1', parser=parser, engine=engine) exp = df[ind == 'eggs'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_1 != "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" != ilevel_1', parser=parser, engine=engine) exp = df[ind != 'eggs'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_1 == ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] == ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('ilevel_1 != ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] != ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["eggs"] in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" in ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["eggs"] not in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" not in ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp)
def test_raise_on_panel4d_with_multiindex(self, parser, engine): tm.skip_if_no_ne() p4d = tm.makePanel4D(7) p4d.items = tm.makeCustomIndex(len(p4d.items), nlevels=2) with pytest.raises(NotImplementedError): pd.eval('p4d + 1', parser=parser, engine=engine)
def setup_class(cls): super(TestDataFrameQueryNumExprPython, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'python' tm.skip_if_no_ne(cls.engine) cls.frame = TestData().frame
def setUpClass(cls): super(TestDataFrameQueryNumExprPython, cls).setUpClass() cls.engine = 'numexpr' cls.parser = 'python' tm.skip_if_no_ne(cls.engine) cls.frame = TestData().frame
def setup_class(cls): super(TestDataFrameEvalNumExprPython, cls).setup_class() cls.engine = 'numexpr' cls.parser = 'python' tm.skip_if_no_ne(cls.engine)