def check_query_with_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) a = tm.choice(['red', 'green'], size=10) b = tm.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b], names=['color', 'food']) df = DataFrame(randn(10, 2), index=index) ind = Series(df.index.get_level_values('color').values, index=index, name='color') # equality res1 = df.query('color == "red"', parser=parser, engine=engine) res2 = df.query('"red" == color', parser=parser, engine=engine) exp = df[ind == 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('color != "red"', parser=parser, engine=engine) res2 = df.query('"red" != color', parser=parser, engine=engine) exp = df[ind != 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('color == ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] == color', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('color != ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] != color', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["red"] in color', parser=parser, engine=engine) res2 = df.query('"red" in color', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["red"] not in color', parser=parser, engine=engine) res2 = df.query('"red" not in color', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp)
def test_hist_by_no_extra_plots(self): import matplotlib.pyplot as plt n = 10 df = DataFrame({'gender': tm.choice(['Male', 'Female'], size=n), 'height': random.normal(66, 4, size=n)}) axes = df.height.hist(by=df.gender) self.assertEqual(len(plt.get_fignums()), 1)
def check_query_with_partially_named_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) a = tm.choice(['red', 'green'], size=10) b = np.arange(10) index = MultiIndex.from_arrays([a, b]) index.names = [None, 'rating'] df = DataFrame(randn(10, 2), index=index) res = df.query('rating == 1', parser=parser, engine=engine) ind = Series(df.index.get_level_values('rating').values, index=index, name='rating') exp = df[ind == 1] assert_frame_equal(res, exp) res = df.query('rating != 1', parser=parser, engine=engine) ind = Series(df.index.get_level_values('rating').values, index=index, name='rating') exp = df[ind != 1] assert_frame_equal(res, exp) res = df.query('ilevel_0 == "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind == "red"] assert_frame_equal(res, exp) res = df.query('ilevel_0 != "red"', parser=parser, engine=engine) ind = Series(df.index.get_level_values(0).values, index=index) exp = df[ind != "red"] assert_frame_equal(res, exp)
def test_hist_layout(self): n = 10 gender = tm.choice(["Male", "Female"], size=n) df = DataFrame( {"gender": gender, "height": random.normal(66, 4, size=n), "weight": random.normal(161, 32, size=n)} ) with tm.assertRaises(ValueError): df.height.hist(layout=(1, 1)) with tm.assertRaises(ValueError): df.height.hist(layout=[1, 1])
def test_hist_layout(self): n = 10 gender = tm.choice(['Male', 'Female'], size=n) df = DataFrame({'gender': gender, 'height': random.normal(66, 4, size=n), 'weight': random.normal(161, 32, size=n)}) with tm.assertRaises(ValueError): df.height.hist(layout=(1, 1)) with tm.assertRaises(ValueError): df.height.hist(layout=[1, 1])
def test_grouped_hist_legacy2(self): n = 10 weight = Series(np.random.normal(166, 20, size=n)) height = Series(np.random.normal(60, 10, size=n)) with tm.RNGContext(42): gender_int = tm.choice([0, 1], size=n) df_int = DataFrame({'height': height, 'weight': weight, 'gender': gender_int}) gb = df_int.groupby('gender') axes = gb.hist() self.assertEqual(len(axes), 2) self.assertEqual(len(self.plt.get_fignums()), 2) tm.close()
def check_query_lex_compare_strings(self, parser, engine): tm.skip_if_no_ne(engine=engine) import operator as opr a = Series(tm.choice(list('abcde'), 20)) b = Series(np.arange(a.size)) df = DataFrame({'X': a, 'Y': b}) ops = {'<': opr.lt, '>': opr.gt, '<=': opr.le, '>=': opr.ge} for op, func in ops.items(): res = df.query('X %s "d"' % op, engine=engine, parser=parser) expected = df[func(df.X, 'd')] assert_frame_equal(res, expected)
def test_axis_share_xy(self): n = 100 df = DataFrame({'gender': tm.choice(['Male', 'Female'], size=n), 'height': random.normal(66, 4, size=n), 'weight': random.normal(161, 32, size=n)}) ax1, ax2 = df.hist(column='height', by=df.gender, sharex=True, sharey=True) # share both x and y self.assertTrue(ax1._shared_x_axes.joined(ax1, ax2)) self.assertTrue(ax2._shared_x_axes.joined(ax1, ax2)) self.assertTrue(ax1._shared_y_axes.joined(ax1, ax2)) self.assertTrue(ax2._shared_y_axes.joined(ax1, ax2))
def test_axis_share_xy(self): n = 100 df = DataFrame( { "gender": tm.choice(["Male", "Female"], size=n), "height": random.normal(66, 4, size=n), "weight": random.normal(161, 32, size=n), } ) ax1, ax2 = df.hist(column="height", by=df.gender, sharex=True, sharey=True) # share both x and y self.assertTrue(ax1._shared_x_axes.joined(ax1, ax2)) self.assertTrue(ax2._shared_x_axes.joined(ax1, ax2)) self.assertTrue(ax1._shared_y_axes.joined(ax1, ax2)) self.assertTrue(ax2._shared_y_axes.joined(ax1, ax2))
def test_hist_layout_with_by(self): import matplotlib.pyplot as plt n = 10 gender = tm.choice(['Male', 'Female'], size=n) df = DataFrame({'gender': gender, 'height': random.normal(66, 4, size=n), 'weight': random.normal(161, 32, size=n), 'category': random.randint(4, size=n)}) _check_plot_works(df.height.hist, by=df.gender, layout=(2, 1)) tm.close() _check_plot_works(df.height.hist, by=df.gender, layout=(1, 2)) tm.close() _check_plot_works(df.weight.hist, by=df.category, layout=(1, 4)) tm.close() _check_plot_works(df.weight.hist, by=df.category, layout=(4, 1)) tm.close()
def test_grouped_hist_layout(self): import matplotlib.pyplot as plt n = 100 gender = tm.choice(['Male', 'Female'], size=n) df = DataFrame({ 'gender': gender, 'height': random.normal(66, 4, size=n), 'weight': random.normal(161, 32, size=n), 'category': random.randint(4, size=n) }) self.assertRaises(ValueError, df.hist, column='weight', by=df.gender, layout=(1, 1)) self.assertRaises(ValueError, df.hist, column='weight', by=df.gender, layout=(1, )) self.assertRaises(ValueError, df.hist, column='height', by=df.category, layout=(1, 3)) self.assertRaises(ValueError, df.hist, column='height', by=df.category, layout=(2, 1)) self.assertEqual( df.hist(column='height', by=df.gender, layout=(2, 1)).shape, (2, )) tm.close() self.assertEqual( df.hist(column='height', by=df.category, layout=(4, 1)).shape, (4, )) tm.close() self.assertEqual( df.hist(column='height', by=df.category, layout=(4, 2)).shape, (4, 2))
def test_grouped_plot_fignums(self): n = 10 weight = Series(np.random.normal(166, 20, size=n)) height = Series(np.random.normal(60, 10, size=n)) with tm.RNGContext(42): gender = tm.choice(['male', 'female'], size=n) df = DataFrame({'height': height, 'weight': weight, 'gender': gender}) gb = df.groupby('gender') res = gb.plot() self.assertEqual(len(self.plt.get_fignums()), 2) self.assertEqual(len(res), 2) tm.close() res = gb.boxplot(return_type='axes') self.assertEqual(len(self.plt.get_fignums()), 1) self.assertEqual(len(res), 2) tm.close() # now works with GH 5610 as gender is excluded res = df.groupby('gender').hist() tm.close()
def test_grouped_hist_layout(self): import matplotlib.pyplot as plt n = 100 gender = tm.choice(["Male", "Female"], size=n) df = DataFrame( { "gender": gender, "height": random.normal(66, 4, size=n), "weight": random.normal(161, 32, size=n), "category": random.randint(4, size=n), } ) self.assertRaises(ValueError, df.hist, column="weight", by=df.gender, layout=(1, 1)) self.assertRaises(ValueError, df.hist, column="weight", by=df.gender, layout=(1,)) self.assertRaises(ValueError, df.hist, column="height", by=df.category, layout=(1, 3)) self.assertRaises(ValueError, df.hist, column="height", by=df.category, layout=(2, 1)) self.assertEqual(df.hist(column="height", by=df.gender, layout=(2, 1)).shape, (2,)) tm.close() self.assertEqual(df.hist(column="height", by=df.category, layout=(4, 1)).shape, (4,)) tm.close() self.assertEqual(df.hist(column="height", by=df.category, layout=(4, 2)).shape, (4, 2))
def test_grouped_hist_layout(self): import matplotlib.pyplot as plt n = 100 gender = tm.choice(['Male', 'Female'], size=n) df = DataFrame({'gender': gender, 'height': random.normal(66, 4, size=n), 'weight': random.normal(161, 32, size=n), 'category': random.randint(4, size=n)}) self.assertRaises(ValueError, df.hist, column='weight', by=df.gender, layout=(1, 1)) self.assertRaises(ValueError, df.hist, column='weight', by=df.gender, layout=(1,)) self.assertRaises(ValueError, df.hist, column='height', by=df.category, layout=(1, 3)) self.assertRaises(ValueError, df.hist, column='height', by=df.category, layout=(2, 1)) self.assertEqual(df.hist(column='height', by=df.gender, layout=(2, 1)).shape, (2,)) tm.close() self.assertEqual(df.hist(column='height', by=df.category, layout=(4, 1)).shape, (4,)) tm.close() self.assertEqual(df.hist(column='height', by=df.category, layout=(4, 2)).shape, (4, 2))
def test_hist_layout_with_by(self): import matplotlib.pyplot as plt n = 10 gender = tm.choice(["Male", "Female"], size=n) df = DataFrame( { "gender": gender, "height": random.normal(66, 4, size=n), "weight": random.normal(161, 32, size=n), "category": random.randint(4, size=n), } ) _check_plot_works(df.height.hist, by=df.gender, layout=(2, 1)) tm.close() _check_plot_works(df.height.hist, by=df.gender, layout=(1, 2)) tm.close() _check_plot_works(df.weight.hist, by=df.category, layout=(1, 4)) tm.close() _check_plot_works(df.weight.hist, by=df.category, layout=(4, 1)) tm.close()
def check_query_with_unnamed_multiindex(self, parser, engine): tm.skip_if_no_ne(engine) a = tm.choice(['red', 'green'], size=10) b = tm.choice(['eggs', 'ham'], size=10) index = MultiIndex.from_arrays([a, b]) df = DataFrame(randn(10, 2), index=index) ind = Series(df.index.get_level_values(0).values, index=index) res1 = df.query('ilevel_0 == "red"', parser=parser, engine=engine) res2 = df.query('"red" == ilevel_0', parser=parser, engine=engine) exp = df[ind == 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_0 != "red"', parser=parser, engine=engine) res2 = df.query('"red" != ilevel_0', parser=parser, engine=engine) exp = df[ind != 'red'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_0 == ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] == ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('ilevel_0 != ["red"]', parser=parser, engine=engine) res2 = df.query('["red"] != ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["red"] in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" in ilevel_0', parser=parser, engine=engine) exp = df[ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["red"] not in ilevel_0', parser=parser, engine=engine) res2 = df.query('"red" not in ilevel_0', parser=parser, engine=engine) exp = df[~ind.isin(['red'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # ## LEVEL 1 ind = Series(df.index.get_level_values(1).values, index=index) res1 = df.query('ilevel_1 == "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" == ilevel_1', parser=parser, engine=engine) exp = df[ind == 'eggs'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # inequality res1 = df.query('ilevel_1 != "eggs"', parser=parser, engine=engine) res2 = df.query('"eggs" != ilevel_1', parser=parser, engine=engine) exp = df[ind != 'eggs'] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # list equality (really just set membership) res1 = df.query('ilevel_1 == ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] == ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('ilevel_1 != ["eggs"]', parser=parser, engine=engine) res2 = df.query('["eggs"] != ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) # in/not in ops res1 = df.query('["eggs"] in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" in ilevel_1', parser=parser, engine=engine) exp = df[ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp) res1 = df.query('["eggs"] not in ilevel_1', parser=parser, engine=engine) res2 = df.query('"eggs" not in ilevel_1', parser=parser, engine=engine) exp = df[~ind.isin(['eggs'])] assert_frame_equal(res1, exp) assert_frame_equal(res2, exp)