def test_value_counts_normalize(self):
    """Check value_counts(normalize=True) against hand-computed proportions.

    Builds an eight-row frame with a "state" and a "fruit" column, then
    compares entry 0 of the result with the expected "state" frame and
    entry 1 with the expected "fruit" frame.
    """
    states = ["texas"] * 3 + ["florida"] * 4 + ["ohio"]
    fruits = ["a"] * 4 + ["b"] * 3 + ["a"]
    frame = pdc.DataFrame({"state": np.array(states), "fruit": np.array(fruits)})

    proportions = frame.value_counts(normalize=True)

    expected_state = pdc.DataFrame(
        {
            "state": np.array(["florida", "texas", "ohio"], dtype=object),
            "count": np.array([0.5, 0.375, 0.125]),
        }
    )
    assert_df_equals(proportions[0], expected_state)

    expected_fruit = pdc.DataFrame(
        {
            "fruit": np.array(["a", "b"], dtype=object),
            "count": np.array([0.625, 0.375]),
        }
    )
    assert_df_equals(proportions[1], expected_fruit)
def test_value_counts(self):
    """Check value_counts() against hand-computed counts for each column.

    The result is indexed by position; each entry is compared with the
    expected frame listed at the same position below.
    """
    frame = pdc.DataFrame(
        {
            "state": np.array(["texas"] * 3 + ["florida"] * 4 + ["ohio"]),
            "fruit": np.array(["a"] * 4 + ["b"] * 3 + ["a"]),
        }
    )
    counts = frame.value_counts()

    expected_columns = (
        {
            "state": np.array(["florida", "texas", "ohio"], dtype=object),
            "count": np.array([4, 3, 1]),
        },
        {
            "fruit": np.array(["a", "b"], dtype=object),
            "count": np.array([5, 3]),
        },
    )
    for position, columns in enumerate(expected_columns):
        expected = pdc.DataFrame(columns)
        assert_df_equals(counts[position], expected)
def test_array_length(self):
    """Constructor raises ValueError for unequal column lengths only."""
    unequal_columns = {'a': np.array([1, 2]), 'b': np.array([1])}
    with pytest.raises(ValueError):
        pdc.DataFrame(unequal_columns)

    # Equal-length columns must construct without raising.
    equal_columns = {'a': np.array([1, 2]), 'b': np.array([5, 10])}
    pdc.DataFrame(equal_columns)
def test_value_counts(self):
    """Check per-column value counts, then that rename(5) raises TypeError."""
    source = pdc.DataFrame({
        'state': np.array(['texas'] * 3 + ['florida'] * 4 + ['ohio']),
        'fruit': np.array(list('aaaabbba')),
    })
    per_column = source.value_counts()

    state_counts = pdc.DataFrame({
        'state': np.array(['florida', 'texas', 'ohio'], dtype=object),
        'count': np.array([4, 3, 1]),
    })
    assert_df_equals(per_column[0], state_counts)

    fruit_counts = pdc.DataFrame({
        'fruit': np.array(['a', 'b'], dtype=object),
        'count': np.array([5, 3]),
    })
    assert_df_equals(per_column[1], fruit_counts)

    # A non-dict argument to rename is expected to be rejected.
    with pytest.raises(TypeError):
        source.rename(5)
def test_truediv(self):
    """True division matches NumPy with the frame on either side of `/`."""
    frame_over_scalar = df5 / 3
    expected = pdc.DataFrame({"a": a5 / 3, "b": b5 / 3})
    assert_df_equals(frame_over_scalar, expected)

    scalar_over_frame = 3 / df5
    expected_reflected = pdc.DataFrame({"a": 3 / a5, "b": 3 / b5})
    assert_df_equals(scalar_over_frame, expected_reflected)
def test_head_tail(self):
    """head(2) and tail(2) of df1 match the first and last two rows."""
    top_rows = df1.head(2)
    expected_top = pdc.DataFrame({'a': a1[:2], 'b': b1[:2], 'c': c1[:2]})
    assert_df_equals(top_rows, expected_top)

    bottom_rows = df1.tail(2)
    expected_bottom = pdc.DataFrame(
        {'a': a1[-2:], 'b': b1[-2:], 'c': c1[-2:]}
    )
    assert_df_equals(bottom_rows, expected_bottom)
def test_floordiv(self):
    """Floor division matches NumPy with the frame on either side of `//`."""
    frame_first = df5 // 3
    expected_frame_first = pdc.DataFrame({'a': a5 // 3, 'b': b5 // 3})
    assert_df_equals(frame_first, expected_frame_first)

    scalar_first = 3 // df5
    expected_scalar_first = pdc.DataFrame({'a': 3 // a5, 'b': 3 // b5})
    assert_df_equals(scalar_first, expected_scalar_first)
def test_ge_le(self):
    """`>=` and `<=` on df5 match the same comparisons on its source arrays.

    The second half previously evaluated ``df5 < 2`` while comparing against
    ``a5 <= 2`` / ``b5 <= 2``, so the frame's ``<=`` operator was never
    exercised. It now uses ``<=`` on both sides.
    """
    df_result = df5 >= 3
    df_answer = pdc.DataFrame({'a': a5 >= 3, 'b': b5 >= 3})
    assert_df_equals(df_result, df_answer)

    df_result = df5 <= 2
    df_answer = pdc.DataFrame({'a': a5 <= 2, 'b': b5 <= 2})
    assert_df_equals(df_result, df_answer)
def test_pow(self):
    """Exponentiation matches NumPy with the frame as base and as exponent."""
    cubed = df5 ** 3
    expected_cubed = pdc.DataFrame({'a': a5 ** 3, 'b': b5 ** 3})
    assert_df_equals(cubed, expected_cubed)

    powers_of_two = 2 ** df5
    expected_powers = pdc.DataFrame({'a': 2 ** a5, 'b': 2 ** b5})
    assert_df_equals(powers_of_two, expected_powers)
def test_gt_lt(self):
    """`>` and `<` on df5 match the same comparisons on its source arrays."""
    above_three = df5 > 3
    expected_above = pdc.DataFrame({'a': a5 > 3, 'b': b5 > 3})
    assert_df_equals(above_three, expected_above)

    below_two = df5 < 2
    expected_below = pdc.DataFrame({'a': a5 < 2, 'b': b5 < 2})
    assert_df_equals(below_two, expected_below)
def test_sub(self):
    """Subtraction matches NumPy with the frame on either side of `-`."""
    frame_minus_scalar = df5 - 3
    expected_difference = pdc.DataFrame({'a': a5 - 3, 'b': b5 - 3})
    assert_df_equals(frame_minus_scalar, expected_difference)

    scalar_minus_frame = 3 - df5
    expected_reflected = pdc.DataFrame({'a': 3 - a5, 'b': 3 - b5})
    assert_df_equals(scalar_minus_frame, expected_reflected)
def test_eq_ne(self):
    """`==` and `!=` on df5 match the same comparisons on its source arrays."""
    equal_to_three = df5 == 3
    expected_equal = pdc.DataFrame({'a': a5 == 3, 'b': b5 == 3})
    assert_df_equals(equal_to_three, expected_equal)

    not_two = df5 != 2
    expected_not_equal = pdc.DataFrame({'a': a5 != 2, 'b': b5 != 2})
    assert_df_equals(not_two, expected_not_equal)
def test_head_tail(self):
    """head(2) and tail(2) of df match the first and last two rows."""
    names = ('a', 'b', 'c', 'd', 'e')
    arrays = (a, b, c, d, e)

    leading = df.head(2)
    expected_leading = pdc.DataFrame(
        {name: values[:2] for name, values in zip(names, arrays)}
    )
    assert_df_equals(leading, expected_leading)

    trailing = df.tail(2)
    expected_trailing = pdc.DataFrame(
        {name: values[-2:] for name, values in zip(names, arrays)}
    )
    assert_df_equals(trailing, expected_trailing)
def test_value_counts_normalize(self):
    """Check normalized value counts for a two-column string frame."""
    state_values = np.array(['texas', 'texas', 'texas',
                             'florida', 'florida', 'florida', 'florida',
                             'ohio'])
    fruit_values = np.array(['a', 'a', 'a', 'a', 'b', 'b', 'b', 'a'])
    frame = pdc.DataFrame({'state': state_values, 'fruit': fruit_values})

    shares = frame.value_counts(normalize=True)

    # Entry 0 is compared with the expected 'state' frame.
    state_labels = np.array(['florida', 'texas', 'ohio'], dtype=object)
    state_shares = np.array([0.5, 0.375, 0.125])
    expected = pdc.DataFrame({'state': state_labels, 'count': state_shares})
    assert_df_equals(shares[0], expected)

    # Entry 1 is compared with the expected 'fruit' frame.
    fruit_labels = np.array(['a', 'b'], dtype=object)
    fruit_shares = np.array([0.625, 0.375])
    expected = pdc.DataFrame({'fruit': fruit_labels, 'count': fruit_shares})
    assert_df_equals(shares[1], expected)
def test_col_slice(self):
    """Row/column slice pairs select the expected sub-frames of df.

    Also checks that a set used as the column selector raises TypeError.
    """
    every_other = slice(None, None, 2)

    first_three_cols = pdc.DataFrame({'a': a, 'b': b, 'c': c})
    assert_df_equals(df[:, :3], first_three_cols)

    strided_three_cols = pdc.DataFrame({
        'a': a[every_other],
        'b': b[every_other],
        'c': c[every_other],
    })
    assert_df_equals(df[every_other, :3], strided_three_cols)

    strided_all_cols = pdc.DataFrame({
        'a': a[every_other],
        'b': b[every_other],
        'c': c[every_other],
        'd': d[every_other],
        'e': e[every_other],
    })
    assert_df_equals(df[every_other, :], strided_all_cols)

    with pytest.raises(TypeError):
        df[:, set()]
def test_head_tail(self):
    """head(2) and tail(2) of df match the first and last two rows."""
    first_two = slice(None, 2)
    last_two = slice(-2, None)

    head_frame = df.head(2)
    expected_head = pdc.DataFrame(
        {
            "a": a[first_two],
            "b": b[first_two],
            "c": c[first_two],
            "d": d[first_two],
            "e": e[first_two],
        }
    )
    assert_df_equals(head_frame, expected_head)

    tail_frame = df.tail(2)
    expected_tail = pdc.DataFrame(
        {
            "a": a[last_two],
            "b": b[last_two],
            "c": c[last_two],
            "d": d[last_two],
            "e": e[last_two],
        }
    )
    assert_df_equals(tail_frame, expected_tail)
def test_new_column(self):
    """Assigning a new column 'f' appends it after the existing columns."""
    existing = {'a': a, 'b': b, 'c': c, 'd': d, 'e': e}
    appended = np.array([1.5, 23, 4.11])

    # Pass copies of the dict so both frames are built from independent dicts.
    frame = pdc.DataFrame(dict(existing))
    frame['f'] = appended

    expected = pdc.DataFrame(dict(existing, f=appended))
    assert_df_equals(frame, expected)
def test_value_counts(self):
    """Check value_counts() on the 'a' and 'b' columns selected from df8."""
    selected = df8[['a', 'b']]
    counts = selected.value_counts()

    expected_a = pdc.DataFrame({
        'a': np.array(['a', 'b'], dtype=object),
        'count': np.array([5, 3]),
    })
    assert_df_equals(counts[0], expected_a)

    expected_b = pdc.DataFrame({
        'b': np.array(['A', 'B'], dtype=object),
        'count': np.array([4, 4]),
    })
    assert_df_equals(counts[1], expected_b)
def test_sample(self):
    """Seeded sampling of df7 by row count and by fraction gives fixed rows."""
    by_count = df7.sample(2, seed=1)
    expected_by_count = pdc.DataFrame({
        'a': np.array(['a', 'a'], dtype=object),
        'b': np.array([2.0, 5.1]),
    })
    assert_df_equals(by_count, expected_by_count)

    by_fraction = df7.sample(frac=0.7, seed=1)
    expected_by_fraction = pdc.DataFrame({
        'a': np.array(['a', 'a', 'b'], dtype=object),
        'b': np.array([2.0, 5.1, 6.0]),
    })
    assert_df_equals(by_fraction, expected_by_fraction)
def test_col_slice(self):
    """Simultaneous row and column slicing of df returns the expected frames.

    Finishes by checking that a set as the column selector raises TypeError.
    """
    leading_three = {"a": a, "b": b, "c": c}
    all_five = {"a": a, "b": b, "c": c, "d": d, "e": e}

    expected = pdc.DataFrame(dict(leading_three))
    assert_df_equals(df[:, :3], expected)

    expected = pdc.DataFrame(
        {name: values[::2] for name, values in leading_three.items()}
    )
    assert_df_equals(df[::2, :3], expected)

    expected = pdc.DataFrame(
        {name: values[::2] for name, values in all_five.items()}
    )
    assert_df_equals(df[::2, :], expected)

    with pytest.raises(TypeError):
        df[:, set()]
def test_input_types(self):
    """Constructor rejects malformed input with the expected exception type."""
    rejected_inputs = (
        (TypeError, [1, 2, 3]),
        (TypeError, {1: 5, 'b': 10}),
        (TypeError, {'a': np.array([1]), 'b': 10}),
        (ValueError, {'a': np.array([1]), 'b': np.array([[1]])}),
    )
    for expected_error, data in rejected_inputs:
        with pytest.raises(expected_error):
            pdc.DataFrame(data)

    # Well-formed input must construct without raising.
    pdc.DataFrame({'a': np.array([1]), 'b': np.array([1])})
def test_simple_boolean(self):
    """Row selection with a one-column boolean DataFrame, plus rejected masks.

    Checks that ``df[mask]`` keeps the rows where the mask is True, that a
    two-column mask raises ValueError, and that a non-boolean mask is
    rejected.
    """
    bool_arr = np.array([True, False, False])
    df_bool = pdc.DataFrame({'col': bool_arr})
    df_result = df[df_bool]
    df_answer = pdc.DataFrame({'a': a[bool_arr], 'b': b[bool_arr],
                               'c': c[bool_arr], 'd': d[bool_arr],
                               'e': e[bool_arr]})
    assert_df_equals(df_result, df_answer)

    # Build the mask outside the raises block so that only the selection
    # itself can satisfy the expectation.
    two_column_mask = pdc.DataFrame({'col': bool_arr, 'col2': bool_arr})
    with pytest.raises(ValueError):
        df[two_column_mask]

    # This used to read `np.array[1, 2, 3]`, which subscripts the function
    # and raises TypeError inside NumPy before pdc is ever reached, so the
    # non-boolean path was never tested.
    # NOTE(review): the previous TypeError expectation was only ever met by
    # that typo; either error type is accepted here until the intended
    # exception for a non-boolean mask is confirmed against the library.
    non_boolean_mask = pdc.DataFrame({'col': np.array([1, 2, 3])})
    with pytest.raises((TypeError, ValueError)):
        df[non_boolean_mask]
def test_round(self):
    """round(0) on df42 yields the expected whole-number columns."""
    rounded = df42.round(0)
    expected_a = np.array([-11, 5, 3])
    expected_b = np.array([3, 5, -6])
    expected = pdc.DataFrame({'a': expected_a, 'b': expected_b})
    assert_df_equals(rounded, expected)
def test_cumsum(self):
    """cumsum() on df42 yields the expected running totals per column."""
    running_totals = df42.cumsum()
    expected_a = np.array([-11, -6, -3])
    expected_b = np.array([3.4, 8.5, 2.5])
    expected = pdc.DataFrame({'a': expected_a, 'b': expected_b})
    assert_df_equals(running_totals, expected)
def test_clip(self):
    """clip(0, 4) on df42 limits every value to the range 0 through 4."""
    lower, upper = 0, 4
    clipped = df42.clip(lower, upper)
    expected_a = np.array([0, 4, 3])
    expected_b = np.array([3.4, 4, 0])
    expected = pdc.DataFrame({'a': expected_a, 'b': expected_b})
    assert_df_equals(clipped, expected)
def test_diff(self):
    """diff(1) on df42 gives NaN in the first row, then row-to-row changes."""
    differences = df42.diff(1)
    expected_a = np.array([np.nan, 16, -2])
    expected_b = np.array([np.nan, 1.7, -11.1])
    expected = pdc.DataFrame({'a': expected_a, 'b': expected_b})
    assert_df_equals(differences, expected)
def test_mul(self):
    """Multiplication by 3 gives the same frame from either operand order."""
    frame_times_scalar = df5 * 3
    expected = pdc.DataFrame({'a': a5 * 3, 'b': b5 * 3})
    assert_df_equals(frame_times_scalar, expected)

    # The reflected form is compared with the very same expected frame.
    scalar_times_frame = 3 * df5
    assert_df_equals(scalar_times_frame, expected)
def test_median(self):
    """median() on df1 returns one row: 8 for 'b' and NaN for 'c'."""
    medians = df1.median()
    expected_b = np.array([8])
    expected_c = np.array([np.nan])
    expected = pdc.DataFrame({'b': expected_b, 'c': expected_c})
    assert_df_equals(medians, expected)
def test_add(self):
    """Adding 3 gives the same frame from either operand order."""
    frame_plus_scalar = df5 + 3
    expected = pdc.DataFrame({'a': a5 + 3, 'b': b5 + 3})
    assert_df_equals(frame_plus_scalar, expected)

    # The reflected form is compared with the very same expected frame.
    scalar_plus_frame = 3 + df5
    assert_df_equals(scalar_plus_frame, expected)
def test_std(self):
    """std() on df1 returns one row: b1.std() for 'b' and NaN for 'c'."""
    deviations = df1.std()
    expected_b = np.array([b1.std()])
    expected_c = np.array([np.nan])
    expected = pdc.DataFrame({'b': expected_b, 'c': expected_c})
    assert_df_equals(deviations, expected)