def test_query_transform_mean():
    """Verify ``query_transform`` with a ``mean[distance]`` query expression.

    With distances 20, 10, 30 and 40, the query ``distance < mean[distance]``
    is expected to keep only rows ``a`` and ``b`` (values 20 and 10).
    """
    source_frame = pd.DataFrame(
        {
            'distance': [20, 10, 30, 40],
            'token': ['t1', 't1', 't2', 't2'],
        },
        index=['a', 'b', 'c', 'd'],
    )
    params = {'query': 'distance < mean[distance]'}

    result_table = query_transform(PandasTable(source_frame), params)

    wanted = pd.DataFrame(
        {'distance': [20, 10], 'token': ['t1', 't1']},
        index=['a', 'b'],
    )
    assert wanted.equals(result_table.to_pandas())
def test_histogram_transform_numeric():
    """Verify ``histogram_transform`` on a numeric column with two bins.

    The table data is expected to pass through unchanged, while the
    transform's ``side_result`` is expected to hold bin centres ``x``,
    counts ``y`` and bin ``width`` values close to the ones listed here.
    """
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40]},
        index=['a', 'b', 'c', 'd'],
    )
    params = {'column': 'distance', 'bins': 2}

    result_table = histogram_transform(PandasTable(source_frame), params)

    wanted_side = {
        'x': [17.5, 32.5],
        'y': [2, 2],
        'width': [15., 15.],
    }

    # The histogram must not alter the underlying rows.
    assert source_frame.equals(result_table.to_pandas())
    # Floating-point comparison, checked key by key in a fixed order.
    for key in ('x', 'y', 'width'):
        assert np.allclose(wanted_side[key], result_table.side_result[key])
def test_search_transform_string():
    """Verify ``search_transform`` on a string column.

    Searching the ``token`` column for ``'t2'`` is expected to keep only
    rows ``c`` and ``d``.
    """
    source_frame = pd.DataFrame(
        {
            'distance': [20, 10, 30, 40],
            'token': ['t1', 't1', 't2', 't2'],
        },
        index=['a', 'b', 'c', 'd'],
    )
    params = {'column': 'token', 'searchterm': 't2'}

    result_table = search_transform(PandasTable(source_frame), params)

    wanted = pd.DataFrame(
        {'distance': [30, 40], 'token': ['t2', 't2']},
        index=['c', 'd'],
    )
    assert wanted.equals(result_table.to_pandas())
def test_histogram_transform_categorical():
    """Verify ``histogram_transform`` on a categorical (string) column.

    The table data is expected to pass through unchanged. The
    ``side_result`` is expected to list the categories ``t1``, ``t2`` and
    ``Other`` with counts ``[2, 2, 0]`` and a ``width`` of ``None``.
    """
    source_frame = pd.DataFrame(
        {'token': ['t1', 't1', 't2', 't2']},
        index=['a', 'b', 'c', 'd'],
    )
    params = {'column': 'token', 'bins': 2}

    result_table = histogram_transform(PandasTable(source_frame), params)

    wanted_side = {
        'x': ['t1', 't2', 'Other'],
        'y': [2, 2, 0],
        'width': None,
    }

    # The histogram must not alter the underlying rows.
    assert source_frame.equals(result_table.to_pandas())

    actual_side = result_table.side_result
    assert wanted_side.keys() == actual_side.keys()
    assert wanted_side['width'] == actual_side['width']
    # Compare labels as sets: 't1' and 't2' may be flipped.
    wanted_labels = set(wanted_side['x'])
    actual_labels = set(actual_side['x'])
    assert wanted_labels == actual_labels
    assert wanted_side['y'] == actual_side['y']
def test_sample_transform():
    """Verify ``sample_transform`` with a fraction of ``1.0``.

    Sampling the full fraction is expected to return a frame equal to the
    input, including its row order.
    """
    row_labels = ['a', 'b', 'c']
    source_frame = pd.DataFrame({'distance': [20, 10, 30]}, index=row_labels)

    result_table = sample_transform(
        PandasTable(source_frame),
        {'fraction': 1.0},
    )

    wanted = pd.DataFrame({'distance': [20, 10, 30]}, index=['a', 'b', 'c'])
    assert wanted.equals(result_table.to_pandas())
def test_pandas_table_apply_bounds():
    """Verify ``PandasTable.apply_bounds`` with the bounds ``(1, 2)``.

    For a four-row table, the call is expected to yield the rows labelled
    ``b`` and ``c``.
    """
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40]},
        index=['a', 'b', 'c', 'd'],
    )
    wanted = pd.DataFrame({'distance': [10, 30]}, index=['b', 'c'])

    bounded_table = PandasTable(source_frame).apply_bounds(1, 2)

    assert wanted.equals(bounded_table.to_pandas())
def test_pandas_table_to_pandas():
    """Verify that ``PandasTable.to_pandas`` returns a frame equal to the input."""
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40]},
        index=['a', 'b', 'c', 'd'],
    )

    round_tripped = PandasTable(source_frame).to_pandas()

    assert source_frame.equals(round_tripped)
def test_pandas_table_len():
    """Verify that ``len`` of a ``PandasTable`` wrapping four rows is 4."""
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30, 40]},
        index=['a', 'b', 'c', 'd'],
    )

    table = PandasTable(source_frame)

    assert 4 == len(table)
def test_sort_transform():
    """Verify ``sort_transform`` in descending order on ``distance``.

    With ``ascending`` set to ``False``, the rows are expected to come back
    as ``c``, ``a``, ``b`` (values 30, 20, 10), keeping their index labels.
    """
    source_frame = pd.DataFrame(
        {'distance': [20, 10, 30]},
        index=['a', 'b', 'c'],
    )
    params = {'column': 'distance', 'ascending': False}

    result_table = sort_transform(PandasTable(source_frame), params)

    wanted = pd.DataFrame(
        {'distance': [30, 20, 10]},
        index=['c', 'a', 'b'],
    )
    assert wanted.equals(result_table.to_pandas())