def test_ReadPandas_pkl():
    """Check ReadPandas output for the same table stored in several formats.

    The pickle fixture is regenerated from the CSV fixture first, then the
    identical set of assertions is run against the .pkl, .csv, .tsv and
    .xlsx variants of 'tests/data/pandas_table'.

    The expected values cover the ``dropnan``, ``replacenan``, ``columns``
    and ``rows`` arguments of ReadPandas.
    """
    # create pickle version of table from CSV table
    df = pd.read_csv('tests/data/pandas_table.csv')
    df.to_pickle('tests/data/pandas_table.pkl')

    for ext in ['.pkl', '.csv', '.tsv', '.xlsx']:
        filepath = 'tests/data/pandas_table' + ext

        # Rows containing NaN are removed when dropnan=True.
        samples = ReadPandas(filepath, dropnan=True) >> Collect()
        nt.assert_equal(samples, [[1, 4], [3, 6]])

        # np.nan instead of np.NaN: the capitalised alias was removed in
        # NumPy 2.0, while np.nan is available in every NumPy version.
        samples = ReadPandas(filepath, dropnan=False) >> Collect()
        nt.assert_equal(samples, [[1, 4], [2, np.nan], [3, 6]])

        # Passing replacenan keeps the row and substitutes the given value.
        samples = ReadPandas(filepath, replacenan=None) >> Collect()
        nt.assert_equal(samples, [[1, 4], [2, None], [3, 6]])

        # Without dropnan/replacenan the NaN row is expected to be absent.
        samples = ReadPandas(filepath, columns=['col1', 'col2']) >> Collect()
        nt.assert_equal(samples, [[1, 4], [3, 6]])

        samples = ReadPandas(filepath, columns=['col1']) >> Collect()
        nt.assert_equal(samples, [[1], [2], [3]])

        # Only col2 is selected, so only its NaN entry is dropped.
        samples = ReadPandas(filepath, columns=['col2']) >> Collect()
        nt.assert_equal(samples, [[4], [6]])

        samples = ReadPandas(filepath, columns=['col2'],
                             replacenan='NA') >> Collect()
        nt.assert_equal(samples, [[4], ['NA'], [6]])

        # rows takes a filter expression over the column names.
        samples = ReadPandas(filepath, rows='col1 > 1',
                             replacenan=0) >> Collect()
        nt.assert_equal(samples, [[2, 0], [3, 6]])

        samples = ReadPandas(filepath, rows='col1 < 3',
                             columns=['col1']) >> Collect()
        nt.assert_equal(samples, [[1], [2]])
def test_ReadPandas_dply():
    """Check that ReadPandas(...).dply() can feed a dp.select pipeline.

    Selects column ``col1`` from the CSV fixture and converts the result
    with DplyToList; the expected samples are [[1], [2], [3]].
    """
    csv_path = 'tests/data/pandas_table.csv'

    # Build the pipeline stage by stage; ``>>`` is left-associative, so this
    # evaluates in the same order as a single chained expression.
    table = ReadPandas(csv_path).dply()
    col1_only = table >> dp.select(dp.X.col1)
    samples = col1_only >> DplyToList()

    nt.assert_equal(samples, [[1], [2], [3]])