示例#1
0
def test_to_csv(sample_df, tmpdir):
    """Round-trip a fully annotated DataTable through ``to_csv`` on local disk.

    A DataTable is built from ``sample_df`` with a name, an index, semantic
    tags, an ``Ordinal`` logical type, column descriptions and column
    metadata. It is written to ``tmpdir`` and read back with
    ``deserialize.read_datatable``; both the underlying data and the
    DataTable objects must compare equal afterwards.
    """
    dt = DataTable(sample_df,
                   name='test_data',
                   index='id',
                   semantic_tags={'id': 'tag1'},
                   logical_types={'age': Ordinal(order=[25, 33, 57])},
                   column_descriptions={
                       'signup_date': 'original signup date',
                       'age': 'age of the user'
                   },
                   column_metadata={
                       'id': {
                           'is_sorted': True
                       },
                       'age': {
                           'interesting_values': [33, 57]
                       }
                   })

    dt.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    _dt = deserialize.read_datatable(str(tmpdir))

    # Convert each frame with its OWN table's index. Using the deserialized
    # index for both sides would hide a lost or altered index from the
    # data comparison.
    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index, sort_index=True),
        to_pandas(_dt.to_dataframe(), index=_dt.index, sort_index=True))
    assert dt == _dt
示例#2
0
def test_unserializable_table(sample_df, tmpdir):
    """Check that ``to_csv`` raises TypeError for non-serializable metadata.

    The table metadata holds the dtype object of the ``is_registered``
    column, and the test expects ``to_csv`` to fail with the serialization
    error message below.
    """
    bad_metadata = {'not_serializable': sample_df['is_registered'].dtype}
    table = DataTable(sample_df, table_metadata=bad_metadata)

    # Note: ``match`` treats this text as a regular expression.
    expected_message = (
        "DataTable is not json serializable. "
        "Check table and column metadata for values that may not be serializable."
    )
    with pytest.raises(TypeError, match=expected_message):
        table.to_csv(str(tmpdir), encoding='utf-8', engine='python')
示例#3
0
def test_to_csv_with_latlong(latlong_df, tmpdir):
    """Round-trip a DataTable whose columns are all typed as ``LatLong``.

    Every column of ``latlong_df`` is assigned the ``'LatLong'`` logical
    type and ``tuple_ints`` is used as the index. The table is written to
    ``tmpdir`` with default ``to_csv`` options and read back; data and
    DataTable must both compare equal.
    """
    latlong_types = dict.fromkeys(latlong_df.columns, 'LatLong')
    original = DataTable(latlong_df,
                         index='tuple_ints',
                         logical_types=latlong_types)

    target_dir = str(tmpdir)
    original.to_csv(target_dir)
    restored = deserialize.read_datatable(target_dir)

    expected_frame = to_pandas(original.to_dataframe(),
                               index=original.index,
                               sort_index=True)
    restored_frame = to_pandas(restored.to_dataframe(),
                               index=restored.index,
                               sort_index=True)
    pd.testing.assert_frame_equal(expected_frame, restored_frame)
    assert original == restored
示例#4
0
def test_s3_test_profile(sample_df, s3_client, s3_bucket, setup_test_profile):
    """Round-trip a DataTable through ``TEST_S3_URL`` using the ``test`` profile.

    The table is written with ``profile_name='test'``, ``make_public`` is
    called on the bucket, and the table is read back with the same profile.
    Data and DataTable must both compare equal.
    """
    # NOTE(review): presumably flags backends for which this test is expected
    # to fail -- confirm against the helper's definition.
    xfail_tmp_disappears(sample_df)

    original = DataTable(sample_df)
    write_options = {
        'encoding': 'utf-8',
        'engine': 'python',
        'profile_name': 'test',
    }
    original.to_csv(TEST_S3_URL, **write_options)
    make_public(s3_client, s3_bucket)
    restored = deserialize.read_datatable(TEST_S3_URL, profile_name='test')

    expected_frame = to_pandas(original.to_dataframe(), index=original.index)
    restored_frame = to_pandas(restored.to_dataframe(), index=restored.index)
    pd.testing.assert_frame_equal(expected_frame, restored_frame)
    assert original == restored
示例#5
0
def test_to_csv_S3(sample_df, s3_client, s3_bucket):
    """Round-trip an annotated DataTable through ``TEST_S3_URL``.

    The table carries a name, an index, a semantic tag and an ``Ordinal``
    logical type. It is written to ``TEST_S3_URL``, ``make_public`` is called
    on the bucket, and the table is read back without a profile. Data and
    DataTable must both compare equal.
    """
    # NOTE(review): presumably flags backends for which this test is expected
    # to fail -- confirm against the helper's definition.
    xfail_tmp_disappears(sample_df)

    table_options = {
        'name': 'test_data',
        'index': 'id',
        'semantic_tags': {'id': 'tag1'},
        'logical_types': {'age': Ordinal(order=[25, 33, 57])},
    }
    original = DataTable(sample_df, **table_options)
    original.to_csv(TEST_S3_URL, encoding='utf-8', engine='python')
    make_public(s3_client, s3_bucket)

    restored = deserialize.read_datatable(TEST_S3_URL)

    expected_frame = to_pandas(original.to_dataframe(), index=original.index)
    restored_frame = to_pandas(restored.to_dataframe(), index=restored.index)
    pd.testing.assert_frame_equal(expected_frame, restored_frame)
    assert original == restored
示例#6
0
def test_serialize_url_csv(sample_df):
    """Check that ``to_csv`` raises ValueError when the destination is ``URL``."""
    table = DataTable(sample_df)
    with pytest.raises(ValueError, match="Writing to URLs is not supported"):
        table.to_csv(URL, encoding='utf-8', engine='python')