Exemplo n.º 1
0
def tzone_compute(con, guid, tz):
    """Create, populate and yield a temporary timestamp table, then drop it.

    A table named ``guid`` is created through ``con`` with a ``ts`` column
    typed ``dt.timestamp(tz)``, a double column ``b`` and a string column
    ``c``. Ten rows are appended with ``DataFrame.to_sql`` over ``con.con``
    and the table expression is yielded to the caller. When the generator is
    resumed or closed, the table is dropped and its absence is asserted.

    NOTE(review): the generator shape suggests this is used as a pytest
    yield-fixture; the decorator is not visible here -- confirm.

    Parameters
    ----------
    con
        Client exposing ``create_table``, ``table``, ``drop_table``,
        ``list_tables`` and a ``con`` attribute that ``DataFrame.to_sql``
        accepts as its connection.
    guid
        Name of the table to create and later drop.
    tz
        Timezone handed to both ``dt.timestamp`` and ``pd.date_range``.

    Yields
    ------
    The table expression returned by ``con.table(guid)``.
    """
    schema = ibis.schema([
        ('ts', dt.timestamp(tz)),
        ('b', 'double'),
        ('c', 'string'),
    ])
    con.create_table(guid, schema=schema)

    # Everything that happens once the table exists runs under ``try`` so a
    # failure while loading the rows still drops the table instead of
    # leaking it into the database.
    try:
        t = con.table(guid)

        n = 10
        df = pd.DataFrame({
            # NOTE(review): ``.values`` passes the underlying NumPy array,
            # which presumably discards the tz annotation -- confirm intended.
            'ts': pd.date_range('2017-04-01', periods=n, tz=tz).values,
            'b': np.arange(n).astype('float64'),
            'c': list(string.ascii_lowercase[:n]),
        })

        df.to_sql(
            guid,
            con.con,
            index=False,
            if_exists='append',
            dtype={
                'ts': sa.TIMESTAMP(timezone=True),
                'b': sa.FLOAT,
                'c': sa.TEXT,
            }
        )

        yield t
    finally:
        con.drop_table(guid)
        assert guid not in con.list_tables()
Exemplo n.º 2
0
    [([True, False, False], dt.boolean), (np.int8([-3, 9, 17]), dt.int8),
     (np.uint8([3, 0, 16]), dt.uint8), (np.int16([-5, 0, 12]), dt.int16),
     (np.uint16([5569, 1, 33]), dt.uint16),
     (np.int32([-12, 3, 25000]), dt.int32),
     (np.uint32([100, 0, 6]), dt.uint32), (np.uint64([666, 2, 3]), dt.uint64),
     (np.int64([102, 67228734, -0]), dt.int64),
     (np.float32([45e-3, -0.4, 99.]), dt.float),
     (np.float64([-3e43, 43., 10000000.]), dt.double),
     (['foo', 'bar', 'hello'], dt.string),
     ([
         pd.Timestamp('2010-11-01 00:01:00'),
         pd.Timestamp('2010-11-01 00:02:00.1000'),
         pd.Timestamp('2010-11-01 00:03:00.300000')
     ], dt.timestamp),
     (pd.date_range('20130101', periods=3,
                    tz='US/Eastern'), dt.timestamp('US/Eastern')),
     ([
         pd.Timedelta('1 days'),
         pd.Timedelta('-1 days 2 min 3us'),
         pd.Timedelta('-2 days +23:57:59.999997')
     ], dt.Interval('ns')),
     (pd.Series(['a', 'b', 'c', 'a']).astype('category'), dt.Category())])
def test_infer_simple_dataframe(column, expected_dtype):
    """Check schema inference on a single-column DataFrame.

    ``column`` is stored under the key ``'col'``; the schema inferred by
    ``sch.infer`` must equal a one-field ibis schema mapping ``'col'`` to
    ``expected_dtype``.
    """
    frame = pd.DataFrame({'col': column})
    inferred = sch.infer(frame)
    wanted = ibis.schema([('col', expected_dtype)])
    assert inferred == wanted


def test_infer_exhaustive_dataframe():
    df = pd.DataFrame({
        'bigint_col':
        np.array([0, 10, 20, 30, 40, 50, 60, 70, 80, 90], dtype='i8'),
Exemplo n.º 3
0
    (dt.uint32, 'INT64'),
    (dt.int8, 'INT64'),
    (dt.int16, 'INT64'),
    (dt.int32, 'INT64'),
    (dt.int64, 'INT64'),
    (dt.string, 'STRING'),
    (dt.Array(dt.int64), 'ARRAY<INT64>'),
    (dt.Array(dt.string), 'ARRAY<STRING>'),
    (dt.Struct.from_tuples([
        ('a', dt.int64),
        ('b', dt.string),
        ('c', dt.Array(dt.string)),
    ]), 'STRUCT<a INT64, b STRING, c ARRAY<STRING>>'),
    (dt.date, 'DATE'),
    (dt.timestamp, 'TIMESTAMP'),
    pytest.mark.xfail((dt.timestamp(timezone='US/Eastern'), 'TIMESTAMP'),
                      raises=TypeError,
                      reason='Not supported in BigQuery'),
    ('array<struct<a: string>>', 'ARRAY<STRUCT<a STRING>>'),
    pytest.mark.xfail((dt.Decimal(38, 9), 'NUMERIC'),
                      raises=TypeError,
                      reason='Not supported in BigQuery'),
])
def test_simple(datatype, expected):
    """Check that ``datatype`` translates to the ``expected`` BigQuery type.

    A fresh ``TypeTranslationContext`` is used for every translation.
    """
    translation_ctx = TypeTranslationContext()
    actual = ibis_type_to_bigquery_type(datatype, translation_ctx)
    assert actual == expected


@pytest.mark.parametrize('datatype', [dt.uint64, dt.Decimal(8, 3)])
def test_simple_failure_mode(datatype):
    with pytest.raises(TypeError):