Пример #1
0
def test_self_join(table):
    """Join ``table`` to a view of itself and exercise the joined expression.

    Checks that the repr of the join mentions both table references, that
    materializing the join raises ``RelationError``, that projecting the
    left table yields the left schema, and that aggregating on top of the
    join yields the expected ``g`` / ``metric`` schema.
    """
    # Self-joins are problematic with this design because column
    # expressions may reference either the left or the right table.
    # For example:
    #
    # SELECT left.key, sum(left.value - right.value) as total_deltas
    # FROM table left
    #  INNER JOIN table right
    #    ON left.current_period = right.previous_period + 1
    # GROUP BY 1
    #
    # One way around the self-join issue is to force the user to add
    # prefixes to the joined fields, then project using those. That is
    # not very satisfying, though.
    lhs = table
    rhs = table.view()

    difference = lhs['a'] - rhs['b']
    metric = difference.mean().name('metric')

    predicates = [rhs['g'] == lhs['g']]
    joined = lhs.inner_join(rhs, predicates)

    # Basic check that there are no referential problems: both sides of
    # the join must show up as distinct references in the repr.
    rendered = repr(joined)
    assert 'ref_0' in rendered
    assert 'ref_1' in rendered

    # The join cannot be materialized right away because the two sides
    # have overlapping schemas.
    with pytest.raises(RelationError):
        joined.materialize()

    # Projecting the left table out of the join gives back its schema.
    left_only = joined[[lhs]]
    assert_equal(left_only.schema(), lhs.schema())

    # Aggregating on top of the join: group by the left 'g' column.
    grouped = joined.aggregate([metric], by=[lhs['g']])
    expected_schema = api.Schema(['g', 'metric'], ['string', 'double'])
    assert_equal(grouped.schema(), expected_schema)
Пример #2
0
def test_cross_join(table):
    """Cross join ``table`` with its own scalar aggregates.

    The materialized result is expected to carry the table's schema with
    the aggregate columns (``sum_a``, ``mean_b``) appended after it.
    """
    sum_a = table['a'].sum().name('sum_a')
    mean_b = table['b'].mean().name('mean_b')
    scalar_aggs = table.aggregate([sum_a, mean_b])

    crossed = table.cross_join(scalar_aggs)
    materialized = crossed.materialize()

    # Expected layout: original columns first, then the two aggregates.
    aggregate_schema = api.Schema(['sum_a', 'mean_b'], ['int64', 'double'])
    expected_schema = table.schema().append(aggregate_schema)
    assert_equal(materialized.schema(), expected_schema)
Пример #3
0
def test_equijoin_schema_merge():
    """Check the merged schema of materialized equi-joins.

    Two tables with disjoint column names are joined on
    ``key1 == key2`` using inner, left and outer joins; each materialized
    result is expected to list the left table's columns followed by the
    right table's columns.
    """
    left = ibis.table([('key1', 'string'), ('value1', 'double')])
    right = ibis.table([('key2', 'string'), ('stuff', 'int32')])
    predicate = left['key1'] == right['key2']

    expected_schema = api.Schema(
        ['key1', 'value1', 'key2', 'stuff'],
        ['string', 'double', 'string', 'int32'],
    )

    # The same expectation holds for every join flavour listed here.
    for method_name in ('inner_join', 'left_join', 'outer_join'):
        join_method = getattr(left, method_name)
        merged = join_method(right, [predicate]).materialize()
        assert_equal(merged.schema(), expected_schema)