示例#1
0
class Covariance(Filterable, Reduction):
    """Covariance of a set of number pairs."""

    # The two inputs that form the pairs; each is declared as a numeric
    # column.
    left = rlz.column(rlz.numeric)
    right = rlz.column(rlz.numeric)
    # Must be one of 'sample' or 'pop' (presumably sample vs. population
    # covariance -- confirm against the backend compilers).
    how = rlz.isin({'sample', 'pop'})

    # The result dtype is always float64, independent of the input dtypes.
    output_dtype = dt.float64
示例#2
0
class Correlation(Filterable, Reduction):
    """Coefficient of correlation of a set of number pairs."""

    # The two inputs that form the pairs; each is declared as a numeric
    # column.
    left = rlz.column(rlz.numeric)
    right = rlz.column(rlz.numeric)
    # Must be one of 'sample' or 'pop' (presumably sample vs. population
    # statistics -- confirm against the backend compilers).
    how = rlz.isin({'sample', 'pop'})

    # The result dtype is always float64, independent of the input dtypes.
    output_dtype = dt.float64
示例#3
0
 class MyOp(ops.ValueOp):
     # Single input: a table argument named 'table'. Its schema rule lists two
     # columns, 'group' and 'value', both with a number value type.
     # NOTE(review): `with_column_subset` presumably means the table must
     # contain at least these columns -- confirm against the rules module.
     input_type = [
         rules.table(name='table',
                     schema=rules.table.with_column_subset(
                         rules.column(name='group',
                                      value_type=rules.number),
                         rules.column(name='value',
                                      value_type=rules.number)))
     ]
     # The output type is derived from the argument at index 0 (the table).
     output_type = rules.type_of_arg(0)
示例#4
0
class NotAny(Reduction, _Negatable):
    """Boolean reduction whose negation is ``Any`` over the same arguments."""

    # Input must be a boolean column.
    arg = rlz.column(rlz.boolean)

    # The reduction always yields a boolean.
    output_dtype = dt.boolean

    def negate(self) -> Any:
        """Return an ``Any`` op constructed from this op's own arguments."""
        return Any(*self.args)
示例#5
0
class Bucket(BucketLike):
    """Bucketing op described by an explicit sequence of bucket edges.

    Construction fails unless at least one edge is supplied; with exactly one
    edge, both ``include_under`` and ``include_over`` must be truthy.
    """

    arg = rlz.column(rlz.any)
    buckets = rlz.tuple_of(rlz.scalar(rlz.any))
    closed = rlz.optional(rlz.isin({'left', 'right'}), default='left')
    close_extreme = rlz.optional(rlz.instance_of(bool), default=True)
    include_under = rlz.optional(rlz.instance_of(bool), default=False)
    include_over = rlz.optional(rlz.instance_of(bool), default=False)

    def __init__(self, buckets, include_under, include_over, **kwargs):
        """Check the edge count, then hand every argument to the parent.

        Raises
        ------
        ValueError
            If ``buckets`` is empty, or if it holds a single edge while
            ``include_under`` and ``include_over`` are not both truthy.
        """
        edge_count = len(buckets)

        if edge_count == 0:
            raise ValueError('Must be at least one bucket edge')

        # A lone edge is only accepted when both open-ended sides are kept.
        if edge_count == 1 and not (include_under and include_over):
            raise ValueError('If one bucket edge provided, must have '
                             'include_under=True and include_over=True')

        super().__init__(
            buckets=buckets,
            include_under=include_under,
            include_over=include_over,
            **kwargs,
        )

    @property
    def nbuckets(self):
        """Number of buckets: edges minus one, plus one per open-ended flag."""
        interior = len(self.buckets) - 1
        return interior + self.include_over + self.include_under
示例#6
0
class ShiftBase(Analytic):
    """Common field declarations for shift-style analytic ops.

    NOTE(review): the concrete subclasses are not visible here; presumably
    these are lag/lead-like operations -- confirm.
    """

    # The column being operated on; any column type is accepted.
    arg = rlz.column(rlz.any)

    # Optional; when supplied it must satisfy either the integer rule or the
    # interval rule.
    offset = rlz.optional(rlz.one_of((rlz.integer, rlz.interval)))
    # Optional value of any type (presumably the fill value for positions
    # with no source row -- confirm).
    default = rlz.optional(rlz.any)

    # The output dtype is derived from ``arg``.
    output_dtype = rlz.dtype_like("arg")
示例#7
0
class ApproxMedian(Filterable, Reduction):
    """
    Compute the approximate median of a set of comparable values using the
    Count-Min-Sketch algorithm. Exposed in Impala using APPX_MEDIAN.
    """

    # Any column type is accepted by the validator.
    arg = rlz.column(rlz.any)
    # The output dtype is derived from ``arg``.
    output_dtype = rlz.dtype_like('arg')
示例#8
0
class Sum(Filterable, Reduction):
    """Sum reduction over a numeric column."""

    arg = rlz.column(rlz.numeric)

    @immutable_property
    def output_dtype(self):
        """Return int64 for boolean input, else the input type's ``largest``."""
        operand = self.arg
        return (
            dt.int64
            if isinstance(operand, ir.BooleanValue)
            else operand.type().largest
        )
示例#9
0
class VarianceBase(Filterable, Reduction):
    """Field declarations and output typing shared by variance-style ops."""

    arg = rlz.column(rlz.numeric)
    how = rlz.isin({'sample', 'pop'})

    @immutable_property
    def output_dtype(self):
        """Return float64, except decimal input keeps its ``largest`` type."""
        # Anything that is not a decimal expression yields float64.
        if not isinstance(self.arg, ir.DecimalValue):
            return dt.float64
        return self.arg.type().largest
示例#10
0
class ApproxCountDistinct(Filterable, Reduction):
    """Approximate number of unique values using HyperLogLog algorithm.

    Impala offers the NDV built-in function for this.
    """

    # Any column type is accepted by the validator.
    arg = rlz.column(rlz.any)

    # NOTE(review): the comment here previously read "Impala 2.0 and higher
    # returns a DOUBLE return ir.DoubleScalar", which looks like a prose note
    # fused with a leftover line of code. The declared output dtype is int64
    # even though Impala 2.0+ reportedly returns a DOUBLE -- confirm which is
    # intended.
    output_dtype = dt.int64
示例#11
0
class Contains(Value):
    """Boolean-valued op over a ``value`` and a collection of ``options``.

    NOTE(review): presumably a membership test (per the class name); the
    evaluation itself is implemented elsewhere -- confirm.
    """

    # The value being looked up; any value type is accepted.
    value = rlz.any
    # The candidates: must satisfy one of the listed rules (a value list, a
    # set, a column, or an array).
    options = rlz.one_of([
        rlz.value_list_of(rlz.any),
        rlz.set_,
        rlz.column(rlz.any),
        rlz.array_of(rlz.any),
    ])

    # The result is always boolean; its shape is derived from the arguments.
    output_dtype = dt.boolean
    output_shape = rlz.shape_like("args")
示例#12
0
class CumulativeMean(CumulativeOp):
    """Cumulative mean. Requires an order window."""

    arg = rlz.column(rlz.numeric)

    @immutable_property
    def output_dtype(self):
        """Return the ``largest`` decimal type for decimal input, else float64."""
        operand = self.arg
        is_decimal = isinstance(operand, ir.DecimalValue)
        return operand.type().largest if is_decimal else dt.float64
示例#13
0
class CumulativeSum(CumulativeOp):
    """Cumulative sum. Requires an ordering window."""

    arg = rlz.column(rlz.numeric)

    @immutable_property
    def output_dtype(self):
        """Return int64 for boolean input, else the input type's ``largest``."""
        # Boolean input is reported as int64 rather than via ``largest``.
        if isinstance(self.arg, ir.BooleanValue):
            return dt.int64
        return self.arg.type().largest
示例#14
0
class Mean(Filterable, Reduction):
    """Mean reduction over a numeric column."""

    arg = rlz.column(rlz.numeric)

    @immutable_property
    def output_dtype(self):
        """Return the input's own type for decimal input, else float64."""
        if not isinstance(self.arg, ir.DecimalValue):
            return dt.float64
        # Decimal input keeps its own type unchanged.
        return self.arg.type()

    def root_tables(self):
        """Return the distinct root tables reachable from ``arg``."""
        operand = self.arg
        return distinct_roots(operand)
示例#15
0
class TopK(Node):
    """Node taking a column ``arg``, a non-negative ``k`` and a ``by`` value."""

    arg = rlz.column(rlz.any)
    k = rlz.non_negative_integer
    by = rlz.one_of((rlz.function_of(rlz.base_table_of("arg")), rlz.any))
    output_type = ir.TopK

    def blocks(self):  # pragma: no cover
        """Always report ``True``."""
        return True

    def root_tables(self):  # pragma: no cover
        """Return the distinct roots of every expression among the flat args."""
        expr_args = [
            candidate
            for candidate in self.flat_args()
            if isinstance(candidate, ir.Expr)
        ]
        return distinct_roots(*expr_args)
示例#16
0
class BitOr(Filterable, Reduction):
    """Aggregate bitwise OR operation.

    All elements in an integer column are ORed together. This can be used
    to determine which bit flags are set on any element.

    Resources:

    * BigQuery [`BIT_OR`](https://cloud.google.com/bigquery/docs/reference/standard-sql/aggregate_functions#bit_or)
    * MySQL [`BIT_OR`](https://dev.mysql.com/doc/refman/5.7/en/aggregate-functions.html#function_bit-or)
    """  # noqa: E501

    # Input must be an integer column.
    arg = rlz.column(rlz.integer)
    # The output dtype is derived from ``arg``.
    output_dtype = rlz.dtype_like('arg')
示例#17
0
class BitXor(Filterable, Reduction):
    """Aggregate bitwise XOR operation.

    All elements in an integer column are XORed together. This can be used
    as a parity checksum of element values.

    Resources:

    * BigQuery [`BIT_XOR`](https://cloud.google.com/bigquery/docs/reference/standard-sql/aggregate_functions#bit_xor)
    * MySQL [`BIT_XOR`](https://dev.mysql.com/doc/refman/5.7/en/aggregate-functions.html#function_bit-xor)
    """  # noqa: E501

    # Input must be an integer column.
    arg = rlz.column(rlz.integer)
    # The output dtype is derived from ``arg``.
    output_dtype = rlz.dtype_like('arg')
示例#18
0
class ArrayColumn(Value):
    """Columnar op built from one or more columns that share a single type."""

    cols = rlz.value_list_of(rlz.column(rlz.any), min_length=1)

    output_shape = rlz.Shape.COLUMNAR

    def __init__(self, cols):
        """Reject inputs whose column types differ, then defer to the parent.

        Raises
        ------
        com.IbisTypeError
            If the given columns do not all report the same type.
        """
        distinct_types = {column.type() for column in cols}
        types_differ = len(distinct_types) > 1
        if types_differ:
            raise com.IbisTypeError(
                f'The types of all input columns must match exactly in a '
                f'{type(self).__name__} operation.')
        super().__init__(cols=cols)

    @immutable_property
    def output_dtype(self):
        """Return an array dtype whose element type is the first column's."""
        return dt.Array(self.cols[0].type())
示例#19
0
class VectorizedUDF(Value):
    """Value op wrapping a Python function applied to column arguments."""

    # The wrapped callable; must be a plain function or a lambda.
    func = rlz.instance_of((FunctionType, LambdaType))
    # The column expressions passed to ``func``.
    func_args = rlz.tuple_of(rlz.column(rlz.any))
    # TODO(kszucs): should rename these arguments to
    # input_dtypes and return_dtype
    input_type = rlz.tuple_of(rlz.datatype)
    return_type = rlz.datatype

    @property
    def inputs(self):
        """Return the column arguments (alias of ``func_args``)."""
        return self.func_args

    @property
    def output_dtype(self):
        """Return the declared ``return_type`` as the output dtype."""
        return self.return_type

    def root_tables(self):
        """Return the distinct root tables across all of ``func_args``."""
        return distinct_roots(*self.func_args)
示例#20
0
class SortKey(Node):
    """Node pairing a column expression with a sort direction flag."""

    # The column expression to sort by.
    expr = rlz.column(rlz.any)
    # Optional direction flag, defaulting to True.
    # NOTE: in Python ``1 == True`` and ``0 == False`` (with equal hashes), so
    # the ``1``/``0`` entries below share keys with the ``True``/``False``
    # entries; the resulting dict has two keys that match both spellings.
    ascending = rlz.optional(
        rlz.map_to({
            True: True,
            False: False,
            1: True,
            0: False,
        }, ),
        default=True,
    )

    output_type = ir.SortExpr

    def root_tables(self):
        """Return the root tables of the op underlying ``expr``."""
        return self.expr.op().root_tables()

    def resolve_name(self):
        """Return the name of ``expr``."""
        return self.expr.get_name()
示例#21
0
class MinRank(RankBase):
    """
    Compute position of first element within each equal-value group in sorted
    order. Equivalent to SQL RANK().

    NOTE(review): the ranks in the example below start at 0, whereas SQL
    RANK() numbers rows from 1 -- confirm where the offset is applied.

    Examples
    --------
    values   ranks
    1        0
    1        0
    2        2
    2        2
    2        2
    3        5

    Returns
    -------
    Int64Column
        The min rank
    """

    # The column to rank; any column type is accepted by the validator.
    arg = rlz.column(rlz.any)
示例#22
0
class DenseRank(RankBase):
    """
    Compute position of first element within each equal-value group in sorted
    order, ignoring duplicate values. Equivalent to SQL DENSE_RANK().

    NOTE(review): the ranks in the example below start at 0, whereas SQL
    DENSE_RANK() numbers rows from 1 -- confirm where the offset is applied.

    Examples
    --------
    values   ranks
    1        0
    1        0
    2        1
    2        1
    2        1
    3        2

    Returns
    -------
    IntegerColumn
        The rank
    """

    # The column to rank; any column type is accepted by the validator.
    arg = rlz.column(rlz.any)
示例#23
0
    assert result.equals(expected)


@pytest.mark.parametrize(
    ('units', 'value', 'expected'),
    [
        ({'Y'}, ibis.interval(hours=1), IbisTypeError),
        ({'Y', 'M', 'D'}, ibis.interval(hours=1), IbisTypeError),
        ({'Q', 'W', 'D'}, ibis.interval(seconds=1), IbisTypeError),
    ],
)
def test_invalid_interval(units, value, expected):
    """The interval rule must raise ``expected`` for each parametrized case."""
    rule_kwargs = {'units': units}
    with pytest.raises(expected):
        rlz.interval(value, **rule_kwargs)


@pytest.mark.parametrize(
    ('validator', 'value', 'expected'),
    [
        (rlz.column(rlz.any), table.int_col, table.int_col),
        (rlz.column(rlz.string), table.string_col, table.string_col),
        (rlz.scalar(rlz.integer), ibis.literal(3), ibis.literal(3)),
        (rlz.scalar(rlz.any), 'caracal', ibis.literal('caracal')),
    ],
)
def test_valid_column_or_scalar(validator, value, expected):
    """Each validator must return an expression equal to ``expected``."""
    assert validator(value).equals(expected)


@pytest.mark.parametrize(('validator', 'value', 'expected'), [
    (rlz.column(rlz.integer), table.double_col, IbisTypeError),
    (rlz.column(rlz.any), ibis.literal(3), IbisTypeError),
    (rlz.column(rlz.integer), ibis.literal(3), IbisTypeError),
])
def test_invalid_column_or_scalar(validator, value, expected):
    with pytest.raises(expected):
示例#24
0
    ('units', 'value', 'expected'),
    [
        ({'Y'}, ibis.interval(hours=1), IbisTypeError),
        ({'Y', 'M', 'D'}, ibis.interval(hours=1), IbisTypeError),
        ({'Q', 'W', 'D'}, ibis.interval(seconds=1), IbisTypeError),
    ],
)
def test_invalid_interval(units, value, expected):
    with pytest.raises(expected):
        rlz.interval(value, units=units)


@pytest.mark.parametrize(
    ('validator', 'value', 'expected'),
    [
        (rlz.column(rlz.any), table.int_col, table.int_col),
        (rlz.column(rlz.string), table.string_col, table.string_col),
        (rlz.scalar(rlz.integer), ibis.literal(3), ibis.literal(3)),
        (rlz.scalar(rlz.any), 'caracal', ibis.literal('caracal')),
    ],
)
def test_valid_column_or_scalar(validator, value, expected):
    """Applying the validator to ``value`` must yield ``expected``."""
    validated = validator(value)
    is_match = validated.equals(expected)
    assert is_match


@pytest.mark.parametrize(
    ('validator', 'value', 'expected'),
    [
        (rlz.column(rlz.integer), table.double_col, IbisTypeError),
        (rlz.column(rlz.any), ibis.literal(3), IbisTypeError),
示例#25
0
class Arbitrary(Filterable, Reduction):
    """Reduction over a column with an optional ``how`` selector."""

    # Any column type is accepted by the validator.
    arg = rlz.column(rlz.any)
    # Optional; when given it must be one of 'first', 'last' or 'heavy'
    # (presumably which value of the column to pick -- confirm).
    how = rlz.optional(rlz.isin({'first', 'last', 'heavy'}))
    # The output dtype is derived from ``arg``.
    output_dtype = rlz.dtype_like('arg')
示例#26
0
class ArrayCollect(Reduction):
    """Reduction whose result is an array of the input column's type."""

    # Any column type is accepted by the validator.
    arg = rlz.column(rlz.any)

    @immutable_property
    def output_dtype(self):
        """Return an array dtype whose element type is ``arg``'s type."""
        return dt.Array(self.arg.type())
示例#27
0
class CountDistinct(Filterable, Reduction):
    """Reduction over a column producing an int64 result.

    NOTE(review): presumably the number of distinct values (per the class
    name); the computation itself is implemented elsewhere -- confirm.
    """

    # Any column type is accepted by the validator.
    arg = rlz.column(rlz.any)

    # The result dtype is always int64.
    output_dtype = dt.int64
示例#28
0
class All(Reduction):
    """Boolean reduction whose negation is ``NotAll`` over the same column."""

    # Input must be a boolean column; the result is boolean as well.
    arg = rlz.column(rlz.boolean)
    output_dtype = dt.boolean

    def negate(self):
        """Return a ``NotAll`` op constructed from this op's ``arg``."""
        return NotAll(self.arg)
示例#29
0
class Count(Filterable, Reduction):
    """Reduction over a column or a table producing an int64 result."""

    # Accepts either a column of any type or a table.
    arg = rlz.one_of((rlz.column(rlz.any), rlz.table))
    # The result dtype is always int64.
    output_dtype = dt.int64
示例#30
0
class GroupConcat(Filterable, Reduction):
    """Reduction over a column with a string ``sep``, producing a string.

    NOTE(review): presumably joins the column's values using ``sep`` (per the
    class name); the computation itself is implemented elsewhere -- confirm.
    """

    # Any column type is accepted by the validator.
    arg = rlz.column(rlz.any)
    # Must satisfy the string rule.
    sep = rlz.string

    # The result dtype is always string.
    output_dtype = dt.string
示例#31
0
class Min(Filterable, Reduction):
    """Reduction over a column whose result has the same dtype as its input.

    NOTE(review): presumably the minimum value (per the class name); the
    computation itself is implemented elsewhere -- confirm.
    """

    # Any column type is accepted by the validator.
    arg = rlz.column(rlz.any)
    # The output dtype is derived from ``arg``.
    output_dtype = rlz.dtype_like('arg')