Exemplo n.º 1
0
def _cast(t, expr):
    """Translate a cast expression for SQLite.

    Casts to timestamp or date are only handled for integer and string
    inputs: integers go through ``datetime(..., 'unixepoch')`` and strings
    through ``strftime`` / ``date``. Any other input to a temporal cast
    raises ``com.TranslationError``. Category values cast to ``'int32'``
    are passed through untouched; everything else becomes a SQL ``CAST``.
    """
    source, to_type = expr.op().args
    compiled = t.translate(source)
    compiled_type = t.get_sqla_type(to_type)

    # Timestamp takes precedence; Date is only consulted when the target
    # is not a timestamp.
    wants_timestamp = isinstance(to_type, dt.Timestamp)
    if wants_timestamp or isinstance(to_type, dt.Date):
        if isinstance(source, ir.IntegerValue):
            moment = sa.func.datetime(compiled, 'unixepoch')
            return moment if wants_timestamp else sa.func.date(moment)
        if isinstance(source, ir.StringValue):
            if wants_timestamp:
                return sa.func.strftime('%Y-%m-%d %H:%M:%f', compiled)
            return sa.func.date(compiled)
        raise com.TranslationError(type(source))

    if isinstance(source, ir.CategoryValue) and to_type == 'int32':
        return compiled
    return sa.cast(compiled, compiled_type)
Exemplo n.º 2
0
    def _validate_join_predicates(self, predicates):
        for pred in predicates:
            op = pred.op()
            if not isinstance(op, ops.Equals):
                raise com.TranslationError('Non-equality join predicates are '
                                           'not supported')

            left_on, right_on = op.args
            if left_on.get_name() != right_on.get_name():
                raise com.TranslationError('Joining on different column names '
                                           'is not supported')
Exemplo n.º 3
0
 def formatter(translator, expr):
     """Format a call to ``func_name`` after checking the operand count.

     ``arity`` and ``func_name`` come from the enclosing scope. A mismatch
     between ``arity`` and the number of operation args raises
     ``com.TranslationError``.
     """
     operands = expr.op().args
     received = len(operands)
     if arity != received:
         raise com.TranslationError(
             'Incorrect number of args {0} instead of {1}'.format(
                 received, arity))
     return _call(translator, func_name, *operands)
Exemplo n.º 4
0
def _string_find(translator, expr):
    """Translate a string find into ``position(arg, substr) - 1``.

    The operation carries four args; the third (``start``) must be ``None``
    or ``com.TranslationError`` is raised, and the fourth is ignored.
    """
    haystack, needle, start, _ = expr.op().args
    if start is None:
        # Presumably the "- 1" converts a 1-based position to a 0-based
        # index; verify against the backend's ``position`` semantics.
        located = _call(translator, 'position', haystack, needle)
        return located + ' - 1'
    raise com.TranslationError("String find doesn't support start argument")
Exemplo n.º 5
0
 def translator(t, expr):
     """Apply the enclosing ``func`` with the modifier matching the unit.

     The unit is looked up in ``_truncate_modifiers``; an unknown unit
     raises ``com.TranslationError``.
     """
     operand, unit = expr.op().args
     compiled = t.translate(operand)
     try:
         modifier = _truncate_modifiers[unit]
     except KeyError:
         message = 'Unsupported truncate unit {}'.format(unit)
         raise com.TranslationError(message)
     else:
         return func(compiled, modifier)
Exemplo n.º 6
0
    def _validate_join_predicates(self, predicates):
        for pred in predicates:
            op = pred.op()

            if (not isinstance(op, ops.Equals)
                    and not self._non_equijoin_supported):
                raise com.TranslationError('Non-equality join predicates, '
                                           'i.e. non-equijoins, are not '
                                           'supported')
Exemplo n.º 7
0
def _timestamp_truncate(t, expr):
    """Translate a timestamp truncation into ``date_trunc(precision, arg)``.

    The unit is mapped through ``_truncate_precisions``; an unknown unit
    raises ``com.TranslationError``.
    """
    operand, unit = expr.op().args
    compiled = t.translate(operand)
    try:
        precision = _truncate_precisions[unit]
    except KeyError:
        message = 'Unsupported truncate unit {}'.format(unit)
        raise com.TranslationError(message)
    else:
        return sa.func.date_trunc(precision, compiled)
Exemplo n.º 8
0
def _truncate(t, expr):
    """Translate a truncation into ``date_format(arg, fmt)``.

    The format string for the unit comes from ``_truncate_formats``; an
    unknown unit raises ``com.TranslationError``.
    """
    operand, unit = expr.op().args
    compiled = t.translate(operand)
    try:
        fmt = _truncate_formats[unit]
    except KeyError:
        message = 'Unsupported truncate unit {}'.format(unit)
        raise com.TranslationError(message)
    else:
        return sa.func.date_format(compiled, fmt)
Exemplo n.º 9
0
def _truncate(translator, expr):
    """Render a timestamp truncation as a ``trunc(<arg>, '<unit>')`` string.

    The unit is mapped through ``_impala_unit_names``; an unknown unit
    raises ``com.TranslationError``.
    """
    operand, unit = expr.op().args
    compiled = translator.translate(operand)
    try:
        impala_unit = _impala_unit_names[unit]
    except KeyError:
        template = '{} unit is not supported in timestamp truncate'
        raise com.TranslationError(template.format(unit))

    return "trunc({}, '{}')".format(compiled, impala_unit)
Exemplo n.º 10
0
def _timestamp_delta(translator, expr):
    """Render ``<arg> + <offset>`` for a timestamp or date operand.

    The offset is converted to seconds for timestamp operands and to days
    for date operands. Any other operand type raises
    ``com.TranslationError``.
    """
    operand, offset = expr.op().args

    if isinstance(operand, ir.TimestampValue):
        unit = 's'
    elif isinstance(operand, ir.DateValue):
        unit = 'd'
    else:
        raise com.TranslationError('Unsupported timedelta operation')

    amount = offset.to_unit(unit).n
    compiled = translator.translate(operand)
    return '{0} + {1}'.format(compiled, amount)
Exemplo n.º 11
0
def _hash(translator, expr):
    """Translate a hash operation into a call to the named hash function.

    ``how`` must be one of the names listed below; it is used verbatim as
    the function name. Anything else raises ``com.TranslationError``.
    """
    arg, how = expr.op().args

    supported = {
        'MD5',
        'halfMD5',
        'SHA1',
        'SHA224',
        'SHA256',
        'intHash32',
        'intHash64',
        'cityHash64',
        'sipHash64',
        'sipHash128',
    }

    if how in supported:
        return _call(translator, how, arg)

    raise com.TranslationError('Unsupported hash algorithm {0}'.format(how))
Exemplo n.º 12
0
    def translate(self, expr):
        """Translate ``expr`` by dispatching on its operation node.

        Parameters go to ``self._trans_param``, table nodes render as
        ``'*'``, and every other node is handled by the formatter that
        ``_operation_registry`` holds for its exact type. A node type with
        no registered formatter raises ``com.TranslationError``.
        """
        node = expr.op()

        # TODO: use op MRO for subclasses instead of this isinstance spaghetti
        if isinstance(node, ir.Parameter):
            return self._trans_param(expr)

        if isinstance(node, ops.TableNode):
            # HACK/TODO: revisit for more complex cases
            return '*'

        node_type = type(node)
        if node_type not in _operation_registry:
            raise com.TranslationError(
                'No translator rule for {0}'.format(node))

        return _operation_registry[node_type](self, expr)
Exemplo n.º 13
0
def _window(translator, expr):
    """Render a windowed expression as ``<arg> <window clause>``.

    Some reductions are rejected with ``com.TranslationError``. Cumulative
    operations are first rewritten by ``_cumulative_to_window`` and then
    translated. If ``_expr_transforms`` has an entry for the exact
    operation type, it is applied to the rendered string.
    """
    arg, window = expr.op().args
    window_op = arg.op()

    _require_order_by = (
        ops.Lag, ops.Lead,
        ops.DenseRank, ops.MinRank,
        ops.FirstValue, ops.LastValue,
        ops.PercentRank, ops.NTile,
    )
    _unsupported_reductions = (
        ops.CMSMedian, ops.GroupConcat, ops.HLLCardinality,
    )

    if isinstance(window_op, _unsupported_reductions):
        message = '{} is not supported in window functions'
        raise com.TranslationError(message.format(type(window_op)))

    if isinstance(window_op, ops.CumulativeOp):
        rewritten = _cumulative_to_window(translator, arg, window)
        return translator.translate(rewritten)

    # These analytic functions need an ORDER BY in the window clause; when
    # the window has none, order by the function's own first argument.
    needs_default_order = (
        isinstance(window_op, _require_order_by)
        and len(window._order_by) == 0
    )
    if needs_default_order:
        window = window.order_by(window_op.args[0])

    window_sql = _format_window(translator, window)
    arg_sql = translator.translate(arg)
    rendered = '{} {}'.format(arg_sql, window_sql)

    op_type = type(window_op)
    if op_type in _expr_transforms:
        return _expr_transforms[op_type](rendered)
    return rendered
Exemplo n.º 14
0
def _parse_url(translator, expr):
    """Translate a URL-parsing operation into the matching function call.

    The operation's args are ``(arg, extract, key)``. ``extract`` selects
    the function that is called:

    * ``'HOST'``     -> ``domain(arg)``
    * ``'PROTOCOL'`` -> ``protocol(arg)``
    * ``'PATH'``     -> ``path(arg)``
    * ``'QUERY'``    -> ``extractURLParameter(arg, key)`` when ``key`` is
      given, otherwise ``queryString(arg)``

    Raises:
        com.TranslationError: if ``extract`` is none of the values above.
    """
    op = expr.op()
    arg, extract, key = op.args

    if extract == 'HOST':
        return _call(translator, 'domain', arg)
    elif extract == 'PROTOCOL':
        return _call(translator, 'protocol', arg)
    elif extract == 'PATH':
        return _call(translator, 'path', arg)
    elif extract == 'QUERY':
        # With a key, only that query parameter is extracted; without one
        # the whole query string is returned.
        if key is not None:
            return _call(translator, 'extractURLParameter', arg, key)
        else:
            return _call(translator, 'queryString', arg)
    else:
        # Message previously read "extrac"; typo fixed.
        raise com.TranslationError('Parse url with extract {0} is not '
                                   'supported'.format(extract))
Exemplo n.º 15
0
def _truncate(translator, expr):
    """Translate a date/time truncation into a ``toStartOf*`` style call.

    The operation's args are ``(arg, unit)``. ``unit`` is mapped to a
    function name through the ``converters`` table below and that function
    is called on ``arg``.

    Raises:
        com.TranslationError: if ``unit`` has no entry in ``converters``.
    """
    op = expr.op()
    arg, unit = op.args

    converters = {
        'Y': 'toStartOfYear',
        'M': 'toStartOfMonth',
        'D': 'toDate',
        'H': 'toStartOfHour',
        'MI': 'toStartOfMinute'
    }

    try:
        converter = converters[unit]
    except KeyError:
        # The message used to say "concat unit", which was wrong for a
        # truncate translator; it now says "truncate unit".
        raise com.TranslationError(
            'Unsupported truncate unit {0}'.format(unit))

    return _call(translator, converter, arg)
Exemplo n.º 16
0
def _cast(t, expr):
    """Translate a cast expression for SQLite.

    Casts to timestamp are a pass-through for integer and string inputs
    and raise ``com.TranslationError`` for anything else. Category values
    cast to ``'int32'`` are also passed through; every other cast becomes
    a SQL ``CAST``.
    """
    source, to_type = expr.op().args
    compiled = t.translate(source)
    compiled_type = t.get_sqla_type(to_type)

    if isinstance(to_type, dt.Timestamp):
        # SQLite has no physical date/time/timestamp type, so a cast to
        # timestamp has to be a no-op; we trust that the user's data can
        # actually be parsed correctly by SQLite.
        if isinstance(source, (ir.IntegerValue, ir.StringValue)):
            return compiled
        raise com.TranslationError(type(source))

    passthrough = isinstance(source, ir.CategoryValue) and to_type == 'int32'
    return compiled if passthrough else sa.cast(compiled, compiled_type)
Exemplo n.º 17
0
def _adapt_expr(expr):
    """Adapt ``expr`` to a table-like expression plus a result handler.

    Non-table expressions need to be adapted to some well-formed table
    expression, along with a way to adapt the results to the desired
    arity (whether array-like or scalar, for example). The canonical case
    is scalar values or arrays produced by some reductions (simple
    reductions, or distinct, say).

    Returns a 2-tuple ``(expression, handler)``: ``expression`` is what
    should be executed and ``handler`` is a one-argument callable applied
    to the execution results to pull out the value matching ``expr``.

    Raises ``NotImplementedError`` for a non-reduction scalar that has a
    base table and for an expression list that mixes reductions with
    non-reductions. Raises ``com.TranslationError`` when ``expr`` is none
    of the handled expression kinds.
    """
    # Identity handler: results are returned untouched.
    def as_is(x):
        return x

    # Table expressions are already in executable form.
    if isinstance(expr, ir.TableExpr):
        return expr, as_is

    # True only for scalar expressions that ``ops.is_reduction`` accepts.
    def _scalar_reduce(x):
        return isinstance(x, ir.ScalarExpr) and ops.is_reduction(x)

    # Build a handler that returns the first element of ``results[field]``.
    def _get_scalar(field):
        def scalar_handler(results):
            return results[field][0]

        return scalar_handler

    if isinstance(expr, ir.ScalarExpr):

        if _scalar_reduce(expr):
            # The reduction is turned into an aggregation; ``name`` is the
            # field under which the scalar is then looked up.
            table_expr, name = _reduction_to_aggregation(expr,
                                                         default_name='tmp')
            return table_expr, _get_scalar(name)
        else:
            base_table = ir.find_base_table(expr)
            if base_table is None:
                # expr with no table refs
                return expr.name('tmp'), _get_scalar('tmp')
            else:
                raise NotImplementedError(expr._repr())

    elif isinstance(expr, ir.AnalyticExpr):
        return expr.to_aggregation(), as_is

    elif isinstance(expr, ir.ExprList):
        exprs = expr.exprs()

        # is_aggregation: every member is a scalar reduction.
        # any_aggregation: at least one member is a scalar reduction.
        is_aggregation = True
        any_aggregation = False

        for x in exprs:
            if not _scalar_reduce(x):
                is_aggregation = False
            else:
                any_aggregation = True

        if is_aggregation:
            # All members are reductions: aggregate them on the base table
            # of the first member.
            # NOTE(review): if ``exprs`` is empty the loop leaves
            # ``is_aggregation`` True and ``exprs[0]`` would fail; confirm
            # that an empty list cannot reach this point.
            table = ir.find_base_table(exprs[0])
            return table.aggregate(exprs), as_is
        elif not any_aggregation:
            # No reductions at all: the list is returned unchanged.
            return expr, as_is
        else:
            # Mixing reductions with non-reductions is not handled.
            raise NotImplementedError(expr._repr())

    elif isinstance(expr, ir.ArrayExpr):
        op = expr.op()

        # Build a handler that returns ``results[name]``.
        def _get_column(name):
            def column_handler(results):
                return results[name]

            return column_handler

        if isinstance(op, ops.TableColumn):
            # Plain column reference: select just that column by name.
            table_expr = op.table[[op.name]]
            result_handler = _get_column(op.name)
        else:
            # Something more complicated.
            base_table = L.find_source_table(expr)

            if isinstance(op, ops.DistinctArray):
                # Project the underlying argument and apply distinct to the
                # projection. ``expr`` is rebound to that argument here.
                expr = op.arg
                try:
                    name = op.arg.get_name()
                except Exception:
                    # Any failure to obtain a name falls back to 'tmp'.
                    name = 'tmp'

                table_expr = (base_table.projection([expr.name(name)
                                                     ]).distinct())
                result_handler = _get_column(name)
            else:
                # Any other array expression is projected under the name
                # 'tmp' and read back from that column.
                table_expr = base_table.projection([expr.name('tmp')])
                result_handler = _get_column('tmp')

        return table_expr, result_handler
    else:
        raise com.TranslationError('Do not know how to execute: {0}'.format(
            type(expr)))
Exemplo n.º 18
0
def raise_error(translator, expr, *args):
    """Always raise ``com.TranslationError`` naming the operation type.

    ``translator`` and ``*args`` are accepted but not used.
    """
    op_type = type(expr.op())
    raise com.TranslationError(
        "Clickhouse backend doesn't support {0} operation!".format(op_type))