def _cast(t, expr):
    """Translate a cast expression into a SQLAlchemy expression.

    Casts to a timestamp or date type are emitted as calls to the
    ``datetime`` / ``strftime`` / ``date`` SQL functions instead of a real
    ``CAST``; any other target type goes through ``sa.cast``.

    Raises ``com.TranslationError`` (carrying the argument's type) when a
    timestamp or date cast is requested for an argument that is neither an
    integer nor a string value.
    """
    # It's not all fun and games with SQLite
    value, to_type = expr.op().args

    # Both translations happen before any branching on the target type.
    sa_value = t.translate(value)
    sa_to_type = t.get_sqla_type(to_type)

    to_timestamp = isinstance(to_type, dt.Timestamp)
    if to_timestamp or isinstance(to_type, dt.Date):
        if isinstance(value, ir.IntegerValue):
            # Integer input is handed to datetime() with the 'unixepoch'
            # modifier; a date target additionally wraps that in date().
            from_epoch = sa.func.datetime(sa_value, 'unixepoch')
            return from_epoch if to_timestamp else sa.func.date(from_epoch)
        if isinstance(value, ir.StringValue):
            if to_timestamp:
                return sa.func.strftime('%Y-%m-%d %H:%M:%f', sa_value)
            return sa.func.date(sa_value)
        raise com.TranslationError(type(value))

    # Category -> int32 is passed through without an explicit cast.
    passthrough = isinstance(value, ir.CategoryValue) and to_type == 'int32'
    return sa_value if passthrough else sa.cast(sa_value, sa_to_type)
def _validate_join_predicates(self, predicates): for pred in predicates: op = pred.op() if not isinstance(op, ops.Equals): raise com.TranslationError('Non-equality join predicates are ' 'not supported') left_on, right_on = op.args if left_on.get_name() != right_on.get_name(): raise com.TranslationError('Joining on different column names ' 'is not supported')
def formatter(translator, expr):
    """Format a fixed-arity function call for ``expr``.

    ``arity`` and ``func_name`` are not defined in this block; presumably
    they are supplied by an enclosing scope -- confirm against the caller.

    Raises ``com.TranslationError`` when the operation does not carry
    exactly ``arity`` arguments.
    """
    node = expr.op()
    received = len(node.args)
    if arity != received:
        template = 'Incorrect number of args {0} instead of {1}'
        raise com.TranslationError(template.format(received, arity))
    return _call(translator, func_name, *node.args)
def _string_find(translator, expr):
    """Translate a string-find operation into a ``position`` call.

    The fourth operation argument is ignored. A non-``None`` start argument
    is rejected with ``com.TranslationError``.
    """
    haystack, needle, start, _ = expr.op().args
    if start is not None:
        raise com.TranslationError(
            "String find doesn't support start argument")

    position_call = _call(translator, 'position', haystack, needle)
    # The trailing " - 1" presumably converts a 1-based position to a
    # 0-based index -- confirm against the backend's `position` semantics.
    return position_call + ' - 1'
def translator(t, expr):
    """Translate a truncate operation using a unit-specific modifier.

    ``func`` is not defined in this block; presumably it is supplied by an
    enclosing scope -- confirm against the caller.

    Raises ``com.TranslationError`` when the unit has no entry in
    ``_truncate_modifiers``.
    """
    operand, unit = expr.op().args
    # The operand is translated before the unit is validated.
    sa_operand = t.translate(operand)
    try:
        unit_modifier = _truncate_modifiers[unit]
    except KeyError:
        message = 'Unsupported truncate unit {}'.format(unit)
        raise com.TranslationError(message)
    return func(sa_operand, unit_modifier)
def _validate_join_predicates(self, predicates): for pred in predicates: op = pred.op() if (not isinstance(op, ops.Equals) and not self._non_equijoin_supported): raise com.TranslationError('Non-equality join predicates, ' 'i.e. non-equijoins, are not ' 'supported')
def _timestamp_truncate(t, expr):
    """Translate a timestamp truncate into a ``date_trunc`` call.

    Raises ``com.TranslationError`` when the unit has no entry in
    ``_truncate_precisions``.
    """
    operand, unit = expr.op().args
    # The operand is translated before the unit is validated.
    sa_operand = t.translate(operand)
    try:
        trunc_precision = _truncate_precisions[unit]
    except KeyError:
        message = 'Unsupported truncate unit {}'.format(unit)
        raise com.TranslationError(message)
    return sa.func.date_trunc(trunc_precision, sa_operand)
def _truncate(t, expr):
    """Translate a truncate operation into a ``date_format`` call.

    The format string is looked up in ``_truncate_formats`` by unit.

    Raises ``com.TranslationError`` when the unit has no entry there.
    """
    operand, unit = expr.op().args
    # The operand is translated before the unit is validated.
    sa_operand = t.translate(operand)
    try:
        format_string = _truncate_formats[unit]
    except KeyError:
        message = 'Unsupported truncate unit {}'.format(unit)
        raise com.TranslationError(message)
    return sa.func.date_format(sa_operand, format_string)
def _truncate(translator, expr):
    """Render a timestamp truncate as a ``trunc(<arg>, '<unit>')`` string.

    The unit is mapped through ``_impala_unit_names``.

    Raises ``com.TranslationError`` when the unit has no entry there.
    """
    operand, requested_unit = expr.op().args
    # The operand is translated before the unit is validated.
    operand_sql = translator.translate(operand)
    try:
        backend_unit = _impala_unit_names[requested_unit]
    except KeyError:
        template = '{} unit is not supported in timestamp truncate'
        raise com.TranslationError(template.format(requested_unit))
    return "trunc({}, '{}')".format(operand_sql, backend_unit)
def _timestamp_delta(translator, expr):
    """Render ``<arg> + <offset>`` for a timestamp or date plus an offset.

    The offset is converted with ``to_unit('s')`` for timestamp values and
    ``to_unit('d')`` for date values; its ``n`` attribute is what gets added.

    Raises ``com.TranslationError`` for any other kind of left operand.
    """
    base, delta = expr.op().args

    if isinstance(base, ir.TimestampValue):
        unit = 's'
    elif isinstance(base, ir.DateValue):
        unit = 'd'
    else:
        raise com.TranslationError('Unsupported timedelta operation')

    # The offset is converted before the base expression is translated.
    amount = delta.to_unit(unit).n
    base_sql = translator.translate(base)
    return '{0} + {1}'.format(base_sql, amount)
def _hash(translator, expr):
    """Translate a hash operation into a call named after the algorithm.

    Raises ``com.TranslationError`` when the requested algorithm is not one
    of the names listed below.
    """
    value, how = expr.op().args

    supported = {
        'MD5',
        'halfMD5',
        'SHA1',
        'SHA224',
        'SHA256',
        'intHash32',
        'intHash64',
        'cityHash64',
        'sipHash64',
        'sipHash128',
    }
    if how in supported:
        # The algorithm name is used directly as the function name.
        return _call(translator, how, value)

    raise com.TranslationError(
        'Unsupported hash algorithm {0}'.format(how))
def translate(self, expr):
    """Translate ``expr`` by dispatching on the operation it wraps.

    Parameters go to ``self._trans_param``, table nodes yield ``'*'``, and
    any other operation is looked up by exact type in
    ``_operation_registry``.

    Raises ``com.TranslationError`` when no rule matches.
    """
    # The operation node type the typed expression wraps
    node = expr.op()

    # TODO: use op MRO for subclasses instead of this isinstance spaghetti
    if isinstance(node, ir.Parameter):
        return self._trans_param(expr)

    if isinstance(node, ops.TableNode):
        # HACK/TODO: revisit for more complex cases
        return '*'

    node_type = type(node)
    if node_type not in _operation_registry:
        raise com.TranslationError(
            'No translator rule for {0}'.format(node))

    rule = _operation_registry[node_type]
    return rule(self, expr)
def _window(translator, expr):
    """Render a windowed expression as ``<expr> <window clause>``.

    Cumulative operations are first rewritten with
    ``_cumulative_to_window`` and then translated directly. The final string
    is post-processed by ``_expr_transforms`` when the wrapped operation's
    exact type has an entry there.

    Raises ``com.TranslationError`` for reductions that cannot be used in a
    window function.
    """
    operand, win = expr.op().args
    inner_op = operand.op()

    needs_ordering = (
        ops.Lag,
        ops.Lead,
        ops.DenseRank,
        ops.MinRank,
        ops.FirstValue,
        ops.LastValue,
        ops.PercentRank,
        ops.NTile,
    )
    not_windowable = (
        ops.CMSMedian,
        ops.GroupConcat,
        ops.HLLCardinality,
    )

    if isinstance(inner_op, not_windowable):
        template = '{} is not supported in window functions'
        raise com.TranslationError(template.format(type(inner_op)))

    if isinstance(inner_op, ops.CumulativeOp):
        rewritten = _cumulative_to_window(translator, operand, win)
        return translator.translate(rewritten)

    # Some analytic functions need to have the expression of interest in
    # the ORDER BY part of the window clause
    if isinstance(inner_op, needs_ordering) and len(win._order_by) == 0:
        win = win.order_by(inner_op.args[0])

    # The window clause is formatted before the operand is translated.
    window_sql = _format_window(translator, win)
    operand_sql = translator.translate(operand)
    windowed = '{} {}'.format(operand_sql, window_sql)

    inner_type = type(inner_op)
    if inner_type not in _expr_transforms:
        return windowed
    return _expr_transforms[inner_type](windowed)
def _parse_url(translator, expr):
    """Translate a parse-url operation into the matching function call.

    The ``extract`` argument selects the function:

    * ``'HOST'`` -> ``domain``
    * ``'PROTOCOL'`` -> ``protocol``
    * ``'PATH'`` -> ``path``
    * ``'QUERY'`` -> ``extractURLParameter`` when a key is given, otherwise
      ``queryString``

    Raises ``com.TranslationError`` for any other ``extract`` value.
    """
    op = expr.op()
    arg, extract, key = op.args

    if extract == 'HOST':
        return _call(translator, 'domain', arg)
    elif extract == 'PROTOCOL':
        return _call(translator, 'protocol', arg)
    elif extract == 'PATH':
        return _call(translator, 'path', arg)
    elif extract == 'QUERY':
        # With a key only that parameter is extracted; without one the
        # whole query string is returned.
        if key is not None:
            return _call(translator, 'extractURLParameter', arg, key)
        else:
            return _call(translator, 'queryString', arg)
    else:
        raise com.TranslationError('Parse url with extract {0} is not '
                                   'supported'.format(extract))
def _truncate(translator, expr):
    """Translate a truncate operation into a ``toStartOf*``/``toDate`` call.

    The unit selects the converter function:
    ``'Y'``, ``'M'``, ``'D'``, ``'H'`` and ``'MI'`` are supported.

    Raises ``com.TranslationError`` for any other unit.
    """
    op = expr.op()
    arg, unit = op.args

    converters = {
        'Y': 'toStartOfYear',
        'M': 'toStartOfMonth',
        'D': 'toDate',
        'H': 'toStartOfHour',
        'MI': 'toStartOfMinute',
    }

    try:
        converter = converters[unit]
    except KeyError:
        # This is a truncate operation, so the message names it as such.
        raise com.TranslationError(
            'Unsupported truncate unit {0}'.format(unit))

    return _call(translator, converter, arg)
def _cast(t, expr):
    """Translate a cast expression into a SQLAlchemy expression.

    A cast to timestamp returns the translated argument unchanged, and so
    does a category -> ``'int32'`` cast; everything else goes through
    ``sa.cast``.

    Raises ``com.TranslationError`` (carrying the argument's type) when a
    timestamp cast is requested for an argument that is neither an integer
    nor a string value.
    """
    # It's not all fun and games with SQLite
    value, to_type = expr.op().args

    # Both translations happen before any branching on the target type.
    sa_value = t.translate(value)
    sa_to_type = t.get_sqla_type(to_type)

    # SQLite does not have a physical date/time/timestamp type, so
    # unfortunately a cast to timestamp must be a no-op, and we have to trust
    # that the user's data can actually be correctly parsed by SQLite.
    if isinstance(to_type, dt.Timestamp):
        if isinstance(value, (ir.IntegerValue, ir.StringValue)):
            return sa_value
        raise com.TranslationError(type(value))

    passthrough = isinstance(value, ir.CategoryValue) and to_type == 'int32'
    return sa_value if passthrough else sa.cast(sa_value, sa_to_type)
def _adapt_expr(expr):
    """Adapt ``expr`` into something executable plus a result handler.

    Returns a pair ``(adapted_expr, handler)``; ``handler`` is a callable
    that takes the execution results and returns them as-is, a single
    column, or the first value of a single column, depending on the kind of
    expression.

    Raises ``NotImplementedError`` for scalar expressions that reference a
    table without being reductions and for expression lists mixing
    reductions with non-reductions, and ``com.TranslationError`` for
    expression kinds not handled here.
    """
    # Non-table expressions need to be adapted to some well-formed table
    # expression, along with a way to adapt the results to the desired
    # arity (whether array-like or scalar, for example)
    #
    # Canonical case is scalar values or arrays produced by some reductions
    # (simple reductions, or distinct, say)

    def passthrough(results):
        return results

    def first_value_of(field):
        def handler(results):
            return results[field][0]
        return handler

    def column_of(name):
        def handler(results):
            return results[name]
        return handler

    def is_scalar_reduction(candidate):
        return (isinstance(candidate, ir.ScalarExpr) and
                ops.is_reduction(candidate))

    if isinstance(expr, ir.TableExpr):
        return expr, passthrough

    if isinstance(expr, ir.ScalarExpr):
        if is_scalar_reduction(expr):
            aggregated, field = _reduction_to_aggregation(
                expr, default_name='tmp')
            return aggregated, first_value_of(field)

        if ir.find_base_table(expr) is not None:
            raise NotImplementedError(expr._repr())

        # expr with no table refs
        return expr.name('tmp'), first_value_of('tmp')

    if isinstance(expr, ir.AnalyticExpr):
        return expr.to_aggregation(), passthrough

    if isinstance(expr, ir.ExprList):
        members = expr.exprs()
        reduction_flags = [is_scalar_reduction(member) for member in members]

        if all(reduction_flags):
            # Every member is a reduction: aggregate them on the base table
            # found from the first member.
            source = ir.find_base_table(members[0])
            return source.aggregate(members), passthrough

        if any(reduction_flags):
            # A mix of reductions and non-reductions is not handled.
            raise NotImplementedError(expr._repr())

        return expr, passthrough

    if isinstance(expr, ir.ArrayExpr):
        node = expr.op()

        if isinstance(node, ops.TableColumn):
            return node.table[[node.name]], column_of(node.name)

        # Something more complicated.
        source = L.find_source_table(expr)

        if not isinstance(node, ops.DistinctArray):
            return source.projection([expr.name('tmp')]), column_of('tmp')

        distinct_arg = node.arg
        try:
            column = node.arg.get_name()
        except Exception:
            # Fall back to a placeholder name when the argument has none.
            column = 'tmp'

        projected = source.projection([distinct_arg.name(column)]).distinct()
        return projected, column_of(column)

    raise com.TranslationError(
        'Do not know how to execute: {0}'.format(type(expr)))
def raise_error(translator, expr, *args):
    """Always raise ``com.TranslationError`` naming the operation's type.

    ``translator`` and ``*args`` are accepted but not used.
    """
    op_type = type(expr.op())
    template = "Clickhouse backend doesn't support {0} operation!"
    raise com.TranslationError(template.format(op_type))