def _get_interval_col(
    t, interval_ibis_expr, scope, timecontext, allowed_units=None
):
    """Translate an ibis interval expression into a Spark interval column.

    Binary interval operations are delegated straight to the translator.
    Anything else is reduced to a literal op and rendered as a Spark
    ``INTERVAL <value> <unit>`` expression.

    Parameters
    ----------
    t
        Expression translator.
    interval_ibis_expr
        The ibis interval expression to convert.
    scope, timecontext
        Translation context forwarded to ``t.translate``.
    allowed_units
        Optional whitelist of interval units; a unit outside this set
        raises ``UnsupportedArgumentError``.
    """
    node = interval_ibis_expr.op()

    # A binary interval op already translates directly to an interval column.
    if isinstance(node, ops.IntervalBinaryOp):
        return t.translate(interval_ibis_expr, scope, timecontext)

    # Otherwise obtain a literal op — either directly, or by translating
    # the expression and unwrapping the resulting op.
    if isinstance(node, ops.Literal):
        op = node
    else:
        op = t.translate(interval_ibis_expr, scope, timecontext).op()

    dtype = op.dtype
    if not isinstance(dtype, dtypes.Interval):
        raise com.UnsupportedArgumentError(
            '{} expression cannot be converted to interval column. '
            'Must be Interval dtype.'.format(dtype)
        )
    if allowed_units and dtype.unit not in allowed_units:
        raise com.UnsupportedArgumentError(
            'Interval unit "{}" is not allowed. Allowed units are: '
            '{}'.format(dtype.unit, allowed_units)
        )
    return F.expr(
        'INTERVAL {} {}'.format(op.value, _time_unit_mapping[dtype.unit])
    )
def get_schema(
    self,
    table_name: str,
    database: str | None = None,
) -> sch.Schema:
    """Return a Schema object for the indicated table and database.

    Parameters
    ----------
    table_name
        Table name. May be fully qualified
    database
        Spark does not have a database argument for its table() method,
        so this must be None

    Returns
    -------
    Schema
        An ibis schema

    Raises
    ------
    com.UnsupportedArgumentError
        If ``database`` is not None.
    """
    # Spark's ``table()`` has no database parameter; reject it explicitly
    # rather than silently ignoring the argument.
    if database is not None:
        raise com.UnsupportedArgumentError(
            'Spark does not support the `database` argument for '
            '`get_schema`'
        )
    return sch.infer(self._session.table(table_name))
def compile_limit(t, expr, scope, **kwargs):
    """Compile an ibis Limit operation into ``DataFrame.limit``.

    Parameters
    ----------
    t
        Expression translator.
    expr
        The ibis limit expression; its op carries ``table``, ``n`` and
        ``offset``.
    scope
        Translation scope forwarded to ``compile_with_scope``.

    Raises
    ------
    com.UnsupportedArgumentError
        If a non-zero offset is requested — Spark's DataFrame API has no
        offset support for limit.
    """
    op = expr.op()
    if op.offset != 0:
        # Fixed grammar of the error message ("offset is for" -> "offset for").
        raise com.UnsupportedArgumentError(
            'PySpark backend does not support non-zero offset for '
            'limit operation. Got offset {}.'.format(op.offset)
        )
    df = compile_with_scope(t, op.table, scope)
    return df.limit(op.n)
def validate_func_and_types(self, func):
    """Validate a pandas UDF's declared types, then defer to the base check.

    Rejects unsupported configurations before registration:
    output of MapType/StructType, and zero-argument UDFs.

    Raises
    ------
    com.IbisTypeError
        If the declared Spark output type is MapType or StructType.
    com.UnsupportedArgumentError
        If the UDF declares no input arguments.
    """
    # NOTE: the original messages used a backslash continuation *inside*
    # the string literal, which baked the next line's indentation into the
    # user-facing text. Implicit concatenation yields a clean message.
    if isinstance(self.spark_output_type, (pt.MapType, pt.StructType)):
        raise com.IbisTypeError(
            'Spark does not support MapType or StructType output for '
            'Pandas UDFs'
        )
    if not self.input_type:
        raise com.UnsupportedArgumentError(
            'Spark does not support 0-arg pandas UDFs. Instead, create '
            'a 1-arg pandas UDF and ignore the arg in your function'
        )
    super().validate_func_and_types(func)
def compile_string_to_timestamp(t, expr, scope, **kwargs):
    """Compile a StringToTimestamp op into ``F.to_timestamp``.

    Only the UTC timezone (or no timezone) is supported; any other
    timezone raises ``UnsupportedArgumentError``.
    """
    op = expr.op()
    fmt = op.format_str.op().value
    tz = op.timezone
    # Spark's to_timestamp parses in the session timezone; only UTC is
    # accepted here.
    if tz is not None and tz.op().value != "UTC":
        raise com.UnsupportedArgumentError(
            'PySpark backend only supports timezone UTC for converting string '
            'to timestamp.'
        )
    src_column = t.translate(op.arg, scope)
    return F.to_timestamp(src_column, fmt)
def compile_timestamp_from_unix(t, expr, scope, **kwargs):
    """Compile a TimestampFromUNIX op into Spark timestamp functions.

    Supports no unit (default ``from_unixtime`` formatting) or seconds
    (``'s'``); anything else raises ``UnsupportedArgumentError``.
    """
    op = expr.op()
    unixtime = t.translate(op.arg, scope)
    unit = op.unit

    if not unit:
        return F.to_timestamp(F.from_unixtime(unixtime))
    if unit == 's':
        # Round-trip through an explicit format so parsing matches rendering.
        fmt = 'yyyy-MM-dd HH:mm:ss'
        return F.to_timestamp(F.from_unixtime(unixtime, fmt), fmt)
    raise com.UnsupportedArgumentError(
        'PySpark backend does not support timestamp from unix time with '
        'unit {}. Supported unit is s.'.format(unit)
    )
def compile_cast(t, expr, scope, **kwargs):
    """Compile a Cast op into a Spark ``Column.cast``.

    Interval targets are special-cased: they are only supported when the
    source expression is a literal, in which case a Spark interval is
    built directly.
    """
    op = expr.op()
    target = op.to

    if isinstance(target, dtypes.Interval):
        arg_op = op.arg.op()
        # Spark cannot cast arbitrary columns to intervals; only literal
        # values can be turned into an interval.
        if not isinstance(arg_op, ops.Literal):
            raise com.UnsupportedArgumentError(
                'Casting to intervals is only supported for literals '
                'in the PySpark backend. {} not allowed.'.format(type(op.arg))
            )
        return interval(arg_op.value, target.unit)

    if isinstance(target, dtypes.Array):
        cast_type = ibis_array_dtype_to_spark_dtype(target)
    else:
        cast_type = ibis_dtype_to_spark_dtype(target)

    src_column = t.translate(op.arg, scope)
    return src_column.cast(cast_type)
def get_schema(self, table_name, database=None):
    """
    Return a Schema object for the indicated table and database

    Parameters
    ----------
    table_name : string
      May be fully qualified
    database : string
      Spark does not have a database argument for its table() method,
      so this must be None

    Returns
    -------
    schema : ibis Schema
    """
    # Spark's table() takes no database argument; reject it up front.
    if database is not None:
        raise com.UnsupportedArgumentError(
            'Spark does not support database param for table')
    return sch.infer(self._session.table(table_name))