Пример #1
0
def coerce_to_output(
        result: Any,
        expr: ir.Expr,
        index: Optional[pd.Index] = None) -> Union[pd.Series, pd.DataFrame]:
    """Cast the result to either a Series or DataFrame.

    This method casts result of an execution to a Series or DataFrame,
    depending on the type of the expression and shape of the result.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result. Its ``_name`` attribute,
        when present, becomes the name of the returned Series.
    index: pd.Index
        Optional. If passed, the result of a reduction is repeated once
        per entry of the index.

    Returns
    -------
    result: A Series or DataFrame

    Raises
    ------
    ValueError
        If none of the supported expression/result combinations match.
    """
    result_name = getattr(expr, '_name', None)

    if isinstance(expr, (ir.DestructColumn, ir.StructColumn)):
        return ibis.util.coerce_to_dataframe(result, expr.type().names)
    elif isinstance(expr, (ir.DestructScalar, ir.StructScalar)):
        # Two cases reach this branch: for a grouped aggregate the result
        # is a Series of tuples/lists; for a non-grouped aggregate it is
        # presumably a single tuple/list (the original note was cut off
        # here -- TODO confirm). Both are handed to coerce_to_dataframe.
        return ibis.util.coerce_to_dataframe(result, expr.type().names)
    elif isinstance(result, pd.Series):
        return result.rename(result_name)
    elif isinstance(result, np.ndarray):
        return pd.Series(result, name=result_name)
    elif isinstance(expr.op(), ops.Reduction):
        if index is None:
            # Wrap in a list so the result always occupies exactly one row:
            # a bare list/tuple would be expanded element-wise and a bare
            # ``None`` would produce an empty Series.
            return pd.Series([result], name=result_name)

        if pd.api.types.is_list_like(result):
            # ``np.repeat`` flattens list-likes, which would yield
            # ``len(result) * len(index)`` values; repeat the object itself.
            values = [result] * len(index)
        else:
            values = np.repeat(result, len(index))
        return pd.Series(values, index=index, name=result_name)
    else:
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")
Пример #2
0
def coerce_to_output(
        result: Any,
        expr: ir.Expr,
        index: Optional[pd.Index] = None) -> Union[dd.Series, dd.DataFrame]:
    """Cast the result to either a Series or DataFrame, renaming as needed.

    Dask counterpart of the pandas backend's `coerce_to_output`: it
    produces dask objects and has a dedicated path for `dd.core.Scalar`
    results.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result; ``expr.get_name()``
        supplies the name of the output.
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A `dd.Series` or `dd.DataFrame`

    Raises
    ------
    ValueError
        If unable to coerce result

    Examples
    --------
    For dataframe outputs, see ``_coerce_to_dataframe``. Examples below use
    pandas objects for legibility, but functionality is the same on dask
    objects.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    """
    name = expr.get_name()

    # Struct/destruct expressions (column or scalar) become dataframes.
    if isinstance(expr, (ir.DestructColumn, ir.StructColumn,
                         ir.DestructScalar, ir.StructScalar)):
        struct_type = expr.type()
        return _coerce_to_dataframe(
            result, struct_type.names, struct_type.types
        )

    # Already a series: only the name needs adjusting.
    # (pandas Series are accepted because of
    # https://github.com/ibis-project/ibis/issues/2711)
    if isinstance(result, (pd.Series, dd.Series)):
        return result.rename(name)

    # Past this point only reductions can be handled.
    if not isinstance(expr.op(), ops.Reduction):
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")

    if not isinstance(result, dd.core.Scalar):
        # Non-dask result: let the pandas implementation build the Series,
        # then lift it into a single-partition dask Series.
        pandas_output = pd_util.coerce_to_output(result, expr, index)
        return dd.from_pandas(pandas_output, npartitions=1)

    # Lazy dask scalar: wrap it in a series of the requested length.
    scalar_dtype = _pandas_dtype_from_dd_scalar(result)
    n_rows = len(index) if index is not None else 1
    meta = make_meta_series(dtype=scalar_dtype, name=name)
    delayed_series = _wrap_dd_scalar(result, name, n_rows)
    # Passing explicit `divisions` gives the new Dask object known
    # divisions, so it can be concatenated with Dask objects created
    # through `dd.from_pandas`.
    return dd.from_delayed(
        delayed_series,
        meta=meta,
        divisions=(0, n_rows - 1),
    )
Пример #3
0
def coerce_to_output(
        result: Any,
        expr: ir.Expr,
        index: Optional[pd.Index] = None) -> Union[pd.Series, pd.DataFrame]:
    """Cast the result to either a Series or DataFrame.

    This method casts result of an execution to a Series or DataFrame,
    depending on the type of the expression and shape of the result.

    Parameters
    ----------
    result: Any
        The result to cast
    expr: ibis.expr.types.Expr
        The expression associated with the result; ``expr._safe_name``
        is used as the name of the returned Series.
    index: pd.Index
        Optional. If passed, scalar results will be broadcasted according
        to the index.

    Returns
    -------
    result: A Series or DataFrame

    Raises
    ------
    ValueError
        If none of the supported expression/result combinations match.

    Examples
    --------
    For dataframe outputs, see ``ibis.util.coerce_to_dataframe``.

    >>> coerce_to_output(pd.Series(1), expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr)
    0    1
    Name: result, dtype: int64
    >>> coerce_to_output(1, expr, [1,2,3])
    1    1
    2    1
    3    1
    Name: result, dtype: int64
    >>> coerce_to_output([1,2,3], expr)
    0    [1, 2, 3]
    Name: result, dtype: object
    >>> coerce_to_output([1,2,3], expr, [1,2])
    1    [1, 2, 3]
    2    [1, 2, 3]
    Name: result, dtype: object
    """
    result_name = expr._safe_name

    if isinstance(expr, (ir.DestructColumn, ir.StructColumn)):
        return _coerce_to_dataframe(result, expr.type())
    elif isinstance(expr, (ir.DestructScalar, ir.StructScalar)):
        # Two cases reach this branch: for a grouped aggregate the result
        # is a Series of tuples/lists; for a non-grouped aggregate it is
        # presumably a single tuple/list (the original note was cut off
        # here -- TODO confirm). Both are handed to _coerce_to_dataframe.
        return _coerce_to_dataframe(result, expr.type())
    elif isinstance(result, pd.Series):
        return result.rename(result_name)
    elif isinstance(expr, ir.ScalarExpr):
        if index is None:
            # Wrap `result` into a single-element Series.
            return pd.Series([result], name=result_name)

        # Broadcast `result` to a multi-element Series according to the
        # given `index`.
        if pd.api.types.is_list_like(result):
            # ``np.repeat`` flattens list-likes, which would yield
            # ``len(result) * len(index)`` values and a length mismatch
            # against `index`; repeat the object itself instead.
            values = [result] * len(index)
        else:
            values = np.repeat(result, len(index))
        return pd.Series(values, index=index, name=result_name)
    elif isinstance(result, np.ndarray):
        return pd.Series(result, name=result_name)
    else:
        raise ValueError(f"Cannot coerce_to_output. Result: {result}")