Exemplo n.º 1
0
def apply(
    exprs: List[Union[str, "pli.Expr"]],
    f: Callable[[List["pli.Series"]], Union["pli.Series", Any]],
    return_dtype: Optional[Type[DataType]] = None,
) -> "pli.Expr":
    """
    Apply a custom function over several expressions in a GroupBy context.

    The behavior depends on the context this is used in:

    * Select/Project
        Do not use `apply` here, use `map` instead.
    * GroupBy
        `f` receives a list of Series (one per input expression) and is
        applied over each group.

    Parameters
    ----------
    exprs
        Column names or expressions whose Series are handed to `f`.
    f
        Function to apply over the input.
    return_dtype
        dtype of the output Series.

    Returns
    -------
    Expr
    """
    pyexprs = pli.selection_to_pyexpr_list(exprs)
    # apply_groups=True is the only difference with the sibling `map`.
    grouped_udf = _map_mul(pyexprs, f, return_dtype, apply_groups=True)
    return pli.wrap_expr(grouped_udf)
Exemplo n.º 2
0
def map_binary(
    a: Union[str, "pli.Expr"],
    b: Union[str, "pli.Expr"],
    f: Callable[["pli.Series", "pli.Series"], "pli.Series"],
    return_dtype: Optional[Type[DataType]] = None,
) -> "pli.Expr":
    """
    Map a custom function over two columns and produce a single Series result.

    .. deprecated:: 0.10.4
        Use `map` or `apply` instead.

    Parameters
    ----------
    a
        Input Series a, given as a column name or an expression.
    b
        Input Series b, given as a column name or an expression.
    f
        Function to apply.
    return_dtype
        Output type of the udf.
    """
    # Plain strings are interpreted as column names.
    lhs = col(a) if isinstance(a, str) else a
    rhs = col(b) if isinstance(b, str) else b
    binary_udf = pybinary_function(lhs._pyexpr, rhs._pyexpr, f, return_dtype)
    return pli.wrap_expr(binary_udf)
Exemplo n.º 3
0
def first(
    column: Optional[Union[str,
                           "pli.Series"]] = None) -> Union["pli.Expr", Any]:
    """
    Get the first value.

    What is returned depends on the type of `column`:

    - None -> expression to take first column of a context.
    - str -> syntactic sugar for `pl.col(..).first()`
    - Series -> Take first value in `Series`

    Raises
    ------
    IndexError
        If `column` is an empty Series.
    """
    if column is None:
        return pli.wrap_expr(_first())

    if not isinstance(column, pli.Series):
        # Anything that is not a Series is treated as a column name.
        return col(column).first()

    if column.len() > 0:
        return column[0]
    raise IndexError(
        "The series is empty, so no first value can be returned.")
Exemplo n.º 4
0
def arange(
    low: int | pli.Expr | pli.Series,
    high: int | pli.Expr | pli.Series,
    step: int = 1,
    *,
    eager: bool = False,
) -> pli.Expr | pli.Series:
    """
    Create a range expression.

    This can be used in a `select`, `with_column` etc. Be sure that the range
    size is equal to the DataFrame you are collecting.

    Parameters
    ----------
    low
        Lower bound of range.
    high
        Upper bound of range.
    step
        Step size of the range.
    eager
        If eager evaluation is `True`, a Series is returned instead of an Expr.

    Examples
    --------
    >>> df.lazy().filter(pl.col("foo") < pl.arange(0, 100)).collect()  # doctest: +SKIP
    """
    low_expr = pli.expr_to_lit_or_expr(low, str_to_lit=False)
    high_expr = pli.expr_to_lit_or_expr(high, str_to_lit=False)

    if not eager:
        return pli.wrap_expr(pyarange(low_expr._pyexpr, high_expr._pyexpr, step))

    # Eager mode: evaluate the lazy expression against a throw-away
    # single-row frame and pull the resulting column out as a Series.
    scratch = pli.DataFrame({"a": [1]})
    range_expr = arange(low_expr, high_expr, step).alias("arange")
    return scratch.select(range_expr)["arange"]
Exemplo n.º 5
0
def arg_where(condition: pli.Expr | pli.Series,
              eager: bool = False) -> pli.Expr | pli.Series:
    """
    Return indices where `condition` evaluates `True`.

    Parameters
    ----------
    condition
        Boolean expression to evaluate. Must be a Series when `eager=True`.
    eager
        If `True`, evaluate immediately and return a Series instead of an
        expression.

    Raises
    ------
    ValueError
        If `eager=True` and `condition` is not a Series.

    Examples
    --------
    >>> df = pl.DataFrame({"a": [1, 2, 3, 4, 5]})
    >>> df.select(
    ...     [
    ...         pl.arg_where(pl.col("a") % 2 == 0),
    ...     ]
    ... ).to_series()
    shape: (2,)
    Series: 'a' [u32]
    [
        1
        3
    ]
    """
    if not eager:
        cond_expr = pli.expr_to_lit_or_expr(condition, str_to_lit=True)
        return pli.wrap_expr(py_arg_where(cond_expr._pyexpr))

    if not isinstance(condition, pli.Series):
        raise ValueError(
            f"expected 'Series' in 'arg_where' if 'eager=True', got {type(condition)}"
        )

    # Eager mode: wrap the Series in a frame and run the lazy variant on
    # the column carrying the Series' name.
    frame = condition.to_frame()
    selected = frame.select(arg_where(pli.col(condition.name)))
    return selected.to_series()
Exemplo n.º 6
0
def repeat(
    value: float | int | str | bool | None,
    n: pli.Expr | int,
    *,
    eager: bool = False,
    name: str | None = None,
) -> pli.Expr | pli.Series:
    """
    Repeat a single value n times.

    Parameters
    ----------
    value
        Value to repeat.
    n
        Repeat `n` times.
    eager
        Run eagerly and collect into a `Series`.
    name
        Only used in `eager` mode (defaults to an empty name). As expression,
        use `alias`.
    """
    if not eager:
        # A plain int count is turned into a literal expression.
        count_expr = lit(n) if isinstance(n, int) else n
        return pli.wrap_expr(_repeat(value, count_expr._pyexpr))

    series_name = "" if name is None else name
    # The dtype is derived from the Python type of `value`.
    dtype = py_type_to_dtype(type(value))
    return pli.Series._repeat(series_name, value, n, dtype)  # type: ignore[arg-type]
Exemplo n.º 7
0
def repeat(
    value: Optional[Union[float, int, str, bool]],
    n: Union["pli.Expr", int],
    *,
    eager: bool = False,
    name: Optional[str] = None,
) -> Union["pli.Expr", "pli.Series"]:
    """
    Repeat a single value n times.

    Parameters
    ----------
    value
        Value to repeat.
    n
        Repeat `n` times.
    eager
        Run eagerly and collect into a `Series`.
    name
        Only used in `eager` mode (defaults to an empty name). As expression,
        use `alias`.
    """
    if not eager:
        # A plain int count is turned into a literal expression.
        count_expr = lit(n) if isinstance(n, int) else n
        return pli.wrap_expr(_repeat(value, count_expr._pyexpr))

    series_name = "" if name is None else name
    # The dtype is derived from the Python type of `value`.
    dtype = py_type_to_dtype(type(value))
    return pli.Series._repeat(series_name, value, n, dtype)  # type: ignore
Exemplo n.º 8
0
def apply(
    exprs: list[str | pli.Expr],
    f: Callable[[list[pli.Series]], pli.Series | Any],
    return_dtype: type[DataType] | None = None,
) -> pli.Expr:
    """
    Apply a custom function over several expressions in a GroupBy context.

    The behavior depends on the context this is used in:

    * Select
        Don't use apply, use `map`.
    * GroupBy
        `f` receives a list of Series (one per input expression) and is
        applied over each group.

    Parameters
    ----------
    exprs
        Column names or expressions whose Series are handed to `f`.
    f
        Function to apply over the input.
    return_dtype
        dtype of the output Series.

    Returns
    -------
    Expr
    """
    pyexprs = pli.selection_to_pyexpr_list(exprs)
    # apply_groups=True is the only difference with the sibling `map`.
    grouped_udf = _map_mul(pyexprs, f, return_dtype, apply_groups=True)
    return pli.wrap_expr(grouped_udf)
Exemplo n.º 9
0
def map_binary(
    a: str | pli.Expr,
    b: str | pli.Expr,
    f: Callable[[pli.Series, pli.Series], pli.Series],
    return_dtype: type[DataType] | None = None,
) -> pli.Expr:
    """
    Map a custom function over two columns and produce a single Series result.

    .. deprecated:: 0.10.4
        Use :func:`map` or :func:`apply` instead.

    Parameters
    ----------
    a
        Input Series a, given as a column name or an expression.
    b
        Input Series b, given as a column name or an expression.
    f
        Function to apply.
    return_dtype
        Output type of the udf.
    """
    # Plain strings are interpreted as column names.
    lhs = col(a) if isinstance(a, str) else a
    rhs = col(b) if isinstance(b, str) else b
    binary_udf = pybinary_function(lhs._pyexpr, rhs._pyexpr, f, return_dtype)
    return pli.wrap_expr(binary_udf)
Exemplo n.º 10
0
def fold(
    acc: pli.IntoExpr,
    f: Callable[[pli.Series, pli.Series], pli.Series],
    exprs: Sequence[pli.Expr | str] | pli.Expr,
) -> pli.Expr:
    """
    Accumulate over multiple columns horizontally/ row wise with a left fold.

    Parameters
    ----------
    acc
        Accumulator Expression. This is the value that will be initialized
        when the fold starts. For a sum this could for instance be lit(0).
    f
        Function to apply over the accumulator and the value.
        Fn(acc, value) -> new_value
    exprs
        Expressions to aggregate over. May also be a wildcard expression.
    """
    acc_expr = pli.expr_to_lit_or_expr(acc, str_to_lit=True)

    # A single expression (e.g. pl.col("*")) is wrapped in a list so it can
    # go through the same conversion as a sequence of expressions.
    selection = [exprs] if isinstance(exprs, pli.Expr) else exprs
    pyexprs = pli.selection_to_pyexpr_list(selection)

    return pli.wrap_expr(pyfold(acc_expr._pyexpr, f, pyexprs))
Exemplo n.º 11
0
    def otherwise(self, expr: Union[pli.Expr, int, float, str]) -> pli.Expr:
        """
        Values to return in case of the predicate being `False`.

        The argument is converted with `pli.expr_to_lit_or_expr` before it is
        handed to the underlying when/then object.

        See Also: the `when` function.
        """
        fallback = pli.expr_to_lit_or_expr(expr)
        completed = self._pywhenthen.otherwise(fallback._pyexpr)
        return pli.wrap_expr(completed)
Exemplo n.º 12
0
def _datetime(
    year: pli.Expr | str,
    month: pli.Expr | str,
    day: pli.Expr | str,
    hour: pli.Expr | str | None = None,
    minute: pli.Expr | str | None = None,
    second: pli.Expr | str | None = None,
    millisecond: pli.Expr | str | None = None,
) -> pli.Expr:
    """
    Create polars `Datetime` from distinct time components.

    Parameters
    ----------
    year
        column or literal.
    month
        column or literal, ranging from 1-12.
    day
        column or literal, ranging from 1-31.
    hour
        column or literal, ranging from 1-24.
    minute
        column or literal, ranging from 1-60.
    second
        column or literal, ranging from 1-60.
    millisecond
        column or literal, ranging from 1-1000.

    Returns
    -------
    Expr of type `pl.Datetime`

    Notes
    -----
    NOTE(review): the ranges for hour, minute, second and millisecond are
    reproduced from the existing documentation; they look like they should be
    zero-based (0-23, 0-59, 0-59, 0-999) -- confirm against the backend.
    """
    year_expr = pli.expr_to_lit_or_expr(year, str_to_lit=False)
    month_expr = pli.expr_to_lit_or_expr(month, str_to_lit=False)
    day_expr = pli.expr_to_lit_or_expr(day, str_to_lit=False)

    # Optional components are forwarded as None when absent, otherwise as
    # the PyExpr of the converted value (strings are not turned into literals).
    optional_parts = [
        None if part is None
        else pli.expr_to_lit_or_expr(part, str_to_lit=False)._pyexpr
        for part in (hour, minute, second, millisecond)
    ]

    return pli.wrap_expr(
        py_datetime(
            year_expr._pyexpr,
            month_expr._pyexpr,
            day_expr._pyexpr,
            *optional_parts,
        ))
Exemplo n.º 13
0
def struct(
        exprs: Union[Sequence[Union["pli.Expr", str]],
                     "pli.Expr"]) -> "pli.Expr":
    """
    Collect several columns into a Series of dtype Struct.

    Parameters
    ----------
    exprs
        Columns/Expressions to collect into a Struct. Either a sequence of
        column names / expressions or a single expression.

    Examples
    --------

    >>> pl.DataFrame(
    ...     {
    ...         "int": [1, 2],
    ...         "str": ["a", "b"],
    ...         "bool": [True, None],
    ...         "list": [[1, 2], [3]],
    ...     }
    ... ).select([pl.struct(pl.all()).alias("my_struct")])
    shape: (2, 1)
    ┌───────────────────────┐
    │ my_struct             │
    │ ---                   │
    │ struct{int, ... list} │
    ╞═══════════════════════╡
    │ {1,"a",true,[1, 2]}   │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ {2,"b",null,[3]}      │
    └───────────────────────┘

    Only collect specific columns as a struct:

    >>> df = pl.DataFrame(
    ...     {"a": [1, 2, 3, 4], "b": ["one", "two", "three", "four"], "c": [9, 8, 7, 6]}
    ... )
    >>> df.with_column(pl.struct(pl.col(["a", "b"])).alias("a_and_b"))
    shape: (4, 4)
    ┌─────┬───────┬─────┬───────────────────────────────┐
    │ a   ┆ b     ┆ c   ┆ a_and_b                       │
    │ --- ┆ ---   ┆ --- ┆ ---                           │
    │ i64 ┆ str   ┆ i64 ┆ struct[2]{'a': i64, 'b': str} │
    ╞═════╪═══════╪═════╪═══════════════════════════════╡
    │ 1   ┆ one   ┆ 9   ┆ {1,"one"}                     │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ two   ┆ 8   ┆ {2,"two"}                     │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ three ┆ 7   ┆ {3,"three"}                   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 4   ┆ four  ┆ 6   ┆ {4,"four"}                    │
    └─────┴───────┴─────┴───────────────────────────────┘

    """
    pyexprs = pli.selection_to_pyexpr_list(exprs)
    struct_expr = _as_struct(pyexprs)
    return pli.wrap_expr(struct_expr)
Exemplo n.º 14
0
    def otherwise(self, expr: pli.Expr | int | float | str | None) -> pli.Expr:
        """
        Values to return in case of the predicate being `False`.

        The argument is converted with `pli.expr_to_lit_or_expr` before it is
        handed to the underlying when/then object.

        See Also
        --------
        when : Start another when, then, otherwise layer.
        then : Values to return in case of the predicate being `True`.
        """
        fallback = pli.expr_to_lit_or_expr(expr)
        completed = self._pywhenthen.otherwise(fallback._pyexpr)
        return pli.wrap_expr(completed)
Exemplo n.º 15
0
def concat_list(
    exprs: Union[Sequence[Union[str, "pli.Expr", "pli.Series"]], "pli.Expr"]
) -> "pli.Expr":
    """
    Concat the arrays in a Series dtype List in linear time.

    Parameters
    ----------
    exprs
        Columns to concat into a List Series. Either a sequence of column
        names / expressions / Series or a single expression.

    Examples
    --------

    Create lagged columns and collect them into a list. This mimics a rolling window.

    >>> df = pl.DataFrame(
    ...     {
    ...         "A": [1.0, 2.0, 9.0, 2.0, 13.0],
    ...     }
    ... )
    >>> (
    ...     df.with_columns(
    ...         [pl.col("A").shift(i).alias(f"A_lag_{i}") for i in range(3)]
    ...     ).select(
    ...         [
    ...             pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias(
    ...                 "A_rolling"
    ...             )
    ...         ]
    ...     )
    ... )
    shape: (5, 1)
    ┌─────────────────┐
    │ A_rolling       │
    │ ---             │
    │ list [f64]      │
    ╞═════════════════╡
    │ [null, null, 1] │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ [null, 1, 2]    │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ [1, 2, 9]       │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ [2, 9, 2]       │
    ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ [9, 2, 13]      │
    └─────────────────┘

    """
    pyexprs = pli.selection_to_pyexpr_list(exprs)
    list_expr = _concat_lst(pyexprs)
    return pli.wrap_expr(list_expr)
Exemplo n.º 16
0
def concat_str(exprs: Union[Sequence[Union["pli.Expr", str]], "pli.Expr"],
               sep: str = "") -> "pli.Expr":
    """
    Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

    Parameters
    ----------
    exprs
        Columns to concat into a Utf8 Series. Either a sequence of column
        names / expressions or a single expression.
    sep
        String value that will be used to separate the values.
    """
    pyexprs = pli.selection_to_pyexpr_list(exprs)
    joined = _concat_str(pyexprs, sep)
    return pli.wrap_expr(joined)
Exemplo n.º 17
0
def concat_str(exprs: Sequence[pli.Expr | str] | pli.Expr,
               sep: str = "") -> pli.Expr:
    """
    Horizontally concat Utf8 Series in linear time. Non-Utf8 columns are cast to Utf8.

    Parameters
    ----------
    exprs
        Columns to concat into a Utf8 Series. Either a sequence of column
        names / expressions or a single expression.
    sep
        String value that will be used to separate the values.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "a": [1, 2, 3],
    ...         "b": ["dogs", "cats", None],
    ...         "c": ["play", "swim", "walk"],
    ...     }
    ... )
    >>> df.with_columns(
    ...     [
    ...         pl.concat_str(
    ...             [
    ...                 pl.col("a") * 2,
    ...                 pl.col("b"),
    ...                 pl.col("c"),
    ...             ],
    ...             sep=" ",
    ...         ).alias("full_sentence"),
    ...     ]
    ... )
    shape: (3, 4)
    ┌─────┬──────┬──────┬───────────────┐
    │ a   ┆ b    ┆ c    ┆ full_sentence │
    │ --- ┆ ---  ┆ ---  ┆ ---           │
    │ i64 ┆ str  ┆ str  ┆ str           │
    ╞═════╪══════╪══════╪═══════════════╡
    │ 1   ┆ dogs ┆ play ┆ 2 dogs play   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ cats ┆ swim ┆ 4 cats swim   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ null ┆ walk ┆ null          │
    └─────┴──────┴──────┴───────────────┘

    """
    pyexprs = pli.selection_to_pyexpr_list(exprs)
    joined = _concat_str(pyexprs, sep)
    return pli.wrap_expr(joined)
Exemplo n.º 18
0
def min(column: str | list[pli.Expr | str] | pli.Series) -> pli.Expr | Any:
    """
    Get the minimum value. Can be used horizontally or vertically.

    Parameters
    ----------
    column
        Column(s) to be used in aggregation. Will lead to different behavior based on the input.
        input:
        - Union[str, Series] -> aggregate the minimum value of that column.
        - List[Expr] -> aggregate the minimum value horizontally.

    Returns
    -------
    The result of `Series.min()` when a Series is passed, otherwise an Expr.
    """
    if isinstance(column, pli.Series):
        # Eager path: reduce the Series directly.
        return column.min()

    if isinstance(column, list):
        # Horizontal minimum across the listed columns/expressions.
        exprs = pli.selection_to_pyexpr_list(column)
        return pli.wrap_expr(_min_exprs(exprs))

    # A single column name: vertical minimum of that column.
    return col(column).min()
Exemplo n.º 19
0
def cov(
    a: str | pli.Expr,
    b: str | pli.Expr,
) -> pli.Expr:
    """
    Compute the covariance between two columns/ expressions.

    Parameters
    ----------
    a
        Column name or Expression.
    b
        Column name or Expression.
    """
    # Plain strings are interpreted as column names.
    lhs = col(a) if isinstance(a, str) else a
    rhs = col(b) if isinstance(b, str) else b
    return pli.wrap_expr(pycov(lhs._pyexpr, rhs._pyexpr))
Exemplo n.º 20
0
def spearman_rank_corr(
    a: Union[str, "pli.Expr"],
    b: Union[str, "pli.Expr"],
) -> "pli.Expr":
    """
    Compute the spearman rank correlation between two columns.

    Parameters
    ----------
    a
        Column name or Expression.
    b
        Column name or Expression.
    """
    # Plain strings are interpreted as column names.
    lhs = col(a) if isinstance(a, str) else a
    rhs = col(b) if isinstance(b, str) else b
    return pli.wrap_expr(pyspearman_rank_corr(lhs._pyexpr, rhs._pyexpr))
Exemplo n.º 21
0
def count(column: str | pli.Series | None = None) -> pli.Expr | int:
    """
    Count the number of values in this column/context.

    Parameters
    ----------
    column
        If dtype is:

        * ``pl.Series`` : count the values in the series.
        * ``str`` : count the values in this column.
        * ``None`` : count the number of values in this context.
    """
    if column is None:
        return pli.wrap_expr(_count())

    if not isinstance(column, pli.Series):
        # Anything that is not a Series is treated as a column name.
        return col(column).count()

    # Eager path: the count of a Series is its length.
    return column.len()
Exemplo n.º 22
0
def pearson_corr(
    a: str | pli.Expr,
    b: str | pli.Expr,
) -> pli.Expr:
    """
    Compute the pearson's correlation between two columns.

    Parameters
    ----------
    a
        Column name or Expression.
    b
        Column name or Expression.
    """
    # Plain strings are interpreted as column names.
    lhs = col(a) if isinstance(a, str) else a
    rhs = col(b) if isinstance(b, str) else b
    return pli.wrap_expr(pypearson_corr(lhs._pyexpr, rhs._pyexpr))
Exemplo n.º 23
0
def count(
    column: Optional[Union[str,
                           "pli.Series"]] = None) -> Union["pli.Expr", int]:
    """
    Count the number of values in this column/context.

    Parameters
    ----------
    column
        If dtype is:

        * pl.Series -> count the values in the series
        * str -> count the values in this column
        * None -> count the number of values in this context
    """
    if column is None:
        return pli.wrap_expr(_count())

    if not isinstance(column, pli.Series):
        # Anything that is not a Series is treated as a column name.
        return col(column).count()

    # Eager path: the count of a Series is its length.
    return column.len()
Exemplo n.º 24
0
def argsort_by(exprs: List[Union["pli.Expr", str]],
               reverse: Union[List[bool], bool] = False) -> "pli.Expr":
    """
    Find the indexes that would sort the columns.

    Argsort by multiple columns. The first column will be used for the ordering.
    If there are duplicates in the first column, the second column will be used
    to determine the ordering and so on.

    Parameters
    ----------
    exprs
        Columns used to determine the ordering.
    reverse
        Default is ascending. A single value is applied to every column;
        a list is passed through as given.
    """
    if isinstance(reverse, list):
        reverse_flags = reverse
    else:
        # Broadcast the single flag to one entry per input column.
        reverse_flags = [reverse] * len(exprs)

    pyexprs = pli.selection_to_pyexpr_list(exprs)
    return pli.wrap_expr(pyargsort_by(pyexprs, reverse_flags))
Exemplo n.º 25
0
def max(
    column: Union[str, List[Union["pli.Expr", str]], "pli.Series"]
) -> Union["pli.Expr", Any]:
    """
    Get the maximum value. Can be used horizontally or vertically.

    Parameters
    ----------
    column
        Column(s) to be used in aggregation. Will lead to different behavior based on the input.
        input:
        - Union[str, Series] -> aggregate the maximum value of that column.
        - List[Expr] -> aggregate the maximum value horizontally.
    """
    if isinstance(column, pli.Series):
        # Eager path: reduce the Series directly.
        return column.max()

    if not isinstance(column, list):
        # A single column name: vertical maximum of that column.
        return col(column).max()

    # Horizontal maximum across the listed columns/expressions.
    pyexprs = pli.selection_to_pyexpr_list(column)
    return pli.wrap_expr(_max_exprs(pyexprs))
Exemplo n.º 26
0
def last(column: str | pli.Series | None = None) -> pli.Expr | Any:
    """
    Get the last value.

    Depending on the input type this function does different things:

    input:

    - None -> expression to take last column of a context.
    - str -> syntactic sugar for `pl.col(..).last()`
    - Series -> Take last value in `Series`

    Raises
    ------
    IndexError
        If `column` is an empty Series.
    """
    if column is None:
        return pli.wrap_expr(_last())

    if isinstance(column, pli.Series):
        if column.len() > 0:
            # Eager path: returns the element itself, not an expression.
            return column[-1]
        raise IndexError(
            "The series is empty, so no last value can be returned.")

    # Anything else is treated as a column name.
    return col(column).last()
Exemplo n.º 27
0
def map(
    exprs: list[str] | list[pli.Expr],
    f: Callable[[list[pli.Series]], pli.Series],
    return_dtype: type[DataType] | None = None,
) -> pli.Expr:
    """
    Map a custom function over multiple columns/expressions and produce a single Series result.

    Parameters
    ----------
    exprs
        Column names or expressions whose Series are handed to `f`.
    f
        Function to apply over the input.
    return_dtype
        dtype of the output Series.

    Returns
    -------
    Expr
    """
    pyexprs = pli.selection_to_pyexpr_list(exprs)
    # apply_groups=False is the only difference with the sibling `apply`.
    mapped_udf = _map_mul(pyexprs, f, return_dtype, apply_groups=False)
    return pli.wrap_expr(mapped_udf)
Exemplo n.º 28
0
def argsort_by(
    exprs: pli.Expr | str | Sequence[pli.Expr | str],
    reverse: list[bool] | bool = False,
) -> pli.Expr:
    """
    Find the indexes that would sort the columns.

    Argsort by multiple columns. The first column will be used for the ordering.
    If there are duplicates in the first column, the second column will be used
    to determine the ordering and so on.

    Parameters
    ----------
    exprs
        Columns used to determine the ordering. A single column name or
        expression is accepted as well as a sequence of them.
    reverse
        Default is ascending. A single bool is applied to every column;
        a list is passed through as given.
    """
    # A str is itself a Sequence, so it needs the explicit check to be
    # treated as one column name rather than as a sequence of characters.
    is_single = isinstance(exprs, str) or not isinstance(exprs, Sequence)
    columns = [exprs] if is_single else exprs

    if isinstance(reverse, bool):
        # Broadcast the single flag to one entry per input column.
        reverse_flags = [reverse] * len(columns)
    else:
        reverse_flags = reverse

    pyexprs = pli.selection_to_pyexpr_list(columns)
    return pli.wrap_expr(pyargsort_by(pyexprs, reverse_flags))
Exemplo n.º 29
0
def lit(
    value: None |
    (float | int | str | date | datetime | pli.Series | np.ndarray | Any),
    dtype: type[DataType] | None = None,
) -> pli.Expr:
    """
    A literal value.

    Parameters
    ----------
    value
        Value that should be used as a `literal`.
    dtype
        Optionally define a dtype. Only applied to values that are not
        handled by one of the dedicated branches (datetime, timedelta, date,
        Series, numpy array).

    Examples
    --------
    Literal integer:

    >>> pl.lit(1)  # doctest: +IGNORE_RESULT

    Literal str:

    >>> pl.lit("foo")  # doctest: +IGNORE_RESULT

    Literal datetime:

    >>> from datetime import datetime
    >>> pl.lit(datetime(2021, 1, 20))  # doctest: +IGNORE_RESULT

    Literal Null:

    >>> pl.lit(None)  # doctest: +IGNORE_RESULT

    Literal eager Series:

    >>> pl.lit(pl.Series("a", [1, 2, 3]))  # doctest: +IGNORE_RESULT

    """
    # datetime must be tested before date: datetime is a subclass of date.
    if isinstance(value, datetime):
        tu = "ns" if in_nanoseconds_window(value) else "ms"
        timestamp = _datetime_to_pl_timestamp(value, tu)
        return lit(timestamp).cast(Datetime).dt.and_time_unit(tu)

    if isinstance(value, timedelta):
        tu = "ns" if timedelta_in_nanoseconds_window(value) else "ms"
        duration = _timedelta_to_pl_timedelta(value, tu)
        return lit(duration).cast(Duration).dt.and_time_unit(tu, dtype=Duration)

    if isinstance(value, date):
        # Go through a datetime at midnight, then cast down to Date.
        midnight = datetime(value.year, value.month, value.day)
        return lit(midnight).cast(Date)

    if isinstance(value, pli.Series):
        series_name = value.name
        series_expr = pli.wrap_expr(pylit(value._s))
        # Only a non-empty Series name is carried over as an alias.
        return series_expr if series_name == "" else series_expr.alias(series_name)

    if _NUMPY_AVAILABLE and isinstance(value, np.ndarray):
        return lit(pli.Series("", value))

    if dtype:
        return pli.wrap_expr(pylit(value)).cast(dtype)

    # numpy literals like np.float32(0) have an `item` method that is used
    # to unwrap them before building the literal.
    if hasattr(value, "item"):
        value = value.item()  # type: ignore[union-attr]
    return pli.wrap_expr(pylit(value))
Exemplo n.º 30
0
def col(
    name: (str | list[str] | Sequence[PolarsDataType] | pli.Series
           | PolarsDataType),
) -> pli.Expr:
    """
    A column in a DataFrame.
    Can be used to select:

    - a single column by name
    - all columns by using a wildcard `"*"`
    - column by regular expression if the regex starts with `^` and ends with `$`

    Parameters
    ----------
    name
        One of:

        - a string that holds the name of the column
        - a list of column names
        - a data type (class or instance) or a list of data types
        - a Series, which is converted with `to_list()` and then handled
          like a list

    Raises
    ------
    ValueError
        If a non-empty list is passed whose first element is neither a string
        nor a data type.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "ham": [1, 2, 3],
    ...         "hamburger": [11, 22, 33],
    ...         "foo": [3, 2, 1],
    ...     }
    ... )
    >>> df.select(pl.col("foo"))
    shape: (3, 1)
    ┌─────┐
    │ foo │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 3   │
    ├╌╌╌╌╌┤
    │ 2   │
    ├╌╌╌╌╌┤
    │ 1   │
    └─────┘
    >>> df.select(pl.col("*"))
    shape: (3, 3)
    ┌─────┬───────────┬─────┐
    │ ham ┆ hamburger ┆ foo │
    │ --- ┆ ---       ┆ --- │
    │ i64 ┆ i64       ┆ i64 │
    ╞═════╪═══════════╪═════╡
    │ 1   ┆ 11        ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 22        ┆ 2   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 33        ┆ 1   │
    └─────┴───────────┴─────┘
    >>> df.select(pl.col("^ham.*$"))
    shape: (3, 2)
    ┌─────┬───────────┐
    │ ham ┆ hamburger │
    │ --- ┆ ---       │
    │ i64 ┆ i64       │
    ╞═════╪═══════════╡
    │ 1   ┆ 11        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ 22        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ 33        │
    └─────┴───────────┘
    >>> df.select(pl.col("*").exclude("ham"))
    shape: (3, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    └───────────┴─────┘
    >>> df.select(pl.col(["hamburger", "foo"]))
    shape: (3, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    └───────────┴─────┘

    """
    if isinstance(name, pli.Series):
        name = name.to_list()  # type: ignore[assignment]

    # note: we need the typing.cast call here twice to make mypy happy under Python 3.7
    # On Python 3.10, it is not needed. We use cast as it works across versions, ignoring
    # the typing error would lead to unneeded ignores under Python 3.10.
    if isclass(name) and issubclass(cast(type, name), DataType):
        # A bare dtype class is wrapped so that it takes the list path below.
        name = [cast(type, name)]

    if isinstance(name, DataType):
        return pli.wrap_expr(_dtype_cols([name]))

    if not isinstance(name, list):
        # Single column name (including wildcard / regex strings).
        return pli.wrap_expr(pycol(name))

    # Lists are dispatched on their first element only; an empty list is
    # handled as a list of names.
    if not name or isinstance(name[0], str):
        return pli.wrap_expr(pycols(name))

    head = name[0]
    head_is_dtype = (isclass(head) and issubclass(head, DataType)
                     or isinstance(head, DataType))
    if head_is_dtype:
        return pli.wrap_expr(_dtype_cols(name))

    raise ValueError(
        "did expect argument of List[str] or List[DataType]")