Exemplo n.º 1
0
def col(name: Union[str, tp.List[str]]) -> "pl.Expr":
    """
    A column in a DataFrame.
    Can be used to select:

     * a single column by name
     * all columns by using a wildcard `"*"`
     * column by regular expression if the regex starts with `^` and ends with `$`

    Parameters
    col
        A string that holds the name of the column

    Examples
    -------

    >>> df = pl.DataFrame({
    >>> "ham": [1, 2, 3],
    >>> "hamburger": [11, 22, 33],
    >>> "foo": [3, 2, 1]})
    >>> df.select(col("foo"))
    shape: (3, 1)
    ╭─────╮
    │ foo │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 3   │
    ├╌╌╌╌╌┤
    │ 2   │
    ├╌╌╌╌╌┤
    │ 1   │
    ╰─────╯
    >>> df.select(col("*"))
    shape: (3, 3)
    ╭─────┬───────────┬─────╮
    │ ham ┆ hamburger ┆ foo │
    │ --- ┆ ---       ┆ --- │
    │ i64 ┆ i64       ┆ i64 │
    ╞═════╪═══════════╪═════╡
    │ 1   ┆ 11        ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 22        ┆ 2   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 33        ┆ 1   │
    ╰─────┴───────────┴─────╯
    >>> df.select(col("^ham.*$"))
    shape: (3, 2)
    ╭─────┬───────────╮
    │ ham ┆ hamburger │
    │ --- ┆ ---       │
    │ i64 ┆ i64       │
    ╞═════╪═══════════╡
    │ 1   ┆ 11        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ 22        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ 33        │
    ╰─────┴───────────╯
    >>> df.select(col("*").exclude("ham"))
    shape: (3, 2)
    ╭───────────┬─────╮
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    ╰───────────┴─────╯
    >>> df.select(col(["hamburger", "foo"])
    shape: (3, 2)
    ╭───────────┬─────╮
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    ╰───────────┴─────╯

    """
    if isinstance(name, list):
        return pl.lazy.expr.wrap_expr(pycols(name))
    return pl.lazy.expr.wrap_expr(pycol(name))
Exemplo n.º 2
0
def col(
    name: (str | list[str] | Sequence[PolarsDataType] | pli.Series
           | PolarsDataType),
) -> pli.Expr:
    """
    Build an expression that selects one or more columns of a DataFrame.

    Supported selections:

    - a single column by name
    - all columns by using a wildcard `"*"`
    - column by regular expression if the regex starts with `^` and ends with `$`
    - several columns by passing a list of names
    - columns by data type, by passing a dtype or a list of dtypes

    Parameters
    ----------
    name
        What to select: a column name, a list of column names, a dtype
        (given as the class or as an instance), a list of dtypes, or a
        Series, which is converted to a list with ``to_list()`` first.

    Raises
    ------
    ValueError
        If ``name`` is a non-empty list whose first element is neither a
        string nor a dtype. Only the first element is inspected.

    Examples
    --------
    >>> df = pl.DataFrame(
    ...     {
    ...         "ham": [1, 2, 3],
    ...         "hamburger": [11, 22, 33],
    ...         "foo": [3, 2, 1],
    ...     }
    ... )
    >>> df.select(pl.col("foo"))
    shape: (3, 1)
    ┌─────┐
    │ foo │
    │ --- │
    │ i64 │
    ╞═════╡
    │ 3   │
    ├╌╌╌╌╌┤
    │ 2   │
    ├╌╌╌╌╌┤
    │ 1   │
    └─────┘
    >>> df.select(pl.col("*"))
    shape: (3, 3)
    ┌─────┬───────────┬─────┐
    │ ham ┆ hamburger ┆ foo │
    │ --- ┆ ---       ┆ --- │
    │ i64 ┆ i64       ┆ i64 │
    ╞═════╪═══════════╪═════╡
    │ 1   ┆ 11        ┆ 3   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 22        ┆ 2   │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 33        ┆ 1   │
    └─────┴───────────┴─────┘
    >>> df.select(pl.col("^ham.*$"))
    shape: (3, 2)
    ┌─────┬───────────┐
    │ ham ┆ hamburger │
    │ --- ┆ ---       │
    │ i64 ┆ i64       │
    ╞═════╪═══════════╡
    │ 1   ┆ 11        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 2   ┆ 22        │
    ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
    │ 3   ┆ 33        │
    └─────┴───────────┘
    >>> df.select(pl.col("*").exclude("ham"))
    shape: (3, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    └───────────┴─────┘
    >>> df.select(pl.col(["hamburger", "foo"]))
    shape: (3, 2)
    ┌───────────┬─────┐
    │ hamburger ┆ foo │
    │ ---       ┆ --- │
    │ i64       ┆ i64 │
    ╞═══════════╪═════╡
    │ 11        ┆ 3   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 22        ┆ 2   │
    ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
    │ 33        ┆ 1   │
    └───────────┴─────┘

    """
    # A Series is treated exactly like the plain list of its values.
    if isinstance(name, pli.Series):
        name = name.to_list()  # type: ignore[assignment]

    # A lone dtype, whether passed as the class or as an instance, becomes a
    # one-element dtype selection.
    # note: typing.cast is used (twice) to keep mypy happy under Python 3.7.
    # On Python 3.10 it is not needed, but cast works across versions, whereas
    # ignoring the typing error would leave unneeded ignores under Python 3.10.
    if isclass(name) and issubclass(cast(type, name), DataType):
        return pli.wrap_expr(_dtype_cols([cast(type, name)]))
    if isinstance(name, DataType):
        return pli.wrap_expr(_dtype_cols([name]))

    # Anything that is not a list is passed to the single-column constructor.
    if not isinstance(name, list):
        return pli.wrap_expr(pycol(name))

    # An empty list takes the same path as a list of names.
    if len(name) == 0:
        return pli.wrap_expr(pycols(name))

    # The kind of list is decided from its first element only.
    head = name[0]
    if isinstance(head, str):
        return pli.wrap_expr(pycols(name))

    head_is_dtype = (
        (isclass(head) and issubclass(head, DataType))
        or isinstance(head, DataType)
    )
    if not head_is_dtype:
        raise ValueError(
            "did expect argument of List[str] or List[DataType]")
    return pli.wrap_expr(_dtype_cols(name))