Exemplo n.º 1
0
def from_modin(df: "modin.DataFrame") -> Dataset[ArrowRow]:
    """Build a dataset from the partitions of a Modin dataframe.

    The dataframe is unwrapped into its partitions along axis 0 using
    Modin's ``unwrap_partitions``; those partitions are then handed to
    ``from_pandas_refs``, whose result is returned unchanged.

    Args:
        df: The Modin dataframe to convert. Modin must be running on the
            Ray backend.

    Returns:
        Dataset holding Arrow records read from the dataframe.
    """
    # Imported inside the function so Modin is only required when this
    # conversion is actually invoked.
    from modin.distributed.dataframe.pandas.partitions import (
        unwrap_partitions as _unwrap,
    )

    return from_pandas_refs(_unwrap(df, axis=0))
Exemplo n.º 2
0
    def from_modin(cls, df, num_shards: int = 2):
        """Build an MLDataset from a Modin DataFrame or Series.

        Args:
            df (modin.pandas.DataFrame | modin.pandas.Series): The Modin
                object to convert. Any other type is rejected.
            num_shards (int): The number of worker actors to create;
                forwarded to ``from_items``.

        Returns:
            The result of ``cls.from_parallel_it`` applied to the iterator
            built over the unwrapped Modin partitions.

        Raises:
            ImportError: If ``modin.pandas`` cannot be imported.
            ValueError: If ``df`` is neither a Modin DataFrame nor a Series.
        """
        try:
            import modin.pandas as modin_pd
        except ImportError:
            # ``from None`` hides the original import traceback so the
            # caller only sees the friendlier message.
            message = ("Cannot convert from Modin because "
                       "Modin is not installed.")
            raise ImportError(message) from None

        accepted_types = (modin_pd.DataFrame, modin_pd.Series)
        if not isinstance(df, accepted_types):
            raise ValueError("Must provide a modin.pandas DataFrame or Series")

        # Imported only after validation, so a wrong argument type is
        # reported before this second Modin import is attempted.
        from modin.distributed.dataframe.pandas.partitions import (
            unwrap_partitions as _unwrap,
        )

        # NOTE(review): no ``axis`` is passed here, so Modin's default
        # partition layout is used -- confirm this is what ``from_items``
        # expects.
        shard_iter = from_items(
            _unwrap(df), num_shards=num_shards, repeat=False
        )
        return cls.from_parallel_it(shard_iter, batch_size=0, repeated=False)