def from_modin(df: "modin.DataFrame") -> Dataset[ArrowRow]:
    """Build a dataset from the row partitions of a Modin dataframe.

    Args:
        df: The Modin dataframe to convert; it must be using the Ray
            backend.

    Returns:
        Dataset holding Arrow records read from the dataframe.
    """
    # Function-scope import: Modin is only loaded when this function runs.
    from modin.distributed.dataframe.pandas.partitions import (
        unwrap_partitions as _unwrap_partitions,
    )

    # axis=0 requests the partitions along the row axis; they are handed
    # straight to from_pandas_refs.
    return from_pandas_refs(_unwrap_partitions(df, axis=0))
def from_modin(cls, df, num_shards: int = 2):
    """Create a MLDataset from a Modin Dataframe.

    Args:
        df (modin.pandas.DataFrame): A Modin Dataframe. A Modin Series
            also passes the type check below.
        num_shards (int): The number of worker actors to create.

    Returns:
        Whatever ``cls.from_parallel_it`` builds from the iterator over the
        dataframe's row partitions.

    Raises:
        ImportError: If Modin is not installed.
        ValueError: If ``df`` is not a ``modin.pandas`` DataFrame or Series.
    """
    try:
        import modin.pandas as pd
    except ImportError:
        # ``from None`` drops the original traceback so the caller only sees
        # the actionable message.
        raise ImportError("Cannot convert from Modin because "
                          "Modin is not installed.") from None

    if not isinstance(df, (pd.DataFrame, pd.Series)):
        raise ValueError("Must provide a modin.pandas DataFrame or Series")

    from modin.distributed.dataframe.pandas.partitions import (
        unwrap_partitions,
    )

    # Request row partitions explicitly. Without ``axis`` Modin returns the
    # partitions as they are stored (a 2D list of blocks), so every item fed
    # to ``from_items`` would be a list of column blocks rather than a single
    # row partition.
    parts = unwrap_partitions(df, axis=0)
    modin_iter = from_items(parts, num_shards=num_shards, repeat=False)
    return cls.from_parallel_it(modin_iter, batch_size=0, repeated=False)