def test_engine_switch():
    """
    Check that the dispatcher follows changes of ``Engine`` and ``Backend``.

    Each setting is switched to ``"Test"`` in turn, and the factory returned by
    ``EngineDispatcher.get_engine()`` (and its ``io_cls``) is compared with the
    expected test factory. Both settings are process-wide, so each one is put
    back in a ``finally`` clause: a failing assertion must not leave the
    ``"Test"`` configuration active for the tests that run afterwards.
    """
    Engine.put("Test")
    try:
        assert EngineDispatcher.get_engine() == PandasOnTestFactory
        assert EngineDispatcher.get_engine().io_cls == "Foo"
    finally:
        # Runs on success and on assertion failure alike.
        Engine.put("Python")  # revert engine to default

    Backend.put("Test")
    try:
        assert EngineDispatcher.get_engine() == TestOnPythonFactory
        assert EngineDispatcher.get_engine().io_cls == "Bar"
    finally:
        # Runs on success and on assertion failure alike.
        Backend.put("Pandas")  # revert backend to default
def test_engine_switch():
    """
    Check that the dispatcher follows ``execution_engine`` and ``partition_format``.

    Each setting is switched to ``"Test"`` in turn, and the factory returned by
    ``EngineDispatcher.get_engine()`` (and its ``io_cls``) is compared with the
    expected test factory. Both settings are process-wide, so each one is put
    back in a ``finally`` clause: a failing assertion must not leave the
    ``"Test"`` configuration active for the tests that run afterwards.
    """
    execution_engine.put("Test")
    try:
        assert EngineDispatcher.get_engine() == PandasOnTestFactory
        assert EngineDispatcher.get_engine().io_cls == "Foo"
    finally:
        # Runs on success and on assertion failure alike.
        execution_engine.put("Python")  # revert engine to default

    partition_format.put("Test")
    try:
        assert EngineDispatcher.get_engine() == TestOnPythonFactory
        assert EngineDispatcher.get_engine().io_cls == "Bar"
    finally:
        # Runs on success and on assertion failure alike.
        partition_format.put("Pandas")  # revert partition format to default
def from_partitions(partitions, axis):
    """
    Build a DataFrame out of partitions that already exist remotely.

    Parameters
    ----------
    partitions : list
        Ray.ObjectRef/Dask.Future handles to the partitions (which of the two
        depends on the engine used). Every element may instead be a tuple
        ``(ip, partition)`` holding the handle to the node ip address first and
        the handle to the partition second. Only the first element is inspected
        to decide which of the two forms was passed.
    axis : None, 0 or 1
        Tells how ``partitions`` has to be interpreted:

        * ``axis=0`` - a flat list of row partitions
        * ``axis=1`` - a flat list of column partitions
        * ``axis=None`` - a 2D list (list of rows) of partitions

    Returns
    -------
    DataFrame
        DataFrame instance backed by the given remote partitions.

    Raises
    ------
    ValueError
        If ``axis`` is anything other than ``None``, ``0`` or ``1``.
    """
    from modin.data_management.factories.dispatcher import EngineDispatcher

    # Resolve the frame, partition manager and partition classes of the current engine.
    frame_cls = EngineDispatcher.get_engine().io_cls.frame_cls
    mgr_cls = frame_cls._frame_mgr_cls
    part_cls = mgr_cls._partition_class

    def _wrap(item, has_ip):
        """Turn one input element into a partition object, attaching the ip if given."""
        if has_ip:
            ip, partition = item
            return part_cls(partition, ip=ip)
        return part_cls(item)

    # The internal Modin Frame keeps its partitions in a 2D NumPy array, so the
    # input is first arranged as a nested list with the matching layout.
    if axis is None:
        # Already 2D: keep the layout, wrap every cell.
        has_ip = isinstance(partitions[0][0], tuple)
        grid = [[_wrap(item, has_ip) for item in row] for row in partitions]
    elif axis == 0:
        # Row partitions: one partition per array row.
        has_ip = isinstance(partitions[0], tuple)
        grid = [[_wrap(item, has_ip)] for item in partitions]
    elif axis == 1:
        # Column partitions: a single array row holding all of them.
        has_ip = isinstance(partitions[0], tuple)
        grid = [[_wrap(item, has_ip) for item in partitions]]
    else:
        raise ValueError(
            f"Got unacceptable value of axis {axis}. Possible values are {0}, {1} or {None}."
        )
    parts = np.array(grid)

    row_labels = mgr_cls.get_indices(0, parts, lambda df: df.axes[0])
    col_labels = mgr_cls.get_indices(1, parts, lambda df: df.axes[1])
    modin_frame = frame_cls(parts, row_labels, col_labels)
    return DataFrame(query_compiler=PandasQueryCompiler(modin_frame))
def test_set_backends():
    """
    Check that ``set_backends("Bar", "Foo")`` makes the dispatcher pick ``FooOnBarFactory``.

    NOTE(review): the expected factory name suggests the first argument is the
    engine and the second the backend - confirm against ``set_backends``.
    """
    set_backends("Bar", "Foo")
    selected_factory = EngineDispatcher.get_engine()
    assert selected_factory == FooOnBarFactory
def test_default_engine():
    """
    Check the factory the dispatcher returns when the test changes no setting.

    It has to be a subclass of ``factories.BaseFactory`` and expose a truthy
    ``io_cls`` attribute.
    """
    resolve_factory = EngineDispatcher.get_engine
    # The dispatcher is queried separately for each check.
    assert issubclass(resolve_factory(), factories.BaseFactory)
    assert resolve_factory().io_cls