Example #1
    def _update_engine(cls, _):
        factory_name = get_current_backend() + "Factory"
        try:
            cls.__engine = getattr(factories, factory_name)
        except AttributeError:
            if not IsExperimental.get():
                # allow missing factories in experimental mode only
                if hasattr(factories, "Experimental" + factory_name):
                    msg = (
                        "{0} on {1} is only accessible through the experimental API.\nRun "
                        "`import modin.experimental.pandas as pd` to use {0} on {1}."
                    )
                else:
                    msg = (
                        "Cannot find a factory for partition '{}' and execution engine '{}'. "
                        "Potential reason might be incorrect environment variable value for "
                        f"{Backend.varname} or {Engine.varname}"
                    )
                raise FactoryNotFoundError(msg.format(Backend.get(), Engine.get()))
            cls.__engine = StubFactory.set_failing_name(factory_name)
        else:
            cls.__engine.prepare()
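
A minimal sketch of the publish/subscribe flow that drives _update_engine (assuming Modin's config Parameter.subscribe API, which invokes the callback once immediately on subscription and again on every put()):

from modin.config import Engine

def on_engine_change(engine_config):
    # the callback receives the config class itself; get() returns the current value
    print("engine is now", engine_config.get())

Engine.subscribe(on_engine_change)  # fires once right away
Engine.put("Python")                # fires every subscribed callback again
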
Example #2
    def predict(
        self,
        data: DMatrix,
        nthread: Optional[int] = cpu_count(),
        **kwargs,
    ):
        """
        Run prediction with a trained booster.

        Parameters
        ----------
        data : DMatrix
            Input data used for prediction.
        nthread : int, optional
            Number of threads to use in each node. By default it is equal to
            the number of threads on the master node.
        **kwargs : dict
            Other parameters are the same as for `xgboost.Booster.predict`.

        Returns
        -------
        ``modin.pandas.DataFrame``
            Modin DataFrame with prediction results.
        """
        LOGGER.info("Prediction started")

        if Engine.get() == "Ray":
            from .xgboost_ray import _predict
        else:
            raise ValueError("Current version supports only Ray engine.")

        assert isinstance(
            data, DMatrix
        ), f"Type of `data` is {type(data)}, but expected {DMatrix}."

        result = _predict(self.copy(), data, nthread, **kwargs)
        LOGGER.info("Prediction finished")

        return result
Example #3
    def predict(
        self,
        data: DMatrix,
        **kwargs,
    ):
        """
        Run distributed prediction with a trained booster.

        During execution it runs ``xgb.predict`` on each worker for a subset of `data`
        and creates a Modin DataFrame with prediction results.

        Parameters
        ----------
        data : modin.experimental.xgboost.DMatrix
            Input data used for prediction.
        **kwargs : dict
            Other parameters are the same as for ``xgboost.Booster.predict``.

        Returns
        -------
        modin.pandas.DataFrame
            Modin DataFrame with prediction results.
        """
        LOGGER.info("Prediction started")

        if Engine.get() == "Ray":
            from .xgboost_ray import _predict
        else:
            raise ValueError("Current version supports only Ray engine.")

        assert isinstance(
            data, DMatrix
        ), f"Type of `data` is {type(data)}, but expected {DMatrix}."

        result = _predict(self.copy(), data, **kwargs)
        LOGGER.info("Prediction finished")

        return result
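
A minimal usage sketch for the API above (assuming a running Ray engine; the data values and objective are made up, and DMatrix is assumed to accept unlabeled data for prediction):

import modin.pandas as pd
import modin.experimental.xgboost as xgb

X = pd.DataFrame({"f0": [1, 2, 3, 4], "f1": [4, 3, 2, 1]})
y = pd.DataFrame([0, 1, 0, 1])

booster = xgb.train({"objective": "binary:logistic"}, xgb.DMatrix(X, y))
predictions = booster.predict(xgb.DMatrix(X))  # a modin.pandas.DataFrame
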
Example #4
    def _update_factory(cls, _):
        """
        Update and prepare the factory with a new one specified via Modin config.

        Parameters
        ----------
        _ : object
            This parameter serves a compatibility purpose.
            It does not affect the result.
        """
        factory_name = get_current_execution() + "Factory"
        try:
            cls.__factory = getattr(factories, factory_name)
        except AttributeError:
            if factory_name == "ExperimentalOmnisciOnRayFactory":
                msg = (
                    "OmniSci storage format no longer needs Ray engine; "
                    "please specify MODIN_ENGINE='native'"
                )
                raise FactoryNotFoundError(msg)
            if not IsExperimental.get():
                # allow missing factories in experimental mode only
                if hasattr(factories, "Experimental" + factory_name):
                    msg = (
                        "{0} on {1} is only accessible through the experimental API.\nRun "
                        "`import modin.experimental.pandas as pd` to use {0} on {1}."
                    )
                else:
                    msg = (
                        "Cannot find a factory for partition '{}' and execution engine '{}'. "
                        "Potential reason might be incorrect environment variable value for "
                        f"{StorageFormat.varname} or {Engine.varname}"
                    )
                raise FactoryNotFoundError(msg.format(StorageFormat.get(), Engine.get()))
            cls.__factory = StubFactory.set_failing_name(factory_name)
        else:
            cls.__factory.prepare()
Example #5
def train(
    params: Dict,
    dtrain: ModinDMatrix,
    *args,
    evals=(),
    nthread: Optional[int] = cpu_count(),
    evenly_data_distribution: Optional[bool] = True,
    **kwargs,
):
    """
    Train XGBoost model.

    Parameters
    ----------
    params : dict
        Booster params.
    dtrain : ModinDMatrix
        Data to be trained against.
    evals : list of pairs (ModinDMatrix, str), default: empty
        List of validation sets for which metrics will be evaluated during training.
        Validation metrics help us track the performance of the model.
    nthread : int, optional
        Number of threads to use in each node. By default it is equal to the
        number of threads on the master node.
    evenly_data_distribution : bool, default: True
        Whether to distribute partitions of the data evenly between nodes.
        If `False`, data transfer between nodes is minimized, but the data
        may not be evenly distributed.
    **kwargs : dict
        Other parameters are the same as for `xgboost.train` except for
        `evals_result`, which is returned as part of the function return value
        instead of as an argument.

    Returns
    -------
    dict
        A dictionary containing trained booster and evaluation history.
        `history` field is the same as `eval_result` from `xgboost.train`.

        .. code-block:: python

            {'booster': xgboost.Booster,
             'history': {'train': {'logloss': ['0.48253', '0.35953']},
                         'eval': {'logloss': ['0.480385', '0.357756']}}}
    """
    LOGGER.info("Training started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _train
    else:
        raise ValueError("Current version supports only Ray engine.")

    result = _train(
        dtrain, nthread, evenly_data_distribution, params, *args, evals=evals, **kwargs
    )
    LOGGER.info("Training finished")
    return result
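
A hedged sketch of calling this older dict-returning train API (names come from the signature above; X and y are hypothetical Modin DataFrames):

dtrain = ModinDMatrix(X, y)
result = train({"objective": "binary:logistic"}, dtrain, evals=[(dtrain, "train")])
booster = result["booster"]  # xgboost.Booster
history = result["history"]  # e.g. {'train': {'logloss': [...]}}
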
Example #6
RAND_HIGH = 100
random_state = np.random.RandomState(seed=42)

try:
    from modin.config import NPartitions

    NPARTITIONS = NPartitions.get()
except ImportError:
    NPARTITIONS = pd.DEFAULT_NPARTITIONS

try:
    from modin.config import TestDatasetSize, AsvImplementation, Engine

    ASV_USE_IMPL = AsvImplementation.get()
    ASV_DATASET_SIZE = TestDatasetSize.get() or "Small"
    ASV_USE_ENGINE = Engine.get()
except ImportError:
    # The same benchmarking code can be run for different versions of Modin, so
    # if importing these config variables fails, we fall back to predefined values
    ASV_USE_IMPL = os.environ.get("MODIN_ASV_USE_IMPL", "modin")
    ASV_DATASET_SIZE = os.environ.get("MODIN_TEST_DATASET_SIZE", "Small")
    ASV_USE_ENGINE = os.environ.get("MODIN_ENGINE", "Ray")

ASV_USE_IMPL = ASV_USE_IMPL.lower()
ASV_DATASET_SIZE = ASV_DATASET_SIZE.lower()
ASV_USE_ENGINE = ASV_USE_ENGINE.lower()

assert ASV_USE_IMPL in ("modin", "pandas")
assert ASV_DATASET_SIZE in ("big", "small")
assert ASV_USE_ENGINE in ("ray", "dask", "python")
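
A small sketch of driving the fallback branch above through environment variables; they must be set before this module is imported, since the values are read at import time:

import os

os.environ["MODIN_ASV_USE_IMPL"] = "pandas"    # "modin" or "pandas"
os.environ["MODIN_TEST_DATASET_SIZE"] = "Big"  # "Big" or "Small"
os.environ["MODIN_ENGINE"] = "Dask"            # "Ray", "Dask" or "Python"
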
Example #7
def read_sql(
    sql,
    con,
    index_col=None,
    coerce_float=True,
    params=None,
    parse_dates=None,
    columns=None,
    chunksize=None,
    partition_column: Optional[str] = None,
    lower_bound: Optional[int] = None,
    upper_bound: Optional[int] = None,
    max_sessions: Optional[int] = None,
) -> DataFrame:
    """
    General documentation is available in `modin.pandas.read_sql`.

    This experimental feature provides distributed reading from a sql file.

    Parameters
    ----------
    sql : str or SQLAlchemy Selectable (select or text object)
        SQL query to be executed or a table name.
    con : SQLAlchemy connectable, str, or sqlite3 connection
        Using SQLAlchemy makes it possible to use any DB supported by that
        library. If a DBAPI2 object, only sqlite3 is supported. The user is responsible
        for engine disposal and connection closure for the SQLAlchemy
        connectable; str connections are closed automatically. See
        `here <https://docs.sqlalchemy.org/en/13/core/connections.html>`_.
    index_col : str or list of str, optional
        Column(s) to set as index (MultiIndex).
    coerce_float : bool, default: True
        Attempts to convert values of non-string, non-numeric objects (like
        decimal.Decimal) to floating point, useful for SQL result sets.
    params : list, tuple or dict, optional
        List of parameters to pass to execute method. The syntax used to pass
        parameters is database driver dependent. Check your database driver
        documentation for which of the five syntax styles, described in PEP 249's
        paramstyle, is supported. E.g. for psycopg2, uses %(name)s, so use
        params={'name': 'value'}.
    parse_dates : list or dict, optional
        - List of column names to parse as dates.
        - Dict of ``{column_name: format string}`` where format string is
          strftime compatible in case of parsing string times, or is one of
          (D, s, ns, ms, us) in case of parsing integer timestamps.
        - Dict of ``{column_name: arg dict}``, where the arg dict corresponds
          to the keyword arguments of :func:`pandas.to_datetime`
          Especially useful with databases without native Datetime support,
          such as SQLite.
    columns : list, optional
        List of column names to select from SQL table (only used when reading
        a table).
    chunksize : int, optional
        If specified, return an iterator where `chunksize` is the
        number of rows to include in each chunk.
    partition_column : str, optional
        Column used to share the data between the workers (MUST be an INTEGER column).
    lower_bound : int, optional
        The minimum value to be requested from the partition_column.
    upper_bound : int, optional
        The maximum value to be requested from the partition_column.
    max_sessions : int, optional
        The maximum number of simultaneous connections to use.

    Returns
    -------
    modin.DataFrame
    """
    Engine.subscribe(_update_engine)
    assert IsExperimental.get(), "This only works in experimental mode"
    _, _, _, kwargs = inspect.getargvalues(inspect.currentframe())
    return DataFrame(query_compiler=FactoryDispatcher.read_sql(**kwargs))
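
A minimal usage sketch for this experimental reader (the SQLite path and table are hypothetical; the table is assumed to have an INTEGER column col1):

import modin.experimental.pandas as pd  # enables experimental mode

df = pd.read_sql(
    "SELECT * FROM test_table",
    "sqlite:///test.db",
    partition_column="col1",
    lower_bound=0,
    upper_bound=1000,
)
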
Example #8
    def __init__(self):
        self.__own_attrs__ = set(type(self).__dict__.keys())
        Engine.subscribe(self.__update_engine)
Example #9
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.distributed.dataframe.pandas import unwrap_partitions, from_partitions
from modin.config import Engine, NPartitions
from modin.pandas.test.utils import df_equals

if Engine.get() == "Ray":
    import ray
if Engine.get() == "Dask":
    from distributed.client import get_client

NPartitions.put(4)


@pytest.mark.parametrize("axis", [None, 0, 1])
def test_unwrap_partitions(axis):
    data = np.random.randint(0, 100, size=(2**16, 2**8))
    df = pd.DataFrame(data)

    if axis is None:
        expected_partitions = df._query_compiler._modin_frame._partitions
        actual_partitions = np.array(unwrap_partitions(df, axis=axis))
Example #10
import pandas
import pytest
import modin.experimental.pandas as pd
from modin.config import Engine
from modin.pandas.test.test_io import (  # noqa: F401
    df_equals,
    eval_io,
    make_sql_connection,
    _make_csv_file,
    teardown_test_files,
)
from modin.pandas.test.utils import get_unique_filename


@pytest.mark.skipif(
    Engine.get() == "Dask",
    reason="Dask does not have experimental API",
)
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    if Engine.get() == "Ray":
        pytest.xfail("Distributed read_sql is broken, see GH#2194")
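        # NOTE: pytest.xfail() raises immediately, so the code below never runs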
        filename = "test_from_sql_distributed.db"
        table = "test_from_sql_distributed"
        conn = make_sql_connection(filename, table)
        query = "select * from {0}".format(table)

        pandas_df = pandas.read_sql(query, conn)
        modin_df_from_query = pd.read_sql(
            query, conn, partition_column="col1", lower_bound=0, upper_bound=6
        )
        modin_df_from_table = pd.read_sql(
Example #11
import pytest
from modin.config import Engine

import modin.experimental.xgboost as xgb
import modin.pandas as pd


@pytest.mark.skipif(
    Engine.get() == "Ray",
    reason="This test doesn't make sense on Ray backend.",
)
@pytest.mark.skipif(
    Engine.get() == "Python",
    reason="This test doesn't make sense on a non-distributed backend (see issue #2938).",
)
def test_backend():
    try:
        xgb.train({}, xgb.DMatrix(pd.DataFrame([0]), pd.DataFrame([0])))
    except ValueError:
        pass
Example #12
def test_engine_wrong_factory():
    with pytest.raises(FactoryNotFoundError):
        Engine.put("BadEngine")
    Engine.put("Python")  # revert engine to default
Example #13
import os
import pandas
import numpy as np
import pyarrow
import pytest

from modin.config import IsExperimental, Engine, Backend

IsExperimental.put(True)
Engine.put("ray")
Backend.put("omnisci")

import modin.pandas as pd
from modin.pandas.test.utils import (
    df_equals,
    bool_arg_values,
    to_pandas,
    test_data_values,
    test_data_keys,
    generate_multiindex,
    eval_general,
)


def set_execution_mode(frame, mode, recursive=False):
Example #14
# Licensed to Modin Development Team under one or more contributor license agreements.
# See the NOTICE file distributed with this work for additional information regarding
# copyright ownership.  The Modin Development Team licenses this file to you under the
# Apache License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific language
# governing permissions and limitations under the License.

import pytest
from modin.config import Engine

import modin.experimental.xgboost as xgb


@pytest.mark.skipif(
    Engine.get() == "Ray",
    reason="This test doesn't make sense on Ray backend.",
)
@pytest.mark.parametrize("func", ["train", "predict"])
def test_backend(func):
    try:
        getattr(xgb, func)({}, xgb.ModinDMatrix(None, None))
    except ValueError:
        pass
Example #15
def train(
    params: Dict,
    dtrain: DMatrix,
    *args,
    evals=(),
    num_actors: Optional[int] = None,
    evals_result: Optional[Dict] = None,
    **kwargs,
):
    """
    Run distributed training of XGBoost model.

    During execution it evenly distributes `dtrain` between workers according
    to the IP addresses of its partitions (if `dtrain` is not evenly
    distributed across the nodes, some partitions will be re-distributed
    between them), runs ``xgb.train`` on each worker for a subset of `dtrain`,
    and reduces the training results of each worker using Rabit Context.

    Parameters
    ----------
    params : dict
        Booster params.
    dtrain : modin.experimental.xgboost.DMatrix
        Data to be trained against.
    *args : iterable
        Other parameters for `xgboost.train`.
    evals : list of pairs (modin.experimental.xgboost.DMatrix, str), default: empty
        List of validation sets for which metrics will be evaluated during training.
        Validation metrics will help us track the performance of the model.
    num_actors : int, optional
        Number of actors for training. If unspecified, this value will be
        computed automatically.
    evals_result : dict, optional
        Dict to store evaluation results in.
    **kwargs : dict
        Other parameters are the same as `xgboost.train`.

    Returns
    -------
    modin.experimental.xgboost.Booster
        A trained booster.
    """
    LOGGER.info("Training started")

    if Engine.get() == "Ray":
        from .xgboost_ray import _train
    else:
        raise ValueError("Current version supports only Ray engine.")

    assert isinstance(
        dtrain, DMatrix
    ), f"Type of `dtrain` is {type(dtrain)}, but expected {DMatrix}."
    result = _train(
        dtrain, params, *args, num_actors=num_actors, evals=evals, **kwargs
    )
    if isinstance(evals_result, dict):
        evals_result.update(result["history"])

    LOGGER.info("Training finished")
    return Booster(model_file=result["booster"])
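
A short sketch of the evals_result out-parameter documented above; the params are illustrative and dtrain is as in the signature:

evals_result = {}
booster = train(
    {"objective": "binary:logistic"},
    dtrain,
    evals=[(dtrain, "train")],
    evals_result=evals_result,
)
# evals_result now holds the training history, e.g. {"train": {"logloss": [...]}}
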
Example #16
import pandas
import pytest
import modin.experimental.pandas as pd
from modin.config import Engine
from modin.pandas.test.test_io import (  # noqa: F401
    df_equals,
    make_sql_connection,
)


@pytest.mark.skipif(
    Engine.get() == "Dask",
    reason="Dask does not have experimental API",
)
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    if Engine.get() == "Ray":
        pytest.xfail("Distributed read_sql is broken, see GH#2194")
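        # NOTE: pytest.xfail() raises immediately, so the code below never runs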
        filename = "test_from_sql_distributed.db"
        table = "test_from_sql_distributed"
        conn = make_sql_connection(filename, table)
        query = "select * from {0}".format(table)

        pandas_df = pandas.read_sql(query, conn)
        modin_df_from_query = pd.read_sql(
            query, conn, partition_column="col1", lower_bound=0, upper_bound=6
        )
        modin_df_from_table = pd.read_sql(
Example #17
    def predict(
        self,
        data: DMatrix,
        **kwargs,
    ):
        """
        Run distributed prediction with a trained booster.

        During execution it runs ``xgb.predict`` on each worker for a subset of `data`
        and creates a Modin DataFrame with prediction results.

        Parameters
        ----------
        data : modin.experimental.xgboost.DMatrix
            Input data used for prediction.
        **kwargs : dict
            Other parameters are the same as for ``xgboost.Booster.predict``.

        Returns
        -------
        modin.pandas.DataFrame
            Modin DataFrame with prediction results.
        """
        LOGGER.info("Prediction started")

        if Engine.get() == "Ray":
            from .xgboost_ray import _predict
        else:
            raise ValueError("Current version supports only Ray engine.")

        assert isinstance(
            data, DMatrix
        ), f"Type of `data` is {type(data)}, but expected {DMatrix}."

        if (
            self.feature_names is not None
            and data.feature_names is not None
            and self.feature_names != data.feature_names
        ):
            data_missing = set(self.feature_names) - set(data.feature_names)
            self_missing = set(data.feature_names) - set(self.feature_names)

            msg = "feature_names mismatch: {0} {1}"

            if data_missing:
                msg += (
                    "\nexpected "
                    + ", ".join(str(s) for s in data_missing)
                    + " in input data"
                )

            if self_missing:
                msg += (
                    "\ntraining data did not have the following fields: "
                    + ", ".join(str(s) for s in self_missing)
                )

            raise ValueError(msg.format(self.feature_names, data.feature_names))

        result = _predict(self.copy(), data, **kwargs)
        LOGGER.info("Prediction finished")

        return result
Example #18
import pandas
import pytest
import modin.experimental.pandas as pd
from modin.config import Engine
from modin.utils import get_current_execution
from modin.pandas.test.test_io import make_sql_connection  # noqa: F401
from modin.pandas.test.utils import (
    df_equals,
    get_unique_filename,
    teardown_test_files,
    test_data,
    parse_dates_values_by_id,
    time_parsing_csv_path,
)
from modin.test.test_utils import warns_that_defaulting_to_pandas


@pytest.mark.skipif(
    Engine.get() == "Dask",
    reason="Dask does not have experimental API",
)
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    if Engine.get() == "Ray":
        filename = "test_from_sql_distributed.db"
        table = "test_from_sql_distributed"
        conn = make_sql_connection(filename, table)
        query = "select * from {0}".format(table)

        pandas_df = pandas.read_sql(query, conn)
        modin_df_from_query = pd.read_sql(
            query,
            conn,
            partition_column="col1",
            lower_bound=0,
Example #19
        return cls.__factory._read_sql_table(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_sql_query)
    def read_sql_query(cls, **kwargs):
        return cls.__factory._read_sql_query(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._read_spss)
    def read_spss(cls, **kwargs):
        return cls.__factory._read_spss(**kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_sql)
    def to_sql(cls, *args, **kwargs):
        return cls.__factory._to_sql(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_pickle)
    def to_pickle(cls, *args, **kwargs):
        return cls.__factory._to_pickle(*args, **kwargs)

    @classmethod
    @_inherit_docstrings(factories.BaseFactory._to_csv)
    def to_csv(cls, *args, **kwargs):
        return cls.__factory._to_csv(*args, **kwargs)


Engine.subscribe(FactoryDispatcher._update_factory)
Backend.subscribe(FactoryDispatcher._update_factory)
Example #20
        num_cpus = remote_ray.cluster_resources()["CPU"]
    elif publisher.get() == "Cloudpython":
        from modin.experimental.cloud import get_connection

        get_connection().modules["modin"].set_backends("Python")

    elif publisher.get() not in _NOINIT_ENGINES:
        raise ImportError(
            "Unrecognized execution engine: {}.".format(publisher.get())
        )

    _is_first_update[publisher.get()] = False
    DEFAULT_NPARTITIONS = max(4, int(num_cpus))


Engine.subscribe(_update_engine)

from .. import __version__
from .dataframe import DataFrame
from .io import (
    read_csv,
    read_parquet,
    read_json,
    read_html,
    read_clipboard,
    read_excel,
    read_hdf,
    read_feather,
    read_stata,
    read_sas,
    read_pickle,
Example #21
def make_wrapped_class(local_cls: type, rpyc_wrapper_name: str):
    """
    Replaces given local class in its module with a replacement class
    which has __new__ defined (a dual-nature class).
    This new class is instantiated differently depending on
    whether this is done in remote or local context.

    In local context we effectively get the same behaviour, but in remote
    context the created class is actually of separate type which
    proxies most requests to a remote end.

    Parameters
    ----------
    local_cls: class
        The class to replace with a dual-nature class
    rpyc_wrapper_name: str
        The function *name* to make a proxy class type.
        Note that this is specifically taken as string to not import
        "rpyc_proxy" module in top-level, as it requires RPyC to be
        installed, and not all users of Modin (even in experimental mode)
        need remote context.
    """
    # get a copy of local_cls attributes' dict but skip _very_ special attributes,
    # because copying them to a different type leads to them not working.
    # Python should create new descriptors automatically for us instead.
    namespace = {
        name: value
        for name, value in local_cls.__dict__.items()
        if not isinstance(value, types.GetSetDescriptorType)
    }
    namespace["__real_cls__"] = None
    namespace["__new__"] = None
    # define a new class the same way original was defined but with replaced
    # metaclass and a few more attributes in namespace
    result = RemoteMeta(local_cls.__name__, local_cls.__bases__, namespace)

    def make_new(__class__):
        """
        Define a __new__() with a closure-bound __class__, needed for super() to work.
        """
        # update '__class__' magic closure value - used by super()
        for attr in __class__.__dict__.values():
            if not callable(attr):
                continue
            cells = getattr(attr, "__closure__", None) or ()
            for cell in cells:
                if cell.cell_contents is local_cls:
                    cell.cell_contents = __class__

        def __new__(cls, *a, **kw):
            if cls is result and cls.__real_cls__ is not result:
                return cls.__real_cls__(*a, **kw)
            return super().__new__(cls)

        __class__.__new__ = __new__

    make_new(result)
    setattr(sys.modules[local_cls.__module__], local_cls.__name__, result)
    _KNOWN_DUALS[local_cls] = result

    def update_class(_):
        if Engine.get() in REMOTE_ENGINES:
            from . import rpyc_proxy

            result.__real_cls__ = getattr(rpyc_proxy, rpyc_wrapper_name)(result)
        else:
            result.__real_cls__ = result

    Engine.subscribe(update_class)
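
The closure-cell loop inside make_new relies on a CPython detail: any method that uses zero-argument super() (or references __class__) carries an implicit __class__ closure cell, and repointing that cell at the replacement class keeps super() working after the swap. A standalone sketch of the mechanism:

class A:
    def describe(self):
        return super().__repr__()  # zero-arg super() reads the __class__ cell

cell = A.describe.__closure__[0]
assert cell.cell_contents is A  # the cell holds the defining class
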
Example #22
    def read_sql(cls, **kwargs):
        return cls.__engine._read_sql(**kwargs)

    @classmethod
    def read_fwf(cls, **kwargs):
        return cls.__engine._read_fwf(**kwargs)

    @classmethod
    def read_sql_table(cls, **kwargs):
        return cls.__engine._read_sql_table(**kwargs)

    @classmethod
    def read_sql_query(cls, **kwargs):
        return cls.__engine._read_sql_query(**kwargs)

    @classmethod
    def read_spss(cls, **kwargs):
        return cls.__engine._read_spss(**kwargs)

    @classmethod
    def to_sql(cls, *args, **kwargs):
        return cls.__engine._to_sql(*args, **kwargs)

    @classmethod
    def to_pickle(cls, *args, **kwargs):
        return cls.__engine._to_pickle(*args, **kwargs)


Engine.subscribe(EngineDispatcher._update_engine)
Backend.subscribe(EngineDispatcher._update_engine)
Example #23
import numpy as np
import pandas
import pytest

import modin.pandas as pd
from modin.distributed.dataframe.pandas import unwrap_partitions, from_partitions
from modin.config import Engine, NPartitions
from modin.pandas.test.utils import df_equals
from modin.pandas.indexing import compute_sliced_len
from modin.data_management.factories.dispatcher import FactoryDispatcher

PartitionClass = (
    FactoryDispatcher.get_factory().io_cls.frame_cls._partition_mgr_cls._partition_class
)

if Engine.get() == "Ray":
    import ray

    put_func = ray.put
    get_func = ray.get
    FutureType = ray.ObjectRef
elif Engine.get() == "Dask":
    from distributed.client import default_client
    from distributed import Future

    put_func = lambda x: default_client().scatter(x)  # noqa: E731
    get_func = lambda x: x.result()  # noqa: E731
    FutureType = Future
elif Engine.get() == "Python":
    put_func = lambda x: x  # noqa: E731
    get_func = lambda x: x  # noqa: E731
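
A short sketch of how these engine-agnostic helpers are used in tests, whichever branch above was taken (the snippet is truncated, but put_func and get_func are defined for every engine):

future = put_func(42)          # ray.ObjectRef, distributed.Future, or the raw value
assert get_func(future) == 42  # materializes the value regardless of engine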