Exemplo n.º 1
def test_backend_hinting_and_constraints_with_custom_backends(capsys):
    """Check how ``prefer``/``require`` interact with user-defined backends.

    A custom backend that declares ``supports_sharedmem = True`` must be kept
    for both a ``prefer='processes'`` hint and a ``require='sharedmem'``
    constraint.  A custom backend that declares ``supports_sharedmem = False``
    must be replaced by ``ThreadingBackend`` (with a message on stdout when
    ``verbose`` is set) when ``require='sharedmem'`` is requested, and passing
    it explicitly together with ``require='sharedmem'`` must raise
    ``ValueError``.
    """
    # Custom backends can declare that they use threads and have shared memory
    # semantics:
    class MyCustomThreadingBackend(ParallelBackendBase):
        supports_sharedmem = True
        use_threads = True

        def apply_async(self):
            # Only backend selection is exercised; no task is dispatched.
            pass

        def effective_n_jobs(self, n_jobs):
            return n_jobs

    with parallel_backend(MyCustomThreadingBackend()):
        p = Parallel(n_jobs=2, prefer='processes')  # ignored
        assert type(p._backend) == MyCustomThreadingBackend

        p = Parallel(n_jobs=2, require='sharedmem')
        assert type(p._backend) == MyCustomThreadingBackend

    class MyCustomProcessingBackend(ParallelBackendBase):
        supports_sharedmem = False
        use_threads = False

        def apply_async(self):
            # Only backend selection is exercised; no task is dispatched.
            pass

        def effective_n_jobs(self, n_jobs):
            return n_jobs

    with parallel_backend(MyCustomProcessingBackend()):
        p = Parallel(n_jobs=2, prefer='processes')
        assert type(p._backend) == MyCustomProcessingBackend

        # Nothing must have been printed so far.
        out, err = capsys.readouterr()
        assert out == ""
        assert err == ""

        p = Parallel(n_jobs=2, require='sharedmem', verbose=10)
        assert type(p._backend) == ThreadingBackend

        # The fallback to ThreadingBackend is announced on stdout only.
        out, err = capsys.readouterr()
        expected = ("Using ThreadingBackend as joblib.Parallel backend "
                    "instead of MyCustomProcessingBackend as the latter "
                    "does not provide shared memory semantics.")
        assert out.strip() == expected
        assert err == ""

    with raises(ValueError):
        Parallel(backend=MyCustomProcessingBackend(), require='sharedmem')
Exemplo n.º 2
def test_thread_bomb_mitigation(backend):
    # Test that recursive parallelism raises a RecursionError rather than
    # saturating the operating system resources by creating an unbounded
    # number of threads.
    with parallel_backend(backend, n_jobs=2):
        with raises(RecursionError):
            # NOTE(review): the statement expected under this ``raises`` block
            # (the recursive parallel call under test) is missing from this
            # excerpt; restore it from the original test before running.
Exemplo n.º 3
def test_global_parallel_backend():
    """Check ``parallel_backend`` used as a plain call, outside ``with``.

    Calling ``parallel_backend('threading')`` without a ``with`` block is
    expected to change the backend picked up by new ``Parallel`` objects until
    ``unregister()`` is called, after which the backend type seen before the
    call must be active again.
    """
    default = Parallel()._backend

    pb = parallel_backend('threading')
    try:
        assert isinstance(Parallel()._backend, ThreadingBackend)
    finally:
        # Always undo the global registration, even if the assertion above
        # fails, so the threading backend does not leak into other tests.
        pb.unregister()

    assert type(Parallel()._backend) is type(default)
Exemplo n.º 4
def test_external_backends():
    """Check that a backend declared in ``EXTERNAL_BACKENDS`` can be selected.

    ``register_foo`` is stored as the hook for the name ``'foo'``; selecting
    ``'foo'`` through ``parallel_backend`` is then expected to yield the
    ``ThreadingBackend`` that the hook puts in ``BACKENDS``.
    """
    def register_foo():
        BACKENDS['foo'] = ThreadingBackend

    EXTERNAL_BACKENDS['foo'] = register_foo
    try:
        with parallel_backend('foo'):
            assert isinstance(Parallel()._backend, ThreadingBackend)
    finally:
        # Remove the temporary entries so the global registries are left as
        # they were found and other tests are not affected.
        EXTERNAL_BACKENDS.pop('foo', None)
        BACKENDS.pop('foo', None)
Exemplo n.º 5
def test_sklearn_cv():
    """Cross-validate a linear SVC on iris through the 'spark' backend.

    The five fold scores are compared with reference values using
    ``pytest.approx(score, 0.01)``, first with ``n_jobs=3`` and then with the
    backend's default ``n_jobs``.
    """
    reference = [0.97, 1.0, 0.97, 0.97, 1.0]

    def check_fold_scores(fold_scores):
        # Compare each of the five folds with its reference value.
        for fold in range(5):
            assert (pytest.approx(fold_scores[fold], 0.01) == reference[fold])

    dataset = datasets.load_iris()
    classifier = svm.SVC(kernel='linear', C=1)

    with parallel_backend('spark', n_jobs=3):
        explicit_scores = cross_val_score(
            classifier, dataset.data, dataset.target, cv=5)
    check_fold_scores(explicit_scores)

    # test with default n_jobs=-1
    with parallel_backend('spark'):
        default_scores = cross_val_score(
            classifier, dataset.data, dataset.target, cv=5)
    check_fold_scores(default_scores)
Exemplo n.º 6
def test_simple():
    """Run a trivial workload on the 'spark' backend, then a failing one.

    The first part checks that ``inc`` applied in parallel to ``range(10)``
    gives the same list as applying it sequentially.  The second part checks
    that a parallel run of ``slow_raise_value_error`` raises.
    """
    values = range(10)

    with parallel_backend('spark') as (active_backend, _):
        parallel_result = Parallel(n_jobs=5)(
            delayed(inc)(value) for value in values)
        sequential_result = [inc(value) for value in values]
        assert parallel_result == sequential_result

    with pytest.raises(BaseException):
        runner = Parallel(n_jobs=5)
        # Only the task with index 3 is called with a true flag.
        runner(delayed(slow_raise_value_error)(index == 3)
               for index in values)
Exemplo n.º 7
def test_backend_hinting_and_constraints():
    """Check backend selection under ``prefer`` hints and ``require``
    constraints, with and without an explicit backend choice."""
    # (extra Parallel keyword arguments, backend type expected in return)
    selection_cases = [
        ({}, LokyBackend),
        ({'prefer': 'threads'}, ThreadingBackend),
        ({'prefer': 'processes'}, LokyBackend),
        ({'require': 'sharedmem'}, ThreadingBackend),
    ]
    for n_jobs in [1, 2, -1]:
        for extra_kwargs, expected_backend in selection_cases:
            runner = Parallel(n_jobs=n_jobs, **extra_kwargs)
            assert type(runner._backend) == expected_backend

    # Explicit backend selection can override backend hinting although it
    # is useless to pass a hint when selecting a backend.
    explicit = Parallel(n_jobs=2, backend='loky', prefer='threads')
    assert type(explicit._backend) == LokyBackend

    with parallel_backend('loky', n_jobs=2):
        # Explicit backend selection by the user with the context manager
        # should be respected when combined with backend hints only.
        hinted = Parallel(prefer='threads')
        assert type(hinted._backend) == LokyBackend
        assert hinted.n_jobs == 2

    with parallel_backend('loky', n_jobs=2):
        # Locally hard-coded n_jobs value is respected.
        hinted_with_jobs = Parallel(n_jobs=3, prefer='threads')
        assert type(hinted_with_jobs._backend) == LokyBackend
        assert hinted_with_jobs.n_jobs == 3

    with parallel_backend('loky', n_jobs=2):
        # Explicit backend selection by the user with the context manager
        # should be ignored when the Parallel call has hard constraints.
        # In this case, the default backend that supports shared memory is
        # used and the default number of processes is used.
        constrained = Parallel(require='sharedmem')
        assert type(constrained._backend) == ThreadingBackend
        assert constrained.n_jobs == 1

    with parallel_backend('loky', n_jobs=2):
        # A hard constraint combined with a locally hard-coded n_jobs value.
        constrained_with_jobs = Parallel(n_jobs=3, require='sharedmem')
        assert type(constrained_with_jobs._backend) == ThreadingBackend
        assert constrained_with_jobs.n_jobs == 3
Exemplo n.º 8
def test_nested_parallelism_with_dask():
    """Check that nested ``Parallel`` calls keep using the dask backend.

    ``_recursive_backend_info`` is run under ``parallel_backend('dask')``,
    twice with a large array argument and once without arguments; every one
    of the four reported levels must be a ``DaskDistributedBackend``.

    The test is skipped when ``distributed`` cannot be imported.
    """
    distributed = pytest.importorskip('distributed')

    # 10 MB of data as argument to trigger implicit scattering
    data = np.ones(int(1e7), dtype=np.uint8)

    # Use the client as a context manager so that it is closed when the test
    # ends, whether the assertions pass or not.
    with distributed.Client(n_workers=2, threads_per_worker=2):
        for _ in range(2):
            with parallel_backend('dask'):
                backend_types_and_levels = _recursive_backend_info(data=data)
            assert len(backend_types_and_levels) == 4
            assert all(name == 'DaskDistributedBackend'
                       for name, _ in backend_types_and_levels)

        # No argument
        with parallel_backend('dask'):
            backend_types_and_levels = _recursive_backend_info()
        assert len(backend_types_and_levels) == 4
        assert all(name == 'DaskDistributedBackend'
                   for name, _ in backend_types_and_levels)
Exemplo n.º 9
def test_nested_parallel_limit(backend):
    """Check which backend is reported at each level of nested parallelism.

    The top level must report the requested backend, the first nested level
    ``ThreadingBackend`` and the two deeper levels ``SequentialBackend``.
    """
    with parallel_backend(backend, n_jobs=2):
        observed = _recursive_backend_info()

    # The class name is derived from the backend name, e.g. 'loky' gives
    # 'LokyBackend'.
    nested_types = ['ThreadingBackend', 'SequentialBackend',
                    'SequentialBackend']
    expected = [(backend.title() + 'Backend', 0)]
    expected.extend(
        (type_name, level)
        for level, type_name in enumerate(nested_types, start=1)
    )
    assert observed == expected
Exemplo n.º 10
async def get_variables():
    """Rebuild the global ``WITH_UPVOTE`` list from the global ``PROJECTS``.

    ``check_project_upvote`` is run on a shallow copy of ``PROJECTS`` with
    four jobs on the threading backend; every result that is not ``None`` is
    kept.  A summary line is printed at the end.
    """
    global PROJECTS, WITH_UPVOTE, UPTIME
    projects_snapshot = copy.copy(PROJECTS)
    # issue https://github.com/scikit-learn/scikit-learn/issues/8920
    with parallel_backend('threading'):
        upvote_jobs = (
            delayed(check_project_upvote)(project)
            for project in projects_snapshot
        )
        checked = Parallel(n_jobs=4)(upvote_jobs)
        WITH_UPVOTE = [entry for entry in checked if entry is not None]
    print(f"found {len(WITH_UPVOTE)} MRBV_BOT_UPVOTE in {len(PROJECTS)} projects")
Exemplo n.º 11
def test_nested_backend_context_manager():
    """Check that nested parallel calls use ``ThreadingBackend`` by default.

    For each outer backend, every nested call must see ``ThreadingBackend``
    as the active backend type, and the two process ids it collects must be
    identical.
    """

    def get_nested_pids():
        # Runs inside a worker of the outer Parallel call.
        assert _active_backend_type() == ThreadingBackend
        inner_tasks = (delayed(os.getpid)() for _ in range(2))
        return Parallel(n_jobs=2)(inner_tasks)

    for outer_backend in ('threading', 'loky', 'multiprocessing'):
        with parallel_backend(outer_backend):
            outer_tasks = (delayed(get_nested_pids)() for _ in range(10))
            collected = Parallel(n_jobs=2)(outer_tasks)
            for nested_pids in collected:
                distinct_pids = set(nested_pids)
                assert len(distinct_pids) == 1
Exemplo n.º 12
async def get_merges():
    """Collect the merge requests of every entry in the global ``WITH_UPVOTE``.

    ``check_merge_upvote`` is run on a shallow copy of ``WITH_UPVOTE`` with
    four jobs on the threading backend.  Results that are ``None`` are
    dropped and the remaining per-entry collections are flattened into one
    list.  A summary line is printed at the end.
    """
    global WITH_UPVOTE
    global PROJECTS
    global UPTIME
    this_WITH_UPVOTE = copy.copy(WITH_UPVOTE)
    with parallel_backend('threading'):
        data = Parallel(n_jobs=4)(delayed(check_merge_upvote)(data) for data in this_WITH_UPVOTE)
        # Flatten in a single pass; repeatedly building ``buff + chunk``
        # copies the accumulated list every time.
        # NOTE(review): MERGE_REQUESTS is not declared ``global`` here, so it
        # is only a local of this function - confirm whether it was meant to
        # be published at module level like WITH_UPVOTE in get_variables().
        MERGE_REQUESTS = [
            merge_request
            for mergerequests in data
            if mergerequests is not None
            for merge_request in mergerequests
        ]
    print(f"found {len(MERGE_REQUESTS)} merges with {len(WITH_UPVOTE)} MRBV_BOT_UPVOTE in {len(PROJECTS)} projects")
Exemplo n.º 13
def check_backend_context_manager(backend_name):
    """Check the active backend and ``n_jobs`` inside ``parallel_backend``.

    Under ``parallel_backend(backend_name, n_jobs=3)`` the active ``n_jobs``,
    ``effective_n_jobs(3)`` and the ``n_jobs`` of a default ``Parallel`` must
    all be 3.  For the names ``"multiprocessing"``, ``"threading"`` and any
    name starting with ``"test_"`` the backend type is checked as well; other
    names get no type check.
    """
    with parallel_backend(backend_name, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert_equal(active_n_jobs, 3)
        assert_equal(effective_n_jobs(3), 3)
        runner = Parallel()
        assert_equal(runner.n_jobs, 3)

        # Pick the backend class expected for this name, if any.
        if backend_name == "multiprocessing":
            expected_type = MultiprocessingBackend
        elif backend_name == "threading":
            expected_type = ThreadingBackend
        elif backend_name.startswith("test_"):
            expected_type = FakeParallelBackend
        else:
            expected_type = None

        if expected_type is not None:
            assert_equal(type(active_backend), expected_type)
            assert_equal(type(runner._backend), expected_type)
Exemplo n.º 14
Exemplo n.º 15
        def __call__(self):
            """Run every ``(func, args, kwargs)`` item and return the results.

            Each positional and keyword argument is passed through
            ``ray_get_if_needed`` before the call.

            Returns:
                A list with one result per entry of ``self.items``, in order.
            """
            # Exactly the same as in BatchedCalls, with the
            # difference being that it gets args and kwargs from
            # object store (which have been put in there by
            # put_items_in_object_store)

            # Set the default nested backend to self._backend but do
            # not change the default number of processes to -1
            with parallel_backend(self._backend, n_jobs=self._n_jobs):
                return [
                    func(
                        *[ray_get_if_needed(arg) for arg in args],
                        **{k: ray_get_if_needed(v) for k, v in kwargs.items()},
                    )
                    for func, args, kwargs in self.items
                ]
Exemplo n.º 16
Exemplo n.º 17
Exemplo n.º 18
def test_nested_parallelism_limit(backend):
    """Check which backend is reported at each level of nested parallelism.

    The top level must report the requested backend.  The first nested level
    must report ``SequentialBackend`` when ``cpu_count()`` is 1 and
    ``ThreadingBackend`` otherwise.  The two deeper levels must report
    ``SequentialBackend``.
    """
    with parallel_backend(backend, n_jobs=2):
        backend_types_and_levels = _recursive_backend_info()

    if cpu_count() == 1:
        second_level_backend_type = 'SequentialBackend'
    else:
        second_level_backend_type = 'ThreadingBackend'

    # The class name is derived from the backend name, e.g. 'loky' gives
    # 'LokyBackend'.
    top_level_backend_type = backend.title() + 'Backend'
    expected_types_and_levels = [
        (top_level_backend_type, 0),
        (second_level_backend_type, 1),
        ('SequentialBackend', 2),
        ('SequentialBackend', 3)
    ]
    assert backend_types_and_levels == expected_types_and_levels
Exemplo n.º 19
def test_direct_parameterized_backend_context_manager():
    """Check that a backend instance can be given to ``parallel_backend``.

    Inside the context the instance itself must be the active backend, with
    its ``param`` and the requested ``n_jobs``; outside the context the
    active backend type must be ``MultiprocessingBackend`` again.
    """
    assert _active_backend_type() == MultiprocessingBackend

    # Check that it's possible to pass a backend instance directly,
    # without registration
    backend_instance = ParameterizedParallelBackend(param=43)
    with parallel_backend(backend_instance, n_jobs=5):
        current_backend, current_n_jobs = parallel.get_active_backend()
        assert type(current_backend) == ParameterizedParallelBackend
        assert current_backend.param == 43
        assert current_n_jobs == 5

        runner = Parallel()
        assert runner.n_jobs == 5
        assert runner._backend is current_backend
        roots = runner(delayed(sqrt)(value) for value in range(5))
    assert roots == [sqrt(value) for value in range(5)]

    # The default backend is restored again
    assert _active_backend_type() == MultiprocessingBackend
Exemplo n.º 20
Exemplo n.º 21
Exemplo n.º 22
def test_direct_parameterized_backend_context_manager():
    """Check that a backend instance can be given to ``parallel_backend``.

    Inside the context the instance itself must be the active backend, with
    its ``param`` and the requested ``n_jobs``; outside the context the
    active backend type must be ``DefaultBackend`` again.
    """
    assert _active_backend_type() == DefaultBackend

    # Check that it's possible to pass a backend instance directly,
    # without registration
    unregistered_backend = ParameterizedParallelBackend(param=43)
    with parallel_backend(unregistered_backend, n_jobs=5):
        in_use_backend, in_use_n_jobs = parallel.get_active_backend()
        assert type(in_use_backend) == ParameterizedParallelBackend
        assert in_use_backend.param == 43
        assert in_use_n_jobs == 5

        executor = Parallel()
        assert executor.n_jobs == 5
        assert executor._backend is in_use_backend
        computed = executor(delayed(sqrt)(number) for number in range(5))
    assert computed == [sqrt(number) for number in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == DefaultBackend
Exemplo n.º 23
def test_parameterized_backend_context_manager(monkeypatch):
    """Check that extra keyword arguments reach a backend selected by name.

    ``ParameterizedParallelBackend`` is put in ``BACKENDS`` under the name
    ``'param_backend'`` through ``monkeypatch``.  Selecting that name with
    ``param=42, n_jobs=3`` must give an active backend of that type carrying
    ``param == 42`` and ``n_jobs == 3``; outside the context the active
    backend type must be ``DefaultBackend`` again.
    """
    monkeypatch.setitem(BACKENDS, 'param_backend',
                        ParameterizedParallelBackend)
    assert _active_backend_type() == DefaultBackend

    with parallel_backend('param_backend', param=42, n_jobs=3):
        active_backend, active_n_jobs = parallel.get_active_backend()
        assert type(active_backend) == ParameterizedParallelBackend
        assert active_backend.param == 42
        assert active_n_jobs == 3
        p = Parallel()
        assert p.n_jobs == 3
        assert p._backend is active_backend
        results = p(delayed(sqrt)(i) for i in range(5))
    assert results == [sqrt(i) for i in range(5)]

    # The default backend is again restored
    assert _active_backend_type() == DefaultBackend
Exemplo n.º 24
def test_retrieval_context():
    """Check how often the backend's ``retrieval_context`` is entered.

    ``MyBackend`` counts the calls to ``retrieval_context`` in ``i``.  After
    one outer ``Parallel`` run whose tasks start nested ``Parallel`` runs,
    the counter on the backend yielded by ``parallel_backend`` must be 1.
    """
    import contextlib

    class MyBackend(ThreadingBackend):
        i = 0

        # Must be a context manager: returning None from a plain method
        # cannot be used in a ``with`` statement.
        @contextlib.contextmanager
        def retrieval_context(self):
            self.i += 1
            yield

    register_parallel_backend("retrieval", MyBackend)

    def nested_call(n):
        return Parallel(n_jobs=2)(delayed(id)(i) for i in range(n))

    with parallel_backend("retrieval") as (ba, _):
        Parallel(n_jobs=2)(delayed(nested_call, check_pickle=False)(i)
                           for i in range(5))
        assert ba.i == 1
Exemplo n.º 25
Exemplo n.º 26
Exemplo n.º 27
Exemplo n.º 28
Exemplo n.º 29
def run(allpool, test):
    """Run ``runOptLearn`` on every pool and split the results by outcome.

    Args:
        allpool: iterable of pools, each passed to ``runOptLearn``.
        test: when truthy the pools are processed one after the other in the
            calling thread and ``test`` is forwarded to ``runOptLearn``;
            otherwise they are processed through ``Parallel`` on the
            threading backend.

    Returns:
        A ``(success, fail)`` tuple of lists holding the result objects whose
        ``success`` attribute is truthy and falsy respectively.  Each failed
        result is also logged with its index and ``shortname``.
    """
    # NOTE(review): the ``else`` branches below were reconstructed from code
    # that looked truncated (results were never stored and the failure was
    # logged on success) - confirm against the original module.
    if test:
        result = [runOptLearn(p, test) for p in allpool]
    else:
        with parallel_backend('threading'):
            result = Parallel()(delayed(runOptLearn)(p) for p in allpool)

    success = []
    fail = []
    for i, outcome in enumerate(result):
        if outcome.success:
            success.append(outcome)
        else:
            fail.append(outcome)
            logger.error("!!!FAILED: %d %s", i, outcome.shortname)
    return success, fail
Exemplo n.º 30
                        help="The path where all models directories are")
                        help="The path to dump yaml file")
                        help="number of cores")
    args = parser.parse_args()
    return args

# Script entry point: compute one entry per selected benchmark in parallel.
if __name__ == "__main__":
    args = input_arguments()

    # Keep only the benchmark settings whose key passes check_key().
    benches = load_benchmark_settings()
    benches = {k: v for k, v in benches.items() if check_key(k)}

    # Call update_dict_entry(key, args.path) once per remaining benchmark key
    # on the multiprocessing backend, with n_jobs taken from the command line.
    with parallel_backend(backend="multiprocessing", n_jobs=args.n_jobs):
        dicts = Parallel()(delayed(update_dict_entry)(key, args.path)
                           for key, value in benches.items())
    tabular_dict = dict()
    for entry in dicts:
        # NOTE(review): the body of this loop is missing from this excerpt;
        # presumably each entry is merged into tabular_dict - confirm against
        # the original script.
    print("Collected {} keys...".format(len(tabular_dict)))
    with open(os.path.join(args.output_path, "tabular_plot_config.yaml"),
              "w") as f:
        # NOTE(review): the body of this ``with`` block is missing from this
        # excerpt; presumably tabular_dict is written to ``f`` - confirm
        # against the original script.
Exemplo n.º 31
                # each task-config-fidelity would have been evaluated on a different seed
                obj = {
                    config_hash: {
                        fidelity_hash: {
                            seed: v
                task_datas[task_id] = update_table_with_new_entry(task_datas[task_id], obj)
                file_count += 1

                # deleting data file that was processed
                os.remove(os.path.join(dump_path, filename))
                # os.remove(os.path.join(dump_path, filename))
            except FileNotFoundError:

        logger.info("\tFinished batch processing in {:.3f} seconds".format(time.time() - start))
        logger.info("\tUpdating benchmark data files...")

        with parallel_backend(backend="loky", n_jobs=args.n_jobs):
                delayed(save_task_file)(task_id, obj, output_path) for task_id, obj in task_datas.items()
        logger.info("\tContinuing to next batch")
        logger.info("\t{}".format("-" * 25))

    logger.info("Total files processed: {}".format(file_count))
Exemplo n.º 32
