Example #1
    def __init__(self,
                 hyperopt_sampler: HyperoptSampler,
                 output_feature: str,
                 metric: str,
                 split: str,
                 num_workers: int = 2,
                 num_cpus_per_worker: int = -1,
                 num_gpus_per_worker: int = -1,
                 fiber_backend: str = "local",
                 **kwargs) -> None:
        import fiber

        HyperoptExecutor.__init__(self, hyperopt_sampler, output_feature,
                                  metric, split)

        fiber.init(backend=fiber_backend)
        self.fiber_meta = fiber.meta

        self.num_cpus_per_worker = num_cpus_per_worker
        self.num_gpus_per_worker = num_gpus_per_worker

        # -1 means "not specified"; only pass explicit limits on to Fiber
        self.resource_limits = {}
        if num_cpus_per_worker != -1:
            self.resource_limits["cpu"] = num_cpus_per_worker

        if num_gpus_per_worker != -1:
            self.resource_limits["gpu"] = num_gpus_per_worker

        self.num_workers = num_workers
        self.pool = fiber.Pool(num_workers)
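
This constructor (a Fiber-backed HyperoptExecutor subclass) initializes the Fiber backend, keeps a reference to fiber.meta, records per-worker resource limits, and creates the worker pool. A minimal standalone sketch of the same pattern, assuming a toy square() worker and assuming fiber.meta(cpu=...) can be applied as a decorator to the function submitted to the pool:

import fiber

@fiber.meta(cpu=1)  # assumption: per-process resource hint, mirroring resource_limits above
def square(x):
    return x * x

def main():
    fiber.init(backend="local")          # same call as in the constructor above
    pool = fiber.Pool(2)                 # num_workers worker processes
    print(pool.map(square, [1, 2, 3]))   # -> [1, 4, 9]
    pool.terminate()
    pool.join()

if __name__ == "__main__":
    main()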
Example #2
File: test_pool.py  Project: zw0610/fiber
    def test_pool_with_no_argument(self):
        # Make sure no exception is raised
        p = fiber.Pool()
        p.map(print, [1, 2, 3, 4])
        p.terminate()
        p.join()

        assert 1 == 1
Example #3
    def test_start_timeout(self):
        fiber.backend.get_backend(name="docker")
        old_backend = fiber.backend._backends["docker"]
        fiber.backend._backends["docker"] = TimeoutBackend(n=4)

        p = fiber.Pool(4)
        res = p.map(square_worker, [1, 2, 3, 4])
        p.terminate()
        fiber.backend._backends["docker"] = old_backend

        assert res == [i**2 for i in range(1, 5)]
Example #4
    def test_error_handling(self):
        # create the pool before the try block so the finally clause always has it
        pool = fiber.Pool(3, error_handling=True)
        try:
            pool.wait_until_workers_up()
            res = pool.map(random_error_worker, [i for i in range(300)],
                           chunksize=1)

            assert res == [i for i in range(300)]

        finally:
            pool.terminate()
            pool.join()
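
random_error_worker is not shown in this excerpt. For the assertion to hold it must ultimately return its argument: the test pushes 300 items through a worker that fails intermittently and still expects the complete result list back, which is what error_handling=True is meant to guarantee by rerunning tasks whose worker died. A plausible stand-in for the helper (an assumption, not the project's actual code):

import os
import random

def random_error_worker(i):
    # occasionally kill the worker process to exercise Fiber's error handling;
    # the pool is expected to restart the worker and rerun the task
    if random.random() < 0.1:
        os._exit(1)
    return i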
Example #5
def bench_fiber(tasks, workers, task_duration, warmup=True, pool=None):
    # create the pool up front so the timed run below also works when warmup=False
    if not pool:
        pool = fiber.Pool(workers)
    if warmup:
        pool.map(sleep_worker, [task_duration for x in range(tasks)],
                 chunksize=1)
        logger.debug("warm up finished")

    res, elapsed = timeit(
        pool.map,
        sleep_worker,
        [task_duration for x in range(tasks)],
        chunksize=1,
    )

    return elapsed
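
sleep_worker and timeit come from the surrounding benchmark module and are not shown here. Minimal stand-ins consistent with how they are called above (assumptions, not the project's helpers):

import time

def sleep_worker(duration):
    # each benchmark task simply sleeps for the given number of seconds
    time.sleep(duration)

def timeit(func, *args, **kwargs):
    # call func once and return (result, elapsed seconds), matching the unpacking above
    start = time.time()
    res = func(*args, **kwargs)
    return res, time.time() - start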
Example #6
    def test_error_handling_unordered(self):
        # create the pool before the try block so the finally clause always has it
        pool = fiber.Pool(3, error_handling=True)
        try:
            pool.wait_until_workers_up()
            res_iter = pool.imap_unordered(random_error_worker,
                                           [i for i in range(300)],
                                           chunksize=1)

            res = list(res_iter)
            res.sort()

            assert res == [i for i in range(300)]

        finally:
            pool.terminate()
            pool.join()
Example #7
    def test_job_creation_with_delay(self):
        fiber.backend.get_backend(name="docker")
        old_backend = fiber.backend._backends["docker"]
        fiber.backend._backends["docker"] = DelayedBackend()

        p = fiber.Pool(4)
        res = p.map(square_worker, [1, 2, 3, 4])

        p.wait_until_workers_up()
        p.terminate()
        fiber.backend._backends["docker"] = old_backend

        assert res == [i**2 for i in range(1, 5)]

        # wait for 2 seconds to let docker finish starting
        #time.sleep(2)
        p.join()
Example #8
File: es.py  Project: zw0610/fiber
def es(theta0, worker, workers=40, sigma=0.1, alpha=0.05, iterations=200):
    dim = theta0.shape[0]
    theta = theta0
    pool = fiber.Pool(workers)
    func = functools.partial(worker, dim, sigma)

    for t in range(iterations):
        returns = pool.map(func, [theta] * workers)
        rewards = [ret[0] for ret in returns]
        epsilons = [ret[1] for ret in returns]
        # normalize rewards
        normalized_rewards = (rewards - np.mean(rewards)) / np.std(rewards)
        theta = theta + alpha * 1.0 / (workers * sigma) * sum([
            reward * epsilon
            for reward, epsilon in zip(normalized_rewards, epsilons)
        ])
        if t % 10 == 0:
            print(theta)
    return theta
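
es() calls the supplied worker as worker(dim, sigma, theta) via functools.partial and expects a (reward, epsilon) pair back, where epsilon is the Gaussian perturbation that produced the reward; the update line is then the usual ES gradient estimate, alpha / (workers * sigma) times the reward-weighted sum of perturbations over normalized rewards. A sketch of a compatible worker for a toy objective (an assumed example, not part of es.py):

import numpy as np

def toy_worker(dim, sigma, theta):
    # sample a perturbation, score the perturbed parameters, and return both
    # so es() can form its gradient estimate
    epsilon = np.random.randn(dim)
    reward = -np.sum((theta + sigma * epsilon) ** 2)  # toy objective: maximize -||x||^2
    return reward, epsilon

# e.g. theta = es(np.ones(10), toy_worker, workers=8, iterations=50)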
Example #9
    def test_many_jobs(self):
        """
        This is to test a race condition in handling data in pending table
        """
        workers = 5
        pool = fiber.Pool(workers)
        tasks = 5000
        duration = 0.001

        pool.wait_until_workers_up()

        res = [None] * workers
        for i in range(tasks // workers):
            for j in range(workers):
                handle = pool.apply_async(sleep_worker, (duration, ))
                res[j] = handle
            for j in range(workers):
                res[j].get()

        pool.terminate()
        pool.join()
Example #10
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        'frameworks',
        nargs='+',
        choices=['mp', 'fiber', 'pyspark', 'ray', 'ipyparallel'],
        help='frameworks to benchmark')
    parser.add_argument('-t',
                        '--total-duration',
                        type=int,
                        default=1,
                        help='total running time')

    parser.add_argument('-d',
                        '--task-duration',
                        type=float,
                        default=None,
                        choices=[0.001, 0.01, 0.1, 1],
                        help='task duration in seconds')

    args = parser.parse_args()

    workers = 5

    max_duration = args.total_duration

    results = {}
    frameworks = args.frameworks

    for framework in frameworks:
        results[framework] = []
        results[framework + "_seq"] = []

    if "pyspark" in frameworks:
        from pyspark import SparkContext
        import pyspark

        sc = SparkContext()
        conf = pyspark.SparkConf().setAll([("spark.cores.max", 5)])
        sc.stop()
        sc = pyspark.SparkContext(conf=conf)

    if "ray" in frameworks:
        import ray

        ray.init()

    if "fiber" in frameworks:
        import fiber.pool
        fiber_pool = fiber.Pool(workers)

    if "ipyparallel" in frameworks:
        print("before popen")
        #ipp_controller = subprocess.Popen(["ipcontroller", "--ip", "*"])
        print("after popen")
        import atexit
        import signal
        import os
        #atexit.register(ipp_controller.kill)
        pids = spawn_workers(workers)
        for pid in pids:
            atexit.register(os.kill, pid, signal.SIGKILL)
        time.sleep(4)

    for i in range(4):
        factor = 10**i
        duration = 1 / factor
        if args.task_duration is not None:
            print(args.task_duration, duration, type(args.task_duration),
                  type(duration))
            if args.task_duration != duration:
                continue
        tasks = int(max_duration * workers / duration)

        print("Benchmarking {} workers with {} tasks each takes {} "
              "seconds".format(workers, tasks, duration))

        # sequential tests (simulating RL)
        if "mp" in frameworks:
            elapsed = bench_mp_seq(tasks, workers, duration, True)
            results["mp_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("mp_seq", elapsed)

        if "fiber" in frameworks:
            elapsed = bench_fiber_seq(tasks,
                                      workers,
                                      duration,
                                      True,
                                      pool=fiber_pool)
            results["fiber_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("fiber_seq", elapsed)

        if "pyspark" in frameworks:
            elapsed = bench_spark_seq(tasks,
                                      workers,
                                      duration,
                                      warmup=True,
                                      sc=sc)
            results["pyspark_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("pyspark_seq", elapsed)

        if "ray" in frameworks:
            elapsed = bench_ray_seq(tasks, workers, duration, warmup=True)
            results["ray_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("ray_seq", elapsed)

        if "ipyparallel" in frameworks:
            elapsed = bench_ipp_seq(tasks, workers, duration, warmup=True)
            results["ipyparallel_seq"].append({
                "task_duration": duration,
                "elapsed": elapsed
            })
            print("ipyparallel_seq", elapsed)

        # batch tests (simulating ES)
        """
        if "mp" in frameworks:
            elapsed = bench_mp(tasks, workers, duration, True)
            results["mp"].append({"task_duration": duration,
                                  "elapsed": elapsed})
            print("mp", elapsed)

        if "fiber" in frameworks:
            elapsed = bench_fiber(tasks, workers, duration, True,
                                  pool=fiber_pool)
            results["fiber"].append({"task_duration": duration,
                                     "elapsed": elapsed})
            print("fiber", elapsed)

        if "pyspark" in frameworks:
            elapsed = bench_spark(tasks, workers, duration, warmup=True, sc=sc)
            results["pyspark"].append({"task_duration": duration,
                                       "elapsed": elapsed})
            print("pyspark", elapsed)

        if "ray" in frameworks:
            elapsed = bench_ray(tasks, workers, duration, warmup=True)
            results["ray"].append({"task_duration": duration,
                                   "elapsed": elapsed})
            print("ray", elapsed)
        """

    pprint(results)
Example #11
def main():
    files = sorted(Path('.').glob('*.py'))
    pool = fiber.Pool(4)
    counts = pool.map(line_count, files)
    for f, c in zip(files, counts):
        print("{}\t{}".format(f, c))