예제 #1
0
def automl_fixture():
    """Provision a local Spark + Ray environment for AutoML tests.

    Yields control to the test, then tears down the Ray context and the
    SparkContext in that order.
    """
    from zoo import init_spark_on_local
    from zoo.ray.util.raycontext import RayContext

    spark_ctx = init_spark_on_local(cores=4, spark_log_level="INFO")
    ray_env = RayContext(sc=spark_ctx, object_store_memory="1g")
    ray_env.init()
    # Hand control to the test body; cleanup runs when it finishes.
    yield
    ray_env.stop()
    spark_ctx.stop()
예제 #2
0
 def test_local(self):
     """Run Ray on local Spark, touch one actor per core, and verify that
     every Ray-launched process is gone after shutdown."""
     core_count = 4
     spark_ctx = init_spark_on_local(cores=core_count)
     ray_handle = RayContext(sc=spark_ctx)
     ray_handle.init()
     workers = [TestRay.remote() for _ in range(core_count)]
     host_names = [ray.get(w.hostname.remote()) for w in workers]
     print(host_names)
     ray_handle.stop()
     spark_ctx.stop()
     # After stop(), no Ray process launched by this context may survive.
     for proc_info in ray_handle.ray_processesMonitor.process_infos:
         for proc_id in proc_info.pids:
             assert not psutil.pid_exists(proc_id)
예제 #3
0
                        type=str,
                        help="turn on yarn mode by passing the hadoop path"
                        "configuration folder. Otherwise, turn on local mode.")

    # Parse only the arguments we know; unrecognized flags (e.g. ones consumed
    # by Spark/Ray launchers) are ignored instead of raising.
    args, _ = parser.parse_known_args()

    if args.hadoop_conf:
        # YARN mode: provision Spark executors on the cluster, then start Ray
        # on top of them via RayContext.
        sc = init_spark_on_yarn(hadoop_conf=args.hadoop_conf,
                                conda_name="rayexample",
                                num_executor=args.num_replicas,
                                executor_cores=88,
                                executor_memory="10g",
                                driver_memory="3g",
                                driver_cores=4,
                                extra_executor_memory_for_ray="2g")
        ray_ctx = RayContext(sc=sc, object_store_memory="5g")
        ray_ctx.init()
    else:
        # Local mode: attach to an already-running Ray cluster at the given
        # Redis address instead of launching one through Spark.
        # NOTE(review): `redis_address` is the pre-0.8 Ray API; newer Ray
        # versions use `address` — confirm against the pinned ray version.
        ray.init(redis_address=args.redis_address)
        # sc = init_spark_on_local(cores=44)
        # ray_ctx = RayContext(sc=sc, object_store_memory="5g")

    #ray.init(redis_address=args.redis_address)

    if args.tune:
        # Hyperparameter-tuning path.
        tune_example(num_replicas=args.num_replicas, use_gpu=args.use_gpu)
    else:
        # Plain distributed-training path.
        train_example(num_replicas=args.num_replicas,
                      batch_size=args.batch_size,
                      use_gpu=args.use_gpu)
예제 #4
0
        default="4g",
        help="The memory to store data on local."
        "You can change it depending on your own cluster setting.")

    args = parser.parse_args()
    if args.hadoop_conf:
        # YARN mode: launch Spark executors on the cluster (with extra memory
        # reserved for Ray), then start Ray across them.
        sc = init_spark_on_yarn(
            hadoop_conf=args.hadoop_conf,
            conda_name=args.conda_name,
            num_executor=args.slave_num,
            executor_cores=args.executor_cores,
            executor_memory=args.executor_memory,
            driver_memory=args.driver_memory,
            driver_cores=args.driver_cores,
            extra_executor_memory_for_ray=args.extra_executor_memory_for_ray)
        ray_ctx = RayContext(sc=sc,
                             object_store_memory=args.object_store_memory)
    else:
        # Local mode: single-machine Spark, Ray started on top of it.
        sc = init_spark_on_local(cores=args.driver_cores)
        ray_ctx = RayContext(sc=sc,
                             object_store_memory=args.object_store_memory)
    ray_ctx.init()

    batch_size = args.batch_size
    # Run the reinforcement learning.
    running_reward = None
    batch_num = 1
    model = {}
    # "Xavier" initialization.
    # NOTE(review): H (hidden-layer size) and D (input dimension) are defined
    # elsewhere in this file — confirm their values before reusing this code.
    model["W1"] = np.random.randn(H, D) / np.sqrt(D)
    model["W2"] = np.random.randn(H) / np.sqrt(H)
    # Update buffers that add up gradients over a batch.
예제 #5
0
slave_num = 2

# Launch Spark executors on YARN; each executor reserves extra memory for the
# Ray processes that will run inside it.
sc = init_spark_on_yarn(hadoop_conf="/opt/work/almaren-yarn-config/",
                        conda_name="ray_train",
                        num_executor=slave_num,
                        executor_cores=28,
                        executor_memory="10g",
                        driver_memory="2g",
                        driver_cores=4,
                        extra_executor_memory_for_ray="30g",
                        spark_conf={"hello": "world"})

# Start Ray on top of the Spark executors. The env dict is exported to every
# Ray process so remote nodes can reach the network through the proxy.
ray_ctx = RayContext(sc=sc,
                     object_store_memory="25g",
                     extra_params={"temp-dir": "/tmp/hello/"},
                     env={
                         "http_proxy": "http://child-prc.intel.com:913",
                         # Fixed: key was the meaningless "http_proxys";
                         # the conventional variable is "https_proxy".
                         "https_proxy": "http://child-prc.intel.com:913"
                     })
# Arguments passed to init() override the constructor's values for this start.
ray_ctx.init(object_store_memory="2g", num_cores=0, labels="", extra_params={})


@ray.remote
class TestRay():
    """Minimal Ray actor used to probe the environment of remote executors."""

    def hostname(self):
        # Report which node this actor was scheduled on.
        import socket
        return socket.gethostname()

    def check_cv2(self):
        # conda install -c conda-forge opencv==3.4.2
        # NOTE(review): this imports cv2 but returns nothing here — likely a
        # truncated snippet (an identical method elsewhere in this file
        # returns cv2.__version__). Confirm against the original source.
        import cv2
예제 #6
0
class TestRay():
    """Tiny probe class: reports the hostname of the machine it runs on."""

    def hostname(self):
        # Resolve the local machine's host name at call time.
        import socket
        node_name = socket.gethostname()
        return node_name


# One Ray node per YARN executor.
executor_count = 4

# Bring up Spark on YARN with extra per-executor memory reserved for Ray.
spark_context = init_spark_on_yarn(
    hadoop_conf="/opt/work/hadoop-2.7.2/etc/hadoop/",
    conda_name="rayexample",
    num_executor=executor_count,
    executor_cores=28,
    executor_memory="10g",
    driver_memory="2g",
    driver_cores=4,
    extra_executor_memory_for_ray="30g")

# First Ray session: start, probe one actor per executor, shut down.
ray_cluster = RayContext(sc=spark_context, object_store_memory="2g")
ray_cluster.init()
probes = [TestRay.remote() for _ in range(executor_count)]
print([ray.get(p.hostname.remote()) for p in probes])
ray_cluster.stop()
# repeat
# Second Ray session on the same SparkContext, exercising restart.
ray_cluster = RayContext(sc=spark_context, object_store_memory="1g")
ray_cluster.init()
probes = [TestRay.remote() for _ in range(executor_count)]
print([ray.get(p.hostname.remote()) for p in probes])
ray_cluster.stop()

spark_context.stop()
time.sleep(3)
예제 #7
0
 def test_local(self):
     """Start and restart Ray on the same local SparkContext, then verify
     that no Ray process survives the final shutdown."""
     core_count = 4
     spark_ctx = init_spark_on_local(cores=core_count)

     # First session.
     ray_handle = RayContext(sc=spark_ctx, object_store_memory="1g")
     ray_handle.init()
     workers = [TestRay.remote() for _ in range(core_count)]
     print(ray.get([w.hostname.remote() for w in workers]))
     ray_handle.stop()
     time.sleep(3)

     # Second session on the same SparkContext.
     print("-------------------first repeat begin!------------------")
     ray_handle = RayContext(sc=spark_ctx, object_store_memory="1g")
     ray_handle.init()
     workers = [TestRay.remote() for _ in range(core_count)]
     print(ray.get([w.hostname.remote() for w in workers]))
     ray_handle.stop()
     spark_ctx.stop()
     time.sleep(3)

     # Every Ray-launched process must be dead after the final stop().
     for proc_info in ray_handle.ray_processesMonitor.process_infos:
         for proc_id in proc_info.pids:
             assert not psutil.pid_exists(proc_id)
예제 #8
0
from zoo.ray.util.raycontext import RayContext

slave_num = 2

# Launch Spark executors on YARN; each one reserves extra memory for Ray.
sc = init_spark_on_yarn(hadoop_conf="/opt/work/almaren-yarn-config/",
                        conda_name="ray36-dev",
                        num_executor=slave_num,
                        executor_cores=28,
                        executor_memory="10g",
                        driver_memory="2g",
                        driver_cores=4,
                        extra_executor_memory_for_ray="30g")

# Start Ray across the executors; the env dict is exported to every Ray
# process so remote nodes can reach the network through the proxy.
ray_ctx = RayContext(sc=sc,
                     object_store_memory="25g",
                     env={
                         "http_proxy": "http://child-prc.intel.com:913",
                         # Fixed: key was the meaningless "http_proxys";
                         # the conventional variable is "https_proxy".
                         "https_proxy": "http://child-prc.intel.com:913"
                     })
ray_ctx.init()


@ray.remote
class TestRay():
    """Probe actor: reports the node's hostname and its OpenCV version."""

    def hostname(self):
        # Which machine did this actor land on?
        import socket
        node_name = socket.gethostname()
        return node_name

    def check_cv2(self):
        # Requires: conda install -c conda-forge opencv==3.4.2
        import cv2
        return cv2.__version__