def automl_fixture():
    """Generator-style test fixture: bring up a local Spark context plus a
    RayContext for the test body, and tear both down afterwards."""
    from zoo import init_spark_on_local
    from zoo.ray.util.raycontext import RayContext

    spark_ctx = init_spark_on_local(cores=4, spark_log_level="INFO")
    ray_ctx = RayContext(sc=spark_ctx, object_store_memory="1g")
    ray_ctx.init()
    yield
    # Teardown runs after the test body: stop Ray first, then Spark.
    ray_ctx.stop()
    spark_ctx.stop()
def test_local(self):
    """Start Ray on a local Spark cluster, run one remote actor per core,
    then assert every Ray worker process is gone after shutdown."""
    node_num = 4
    sc = init_spark_on_local(cores=node_num)
    ray_ctx = RayContext(sc=sc)
    ray_ctx.init()
    actors = [TestRay.remote() for _ in range(node_num)]
    # Batch all hostname futures into a single ray.get call (one blocking
    # wait) instead of one sequential get per actor — consistent with the
    # sibling test_local in this suite and avoids serialized round-trips.
    print(ray.get([actor.hostname.remote() for actor in actors]))
    ray_ctx.stop()
    sc.stop()
    # NOTE(review): the sibling test sleeps a few seconds before this check
    # to let ray processes exit; without a sleep this assertion may be racy
    # on slow machines — confirm.
    for process_info in ray_ctx.ray_processesMonitor.process_infos:
        for pid in process_info.pids:
            assert not psutil.pid_exists(pid)
type=str, help="turn on yarn mode by passing the hadoop path" "configuration folder. Otherwise, turn on local mode.") args, _ = parser.parse_known_args() if args.hadoop_conf: sc = init_spark_on_yarn(hadoop_conf=args.hadoop_conf, conda_name="rayexample", num_executor=args.num_replicas, executor_cores=88, executor_memory="10g", driver_memory="3g", driver_cores=4, extra_executor_memory_for_ray="2g") ray_ctx = RayContext(sc=sc, object_store_memory="5g") ray_ctx.init() else: ray.init(redis_address=args.redis_address) # sc = init_spark_on_local(cores=44) # ray_ctx = RayContext(sc=sc, object_store_memory="5g") #ray.init(redis_address=args.redis_address) if args.tune: tune_example(num_replicas=args.num_replicas, use_gpu=args.use_gpu) else: train_example(num_replicas=args.num_replicas, batch_size=args.batch_size, use_gpu=args.use_gpu)
default="4g", help="The memory to store data on local." "You can change it depending on your own cluster setting.") args = parser.parse_args() if args.hadoop_conf: sc = init_spark_on_yarn( hadoop_conf=args.hadoop_conf, conda_name=args.conda_name, num_executor=args.slave_num, executor_cores=args.executor_cores, executor_memory=args.executor_memory, driver_memory=args.driver_memory, driver_cores=args.driver_cores, extra_executor_memory_for_ray=args.extra_executor_memory_for_ray) ray_ctx = RayContext(sc=sc, object_store_memory=args.object_store_memory) else: sc = init_spark_on_local(cores=args.driver_cores) ray_ctx = RayContext(sc=sc, object_store_memory=args.object_store_memory) ray_ctx.init() batch_size = args.batch_size # Run the reinforcement learning. running_reward = None batch_num = 1 model = {} # "Xavier" initialization. model["W1"] = np.random.randn(H, D) / np.sqrt(D) model["W2"] = np.random.randn(H) / np.sqrt(H) # Update buffers that add up gradients over a batch.
slave_num = 2
# Launch Spark on YARN with slave_num executors sized for Ray workloads.
sc = init_spark_on_yarn(hadoop_conf="/opt/work/almaren-yarn-config/",
                        conda_name="ray_train",
                        num_executor=slave_num,
                        executor_cores=28,
                        executor_memory="10g",
                        driver_memory="2g",
                        driver_cores=4,
                        extra_executor_memory_for_ray="30g",
                        spark_conf={"hello": "world"})
# NOTE(review): "http_proxys" looks like a typo for "https_proxy" — as
# written this key sets no real proxy environment variable; confirm.
ray_ctx = RayContext(sc=sc,
                     object_store_memory="25g",
                     extra_params={"temp-dir": "/tmp/hello/"},
                     env={
                         "http_proxy": "http://child-prc.intel.com:913",
                         "http_proxys": "http://child-prc.intel.com:913"
                     })
# The init() keyword arguments here override the constructor-time settings.
ray_ctx.init(object_store_memory="2g",
             num_cores=0,
             labels="",
             extra_params={})


@ray.remote
class TestRay():
    # Remote actor used to probe the runtime environment on each Ray node.

    def hostname(self):
        # Returns the hostname of the node this actor was scheduled on.
        import socket
        return socket.gethostname()

    def check_cv2(self):
        # conda install -c conda-forge opencv==3.4.2
        # (method body continues beyond this chunk)
        import cv2
class TestRay():
    """Remote actor that reports the hostname of the node it runs on."""

    def hostname(self):
        import socket
        return socket.gethostname()


node_num = 4
sc = init_spark_on_yarn(
    hadoop_conf="/opt/work/hadoop-2.7.2/etc/hadoop/",
    conda_name="rayexample",
    num_executor=node_num,
    executor_cores=28,
    executor_memory="10g",
    driver_memory="2g",
    driver_cores=4,
    extra_executor_memory_for_ray="30g")
ray_ctx = RayContext(sc=sc, object_store_memory="2g")
ray_ctx.init()
actors = [TestRay.remote() for _ in range(node_num)]
# Batch all hostname futures into a single ray.get call instead of one
# blocking get per actor — same result, one round-trip, and consistent
# with the other tests in this suite.
print(ray.get([actor.hostname.remote() for actor in actors]))
ray_ctx.stop()
# repeat: a second RayContext on the same SparkContext must also work
ray_ctx = RayContext(sc=sc, object_store_memory="1g")
ray_ctx.init()
actors = [TestRay.remote() for _ in range(node_num)]
print(ray.get([actor.hostname.remote() for actor in actors]))
ray_ctx.stop()
sc.stop()
# Give the ray processes a moment to exit before the interpreter tears down.
time.sleep(3)
def test_local(self):
    """Run the Ray-on-local-Spark lifecycle twice in a row and verify that
    every Ray worker process has exited after the final shutdown."""
    worker_count = 4
    sc = init_spark_on_local(cores=worker_count)

    def _run_round():
        # One full Ray lifecycle: init, query actor hostnames, stop.
        ctx = RayContext(sc=sc, object_store_memory="1g")
        ctx.init()
        handles = [TestRay.remote() for _ in range(worker_count)]
        print(ray.get([h.hostname.remote() for h in handles]))
        ctx.stop()
        return ctx

    _run_round()
    time.sleep(3)
    # repeat
    print("-------------------first repeat begin!------------------")
    ray_ctx = _run_round()
    sc.stop()
    time.sleep(3)
    for process_info in ray_ctx.ray_processesMonitor.process_infos:
        for pid in process_info.pids:
            assert not psutil.pid_exists(pid)
from zoo.ray.util.raycontext import RayContext
slave_num = 2
# Launch Spark on YARN with slave_num executors sized for Ray workloads.
sc = init_spark_on_yarn(hadoop_conf="/opt/work/almaren-yarn-config/",
                        conda_name="ray36-dev",
                        num_executor=slave_num,
                        executor_cores=28,
                        executor_memory="10g",
                        driver_memory="2g",
                        driver_cores=4,
                        extra_executor_memory_for_ray="30g")
# NOTE(review): "http_proxys" looks like a typo for "https_proxy" — as
# written this key sets no real proxy environment variable; confirm.
ray_ctx = RayContext(sc=sc,
                     object_store_memory="25g",
                     env={
                         "http_proxy": "http://child-prc.intel.com:913",
                         "http_proxys": "http://child-prc.intel.com:913"
                     })
ray_ctx.init()


@ray.remote
class TestRay():
    # Remote actor used to probe the runtime environment on each Ray node.

    def hostname(self):
        # Returns the hostname of the node this actor was scheduled on.
        import socket
        return socket.gethostname()

    def check_cv2(self):
        # conda install -c conda-forge opencv==3.4.2
        # Verifies opencv is importable in the worker's conda env.
        import cv2
        return cv2.__version__