예제 #1
0
 def _enrich_object_sotre_memory(self, sc, object_store_memory):
     """
     Resolve the object store memory value to hand to Ray.

     On a Spark cluster the caller-supplied value is stringified and passed
     through ``resourceToBytes``; a falsy value yields ``None``.
     On Spark local the caller must not supply a value, and the result of
     ``self._get_ray_plasma_memory_local()`` is passed through
     ``resourceToBytes`` instead.

     :param sc: SparkContext used to decide between local and cluster mode.
     :param object_store_memory: requested object store size, or a falsy value.
     :return: whatever ``resourceToBytes`` returns, or ``None`` on a cluster
              when no size was requested.
     """
     if not is_local(sc):
         # Cluster mode: only convert when a size was actually requested.
         if object_store_memory:
             return resourceToBytes(str(object_store_memory))
         return None

     # Local mode: the size is derived locally, never taken from the caller.
     assert not object_store_memory, "you should not set object_store_memory on spark local"
     plasma_memory = self._get_ray_plasma_memory_local()
     return resourceToBytes(plasma_memory)
예제 #2
0
 def _enrich_object_sotre_memory(self, sc, object_store_memory):
     """
     Resolve the object store memory value to hand to Ray.

     On a Spark cluster the ``object_store_memory`` argument is stringified
     and passed through ``resourceToBytes``; a falsy argument yields ``None``.
     On Spark local the value stored on ``self.object_store_memory`` is used,
     and it is first filled in from ``self._get_ray_plasma_memory_local()``
     when it is ``None``.

     :param sc: SparkContext used to decide between local and cluster mode.
     :param object_store_memory: requested object store size; only consulted
            in cluster mode.
     :return: whatever ``resourceToBytes`` returns, or ``None`` on a cluster
              when no size was requested.
     """
     if not is_local(sc):
         if not object_store_memory:
             return None
         return resourceToBytes(str(object_store_memory))

     # Local mode reads (and lazily populates) the instance attribute rather
     # than the argument.
     if self.object_store_memory is None:
         self.object_store_memory = self._get_ray_plasma_memory_local()
     return resourceToBytes(self.object_store_memory)
예제 #3
0
 def clean_fn(self):
     """
     Shut down Ray in this process and run the per-node shutdown function.

     The per-node function comes from ``gen_shutdown_per_node(self.pgids,
     self.node_ips)``. On Spark local it is called directly with an empty
     list; otherwise it is mapped over ``self.ray_rdd`` and collected so it
     runs through Spark.
     """
     import ray
     ray.shutdown()

     if is_local(self.sc):
         shutdown_node = gen_shutdown_per_node(self.pgids, self.node_ips)
         shutdown_node([])
         return

     # collect() forces the lazy map to actually execute.
     per_node_results = self.ray_rdd.map(
         gen_shutdown_per_node(self.pgids, self.node_ips))
     per_node_results.collect()
예제 #4
0
 def __init__(self,
              sc,
              redis_port=None,
              password="******",
              object_store_memory=None,
              verbose=False,
              env=None,
              local_ray_node_num=2,
              waiting_time_sec=8,
              extra_params=None):
     """
     Initialize a Ray cluster on top of the configuration of a SparkContext.

     For Spark cluster mode: the number of raylets is equal to the number of executors.
     For Spark local mode: the number of raylets is controlled by local_ray_node_num,
     and the CPU cores for each raylet equal spark_cores/local_ray_node_num.

     :param sc: the SparkContext the Ray cluster is built on.
     :param redis_port: redis port for the "head" node.
            When not specified (or otherwise falsy), the port is obtained
            from self._new_port().
     :param password: [optional] password for the redis.
     :param object_store_memory: memory size for the object store.
     :param verbose: True for more logs.
     :param env: the environment variable dict for running Ray.
     :param local_ray_node_num: number of raylets to be created in Spark local mode.
     :param waiting_time_sec: waiting time for the raylets before connecting to redis.
     :param extra_params: key/value dictionary of extra options used to launch Ray,
                          e.g. extra_params={"temp-dir": "/tmp/ray2/"}
     :raises KeyError: if the PYSPARK_PYTHON environment variable is not set.
     """
     self.sc = sc
     self.stopped = False
     self.is_local = is_local(sc)
     # Stored before the two helper calls below.
     # NOTE(review): presumably those helpers read self.sc / self.local_ray_node_num,
     # so keep this ordering - confirm against their definitions.
     self.local_ray_node_num = local_ray_node_num
     self.ray_node_cpu_cores = self._get_ray_node_cpu_cores()
     self.num_ray_nodes = self._get_num_ray_nodes()
     # Direct indexing: a missing PYSPARK_PYTHON surfaces as KeyError here.
     self.python_loc = os.environ['PYSPARK_PYTHON']
     # No monitor exists yet; presumably assigned once Ray processes are launched.
     self.ray_processesMonitor = None
     self.verbose = verbose
     self.redis_password = password
     self.object_store_memory = object_store_memory
     # Any falsy redis_port (None, 0) falls back to a port from self._new_port().
     self.redis_port = self._new_port() if not redis_port else redis_port
     self.ray_service = RayServiceFuncGenerator(
         python_loc=self.python_loc,
         redis_port=self.redis_port,
         ray_node_cpu_cores=self.ray_node_cpu_cores,
         mkl_cores=self._get_mkl_cores(),
         password=password,
         object_store_memory=self._enrich_object_sotre_memory(
             sc, object_store_memory),
         verbose=verbose,
         env=env,
         # NOTE(review): the keyword is spelled "waitting_time_sec" (double t);
         # presumably it matches RayServiceFuncGenerator's signature - confirm
         # before renaming.
         waitting_time_sec=waiting_time_sec,
         extra_params=extra_params)
     self._gather_cluster_ips()
     # Imported at call time rather than at module import time.
     from bigdl.util.common import init_executor_gateway
     print("Start to launch the JVM guarding process")
     init_executor_gateway(sc)
     print("JVM guarding process has been successfully launched")
예제 #5
0
 def clean_fn(self):
     """
     Shut down Ray in this process and run the per-node shutdown function.

     Does nothing when ``self.raycontext.stopped`` is already set.
     Otherwise ``ray.shutdown()`` is called, and the function built by
     ``gen_shutdown_per_node(self.pgids, self.node_ips)`` is either mapped
     over ``self.ray_rdd`` (SparkContext present and not local) or called
     directly with an empty list (local mode, or SparkContext missing).
     A warning is printed when the SparkContext is missing.
     """
     if self.raycontext.stopped:
         return

     import ray
     ray.shutdown()

     spark_alive = bool(self.sc)
     if not spark_alive:
         print("WARNING: SparkContext has been stopped before cleaning the Ray resources")

     # Only go through Spark when there is a context and it is a real cluster.
     on_cluster = spark_alive and not is_local(self.sc)
     if on_cluster:
         self.ray_rdd.map(gen_shutdown_per_node(self.pgids, self.node_ips)).collect()
         return

     # Fallback: run the shutdown function in the current process.
     gen_shutdown_per_node(self.pgids, self.node_ips)([])
예제 #6
0
 def __init__(self, process_infos, sc, ray_rdd, verbose=False):
     """
     Record the launched processes and register cleanup for them.

     Each entry of ``process_infos`` contributes its ``pgid`` and ``node_ip``
     and is classified as master (truthy ``master_addr``) or slave. Exactly
     one master is expected; after validation ``self.master`` holds that
     single entry rather than a list.

     :param process_infos: iterable of process info objects exposing
            ``pgid``, ``node_ip`` and ``master_addr``.
     :param sc: SparkContext, used to detect local mode.
     :param ray_rdd: RDD stored on the instance as ``self.ray_rdd``.
     :param verbose: verbosity flag stored on the instance.
     """
     self.sc = sc
     self.verbose = verbose
     self.ray_rdd = ray_rdd
     self.process_infos = process_infos
     self.master, self.slaves = [], []
     self.pgids, self.node_ips = [], []

     for info in process_infos:
         self.pgids.append(info.pgid)
         self.node_ips.append(info.node_ip)
         bucket = self.master if info.master_addr else self.slaves
         bucket.append(info)

     # The hook is registered before the master check below, so it is in
     # place even when that check fails.
     ProcessMonitor.register_shutdown_hook(extra_close_fn=self.clean_fn)

     assert len(self.master) == 1, \
         "We should got 1 master only, but we got {}".format(len(self.master))
     # Collapse the one-element list into the master entry itself.
     self.master = self.master[0]

     if is_local(self.sc):
         return
     self.print_ray_remote_err_out()