示例#1
0
    def setUp(self):
        """Bring up a miniature Ray cluster for the tests.

        Starts one Redis server, one global scheduler, and
        NUM_CLUSTER_NODES triples of (plasma store, plasma manager,
        local scheduler).  Process handles and client connections are
        stored on ``self`` so a later tearDown can shut them down.
        """
        # Start one Redis server and N pairs of (plasma, local_scheduler)
        self.node_ip_address = "127.0.0.1"
        redis_address, redis_shards = services.start_redis(
            self.node_ip_address)
        redis_port = services.get_port(redis_address)
        # Brief pause so Redis is accepting connections before we use it.
        time.sleep(0.1)
        # Create a client for the global state store.
        self.state = state.GlobalState()
        self.state._initialize_global_state(self.node_ip_address, redis_port)

        # Start one global scheduler.
        self.p1 = global_scheduler.start_global_scheduler(
            redis_address, self.node_ip_address, use_valgrind=USE_VALGRIND)
        # Bookkeeping lists filled by the loop below; one entry per
        # simulated cluster node.
        self.plasma_store_pids = []
        self.plasma_manager_pids = []
        self.local_scheduler_pids = []
        self.plasma_clients = []
        self.local_scheduler_clients = []

        for i in range(NUM_CLUSTER_NODES):
            # Start the Plasma store. Plasma store name is randomly generated.
            plasma_store_name, p2 = plasma.start_plasma_store()
            self.plasma_store_pids.append(p2)
            # Start the Plasma manager.
            # Assumption: Plasma manager name and port are randomly generated
            # by the plasma module.
            manager_info = plasma.start_plasma_manager(plasma_store_name,
                                                       redis_address)
            plasma_manager_name, p3, plasma_manager_port = manager_info
            self.plasma_manager_pids.append(p3)
            plasma_address = "{}:{}".format(self.node_ip_address,
                                            plasma_manager_port)
            # NOTE(review): the trailing 64 is presumably the release-delay
            # argument of plasma.connect — confirm against the pyarrow docs.
            plasma_client = pa.plasma.connect(plasma_store_name,
                                              plasma_manager_name, 64)
            self.plasma_clients.append(plasma_client)
            # Start the local scheduler.
            local_scheduler_name, p4 = local_scheduler.start_local_scheduler(
                plasma_store_name,
                plasma_manager_name=plasma_manager_name,
                plasma_address=plasma_address,
                redis_address=redis_address,
                static_resources={"CPU": 10})
            # Connect to the scheduler.
            local_scheduler_client = local_scheduler.LocalSchedulerClient(
                local_scheduler_name, NIL_WORKER_ID, False, random_task_id(),
                False)
            self.local_scheduler_clients.append(local_scheduler_client)
            self.local_scheduler_pids.append(p4)
示例#2
0
    def __init__(self, redis_address, autoscaling_config, redis_password=None):
        """Set up the monitor's Redis connections and autoscaler state.

        Args:
            redis_address: The "ip:port" address of the primary Redis
                server.
            autoscaling_config: The autoscaling configuration; a falsy
                value disables the autoscaler.
            redis_password: Optional password for the Redis servers.
        """
        # Initialize the Redis clients.
        self.state = ray.experimental.state.GlobalState()
        # Bug fix: derive the address from the constructor argument, not
        # from the module-level argparse namespace `args` — reading the
        # global breaks (or silently misbehaves for) any caller that
        # constructs Monitor programmatically.
        redis_ip_address = get_ip_address(redis_address)
        redis_port = get_port(redis_address)
        self.state._initialize_global_state(redis_ip_address,
                                            redis_port,
                                            redis_password=redis_password)
        self.redis = ray.services.create_redis_client(redis_address,
                                                      password=redis_password)
        # Setup subscriptions to the primary Redis server and the Redis shards.
        self.primary_subscribe_client = self.redis.pubsub(
            ignore_subscribe_messages=True)
        # Keep a mapping from raylet client ID to IP address to use
        # for updating the load metrics.
        self.raylet_id_to_ip_map = {}
        self.load_metrics = LoadMetrics()
        if autoscaling_config:
            self.autoscaler = StandardAutoscaler(autoscaling_config,
                                                 self.load_metrics)
        else:
            self.autoscaler = None

        # Experimental feature: GCS flushing.
        self.issue_gcs_flushes = "RAY_USE_NEW_GCS" in os.environ
        self.gcs_flush_policy = None
        if self.issue_gcs_flushes:
            # Data is stored under the first data shard, so we issue flushes to
            # that redis server.
            addr_port = self.redis.lrange("RedisShards", 0, -1)
            if len(addr_port) > 1:
                logger.warning(
                    "Monitor: "
                    "TODO: if launching > 1 redis shard, flushing needs to "
                    "touch shards in parallel.")
                self.issue_gcs_flushes = False
            else:
                addr_port = addr_port[0].split(b":")
                self.redis_shard = redis.StrictRedis(host=addr_port[0],
                                                     port=addr_port[1],
                                                     password=redis_password)
                try:
                    # Probe once; if the flush command is unsupported on
                    # this shard, disable flushing instead of failing later.
                    self.redis_shard.execute_command("HEAD.FLUSH 0")
                except redis.exceptions.ResponseError as e:
                    logger.info(
                        "Monitor: "
                        "Turning off flushing due to exception: {}".format(
                            str(e)))
                    self.issue_gcs_flushes = False
示例#3
0
    def __init__(self, redis_address, autoscaling_config, redis_password=None):
        """Set up the monitor's Redis connections and autoscaler state.

        Args:
            redis_address: The "ip:port" address of the primary Redis
                server.
            autoscaling_config: The autoscaling configuration; a falsy
                value disables the autoscaler.
            redis_password: Optional password for the Redis servers.
        """
        # Initialize the Redis clients.
        self.state = ray.experimental.state.GlobalState()
        # Bug fix: derive the address from the constructor argument, not
        # from the module-level argparse namespace `args` — reading the
        # global breaks (or silently misbehaves for) any caller that
        # constructs Monitor programmatically.
        redis_ip_address = get_ip_address(redis_address)
        redis_port = get_port(redis_address)
        self.state._initialize_global_state(
            redis_ip_address, redis_port, redis_password=redis_password)
        self.redis = ray.services.create_redis_client(
            redis_address, password=redis_password)
        # Setup subscriptions to the primary Redis server and the Redis shards.
        self.primary_subscribe_client = self.redis.pubsub(
            ignore_subscribe_messages=True)
        # Keep a mapping from local scheduler client ID to IP address to use
        # for updating the load metrics.
        self.local_scheduler_id_to_ip_map = {}
        self.load_metrics = LoadMetrics()
        if autoscaling_config:
            self.autoscaler = StandardAutoscaler(autoscaling_config,
                                                 self.load_metrics)
        else:
            self.autoscaler = None

        # Experimental feature: GCS flushing.
        self.issue_gcs_flushes = "RAY_USE_NEW_GCS" in os.environ
        self.gcs_flush_policy = None
        if self.issue_gcs_flushes:
            # Data is stored under the first data shard, so we issue flushes to
            # that redis server.
            addr_port = self.redis.lrange("RedisShards", 0, -1)
            if len(addr_port) > 1:
                logger.warning(
                    "Monitor: "
                    "TODO: if launching > 1 redis shard, flushing needs to "
                    "touch shards in parallel.")
                self.issue_gcs_flushes = False
            else:
                addr_port = addr_port[0].split(b":")
                self.redis_shard = redis.StrictRedis(
                    host=addr_port[0],
                    port=addr_port[1],
                    password=redis_password)
                try:
                    # Probe once; if the flush command is unsupported on
                    # this shard, disable flushing instead of failing later.
                    self.redis_shard.execute_command("HEAD.FLUSH 0")
                except redis.exceptions.ResponseError as e:
                    logger.info(
                        "Monitor: "
                        "Turning off flushing due to exception: {}".format(
                            str(e)))
                    self.issue_gcs_flushes = False
示例#4
0
文件: test.py 项目: adgirish/ray
    def setUp(self):
        """Bring up a miniature Ray cluster for the tests.

        Starts one Redis server, one global scheduler, and
        NUM_CLUSTER_NODES triples of (plasma store, plasma manager,
        local scheduler).  Process handles and client connections are
        stored on ``self`` so a later tearDown can shut them down.
        """
        # Start one Redis server and N pairs of (plasma, local_scheduler)
        self.node_ip_address = "127.0.0.1"
        redis_address, redis_shards = services.start_redis(
            self.node_ip_address)
        redis_port = services.get_port(redis_address)
        # Brief pause so Redis is accepting connections before we use it.
        time.sleep(0.1)
        # Create a client for the global state store.
        self.state = state.GlobalState()
        self.state._initialize_global_state(self.node_ip_address, redis_port)

        # Start one global scheduler.
        self.p1 = global_scheduler.start_global_scheduler(
            redis_address, self.node_ip_address, use_valgrind=USE_VALGRIND)
        # Bookkeeping lists filled by the loop below; one entry per
        # simulated cluster node.
        self.plasma_store_pids = []
        self.plasma_manager_pids = []
        self.local_scheduler_pids = []
        self.plasma_clients = []
        self.local_scheduler_clients = []

        for i in range(NUM_CLUSTER_NODES):
            # Start the Plasma store. Plasma store name is randomly generated.
            plasma_store_name, p2 = plasma.start_plasma_store()
            self.plasma_store_pids.append(p2)
            # Start the Plasma manager.
            # Assumption: Plasma manager name and port are randomly generated
            # by the plasma module.
            manager_info = plasma.start_plasma_manager(plasma_store_name,
                                                       redis_address)
            plasma_manager_name, p3, plasma_manager_port = manager_info
            self.plasma_manager_pids.append(p3)
            plasma_address = "{}:{}".format(self.node_ip_address,
                                            plasma_manager_port)
            # NOTE(review): the trailing 64 is presumably the release-delay
            # argument of plasma.connect — confirm against the pyarrow docs.
            plasma_client = pa.plasma.connect(plasma_store_name,
                                              plasma_manager_name, 64)
            self.plasma_clients.append(plasma_client)
            # Start the local scheduler.
            local_scheduler_name, p4 = local_scheduler.start_local_scheduler(
                plasma_store_name,
                plasma_manager_name=plasma_manager_name,
                plasma_address=plasma_address,
                redis_address=redis_address,
                static_resources={"CPU": 10})
            # Connect to the scheduler.
            local_scheduler_client = local_scheduler.LocalSchedulerClient(
                local_scheduler_name, NIL_WORKER_ID, NIL_ACTOR_ID, False, 0)
            self.local_scheduler_clients.append(local_scheduler_client)
            self.local_scheduler_pids.append(p4)
示例#5
0
文件: monitor.py 项目: zerocurve/ray
            time.sleep(ray._config.heartbeat_timeout_milliseconds() * 1e-3)

        # TODO(rkn): This infinite loop should be inside of a try/except block,
        # and if an exception is thrown we should push an error message to all
        # drivers.


if __name__ == "__main__":
    # Command-line entry point: parse the Redis address to monitor,
    # build the Monitor, and run its main loop.
    parser = argparse.ArgumentParser(
        description=("Parse Redis server for the "
                     "monitor to connect to."))
    parser.add_argument(
        "--redis-address",
        required=True,
        type=str,
        help="the address to use for Redis")
    parser.add_argument(
        "--autoscaling-config",
        required=False,
        type=str,
        help="the path to the autoscaling config file")
    args = parser.parse_args()

    # Split the "ip:port" Redis address into its components.
    redis_ip_address = get_ip_address(args.redis_address)
    redis_port = get_port(args.redis_address)

    # Expand "~" in the autoscaling config path when one was given.
    autoscaling_config = (os.path.expanduser(args.autoscaling_config)
                          if args.autoscaling_config else None)

    monitor = Monitor(redis_ip_address, redis_port, autoscaling_config)
    monitor.run()
示例#6
0
        type=str,
        default=ray_constants.LOGGER_LEVEL,
        choices=ray_constants.LOGGER_LEVEL_CHOICES,
        help=ray_constants.LOGGER_LEVEL_HELP)
    parser.add_argument(
        "--logging-format",
        required=False,
        type=str,
        default=ray_constants.LOGGER_FORMAT,
        help=ray_constants.LOGGER_FORMAT_HELP)
    args = parser.parse_args()
    # Configure the root logger from the command-line options.
    level = logging.getLevelName(args.logging_level.upper())
    logging.basicConfig(level=level, format=args.logging_format)

    # Split the "ip:port" Redis address into its components.
    redis_ip_address = get_ip_address(args.redis_address)
    redis_port = get_port(args.redis_address)

    # Expand "~" in the autoscaling config path, if one was given.
    if args.autoscaling_config:
        autoscaling_config = os.path.expanduser(args.autoscaling_config)
    else:
        autoscaling_config = None

    # NOTE(review): Monitor is called here with (ip, port, config) —
    # confirm this matches the Monitor.__init__ signature in this file
    # version before changing either side.
    monitor = Monitor(
        redis_ip_address,
        redis_port,
        autoscaling_config,
        redis_password=args.redis_password)

    try:
        monitor.run()
    except Exception as e: