def test_parse_errors():
  """A URL missing the 'zk://' scheme is rejected; accepted URLs are split without validation."""
  with pytest.raises(ValueError) as e:
    parse("host1:port1")
  assert e.value.message == "Expecting 'zk://' at the beginning of the URL"

  # This method doesn't validate the values in the tuple.
  cases = [
      ("zk://", (None, "", "/")),
      ("zk://host_no_port", (None, "host_no_port", "/")),
      ("zk://jake@host", ("jake", "host", "/")),
  ]
  for url, expected in cases:
    assert parse(url) == expected
def resolve_master(
    cluster_url,
    master_callback=lambda: True,
    termination_callback=lambda: True,
    zk_client=None):
  """
    Resolve the MySQL cluster master's endpoint from the given URL for this cluster.

    :param cluster_url: The ZooKeeper URL for this cluster.
    :param master_callback: A callback method with one argument: the ServiceInstance for the
                            elected master.
    :param termination_callback: A callback method with no argument. Invoked when the cluster
                                 terminates.
    :param zk_client: Use a custom ZK client instead of Kazoo if specified.
    :raises ValueError: If 'cluster_url' is not a valid ZooKeeper URL.
  """
  try:
    _, zk_servers, cluster_path = zookeeper.parse(cluster_url)
  except Exception as e:
    # str(e) instead of e.message: BaseException.message is deprecated since
    # Python 2.6 and removed in Python 3.
    raise ValueError("Invalid cluster_url: %s" % str(e))

  if not zk_client:
    zk_client = KazooClient(zk_servers)
    zk_client.start()

  # The listener keeps running in the background; this function intentionally
  # does not block or return it.
  listener = ClusterListener(
      zk_client,
      cluster_path,
      None,  # No self instance: we are only observing the cluster.
      master_callback=master_callback,
      termination_callback=termination_callback)
  listener.start()
def from_task(self, task, sandbox):
  """
    Build a MysosTaskRunner for the given Mesos task.

    :param task: The Mesos TaskInfo; task.data is a JSON blob with keys
                 'cluster', 'host', 'port' and 'zk_url'.
    :param sandbox: The task sandbox used by the task-control, installer and
                    backup-store providers.
    :return: A configured MysosTaskRunner.
    :raises TaskError: If any of the providers fail to initialize.
  """
  data = json.loads(task.data)
  cluster_name, host, port, zk_url = (
      data['cluster'], data['host'], data['port'], data['zk_url'])

  _, servers, path = parse(zk_url)
  kazoo = KazooClient(servers)
  kazoo.start()
  self_instance = ServiceInstance(Endpoint(host, port))

  try:
    task_control = self._task_control_provider.from_task(task, sandbox)
    installer = self._installer_provider.from_task(task, sandbox)
    backup_store = self._backup_store_provider.from_task(task, sandbox)
  except (TaskControl.Error, PackageInstaller.Error) as e:
    kazoo.stop()  # Kazoo needs to be cleaned up. See kazoo/issues/217.
    # str(e) instead of e.message: BaseException.message is deprecated and
    # removed in Python 3.
    raise TaskError(str(e))

  state_manager = StateManager(sandbox, backup_store)

  return MysosTaskRunner(
      self_instance,
      kazoo,
      get_cluster_path(path, cluster_name),
      installer,
      task_control,
      state_manager)
def resolve_master(cluster_url,
                   master_callback=lambda: True,
                   termination_callback=lambda: True,
                   zk_client=None):
  """
    Resolve the MySQL cluster master's endpoint from the given URL for this cluster.

    :param cluster_url: The ZooKeeper URL for this cluster.
    :param master_callback: A callback method with one argument: the ServiceInstance for the
                            elected master.
    :param termination_callback: A callback method with no argument. Invoked when the cluster
                                 terminates.
    :param zk_client: Use a custom ZK client instead of Kazoo if specified.
    :raises ValueError: If 'cluster_url' is not a valid ZooKeeper URL.
  """
  try:
    _, zk_servers, cluster_path = zookeeper.parse(cluster_url)
  except Exception as e:
    # str(e) instead of e.message: BaseException.message is deprecated since
    # Python 2.6 and removed in Python 3.
    raise ValueError("Invalid cluster_url: %s" % str(e))

  if not zk_client:
    zk_client = KazooClient(zk_servers)
    zk_client.start()

  # The listener keeps running in the background; this function intentionally
  # does not block or return it.
  listener = ClusterListener(zk_client,
                             cluster_path,
                             None,  # No self instance: observing only.
                             master_callback=master_callback,
                             termination_callback=termination_callback)
  listener.start()
def test_parse():
  """parse() splits a ZK URL into (credentials, server list, chroot path)."""
  cases = [
      ("zk://*****:*****@host1:port1",
       ("jake:1", "host1:port1", "/")),
      ("zk://*****:*****@host1:port1/",
       ("jake:1", "host1:port1", "/")),
      ("zk://*****:*****@host1:port1,host2:port2",
       ("jake:1", "host1:port1,host2:port2", "/")),
      ("zk://*****:*****@host1:port1,host2:port2/",
       ("jake:1", "host1:port1,host2:port2", "/")),
      ("zk://*****:*****@host1:port1,host2:port2/path/to/znode",
       ("jake:1", "host1:port1,host2:port2", "/path/to/znode")),
  ]
  for url, expected in cases:
    assert parse(url) == expected
def test_parse():
  """Each ZK URL maps to a (credentials, servers, path) triple; a missing path defaults to '/'."""
  expectations = {
      "zk://*****:*****@host1:port1":
          ("jake:1", "host1:port1", "/"),
      "zk://*****:*****@host1:port1/":
          ("jake:1", "host1:port1", "/"),
      "zk://*****:*****@host1:port1,host2:port2":
          ("jake:1", "host1:port1,host2:port2", "/"),
      "zk://*****:*****@host1:port1,host2:port2/":
          ("jake:1", "host1:port1,host2:port2", "/"),
      "zk://*****:*****@host1:port1,host2:port2/path/to/znode":
          ("jake:1", "host1:port1,host2:port2", "/path/to/znode"),
  }
  for url in expectations:
    assert parse(url) == expectations[url]
def from_task(self, task, sandbox):
  """
    Build a test-double MysosTaskRunner for the given task.

    Uses FakeClient, NoopPackageInstaller and Fake() stand-ins instead of real
    ZooKeeper, installer and state-manager components.

    :param task: The Mesos TaskInfo; task.data is a JSON blob with keys
                 'cluster', 'port' and 'zk_url'.
    :param sandbox: The task sandbox passed to the task-control provider.
    :return: A MysosTaskRunner wired with fakes.
  """
  data = json.loads(task.data)
  cluster_name, port, zk_url = data['cluster'], data['port'], data['zk_url']
  # Only the chroot path is used; the server list is ignored because a
  # FakeClient replaces the real ZooKeeper connection.
  _, _, path = zookeeper.parse(zk_url)
  zk_client = FakeClient()
  zk_client.start()
  self_instance = ServiceInstance(
      Endpoint(socket.gethostbyname(socket.gethostname()), port))
  task_control = self._task_control_provider.from_task(task, sandbox)

  return MysosTaskRunner(
      self_instance,
      zk_client,
      posixpath.join(path, cluster_name),
      NoopPackageInstaller(),
      task_control,
      Fake())
def from_task(self, task, sandbox):
  """
    Build a test-double MysosTaskRunner for the given task.

    Uses FakeClient, NoopPackageInstaller and Fake() stand-ins instead of real
    ZooKeeper, installer and state-manager components.

    :param task: The Mesos TaskInfo; task.data is a JSON blob with keys
                 'cluster', 'port' and 'zk_url'.
    :param sandbox: The task sandbox passed to the task-control provider.
    :return: A MysosTaskRunner wired with fakes.
  """
  data = json.loads(task.data)
  cluster_name, port, zk_url = data['cluster'], data['port'], data['zk_url']
  # Only the chroot path is used; the server list is ignored because a
  # FakeClient replaces the real ZooKeeper connection.
  _, _, path = zookeeper.parse(zk_url)
  zk_client = FakeClient()
  zk_client.start()
  self_instance = ServiceInstance(Endpoint(socket.gethostbyname(socket.gethostname()), port))
  task_control = self._task_control_provider.from_task(task, sandbox)

  return MysosTaskRunner(
      self_instance,
      zk_client,
      posixpath.join(path, cluster_name),
      NoopPackageInstaller(),
      task_control,
      Fake())
def from_task(self, task, sandbox):
  """
    Build a MysosTaskRunner for the given Mesos task.

    :param task: The Mesos TaskInfo; task.data is a JSON blob with keys
                 'cluster', 'host', 'port' and 'zk_url'.
    :param sandbox: The task sandbox used by the task-control, installer and
                    backup-store providers.
    :return: A configured MysosTaskRunner.
    :raises TaskError: If any of the providers fail to initialize.
  """
  data = json.loads(task.data)
  cluster_name, host, port, zk_url = (
      data['cluster'], data['host'], data['port'], data['zk_url'])
  _, servers, path = parse(zk_url)
  kazoo = KazooClient(servers)
  kazoo.start()
  self_instance = ServiceInstance(Endpoint(host, port))

  try:
    task_control = self._task_control_provider.from_task(task, sandbox)
    installer = self._installer_provider.from_task(task, sandbox)
    backup_store = self._backup_store_provider.from_task(task, sandbox)
  except (TaskControl.Error, PackageInstaller.Error) as e:
    # Stop the already-started client on the failure path, otherwise its
    # threads keep the connection alive. See kazoo/issues/217.
    kazoo.stop()
    # str(e) instead of e.message: BaseException.message is deprecated and
    # removed in Python 3.
    raise TaskError(str(e))

  state_manager = StateManager(sandbox, backup_store)

  return MysosTaskRunner(
      self_instance,
      kazoo,
      get_cluster_path(path, cluster_name),
      installer,
      task_control,
      state_manager)
def main(args, options):
  """
    Mysos scheduler entry point.

    Validates command-line flags, restores (or initializes) the scheduler
    state, starts the Mesos scheduler driver and the HTTP API server, then
    blocks until the scheduler stops.

    :param args: Positional command-line arguments (unused).
    :param options: Parsed command-line options; see the flag definitions.
  """
  log.info("Options in use: %s", options)

  if not options.api_port:
    app.error('Must specify --port')

  if not options.mesos_master:
    app.error('Must specify --mesos_master')

  if not options.framework_user:
    app.error('Must specify --framework_user')

  if not options.executor_uri:
    app.error('Must specify --executor_uri')

  if not options.executor_cmd:
    app.error('Must specify --executor_cmd')

  if not options.zk_url:
    app.error('Must specify --zk_url')

  if not options.admin_keypath:
    app.error('Must specify --admin_keypath')

  try:
    election_timeout = parse_time(options.election_timeout)
    framework_failover_timeout = parse_time(options.framework_failover_timeout)
  except InvalidTime as e:
    # str(e) instead of e.message: BaseException.message is deprecated and
    # removed in Python 3.
    app.error(str(e))

  try:
    _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
  except Exception as e:
    app.error("Invalid --zk_url: %s" % str(e))

  web_assets_dir = os.path.join(options.work_dir, "web")
  pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
  log.info("Extracted web assets into %s" % options.work_dir)

  fw_principal = None
  fw_secret = None
  if options.framework_authentication_file:
    try:
      with open(options.framework_authentication_file, "r") as f:
        # NOTE(review): yaml.load() without an explicit Loader can construct
        # arbitrary Python objects; prefer yaml.safe_load() if this file may
        # come from an untrusted source.
        cred = yaml.load(f)
        fw_principal = cred["principal"]
        fw_secret = cred["secret"]
        log.info("Loaded credential (principal=%s) for framework authentication" % fw_principal)
    except IOError as e:
      app.error("Unable to read the framework authentication key file: %s" % e)
    except (KeyError, yaml.YAMLError) as e:
      app.error("Invalid framework authentication key file format %s" % e)

  log.info("Starting Mysos scheduler")

  kazoo = KazooClient(zk_servers)
  kazoo.start()

  if options.state_storage == 'zk':
    log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
    state_provider = ZooKeeperStateProvider(kazoo, zk_root)
  else:
    log.info("Using local disk for state storage")
    state_provider = LocalStateProvider(options.work_dir)

  try:
    state = state_provider.load_scheduler_state()
  except StateProvider.Error as e:
    app.error(str(e))

  if state:
    log.info("Successfully restored scheduler state")
    framework_info = state.framework_info
    if framework_info.HasField('id'):
      log.info("Recovered scheduler's FrameworkID is %s" % framework_info.id.value)
  else:
    log.info("No scheduler state to restore")
    framework_info = FrameworkInfo(
        user=options.framework_user,
        name=FRAMEWORK_NAME,
        checkpoint=True,
        failover_timeout=framework_failover_timeout.as_(Time.SECONDS),
        role=options.framework_role)
    if fw_principal:
      framework_info.principal = fw_principal
    state = Scheduler(framework_info)
    state_provider.dump_scheduler_state(state)

  scheduler = MysosScheduler(
      state,
      state_provider,
      options.framework_user,
      options.executor_uri,
      options.executor_cmd,
      kazoo,
      options.zk_url,
      election_timeout,
      options.admin_keypath,
      installer_args=options.installer_args,
      backup_store_args=options.backup_store_args,
      executor_environ=options.executor_environ,
      framework_role=options.framework_role)

  if fw_principal and fw_secret:
    cred = Credential(principal=fw_principal, secret=fw_secret)
    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler,
        framework_info,
        options.mesos_master,
        cred)
  else:
    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler,
        framework_info,
        options.mesos_master)

  scheduler_driver.start()

  server = HttpServer()
  server.mount_routes(MysosServer(scheduler, web_assets_dir))

  et = ExceptionalThread(
      target=server.run, args=('0.0.0.0', options.api_port, 'cherrypy'))
  et.daemon = True
  et.start()

  try:
    # Wait for the scheduler to stop.
    # The use of 'stopped' event instead of scheduler_driver.join() is necessary to stop the
    # process with SIGINT.
    while not scheduler.stopped.wait(timeout=0.5):
      pass
  except KeyboardInterrupt:
    log.info('Interrupted, exiting.')
  else:
    log.info('Scheduler exited.')

  app.shutdown(1)  # Mysos scheduler is supposed to be long-running thus the use of exit status 1.
def main(args, options):
  """
    Mysos scheduler entry point.

    Validates command-line flags, restores (or initializes) the scheduler
    state, starts the Mesos scheduler driver and the HTTP API server, then
    blocks until the scheduler stops.

    :param args: Positional command-line arguments (unused).
    :param options: Parsed command-line options; see the flag definitions.
  """
  log.info("Options in use: %s", options)

  if not options.api_port:
    app.error('Must specify --port')

  if not options.mesos_master:
    app.error('Must specify --mesos_master')

  if not options.framework_user:
    app.error('Must specify --framework_user')

  if not options.executor_uri:
    app.error('Must specify --executor_uri')

  if not options.executor_cmd:
    app.error('Must specify --executor_cmd')

  if not options.zk_url:
    app.error('Must specify --zk_url')

  if not options.admin_keypath:
    app.error('Must specify --admin_keypath')

  try:
    election_timeout = parse_time(options.election_timeout)
    framework_failover_timeout = parse_time(options.framework_failover_timeout)
  except InvalidTime as e:
    # str(e) instead of e.message: BaseException.message is deprecated and
    # removed in Python 3.
    app.error(str(e))

  try:
    _, zk_servers, zk_root = zookeeper.parse(options.zk_url)
  except Exception as e:
    app.error("Invalid --zk_url: %s" % str(e))

  web_assets_dir = os.path.join(options.work_dir, "web")
  pkgutil.unpack_assets(web_assets_dir, MYSOS_MODULE, ASSET_RELPATH)
  log.info("Extracted web assets into %s" % options.work_dir)

  fw_principal = None
  fw_secret = None
  if options.framework_authentication_file:
    try:
      with open(options.framework_authentication_file, "r") as f:
        # NOTE(review): yaml.load() without an explicit Loader can construct
        # arbitrary Python objects; prefer yaml.safe_load() if this file may
        # come from an untrusted source.
        cred = yaml.load(f)
        fw_principal = cred["principal"]
        fw_secret = cred["secret"]
        log.info("Loaded credential (principal=%s) for framework authentication" % fw_principal)
    except IOError as e:
      app.error("Unable to read the framework authentication key file: %s" % e)
    except (KeyError, yaml.YAMLError) as e:
      app.error("Invalid framework authentication key file format %s" % e)

  log.info("Starting Mysos scheduler")

  kazoo = KazooClient(zk_servers)
  kazoo.start()

  if options.state_storage == 'zk':
    log.info("Using ZooKeeper (path: %s) for state storage" % zk_root)
    state_provider = ZooKeeperStateProvider(kazoo, zk_root)
  else:
    log.info("Using local disk for state storage")
    state_provider = LocalStateProvider(options.work_dir)

  try:
    state = state_provider.load_scheduler_state()
  except StateProvider.Error as e:
    app.error(str(e))

  if state:
    log.info("Successfully restored scheduler state")
    framework_info = state.framework_info
    if framework_info.HasField('id'):
      log.info("Recovered scheduler's FrameworkID is %s" % framework_info.id.value)
  else:
    log.info("No scheduler state to restore")
    framework_info = FrameworkInfo(
        user=options.framework_user,
        name=FRAMEWORK_NAME,
        checkpoint=True,
        failover_timeout=framework_failover_timeout.as_(Time.SECONDS),
        role=options.framework_role)
    if fw_principal:
      framework_info.principal = fw_principal
    state = Scheduler(framework_info)
    state_provider.dump_scheduler_state(state)

  scheduler = MysosScheduler(
      state,
      state_provider,
      options.framework_user,
      options.executor_uri,
      options.executor_cmd,
      kazoo,
      options.zk_url,
      election_timeout,
      options.admin_keypath,
      installer_args=options.installer_args,
      backup_store_args=options.backup_store_args,
      executor_environ=options.executor_environ,
      framework_role=options.framework_role)

  if fw_principal and fw_secret:
    cred = Credential(principal=fw_principal, secret=fw_secret)
    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler,
        framework_info,
        options.mesos_master,
        cred)
  else:
    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler,
        framework_info,
        options.mesos_master)

  scheduler_driver.start()

  server = HttpServer()
  server.mount_routes(MysosServer(scheduler, web_assets_dir))

  et = ExceptionalThread(
      target=server.run, args=('0.0.0.0', options.api_port, 'cherrypy'))
  et.daemon = True
  et.start()

  try:
    # Wait for the scheduler to stop.
    # The use of 'stopped' event instead of scheduler_driver.join() is necessary to stop the
    # process with SIGINT.
    while not scheduler.stopped.wait(timeout=0.5):
      pass
  except KeyboardInterrupt:
    log.info('Interrupted, exiting.')
  else:
    log.info('Scheduler exited.')

  app.shutdown(1)  # Mysos scheduler is supposed to be long-running thus the use of exit status 1.
def __init__(
    self,
    driver,
    cluster,
    state_provider,
    zk_url,
    kazoo,
    framework_user,
    executor_uri,
    executor_cmd,
    election_timeout,
    admin_keypath,
    scheduler_key,
    installer_args=None,
    backup_store_args=None,
    executor_environ=None,
    framework_role='*',
    query_interval=Amount(1, Time.SECONDS)):
  """
    Set up the launcher for one MySQL cluster, recovering any in-flight
    election state if the cluster was restored from persisted state.

    :param driver: Mesos scheduler driver.
    :param cluster: The MySQLCluster state object.
    :param state_provider: For restoring and persisting the cluster state.
    :param zk_url: The ZooKeeper URL for cluster member discovery and master election.
    :param kazoo: The Kazoo client to access ZooKeeper with.
    :param executor_uri: See flags.
    :param executor_cmd: See flags.
    :param election_timeout: See flags.
    :param admin_keypath: See flags.
    :param scheduler_key: Used for encrypting cluster passwords.
    :param installer_args: See flags.
    :param backup_store_args: See flags.
    :param executor_environ: See flags.
    :param framework_role: See flags.
    :param query_interval: See MySQLMasterElector. Use the default value for production and allow
                           tests to use a different value.
    :raises TypeError: If 'cluster' or 'state_provider' has the wrong type.
  """
  self._driver = driver

  # Fail fast on wrong types: these two objects drive all persistence logic.
  if not isinstance(cluster, MySQLCluster):
    raise TypeError("'cluster' should be an instance of MySQLCluster")
  self._cluster = cluster

  if not isinstance(state_provider, StateProvider):
    raise TypeError("'state_provider' should be an instance of StateProvider")
  self._state_provider = state_provider

  self._framework_role = framework_role

  # Passed along to executors.
  self._zk_url = zk_url
  self._framework_user = framework_user
  self._executor_uri = executor_uri
  self._executor_cmd = executor_cmd
  self._election_timeout = election_timeout
  self._admin_keypath = admin_keypath
  self._installer_args = installer_args
  self._backup_store_args = backup_store_args
  self._executor_environ = executor_environ

  # Used by the elector.
  self._query_interval = query_interval

  # zookeeper.parse returns (credentials, servers, path); only the chroot
  # path is needed here since 'kazoo' is already connected.
  zk_root = zookeeper.parse(zk_url)[2]
  self._cluster_manager = ClusterManager(kazoo, get_cluster_path(zk_root, cluster.name))

  self._password_box = PasswordBox(scheduler_key)
  self._password_box.decrypt(cluster.encrypted_password)  # Validate the password.

  self._lock = threading.Lock()

  if self._cluster.master_id:
    log.info("Republish master %s for cluster %s in case it's not published" % (
        self._cluster.master_id, self.cluster_name))
    self._cluster_manager.promote_member(self._cluster.master_id)

  if len(self._cluster.tasks) > 0:
    log.info("Recovered %s tasks for cluster '%s'" % (
        len(self._cluster.tasks), self.cluster_name))

  # A recovered launcher should continue the election if the previous one was incomplete when the
  # scheduler failed over. Mesos will deliver all missed events that affect the election to the
  # scheduler.
  if len(self._cluster.running_tasks) > 0 and not self._cluster.master_id:
    log.info("Restarting election for the recovered launcher")
    self._elector = self._new_elector()
    # Add current slaves.
    for t in self._cluster.running_tasks:
      self._elector.add_slave(t.task_id, t.mesos_slave_id)
    self._elector.start()
  else:
    # New launcher, the elector is set when the election starts and reset to None when it ends.
    self._elector = None

  self._terminating = False
def __init__(self,
             driver,
             cluster,
             state_provider,
             zk_url,
             kazoo,
             framework_user,
             executor_uri,
             executor_cmd,
             election_timeout,
             admin_keypath,
             scheduler_key,
             installer_args=None,
             backup_store_args=None,
             executor_environ=None,
             executor_source_prefix=None,
             framework_role='*',
             query_interval=Amount(1, Time.SECONDS)):
  """
    Set up the launcher for one MySQL cluster, recovering any in-flight
    election state if the cluster was restored from persisted state.

    :param driver: Mesos scheduler driver.
    :param cluster: The MySQLCluster state object.
    :param state_provider: For restoring and persisting the cluster state.
    :param zk_url: The ZooKeeper URL for cluster member discovery and master election.
    :param kazoo: The Kazoo client to access ZooKeeper with.
    :param executor_uri: See flags.
    :param executor_cmd: See flags.
    :param election_timeout: See flags.
    :param admin_keypath: See flags.
    :param scheduler_key: Used for encrypting cluster passwords.
    :param installer_args: See flags.
    :param backup_store_args: See flags.
    :param executor_environ: See flags.
    :param executor_source_prefix: See flags.
    :param framework_role: See flags.
    :param query_interval: See MySQLMasterElector. Use the default value for production and allow
                           tests to use a different value.
    :raises TypeError: If 'cluster' or 'state_provider' has the wrong type.
  """
  self._driver = driver

  # Fail fast on wrong types: these two objects drive all persistence logic.
  if not isinstance(cluster, MySQLCluster):
    raise TypeError("'cluster' should be an instance of MySQLCluster")
  self._cluster = cluster

  if not isinstance(state_provider, StateProvider):
    raise TypeError(
        "'state_provider' should be an instance of StateProvider")
  self._state_provider = state_provider

  self._framework_role = framework_role

  # Passed along to executors.
  self._zk_url = zk_url
  self._framework_user = framework_user
  self._executor_uri = executor_uri
  self._executor_cmd = executor_cmd
  self._election_timeout = election_timeout
  self._admin_keypath = admin_keypath
  self._installer_args = installer_args
  self._backup_store_args = backup_store_args
  self._executor_environ = executor_environ
  self._executor_source_prefix = executor_source_prefix

  # Used by the elector.
  self._query_interval = query_interval

  # zookeeper.parse returns (credentials, servers, path); only the chroot
  # path is needed here since 'kazoo' is already connected.
  zk_root = zookeeper.parse(zk_url)[2]
  self._cluster_manager = ClusterManager(
      kazoo, get_cluster_path(zk_root, cluster.name))

  self._password_box = PasswordBox(scheduler_key)
  self._password_box.decrypt(
      cluster.encrypted_password)  # Validate the password.

  self._lock = threading.Lock()

  if self._cluster.master_id:
    log.info(
        "Republish master %s for cluster %s in case it's not published"
        % (self._cluster.master_id, self.cluster_name))
    self._cluster_manager.promote_member(self._cluster.master_id)

  if len(self._cluster.tasks) > 0:
    log.info("Recovered %s tasks for cluster '%s'" %
             (len(self._cluster.tasks), self.cluster_name))

  # A recovered launcher should continue the election if the previous one was incomplete when the
  # scheduler failed over. Mesos will deliver all missed events that affect the election to the
  # scheduler.
  if len(self._cluster.running_tasks) > 0 and not self._cluster.master_id:
    log.info("Restarting election for the recovered launcher")
    self._elector = self._new_elector()
    # Add current slaves.
    for t in self._cluster.running_tasks:
      self._elector.add_slave(t.task_id, t.mesos_slave_id)
    self._elector.start()
  else:
    # New launcher, the elector is set when the election starts and reset to None when it ends.
    self._elector = None

  self._terminating = False