Example #1
    def test_existing_zk(self):
        """
      ClusterManager needs to be able to recover from an existing ZK group for scheduler failover.
    """
        manager = ClusterManager(self.client, "/home/my_cluster")

        instance1 = ServiceInstance(Endpoint("host1", 10000))
        member1 = manager.add_member(instance1)
        instance2 = ServiceInstance(Endpoint("host2", 10000))
        member2 = manager.add_member(instance2)

        assert (self.storage.paths["/home/my_cluster/slaves/member_0000000000"]
                ["data"] == ServiceInstance.pack(instance1))
        assert (self.storage.paths["/home/my_cluster/slaves/member_0000000001"]
                ["data"] == ServiceInstance.pack(instance2))

        manager.promote_member(member1)

        # Test the new ClusterManager.
        manager2 = ClusterManager(self.client, "/home/my_cluster")
        assert len(manager2._cluster.members) == 2
        assert member1 in manager2._cluster.members
        assert member2 in manager2._cluster.members
        assert manager2._cluster.members[member1] == ServiceInstance.pack(
            instance1)
Example #2
    def test_promote_member(self):
        manager = ClusterManager(self.client, "/home/my_cluster")
        instance = ServiceInstance(Endpoint("host", 10000))
        member = manager.add_member(instance)

        assert manager.promote_member(member)
        assert not manager.promote_member(member)  # The 2nd promotion is a no-op.

        assert (self.storage.paths["/home/my_cluster/master/member_0000000000"]
                ["data"] == ServiceInstance.pack(instance))
Example #3
    def test_callbacks(self):
        manager = ClusterManager(self.client, "/home/my_cluster")

        # Set up 2 listeners.
        instance1 = ServiceInstance(Endpoint("host1", 10000))
        handler1 = CallbackHandler()
        listener1 = ClusterListener(self.client, "/home/my_cluster", instance1,
                                    handler1.promotion_callback,
                                    handler1.demotion_callback,
                                    handler1.master_callback,
                                    handler1.termination_callback)
        listener1.start()
        member1 = manager.add_member(instance1)

        instance2 = ServiceInstance(Endpoint("host2", 10000))
        handler2 = CallbackHandler()
        listener2 = ClusterListener(self.client, "/home/my_cluster", instance2,
                                    handler2.promotion_callback,
                                    handler2.demotion_callback,
                                    handler2.master_callback)
        listener2.start()
        member2 = manager.add_member(instance2)

        # Test promotion.
        manager.promote_member(member1)

        assert handler1.promoted.wait(1)
        assert handler2.detected.get(True, 1) == instance1

        assert (self.storage.paths["/home/my_cluster/master/member_0000000000"]
                ["data"] == ServiceInstance.pack(instance1))
        assert (self.storage.paths["/home/my_cluster/slaves/member_0000000001"]
                ["data"] == ServiceInstance.pack(instance2))

        manager.promote_member(member2)

        assert handler1.demoted.wait(1)
        assert handler2.promoted.wait(1)

        assert (self.storage.paths["/home/my_cluster/master/member_0000000001"]
                ["data"] == ServiceInstance.pack(instance2))
        assert "/home/my_cluster/master/member_0000000000" not in self.storage.paths

        manager.remove_member(member2)
        assert handler2.demoted.wait(1)

        # Test removing cluster.
        manager.remove_member(member1)
        manager.delete_cluster()
        assert handler1.terminated.wait(1)
Example #4
    def add_member(self, service_instance):
        """
      Add the member to the ZooKeeper group.
      NOTE:
        - New members are slaves until being promoted.
        - A new member is not added if the specified service_instance already exists in the group.
      :return: The member ID for the ServiceInstance generated by ZooKeeper.
    """
        if not isinstance(service_instance, ServiceInstance):
            raise TypeError("'service_instance' should be a ServiceInstance")

        content = ServiceInstance.pack(service_instance)

        for k, v in self._cluster.members.items():
            if content == v:
                log.info(
                    "%s not added because it already exists in the group" %
                    service_instance)
                return k

        znode_path = self._client.create(posixpath.join(
            self._cluster.slaves_group, self._cluster.MEMBER_PREFIX),
                                         content,
                                         sequence=True)
        _, member_id = posixpath.split(znode_path)
        with self._lock:
            self._cluster.members[member_id] = content
            return member_id
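A minimal usage sketch for add_member (not part of the original project): the first call creates a sequential znode under the slaves group and returns the member ID, while a second call with the same ServiceInstance is a no-op that returns the existing ID. The ZooKeeper address, cluster path, and import paths below are assumptions.

# Sketch only; import paths and the ZooKeeper address are assumed.
from kazoo.client import KazooClient
from twitter.common.zookeeper.serverset.endpoint import Endpoint, ServiceInstance
from mysos.common.cluster import ClusterManager  # import path assumed

client = KazooClient("localhost:2181")
client.start()

manager = ClusterManager(client, "/home/my_cluster")

instance = ServiceInstance(Endpoint("host1", 10000))
member_id = manager.add_member(instance)           # e.g. "member_0000000000" under .../slaves
assert member_id == manager.add_member(instance)   # duplicate content: returns the existing ID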
Example #5
    def __init__(self,
                 client,
                 cluster_path,
                 self_instance=None,
                 promotion_callback=None,
                 demotion_callback=None,
                 master_callback=None,
                 termination_callback=None):
        """
      :param client: Kazoo client.
      :param cluster_path: The path for this cluster on ZooKeeper.
      :param self_instance: The local ServiceInstance associated with this listener.
      :param promotion_callback: Invoked when 'self_instance' is promoted.
      :param demotion_callback: Invoked when 'self_instance' is demoted.
      :param master_callback: Invoked when the master changes to an instance other than 'self_instance'.
      :param termination_callback: Invoked when the cluster is terminated.
      NOTE: Callbacks are executed synchronously in Kazoo's completion thread to ensure the delivery
            order of events. Blocking the callback method means no future callbacks will be invoked.
    """
        self._client = client
        self._cluster = Cluster(cluster_path)
        self._self_content = ServiceInstance.pack(
            self_instance) if self_instance else None
        self._master = None
        self._master_content = None
        self._promotion_callback = promotion_callback or (lambda: True)
        self._demotion_callback = demotion_callback or (lambda: True)
        self._master_callback = master_callback or (lambda x: True)
        self._termination_callback = termination_callback or (lambda: True)

        self._children_watch = None  # Set when the watcher detects that the master group exists.
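A minimal wiring sketch for this constructor, mirroring the listener setup in the test_callbacks example above. The ZooKeeper address, import paths, and callback bodies are illustrative assumptions, not taken from the project.

# Sketch only; import paths, the ZooKeeper address, and callback bodies are assumed.
from kazoo.client import KazooClient
from twitter.common.zookeeper.serverset.endpoint import Endpoint, ServiceInstance
from mysos.common.cluster import ClusterListener  # import path assumed

client = KazooClient("localhost:2181")
client.start()

self_instance = ServiceInstance(Endpoint("host1", 10000))

def on_promotion(): print("promoted to master")
def on_demotion(): print("demoted from master")
def on_master_change(master): print("new master: %s" % master)
def on_termination(): print("cluster deleted")

listener = ClusterListener(
    client,
    "/home/my_cluster",
    self_instance,
    promotion_callback=on_promotion,
    demotion_callback=on_demotion,
    master_callback=on_master_change,
    termination_callback=on_termination)
listener.start()  # starts watching the cluster's master group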
Example #6
def test_internal_monitor(mock_group_impl_validator, MockActiveKazooGroup):
  mock_zk = mock.Mock(spec=KazooClient)
  mock_group = mock.MagicMock(spec=GroupInterface)
  MockActiveKazooGroup.mock_add_spec(ActiveKazooGroup)
  MockActiveKazooGroup.return_value = mock_group

  # By default it asserts that the group impl is a subclass of GroupInterface.
  # Since the group impl here is a mock, that check would fail, so we mock the
  # validator as well.
  mock_group_impl_validator.return_value = True

  def devnull(*args, **kwargs): pass

  serverset = ServerSet(
      mock_zk,
      '/some/path/to/group',
      on_join=devnull,
      on_leave=devnull)

  members = [Membership(id) for id in range(2)]

  print("Members are: %s" % members)
  serverset._internal_monitor(frozenset(members))

  for call in mock_group.info.mock_calls:
    _, (_, callback), _ = call
    callback(ServiceInstance.unpack(SERVICE_INSTANCE_JSON))

  assert len(serverset._members) == 2
Example #7
File: cluster.py Project: GavinHwa/mysos
  def __init__(self,
               client,
               cluster_path,
               self_instance=None,
               promotion_callback=None,
               demotion_callback=None,
               master_callback=None,
               termination_callback=None):
    """
      :param client: Kazoo client.
      :param cluster_path: The path for this cluster on ZooKeeper.
      :param self_instance: The local ServiceInstance associated with this listener.
      :param promotion_callback: Invoked when 'self_instance' is promoted.
      :param demotion_callback: Invoked when 'self_instance' is demoted.
      :param master_callback: Invoked when the master changes to an instance other than 'self_instance'.
      :param termination_callback: Invoked when the cluster is terminated.
      NOTE: Callbacks are executed synchronously in Kazoo's completion thread to ensure the delivery
            order of events. Blocking the callback method means no future callbacks will be invoked.
    """
    self._client = client
    self._cluster = Cluster(cluster_path)
    self._self_content = ServiceInstance.pack(self_instance) if self_instance else None
    self._master = None
    self._master_content = None
    self._promotion_callback = promotion_callback or (lambda: True)
    self._demotion_callback = demotion_callback or (lambda: True)
    self._master_callback = master_callback or (lambda x: True)
    self._termination_callback = termination_callback or (lambda: True)

    self._children_watch = None  # Set when the watcher detects that the master group exists.
Example #8
def test_internal_monitor(mock_group_impl_validator, MockActiveKazooGroup):
    mock_zk = mock.Mock(spec=KazooClient)
    mock_group = mock.MagicMock(spec=GroupInterface)
    MockActiveKazooGroup.mock_add_spec(ActiveKazooGroup)
    MockActiveKazooGroup.return_value = mock_group

    # By default it asserts that the group impl is a subclass of GroupInterface.
    # Since the group impl here is a mock, that check would fail, so we mock the
    # validator as well.
    mock_group_impl_validator.return_value = True

    def devnull(*args, **kwargs):
        pass

    serverset = ServerSet(mock_zk,
                          '/some/path/to/group',
                          on_join=devnull,
                          on_leave=devnull)

    members = [Membership(id) for id in range(2)]

    print("Members are: %s" % members)
    serverset._internal_monitor(frozenset(members))

    for call in mock_group.info.mock_calls:
        _, (_, callback), _ = call
        callback(ServiceInstance.unpack(SERVICE_INSTANCE_JSON))

    assert len(serverset._members) == 2
Example #9
File: cluster.py Project: GavinHwa/mysos
  def add_member(self, service_instance):
    """
      Add the member to the ZooKeeper group.
      NOTE:
        - New members are slaves until being promoted.
        - A new member is not added if the specified service_instance already exists in the group.
      :return: The member ID for the ServiceInstance generated by ZooKeeper.
    """
    if not isinstance(service_instance, ServiceInstance):
      raise TypeError("'service_instance' should be a ServiceInstance")

    content = ServiceInstance.pack(service_instance)

    for k, v in self._cluster.members.items():
      if content == v:
        log.info("%s not added because it already exists in the group" % service_instance)
        return k

    znode_path = self._client.create(
        posixpath.join(self._cluster.slaves_group, self._cluster.MEMBER_PREFIX),
        content,
        sequence=True)
    _, member_id = posixpath.split(znode_path)
    with self._lock:
      self._cluster.members[member_id] = content
      return member_id
Example #10
  def test_demote(self):
    task_control = FakeTaskControl()
    runner = MysosTaskRunner(
        self._self_instance,
        self._client,
        "/home/test/my_cluster",
        NoopPackageInstaller(),
        task_control,
        self._state_manager)

    manager = ClusterManager(self._client, "/home/test/my_cluster")
    runner.start()

    self_member = manager.add_member(self._self_instance)

    # 'self_instance' becomes the master.
    manager.promote_member(self_member)

    runner.promoted.wait(1)

    another_member = manager.add_member(ServiceInstance(Endpoint("another_host", 10000)))

    # This demotes 'self_instance', which should cause runner to stop.
    manager.promote_member(another_member)

    assert deadline(runner.join, Amount(1, Time.SECONDS))
Example #11
    def test_existing_zk(self):
        """
      ClusterManager needs to be able to recover from an existing ZK group for scheduler failover.
    """
        manager = ClusterManager(self.client, "/home/my_cluster")

        instance1 = ServiceInstance(Endpoint("host1", 10000))
        member1 = manager.add_member(instance1)
        instance2 = ServiceInstance(Endpoint("host2", 10000))
        member2 = manager.add_member(instance2)

        assert self.storage.paths["/home/my_cluster/slaves/member_0000000000"]["data"] == ServiceInstance.pack(
            instance1
        )
        assert self.storage.paths["/home/my_cluster/slaves/member_0000000001"]["data"] == ServiceInstance.pack(
            instance2
        )

        manager.promote_member(member1)

        # Test the new ClusterManager.
        manager2 = ClusterManager(self.client, "/home/my_cluster")
        assert len(manager2._cluster.members) == 2
        assert member1 in manager2._cluster.members
        assert member2 in manager2._cluster.members
        assert manager2._cluster.members[member1] == ServiceInstance.pack(instance1)
Example #12
    def test_add_member(self):
        manager = ClusterManager(self.client, "/home/my_cluster")

        instance1 = ServiceInstance(Endpoint("host1", 10000))
        member1 = manager.add_member(instance1)
        assert member1 == manager.add_member(instance1)  # Second insertion is ignored.

        instance2 = ServiceInstance(Endpoint("host2", 10000))
        manager.add_member(instance2)

        assert len(manager._cluster.members) == 2

        assert (self.storage.paths["/home/my_cluster/slaves/member_0000000000"]
                ["data"] == ServiceInstance.pack(instance1))
        assert (self.storage.paths["/home/my_cluster/slaves/member_0000000001"]
                ["data"] == ServiceInstance.pack(instance2))
Example #13
    def test_remove_cluster(self):
        manager = ClusterManager(self.client, "/home/my_cluster")

        instance1 = ServiceInstance(Endpoint("host1", 10000))
        member1 = manager.add_member(instance1)
        instance2 = ServiceInstance(Endpoint("host2", 10000))
        member2 = manager.add_member(instance2)

        manager.promote_member(member1)

        with pytest.raises(ClusterManager.Error):
            manager.delete_cluster()

        manager.remove_member(member1)
        manager.remove_member(member2)
        manager.delete_cluster()

        assert "/home/my_cluster" not in self.storage.paths
Example #14
    def test_remove_member(self):
        manager = ClusterManager(self.client, "/home/my_cluster")
        instance = ServiceInstance(Endpoint("host", 10000))
        member = manager.add_member(instance)

        assert manager.remove_member(member)
        assert not manager.remove_member(member)  # The second deletion is ignored.

        assert "/home/my_cluster/master/member_0000000000" not in self.storage.paths
Example #15
def test_service_instance_to_json():
    json = """{
    "additionalEndpoints": {
        "aurora": {
            "host": "hostname",
            "inet6": "2001:db8:1234:ffff:ffff:ffff:ffff:ffff",
            "port": 22
        },
        "health": {
            "host": "hostname",
            "inet": "1.2.3.4",
            "port": 23
        },
        "http": {
            "host": "hostname",
            "inet": "1.2.3.4",
            "inet6": "2001:db8:1234:ffff:ffff:ffff:ffff:ffff",
            "port": 23
        }
    },
    "serviceEndpoint": {
        "host": "hostname",
        "port": 24
    },
    "shard": 1,
    "status": "ALIVE"
  }"""
    service_instance = ServiceInstance(
        Endpoint("hostname", 24), {
            "aurora":
            Endpoint("hostname", 22, "1.2.3.4"),
            "health":
            Endpoint("hostname", 23, None,
                     "2001:db8:1234:ffff:ffff:ffff:ffff:ffff"),
            "http":
            Endpoint("hostname", 23, "1.2.3.4",
                     "2001:db8:1234:ffff:ffff:ffff:ffff:ffff"),
        }, 'ALIVE', 1)

    assert ServiceInstance.unpack(json) == service_instance
    assert ServiceInstance.unpack(
        ServiceInstance.pack(service_instance)) == service_instance
Example #16
def test_url_when_not_connected_and_cluster_has_no_proxy_url(scheme):
    host = 'some-host.example.com'
    port = 31181

    mock_zk = mock.create_autospec(spec=TwitterKazooClient, instance=True)

    service_json = '''{
    "additionalEndpoints": {
        "%(scheme)s": {
            "host": "%(host)s",
            "port": %(port)d
        }
    },
    "serviceEndpoint": {
        "host": "%(host)s",
        "port": %(port)d
    },
    "shard": 0,
    "status": "ALIVE"
  }''' % dict(host=host, port=port, scheme=scheme)

    service_endpoints = [ServiceInstance.unpack(service_json)]

    def make_mock_client(proxy_url):
        client = scheduler_client.ZookeeperSchedulerClient(
            Cluster(proxy_url=proxy_url),
            auth=None,
            user_agent='Some-User-Agent',
            _deadline=lambda x, **kws: x())
        client.get_scheduler_serverset = mock.MagicMock(
            return_value=(mock_zk, service_endpoints))
        client.SERVERSET_TIMEOUT = Amount(0, Time.SECONDS)
        client._connect_scheduler = mock.MagicMock()
        return client

    client = make_mock_client(proxy_url=None)
    assert client.url == '%s://%s:%d' % (scheme, host, port)
    assert client.url == client.raw_url
    client._connect_scheduler.assert_has_calls([])

    client = make_mock_client(proxy_url='https://scheduler.proxy')
    assert client.url == 'https://scheduler.proxy'
    assert client.raw_url == '%s://%s:%d' % (scheme, host, port)
    client._connect_scheduler.assert_has_calls([])

    client = make_mock_client(proxy_url=None)
    client.get_thrift_client()
    assert client.url == '%s://%s:%d' % (scheme, host, port)
    client._connect_scheduler.assert_has_calls(
        [mock.call('%s://%s:%d/api' % (scheme, host, port))])
    client._connect_scheduler.reset_mock()
    client.get_thrift_client()
    client._connect_scheduler.assert_has_calls([])
Example #17
def test_service_instance_to_json():
  json = """{
    "additionalEndpoints": {
        "aurora": {
            "host": "hostname",
            "inet6": "2001:db8:1234:ffff:ffff:ffff:ffff:ffff",
            "port": 22
        },
        "health": {
            "host": "hostname",
            "inet": "1.2.3.4",
            "port": 23
        },
        "http": {
            "host": "hostname",
            "inet": "1.2.3.4",
            "inet6": "2001:db8:1234:ffff:ffff:ffff:ffff:ffff",
            "port": 23
        }
    },
    "serviceEndpoint": {
        "host": "hostname",
        "port": 24
    },
    "shard": 1,
    "status": "ALIVE"
  }"""
  service_instance = ServiceInstance(
    Endpoint("hostname", 24),
    {"aurora": Endpoint("hostname", 22, "1.2.3.4"),
     "health": Endpoint("hostname", 23, None, "2001:db8:1234:ffff:ffff:ffff:ffff:ffff"),
     "http": Endpoint("hostname", 23, "1.2.3.4", "2001:db8:1234:ffff:ffff:ffff:ffff:ffff"),
   },
    'ALIVE',
    1
  )

  assert ServiceInstance.unpack(json) == service_instance
  assert ServiceInstance.unpack(ServiceInstance.pack(service_instance)) == service_instance
Example #18
def test_url_when_not_connected_and_cluster_has_no_proxy_url(scheme):
  host = 'some-host.example.com'
  port = 31181

  mock_zk = mock.create_autospec(spec=TwitterKazooClient, instance=True)

  service_json = '''{
    "additionalEndpoints": {
        "%(scheme)s": {
            "host": "%(host)s",
            "port": %(port)d
        }
    },
    "serviceEndpoint": {
        "host": "%(host)s",
        "port": %(port)d
    },
    "shard": 0,
    "status": "ALIVE"
  }''' % dict(host=host, port=port, scheme=scheme)

  service_endpoints = [ServiceInstance.unpack(service_json)]

  def make_mock_client(proxy_url):
    client = scheduler_client.ZookeeperSchedulerClient(
        Cluster(proxy_url=proxy_url),
        auth=None,
        user_agent='Some-User-Agent',
        _deadline=lambda x, **kws: x())
    client.get_scheduler_serverset = mock.MagicMock(return_value=(mock_zk, service_endpoints))
    client.SERVERSET_TIMEOUT = Amount(0, Time.SECONDS)
    client._connect_scheduler = mock.MagicMock()
    return client

  client = make_mock_client(proxy_url=None)
  assert client.url == '%s://%s:%d' % (scheme, host, port)
  assert client.url == client.raw_url
  client._connect_scheduler.assert_has_calls([])

  client = make_mock_client(proxy_url='https://scheduler.proxy')
  assert client.url == 'https://scheduler.proxy'
  assert client.raw_url == '%s://%s:%d' % (scheme, host, port)
  client._connect_scheduler.assert_has_calls([])

  client = make_mock_client(proxy_url=None)
  client.get_thrift_client()
  assert client.url == '%s://%s:%d' % (scheme, host, port)
  client._connect_scheduler.assert_has_calls([mock.call('%s://%s:%d/api' % (scheme, host, port))])
  client._connect_scheduler.reset_mock()
  client.get_thrift_client()
  client._connect_scheduler.assert_has_calls([])
Example #19
File: cluster.py Project: GavinHwa/mysos
  def _swap(self, master, master_content):
    i_was_master = self._self_content and self._master_content == self._self_content
    self._master, self._master_content = master, master_content
    i_am_master = self._self_content and self._master_content == self._self_content

    # Invoke callbacks accordingly.
    # NOTE: No callbacks are invoked if there is currently no master and 'self_instance' wasn't the
    # master.
    if i_was_master and not i_am_master:
      self._demotion_callback()
    elif not i_was_master and i_am_master:
      self._promotion_callback()
    elif not i_was_master and not i_am_master and master:
      assert master_content
      self._master_callback(ServiceInstance.unpack(master_content))
Example #20
    def _swap(self, master, master_content):
        i_was_master = self._self_content and self._master_content == self._self_content
        self._master, self._master_content = master, master_content
        i_am_master = self._self_content and self._master_content == self._self_content

        # Invoke callbacks accordingly.
        # NOTE: No callbacks are invoked if there is currently no master and 'self_instance' wasn't the
        # master.
        if i_was_master and not i_am_master:
            self._demotion_callback()
        elif not i_was_master and i_am_master:
            self._promotion_callback()
        elif not i_was_master and not i_am_master and master:
            assert master_content
            self._master_callback(ServiceInstance.unpack(master_content))
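The branches above reduce to a small dispatch on two booleans ('was I the master' / 'am I the master now') plus whether any master exists. A standalone restatement of that table (function name and return values are illustrative, not part of cluster.py):

# Illustrative restatement of the callback dispatch in _swap.
def pick_callback(i_was_master, i_am_master, master_exists):
    if i_was_master and not i_am_master:
        return "demotion"       # this instance lost mastership
    if not i_was_master and i_am_master:
        return "promotion"      # this instance gained mastership
    if not i_was_master and not i_am_master and master_exists:
        return "master_change"  # some other instance became (or changed) master
    return None                 # no master and we never were one: no callback

assert pick_callback(True, False, True) == "demotion"
assert pick_callback(False, True, True) == "promotion"
assert pick_callback(False, False, True) == "master_change"
assert pick_callback(False, False, False) is None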
Example #21
    def test_invalid_znode(self):
        instance1 = ServiceInstance(Endpoint("host1", 10000))
        handler1 = CallbackHandler()
        listener1 = ClusterListener(self.client, "/home/my_cluster", instance1,
                                    handler1.promotion_callback,
                                    handler1.demotion_callback,
                                    handler1.master_callback)
        listener1.start()

        self.client.ensure_path("/home/my_cluster/master")
        self.client.create("/home/my_cluster/master/member_",
                           "Invalid Data",
                           sequence=True)

        # Invalid ZNode data translates into a 'None' return.
        assert handler1.detected.get(True, 1) is None
Example #22
    def from_task(self, task, sandbox):
        data = json.loads(task.data)
        cluster_name, port, zk_url = data['cluster'], data['port'], data[
            'zk_url']

        _, servers, path = zookeeper.parse(zk_url)

        zk_client = FakeClient()
        zk_client.start()
        self_instance = ServiceInstance(
            Endpoint(socket.gethostbyname(socket.gethostname()), port))
        task_control = self._task_control_provider.from_task(task, sandbox)

        return MysosTaskRunner(self_instance, zk_client,
                               posixpath.join(path, cluster_name),
                               NoopPackageInstaller(), task_control, Fake())
Example #23
    def mock_get_serverset(*args, **kwargs):
      service_json = '''{
        "additionalEndpoints": {
            "http": {
                "host": "%s",
                "port": %d
            }
        },
        "serviceEndpoint": {
            "host": "%s",
            "port": %d
        },
        "shard": 0,
        "status": "ALIVE"
    }''' % (host, port, host, port)

      return mock_zk, [ServiceInstance.unpack(service_json)]
Example #24
def _service_instance(vals):
    json = '''{
    "additionalEndpoints": {
        "aurora": {
            "host": "smfd-akb-%d-sr1.devel.twitter.com",
            "port": 31181
        },
        "health": {
            "host": "smfd-akb-%d-sr1.devel.twitter.com",
            "port": 31181
        }
    },
    "serviceEndpoint": {
        "host": "smfd-akb-%d-sr1.devel.twitter.com",
        "port": 31181
    },
    "shard": %d,
    "status": "ALIVE"
}''' % vals

    return ServiceInstance.unpack(json)
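The helper feeds a 4-tuple into the template above: the first three values fill the host suffixes and the last is the shard. A hypothetical call (the tuple values and the attribute names checked below are assumptions for illustration):

# Hypothetical usage; values are made up for illustration.
instance = _service_instance((1, 1, 1, 0))
assert instance.service_endpoint.host == "smfd-akb-1-sr1.devel.twitter.com"
assert instance.shard == 0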
Example #25
    def from_task(self, task, sandbox):
        data = json.loads(task.data)
        cluster_name, host, port, zk_url = data['cluster'], data['host'], data[
            'port'], data['zk_url']
        _, servers, path = parse(zk_url)
        kazoo = KazooClient(servers)
        kazoo.start()
        self_instance = ServiceInstance(Endpoint(host, port))

        try:
            task_control = self._task_control_provider.from_task(task, sandbox)
            installer = self._installer_provider.from_task(task, sandbox)
            backup_store = self._backup_store_provider.from_task(task, sandbox)
        except (TaskControl.Error, PackageInstaller.Error) as e:
            raise TaskError(e.message)

        state_manager = StateManager(sandbox, backup_store)

        return MysosTaskRunner(self_instance, kazoo,
                               get_cluster_path(path, cluster_name), installer,
                               task_control, state_manager)
Example #26
def _service_instance(vals):
  json = '''{
    "additionalEndpoints": {
        "aurora": {
            "host": "smfd-akb-%d-sr1.devel.twitter.com",
            "port": 31181
        },
        "health": {
            "host": "smfd-akb-%d-sr1.devel.twitter.com",
            "port": 31181
        }
    },
    "serviceEndpoint": {
        "host": "smfd-akb-%d-sr1.devel.twitter.com",
        "port": 31181
    },
    "shard": %d,
    "status": "ALIVE"
}''' % vals

  return ServiceInstance.unpack(json)
Example #27
  def test_reparent(self):
    task_control = FakeTaskControl()
    runner = MysosTaskRunner(
        self._self_instance,
        self._client,
        "/home/test/my_cluster",
        NoopPackageInstaller(),
        task_control,
        self._state_manager)

    manager = ClusterManager(self._client, "/home/test/my_cluster")
    runner.start()

    # Promote another instance.
    master = ServiceInstance(Endpoint("another_host", 10000))
    another_member = manager.add_member(master)
    manager.promote_member(another_member)

    assert runner.master.get(True, 1) == master

    assert runner.stop()
    assert deadline(runner.join, Amount(1, Time.SECONDS))
Example #28
    def test_promote_member(self):
        manager = ClusterManager(self.client, "/home/my_cluster")
        instance = ServiceInstance(Endpoint("host", 10000))
        member = manager.add_member(instance)

        assert manager.promote_member(member)
        assert not manager.promote_member(member)  # The 2nd promotion is a no-op.

        assert self.storage.paths["/home/my_cluster/master/member_0000000000"]["data"] == ServiceInstance.pack(instance)
Example #29
    def test_add_member(self):
        manager = ClusterManager(self.client, "/home/my_cluster")

        instance1 = ServiceInstance(Endpoint("host1", 10000))
        member1 = manager.add_member(instance1)
        assert member1 == manager.add_member(instance1)  # Second insertion is ignored.

        instance2 = ServiceInstance(Endpoint("host2", 10000))
        manager.add_member(instance2)

        assert len(manager._cluster.members) == 2

        assert self.storage.paths["/home/my_cluster/slaves/member_0000000000"]["data"] == ServiceInstance.pack(
            instance1
        )
        assert self.storage.paths["/home/my_cluster/slaves/member_0000000001"]["data"] == ServiceInstance.pack(
            instance2
        )
Example #30
    def test_callbacks(self):
        manager = ClusterManager(self.client, "/home/my_cluster")

        # Set up 2 listeners.
        instance1 = ServiceInstance(Endpoint("host1", 10000))
        handler1 = CallbackHandler()
        listener1 = ClusterListener(
            self.client,
            "/home/my_cluster",
            instance1,
            handler1.promotion_callback,
            handler1.demotion_callback,
            handler1.master_callback,
            handler1.termination_callback,
        )
        listener1.start()
        member1 = manager.add_member(instance1)

        instance2 = ServiceInstance(Endpoint("host2", 10000))
        handler2 = CallbackHandler()
        listener2 = ClusterListener(
            self.client,
            "/home/my_cluster",
            instance2,
            handler2.promotion_callback,
            handler2.demotion_callback,
            handler2.master_callback,
        )
        listener2.start()
        member2 = manager.add_member(instance2)

        # Test promotion.
        manager.promote_member(member1)

        assert handler1.promoted.wait(1)
        assert handler2.detected.get(True, 1) == instance1

        assert self.storage.paths["/home/my_cluster/master/member_0000000000"]["data"] == ServiceInstance.pack(
            instance1
        )
        assert self.storage.paths["/home/my_cluster/slaves/member_0000000001"]["data"] == ServiceInstance.pack(
            instance2
        )

        manager.promote_member(member2)

        assert handler1.demoted.wait(1)
        assert handler2.promoted.wait(1)

        assert self.storage.paths["/home/my_cluster/master/member_0000000001"]["data"] == ServiceInstance.pack(
            instance2
        )
        assert "/home/my_cluster/master/member_0000000000" not in self.storage.paths

        manager.remove_member(member2)
        assert handler2.demoted.wait(1)

        # Test removing cluster.
        manager.remove_member(member1)
        manager.delete_cluster()
        assert handler1.terminated.wait(1)
Example #31
  def setUp(self):
    self._storage = FakeStorage(SequentialThreadingHandler())
    self._client = FakeClient(storage=self._storage)
    self._client.start()
    self._self_instance = ServiceInstance(Endpoint("host", 10000))
    self._state_manager = FakeStateManager()
Example #32
    def status_update(self, status):
        """
      Handle the status update for a task of this cluster.

      NOTE:
        Duplicate status updates may be handled by either the same scheduler instance or a new
        instance with the restored state.
    """
        with self._lock:
            task_id = status.task_id.value

            if task_id not in self._cluster.tasks:
                log.warn("Ignoring status update for unknown task %s" %
                         task_id)
                return

            task = self._cluster.tasks[task_id]
            previous_state = task.state

            # We don't want to ignore a duplicate update if the previous one was not successfully handled.
            # Therefore, we should not checkpoint the status change until we have finished all operations.
            if previous_state == status.state:
                log.info('Ignoring duplicate status update %s for task %s' %
                         (mesos_pb2.TaskState.Name(status.state), task_id))
                return

            if is_terminal(previous_state):
                log.info(
                    'Ignoring status update %s for task %s as it is in terminal state %s'
                    % (mesos_pb2.TaskState.Name(status.state), task_id,
                       mesos_pb2.TaskState.Name(previous_state)))
                return

            log.info('Updating state of task %s of cluster %s from %s to %s' %
                     (status.task_id.value, self.cluster_name,
                      mesos_pb2.TaskState.Name(previous_state),
                      mesos_pb2.TaskState.Name(status.state)))
            task.state = status.state

            if status.state == mesos_pb2.TASK_RUNNING:
                # Register this cluster member.
                endpoint = Endpoint(self._cluster.tasks[task_id].hostname,
                                    self._cluster.tasks[task_id].port)

                # If the scheduler fails over after ZK is updated but before the state change is
                # checkpointed, it will receive the same status update again and try to publish a duplicate
                # member to ZK. ClusterManager.add_member() is idempotent and doesn't update ZK in this
                # case.
                member_id = self._cluster_manager.add_member(
                    ServiceInstance(endpoint))
                log.info('Added %s (member id=%s) to cluster %s' %
                         (endpoint, member_id, self.cluster_name))
                self._cluster.members[task_id] = member_id

                # Checkpoint the status update here. It's OK if the elector fails to launch later because
                # the new scheduler instance will retry based on the fact that there are running instances
                # of the cluster but no master.
                self._state_provider.dump_cluster_state(self._cluster)

                # If MySQL master is already elected for this cluster don't bother adding it to the elector.
                if self._cluster.master_id:
                    log.info(
                        "MySQL slave task %s on %s started after a master is already elected for cluster %s"
                        % (task_id, endpoint.host, self.cluster_name))
                    return

                if not self._elector:
                    self._elector = self._new_elector()
                    # Add current slaves.
                    for t in self._cluster.running_tasks:
                        self._elector.add_slave(t.task_id, t.mesos_slave_id)
                    self._elector.start()
                else:
                    self._elector.add_slave(task_id, status.slave_id.value)
            elif status.state == mesos_pb2.TASK_FINISHED:
                raise self.Error(
                    "Task %s is in unexpected state %s with message '%s'" %
                    (status.task_id.value,
                     mesos_pb2.TaskState.Name(status.state), status.message))
            elif is_terminal(status.state):
                if status.state == mesos_pb2.TASK_KILLED:
                    log.info("Task %s was successfully killed" %
                             status.task_id.value)
                else:
                    log.error(
                        "Task %s is now in terminal state %s with message '%s'"
                        % (status.task_id.value,
                           mesos_pb2.TaskState.Name(
                               status.state), status.message))
                del self._cluster.tasks[task_id]

                if task_id in self._cluster.members:
                    member_id = self._cluster.members[task_id]
                    del self._cluster.members[task_id]

                    # If the scheduler fails over after ZK is updated but before its result is persisted, it
                    # will receive the same status update and try to remove the non-existent member.
                    # Removing a non-existent member is a no-op for ClusterManager.remove_member().
                    # Note that if the order is reversed, the scheduler will fail to clean up the orphan ZK
                    # entry.
                    self._cluster_manager.remove_member(member_id)

                    if member_id == self._cluster.master_id:
                        self._cluster.master_id = None
                        log.info(
                            "Master of cluster %s has terminated. Restarting election"
                            % self.cluster_name)

                        assert not self._elector, "Election must not be running since there is a current master"
                        self._elector = self._new_elector()

                        # Add current slaves after removing the terminated task.
                        for t in self._cluster.running_tasks:
                            self._elector.add_slave(t.task_id,
                                                    t.mesos_slave_id)
                        self._elector.start()
                    else:
                        # It will be rescheduled next time the launcher is given an offer.
                        log.info("Slave %s of cluster %s has terminated" %
                                 (task_id, self.cluster_name))
                else:
                    assert previous_state != mesos_pb2.TASK_RUNNING, (
                        "Task must exist in ClusterManager if it was running")
                    log.warn("Slave %s of cluster %s failed to start running" %
                             (task_id, self.cluster_name))

                if self.terminated:
                    log.info("Shutting down launcher for cluster %s" %
                             self.cluster_name)
                    self._shutdown()
                    return

                # Finally, checkpoint the status update.
                self._state_provider.dump_cluster_state(self._cluster)
                log.info(
                    "Checkpointed the status update for task %s of cluster %s"
                    % (task_id, self.cluster_name))