Пример #1
0
  def test_launcher_recovery_corrupted_password(self):
    """Recovering a launcher from state with a corrupted password raises ValueError."""
    # Step 1: launch one instance on the running launcher, then shrink the
    # offer down to whatever resources the launch left over.
    launched_id, leftover = self._launcher.launch(self._offer)
    del self._offer.resources[:]
    self._offer.resources.extend(leftover)
    assert launched_id == "mysos-cluster0-0"

    # Report that task as running.
    running = mesos_pb2.TaskStatus()
    running.task_id.value = "mysos-cluster0-0"
    running.slave_id.value = self._offer.slave_id.value
    running.state = mesos_pb2.TASK_RUNNING
    self._launcher.status_update(running)

    # Step 2: reload the persisted cluster state and overwrite its encrypted
    # password before attempting recovery.
    cluster_name = self._cluster.name
    self._cluster = self._state_provider.load_cluster_state(cluster_name)
    self._cluster.encrypted_password = "******"

    # Constructing a launcher from the corrupted state is expected to fail.
    with pytest.raises(ValueError):
      ctor_args = (
          self._driver,
          self._cluster,
          self._state_provider,
          self._zk_url,
          self._zk_client,
          self._framework_user,
          "./executor.pex",
          "cmd.sh",
          Amount(5, Time.SECONDS),
          "/etc/mysos/admin_keyfile.yml",
          self._scheduler_key)
      self._launcher = MySQLClusterLauncher(
          *ctor_args,
          query_interval=Amount(150, Time.MILLISECONDS))
Пример #2
0
    def test_invalid_status_update(self):
        """Verify that an invalid status update makes the launcher raise.

        A one-node cluster is launched and reported as TASK_RUNNING; a
        following TASK_FINISHED update must raise MySQLClusterLauncher.Error.
        """
        self._cluster.num_nodes = 1
        single_node_launcher = MySQLClusterLauncher(
            self._driver,
            self._cluster,
            self._state_provider,
            self._zk_url,
            self._zk_client,
            self._framework_user,
            "./executor.pex",
            "cmd.sh",
            Amount(5, Time.SECONDS),
            "/etc/mysos/admin_keyfile.yml")
        self._launchers.append(single_node_launcher)

        self._offer.resources.extend(
            create_resources(cpus=4, mem=512 * 3, ports={10000}))

        launched_id, _ = single_node_launcher.launch(self._offer)
        assert launched_id == "mysos-cluster0-0"

        # Exactly one launchTasks call per cluster node.
        launch_calls = self._driver.method_calls["launchTasks"]
        assert len(launch_calls) == self._cluster.num_nodes

        update = mesos_pb2.TaskStatus()
        update.task_id.value = launched_id

        # A running task is an acceptable state.
        update.state = mesos_pb2.TASK_RUNNING
        single_node_launcher.status_update(update)

        # A finished task is treated as invalid by the launcher.
        update.state = mesos_pb2.TASK_FINISHED
        with pytest.raises(MySQLClusterLauncher.Error):
            single_node_launcher.status_update(update)
Пример #3
0
    def setup(self, request):
        """Prepare the fakes, offer, cluster and default launcher for a test.

        The state provider is selected from ``request.param`` and
        ``self.teardown`` is registered as a finalizer on ``request``.
        """
        self._driver = FakeDriver()
        self._storage = FakeStorage(SequentialThreadingHandler())
        self._zk_client = FakeClient(storage=self._storage)
        self._zk_client.start()

        offer = mesos_pb2.Offer()
        offer.id.value = "offer_id_0"
        offer.framework_id.value = "framework_id_0"
        offer.slave_id.value = "slave_id_0"
        offer.hostname = "localhost"
        self._offer = offer

        # The offer carries enough memory and ports for three tasks.
        self._offer.resources.extend(
            create_resources(cpus=4,
                             mem=512 * 3,
                             ports={10000, 10001, 10002}))

        self._framework_user = "******"

        # The default launcher below is used by some tests but not all.
        self._zk_url = "zk://host/mysos/test"
        self._cluster = MySQLCluster("cluster0", "user", "pass", 3)

        # Pick the state provider requested by the test parameter.
        provider_kind = request.param
        if provider_kind == LocalStateProvider:
            tmpdir = tempfile.mkdtemp()
            self._state_provider = LocalStateProvider(tmpdir)

            def remove_tmpdir():
                # Best-effort cleanup of the scratch directory.
                shutil.rmtree(tmpdir, True)

            request.addfinalizer(remove_tmpdir)
        elif provider_kind == ZooKeeperStateProvider:
            self._state_provider = ZooKeeperStateProvider(
                self._zk_client, "/mysos/test")

        short_interval = Amount(150, Time.MILLISECONDS)
        self._launcher = MySQLClusterLauncher(
            self._driver, self._cluster, self._state_provider,
            self._zk_url, self._zk_client, self._framework_user,
            "./executor.pex", "cmd.sh",
            Amount(5, Time.SECONDS),
            "/etc/mysos/admin_keyfile.yml",
            query_interval=short_interval)

        self._elected = threading.Event()
        # Launchers collected here are handled by teardown().
        self._launchers = [self._launcher]

        request.addfinalizer(self.teardown)
Пример #4
0
    def test_two_launchers(self):
        """Check that two launchers can split one offer between their clusters.

        One cluster has a single node and the other has two, so three tasks
        are expected to be launched in total.
        """
        cluster_specs = [
            ("cluster0", "user0", "pass0", 1),
            ("cluster1", "user1", "pass1", 2),
        ]
        launchers = [
            MySQLClusterLauncher(
                self._driver,
                MySQLCluster(*spec),
                self._state_provider,
                self._zk_url,
                self._zk_client,
                self._framework_user,
                "./executor.pex",
                "cmd.sh",
                Amount(5, Time.SECONDS),
                "/etc/mysos/admin_keyfile.yml")
            for spec in cluster_specs
        ]
        self._launchers.extend(launchers)

        self._offer.resources.extend(
            create_resources(cpus=4,
                             mem=512 * 3,
                             ports={10000, 10001, 10002}))

        # Play the scheduler's role: three rounds, one task per round.
        for _ in range(3):
            for candidate in launchers:
                launched_id, leftover = candidate.launch(self._offer)
                if not launched_id:
                    continue
                # Shrink the offer to the leftover resources so that the
                # other launchers draw from what remains.
                del self._offer.resources[:]
                self._offer.resources.extend(leftover)
                break

        launch_calls = self._driver.method_calls["launchTasks"]
        assert len(launch_calls) == 3
Пример #5
0
  def test_terminal_status_update(self):
    """Check that a lost task is made up for by launching a new one."""
    self._cluster.num_nodes = 1
    launcher = MySQLClusterLauncher(
        self._driver, self._cluster, self._state_provider,
        self._zk_url, self._zk_client, self._framework_user,
        "./executor.pex", "cmd.sh",
        Amount(1, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        self._scheduler_key)
    self._launchers.append(launcher)

    self._offer.resources.extend(
        create_resources(
            cpus=DEFAULT_TASK_CPUS,
            mem=DEFAULT_TASK_MEM,
            disk=DEFAULT_TASK_DISK,
            ports={10000}))

    first_id, _ = launcher.launch(self._offer)
    assert first_id == "mysos-cluster0-0"

    launch_calls = self._driver.method_calls["launchTasks"]
    assert len(launch_calls) == self._cluster.num_nodes

    # Report the task as running and confirm the cluster tracks it.
    update = mesos_pb2.TaskStatus()
    update.task_id.value = first_id
    update.state = mesos_pb2.TASK_RUNNING
    launcher.status_update(update)
    assert len(launcher._cluster.running_tasks) == 1

    # Losing the task removes it from the running set.
    update.state = mesos_pb2.TASK_LOST
    launcher.status_update(update)
    assert len(launcher._cluster.running_tasks) == 0

    # The same offer is used to launch the replacement task.
    second_id, _ = launcher.launch(self._offer)
    assert second_id == "mysos-cluster0-1"

    # One extra launch compensates for the lost task.
    launch_calls = self._driver.method_calls["launchTasks"]
    assert len(launch_calls) == self._cluster.num_nodes + 1
Пример #6
0
  def test_launcher_recovery_before_election_completed(self):
    """Recover a launcher after the master task failed and check the re-election result."""

    def master_elected():
      # Wait on the cluster's ZooKeeper path for a master to show up.
      return wait_for_master(
          get_cluster_path(self._zk_url, self._cluster.name),
          self._zk_client)

    # Step 1: launch the whole cluster on the running launcher, shrinking the
    # offer to the leftover resources after every launch.
    for index in range(self._cluster.num_nodes):
      launched_id, leftover = self._launcher.launch(self._offer)
      del self._offer.resources[:]
      self._offer.resources.extend(leftover)
      assert launched_id == "mysos-cluster0-%s" % index

    launch_calls = self._driver.method_calls["launchTasks"]
    assert len(launch_calls) == self._cluster.num_nodes

    # A further launch attempt yields no task and no new launchTasks call.
    extra_id = self._launcher.launch(self._offer)[0]
    assert extra_id is None
    assert len(self._driver.method_calls["launchTasks"]) == self._cluster.num_nodes

    # Report every node as running.
    update = mesos_pb2.TaskStatus()
    update.state = mesos_pb2.TASK_RUNNING
    update.slave_id.value = self._offer.slave_id.value
    for index in range(self._cluster.num_nodes):
      update.task_id.value = "mysos-cluster0-%s" % index
      self._launcher.status_update(update)

    deadline(master_elected, Amount(5, Time.SECONDS))

    # The first member is the master; the other two are slaves.
    assert "/mysos/test/cluster0/master/member_0000000000" in self._storage.paths
    slave_prefix = "/mysos/test/cluster0/slaves/member_"
    slave_members = [
        path for path in self._storage.paths.keys()
        if path.startswith(slave_prefix)]
    assert len(slave_members) == 2

    # Fail the master task, which leads to a re-election.
    update.task_id.value = "mysos-cluster0-0"
    update.state = mesos_pb2.TASK_FAILED
    self._launcher.status_update(update)

    # Step 2: rebuild the launcher from the persisted cluster state.
    self._cluster = self._state_provider.load_cluster_state(self._cluster.name)
    self._launcher = MySQLClusterLauncher(
        self._driver, self._cluster, self._state_provider,
        self._zk_url, self._zk_client, self._framework_user,
        "./executor.pex", "cmd.sh",
        Amount(5, Time.SECONDS),
        "/etc/mysos/admin_keyfile.yml",
        self._scheduler_key,
        query_interval=Amount(150, Time.MILLISECONDS))

    # The surviving tasks each send their position for epoch 2.
    for index in range(1, self._cluster.num_nodes):
      surviving_task = "mysos-cluster0-%s" % index
      slave = self._offer.slave_id.value
      payload = json.dumps(dict(epoch=2, position=str(index)))
      self._launcher.framework_message(surviving_task, slave, payload)

    deadline(master_elected, Amount(5, Time.SECONDS))

    # The second slave reported the larger position and is elected.
    assert "/mysos/test/cluster0/master/member_0000000002" in self._storage.paths