Example #1
File: executor.py  Project: zhill/quay
    def _get_conn(self):
        """ Creates an ec2 connection which can be used to manage instances. """
        return AsyncWrapper(
            boto.ec2.connect_to_region(
                self.executor_config["EC2_REGION"],
                aws_access_key_id=self.executor_config["AWS_ACCESS_KEY"],
                aws_secret_access_key=self.executor_config["AWS_SECRET_KEY"],
            ))
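
Every snippet in this collection leans on AsyncWrapper to call blocking libraries (boto, the Redis-backed build logs, the data model) from trollius coroutines. Below is a minimal sketch of the pattern, assuming a trollius event loop and a thread pool reached via run_in_executor; the real Quay class may differ in detail.

import trollius
from functools import partial


class AsyncWrapper(object):
    """ Sketch: proxies a synchronous object so that method calls run in a
        thread pool and return futures awaitable with yield From(...). """

    def __init__(self, delegate, loop=None, executor=None):
        self._delegate = delegate
        self._loop = loop or trollius.get_event_loop()
        self._executor = executor  # None selects the loop's default pool.

    def __getattr__(self, attribute):
        delegate_attr = getattr(self._delegate, attribute)
        if not callable(delegate_attr):
            return delegate_attr  # Plain attributes pass through untouched.

        def wrapper(*args, **kwargs):
            # Defer the blocking call to the thread pool; the caller gets a
            # future it can yield From(...) on.
            call = partial(delegate_attr, *args, **kwargs)
            return self._loop.run_in_executor(self._executor, call)

        return wrapper

With a wrapper like this, AsyncWrapper(conn).run_instances(...) returns a future rather than blocking, which is why the examples below can yield From(...) on ordinary boto calls.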
Example #2
class TestEphemeralLifecycle(EphemeralBuilderTestCase):
    """ Tests the various lifecycles of the ephemeral builder and its interaction with etcd. """
    def __init__(self, *args, **kwargs):
        super(TestEphemeralLifecycle, self).__init__(*args, **kwargs)
        self.etcd_client_mock = None
        self.test_executor = None

    def _create_completed_future(self, result=None):
        def inner(*args, **kwargs):
            new_future = Future()
            new_future.set_result(result)
            return new_future

        return inner

    def _create_mock_executor(self, *args, **kwargs):
        self.test_executor = Mock(spec=BuilderExecutor)
        self.test_executor.start_builder = Mock(
            side_effect=self._create_completed_future("123"))
        self.test_executor.stop_builder = Mock(
            side_effect=self._create_completed_future())
        self.test_executor.setup_time = 60
        self.test_executor.name = "MockExecutor"
        self.test_executor.minimum_retry_threshold = 0
        return self.test_executor

    def setUp(self):
        super(TestEphemeralLifecycle, self).setUp()

        EphemeralBuilderManager.EXECUTORS["test"] = self._create_mock_executor

        self.register_component_callback = Mock()
        self.unregister_component_callback = Mock()
        self.job_heartbeat_callback = Mock()
        self.job_complete_callback = AsyncWrapper(Mock())

        self.manager = EphemeralBuilderManager(
            self.register_component_callback,
            self.unregister_component_callback,
            self.job_heartbeat_callback,
            self.job_complete_callback,
            "127.0.0.1",
            30,
        )

        self.manager.initialize({
            "EXECUTOR": "test",
            "ORCHESTRATOR": {
                "MEM_CONFIG": None
            },
        })

        # Ensure that the realm and building callbacks have been registered
        callback_keys = [key for key in self.manager._orchestrator.callbacks]
        self.assertIn(REALM_PREFIX, callback_keys)
        self.assertIn(JOB_PREFIX, callback_keys)

        self.mock_job = self._create_build_job()
        self.mock_job_key = slash_join("building", BUILD_UUID)

    def tearDown(self):
        super(TestEphemeralLifecycle, self).tearDown()
        self.manager.shutdown()

    @coroutine
    def _setup_job_for_managers(self):
        test_component = Mock(spec=BuildComponent)
        test_component.builder_realm = REALM_ID
        test_component.start_build = Mock(
            side_effect=self._create_completed_future())
        self.register_component_callback.return_value = test_component

        is_scheduled = yield From(self.manager.schedule(self.mock_job))
        self.assertTrue(is_scheduled)
        self.assertEqual(self.test_executor.start_builder.call_count, 1)

        # Ensure that the job, realm, and metric callbacks have been registered
        callback_keys = [key for key in self.manager._orchestrator.callbacks]
        self.assertIn(self.mock_job_key, self.manager._orchestrator.state)
        self.assertIn(REALM_PREFIX, callback_keys)
        # TODO: assert metric key has been set

        realm_for_build = self._find_realm_key(self.manager._orchestrator,
                                               BUILD_UUID)

        raw_realm_data = yield From(
            self.manager._orchestrator.get_key(
                slash_join("realm", realm_for_build)))
        realm_data = json.loads(raw_realm_data)
        realm_data["realm"] = REALM_ID

        # Right now the job is not registered with any managers because etcd has not accepted the job
        self.assertEqual(self.register_component_callback.call_count, 0)

        # Fire off a realm-changed event with the same data.
        yield From(
            self.manager._realm_callback(
                KeyChange(KeyEvent.CREATE, slash_join(REALM_PREFIX, REALM_ID),
                          json.dumps(realm_data))))

        # Ensure that we have at least one component node.
        self.assertEqual(self.register_component_callback.call_count, 1)
        self.assertEqual(1, self.manager.num_workers())

        # Ensure that the build info exists.
        self.assertIsNotNone(self.manager._build_uuid_to_info.get(BUILD_UUID))

        raise Return(test_component)

    @staticmethod
    def _find_realm_key(orchestrator, build_uuid):
        for key, value in iteritems(orchestrator.state):
            if key.startswith(REALM_PREFIX):
                parsed_value = json.loads(value)
                body = json.loads(parsed_value["job_queue_item"]["body"])
                if body["build_uuid"] == build_uuid:
                    return parsed_value["realm"]
        raise KeyError

    @async_test
    def test_schedule_and_complete(self):
        # Test that a job is properly registered with all of the managers
        test_component = yield From(self._setup_job_for_managers())

        # Take the job ourselves
        yield From(self.manager.build_component_ready(test_component))

        self.assertIsNotNone(self.manager._build_uuid_to_info.get(BUILD_UUID))

        # Finish the job
        yield From(
            self.manager.job_completed(self.mock_job, BuildJobResult.COMPLETE,
                                       test_component))

        # Ensure that the executor kills the job.
        self.assertEqual(self.test_executor.stop_builder.call_count, 1)

        # Ensure the build information is cleaned up.
        self.assertIsNone(self.manager._build_uuid_to_info.get(BUILD_UUID))
        self.assertEqual(0, self.manager.num_workers())

    @async_test
    def test_another_manager_takes_job(self):
        # Prepare a job to be taken by another manager
        test_component = yield From(self._setup_job_for_managers())

        yield From(
            self.manager._realm_callback(
                KeyChange(
                    KeyEvent.DELETE,
                    slash_join(REALM_PREFIX, REALM_ID),
                    json.dumps({
                        "realm": REALM_ID,
                        "token": "beef",
                        "execution_id": "123",
                        "job_queue_item": self.mock_job.job_item,
                    }),
                )))

        self.unregister_component_callback.assert_called_once_with(
            test_component)

        # Ensure that the executor does not kill the job.
        self.assertEqual(self.test_executor.stop_builder.call_count, 0)

        # Ensure that we still have the build info, but not the component.
        self.assertEqual(0, self.manager.num_workers())
        self.assertIsNotNone(self.manager._build_uuid_to_info.get(BUILD_UUID))

        # Delete the job once it has "completed".
        yield From(
            self.manager._job_callback(
                KeyChange(
                    KeyEvent.DELETE,
                    self.mock_job_key,
                    json.dumps({
                        "had_heartbeat": False,
                        "job_queue_item": self.mock_job.job_item
                    }),
                )))

        # Ensure the job was removed from the info, but stop was not called.
        self.assertIsNone(self.manager._build_uuid_to_info.get(BUILD_UUID))
        self.assertEqual(self.test_executor.stop_builder.call_count, 0)

    @async_test
    def test_job_started_by_other_manager(self):
        # Ensure that the building callbacks have been registered
        callback_keys = [key for key in self.manager._orchestrator.callbacks]
        self.assertIn(JOB_PREFIX, callback_keys)

        # Send a signal to the callback that the job has been created.
        yield From(
            self.manager._job_callback(
                KeyChange(
                    KeyEvent.CREATE,
                    self.mock_job_key,
                    json.dumps({
                        "had_heartbeat": False,
                        "job_queue_item": self.mock_job.job_item
                    }),
                )))

        # Ensure the create does nothing.
        self.assertEqual(self.test_executor.stop_builder.call_count, 0)

    @async_test
    def test_expiring_worker_not_started(self):
        # Ensure that the building callbacks have been registered
        callback_keys = [key for key in self.manager._orchestrator.callbacks]
        self.assertIn(JOB_PREFIX, callback_keys)

        # Send a signal to the callback that a worker has expired
        yield From(
            self.manager._job_callback(
                KeyChange(
                    KeyEvent.EXPIRE,
                    self.mock_job_key,
                    json.dumps({
                        "had_heartbeat": True,
                        "job_queue_item": self.mock_job.job_item
                    }),
                )))

        # Since the realm was never registered, expiration should do nothing.
        self.assertEqual(self.test_executor.stop_builder.call_count, 0)

    @async_test
    def test_expiring_worker_started(self):
        test_component = yield From(self._setup_job_for_managers())

        # Ensure that the building callbacks have been registered
        callback_keys = [key for key in self.manager._orchestrator.callbacks]
        self.assertIn(JOB_PREFIX, callback_keys)

        yield From(
            self.manager._job_callback(
                KeyChange(
                    KeyEvent.EXPIRE,
                    self.mock_job_key,
                    json.dumps({
                        "had_heartbeat": True,
                        "job_queue_item": self.mock_job.job_item
                    }),
                )))

        self.test_executor.stop_builder.assert_called_once_with("123")
        self.assertEqual(self.test_executor.stop_builder.call_count, 1)

    @async_test
    def test_buildjob_deleted(self):
        test_component = yield From(self._setup_job_for_managers())

        # Ensure that the building callbacks have been registered
        callback_keys = [key for key in self.manager._orchestrator.callbacks]
        self.assertIn(JOB_PREFIX, callback_keys)

        # Send a signal to the callback that a worker has expired
        yield From(
            self.manager._job_callback(
                KeyChange(
                    KeyEvent.DELETE,
                    self.mock_job_key,
                    json.dumps({
                        "had_heartbeat": False,
                        "job_queue_item": self.mock_job.job_item
                    }),
                )))

        self.assertEqual(self.test_executor.stop_builder.call_count, 0)
        self.assertEqual(self.job_complete_callback.call_count, 0)
        self.assertIsNone(self.manager._build_uuid_to_info.get(BUILD_UUID))

    @async_test
    def test_builder_never_starts(self):
        test_component = yield From(self._setup_job_for_managers())

        # Ensure that the building callbacks have been registered
        callback_keys = [key for key in self.manager._orchestrator.callbacks]
        self.assertIn(JOB_PREFIX, callback_keys)

        # Send a signal to the callback that a worker has expired
        yield From(
            self.manager._job_callback(
                KeyChange(
                    KeyEvent.EXPIRE,
                    self.mock_job_key,
                    json.dumps({
                        "had_heartbeat": False,
                        "job_queue_item": self.mock_job.job_item
                    }),
                )))

        self.test_executor.stop_builder.assert_called_once_with("123")
        self.assertEqual(self.test_executor.stop_builder.call_count, 1)

        # Ensure the job was marked as incomplete, with update_phase set to True
        # (so the DB record and logs are updated as well).
        yield From(
            self.job_complete_callback.assert_called_once_with(
                ANY,
                BuildJobResult.INCOMPLETE,
                "MockExecutor",
                update_phase=True))

    @async_test
    def test_change_worker(self):
        # Send a signal to the callback that a worker key has been changed
        yield From(
            self.manager._job_callback(
                KeyChange(KeyEvent.SET, self.mock_job_key, "value")))
        self.assertEqual(self.test_executor.stop_builder.call_count, 0)

    @async_test
    def test_realm_expired(self):
        test_component = yield From(self._setup_job_for_managers())

        # Send a signal to the callback that a realm has expired
        yield From(
            self.manager._realm_callback(
                KeyChange(
                    KeyEvent.EXPIRE,
                    self.mock_job_key,
                    json.dumps({
                        "realm": REALM_ID,
                        "execution_id": "foobar",
                        "executor_name": "MockExecutor",
                        "job_queue_item": {
                            "body": '{"build_uuid": "fakeid"}'
                        },
                    }),
                )))

        # Ensure that the cleanup code for the executor was called.
        self.test_executor.stop_builder.assert_called_once_with("foobar")
        self.assertEqual(self.test_executor.stop_builder.call_count, 1)
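
The _create_completed_future helper above is the trick that lets plain Mock objects stand in for coroutine-returning methods such as start_builder. A self-contained illustration of it, assuming trollius and the mock package:

import trollius
from trollius import From, Return, coroutine
from mock import Mock

loop = trollius.get_event_loop()


def completed(result=None):
    # side_effect factory: every call returns an already-resolved future,
    # so the Mock can be awaited exactly like a real coroutine call.
    def inner(*args, **kwargs):
        future = trollius.Future(loop=loop)
        future.set_result(result)
        return future
    return inner


stop_builder = Mock(side_effect=completed("ok"))


@coroutine
def demo():
    # Awaiting the mock resolves immediately with the canned result.
    value = yield From(stop_builder("exec-id"))
    raise Return(value)


assert loop.run_until_complete(demo()) == "ok"
assert stop_builder.call_count == 1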
Example #3
    @coroutine
    def start_builder(self, realm, token, build_uuid):
        region = self.executor_config["EC2_REGION"]
        channel = self.executor_config.get("COREOS_CHANNEL", "stable")

        coreos_ami = self.executor_config.get("COREOS_AMI", None)
        if coreos_ami is None:
            get_ami_callable = partial(self._get_coreos_ami, region, channel)
            coreos_ami = yield From(
                self._loop.run_in_executor(None, get_ami_callable))

        user_data = self.generate_cloud_config(realm, token, build_uuid,
                                               channel, self.manager_hostname)
        logger.debug("Generated cloud config for build %s: %s", build_uuid,
                     user_data)

        ec2_conn = self._get_conn()

        ssd_root_ebs = boto.ec2.blockdevicemapping.BlockDeviceType(
            size=int(self.executor_config.get("BLOCK_DEVICE_SIZE", 48)),
            volume_type="gp2",
            delete_on_termination=True,
        )
        block_devices = boto.ec2.blockdevicemapping.BlockDeviceMapping()
        block_devices["/dev/xvda"] = ssd_root_ebs

        interfaces = None
        if self.executor_config.get("EC2_VPC_SUBNET_ID", None) is not None:
            interface = boto.ec2.networkinterface.NetworkInterfaceSpecification(
                subnet_id=self.executor_config["EC2_VPC_SUBNET_ID"],
                groups=self.executor_config["EC2_SECURITY_GROUP_IDS"],
                associate_public_ip_address=True,
            )
            interfaces = boto.ec2.networkinterface.NetworkInterfaceCollection(
                interface)

        try:
            reservation = yield From(
                ec2_conn.run_instances(
                    coreos_ami,
                    instance_type=self.executor_config["EC2_INSTANCE_TYPE"],
                    key_name=self.executor_config.get("EC2_KEY_NAME", None),
                    user_data=user_data,
                    instance_initiated_shutdown_behavior="terminate",
                    block_device_map=block_devices,
                    network_interfaces=interfaces,
                ))
        except boto.exception.EC2ResponseError as ec2e:
            logger.exception("Unable to spawn builder instance")
            metric_queue.ephemeral_build_worker_failure.Inc()
            raise ec2e

        if not reservation.instances:
            raise ExecutorException("Unable to spawn builder instance.")
        elif len(reservation.instances) != 1:
            raise ExecutorException("EC2 started wrong number of instances!")

        launched = AsyncWrapper(reservation.instances[0])

        # Sleep a few seconds to wait for AWS to spawn the instance.
        yield From(trollius.sleep(_TAG_RETRY_SLEEP))

        # Tag the instance with its metadata.
        for i in range(0, _TAG_RETRY_COUNT):
            try:
                yield From(
                    launched.add_tags({
                        "Name": "Quay Ephemeral Builder",
                        "Realm": realm,
                        "Token": token,
                        "BuildUUID": build_uuid,
                    }))
            except boto.exception.EC2ResponseError as ec2e:
                if ec2e.error_code == "InvalidInstanceID.NotFound":
                    if i < _TAG_RETRY_COUNT - 1:
                        logger.warning(
                            "Failed to write EC2 tags for instance %s for build %s (attempt #%s)",
                            launched.id,
                            build_uuid,
                            i,
                        )
                        yield From(trollius.sleep(_TAG_RETRY_SLEEP))
                        continue

                    raise ExecutorException("Unable to find builder instance.")

                logger.exception("Failed to write EC2 tags (attempt #%s)", i)

            break  # Tags were written, or a non-retryable error was logged.

        logger.debug("Machine with ID %s started for build %s", launched.id,
                     build_uuid)
        raise Return(launched.id)
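
The run_in_executor call near the top of start_builder is the same offloading idea applied to a one-off blocking lookup. A minimal sketch of that pattern, with fetch_ami as a hypothetical stand-in for self._get_coreos_ami:

import trollius
from trollius import From, Return, coroutine
from functools import partial


def fetch_ami(region, channel):
    # Stands in for the blocking HTTP lookup _get_coreos_ami performs.
    return "ami-1234567"


@coroutine
def resolve_ami(loop, region, channel):
    get_ami_callable = partial(fetch_ami, region, channel)
    # Run the blocking call on the default thread pool and await the result.
    ami = yield From(loop.run_in_executor(None, get_ami_callable))
    raise Return(ami)


loop = trollius.get_event_loop()
print(loop.run_until_complete(resolve_ami(loop, "us-east-1", "stable")))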
Example #4
class StatusHandler(object):
    """ Context wrapper for writing status to build logs. """
    def __init__(self, build_logs, repository_build_uuid):
        self._current_phase = None
        self._current_command = None
        self._uuid = repository_build_uuid
        self._build_logs = AsyncWrapper(build_logs)
        self._sync_build_logs = build_logs
        self._build_model = AsyncWrapper(model.build)

        self._status = {
            'total_commands': 0,
            'current_command': None,
            'push_completion': 0.0,
            'pull_completion': 0.0,
        }

        # Write the initial status.
        self.__exit__(None, None, None)

    @coroutine
    def _append_log_message(self, log_message, log_type=None, log_data=None):
        log_data = log_data or {}
        log_data['datetime'] = str(datetime.datetime.now())

        try:
            yield From(
                self._build_logs.append_log_message(self._uuid, log_message,
                                                    log_type, log_data))
        except RedisError:
            logger.exception('Could not save build log for build %s: %s',
                             self._uuid, log_message)

    @coroutine
    def append_log(self, log_message, extra_data=None):
        if log_message is None:
            return

        yield From(self._append_log_message(log_message, log_data=extra_data))

    @coroutine
    def set_command(self, command, extra_data=None):
        if self._current_command == command:
            raise Return()

        self._current_command = command
        yield From(
            self._append_log_message(command, self._build_logs.COMMAND,
                                     extra_data))

    @coroutine
    def set_error(self,
                  error_message,
                  extra_data=None,
                  internal_error=False,
                  requeued=False):
        error_phase = BUILD_PHASE.INTERNAL_ERROR if internal_error and requeued else BUILD_PHASE.ERROR
        yield From(self.set_phase(error_phase))

        extra_data = extra_data or {}
        extra_data['internal_error'] = internal_error
        yield From(
            self._append_log_message(error_message, self._build_logs.ERROR,
                                     extra_data))

    @coroutine
    def set_phase(self, phase, extra_data=None):
        if phase == self._current_phase:
            raise Return(False)

        self._current_phase = phase
        yield From(
            self._append_log_message(phase, self._build_logs.PHASE,
                                     extra_data))

        # Update the repository build with the new phase
        raise Return(
            self._build_model.update_phase_then_close(self._uuid, phase))

    def __enter__(self):
        return self._status

    def __exit__(self, exc_type, value, traceback):
        try:
            self._sync_build_logs.set_status(self._uuid, self._status)
        except RedisError:
            logger.exception('Could not set status of build %s to %s',
                             self._uuid, self._status)
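
Note that StatusHandler doubles as a context manager: __enter__ hands out the mutable status dict and __exit__ persists it, which is also why __init__ calls __exit__(None, None, None) once to write the initial status. A hedged usage sketch, with a Mock standing in for the Redis-backed build_logs store (this assumes the surrounding Quay imports such as model and RedisError are available):

from mock import Mock

# Hypothetical driver; any object exposing append_log_message()/set_status()
# would do here.
build_logs = Mock()
handler = StatusHandler(build_logs, "deadbeef-uuid")

with handler as status:
    status["total_commands"] = 12  # Mutate the shared status dict in place.

# Leaving the block triggers __exit__, which calls set_status() to persist.
build_logs.set_status.assert_called_with("deadbeef-uuid", {
    'total_commands': 12,
    'current_command': None,
    'push_completion': 0.0,
    'pull_completion': 0.0,
})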