示例#1
0
文件: update.py 项目: grnet/amaltheia
    def __init__(self, host_name, host_args, updater_args):
        """Initialise the Jenkins updater for a single host.

        The following keys are read from ``updater_args``:

        * ``wait`` -- stored as ``self.wait`` (default ``True``).
        * ``wait-timeout`` -- integer number of seconds, stored as
          ``self.wait_timeout`` (default 500).
        * ``wait-check-interval`` -- integer number of seconds, stored as
          ``self.wait_check_interval`` (default 10).
        * ``server``, ``username``, ``password``, ``job`` -- each passed
          through ``jinja`` before being stored.

        If the Jenkins client cannot be created the error is logged and
        ``self.jenkins`` is set to ``None``; no exception is raised.
        """
        super(JenkinsUpdater, self).__init__(host_name, host_args,
                                             updater_args)
        self.wait = self.updater_args.get('wait', True)

        try:
            self.wait_timeout = int(self.updater_args.get('wait-timeout', 500))
        except (ValueError, TypeError):
            # The logged value must match the fallback assigned below.
            log.debug('[jenkins] Default to 500 seconds timeout')
            self.wait_timeout = 500

        try:
            self.wait_check_interval = int(
                self.updater_args.get('wait-check-interval', 10))
        except (ValueError, TypeError):
            log.debug('[jenkins] Default to 10 seconds check interval')
            self.wait_check_interval = 10

        self.server = jinja(self.updater_args.get('server'))
        self.username = jinja(self.updater_args.get('username'))
        self.password = jinja(self.updater_args.get('password'))
        self.job = jinja(self.updater_args.get('job'))

        try:
            self.jenkins = jenkins.Jenkins(self.server, self.username,
                                           self.password)
        except Exception:
            # Catch Exception rather than everything, so that
            # KeyboardInterrupt / SystemExit still propagate.
            log.exception('[{}] [jenkins] Could not connect to {}'.format(
                self.host, self.server))
            self.jenkins = None
示例#2
0
def amaltheia(args):
    """Parse a job from ``args``, load its config, then run its strategy.

    The job's ``config`` entry (an empty dict when absent) is handed to
    ``config.load``; logging is then set up at ``config.log_level`` and the
    loaded variables and config entries are written to the debug log before
    ``run_strategy`` is invoked with the parsed job.
    """
    job_spec = parse_job(args)
    config.load(job_spec.get('config', {}))

    log.setup(level=config.log_level)

    variables_message = '[amaltheia] Loaded variables: {}'.format(
        config.variables)
    log.debug(variables_message)

    entries_message = '[amaltheia] Loaded config: {}'.format(config._entries)
    log.debug(entries_message)

    run_strategy(job_spec)
示例#3
0
文件: update.py 项目: grnet/amaltheia
    def __init__(self, host_name, host_args, updater_args):
        """Initialise the reboot updater for a single host.

        Reads ``wait`` (default ``True``), ``wait-timeout`` (default 500) and
        ``wait-check-interval`` (default 10) from ``updater_args``. The two
        numeric options fall back to their defaults, with a debug log line,
        when the supplied value cannot be converted to ``int``.
        """
        super(RebootUpdater, self).__init__(host_name, host_args, updater_args)

        self.wait = self.updater_args.get('wait', True)

        # Parse into locals first, then publish on the instance.
        try:
            timeout_seconds = int(self.updater_args.get('wait-timeout', 500))
        except (ValueError, TypeError):
            log.debug('[reboot] Default to 500 seconds timeout')
            timeout_seconds = 500
        self.wait_timeout = timeout_seconds

        try:
            interval_seconds = int(
                self.updater_args.get('wait-check-interval', 10))
        except (ValueError, TypeError):
            log.debug('[reboot] Default to 10 seconds check interval')
            interval_seconds = 10
        self.wait_check_interval = interval_seconds
示例#4
0
文件: update.py 项目: grnet/amaltheia
    def update(self):
        """Reboot the host over SSH and optionally wait until it is reachable.

        Runs ``sudo reboot`` through ``ssh_cmd``. When ``self.wait`` is falsy
        the method returns ``True`` straight away. Otherwise it keeps calling
        ``ssh_try_connect`` until that call returns a truthy value or
        ``self.wait_timeout`` seconds have elapsed, and returns the last
        connection result (a fatal log line is written on timeout).
        """
        ssh_cmd(self.host, self.host_args, 'sudo reboot')

        if not self.wait:
            log.debug('[{}] Not waiting for reboot'.format(self.host))
            return True

        deadline = datetime.now() + timedelta(seconds=self.wait_timeout)
        reachable = False
        while datetime.now() <= deadline:
            log.debug('[{}] Waiting for reboot...'.format(self.host))
            reachable = ssh_try_connect(self.host,
                                        self.host_args,
                                        timeout=self.wait_check_interval)
            if reachable:
                break

        if reachable:
            return reachable

        log.fatal('[{}] Timeout waiting for reboot'.format(self.host))
        return reachable
示例#5
0
    def evacuate(self):
        """Disable nova-compute on this host and migrate all instances away.

        Steps:

        1. Return ``True`` immediately if the ``skip-evacuate`` service
           argument is set.
        2. Disable the ``nova-compute`` service on the host.
        3. Request live migration for the host, then a regular migration
           (live migration may be rejected for stopped VMs).
        4. If any server was not accepted for migration, log the offenders
           and return ``False``.
        5. Poll the hypervisor every 5 seconds until it reports no servers
           or the timeout (``timeout`` service argument, default 40 seconds,
           multiplied by the initial number of servers) is used up.

        Returns:
            bool: ``True`` when the host ends up with no servers (or the
            evacuation was skipped), ``False`` otherwise.
        """

        if self.service_args.get('skip-evacuate'):
            return True

        # Disable nova-compute
        openstack_cmd(
            'openstack compute service set {} nova-compute --disable'.format(
                quote(self.host)))

        # Retrieve list of VMs, indexable by their Instance ID
        server_list = openstack_cmd_table('nova hypervisor-servers {}'.format(
            quote(self.host)))
        servers = {s['ID']: s for s in server_list}

        # Schedule live migration for running VMs
        result = openstack_cmd_table('nova host-evacuate-live {}'.format(
            quote(self.host)))

        for server in result:
            iid = server['Server UUID']

            if server['Live Migration Accepted'] == 'True':
                servers[iid].update({'status': 'OK'})
            else:
                servers[iid].update({
                    'status': 'NOTOK',
                    'error': server['Error Message']
                })

        # Errors with live migration may occur for VMs that are stopped.
        # Migrate them as well
        result = openstack_cmd_table('nova host-servers-migrate {}'.format(
            quote(self.host)))

        for server in result:
            iid = server['Server UUID']

            if server['Migration Accepted'] == 'True':
                servers[iid].update({'status': 'OK'})
                # 'error' is only present when the live migration attempt
                # was rejected, so remove it tolerantly instead of `del`.
                servers[iid].pop('error', None)
            elif servers[iid].get('status', '') != 'OK':
                servers[iid].update({
                    'status': 'NOTOK',
                    'error': server['Error Message']
                })

        # A server that appeared in neither migration result has no 'status'
        # key at all; report it as a failure rather than raising KeyError.
        errors = {k: v for k, v in servers.items() if v.get('status') != 'OK'}
        if errors:
            log.fatal('[{}] {}'.format(self.host, errors))
            return False

        # Wait for migrations to complete
        try:
            timeout_per_server = int(self.service_args.get('timeout', 40))
        except (ValueError, TypeError):
            log.debug('[{}] Defaulting to 40 seconds timeout'.format(
                self.host))

            timeout_per_server = 40

        # Total budget scales with the number of servers initially on the host
        timeout = len(server_list) * timeout_per_server
        while server_list and timeout > 0:
            timeout -= 5
            sleep(5)

            server_list = openstack_cmd_table(
                'nova hypervisor-servers {}'.format(quote(self.host)))

            log.debug('[{}] Waiting for migrations, {} remaining'.format(
                self.host, len(server_list)))

        if server_list:
            log.fatal('[{}] Some migrations timed-out: {}'.format(
                self.host, server_list))
            return False

        log.debug('[{}] All servers migrated successfully'.format(
            self.host))
        return True
示例#6
0
文件: update.py 项目: grnet/amaltheia
    def update(self):
        """Trigger the configured Jenkins job and optionally wait for it.

        The job is queued with ``build-arguments`` from ``updater_args``
        (rendered through ``jinja`` with ``host`` and ``host_args``) when
        present. If ``self.wait`` is falsy the method returns as soon as the
        job is queued. Otherwise it polls every ``self.wait_check_interval``
        seconds, first until the queue item is assigned a build number and
        then until the build reports a result, both within a single
        ``self.wait_timeout`` budget.

        Returns:
            bool: ``True`` if the job was queued and waiting is disabled, or
            if the finished build's result is ``'SUCCESS'``. ``False`` on
            authentication failure, empty job name, queueing failure, failure
            to query Jenkins, timeout, or any other build result.
        """
        try:
            # NOTE(review): self.jenkins appears to be None when the client
            # could not be created in __init__; the resulting AttributeError
            # is reported through this same path.
            self.jenkins.get_whoami()
        except Exception:
            log.exception('[{}] [jenkins] Failed to authenticate'.format(
                self.host))
            return False

        if self.job is None:
            log.fatal('[{}] [jenkins] Empty job name'.format(self.host))
            return False

        raw_args = self.updater_args.get('build-arguments')
        try:
            if raw_args:
                queue_id = self.jenkins.build_job(
                    self.job,
                    jinja(raw_args, host=self.host, host_args=self.host_args))
            else:
                queue_id = self.jenkins.build_job(self.job)
        except Exception:
            log.exception('[{}] [jenkins] Failed to queue job {}'.format(
                self.host, self.job))
            return False

        log.info('[{}] [jenkins] Queued job {} (queue id {})'.format(
            self.host, self.job, queue_id))

        if not self.wait:
            return True

        # One deadline covers both the queue wait and the build wait.
        timeout = datetime.now() + timedelta(seconds=self.wait_timeout)

        while True:
            try:
                queue_item = self.jenkins.get_queue_item(queue_id)
                job_number = queue_item['executable']['number']
                break
            except KeyError:
                # No build number available yet: the item is still queued.
                sleep(self.wait_check_interval)
                log.debug('[{}] [jenkins] Waiting for job queue {}'.format(
                    self.host, self.job))
            except Exception:
                log.exception('[{}] [jenkins] Failed to queue job {}'.format(
                    self.host, self.job))
                return False

            if datetime.now() > timeout:
                log.fatal(
                    '[{}] [jenkins] Timeout waiting for job queue {}'.format(
                        self.host, self.job))
                return False

        log.info('[{}] [jenkins] Started job {}/{} (queue id {})'.format(
            self.host, self.job, job_number, queue_id))

        done = False
        while not done and datetime.now() <= timeout:
            log.debug('[{}] [jenkins] Waiting for job run {}/{}'.format(
                self.host, self.job, job_number))
            try:
                build_info = self.jenkins.get_build_info(self.job, job_number)
            except Exception:
                # Same handling as the other Jenkins calls in this method:
                # log the error and report failure instead of raising.
                log.exception(
                    '[{}] [jenkins] Failed to get build info for {}/{}'.format(
                        self.host, self.job, job_number))
                return False

            # 'result' stays None until the build has a result.
            done = build_info['result'] is not None
            if not done:
                sleep(self.wait_check_interval)

        if not done:
            log.fatal(
                '[{}] [jenkins] Timeout waiting for job run {}/{}'.format(
                    self.host, self.job, job_number))
            return False

        return build_info['result'] == 'SUCCESS'