Пример #1
0
def ignore_exception(processname, e):
    """Log an exception that is deliberately being ignored.

    The logged message names the process and the error. If an exception is
    currently being handled (sys.exc_info() reports a traceback), the
    formatted traceback is appended on a new line.

    Args:
        processname: name of the process in which the error occurred.
        e: the error being ignored; it is rendered with %s.
    """
    parts = ['[Exception] Ignored error in %s: %s' % (processname, e)]

    # Only add a traceback when there is an active exception to describe.
    if sys.exc_info()[2]:
        parts.append(traceback.format_exc())

    logutil.error(None, '\n'.join(parts))
Пример #2
0
    def _populate_block_devices(self):
        """Fill in db_entry['block_devices'] from db_entry['disk_spec'].

        The first disk spec becomes the root device (suffix 'a'), followed by
        a raw config device (suffix 'b') on the same bus. Every further disk
        spec gets its own bus and the next device letter starting at 'c'.
        On success the entry is marked as not yet finalized.

        If the disk spec is empty an error is logged and block_devices is set
        to a placeholder marked as finalized.
        """
        specs = self.db_entry['disk_spec']
        if not specs:
            # The API is expected to reject requests with zero disks, so
            # getting here is unexpected and worth an error log.
            logutil.error([self], 'Found disk spec empty: %s' % self.db_entry)

            # Pretend the disks are configured so we do not crash repeatedly
            # on this instance.
            self.db_entry['block_devices'] = {'finalized': True}
            return

        root_spec = specs[0]
        root_bus = _get_defaulted_disk_bus(root_spec)
        root_name = _get_disk_device_base(root_bus) + 'a'
        config_name = _get_disk_device_base(root_bus) + 'b'

        # A 'flat' DISK_FORMAT means raw images, anything else means qcow2.
        image_format = (
            'raw' if config.parsed.get('DISK_FORMAT') == 'flat' else 'qcow2')

        root_disk = {
            'type': image_format,
            'size': root_spec.get('size'),
            'device': root_name,
            'bus': root_bus,
            'path': os.path.join(self.instance_path, root_name),
            'base': root_spec.get('base'),
            'present_as': _get_defaulted_disk_type(root_spec),
            'snapshot_ignores': False
        }
        config_disk = {
            'type': 'raw',
            'device': config_name,
            'bus': root_bus,
            'path': os.path.join(self.instance_path, config_name),
            'present_as': 'disk',
            'snapshot_ignores': True
        }
        self.db_entry['block_devices'] = {'devices': [root_disk, config_disk]}

        # Remaining disks take device letters 'c', 'd', ... in spec order.
        for offset, spec in enumerate(specs[1:]):
            spec_bus = _get_defaulted_disk_bus(spec)
            spec_name = _get_disk_device_base(spec_bus) + chr(ord('c') + offset)
            self.db_entry['block_devices']['devices'].append({
                'type': image_format,
                'size': spec.get('size'),
                'device': spec_name,
                'bus': spec_bus,
                'path': os.path.join(self.instance_path, spec_name),
                'base': spec.get('base'),
                'present_as': _get_defaulted_disk_type(spec),
                'snapshot_ignores': False
            })

        self.db_entry['block_devices']['finalized'] = False
Пример #3
0
    def power_off(self):
        """Call destroy() on this instance's domain and record a poweroff event.

        Nothing happens when no domain is found. A libvirtError raised by
        destroy() is logged rather than propagated, and the poweroff event
        is recorded either way.
        """
        libvirt = util.get_libvirt()
        domain = self._get_domain()
        if domain:
            try:
                domain.destroy()
            except libvirt.libvirtError as err:
                logutil.error([self], 'Failed to delete domain: %s' % err)

            db.add_event(
                'instance', self.db_entry['uuid'], 'poweroff', 'complete',
                None, None)
Пример #4
0
def error(status_code, message):
    """Build a JSON error response for the API and log it.

    The response body carries the message and status code. A traceback is
    added to the body only when TESTING is set or INCLUDE_TRACEBACKS is '1'
    in the config, and only if an exception is currently being handled.

    Args:
        status_code: HTTP status code to set on the response.
        message: description of the error.

    Returns:
        A flask.Response with an application/json mimetype.
    """
    payload = {'error': message, 'status': status_code}

    tracebacks_enabled = (
        TESTING or config.parsed.get('INCLUDE_TRACEBACKS') == '1')
    if tracebacks_enabled and sys.exc_info()[2]:
        payload['traceback'] = traceback.format_exc()

    resp = flask.Response(json.dumps(payload), mimetype='application/json')
    resp.status_code = status_code

    # Indent every traceback line so it nests under the error in the log.
    indented = payload.get('traceback', '').replace('\n', '\n    ')
    logutil.error(
        None,
        'Returning API error: %d, %s\n    %s' % (status_code, message,
                                                 indented))
    return resp
Пример #5
0
def handle(jobname, workitem):
    """Run every task in a dequeued work item, then resolve the job.

    Each task is dispatched on its 'type' (image_fetch, instance_preflight,
    instance_start or instance_delete); other types are logged and skipped.
    Processing stops early when an instance task lacks an instance uuid, or
    when preflight redirects the work item to another node.

    Any exception is logged. If it happened while working on an instance,
    a delete of that instance in the 'error' state is also enqueued.
    Failures inside instance_delete itself are only logged, so they do not
    enqueue yet another delete. The job is always resolved at the end.

    Args:
        jobname: name of the job being processed.
        workitem: mapping with an optional 'tasks' list of task dicts.
    """
    j = JobName(jobname)
    logutil.info([j], 'Processing workitem')
    setproctitle.setproctitle('%s-%s' %
                              (daemon.process_name('queues'), jobname))

    instance_uuid = None
    try:
        for task in workitem.get('tasks', []):
            ro = [j]
            instance_uuid = task.get('instance_uuid')
            if instance_uuid:
                ro.append(virt.from_db(instance_uuid))

            task_type = task.get('type')
            if task_type.startswith('instance_') and not instance_uuid:
                logutil.error(ro, 'Instance task lacks instance uuid')
                return

            if instance_uuid:
                db.add_event('instance', instance_uuid,
                             task_type.replace('_', ' '), 'dequeued',
                             None, 'Work item %s' % jobname)

            logutil.info(
                ro,
                'Executing task %s: %s' % (task.get('type', 'unknown'), task))

            if task_type == 'image_fetch':
                image_fetch(task.get('url'), instance_uuid)

            elif task_type == 'instance_preflight':
                redirect_to = instance_preflight(instance_uuid,
                                                 task.get('network'))
                if redirect_to:
                    # Hand the whole work item to the other node and stop.
                    logutil.info(
                        ro, 'Redirecting instance start to %s' % redirect_to)
                    db.place_instance(instance_uuid, redirect_to)
                    db.enqueue(redirect_to, workitem)
                    return

            elif task_type == 'instance_start':
                instance_start(instance_uuid, task.get('network'))
                db.update_instance_state(instance_uuid, 'created')
                db.enqueue('%s-metrics' % config.parsed.get('NODE_NAME'), {})

            elif task_type == 'instance_delete':
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(instance_uuid,
                                             task.get('next_state', 'unknown'))
                    if task.get('next_state_message'):
                        db.update_instance_error_message(
                            instance_uuid, task.get('next_state_message'))
                    db.enqueue('%s-metrics' % config.parsed.get('NODE_NAME'),
                               {})
                except Exception as e:
                    # Only log: letting this reach the outer handler would
                    # enqueue another delete for the same instance.
                    util.ignore_exception(daemon.process_name('queues'), e)

    except Exception as e:
        # Always log the failure, even when it is not tied to an instance
        # (e.g. a plain image fetch), so that it is never lost silently.
        util.ignore_exception(daemon.process_name('queues'), e)
        if instance_uuid:
            db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                       instance_uuid, 'error',
                                       'failed queue task: %s' % e)

    finally:
        db.resolve(config.parsed.get('NODE_NAME'), jobname)
        if instance_uuid:
            db.add_event('instance', instance_uuid, 'tasks complete',
                         'dequeued', None, 'Work item %s' % jobname)
        logutil.info([j], 'Completed workitem')
Пример #6
0
    def _update_power_states(self):
        """Reconcile libvirt domains named 'sf:<uuid>' with the database.

        Active domains (those with an ID):
          * unknown to the database: destroyed via virsh;
          * marked deleted for more than five minutes: deleted again, or
            destroyed via virsh once more than five enforced deletes have
            been recorded;
          * otherwise: placement and power state are refreshed, and a
            'crashed' power state moves the instance to 'error'.

        Inactive (defined but not running) domains not already seen above:
          * unknown to the database: logged and skipped;
          * marked deleted for more than five minutes: undefined;
          * instance files missing on disk: instance moved to 'error';
          * otherwise: power state recorded as 'off' if not already.

        libvirt errors during the scan are logged, not raised. The libvirt
        connection is closed before returning.
        """
        # A delete may still be in flight in the queue, so instances marked
        # deleted are left alone for this long before we step in.
        delete_grace_seconds = 300

        libvirt = util.get_libvirt()
        conn = libvirt.open(None)
        try:
            seen = set()

            # Active VMs have an ID. Active means running in libvirt
            # land.
            for domain_id in conn.listDomainsID():
                domain = conn.lookupByID(domain_id)
                if not domain.name().startswith('sf:'):
                    continue

                instance_uuid = domain.name().split(':')[1]
                instance = db.get_instance(instance_uuid)
                if not instance:
                    # Instance is SF but not in database. Kill to reduce load.
                    logutil.warning([virt.ThinInstance(instance_uuid)],
                                    'Destroying unknown instance')
                    util.execute(None, 'virsh destroy "sf:%s"' % instance_uuid)
                    continue

                db.place_instance(instance_uuid,
                                  config.parsed.get('NODE_NAME'))
                seen.add(domain.name())

                if instance.get('state') == 'deleted':
                    # NOTE(mikal): a delete might be in-flight in the queue.
                    # We only worry about instances which should have gone
                    # away five minutes ago.
                    if (time.time() - instance['state_updated']
                            < delete_grace_seconds):
                        continue

                    db.instance_enforced_deletes_increment(instance_uuid)
                    # Read from the record fetched before the increment.
                    attempts = instance.get('enforced_deletes', 0)

                    if attempts > 5:
                        # Sometimes we just can't delete the VM. Try the big
                        # hammer instead.
                        logutil.warning(
                            [virt.ThinInstance(instance_uuid)],
                            'Attempting alternate delete method for instance')
                        util.execute(None,
                                     'virsh destroy "sf:%s"' % instance_uuid)

                        db.add_event('instance', instance_uuid,
                                     'enforced delete', 'complete', None, None)
                    else:
                        i = virt.from_db(instance_uuid)
                        i.delete()
                        i.update_instance_state('deleted')

                    logutil.warning([virt.ThinInstance(instance_uuid)],
                                    'Deleting stray instance (attempt %d)' %
                                    attempts)

                    continue

                state = util.extract_power_state(libvirt, domain)
                db.update_instance_power_state(instance_uuid, state)
                if state == 'crashed':
                    db.update_instance_state(instance_uuid, 'error')

            # Inactive VMs just have a name, and are powered off
            # in our state system.
            for domain_name in conn.listDefinedDomains():
                if not domain_name.startswith('sf:'):
                    continue

                if domain_name in seen:
                    continue

                instance_uuid = domain_name.split(':')[1]
                instance = db.get_instance(instance_uuid)
                if not instance:
                    # Without a database record there is no state to
                    # reconcile. Skip the domain instead of crashing on the
                    # attribute lookups below.
                    logutil.warning([virt.ThinInstance(instance_uuid)],
                                    'Ignoring unknown inactive instance')
                    continue

                if instance.get('state') == 'deleted':
                    # NOTE(mikal): a delete might be in-flight in the queue.
                    # We only worry about instances which should have gone
                    # away five minutes ago.
                    if (time.time() - instance['state_updated']
                            < delete_grace_seconds):
                        continue

                    domain = conn.lookupByName(domain_name)
                    domain.undefine()
                    logutil.info([virt.ThinInstance(instance_uuid)],
                                 'Detected stray instance')
                    db.add_event('instance', instance_uuid,
                                 'deleted stray', 'complete', None, None)
                    continue

                db.place_instance(instance_uuid,
                                  config.parsed.get('NODE_NAME'))
                instance_path = os.path.join(
                    config.parsed.get('STORAGE_PATH'), 'instances',
                    instance_uuid)

                if not os.path.exists(instance_path):
                    # If we're inactive and our files aren't on disk,
                    # we have a problem.
                    logutil.info([virt.ThinInstance(instance_uuid)],
                                 'Detected error state for instance')
                    db.update_instance_state(instance_uuid, 'error')
                elif instance.get('power_state') != 'off':
                    logutil.info([virt.ThinInstance(instance_uuid)],
                                 'Detected power off for instance')
                    db.update_instance_power_state(instance_uuid, 'off')
                    db.add_event('instance', instance_uuid,
                                 'detected poweroff', 'complete', None,
                                 None)

        except libvirt.libvirtError as e:
            logutil.error(None, 'Failed to lookup all domains: %s' % e)

        finally:
            # Release the connection opened above on every exit path.
            conn.close()