Exemplo n.º 1
    def _add_missing_subinterfaces(self, context, edge_id, vnic_binds,
                                   backend_vnics, if_changed, readonly):
        """Report vnic bindings whose backend subinterface is wrong or absent.

        For every entry of ``vnic_binds`` the backend vnic with the same
        ``index`` as the binding's ``vnic_index`` is searched for a
        subinterface whose ``tunnelId`` equals the binding's
        ``tunnel_index``.  A subinterface whose ``logicalSwitchName`` differs
        from the binding's ``network_id``, or no such subinterface at all,
        increments ``self.error_count`` and sets
        ``if_changed[vnic['index']]`` to True.

        NOTE(review): this listing has gaps - the call on the
        ``metadata_nets`` line is never closed, the metadata check has no
        body, and both ``if not readonly`` branches begin with the argument
        list of a call whose name is not shown.  Restore the missing lines
        from version control before relying on this method.
        """
        # Verify that all the entries in
        # nsxv_edge_vnic_bindings are attached on the Edge

        # Arrange the vnic binds in a list of lists - vnics and subinterfaces

        metadata_nets = [
            net['network_id'] for net in nsxv_db.get_nsxv_internal_networks(
        # NOTE(review): the call arguments and the closing brackets of the
        # list above are missing here.

        for vnic_bind in vnic_binds:
            if vnic_bind['network_id'] in metadata_nets:
            # NOTE(review): the body of this check is missing; presumably
            # bindings for internal networks are skipped - confirm.

            for vnic in backend_vnics:
                if vnic['index'] == vnic_bind['vnic_index']:
                    found = False
                    tunnel_index = vnic_bind['tunnel_index']
                    network_id = vnic_bind['network_id']
                    for sub_if in (vnic.get('subInterfaces',
                                            {}).get('subInterfaces', [])):
                        if sub_if['tunnelId'] == tunnel_index:
                            found = True
                            if sub_if.get('logicalSwitchName') != network_id:
                                # Subinterface exists but is attached to a
                                # different logical switch than the binding.
                                self.error_count += 1
                                # NOTE(review): other warning calls in this
                                # file pass the accumulated info string as
                                # the first argument; it is absent here and
                                # the call is never closed.
                                self.error_info = base_job.housekeeper_warning(
                                    'subinterface %s on vnic %s on edge %s '
                                    'should be connected to network %s',
                                    tunnel_index, vnic['index'], edge_id,
                                if_changed[vnic['index']] = True
                                if not readonly:
                                        # NOTE(review): name of the repair
                                        # call is missing from the listing.
                                        context, network_id, edge_id, vnic,
                                    self.fixed_count += 1
                                sub_if['name'] = network_id
                    if not found:
                        # No subinterface carries the binding's tunnel index.
                        self.error_count += 1
                        # NOTE(review): accumulated info argument appears to
                        # be missing here as well.
                        self.error_info = base_job.housekeeper_warning(
                            'subinterface %s on vnic %s on edge %s should be '
                            'connected to network %s but is missing',
                            tunnel_index, vnic['index'], edge_id, network_id)
                        if_changed[vnic['index']] = True

                        if not readonly:
                                # NOTE(review): name of the repair call is
                                # missing from the listing.
                                context, network_id, edge_id, vnic,
                            self.fixed_sub_if_count += 1
Exemplo n.º 2
    def run(self, context, readonly=False):
        """Report logical ports that v3_utils flags as mismatched.

        Returns a dict with ``error_count`` (number of mismatched ports),
        ``fixed_count`` and ``error_info`` (the accumulated report text).
        Only problems whose ``error_type`` is
        ``v3_utils.PORT_ERROR_TYPE_BINDINGS`` are candidates for repair,
        and only when ``readonly`` is False.

        NOTE(review): the repair branch is incomplete in this listing - an
        ``except`` appears without its ``try``, the backend update call is
        cut off and the ``else`` branches that separate the success and
        failure messages are missing.  Restore them from version control.
        """
        super(MismatchLogicalportJob, self).run(context)

        # get all mismatched logical ports
        mismatch_ports = v3_utils.get_mismatch_logical_ports(
            context, self.plugin.nsxlib, self.plugin)
        info = ""
        if not mismatch_ports:
            msg = 'No mismatched logical ports detected.'
            info = base_job.housekeeper_info(info, msg)
            return {'error_count': 0, 'fixed_count': 0, 'error_info': info}

        msg = ("Found %(len)s mismatched logical port%(plural)s:" %
               {'len': len(mismatch_ports),
                'plural': 's' if len(mismatch_ports) > 1 else ''})
        info = base_job.housekeeper_warning(info, msg)

        fixed_count = 0
        for port_problem in mismatch_ports:
            msg = ("Logical port %(nsx_id)s "
                   "[neutron id: %(id)s] error: %(err)s" %
                   {'nsx_id': port_problem['nsx_id'],
                    'id': port_problem['neutron_id'],
                    'err': port_problem['error']})
            if not readonly:
                # currently we mitigate only address bindings mismatches
                err_type = port_problem['error_type']
                if err_type == v3_utils.PORT_ERROR_TYPE_BINDINGS:
                    # Create missing address bindings on backend
                    port = port_problem['port']
                    # NOTE(review): a `try:` line and parts of the calls
                    # below are missing from the listing.
                        address_bindings = self.plugin._build_address_bindings(
                            port_problem['nsx_id'], port_problem['neutron_id'],
                    except Exception as e:
                        msg = "%s failed to be fixed: %s" % (msg, e)
                    # NOTE(review): as listed, the success and "cannot be
                    # fixed" messages are not separated by `else:` branches.
                        fixed_count = fixed_count + 1
                        msg = "%s was fixed." % msg
                    msg = "%s cannot be fixed automatically." % msg
            info = base_job.housekeeper_warning(info, msg)

        return {'error_count': len(mismatch_ports),
                'error_info': info,
                'fixed_count': fixed_count}
Exemplo n.º 3
    def _validate_edge_subinterfaces(self, context, edge_id, backend_vnics,
                                     vnic_dict, if_changed):
        """Drop backend subinterfaces that have no vnic binding record.

        Every connected trunk vnic is scanned; a subinterface is considered
        registered when ``vnic_dict`` holds an entry for its
        ``logicalSwitchName`` whose ``vnic_index`` and ``tunnel_index``
        match the vnic and the subinterface.  Unregistered subinterfaces are
        reported, counted and removed from the vnic payload.

        :param context: request context (unused here, kept for the
            interface).
        :param edge_id: edge identifier, used in the warning text.
        :param backend_vnics: list of vnic dicts as returned by the backend;
            modified in place when stale subinterfaces are dropped.
        :param vnic_dict: network id -> ``{'vnic_index', 'tunnel_index'}``.
        :param if_changed: dict filled by this method: vnic index -> True
            when the vnic payload was modified, else False.
        """
        # Validate that all the interfaces on the Edge
        # appliance are registered in nsxv_edge_vnic_bindings
        for vnic in backend_vnics:
            vnic_index = vnic['index']
            if_changed[vnic_index] = False
            if not (vnic['isConnected'] and vnic['type'] == 'trunk'
                    and vnic['subInterfaces']):
                continue

            sub_ifs = vnic['subInterfaces']['subInterfaces']
            registered = []
            for sub_if in sub_ifs:
                # Subinterface name field contains the net id
                vnic_bind = vnic_dict.get(sub_if['logicalSwitchName'])
                if (vnic_bind and vnic_bind['vnic_index'] == vnic_index
                        and vnic_bind['tunnel_index'] == sub_if['tunnelId']):
                    # Known binding: nothing to report.
                    registered.append(sub_if)
                    continue

                self.error_count += 1
                self.error_info = base_job.housekeeper_warning(
                    self.error_info,
                    'subinterface %s for vnic %s on edge %s is not '
                    'defined in nsxv_edge_vnic_bindings',
                    sub_if['tunnelId'], vnic_index, edge_id)
                self.fixed_sub_if_count += 1
                if_changed[vnic_index] = True

            if len(registered) != len(sub_ifs):
                # The caller pushes vnics flagged in if_changed back to the
                # backend, so the stale entries must leave the payload for
                # the counted fix to take effect.  Rebuild the list in place
                # rather than removing items while iterating over it.
                sub_ifs[:] = registered
Exemplo n.º 4
    def run(self, context, readonly=False):
        """Track LBaaS objects in a pending state and flag the stuck ones.

        ``self.lbaas_objects`` maps an element id to a dict with ``model``,
        ``time_added`` and ``time_seen``.  An element that is still pending
        after more than ``ELEMENT_LIFETIME`` seconds is reported and, unless
        ``readonly`` is set, its ``provisioning_status`` is set to
        ``constants.ERROR``.  Tracked entries that were not seen in this run
        are forgotten.

        :param context: request context; ``context.session`` is queried.
        :param readonly: when True only report, do not modify elements.
        :returns: dict with ``error_count``, ``fixed_count`` and
            ``error_info``.
        """
        super(LbaasPendingJob, self).run(context)
        curr_time = time.time()
        error_count = 0
        fixed_count = 0
        error_info = ''

        # NOTE(review): the filter expression was truncated in the original
        # listing; it is rebuilt from the PENDING_CREATE / PENDING_UPDATE
        # constants and the 'provisioning_status' field used below.
        # PENDING_DELETE and the SQLAlchemy in_() form are assumptions -
        # confirm against version control.
        pending_states = [constants.PENDING_CREATE, constants.PENDING_UPDATE,
                          constants.PENDING_DELETE]
        sess = context.session

        for model in self.lbaas_models:
            elements = sess.query(model).filter(
                model.provisioning_status.in_(pending_states)).all()

            for element in elements:
                element_id = element['id']
                obj = self.lbaas_objects.get(element_id)
                if obj is None:
                    # Entry wasn't seen before this iteration - add to dict
                    LOG.debug('Housekeeping: monitoring PENDING state for '
                              'LBaaS object %s %s', model.NAME, element_id)
                    self.lbaas_objects[element_id] = {
                        'model': model,
                        'time_added': curr_time,
                        'time_seen': curr_time}
                    continue

                lifetime = curr_time - obj['time_added']
                if lifetime > ELEMENT_LIFETIME:
                    # Entry has been pending for more than lifetime.
                    # Report and remove when in R/W mode
                    error_count += 1
                    error_info = base_job.housekeeper_warning(
                        error_info,
                        'LBaaS %s %s is stuck in pending state',
                        model.NAME, element_id)

                    if not readonly:
                        element['provisioning_status'] = constants.ERROR
                        fixed_count += 1
                    del self.lbaas_objects[element_id]
                else:
                    # Entry is still pending but haven't reached lifetime
                    LOG.debug('Housekeeping: LBaaS object %s %s in '
                              'PENDING state for %d seconds', model.NAME,
                              element_id, lifetime)
                    obj['time_seen'] = curr_time

        # Look for dictionary entries which weren't seen in this iteration.
        # Such entries were either removed from DB or their state was changed.
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for obj_id in list(self.lbaas_objects):
            if self.lbaas_objects[obj_id]['time_seen'] != curr_time:
                LOG.debug('Housekeeping: LBaaS %s %s is back to normal',
                          self.lbaas_objects[obj_id]['model'].NAME, obj_id)
                del self.lbaas_objects[obj_id]

        if error_count == 0:
            error_info = 'No LBaaS objects in pending state'
        return {'error_count': error_count,
                'fixed_count': fixed_count,
                'error_info': error_info}
Exemplo n.º 5
    def run(self, context, readonly=False):
        """Report backup edges in ERROR state and optionally handle them.

        Router bindings are queried with a status filter of
        ``constants.ERROR`` and a ``router_id`` like-filter built from
        ``vcns_const.BACKUP_ROUTER_PREFIX``.  One warning is recorded per
        binding; unless ``readonly`` is set, ``self._handle_backup_edge`` is
        called for it while holding the lock named after the edge id.

        :param context: request context; ``context.session`` is used for
            the query.
        :param readonly: when True only report, do not call the handler.
        :returns: dict with ``error_count``, ``fixed_count`` and
            ``error_info``.
        """
        super(ErrorBackupEdgeJob, self).run(context)
        error_count = 0
        fixed_count = 0
        error_info = ''

        # Query the backup edge bindings that are in ERROR state
        filters = {'status': [constants.ERROR]}
        like_filters = {'router_id': vcns_const.BACKUP_ROUTER_PREFIX + "%"}
        with locking.LockManager.get_lock('nsx-edge-backup-pool'):
            error_edge_bindings = nsxv_db.get_nsxv_router_bindings(
                context.session, filters=filters, like_filters=like_filters)

        if not error_edge_bindings:
            LOG.debug('Housekeeping: no backup edges in ERROR state detected')
            return {'error_count': 0,
                    'fixed_count': 0,
                    'error_info': 'No backup edges in ERROR state detected'}

        # Keep list of current broken backup edges - as it may change while
        # HK is running
        for binding in error_edge_bindings:
            edge_id = binding['edge_id']
            error_count += 1
            # The message has one %s placeholder; the edge id fills it.
            error_info = base_job.housekeeper_warning(
                error_info, 'Backup Edge appliance %s is in ERROR state',
                edge_id)

            if not readonly:
                with locking.LockManager.get_lock(edge_id):
                    if self._handle_backup_edge(context, binding):
                        fixed_count += 1

        return {'error_count': error_count,
                'fixed_count': fixed_count,
                'error_info': error_info}
Exemplo n.º 6
    def run(self, context, readonly=False):
        """Report logical switches that v3_utils flags as orphaned.

        Returns a dict with ``error_count`` (number of orphaned switches),
        ``fixed_count`` and ``error_info`` (the accumulated report text).

        NOTE(review): this listing has gaps - both ``%`` dict literals and
        the returned dict are never closed, the per-switch dict lacks its
        leading entries, and the removal call with its ``try:`` / ``else:``
        lines is missing, so the removal step cannot be read from here.
        Restore the lines from version control.
        """
        super(OrphanedLogicalSwitchJob, self).run(context)

        # get all orphaned logical switches
        orphaned_swithces = v3_utils.get_orphaned_networks(
            context, self.plugin.nsxlib)

        info = ""
        if not orphaned_swithces:
            msg = 'No orphaned logical switches detected.'
            info = base_job.housekeeper_info(info, msg)
            return {'error_count': 0, 'fixed_count': 0, 'error_info': info}

        msg = ("Found %(len)s orphaned logical switch%(plural)s:" % {
            'len': len(orphaned_swithces),
            'plural': 'es' if len(orphaned_swithces) > 1 else ''
        # NOTE(review): closing "})" of the expression above is missing.
        info = base_job.housekeeper_warning(info, msg)
        fixed_count = 0
        for switch in orphaned_swithces:
            msg = ("Logical switch %(name)s [id: %(id)s] "
                   "(neutron network: %(net)s)" % {
                       # NOTE(review): the 'name', 'id' and 'net' entries
                       # are cut off; only the tail of 'net' remains.
                       if switch['neutron_net_id'] else 'Unknown'
            if not readonly:
                # NOTE(review): `try:` and the removal call are missing.
                except Exception as e:
                    msg = "%s failed to be removed: %s." % (msg, e)
                    # NOTE(review): presumably an `else:` precedes the two
                    # lines below - confirm.
                    fixed_count = fixed_count + 1
                    msg = "%s was removed." % (msg)
            info = base_job.housekeeper_warning(info, msg)

        return {
            'error_count': len(orphaned_swithces),
            'error_info': info,
            'fixed_count': fixed_count
Exemplo n.º 7
    def run(self, context, readonly=False):
        """Report orphaned logical routers and optionally delete them.

        The routers come from ``v3_utils.get_orphaned_routers``.  Unless
        ``readonly`` is set, each one is passed to
        ``v3_utils.delete_orphaned_router`` and the outcome is appended to
        the report.

        :param context: request context passed on to v3_utils.
        :param readonly: when True only report, do not delete.
        :returns: dict with ``error_count`` (number of orphaned routers),
            ``error_info`` (report text) and ``fixed_count`` (successful
            deletions).
        """
        super(OrphanedLogicalRouterJob, self).run(context)

        # get all orphaned logical routers
        orphaned_routers = v3_utils.get_orphaned_routers(
            context, self.plugin.nsxlib)

        info = ""
        if not orphaned_routers:
            msg = 'No orphaned logical routers detected.'
            info = base_job.housekeeper_info(info, msg)
            return {'error_count': 0, 'fixed_count': 0, 'error_info': info}

        router_count = len(orphaned_routers)
        msg = ("Found %(len)s orphaned logical router%(plural)s:" % {
            'len': router_count,
            'plural': 's' if router_count > 1 else ''
        })
        info = base_job.housekeeper_warning(info, msg)
        fixed_count = 0
        for router in orphaned_routers:
            # NOTE(review): the 'name' and 'id' entries were cut from the
            # original listing; 'display_name' / 'id' mirror the other
            # orphan jobs in this file - confirm.
            msg = ("Logical router %(name)s [id: %(id)s] "
                   "(neutron router: %(rtr)s)" % {
                       'name': router['display_name'],
                       'id': router['id'],
                       'rtr': router['neutron_router_id'] or 'Unknown'
                   })
            if not readonly:
                success, error = v3_utils.delete_orphaned_router(
                    self.plugin.nsxlib, router['id'])
                if success:
                    fixed_count = fixed_count + 1
                    msg = "%s was removed." % msg
                else:
                    msg = "%s failed to be removed: %s." % (msg, error)
            info = base_job.housekeeper_warning(info, msg)

        return {
            'error_count': router_count,
            'error_info': info,
            'fixed_count': fixed_count
        }
Exemplo n.º 8
    def run(self, context, readonly=False):
        """Report firewall sections that v3_utils flags as orphaned.

        Returns a dict with ``error_count`` (number of orphaned sections),
        ``fixed_count`` and ``error_info`` (the accumulated report text).

        NOTE(review): this listing has gaps - both ``%`` dict literals and
        the returned dict are never closed, the per-section dict lacks its
        leading entries, and the removal call with its ``try:`` / ``else:``
        lines is missing, so the removal step cannot be read from here.
        Restore the lines from version control.
        """
        super(OrphanedFirewallSectionJob, self).run(context)

        # get all orphaned firewall sections
        orphaned_sections = v3_utils.get_orphaned_firewall_sections(
            context, self.plugin.nsxlib)

        info = ""
        if not orphaned_sections:
            msg = 'No orphaned firewall sections detected.'
            info = base_job.housekeeper_info(info, msg)
            return {'error_count': 0, 'fixed_count': 0, 'error_info': info}

        msg = ("Found %(len)s orphaned firewall section%(plural)s:" % {
            'len': len(orphaned_sections),
            'plural': 's' if len(orphaned_sections) > 1 else ''
        # NOTE(review): closing "})" of the expression above is missing.
        info = base_job.housekeeper_warning(info, msg)
        fixed_count = 0
        for section in orphaned_sections:
            msg = ("Firewall section %(name)s [id: %(id)s] "
                   "neutron security group: %(sg)s" % {
                       # NOTE(review): the 'name', 'id' and 'sg' entries are
                       # cut off; only the tail of 'sg' remains.
                       if section['neutron_sg_id'] else 'Unknown'
            if not readonly:
                # NOTE(review): `try:` and the removal call are missing.
                except Exception as e:
                    msg = "%s failed to be removed: %s." % (msg, e)
                    # NOTE(review): presumably an `else:` precedes the two
                    # lines below - confirm.
                    fixed_count = fixed_count + 1
                    msg = "%s was removed." % msg
            info = base_job.housekeeper_warning(info, msg)

        return {
            'error_count': len(orphaned_sections),
            'error_info': info,
            'fixed_count': fixed_count
Exemplo n.º 9
    def run(self, context, readonly=False):
        """Validate every DHCP edge that has router bindings in ERROR state.

        Bindings in ERROR state whose ``router_id`` starts with
        ``vcns_const.DHCP_EDGE_PREFIX`` are grouped by edge id, and
        ``self._validate_dhcp_edge`` is called once per edge.  An exception
        raised for one edge is recorded as an error and does not stop the
        remaining edges.

        :param context: request context; ``context.session`` is used for
            the DB query.
        :param readonly: passed through to ``_validate_dhcp_edge``.
        :returns: dict with ``error_count``, ``fixed_count`` and
            ``error_info``.
        """
        super(ErrorDhcpEdgeJob, self).run(context)
        self.error_count = 0
        self.fixed_count = 0
        self.fixed_sub_if_count = 0
        self.error_info = ''

        # Gather ERROR state DHCP edges into dict
        filters = {'status': [constants.ERROR]}
        error_edge_bindings = nsxv_db.get_nsxv_router_bindings(
            context.session, filters=filters)

        if not error_edge_bindings:
            LOG.debug('Housekeeping: no DHCP edges in ERROR state detected')
            return {
                'error_count': self.error_count,
                'fixed_count': self.fixed_count,
                'error_info': 'No DHCP error state edges detected'
            }

        with locking.LockManager.get_lock('nsx-dhcp-edge-pool'):
            edge_dict = {}
            for binding in error_edge_bindings:
                if binding['router_id'].startswith(
                        vcns_const.DHCP_EDGE_PREFIX):
                    # Group the bindings per edge; _validate_dhcp_edge
                    # iterates over edge_dict[edge_id].
                    bind_list = edge_dict.setdefault(binding['edge_id'], [])
                    bind_list.append(binding)

        # Get valid neutron networks and create a prefix dict.
        networks = [
            net['id']
            for net in self.plugin.get_networks(context, fields=['id'])
        ]
        # Key: the leading part of the network id that fits in 36 characters
        # after the DHCP edge prefix; value: the full network id.
        pfx_dict = {
            net[:36 - len(vcns_const.DHCP_EDGE_PREFIX)]: net
            for net in networks
        }

        for edge_id in edge_dict:
            try:
                self._validate_dhcp_edge(context, edge_dict, pfx_dict,
                                         networks, edge_id, readonly)
            except Exception as e:
                # Best effort: record the failure and continue with the
                # next edge.
                self.error_count += 1
                self.error_info = base_job.housekeeper_warning(
                    self.error_info, 'Failed to recover DHCP Edge %s (%s)',
                    edge_id, e)

        return {
            'error_count': self.error_count,
            'fixed_count': self.fixed_count,
            'error_info': self.error_info
        }
Exemplo n.º 10
    def run(self, context, readonly=False):
        """Report orphaned DHCP servers and optionally delete them.

        The servers come from ``v3_utils.get_orphaned_dhcp_servers``.
        Unless ``readonly`` is set, each one is passed to
        ``v3_utils.delete_orphaned_dhcp_server`` and the outcome is appended
        to the report.

        :param context: request context passed on to v3_utils.
        :param readonly: when True only report, do not delete.
        :returns: dict with ``error_count`` (number of orphaned servers),
            ``error_info`` (report text) and ``fixed_count`` (successful
            deletions).
        """
        super(OrphanedDhcpServerJob, self).run(context)

        # get all orphaned DHCP servers
        orphaned_servers = v3_utils.get_orphaned_dhcp_servers(
            context, self.plugin, self.plugin.nsxlib)

        info = ""
        if not orphaned_servers:
            msg = 'No orphaned DHCP servers detected.'
            info = base_job.housekeeper_info(info, msg)
            # Return the accumulated info, as the other orphan jobs do,
            # instead of the raw message.
            return {'error_count': 0, 'fixed_count': 0, 'error_info': info}

        server_count = len(orphaned_servers)
        msg = ("Found %(len)s orphaned DHCP server%(plural)s:" %
               {'len': server_count,
                'plural': 's' if server_count > 1 else ''})
        info = base_job.housekeeper_warning(info, msg)
        fixed_count = 0
        for server in orphaned_servers:
            msg = ("DHCP server %(name)s [id: %(id)s] "
                   "(neutron network: %(net)s)" %
                   {'name': server['display_name'],
                    'id': server['id'],
                    'net': server.get('neutron_net_id') or 'Unknown'})
            if not readonly:
                success, error = v3_utils.delete_orphaned_dhcp_server(
                    context, self.plugin.nsxlib, server)
                if success:
                    msg = "%s was removed." % msg
                    fixed_count = fixed_count + 1
                else:
                    # Only a failed deletion gets the failure text; without
                    # this branch it overwrote the success message.
                    msg = "%s failed to be removed: %s." % (msg, error)
            info = base_job.housekeeper_warning(info, msg)

        return {'error_count': server_count,
                'error_info': info,
                'fixed_count': fixed_count}
    def run(self, context, readonly=False):
        """Report firewall sections that v3_utils flags as orphaned.

        Returns a dict with ``error_count`` (number of orphaned sections),
        ``fixed_count`` and ``error_info`` (the accumulated report text).

        NOTE(review): the removal step is incomplete in this listing - an
        ``except`` appears without its ``try`` and without the removal
        call, and no ``else:`` separates the failure and success messages.
        Restore the lines from version control.
        """
        super(OrphanedFirewallSectionJob, self).run(context)

        # get all orphaned firewall sections
        orphaned_sections = v3_utils.get_orphaned_firewall_sections(
            context, self.plugin.nsxlib)

        info = ""
        if not orphaned_sections:
            msg = 'No orphaned firewall sections detected.'
            info = base_job.housekeeper_info(info, msg)
            return {'error_count': 0, 'fixed_count': 0, 'error_info': info}

        msg = ("Found %(len)s orphaned firewall section%(plural)s:" %
               {'len': len(orphaned_sections),
                'plural': 's' if len(orphaned_sections) > 1 else ''})
        info = base_job.housekeeper_warning(info, msg)
        fixed_count = 0
        for section in orphaned_sections:
            # One report line per section; 'Unknown' stands in for a falsy
            # neutron security group id.
            msg = ("Firewall section %(name)s [id: %(id)s] "
                   "neutron security group: %(sg)s" %
                   {'name': section['display_name'],
                    'id': section['id'],
                    'sg': section['neutron_sg_id'] if section['neutron_sg_id']
                    else 'Unknown'})
            if not readonly:
                # NOTE(review): `try:` and the removal call are missing.
                except Exception as e:
                    msg = "%s failed to be removed: %s." % (msg, e)
                    # NOTE(review): presumably an `else:` precedes the two
                    # lines below - confirm.
                    fixed_count = fixed_count + 1
                    msg = "%s was removed." % msg
            info = base_job.housekeeper_warning(info, msg)

        return {'error_count': len(orphaned_sections),
                'error_info': info,
                'fixed_count': fixed_count}
Exemplo n.º 12
    def run(self, context, readonly=False):
        """Report logical switches that v3_utils flags as orphaned.

        Returns a dict with ``error_count`` (number of orphaned switches),
        ``fixed_count`` and ``error_info`` (the accumulated report text).

        NOTE(review): the removal step is incomplete in this listing - an
        ``except`` appears without its ``try`` and without the removal
        call, and no ``else:`` separates the failure and success messages.
        Restore the lines from version control.
        """
        super(OrphanedLogicalSwitchJob, self).run(context)

        # get all orphaned logical switches
        orphaned_swithces = v3_utils.get_orphaned_networks(
            context, self.plugin.nsxlib)

        info = ""
        if not orphaned_swithces:
            msg = 'No orphaned logical switches detected.'
            info = base_job.housekeeper_info(info, msg)
            return {'error_count': 0, 'fixed_count': 0, 'error_info': info}

        msg = ("Found %(len)s orphaned logical switch%(plural)s:" %
               {'len': len(orphaned_swithces),
                'plural': 'es' if len(orphaned_swithces) > 1 else ''})
        info = base_job.housekeeper_warning(info, msg)
        fixed_count = 0
        for switch in orphaned_swithces:
            # One report line per switch; 'Unknown' stands in for a falsy
            # neutron network id.
            msg = ("Logical switch %(name)s [id: %(id)s] "
                   "(neutron network: %(net)s)" %
                   {'name': switch['display_name'],
                    'id': switch['id'],
                    'net': switch['neutron_net_id'] if switch['neutron_net_id']
                    else 'Unknown'})
            if not readonly:
                # NOTE(review): `try:` and the removal call are missing.
                except Exception as e:
                    msg = "%s failed to be removed: %s." % (msg, e)
                    # NOTE(review): presumably an `else:` precedes the two
                    # lines below - confirm.
                    fixed_count = fixed_count + 1
                    msg = "%s was removed." % (msg)
            info = base_job.housekeeper_warning(info, msg)

        return {'error_count': len(orphaned_swithces),
                'error_info': info,
                'fixed_count': fixed_count}
Exemplo n.º 13
    def run(self, context, readonly=False):
        """Report backup edges in ERROR state and optionally handle them.

        Router bindings are queried with a status filter of
        ``constants.ERROR`` and a ``router_id`` like-filter built from
        ``vcns_const.BACKUP_ROUTER_PREFIX``.  One warning is recorded per
        binding; unless ``readonly`` is set, ``self._handle_backup_edge`` is
        called for it while holding the lock named after the edge id.

        :param context: request context; ``context.session`` is used for
            the query.
        :param readonly: when True only report, do not call the handler.
        :returns: dict with ``error_count``, ``fixed_count`` and
            ``error_info``.
        """
        super(ErrorBackupEdgeJob, self).run(context)
        error_count = 0
        fixed_count = 0
        error_info = ''

        # Query the backup edge bindings that are in ERROR state
        filters = {'status': [constants.ERROR]}
        like_filters = {'router_id': vcns_const.BACKUP_ROUTER_PREFIX + "%"}
        with locking.LockManager.get_lock('nsx-edge-backup-pool'):
            error_edge_bindings = nsxv_db.get_nsxv_router_bindings(
                context.session, filters=filters, like_filters=like_filters)

        if not error_edge_bindings:
            LOG.debug('Housekeeping: no backup edges in ERROR state detected')
            return {
                'error_count': 0,
                'fixed_count': 0,
                'error_info': 'No backup edges in ERROR state detected'
            }

        # Keep list of current broken backup edges - as it may change while
        # HK is running
        for binding in error_edge_bindings:
            edge_id = binding['edge_id']
            error_count += 1
            # The message has one %s placeholder; the edge id fills it.
            error_info = base_job.housekeeper_warning(
                error_info, 'Backup Edge appliance %s is in ERROR state',
                edge_id)

            if not readonly:
                with locking.LockManager.get_lock(edge_id):
                    if self._handle_backup_edge(context, binding):
                        fixed_count += 1

        return {
            'error_count': error_count,
            'fixed_count': fixed_count,
            'error_info': error_info
        }
Exemplo n.º 14
    def _validate_dhcp_edge(self, context, edge_dict, pfx_dict, networks,
                            edge_id, readonly):
        """Cross-check one DHCP edge's bindings against Neutron and backend.

        Runs under the lock named after ``edge_id`` in three steps, (A) to
        (C), described by the inline comments.  Each detected problem
        increments ``self.error_count`` and reassigns ``self.error_info``;
        repairs are attempted only when ``readonly`` is False.

        NOTE(review): this listing has gaps - the three DB repair calls in
        steps (A) and (B) show only their argument lists, an ``else:``
        appears to be missing in step (A), the ``vnic_dict`` comprehension
        is never closed and the ``_add_missing_subinterfaces`` call lacks
        its last argument.  None of the warning calls pass the accumulated
        info string that other warning calls in this file pass first.
        Restore the lines from version control.
        """
        # Also metadata network should be a valid network for the edge
        az_name = self.plugin.get_availability_zone_name_by_edge(
            context, edge_id)
        with locking.LockManager.get_lock(edge_id):
            vnic_binds = nsxv_db.get_edge_vnic_bindings_by_edge(
                context.session, edge_id)
            edge_networks = [bind['network_id'] for bind in vnic_binds]

            # Step (A)
            # Find router bindings which are mapped to dead networks, or
            # do not have interfaces registered in nsxv tables
            for binding in edge_dict[edge_id]:
                router_id = binding['router_id']

                # The router id is the DHCP edge prefix followed by the
                # key used in pfx_dict.
                net_pfx = router_id[len(vcns_const.DHCP_EDGE_PREFIX):]
                net_id = pfx_dict.get(net_pfx)

                if net_id is None:
                    # Delete router binding as we do not have such network
                    # in Neutron
                    self.error_count += 1
                    self.error_info = base_job.housekeeper_warning(
                        'router binding %s for edge %s has no matching '
                        'neutron network', router_id, edge_id)

                    if not readonly:
                            # NOTE(review): name of the DB call is missing.
                            context.session, binding['router_id'])
                        self.fixed_count += 1
                    # NOTE(review): presumably an `else:` belongs here, since
                    # net_id is None on this path - confirm.
                    if net_id not in edge_networks:
                        # Create vNic bind here
                        self.error_count += 1
                        self.error_info = base_job.housekeeper_warning(
                            'edge %s vnic binding missing for network %s',
                            edge_id, net_id)

                        if not readonly:
                                # NOTE(review): name of the DB call is
                                # missing.
                                context.session, edge_id, net_id, az_name)
                            self.fixed_count += 1

            # Step (B)
            # Find vNic bindings which reference invalid networks or aren't
            # bound to any router binding

            # Reread vNic binds as we might created more or deleted some in
            #  step (A)
            vnic_binds = nsxv_db.get_edge_vnic_bindings_by_edge(
                context.session, edge_id)

            for bind in vnic_binds:
                if bind['network_id'] not in networks:
                    self.error_count += 1
                    self.error_info = base_job.housekeeper_warning(
                        'edge vnic binding for edge %s is for invalid '
                        'network id %s', edge_id, bind['network_id'])

                    if not readonly:
                            # NOTE(review): name of the DB call is missing.
                            context.session, edge_id, bind['network_id'])
                        self.fixed_count += 1

            # Step (C)
            # Verify that backend is in sync with Neutron

            # Reread vNic binds as we might deleted some in step (B)
            vnic_binds = nsxv_db.get_edge_vnic_bindings_by_edge(
                context.session, edge_id)

            # Transform to network-keyed dict
            vnic_dict = {
                vnic['network_id']: {
                    'vnic_index': vnic['vnic_index'],
                    'tunnel_index': vnic['tunnel_index']
                # NOTE(review): closing brace of the inner dict is missing.
                for vnic in vnic_binds
            # NOTE(review): closing brace of the comprehension is missing.

            backend_vnics = self.plugin.nsx_v.vcns.get_interfaces(
                edge_id)[1].get('vnics', [])
            # if_changed is filled by the two helpers below: vnic index ->
            # whether the vnic was flagged as changed.
            if_changed = {}
            self._validate_edge_subinterfaces(context, edge_id, backend_vnics,
                                              vnic_dict, if_changed)
            self._add_missing_subinterfaces(context, edge_id, vnic_binds,
                                            backend_vnics, if_changed,
            # NOTE(review): last argument and closing paren are missing.

            if not readonly:
                for vnic in backend_vnics:
                    if if_changed[vnic['index']]:
                        self.plugin.nsx_v.vcns.update_interface(edge_id, vnic)

                    # NOTE(review): as indented this runs once per vnic;
                    # confirm whether it was meant to run once per edge.
                    self._update_router_bindings(context, edge_id)

                self.fixed_count += self.fixed_sub_if_count
Exemplo n.º 15
    def run(self, context, readonly=False):
        """Track LBaaS objects in a pending state and flag the stuck ones.

        ``self.lbaas_objects`` maps an element id to a dict with ``model``,
        ``time_added`` and ``time_seen``.  An element that is still pending
        after more than ``ELEMENT_LIFETIME`` seconds is reported and, unless
        ``readonly`` is set, its ``provisioning_status`` is set to
        ``constants.ERROR``.  Tracked entries that were not seen in this run
        are forgotten.

        :param context: request context; ``context.session`` is queried.
        :param readonly: when True only report, do not modify elements.
        :returns: dict with ``error_count``, ``fixed_count`` and
            ``error_info``.
        """
        super(LbaasPendingJob, self).run(context)
        curr_time = time.time()
        error_count = 0
        fixed_count = 0
        error_info = ''

        # NOTE(review): the filter expression was truncated in the original
        # listing; only PENDING_CREATE and PENDING_UPDATE were visible.
        # The 'provisioning_status' column comes from the assignment below;
        # PENDING_DELETE and the SQLAlchemy in_() form are assumptions -
        # confirm against version control.
        pending_states = [
            constants.PENDING_CREATE, constants.PENDING_UPDATE,
            constants.PENDING_DELETE
        ]
        sess = context.session

        for model in self.lbaas_models:
            elements = sess.query(model).filter(
                model.provisioning_status.in_(pending_states)).all()

            for element in elements:
                element_id = element['id']
                obj = self.lbaas_objects.get(element_id)
                if obj is None:
                    # Entry wasn't seen before this iteration - add to dict
                    LOG.debug(
                        'Housekeeping: monitoring PENDING state for '
                        'LBaaS object %s %s', model.NAME, element_id)
                    self.lbaas_objects[element_id] = {
                        'model': model,
                        'time_added': curr_time,
                        'time_seen': curr_time
                    }
                    continue

                lifetime = curr_time - obj['time_added']
                if lifetime > ELEMENT_LIFETIME:
                    # Entry has been pending for more than lifetime.
                    # Report and remove when in R/W mode
                    error_count += 1
                    error_info = base_job.housekeeper_warning(
                        error_info,
                        'LBaaS %s %s is stuck in pending state',
                        model.NAME, element_id)

                    if not readonly:
                        element['provisioning_status'] = constants.ERROR
                        fixed_count += 1
                    del self.lbaas_objects[element_id]
                else:
                    # Entry is still pending but haven't reached lifetime
                    LOG.debug(
                        'Housekeeping: LBaaS object %s %s in '
                        'PENDING state for %d seconds', model.NAME,
                        element_id, lifetime)
                    obj['time_seen'] = curr_time

        # Look for dictionary entries which weren't seen in this iteration.
        # Such entries were either removed from DB or their state was changed.
        # Iterate over a snapshot of the keys: deleting from a dict while
        # iterating its live key view raises RuntimeError on Python 3.
        for obj_id in list(self.lbaas_objects):
            if self.lbaas_objects[obj_id]['time_seen'] != curr_time:
                LOG.debug('Housekeeping: LBaaS %s %s is back to normal',
                          self.lbaas_objects[obj_id]['model'].NAME, obj_id)
                del self.lbaas_objects[obj_id]

        if error_count == 0:
            error_info = 'No LBaaS objects in pending state'
        return {
            'error_count': error_count,
            'fixed_count': fixed_count,
            'error_info': error_info
        }