Example #1
    def create_security_group(self, knp, project_id):
        sg_name = ("sg-" + knp['metadata']['namespace'] + "-" +
                   knp['metadata']['name'])
        desc = ("Kuryr-Kubernetes Network Policy %s SG" %
                utils.get_res_unique_name(knp))
        try:
            # Create initial security group
            sg = self.os_net.create_security_group(name=sg_name,
                                                   project_id=project_id,
                                                   description=desc)
            driver_utils.tag_neutron_resources([sg])
            # NOTE(dulek): Neutron populates every new SG with two rules
            #              allowing egress on IPv4 and IPv6. This collides with
            #              how network policies are supposed to work, because
            #              initially even egress traffic should be blocked.
            #              To work around this we will delete those two SG
            #              rules just after creation.
            for sgr in sg.security_group_rules:
                self.os_net.delete_security_group_rule(sgr['id'])
        except (os_exc.SDKException, exceptions.ResourceNotReady) as exc:
            np = utils.get_referenced_object(knp, 'NetworkPolicy')
            if np:
                self.kubernetes.add_event(np, 'FailedToAddSecurityGroup',
                                          f'Adding new security group or '
                                          f'security group rules for '
                                          f'corresponding network policy has '
                                          f'failed: {exc}', 'Warning')
            LOG.exception("Error creating security group for network policy "
                          " %s", knp['metadata']['name'])
            raise

        return sg.id
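
A minimal usage sketch (names are hypothetical; the KuryrNetworkPolicy dict carries just the metadata fields the method actually reads):

# Hypothetical call: `drv` is an instance of the driver class above and
# PROJECT_ID a valid Neutron project ID.
knp = {
    'metadata': {
        'namespace': 'default',
        'name': 'deny-all',
    }
}
sg_id = drv.create_security_group(knp, PROJECT_ID)
# The returned SG starts without any rules: the two default egress rules
# Neutron adds on creation were deleted, so all traffic stays blocked until
# rules derived from the NetworkPolicy are added.
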
Example #2
    def add(self, params):
        kp_name = self._get_obj_name(params)
        timeout = CONF.cni_daemon.vif_annotation_timeout

        # In order to fight race conditions when pods get recreated with the
        # same name (think StatefulSet), we're trying to get pod UID either
        # from the request or the API in order to use it as the ID to compare.
        if 'K8S_POD_UID' not in params.args:
            # CRI doesn't pass K8S_POD_UID, get it from the API.
            pod = self._get_pod(params)
            if not pod:
                raise exceptions.CNIPodGone(kp_name)
            params.args.K8S_POD_UID = pod['metadata']['uid']

        vifs = self._do_work(params, b_base.connect, timeout)

        # NOTE(dulek): Saving containerid to be able to distinguish old DEL
        #              requests that we should ignore. We need a lock to
        #              prevent race conditions and replace whole object in the
        #              dict for multiprocessing.Manager to notice that.
        with lockutils.lock(kp_name, external=True):
            d = self.registry[kp_name]
            d['containerid'] = params.CNI_CONTAINERID
            self.registry[kp_name] = d
            LOG.debug('Saved containerid = %s for CRD %s',
                      params.CNI_CONTAINERID, kp_name)

        # Wait up to `timeout` seconds, 1 s between tries, retrying as long
        # as at least one VIF is not active yet.
        @retrying.retry(stop_max_delay=timeout * 1000,
                        wait_fixed=RETRY_DELAY,
                        retry_on_result=utils.any_vif_inactive)
        def wait_for_active(kp_name):
            return self.registry[kp_name]['vifs']

        data = {
            'metadata': {
                'name': params.args.K8S_POD_NAME,
                'namespace': params.args.K8S_POD_NAMESPACE
            }
        }
        pod = k_utils.get_referenced_object(data, 'Pod')

        try:
            self.k8s.add_event(pod,
                               'CNIWaitingForVIFs',
                               f'Waiting for Neutron ports of {kp_name} to '
                               f'become ACTIVE after binding.',
                               component='kuryr-daemon')
            vifs = wait_for_active(kp_name)
        except retrying.RetryError:
            self.k8s.add_event(
                pod, 'CNITimedOutWaitingForVIFs',
                f'Timed out waiting for Neutron ports of '
                f'{kp_name} to become ACTIVE after binding.', 'Warning',
                'kuryr-daemon')
            raise exceptions.CNINeutronPortActivationTimeout(
                kp_name, self.registry[kp_name]['vifs'])

        return vifs[k_const.DEFAULT_IFNAME]
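
The waiting logic relies on the `retrying` library: the decorated function is called again for as long as the predicate passed as retry_on_result returns True, with wait_fixed milliseconds between attempts, and retrying.RetryError is raised once stop_max_delay is exceeded, which is exactly what the except clause above handles. A self-contained sketch of the same pattern with a simplified predicate:

import retrying

RETRY_DELAY = 1000  # assumed value (ms); the comments above speak of 1 s


def any_vif_inactive(vifs):
    # Simplified stand-in for utils.any_vif_inactive(): keep retrying while
    # at least one VIF is not yet marked active.
    return any(not vif['active'] for vif in vifs.values())


@retrying.retry(stop_max_delay=60 * 1000,          # give up after 60 s
                wait_fixed=RETRY_DELAY,
                retry_on_result=any_vif_inactive)
def wait_for_active(registry, name):
    # Re-read the registry on every attempt; when the deadline expires while
    # the predicate is still True, retrying raises retrying.RetryError.
    return registry[name]['vifs']
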
Example #3
    def _get_vifs_from_registry(self, params, timeout):
        kp_name = self._get_obj_name(params)

        # In case of KeyError retry for `timeout` s, wait 1 s between tries.
        @retrying.retry(stop_max_delay=timeout * 1000,
                        wait_fixed=RETRY_DELAY,
                        retry_on_exception=lambda e: isinstance(
                            e, (KeyError, exceptions.CNIPodUidMismatch)))
        def find():
            d = self.registry[kp_name]
            if d == k_const.CNI_DELETED_POD_SENTINEL:
                # Pod got deleted meanwhile
                raise exceptions.CNIPodGone(kp_name)

            static = d['kp']['spec'].get('podStatic', None)
            uid = d['kp']['spec']['podUid']
            # FIXME(dulek): This is weirdly structured for upgrades support.
            #               If podStatic is not set (KuryrPort created by old
            #               Kuryr version), then on uid mismatch we're fetching
            #               pod from API and check if it's static here. Pods
            #               are quite ephemeral, so will gradually get replaced
            #               after the upgrade and in a while all should have
            #               the field set and the performance penalty should
            #               be resolved. Remove in the future.
            if 'K8S_POD_UID' in params.args and uid != params.args.K8S_POD_UID:
                if static is None:
                    pod = self._get_pod(params)
                    static = k_utils.is_pod_static(pod)

                # Static pods have mirror pod UID in API, so it's always
                # mismatched. We don't raise in that case. See [1] for more.
                # [1] https://github.com/k8snetworkplumbingwg/multus-cni/
                #     issues/773
                if not static:
                    raise exceptions.CNIPodUidMismatch(kp_name,
                                                       params.args.K8S_POD_UID,
                                                       uid)
            return d

        try:
            d = find()
            return d['kp'], d['vifs']
        except KeyError:
            data = {
                'metadata': {
                    'name': params.args.K8S_POD_NAME,
                    'namespace': params.args.K8S_POD_NAMESPACE
                }
            }
            pod = k_utils.get_referenced_object(data, 'Pod')
            self.k8s.add_event(
                pod, 'CNITimeoutKuryrPortRegistry',
                f'Timed out waiting for Neutron ports to be '
                f'created for {kp_name}. Check '
                f'kuryr-controller logs.', 'Warning', 'kuryr-daemon')
            raise exceptions.CNIKuryrPortTimeout(kp_name)
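
Note how the retry predicate interacts with the exception types: KeyError (the KuryrPort is not in the registry yet) and CNIPodUidMismatch (the registry still holds the pod's previous incarnation) are retried, while CNIPodGone, raised on the deleted-pod sentinel, is not matched by the predicate and therefore propagates immediately instead of burning the whole timeout. A reduced sketch of that behavior with stand-in names:

import retrying

CNI_DELETED_POD_SENTINEL = None   # stand-in for k_const.CNI_DELETED_POD_SENTINEL


class CNIPodGone(Exception):
    """Stand-in: the pod was deleted while we were waiting."""


@retrying.retry(stop_max_delay=10 * 1000, wait_fixed=1000,
                retry_on_exception=lambda e: isinstance(e, KeyError))
def find(registry, name):
    entry = registry[name]                 # KeyError -> retried
    if entry == CNI_DELETED_POD_SENTINEL:
        raise CNIPodGone(name)             # not in the predicate -> raised at once
    return entry
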
Example #4
def create_security_group_rule(body, knp):
    os_net = clients.get_network_client()
    k8s = clients.get_kubernetes_client()

    try:
        params = dict(body)
        if 'ethertype' in params:
            # NOTE(gryf): openstacksdk's security_group_rule object uses the
            # ether_type attribute, while the CRD keeps 'ethertype', just as
            # the old neutron client returned it.
            params['ether_type'] = params['ethertype']
            del params['ethertype']
        sgr = os_net.create_security_group_rule(**params)
        return sgr.id
    except os_exc.ConflictException as ex:
        if 'quota' in ex.details.lower():
            np = utils.get_referenced_object(knp, 'NetworkPolicy')
            k8s.add_event(
                np, 'FailedToCreateSecurityGroupRule',
                f'Creating security group rule for corresponding '
                f'Network Policy has failed: {ex}', 'Warning')
            LOG.error("Failed to create security group rule %s: %s", body,
                      ex.details)
            raise
        else:
            LOG.debug(
                "Failed to create already existing security group "
                "rule %s", body)
            # Get the existing SG rule ID from the exception message
            return str(ex).split()[-1][:-1]
    except os_exc.SDKException as exc:
        np = utils.get_referenced_object(knp, 'NetworkPolicy')
        k8s.add_event(
            np, 'FailedToCreateSecurityGroupRule',
            f'Creating security group rule for corresponding '
            f'Network Policy has failed: {exc}', 'Warning')
        LOG.debug("Error creating security group rule")
        raise
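
The body dict comes in the CRD/neutronclient style, so it carries an 'ethertype' key, which the function renames to the ether_type parameter openstacksdk expects. An illustrative rule body (field values are made up; knp only has to resolve to the owning NetworkPolicy for event reporting):

body = {
    'security_group_id': sg_id,     # SG created for the NetworkPolicy
    'direction': 'ingress',
    'ethertype': 'IPv4',            # CRD-style key, renamed to ether_type
    'protocol': 'tcp',
    'port_range_min': 8080,
    'port_range_max': 8080,
}
rule_id = create_security_group_rule(body, knp)
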
Example #5
def delete_security_group_rule(security_group_rule_id, knp):
    os_net = clients.get_network_client()
    k8s = clients.get_kubernetes_client()

    try:
        LOG.debug("Deleting sg rule with ID: %s", security_group_rule_id)
        os_net.delete_security_group_rule(security_group_rule_id)
    except os_exc.SDKException as exc:
        np = utils.get_referenced_object(knp, 'NetworkPolicy')
        k8s.add_event(np, 'FailedToDeleteSecurityGroupRule',
                      f'Deleting security group rule for corresponding '
                      f'Network Policy has failed: {exc}',
                      'Warning')
        LOG.debug("Error deleting security group rule: %s",
                  security_group_rule_id)
        raise
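
If a caller wants the deletion to be idempotent, it can treat an already-deleted rule as success; a hypothetical wrapper around the helper above (not part of the original module) could look like this:

from openstack import exceptions as os_exc


def delete_security_group_rule_if_exists(rule_id, knp):
    # Hypothetical convenience wrapper: a rule that is already gone counts
    # as deleted. NotFoundException is an SDKException subclass, so the
    # helper above logs it, emits the event and re-raises; we swallow only
    # that case and let every other error propagate.
    try:
        delete_security_group_rule(rule_id, knp)
    except os_exc.NotFoundException:
        pass
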
Example #6
    def _do_work(self, params, fn, timeout):
        kp_name = self._get_obj_name(params)

        # In case of KeyError retry for `timeout` s, wait 1 s between tries.
        @retrying.retry(stop_max_delay=timeout * 1000,
                        wait_fixed=RETRY_DELAY,
                        retry_on_exception=lambda e: isinstance(e, KeyError))
        def find():
            return self.registry[kp_name]

        try:
            d = find()
            kp = d['kp']
            vifs = d['vifs']
        except KeyError:
            data = {
                'metadata': {
                    'name': params.args.K8S_POD_NAME,
                    'namespace': params.args.K8S_POD_NAMESPACE
                }
            }
            pod = k_utils.get_referenced_object(data, 'Pod')
            self.k8s.add_event(
                pod, 'CNITimeoutKuryrPortRegistry',
                f'Timed out waiting for Neutron ports to be '
                f'created for {kp_name}. Check '
                f'kuryr-controller logs.', 'Warning')
            raise exceptions.CNIKuryrPortTimeout(kp_name)

        for ifname, vif in vifs.items():
            is_default_gateway = (ifname == k_const.DEFAULT_IFNAME)
            if is_default_gateway:
                # NOTE(ygupta): if this is the default interface, we should
                # use the ifname supplied in the CNI ADD request
                ifname = params.CNI_IFNAME

            fn(vif,
               self._get_inst(kp),
               ifname,
               params.CNI_NETNS,
               report_health=self.report_drivers_health,
               is_default_gateway=is_default_gateway,
               container_id=params.CNI_CONTAINERID)
        return vifs
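
The fn argument (b_base.connect in the caller) is only assumed to have the call signature visible above; a stub with the same signature, e.g. for tests, might look like this (parameter names mirror the call, everything else is hypothetical):

def fake_connect(vif, instance_info, ifname, netns, report_health=None,
                 is_default_gateway=False, container_id=None):
    # Stand-in for b_base.connect with the signature _do_work() relies on:
    # one VIF at a time, the pod's instance info, the interface name to
    # plug, the container network namespace path and the CNI container ID.
    print('plugging %s as %s into netns %s (container %s)'
          % (vif, ifname, netns, container_id))
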
Example #7
    def _patch_kuryrnetworkpolicy_crd(self, knp, field, data,
                                      action='replace'):
        name = knp['metadata']['name']
        LOG.debug('Patching KuryrNetworkPolicy CRD %s', name)
        try:
            status = self.k8s.patch_crd(field, utils.get_res_link(knp),
                                        data, action=action)
        except exceptions.K8sResourceNotFound:
            LOG.debug('KuryrNetworkPolicy CRD not found %s', name)
            return None
        except exceptions.K8sClientException as exc:
            np = utils.get_referenced_object(knp, 'NetworkPolicy')
            self.k8s.add_event(np, 'FailedToPatchKuryrNetworkPolicy',
                               f'Failed to update KuryrNetworkPolicy CRD: '
                               f'{exc}', 'Warning')
            LOG.exception('Error updating KuryrNetworkPolicy CRD %s', name)
            raise

        knp['status'] = status
        return knp
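
A hedged usage sketch: elsewhere in the handler this is presumably how Neutron identifiers end up in the CRD status, e.g. recording the security group (the field name mirrors the 'securityGroupId' key read back in on_finalize() below):

# Hypothetical call site inside the handler:
knp = self._patch_kuryrnetworkpolicy_crd(
    knp, 'status', {'securityGroupId': sg_id})
if knp is None:
    # The CRD was deleted underneath us; nothing left to update.
    return
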
Example #8
    def on_finalize(self, knp, *args, **kwargs):
        LOG.debug("Finalizing KuryrNetworkPolicy %s", knp)
        project_id = self._drv_project.get_project(knp)
        pods_to_update = self._drv_policy.affected_pods(knp)
        crd_sg = knp['status'].get('securityGroupId')
        try:
            policy = self._get_networkpolicy(knp['metadata']['annotations']
                                             ['networkPolicyLink'])
        except exceptions.K8sResourceNotFound:
            # NP is already gone, let's just try to clean up.
            policy = None

        if crd_sg:
            for pod in pods_to_update:
                if (utils.is_host_network(pod)
                        or not driver_utils.is_pod_scheduled(pod)):
                    continue
                pod_sgs = self._drv_pod_sg.get_security_groups(pod, project_id)
                if crd_sg in pod_sgs:
                    pod_sgs.remove(crd_sg)
                if not pod_sgs:
                    pod_sgs = CONF.neutron_defaults.pod_security_groups
                    if not pod_sgs:
                        raise cfg.RequiredOptError(
                            'pod_security_groups',
                            cfg.OptGroup('neutron_defaults'))
                try:
                    self._drv_vif_pool.update_vif_sgs(pod, pod_sgs)
                except os_exc.NotFoundException:
                    # Pod got deleted in the meantime, safe to ignore.
                    pass

            # ensure ports at the pool don't have the NP sg associated
            try:
                net_id = self._get_policy_net_id(knp)
                self._drv_vif_pool.remove_sg_from_pools(crd_sg, net_id)
            except exceptions.K8sResourceNotFound:
                # Probably the network got removed already, we can ignore it.
                pass

            try:
                self._drv_policy.delete_np_sg(crd_sg)
            except os_exc.SDKException as exc:
                np = utils.get_referenced_object(knp, 'NetworkPolicy')
                if np:
                    self.k8s.add_event(np, 'FailedToRemoveSecurityGroup',
                                       f'Deleting security group for '
                                       f'corresponding Network Policy has '
                                       f'failed: {exc}', 'Warning')
                    raise

            if (CONF.octavia_defaults.enforce_sg_rules and policy and
                    not self._is_egress_only_policy(policy)):
                services = driver_utils.get_services(
                    knp['metadata']['namespace'])
                for svc in services.get('items'):
                    if (not svc['spec'].get('selector') or not
                            self._is_service_affected(svc, pods_to_update)):
                        continue

                    sgs = self._drv_svc_sg.get_security_groups(svc, project_id)

                    if crd_sg in sgs:
                        # Remove our crd_sg out of service groups since we
                        # don't have it anymore
                        sgs.remove(crd_sg)

                    try:
                        self._drv_lbaas.update_lbaas_sg(svc, sgs)
                    except exceptions.ResourceNotReady:
                        # We can ignore an LB that's still being created; its
                        # SGs will be handled when its members are created.
                        pass

        LOG.debug("Removing finalizers from KuryrNetworkPolicy and "
                  "NetworkPolicy.")
        if policy:
            self.k8s.remove_finalizer(policy,
                                      constants.NETWORKPOLICY_FINALIZER)
        self.k8s.remove_finalizer(knp, constants.NETWORKPOLICY_FINALIZER)
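
The per-pod loop above encodes a small invariant worth spelling out: after the policy's SG is dropped, a pod must never end up with an empty security group list, so the configured neutron_defaults.pod_security_groups are substituted, and missing defaults are a hard configuration error. The same rule in isolation (stand-in names, plain RuntimeError instead of cfg.RequiredOptError):

def security_groups_without_policy(pod_sgs, crd_sg, default_sgs):
    # Remove the NetworkPolicy's SG from the pod's current set; if nothing
    # is left, fall back to the configured default pod security groups and
    # treat missing defaults as a configuration error.
    sgs = [sg for sg in pod_sgs if sg != crd_sg]
    if not sgs:
        if not default_sgs:
            raise RuntimeError(
                'pod_security_groups must be set in [neutron_defaults]')
        sgs = list(default_sgs)
    return sgs
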
Example #9
    def add(self, params):
        kp_name = self._get_obj_name(params)
        timeout = CONF.cni_daemon.vif_annotation_timeout

        # Try to confirm if CRD in the registry is not stale cache. If it is,
        # remove it.
        with lockutils.lock(kp_name, external=True):
            if kp_name in self.registry:
                cached_kp = self.registry[kp_name]['kp']
                try:
                    kp = self.k8s.get(k_utils.get_res_link(cached_kp))
                except Exception:
                    LOG.exception('Error when getting KuryrPort %s', kp_name)
                    raise exceptions.ResourceNotReady(kp_name)

                if kp['metadata']['uid'] != cached_kp['metadata']['uid']:
                    LOG.warning(
                        'Stale KuryrPort %s detected in cache. (API '
                        'uid=%s, cached uid=%s). Removing it from '
                        'cache.', kp_name, kp['metadata']['uid'],
                        cached_kp['metadata']['uid'])
                    del self.registry[kp_name]

        vifs = self._do_work(params, b_base.connect, timeout)

        # NOTE(dulek): Saving containerid to be able to distinguish old DEL
        #              requests that we should ignore. We need a lock to
        #              prevent race conditions and replace whole object in the
        #              dict for multiprocessing.Manager to notice that.
        with lockutils.lock(kp_name, external=True):
            d = self.registry[kp_name]
            d['containerid'] = params.CNI_CONTAINERID
            self.registry[kp_name] = d
            LOG.debug('Saved containerid = %s for CRD %s',
                      params.CNI_CONTAINERID, kp_name)

        # Wait up to `timeout` seconds, 1 s between tries, retrying as long
        # as at least one VIF is not active yet.
        @retrying.retry(stop_max_delay=timeout * 1000,
                        wait_fixed=RETRY_DELAY,
                        retry_on_result=utils.any_vif_inactive)
        def wait_for_active(kp_name):
            return self.registry[kp_name]['vifs']

        data = {
            'metadata': {
                'name': params.args.K8S_POD_NAME,
                'namespace': params.args.K8S_POD_NAMESPACE
            }
        }
        pod = k_utils.get_referenced_object(data, 'Pod')

        try:
            self.k8s.add_event(
                pod, 'CNIWaitingForVIFs',
                f'Waiting for Neutron ports of {kp_name} to '
                f'become ACTIVE after binding.')
            vifs = wait_for_active(kp_name)
        except retrying.RetryError:
            self.k8s.add_event(
                pod, 'CNITimedOutWaitingForVIFs',
                f'Timed out waiting for Neutron ports of '
                f'{kp_name} to become ACTIVE after binding.', 'Warning')
            raise exceptions.CNINeutronPortActivationTimeout(
                kp_name, self.registry[kp_name]['vifs'])

        return vifs[k_const.DEFAULT_IFNAME]
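
The NOTE about "replacing the whole object in the dict" points at a multiprocessing.Manager quirk: reading a nested value through a DictProxy hands you a local copy, so mutating it in place is simply lost. The registry therefore has to be updated read-modify-write, reassigning the top-level key. A minimal standalone demonstration, independent of Kuryr:

import multiprocessing


if __name__ == '__main__':
    registry = multiprocessing.Manager().dict()
    registry['kp'] = {'containerid': None, 'vifs': {}}

    # This change would be made on a local copy returned by the proxy and
    # would be lost:
    #     registry['kp']['containerid'] = 'abc'
    #
    # The idiom used above is read-modify-write of the whole value:
    d = registry['kp']
    d['containerid'] = 'abc'
    registry['kp'] = d        # reassign so the Manager-backed dict updates

    print(registry['kp']['containerid'])   # -> abc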