Exemplo n.º 1
0
class CniPlugin(object):
    """
    Class which encapsulates the function of a CNI plugin.
    """
    def __init__(self, network_config, env):
        """
        Build a plugin instance from the CNI network config and environment.

        :param network_config - Parsed CNI network configuration dictionary.
        The "name" and "ipam" -> "type" entries are read unconditionally;
        "hostname" and "kubernetes" are optional.
        :param env - Dictionary of CNI environment variables.  The command,
        container ID, netns, interface name and CNI path entries are read
        unconditionally; CNI_ARGS is optional.  A copy is stored for IPAM
        calls so that the caller's dictionary is not modified.
        """
        # DatastoreClient for access to the Calico datastore.
        self._client = DatastoreClient()

        # Remember the CNI_ARGS exactly as supplied.  They are needed again
        # below when building the IPAM environment.
        original_cni_args = env.get(CNI_ARGS_ENV, "")

        # Parse CNI_ARGS into dictionary so we can extract values.
        cni_args = parse_cni_args(original_cni_args)

        # Name of Kubernetes pod if running under Kubernetes, else None.
        self.k8s_pod_name = cni_args.get(K8S_POD_NAME)

        # Name of Kubernetes namespace if running under Kubernetes, else None.
        self.k8s_namespace = cni_args.get(K8S_POD_NAMESPACE)

        # Network config as provided in the CNI network file.
        self.network_config = network_config

        # Name of the network from the provided network config file.
        self.network_name = network_config["name"]

        # Type of IPAM to use, e.g calico-ipam.
        self.ipam_type = network_config["ipam"]["type"]

        # The hostname to register endpoints under.
        self.hostname = network_config.get("hostname", socket.gethostname())

        # Chooses the correct container engine based on the given
        # configuration.
        self.container_engine = get_container_engine(self.k8s_pod_name)

        # Environment dictionary used when calling the IPAM plugin.  This is
        # a copy: the plugin rewrites the command and CNI_ARGS entries and
        # must not clobber the values the caller passed in.
        self.ipam_env = dict(env)

        # The command to execute for this plugin instance. Required.
        # One of:
        #   - CNI_CMD_ADD
        #   - CNI_CMD_DELETE
        self.command = env[CNI_COMMAND_ENV]
        assert self.command in [CNI_CMD_DELETE, CNI_CMD_ADD], \
                "Invalid CNI command %s" % self.command

        # The container's ID in the containerizer. Required.
        self.container_id = env[CNI_CONTAINERID_ENV]

        # Relative path to the network namespace of this container.
        self.cni_netns = env[CNI_NETNS_ENV]

        # Name of the interface to create within the container.
        self.interface = env[CNI_IFNAME_ENV]

        # Path in which to search for CNI plugins.
        self.cni_path = env[CNI_PATH_ENV]

        # Configure orchestrator specific settings.
        # workload_id: In Kubernetes, this is the pod's namespace and name.
        #              Otherwise, this is the container ID.
        # orchestrator_id: Either "k8s" or "cni".
        self.running_under_k8s = self.k8s_namespace and self.k8s_pod_name
        if self.running_under_k8s:
            self.workload_id = "%s.%s" % (self.k8s_namespace,
                                          self.k8s_pod_name)
            self.orchestrator_id = "k8s"
        else:
            self.workload_id = self.container_id
            self.orchestrator_id = "cni"
        kubernetes_config = network_config.get("kubernetes", {})
        self.kubeconfig_path = kubernetes_config.get("kubeconfig")
        self.k8s_node_name = kubernetes_config.get("node_name",
                                                   socket.gethostname())

        # Ensure that the ipam_env CNI_ARGS contains the IgnoreUnknown=1 option
        # See https://github.com/appc/cni/pull/158
        # And https://github.com/appc/cni/pull/127
        ipam_cni_args = 'IgnoreUnknown=1'
        if original_cni_args:
            # Append any existing args - if they are set.  The original
            # value is used here; reading it back from the environment after
            # overwriting it would only ever yield 'IgnoreUnknown=1'.
            ipam_cni_args += ";%s" % original_cni_args
        self.ipam_env[CNI_ARGS_ENV] = ipam_cni_args

        # Chooses the correct policy driver based on the given configuration.
        self.policy_driver = get_policy_driver(self)

    def execute(self):
        """
        Run the handler that corresponds to this instance's CNI command.

        CNI_CMD_ADD is dispatched to add(); any other command is dispatched
        to delete().

        :return: None.
        """
        handler = self.add if self.command == CNI_CMD_ADD else self.delete
        handler()

    def add(self):
        """Handles CNI_CMD_ADD requests.

        Sets up Calico networking for the container and prints the resulting
        json to stdout.

        CNI may call the plugin several times for one container - once per
        network the container joins.  Calico models each network as a
        profile, and a container only ever gets one endpoint / veth / IP
        address regardless of how many CNI networks it is on.

        :return: None.
        """
        # Containers using host networking are not networked by Calico.
        # This is only expected in Kubernetes mode with docker - rkt
        # doesn't call plugins when using host networking.
        if self.container_engine.uses_host_networking(self.container_id):
            _log.info(
                "Cannot network container %s since it is configured "
                "with host networking.", self.container_id)
            sys.exit(0)

        _log.info("Configuring network '%s' for container: %s",
                  self.network_name, self.container_id)

        _log.debug("Checking for existing Calico endpoint")
        existing = self._get_endpoint()
        if not existing:
            # Nothing is configured yet for this workload.
            _log.info("No endpoint exists for workload - creating")
            result = self._add_new_endpoint()
        elif self.running_under_k8s:
            # Kubernetes only supports a single CNI network, so a create
            # for a workload that already has an endpoint means the old pod
            # was destroyed and replaced.  Tear down the stale endpoint,
            # IP assignment and profile, then network the new pod.
            _log.info("Kubernetes pod has been recreated")
            self._remove_stale_endpoint(existing)

            # The IPAM environment must carry the DELETE command while the
            # previous addresses are released.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)

            # Drop any profile left behind by the stale endpoint.
            self.policy_driver.remove_profile()

            # Switch back to ADD and configure the new workload.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_ADD
            result = self._add_new_endpoint()
        else:
            # Outside Kubernetes, a create for an existing container most
            # likely means it is joining another CNI network.  The veth and
            # IP addresses already exist; only the new network is added.
            _log.info("Endpoint for container exists - add to new network")
            result = self._add_existing_endpoint(existing)

        # Everything succeeded - emit the result on stdout.
        serialized = json.dumps(result)
        _log.debug("Printing CNI result to stdout: %s", serialized)
        print(serialized)

        _log.info("Finished networking container: %s", self.container_id)

    def _add_new_endpoint(self):
        """
        Handles adding a new container to a Calico network.

        Assigns addresses through the IPAM plugin, creates the endpoint,
        provisions its veth and applies the profile.  If applying the profile
        fails, the veth, workload and IP assignment are undone and the
        process exits with a CNI error.

        :return: The result returned by the IPAM plugin.
        """
        _log.info("Configuring a new Endpoint")
        v4_net, v6_net, result = self._assign_ips(self.ipam_env)

        # Keep only the address families that were actually assigned.
        assigned = []
        for net in (v4_net, v6_net):
            if net is not None:
                assigned.append(net)

        # Create the Calico endpoint, then provision its veth.
        new_endpoint = self._provision_veth(self._create_endpoint(assigned))

        try:
            self.policy_driver.apply_profile(new_endpoint)
        except PolicyException as err:
            _log.error("Failed to apply profile to endpoint %s",
                       new_endpoint.name)
            # Roll back everything configured above before exiting.
            self._remove_veth(new_endpoint)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, err.message, err.details)
            sys.exit(ERR_CODE_GENERIC)

        return result

    def _add_existing_endpoint(self, endpoint):
        """
        Handles adding an existing container to a new Calico network.

        We've already assigned an IP address and created the veth,
        we just need to apply a new profile to this endpoint.

        Exits the process with a CNI error if the profile cannot be applied.

        :param endpoint - the existing Calico endpoint for this container.
        :return: Dictionary of the form
        {"ip4": {"ip": <cidr>}, "ip6": {"ip": <cidr>}}.  An address family
        with no network on the endpoint is reported as 0.0.0.0/32 or ::/128.
        """
        # Get the already existing IP information for this Endpoint.
        try:
            ip4 = next(iter(endpoint.ipv4_nets))
        except StopIteration:
            # No IPv4 address on this endpoint.
            _log.warning("No IPV4 address attached to existing endpoint")
            ip4 = IPNetwork("0.0.0.0/32")

        try:
            ip6 = next(iter(endpoint.ipv6_nets))
        except StopIteration:
            # No IPv6 address on this endpoint.
            _log.warning("No IPV6 address attached to existing endpoint")
            ip6 = IPNetwork("::/128")

        # Apply a new profile to this endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except PolicyException as e:
            # Hit an exception applying the profile.  We haven't configured
            # anything, so we don't need to clean anything up.  Just exit.
            _log.error("Failed to apply profile to endpoint %s", endpoint.name)
            # Report the exception details as well as the message, matching
            # how _add_new_endpoint reports the same failure.
            print_cni_error(ERR_CODE_GENERIC, e.message, e.details)
            sys.exit(ERR_CODE_GENERIC)

        return {"ip4": {"ip": str(ip4.cidr)}, "ip6": {"ip": str(ip6.cidr)}}

    def delete(self):
        """Handles CNI_CMD_DELETE requests.

        Detaches this container from Calico networking: releases its IP
        assignments, then removes the veth, the workload and any profile.
        If no Calico endpoint exists for the workload, the process exits
        successfully once the IP release has been attempted.

        :return: None.
        """
        _log.info("Remove network '%s' from container: %s", self.network_name,
                  self.container_id)

        # IP assignments are released first, whether or not an endpoint
        # is found afterwards.
        self._release_ip(self.ipam_env)

        # Without an endpoint there is nothing further to tear down.
        found = self._get_endpoint()
        if not found:
            _log.warning("No Calico Endpoint for workload: %s",
                         self.workload_id)
            sys.exit(0)

        # Tear down in order: veth, workload, profile.
        self._remove_veth(found)
        self._remove_workload()
        self.policy_driver.remove_profile()

        _log.info("Finished removing container: %s", self.container_id)

    def _assign_ips(self, env):
        """Assigns IP addresses using the IPAM plugin specified in the
        network config file.

        Prints a CNI error and exits the process if the plugin response
        cannot be parsed, the plugin reports a failure, an address is
        malformed, or no address at all is returned.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin.  Its command entry must be CNI_CMD_ADD.
        :return: ipv4, ipv6, ipam_result - The assigned networks (None for a
        family that was not assigned) and the decoded IPAM plugin response.
        """
        # Call the IPAM plugin.  Returns the plugin returncode,
        # as well as the CNI result from stdout.
        _log.debug("Assigning IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_ADD
        rc, result = self._call_ipam_plugin(env)

        try:
            # Load the response - either the assigned IP addresses or
            # a CNI error message.
            ipam_result = json.loads(result)
            if not isinstance(ipam_result, dict):
                # Valid JSON, but not the object that the lookups below
                # rely on.  Treat it like any other unparseable response.
                raise ValueError("IPAM response is not a JSON object")
        except (TypeError, ValueError):
            # TypeError covers a response that is not a string at all.
            message = "Failed to parse IPAM response, exiting"
            _log.exception(message)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        if rc:
            # The IPAM plugin failed to assign an IP address. At this point in
            # execution, we haven't done anything yet, so we don't have to
            # clean up.
            _log.error("IPAM plugin error (rc=%s): %s", rc, result)
            code = ipam_result.get("code", ERR_CODE_GENERIC)
            msg = ipam_result.get("msg", "Unknown IPAM error")
            details = ipam_result.get("details")
            print_cni_error(code, msg, details)
            sys.exit(int(code))

        ipv4 = self._parse_assigned_ip(ipam_result, "ip4", "IPv4")
        ipv6 = self._parse_assigned_ip(ipam_result, "ip6", "IPv6")

        if not ipv4 and not ipv6:
            message = "IPAM plugin did not return any valid addresses."
            _log.warning("Bad IPAM plugin response: %s", ipam_result)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return ipv4, ipv6, ipam_result

    def _parse_assigned_ip(self, ipam_result, key, family):
        """Extracts one address family from a decoded IPAM plugin response.

        :param ipam_result - The decoded IPAM plugin response dictionary.
        :param key - Response key holding the address, "ip4" or "ip6".
        :param family - Label used in log and error messages, e.g. "IPv4".
        :return: IPNetwork for the assigned address, or None if the response
        has no usable entry for this family.  Exits the process with a CNI
        error if an address is present but invalid.
        """
        try:
            address = ipam_result[key]["ip"]
        except (KeyError, TypeError):
            # Entry is missing, or is not a dictionary (e.g. null).
            return None

        try:
            network = IPNetwork(address)
        except (AddrFormatError, ValueError):
            message = "Invalid or Empty %s address: %s" % (family, address)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        _log.info("IPAM plugin assigned %s address: %s", family, network)
        return network

    def _release_ip(self, env):
        """Asks the IPAM plugin named in the network config file to release
        the IP address(es) held by this container.

        A failure reported by the plugin is logged but not raised.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin.  Its command entry must be CNI_CMD_DELETE.
        :return: None.
        """
        _log.info("Releasing IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_DELETE

        # Only the return code matters for a release.
        returncode, _unused_output = self._call_ipam_plugin(env)
        if returncode:
            _log.error("IPAM plugin failed to release IP address")

    def _call_ipam_plugin(self, env):
        """
        Runs the configured CNI IPAM plugin.

        The `calico-ipam` type is served in-process by IpamPlugin rather
        than by launching a binary.  Every other type is located on the
        CNI_PATH and executed.  For `host-local`, a subnet of `usePodCidr`
        is first replaced with this node's Kubernetes podCIDR.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin.
        :return: Tuple of return code, response from the IPAM plugin.
        """
        ipam_config = self.network_config["ipam"]

        if self.ipam_type == "calico-ipam":
            _log.info("Using Calico IPAM")
            try:
                response = IpamPlugin(env, ipam_config).execute()
            except CniError as err:
                # Convert the CNI error into the CNI formatted error
                # dictionary and use its code as the return code.
                response = json.dumps({
                    "code": err.code,
                    "msg": err.msg,
                    "details": err.details
                })
                code = err.code
            else:
                code = 0
        else:
            if self.ipam_type == "host-local":
                # The "host-local" plugin may be asked to use the Kubernetes
                # podCidr of this node as its subnet.  Substitute the real
                # value into the config before the plugin is called.
                if ipam_config.get("subnet") == "usePodCidr":
                    if not self.running_under_k8s:
                        print_cni_error(
                            ERR_CODE_GENERIC, "Invalid network config",
                            "Must be running under Kubernetes to use 'subnet: usePodCidr'"
                        )
                        sys.exit(ERR_CODE_GENERIC)
                    _log.info("Using Kubernetes podCIDR for node: %s",
                              self.k8s_node_name)
                    node_cidr = self._get_kubernetes_pod_cidr()
                    self.network_config["ipam"]["subnet"] = str(node_cidr)
                _log.debug("Calling host-local IPAM plugin")
            else:
                # Some other IPAM plugin.
                _log.debug("Using binary plugin")

            # Both cases run the plugin binary.
            code, response = self._call_binary_ipam_plugin(env)

        _log.debug("IPAM response (rc=%s): %s", code, response)
        return code, response

    def _get_kubernetes_pod_cidr(self):
        """
        Get the Kubernetes pod CIDR for this node from the Kubernetes API.

        The node is found by listing nodes and matching on
        self.k8s_node_name.  The API is accessed using the kubeconfig file
        at self.kubeconfig_path.

        Prints a CNI error and exits the process if no kubeconfig is
        configured, the API query fails, the node is not found, or the node
        has no podCIDR.

        :return: The node's spec.podCIDR value.
        """
        _log.info("Getting node.spec.podCidr from API, kubeconfig: %s",
                  self.kubeconfig_path)
        if not self.kubeconfig_path:
            # For now, kubeconfig is the only supported auth method.
            print_cni_error(
                ERR_CODE_GENERIC, "Missing kubeconfig",
                "usePodCidr requires specification of kubeconfig file")
            sys.exit(ERR_CODE_GENERIC)

        # Query the API for this node.
        try:
            api = HTTPClient(KubeConfig.from_file(self.kubeconfig_path))
            node = None
            for n in Node.objects(api):
                _log.debug("Checking node: %s", n.obj["metadata"]["name"])
                if n.obj["metadata"]["name"] == self.k8s_node_name:
                    node = n
                    break
            if not node:
                raise KeyError("Unable to find node in API: %s" %
                               self.k8s_node_name)
            _log.debug("Found node %s: %s: ", node.obj["metadata"]["name"],
                       node.obj["spec"])
        except Exception:
            # Any failure while talking to the API is fatal for this call.
            # Log the underlying error first, because the CNI error below
            # carries no detail about what actually went wrong.
            _log.exception("Failed to get node %s from Kubernetes API",
                           self.k8s_node_name)
            print_cni_error(ERR_CODE_GENERIC, "Error querying Kubernetes API",
                            "Failed to get podCidr from Kubernetes API")
            sys.exit(ERR_CODE_GENERIC)
        else:
            pod_cidr = node.obj["spec"].get("podCIDR")
            if not pod_cidr:
                print_cni_error(ERR_CODE_GENERIC, "Missing podCidr",
                                "No podCidr for node %s" % self.k8s_node_name)
                sys.exit(ERR_CODE_GENERIC)
        _log.debug("Using podCidr: %s", pod_cidr)
        return pod_cidr

    def _call_binary_ipam_plugin(self, env):
        """Runs the IPAM plugin binary for the configured IPAM type.

        The binary is located with _find_ipam_plugin.  The CNI network
        configuration is serialized as JSON and written to the plugin's
        stdin.  If no binary is found, a CNI error is printed and the
        process exits.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin
        :return: Tuple of return code, response from the IPAM plugin.
        """
        binary = self._find_ipam_plugin()
        if not binary:
            message = "Could not find IPAM plugin of type %s in path %s." % \
                      (self.ipam_type, self.cni_path)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        _log.info("Using IPAM plugin at: %s", binary)
        _log.debug("Passing in environment to IPAM plugin: \n%s",
                   json.dumps(env, indent=2))

        # Launch the plugin and feed it the network config on stdin.
        process = Popen(binary, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env)
        plugin_input = json.dumps(self.network_config)
        out, err = process.communicate(plugin_input)

        _log.debug("IPAM plugin return code: %s", process.returncode)
        _log.debug("IPAM plugin output: \nstdout:\n%s\nstderr:\n%s", out,
                   err)
        return process.returncode, out

    def _create_endpoint(self, ip_list):
        """Creates an endpoint for this workload in the Calico datastore.

        If creation fails with AddrFormatError or KeyError, the IP
        assignment is released, a CNI error is printed and the process
        exits.

        :param ip_list - list of IP addresses that have been already allocated
        :return Calico endpoint object
        """
        _log.debug("Creating Calico endpoint with workload_id=%s",
                   self.workload_id)
        try:
            created = self._client.create_endpoint(self.hostname,
                                                   self.orchestrator_id,
                                                   self.workload_id, ip_list)
        except (AddrFormatError, KeyError) as err:
            # AddrFormatError: an IP address type is not compatible with
            #                  the node.
            # KeyError: BGP config for the host was not found.
            _log.exception("Failed to create Calico endpoint.")
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, err.message)
            sys.exit(ERR_CODE_GENERIC)
        else:
            _log.info("Created Calico endpoint with IP address(es) %s",
                      ip_list)
            return created

    def _remove_stale_endpoint(self, endpoint):
        """
        Deletes a stale endpoint - one that is no longer in use - from
        Calico.

        IP allocations are not released here.  That has to be done through
        the designated IPAM plugin.  A KeyError from the datastore client is
        logged and ignored.

        :param endpoint - the stale Calico endpoint to remove.
        :return: None.
        """
        _log.info("Removing stale Calico endpoint '%s'", endpoint)
        datastore = self._client
        try:
            datastore.remove_endpoint(endpoint)
        except KeyError:
            # Not expected, since the workload is known to exist.
            _log.info("Error removing stale endpoint, ignoring")

    def _remove_workload(self):
        """Removes this container's workload from the Calico datastore.

        The workload is looked up by self.workload_id first.  If that is not
        found, removal is retried with the "cni" orchestrator and the
        container ID.  A workload missing under both IDs is logged and
        otherwise ignored.

        :return: None
        """
        _log.info("Removing Calico workload '%s'", self.workload_id)
        try:
            self._client.remove_workload(hostname=self.hostname,
                                         orchestrator_id=self.orchestrator_id,
                                         workload_id=self.workload_id)
            return
        except KeyError:
            _log.debug("Could not find workload with workload ID %s.",
                       self.workload_id)

        # Earlier releases of the plugin used the container ID for the
        # workload ID rather than the Kubernetes pod name and namespace,
        # so retry the removal with the container ID.
        try:
            self._client.remove_workload(hostname=self.hostname,
                                         orchestrator_id="cni",
                                         workload_id=self.container_id)
        except KeyError:
            _log.warning("Could not find workload with container ID %s.",
                         self.container_id)

    def _provision_veth(self, endpoint):
        """Provisions veth for given endpoint.

        Uses the netns relative path passed in through CNI_NETNS_ENV and
        interface passed in through CNI_IFNAME_ENV.  The MAC address
        returned by provisioning is stored on the endpoint, and the endpoint
        is then written back to the datastore.

        If provisioning fails with CalledProcessError, the workload is
        removed, the IP assignment is released, a CNI error is printed and
        the process exits.

        :param endpoint - the Calico endpoint to provision a veth for.
        :return Calico endpoint object
        """
        _log.debug("Provisioning Calico veth interface")
        netns_path = os.path.abspath(os.path.join(os.getcwd(), self.cni_netns))
        _log.debug("netns path: %s", netns_path)

        try:
            endpoint.mac = endpoint.provision_veth(Namespace(netns_path),
                                                   self.interface)
        except CalledProcessError as e:
            _log.exception(
                "Failed to provision veth interface for endpoint %s",
                endpoint.name)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            # CalledProcessError does not populate `message`, so use str(e),
            # which names the failed command and its exit status.
            print_cni_error(ERR_CODE_GENERIC, str(e))
            sys.exit(ERR_CODE_GENERIC)

        _log.debug("Endpoint has mac address: %s", endpoint.mac)

        self._client.set_endpoint(endpoint)
        _log.info("Provisioned %s in netns %s", self.interface, netns_path)
        return endpoint

    def _remove_veth(self, endpoint):
        """Removes the veth belonging to the given endpoint.

        A CalledProcessError raised during removal is logged as a warning
        and not propagated.

        :param endpoint - the Calico endpoint whose veth should be removed.
        :return: None.
        """
        _log.info("Removing veth for endpoint: %s", endpoint.name)
        try:
            was_removed = netns.remove_veth(endpoint.name)
        except CalledProcessError:
            _log.warning("Unable to remove veth %s", endpoint.name)
        else:
            _log.debug("Successfully removed endpoint %s? %s", endpoint.name,
                       was_removed)

    @handle_datastore_error
    def _get_endpoint(self):
        """Get endpoint matching self.workload_id.

        If we cannot find an endpoint using self.workload_id, try
        using self.container_id with the "cni" orchestrator.

        Return None if no endpoint is found.
        Exits with an error if multiple endpoints are found by either
        lookup.

        :return: Endpoint object if found, None if not found
        """
        # Track which ID is being looked up so that a multiple-match error
        # names the ID that actually matched more than once.
        lookup_id = self.workload_id
        try:
            try:
                _log.debug("Looking for endpoint that matches workload ID %s",
                           self.workload_id)
                endpoint = self._client.get_endpoint(
                    hostname=self.hostname,
                    orchestrator_id=self.orchestrator_id,
                    workload_id=self.workload_id)
            except KeyError:
                # Try to find using the container ID.  In earlier version of
                # the plugin, the container ID was used as the workload ID.
                _log.debug("No endpoint found matching workload ID %s",
                           self.workload_id)
                lookup_id = self.container_id
                try:
                    endpoint = self._client.get_endpoint(
                        hostname=self.hostname,
                        orchestrator_id="cni",
                        workload_id=self.container_id)
                except KeyError:
                    # We were unable to find an endpoint using either the
                    # workload ID or the container ID.
                    _log.debug("No endpoint found matching container ID %s",
                               self.container_id)
                    endpoint = None
        except MultipleEndpointsMatch:
            # This handler wraps both lookups.  An except clause placed
            # alongside the KeyError handler would not catch an error raised
            # by the fallback lookup inside that handler.
            message = "Multiple Endpoints found matching ID %s" % lookup_id
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return endpoint

    def _find_ipam_plugin(self):
        """Searches the plugin path for the IPAM plugin binary.

        The binary name is the IPAM type from the network config file, and
        the directories searched are the ":"-separated entries of the CNI
        path taken from the CNI_PATH environment variable.  The first entry
        containing a regular file with that name wins.

        :rtype : str
        :return: absolute path of the IPAM plugin binary, or an empty string
        if it was not found in any directory.
        """
        binary_name = self.ipam_type
        for directory in self.cni_path.split(":"):
            _log.debug("Looking for plugin %s in path %s", binary_name,
                       directory)
            candidate = os.path.abspath(os.path.join(directory, binary_name))
            if os.path.isfile(candidate):
                _log.debug("Found plugin %s in path %s", binary_name,
                           directory)
                return str(candidate)
        return str("")
Exemplo n.º 2
0
class PolicyAgent():
    def __init__(self):
        # Queue to populate with events from API watches.
        self._event_queue = Queue.Queue()

        # Scheme, IP and port of the Kubernetes API.
        self.k8s_api = os.environ.get("K8S_API", "https://10.100.0.1:443")

        # Auth token to use when accessing the API.
        self.auth_token = os.environ.get("K8S_AUTH_TOKEN")

        def _new_watcher(path_format):
            # Build (but do not start) a daemon thread which runs
            # _watch_api() against the given path for this API server.
            watcher = Thread(target=self._watch_api,
                             args=(path_format % self.k8s_api,))
            watcher.daemon = True
            return watcher

        # Threads which watch the Kubernetes API for changes to
        # NetworkPolicy, Namespace and Pod objects respectively.  They are
        # started by run().
        self._network_policy_thread = _new_watcher(NET_POLICY_WATCH_PATH)
        self._namespace_thread = _new_watcher(NAMESPACE_WATCH_PATH)
        self._pod_thread = _new_watcher(POD_WATCH_PATH)

        # Client for accessing the Calico datastore.
        self._client = DatastoreClient()

        # Internal state: the latest API object seen for each resource,
        # keyed by the tuple built in _process_update().
        self._network_policies = {}
        self._namespaces = {}
        self._pods = {}

    def run(self):
        """
        Entry point, called at program init.  Starts the three API watch
        threads and then hands over to read_updates() to consume the events
        they queue.
        """
        # Kick off the watches on the Kubernetes API.
        _log.info("Starting API watch on: NetworkPolicy, Pod, Namespace")
        watchers = (self._network_policy_thread,
                    self._namespace_thread,
                    self._pod_thread)
        for watcher in watchers:
            watcher.start()

        # Consume the updates which the watch threads place on the queue.
        _log.info("Reading from event queue")
        self.read_updates()

    def read_updates(self):
        """
        Reads updates from the event queue forever, passing each one to
        _process_update().

        Every item taken from the queue is acknowledged with task_done(),
        whichever read returned it, so that the queue's unfinished-task
        count stays balanced.  An update which raises KeyError while being
        processed is logged and dropped, after which the loop pauses for
        10 seconds before reading again.
        """
        while True:
            try:
                # Try a non-blocking read first so that we log when the
                # queue has drained, then fall back to a blocking read.
                _log.info("Non-blocking read from event queue")
                update = self._event_queue.get(block=False)
            except Queue.Empty:
                _log.info("Queue empty, waiting for updates")
                update = self._event_queue.get(block=True)

            # Acknowledge the item regardless of which get() returned it.
            self._event_queue.task_done()

            if not update:
                # An empty item carries nothing to process.
                continue

            try:
                # We've received an update - process it.
                _log.debug("Read update from queue: %s",
                           json.dumps(update, indent=2))
                self._process_update(update)
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                # Drop it so that we don't try to parse it again.
                _log.exception("Invalid update: %s", update)
                time.sleep(10)

    def _process_update(self, update):
        """
        Takes an update from the queue and updates our state accordingly.

        NetworkPolicy, Namespace and Pod updates are passed to the matching
        add/update or delete method.  Updates for any other kind are
        ignored, as are updates whose type is not one of ADDED, MODIFIED or
        DELETED (a warning is logged for the latter).

        :param update: Watch event dict with a "type" entry and an "object"
          entry holding the API resource ("kind" and "metadata").
        :raises KeyError: if the update lacks a field needed to identify
          the resource.
        """
        # Parse out the type of update and resource.
        update_type = update["type"]
        resource_type = update["object"]["kind"]
        _log.info("Processing '%s' for kind '%s'", update_type, resource_type)

        # Determine the key for this object.  Namespaces are keyed off their
        # name alone; all other objects off their namespace and name.
        name = update["object"]["metadata"]["name"]
        if resource_type == RESOURCE_TYPE_NAMESPACE:
            key = (name,)
        else:
            namespace = update["object"]["metadata"]["namespace"]
            key = (namespace, name)

        # Select the handlers for this kind of resource, along with a
        # description of the resource for use in log messages.
        if resource_type == RESOURCE_TYPE_NETWORK_POLICY:
            # NetworkPolicy objects correspond directly to Calico policies.
            add_update = self._add_new_network_policy
            delete = self._delete_network_policy
            description = "network policy"
        elif resource_type == RESOURCE_TYPE_NAMESPACE:
            # Each Namespace is represented by its own Calico policy.
            add_update = self._add_new_namespace
            delete = self._delete_namespace
            description = "namespace"
        elif resource_type == RESOURCE_TYPE_POD:
            # Pods have policy applied to them using Namespaces and
            # NetworkPolicy objects, so their endpoints need the correct
            # labels applied.
            add_update = self._add_update_pod
            delete = self._delete_pod
            description = "pod"
        else:
            # Not a resource type which we act upon.
            return

        if update_type in [TYPE_ADDED, TYPE_MODIFIED]:
            add_update(key, update)
        elif update_type == TYPE_DELETED:
            try:
                delete(key, update)
            except KeyError:
                _log.warning("Delete for unknown %s: %s", description, key)
        else:
            # Check explicitly rather than with assert: asserts are stripped
            # under -O, which would send unknown event types down the
            # delete path.
            _log.warning("Ignoring unsupported update type '%s' for %s: %s",
                         update_type, description, key)

    def _add_new_network_policy(self, key, policy):
        """
        Takes a new or modified network policy from the Kubernetes API and
        creates the corresponding Calico global policy.

        The policy is stored in the internal dict first.  If its inbound
        rules cannot be calculated the error is logged and no Calico policy
        is written.

        :param key: Tuple identifying the policy, used for storage and logs.
        :param policy: Watch event dict; the NetworkPolicy is under "object".
        :raises KeyError: if the policy lacks spec.podSelector or the
          metadata name / namespace.
        """
        _log.info("Adding new network policy: %s", key)
        self._network_policies[key] = policy

        # Parse this network policy so we can convert it to the appropriate
        # Calico policy.  First, get the selector from the API object.  A
        # null selector is treated as empty, i.e. no label constraints.
        k8s_selector = policy["object"]["spec"]["podSelector"] or {}

        # Build the appropriate Calico label selector.  This is done using
        # the labels provided in the NetworkPolicy, as well as the
        # NetworkPolicy's namespace.
        namespace = policy["object"]["metadata"]["namespace"]
        selectors = ["%s == '%s'" % (k, v) for k, v in k8s_selector.items()]
        selectors.append("%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace))
        selector = " && ".join(selectors)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["object"]["metadata"]["name"]

        # Build the Calico rules.
        try:
            inbound_rules = self._calculate_inbound_rules(policy)
        except Exception:
            # It is possible bad rules will be passed - we don't want to
            # crash the agent, but we do want to indicate a problem in the
            # logs, so that the policy can be fixed.
            _log.exception("Error parsing policy: %s",
                           json.dumps(policy, indent=2))
            return

        rules = Rules(id=name,
                      inbound_rules=inbound_rules,
                      outbound_rules=[Rule(action="allow")])

        # Create the network policy using the calculated selector and rules.
        self._client.create_global_policy(NET_POL_GROUP_NAME, name,
                                          selector, rules)
        _log.info("Updated global policy '%s' for NetworkPolicy %s", name, key)

    def _delete_network_policy(self, key, policy):
        """
        Handles deletion of a network policy: forgets it locally and then
        removes the matching global policy from the Calico datastore.

        Raises KeyError if the policy is not in the internal dict, in which
        case the datastore is not touched.
        """
        _log.info("Deleting network policy: %s", key)

        # Forget the policy.  pop() raises KeyError for an unknown key.
        self._network_policies.pop(key)

        # The Calico global policy is named after the NetworkPolicy.
        metadata = policy["object"]["metadata"]
        self._client.remove_global_policy(NET_POL_GROUP_NAME,
                                          "net_policy-%s" % metadata["name"])

    def _calculate_inbound_rules(self, policy):
        """
        Takes a NetworkPolicy object from the API and returns a list of
        Calico Rule objects which should be applied on ingress.

        One "allow" rule is produced for every combination of protocol and
        "from" clause within each inbound rule.

        :param policy: Watch event dict; the NetworkPolicy is under "object".
        :return: List of Rule objects.
        :raises KeyError: if the policy has no spec.inbound entry.
        """
        # Store the rules to return.
        rules = []

        # Iterate through each inbound rule and create the appropriate
        # rules.
        allow_incomings = policy["object"]["spec"]["inbound"]
        for r in allow_incomings:
            # Determine the destination ports to allow.  If no ports are
            # specified, allow all port / protocol combinations.
            ports_by_protocol = {}
            for to_port in r.get("ports", []):
                # Keep a dict of ports exposed, keyed by protocol.
                protocol = to_port.get("protocol")
                port = to_port.get("port")
                ports = ports_by_protocol.setdefault(protocol, [])
                if port:
                    _log.debug("Allow to port: %s/%s", protocol, port)
                    ports.append(port)

            # Convert into arguments to be passed to a Rule object.
            # NOTE(review): a port entry with no protocol fails on .lower()
            # here; the caller logs this as a policy parse error.
            to_args = []
            for protocol, ports in ports_by_protocol.items():
                arg = {"protocol": protocol.lower()}
                if ports:
                    arg["dst_ports"] = ports
                to_args.append(arg)

            if not to_args:
                # There are no destination protocols / ports specified.
                # Allow to all protocols and ports.
                to_args = [{}]

            # Determine the from criteria.  If no "from" block is specified,
            # then we should allow from all sources.
            from_args = []
            for from_clause in r.get("from", []):
                pod_selector = from_clause.get("pods", {})
                namespaces = from_clause.get("namespaces", {})
                if pod_selector:
                    # There is a pod selector in this "from" clause.
                    _log.debug("Allow from pods: %s", pod_selector)
                    selectors = ["%s == '%s'" % (k, v)
                                 for k, v in pod_selector.items()]
                    selector = " && ".join(selectors)
                    from_args.append({"src_selector": selector})
                elif namespaces:
                    _log.warning("'from: {namespaces: {}}' is not yet "
                                 "supported - ignoring %s", from_clause)

            if not from_args:
                # There are no match criteria specified.  We should allow
                # from all sources to the given ports.
                from_args = [{}]

            # A rule per-protocol, per-from-clause.
            for to_arg in to_args:
                for from_arg in from_args:
                    # Combine into a fresh dict each time.  Updating
                    # from_arg in place would carry one protocol's
                    # "dst_ports" over into the rule for the next protocol.
                    rule_args = dict(from_arg)
                    rule_args.update(to_arg)
                    rule_args["action"] = "allow"
                    rules.append(Rule(**rule_args))

        _log.debug("Calculated rules: %s", rules)
        return rules

    def _add_new_namespace(self, key, namespace):
        """
        Handles a new or modified namespace from the Kubernetes API by
        storing it and creating / updating the Calico global policy which
        represents it.
        """
        _log.info("Adding new namespace: %s", key)

        # Remember the latest version of the namespace.
        self._namespaces[key] = namespace
        metadata = namespace["object"]["metadata"]

        # Work out whether the namespace asks for network isolation.  It
        # does only if the annotation is present with the value "yes".
        annotations = metadata.get("annotations", {})
        _log.debug("Namespace %s has annotations: %s", key, annotations)
        net_isolation = annotations.get(NS_POLICY_ANNOTATION, "no") == "yes"
        _log.info("Namespace %s has: network-isolation=%s", key, net_isolation)

        # The policy is named after the namespace.
        namespace_name = metadata["name"]
        policy_name = "k8s_ns-%s" % namespace_name

        # Outbound traffic is always allowed; inbound traffic is denied
        # when the namespace is isolated and allowed otherwise.
        outbound_rules = [Rule(action="allow")]
        inbound_rules = [Rule(action="deny" if net_isolation else "allow")]
        rules = Rules(id=policy_name,
                      inbound_rules=inbound_rules,
                      outbound_rules=outbound_rules)

        # The namespace policy selects every pod carrying this namespace's
        # label.
        selector = "%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace_name)
        self._client.create_global_policy(NAMESPACE_GROUP_NAME, policy_name,
                                          selector, rules=rules)
        _log.info("Created/updated global policy for namespace %s",
                  namespace_name)

    def _delete_namespace(self, key, namespace):
        """
        Handles deletion of a namespace: removes the Calico global policy
        which represents it and then forgets the namespace locally.

        Raises KeyError if the namespace is not in the internal dict.
        """
        _log.info("Deleting namespace: %s", key)

        # Remove the Calico policy which represents this namespace.
        # NOTE: no check is made here for pods still running in the
        # namespace.
        policy_name = "k8s_ns-%s" % namespace["object"]["metadata"]["name"]
        self._client.remove_global_policy(NAMESPACE_GROUP_NAME, policy_name)

        # Forget the namespace.  pop() raises KeyError for an unknown key.
        self._namespaces.pop(key)

    def _add_update_pod(self, key, pod):
        """
        Takes a new or updated pod from the Kubernetes API and applies its
        labels to the corresponding Calico endpoint.

        If no endpoint exists yet for the pod, nothing is changed beyond
        storing the pod.  If more than one endpoint matches the pod, the
        process exits with status 1.

        :param key: Tuple identifying the pod, used for storage and logs.
        :param pod: Watch event dict; the Pod is under "object".
        :raises KeyError: if the pod metadata lacks a name or namespace.
        """
        _log.info("Adding new pod: %s", key)

        # Store the latest version of the API Pod.
        self._pods[key] = pod
        metadata = pod["object"]["metadata"]

        # Get the Calico endpoint.  This may or may not have already been
        # created by the CNI plugin.  If it hasn't been created, we need to
        # wait until it has before we can do any meaningful work.
        workload_id = "%s.%s" % (metadata["namespace"], metadata["name"])
        try:
            _log.debug("Looking for endpoint that matches workload_id=%s",
                       workload_id)
            endpoint = self._client.get_endpoint(
                orchestrator_id="cni",
                workload_id=workload_id
            )
        except KeyError:
            # We don't need to do anything special here, just return.
            # We'll receive another update when the Pod enters running state.
            _log.warning("No endpoint for '%s', wait until running",
                         workload_id)
            return
        except MultipleEndpointsMatch:
            # We should never have multiple endpoints with the same
            # workload_id.  This could theoretically occur if the Calico
            # datastore is out-of-sync with what pods actually exist, but
            # this is an error state and indicates a problem elsewhere.
            _log.error("Multiple Endpoints found matching ID %s", workload_id)
            sys.exit(1)

        # Get Kubernetes labels.  Work on a copy so that the special label
        # added below doesn't leak into the stored API object, and treat
        # missing or null labels as empty.
        labels = dict(metadata.get("labels") or {})
        _log.debug("Pod '%s' has labels: %s", key, labels)

        # Add a special label for the Kubernetes namespace.
        labels[K8S_NAMESPACE_LABEL] = metadata["namespace"]

        # Set the labels on the endpoint.
        endpoint.labels = labels
        self._client.set_endpoint(endpoint)
        _log.info("Updated labels on pod %s", key)

        # Clear the profiles on the endpoint now that it has been
        # configured with labels.  It will match at least the per-namespace
        # policy, and potentially others, which will define what
        # connectivity is allowed.
        self._client.set_profiles_on_endpoint([],
                                              orchestrator_id="cni",
                                              workload_id=endpoint.workload_id)

    def _delete_pod(self, key, pod):
        """
        Handles deletion of a pod by dropping it from the internal dict.
        Nothing is removed from the Calico datastore here.

        Raises KeyError if the pod is not in the internal dict.
        """
        _log.info("Deleting pod: %s", key)

        # Forget the pod.  pop() raises KeyError for an unknown key.
        self._pods.pop(key)

    def _watch_api(self, path, resource_version=None):
        """
        Work loop for the watch thread.  Streams events from the given API
        path and places each one, parsed from JSON, on the event queue.

        The loop never returns.  Request failures, non-200 responses and
        errors while streaming are logged and retried after 10 seconds;
        when a stream ends normally the watch is restarted straight away.
        Lines which are not valid JSON are logged and skipped.

        :param path: URL of the API path to watch.
        :param resource_version: Optional resourceVersion passed to
          _get_api_stream() on every (re)connection.
        """
        _log.info("Starting watch on path: %s", path)
        while True:
            # Attempt to stream API resources.
            try:
                response = self._get_api_stream(path, resource_version)
                _log.info("Watch response for %s: %s", path, response)
            except requests.RequestException:
                _log.exception("Error querying path: %s", path)
                time.sleep(10)
                continue

            try:
                # Check for successful response.
                if response.status_code != 200:
                    _log.error("Error watching path: %s", response.text)
                    time.sleep(10)
                    continue

                # Success - add resources to the queue for processing.
                for line in response.iter_lines():
                    # Filter out keep-alive new lines.
                    if not line:
                        continue
                    _log.debug("Adding line to queue: %s", line)
                    try:
                        event = json.loads(line)
                    except ValueError:
                        # Skip the bad line rather than letting the
                        # exception kill the watch thread.
                        _log.exception("Ignoring malformed watch event: %s",
                                       line)
                        continue
                    self._event_queue.put(event)
            except requests.RequestException:
                # The stream broke part way through - reconnect after a
                # pause instead of letting the thread die.
                _log.exception("Error streaming path: %s", path)
                time.sleep(10)
            finally:
                # Release the connection before the next attempt.
                response.close()

    def _get_api_stream(self, path, resource_version=None):
        """
        Open a streaming GET request against the given API path.

        :param path: URL of the API path to watch.
        :param resource_version: Optional resourceVersion indicating where
          the watch should start.  When given it is added to the URL as a
          query parameter.
        :return: The streaming response.  Callers read it line by line and
          parse each line as a JSON object, e.g.
          {"type": "MODIFIED"|"ADDED"|"DELETED", "object":{...}}
        :rtype: requests.Response
        """
        # Append the resource version - this indicates where the
        # watch should start.
        _log.info("Streaming API resources '%s' at version '%s'",
                  path, resource_version)
        if resource_version:
            # Use the right separator in case the path already carries a
            # query string.
            separator = "&" if "?" in path else "?"
            path += "%sresourceVersion=%s" % (separator, resource_version)

        session = requests.Session()
        if self.auth_token:
            # Never log the token itself - it is a credential.
            _log.debug("Using auth token for API request")
            session.headers.update(
                {'Authorization': 'Bearer ' + self.auth_token})

        # NOTE(review): verify=False disables TLS certificate verification
        # of the API server.
        return session.get(path, verify=False, stream=True)
    
    def _get_api_resource(self, path):
        """
        Get a resource from the specified API path.

        :param path: URL of the API resource to fetch.
        :return: The response body parsed as JSON.
        :raises ValueError: if the response body is not valid JSON.
        """
        _log.debug("Getting API Resource: %s", path)
        session = requests.Session()
        if self.auth_token:
            # Never log the token itself - it is a credential.
            _log.debug("Using auth token for API request")
            session.headers.update(
                {'Authorization': 'Bearer ' + self.auth_token})

        # NOTE(review): verify=False disables TLS certificate verification
        # of the API server.
        response = session.get(path, verify=False)
        return json.loads(response.text)
Exemplo n.º 3
0
class PolicyAgent():
    def __init__(self):
        # Queue to populate with events from API watches.
        self._event_queue = Queue.Queue()

        # Scheme, IP and port of the Kubernetes API.
        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)

        # Auth token to use when accessing the API.  The K8S_AUTH_TOKEN
        # environment variable takes precedence over the value returned by
        # read_token_file().
        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())

        # Only log whether a token is present - the token itself is a
        # credential and must not end up in the logs.
        _log.debug("Auth token configured: %s", bool(self.auth_token))

        # True if a CA cert has been mounted by Kubernetes, i.e. something
        # exists at CA_CERT_PATH.
        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)

        # Client for accessing the Calico datastore.
        self._client = DatastoreClient()

        # Handlers for watch events, keyed by (resource type, event type).
        self._handlers = {}
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         self._add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         self._delete_network_policy)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         self._add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         self._delete_namespace)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, self._add_update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, self._delete_pod)

    def add_handler(self, resource_type, event_type, handler):
        """
        Registers the handler to call for events of the given event type
        on the given resource type, replacing any handler already
        registered for that combination.
        """
        _log.info("Setting %s %s handler: %s",
                  resource_type, event_type, handler)
        self._handlers[(resource_type, event_type)] = handler

    def get_handler(self, resource_type, event_type):
        """
        Returns the handler registered for the given resource type and
        event type.  Raises KeyError if none has been registered.
        """
        lookup = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", lookup)
        return self._handlers[lookup]

    def run(self):
        """
        PolicyAgent.run() is called at program init.  For each resource
        type it fetches and processes the existing objects, then spawns a
        daemon thread to watch for changes from the returned
        resourceVersion.  Finally it loops reading events from the queue.

        If the API returns a non-200 response for any of the initial
        queries, the error is logged and run() returns without reading
        from the queue.
        """
        resources = [
            RESOURCE_TYPE_NETWORK_POLICY, RESOURCE_TYPE_NAMESPACE,
            RESOURCE_TYPE_POD
        ]
        for resource_type in resources:
            # Get existing resources from the API.
            _log.info("Getting existing %s objects", resource_type)
            get_url = GET_URLS[resource_type] % self.k8s_api
            resp = self._api_get(get_url, stream=False)
            _log.info("Response: %s", resp)

            if resp.status_code != 200:
                # Log the raw body - an error response is not guaranteed
                # to be JSON, so parsing it here could itself fail.
                _log.error("Error querying API: %s", resp.text)
                return

            # Parse the response body once and pull out what we need.
            body = resp.json()
            updates = body["items"] or []
            metadata = body.get("metadata", {})
            resource_version = metadata.get("resourceVersion")
            _log.debug("%s metadata: %s", resource_type, metadata)

            # Process the existing resources.
            _log.info("%s existing %s(s)", len(updates), resource_type)
            for update in updates:
                _log.debug("Processing existing resource: %s",
                           json.dumps(update, indent=2))
                self._process_update(TYPE_ADDED, resource_type, update)

            # Start watching for updates from the last resourceVersion.
            watch_url = WATCH_URLS[resource_type] % self.k8s_api
            t = Thread(target=self._watch_api,
                       args=(watch_url, resource_version))
            t.daemon = True
            t.start()
            _log.info("Started watch on: %s", resource_type)

        # Loop and read updates from the queue.
        _log.info("Reading from event queue")
        self.read_updates()

    def read_updates(self):
        """
        Reads updates from the event queue forever, passing the type, kind
        and object of each one to _process_update().

        Every item taken from the queue is acknowledged with task_done(),
        whichever read returned it, so that the queue's unfinished-task
        count stays balanced.  An update which raises KeyError while being
        parsed or processed is logged and dropped, after which the loop
        pauses for 10 seconds before reading again.
        """
        while True:
            try:
                # Try a non-blocking read first so that we log when the
                # queue has drained, then fall back to a blocking read.
                _log.info("Non-blocking read from event queue")
                update = self._event_queue.get(block=False)
            except Queue.Empty:
                _log.info("Queue empty, waiting for updates")
                update = self._event_queue.get(block=True)

            # Acknowledge the item regardless of which get() returned it.
            self._event_queue.task_done()

            if not update:
                # An empty item carries nothing to process.
                continue

            try:
                # We've received an update - process it.
                _log.debug("Read update from queue: %s",
                           json.dumps(update, indent=2))
                self._process_update(update["type"], update["object"]["kind"],
                                     update["object"])
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                # Drop it so that we don't try to parse it again.
                _log.exception("Invalid update: %s", update)
                time.sleep(10)

    def _process_update(self, event_type, resource_type, resource):
        """
        Dispatches a single event to the handler registered for its
        resource type and event type.  MODIFIED events are handled by the
        ADDED handler.  A missing handler is logged and otherwise ignored,
        as is a KeyError raised by the handler itself.
        """
        _log.info("Processing '%s' for kind '%s'", event_type, resource_type)

        # Build a (namespace, name) key for the object.  It is passed to
        # the handler and makes the object easy to identify in logs.  The
        # namespace is None for objects which don't have one.
        metadata = resource["metadata"]
        name = metadata["name"]
        key = (metadata.get("namespace"), name)

        # Modifications go through the same handler as additions.
        if event_type == TYPE_MODIFIED:
            _log.info("Treating 'MODIFIED' as 'ADDED'")
            event_type = TYPE_ADDED

        # Find the handler for this event, giving up if there isn't one.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s", event_type, resource_type)
            return

        _log.debug("Calling handler: %s", handler)
        try:
            handler(key, resource)
        except KeyError:
            _log.exception("Invalid %s: %s", resource_type,
                           json.dumps(resource, indent=2))

    def _add_update_network_policy(self, key, policy):
        """
        Handles a new or modified NetworkPolicy from the Kubernetes API by
        creating / updating the equivalent Calico global policy.  If the
        policy's inbound rules can't be calculated, the error is logged and
        nothing is written.
        """
        _log.info("Adding new network policy: %s", key)

        # Start from the policy's pod selector; a null selector is treated
        # as an empty one.
        pod_selector = policy["spec"]["podSelector"] or {}

        # The Calico selector requires every label in the pod selector to
        # match, plus the label for the NetworkPolicy's own namespace.
        namespace = policy["metadata"]["namespace"]
        terms = ["%s == '%s'" % (label, value)
                 for label, value in pod_selector.iteritems()]
        terms.append("%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace))
        selector = " && ".join(terms)

        # The global policy is named after the NetworkPolicy.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Convert the ingress rules.  Bad rules may be passed in - don't
        # crash the agent over them, but do make the problem visible in
        # the logs so that the policy can be fixed.
        try:
            inbound_rules = self._calculate_inbound_rules(policy)
        except Exception:
            _log.exception("Error parsing policy: %s",
                           json.dumps(policy, indent=2))
            return

        # Outbound traffic is always allowed.
        rules = Rules(id=name,
                      inbound_rules=inbound_rules,
                      outbound_rules=[Rule(action="allow")])

        # Write the policy with the calculated selector and rules.
        self._client.create_global_policy(NET_POL_GROUP_NAME, name, selector,
                                          rules)
        _log.info("Updated global policy '%s' for NetworkPolicy %s", name, key)

    def _delete_network_policy(self, key, policy):
        """
        Handles deletion of a NetworkPolicy by removing the matching global
        policy from the Calico datastore.  A KeyError from the removal is
        logged as the policy already having been deleted.
        """
        _log.info("Deleting network policy: %s", key)

        # The global policy is named after the NetworkPolicy.
        policy_name = "net_policy-%s" % policy["metadata"]["name"]

        # Remove it, tolerating the case where it has already gone.
        try:
            self._client.remove_global_policy(NET_POL_GROUP_NAME, policy_name)
        except KeyError:
            _log.info("Unable to find policy '%s' - already deleted", key)

    def _calculate_inbound_rules(self, policy):
        """
        Takes a NetworkPolicy object from the API and returns a list of 
        Calico Rules objects which should be applied on ingress.
        """
        _log.debug("Calculating inbound rules")

        # Store the rules to return.
        rules = []

        # Get this policy's namespace.
        policy_ns = policy["metadata"]["namespace"]

        # Iterate through each inbound rule and create the appropriate
        # rules.
        allow_incomings = policy["spec"].get("ingress") or []
        _log.info("Found %s ingress rules", len(allow_incomings))
        for r in allow_incomings:
            # Determine the destination ports to allow.  If no ports are
            # specified, allow all port / protocol combinations.
            _log.debug("Processing ingress rule: %s", r)
            ports_by_protocol = {}
            for to_port in r.get("ports", []):
                # Keep a dict of ports exposed, keyed by protocol.
                protocol = to_port.get("protocol")
                port = to_port.get("port")
                ports = ports_by_protocol.setdefault(protocol, [])
                if port:
                    _log.debug("Allow to port: %s/%s", protocol, port)
                    ports.append(port)

            # Convert into arguments to be passed to a Rule object.
            to_args = []
            for protocol, ports in ports_by_protocol.iteritems():
                arg = {"protocol": protocol.lower()}
                if ports:
                    arg["dst_ports"] = ports
                to_args.append(arg)

            if not to_args:
                # There are not destination protocols / ports specified.
                # Allow to all protocols and ports.
                to_args = [{}]

            # Determine the from criteria.  If no "from" block is specified,
            # then we should allow from all sources.
            from_args = []
            for from_clause in r.get("from", []):
                # We need to check if the key exists, not just if there is
                # a non-null value.  The presence of the key with a null
                # value means "select all".
                pods_present = "pods" in from_clause
                namespaces_present = "namespaces" in from_clause
                _log.debug("Is 'pods:' present? %s", pods_present)
                _log.debug("Is 'namespaces:' present? %s", namespaces_present)

                if pods_present and namespaces_present:
                    # This is an error case according to the API.
                    msg = "Policy API does not support both 'pods' and " \
                          "'namespaces' selectors."
                    raise PolicyError(msg, policy)
                elif pods_present:
                    # There is a pod selector in this "from" clause.
                    pod_selector = from_clause["pods"] or {}
                    _log.debug("Allow from pods: %s", pod_selector)
                    selectors = [
                        "%s == '%s'" % (k, v)
                        for k, v in pod_selector.iteritems()
                    ]

                    # We can only select on pods in this namespace.
                    selectors.append("%s == %s" %
                                     (K8S_NAMESPACE_LABEL, policy_ns))
                    selector = " && ".join(selectors)

                    # Append the selector to the from args.
                    _log.debug("Allowing pods which match: %s", selector)
                    from_args.append({"src_selector": selector})
                elif namespaces_present:
                    # There is a namespace selector.  Namespace labels are
                    # applied to each pod in the namespace using
                    # the per-namespace profile.  We can select on namespace
                    # labels using the NS_LABEL_KEY_FMT modifier.
                    namespaces = from_clause["namespaces"] or {}
                    _log.debug("Allow from namespaces: %s", namespaces)
                    selectors = ["%s == '%s'" % (NS_LABEL_KEY_FMT % k, v) \
                            for k,v in namespaces.iteritems()]
                    selector = " && ".join(selectors)
                    if selector:
                        # Allow from the selected namespaces.
                        _log.debug("Allowing from namespaces which match: %s",
                                   selector)
                        from_args.append({"src_selector": selector})
                    else:
                        # Allow from all pods in all namespaces.
                        _log.debug("Allowing from all pods in all namespaces")
                        selector = "has(%s)" % K8S_NAMESPACE_LABEL
                        from_args.append({"src_selector": selector})

            if not from_args:
                # There are no match criteria specified.  We should allow
                # from all sources to the given ports.
                from_args = [{}]

            # A rule per-protocol, per-from-clause.
            for to_arg in to_args:
                for from_arg in from_args:
                    # Create a rule by combining a 'from' argument with
                    # the protocol / ports arguments.
                    from_arg.update(to_arg)
                    from_arg.update({"action": "allow"})
                    rules.append(Rule(**from_arg))

        _log.debug("Calculated rules: %s", rules)
        return rules

    def _add_update_namespace(self, key, namespace):
        """
        Configures the necessary policy in Calico for this
        namespace.  Uses the `net.alpha.kubernetes.io/network-isolation`
        annotation.

        A profile named after the namespace is created (or updated) whose
        inbound rule is "deny" when the isolation annotation equals "yes"
        and "allow" otherwise.  Outbound traffic is always allowed.

        :param key: Identifier of the namespace; only used in log messages.
        :param namespace: Namespace object as a dictionary.  Must contain
          namespace["metadata"]["name"].
        :return: None.
        """
        _log.info("Adding/updating namespace: %s", key)
        metadata = namespace["metadata"]

        # Determine the type of network-isolation specified by this namespace.
        # This defaults to no isolation.  A null "annotations" value is
        # treated the same as a missing one.
        annotations = metadata.get("annotations") or {}
        _log.debug("Namespace %s has annotations: %s", key, annotations)
        net_isolation = annotations.get(NS_POLICY_ANNOTATION, "no") == "yes"
        _log.info("Namespace %s has network-isolation? %s", key, net_isolation)

        # Determine the profile name to create.
        namespace_name = metadata["name"]
        profile_name = NS_PROFILE_FMT % namespace_name

        # Determine the rules to use.
        inbound_action = "deny" if net_isolation else "allow"
        rules = Rules(id=profile_name,
                      inbound_rules=[Rule(action=inbound_action)],
                      outbound_rules=[Rule(action="allow")])

        # Create the Calico policy to represent this namespace, or
        # update it if it already exists.
        self._client.create_profile(profile_name, rules)

        # Build the labels for the profile.  Keys get a special prefix to
        # indicate that they are inherited from the namespace.  A new
        # dictionary is built rather than renaming keys in place: adding and
        # deleting keys while iterating a dict is undefined behaviour, and
        # the caller's namespace object must not be modified.
        labels = {NS_LABEL_KEY_FMT % k: v
                  for k, v in (metadata.get("labels") or {}).items()}
        _log.debug("Generated namespace labels: %s", labels)

        # TODO: Actually assign labels to the profile.

        _log.info("Created/updated profile for namespace %s", namespace_name)

    def _delete_namespace(self, key, namespace):
        """
        Removes the Calico profile that represents a deleted namespace.

        A profile that cannot be found is logged and otherwise ignored.

        :param key: Identifier of the namespace; only used in log messages.
        :param namespace: Namespace object as a dictionary.  Must contain
          namespace["metadata"]["name"].
        :return: None.
        """
        _log.info("Deleting namespace: %s", key)

        # The profile name is derived from the namespace name.
        # NOTE(review): nothing here checks whether pods are still running
        # in the namespace before the profile is removed.
        profile_name = NS_PROFILE_FMT % namespace["metadata"]["name"]
        try:
            self._client.remove_profile(profile_name)
        except KeyError:
            _log.info("Unable to find profile for namespace '%s'", key)

    def _add_update_pod(self, key, pod):
        """
        Takes a new or updated pod from the Kubernetes API and
        creates the corresponding Calico configuration.

        The pod's labels (plus a label holding its namespace) are written
        to the pod's Calico endpoint, and the endpoint is configured with
        the profile of its namespace.  If no endpoint exists yet, nothing
        is done.  If several endpoints match, the process exits with
        status 1.

        :param key: Identifier of the pod; only used in log messages.
        :param pod: Pod object as a dictionary.  Must contain
          metadata.namespace and metadata.name.  It is not modified.
        :return: None.
        """
        _log.info("Adding new pod: %s", key)

        # Get the Calico endpoint.  This may or may not have already been
        # created by the CNI plugin.  If it hasn't been created, we need to
        # wait until it has before we can do any meaningful work.
        metadata = pod["metadata"]
        namespace = metadata["namespace"]
        name = metadata["name"]
        workload_id = "%s.%s" % (namespace, name)
        try:
            _log.debug("Looking for endpoint that matches workload_id=%s",
                       workload_id)
            endpoint = self._client.get_endpoint(orchestrator_id="cni",
                                                 workload_id=workload_id)
        except KeyError:
            # We don't need to do anything special here, just return.
            # We'll receive another update when the Pod enters running state.
            _log.warning("No endpoint for '%s', wait until running",
                         workload_id)
            return
        except MultipleEndpointsMatch:
            # We should never have multiple endpoints with the same
            # workload_id.  This could theoretically occur if the Calico
            # datastore is out-of-sync with what pods actually exist, but
            # this is an error state and indicates a problem elsewhere.
            _log.error("Multiple Endpoints found matching ID %s", workload_id)
            sys.exit(1)

        # Get Kubernetes labels.  Work on a copy so that the pod object
        # handed to us is left untouched, and treat a null "labels" value
        # like a missing one.
        labels = dict(metadata.get("labels") or {})
        _log.debug("Pod '%s' has labels: %s", key, labels)

        # Add a special label for the Kubernetes namespace.  This is used
        # by selector-based policies to select all pods in a given namespace.
        labels[K8S_NAMESPACE_LABEL] = namespace

        # Set the labels on the endpoint.
        endpoint.labels = labels
        self._client.set_endpoint(endpoint)
        _log.info("Updated labels on pod %s", key)

        # Configure this pod with its namespace profile.
        ns_profile = NS_PROFILE_FMT % namespace
        self._client.set_profiles_on_endpoint([ns_profile],
                                              orchestrator_id="cni",
                                              workload_id=endpoint.workload_id)

    def _delete_pod(self, key, pod):
        """
        Handles a deleted pod.  Nothing is cleaned up here; the event is
        only logged, because the CNI plugin is responsible for deleting
        the pod's endpoint.

        :param key: Identifier of the pod; only used in the log message.
        :param pod: The deleted pod object (unused).
        :return: None.
        """
        _log.info("Pod deleted: %s", key)

    def _watch_api(self, path, resource_version=None):
        """
        Runs the watch loop for the given path and logs, rather than
        propagates, any exception that escapes it.

        :param path: API path to watch.
        :param resource_version: Optional version to start the watch from.
        :return: None.
        """
        try:
            self.__watch_api(path, resource_version)
        except Exception:
            # Log with traceback and return; the exception is deliberately
            # not propagated to the caller.
            _log.exception("Exception watching %s", path)

    def __watch_api(self, path, resource_version=None):
        """
        Work loop for the watch thread.

        Repeatedly opens a streaming GET on the path and puts every
        non-empty line of the response, decoded from JSON, onto the event
        queue.  Connection errors and non-200 responses are logged and
        retried after a 10 second pause.  The loop never returns normally.

        :param path: API path to watch.
        :param resource_version: Optional version passed to every request.
        """
        _log.info("Starting watch on path: %s", path)
        while True:
            # Open the stream; on a connection failure back off and retry.
            try:
                resp = self._api_get(path,
                                     stream=True,
                                     resource_version=resource_version)
                _log.info("Watch response for %s: %s", path, resp)
            except requests.ConnectionError:
                _log.exception("Error querying path: %s", path)
                time.sleep(10)
                continue

            if resp.status_code == 200:
                # Success - queue each event for processing.
                for raw_event in resp.iter_lines():
                    if not raw_event:
                        # Keep-alive new line, nothing to process.
                        continue
                    _log.debug("Adding line to queue: %s", raw_event)
                    self._event_queue.put(json.loads(raw_event))
            else:
                # Unsuccessful response - back off and retry.
                _log.error("Error watching path: %s", resp.text)
                time.sleep(10)

    def _api_get(self, path, stream, resource_version=None):
        """
        Issue a GET request against the given API path.

        :param path: Full URL to query.
        :param stream: Passed to the request as `stream`; True to read the
          response body incrementally (as used for watches).
        :param resource_version: Optional resource version.  When given it
          is sent as the `resourceVersion` query parameter, which indicates
          where the watch should start.
        :return: The response object returned by the session's `get` call.
        """
        _log.info("Getting API resources '%s' at version '%s'. stream=%s",
                  path, resource_version, stream)
        if resource_version:
            # Append the resource version, respecting any query string
            # that the path already carries.
            separator = "&" if "?" in path else "?"
            path += "%sresourceVersion=%s" % (separator, resource_version)

        session = requests.Session()
        if self.auth_token:
            session.headers.update(
                {'Authorization': 'Bearer ' + self.auth_token})

        # NOTE(review): when no CA certificate is present, certificate
        # verification is switched off entirely.
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, verify=verify, stream=stream)
Exemplo n.º 4
0
class CniPlugin(object):
    """
    Class which encapsulates the function of a CNI plugin.
    """
    def __init__(self, network_config, env):
        """
        Builds a plugin instance from the network config and environment.

        :param network_config: Dictionary holding the CNI network config.
          Must contain "name" and ["ipam"]["type"].
        :param env: Dictionary of environment variables.  The command,
          container ID, netns, interface name and CNI path entries are
          required; the CNI args entry is optional.
        """
        # DatastoreClient for access to the Calico datastore.
        self._client = DatastoreClient()

        # Parse CNI_ARGS into dictionary so we can extract values.
        cni_args = parse_cni_args(env.get(CNI_ARGS_ENV, ""))

        # Name of Kubernetes pod if running under Kubernetes, else None.
        self.k8s_pod_name = cni_args.get(K8S_POD_NAME)

        # Name of Kubernetes namespace if running under Kubernetes, else None.
        self.k8s_namespace = cni_args.get(K8S_POD_NAMESPACE)

        # Network config as provided in the CNI network file.
        self.network_config = network_config

        # Name of the network from the provided network config file.
        self.network_name = network_config["name"]

        # Type of IPAM to use, e.g calico-ipam.
        self.ipam_type = network_config["ipam"]["type"]

        # Chooses the correct policy driver based on the given configuration.
        self.policy_driver = get_policy_driver(self.k8s_pod_name,
                                               self.k8s_namespace,
                                               self.network_config)

        # Chooses the correct container engine based on the given
        # configuration.
        self.container_engine = get_container_engine(self.k8s_pod_name)

        # Environment dictionary used when calling the IPAM plugin.
        self.ipam_env = env

        # The command to execute for this plugin instance. Required.
        # One of:
        #   - CNI_CMD_ADD
        #   - CNI_CMD_DELETE
        self.command = env[CNI_COMMAND_ENV]
        assert self.command in (CNI_CMD_DELETE, CNI_CMD_ADD), \
            "Invalid CNI command %s" % self.command

        # The container's ID in the containerizer. Required.
        self.container_id = env[CNI_CONTAINERID_ENV]

        # Relative path to the network namespace of this container.
        self.cni_netns = env[CNI_NETNS_ENV]

        # Name of the interface to create within the container.
        self.interface = env[CNI_IFNAME_ENV]

        # Path in which to search for CNI plugins.
        self.cni_path = env[CNI_PATH_ENV]

    def execute(self):
        """
        Runs the plugin: add() when the command is CNI_CMD_ADD, delete()
        for anything else.

        :return: None.
        """
        action = self.add if self.command == CNI_CMD_ADD else self.delete
        action()

    def add(self):
        """Handles CNI_CMD_ADD requests.

        Configures Calico networking and prints required json to stdout.

        In CNI, a container can be added to multiple networks, in which case
        the CNI plugin will be called multiple times.  In Calico, each network
        is represented by a profile, and each container only receives a single
        endpoint / veth / IP address even when it is on multiple CNI networks.

        Exits with status 0 without doing anything if the container uses
        host networking.

        :return: None.
        """
        container_id = self.container_id

        # Host-networked containers are left alone.  This should only be
        # hit when running in Kubernetes mode with docker - rkt doesn't
        # call plugins when using host networking.
        if self.container_engine.uses_host_networking(container_id):
            _log.info("Cannot network container %s since it is configured "
                      "with host networking.", container_id)
            sys.exit(0)

        _log.info("Configuring network '%s' for container: %s",
                  self.network_name, container_id)

        _log.debug("Checking for existing Calico endpoint")
        existing = self._get_endpoint()
        if not existing:
            # No endpoint exists - we need to configure a new one.
            _log.info("Configuring a new Endpoint for container")
            result = self._add_new_endpoint()
        else:
            # This endpoint already exists, add it to another network.
            _log.info("Endpoint for container exists - add to new network")
            result = self._add_existing_endpoint(existing)

        # Everything succeeded - emit the result on stdout.
        serialized = json.dumps(result)
        _log.debug("Printing CNI result to stdout: %s", serialized)
        print(serialized)

        _log.info("Finished networking container: %s", container_id)

    def _add_new_endpoint(self):
        """
        Handles adding a new container to a Calico network.

        Assigns addresses through the IPAM plugin, creates the Calico
        endpoint, provisions its veth and applies the profile.  If applying
        the profile fails, the veth, workload and IP assignment are removed
        again, a CNI error is printed and the process exits.

        :return: The IPAM plugin's result.
        """
        ipv4, ipv6, ipam_result = self._assign_ips(self.ipam_env)

        # Only the addresses that were actually assigned go on the endpoint.
        assigned = [addr for addr in (ipv4, ipv6) if addr is not None]

        # Create the Calico endpoint, then provision its veth.
        endpoint = self._provision_veth(self._create_endpoint(assigned))

        try:
            self.policy_driver.apply_profile(endpoint)
        except ApplyProfileError as e:
            _log.error("Failed to apply profile to endpoint %s",
                       endpoint.name)
            # Undo everything configured so far.  The IPAM environment is
            # switched to DELETE so the IP release is accepted.
            self._remove_veth(endpoint)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message, e.details)
            sys.exit(ERR_CODE_GENERIC)

        return ipam_result

    def _add_existing_endpoint(self, endpoint):
        """
        Handles adding an existing container to a new Calico network.

        We've already assigned an IP address and created the veth,
        we just need to apply a new profile to this endpoint.

        If applying the profile fails, a CNI error is printed and the
        process exits.

        :param endpoint: The existing Calico endpoint for this container.
        :return: Dictionary with "ip4" and "ip6" entries holding the CIDRs
          already on the endpoint.  A family with no address is reported as
          0.0.0.0/32 or ::/128 respectively.
        """
        # Get the already existing IP information for this Endpoint.
        try:
            ip4 = next(iter(endpoint.ipv4_nets))
        except StopIteration:
            # No IPv4 address on this endpoint.
            _log.warning("No IPV4 address attached to existing endpoint")
            ip4 = IPNetwork("0.0.0.0/32")

        try:
            ip6 = next(iter(endpoint.ipv6_nets))
        except StopIteration:
            # No IPv6 address on this endpoint.
            _log.warning("No IPV6 address attached to existing endpoint")
            ip6 = IPNetwork("::/128")

        # Apply a new profile to this endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except ApplyProfileError as e:
            # Hit an exception applying the profile.  We haven't configured
            # anything, so we don't need to clean anything up.  Just exit.
            _log.error("Failed to apply profile to endpoint %s",
                       endpoint.name)
            # Report the error details too, as is done when applying the
            # profile to a newly created endpoint.
            print_cni_error(ERR_CODE_GENERIC, e.message, e.details)
            sys.exit(ERR_CODE_GENERIC)

        return {"ip4": {"ip": str(ip4.cidr)},
                "ip6": {"ip": str(ip6.cidr)}}
    
    def delete(self):
        """Handles CNI_CMD_DELETE requests.

        Removes this container from Calico networking: releases its IP
        assignments, then removes the veth, the Calico endpoint and any
        profiles.  Exits with status 0 after the IP release if the
        container has no Calico endpoint.

        :return: None.
        """
        container_id = self.container_id
        _log.info("Remove network '%s' from container: %s",
                  self.network_name, container_id)

        # IP assignments are released before looking up the endpoint, so
        # this happens even when no endpoint exists.
        self._release_ip(self.ipam_env)

        # Without an endpoint there is nothing further to clean up; warn
        # and exit successfully.
        endpoint = self._get_endpoint()
        if not endpoint:
            _log.warning("No Calico Endpoint for container: %s",
                         container_id)
            sys.exit(0)

        # Tear down in order: veth interface, Calico endpoint, profiles.
        self._remove_veth(endpoint)
        self._remove_workload()
        self.policy_driver.remove_profile()

        _log.info("Finished removing container: %s", container_id)

    def _assign_ips(self, env):
        """Assigns IP addresses using the IPAM plugin specified in the
        network config file.

        Prints a CNI error and exits the process if the plugin's output is
        not valid JSON, if the plugin reports a failure, if a returned
        address cannot be parsed, or if no address was returned at all.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin.  Its command entry must be CNI_CMD_ADD.
        :return: Tuple of (ipv4, ipv6, ipam_result).  ipv4 / ipv6 are the
        assigned addresses, or None for a family that was not assigned;
        ipam_result is the decoded response of the IPAM plugin.
        """
        # Call the IPAM plugin.  Returns the plugin returncode,
        # as well as the CNI result from stdout.
        _log.debug("Assigning IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_ADD
        rc, result = self._call_ipam_plugin(env)

        try:
            # Load the response - either the assigned IP addresses or
            # a CNI error message.
            ipam_result = json.loads(result)
        except ValueError:
            message = "Failed to parse IPAM response, exiting"
            _log.exception(message)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        if rc:
            # The IPAM plugin failed to assign an IP address. At this point in
            # execution, we haven't done anything yet, so we don't have to
            # clean up.
            _log.error("IPAM plugin error (rc=%s): %s", rc, result)
            code = ipam_result.get("code", ERR_CODE_GENERIC)
            msg = ipam_result.get("msg", "Unknown IPAM error")
            details = ipam_result.get("details")
            print_cni_error(code, msg, details)
            sys.exit(int(code))

        ipv4 = self._parse_ipam_address(ipam_result, "ip4", "IPv4")
        ipv6 = self._parse_ipam_address(ipam_result, "ip6", "IPv6")

        if not ipv4 and not ipv6:
            message = "IPAM plugin did not return any valid addresses."
            _log.warning("Bad IPAM plugin response: %s", ipam_result)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return ipv4, ipv6, ipam_result

    def _parse_ipam_address(self, ipam_result, key, family):
        """Extracts one address family from a decoded IPAM response.

        :param ipam_result - Decoded response of the IPAM plugin.
        :param key - Entry of the response to read, e.g. "ip4".
        :param family - Name of the address family for messages, e.g. "IPv4".
        :return: The parsed address, or None if the response has no such
        entry.  Prints a CNI error and exits if the address is invalid.
        """
        try:
            address = IPNetwork(ipam_result[key]["ip"])
        except KeyError:
            # This address family was not assigned.
            return None
        except (AddrFormatError, ValueError):
            message = "Invalid or Empty %s address: %s" % \
                      (family, ipam_result[key]["ip"])
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        _log.info("IPAM plugin assigned %s address: %s", family, address)
        return address

    def _release_ip(self, env):
        """Releases the IP address(es) for this container using the IPAM
        plugin specified in the network config file.

        A failure reported by the plugin is logged but not raised.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin.  Its command entry must be CNI_CMD_DELETE.
        :return: None.
        """
        _log.info("Releasing IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_DELETE

        # Only the return code matters here; the plugin output is ignored.
        returncode, _output = self._call_ipam_plugin(env)
        if returncode:
            _log.error("IPAM plugin failed to release IP address")

    def _call_ipam_plugin(self, env):
        """
        Executes a CNI IPAM plugin.

        For the `calico-ipam` type the IpamPlugin class is invoked
        in-process instead of running a binary, as a performance
        optimization.  Every other IPAM type is located on the CNI path
        and executed as a binary.

        :param env - A dictionary of environment variables for the plugin.
        :return: Tuple of return code, response from the IPAM plugin.
        """
        if self.ipam_type != "calico-ipam":
            _log.debug("Using binary plugin")
            code, response = self._call_binary_ipam_plugin(env)
        else:
            _log.info("Using Calico IPAM")
            ipam_config = self.network_config["ipam"]
            try:
                response = IpamPlugin(env, ipam_config).execute()
                code = 0
            except CniError as e:
                # Translate the CNI error into the CNI formatted error
                # dictionary and use its code as the return code.
                response = json.dumps({"code": e.code,
                                       "msg": e.msg,
                                       "details": e.details})
                code = e.code

        # Hand back the IPAM return code and output.
        _log.debug("IPAM response (rc=%s): %s", code, response)
        return code, response

    def _call_binary_ipam_plugin(self, env):
        """Calls through to the specified IPAM plugin binary.

        The binary is looked up on the CNI path by IPAM type.  It is run
        with the given environment and receives the JSON-encoded network
        config on stdin.  If no binary is found, a CNI error is printed
        and the process exits.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin
        :return: Tuple of return code, response from the IPAM plugin.
        """
        # Locate the binary for the configured IPAM type.
        plugin_path = self._find_ipam_plugin()
        if not plugin_path:
            message = "Could not find IPAM plugin of type %s in path %s." % \
                      (self.ipam_type, self.cni_path)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        _log.info("Using IPAM plugin at: %s", plugin_path)
        _log.debug("Passing in environment to IPAM plugin: \n%s",
                   json.dumps(env, indent=2))

        # Run the plugin, feeding it the network config and collecting
        # both output streams.
        process = Popen(plugin_path, stdin=PIPE, stdout=PIPE, stderr=PIPE,
                        env=env)
        stdout, stderr = process.communicate(json.dumps(self.network_config))
        returncode = process.returncode

        _log.debug("IPAM plugin return code: %s", returncode)
        _log.debug("IPAM plugin output: \nstdout:\n%s\nstderr:\n%s",
                   stdout, stderr)
        return returncode, stdout

    def _create_endpoint(self, ip_list):
        """Creates an endpoint in the Calico datastore with the client.

        If creation fails, the assigned IP addresses are released, a CNI
        error is printed and the process exits.

        :param ip_list - list of IP addresses that have been already allocated
        :return Calico endpoint object
        """
        _log.debug("Creating Calico endpoint")
        try:
            new_endpoint = self._client.create_endpoint(HOSTNAME,
                                                        ORCHESTRATOR_ID,
                                                        self.container_id,
                                                        ip_list)
        except (AddrFormatError, KeyError) as e:
            # AddrFormatError: Raised when an IP address type is not
            #                  compatible with the node.
            # KeyError: Raised when BGP config for host is not found.
            _log.exception("Failed to create Calico endpoint.")
            # Switch the IPAM environment to DELETE before releasing.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)
        else:
            _log.info("Created Calico endpoint with IP address(es) %s",
                      ip_list)
            return new_endpoint

    def _remove_workload(self):
        """Removes this container's workload from the Calico datastore.

        A workload that cannot be found is logged as a warning and
        otherwise ignored.

        :return: None
        """
        container_id = self.container_id
        _log.info("Removing Calico endpoint for container '%s'",
                  container_id)
        try:
            self._client.remove_workload(hostname=HOSTNAME,
                                         orchestrator_id=ORCHESTRATOR_ID,
                                         workload_id=container_id)
        except KeyError:
            _log.warning("Unable to remove workload with ID %s from datastore.",
                         container_id)

    def _provision_veth(self, endpoint):
        """Provisions veth for given endpoint.

        Uses the netns path taken from CNI_NETNS_ENV (resolved against the
        current working directory) and the interface name taken from
        CNI_IFNAME_ENV.  On failure the workload and IP assignment are
        removed, a CNI error is printed and the process exits.

        :param endpoint
        :return Calico endpoint object
        """
        _log.debug("Provisioning Calico veth interface")
        relative_to_cwd = os.path.join(os.getcwd(), self.cni_netns)
        netns_path = os.path.abspath(relative_to_cwd)
        _log.debug("netns path: %s", netns_path)

        try:
            mac = endpoint.provision_veth(Namespace(netns_path),
                                          self.interface)
            endpoint.mac = mac
        except CalledProcessError as e:
            _log.exception("Failed to provision veth interface for endpoint %s",
                           endpoint.name)
            # Undo the endpoint creation and IP assignment before exiting.
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        _log.debug("Endpoint has mac address: %s", endpoint.mac)

        # Store the endpoint, which now carries its mac address.
        self._client.set_endpoint(endpoint)
        _log.info("Provisioned %s in netns %s", self.interface, netns_path)
        return endpoint

    def _remove_veth(self, endpoint):
        """Remove the veth from given endpoint.

        A failure to remove the veth is logged as a warning and not raised.

        :param endpoint: Endpoint whose name identifies the veth to remove.
        :return: None
        """
        veth_name = endpoint.name
        _log.info("Removing veth for endpoint: %s", veth_name)
        try:
            was_removed = netns.remove_veth(veth_name)
            _log.debug("Successfully removed endpoint %s? %s",
                       veth_name, was_removed)
        except CalledProcessError:
            _log.warning("Unable to remove veth %s", veth_name)

    @handle_datastore_error
    def _get_endpoint(self):
        """Looks up the endpoint whose workload ID is self.container_id.

        Prints a CNI error and exits if more than one endpoint matches.

        :return: Endpoint object if found, None if not found
        """
        container_id = self.container_id
        try:
            _log.debug("Looking for endpoint that matches container ID %s",
                       container_id)
            return self._client.get_endpoint(hostname=HOSTNAME,
                                             orchestrator_id=ORCHESTRATOR_ID,
                                             workload_id=container_id)
        except KeyError:
            # No match is a normal outcome, reported to the caller as None.
            _log.debug("No endpoint found matching ID %s", container_id)
            return None
        except MultipleEndpointsMatch:
            message = "Multiple Endpoints found matching ID %s" % \
                      container_id
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

    def _find_ipam_plugin(self):
        """Searches the plugin path for the IPAM plugin binary.

        The binary is named after the IPAM type set in the network config
        file.  The plugin path is the colon-separated CNI path passed
        through the environment variable CNI_PATH; its directories are
        tried in order and the first one holding such a file wins.

        :rtype : str
        :return: absolute path of the IPAM plugin binary, or an empty
        string if it was not found
        """
        plugin_type = self.ipam_type
        for directory in self.cni_path.split(":"):
            _log.debug("Looking for plugin %s in path %s",
                       plugin_type, directory)
            candidate = os.path.abspath(os.path.join(directory, plugin_type))
            if os.path.isfile(candidate):
                _log.debug("Found plugin %s in path %s",
                           plugin_type, directory)
                return str(candidate)

        # Nothing matched in any directory.
        return ""
class PolicyAgent():
    def __init__(self):
        """
        Sets up the agent's event queue, Kubernetes API settings, Calico
        datastore client and the table of watch-event handlers.
        """
        # Queue to populate with events from API watches.
        self._event_queue = Queue.Queue()

        # Scheme, IP and port of the Kubernetes API.
        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)

        # Auth token to use when accessing the API.  The token file is only
        # read when the environment does not supply a token.
        self.auth_token = os.environ.get("K8S_AUTH_TOKEN")
        if self.auth_token is None:
            self.auth_token = read_token_file()

        # The token is a credential - log only whether one is configured,
        # never its value.
        _log.debug("Auth token configured: %s", bool(self.auth_token))

        # True if a CA cert has been mounted by Kubernetes.
        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)

        # Client for accessing the Calico datastore.
        self._client = DatastoreClient()

        # Handlers for watch events, keyed by (resource type, event type).
        self._handlers = {}
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         self._add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         self._delete_network_policy)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         self._add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         self._delete_namespace)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED,
                         self._add_update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED,
                         self._delete_pod)

        
    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the 
        given resource type.
        """
        _log.info("Setting %s %s handler: %s", 
                  resource_type, event_type, handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        PolicyAgent.run() is called at program init.  For each watched
        resource type it fetches and processes the objects which already
        exist, then spawns a daemon thread which watches the API for
        further changes.  Finally it loops reading events from the queue.

        Returns early, after logging an error, if the existing objects of
        any resource type cannot be fetched from the API.
        """
        resources = [RESOURCE_TYPE_NETWORK_POLICY,
                     RESOURCE_TYPE_NAMESPACE,
                     RESOURCE_TYPE_POD]
        for resource_type in resources:
            # Get existing resources from the API.
            _log.info("Getting existing %s objects", resource_type)
            get_url = GET_URLS[resource_type] % self.k8s_api
            resp = self._api_get(get_url, stream=False)
            _log.info("Response: %s", resp)

            if resp.status_code != 200:
                # Log the raw body - an error response is not guaranteed
                # to be valid JSON, so decoding it here could itself raise.
                _log.error("Error querying API: %s", resp.text)
                return

            # Decode the body once and reuse it.  A null "items" or
            # "metadata" is treated the same as an empty one.
            body = resp.json()
            updates = body.get("items") or []
            metadata = body.get("metadata") or {}
            resource_version = metadata.get("resourceVersion")
            _log.debug("%s metadata: %s", resource_type, metadata)

            # Process the existing resources.
            _log.info("%s existing %s(s)", len(updates), resource_type)
            for update in updates:
                _log.debug("Processing existing resource: %s",
                           json.dumps(update, indent=2))
                self._process_update(TYPE_ADDED, resource_type, update)

            # Start watching for updates from the last resourceVersion.
            watch_url = WATCH_URLS[resource_type] % self.k8s_api
            t = Thread(target=self._watch_api,
                       args=(watch_url, resource_version))
            t.daemon = True
            t.start()
            _log.info("Started watch on: %s", resource_type)

        # Loop and read updates from the queue.
        _log.info("Reading from event queue")
        self.read_updates()

    def read_updates(self):
        """
        Reads events from the update queue forever, passing each one to
        _process_update().

        Every event taken from the queue is marked done with task_done(),
        whether or not it was processed successfully, so the queue's
        unfinished-task count stays accurate.  An event which raises
        KeyError while being processed is logged and discarded.
        """
        while True:
            # Block until a watch thread queues an event.
            update = self._event_queue.get(block=True)
            try:
                # We've received an update - process it.
                _log.debug("Read update from queue: %s",
                           json.dumps(update, indent=2))
                self._process_update(update["type"],
                                     update["object"]["kind"],
                                     update["object"])
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                # Drop it and back off before reading the next event.
                _log.exception("Invalid update: %s", update)
                time.sleep(10)
            finally:
                self._event_queue.task_done()

    def _process_update(self, event_type, resource_type, resource):
        """
        Dispatches a single event to the handler registered for its
        resource type and event type.  MODIFIED events are handled as
        ADDED events.  An event with no registered handler is logged and
        dropped; a KeyError raised by the handler is logged and swallowed.
        """
        _log.info("Processing '%s' for kind '%s'", event_type, resource_type) 

        # (namespace, name) identifies the object in logs and is passed to
        # the handler as its key.
        metadata = resource["metadata"]
        name = metadata["name"]
        key = (metadata.get("namespace"), name)

        # There is no separate "modified" handler - reuse the "added" one.
        if event_type == TYPE_MODIFIED:
            _log.info("Treating 'MODIFIED' as 'ADDED'")
            event_type = TYPE_ADDED

        # Find the handler, bailing out if there isn't one.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s", 
                         event_type, resource_type)
            return

        _log.debug("Calling handler: %s", handler)
        try:
            handler(key, resource)
        except KeyError:
            _log.exception("Invalid %s: %s", resource_type, 
                           json.dumps(resource, indent=2))

    def _add_update_network_policy(self, key, policy):
        """
        Converts a NetworkPolicy from the Kubernetes API into a Calico
        global policy and writes it to the datastore.  If the policy's
        ingress rules cannot be parsed, the error is logged and nothing
        is written.
        """
        _log.info("Adding new network policy: %s", key)

        # The Calico selector is the policy's pod selector (a null selector
        # is treated as empty) combined with a match on the policy's
        # namespace.
        pod_selector = policy["spec"]["podSelector"] or {}
        namespace = policy["metadata"]["namespace"]
        clauses = ["%s == '%s'" % (k,v) for k,v in pod_selector.iteritems()]
        clauses.append("%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace))
        selector = " && ".join(clauses)

        # Name of the global policy in the Calico datastore.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Build the Calico rules.  Bad rules must not crash the agent, but
        # should show up in the logs so that the policy can be fixed.
        try:
            inbound_rules = self._calculate_inbound_rules(policy)
        except Exception:
            _log.exception("Error parsing policy: %s", 
                           json.dumps(policy, indent=2))
            return

        rules = Rules(id=name,
                      inbound_rules=inbound_rules,
                      outbound_rules=[Rule(action="allow")])

        # Write the policy using the calculated selector and rules.
        self._client.create_global_policy(NET_POL_GROUP_NAME, name, selector, rules)
        _log.info("Updated global policy '%s' for NetworkPolicy %s", name, key)

    def _delete_network_policy(self, key, policy):
        """
        Removes the Calico global policy which corresponds to a deleted
        NetworkPolicy.  A policy which is already absent from the
        datastore is logged and otherwise ignored.
        """
        _log.info("Deleting network policy: %s", key)

        # The global policy is named after the NetworkPolicy.
        policy_name = "net_policy-%s" % policy["metadata"]["name"]

        try:
            self._client.remove_global_policy(NET_POL_GROUP_NAME, policy_name)
        except KeyError:
            _log.info("Unable to find policy '%s' - already deleted", key)

    def _calculate_inbound_rules(self, policy):
        """
        Takes a NetworkPolicy object from the API and returns a list of 
        Calico Rule objects which should be applied on ingress.

        One "allow" Rule is produced for every combination of 
        protocol / port set and "from" clause within each ingress rule.

        :param policy: NetworkPolicy dict from the Kubernetes API.
        :return: List of Rule objects; empty if there are no ingress rules.
        :raises PolicyError: if a "from" clause has both 'pods' and 
        'namespaces' selectors.
        """
        _log.debug("Calculating inbound rules")

        # Get this policy's namespace.
        policy_ns = policy["metadata"]["namespace"]

        # Store the rules to return.
        rules = []

        # Iterate through each inbound rule and create the appropriate
        # rules.
        ingress_rules = policy["spec"].get("ingress") or []
        _log.info("Found %s ingress rules", len(ingress_rules))
        for ingress_rule in ingress_rules:
            _log.debug("Processing ingress rule: %s", ingress_rule)
            to_args = self._ingress_to_args(ingress_rule)
            from_args = self._ingress_from_args(ingress_rule, policy, 
                                                policy_ns)

            # A rule per-protocol, per-from-clause.
            for to_arg in to_args:
                for from_arg in from_args:
                    # Combine into a fresh dict each time.  Updating
                    # from_arg in place would carry one protocol's ports
                    # over into the rule built for the next protocol.
                    rule_args = dict(from_arg)
                    rule_args.update(to_arg)
                    rule_args["action"] = "allow"
                    rules.append(Rule(**rule_args))

        _log.debug("Calculated rules: %s", rules)
        return rules

    def _ingress_to_args(self, ingress_rule):
        """
        Returns the destination Rule arguments for one ingress rule: a 
        list with one dict per protocol, or [{}] (all protocols and ports)
        if the rule specifies no ports.
        """
        # Keep a dict of ports exposed, keyed by protocol.
        ports_by_protocol = {}
        for to_port in ingress_rule.get("ports") or []:
            # Kubernetes defaults an unspecified protocol to TCP.
            protocol = to_port.get("protocol") or "TCP"
            port = to_port.get("port")
            ports = ports_by_protocol.setdefault(protocol, [])
            if port:
                _log.debug("Allow to port: %s/%s", protocol, port)
                ports.append(port)

        # Convert into arguments to be passed to a Rule object.
        to_args = []
        for protocol, ports in ports_by_protocol.items():
            arg = {"protocol": protocol.lower()}
            if ports:
                arg["dst_ports"] = ports
            to_args.append(arg)

        # No destination protocols / ports specified means allow to all
        # protocols and ports.
        return to_args or [{}]

    def _ingress_from_args(self, ingress_rule, policy, policy_ns):
        """
        Returns the source Rule arguments for one ingress rule: a list 
        with one dict per usable "from" clause, or [{}] (all sources) if 
        there are none.  Raises PolicyError for a clause which has both 
        'pods' and 'namespaces'.
        """
        from_args = []
        for from_clause in ingress_rule.get("from") or []:
            # We need to check if the key exists, not just if there is 
            # a non-null value.  The presence of the key with a null 
            # value means "select all".
            pods_present = "pods" in from_clause
            namespaces_present = "namespaces" in from_clause
            _log.debug("Is 'pods:' present? %s", pods_present)
            _log.debug("Is 'namespaces:' present? %s", namespaces_present)

            if pods_present and namespaces_present:
                # This is an error case according to the API.
                msg = "Policy API does not support both 'pods' and " \
                      "'namespaces' selectors."
                raise PolicyError(msg, policy)
            elif pods_present:
                # There is a pod selector in this "from" clause.
                pod_selector = from_clause["pods"] or {}
                _log.debug("Allow from pods: %s", pod_selector)
                selectors = ["%s == '%s'" % (k, v) 
                             for k, v in pod_selector.items()]

                # We can only select on pods in this namespace.  The value
                # is quoted, as in every other selector this class builds.
                selectors.append("%s == '%s'" % (K8S_NAMESPACE_LABEL, 
                                                 policy_ns))
                selector = " && ".join(selectors)

                _log.debug("Allowing pods which match: %s", selector)
                from_args.append({"src_selector": selector})
            elif namespaces_present:
                # There is a namespace selector.  Namespace labels are
                # selected on using the NS_LABEL_KEY_FMT modifier.
                namespaces = from_clause["namespaces"] or {}
                _log.debug("Allow from namespaces: %s", namespaces)
                selectors = ["%s == '%s'" % (NS_LABEL_KEY_FMT % k, v) 
                             for k, v in namespaces.items()]
                selector = " && ".join(selectors)
                if selector:
                    # Allow from the selected namespaces.
                    _log.debug("Allowing from namespaces which match: %s", 
                               selector)
                else:
                    # Allow from all pods in all namespaces.
                    _log.debug("Allowing from all pods in all namespaces")
                    selector = "has(%s)" % K8S_NAMESPACE_LABEL
                from_args.append({"src_selector": selector})

        # No match criteria specified means allow from all sources.
        return from_args or [{}]

    def _add_update_namespace(self, key, namespace):
        """
        Configures the necessary policy in Calico for this
        namespace.  Uses the `net.alpha.kubernetes.io/network-isolation` 
        annotation.

        Creates (or updates) the namespace's profile: inbound traffic is 
        denied when the isolation annotation is "yes" and allowed 
        otherwise; outbound traffic is always allowed.

        :param key: (namespace, name) tuple, used for logging only.
        :param namespace: Namespace dict from the Kubernetes API.  It is 
        not modified.
        """
        _log.info("Adding/updating namespace: %s", key)
        metadata = namespace["metadata"]

        # Determine the type of network-isolation specified by this namespace.
        # This defaults to no isolation.  A null "annotations" value is 
        # treated the same as a missing one.
        annotations = metadata.get("annotations") or {}
        _log.debug("Namespace %s has annotations: %s", key, annotations)
        net_isolation = annotations.get(NS_POLICY_ANNOTATION, "no") == "yes"
        _log.info("Namespace %s has network-isolation? %s", key, net_isolation)

        # Determine the profile name to create.
        namespace_name = metadata["name"]
        profile_name = NS_PROFILE_FMT % namespace_name

        # Determine the rules to use.
        outbound_rules = [Rule(action="allow")]
        if net_isolation:
            inbound_rules = [Rule(action="deny")]
        else:
            inbound_rules = [Rule(action="allow")]
        rules = Rules(id=profile_name,
                      inbound_rules=inbound_rules,
                      outbound_rules=outbound_rules)

        # Create the Calico policy to represent this namespace, or 
        # update it if it already exists.
        self._client.create_profile(profile_name, rules)

        # Generate the labels for the profile.  The keys get a special 
        # prefix to indicate that these labels are inherited from the 
        # namespace.  A new dict is built rather than renaming keys in 
        # place: changing a dict while iterating over it is unsafe, and 
        # the caller's object should be left untouched.
        source_labels = metadata.get("labels") or {}
        labels = {NS_LABEL_KEY_FMT % k: v for k, v in source_labels.items()}
        _log.debug("Generated namespace labels: %s", labels)

        # TODO: Actually assign labels to the profile.

        _log.info("Created/updated profile for namespace %s", namespace_name)

    def _delete_namespace(self, key, namespace):
        """
        Removes the Calico profile which represents a deleted namespace.
        A profile which is already absent from the datastore is logged
        and otherwise ignored.
        """
        _log.info("Deleting namespace: %s", key)

        # The profile is named after the namespace.
        profile_name = NS_PROFILE_FMT % namespace["metadata"]["name"]

        # NOTE: the original author's comment here says pods must no longer
        # be running in the namespace first; this method does not check.
        try:
            self._client.remove_profile(profile_name)
        except KeyError:
            _log.info("Unable to find profile for namespace '%s'", key)

    def _add_update_pod(self, key, pod):
        """
        Takes a new or updated pod from the Kubernetes API and 
        creates the corresponding Calico configuration.

        Copies the pod's labels (plus a namespace label) onto its Calico 
        endpoint and applies the namespace profile to that endpoint.  
        Returns without doing anything if the endpoint does not exist yet.  
        Exits the process if more than one endpoint matches the pod.

        :param key: (namespace, name) tuple, used for logging only.
        :param pod: Pod dict from the Kubernetes API.  It is not modified.
        """
        _log.info("Adding new pod: %s", key)

        # Get the Calico endpoint.  This may or may not have already been 
        # created by the CNI plugin.  If it hasn't been created, we need to 
        # wait until it has before we can do any meaningful work.
        namespace = pod["metadata"]["namespace"]
        name = pod["metadata"]["name"]
        workload_id = "%s.%s" % (namespace, name)
        try:
            _log.debug("Looking for endpoint that matches workload_id=%s",
                       workload_id)
            endpoint = self._client.get_endpoint(
                orchestrator_id="cni",
                workload_id=workload_id
            )
        except KeyError:
            # We don't need to do anything special here, just return.
            # We'll receive another update when the Pod enters running state.
            _log.warning("No endpoint for '%s', wait until running", 
                         workload_id)
            return
        except MultipleEndpointsMatch:
            # We should never have multiple endpoints with the same
            # workload_id.  This could theoretically occur if the Calico
            # datastore is out-of-sync with what pods actually exist, but 
            # this is an error state and indicates a problem elsewhere.
            _log.error("Multiple Endpoints found matching ID %s", workload_id)
            sys.exit(1)

        # Get Kubernetes labels.  Work on a copy so the pod object we were 
        # given is left untouched, and treat a null "labels" value the 
        # same as a missing one.
        labels = dict(pod["metadata"].get("labels") or {})
        _log.debug("Pod '%s' has labels: %s", key, labels)

        # Add a special label for the Kubernetes namespace.  This is used
        # by selector-based policies to select all pods in a given namespace.
        labels[K8S_NAMESPACE_LABEL] = namespace

        # Set the labels on the endpoint.
        endpoint.labels = labels
        self._client.set_endpoint(endpoint)
        _log.info("Updated labels on pod %s", key)

        # Configure this pod with its namespace profile.
        ns_profile = NS_PROFILE_FMT % namespace
        self._client.set_profiles_on_endpoint([ns_profile], 
                                              orchestrator_id="cni",
                                              workload_id=endpoint.workload_id)

    def _delete_pod(self, key, pod):
        """
        Handles a pod DELETED event.  This only logs the deletion:
        removing the endpoint is left to the CNI plugin.
        """
        _log.info("Pod deleted: %s", key)

    def _watch_api(self, path, resource_version=None):
        """
        Thread entry point for a watch.  Runs the watch work loop for
        `path` and logs any exception which escapes it, rather than
        letting it propagate out of the thread.
        """
        try:
            self.__watch_api(path, resource_version)
        except Exception:
            _log.exception("Exception watching %s", path)

    def __watch_api(self, path, resource_version=None):
        """
        Work loop for the watch thread.

        Repeatedly opens a streaming GET on `path` and puts each event 
        received onto the event queue.  The resourceVersion of the last 
        event seen is remembered, so that a reconnect resumes from there 
        instead of replaying from the version the watch started at.

        Request failures and non-200 responses are logged and retried 
        after 10 seconds.  Lines which are not valid JSON are logged and 
        skipped.

        :param path: URL to watch.
        :param resource_version: resourceVersion to start watching from.
        """
        _log.info("Starting watch on path: %s", path)
        while True:
            # Attempt to stream API resources.
            try:
                response = self._api_get(path, 
                                         stream=True, 
                                         resource_version=resource_version)
                _log.info("Watch response for %s: %s", path, response)
            except requests.RequestException:
                _log.exception("Error querying path: %s", path)
                time.sleep(10)
                continue

            # Check for successful response.
            if response.status_code != 200:
                _log.error("Error watching path: %s", response.text)
                response.close()
                time.sleep(10)
                continue

            # Success - add resources to the queue for processing.
            try:
                for line in response.iter_lines():
                    # Filter out keep-alive new lines.
                    if not line:
                        continue

                    _log.debug("Adding line to queue: %s", line)
                    try:
                        event = json.loads(line)
                    except ValueError:
                        # One bad line must not end the watch.
                        _log.exception("Skipping invalid watch event: %s", 
                                       line)
                        continue
                    self._event_queue.put(event)

                    # Remember how far we have read.  Not every event 
                    # carries a resourceVersion, so keep the old one 
                    # if this event has none.
                    try:
                        seen = event["object"]["metadata"]["resourceVersion"]
                    except (KeyError, TypeError):
                        seen = None
                    if seen:
                        resource_version = seen
            except requests.RequestException:
                # The stream broke part way through - reconnect.
                _log.exception("Error streaming from path: %s", path)
                time.sleep(10)
            finally:
                # Release the connection before reconnecting.
                response.close()

    def _api_get(self, path, stream, resource_version=None):
        """
        Issues a GET request to the given API URL.

        The request carries a bearer token if self.auth_token is set.  
        The server certificate is verified against CA_CERT_PATH if a CA 
        cert exists; otherwise verification is disabled.

        :param path: The URL to request.
        :param stream: Passed to requests as `stream`; True for a watch.
        :param resource_version: If set, sent as the `resourceVersion` 
        query parameter to indicate where a watch should start.
        :return: The requests Response object.
        """
        _log.info("Getting API resources '%s' at version '%s'. stream=%s", 
                  path, resource_version, stream)

        # Let requests build the query string, so that this also works 
        # for a path which already has query parameters.
        params = None
        if resource_version:
            params = {"resourceVersion": resource_version}

        session = requests.Session()
        if self.auth_token:
            session.headers.update({'Authorization': 'Bearer ' + self.auth_token})
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, params=params, verify=verify, stream=stream)
Exemplo n.º 6
0
class CniPlugin(object):
    """
    Class which encapsulates the function of a CNI plugin.
    """
    def __init__(self, network_config, env):
        """
        Builds a plugin instance from the CNI network config and the 
        CNI environment.

        :param network_config: Dict of network config from the CNI network
        file.  Must contain "name" and "ipam" -> "type"; may contain
        "hostname" and "kubernetes".
        :param env: Environment dictionary holding the CNI_* variables.
        NOTE: this dictionary is stored as self.ipam_env and is modified
        in place - its CNI_ARGS entry is rewritten.
        :raises KeyError: if a required config key or environment variable
        is missing.
        """
        # DatastoreClient for access to the Calico datastore.
        self._client = DatastoreClient()

        # Keep the caller's CNI_ARGS string.  self.ipam_env below is the
        # same dictionary as env, so it has to be read before it is
        # overwritten.
        original_cni_args = env.get(CNI_ARGS_ENV, "")

        # Parse CNI_ARGS into dictionary so we can extract values.
        cni_args = parse_cni_args(original_cni_args)

        # Name of Kubernetes pod if running under Kubernetes, else None.
        self.k8s_pod_name = cni_args.get(K8S_POD_NAME)

        # Name of Kubernetes namespace if running under Kubernetes, else None.
        self.k8s_namespace = cni_args.get(K8S_POD_NAMESPACE)

        # Network config as provided in the CNI network file.
        self.network_config = network_config

        # Name of the network from the provided network config file.
        self.network_name = network_config["name"]

        # Type of IPAM to use, e.g calico-ipam.
        self.ipam_type = network_config["ipam"]["type"]

        # The hostname to register endpoints under.
        self.hostname = network_config.get("hostname", socket.gethostname())

        # The container engine, chosen based on the given configuration.
        self.container_engine = get_container_engine(self.k8s_pod_name)

        # Environment dictionary used when calling the IPAM plugin.
        self.ipam_env = env

        # The command to execute for this plugin instance. Required.
        # One of:
        #   - CNI_CMD_ADD
        #   - CNI_CMD_DELETE
        self.command = env[CNI_COMMAND_ENV]
        assert self.command in [CNI_CMD_DELETE, CNI_CMD_ADD], \
                "Invalid CNI command %s" % self.command

        # The container's ID in the containerizer. Required.
        self.container_id = env[CNI_CONTAINERID_ENV]

        # Relative path to the network namespace of this container.
        self.cni_netns = env[CNI_NETNS_ENV]

        # Name of the interface to create within the container.
        self.interface = env[CNI_IFNAME_ENV]

        # Path in which to search for CNI plugins.
        self.cni_path = env[CNI_PATH_ENV]

        # Configure orchestrator specific settings.
        # workload_id: In Kubernetes, this is the pod's namespace and name.
        #              Otherwise, this is the container ID.
        # orchestrator_id: Either "k8s" or "cni".
        self.running_under_k8s = self.k8s_namespace and self.k8s_pod_name
        if self.running_under_k8s:
            self.workload_id = "%s.%s" % (self.k8s_namespace, self.k8s_pod_name)
            self.orchestrator_id = "k8s"
        else:
            self.workload_id = self.container_id
            self.orchestrator_id = "cni"
        kubernetes_config = network_config.get("kubernetes", {})
        self.kubeconfig_path = kubernetes_config.get("kubeconfig")
        self.k8s_node_name = kubernetes_config.get("node_name", socket.gethostname())

        # Ensure that the ipam_env CNI_ARGS contains the IgnoreUnknown=1 option
        # See https://github.com/appc/cni/pull/158
        # And https://github.com/appc/cni/pull/127
        self.ipam_env[CNI_ARGS_ENV] = 'IgnoreUnknown=1'
        if original_cni_args:
            # Append any existing args - if they are set.
            self.ipam_env[CNI_ARGS_ENV] += ";%s" % original_cni_args

        # The policy driver, chosen based on the given configuration.
        self.policy_driver = get_policy_driver(self)


    def execute(self):
        """
        Run the plugin: CNI_CMD_ADD is handled by add(), and any other
        command by delete().

        :return: None.
        """
        action = self.add if self.command == CNI_CMD_ADD else self.delete
        action()

    def add(self):
        """Handles CNI_CMD_ADD requests.

        Configures Calico networking and prints the resulting json to stdout.

        In CNI, a container can be added to multiple networks, in which case
        the CNI plugin will be called multiple times.  In Calico, each network
        is represented by a profile, and each container only receives a single
        endpoint / veth / IP address even when it is on multiple CNI networks.

        :return: None.
        """
        # Containers using host networking are not networked by Calico.
        # This should only be hit when running in Kubernetes mode with
        # docker - rkt doesn't call plugins when using host networking.
        if self.container_engine.uses_host_networking(self.container_id):
            _log.info("Cannot network container %s since it is configured "
                      "with host networking.", self.container_id)
            sys.exit(0)

        _log.info("Configuring network '%s' for container: %s",
                  self.network_name, self.container_id)

        _log.debug("Checking for existing Calico endpoint")
        endpoint = self._get_endpoint()
        if not endpoint:
            # Nothing exists yet for this workload - configure it from
            # scratch.
            _log.info("No endpoint exists for workload - creating")
            output = self._add_new_endpoint()
        elif self.running_under_k8s:
            # Kubernetes only supports a single CNI network, so a create
            # for an existing workload means the old pod has been destroyed
            # under our feet.  Clean up what it left behind, then network
            # the new pod.
            _log.info("Kubernetes pod has been recreated")
            self._remove_stale_endpoint(endpoint)

            # Release any previous IP addresses assigned to this workload.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)

            # Clean up any profiles for the stale endpoint
            self.policy_driver.remove_profile()

            # Configure the new workload.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_ADD
            output = self._add_new_endpoint()
        else:
            # A create for a container we have already networked, likely
            # on a new CNI network.  The veth and IP addresses are already
            # in place; it only needs adding to the new network.
            _log.info("Endpoint for container exists - add to new network")
            output = self._add_existing_endpoint(endpoint)

        # If all successful, print the result to stdout.
        dump = json.dumps(output)
        _log.debug("Printing CNI result to stdout: %s", dump)
        print(dump)

        _log.info("Finished networking container: %s", self.container_id)

    def _add_new_endpoint(self):
        """
        Networks a container which has no Calico endpoint yet: assigns IP
        addresses, creates the endpoint, provisions its veth and applies
        its profile.

        If applying the profile fails, the veth, the workload and the IP
        assignment are removed again, a CNI error is printed and the
        process exits with ERR_CODE_GENERIC.

        :return: The result returned by the IPAM plugin.
        """
        _log.info("Configuring a new Endpoint")

        # Ask the configured IPAM plugin for addresses, keeping only
        # those which were actually assigned.
        ipv4, ipv6, ipam_result = self._assign_ips(self.ipam_env)
        assigned_ips = []
        for address in (ipv4, ipv6):
            if address is not None:
                assigned_ips.append(address)

        # Create the Calico endpoint, then the veth for it.
        endpoint = self._provision_veth(self._create_endpoint(assigned_ips))

        # Provision / apply profile on the created endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except PolicyException as e:
            _log.error("Failed to apply profile to endpoint %s",
                       endpoint.name)

            # Undo everything configured above before reporting the error.
            self._remove_veth(endpoint)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message, e.details)
            sys.exit(ERR_CODE_GENERIC)

        return ipam_result

    def _add_existing_endpoint(self, endpoint):
        """
        Adds an already-networked container to a new Calico network.

        The IP addresses and veth already exist, so all that is needed is
        to apply a new profile to the endpoint.  If that fails, a CNI error
        is printed and the process exits with ERR_CODE_GENERIC.

        :return: Dict with the endpoint's first IPv4 and IPv6 networks in
        CIDR form under "ip4" / "ip6".  An address family with no network
        on the endpoint is reported as 0.0.0.0/32 or ::/128.
        """
        # Get the already existing IP information for this Endpoint,
        # falling back to a placeholder where a family has no address.
        ip4 = next(iter(endpoint.ipv4_nets), None)
        if ip4 is None:
            _log.warning("No IPV4 address attached to existing endpoint")
            ip4 = IPNetwork("0.0.0.0/32")

        ip6 = next(iter(endpoint.ipv6_nets), None)
        if ip6 is None:
            _log.warning("No IPV6 address attached to existing endpoint")
            ip6 = IPNetwork("::/128")

        # Apply a new profile to this endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except PolicyException as e:
            # Nothing has been configured at this point, so there is
            # nothing to clean up.  Just report the error and exit.
            _log.error("Failed to apply profile to endpoint %s",
                       endpoint.name)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        result = {
            "ip4": {"ip": str(ip4.cidr)},
            "ip6": {"ip": str(ip6.cidr)},
        }
        return result

    def delete(self):
        """Handles CNI_CMD_DELETE requests.

        Tears down Calico networking for this container: releases its IP
        assignment(s), then removes the veth, the Calico workload and any
        profiles for the endpoint.  If no Calico endpoint exists for the
        workload, a warning is logged and the process exits with status 0
        once the IP release has been attempted.

        :return: None.
        """
        _log.info("Remove network '%s' from container: %s",
                  self.network_name, self.container_id)

        # The IP release is attempted unconditionally, before we know
        # whether an endpoint exists.
        self._release_ip(self.ipam_env)

        # Look up the Calico endpoint; nothing further to do without one.
        existing = self._get_endpoint()
        if not existing:
            _log.warning("No Calico Endpoint for workload: %s",
                         self.workload_id)
            sys.exit(0)

        # Tear down in order: veth, then workload, then profiles.
        self._remove_veth(existing)
        self._remove_workload()
        self.policy_driver.remove_profile()

        _log.info("Finished removing container: %s", self.container_id)

    def _assign_ips(self, env):
        """Assigns IP address(es) using the IPAM plugin specified in the
        network config file.

        On any failure (unparseable IPAM response, non-zero IPAM return code,
        malformed address, or no address at all) a CNI error is printed and
        the process exits.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin.  Its CNI command must be CNI_CMD_ADD.
        :return: ipv4, ipv6, ipam_result - The IPv4 and IPv6 networks assigned
        by the IPAM plugin (None for a family that was not assigned) and the
        parsed IPAM plugin response.
        """
        # Call the IPAM plugin.  Returns the plugin returncode,
        # as well as the CNI result from stdout.
        _log.debug("Assigning IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_ADD
        rc, result = self._call_ipam_plugin(env)

        try:
            # Load the response - either the assigned IP addresses or
            # a CNI error message.
            ipam_result = json.loads(result)
        except ValueError:
            message = "Failed to parse IPAM response, exiting"
            _log.exception(message)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        if rc:
            # The IPAM plugin failed to assign an IP address. At this point in
            # execution, we haven't done anything yet, so we don't have to
            # clean up.
            _log.error("IPAM plugin error (rc=%s): %s", rc, result)
            code = ipam_result.get("code", ERR_CODE_GENERIC)
            msg = ipam_result.get("msg", "Unknown IPAM error")
            details = ipam_result.get("details")
            print_cni_error(code, msg, details)
            sys.exit(int(code))

        ipv4 = self._parse_ipam_address(ipam_result, "ip4", "IPv4")
        ipv6 = self._parse_ipam_address(ipam_result, "ip6", "IPv6")

        if not ipv4 and not ipv6:
            message = "IPAM plugin did not return any valid addresses."
            _log.warning("Bad IPAM plugin response: %s", ipam_result)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return ipv4, ipv6, ipam_result

    def _parse_ipam_address(self, ipam_result, key, family):
        """Extracts one address family from a parsed IPAM plugin response.

        :param ipam_result - The parsed IPAM plugin response dictionary.
        :param key - Response key holding the address, "ip4" or "ip6".
        :param family - Human readable family name used in log and error
        messages, e.g. "IPv4".
        :return: IPNetwork for the address, or None if the response has no
        entry for this family.  Prints a CNI error and exits if the entry is
        present but is not a valid address.
        """
        try:
            network = IPNetwork(ipam_result[key]["ip"])
            _log.info("IPAM plugin assigned %s address: %s", family, network)
        except KeyError:
            # This address family is simply absent from the response.
            return None
        except (AddrFormatError, ValueError):
            message = "Invalid or Empty %s address: %s" % \
                      (family, ipam_result[key]["ip"])
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)
        return network

    def _release_ip(self, env):
        """Asks the configured IPAM plugin to release this container's IP
        address(es).

        A failure reported by the IPAM plugin is logged but not raised.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin.  Its CNI command must be CNI_CMD_DELETE.
        :return: None.
        """
        _log.info("Releasing IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_DELETE

        # Only the return code matters here; the plugin output is ignored.
        returncode, _unused_output = self._call_ipam_plugin(env)
        if returncode:
            _log.error("IPAM plugin failed to release IP address")

    def _call_ipam_plugin(self, env):
        """
        Runs the configured CNI IPAM plugin.

        `calico-ipam` is invoked in-process through IpamPlugin rather than
        as a binary.  Every other IPAM type is located on the CNI path and
        executed as a binary.  For `host-local` with a subnet of
        "usePodCidr", the subnet in the network config is first replaced
        with this node's Kubernetes podCIDR.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin.
        :return: Tuple of return code, response from the IPAM plugin.
        """
        ipam_type = self.ipam_type

        if ipam_type != "calico-ipam":
            if ipam_type == "host-local":
                # We've been told to use the "host-local" IPAM plugin.
                # Swap in the Kubernetes podCidr for this node if the config
                # asks for it.
                if self.network_config["ipam"].get("subnet") == "usePodCidr":
                    if not self.running_under_k8s:
                        print_cni_error(
                            ERR_CODE_GENERIC, "Invalid network config",
                            "Must be running under Kubernetes to use 'subnet: usePodCidr'")
                        sys.exit(ERR_CODE_GENERIC)
                    _log.info("Using Kubernetes podCIDR for node: %s",
                              self.k8s_node_name)
                    node_cidr = self._get_kubernetes_pod_cidr()
                    self.network_config["ipam"]["subnet"] = str(node_cidr)
                _log.debug("Calling host-local IPAM plugin")
            else:
                # Using some other IPAM plugin.
                _log.debug("Using binary plugin")

            # Both non-Calico cases execute the plugin binary.
            code, response = self._call_binary_ipam_plugin(env)
        else:
            _log.info("Using Calico IPAM")
            try:
                plugin = IpamPlugin(env, self.network_config["ipam"])
                response = plugin.execute()
                code = 0
            except CniError as err:
                # We hit a CNI error - return the appropriate CNI formatted
                # error dictionary.
                response = json.dumps({"code": err.code,
                                       "msg": err.msg,
                                       "details": err.details})
                code = err.code

        # Return the IPAM return code and output.
        _log.debug("IPAM response (rc=%s): %s", code, response)
        return code, response

    def _get_kubernetes_pod_cidr(self):
        """
        Get the Kubernetes pod CIDR (node.spec.podCIDR) for this node by
        querying the Kubernetes API.

        The API is accessed using the kubeconfig file at
        self.kubeconfig_path, and the node is selected by matching its
        metadata name against self.k8s_node_name.

        Prints a CNI error and exits with ERR_CODE_GENERIC if no kubeconfig
        is configured, if the API query fails or the node cannot be found,
        or if the node has no podCIDR.

        :return: The podCIDR value reported by the API for this node.
        """
        _log.info("Getting node.spec.podCidr from API, kubeconfig: %s",
                  self.kubeconfig_path)
        if not self.kubeconfig_path:
            # For now, kubeconfig is the only supported auth method.
            print_cni_error(ERR_CODE_GENERIC, "Missing kubeconfig",
                    "usePodCidr requires specification of kubeconfig file")
            sys.exit(ERR_CODE_GENERIC)

        # Query the API and pick out the node whose name matches ours.
        try:
            api = HTTPClient(KubeConfig.from_file(self.kubeconfig_path))
            node = None
            for n in Node.objects(api):
                _log.debug("Checking node: %s", n.obj["metadata"]["name"])
                if n.obj["metadata"]["name"] == self.k8s_node_name:
                    node = n
                    break
            if not node:
                raise KeyError("Unable to find node in API: %s" %
                               self.k8s_node_name)
            _log.debug("Found node %s: %s: ", node.obj["metadata"]["name"],
                       node.obj["spec"])
        except Exception:
            # Any failure talking to the API is fatal for this request, but
            # record the underlying cause so it can be diagnosed.
            _log.exception("Failed to get podCidr from Kubernetes API")
            print_cni_error(ERR_CODE_GENERIC, "Error querying Kubernetes API",
                    "Failed to get podCidr from Kubernetes API")
            sys.exit(ERR_CODE_GENERIC)
        else:
            pod_cidr = node.obj["spec"].get("podCIDR")
            if not pod_cidr:
                print_cni_error(ERR_CODE_GENERIC, "Missing podCidr",
                        "No podCidr for node %s" % self.k8s_node_name)
                sys.exit(ERR_CODE_GENERIC)
        _log.debug("Using podCidr: %s", pod_cidr)
        return pod_cidr

    def _call_binary_ipam_plugin(self, env):
        """Locates and executes the IPAM plugin binary for self.ipam_type.

        The full CNI network config is serialized to JSON and written to the
        plugin's stdin.  If no plugin binary can be found on the CNI path, a
        CNI error is printed and the process exits with ERR_CODE_GENERIC.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin
        :return: Tuple of return code, response from the IPAM plugin.
        """
        # Resolve the binary for the configured IPAM type.
        binary = self._find_ipam_plugin()
        if not binary:
            message = "Could not find IPAM plugin of type %s in path %s." % \
                      (self.ipam_type, self.cni_path)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        _log.info("Using IPAM plugin at: %s", binary)
        _log.debug("Passing in environment to IPAM plugin: \n%s",
                   json.dumps(env, indent=2))

        # Run the plugin, feeding it the network config on stdin.
        proc = Popen(binary, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env)
        config_json = json.dumps(self.network_config)
        out, err = proc.communicate(config_json)

        _log.debug("IPAM plugin return code: %s", proc.returncode)
        _log.debug("IPAM plugin output: \nstdout:\n%s\nstderr:\n%s",
                   out, err)
        return proc.returncode, out

    def _create_endpoint(self, ip_list):
        """Creates a Calico endpoint for this workload via the datastore
        client.

        If creation fails, the IP addresses are released through the IPAM
        plugin, a CNI error is printed and the process exits with
        ERR_CODE_GENERIC.

        :param ip_list - list of IP addresses that have been already allocated
        :return Calico endpoint object
        """
        _log.debug("Creating Calico endpoint with workload_id=%s",
                   self.workload_id)
        try:
            new_endpoint = self._client.create_endpoint(
                self.hostname,
                self.orchestrator_id,
                self.workload_id,
                ip_list)
        except (AddrFormatError, KeyError) as err:
            # AddrFormatError: Raised when an IP address type is not
            #                  compatible with the node.
            # KeyError: Raised when BGP config for host is not found.
            _log.exception("Failed to create Calico endpoint.")
            # Switch the IPAM environment to DELETE so the addresses that
            # were already allocated can be released.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, err.message)
            sys.exit(ERR_CODE_GENERIC)
        else:
            _log.info("Created Calico endpoint with IP address(es) %s",
                      ip_list)
            return new_endpoint

    def _remove_stale_endpoint(self, endpoint):
        """
        Deletes a stale, no-longer-used endpoint from Calico.

        Only the endpoint itself is removed here.  Any IP allocations are
        left untouched and must be released separately through the
        designated IPAM plugin.

        :param endpoint - The stale Calico endpoint to remove.
        :return: None
        """
        _log.info("Removing stale Calico endpoint '%s'", endpoint)

        try:
            self._client.remove_endpoint(endpoint)
        except KeyError:
            # Not expected, since the workload is known to exist; the
            # endpoint is being discarded anyway, so carry on.
            _log.info("Error removing stale endpoint, ignoring")

    def _remove_workload(self):
        """Removes this container's workload from the Calico datastore.

        The workload is first looked up by self.workload_id.  If that is not
        found, a second attempt is made using the container ID as the
        workload ID under the "cni" orchestrator, which is how earlier
        releases of the plugin stored workloads.  A workload missing under
        both IDs is logged and otherwise ignored.

        :return: None
        """
        _log.info("Removing Calico workload '%s'", self.workload_id)
        try:
            self._client.remove_workload(hostname=self.hostname,
                                         orchestrator_id=self.orchestrator_id,
                                         workload_id=self.workload_id)
            return
        except KeyError:
            _log.debug("Could not find workload with workload ID %s.",
                       self.workload_id)

        # Fall back to the container ID.  Earlier releases of the plugin
        # used the container ID for the workload ID rather than the
        # Kubernetes pod name and namespace.
        try:
            self._client.remove_workload(hostname=self.hostname,
                                         orchestrator_id="cni",
                                         workload_id=self.container_id)
        except KeyError:
            _log.warning("Could not find workload with container ID %s.",
                         self.container_id)


    def _provision_veth(self, endpoint):
        """Provisions veth for given endpoint.

        Uses the netns path in self.cni_netns (resolved against the current
        working directory) and the interface name in self.interface.  On
        success the endpoint's MAC address is recorded and the endpoint is
        written back to the datastore.

        If provisioning fails, the workload is removed, the IP addresses are
        released, a CNI error is printed and the process exits with
        ERR_CODE_GENERIC.

        :param endpoint - The Calico endpoint to provision a veth for.
        :return Calico endpoint object
        """
        _log.debug("Provisioning Calico veth interface")
        netns_path = os.path.abspath(os.path.join(os.getcwd(), self.cni_netns))
        _log.debug("netns path: %s", netns_path)

        try:
            endpoint.mac = endpoint.provision_veth(
                Namespace(netns_path), self.interface)
        except CalledProcessError as e:
            _log.exception("Failed to provision veth interface for endpoint %s",
                           endpoint.name)
            self._remove_workload()
            # Switch the IPAM environment to DELETE so the allocated
            # addresses can be released.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            # CalledProcessError does not populate `message`, so fall back
            # to its string form (command and exit status) rather than
            # reporting an empty error message.
            message = getattr(e, "message", "") or str(e)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        _log.debug("Endpoint has mac address: %s", endpoint.mac)

        # Persist the endpoint, now including its MAC address.
        self._client.set_endpoint(endpoint)
        _log.info("Provisioned %s in netns %s", self.interface, netns_path)
        return endpoint

    def _remove_veth(self, endpoint):
        """Deletes the veth named after the given endpoint.

        A CalledProcessError raised while removing the veth is logged as a
        warning and otherwise ignored.

        :param endpoint - The Calico endpoint whose veth should be removed.
        :return: None
        """
        veth_name = endpoint.name
        _log.info("Removing veth for endpoint: %s", veth_name)
        try:
            was_removed = netns.remove_veth(veth_name)
        except CalledProcessError:
            _log.warning("Unable to remove veth %s", veth_name)
        else:
            _log.debug("Successfully removed endpoint %s? %s",
                       veth_name, was_removed)

    @handle_datastore_error
    def _get_endpoint(self):
        """Get endpoint matching self.workload_id.

        If we cannot find an endpoint using self.workload_id, try
        using self.container_id under the "cni" orchestrator.  In earlier
        versions of the plugin, the container ID was used as the workload ID.

        Return None if no endpoint is found.
        Prints a CNI error and exits with ERR_CODE_GENERIC if multiple
        endpoints are found by either lookup.

        :return: Endpoint object if found, None if not found
        """
        # Each lookup is (description, orchestrator ID, workload ID), tried
        # in order.  Handling both in the same loop ensures a
        # MultipleEndpointsMatch from the fallback lookup is treated the
        # same way as one from the primary lookup.
        lookups = (
            ("workload", self.orchestrator_id, self.workload_id),
            ("container", "cni", self.container_id),
        )
        for id_kind, orchestrator_id, workload_id in lookups:
            _log.debug("Looking for endpoint that matches %s ID %s",
                       id_kind, workload_id)
            try:
                return self._client.get_endpoint(
                    hostname=self.hostname,
                    orchestrator_id=orchestrator_id,
                    workload_id=workload_id
                )
            except KeyError:
                _log.debug("No endpoint found matching %s ID %s",
                           id_kind, workload_id)
            except MultipleEndpointsMatch:
                message = "Multiple Endpoints found matching ID %s" % \
                        workload_id
                print_cni_error(ERR_CODE_GENERIC, message)
                sys.exit(ERR_CODE_GENERIC)

        # We were unable to find an endpoint using either the workload ID
        # or the container ID.
        return None

    def _find_ipam_plugin(self):
        """Searches the CNI path for the IPAM plugin binary.

        Each ":"-separated directory in self.cni_path is checked in order for
        a file named after self.ipam_type; the first match wins.

        :rtype : str
        :return: plugin_path - absolute path of IPAM plugin binary, or an
        empty string if no matching file is found.
        """
        ipam_type = self.ipam_type
        for directory in self.cni_path.split(":"):
            _log.debug("Looking for plugin %s in path %s",
                       ipam_type, directory)
            candidate = os.path.abspath(os.path.join(directory, ipam_type))
            if os.path.isfile(candidate):
                _log.debug("Found plugin %s in path %s",
                           ipam_type, directory)
                return str(candidate)

        # Nothing matched in any directory.
        return ""