Example #1
class BasePolicyDriver(object):
    """
    Abstract base class of a CNI Policy Driver.

    The CNI Policy Driver is responsible for applying network policy to
    networked containers and creating/applying Calico profiles to Calico
    endpoints.
    """
    def __init__(self):

        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        self.profile_name = None
        """
        Name of the profile to attach to the endpoint. Must be set in the
        __init__ of each subclass.
        """

    def apply_profile(self, endpoint):
        """Sets a profile for the networked container on the given endpoint.

        Creates the profile if it does not already exist.

        :param endpoint: The Calico endpoint to apply the profile to.
        :return: None
        """
        assert self.profile_name, "No profile name set."
        if not self._client.profile_exists(self.profile_name):
            # If the profile doesn't exist, create it.
            _log.info("Creating new profile '%s'", self.profile_name)
            rules = self.generate_rules()
            self._client.create_profile(self.profile_name, rules)

        # Check if the profile has already been applied.
        if self.profile_name in endpoint.profile_ids:
            _log.warning("Endpoint already in profile %s", 
                         self.profile_name)
            return

        # Set the default profile on this pod's Calico endpoint.
        _log.info("Appending profile '%s' to endpoint %s",
                  self.profile_name, endpoint.endpoint_id)
        try:
            self._client.append_profiles_to_endpoint(
                    profile_names=[self.profile_name],
                    endpoint_id=endpoint.endpoint_id
            )
        except (KeyError, MultipleEndpointsMatch) as e:
            _log.exception("Failed to apply profile to endpoint %s: %s",
                           endpoint.name, e.message)
            raise ApplyProfileError(e.message)
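A minimal sketch of how a concrete driver might satisfy this contract. The DefaultPolicyDriver name and the allow-all behavior are assumptions for illustration; only the requirement that subclasses set profile_name and implement generate_rules() comes from the base class above.

class DefaultPolicyDriver(BasePolicyDriver):
    def __init__(self, network_name):
        BasePolicyDriver.__init__(self)
        # Satisfy the base class contract: profile_name must be set here.
        self.profile_name = network_name

    def generate_rules(self):
        # Returning None is assumed to leave the datastore client's
        # default profile rules in place.
        return None

# Usage: driver.apply_profile(endpoint) creates the profile on first
# use and appends it to the endpoint's profile list.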
Example #2
    def __init__(self):

        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        self.profile_name = None
        """
Example #3
    def __init__(self, network_name):

        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        self.profile_name = network_name
        """
        Name of the profile to attach to the endpoint.
        """

        # Validate the given network name to make sure it is compatible with
        # Calico policy.
        if not validate_characters(network_name):
            raise ValueError(
                "Invalid characters detected in the given network "
                "name, %s. Only letters a-z, numbers 0-9, and "
                "symbols _.- are supported." % network_name)
Example #4
def save_diags(log_dir):
    # Create temp directory
    temp_dir = tempfile.mkdtemp()
    global temp_diags_dir
    temp_diags_dir = os.path.join(temp_dir, 'diagnostics')
    os.mkdir(temp_diags_dir)
    print("Using temp dir: %s" % temp_dir)

    write_diags(None, "date")
    write_diags(None, "hostname")
    write_diags("Dumping netstat", "netstat --all --numeric")
    write_diags("Dumping routes (IPv4)", "ip -4 route")
    write_diags("Dumping routes (IPv6)", "ip -6 route")
    write_diags("Dumping interface info (IPv4)", "ip -4 addr")
    write_diags("Dumping interface info (IPv6)", "ip -6 addr")
    write_diags("Dumping iptables (IPv4)", "iptables-save")
    write_diags("Dumping iptables (IPv6)", "ip6tables-save")
    write_diags("Dumping ipsets", "ipset list")
    # If running under rkt, get the journal for the calico/node container.
    write_diags("Copying journal for calico-node.service",
                "journalctl -u calico-node.service --no-pager")

    # Ask Felix to dump stats to its log file - ignore errors as the
    # calico/node container might not be running.  Gathering of the logs is
    # dependent on environment.
    print("Dumping felix stats")
    subprocess.call(["pkill", "-SIGUSR1", "felix"])

    # When not running under rkt, Calico logs are in the log directory.
    if os.path.isdir(log_dir):
        print("Copying Calico logs")
        # Skip the lock files as they can only be copied by root.
        copytree(log_dir,
                 os.path.join(temp_diags_dir, "logs"),
                 ignore=ignore_patterns('lock'))
    else:
        print('No logs found in %s; skipping log copying' % log_dir)

    # Dump the contents of the etcd datastore.
    print("Dumping datastore")
    with DiagsErrorWriter(temp_diags_dir, 'etcd_calico') as f:
        try:
            datastore_client = DatastoreClient()
            datastore_data = datastore_client.etcd_client.read("/calico",
                                                               recursive=True)
            f.write("dir?, key, value\n")
            # TODO: python-etcd bug: Leaves show up twice in get_subtree().
            for child in datastore_data.get_subtree():
                if child.dir:
                    f.write("DIR,  %s,\n" % child.key)
                else:
                    f.write("FILE, %s, %s\n" % (child.key, child.value))
        except EtcdException as e:
            print("Unable to dump etcd datastore")
            f.write("Unable to dump etcd datastore: %s" % e)
Example #5
    def __init__(self):
        self._event_queue = Queue.Queue()
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", "https://10.100.0.1:443")
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN")
        """
        Auth token to use when accessing the API.
        """

        path = NET_POLICY_WATCH_PATH % self.k8s_api
        self._network_policy_thread = Thread(target=self._watch_api, 
                                             args=(path,))
        self._network_policy_thread.daemon = True
        """
        Thread which performs watch of Kubernetes API for changes to 
        NetworkPolicy objects.
        """

        path = NAMESPACE_WATCH_PATH % self.k8s_api
        self._namespace_thread = Thread(target=self._watch_api, 
                                             args=(path,))
        self._namespace_thread.daemon = True
        """
        Thread which performs watch of Kubernetes API for changes to 
        Namespace objects.
        """

        path = POD_WATCH_PATH % self.k8s_api
        self._pod_thread = Thread(target=self._watch_api, 
                                  args=(path,))
        self._pod_thread.daemon = True
        """
        Thread which performs watch of Kubernetes API for changes to 
        Pod objects.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._network_policies = {}
        self._namespaces = {}
        self._pods = {}
        """
Example #6
    def __init__(self):
        self._event_queue = Queue.Queue(maxsize=MAX_QUEUE_SIZE)
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._handlers = {}
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         delete_network_policy)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         delete_namespace)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, add_update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, delete_pod)
        """
Example #7
    def __init__(self, network_name):

        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        self.profile_name = network_name
        """
        Name of the profile to attach to the endpoint.
        """

        # Validate the given network name to make sure it is compatible with
        # Calico policy.
        if not validate_characters(network_name):
            raise ValueError("Invalid characters detected in the given network "
                             "name, %s. Only letters a-z, numbers 0-9, and "
                             "symbols _.- are supported.", network_name)
Example #8
    def __init__(self):
        self._event_queue = Queue.Queue()
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.  
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._handlers = {}
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED, 
                         self._add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED, 
                         self._delete_network_policy)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED, 
                         self._add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED, 
                         self._delete_namespace)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, 
                         self._add_update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, 
                         self._delete_pod)
        """
Example #9
class CniPlugin(object):
    """
    Class which encapsulates the function of a CNI plugin.
    """
    def __init__(self, network_config, env):
        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        # Parse CNI_ARGS into dictionary so we can extract values.
        cni_args = parse_cni_args(env.get(CNI_ARGS_ENV, ""))

        self.k8s_pod_name = cni_args.get(K8S_POD_NAME)
        """
        Name of Kubernetes pod if running under Kubernetes, else None.
        """

        self.k8s_namespace = cni_args.get(K8S_POD_NAMESPACE)
        """
        Name of Kubernetes namespace if running under Kubernetes, else None.
        """

        self.network_config = network_config
        """
        Network config as provided in the CNI network file passed in
        via stdin.
        """

        self.network_name = network_config["name"]
        """
        Name of the network from the provided network config file.
        """

        self.ipam_type = network_config["ipam"]["type"]
        """
        Type of IPAM to use, e.g. calico-ipam.
        """

        self.hostname = network_config.get("hostname", socket.gethostname())
        """
        The hostname to register endpoints under.
        """

        self.container_engine = get_container_engine(self.k8s_pod_name)
        """
        Chooses the correct container engine based on the given configuration.
        """

        self.ipam_env = env
        """
        Environment dictionary used when calling the IPAM plugin.
        """

        self.command = env[CNI_COMMAND_ENV]
        assert self.command in [CNI_CMD_DELETE, CNI_CMD_ADD], \
                "Invalid CNI command %s" % self.command
        """
        The command to execute for this plugin instance. Required.
        One of:
          - CNI_CMD_ADD
          - CNI_CMD_DELETE
        """

        self.container_id = env[CNI_CONTAINERID_ENV]
        """
        The container's ID in the containerizer. Required.
        """

        self.cni_netns = env[CNI_NETNS_ENV]
        """
        Relative path to the network namespace of this container.
        """

        self.interface = env[CNI_IFNAME_ENV]
        """
        Name of the interface to create within the container.
        """

        self.cni_path = env[CNI_PATH_ENV]
        """
        Path in which to search for CNI plugins.
        """

        self.running_under_k8s = self.k8s_namespace and self.k8s_pod_name
        if self.running_under_k8s:
            self.workload_id = "%s.%s" % (self.k8s_namespace, self.k8s_pod_name)
            self.orchestrator_id = "k8s"
        else:
            self.workload_id = self.container_id
            self.orchestrator_id = "cni"
        kubernetes_config = network_config.get("kubernetes", {})
        self.kubeconfig_path = kubernetes_config.get("kubeconfig")
        self.k8s_node_name = kubernetes_config.get("node_name", socket.gethostname())
        """
        Configure orchestrator specific settings.
        workload_id: In Kubernetes, this is the pod's namespace and name.
                     Otherwise, this is the container ID.
        orchestrator_id: Either "k8s" or "cni".
        """

        # Ensure that the ipam_env CNI_ARGS contains the IgnoreUnknown=1 option
        # See https://github.com/appc/cni/pull/158
        # And https://github.com/appc/cni/pull/127
        self.ipam_env[CNI_ARGS_ENV] = 'IgnoreUnknown=1'
        if env.get(CNI_ARGS_ENV):
            # Append any existing args - if they are set.
            self.ipam_env[CNI_ARGS_ENV] += ";%s" % env.get(CNI_ARGS_ENV)

        self.policy_driver = get_policy_driver(self)
        """
        Chooses the correct policy driver based on the given configuration
        """

    def execute(self):
        """
        Execute the CNI plugin - uses the given CNI_COMMAND to determine
        which action to take.

        :return: None.
        """
        if self.command == CNI_CMD_ADD:
            self.add()
        else:
            self.delete()

    def add(self):
        """"Handles CNI_CMD_ADD requests.

        Configures Calico networking and prints required json to stdout.

        In CNI, a container can be added to multiple networks, in which case
        the CNI plugin will be called multiple times.  In Calico, each network
        is represented by a profile, and each container only receives a single
        endpoint / veth / IP address even when it is on multiple CNI networks.

        :return: None.
        """
        # If this container uses host networking, don't network it.
        # This should only be hit when running in Kubernetes mode with
        # docker - rkt doesn't call plugins when using host networking.
        if self.container_engine.uses_host_networking(self.container_id):
            _log.info("Cannot network container %s since it is configured "
                      "with host networking.", self.container_id)
            sys.exit(0)

        _log.info("Configuring network '%s' for container: %s",
                  self.network_name, self.container_id)

        _log.debug("Checking for existing Calico endpoint")
        endpoint = self._get_endpoint()
        if endpoint and not self.running_under_k8s:
            # We've received a create for an existing container, likely on
            # a new CNI network.  We don't need to configure the veth or
            # assign IP addresses, we simply need to add to the new
            # CNI network.  Kubernetes handles this case
            # differently (see below).
            _log.info("Endpoint for container exists - add to new network")
            output = self._add_existing_endpoint(endpoint)
        elif endpoint and self.running_under_k8s:
            # Running under Kubernetes and we've received a create for
            # an existing workload.  Kubernetes only supports a single CNI
            # network, which means that the old pod has been destroyed
            # under our feet and we need to set up networking on the new one.
            # We should also clean up any stale endpoint / IP assignment.
            _log.info("Kubernetes pod has been recreated")
            self._remove_stale_endpoint(endpoint)

            # Release any previous IP addresses assigned to this workload.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)

            # Clean up any profiles for the stale endpoint
            self.policy_driver.remove_profile()

            # Configure the new workload.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_ADD
            output = self._add_new_endpoint()
        else:
            # No endpoint exists - we need to configure a new one.
            _log.info("No endpoint exists for workload - creating")
            output = self._add_new_endpoint()

        # If all successful, print the IPAM plugin's output to stdout.
        dump = json.dumps(output)
        _log.debug("Printing CNI result to stdout: %s", dump)
        print(dump)

        _log.info("Finished networking container: %s", self.container_id)

    def _add_new_endpoint(self):
        """
        Handles adding a new container to a Calico network.
        """
        # Assign IP addresses using the given IPAM plugin.
        _log.info("Configuring a new Endpoint")
        ipv4, ipv6, ipam_result = self._assign_ips(self.ipam_env)

        # Filter out addresses that didn't get assigned.
        ip_list = [ip for ip in [ipv4, ipv6] if ip is not None]

        # Create the Calico endpoint object.
        endpoint = self._create_endpoint(ip_list)

        # Provision the veth for this endpoint.
        endpoint = self._provision_veth(endpoint)

        # Provision / apply profile on the created endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except PolicyException as e:
            _log.error("Failed to apply profile to endpoint %s",
                       endpoint.name)
            self._remove_veth(endpoint)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message, e.details)
            sys.exit(ERR_CODE_GENERIC)

        # Return the IPAM plugin's result.
        return ipam_result

    def _add_existing_endpoint(self, endpoint):
        """
        Handles adding an existing container to a new Calico network.

        We've already assigned an IP address and created the veth,
        we just need to apply a new profile to this endpoint.
        """
        # Get the already existing IP information for this Endpoint.
        try:
            ip4 = next(iter(endpoint.ipv4_nets))
        except StopIteration:
            # No IPv4 address on this endpoint.
            _log.warning("No IPV4 address attached to existing endpoint")
            ip4 = IPNetwork("0.0.0.0/32")

        try:
            ip6 = next(iter(endpoint.ipv6_nets))
        except StopIteration:
            # No IPv6 address on this endpoint.
            _log.warning("No IPV6 address attached to existing endpoint")
            ip6 = IPNetwork("::/128")

        # Apply a new profile to this endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except PolicyException as e:
            # Hit an exception applying the profile.  We haven't configured
            # anything, so we don't need to clean anything up.  Just exit.
            _log.error("Failed to apply profile to endpoint %s",
                       endpoint.name)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        return {"ip4": {"ip": str(ip4.cidr)},
                "ip6": {"ip": str(ip6.cidr)}}

    def delete(self):
        """Handles CNI_CMD_DELETE requests.

        Remove this container from Calico networking.

        :return: None.
        """
        _log.info("Remove network '%s' from container: %s",
                self.network_name, self.container_id)

        # Step 1: Remove any IP assignments.
        self._release_ip(self.ipam_env)

        # Step 2: Get the Calico endpoint for this workload. If it does not
        # exist, log a warning and exit successfully.
        endpoint = self._get_endpoint()
        if not endpoint:
            _log.warning("No Calico Endpoint for workload: %s",
                         self.workload_id)
            sys.exit(0)

        # Step 3: Delete the veth interface for this endpoint.
        self._remove_veth(endpoint)

        # Step 4: Delete the Calico workload.
        self._remove_workload()

        # Step 5: Delete any profiles for this endpoint
        self.policy_driver.remove_profile()

        _log.info("Finished removing container: %s", self.container_id)

    def _assign_ips(self, env):
        """Assigns and returns an IPv4 address using the IPAM plugin
        specified in the network config file.

        :return: ipv4, ipv6 - The IP addresses assigned by the IPAM plugin.
        """
        # Call the IPAM plugin.  Returns the plugin returncode,
        # as well as the CNI result from stdout.
        _log.debug("Assigning IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_ADD
        rc, result = self._call_ipam_plugin(env)

        try:
            # Load the response - either the assigned IP addresses or
            # a CNI error message.
            ipam_result = json.loads(result)
        except ValueError:
            message = "Failed to parse IPAM response, exiting"
            _log.exception(message)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        if rc:
            # The IPAM plugin failed to assign an IP address. At this point in
            # execution, we haven't done anything yet, so we don't have to
            # clean up.
            _log.error("IPAM plugin error (rc=%s): %s", rc, result)
            code = ipam_result.get("code", ERR_CODE_GENERIC)
            msg = ipam_result.get("msg", "Unknown IPAM error")
            details = ipam_result.get("details")
            print_cni_error(code, msg, details)
            sys.exit(int(code))

        try:
            ipv4 = IPNetwork(ipam_result["ip4"]["ip"])
            _log.info("IPAM plugin assigned IPv4 address: %s", ipv4)
        except KeyError:
            ipv4 = None
        except (AddrFormatError, ValueError):
            message = "Invalid or Empty IPv4 address: %s" % \
                      (ipam_result["ip4"]["ip"])
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        try:
            ipv6 = IPNetwork(ipam_result["ip6"]["ip"])
            _log.info("IPAM plugin assigned IPv6 address: %s", ipv6)
        except KeyError:
            ipv6 = None
        except (AddrFormatError, ValueError):
            message = "Invalid or Empty IPv6 address: %s" % \
                      (ipam_result["ip6"]["ip"])
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        if not ipv4 and not ipv6:
            message = "IPAM plugin did not return any valid addresses."
            _log.warning("Bad IPAM plugin response: %s", ipam_result)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return ipv4, ipv6, ipam_result

    def _release_ip(self, env):
        """Releases the IP address(es) for this container using the IPAM plugin
        specified in the network config file.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin
        :return: None.
        """
        _log.info("Releasing IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_DELETE
        rc, _ = self._call_ipam_plugin(env)

        if rc:
            _log.error("IPAM plugin failed to release IP address")

    def _call_ipam_plugin(self, env):
        """
        Executes a CNI IPAM plugin.  If `calico-ipam` is the provided IPAM
        type, then calls directly into ipam.py as a performance optimization.

        For all other types of IPAM, searches the CNI_PATH for the
        correct binary and executes it.

        :return: Tuple of return code, response from the IPAM plugin.
        """
        if self.ipam_type == "calico-ipam":
            _log.info("Using Calico IPAM")
            try:
                response = IpamPlugin(env,
                                      self.network_config["ipam"]).execute()
                code = 0
            except CniError as e:
                # We hit a CNI error - return the appropriate CNI formatted
                # error dictionary.
                response = json.dumps({"code": e.code,
                                       "msg": e.msg,
                                       "details": e.details})
                code = e.code
        elif self.ipam_type == "host-local":
            # We've been told to use the "host-local" IPAM plugin.
            # Check if we need to use the Kubernetes podCidr for this node, and
            # if so replace the subnet field with the correct value.
            if self.network_config["ipam"].get("subnet") == "usePodCidr":
                if not self.running_under_k8s:
                    print_cni_error(ERR_CODE_GENERIC, "Invalid network config",
                            "Must be running under Kubernetes to use 'subnet: usePodCidr'")
                    sys.exit(ERR_CODE_GENERIC)
                _log.info("Using Kubernetes podCIDR for node: %s", self.k8s_node_name)
                pod_cidr = self._get_kubernetes_pod_cidr()
                self.network_config["ipam"]["subnet"] = str(pod_cidr)

            # Call the IPAM plugin.
            _log.debug("Calling host-local IPAM plugin")
            code, response = self._call_binary_ipam_plugin(env)
        else:
            # Using some other IPAM plugin - call it.
            _log.debug("Using binary plugin")
            code, response = self._call_binary_ipam_plugin(env)

        # Return the IPAM return code and output.
        _log.debug("IPAM response (rc=%s): %s", code, response)
        return code, response

    def _get_kubernetes_pod_cidr(self):
        """
        Attempt to get the Kubernetes pod CIDR for this node.
        First check if we've written it to disk.  If so, use that value.  If
        not, then query the Kubernetes API for it.
        """
        _log.info("Getting node.spec.podCidr from API, kubeconfig: %s",
                  self.kubeconfig_path)
        if not self.kubeconfig_path:
            # For now, kubeconfig is the only supported auth method.
            print_cni_error(ERR_CODE_GENERIC, "Missing kubeconfig",
                    "usePodCidr requires specification of kubeconfig file")
            sys.exit(ERR_CODE_GENERIC)

        # Query the API for this node.  Default node name to the hostname.
        try:
            api = HTTPClient(KubeConfig.from_file(self.kubeconfig_path))
            node = None
            for n in Node.objects(api):
                _log.debug("Checking node: %s", n.obj["metadata"]["name"])
                if n.obj["metadata"]["name"] == self.k8s_node_name:
                    node = n
                    break
            if not node:
                raise KeyError("Unable to find node in API: %s", self.k8s_node_name)
            _log.debug("Found node %s: %s: ", node.obj["metadata"]["name"],
                       node.obj["spec"])
        except Exception:
            print_cni_error(ERR_CODE_GENERIC, "Error querying Kubernetes API",
                    "Failed to get podCidr from Kubernetes API")
            sys.exit(ERR_CODE_GENERIC)
        else:
            pod_cidr = node.obj["spec"].get("podCIDR")
            if not pod_cidr:
                print_cni_error(ERR_CODE_GENERIC, "Missing podCidr",
                        "No podCidr for node %s" % self.k8s_node_name)
                sys.exit(ERR_CODE_GENERIC)
        _log.debug("Using podCidr: %s", pod_cidr)
        return pod_cidr

    def _call_binary_ipam_plugin(self, env):
        """Calls through to the specified IPAM plugin binary.

        Utilizes the IPAM config as specified in the CNI network
        configuration file.  A dictionary with the following form:
            {
              type: <IPAM TYPE>
            }

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin
        :return: Tuple of return code, response from the IPAM plugin.
        """
        # Find the correct plugin based on the given type.
        plugin_path = self._find_ipam_plugin()
        if not plugin_path:
            message = "Could not find IPAM plugin of type %s in path %s." % \
                      (self.ipam_type, self.cni_path)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        # Execute the plugin and return the result.
        _log.info("Using IPAM plugin at: %s", plugin_path)
        _log.debug("Passing in environment to IPAM plugin: \n%s",
                   json.dumps(env, indent=2))
        p = Popen(plugin_path, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env)
        stdout, stderr = p.communicate(json.dumps(self.network_config))
        _log.debug("IPAM plugin return code: %s", p.returncode)
        _log.debug("IPAM plugin output: \nstdout:\n%s\nstderr:\n%s",
                   stdout, stderr)
        return p.returncode, stdout

    def _create_endpoint(self, ip_list):
        """Creates an endpoint in the Calico datastore with the client.

        :param ip_list - list of IP addresses that have been already allocated
        :return Calico endpoint object
        """
        _log.debug("Creating Calico endpoint with workload_id=%s",
                   self.workload_id)
        try:
            endpoint = self._client.create_endpoint(self.hostname,
                                                    self.orchestrator_id,
                                                    self.workload_id,
                                                    ip_list)
        except (AddrFormatError, KeyError) as e:
            # AddrFormatError: Raised when an IP address type is not
            #                  compatible with the node.
            # KeyError: Raised when BGP config for host is not found.
            _log.exception("Failed to create Calico endpoint.")
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        _log.info("Created Calico endpoint with IP address(es) %s", ip_list)
        return endpoint

    def _remove_stale_endpoint(self, endpoint):
        """
        Removes the given endpoint from Calico.
        Called when we discover a stale endpoint that is no longer in use.
        Note that this doesn't release IP allocations - that must be done
        using the designated IPAM plugin.
        """
        _log.info("Removing stale Calico endpoint '%s'", endpoint)
        try:
            self._client.remove_endpoint(endpoint)
        except KeyError:
            # Shouldn't hit this since we know the workload exists.
            _log.info("Error removing stale endpoint, ignoring")

    def _remove_workload(self):
        """Removes the given endpoint from the Calico datastore

        :return: None
        """
        try:
            _log.info("Removing Calico workload '%s'", self.workload_id)
            self._client.remove_workload(hostname=self.hostname,
                                         orchestrator_id=self.orchestrator_id,
                                         workload_id=self.workload_id)
        except KeyError:
            # Attempt to remove the workload using the container ID as the
            # workload ID.  Earlier releases of the plugin used the
            # container ID for the workload ID rather than the Kubernetes pod
            # name and namespace.
            _log.debug("Could not find workload with workload ID %s.",
                         self.workload_id)
            try:
                self._client.remove_workload(hostname=self.hostname,
                                             orchestrator_id="cni",
                                             workload_id=self.container_id)
            except KeyError:
                _log.warning("Could not find workload with container ID %s.",
                             self.container_id)


    def _provision_veth(self, endpoint):
        """Provisions veth for given endpoint.

        Uses the netns relative path passed in through CNI_NETNS_ENV and
        interface passed in through CNI_IFNAME_ENV.

        :param endpoint
        :return Calico endpoint object
        """
        _log.debug("Provisioning Calico veth interface")
        netns_path = os.path.abspath(os.path.join(os.getcwd(), self.cni_netns))
        _log.debug("netns path: %s", netns_path)

        try:
            endpoint.mac = endpoint.provision_veth(
                Namespace(netns_path), self.interface)
        except CalledProcessError as e:
            _log.exception("Failed to provision veth interface for endpoint %s",
                           endpoint.name)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        _log.debug("Endpoint has mac address: %s", endpoint.mac)

        self._client.set_endpoint(endpoint)
        _log.info("Provisioned %s in netns %s", self.interface, netns_path)
        return endpoint

    def _remove_veth(self, endpoint):
        """Remove the veth from given endpoint.

        Handles any errors encountered while removing the endpoint.
        """
        _log.info("Removing veth for endpoint: %s", endpoint.name)
        try:
            removed = netns.remove_veth(endpoint.name)
            _log.debug("Successfully removed endpoint %s? %s",
                       endpoint.name, removed)
        except CalledProcessError:
            _log.warning("Unable to remove veth %s", endpoint.name)

    @handle_datastore_error
    def _get_endpoint(self):
        """Get endpoint matching self.workload_id.

        If we cannot find an endpoint using self.workload_id, try
        using self.container_id.

        Return None if no endpoint is found.
        Exits with an error if multiple endpoints are found.

        :return: Endpoint object if found, None if not found
        """
        try:
            _log.debug("Looking for endpoint that matches workload ID %s",
                       self.workload_id)
            endpoint = self._client.get_endpoint(
                hostname=self.hostname,
                orchestrator_id=self.orchestrator_id,
                workload_id=self.workload_id
            )
        except KeyError:
            # Try to find using the container ID.  In earlier version of the
            # plugin, the container ID was used as the workload ID.
            _log.debug("No endpoint found matching workload ID %s",
                       self.workload_id)
            try:
                endpoint = self._client.get_endpoint(
                    hostname=self.hostname,
                    orchestrator_id="cni",
                    workload_id=self.container_id
                )
            except KeyError:
                # We were unable to find an endpoint using either the
                # workload ID or the container ID.
                _log.debug("No endpoint found matching container ID %s",
                           self.container_id)
                endpoint = None
        except MultipleEndpointsMatch:
            message = "Multiple Endpoints found matching ID %s" % \
                    self.workload_id
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return endpoint

    def _find_ipam_plugin(self):
        """Locates IPAM plugin binary in plugin path and returns absolute path
        of plugin if found; if not found returns an empty string.

        IPAM plugin type is set in the network config file.
        The plugin path is the CNI path passed through the environment variable
        CNI_PATH.

        :rtype : str
        :return: plugin_path - absolute path of IPAM plugin binary
        """
        plugin_type = self.ipam_type
        plugin_path = ""
        for path in self.cni_path.split(":"):
            _log.debug("Looking for plugin %s in path %s", plugin_type, path)
            temp_path = os.path.abspath(os.path.join(path, plugin_type))
            if os.path.isfile(temp_path):
                _log.debug("Found plugin %s in path %s", plugin_type, path)
                plugin_path = temp_path
                break
        return str(plugin_path)
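A sketch of how CniPlugin is presumably driven from the plugin's entry point: CNI passes the network config on stdin and its parameters in CNI_* environment variables (this main is an illustration, not the plugin's actual entry point):

def main():
    # The CNI runtime supplies the network config document on stdin.
    network_config = json.loads(sys.stdin.read())
    plugin = CniPlugin(network_config, os.environ.copy())
    plugin.execute()

if __name__ == "__main__":
    main()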
Example #10
class PolicyAgent(object):
    def __init__(self):
        self._event_queue = Queue.Queue(maxsize=MAX_QUEUE_SIZE)
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._handlers = {}
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         delete_network_policy)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         delete_namespace)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, add_update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, delete_pod)
        """
        Handlers for watch events.
        """

    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the
        given resource type.

        :param resource_type: The type of resource that this handles.
        :param event_type: The type of event that this handles.
        :param handler: The callable to execute when events are received.
        :return None
        """
        _log.info("Setting %s %s handler: %s", resource_type, event_type,
                  handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.

        :param resource_type: The type of resource that needs handling.
        :param event_type: The type of event that needs handling.
        :return None
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        PolicyAgent.run() is called at program init to spawn watch threads,
        then loops reading responses from the queue as they come in.
        """
        # Ensure the tier exists.
        metadata = {"order": 50}
        self._client.set_policy_tier_metadata(NET_POL_TIER_NAME, metadata)

        # Read initial state from Kubernetes API.
        self.read_initial_state()

        # Loop and read updates from the queue.
        self.read_updates()

    def read_initial_state(self):
        """
        Reads initial state from the API, processes existing resources, and
        kicks off threads to watch the Kubernetes API for changes.
        """
        resources = [
            RESOURCE_TYPE_NETWORK_POLICY, RESOURCE_TYPE_NAMESPACE,
            RESOURCE_TYPE_POD
        ]
        for resource_type in resources:
            # Get existing resources from the API.
            _log.info("Getting existing %s objects", resource_type)
            get_url = GET_URLS[resource_type] % self.k8s_api
            resp = self._api_get(get_url, stream=False)
            _log.info("Response: %s", resp)

            # If we hit an error, raise it.  This will kill the agent,
            # which will be re-started by Kubernetes.
            if resp.status_code != 200:
                _log.error("Error querying API: %s", resp.json())
                raise Exception("Failed to query resource: %s" % resource_type)

            # Get the list of existing API objects from the response, as
            # well as the latest resourceVersion.
            resources = resp.json()["items"]
            metadata = resp.json().get("metadata", {})
            resource_version = metadata.get("resourceVersion")
            _log.debug("%s metadata: %s", resource_type, metadata)

            # Add the existing resources to the queue to be processed.
            _log.info("%s existing %s(s)", len(resources), resource_type)
            for resource in resources:
                _log.debug("Queueing update: %s", resource)
                update = (TYPE_ADDED, resource_type, resource)
                self._event_queue.put(update,
                                      block=True,
                                      timeout=QUEUE_PUT_TIMEOUT)

            # Start watching for updates from the last resourceVersion.
            watch_url = WATCH_URLS[resource_type] % self.k8s_api
            t = Thread(target=self._watch_api,
                       args=(watch_url, resource_version))
            t.daemon = True
            t.start()
            _log.info("Started watch on: %s", resource_type)

    def read_updates(self):
        """
        Reads from the update queue.

        An update on the queue must be a tuple of:
          (event_type, resource_type, resource)

        Where:
          - event_type: One of "ADDED", "MODIFIED", or "DELETED"
          - resource_type: e.g "Namespace", "Pod", "NetworkPolicy"
          - resource: The parsed json resource from the API matching
                      the given resource_type.
        """
        while True:
            try:
                # Wait for an update on the event queue.
                _log.debug("Reading from event queue")
                update = self._event_queue.get(block=True)
                event_type, resource_type, resource = update
                self._event_queue.task_done()

                # We've received an update - process it.
                _log.debug("Read event: %s, %s, %s", event_type, resource_type,
                           json.dumps(resource, indent=2))
                self._process_update(event_type, resource_type, resource)
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                _log.exception("Invalid update: %s", update)

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly.
        """
        _log.debug("Processing '%s' for kind '%s'", event_type, resource_type)

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Treat "modified" as "added".
        if event_type == TYPE_MODIFIED:
            _log.debug("Treating 'MODIFIED' as 'ADDED'")
            event_type = TYPE_ADDED

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s", event_type, resource_type)
        else:
            _log.debug("Calling handler: %s", handler)
            try:
                handler(key, resource)
            except KeyError:
                _log.exception("Invalid %s: %s", resource_type,
                               json.dumps(resource, indent=2))

    def _watch_api(self, path, resource_version=None):
        """
        Work loop for the watch thread.
        """
        _log.info("Starting watch on path: %s", path)
        while True:
            # Attempt to stream API resources.
            try:
                response = self._api_get(path,
                                         stream=True,
                                         resource_version=resource_version)
                _log.debug("Watch response for %s: %s", path, response)
            except requests.ConnectionError:
                _log.exception("Error querying path: %s", path)
                time.sleep(10)
                continue

            # Check for successful response.
            if response.status_code != 200:
                _log.error("Error watching path: %s", response.text)
                time.sleep(10)
                continue

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Read line: %s", line)
                    parsed = json.loads(line)
                    update = (parsed["type"], parsed["object"]["kind"],
                              parsed["object"])
                    self._event_queue.put(update,
                                          block=True,
                                          timeout=QUEUE_PUT_TIMEOUT)

                    # Extract the latest resource version.
                    new_ver = parsed["object"]["metadata"]["resourceVersion"]
                    _log.debug("Update resourceVersion, was: %s, now: %s",
                               resource_version, new_ver)
                    resource_version = new_ver

    def _api_get(self, path, stream, resource_version=None):
        """
        Get or stream from the API, given a resource.

        :param path: The API path to get.
        :param stream: Whether to return a single object or a stream.
        :param resource_version: The resourceVersion at which to
        start the stream.
        :return: A requests Response object
        """
        # Append the resource version - this indicates where the
        # watch should start.
        _log.info("Getting API resources '%s' at version '%s'. stream=%s",
                  path, resource_version, stream)
        if resource_version:
            path += "?resourceVersion=%s" % resource_version

        session = requests.Session()
        if self.auth_token:
            session.headers.update(
                {'Authorization': 'Bearer ' + self.auth_token})
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, verify=verify, stream=stream)
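For reference, each non-empty line consumed by _watch_api parses to a standard Kubernetes watch event, which is why the code indexes parsed["type"] and parsed["object"]. An illustrative (not literal) event, written as the Python dict it parses to:

parsed = {
    "type": "ADDED",
    "object": {
        "kind": "Pod",
        "metadata": {
            "name": "nginx",
            "namespace": "default",
            "resourceVersion": "10245",
        },
    },
}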
Example #11
    def __init__(self):
        self._event_queue = Queue.Queue(maxsize=MAX_QUEUE_SIZE)
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._leader_election_url = os.environ.get("ELECTION_URL",
                                                   "http://127.0.0.1:4040/")
        """
        Use this URL to get leader election status from the sidecar container.
        """

        elect = os.environ.get("LEADER_ELECTION", "false")
        self._leader_elect = elect.lower() == "true"
        """
        Whether or not leader election is enabled.  If set to False, this
        policy controller will assume it is the only instance.
        """

        self._handlers = {}
        """
        Keeps track of which handlers to execute for various events.
        """

        # Handlers for NetworkPolicy events.
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_MODIFIED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         delete_network_policy)

        # Handlers for Namespace events.
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_MODIFIED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         delete_namespace)

        # Handlers for Pod events.
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED,
                         add_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_MODIFIED,
                         update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED,
                         delete_pod)
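_is_leader is not shown in this fragment. A plausible sketch, assuming the election sidecar at _leader_election_url answers with JSON naming the current leader and that this pod's name is exposed in a POD_NAME environment variable (both are assumptions):

    def _is_leader(self):
        # When election is disabled, act as the sole instance.
        if not self._leader_elect:
            return True
        response = requests.get(self._leader_election_url)
        leader = response.json().get("name")
        # POD_NAME is assumed to be injected via the downward API.
        return leader == os.environ.get("POD_NAME")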
Example #12
class Controller(object):
    def __init__(self):
        self._event_queue = Queue.Queue(maxsize=MAX_QUEUE_SIZE)
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._leader_election_url = os.environ.get("ELECTION_URL",
                                                   "http://127.0.0.1:4040/")
        """
        Use this URL to get leader election status from the sidecar container.
        """

        elect = os.environ.get("LEADER_ELECTION", "false")
        self._leader_elect = elect.lower() == "true"
        """
        Whether or not leader election is enabled.  If set to False, this
        policy controller will assume it is the only instance.
        """

        self._handlers = {}
        """
        Keeps track of which handlers to execute for various events.
        """

        # Handlers for NetworkPolicy events.
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_MODIFIED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         delete_network_policy)

        # Handlers for Namespace events.
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_MODIFIED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         delete_namespace)

        # Handlers for Pod events.
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED,
                         add_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_MODIFIED,
                         update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED,
                         delete_pod)

    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the
        given resource type.

        :param resource_type: The type of resource that this handles.
        :param event_type: The type of event that this handles.
        :param handler: The callable to execute when events are received.
        :return None
        """
        _log.debug("Setting %s %s handler: %s",
                    resource_type, event_type, handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.

        :param resource_type: The type of resource that needs handling.
        :param event_type: The type of event that needs handling.
        :return None
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        Controller.run() is called at program init to spawn watch threads,
        then loops reading responses from the queue as they come in.
        """
        _log.info("Leader election enabled? %s", self._leader_elect)
        if self._leader_elect:
            # Wait until we've been elected leader to start.
            self._wait_for_leadership()
            self._start_leader_thread()

        # Ensure the tier exists.
        metadata = {"order": NET_POL_TIER_ORDER}
        self._client.set_policy_tier_metadata(NET_POL_TIER_NAME, metadata)

        # Read initial state from Kubernetes API.
        self.start_workers()

        # Loop and read updates from the queue.
        self.read_updates()

    def _wait_for_leadership(self):
        """
        Loops until this controller has been elected leader.
        """
        _log.info("Waiting for this controller to be elected leader")
        while True:
            try:
                is_leader = self._is_leader()
            except requests.exceptions.ConnectionError:
                # During startup, the leader election container
                # might not be up yet.  Handle this case gracefully.
                _log.info("Waiting for leader election container")
            else:
                # Successful response from the leader election container.
                # Check if we are the elected leader.
                if is_leader:
                    _log.info("We have been elected leader")
                    break
            time.sleep(1)

    def _start_leader_thread(self):
        """
        Starts a thread which periodically checks if this controller is the leader.
        If determined that we are no longer the leader, exit.
        """
        t = Thread(target=self._watch_leadership)
        t.daemon = True
        t.start()
        _log.info("Started leader election watcher")

    def _watch_leadership(self):
        """
        Watches to see if this policy controller is still the elected leader.
        If no longer the elected leader, exits.
        """
        _log.info("Watching for leader election changes")
        while True:
            try:
                if not self._is_leader():
                    _log.warning("No longer the elected leader - exiting")
                    os._exit(1)
                time.sleep(1)
            except Exception:
                _log.exception("Exception verifying leadership - exiting")
                os._exit(1)

    def start_workers(self):
        """
        Starts the worker threads which manage each Kubernetes
        API resource.
        """
        resources = [RESOURCE_TYPE_NETWORK_POLICY,
                     RESOURCE_TYPE_NAMESPACE,
                     RESOURCE_TYPE_POD]

        # For each resource type, start a thread which syncs it from the
        # kubernetes API.
        for resource_type in resources:
            t = Thread(target=self._manage_resource, args=(resource_type,))
            t.daemon = True
            t.start()
            _log.info("Started worker thread for: %s", resource_type)

    def read_updates(self):
        """
        Reads from the update queue.

        An update on the queue must be a tuple of:
          (event_type, resource_type, resource)

        Where:
          - event_type: Either "ADDED", "MODIFIED", "DELETED", "ERROR"
          - resource_type: e.g "Namespace", "Pod", "NetworkPolicy"
          - resource: The parsed json resource from the API matching
                      the given resource_type.
        """
        while True:
            try:
                # Wait for an update on the event queue.
                _log.debug("Reading from event queue")
                update = self._event_queue.get(block=True)
                event_type, resource_type, resource = update

                # We've received an update - process it.
                _log.debug("Read event: %s, %s, %s",
                           event_type,
                           resource_type,
                           json.dumps(resource, indent=2))
                self._process_update(event_type,
                                     resource_type,
                                     resource)
            except KeyError:
                # We'll hit this if we receive an invalid update.
                _log.exception("Invalid update: %s", update)
            finally:
                self._event_queue.task_done()

                # Log out when the queue is empty.
                if self._event_queue.empty():
                    _log.info("Emptied the event queue")

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly.
        """
        _log.debug("Processing '%s' for kind '%s'", event_type, resource_type)

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s",
                         event_type, resource_type)
        else:
            try:
                handler(resource)
                _log.info("Handled %s for %s: %s",
                           event_type, resource_type, key)
            except KeyError:
                _log.exception("Invalid %s: %s", resource_type,
                               json.dumps(resource, indent=2))

    def _manage_resource(self, resource_type):
        """
        Routine for a worker thread.  Syncs with API for the given resource
        and starts a watch.  If an error occurs within the watch, will re-sync
        with the API and re-start the watch.
        """
        while True:
            try:
                # Sync existing resources for this type.
                resource_version = self._sync_resources(resource_type)

                # Start a watch from the latest resource_version.
                self._watch_resource(resource_type, resource_version)
            except (requests.ConnectionError,
                    requests.exceptions.ChunkedEncodingError):
                _log.exception("Connection error querying: %s", resource_type)
            except requests.HTTPError:
                _log.exception("HTTP error querying: %s", resource_type)
            except KubernetesApiError:
                _log.debug("Kubernetes API error managing %s", resource_type)
Exemplo n.º 13
0
class Controller(object):
    def __init__(self):
        self._event_queue = Queue.Queue(maxsize=MAX_QUEUE_SIZE)
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._leader_election_url = os.environ.get("ELECTION_URL",
                                                   "http://127.0.0.1:4040/")
        """
        Use this URL to get leader election status from the sidecar container.
        """

        elect = os.environ.get("LEADER_ELECTION", "false")
        self._leader_elect = elect.lower() == "true"
        """
        Whether or not leader election is enabled.  If set to False, this
        policy controller will assume it is the only instance.
        """

        self._handlers = {}
        """
        Keeps track of which handlers to execute for various events.
        """

        # Handlers for NetworkPolicy events.
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_MODIFIED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         delete_network_policy)

        # Handlers for Namespace events.
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_MODIFIED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         delete_namespace)

        # Handlers for Pod events.
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, add_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_MODIFIED, update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, delete_pod)

        self._last_resource_version = {}
        """
        Keeps track of last received version for each resource type.
        """

    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the
        given resource type.

        :param resource_type: The type of resource that this handles.
        :param event_type: The type of event that this handles.
        :param handler: The callable to execute when events are received.
        :return: None
        """
        _log.debug("Setting %s %s handler: %s", resource_type, event_type,
                   handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.

        :param resource_type: The type of resource that needs handling.
        :param event_type: The type of event that needs handling.
        :return: The handler for the given resource and event type.
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        Controller.run() is called at program init to spawn watch threads,
        then loops reading responses from the Queue as they come in.
        """
        _log.info("Leader election enabled? %s", self._leader_elect)
        if self._leader_elect:
            # Wait until we've been elected leader to start.
            self._wait_for_leadership()
            self._start_leader_thread()

        # Remove old tier if it exists
        try:
            _log.debug("Attempting to remove old tier k8s-network-policy")
            self._client.delete_policy_tier("k8s-network-policy")
        except KeyError:
            pass

        # Ensure the tier exists.
        metadata = {"order": NET_POL_TIER_ORDER}
        self._client.set_policy_tier_metadata("default", metadata)

        # Ensure the backstop policy exists.  This policy forwards
        # any traffic to Kubernetes pods which doesn't match another policy
        # to the next-tier (i.e the per-namespace Profiles).
        selector = "has(%s)" % K8S_NAMESPACE_LABEL
        rules = Rules(inbound_rules=[Rule(action="next-tier")],
                      outbound_rules=[Rule(action="next-tier")])
        self._client.create_policy("default",
                                   "k8s-policy-no-match",
                                   selector,
                                   order=NET_POL_BACKSTOP_ORDER,
                                   rules=rules)

        # Read initial state from Kubernetes API.
        self.start_workers()

        # Loop and read updates from the queue.
        self.read_updates()

    def _wait_for_leadership(self):
        """
        Loops until this controller has been elected leader.
        """
        _log.info("Waiting for this controller to be elected leader")
        while True:
            try:
                is_leader = self._is_leader()
            except requests.exceptions.ConnectionError:
                # During startup, the leader election container
                # might not be up yet.  Handle this case gracefully.
                _log.info("Waiting for leader election container")
            else:
                # Successful response from the leader election container.
                # Check if we are the elected leader.
                if is_leader:
                    _log.info("We have been elected leader")
                    break
            time.sleep(1)

    def _start_leader_thread(self):
        """
        Starts a thread which periodically checks whether this controller
        is still the leader; if it is not, the process exits.
        """
        t = Thread(target=self._watch_leadership)
        t.daemon = True
        t.start()
        _log.info("Started leader election watcher")

    def _watch_leadership(self):
        """
        Watches to see if this policy controller is still the elected leader.
        If no longer the elected leader, exits.
        """
        _log.info("Watching for leader election changes")
        while True:
            try:
                if not self._is_leader():
                    _log.warning("No longer the elected leader - exiting")
                    os._exit(1)
                time.sleep(1)
            except Exception:
                _log.exception("Exception verifying leadership - exiting")
                os._exit(1)

    def start_workers(self):
        """
        Starts the worker threads which manage each Kubernetes
        API resource.
        """
        resources = [
            RESOURCE_TYPE_NETWORK_POLICY, RESOURCE_TYPE_NAMESPACE,
            RESOURCE_TYPE_POD
        ]

        # For each resource type, start a thread which syncs it from the
        # kubernetes API.
        for resource_type in resources:
            t = Thread(target=self._manage_resource, args=(resource_type, ))
            t.daemon = True
            t.start()
            _log.info("Started worker thread for: %s", resource_type)

    def read_updates(self):
        """
        Reads from the update queue.

        An update on the queue must be a tuple of:
          (event_type, resource_type, resource)

        Where:
          - event_type: Either "ADDED", "MODIFIED", "DELETED", "ERROR"
          - resource_type: e.g "Namespace", "Pod", "NetworkPolicy"
          - resource: The parsed json resource from the API matching
                      the given resource_type.
        """
        while True:
            try:
                # Wait for an update on the event queue.
                _log.debug("Reading from event queue")
                update = self._event_queue.get(block=True)
                event_type, resource_type, resource = update

                # We've received an update - process it.
                _log.debug("Read event: %s, %s, %s", event_type, resource_type,
                           json.dumps(resource, indent=2))
                self._process_update(event_type, resource_type, resource)
            except KeyError:
                # We'll hit this if we receive an invalid update.
                _log.exception("Invalid update: %s", update)
            finally:
                self._event_queue.task_done()

                # Log out when the queue is empty.
                if self._event_queue.empty():
                    _log.debug("Emptied the event queue")

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly.
        """
        _log.debug("Processing '%s' for kind '%s'", event_type, resource_type)

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s", event_type, resource_type)
        else:
            try:
                handler(resource)
                _log.info("Handled %s for %s: %s", event_type, resource_type,
                          key)
            except KeyError:
                _log.exception("Invalid %s: %s", resource_type,
                               json.dumps(resource, indent=2))

    def _manage_resource(self, resource_type):
        """
        Routine for a worker thread.  Syncs with API for the given resource
        and starts a watch.  If an error occurs within the watch, will resync
        with the API and restart the watch.
        """
        sync_needed = True
        while True:
            try:
                if sync_needed:
                    # Sync existing resources for this type.
                    self._sync_resources(resource_type)

                # There are many exception conditions below for which we would
                # need to sync again.  Even though sync isn't needed in the
                # most mainline case - read timeout - we save some lines of
                # code by setting sync_needed True here, and resetting it below
                # in the cases where it isn't needed.
                sync_needed = True

                # Start a watch from the latest resource_version.
                self._watch_resource(resource_type)
            except requests.exceptions.ConnectTimeout as e:
                _log.warning("Connection attempt timed out: %s ...%s",
                             resource_type, e)
            except requests.ConnectionError as e:
                if "Read timed out" in str(e):
                    _log.debug("Normal read time out for %s", resource_type)
                    sync_needed = False
                else:
                    _log.warning("Connection error: %s ...%s", resource_type,
                                 e)
            except requests.exceptions.ChunkedEncodingError:
                _log.exception("Read error querying: %s", resource_type)
            except requests.HTTPError:
                _log.exception("HTTP error querying: %s", resource_type)
            except KubernetesApiError:
                _log.debug("Kubernetes API error managing %s", resource_type)
            except Queue.Full:
                _log.exception("Event queue full")
            except Exception:
                _log.exception("Unhandled exception killed %s manager",
                               resource_type)
            finally:
                if sync_needed:
                    # Sleep for a second so that we don't tight-loop.
                    _log.info("Restarting watch on resource: %s",
                              resource_type)
                    time.sleep(1)
                else:
                    _log.debug("Restarting watch on resource: %s",
                               resource_type)

    def _watch_resource(self, resource_type):
        """
        Watch the given resource type from the last known resource version.
        Add any events to the event queue.
        """
        path = WATCH_URLS[resource_type] % self.k8s_api
        _log.debug("Starting watch on: %s", path)
        while True:
            # Attempt to stream API resources.
            response = self._api_get(
                path,
                stream=True,
                resource_version=self._last_resource_version[resource_type])
            _log.debug("Watch response for %s: %s", path, response)

            # Check for successful response, raise error if not.
            if response.status_code != 200:
                raise KubernetesApiError(response.text)

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Read from API: %s", line)
                    try:
                        parsed = json.loads(line)
                    except ValueError as e:  # py2 json raises ValueError.
                        _log.error("Bad response '%s' from API: %s", line, e)
                        raise

                    # Check if we've encountered an error.  If so,
                    # raise an exception.
                    if parsed["type"] == TYPE_ERROR:
                        _log.error("Received error from API: %s",
                                   json.dumps(parsed, indent=2))
                        raise KubernetesApiError()

                    # Get the important information from the event.
                    event_type = parsed["type"]
                    resource_type = parsed["object"]["kind"]
                    resource = parsed["object"]

                    # Successful update - send to the queue.
                    _log.debug("%s %s: %s to queue (%s) (%s)", event_type,
                               resource_type, resource["metadata"]["name"],
                               self._event_queue.qsize(), time.time())

                    update = (event_type, resource_type, resource)
                    self._event_queue.put(update,
                                          block=True,
                                          timeout=QUEUE_PUT_TIMEOUT)

                    # Extract the latest resource version.
                    new_ver = resource["metadata"]["resourceVersion"]
                    _log.debug("Update resourceVersion, was: %s, now: %s",
                               self._last_resource_version[resource_type],
                               new_ver)
                    self._last_resource_version[resource_type] = new_ver

    def _sync_resources(self, resource_type):
        """
        Syncs with the API and determines the latest resource version.
        Adds API objects to the event queue and records the latest
        resourceVersion in self._last_resource_version.
        Raises an Exception if unable to access the API.
        """
        # Get existing resources from the API.
        _log.info("Syncing '%s' objects", resource_type)
        url = GET_URLS[resource_type] % self.k8s_api
        resp = self._api_get(url, stream=False)
        _log.debug("Response: %s", resp)

        # If we hit an error, raise it.
        if resp.status_code != 200:
            _log.error("Error querying API: %s", resp.json())
            raise KubernetesApiError("Failed to query resource: %s" %
                                     resource_type)

        # Get the list of existing API objects from the response, as
        # well as the latest resourceVersion.
        resources = resp.json()["items"] or []
        metadata = resp.json().get("metadata", {})
        resource_version = metadata.get("resourceVersion")
        _log.debug("%s metadata: %s", resource_type, metadata)

        # Add the existing resources to the queue to be processed.
        # Treat as a MODIFIED event to trigger updates which may not always
        # occur on ADDED.
        _log.debug("%s existing %s(s) - add to queue", len(resources),
                   resource_type)
        for resource in resources:
            _log.debug("Queueing update: %s", resource)
            update = (TYPE_MODIFIED, resource_type, resource)
            self._event_queue.put(update,
                                  block=True,
                                  timeout=QUEUE_PUT_TIMEOUT)

        _log.info("Done syncing %s(s) - new resourceVersion: %s",
                  resource_type, resource_version)
        self._last_resource_version[resource_type] = resource_version

    def _api_get(self, path, stream, resource_version=None):
        """
        Get or stream from the API, given a resource.

        :param path: The API path to get.
        :param stream: Whether to return a single object or a stream.
        :param resource_version: The resourceVersion at which to
        start the stream.
        :return: A requests Response object
        """
        # Append the resource version - this indicates where the
        # watch should start.
        _log.debug("Getting API resources '%s' at version '%s'. stream=%s",
                   path, resource_version, stream)
        if resource_version:
            path += "?resourceVersion=%s" % resource_version

        session = requests.Session()
        if self.auth_token:
            session.headers.update(
                {'Authorization': 'Bearer ' + self.auth_token})
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, verify=verify, stream=stream, timeout=(5, 10))

    def _is_leader(self):
        """
        Returns True if this policy controller instance has been elected leader,
        False otherwise.
        """
        _log.debug("Checking if we are the elected leader.")
        response = requests.get(self._leader_election_url)
        response = response.json()
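        # A typical response from the election sidecar looks like
        # (hypothetical): {"name": "calico-policy-controller-abcde"}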

        # Determine if we're the leader.
        our_name = os.environ.get("HOSTNAME")
        leader_name = response["name"]
        _log.debug("Elected leader is: %s. We are: %s", leader_name, our_name)
        return our_name == leader_name
Exemplo n.º 14
0
class DefaultPolicyDriver(object):
    """
    Implements default network policy for a generic CNI plugin.
    """
    def __init__(self, network_name):

        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        self.profile_name = network_name
        """
        Name of profile for attach to endpoint.
        """

        # Validate the given network name to make sure it is compatible with
        # Calico policy.
        if not validate_characters(network_name):
            raise ValueError(
                "Invalid characters detected in the given network "
                "name, %s. Only letters a-z, numbers 0-9, and "
                "symbols _.- are supported." % network_name)

    def apply_profile(self, endpoint):
        """Sets a profile for the networked container on the given endpoint.

        Create a profile if it is not yet created.

        :param endpoint:
        :return: None
        """
        assert self.profile_name, "No profile name set."
        if not self._client.profile_exists(self.profile_name):
            # If the profile doesn't exist, create it.
            _log.info("Creating new profile '%s'", self.profile_name)
            rules = self.generate_rules()
            self._client.create_profile(self.profile_name, rules)

            # Apply any additional tags.
            tags = self.generate_tags()
            if tags:
                _log.debug("Applying additional tags: %s", tags)
                profile = self._client.get_profile(self.profile_name)
                profile.tags.update(tags)
                self._client.profile_update_tags(profile)

        # Check if the profile has already been applied.
        if self.profile_name in endpoint.profile_ids:
            _log.warning("Endpoint already in profile %s", self.profile_name)
            return

        # Append profile to Calico endpoint.
        _log.info("Appending profile '%s' to endpoint %s", self.profile_name,
                  endpoint.endpoint_id)
        try:
            self._client.append_profiles_to_endpoint(
                profile_names=[self.profile_name],
                endpoint_id=endpoint.endpoint_id)
        except (KeyError, MultipleEndpointsMatch) as e:
            _log.exception("Failed to apply profile to endpoint %s: %s",
                           endpoint.name, e.message)
            raise ApplyProfileError(e.message)
Exemplo n.º 15
0
class PolicyAgent():
    def __init__(self):
        self._event_queue = Queue.Queue()
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.  
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._handlers = {}
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED, 
                         self._add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED, 
                         self._delete_network_policy)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED, 
                         self._add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED, 
                         self._delete_namespace)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, 
                         self._add_update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, 
                         self._delete_pod)
        """
        Handlers for watch events.
        """
        
    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the 
        given resource type.
        """
        _log.info("Setting %s %s handler: %s", 
                  resource_type, event_type, handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        PolicyAgent.run() is called at program init to spawn watch threads,
        then loops reading responses from the _watcher Queue as they come in.
        """
        resources = [RESOURCE_TYPE_NETWORK_POLICY, 
                     RESOURCE_TYPE_NAMESPACE,
                     RESOURCE_TYPE_POD]
        for resource_type in resources:
            # Get existing resources from the API.
            _log.info("Getting existing %s objects", resource_type)
            get_url = GET_URLS[resource_type] % self.k8s_api
            resp = self._api_get(get_url, stream=False)
            _log.info("Response: %s", resp)

            if resp.status_code != 200:
                _log.error("Error querying API: %s", resp.json())
                return
            updates = resp.json()["items"]
            metadata = resp.json().get("metadata", {})
            resource_version = metadata.get("resourceVersion")
            _log.debug("%s metadata: %s", resource_type, metadata)

            # Process the existing resources.
            _log.info("%s existing %s(s)", len(updates), resource_type)
            for update in updates:
                _log.debug("Processing existing resource: %s", 
                           json.dumps(update, indent=2))
                self._process_update(TYPE_ADDED, resource_type, update)

            # Start watching for updates from the last resourceVersion.
            watch_url = WATCH_URLS[resource_type] % self.k8s_api
            t = Thread(target=self._watch_api, 
                       args=(watch_url, resource_version))
            t.daemon = True
            t.start()
            _log.info("Started watch on: %s", resource_type)

        # Loop and read updates from the queue.
        _log.info("Reading from event queue")
        self.read_updates()

    def read_updates(self):
        """
        Reads from the update queue.
        """
        update = None

        while True:
            try:
                # There may be an update already, since we do a blocking get
                # in the `except Queue.Empty` block.  If we have an update, 
                # just process it before trying to read from the queue again.
                if not update:
                    _log.info("Non-blocking read from event queue")
                    update = self._event_queue.get(block=False)
                    self._event_queue.task_done()

                # We've received an update - process it.
                _log.debug("Read update from queue: %s", json.dumps(update, indent=2))
                self._process_update(update["type"], 
                                     update["object"]["kind"], 
                                     update["object"])
                update = None
            except Queue.Empty:
                _log.info("Queue empty, waiting for updates")
                update = self._event_queue.get(block=True)
            except KeyError:
                # We'll hit this if we receive an invalid update.
                # Set update = None so we don't continue parsing the
                # invalid update.
                _log.exception("Invalid update: %s", update)
                update = None
                time.sleep(10)

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly.
        """
        _log.info("Processing '%s' for kind '%s'", event_type, resource_type) 

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Treat "modified" as "added".
        if event_type == TYPE_MODIFIED: 
            _log.info("Treating 'MODIFIED' as 'ADDED'")
            event_type = TYPE_ADDED

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type) 
        except KeyError:    
            _log.warning("No %s handlers for: %s", 
                         event_type, resource_type)
        else:
            _log.debug("Calling handler: %s", handler)
            try:
                handler(key, resource)
            except KeyError:
                _log.exception("Invalid %s: %s", resource_type, 
                               json.dumps(resource, indent=2))

    def _add_update_network_policy(self, key, policy):
        """
        Takes a new network policy from the Kubernetes API and 
        creates the corresponding Calico policy configuration.
        """
        _log.info("Adding new network policy: %s", key)

        # Parse this network policy so we can convert it to the appropriate
        # Calico policy.  First, get the selector from the API object.
        k8s_selector = policy["spec"]["podSelector"]
        k8s_selector = k8s_selector or {}

        # Build the appropriate Calico label selector.  This is done using 
        # the labels provided in the NetworkPolicy, as well as the 
        # NetworkPolicy's namespace.
        namespace = policy["metadata"]["namespace"]
        selectors = ["%s == '%s'" % (k,v) for k,v in k8s_selector.iteritems()]
        selectors += ["%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace)]
        selector = " && ".join(selectors)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Build the Calico rules.
        try:
            inbound_rules = self._calculate_inbound_rules(policy)
        except Exception:
            # It is possible bad rules will be passed - we don't want to 
            # crash the agent, but we do want to indicate a problem in the
            # logs, so that the policy can be fixed.
            _log.exception("Error parsing policy: %s", 
                           json.dumps(policy, indent=2))
            return
        else:
            rules = Rules(id=name,
                          inbound_rules=inbound_rules,
                          outbound_rules=[Rule(action="allow")])

        # Create the network policy using the calculated selector and rules.
        self._client.create_global_policy(NET_POL_GROUP_NAME, name, selector, rules)
        _log.info("Updated global policy '%s' for NetworkPolicy %s", name, key)

    def _delete_network_policy(self, key, policy):
        """
        Takes a deleted network policy and removes the corresponding
        configuration from the Calico datastore.
        """
        _log.info("Deleting network policy: %s", key)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Delete the corresponding Calico policy 
        try:
            self._client.remove_global_policy(NET_POL_GROUP_NAME, name)
        except KeyError:
            _log.info("Unable to find policy '%s' - already deleted", key)

    def _calculate_inbound_rules(self, policy):
        """
        Takes a NetworkPolicy object from the API and returns a list of 
        Calico Rules objects which should be applied on ingress.
        """
        _log.debug("Calculating inbound rules")

        # Store the rules to return.
        rules = []

        # Get this policy's namespace.
        policy_ns = policy["metadata"]["namespace"]

        # Iterate through each inbound rule and create the appropriate
        # rules.
        allow_incomings = policy["spec"].get("ingress") or []
        _log.info("Found %s ingress rules", len(allow_incomings))
        for r in allow_incomings:
            # Determine the destination ports to allow.  If no ports are
            # specified, allow all port / protocol combinations.
            _log.debug("Processing ingress rule: %s", r)
            ports_by_protocol = {}
            for to_port in r.get("ports", []):
                # Keep a dict of ports exposed, keyed by protocol.
                protocol = to_port.get("protocol")
                port = to_port.get("port")
                ports = ports_by_protocol.setdefault(protocol, [])
                if port:
                    _log.debug("Allow to port: %s/%s", protocol, port)
                    ports.append(port)

            # Convert into arguments to be passed to a Rule object.
            to_args = []
            for protocol, ports in ports_by_protocol.iteritems():
                arg = {"protocol": protocol.lower()}
                if ports:
                    arg["dst_ports"] = ports
                to_args.append(arg)
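            # e.g. ports_by_protocol {"TCP": [80, 443]} becomes
            #   to_args = [{"protocol": "tcp", "dst_ports": [80, 443]}]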

            if not to_args:
                # There are no destination protocols / ports specified.
                # Allow to all protocols and ports.
                to_args = [{}]

            # Determine the from criteria.  If no "from" block is specified,
            # then we should allow from all sources.
            from_args = []
            for from_clause in r.get("from", []):
                # We need to check if the key exists, not just if there is 
                # a non-null value.  The presence of the key with a null 
                # value means "select all".
                pods_present = "pods" in from_clause
                namespaces_present = "namespaces" in from_clause
                _log.debug("Is 'pods:' present? %s", pods_present)
                _log.debug("Is 'namespaces:' present? %s", namespaces_present)

                if pods_present and namespaces_present:
                    # This is an error case according to the API.
                    msg = "Policy API does not support both 'pods' and " \
                          "'namespaces' selectors."
                    raise PolicyError(msg, policy)
                elif pods_present:
                    # There is a pod selector in this "from" clause.
                    pod_selector = from_clause["pods"] or {}
                    _log.debug("Allow from pods: %s", pod_selector)
                    selectors = ["%s == '%s'" % (k,v) for k,v in pod_selector.iteritems()]

                    # We can only select on pods in this namespace.
                    selectors.append("%s == '%s'" % (K8S_NAMESPACE_LABEL,
                                                     policy_ns))
                    selector = " && ".join(selectors)

                    # Append the selector to the from args.
                    _log.debug("Allowing pods which match: %s", selector)
                    from_args.append({"src_selector": selector})
                elif namespaces_present:
                    # There is a namespace selector.  Namespace labels are
                    # applied to each pod in the namespace using 
                    # the per-namespace profile.  We can select on namespace
                    # labels using the NS_LABEL_KEY_FMT modifier.
                    namespaces = from_clause["namespaces"] or {}
                    _log.debug("Allow from namespaces: %s", namespaces)
                    selectors = ["%s == '%s'" % (NS_LABEL_KEY_FMT % k, v) \
                            for k,v in namespaces.iteritems()]
                    selector = " && ".join(selectors)
                    if selector:
                        # Allow from the selected namespaces.
                        _log.debug("Allowing from namespaces which match: %s", 
                                    selector)
                        from_args.append({"src_selector": selector})
                    else:
                        # Allow from all pods in all namespaces.
                        _log.debug("Allowing from all pods in all namespaces")
                        selector = "has(%s)" % K8S_NAMESPACE_LABEL
                        from_args.append({"src_selector": selector})

            if not from_args:
                # There are no match criteria specified.  We should allow
                # from all sources to the given ports.
                from_args = [{}]

            # A rule per-protocol, per-from-clause.
            for to_arg in to_args: 
                for from_arg in from_args:
                    # Create a rule by combining a 'from' argument with
                    # the protocol / ports arguments.
                    from_arg.update(to_arg)
                    from_arg.update({"action": "allow"})
                    rules.append(Rule(**from_arg))

        _log.debug("Calculated rules: %s", rules)
        return rules

    def _add_update_namespace(self, key, namespace):
        """
        Configures the necessary policy in Calico for this
        namespace.  Uses the `net.alpha.kubernetes.io/network-isolation` 
        annotation.
        """
        _log.info("Adding/updating namespace: %s", key)

        # Determine the type of network-isolation specified by this namespace.
        # This defaults to no isolation.
        annotations = namespace["metadata"].get("annotations", {})
        _log.debug("Namespace %s has annotations: %s", key, annotations)
        net_isolation = annotations.get(NS_POLICY_ANNOTATION, "no") == "yes"
        _log.info("Namespace %s has network-isolation? %s", key, net_isolation)

        # Determine the profile name to create.
        namespace_name = namespace["metadata"]["name"]
        profile_name = NS_PROFILE_FMT % namespace_name

        # Determine the rules to use.
        outbound_rules = [Rule(action="allow")]
        if net_isolation:
            inbound_rules = [Rule(action="deny")]
        else:
            inbound_rules = [Rule(action="allow")]
        rules = Rules(id=profile_name,
                      inbound_rules=inbound_rules,
                      outbound_rules=outbound_rules)

        # Create the Calico policy to represent this namespace, or 
        # update it if it already exists.  Namespace policies select each
        # pod within that namespace.
        self._client.create_profile(profile_name, rules)

        # Assign labels to the profile.  We modify the keys to use 
        # a special prefix to indicate that these labels are inherited 
        # from the namespace.
        labels = namespace["metadata"].get("labels", {})
        # Use items() (a snapshot) so we can safely mutate the dict.
        for k, v in labels.items():
            labels[NS_LABEL_KEY_FMT % k] = v
            del labels[k]
        _log.debug("Generated namespace labels: %s", labels)

        # TODO: Actually assign labels to the profile.

        _log.info("Created/updated profile for namespace %s", namespace_name)

    def _delete_namespace(self, key, namespace):
        """
        Takes a deleted namespace and removes the corresponding
        configuration from the Calico datastore.
        """
        _log.info("Deleting namespace: %s", key)

        # Delete the Calico policy which represents this namespace.
        # We need to make sure that there are no pods running 
        # in this namespace first.
        namespace_name = namespace["metadata"]["name"]
        profile_name = NS_PROFILE_FMT % namespace_name
        try:
            self._client.remove_profile(profile_name)
        except KeyError:
            _log.info("Unable to find profile for namespace '%s'", key)

    def _add_update_pod(self, key, pod):
        """
        Takes a new or updated pod from the Kubernetes API and 
        creates the corresponding Calico configuration.
        """
        _log.info("Adding new pod: %s", key)

        # Get the Calico endpoint.  This may or may not have already been 
        # created by the CNI plugin.  If it hasn't been created, we need to 
        # wait until it has before we can do any meaningful work.
        namespace = pod["metadata"]["namespace"]
        name = pod["metadata"]["name"]
        workload_id = "%s.%s" % (namespace, name)
        try:
            _log.debug("Looking for endpoint that matches workload_id=%s",
                       workload_id)
            endpoint = self._client.get_endpoint(
                orchestrator_id="cni",
                workload_id=workload_id
            )
        except KeyError:
            # We don't need to do anything special here, just return.
            # We'll receive another update when the Pod enters running state.
            _log.warn("No endpoint for '%s', wait until running", workload_id)
            return
        except MultipleEndpointsMatch:
            # We should never have multiple endpoints with the same
            # workload_id.  This could theoretically occur if the Calico
            # datastore is out-of-sync with what pods actually exist, but 
            # this is an error state and indicates a problem elsewhere.
            _log.error("Multiple Endpoints found matching ID %s", workload_id)
            sys.exit(1)

        # Get Kubernetes labels.
        labels = pod["metadata"].get("labels", {}) 
        _log.debug("Pod '%s' has labels: %s", key, labels)

        # Add a special label for the Kubernetes namespace.  This is used
        # by selector-based policies to select all pods in a given namespace.
        labels[K8S_NAMESPACE_LABEL] = namespace 

        # Set the labels on the endpoint.
        endpoint.labels = labels
        self._client.set_endpoint(endpoint)
        _log.info("Updated labels on pod %s", key)

        # Configure this pod with its namespace profile.
        ns_profile = NS_PROFILE_FMT % namespace
        self._client.set_profiles_on_endpoint([ns_profile], 
                                              orchestrator_id="cni",
                                              workload_id=endpoint.workload_id)

    def _delete_pod(self, key, pod):
        """
        We don't need to do anything when a pod is deleted - the CNI plugin
        handles the deletion of the endpoint.
        """
        _log.info("Pod deleted: %s", key)

    def _watch_api(self, path, resource_version=None):
        try:
            self.__watch_api(path, resource_version)
        except Exception:
            _log.exception("Exception watching %s", path)

    def __watch_api(self, path, resource_version=None):
        """
        Work loop for the watch thread.
        """
        _log.info("Starting watch on path: %s", path)
        while True:
            # Attempt to stream API resources.
            try:
                response = self._api_get(path, 
                                         stream=True, 
                                         resource_version=resource_version)
                _log.info("Watch response for %s: %s", path, response)
            except requests.ConnectionError:
                _log.exception("Error querying path: %s", path)
                time.sleep(10)
                continue

            # Check for successful response.
            if response.status_code != 200:
                _log.error("Error watching path: %s", response.text)
                time.sleep(10)
                continue

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Adding line to queue: %s", line)
                    self._event_queue.put(json.loads(line))

    def _api_get(self, path, stream, resource_version=None):
        """
        Get or stream from the API, given a resource path.

        :param path: The API path to get or watch.
        :param stream: Whether to return a single object or a stream.
        :param resource_version: The resourceVersion at which to
        start the stream.
        :return: A requests Response object.  When streaming, the body is
        a stream of json objects, e.g.
        {"type": "MODIFIED"|"ADDED"|"DELETED", "object": {...}}
        """
        # Append the resource version - this indicates where the 
        # watch should start.
        _log.info("Getting API resources '%s' at version '%s'. stream=%s", 
                  path, resource_version, stream)
        if resource_version:
            path += "?resourceVersion=%s" % resource_version

        session = requests.Session()
        if self.auth_token:
            session.headers.update({'Authorization': 'Bearer ' + self.auth_token})
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, verify=verify, stream=stream)
Exemplo n.º 16
0
class PolicyAgent():
    def __init__(self):
        self._event_queue = Queue.Queue()
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", "https://10.100.0.1:443")
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN")
        """
        Auth token to use when accessing the API.
        """

        path = NET_POLICY_WATCH_PATH % self.k8s_api
        self._network_policy_thread = Thread(target=self._watch_api, 
                                             args=(path,))
        self._network_policy_thread.daemon = True
        """
        Thread which performs watch of Kubernetes API for changes to 
        NetworkPolicy objects.
        """

        path = NAMESPACE_WATCH_PATH % self.k8s_api
        self._namespace_thread = Thread(target=self._watch_api, 
                                             args=(path,))
        self._namespace_thread.daemon = True
        """
        Thread which performs watch of Kubernetes API for changes to 
        Namespace objects.
        """

        path = POD_WATCH_PATH % self.k8s_api
        self._pod_thread = Thread(target=self._watch_api, 
                                  args=(path,))
        self._pod_thread.daemon = True
        """
        Thread which performs watch of Kubernetes API for changes to 
        Pod objects.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._network_policies = {}
        self._namespaces = {}
        self._pods = {}
        """
        Store internal state.
        """

    def run(self):
        """
        PolicyAgent.run() is called at program init to spawn watch threads,
        then loops reading responses from the _watcher Queue as they come in.
        """
        # Start threads to watch Kubernetes API. 
        _log.info("Starting API watch on: NetworkPolicy, Pod, Namespace")
        self._network_policy_thread.start()
        self._namespace_thread.start()
        self._pod_thread.start()

        # Loop and read updates from the queue.
        _log.info("Reading from event queue")
        self.read_updates()

    def read_updates(self):
        """
        Reads from the update queue.
        """
        update = None

        while True:
            try:
                # There may be an update already, since we do a blocking get
                # in the `except Queue.Empty` block.  If we have an update, 
                # just process it before trying to read from the queue again.
                if not update:
                    _log.info("Non-blocking read from event queue")
                    update = self._event_queue.get(block=False)
                    self._event_queue.task_done()

                # We've received an update - process it.
                _log.debug("Read update from queue: %s", json.dumps(update, indent=2))
                self._process_update(update)
                update = None
            except Queue.Empty:
                _log.info("Queue empty, waiting for updates")
                update = self._event_queue.get(block=True)
            except KeyError:
                # We'll hit this if we receive an invalid update.
                # Set update = None so we don't continue parsing the
                # invalid update.
                _log.exception("Invalid update: %s", update)
                update = None
                time.sleep(10)

    def _process_update(self, update):
        """
        Takes an update from the queue and updates our state accordingly.
        """
        # Parse out the type of update and resource.
        update_type = update["type"]
        resource_type = update["object"]["kind"] 
        _log.info("Processing '%s' for kind '%s'", update_type, resource_type) 

        # Determine the key for this object.
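        # e.g. a Pod "nginx" in namespace "default" is keyed
        # ("default", "nginx"); the "default" Namespace itself is keyed
        # ("default",).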
        if resource_type == RESOURCE_TYPE_NAMESPACE:
            # Namespaces are just keyed off of their name.
            name = update["object"]["metadata"]["name"]
            key = (name,)
        else:
            # Objects are keyed off their name and namespace.
            name = update["object"]["metadata"]["name"]
            namespace = update["object"]["metadata"]["namespace"]
            key = (namespace, name)

        if resource_type == RESOURCE_TYPE_NETWORK_POLICY:
            # NetworkPolicy objects correspond directly to Calico
            # profiles - create, delete or update the corresponding Calico 
            # profile for each NetworkPolicy update. 
            if update_type in [TYPE_ADDED, TYPE_MODIFIED]:
                # Add or update network policy.
                self._add_new_network_policy(key, update)
            else:
                # Delete an existing network policy.
                assert update_type == TYPE_DELETED
                try:
                    self._delete_network_policy(key, update)
                except KeyError:
                    _log.warning("Delete for unknown network policy: %s", key)
        elif resource_type == RESOURCE_TYPE_NAMESPACE:
            # Namespaces correspond directly to Calico profiles. 
            if update_type in [TYPE_ADDED, TYPE_MODIFIED]:
                # Add or update the namespace.
                self._add_new_namespace(key, update)
            else:
                # Delete an existing namespace.
                assert update_type == TYPE_DELETED
                try:
                    self._delete_namespace(key, update)
                except KeyError:
                    _log.warning("Delete for unknown namespace: %s", key)
        elif resource_type == RESOURCE_TYPE_POD:
            # Pods have policy applied to them using Namespaces and
            # NetworkPolicy objects.  We must update the corresponding 
            # endpoints in the Calico datastore to have the correct 
            # labels applied.
            if update_type in [TYPE_ADDED, TYPE_MODIFIED]:
                # Add or update pod.
                self._add_update_pod(key, update)
            else:
                assert update_type == TYPE_DELETED
                try:
                    self._delete_pod(key, update)
                except KeyError:
                    _log.warning("Delete for unknown pod: %s", key)

    def _add_new_network_policy(self, key, policy):
        """
        Takes a new network policy from the Kubernetes API and 
        creates the corresponding Calico policy configuration.
        """
        _log.info("Adding new network policy: %s", key)
        self._network_policies[key] = policy

        # Parse this network policy so we can convert it to the appropriate
        # Calico policy.  First, get the selector from the API object.
        k8s_selector = policy["object"]["spec"]["podSelector"]

        # Build the appropriate Calico label selector.  This is done using 
        # the labels provided in the NetworkPolicy, as well as the 
        # NetworkPolicy's namespace.
        namespace = policy["object"]["metadata"]["namespace"]
        selectors = ["%s == '%s'" % (k,v) for k,v in k8s_selector.iteritems()]
        selectors += ["%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace)]
        selector = " && ".join(selectors)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["object"]["metadata"]["name"]

        # Build the Calico rules.
        try:
            inbound_rules = self._calculate_inbound_rules(policy)
        except Exception:
            # It is possible bad rules will be passed - we don't want to 
            # crash the agent, but we do want to indicate a problem in the
            # logs, so that the policy can be fixed.
            _log.exception("Error parsing policy: %s", 
                           json.dumps(policy, indent=2))
            return
        else:
            rules = Rules(id=name,
                          inbound_rules=inbound_rules,
                          outbound_rules=[Rule(action="allow")])

        # Create the network policy using the calculated selector and rules.
        self._client.create_global_policy(NET_POL_GROUP_NAME, name, selector, rules)
        _log.info("Updated global policy '%s' for NetworkPolicy %s", name, key)

    def _delete_network_policy(self, key, policy):
        """
        Takes a deleted network policy and removes the corresponding
        configuration from the Calico datastore.
        """
        _log.info("Deleting network policy: %s", key)

        # Delete from internal dict.
        del self._network_policies[key]

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["object"]["metadata"]["name"]

        # Delete the corresponding Calico policy 
        self._client.remove_global_policy(NET_POL_GROUP_NAME, name)

    def _calculate_inbound_rules(self, policy):
        """
        Takes a NetworkPolicy object from the API and returns a list of 
        Calico Rules objects which should be applied on ingress.
        """
        # Store the rules to return.
        rules = []

        # Iterate through each inbound rule and create the appropriate
        # rules.
        allow_incomings = policy["object"]["spec"]["inbound"]
        for r in allow_incomings:
            # Determine the destination ports to allow.  If no ports are
            # specified, allow all port / protocol combinations.
            ports_by_protocol = {}
            for to_port in r.get("ports", []):
                # Keep a dict of ports exposed, keyed by protocol.
                protocol = to_port.get("protocol")
                port = to_port.get("port")
                ports = ports_by_protocol.setdefault(protocol, [])
                if port:
                    _log.debug("Allow to port: %s/%s", protocol, port)
                    ports.append(port)

            # Convert into arguments to be passed to a Rule object.
            to_args = []
            for protocol, ports in ports_by_protocol.iteritems():
                arg = {"protocol": protocol.lower()}
                if ports:
                    arg["dst_ports"] = ports
                to_args.append(arg)

            if not to_args:
                # No destination protocols / ports were specified, so
                # allow all protocol / port combinations.
                to_args = [{}]

            # Determine the from criteria.  If no "from" block is specified,
            # then we should allow from all sources.
            from_args = []
            for from_clause in r.get("from", []):
                pod_selector = from_clause.get("pods", {})
                namespaces = from_clause.get("namespaces", {})
                if pod_selector:
                    # There is a pod selector in this "from" clause.
                    _log.debug("Allow from pods: %s", pod_selector)
                    selectors = ["%s == '%s'" % (k,v) for k,v in pod_selector.iteritems()]
                    selector = " && ".join(selectors)
                    from_args.append({"src_selector": selector})
                elif namespaces:
                    _log.warning("'from: {namespaces: {}}' is not yet "
                                 "supported - ignoring %s", from_clause)

            if not from_args:
                # There are no match criteria specified.  We should allow
                # from all sources to the given ports.
                from_args = [{}]

            # A rule per-protocol, per-from-clause.
            for to_arg in to_args:
                for from_arg in from_args:
                    # Create a rule by combining a 'from' argument with
                    # the protocol / ports arguments.  Copy first so that
                    # updates don't leak between loop iterations.
                    rule_args = from_arg.copy()
                    rule_args.update(to_arg)
                    rule_args["action"] = "allow"
                    rules.append(Rule(**rule_args))

        _log.debug("Calculated rules: %s", rules)
        return rules
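
    # Illustrative worked example (not from the original source): an
    # inbound rule with
    #   ports: [{"protocol": "TCP", "port": 80}]
    #   from:  [{"pods": {"role": "frontend"}}]
    # yields a single Calico rule equivalent to:
    #   Rule(action="allow", protocol="tcp", dst_ports=[80],
    #        src_selector="role == 'frontend'")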

    def _add_new_namespace(self, key, namespace):
        """
        Takes a new namespace from the Kubernetes API and 
        creates the corresponding Calico policy configuration.
        """
        _log.info("Adding new namespace: %s", key)

        # Store the namespace.
        self._namespaces[key] = namespace 

        # Determine the type of network-isolation specified by this namespace.
        # This defaults to no isolation.
        annotations = namespace["object"]["metadata"].get("annotations", {})
        _log.debug("Namespace %s has annotations: %s", key, annotations)
        net_isolation = annotations.get(NS_POLICY_ANNOTATION, "no") == "yes"
        _log.info("Namespace %s has: network-isolation=%s", key, net_isolation)

        # Determine the policy name to create.
        namespace_name = namespace["object"]["metadata"]["name"]
        policy_name = "k8s_ns-%s" % namespace_name

        # Determine the rules to use.
        outbound_rules = [Rule(action="allow")]
        if net_isolation:
            inbound_rules = [Rule(action="deny")]
        else:
            inbound_rules = [Rule(action="allow")]
        rules = Rules(id=policy_name,
                      inbound_rules=inbound_rules,
                      outbound_rules=outbound_rules)

        # Create the Calico policy to represent this namespace, or 
        # update it if it already exists.  Namespace policies select each
        # pod within that namespace.
        selector = "%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace_name) 
        self._client.create_global_policy(NAMESPACE_GROUP_NAME, policy_name, 
                                          selector, rules=rules)
        _log.info("Created/updated global policy for namespace %s", 
                  namespace_name)

    def _delete_namespace(self, key, namespace):
        """
        Takes a deleted namespace and removes the corresponding
        configuration from the Calico datastore.
        """
        _log.info("Deleting namespace: %s", key)

        # Delete the Calico policy which represents this namespace.
        # We need to make sure that there are no pods running
        # in this namespace first.
        namespace_name = namespace["object"]["metadata"]["name"]
        policy_name = "k8s_ns-%s" % namespace_name
        self._client.remove_global_policy(NAMESPACE_GROUP_NAME, policy_name)

        # Delete from internal dict.
        del self._namespaces[key]

    def _add_update_pod(self, key, pod):
        """
        Takes a new or updated pod from the Kubernetes API and 
        creates the corresponding Calico configuration.
        """
        _log.info("Adding new pod: %s", key)

        # Store the latest version of the API Pod.
        self._pods[key] = pod 

        # Get the Calico endpoint.  This may or may not have already been
        # created by the CNI plugin.  If it hasn't been created, we need to
        # wait until it has before we can do any meaningful work.
        workload_id = "%s.%s" % (pod["object"]["metadata"]["namespace"],
                                 pod["object"]["metadata"]["name"])
        try:
            _log.debug("Looking for endpoint that matches workload_id=%s",
                       workload_id)
            endpoint = self._client.get_endpoint(
                orchestrator_id="cni",
                workload_id=workload_id
            )
        except KeyError:
            # We don't need to do anything special here, just return.
            # We'll receive another update when the Pod enters running state.
            _log.warn("No endpoint for '%s', wait until running", workload_id)
            return
        except MultipleEndpointsMatch:
            # We should never have multiple endpoints with the same
            # workload_id.  This could theoretically occur if the Calico
            # datastore is out-of-sync with what pods actually exist, but 
            # this is an error state and indicates a problem elsewhere.
            _log.error("Multiple Endpoints found matching ID %s", workload_id)
            sys.exit(1)

        # Get Kubernetes labels.
        labels = pod["object"]["metadata"].get("labels", {}) 
        _log.debug("Pod '%s' has labels: %s", key, labels)

        # Add a special label for the Kubernetes namespace.
        labels[K8S_NAMESPACE_LABEL] = pod["object"]["metadata"]["namespace"]

        # Set the labels on the endpoint.
        endpoint.labels = labels
        self._client.set_endpoint(endpoint)
        _log.info("Updated labels on pod %s", key)

        # Remove the 'deny-inbound' profile from the pod now that 
        # it has been configured with labels.  It will match at least the 
        # per-namespace policy, and potentially others, which will 
        # define what connectivity is allowed.
        self._client.set_profiles_on_endpoint([], 
                                              orchestrator_id="cni",
                                              workload_id=endpoint.workload_id)

    def _delete_pod(self, key, pod):
        """
        Takes a deleted pod and removes the corresponding
        configuration from the Calico datastore.
        """
        _log.info("Deleting pod: %s", key)

        # Delete from internal dict.
        del self._pods[key]

    def _watch_api(self, path, resource_version=None):
        """
        Work loop for the watch thread.
        """
        _log.info("Starting watch on path: %s", path)
        while True:
            # Attempt to stream API resources.
            try:
                response = self._get_api_stream(path, resource_version)
                _log.info("Watch response for %s: %s", path, response)
            except requests.ConnectionError:
                _log.exception("Error querying path: %s", path)
                time.sleep(10)
                continue

            # Check for successful response.
            if response.status_code != 200:
                _log.error("Error watching path: %s", response.text)
                time.sleep(10)
                continue

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Adding line to queue: %s", line)
                    self._event_queue.put(json.loads(line))

    def _get_api_stream(self, path, resource_version=None):
        """
        Watch a stream from the API at the given path.

        :param path: The API path to watch.
        :param resource_version: The resource version from which to start
        the watch.
        :return: A stream of JSON objects, e.g.
                 {"type": "ADDED"|"MODIFIED"|"DELETED", "object": {...}}
        :rtype: stream
        """
        # Append the resource version - this indicates where the 
        # watch should start.
        _log.info("Streaming API resources '%s' at version '%s'", path, resource_version)
        if resource_version:
            path += "?resourceVersion=%s" % resource_version

        session = requests.Session()
        if self.auth_token:
            _log.debug("Using Auth Token: %s", self.auth_token)
            session.headers.update({'Authorization': 'Bearer ' + self.auth_token})
        return session.get(path, verify=False, stream=True)
    
    def _get_api_resource(self, path):
        """
        Get a resource from the specified API path.

        :return: A JSON API object
        :rtype: json dict
        """
        _log.debug("Getting API Resource: %s", path)
        session = requests.Session()
        if self.auth_token:
            _log.debug("Using Auth Token: %s", self.auth_token)
            session.headers.update({'Authorization': 'Bearer ' + self.auth_token})
        response = session.get(path, verify=False)
        return json.loads(response.text)
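
A minimal, self-contained sketch of the selector construction used in
_add_new_network_policy above. The namespace label key here is an assumed
stand-in for the K8S_NAMESPACE_LABEL constant, whose real value lives in
the project's constants module.

# Illustrative sketch - not part of the original source.
K8S_NAMESPACE_LABEL = "calico/k8s_ns"  # assumed value of the constant

def build_selector(pod_selector, namespace):
    """Combine pod labels and a namespace into one Calico selector."""
    selectors = ["%s == '%s'" % (k, v) for k, v in pod_selector.items()]
    selectors.append("%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace))
    return " && ".join(selectors)

# build_selector({"role": "db"}, "prod")
# => "role == 'db' && calico/k8s_ns == 'prod'"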
Example no. 17
    def __init__(self, network_config, env):
        self.network_config = network_config
        """
        Network config as provided in the CNI network config file, passed
        in via stdin.
        """

        self.env = env
        """
        Copy of the environment variable dictionary. Contains CNI_* 
        variables.
        """

        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        self.command = env[CNI_COMMAND_ENV]
        """
        The command to execute for this plugin instance. Required. 
        One of:
          - CNI_CMD_ADD
          - CNI_CMD_DELETE
        """

        self.container_id = env[CNI_CONTAINERID_ENV]
        """
        The container's ID in the containerizer. Required.
        """

        self.cni_netns = env[CNI_NETNS_ENV]
        """
        Relative path to the network namespace of this container.
        """

        self.interface = env[CNI_IFNAME_ENV]
        """
        Name of the interface to create within the container.
        """

        self.cni_args = parse_cni_args(env[CNI_ARGS_ENV])
        """
        Dictionary of additional CNI arguments provided via
        the CNI_ARGS environment variable.
        """

        self.cni_path = env[CNI_PATH_ENV]
        """
        Path in which to search for CNI plugins.
        """

        self.network_name = network_config["name"]
        """
        Name of the network from the provided network config file.
        """

        self.ipam_result = None
        """
        Stores the output generated by the IPAM plugin.  This is printed
        to stdout at the end of execution.
        """

        self.policy_driver = self._get_policy_driver()
        """
        Chooses the correct policy driver based on the given configuration
        """

        self.container_engine = self._get_container_engine()
        """
Example no. 18
class PolicyAgent(object):
    def __init__(self):
        self._event_queue = Queue.Queue(maxsize=MAX_QUEUE_SIZE)
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._leader_election_url = os.environ.get("ELECTION_URL",
                                                   "http://127.0.0.1:4040/")
        """
        Use this URL to get leader election status from the sidecar container.
        """

        elect = os.environ.get("LEADER_ELECTION", "false")
        self._leader_elect = elect.lower() == "true"
        """
        Whether or not leader election is enabled.  If set to False, this
        policy agent will assume it is the only instance.
        """

        self._handlers = {}
        """
        Keeps track of which handlers to execute for various events.
        """

        # Handlers for NetworkPolicy events.
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_MODIFIED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         delete_network_policy)

        # Handlers for Namespace events.
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_MODIFIED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         delete_namespace)

        # Handlers for Pod events.
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED,
                         add_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_MODIFIED,
                         update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED,
                         delete_pod)

    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the
        given resource type.

        :param resource_type: The type of resource that this handles.
        :param event_type: The type of event that this handles.
        :param handler: The callable to execute when events are received.
        :return: None
        """
        _log.debug("Setting %s %s handler: %s",
                    resource_type, event_type, handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.

        :param resource_type: The type of resource that needs handling.
        :param event_type: The type of event that needs handling.
        :return: The handler registered for this resource and event type.
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        PolicyAgent.run() is called at program init.  It spawns the watch
        threads, then loops reading responses from the queue as they come in.
        """
        _log.info("Leader election enabled? %s", self._leader_elect)
        if self._leader_elect:
            # Wait until we've been elected leader to start.
            self._wait_for_leadership()
            self._start_leader_thread()

        # Ensure the tier exists.
        metadata = {"order": 50}
        self._client.set_policy_tier_metadata(NET_POL_TIER_NAME, metadata)

        # Read initial state from Kubernetes API.
        self.start_workers()

        # Loop and read updates from the queue.
        self.read_updates()

    def _wait_for_leadership(self):
        """
        Loops until this agent has been elected leader.
        """
        _log.info("Waiting for this agent to be elected leader")
        while True:
            try:
                is_leader = self._is_leader()
            except requests.exceptions.ConnectionError:
                # During startup, the leader election container
                # might not be up yet.  Handle this case gracefully.
                _log.info("Waiting for leader election container")
            else:
                # Successful response from the leader election container.
                # Check if we are the elected leader.
                if is_leader:
                    _log.info("We have been elected leader")
                    break
            time.sleep(1)

    def _start_leader_thread(self):
        """
        Starts a thread which periodically checks if this agent is the leader.
        If we are no longer the leader, the process exits.
        """
        t = Thread(target=self._watch_leadership)
        t.daemon = True
        t.start()
        _log.info("Started leader election watcher")

    def _watch_leadership(self):
        """
        Watches to see if this policy agent is still the elected leader.
        If no longer the elected leader, exits.
        """
        _log.info("Watching for leader election changes")
        while True:
            try:
                if not self._is_leader():
                    _log.warning("No longer the elected leader - exiting")
                    os._exit(1)
                time.sleep(1)
            except Exception:
                _log.exception("Exception verifying leadership - exiting")
                os._exit(1)

    def start_workers(self):
        """
        Starts the worker threads which manage each Kubernetes
        API resource.
        """
        resources = [RESOURCE_TYPE_NETWORK_POLICY,
                     RESOURCE_TYPE_NAMESPACE,
                     RESOURCE_TYPE_POD]

        # For each resource type, start a thread which syncs it from the
        # kubernetes API.
        for resource_type in resources:
            t = Thread(target=self._manage_resource, args=(resource_type,))
            t.daemon = True
            t.start()
            _log.info("Started worker thread for: %s", resource_type)

    def read_updates(self):
        """
        Reads from the update queue.

        An update on the queue must be a tuple of:
          (event_type, resource_type, resource)

        Where:
          - event_type: Either "ADDED", "MODIFIED", "DELETED", "ERROR"
          - resource_type: e.g "Namespace", "Pod", "NetworkPolicy"
          - resource: The parsed json resource from the API matching
                      the given resource_type.
        """
        while True:
            try:
                # Wait for an update on the event queue.
                _log.debug("Reading from event queue")
                update = self._event_queue.get(block=True)
                event_type, resource_type, resource = update

                # We've received an update - process it.
                _log.debug("Read event: %s, %s, %s",
                           event_type,
                           resource_type,
                           json.dumps(resource, indent=2))
                self._process_update(event_type,
                                     resource_type,
                                     resource)
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                _log.exception("Invalid update: %s", update)
            finally:
                self._event_queue.task_done()

                # Log when the queue has been emptied.
                if self._event_queue.empty():
                    _log.info("Emptied the event queue")

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly.
        """
        _log.debug("Processing '%s' for kind '%s'", event_type, resource_type)

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s",
                         event_type, resource_type)
        else:
            try:
                handler(resource)
                _log.info("Handled %s for %s: %s",
                           event_type, resource_type, key)
            except KeyError:
                _log.exception("Invalid %s: %s", resource_type,
                               json.dumps(resource, indent=2))

    def _manage_resource(self, resource_type):
        """
        Routine for a worker thread.  Syncs with the API for the given
        resource and starts a watch.  If an error occurs within the watch,
        re-syncs with the API and restarts the watch.
        """
        while True:
            try:
                # Sync existing resources for this type.
                resource_version = self._sync_resources(resource_type)

                # Start a watch from the latest resource_version.
                self._watch_resource(resource_type, resource_version)
            except requests.ConnectionError:
                _log.exception("Connection error querying: %s", resource_type)
            except requests.HTTPError:
                _log.exception("HTTP error querying: %s", resource_type)
            except KubernetesApiError:
                _log.debug("Kubernetes API error managing %s", resource_type)
            except Queue.Full:
                _log.exception("Event queue full")
            finally:
                # Sleep for a second so that we don't tight-loop.
                _log.warning("Re-starting watch on resource: %s",
                             resource_type)
                time.sleep(1)

    def _watch_resource(self, resource_type, resource_version):
        """
        Watch the given resource type starting at the given resource version.
        Add any events to the event queue.
        """
        path = WATCH_URLS[resource_type] % self.k8s_api
        _log.info("Starting watch on: %s", path)
        while True:
            # Attempt to stream API resources.
            response = self._api_get(path,
                                     stream=True,
                                     resource_version=resource_version)
            _log.debug("Watch response for %s: %s", path, response)

            # Check for successful response, raise error if not.
            if response.status_code != 200:
                raise KubernetesApiError(response.text)

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Read line: %s", line)
                    parsed = json.loads(line)

                    # Check if we've encountered an error.  If so,
                    # raise an exception.
                    if parsed["type"] == TYPE_ERROR:
                        _log.error("Received error from API: %s",
                                   json.dumps(parsed, indent=2))
                        raise KubernetesApiError()

                    # Get the important information from the event.
                    event_type = parsed["type"]
                    resource_type = parsed["object"]["kind"]
                    resource = parsed["object"]

                    # Successful update - send to the queue.
                    _log.info("%s %s: %s to queue (%s) (%s)",
                              event_type,
                              resource_type,
                              resource["metadata"]["name"],
                              self._event_queue.qsize(),
                              time.time())

                    update = (event_type, resource_type, resource)
                    self._event_queue.put(update,
                                          block=True,
                                          timeout=QUEUE_PUT_TIMEOUT)

                    # Extract the latest resource version.
                    new_ver = resource["metadata"]["resourceVersion"]
                    _log.debug("Update resourceVersion, was: %s, now: %s",
                               resource_version, new_ver)
                    resource_version = new_ver

    def _sync_resources(self, resource_type):
        """
        Syncs with the API and determines the latest resource version.
        Adds API objects to the event queue and
        returns the latest resourceVersion.
        Raises an Exception if unable to access the API.
        """
        # Get existing resources from the API.
        _log.info("Syncing '%s' objects", resource_type)
        url = GET_URLS[resource_type] % self.k8s_api
        resp = self._api_get(url, stream=False)
        _log.debug("Response: %s", resp)

        # If we hit an error, raise it.
        if resp.status_code != 200:
            _log.error("Error querying API: %s", resp.json())
            raise KubernetesApiError("Failed to query resource: %s" % resource_type)

        # Get the list of existing API objects from the response, as
        # well as the latest resourceVersion.
        resources = resp.json()["items"]
        metadata = resp.json().get("metadata", {})
        resource_version = metadata.get("resourceVersion")
        _log.debug("%s metadata: %s", resource_type, metadata)

        # Add the existing resources to the queue to be processed.
        # Treat as a MODIFIED event to trigger updates which may not always
        # occur on ADDED.
        _log.info("%s existing %s(s) - add to queue",
                  len(resources), resource_type)
        for resource in resources:
            _log.debug("Queueing update: %s", resource)
            update = (TYPE_MODIFIED, resource_type, resource)
            self._event_queue.put(update,
                                  block=True,
                                  timeout=QUEUE_PUT_TIMEOUT)

        _log.info("Done getting %s(s) - new resourceVersion: %s",
                  resource_type, resource_version)
        return resource_version

    def _api_get(self, path, stream, resource_version=None):
        """
        Get or stream from the API, given a resource.

        :param path: The API path to get.
        :param stream: Whether to return a single object or a stream.
        :param resource_version: The resourceVersion at which to
        start the stream.
        :return: A requests Response object
        """
        # Append the resource version - this indicates where the
        # watch should start.
        _log.debug("Getting API resources '%s' at version '%s'. stream=%s",
                  path, resource_version, stream)
        if resource_version:
            path += "?resourceVersion=%s" % resource_version

        session = requests.Session()
        if self.auth_token:
            session.headers.update({'Authorization': 'Bearer ' + self.auth_token})
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, verify=verify, stream=stream)

    def _is_leader(self):
        """
        Returns True if this policy agent instance has been elected leader,
        False otherwise.
        """
        _log.debug("Checking if we are the elected leader.")
        response = requests.get(self._leader_election_url)
        response = response.json()

        # Determine if we're the leader.
        our_name = os.environ.get("HOSTNAME")
        leader_name = response["name"]
        _log.debug("Elected leader is: %s. We are: %s", leader_name, our_name)
        return our_name == leader_name
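
The constructor above falls back to read_token_file() when K8S_AUTH_TOKEN
is not set. That helper is not shown in this snippet; a plausible sketch,
assuming the conventional Kubernetes service-account token mount path:

# Illustrative sketch - the real read_token_file may differ.
import os

# Assumed path; this is where Kubernetes conventionally mounts the token.
TOKEN_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/token"

def read_token_file():
    """Return the mounted service-account token, or None if absent."""
    if not os.path.exists(TOKEN_PATH):
        return None
    with open(TOKEN_PATH) as f:
        return f.read().strip()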
Example no. 19
import logging
import simplejson as json
from constants import *
from pycalico.datastore import DatastoreClient
from pycalico.datastore_datatypes import Rules, Rule

_log = logging.getLogger("__main__")
client = DatastoreClient()


def add_update_namespace(namespace):
    """
    Configures the necessary policy in Calico for this
    namespace.  Uses the `net.alpha.kubernetes.io/network-isolation`
    annotation.
    """
    namespace_name = namespace["metadata"]["name"]
    _log.debug("Adding/updating namespace: %s", namespace_name)

    # Determine the type of network-isolation specified by this namespace.
    # This defaults to no isolation.
    annotations = namespace["metadata"].get("annotations", {})
    _log.debug("Namespace %s has annotations: %s", namespace_name, annotations)
    policy_annotation = annotations.get(NS_POLICY_ANNOTATION, "{}")
    try:
        policy_annotation = json.loads(policy_annotation)
    except (ValueError, TypeError):
        _log.exception("Failed to parse namespace annotations: %s", annotations)
        return

    # Parsed the annotation - get data.  Might not be a dict, so be careful
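
The original snippet is truncated at this point. A hedged sketch of how the
defensive check flagged in the final comment might continue; the annotation
value format (an "ingress"/"isolation" object) is an assumption, not
confirmed by this snippet:

    # Illustrative continuation sketch - annotation format is assumed.
    isolated = False
    if isinstance(policy_annotation, dict):
        ingress = policy_annotation.get("ingress") or {}
        if isinstance(ingress, dict):
            isolated = ingress.get("isolation") == "DefaultDeny"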
Example no. 20
    def __init__(self, network_config, env):
        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        # Parse CNI_ARGS into dictionary so we can extract values.
        cni_args = parse_cni_args(env.get(CNI_ARGS_ENV, ""))

        self.k8s_pod_name = cni_args.get(K8S_POD_NAME)
        """
        Name of Kubernetes pod if running under Kubernetes, else None.
        """

        self.k8s_namespace = cni_args.get(K8S_POD_NAMESPACE)
        """
        Name of Kubernetes namespace if running under Kubernetes, else None.
        """

        self.network_config = network_config
        """
        Network config as provided in the CNI network config file, passed
        in via stdin.
        """

        self.network_name = network_config["name"]
        """
        Name of the network from the provided network config file.
        """

        self.ipam_type = network_config["ipam"]["type"]
        """
        Type of IPAM to use, e.g. calico-ipam.
        """

        self.policy_driver = get_policy_driver(self.k8s_pod_name, 
                                               self.k8s_namespace, 
                                               self.network_config) 
        """
        Chooses the correct policy driver based on the given configuration
        """

        self.container_engine = get_container_engine(self.k8s_pod_name)
        """
        Chooses the correct container engine based on the given configuration.
        """

        self.ipam_env = env
        """
        Environment dictionary used when calling the IPAM plugin.
        """

        self.command = env[CNI_COMMAND_ENV]
        assert self.command in [CNI_CMD_DELETE, CNI_CMD_ADD], \
                "Invalid CNI command %s" % self.command
        """
        The command to execute for this plugin instance. Required. 
        One of:
          - CNI_CMD_ADD
          - CNI_CMD_DELETE
        """

        self.container_id = env[CNI_CONTAINERID_ENV]
        """
        The container's ID in the containerizer. Required.
        """

        self.cni_netns = env[CNI_NETNS_ENV]
        """
        Relative path to the network namespace of this container.
        """

        self.interface = env[CNI_IFNAME_ENV]
        """
        Name of the interface to create within the container.
        """

        self.cni_path = env[CNI_PATH_ENV]
        """
Example no. 21
class CniPlugin(object):
    """
    Class which encapsulates the function of a CNI plugin.
    """
    def __init__(self, network_config, env):
        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        # Parse CNI_ARGS into dictionary so we can extract values.
        cni_args = parse_cni_args(env.get(CNI_ARGS_ENV, ""))

        self.k8s_pod_name = cni_args.get(K8S_POD_NAME)
        """
        Name of Kubernetes pod if running under Kubernetes, else None.
        """

        self.k8s_namespace = cni_args.get(K8S_POD_NAMESPACE)
        """
        Name of Kubernetes namespace if running under Kubernetes, else None.
        """

        self.network_config = network_config
        """
        Network config as provided in the CNI network config file, passed
        in via stdin.
        """

        self.network_name = network_config["name"]
        """
        Name of the network from the provided network config file.
        """

        self.ipam_type = network_config["ipam"]["type"]
        """
        Type of IPAM to use, e.g calico-ipam.
        """

        self.policy_driver = get_policy_driver(self.k8s_pod_name, 
                                               self.k8s_namespace, 
                                               self.network_config) 
        """
        Chooses the correct policy driver based on the given configuration
        """

        self.container_engine = get_container_engine(self.k8s_pod_name)
        """
        Chooses the correct container engine based on the given configuration.
        """

        self.ipam_env = env
        """
        Environment dictionary used when calling the IPAM plugin.
        """

        self.command = env[CNI_COMMAND_ENV]
        assert self.command in [CNI_CMD_DELETE, CNI_CMD_ADD], \
                "Invalid CNI command %s" % self.command
        """
        The command to execute for this plugin instance. Required. 
        One of:
          - CNI_CMD_ADD
          - CNI_CMD_DELETE
        """

        self.container_id = env[CNI_CONTAINERID_ENV]
        """
        The container's ID in the containerizer. Required.
        """

        self.cni_netns = env[CNI_NETNS_ENV]
        """
        Relative path to the network namespace of this container.
        """

        self.interface = env[CNI_IFNAME_ENV]
        """
        Name of the interface to create within the container.
        """

        self.cni_path = env[CNI_PATH_ENV]
        """
        Path in which to search for CNI plugins.
        """

    def execute(self):
        """
        Execute the CNI plugin - uses the given CNI_COMMAND to determine 
        which action to take.

        :return: None.
        """
        if self.command == CNI_CMD_ADD:
            self.add()
        else:
            self.delete()

    def add(self):
        """"Handles CNI_CMD_ADD requests. 

        Configures Calico networking and prints required json to stdout.

        In CNI, a container can be added to multiple networks, in which case
        the CNI plugin will be called multiple times.  In Calico, each network
        is represented by a profile, and each container only receives a single
        endpoint / veth / IP address even when it is on multiple CNI networks.

        :return: None.
        """
        # If this container uses host networking, don't network it.  
        # This should only be hit when running in Kubernetes mode with
        # docker - rkt doesn't call plugins when using host networking.
        if self.container_engine.uses_host_networking(self.container_id):
            _log.info("Cannot network container %s since it is configured "
                      "with host networking.", self.container_id)
            sys.exit(0)

        _log.info("Configuring network '%s' for container: %s", 
                  self.network_name, self.container_id)

        _log.debug("Checking for existing Calico endpoint")
        endpoint = self._get_endpoint()
        if endpoint:
            # This endpoint already exists, add it to another network.
            _log.info("Endpoint for container exists - add to new network")
            output = self._add_existing_endpoint(endpoint)
        else:
            # No endpoint exists - we need to configure a new one.
            _log.info("Configuring a new Endpoint for container")
            output = self._add_new_endpoint()

        # If all successful, print the IPAM plugin's output to stdout.
        dump = json.dumps(output)
        _log.debug("Printing CNI result to stdout: %s", dump)
        print(dump)

        _log.info("Finished networking container: %s", self.container_id)

    def _add_new_endpoint(self):
        """
        Handles adding a new container to a Calico network.
        """
        # Assign IP addresses using the given IPAM plugin.
        ipv4, ipv6, ipam_result = self._assign_ips(self.ipam_env)

        # Filter out addresses that didn't get assigned.
        ip_list = [ip for ip in [ipv4, ipv6] if ip is not None]

        # Create the Calico endpoint object.
        endpoint = self._create_endpoint(ip_list)
    
        # Provision the veth for this endpoint.
        endpoint = self._provision_veth(endpoint)
        
        # Provision / apply profile on the created endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except ApplyProfileError as e:
            _log.error("Failed to apply profile to endpoint %s",
                       endpoint.name)
            self._remove_veth(endpoint)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message, e.details)
            sys.exit(ERR_CODE_GENERIC)

        # Return the IPAM plugin's result.
        return ipam_result

    def _add_existing_endpoint(self, endpoint):
        """
        Handles adding an existing container to a new Calico network.

        We've already assigned an IP address and created the veth;
        we just need to apply a new profile to this endpoint.
        """
        # Get the already existing IP information for this Endpoint. 
        try:
            ip4 = next(iter(endpoint.ipv4_nets))
        except StopIteration:
            # No IPv4 address on this endpoint.
            _log.warning("No IPV4 address attached to existing endpoint")
            ip4 = IPNetwork("0.0.0.0/32")

        try:
            ip6 = next(iter(endpoint.ipv6_nets))
        except StopIteration:
            # No IPv6 address on this endpoint.
            _log.warning("No IPV6 address attached to existing endpoint")
            ip6 = IPNetwork("::/128")

        # Apply a new profile to this endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except ApplyProfileError as e:
            # Hit an exception applying the profile.  We haven't configured
            # anything, so we don't need to clean anything up.  Just exit.
            _log.error("Failed to apply profile to endpoint %s",
                       endpoint.name)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        return {"ip4": {"ip": str(ip4.cidr)}, 
                "ip6": {"ip": str(ip6.cidr)}}
    
    def delete(self):
        """Handles CNI_CMD_DELETE requests.

        Remove this container from Calico networking.

        :return: None.
        """
        _log.info("Remove network '%s' from container: %s", 
                self.network_name, self.container_id)

        # Step 1: Remove any IP assignments.
        self._release_ip(self.ipam_env)

        # Step 2: Get the Calico endpoint for this workload. If it does not
        # exist, log a warning and exit successfully.
        endpoint = self._get_endpoint()
        if not endpoint:
            _log.warning("No Calico Endpoint for container: %s",
                         self.container_id)
            sys.exit(0)

        # Step 3: Delete the veth interface for this endpoint.
        self._remove_veth(endpoint)

        # Step 4: Delete the Calico endpoint.
        self._remove_workload()

        # Step 5: Delete any profiles for this endpoint
        self.policy_driver.remove_profile()

        _log.info("Finished removing container: %s", self.container_id)

    def _assign_ips(self, env):
        """Assigns IP addresses using the IPAM plugin specified in the
        network config file.

        :return: ipv4, ipv6, ipam_result - the IP addresses assigned by
        the IPAM plugin, plus the plugin's full parsed CNI result.
        """
        # Call the IPAM plugin.  Returns the plugin returncode,
        # as well as the CNI result from stdout.
        _log.debug("Assigning IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_ADD
        rc, result = self._call_ipam_plugin(env)

        try:
            # Load the response - either the assigned IP addresses or 
            # a CNI error message.
            ipam_result = json.loads(result)
        except ValueError:
            message = "Failed to parse IPAM response, exiting"
            _log.exception(message)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        if rc:
            # The IPAM plugin failed to assign an IP address. At this point in
            # execution, we haven't done anything yet, so we don't have to
            # clean up.
            _log.error("IPAM plugin error (rc=%s): %s", rc, result)
            code = ipam_result.get("code", ERR_CODE_GENERIC)
            msg = ipam_result.get("msg", "Unknown IPAM error")
            details = ipam_result.get("details")
            print_cni_error(code, msg, details)
            sys.exit(int(code))

        try:
            ipv4 = IPNetwork(ipam_result["ip4"]["ip"])
            _log.info("IPAM plugin assigned IPv4 address: %s", ipv4)
        except KeyError:
            ipv4 = None
        except (AddrFormatError, ValueError):
            message = "Invalid or Empty IPv4 address: %s" % \
                      (ipam_result["ip4"]["ip"])
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        try:
            ipv6 = IPNetwork(ipam_result["ip6"]["ip"])
            _log.info("IPAM plugin assigned IPv6 address: %s", ipv6)
        except KeyError:
            ipv6 = None
        except (AddrFormatError, ValueError):
            message = "Invalid or Empty IPv6 address: %s" % \
                      (ipam_result["ip6"]["ip"])
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        if not ipv4 and not ipv6:
            message = "IPAM plugin did not return any valid addresses."
            _log.warning("Bad IPAM plugin response: %s", ipam_result)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return ipv4, ipv6, ipam_result

    def _release_ip(self, env):
        """Releases the IP address(es) for this container using the IPAM plugin
        specified in the network config file.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin
        :return: None.
        """
        _log.info("Releasing IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_DELETE
        rc, _ = self._call_ipam_plugin(env)

        if rc:
            _log.error("IPAM plugin failed to release IP address")

    def _call_ipam_plugin(self, env):
        """
        Executes a CNI IPAM plugin.  If `calico-ipam` is the provided IPAM
        type, then calls directly into ipam.py as a performance optimization.

        For all other types of IPAM, searches the CNI_PATH for the 
        correct binary and executes it.

        :return: Tuple of return code, response from the IPAM plugin.
        """
        if self.ipam_type == "calico-ipam":
            _log.info("Using Calico IPAM")
            try:
                response = IpamPlugin(env, 
                                      self.network_config["ipam"]).execute()
                code = 0
            except CniError as e:
                # We hit a CNI error - return the appropriate CNI formatted
                # error dictionary.
                response = json.dumps({"code": e.code, 
                                       "msg": e.msg, 
                                       "details": e.details})
                code = e.code
        else:
            _log.debug("Using binary plugin")
            code, response = self._call_binary_ipam_plugin(env)

        # Return the IPAM return code and output.
        _log.debug("IPAM response (rc=%s): %s", code, response)
        return code, response

    def _call_binary_ipam_plugin(self, env):
        """Calls through to the specified IPAM plugin binary.
    
        Utilizes the IPAM config as specified in the CNI network
        configuration file.  A dictionary with the following form:
            {
              type: <IPAM TYPE>
            }

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin
        :return: Tuple of return code, response from the IPAM plugin.
        """
        # Find the correct plugin based on the given type.
        plugin_path = self._find_ipam_plugin()
        if not plugin_path:
            message = "Could not find IPAM plugin of type %s in path %s." % \
                      (self.ipam_type, self.cni_path)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)
    
        # Execute the plugin and return the result.
        _log.info("Using IPAM plugin at: %s", plugin_path)
        _log.debug("Passing in environment to IPAM plugin: \n%s",
                   json.dumps(env, indent=2))
        p = Popen(plugin_path, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env)
        stdout, stderr = p.communicate(json.dumps(self.network_config))
        _log.debug("IPAM plugin return code: %s", p.returncode)
        _log.debug("IPAM plugin output: \nstdout:\n%s\nstderr:\n%s", 
                   stdout, stderr)
        return p.returncode, stdout

    def _create_endpoint(self, ip_list):
        """Creates an endpoint in the Calico datastore with the client.

        :param ip_list - list of IP addresses that have been already allocated
        :return Calico endpoint object
        """
        _log.debug("Creating Calico endpoint")
        try:
            endpoint = self._client.create_endpoint(HOSTNAME,
                                                    ORCHESTRATOR_ID,
                                                    self.container_id,
                                                    ip_list)
        except (AddrFormatError, KeyError) as e:
            # AddrFormatError: Raised when an IP address type is not 
            #                  compatible with the node.
            # KeyError: Raised when BGP config for host is not found.
            _log.exception("Failed to create Calico endpoint.")
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        _log.info("Created Calico endpoint with IP address(es) %s", ip_list)
        return endpoint

    def _remove_workload(self):
        """Removes this container's workload endpoint from the Calico
        datastore.

        :return: None
        """
        try:
            _log.info("Removing Calico endpoint for container '%s'",
                    self.container_id)
            self._client.remove_workload(hostname=HOSTNAME,
                                         orchestrator_id=ORCHESTRATOR_ID,
                                         workload_id=self.container_id)
        except KeyError:
            _log.warning("Unable to remove workload with ID %s from datastore.",
                         self.container_id)

    def _provision_veth(self, endpoint):
        """Provisions veth for given endpoint.

        Uses the netns relative path passed in through CNI_NETNS_ENV and
        interface passed in through CNI_IFNAME_ENV.

        :param endpoint
        :return Calico endpoint object
        """
        _log.debug("Provisioning Calico veth interface")
        netns_path = os.path.abspath(os.path.join(os.getcwd(), self.cni_netns))
        _log.debug("netns path: %s", netns_path)

        try:
            endpoint.mac = endpoint.provision_veth(
                Namespace(netns_path), self.interface)
        except CalledProcessError as e:
            _log.exception("Failed to provision veth interface for endpoint %s",
                           endpoint.name)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        _log.debug("Endpoint has mac address: %s", endpoint.mac)

        self._client.set_endpoint(endpoint)
        _log.info("Provisioned %s in netns %s", self.interface, netns_path)
        return endpoint

    def _remove_veth(self, endpoint):
        """Remove the veth from given endpoint.

        Handles any errors encountered while removing the endpoint.
        """
        _log.info("Removing veth for endpoint: %s", endpoint.name)
        try:
            removed = netns.remove_veth(endpoint.name)
            _log.debug("Successfully removed endpoint %s? %s", 
                       endpoint.name, removed)
        except CalledProcessError:
            _log.warning("Unable to remove veth %s", endpoint.name)

    @handle_datastore_error
    def _get_endpoint(self):
        """Get endpoint matching self.container_id.

        Return None if no endpoint is found.
        Exits with an error if multiple endpoints are found.

        :return: Endpoint object if found, None if not found
        """
        try:
            _log.debug("Looking for endpoint that matches container ID %s",
                       self.container_id)
            endpoint = self._client.get_endpoint(
                hostname=HOSTNAME,
                orchestrator_id=ORCHESTRATOR_ID,
                workload_id=self.container_id
            )
        except KeyError:
            _log.debug("No endpoint found matching ID %s", self.container_id)
            endpoint = None
        except MultipleEndpointsMatch:
            message = "Multiple Endpoints found matching ID %s" % \
                    self.container_id
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return endpoint

    def _find_ipam_plugin(self):
        """Locates IPAM plugin binary in plugin path and returns absolute path
        of plugin if found; if not found returns an empty string.

        IPAM plugin type is set in the network config file.
        The plugin path is the CNI path passed through the environment variable
        CNI_PATH.

        :rtype : str
        :return: plugin_path - absolute path of IPAM plugin binary
        """
        plugin_type = self.ipam_type 
        plugin_path = ""
        for path in self.cni_path.split(":"):
            _log.debug("Looking for plugin %s in path %s", plugin_type, path)
            temp_path = os.path.abspath(os.path.join(path, plugin_type))
            if os.path.isfile(temp_path):
                _log.debug("Found plugin %s in path %s", plugin_type, path)
                plugin_path = temp_path
                break
        return str(plugin_path)
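
A usage sketch for the CniPlugin class above, assuming the CNI_*_ENV
constants name the standard CNI environment variables (CNI_COMMAND,
CNI_CONTAINERID, CNI_NETNS, CNI_IFNAME, CNI_ARGS, CNI_PATH); the values
below are hypothetical:

# Illustrative invocation - all values are hypothetical.
env = {
    "CNI_COMMAND": "ADD",
    "CNI_CONTAINERID": "a1b2c3d4",
    "CNI_NETNS": "/proc/1234/ns/net",
    "CNI_IFNAME": "eth0",
    "CNI_ARGS": "K8S_POD_NAME=nginx;K8S_POD_NAMESPACE=default",
    "CNI_PATH": "/opt/cni/bin",
}
network_config = {
    "name": "calico-k8s-network",
    "type": "calico",
    "ipam": {"type": "calico-ipam"},
}
plugin = CniPlugin(network_config, env)
plugin.execute()  # dispatches to add() for CNI_COMMAND=ADD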
Example no. 22
class DefaultPolicyDriver(object):
    """
    Implements default network policy for a generic CNI plugin.
    """
    def __init__(self, network_name):

        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        self.profile_name = network_name
        """
        Name of the profile to attach to the endpoint.
        """

        # Validate the given network name to make sure it is compatible with
        # Calico policy.
        if not validate_characters(network_name):
            raise ValueError("Invalid characters detected in the given "
                             "network name, %s. Only letters a-z, numbers "
                             "0-9, and symbols _.- are supported." %
                             network_name)

    def apply_profile(self, endpoint):
        """Sets a profile for the networked container on the given endpoint.

        Create a profile if it is not yet created.

        :param endpoint:
        :return: None
        """
        assert self.profile_name, "No profile name set."
        if not self._client.profile_exists(self.profile_name):
            # If the profile doesn't exist, create it.
            _log.info("Creating new profile '%s'", self.profile_name)
            rules = self.generate_rules()
            self._client.create_profile(self.profile_name, rules)

            # Apply any additional tags.
            tags = self.generate_tags()
            if tags:
                _log.debug("Applying additional tags: %s", tags)
                profile = self._client.get_profile(self.profile_name)
                profile.tags.update(tags)
                self._client.profile_update_tags(profile)

        # Check if the profile has already been applied.
        if self.profile_name in endpoint.profile_ids:
            _log.warning("Endpoint already in profile %s", 
                         self.profile_name)
            return

        # Append profile to Calico endpoint.
        _log.info("Appending profile '%s' to endpoint %s",
                  self.profile_name, endpoint.endpoint_id)
        try:
            self._client.append_profiles_to_endpoint(
                    profile_names=[self.profile_name],
                    endpoint_id=endpoint.endpoint_id
            )
        except (KeyError, MultipleEndpointsMatch) as e:
            _log.exception("Failed to apply profile to endpoint %s: %s",
                           endpoint.name, e.message)
            raise ApplyProfileError(e.message)
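
apply_profile above calls generate_rules() and generate_tags(), which are
not shown in this snippet. A plausible default for generate_rules, assuming
pycalico's Rules/Rule types and the conventional Calico default-profile
semantics (allow traffic between members of the profile, allow all egress);
the real implementation may differ:

# Illustrative sketch - a method on the driver class; the real
# generate_rules in the source may differ.
def generate_rules(self):
    """Default rules: allow inbound from this profile's own tag,
    allow all outbound traffic."""
    return Rules(id=self.profile_name,
                 inbound_rules=[Rule(action="allow",
                                     src_tag=self.profile_name)],
                 outbound_rules=[Rule(action="allow")])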
Example no. 23
class Controller(object):
    def __init__(self):
        self._event_queue = Queue.Queue(maxsize=MAX_QUEUE_SIZE)
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._leader_election_url = os.environ.get("ELECTION_URL",
                                                   "http://127.0.0.1:4040/")
        """
        Use this URL to get leader election status from the sidecar container.
        """

        elect = os.environ.get("LEADER_ELECTION", "false")
        self._leader_elect = elect.lower() == "true"
        """
        Whether or not leader election is enabled.  If set to False, this
        policy controller will assume it is the only instance.
        """

        self._handlers = {}
        """
        Keeps track of which handlers to execute for various events.
        """

        # Handlers for NetworkPolicy events.
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_MODIFIED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         delete_network_policy)

        # Handlers for Namespace events.
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_MODIFIED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         delete_namespace)

        # Handlers for Pod events.
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, add_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_MODIFIED, update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, delete_pod)

    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (e.g. ADDED, DELETED)
        for the given resource type.

        :param resource_type: The type of resource that this handles.
        :param event_type: The type of event that this handles.
        :param handler: The callable to execute when events are received.
        :return: None
        """
        _log.debug("Setting %s %s handler: %s", resource_type, event_type,
                   handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.

        :param resource_type: The type of resource that needs handling.
        :param event_type: The type of event that needs handling.
        :return: The handler for the given resource and event type.
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        Controller.run() is called at program init to spawn watch threads,
        then loops reading responses from the queue as they come in.
        """
        _log.info("Leader election enabled? %s", self._leader_elect)
        if self._leader_elect:
            # Wait until we've been elected leader to start.
            self._wait_for_leadership()
            self._start_leader_thread()

        # Ensure the tier exists.
        metadata = {"order": NET_POL_TIER_ORDER}
        self._client.set_policy_tier_metadata(NET_POL_TIER_NAME, metadata)

        # Ensure the backstop policy exists.  This policy forwards
        # any traffic to Kubernetes pods which doesn't match another policy
        # to the next tier (i.e. the per-namespace profiles).
        selector = "has(%s)" % K8S_NAMESPACE_LABEL
        rules = Rules(inbound_rules=[Rule(action="next-tier")],
                      outbound_rules=[Rule(action="next-tier")])
        self._client.create_policy(NET_POL_TIER_NAME,
                                   "k8s-policy-no-match",
                                   selector,
                                   order=NET_POL_BACKSTOP_ORDER,
                                   rules=rules)

        # Read initial state from Kubernetes API.
        self.start_workers()

        # Loop and read updates from the queue.
        self.read_updates()

    def _wait_for_leadership(self):
        """
        Loops until this controller has been elected leader.
        """
        _log.info("Waiting for this controller to be elected leader")
        while True:
            try:
                is_leader = self._is_leader()
            except requests.exceptions.ConnectionError:
                # During startup, the leader election container
                # might not be up yet.  Handle this case gracefully.
                _log.info("Waiting for leader election container")
            else:
                # Successful response from the leader election container.
                # Check if we are the elected leader.
                if is_leader:
                    _log.info("We have been elected leader")
                    break
            time.sleep(1)

    def _start_leader_thread(self):
        """
        Starts a daemon thread which periodically checks if this controller
        is still the leader.  If it is not, the process exits.
        """
        t = Thread(target=self._watch_leadership)
        t.daemon = True
        t.start()
        _log.info("Started leader election watcher")

    def _watch_leadership(self):
        """
        Watches to see if this policy controller is still the elected leader.
        If no longer the elected leader, exits.
        """
        _log.info("Watching for leader election changes")
        while True:
            try:
                if not self._is_leader():
                    _log.warning("No longer the elected leader - exiting")
                    os._exit(1)
                time.sleep(1)
            except Exception:
                _log.exception("Exception verifying leadership - exiting")
                os._exit(1)
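                # Note: os._exit() exits immediately, skipping Python
                # cleanup handlers; the assumption is that the orchestrator
                # restarts the container so a fresh election can occur.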

    def start_workers(self):
        """
        Starts the worker threads which manage each Kubernetes
        API resource.
        """
        resources = [
            RESOURCE_TYPE_NETWORK_POLICY, RESOURCE_TYPE_NAMESPACE,
            RESOURCE_TYPE_POD
        ]

        # For each resource type, start a thread which syncs it from the
        # kubernetes API.
        for resource_type in resources:
            t = Thread(target=self._manage_resource, args=(resource_type, ))
            t.daemon = True
            t.start()
            _log.info("Started worker thread for: %s", resource_type)

    def read_updates(self):
        """
        Reads from the update queue.

        An update on the queue must be a tuple of:
          (event_type, resource_type, resource)

        Where:
          - event_type: Either "ADDED", "MODIFIED", "DELETED", "ERROR"
          - resource_type: e.g. "Namespace", "Pod", "NetworkPolicy"
          - resource: The parsed json resource from the API matching
                      the given resource_type.
        """
        while True:
            try:
                # Wait for an update on the event queue.
                _log.debug("Reading from event queue")
                update = self._event_queue.get(block=True)
                event_type, resource_type, resource = update

                # We've received an update - process it.
                _log.debug("Read event: %s, %s, %s", event_type, resource_type,
                           json.dumps(resource, indent=2))
                self._process_update(event_type, resource_type, resource)
            except KeyError:
                # We'll hit this if the update is malformed, e.g. if the
                # resource is missing its metadata.
                _log.exception("Invalid update: %s", update)
            finally:
                self._event_queue.task_done()

                # Log out when the queue is empty.
                if self._event_queue.empty():
                    _log.info("Emptied the event queue")

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly.
        """
        _log.debug("Processing '%s' for kind '%s'", event_type, resource_type)

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s", event_type, resource_type)
        else:
            try:
                handler(resource)
                _log.info("Handled %s for %s: %s", event_type, resource_type,
                          key)
            except KeyError:
                _log.exception("Invalid %s: %s", resource_type,
                               json.dumps(resource, indent=2))

    def _manage_resource(self, resource_type):
        """
        Routine for a worker thread.  Syncs with API for the given resource
        and starts a watch.  If an error occurs within the watch, will re-sync
        with the API and re-start the watch.
        """
        while True:
            try:
                # Sync existing resources for this type.
                resource_version = self._sync_resources(resource_type)

                # Start a watch from the latest resource_version.
                self._watch_resource(resource_type, resource_version)
            except (requests.ConnectionError, requests.ChunkedEncodingError):
                _log.exception("Connection error querying: %s", resource_type)
            except requests.HTTPError:
                _log.exception("HTTP error querying: %s", resource_type)
            except KubernetesApiError:
                _log.debug("Kubernetes API error managing %s", resource_type)
Exemplo n.º 24
0
def save_diags(log_dir):
    # Create temp directory
    temp_dir = tempfile.mkdtemp()
    temp_diags_dir = os.path.join(temp_dir, 'diagnostics')
    os.mkdir(temp_diags_dir)
    print("Using temp dir: %s" % temp_dir)

    # Write date to file
    with DiagsErrorWriter(temp_diags_dir, 'date') as f:
        f.write("DATE=%s" %
                datetime.strftime(datetime.today(), "%Y-%m-%d_%H-%M-%S"))

    # Write hostname to file
    with DiagsErrorWriter(temp_diags_dir, 'hostname') as f:
        f.write("%s" % socket.gethostname())

    # Write netstat output to file
    with DiagsErrorWriter(temp_diags_dir, 'netstat') as f:
        try:
            print("Dumping netstat output")
            netstat = sh.Command._create("netstat")

            f.writelines(
                netstat(
                    # Display all sockets (default: connected)
                    all=True,
                    # Don't resolve names
                    numeric=True))

        except sh.CommandNotFound as e:
            print "  - Missing command: %s" % e.message
            f.writelines("Missing command: %s\n" % e.message)

    # Write routes
    print("Dumping routes")
    with DiagsErrorWriter(temp_diags_dir, 'route') as f:
        try:
            route = sh.Command._create("route")
            f.write("route --numeric\n")
            f.writelines(route(numeric=True))
            f.write('\n')
        except sh.CommandNotFound as e:
            print "  - Missing command: %s" % e.message
            f.writelines("Missing command: %s\n" % e.message)

        try:
            ip = sh.Command._create("ip")
            f.write("ip route\n")
            f.writelines(ip("route"))
            f.write('\n')

            f.write("ip -6 route\n")
            f.writelines(ip("-6", "route"))
            f.write('\n')
        except sh.CommandNotFound as e:
            print "  - Missing command: %s" % e.message
            f.writelines("Missing command: %s\n" % e.message)

    # Dump iptables
    with DiagsErrorWriter(temp_diags_dir, 'iptables') as f:
        try:
            iptables_save = sh.Command._create("iptables-save")
            print("Dumping iptables")
            f.writelines(iptables_save())
        except sh.CommandNotFound as e:
            print "  - Missing command: %s" % e.message
            f.writelines("Missing command: %s\n" % e.message)

    # Dump ipset list
    # TODO: ipset might not be installed on the host. But we don't want to
    # gather the diags in the container because it might not be running...
    with DiagsErrorWriter(temp_diags_dir, 'ipset') as f:
        try:
            ipset = sh.Command._create("ipset")
            print("Dumping ipset")
            f.writelines(ipset("list"))
        except sh.CommandNotFound as e:
            print "  - Missing command: %s" % e.message
            f.writelines("Missing command: %s\n" % e.message)
        except sh.ErrorReturnCode_1 as e:
            print "  - Error running ipset. Maybe you need to run as root."
            f.writelines("Error running ipset: %s\n" % e)

    # Ask Felix to dump stats to its log file - ignore errors as the
    # calico-node might not be running
    subprocess.call(
        ["docker", "exec", "calico-node", "pkill", "-SIGUSR1", "felix"])

    if os.path.isdir(log_dir):
        print("Copying Calico logs")
        # Skip the lock files as they can only be copied by root.
        copytree(log_dir,
                 os.path.join(temp_diags_dir, "logs"),
                 ignore=ignore_patterns('lock'))
    else:
        print('No logs found in %s; skipping log copying' % log_dir)

    print("Dumping datastore")
    # TODO: May want to move this into datastore.py as a dump-calico function
    with DiagsErrorWriter(temp_diags_dir, 'etcd_calico') as f:
        try:
            datastore_client = DatastoreClient()
            datastore_data = datastore_client.etcd_client.read("/calico",
                                                               recursive=True)
            f.write("dir?, key, value\n")
            # TODO: python-etcd bug: Leaves show up twice in get_subtree().
            for child in datastore_data.get_subtree():
                if child.dir:
                    f.write("DIR,  %s,\n" % child.key)
                else:
                    f.write("FILE, %s, %s\n" % (child.key, child.value))
        except EtcdException as e:
            print("Unable to dump etcd datastore")
            f.write("Unable to dump etcd datastore: %s" % e)
Exemplo n.º 25
0
def save_diags(log_dir, upload=False):
    # Create temp directory
    temp_dir = tempfile.mkdtemp()
    temp_diags_dir = os.path.join(temp_dir, 'diagnostics')
    os.mkdir(temp_diags_dir)
    print("Using temp dir: %s" % temp_dir)

    # Write date to file
    with open(os.path.join(temp_diags_dir, 'date'), 'w') as f:
        f.write("DATE=%s" %
                datetime.strftime(datetime.today(), "%Y-%m-%d_%H-%M-%S"))

    # Write hostname to file
    with open(os.path.join(temp_diags_dir, 'hostname'), 'w') as f:
        f.write("%s" % socket.gethostname())

    # Write netstat output to file
    with open(os.path.join(temp_diags_dir, 'netstat'), 'w') as f:
        try:
            print("Dumping netstat output")
            netstat = sh.Command._create("netstat")

            f.writelines(
                netstat(
                    # Display all sockets (default: connected)
                    all=True,
                    # Don't resolve names
                    numeric=True))

        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message

    # Write routes
    print("Dumping routes")
    with open(os.path.join(temp_diags_dir, 'route'), 'w') as f:
        try:
            route = sh.Command._create("route")
            f.write("route --numeric")
            f.writelines(route(numeric=True))
            f.write('\n')
        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message

        try:
            ip = sh.Command._create("ip")
            f.write("ip route")
            f.writelines(ip("route"))
            f.write('\n')

            f.write("ip -6 route")
            f.writelines(ip("-6", "route"))
            f.write('\n')
        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message

    # Dump iptables
    with open(os.path.join(temp_diags_dir, 'iptables'), 'w') as f:
        try:
            iptables_save = sh.Command._create("iptables-save")
            print("Dumping iptables")
            f.writelines(iptables_save())
        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message

    # Dump ipset list
    # TODO: ipset might not be installed on the host. But we don't want to
    # gather the diags in the container because it might not be running...
    with open(os.path.join(temp_diags_dir, 'ipset'), 'w') as f:
        try:
            ipset = sh.Command._create("ipset")
            print("Dumping ipset")
            f.writelines(ipset("list"))
        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message

    if os.path.isdir(log_dir):
        print("Copying Calico logs")
        copytree(log_dir, os.path.join(temp_diags_dir, "logs"))
    else:
        print('No logs found in %s; skipping log copying' % log_dir)

    print("Dumping datastore")
    # TODO: May want to move this into datastore.py as a dump-calico function
    try:
        datastore_client = DatastoreClient()
        datastore_data = datastore_client.etcd_client.read("/calico",
                                                           recursive=True)
        with open(os.path.join(temp_diags_dir, 'etcd_calico'), 'w') as f:
            f.write(str(datastore_data))
    except EtcdException:
        print "Unable to dump etcd datastore"

    # Create tar and upload
    tar_filename = datetime.strftime(datetime.today(),
                                     "diags-%d%m%y_%H%M%S.tar.gz")
    full_tar_path = os.path.join(temp_dir, tar_filename)
    with tarfile.open(full_tar_path, "w:gz") as tar:
        # pass in arcname, otherwise zip contains layers of subfolders
        tar.add(temp_dir, arcname="")

    print("Diags saved to %s" % (full_tar_path))

    if upload:
        upload_temp_diags(full_tar_path)
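A note on the tarball step: passing arcname="" when adding temp_dir keeps
the archive rooted at the diagnostics folder instead of nesting it under
the temp directory's full path.  A quick way to verify the layout (the file
name below is illustrative):

import tarfile

with tarfile.open("/tmp/diags-010116_120000.tar.gz", "r:gz") as tar:
    for member in tar.getnames():
        print(member)   # e.g. "diagnostics/date", "diagnostics/netstat"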
Exemplo n.º 26
0
def save_diags(log_dir):
    # Create temp directory
    temp_dir = tempfile.mkdtemp()
    temp_diags_dir = os.path.join(temp_dir, 'diagnostics')
    os.mkdir(temp_diags_dir)
    print("Using temp dir: %s" % temp_dir)

    # Write date to file
    with open(os.path.join(temp_diags_dir, 'date'), 'w') as f:
        f.write("DATE=%s" %
                datetime.strftime(datetime.today(), "%Y-%m-%d_%H-%M-%S"))

    # Write hostname to file
    with open(os.path.join(temp_diags_dir, 'hostname'), 'w') as f:
        f.write("%s" % socket.gethostname())

    # Write netstat output to file
    with open(os.path.join(temp_diags_dir, 'netstat'), 'w') as f:
        try:
            print("Dumping netstat output")
            netstat = sh.Command._create("netstat")

            f.writelines(
                netstat(
                    # Display all sockets (default: connected)
                    all=True,
                    # Don't resolve names
                    numeric=True))

        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message

    # Write routes
    print("Dumping routes")
    with open(os.path.join(temp_diags_dir, 'route'), 'w') as f:
        try:
            route = sh.Command._create("route")
            f.write("route --numeric\n")
            f.writelines(route(numeric=True))
            f.write('\n')
        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message

        try:
            ip = sh.Command._create("ip")
            f.write("ip route\n")
            f.writelines(ip("route"))
            f.write('\n')

            f.write("ip -6 route\n")
            f.writelines(ip("-6", "route"))
            f.write('\n')
        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message

    # Dump iptables
    with open(os.path.join(temp_diags_dir, 'iptables'), 'w') as f:
        try:
            iptables_save = sh.Command._create("iptables-save")
            print("Dumping iptables")
            f.writelines(iptables_save())
        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message

    # Dump ipset list
    # TODO: ipset might not be installed on the host. But we don't want to
    # gather the diags in the container because it might not be running...
    with open(os.path.join(temp_diags_dir, 'ipset'), 'w') as f:
        try:
            ipset = sh.Command._create("ipset")
            print("Dumping ipset")
            f.writelines(ipset("list"))
        except sh.CommandNotFound as e:
            print "Missing command: %s" % e.message
        except sh.ErrorReturnCode_1 as e:
            print "Error running ipset. Maybe you need to run as root."

    # Ask Felix to dump stats to its log file - ignore errors as the
    # calico-node might not be running
    subprocess.call(
        ["docker", "exec", "calico-node", "pkill", "-SIGUSR1", "felix"])

    if os.path.isdir(log_dir):
        print("Copying Calico logs")
        # Skip the lock files as they can only be copied by root.
        copytree(log_dir,
                 os.path.join(temp_diags_dir, "logs"),
                 ignore=ignore_patterns('lock'))
    else:
        print('No logs found in %s; skipping log copying' % log_dir)

    print("Dumping datastore")
    # TODO: May want to move this into datastore.py as a dump-calico function
    try:
        datastore_client = DatastoreClient()
        datastore_data = datastore_client.etcd_client.read("/calico",
                                                           recursive=True)
        with open(os.path.join(temp_diags_dir, 'etcd_calico'), 'w') as f:
            f.write("dir?, key, value\n")
            # TODO: python-etcd bug: Leaves show up twice in get_subtree().
            for child in datastore_data.get_subtree():
                if child.dir:
                    f.write("DIR,  %s,\n" % child.key)
                else:
                    f.write("FILE, %s, %s\n" % (child.key, child.value))
    except EtcdException:
        print "Unable to dump etcd datastore"

    # Create tar.
    tar_filename = datetime.strftime(datetime.today(),
                                     "diags-%d%m%y_%H%M%S.tar.gz")
    full_tar_path = os.path.join(temp_dir, tar_filename)
    with tarfile.open(full_tar_path, "w:gz") as tar:
        # pass in arcname, otherwise zip contains layers of subfolders
        tar.add(temp_dir, arcname="")

    print("\nDiags saved to %s\n" % (full_tar_path))
    print_paragraph("If required, you can upload the diagnostics bundle to a "
                    "file sharing service such as transfer.sh using curl or "
                    "similar.  For example:")
    print("  curl --upload-file %s https://transfer.sh/%s" %
          (full_tar_path, os.path.basename(full_tar_path)))
Exemplo n.º 27
0
import logging
import simplejson as json
from constants import *
from pycalico.datastore import DatastoreClient
from pycalico.datastore_datatypes import Rules, Rule

_log = logging.getLogger("__main__")
client = DatastoreClient()


def add_update_namespace(namespace):
    """
    Configures the necessary policy in Calico for this
    namespace.  Uses the `net.alpha.kubernetes.io/network-isolation`
    annotation.
    """
    namespace_name = namespace["metadata"]["name"]
    _log.debug("Adding/updating namespace: %s", namespace_name)

    # Determine the type of network-isolation specified by this namespace.
    # This defaults to no isolation.
    annotations = namespace["metadata"].get("annotations", {})
    _log.debug("Namespace %s has annotations: %s", namespace_name, annotations)
    policy_annotation = annotations.get(NS_POLICY_ANNOTATION, "{}")
    try:
        policy_annotation = json.loads(policy_annotation)
    except (ValueError, TypeError):
        _log.exception("Failed to parse namespace annotations: %s", annotations)
        return

    # Parsed the annotation - get data.  Might not be a dict, so be careful
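The trailing comment flags that the parsed annotation may not be a dict.  A
defensive continuation, assuming the annotation carries an object shaped
like {"ingress": {"isolation": "DefaultDeny"}}, could read:

    # Hypothetical continuation - the annotation shape is an assumption.
    if not isinstance(policy_annotation, dict):
        _log.warning("Annotation on %s is not an object: %s",
                     namespace_name, policy_annotation)
        policy_annotation = {}
    ingress = policy_annotation.get("ingress", {})
    isolation = ingress.get("isolation") if isinstance(ingress, dict) else None
    default_deny = isolation == "DefaultDeny"
    _log.debug("Namespace %s default deny? %s", namespace_name, default_deny)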
Exemplo n.º 28
0
class CniPlugin(object):
    """
    Class which encapsulates the function of a CNI plugin.
    """
    def __init__(self, network_config, env):
        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        # Parse CNI_ARGS into dictionary so we can extract values.
        cni_args = parse_cni_args(env.get(CNI_ARGS_ENV, ""))

        self.k8s_pod_name = cni_args.get(K8S_POD_NAME)
        """
        Name of Kubernetes pod if running under Kubernetes, else None.
        """

        self.k8s_namespace = cni_args.get(K8S_POD_NAMESPACE)
        """
        Name of Kubernetes namespace if running under Kubernetes, else None.
        """

        self.network_config = network_config
        """
        Network config as provided in the CNI network file passed in
        via stdin.
        """

        self.network_name = network_config["name"]
        """
        Name of the network from the provided network config file.
        """

        self.ipam_type = network_config["ipam"]["type"]
        """
        Type of IPAM to use, e.g. calico-ipam.
        """

        self.hostname = network_config.get("hostname", socket.gethostname())
        """
        The hostname to register endpoints under.
        """

        self.container_engine = get_container_engine(self.k8s_pod_name)
        """
        Chooses the correct container engine based on the given configuration.
        """

        self.ipam_env = env
        """
        Environment dictionary used when calling the IPAM plugin.
        """

        self.command = env[CNI_COMMAND_ENV]
        assert self.command in [CNI_CMD_DELETE, CNI_CMD_ADD], \
                "Invalid CNI command %s" % self.command
        """
        The command to execute for this plugin instance. Required.
        One of:
          - CNI_CMD_ADD
          - CNI_CMD_DELETE
        """

        self.container_id = env[CNI_CONTAINERID_ENV]
        """
        The container's ID in the containerizer. Required.
        """

        self.cni_netns = env[CNI_NETNS_ENV]
        """
        Relative path to the network namespace of this container.
        """

        self.interface = env[CNI_IFNAME_ENV]
        """
        Name of the interface to create within the container.
        """

        self.cni_path = env[CNI_PATH_ENV]
        """
        Path in which to search for CNI plugins.
        """

        self.running_under_k8s = self.k8s_namespace and self.k8s_pod_name
        if self.running_under_k8s:
            self.workload_id = "%s.%s" % (self.k8s_namespace,
                                          self.k8s_pod_name)
            self.orchestrator_id = "k8s"
        else:
            self.workload_id = self.container_id
            self.orchestrator_id = "cni"
        kubernetes_config = network_config.get("kubernetes", {})
        self.kubeconfig_path = kubernetes_config.get("kubeconfig")
        self.k8s_node_name = kubernetes_config.get("node_name",
                                                   socket.gethostname())
        """
        Configure orchestrator specific settings.
        workload_id: In Kubernetes, this is the pod's namespace and name.
                     Otherwise, this is the container ID.
        orchestrator_id: Either "k8s" or "cni".
        """

        # Ensure that the ipam_env CNI_ARGS contains the IgnoreUnknown=1 option
        # See https://github.com/appc/cni/pull/158
        # And https://github.com/appc/cni/pull/127
        self.ipam_env[CNI_ARGS_ENV] = 'IgnoreUnknown=1'
        if env.get(CNI_ARGS_ENV):
            # Append any existing args - if they are set.
            self.ipam_env[CNI_ARGS_ENV] += ";%s" % env.get(CNI_ARGS_ENV)

        self.policy_driver = get_policy_driver(self)
        """
        Chooses the correct policy driver based on the given configuration
        """

    def execute(self):
        """
        Execute the CNI plugin - uses the given CNI_COMMAND to determine
        which action to take.

        :return: None.
        """
        if self.command == CNI_CMD_ADD:
            self.add()
        else:
            self.delete()

    def add(self):
        """"Handles CNI_CMD_ADD requests.

        Configures Calico networking and prints required json to stdout.

        In CNI, a container can be added to multiple networks, in which case
        the CNI plugin will be called multiple times.  In Calico, each network
        is represented by a profile, and each container only receives a single
        endpoint / veth / IP address even when it is on multiple CNI networks.

        :return: None.
        """
        # If this container uses host networking, don't network it.
        # This should only be hit when running in Kubernetes mode with
        # docker - rkt doesn't call plugins when using host networking.
        if self.container_engine.uses_host_networking(self.container_id):
            _log.info(
                "Cannot network container %s since it is configured "
                "with host networking.", self.container_id)
            sys.exit(0)

        _log.info("Configuring network '%s' for container: %s",
                  self.network_name, self.container_id)

        _log.debug("Checking for existing Calico endpoint")
        endpoint = self._get_endpoint()
        if endpoint and not self.running_under_k8s:
            # We've received a create for an existing container, likely on
            # a new CNI network.  We don't need to configure the veth or
            # assign IP addresses, we simply need to add to the new
            # CNI network.  Kubernetes handles this case
            # differently (see below).
            _log.info("Endpoint for container exists - add to new network")
            output = self._add_existing_endpoint(endpoint)
        elif endpoint and self.running_under_k8s:
            # Running under Kubernetes and we've received a create for
            # an existing workload.  Kubernetes only supports a single CNI
            # network, which means that the old pod has been destroyed
            # under our feet and we need to set up networking on the new one.
            # We should also clean up any stale endpoint / IP assignment.
            _log.info("Kubernetes pod has been recreated")
            self._remove_stale_endpoint(endpoint)

            # Release any previous IP addresses assigned to this workload.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)

            # Clean up any profiles for the stale endpoint
            self.policy_driver.remove_profile()

            # Configure the new workload.
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_ADD
            output = self._add_new_endpoint()
        else:
            # No endpoint exists - we need to configure a new one.
            _log.info("No endpoint exists for workload - creating")
            output = self._add_new_endpoint()

        # If all successful, print the IPAM plugin's output to stdout.
        dump = json.dumps(output)
        _log.debug("Printing CNI result to stdout: %s", dump)
        print(dump)

        _log.info("Finished networking container: %s", self.container_id)

    def _add_new_endpoint(self):
        """
        Handles adding a new container to a Calico network.
        """
        # Assign IP addresses using the given IPAM plugin.
        _log.info("Configuring a new Endpoint")
        ipv4, ipv6, ipam_result = self._assign_ips(self.ipam_env)

        # Filter out addresses that didn't get assigned.
        ip_list = [ip for ip in [ipv4, ipv6] if ip is not None]

        # Create the Calico endpoint object.
        endpoint = self._create_endpoint(ip_list)

        # Provision the veth for this endpoint.
        endpoint = self._provision_veth(endpoint)

        # Provision / apply profile on the created endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except PolicyException as e:
            _log.error("Failed to apply profile to endpoint %s", endpoint.name)
            self._remove_veth(endpoint)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message, e.details)
            sys.exit(ERR_CODE_GENERIC)

        # Return the IPAM plugin's result.
        return ipam_result

    def _add_existing_endpoint(self, endpoint):
        """
        Handles adding an existing container to a new Calico network.

        We've already assigned an IP address and created the veth,
        we just need to apply a new profile to this endpoint.
        """
        # Get the already existing IP information for this Endpoint.
        try:
            ip4 = next(iter(endpoint.ipv4_nets))
        except StopIteration:
            # No IPv4 address on this endpoint.
            _log.warning("No IPV4 address attached to existing endpoint")
            ip4 = IPNetwork("0.0.0.0/32")

        try:
            ip6 = next(iter(endpoint.ipv6_nets))
        except StopIteration:
            # No IPv6 address on this endpoint.
            _log.warning("No IPV6 address attached to existing endpoint")
            ip6 = IPNetwork("::/128")

        # Apply a new profile to this endpoint.
        try:
            self.policy_driver.apply_profile(endpoint)
        except PolicyException as e:
            # Hit an exception applying the profile.  We haven't configured
            # anything, so we don't need to clean anything up.  Just exit.
            _log.error("Failed to apply profile to endpoint %s", endpoint.name)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        return {"ip4": {"ip": str(ip4.cidr)}, "ip6": {"ip": str(ip6.cidr)}}

    def delete(self):
        """Handles CNI_CMD_DELETE requests.

        Remove this container from Calico networking.

        :return: None.
        """
        _log.info("Remove network '%s' from container: %s", self.network_name,
                  self.container_id)

        # Step 1: Remove any IP assignments.
        self._release_ip(self.ipam_env)

        # Step 2: Get the Calico endpoint for this workload. If it does not
        # exist, log a warning and exit successfully.
        endpoint = self._get_endpoint()
        if not endpoint:
            _log.warning("No Calico Endpoint for workload: %s",
                         self.workload_id)
            sys.exit(0)

        # Step 3: Delete the veth interface for this endpoint.
        self._remove_veth(endpoint)

        # Step 4: Delete the Calico workload.
        self._remove_workload()

        # Step 5: Delete any profiles for this endpoint
        self.policy_driver.remove_profile()

        _log.info("Finished removing container: %s", self.container_id)

    def _assign_ips(self, env):
        """Assigns and returns an IPv4 address using the IPAM plugin
        specified in the network config file.

        :return: ipv4, ipv6 - The IP addresses assigned by the IPAM plugin.
        """
        # Call the IPAM plugin.  Returns the plugin returncode,
        # as well as the CNI result from stdout.
        _log.debug("Assigning IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_ADD
        rc, result = self._call_ipam_plugin(env)

        try:
            # Load the response - either the assigned IP addresses or
            # a CNI error message.
            ipam_result = json.loads(result)
        except ValueError:
            message = "Failed to parse IPAM response, exiting"
            _log.exception(message)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        if rc:
            # The IPAM plugin failed to assign an IP address. At this point in
            # execution, we haven't done anything yet, so we don't have to
            # clean up.
            _log.error("IPAM plugin error (rc=%s): %s", rc, result)
            code = ipam_result.get("code", ERR_CODE_GENERIC)
            msg = ipam_result.get("msg", "Unknown IPAM error")
            details = ipam_result.get("details")
            print_cni_error(code, msg, details)
            sys.exit(int(code))

        try:
            ipv4 = IPNetwork(ipam_result["ip4"]["ip"])
            _log.info("IPAM plugin assigned IPv4 address: %s", ipv4)
        except KeyError:
            ipv4 = None
        except (AddrFormatError, ValueError):
            message = "Invalid or Empty IPv4 address: %s" % \
                      (ipam_result["ip4"]["ip"])
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        try:
            ipv6 = IPNetwork(ipam_result["ip6"]["ip"])
            _log.info("IPAM plugin assigned IPv6 address: %s", ipv6)
        except KeyError:
            ipv6 = None
        except (AddrFormatError, ValueError):
            message = "Invalid or Empty IPv6 address: %s" % \
                      (ipam_result["ip6"]["ip"])
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        if not ipv4 and not ipv6:
            message = "IPAM plugin did not return any valid addresses."
            _log.warning("Bad IPAM plugin response: %s", ipam_result)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return ipv4, ipv6, ipam_result
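
    # Illustrative note (not from the original source): a successful IPAM
    # result parsed by _assign_ips typically has this shape, with either
    # address family optional:
    #
    #   {"ip4": {"ip": "10.22.0.5/32"}, "ip6": {"ip": "fd80:24e2::5/128"}}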

    def _release_ip(self, env):
        """Releases the IP address(es) for this container using the IPAM plugin
        specified in the network config file.

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin
        :return: None.
        """
        _log.info("Releasing IP address")
        assert env[CNI_COMMAND_ENV] == CNI_CMD_DELETE
        rc, _ = self._call_ipam_plugin(env)

        if rc:
            _log.error("IPAM plugin failed to release IP address")

    def _call_ipam_plugin(self, env):
        """
        Executes a CNI IPAM plugin.  If `calico-ipam` is the provided IPAM
        type, then calls directly into ipam.py as a performance optimization.

        For all other types of IPAM, searches the CNI_PATH for the
        correct binary and executes it.

        :return: Tuple of return code, response from the IPAM plugin.
        """
        if self.ipam_type == "calico-ipam":
            _log.info("Using Calico IPAM")
            try:
                response = IpamPlugin(env,
                                      self.network_config["ipam"]).execute()
                code = 0
            except CniError as e:
                # We hit a CNI error - return the appropriate CNI formatted
                # error dictionary.
                response = json.dumps({
                    "code": e.code,
                    "msg": e.msg,
                    "details": e.details
                })
                code = e.code
        elif self.ipam_type == "host-local":
            # We've been told to use the "host-local" IPAM plugin.
            # Check if we need to use the Kubernetes podCidr for this node, and
            # if so replace the subnet field with the correct value.
            if self.network_config["ipam"].get("subnet") == "usePodCidr":
                if not self.running_under_k8s:
                    print_cni_error(
                        ERR_CODE_GENERIC, "Invalid network config",
                        "Must be running under Kubernetes to use 'subnet: usePodCidr'"
                    )
                    sys.exit(ERR_CODE_GENERIC)
                _log.info("Using Kubernetes podCIDR for node: %s",
                          self.k8s_node_name)
                pod_cidr = self._get_kubernetes_pod_cidr()
                self.network_config["ipam"]["subnet"] = str(pod_cidr)

            # Call the IPAM plugin.
            _log.debug("Calling host-local IPAM plugin")
            code, response = self._call_binary_ipam_plugin(env)
        else:
            # Using some other IPAM plugin - call it.
            _log.debug("Using binary plugin")
            code, response = self._call_binary_ipam_plugin(env)

        # Return the IPAM return code and output.
        _log.debug("IPAM response (rc=%s): %s", code, response)
        return code, response

    def _get_kubernetes_pod_cidr(self):
        """
        Attempt to get the Kubernetes pod CIDR for this node.
        First check if we've written it to disk.  If so, use that value.  If
        not, then query the Kubernetes API for it.
        """
        _log.info("Getting node.spec.podCidr from API, kubeconfig: %s",
                  self.kubeconfig_path)
        if not self.kubeconfig_path:
            # For now, kubeconfig is the only supported auth method.
            print_cni_error(
                ERR_CODE_GENERIC, "Missing kubeconfig",
                "usePodCidr requires specification of kubeconfig file")
            sys.exit(ERR_CODE_GENERIC)

        # Query the API for this node.  Default node name to the hostname.
        try:
            api = HTTPClient(KubeConfig.from_file(self.kubeconfig_path))
            node = None
            for n in Node.objects(api):
                _log.debug("Checking node: %s", n.obj["metadata"]["name"])
                if n.obj["metadata"]["name"] == self.k8s_node_name:
                    node = n
                    break
            if not node:
                raise KeyError("Unable to find node in API: %s",
                               self.k8s_node_name)
            _log.debug("Found node %s: %s: ", node.obj["metadata"]["name"],
                       node.obj["spec"])
        except Exception:
            print_cni_error(ERR_CODE_GENERIC, "Error querying Kubernetes API",
                            "Failed to get podCidr from Kubernetes API")
            sys.exit(ERR_CODE_GENERIC)
        else:
            pod_cidr = node.obj["spec"].get("podCIDR")
            if not pod_cidr:
                print_cni_error(ERR_CODE_GENERIC, "Missing podCidr",
                                "No podCidr for node %s" % self.k8s_node_name)
                sys.exit(ERR_CODE_GENERIC)
        _log.debug("Using podCidr: %s", pod_cidr)
        return pod_cidr

    def _call_binary_ipam_plugin(self, env):
        """Calls through to the specified IPAM plugin binary.

        Utilizes the IPAM config as specified in the CNI network
        configuration file.  A dictionary with the following form:
            {
              type: <IPAM TYPE>
            }

        :param env - A dictionary of environment variables to pass to the
        IPAM plugin
        :return: Tuple of return code, response from the IPAM plugin.
        """
        # Find the correct plugin based on the given type.
        plugin_path = self._find_ipam_plugin()
        if not plugin_path:
            message = "Could not find IPAM plugin of type %s in path %s." % \
                      (self.ipam_type, self.cni_path)
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        # Execute the plugin and return the result.
        _log.info("Using IPAM plugin at: %s", plugin_path)
        _log.debug("Passing in environment to IPAM plugin: \n%s",
                   json.dumps(env, indent=2))
        p = Popen(plugin_path, stdin=PIPE, stdout=PIPE, stderr=PIPE, env=env)
        stdout, stderr = p.communicate(json.dumps(self.network_config))
        _log.debug("IPAM plugin return code: %s", p.returncode)
        _log.debug("IPAM plugin output: \nstdout:\n%s\nstderr:\n%s", stdout,
                   stderr)
        return p.returncode, stdout
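
    # Note (not from the original source): Popen requires every key and
    # value in the env mapping to be a string, which is why ipam_env is
    # populated only with string-valued CNI_* environment variables.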

    def _create_endpoint(self, ip_list):
        """Creates an endpoint in the Calico datastore with the client.

        :param ip_list - list of IP addresses that have been already allocated
        :return Calico endpoint object
        """
        _log.debug("Creating Calico endpoint with workload_id=%s",
                   self.workload_id)
        try:
            endpoint = self._client.create_endpoint(self.hostname,
                                                    self.orchestrator_id,
                                                    self.workload_id, ip_list)
        except (AddrFormatError, KeyError) as e:
            # AddrFormatError: Raised when an IP address type is not
            #                  compatible with the node.
            # KeyError: Raised when BGP config for host is not found.
            _log.exception("Failed to create Calico endpoint.")
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        _log.info("Created Calico endpoint with IP address(es) %s", ip_list)
        return endpoint

    def _remove_stale_endpoint(self, endpoint):
        """
        Removes the given endpoint from Calico.
        Called when we discover a stale endpoint that is no longer in use.
        Note that this doesn't release IP allocations - that must be done
        using the designated IPAM plugin.
        """
        _log.info("Removing stale Calico endpoint '%s'", endpoint)
        try:
            self._client.remove_endpoint(endpoint)
        except KeyError:
            # Shouldn't hit this since we know the workload exists.
            _log.info("Error removing stale endpoint, ignoring")

    def _remove_workload(self):
        """Removes the given endpoint from the Calico datastore

        :return: None
        """
        try:
            _log.info("Removing Calico workload '%s'", self.workload_id)
            self._client.remove_workload(hostname=self.hostname,
                                         orchestrator_id=self.orchestrator_id,
                                         workload_id=self.workload_id)
        except KeyError:
            # Attempt to remove the workload using the container ID as the
            # workload ID.  Earlier releases of the plugin used the
            # container ID for the workload ID rather than the Kubernetes pod
            # name and namespace.
            _log.debug("Could not find workload with workload ID %s.",
                       self.workload_id)
            try:
                self._client.remove_workload(hostname=self.hostname,
                                             orchestrator_id="cni",
                                             workload_id=self.container_id)
            except KeyError:
                _log.warning("Could not find workload with container ID %s.",
                             self.container_id)

    def _provision_veth(self, endpoint):
        """Provisions veth for given endpoint.

        Uses the netns relative path passed in through CNI_NETNS_ENV and
        interface passed in through CNI_IFNAME_ENV.

        :param endpoint
        :return Calico endpoint object
        """
        _log.debug("Provisioning Calico veth interface")
        netns_path = os.path.abspath(os.path.join(os.getcwd(), self.cni_netns))
        _log.debug("netns path: %s", netns_path)

        try:
            endpoint.mac = endpoint.provision_veth(Namespace(netns_path),
                                                   self.interface)
        except CalledProcessError as e:
            _log.exception(
                "Failed to provision veth interface for endpoint %s",
                endpoint.name)
            self._remove_workload()
            self.ipam_env[CNI_COMMAND_ENV] = CNI_CMD_DELETE
            self._release_ip(self.ipam_env)
            print_cni_error(ERR_CODE_GENERIC, e.message)
            sys.exit(ERR_CODE_GENERIC)

        _log.debug("Endpoint has mac address: %s", endpoint.mac)

        self._client.set_endpoint(endpoint)
        _log.info("Provisioned %s in netns %s", self.interface, netns_path)
        return endpoint

    def _remove_veth(self, endpoint):
        """Remove the veth from given endpoint.

        Handles any errors encountered while removing the endpoint.
        """
        _log.info("Removing veth for endpoint: %s", endpoint.name)
        try:
            removed = netns.remove_veth(endpoint.name)
            _log.debug("Successfully removed endpoint %s? %s", endpoint.name,
                       removed)
        except CalledProcessError:
            _log.warning("Unable to remove veth %s", endpoint.name)

    @handle_datastore_error
    def _get_endpoint(self):
        """Get endpoint matching self.workload_id.

        If we cannot find an endpoint using self.workload_id, try
        using self.container_id.

        Return None if no endpoint is found.
        Exits with an error if multiple endpoints are found.

        :return: Endpoint object if found, None if not found
        """
        try:
            _log.debug("Looking for endpoint that matches workload ID %s",
                       self.workload_id)
            endpoint = self._client.get_endpoint(
                hostname=self.hostname,
                orchestrator_id=self.orchestrator_id,
                workload_id=self.workload_id)
        except KeyError:
            # Try to find using the container ID.  In earlier version of the
            # plugin, the container ID was used as the workload ID.
            _log.debug("No endpoint found matching workload ID %s",
                       self.workload_id)
            try:
                endpoint = self._client.get_endpoint(
                    hostname=self.hostname,
                    orchestrator_id="cni",
                    workload_id=self.container_id)
            except KeyError:
                # We were unable to find an endpoint using either the
                # workload ID or the container ID.
                _log.debug("No endpoint found matching container ID %s",
                           self.container_id)
                endpoint = None
        except MultipleEndpointsMatch:
            message = "Multiple Endpoints found matching ID %s" % \
                    self.workload_id
            print_cni_error(ERR_CODE_GENERIC, message)
            sys.exit(ERR_CODE_GENERIC)

        return endpoint

    def _find_ipam_plugin(self):
        """Locates IPAM plugin binary in plugin path and returns absolute path
        of plugin if found; if not found returns an empty string.

        IPAM plugin type is set in the network config file.
        The plugin path is the CNI path passed through the environment variable
        CNI_PATH.

        :rtype : str
        :return: plugin_path - absolute path of IPAM plugin binary
        """
        plugin_type = self.ipam_type
        plugin_path = ""
        for path in self.cni_path.split(":"):
            _log.debug("Looking for plugin %s in path %s", plugin_type, path)
            temp_path = os.path.abspath(os.path.join(path, plugin_type))
            if os.path.isfile(temp_path):
                _log.debug("Found plugin %s in path %s", plugin_type, path)
                plugin_path = temp_path
                break
        return str(plugin_path)
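Putting CniPlugin together: a typical invocation reads the network config
from stdin (per the CNI convention) and supplies the CNI_* environment
variables set by the container runtime.  The sketch below uses the raw spec
variable names; that the constants in this codebase map to them is an
assumption.

import json
import sys

env = {
    "CNI_COMMAND": "ADD",
    "CNI_CONTAINERID": "abcd1234",
    "CNI_NETNS": "/proc/1234/ns/net",
    "CNI_IFNAME": "eth0",
    "CNI_PATH": "/opt/cni/bin",
    "CNI_ARGS": "K8S_POD_NAME=nginx;K8S_POD_NAMESPACE=default",
}
network_config = json.loads(sys.stdin.read())
CniPlugin(network_config, env).execute()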
Exemplo n.º 29
0
import logging
import simplejson as json
from constants import *
from pycalico.datastore import DatastoreClient
from pycalico.datastore_datatypes import Rules, Rule

_log = logging.getLogger("__main__")
client = DatastoreClient()


def add_update_namespace(namespace):
    """
    Configures the necessary policy in Calico for this
    namespace.  Uses the `net.alpha.kubernetes.io/network-isolation`
    annotation.
    """
    namespace_name = namespace["metadata"]["name"]
    _log.debug("Adding/updating namespace: %s", namespace_name)

    # Determine the type of network-isolation specified by this namespace.
    # This defaults to no isolation.
    annotations = namespace["metadata"].get("annotations", {})
    _log.debug("Namespace %s has annotations: %s", namespace_name, annotations)
    policy_annotation = annotations.get(NS_POLICY_ANNOTATION, "{}")
    try:
        policy_annotation = json.loads(policy_annotation)
    except (ValueError, TypeError):
        _log.exception("Failed to parse namespace annotations: %s",
                       annotations)
        return
Exemplo n.º 30
0
    def __init__(self):
        self._event_queue = Queue.Queue(maxsize=MAX_QUEUE_SIZE)
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._leader_election_url = os.environ.get("ELECTION_URL",
                                                   "http://127.0.0.1:4040/")
        """
        Use this URL to get leader election status from the sidecar container.
        """

        elect = os.environ.get("LEADER_ELECTION", "false")
        self._leader_elect = elect.lower() == "true"
        """
        Whether or not leader election is enabled.  If set to False, this
        policy controller will assume it is the only instance.
        """

        self._handlers = {}
        """
        Keeps track of which handlers to execute for various events.
        """

        # Handlers for NetworkPolicy events.
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_MODIFIED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         delete_network_policy)

        # Handlers for Namespace events.
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_MODIFIED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         delete_namespace)

        # Handlers for Pod events.
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, add_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_MODIFIED, update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, delete_pod)
Exemplo n.º 31
0
    def __init__(self, network_config, env):
        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        # Parse CNI_ARGS into dictionary so we can extract values.
        cni_args = parse_cni_args(env.get(CNI_ARGS_ENV, ""))

        self.k8s_pod_name = cni_args.get(K8S_POD_NAME)
        """
        Name of Kubernetes pod if running under Kubernetes, else None.
        """

        self.k8s_namespace = cni_args.get(K8S_POD_NAMESPACE)
        """
        Name of Kubernetes namespace if running under Kubernetes, else None.
        """

        self.network_config = network_config
        """
        Network config as provided in the CNI network file passed in
        via stdin.
        """

        self.network_name = network_config["name"]
        """
        Name of the network from the provided network config file.
        """

        self.ipam_type = network_config["ipam"]["type"]
        """
        Type of IPAM to use, e.g calico-ipam.
        """

        self.hostname = network_config.get("hostname", socket.gethostname())
        """
        The hostname to register endpoints under.
        """

        self.container_engine = get_container_engine(self.k8s_pod_name)
        """
        Chooses the correct container engine based on the given configuration.
        """

        self.ipam_env = env
        """
        Environment dictionary used when calling the IPAM plugin.
        """

        self.command = env[CNI_COMMAND_ENV]
        assert self.command in [CNI_CMD_DELETE, CNI_CMD_ADD], \
                "Invalid CNI command %s" % self.command
        """
        The command to execute for this plugin instance. Required.
        One of:
          - CNI_CMD_ADD
          - CNI_CMD_DELETE
        """

        self.container_id = env[CNI_CONTAINERID_ENV]
        """
        The container's ID in the containerizer. Required.
        """

        self.cni_netns = env[CNI_NETNS_ENV]
        """
        Relative path to the network namespace of this container.
        """

        self.interface = env[CNI_IFNAME_ENV]
        """
        Name of the interface to create within the container.
        """

        self.cni_path = env[CNI_PATH_ENV]
        """
        Path in which to search for CNI plugins.
        """

        self.running_under_k8s = self.k8s_namespace and self.k8s_pod_name
        if self.running_under_k8s:
            self.workload_id = "%s.%s" % (self.k8s_namespace, self.k8s_pod_name)
            self.orchestrator_id = "k8s"
        else:
            self.workload_id = self.container_id
            self.orchestrator_id = "cni"
        kubernetes_config = network_config.get("kubernetes", {})
        self.kubeconfig_path = kubernetes_config.get("kubeconfig")
        self.k8s_node_name = kubernetes_config.get("node_name", socket.gethostname())
        """
        Configure orchestrator specific settings.
        workload_id: In Kubernetes, this is the pod's namespace and name.
                     Otherwise, this is the container ID.
        orchestrator_id: Either "k8s" or "cni".
        """

        # Ensure that the ipam_env CNI_ARGS contains the IgnoreUnknown=1 option
        # See https://github.com/appc/cni/pull/158
        # And https://github.com/appc/cni/pull/127
        self.ipam_env[CNI_ARGS_ENV] = 'IgnoreUnknown=1'
        if env.get(CNI_ARGS_ENV):
            # Append any existing args - if they are set.
            self.ipam_env[CNI_ARGS_ENV] += ";%s" % env.get(CNI_ARGS_ENV)

        self.policy_driver = get_policy_driver(self)
        """
Exemplo n.º 32

import logging
import simplejson as json
from constants import *
from pycalico.datastore import DatastoreClient
from pycalico.datastore_datatypes import Rules, Rule

_log = logging.getLogger("__main__")
client = DatastoreClient()


def add_update_namespace(namespace):
    """
    Configures the necessary policy in Calico for this
    namespace.  Uses the `net.alpha.kubernetes.io/network-isolation`
    annotation.
    """
    namespace_name = namespace["metadata"]["name"]
    _log.debug("Adding/updating namespace: %s", namespace_name)

    # Determine the type of network-isolation specified by this namespace.
    # This defaults to no isolation.
    annotations = namespace["metadata"].get("annotations", {})
    _log.debug("Namespace %s has annotations: %s", namespace_name, annotations)
Example No. 33
class PolicyAgent():
    def __init__(self):
        self._event_queue = Queue.Queue()
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.  
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._handlers = {}
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         self._add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         self._delete_network_policy)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         self._add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         self._delete_namespace)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED, self._add_update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED, self._delete_pod)
        """
        Handlers for watch events.
        """

    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the 
        given resource type.
        """
        _log.info("Setting %s %s handler: %s", resource_type, event_type,
                  handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        PolicyAgent.run() is called at program init to spawn the watch
        threads, then loops reading responses from the event queue as they
        come in.
        """
        resources = [
            RESOURCE_TYPE_NETWORK_POLICY, RESOURCE_TYPE_NAMESPACE,
            RESOURCE_TYPE_POD
        ]
        for resource_type in resources:
            # Get existing resources from the API.
            _log.info("Getting existing %s objects", resource_type)
            get_url = GET_URLS[resource_type] % self.k8s_api
            resp = self._api_get(get_url, stream=False)
            _log.info("Response: %s", resp)

            if resp.status_code != 200:
                _log.error("Error querying API: %s", resp.json())
                return
            updates = resp.json()["items"]
            metadata = resp.json().get("metadata", {})
            resource_version = metadata.get("resourceVersion")
            _log.debug("%s metadata: %s", resource_type, metadata)

            # Process the existing resources.
            _log.info("%s existing %s(s)", len(updates), resource_type)
            for update in updates:
                _log.debug("Processing existing resource: %s",
                           json.dumps(update, indent=2))
                self._process_update(TYPE_ADDED, resource_type, update)

            # Start watching for updates from the last resourceVersion.
            watch_url = WATCH_URLS[resource_type] % self.k8s_api
            t = Thread(target=self._watch_api,
                       args=(watch_url, resource_version))
            t.daemon = True
            t.start()
            _log.info("Started watch on: %s", resource_type)

        # Loop and read updates from the queue.
        _log.info("Reading from event queue")
        self.read_updates()

    def read_updates(self):
        """
        Reads from the update queue.
        """
        update = None

        while True:
            try:
                # There may be an update already, since we do a blocking get
                # in the `except Queue.Empty` block.  If we have an update,
                # just process it before trying to read from the queue again.
                if not update:
                    _log.info("Non-blocking read from event queue")
                    update = self._event_queue.get(block=False)
                    self._event_queue.task_done()

                # We've received an update - process it.
                _log.debug("Read update from queue: %s",
                           json.dumps(update, indent=2))
                self._process_update(update["type"], update["object"]["kind"],
                                     update["object"])
                update = None
            except Queue.Empty:
                _log.info("Queue empty, waiting for updates")
                update = self._event_queue.get(block=True)
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                # Set update = None so we don't continue parsing the
                # invalid update.
                _log.exception("Invalid update: %s", update)
                update = None
                time.sleep(10)

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly.
        """
        _log.info("Processing '%s' for kind '%s'", event_type, resource_type)

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Treat "modified" as "added".
        if event_type == TYPE_MODIFIED:
            _log.info("Treating 'MODIFIED' as 'ADDED'")
            event_type = TYPE_ADDED

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s", event_type, resource_type)
        else:
            _log.debug("Calling handler: %s", handler)
            try:
                handler(key, resource)
            except KeyError:
                _log.exception("Invalid %s: %s", resource_type,
                               json.dumps(resource, indent=2))

    def _add_update_network_policy(self, key, policy):
        """
        Takes a new network policy from the Kubernetes API and 
        creates the corresponding Calico policy configuration.
        """
        _log.info("Adding new network policy: %s", key)

        # Parse this network policy so we can convert it to the appropriate
        # Calico policy.  First, get the selector from the API object.
        k8s_selector = policy["spec"]["podSelector"]
        k8s_selector = k8s_selector or {}

        # Build the appropriate Calico label selector.  This is done using
        # the labels provided in the NetworkPolicy, as well as the
        # NetworkPolicy's namespace.
        namespace = policy["metadata"]["namespace"]
        selectors = [
            "%s == '%s'" % (k, v) for k, v in k8s_selector.iteritems()
        ]
        selectors += ["%s == '%s'" % (K8S_NAMESPACE_LABEL, namespace)]
        selector = " && ".join(selectors)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Build the Calico rules.
        try:
            inbound_rules = self._calculate_inbound_rules(policy)
        except Exception:
            # It is possible bad rules will be passed - we don't want to
            # crash the agent, but we do want to indicate a problem in the
            # logs, so that the policy can be fixed.
            _log.exception("Error parsing policy: %s",
                           json.dumps(policy, indent=2))
            return
        else:
            rules = Rules(id=name,
                          inbound_rules=inbound_rules,
                          outbound_rules=[Rule(action="allow")])

        # Create the network policy using the calculated selector and rules.
        self._client.create_global_policy(NET_POL_GROUP_NAME, name, selector,
                                          rules)
        _log.info("Updated global policy '%s' for NetworkPolicy %s", name, key)

    def _delete_network_policy(self, key, policy):
        """
        Takes a deleted network policy and removes the corresponding
        configuration from the Calico datastore.
        """
        _log.info("Deleting network policy: %s", key)

        # Determine the name for this global policy.
        name = "net_policy-%s" % policy["metadata"]["name"]

        # Delete the corresponding Calico policy
        try:
            self._client.remove_global_policy(NET_POL_GROUP_NAME, name)
        except KeyError:
            _log.info("Unable to find policy '%s' - already deleted", key)

    def _calculate_inbound_rules(self, policy):
        """
        Takes a NetworkPolicy object from the API and returns a list of
        Calico Rule objects which should be applied on ingress.
        """
        _log.debug("Calculating inbound rules")

        # Store the rules to return.
        rules = []

        # Get this policy's namespace.
        policy_ns = policy["metadata"]["namespace"]

        # Iterate through each inbound rule and create the appropriate
        # rules.
        allow_incomings = policy["spec"].get("ingress") or []
        _log.info("Found %s ingress rules", len(allow_incomings))
        for r in allow_incomings:
            # Determine the destination ports to allow.  If no ports are
            # specified, allow all port / protocol combinations.
            _log.debug("Processing ingress rule: %s", r)
            ports_by_protocol = {}
            for to_port in r.get("ports", []):
                # Keep a dict of ports exposed, keyed by protocol.
                protocol = to_port.get("protocol")
                port = to_port.get("port")
                ports = ports_by_protocol.setdefault(protocol, [])
                if port:
                    _log.debug("Allow to port: %s/%s", protocol, port)
                    ports.append(port)

            # Convert into arguments to be passed to a Rule object.
            to_args = []
            for protocol, ports in ports_by_protocol.iteritems():
                arg = {"protocol": protocol.lower()}
                if ports:
                    arg["dst_ports"] = ports
                to_args.append(arg)

            if not to_args:
                # No destination protocols / ports were specified.
                # Allow to all protocols and ports.
                to_args = [{}]
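            # Worked example: a ports list of
            # [{"protocol": "TCP", "port": 80}, {"protocol": "TCP", "port": 443}]
            # gives ports_by_protocol = {"TCP": [80, 443]} and
            # to_args = [{"protocol": "tcp", "dst_ports": [80, 443]}].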

            # Determine the from criteria.  If no "from" block is specified,
            # then we should allow from all sources.
            from_args = []
            for from_clause in r.get("from", []):
                # We need to check if the key exists, not just if there is
                # a non-null value.  The presence of the key with a null
                # value means "select all".
                pods_present = "pods" in from_clause
                namespaces_present = "namespaces" in from_clause
                _log.debug("Is 'pods:' present? %s", pods_present)
                _log.debug("Is 'namespaces:' present? %s", namespaces_present)

                if pods_present and namespaces_present:
                    # This is an error case according to the API.
                    msg = "Policy API does not support both 'pods' and " \
                          "'namespaces' selectors."
                    raise PolicyError(msg, policy)
                elif pods_present:
                    # There is a pod selector in this "from" clause.
                    pod_selector = from_clause["pods"] or {}
                    _log.debug("Allow from pods: %s", pod_selector)
                    selectors = [
                        "%s == '%s'" % (k, v)
                        for k, v in pod_selector.iteritems()
                    ]

                    # We can only select on pods in this namespace.
                    selectors.append("%s == %s" %
                                     (K8S_NAMESPACE_LABEL, policy_ns))
                    selector = " && ".join(selectors)

                    # Append the selector to the from args.
                    _log.debug("Allowing pods which match: %s", selector)
                    from_args.append({"src_selector": selector})
                elif namespaces_present:
                    # There is a namespace selector.  Namespace labels are
                    # applied to each pod in the namespace using
                    # the per-namespace profile.  We can select on namespace
                    # labels using the NS_LABEL_KEY_FMT modifier.
                    namespaces = from_clause["namespaces"] or {}
                    _log.debug("Allow from namespaces: %s", namespaces)
                    selectors = ["%s == '%s'" % (NS_LABEL_KEY_FMT % k, v) \
                            for k,v in namespaces.iteritems()]
                    selector = " && ".join(selectors)
                    if selector:
                        # Allow from the selected namespaces.
                        _log.debug("Allowing from namespaces which match: %s",
                                   selector)
                        from_args.append({"src_selector": selector})
                    else:
                        # Allow from all pods in all namespaces.
                        _log.debug("Allowing from all pods in all namespaces")
                        selector = "has(%s)" % K8S_NAMESPACE_LABEL
                        from_args.append({"src_selector": selector})

            if not from_args:
                # There are no match criteria specified.  We should allow
                # from all sources to the given ports.
                from_args = [{}]

            # A rule per-protocol, per-from-clause.
            for to_arg in to_args:
                for from_arg in from_args:
                    # Create a rule by combining a 'from' argument with
                    # the protocol / ports arguments.  Combine into a copy
                    # so the shared from_arg dict isn't mutated across
                    # iterations of the outer loop.
                    rule_args = dict(from_arg)
                    rule_args.update(to_arg)
                    rule_args["action"] = "allow"
                    rules.append(Rule(**rule_args))
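            # For example, two to_args entries (say tcp and udp) combined
            # with two from_args entries (two source selectors) append four
            # Rule objects for this single ingress rule.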

        _log.debug("Calculated rules: %s", rules)
        return rules

    def _add_update_namespace(self, key, namespace):
        """
        Configures the necessary policy in Calico for this
        namespace.  Uses the `net.alpha.kubernetes.io/network-isolation` 
        annotation.
        """
        _log.info("Adding/updating namespace: %s", key)

        # Determine the type of network-isolation specified by this namespace.
        # This defaults to no isolation.
        annotations = namespace["metadata"].get("annotations", {})
        _log.debug("Namespace %s has annotations: %s", key, annotations)
        net_isolation = annotations.get(NS_POLICY_ANNOTATION, "no") == "yes"
        _log.info("Namespace %s has network-isolation? %s", key, net_isolation)

        # Determine the profile name to create.
        namespace_name = namespace["metadata"]["name"]
        profile_name = NS_PROFILE_FMT % namespace_name

        # Determine the rules to use.
        outbound_rules = [Rule(action="allow")]
        if net_isolation:
            inbound_rules = [Rule(action="deny")]
        else:
            inbound_rules = [Rule(action="allow")]
        rules = Rules(id=profile_name,
                      inbound_rules=inbound_rules,
                      outbound_rules=outbound_rules)

        # Create the Calico policy to represent this namespace, or
        # update it if it already exists.  Namespace policies select each
        # pod within that namespace.
        self._client.create_profile(profile_name, rules)

        # Assign labels to the profile.  We modify the keys to use
        # a special prefix to indicate that these labels are inherited
        # from the namespace.
        # Build a new dict rather than mutating the one we iterate over -
        # mutating a dict during iteration is unsafe.
        labels = namespace["metadata"].get("labels", {})
        labels = dict((NS_LABEL_KEY_FMT % k, v)
                      for k, v in labels.iteritems())
        _log.debug("Generated namespace labels: %s", labels)

        # TODO: Actually assign labels to the profile.

        _log.info("Created/updated profile for namespace %s", namespace_name)

    def _delete_namespace(self, key, namespace):
        """
        Takes a deleted namespace and removes the corresponding
        configuration from the Calico datastore.
        """
        _log.info("Deleting namespace: %s", key)

        # Delete the Calico profile which represents this namespace.
        # We need to make sure that there are no pods running
        # in this namespace first.
        namespace_name = namespace["metadata"]["name"]
        profile_name = NS_PROFILE_FMT % namespace_name
        try:
            self._client.remove_profile(profile_name)
        except KeyError:
            _log.info("Unable to find profile for namespace '%s'", key)

    def _add_update_pod(self, key, pod):
        """
        Takes a new or updated pod from the Kubernetes API and 
        creates the corresponding Calico configuration.
        """
        _log.info("Adding new pod: %s", key)

        # Get the Calico endpoint.  This may or may not have already been
        # created by the CNI plugin.  If it hasn't been created, we need to
        # wait until it has before we can do any meaningful work.
        namespace = pod["metadata"]["namespace"]
        name = pod["metadata"]["name"]
        workload_id = "%s.%s" % (namespace, name)
        try:
            _log.debug("Looking for endpoint that matches workload_id=%s",
                       workload_id)
            endpoint = self._client.get_endpoint(orchestrator_id="cni",
                                                 workload_id=workload_id)
        except KeyError:
            # We don't need to do anything special here, just return.
            # We'll receive another update when the Pod enters running state.
            _log.warn("No endpoint for '%s', wait until running", workload_id)
            return
        except MultipleEndpointsMatch:
            # We should never have multiple endpoints with the same
            # workload_id.  This could theoretically occur if the Calico
            # datastore is out-of-sync with what pods actually exist, but
            # this is an error state and indicates a problem elsewhere.
            _log.error("Multiple Endpoints found matching ID %s", workload_id)
            sys.exit(1)

        # Get Kubernetes labels.
        labels = pod["metadata"].get("labels", {})
        _log.debug("Pod '%s' has labels: %s", key, labels)

        # Add a special label for the Kubernetes namespace.  This is used
        # by selector-based policies to select all pods in a given namespace.
        labels[K8S_NAMESPACE_LABEL] = namespace

        # Set the labels on the endpoint.
        endpoint.labels = labels
        self._client.set_endpoint(endpoint)
        _log.info("Updated labels on pod %s", key)

        # Configure this pod with its namespace profile.
        ns_profile = NS_PROFILE_FMT % namespace
        self._client.set_profiles_on_endpoint([ns_profile],
                                              orchestrator_id="cni",
                                              workload_id=endpoint.workload_id)

    def _delete_pod(self, key, pod):
        """
        We don't need to do anything when a pod is deleted - the CNI plugin
        handles the deletion of the endpoint.
        """
        _log.info("Pod deleted: %s", key)

    def _watch_api(self, path, resource_version=None):
        try:
            self.__watch_api(path, resource_version)
        except Exception:
            _log.exception("Exception watching %s", path)

    def __watch_api(self, path, resource_version=None):
        """
        Work loop for the watch thread.
        """
        _log.info("Starting watch on path: %s", path)
        while True:
            # Attempt to stream API resources.
            try:
                response = self._api_get(path,
                                         stream=True,
                                         resource_version=resource_version)
                _log.info("Watch response for %s: %s", path, response)
            except requests.ConnectionError:
                _log.exception("Error querying path: %s", path)
                time.sleep(10)
                continue

            # Check for successful response.
            if response.status_code != 200:
                _log.error("Error watching path: %s", response.text)
                time.sleep(10)
                continue

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Adding line to queue: %s", line)
                    self._event_queue.put(json.loads(line))

    def _api_get(self, path, stream, resource_version=None):
        """
        Get or stream resources from the API.

        :param path: The API path to get.
        :param stream: Whether to return a single response or a stream.
        :param resource_version: The resourceVersion at which to start a
            stream.  When streaming, each line is a JSON object of the form
            {"type": "ADDED"|"MODIFIED"|"DELETED", "object": {...}}.
        :return: A requests Response object.
        """
        # Append the resource version - this indicates where the
        # watch should start.
        _log.info("Getting API resources '%s' at version '%s'. stream=%s",
                  path, resource_version, stream)
        if resource_version:
            path += "?resourceVersion=%s" % resource_version

        session = requests.Session()
        if self.auth_token:
            session.headers.update(
                {'Authorization': 'Bearer ' + self.auth_token})
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, verify=verify, stream=stream)
Example No. 34
    def __init__(self, network_config, env):
        self._client = DatastoreClient()
        """
        DatastoreClient for access to the Calico datastore.
        """

        # Parse CNI_ARGS into dictionary so we can extract values.
        cni_args = parse_cni_args(env.get(CNI_ARGS_ENV, ""))

        self.k8s_pod_name = cni_args.get(K8S_POD_NAME)
        """
        Name of Kubernetes pod if running under Kubernetes, else None.
        """

        self.k8s_namespace = cni_args.get(K8S_POD_NAMESPACE)
        """
        Name of Kubernetes namespace if running under Kubernetes, else None.
        """

        self.network_config = network_config
        """
        Network config as provided in the CNI network file passed in
        via stdin.
        """

        self.network_name = network_config["name"]
        """
        Name of the network from the provided network config file.
        """

        self.ipam_type = network_config["ipam"]["type"]
        """
        Type of IPAM to use, e.g. calico-ipam.
        """

        self.hostname = network_config.get("hostname", socket.gethostname())
        """
        The hostname to register endpoints under.
        """

        self.container_engine = get_container_engine(self.k8s_pod_name)
        """
        The container engine to use, chosen based on the given configuration.
        """

        self.ipam_env = env
        """
        Environment dictionary used when calling the IPAM plugin.
        """

        self.command = env[CNI_COMMAND_ENV]
        assert self.command in [CNI_CMD_DELETE, CNI_CMD_ADD], \
                "Invalid CNI command %s" % self.command
        """
        The command to execute for this plugin instance. Required.
        One of:
          - CNI_CMD_ADD
          - CNI_CMD_DELETE
        """

        self.container_id = env[CNI_CONTAINERID_ENV]
        """
        The container's ID in the containerizer. Required.
        """

        self.cni_netns = env[CNI_NETNS_ENV]
        """
        Relative path to the network namespace of this container.
        """

        self.interface = env[CNI_IFNAME_ENV]
        """
        Name of the interface to create within the container.
        """

        self.cni_path = env[CNI_PATH_ENV]
        """
        Path in which to search for CNI plugins.
        """

        self.running_under_k8s = self.k8s_namespace and self.k8s_pod_name
        if self.running_under_k8s:
            self.workload_id = "%s.%s" % (self.k8s_namespace,
                                          self.k8s_pod_name)
            self.orchestrator_id = "k8s"
        else:
            self.workload_id = self.container_id
            self.orchestrator_id = "cni"
        kubernetes_config = network_config.get("kubernetes", {})
        self.kubeconfig_path = kubernetes_config.get("kubeconfig")
        self.k8s_node_name = kubernetes_config.get("node_name",
                                                   socket.gethostname())
        """
        Configure orchestrator specific settings.
        workload_id: In Kubernetes, this is the pod's namespace and name.
                     Otherwise, this is the container ID.
        orchestrator_id: Either "k8s" or "cni".
        """

        # Ensure that the ipam_env CNI_ARGS contains the IgnoreUnknown=1 option
        # See https://github.com/appc/cni/pull/158
        # And https://github.com/appc/cni/pull/127
        self.ipam_env[CNI_ARGS_ENV] = 'IgnoreUnknown=1'
        if env.get(CNI_ARGS_ENV):
            # Append any existing args - if they are set.
            self.ipam_env[CNI_ARGS_ENV] += ";%s" % env.get(CNI_ARGS_ENV)

        self.policy_driver = get_policy_driver(self)
        """
Example No. 35
import logging
import simplejson as json
from constants import *
from pycalico.datastore import DatastoreClient
from pycalico.datastore_datatypes import Rules, Rule

_log = logging.getLogger("__main__")
client = DatastoreClient()


def add_update_namespace(namespace):
    """
    Configures the necessary policy in Calico for this
    namespace.  Uses the `net.alpha.kubernetes.io/network-isolation`
    annotation.
    """
    namespace_name = namespace["metadata"]["name"]
    _log.debug("Adding/updating namespace: %s", namespace_name)

    # Determine the type of network-isolation specified by this namespace.
    # This defaults to no isolation.
    annotations = namespace["metadata"].get("annotations", {})
    _log.debug("Namespace %s has annotations: %s", namespace_name, annotations)
    policy_annotation = annotations.get(NS_POLICY_ANNOTATION, "{}")
    try:
        policy_annotation = json.loads(policy_annotation)
    except (ValueError, TypeError):
        _log.exception("Failed to parse namespace annotations: %s", annotations)
        return
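    # Illustration (hypothetical values): an annotation value of
    # '{"ingress": {"isolation": "DefaultDeny"}}' parses to a dict, while
    # '"DefaultDeny"' parses to a plain string - hence the care needed below.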

    # Parsed the annotation - get data.  Might not be a dict, so be careful
Example No. 36

import logging
import simplejson as json
from constants import *
from pycalico.datastore import DatastoreClient
from pycalico.datastore_datatypes import Rules, Rule

_log = logging.getLogger("__main__")
client = DatastoreClient()


def add_update_namespace(namespace):
    """
    Configures a Profile for the given Kubernetes namespace.
    """
    namespace_name = namespace["metadata"]["name"]
    _log.debug("Adding/updating namespace: %s", namespace_name)

    # Determine the profile name to create.
    profile_name = NS_PROFILE_FMT % namespace_name

    # Build the rules to use.
    rules = Rules(inbound_rules=[Rule(action="allow")],
                  outbound_rules=[Rule(action="allow")])
Example No. 37
class PolicyAgent(object):
    def __init__(self):
        self._event_queue = Queue.Queue(maxsize=MAX_QUEUE_SIZE)
        """
        Queue to populate with events from API watches.
        """

        self.k8s_api = os.environ.get("K8S_API", DEFAULT_API)
        """
        Scheme, IP and port of the Kubernetes API.
        """

        self.auth_token = os.environ.get("K8S_AUTH_TOKEN", read_token_file())
        """
        Auth token to use when accessing the API.
        """
        _log.debug("Using auth token: %s", self.auth_token)

        self.ca_crt_exists = os.path.exists(CA_CERT_PATH)
        """
        True if a CA cert has been mounted by Kubernetes.
        """

        self._client = DatastoreClient()
        """
        Client for accessing the Calico datastore.
        """

        self._handlers = {}
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_ADDED,
                         add_update_network_policy)
        self.add_handler(RESOURCE_TYPE_NETWORK_POLICY, TYPE_DELETED,
                         delete_network_policy)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_ADDED,
                         add_update_namespace)
        self.add_handler(RESOURCE_TYPE_NAMESPACE, TYPE_DELETED,
                         delete_namespace)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_ADDED,
                         add_update_pod)
        self.add_handler(RESOURCE_TYPE_POD, TYPE_DELETED,
                         delete_pod)
        """
        Handlers for watch events.
        """

    def add_handler(self, resource_type, event_type, handler):
        """
        Adds an event handler for the given event type (ADD, DELETE) for the
        given resource type.

        :param resource_type: The type of resource that this handles.
        :param event_type: The type of event that this handles.
        :param handler: The callable to execute when events are received.
        :return: None
        """
        _log.info("Setting %s %s handler: %s",
                  resource_type, event_type, handler)
        key = (resource_type, event_type)
        self._handlers[key] = handler

    def get_handler(self, resource_type, event_type):
        """
        Gets the correct handler.

        :param resource_type: The type of resource that needs handling.
        :param event_type: The type of event that needs handling.
        :return: The handler registered for the given resource and event type.
        """
        key = (resource_type, event_type)
        _log.debug("Looking up handler for event: %s", key)
        return self._handlers[key]

    def run(self):
        """
        PolicyAgent.run() is called at program init to spawn the watch
        threads, then loops reading responses from the event queue as they
        come in.
        """
        # Ensure the tier exists.
        metadata = {"order": 50}
        self._client.set_policy_tier_metadata(NET_POL_TIER_NAME, metadata)

        # Read initial state from Kubernetes API.
        self.read_initial_state()

        # Loop and read updates from the queue.
        self.read_updates()

    def read_initial_state(self):
        """
        Reads initial state from the API, processes existing resources, and
        kicks off threads to watch the Kubernetes API for changes.
        """
        resources = [RESOURCE_TYPE_NETWORK_POLICY,
                     RESOURCE_TYPE_NAMESPACE,
                     RESOURCE_TYPE_POD]
        for resource_type in resources:
            # Get existing resources from the API.
            _log.info("Getting existing %s objects", resource_type)
            get_url = GET_URLS[resource_type] % self.k8s_api
            resp = self._api_get(get_url, stream=False)
            _log.info("Response: %s", resp)

            # If we hit an error, raise it.  This will kill the agent,
            # which will be re-started by Kubernetes.
            if resp.status_code != 200:
                _log.error("Error querying API: %s", resp.json())
                raise Exception("Failed to query resource: %s" % resource_type)

            # Get the list of existing API objects from the response, as
            # well as the latest resourceVersion.
            existing = resp.json()["items"]
            metadata = resp.json().get("metadata", {})
            resource_version = metadata.get("resourceVersion")
            _log.debug("%s metadata: %s", resource_type, metadata)

            # Add the existing resources to the queue to be processed.
            _log.info("%s existing %s(s)", len(resources), resource_type)
            for resource in resources:
                _log.debug("Queueing update: %s", resource)
                update = (TYPE_ADDED, resource_type, resource)
                self._event_queue.put(update,
                                      block=True,
                                      timeout=QUEUE_PUT_TIMEOUT)

            # Start watching for updates from the last resourceVersion.
            watch_url = WATCH_URLS[resource_type] % self.k8s_api
            t = Thread(target=self._watch_api,
                       args=(watch_url, resource_version))
            t.daemon = True
            t.start()
            _log.info("Started watch on: %s", resource_type)

    def read_updates(self):
        """
        Reads from the update queue.

        An update on the queue must be a tuple of:
          (event_type, resource_type, resource)

        Where:
          - event_type: Either "ADDED", "MODIFIED", "DELETED"
          - resource_type: e.g "Namespace", "Pod", "NetworkPolicy"
          - resource: The parsed json resource from the API matching
                      the given resource_type.
        """
        while True:
            try:
                # Wait for an update on the event queue.
                _log.debug("Reading from event queue")
                update = self._event_queue.get(block=True)
                event_type, resource_type, resource = update
                self._event_queue.task_done()

                # We've received an update - process it.
                _log.debug("Read event: %s, %s, %s",
                           event_type,
                           resource_type,
                           json.dumps(resource, indent=2))
                self._process_update(event_type,
                                     resource_type,
                                     resource)
            except KeyError:
                # We'll hit this if we fail to parse an invalid update.
                _log.exception("Invalid update: %s", update)

    def _process_update(self, event_type, resource_type, resource):
        """
        Takes an event and updates our state accordingly.
        """
        _log.debug("Processing '%s' for kind '%s'", event_type, resource_type)

        # Determine the key for this object using namespace and name.
        # This is simply used for easy identification in logs, etc.
        name = resource["metadata"]["name"]
        namespace = resource["metadata"].get("namespace")
        key = (namespace, name)

        # Treat "modified" as "added".
        if event_type == TYPE_MODIFIED:
            _log.debug("Treating 'MODIFIED' as 'ADDED'")
            event_type = TYPE_ADDED

        # Call the right handler.
        try:
            handler = self.get_handler(resource_type, event_type)
        except KeyError:
            _log.warning("No %s handlers for: %s",
                         event_type, resource_type)
        else:
            _log.debug("Calling handler: %s", handler)
            try:
                handler(key, resource)
            except KeyError:
                _log.exception("Invalid %s: %s", resource_type,
                               json.dumps(resource, indent=2))

    def _watch_api(self, path, resource_version=None):
        """
        Work loop for the watch thread.
        """
        _log.info("Starting watch on path: %s", path)
        while True:
            # Attempt to stream API resources.
            try:
                response = self._api_get(path,
                                         stream=True,
                                         resource_version=resource_version)
                _log.debug("Watch response for %s: %s", path, response)
            except requests.ConnectionError:
                _log.exception("Error querying path: %s", path)
                time.sleep(10)
                continue

            # Check for successful response.
            if response.status_code != 200:
                _log.error("Error watching path: %s", response.text)
                time.sleep(10)
                continue

            # Success - add resources to the queue for processing.
            for line in response.iter_lines():
                # Filter out keep-alive new lines.
                if line:
                    _log.debug("Read line: %s", line)
                    parsed = json.loads(line)
                    update = (parsed["type"],
                              parsed["object"]["kind"],
                              parsed["object"])
                    self._event_queue.put(update,
                                          block=True,
                                          timeout=QUEUE_PUT_TIMEOUT)

                    # Extract the latest resource version.
                    new_ver = parsed["object"]["metadata"]["resourceVersion"]
                    _log.debug("Update resourceVersion, was: %s, now: %s",
                               resource_version, new_ver)
                    resource_version = new_ver

    def _api_get(self, path, stream, resource_version=None):
        """
        Get or stream from the API, given a resource.

        :param path: The API path to get.
        :param stream: Whether to return a single object or a stream.
        :param resource_version: The resourceVersion at which to
        start the stream.
        :return: A requests Response object
        """
        # Append the resource version - this indicates where the
        # watch should start.
        _log.info("Getting API resources '%s' at version '%s'. stream=%s",
                  path, resource_version, stream)
        if resource_version:
            path += "?resourceVersion=%s" % resource_version

        session = requests.Session()
        if self.auth_token:
            session.headers.update({'Authorization': 'Bearer ' + self.auth_token})
        verify = CA_CERT_PATH if self.ca_crt_exists else False
        return session.get(path, verify=verify, stream=stream)
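
A minimal entry point for the agent above might look like the following sketch (hypothetical wiring; the original source's main() and logging setup are not shown in these examples):

import logging

if __name__ == "__main__":
    # Basic logging so the _log calls above are visible; the real entry
    # point may configure handlers and formatters differently.
    logging.basicConfig(level=logging.INFO)
    PolicyAgent().run()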