def _attempt_cleanup(self):
    """Scan our host's status subtree and mark every report dirty.

    Marking all statuses dirty makes the status-reporting machinery
    re-examine them, which cleans up reports for unknown endpoints.
    Empty directories found along the way are deleted immediately.
    """
    status_dir = "/".join(
        [FELIX_STATUS_DIR, self._config.HOSTNAME, "workload"])
    try:
        # Load the complete set of existing status reports.
        reports = self.client.read(status_dir, recursive=True)
    except EtcdKeyNotFound:
        _log.info("No endpoint statuses found, nothing to clean up")
        return
    for leaf in reports.leaves:
        endpoint_id = get_endpoint_id_from_key(leaf.key)
        if endpoint_id:
            _log.debug("Endpoint %s removed by resync, marking "
                       "status key for cleanup", endpoint_id)
            self._mark_endpoint_dirty(endpoint_id)
        elif leaf.dir:
            # An empty directory; prune it.  This is safe even if
            # another thread is adding keys back into the directory.
            _log.debug("Found empty directory %s, cleaning up", leaf.key)
            delete_empty_parents(self.client, leaf.key, status_dir)
def clean_up_endpoint_statuses(self, our_endpoints_ids): """ Mark any endpoint status reports for non-existent endpoints for cleanup. :param set our_endpoints_ids: Set of endpoint IDs for endpoints on this host. """ if not self._config.REPORT_ENDPOINT_STATUS: _log.debug("Endpoint status reporting disabled, ignoring.") return our_host_dir = "/".join([FELIX_STATUS_DIR, self._config.HOSTNAME, "workload"]) try: # Grab all the existing status reports. response = self.client.read(our_host_dir, recursive=True) except EtcdKeyNotFound: _log.info("No endpoint statuses found, nothing to clean up") else: for node in response.leaves: combined_id = get_endpoint_id_from_key(node.key) if combined_id and combined_id not in our_endpoints_ids: # We found an endpoint in our status reporting tree that # wasn't in the main tree. Mark it as dirty so the status # reporting thread will clean it up. _log.debug("Endpoint %s removed by resync, marking " "status key for cleanup", combined_id) self._status_reporter.mark_endpoint_dirty(combined_id, async=True) elif node.dir: # This leaf is an empty directory, try to clean it up. # This is safe even if another thread is adding keys back # into the directory. _log.debug("Found empty directory %s, cleaning up", node.key) delete_empty_parents(self.client, node.key, our_host_dir)
def _attempt_cleanup(self):
    """Mark every status report under our host's subtree as dirty.

    Dirty statuses are re-checked by the status-reporting thread,
    which removes any that belong to unknown endpoints.  Empty
    directories are pruned on the spot.
    """
    workload_dir = "/".join(
        [FELIX_STATUS_DIR, self._config.HOSTNAME, "workload"])
    try:
        # Read the whole subtree of existing status reports.
        result = self.client.read(workload_dir, recursive=True)
    except EtcdKeyNotFound:
        _log.info("No endpoint statuses found, nothing to clean up")
    else:
        # Mark every status we find as dirty so that reports for
        # unknown endpoints get cleaned up.
        for child in result.leaves:
            ep_id = get_endpoint_id_from_key(child.key)
            if ep_id:
                _log.debug("Endpoint %s removed by resync, marking "
                           "status key for cleanup", ep_id)
                self._mark_endpoint_dirty(ep_id)
            elif child.dir:
                # Empty directory leaf: safe to prune even if another
                # thread is concurrently re-adding keys beneath it.
                _log.debug("Found empty directory %s, cleaning up",
                           child.key)
                delete_empty_parents(self.client, child.key,
                                     workload_dir)
def _on_snapshot_loaded(self, etcd_snapshot_response):
    """Called whenever a snapshot is loaded from etcd.

    Updates the driver with the current state:
    - reports status for every endpoint that lives on a host with a
      live Felix, and marks down those that do not;
    - reports removal of endpoints that were cached but are absent
      from the snapshot;
    - replaces the cached per-host endpoint map with the new state.

    :param etcd_snapshot_response: etcd read response whose ``leaves``
        are the nodes of the status subtree.
    """
    LOG.info("Started processing status-reporting snapshot from etcd")
    endpoints_by_host = collections.defaultdict(set)
    hosts_with_live_felix = set()
    # First pass: find all the Felixes that are alive.
    for etcd_node in etcd_snapshot_response.leaves:
        key = etcd_node.key
        felix_hostname = datamodel_v1.hostname_from_status_key(key)
        if felix_hostname:
            # Defer to the code for handling an event.
            hosts_with_live_felix.add(felix_hostname)
            self._on_status_set(etcd_node, felix_hostname)
            continue
    # Second pass: find all the endpoints associated with a live Felix.
    for etcd_node in etcd_snapshot_response.leaves:
        key = etcd_node.key
        endpoint_id = datamodel_v1.get_endpoint_id_from_key(key)
        if endpoint_id:
            if endpoint_id.host in hosts_with_live_felix:
                LOG.debug("Endpoint %s is on a host with a live Felix.",
                          endpoint_id)
                self._report_status(
                    endpoints_by_host,
                    endpoint_id,
                    etcd_node.value
                )
            else:
                # Fix: the two implicitly-concatenated literals were
                # missing a separating space ("Felix;marking").
                LOG.debug("Endpoint %s is not on a host with live "
                          "Felix; marking it down.", endpoint_id)
                self.calico_driver.on_port_status_changed(
                    endpoint_id.host,
                    endpoint_id.endpoint,
                    None,
                )
            continue
    # Find any removed endpoints.  (.items() rather than .iteritems()
    # so this also runs under Python 3; the dict is small.)
    for host, endpoints in self._endpoints_by_host.items():
        current_endpoints = endpoints_by_host.get(host, set())
        removed_endpoints = endpoints - current_endpoints
        for endpoint_id in removed_endpoints:
            # Fix: the original call omitted the format argument, so
            # the log line printed a literal "%s".
            LOG.debug("Endpoint %s removed by resync.", endpoint_id)
            self.calico_driver.on_port_status_changed(
                host,
                endpoint_id.endpoint,
                None,
            )
    # Swap in the newly-loaded state.
    self._endpoints_by_host = endpoints_by_host
    LOG.info("Finished processing status-reporting snapshot from etcd")
def parse_if_endpoint(config, etcd_node):
    """Try to interpret an etcd node as an endpoint update.

    :returns: ``(combined_id, endpoint)`` when the node's key names an
        endpoint (``endpoint`` is ``None`` for a deletion), or
        ``(None, None)`` when the key is not an endpoint key.
    """
    combined_id = get_endpoint_id_from_key(etcd_node.key)
    if not combined_id:
        return None, None
    # The key names an endpoint.
    if etcd_node.action == "delete":
        _log.debug("Found deleted endpoint %s", combined_id)
        parsed = None
    else:
        parsed = parse_endpoint(config, combined_id, etcd_node.value)
    # EndpointId does the interning for us.
    return combined_id, parsed
def _on_ep_set(self, response, hostname, workload, endpoint):
    """Called when the status key for a particular endpoint is updated.

    Reports the status to the driver and caches the existence of the
    endpoint.
    """
    ep_id = datamodel_v1.get_endpoint_id_from_key(response.key)
    if ep_id:
        self._report_status(self._endpoints_by_host, ep_id,
                            response.value)
    else:
        # Key didn't parse as an endpoint ID; nothing we can report.
        LOG.error("Failed to extract endpoint ID from: %s. Ignoring "
                  "update!", response.key)
def _on_ep_set(self, response, hostname, workload, endpoint):
    """Handle an update to a particular endpoint's status key.

    Forwards the new status to the driver and records the endpoint in
    the per-host cache.
    """
    endpoint_id = datamodel_v1.get_endpoint_id_from_key(response.key)
    if not endpoint_id:
        # Malformed key; nothing we can report.
        LOG.error("Failed to extract endpoint ID from: %s. Ignoring "
                  "update!", response.key)
        return
    self._report_status(self._endpoints_by_host, endpoint_id,
                        response.value)
def _on_ep_delete(self, response, hostname, workload, endpoint):
    """Called when the status key for an endpoint is deleted.

    This typically means the endpoint has been deleted.  Reports the
    deletion to the driver.
    """
    LOG.debug("Port %s/%s/%s deleted", hostname, workload, endpoint)
    endpoint_id = datamodel_v1.get_endpoint_id_from_key(response.key)
    # Drop the endpoint from the per-host cache; once a host's set is
    # empty, remove the host's entry entirely.
    host_endpoints = self._endpoints_by_host[hostname]
    host_endpoints.discard(endpoint_id)
    if not host_endpoints:
        del self._endpoints_by_host[hostname]
    self.calico_driver.on_port_status_changed(
        hostname,
        endpoint,
        None,
    )
def clean_up_endpoint_statuses(self, our_endpoints_ids): """ Mark any endpoint status reports for non-existent endpoints for cleanup. :param set our_endpoints_ids: Set of endpoint IDs for endpoints on this host. """ if not self._config.REPORT_ENDPOINT_STATUS: _log.debug("Endpoint status reporting disabled, ignoring.") return our_host_dir = "/".join( [FELIX_STATUS_DIR, self._config.HOSTNAME, "workload"]) try: # Grab all the existing status reports. response = self.client.read(our_host_dir, recursive=True) except EtcdKeyNotFound: _log.info("No endpoint statuses found, nothing to clean up") else: for node in response.leaves: combined_id = get_endpoint_id_from_key(node.key) if combined_id and combined_id not in our_endpoints_ids: # We found an endpoint in our status reporting tree that # wasn't in the main tree. Mark it as dirty so the status # reporting thread will clean it up. _log.debug( "Endpoint %s removed by resync, marking " "status key for cleanup", combined_id) self._status_reporter.mark_endpoint_dirty(combined_id, async=True) elif node.dir: # This leaf is an empty directory, try to clean it up. # This is safe even if another thread is adding keys back # into the directory. _log.debug("Found empty directory %s, cleaning up", node.key) delete_empty_parents(self.client, node.key, our_host_dir)