def __init__(self, config, ip_type, iptables_updater, workload_disp_chains,
             host_disp_chains, rules_manager, fip_manager, status_reporter):
    """
    Store the collaborators and create the (initially empty) caches and
    indexes used to track endpoints and the policy that applies to them.

    The interface-polling greenlet is constructed and linked to the
    worker-died handler here; no start() call is made in this method.

    :param config: Configuration object, stored on self.config.
    :param ip_type: IP type; passed to the superclass as its qualifier and
        used to look up the IP version in futils.IP_TYPE_TO_VERSION.
    :param iptables_updater: Stored on self.iptables_updater.
    :param workload_disp_chains: Stored on self.workload_disp_chains.
    :param host_disp_chains: Stored on self.host_disp_chains.
    :param rules_manager: Stored on self.rules_mgr.
    :param fip_manager: Stored on self.fip_manager.
    :param status_reporter: Stored on self.status_reporter.
    """
    super(EndpointManager, self).__init__(qualifier=ip_type)

    # Configuration, plus the IP type/version this instance deals with.
    self.config = config
    self.ip_type = ip_type
    self.ip_version = futils.IP_TYPE_TO_VERSION[ip_type]

    # Peer objects and utility classes supplied by the caller.
    self.iptables_updater = iptables_updater
    self.workload_disp_chains = workload_disp_chains
    self.host_disp_chains = host_disp_chains
    self.rules_mgr = rules_manager
    self.fip_manager = fip_manager
    self.status_reporter = status_reporter

    # Endpoint dicts for every endpoint on this host, keyed by ID.
    self.endpoints_by_id = {}
    # Interface name (e.g. "tap1234") -> endpoint ID.
    self.endpoint_id_by_iface_name = {}
    # IDs of the endpoints that are live on this host, i.e. the ones that
    # we've increffed.
    self.local_endpoint_ids = set()

    # Host endpoint handling.  IPs applied to host interfaces (any
    # interface that isn't a workload interface) are cached by interface...
    self.host_ep_ips_by_iface = {}
    # ...host interface dicts are kept by ID so they can be resolved
    # against those IPs and injected, once resolved, as endpoints...
    self.host_eps_by_id = {}
    # ...and this caches the ones already resolved and injected.
    self.resolved_host_eps = {}

    # Index of which policy applies to which endpoints; match start/stop
    # events are delivered to our callbacks.
    policy_index = LabelValueIndex()
    policy_index.on_match_started = self.on_policy_match_started
    policy_index.on_match_stopped = self.on_policy_match_stopped
    self.policy_index = policy_index
    self._label_inherit_idx = LabelInheritanceIndex(policy_index)

    # Tier ID -> order, consulted when sorting the tiers.
    self.tier_orders = {}
    # Cached current ordering of the tier IDs.
    self.tier_sequence = []
    # Orders associated with the profiles.
    self.profile_orders = {}
    # Profile IDs to apply to each endpoint ID.
    self.pol_ids_by_ep_id = MultiDict()
    self.endpoints_with_dirty_policy = set()

    self._data_model_in_sync = False

    # Greenlet that runs the interface poll loop; an exception in it is
    # reported to _on_worker_died.
    poll_greenlet = gevent.Greenlet(self._interface_poll_loop)
    poll_greenlet.link_exception(self._on_worker_died)
    self._iface_poll_greenlet = poll_greenlet
def __init__(self, config, ip_type, iptables_updater, dispatch_chains,
             rules_manager, fip_manager, status_reporter):
    """
    Store the collaborators and create the (initially empty) caches and
    indexes used to track endpoints and the policy that applies to them.

    :param config: Configuration object, stored on self.config.
    :param ip_type: IP type; passed to the superclass as its qualifier and
        used to look up the IP version in futils.IP_TYPE_TO_VERSION.
    :param iptables_updater: Stored on self.iptables_updater.
    :param dispatch_chains: Stored on self.dispatch_chains.
    :param rules_manager: Stored on self.rules_mgr.
    :param fip_manager: Stored on self.fip_manager.
    :param status_reporter: Stored on self.status_reporter.
    """
    super(EndpointManager, self).__init__(qualifier=ip_type)

    # Configuration, plus the IP type/version this instance deals with.
    self.config = config
    self.ip_type = ip_type
    self.ip_version = futils.IP_TYPE_TO_VERSION[ip_type]

    # Peer objects and utility classes supplied by the caller.
    self.iptables_updater = iptables_updater
    self.dispatch_chains = dispatch_chains
    self.rules_mgr = rules_manager
    self.fip_manager = fip_manager
    self.status_reporter = status_reporter

    # Endpoint dicts for every endpoint on this host, keyed by ID.
    self.endpoints_by_id = {}
    # Interface name (e.g. "tap1234") -> endpoint ID.
    self.endpoint_id_by_iface_name = {}
    # IDs of the endpoints that are live on this host, i.e. the ones that
    # we've increffed.
    self.local_endpoint_ids = set()

    # Index of which policy applies to which endpoints; match start/stop
    # events are delivered to our callbacks.
    policy_index = LabelValueIndex()
    policy_index.on_match_started = self.on_policy_match_started
    policy_index.on_match_stopped = self.on_policy_match_stopped
    self.policy_index = policy_index
    self._label_inherit_idx = LabelInheritanceIndex(policy_index)

    # Tier ID -> order, consulted when sorting the tiers.
    self.tier_orders = {}
    # Cached current ordering of the tier IDs.
    self.tier_sequence = []
    # Orders associated with the profiles.
    self.profile_orders = {}
    # Profile IDs to apply to each endpoint ID.
    self.pol_ids_by_ep_id = MultiDict()
    self.endpoints_with_dirty_policy = set()

    self._data_model_in_sync = False
def __init__(self, ip_type, config):
    """
    Set up the manager of all the tag ipsets for either IPv4 or IPv6.

    :param ip_type: IP type (IPV4 or IPV6); also passed to the superclass
        as its qualifier.
    :param config: Configuration object, stored on self._config.
    """
    super(IpsetManager, self).__init__(qualifier=ip_type)
    self._config = config
    self.ip_type = ip_type

    # ---- State ----
    # Profile ID -> tag IDs.
    self.tags_by_prof_id = {}
    # WloadEndpointId -> EndpointData "struct".
    self.endpoint_data_by_ep_id = {}
    # Profile ID -> set of WloadEndpointId objects that reference it.
    self.endpoint_ids_by_profile_id = defaultdict(set)

    # The main index: records which IPs are currently in each tag.  Its
    # two key functions are aliased on self to avoid messy long lines at
    # the call sites.
    membership = TagMembershipIndex()
    self.tag_membership_index = membership
    self._add_mapping = membership.add_mapping
    self._remove_mapping = membership.remove_mapping

    # LabelNode index, used to cross-reference endpoint labels against
    # selectors; match start/stop events are delivered to our callbacks.
    label_index = LabelValueIndex()
    label_index.on_match_started = self._on_label_match_started
    label_index.on_match_stopped = self._on_label_match_stopped
    self._label_index = label_index
    self._label_inherit_idx = LabelInheritanceIndex(label_index)

    # Label match changes are parked in these sets until we're ready to
    # handle them.
    self._started_label_matches = set()
    self._stopped_label_matches = set()

    # One-way flag, set once we know the datamodel is in sync.  Rewriting
    # any ipsets before that point would risk omitting some values.
    self._datamodel_in_sync = False
class IpsetManager(ReferenceManager): # Using a larger batch delay here significantly reduces CPU usage when # we're under heavy churn. batch_delay = 0.05 def __init__(self, ip_type, config): """ Manages all the ipsets for tags for either IPv4 or IPv6. :param ip_type: IP type (IPV4 or IPV6) """ super(IpsetManager, self).__init__(qualifier=ip_type) self.ip_type = ip_type self._config = config # State. # Tag IDs indexed by profile IDs self.tags_by_prof_id = {} # EndpointData "structs" indexed by WloadEndpointId. self.endpoint_data_by_ep_id = {} # Main index. Tracks which IPs are currently in each tag. self.tag_membership_index = TagMembershipIndex() # Take copies of the key functions; avoids messy long lines. self._add_mapping = self.tag_membership_index.add_mapping self._remove_mapping = self.tag_membership_index.remove_mapping # Set of WloadEndpointId objects referenced by profile IDs. self.endpoint_ids_by_profile_id = defaultdict(set) # LabelNode index, used to cross-reference endpoint labels against # selectors. self._label_index = LabelValueIndex() self._label_index.on_match_started = self._on_label_match_started self._label_index.on_match_stopped = self._on_label_match_stopped self._label_inherit_idx = LabelInheritanceIndex(self._label_index) # Sets used to defer updates of the label match cache until we're ready # to handle them. self._started_label_matches = set() self._stopped_label_matches = set() # One-way flag set when we know the datamodel is in sync. We can't # rewrite any ipsets before we're in sync or we risk omitting some # values. 
self._datamodel_in_sync = False def _create(self, tag_id_or_sel): if isinstance(tag_id_or_sel, SelectorExpression): _log.debug("Creating ipset for expression %s", tag_id_or_sel) sel = tag_id_or_sel self._label_index.on_expression_update(sel, sel) ipset_name = futils.uniquely_shorten(sel.unique_id, MAX_NAME_LENGTH) self._process_stopped_label_matches() self._process_started_label_matches() else: _log.debug("Creating ipset for tag %s", tag_id_or_sel) ipset_name = futils.uniquely_shorten(tag_id_or_sel, MAX_NAME_LENGTH) active_ipset = RefCountedIpsetActor( ipset_name, self.ip_type, max_elem=self._config.MAX_IPSET_SIZE ) return active_ipset def _maybe_start(self, obj_id): if self._datamodel_in_sync: _log.debug("Datamodel is in-sync, deferring to superclass.") return super(IpsetManager, self)._maybe_start(obj_id) else: _log.info("Delaying startup of ipset for %s because datamodel is " "not in sync.", obj_id) def _on_object_started(self, tag_id, active_ipset): _log.debug("RefCountedIpsetActor actor for %s started", tag_id) # Fill the ipset in with its members, this will trigger its first # programming, after which it will call us back to tell us it is ready. # We can't use self._dirty_tags to defer this in case the set becomes # unreferenced before _finish_msg_batch() is called. assert self._is_starting_or_live(tag_id) assert self._datamodel_in_sync active_ipset = self.objects_by_id[tag_id] members = self.tag_membership_index.members(tag_id) active_ipset.replace_members(members, async=True) def _update_dirty_active_ipsets(self): """ Updates the members of any live TagIpsets that are dirty. Clears the index of dirty TagIpsets as a side-effect. 
""" tag_index = self.tag_membership_index ips_added, ips_removed = tag_index.get_and_reset_changes_by_tag() num_updates = 0 for tag_id, removed_ips in ips_removed.iteritems(): if self._is_starting_or_live(tag_id): assert self._datamodel_in_sync active_ipset = self.objects_by_id[tag_id] active_ipset.remove_members(removed_ips, async=True) num_updates += 1 self._maybe_yield() for tag_id, added_ips in ips_added.iteritems(): if self._is_starting_or_live(tag_id): assert self._datamodel_in_sync active_ipset = self.objects_by_id[tag_id] active_ipset.add_members(added_ips, async=True) num_updates += 1 self._maybe_yield() if num_updates > 0: _log.info("Sent %s updates to updated tags", num_updates) @property def nets_key(self): nets = "ipv4_nets" if self.ip_type == IPV4 else "ipv6_nets" return nets @property def expected_ips_key(self): key = ("expected_ipv4_addrs" if self.ip_type == IPV4 else "expected_ipv6_addrs") return key @actor_message() def on_datamodel_in_sync(self): if not self._datamodel_in_sync: _log.info("Datamodel now in sync, uncorking updates to TagIpsets") self._datamodel_in_sync = True self._maybe_start_all() @actor_message() def cleanup(self): """ Clean up left-over ipsets that existed at start-of-day. """ _log.info("Cleaning up left-over ipsets.") all_ipsets = list_ipset_names() # only clean up our own rubbish. pfx = IPSET_PREFIX[self.ip_type] tmppfx = IPSET_TMP_PREFIX[self.ip_type] felix_ipsets = set([n for n in all_ipsets if (n.startswith(pfx) or n.startswith(tmppfx))]) whitelist = set() live_ipsets = self.objects_by_id.itervalues() # stopping_objects_by_id is a dict of sets of RefCountedIpsetActor # objects, chain them together. stopping_ipsets = chain.from_iterable( self.stopping_objects_by_id.itervalues()) for ipset in chain(live_ipsets, stopping_ipsets): # Ask the ipset for all the names it may use and whitelist. 
whitelist.update(ipset.owned_ipset_names()) _log.debug("Whitelisted ipsets: %s", whitelist) ipsets_to_delete = felix_ipsets - whitelist _log.debug("Deleting ipsets: %s", ipsets_to_delete) # Delete the ipsets before we return. We can't queue these up since # that could conflict if someone increffed one of the ones we're about # to delete. for ipset_name in ipsets_to_delete: try: futils.check_call(["ipset", "destroy", ipset_name]) except FailedSystemCall: _log.exception("Failed to clean up dead ipset %s, will " "retry on next cleanup.", ipset_name) @actor_message() def on_tags_update(self, profile_id, tags): """ Called when the tag list of the given profile has changed or been deleted. Updates the indices and notifies any live RefCountedIpsetActor objects of any changes that affect them. :param str profile_id: Profile ID affected. :param list[str]|NoneType tags: List of tags for the given profile or None if deleted. """ _log.info("Tags for profile %s updated", profile_id) # General approach is to default to the empty list if the new/old # tag list is missing; then add/delete falls out: all the tags will # end up in added_tags/removed_tags. old_tags = set(self.tags_by_prof_id.get(profile_id, [])) new_tags = set(tags or []) # Find the endpoints that use these tags and work out what tags have # been added/removed. 
endpoint_ids = self.endpoint_ids_by_profile_id.get(profile_id, set()) added_tags = new_tags - old_tags removed_tags = old_tags - new_tags _log.debug("Endpoint IDs with this profile: %s", endpoint_ids) _log.debug("Profile %s added tags: %s", profile_id, added_tags) _log.debug("Profile %s removed tags: %s", profile_id, removed_tags) for endpoint_id in endpoint_ids: endpoint = self.endpoint_data_by_ep_id.get(endpoint_id, EMPTY_ENDPOINT_DATA) ip_addrs = endpoint.ip_addresses for tag_id in removed_tags: for ip in ip_addrs: self._remove_mapping(tag_id, profile_id, endpoint_id, ip) for tag_id in added_tags: for ip in ip_addrs: self._add_mapping(tag_id, profile_id, endpoint_id, ip) if tags is None: _log.info("Tags for profile %s deleted", profile_id) self.tags_by_prof_id.pop(profile_id, None) else: self.tags_by_prof_id[profile_id] = tags @actor_message() def on_prof_labels_set(self, profile_id, labels): _log.debug("Profile labels updated for %s: %s", profile_id, labels) self._label_inherit_idx.on_parent_labels_update(profile_id, labels) # Flush the updates. self._process_stopped_label_matches() self._process_started_label_matches() @actor_message() def on_host_ep_update(self, combined_id, endpoint): """ Update tag/selector memberships and indices with the new interface data dict. :param HostEndpointId combined_id: ID of the host endpoint. :param dict|NoneType endpoint: Either a dict containing interface information or None to indicate deletion. """ # For our purposes, host endpoints are indexed as endpoints. assert isinstance(combined_id, HostEndpointId) self._on_endpoint_or_host_ep_update(combined_id, endpoint) @actor_message() def on_endpoint_update(self, endpoint_id, endpoint): """ Update tag/selector memberships and indices with the new endpoint dict. :param WloadEndpointId endpoint_id: ID of the endpoint. :param dict|NoneType endpoint: Either a dict containing endpoint information or None to indicate deletion. 
""" assert isinstance(endpoint_id, WloadEndpointId) self._on_endpoint_or_host_ep_update(endpoint_id, endpoint) def _on_endpoint_or_host_ep_update(self, combined_id, data): """ Update tag/selector memberships and indices with the new host ep/endpoint dict. We care about the labels, profiles and IP addresses. For host endpoints, we include the expected_ipvX_addrs in the IP addresses. :param HostEndpointId|WloadEndpointId combined_id: ID of the endpoint. :param dict|NoneType data: Either a dict containing endpoint information or None to indicate deletion. """ endpoint_data = self._endpoint_data_from_dict(combined_id, data) if data and endpoint_data != EMPTY_ENDPOINT_DATA: # This endpoint makes a contribution to the IP addresses, we need # to index its labels. labels = data.get("labels", {}) prof_ids = data.get("profile_ids", []) else: labels = None prof_ids = None # Remove the endpoint from the label index so that we clean up its # old IP addresses. self._label_inherit_idx.on_item_update(combined_id, None, None) self._process_stopped_label_matches() # Now update the main cache of endpoint data. self._on_endpoint_data_update(combined_id, endpoint_data) # And then, if not doing a deletion, add the endpoint back into the # label index. 
if labels is not None: self._label_inherit_idx.on_item_update(combined_id, labels, prof_ids) self._process_started_label_matches() def _on_label_match_started(self, expr_id, item_id): """Callback from the label index to tell us that a match started.""" _log.debug("SelectorExpression %s now matches %s", expr_id, item_id) self._started_label_matches.add((expr_id, item_id)) def _on_label_match_stopped(self, expr_id, item_id): """Callback from the label index to tell us that a match stopped.""" _log.debug("SelectorExpression %s no longer matches %s", expr_id, item_id) self._stopped_label_matches.add((expr_id, item_id)) def _process_started_label_matches(self): for selector, item_id in self._started_label_matches: ep_data = self.endpoint_data_by_ep_id[item_id] ip_addrs = ep_data.ip_addresses _log.debug("Adding %s to expression %s", ip_addrs, selector) for ip in ip_addrs: self._add_mapping(selector, DUMMY_PROFILE, item_id, ip) self._started_label_matches.clear() def _process_stopped_label_matches(self): for selector, item_id in self._stopped_label_matches: ep_data = self.endpoint_data_by_ep_id[item_id] for ip in ep_data.ip_addresses: self._remove_mapping(selector, DUMMY_PROFILE, item_id, ip) self._stopped_label_matches.clear() def _endpoint_data_from_dict(self, endpoint_id, endpoint_dict): """ Convert the endpoint dict, which may be large, into a struct-like object in order to save occupancy. As an optimization, if the endpoint doesn't contain any data relevant to this manager, returns EMPTY_ENDPOINT_DATA. :param dict|None endpoint_dict: The data model endpoint dict or None. :return: An EndpointData object containing the data. If the input was None, EMPTY_ENDPOINT_DATA is returned. 
""" if endpoint_dict is not None: profile_ids = endpoint_dict.get("profile_ids", []) nets = endpoint_dict.get(self.nets_key, []) ips = map(futils.net_to_ip, nets) exp_ips = endpoint_dict.get(self.expected_ips_key, []) if ips or exp_ips: # Optimization: only return an object if this endpoint makes # some contribution to the IP addresses. return EndpointData(profile_ids, ips + exp_ips) else: _log.debug("Endpoint makes no contribution, " "treating as missing: %s", endpoint_id) return EMPTY_ENDPOINT_DATA def _on_endpoint_data_update(self, endpoint_id, endpoint_data): """ Update tag memberships and indices with the new EndpointData object. :param EndpointId endpoint_id: ID of the endpoint. :param EndpointData endpoint_data: An EndpointData object EMPTY_ENDPOINT_DATA to indicate deletion (or endpoint being optimized out). """ # Endpoint updates are the most complex to handle because they may # change the profile IDs (and hence the set of tags) as well as the # ip addresses attached to the interface. In addition, the endpoint # may or may not have existed before. # # General approach: force all the possibilities through the same # update loops by defaulting values. For example, if there was no # previous endpoint then we default old_tags to the empty set. Then, # when we calculate removed_tags, we'll get the empty set and the # removal loop will be skipped. old_endpoint = self.endpoint_data_by_ep_id.pop(endpoint_id, EMPTY_ENDPOINT_DATA) old_prof_ids = old_endpoint.profile_ids old_tags = set() for profile_id in old_prof_ids: for tag in self.tags_by_prof_id.get(profile_id, []): old_tags.add((profile_id, tag)) if endpoint_data != EMPTY_ENDPOINT_DATA: # EMPTY_ENDPOINT_DATA represents a deletion (or that the endpoint # has been optimized out earlier in the pipeline). Only store # off real endpoints. 
_log.debug("Endpoint %s updated", endpoint_id) self.endpoint_data_by_ep_id[endpoint_id] = endpoint_data new_prof_ids = endpoint_data.profile_ids new_tags = set() for profile_id in new_prof_ids: for tag in self.tags_by_prof_id.get(profile_id, []): new_tags.add((profile_id, tag)) if new_prof_ids != old_prof_ids: # Profile ID changed, or an add/delete. the _xxx_profile_index # methods ignore profile_id == None so we'll do the right thing. _log.debug("Profile IDs changed from %s to %s", old_prof_ids, new_prof_ids) self._remove_profile_index(old_prof_ids, endpoint_id) self._add_profile_index(new_prof_ids, endpoint_id) # Since we've defaulted new/old_tags to set() if needed, we can # use set operations to calculate the tag changes. added_tags = new_tags - old_tags unchanged_tags = new_tags & old_tags removed_tags = old_tags - new_tags # These default to set() if there are no IPs. old_ips = old_endpoint.ip_addresses new_ips = endpoint_data.ip_addresses # Add *new* IPs to new tags. On a deletion, added_tags will be empty. # Do this first to avoid marking ipsets as dirty if an endpoint moves # from one profile to another but keeps the same tag. for profile_id, tag in added_tags: for ip in new_ips: self._add_mapping(tag, profile_id, endpoint_id, ip) # Change IPs in unchanged tags. added_ips = new_ips - old_ips removed_ips = old_ips - new_ips for profile_id, tag in unchanged_tags: for ip in removed_ips: self._remove_mapping(tag, profile_id, endpoint_id, ip) for ip in added_ips: self._add_mapping(tag, profile_id, endpoint_id, ip) # Remove *all* *old* IPs from removed tags. For a deletion, only this # loop will fire. for profile_id, tag in removed_tags: for ip in old_ips: self._remove_mapping(tag, profile_id, endpoint_id, ip) def _add_profile_index(self, prof_ids, endpoint_id): """ Notes in the index that an endpoint uses the given profiles. :param set[str] prof_ids: set of profile IDs that the endpoint is in. 
:param EndpointId endpoint_id: ID of the endpoint """ for prof_id in prof_ids: self.endpoint_ids_by_profile_id[prof_id].add(endpoint_id) def _remove_profile_index(self, prof_ids, endpoint_id): """ Notes in the index that an endpoint no longer uses any of the given profiles. :param set[str] prof_ids: set of profile IDs to remove the endpoint from. :param EndpointId endpoint_id: ID of the endpoint """ for prof_id in prof_ids: endpoints = self.endpoint_ids_by_profile_id[prof_id] endpoints.discard(endpoint_id) if not endpoints: _log.debug("No more endpoints use profile %s", prof_id) del self.endpoint_ids_by_profile_id[prof_id] def _finish_msg_batch(self, batch, results): """ Called after a batch of messages is finished, processes any pending RefCountedIpsetActor member updates. Doing that here allows us to lots of updates into one replace operation. It also avoid wasted effort if tags are flapping. """ super(IpsetManager, self)._finish_msg_batch(batch, results) self._update_dirty_active_ipsets()
class IpsetManager(ReferenceManager): # Using a larger batch delay here significantly reduces CPU usage when # we're under heavy churn. batch_delay = 0.05 def __init__(self, ip_type, config): """ Manages all the ipsets for tags for either IPv4 or IPv6. :param ip_type: IP type (IPV4 or IPV6) """ super(IpsetManager, self).__init__(qualifier=ip_type) self.ip_type = ip_type self._config = config # State. # Tag IDs indexed by profile IDs self.tags_by_prof_id = {} # EndpointData "structs" indexed by WloadEndpointId. self.endpoint_data_by_ep_id = {} # Main index. Tracks which IPs are currently in each tag. self.tag_membership_index = TagMembershipIndex() # Take copies of the key functions; avoids messy long lines. self._add_mapping = self.tag_membership_index.add_mapping self._remove_mapping = self.tag_membership_index.remove_mapping # Set of WloadEndpointId objects referenced by profile IDs. self.endpoint_ids_by_profile_id = defaultdict(set) # LabelNode index, used to cross-reference endpoint labels against # selectors. self._label_index = LabelValueIndex() self._label_index.on_match_started = self._on_label_match_started self._label_index.on_match_stopped = self._on_label_match_stopped self._label_inherit_idx = LabelInheritanceIndex(self._label_index) # Sets used to defer updates of the label match cache until we're ready # to handle them. self._started_label_matches = set() self._stopped_label_matches = set() # One-way flag set when we know the datamodel is in sync. We can't # rewrite any ipsets before we're in sync or we risk omitting some # values. 
self._datamodel_in_sync = False def _create(self, tag_id_or_sel): if isinstance(tag_id_or_sel, SelectorExpression): _log.debug("Creating ipset for expression %s", tag_id_or_sel) sel = tag_id_or_sel self._label_index.on_expression_update(sel, sel) ipset_name = futils.uniquely_shorten(sel.unique_id, MAX_NAME_LENGTH) self._process_stopped_label_matches() self._process_started_label_matches() else: _log.debug("Creating ipset for tag %s", tag_id_or_sel) ipset_name = futils.uniquely_shorten(tag_id_or_sel, MAX_NAME_LENGTH) active_ipset = RefCountedIpsetActor( ipset_name, self.ip_type, max_elem=self._config.MAX_IPSET_SIZE) return active_ipset def _maybe_start(self, obj_id): if self._datamodel_in_sync: _log.debug("Datamodel is in-sync, deferring to superclass.") return super(IpsetManager, self)._maybe_start(obj_id) else: _log.info( "Delaying startup of ipset for %s because datamodel is " "not in sync.", obj_id) def _on_object_started(self, tag_id, active_ipset): _log.debug("RefCountedIpsetActor actor for %s started", tag_id) # Fill the ipset in with its members, this will trigger its first # programming, after which it will call us back to tell us it is ready. # We can't use self._dirty_tags to defer this in case the set becomes # unreferenced before _finish_msg_batch() is called. assert self._is_starting_or_live(tag_id) assert self._datamodel_in_sync active_ipset = self.objects_by_id[tag_id] members = self.tag_membership_index.members(tag_id) active_ipset.replace_members(members, async=True) def _update_dirty_active_ipsets(self): """ Updates the members of any live TagIpsets that are dirty. Clears the index of dirty TagIpsets as a side-effect. 
""" tag_index = self.tag_membership_index ips_added, ips_removed = tag_index.get_and_reset_changes_by_tag() num_updates = 0 for tag_id, removed_ips in ips_removed.iteritems(): if self._is_starting_or_live(tag_id): assert self._datamodel_in_sync active_ipset = self.objects_by_id[tag_id] active_ipset.remove_members(removed_ips, async=True) num_updates += 1 self._maybe_yield() for tag_id, added_ips in ips_added.iteritems(): if self._is_starting_or_live(tag_id): assert self._datamodel_in_sync active_ipset = self.objects_by_id[tag_id] active_ipset.add_members(added_ips, async=True) num_updates += 1 self._maybe_yield() if num_updates > 0: _log.info("Sent %s updates to updated tags", num_updates) @property def nets_key(self): nets = "ipv4_nets" if self.ip_type == IPV4 else "ipv6_nets" return nets @property def expected_ips_key(self): key = ("expected_ipv4_addrs" if self.ip_type == IPV4 else "expected_ipv6_addrs") return key @actor_message() def on_datamodel_in_sync(self): if not self._datamodel_in_sync: _log.info("Datamodel now in sync, uncorking updates to TagIpsets") self._datamodel_in_sync = True self._maybe_start_all() @actor_message() def cleanup(self): """ Clean up left-over ipsets that existed at start-of-day. """ _log.info("Cleaning up left-over ipsets.") all_ipsets = list_ipset_names() # only clean up our own rubbish. pfx = IPSET_PREFIX[self.ip_type] tmppfx = IPSET_TMP_PREFIX[self.ip_type] felix_ipsets = set([ n for n in all_ipsets if (n.startswith(pfx) or n.startswith(tmppfx)) ]) whitelist = set() live_ipsets = self.objects_by_id.itervalues() # stopping_objects_by_id is a dict of sets of RefCountedIpsetActor # objects, chain them together. stopping_ipsets = chain.from_iterable( self.stopping_objects_by_id.itervalues()) for ipset in chain(live_ipsets, stopping_ipsets): # Ask the ipset for all the names it may use and whitelist. 
whitelist.update(ipset.owned_ipset_names()) _log.debug("Whitelisted ipsets: %s", whitelist) ipsets_to_delete = felix_ipsets - whitelist _log.debug("Deleting ipsets: %s", ipsets_to_delete) # Delete the ipsets before we return. We can't queue these up since # that could conflict if someone increffed one of the ones we're about # to delete. for ipset_name in ipsets_to_delete: try: futils.check_call(["ipset", "destroy", ipset_name]) except FailedSystemCall: _log.exception( "Failed to clean up dead ipset %s, will " "retry on next cleanup.", ipset_name) @actor_message() def on_tags_update(self, profile_id, tags): """ Called when the tag list of the given profile has changed or been deleted. Updates the indices and notifies any live RefCountedIpsetActor objects of any changes that affect them. :param str profile_id: Profile ID affected. :param list[str]|NoneType tags: List of tags for the given profile or None if deleted. """ _log.info("Tags for profile %s updated", profile_id) # General approach is to default to the empty list if the new/old # tag list is missing; then add/delete falls out: all the tags will # end up in added_tags/removed_tags. old_tags = set(self.tags_by_prof_id.get(profile_id, [])) new_tags = set(tags or []) # Find the endpoints that use these tags and work out what tags have # been added/removed. 
endpoint_ids = self.endpoint_ids_by_profile_id.get(profile_id, set()) added_tags = new_tags - old_tags removed_tags = old_tags - new_tags _log.debug("Endpoint IDs with this profile: %s", endpoint_ids) _log.debug("Profile %s added tags: %s", profile_id, added_tags) _log.debug("Profile %s removed tags: %s", profile_id, removed_tags) for endpoint_id in endpoint_ids: endpoint = self.endpoint_data_by_ep_id.get(endpoint_id, EMPTY_ENDPOINT_DATA) ip_addrs = endpoint.ip_addresses for tag_id in removed_tags: for ip in ip_addrs: self._remove_mapping(tag_id, profile_id, endpoint_id, ip) for tag_id in added_tags: for ip in ip_addrs: self._add_mapping(tag_id, profile_id, endpoint_id, ip) if tags is None: _log.info("Tags for profile %s deleted", profile_id) self.tags_by_prof_id.pop(profile_id, None) else: self.tags_by_prof_id[profile_id] = tags @actor_message() def on_prof_labels_set(self, profile_id, labels): _log.debug("Profile labels updated for %s: %s", profile_id, labels) self._label_inherit_idx.on_parent_labels_update(profile_id, labels) # Flush the updates. self._process_stopped_label_matches() self._process_started_label_matches() @actor_message() def on_host_ep_update(self, combined_id, endpoint): """ Update tag/selector memberships and indices with the new interface data dict. :param HostEndpointId combined_id: ID of the host endpoint. :param dict|NoneType endpoint: Either a dict containing interface information or None to indicate deletion. """ # For our purposes, host endpoints are indexed as endpoints. assert isinstance(combined_id, HostEndpointId) self._on_endpoint_or_host_ep_update(combined_id, endpoint) @actor_message() def on_endpoint_update(self, endpoint_id, endpoint): """ Update tag/selector memberships and indices with the new endpoint dict. :param WloadEndpointId endpoint_id: ID of the endpoint. :param dict|NoneType endpoint: Either a dict containing endpoint information or None to indicate deletion. 
"""
        assert isinstance(endpoint_id, WloadEndpointId)
        self._on_endpoint_or_host_ep_update(endpoint_id, endpoint)

    def _on_endpoint_or_host_ep_update(self, combined_id, data):
        """
        Update tag/selector memberships and indices with the new
        host ep/endpoint dict.

        We care about the labels, profiles and IP addresses.  For host
        endpoints, we include the expected_ipvX_addrs in the IP addresses.

        The update is applied in a fixed order: the endpoint is first
        removed from the label index (flushing any stopped matches while
        its old data is still cached), then the cached endpoint data is
        replaced, and finally the endpoint is re-added to the label index.

        :param HostEndpointId|WloadEndpointId combined_id: ID of the endpoint.
        :param dict|NoneType data: Either a dict containing endpoint
            information or None to indicate deletion.
        """
        endpoint_data = self._endpoint_data_from_dict(combined_id, data)
        if data and endpoint_data != EMPTY_ENDPOINT_DATA:
            # This endpoint makes a contribution to the IP addresses, we need
            # to index its labels.
            labels = data.get("labels", {})
            prof_ids = data.get("profile_ids", [])
        else:
            # Deletion, or an endpoint with no relevant IPs: leave it out of
            # the label index.
            labels = None
            prof_ids = None

        # Remove the endpoint from the label index so that we clean up its
        # old IP addresses.  This must happen before the cache update below
        # because _process_stopped_label_matches() reads the old data.
        self._label_inherit_idx.on_item_update(combined_id, None, None)
        self._process_stopped_label_matches()

        # Now update the main cache of endpoint data.
        self._on_endpoint_data_update(combined_id, endpoint_data)

        # And then, if not doing a deletion, add the endpoint back into the
        # label index.
        if labels is not None:
            self._label_inherit_idx.on_item_update(combined_id, labels,
                                                   prof_ids)
            self._process_started_label_matches()

    def _on_label_match_started(self, expr_id, item_id):
        """
        Callback from the label index to tell us that a match started.

        Only records the match; the work is done later by
        _process_started_label_matches().
        """
        _log.debug("SelectorExpression %s now matches %s", expr_id, item_id)
        self._started_label_matches.add((expr_id, item_id))

    def _on_label_match_stopped(self, expr_id, item_id):
        """
        Callback from the label index to tell us that a match stopped.

        Only records the match; the work is done later by
        _process_stopped_label_matches().
        """
        _log.debug("SelectorExpression %s no longer matches %s",
                   expr_id, item_id)
        self._stopped_label_matches.add((expr_id, item_id))

    def _process_started_label_matches(self):
        """
        Process the (selector, endpoint ID) pairs recorded by
        _on_label_match_started(): add a mapping, under DUMMY_PROFILE, for
        every IP address of each matched endpoint, then clear the pending
        set.
        """
        for selector, item_id in self._started_label_matches:
            # Raises KeyError if the endpoint's data is not in the cache.
            ep_data = self.endpoint_data_by_ep_id[item_id]
            ip_addrs = ep_data.ip_addresses
            _log.debug("Adding %s to expression %s", ip_addrs, selector)
            for ip in ip_addrs:
                self._add_mapping(selector, DUMMY_PROFILE, item_id, ip)
        self._started_label_matches.clear()

    def _process_stopped_label_matches(self):
        """
        Process the (selector, endpoint ID) pairs recorded by
        _on_label_match_stopped(): remove the DUMMY_PROFILE mapping for
        every IP address of each endpoint, then clear the pending set.
        """
        for selector, item_id in self._stopped_label_matches:
            # Uses the currently cached data, so this must run before the
            # cache entry for the endpoint is replaced or removed.
            ep_data = self.endpoint_data_by_ep_id[item_id]
            for ip in ep_data.ip_addresses:
                self._remove_mapping(selector, DUMMY_PROFILE, item_id, ip)
        self._stopped_label_matches.clear()

    def _endpoint_data_from_dict(self, endpoint_id, endpoint_dict):
        """
        Convert the endpoint dict, which may be large, into a struct-like
        object in order to save occupancy.

        As an optimization, if the endpoint doesn't contain any data relevant
        to this manager, returns EMPTY_ENDPOINT_DATA.

        :param endpoint_id: ID of the endpoint; only used for logging.
        :param dict|None endpoint_dict: The data model endpoint dict or None.
        :return: An EndpointData object containing the data. If the input
            was None, or the endpoint has no IP addresses of interest,
            EMPTY_ENDPOINT_DATA is returned.
        """
        if endpoint_dict is not None:
            profile_ids = endpoint_dict.get("profile_ids", [])
            nets = endpoint_dict.get(self.nets_key, [])
            # NOTE(review): the truth test and "+" below assume map() returns
            # a list, as it does on Python 2.
            ips = map(futils.net_to_ip, nets)
            exp_ips = endpoint_dict.get(self.expected_ips_key, [])

            if ips or exp_ips:
                # Optimization: only return an object if this endpoint makes
                # some contribution to the IP addresses.
                return EndpointData(profile_ids, ips + exp_ips)
            else:
                _log.debug(
                    "Endpoint makes no contribution, "
                    "treating as missing: %s",
                    endpoint_id)
        return EMPTY_ENDPOINT_DATA

    def _on_endpoint_data_update(self, endpoint_id, endpoint_data):
        """
        Update tag memberships and indices with the new EndpointData
        object.

        :param EndpointId endpoint_id: ID of the endpoint.
        :param EndpointData endpoint_data: An EndpointData object, or
            EMPTY_ENDPOINT_DATA to indicate deletion (or endpoint being
            optimized out).
        """
        # Endpoint updates are the most complex to handle because they may
        # change the profile IDs (and hence the set of tags) as well as the
        # ip addresses attached to the interface.  In addition, the endpoint
        # may or may not have existed before.
        #
        # General approach: force all the possibilities through the same
        # update loops by defaulting values.  For example, if there was no
        # previous endpoint then we default old_tags to the empty set.  Then,
        # when we calculate removed_tags, we'll get the empty set and the
        # removal loop will be skipped.
        old_endpoint = self.endpoint_data_by_ep_id.pop(endpoint_id,
                                                       EMPTY_ENDPOINT_DATA)
        old_prof_ids = old_endpoint.profile_ids
        # Tags are tracked as (profile_id, tag) pairs.
        old_tags = set()
        for profile_id in old_prof_ids:
            for tag in self.tags_by_prof_id.get(profile_id, []):
                old_tags.add((profile_id, tag))

        if endpoint_data != EMPTY_ENDPOINT_DATA:
            # EMPTY_ENDPOINT_DATA represents a deletion (or that the endpoint
            # has been optimized out earlier in the pipeline).  Only store
            # off real endpoints.
            _log.debug("Endpoint %s updated", endpoint_id)
            self.endpoint_data_by_ep_id[endpoint_id] = endpoint_data

        new_prof_ids = endpoint_data.profile_ids
        new_tags = set()
        for profile_id in new_prof_ids:
            for tag in self.tags_by_prof_id.get(profile_id, []):
                new_tags.add((profile_id, tag))

        if new_prof_ids != old_prof_ids:
            # Profile IDs changed, or an add/delete.  The _xxx_profile_index
            # methods simply loop over the IDs they are given, so an empty
            # collection on either side is a no-op.
            _log.debug("Profile IDs changed from %s to %s",
                       old_prof_ids, new_prof_ids)
            self._remove_profile_index(old_prof_ids, endpoint_id)
            self._add_profile_index(new_prof_ids, endpoint_id)

        # Since we've defaulted new/old_tags to set() if needed, we can
        # use set operations to calculate the tag changes.
        added_tags = new_tags - old_tags
        unchanged_tags = new_tags & old_tags
        removed_tags = old_tags - new_tags

        # NOTE(review): the set arithmetic below assumes ip_addresses is a
        # set-like object on both the old and new data -- confirm against
        # EndpointData.
        old_ips = old_endpoint.ip_addresses
        new_ips = endpoint_data.ip_addresses

        # Add *new* IPs to new tags.  On a deletion, added_tags will be empty.
        # Do this first to avoid marking ipsets as dirty if an endpoint moves
        # from one profile to another but keeps the same tag.
        for profile_id, tag in added_tags:
            for ip in new_ips:
                self._add_mapping(tag, profile_id, endpoint_id, ip)

        # Change IPs in unchanged tags.
        added_ips = new_ips - old_ips
        removed_ips = old_ips - new_ips
        for profile_id, tag in unchanged_tags:
            for ip in removed_ips:
                self._remove_mapping(tag, profile_id, endpoint_id, ip)
            for ip in added_ips:
                self._add_mapping(tag, profile_id, endpoint_id, ip)

        # Remove *all* *old* IPs from removed tags.  For a deletion, only this
        # loop will fire.
        for profile_id, tag in removed_tags:
            for ip in old_ips:
                self._remove_mapping(tag, profile_id, endpoint_id, ip)

    def _add_profile_index(self, prof_ids, endpoint_id):
        """
        Notes in the index that an endpoint uses the given profiles.

        :param set[str] prof_ids: set of profile IDs that the endpoint is in.
        :param EndpointId endpoint_id: ID of the endpoint
        """
        for prof_id in prof_ids:
            # NOTE(review): relies on endpoint_ids_by_profile_id creating a
            # set for a missing key (e.g. a defaultdict) -- confirm.
            self.endpoint_ids_by_profile_id[prof_id].add(endpoint_id)

    def _remove_profile_index(self, prof_ids, endpoint_id):
        """
        Notes in the index that an endpoint no longer uses any of the
        given profiles.

        Profiles that are left with no endpoints are dropped from the index.

        :param set[str] prof_ids: set of profile IDs to remove the endpoint
            from.
        :param EndpointId endpoint_id: ID of the endpoint
        """
        for prof_id in prof_ids:
            endpoints = self.endpoint_ids_by_profile_id[prof_id]
            endpoints.discard(endpoint_id)
            if not endpoints:
                _log.debug("No more endpoints use profile %s", prof_id)
                del self.endpoint_ids_by_profile_id[prof_id]

    def _finish_msg_batch(self, batch, results):
        """
        Called after a batch of messages is finished, processes any
        pending RefCountedIpsetActor member updates.

        Doing that here allows us to coalesce lots of updates into one
        replace operation.  It also avoids wasted effort if tags are
        flapping.
        """
        super(IpsetManager, self)._finish_msg_batch(batch, results)
        self._update_dirty_active_ipsets()
class EndpointManager(ReferenceManager): def __init__(self, config, ip_type, iptables_updater, dispatch_chains, rules_manager, fip_manager, status_reporter): super(EndpointManager, self).__init__(qualifier=ip_type) # Configuration and version to use self.config = config self.ip_type = ip_type self.ip_version = futils.IP_TYPE_TO_VERSION[ip_type] # Peers/utility classes. self.iptables_updater = iptables_updater self.dispatch_chains = dispatch_chains self.rules_mgr = rules_manager self.status_reporter = status_reporter self.fip_manager = fip_manager # All endpoint dicts that are on this host. self.endpoints_by_id = {} # Dict that maps from interface name ("tap1234") to endpoint ID. self.endpoint_id_by_iface_name = {} # Set of endpoints that are live on this host. I.e. ones that we've # increffed. self.local_endpoint_ids = set() # Index tracking what policy applies to what endpoints. self.policy_index = LabelValueIndex() self.policy_index.on_match_started = self.on_policy_match_started self.policy_index.on_match_stopped = self.on_policy_match_stopped self._label_inherit_idx = LabelInheritanceIndex(self.policy_index) # Tier orders by tier ID. We use this to look up the order when we're # sorting the tiers. self.tier_orders = {} # Cache of the current ordering of tier IDs. self.tier_sequence = [] # And their associated orders. self.profile_orders = {} # Set of profile IDs to apply to each endpoint ID. self.pol_ids_by_ep_id = MultiDict() self.endpoints_with_dirty_policy = set() self._data_model_in_sync = False def _create(self, combined_id): """ Overrides ReferenceManager._create() """ return LocalEndpoint(self.config, combined_id, self.ip_type, self.iptables_updater, self.dispatch_chains, self.rules_mgr, self.fip_manager, self.status_reporter) @actor_message() def on_tier_data_update(self, tier, data): """ Message received when the metadata for a policy tier is updated in etcd. :param str tier: The name of the tier. 
:param dict|NoneType data: The dict or None, for a deletion. """ _log.debug("Data for policy tier %s updated to %s", tier, data) # Currently, the only data we care about is the order. order = None if data is None else data["order"] if self.tier_orders.get(tier) == order: _log.debug("No change, ignoring") return if order is not None: self.tier_orders[tier] = order else: del self.tier_orders[tier] new_tier_sequence = sorted(self.tier_orders.iterkeys(), key=lambda k: (self.tier_orders[k], k)) if self.tier_sequence != new_tier_sequence: _log.info("Sequence of profile tiers changed, refreshing all " "endpoints") self.tier_sequence = new_tier_sequence self.endpoints_with_dirty_policy.update( self.endpoints_by_id.keys() ) self._update_dirty_policy() @actor_message() def on_prof_labels_set(self, profile_id, labels): _log.debug("Profile labels updated for %s: %s", profile_id, labels) # Defer to the label index, which will call us back synchronously # with any match changes. self._label_inherit_idx.on_parent_labels_update(profile_id, labels) # Process any match changes that we've recorded in the callbacks. self._update_dirty_policy() @actor_message() def on_policy_selector_update(self, policy_id, selector_or_none, order_or_none): _log.debug("Policy %s selector updated to %s (%s)", policy_id, selector_or_none, order_or_none) # Defer to the label index, which will call us back synchronously # via on_policy_match_started and on_policy_match_stopped. self.policy_index.on_expression_update(policy_id, selector_or_none) # Before we update the policies, check if the order has changed, # which would mean we need to refresh all endpoints with this policy # too. 
if order_or_none != self.profile_orders.get(policy_id): if order_or_none is not None: self.profile_orders[policy_id] = order_or_none else: del self.profile_orders[policy_id] self.endpoints_with_dirty_policy.update( self.policy_index.matches_by_expr_id.iter_values(policy_id) ) # Finally, flush any updates to our waiting endpoints. self._update_dirty_policy() def on_policy_match_started(self, expr_id, item_id): """Called by the label index when a new match is started. Records the update but processing is deferred to the next call to self._update_dirty_policy(). """ _log.info("Policy %s now applies to endpoint %s", expr_id, item_id) self.pol_ids_by_ep_id.add(item_id, expr_id) self.endpoints_with_dirty_policy.add(item_id) def on_policy_match_stopped(self, expr_id, item_id): """Called by the label index when a match stops. Records the update but processing is deferred to the next call to self._update_dirty_policy(). """ _log.info("Policy %s no longer applies to endpoint %s", expr_id, item_id) self.pol_ids_by_ep_id.discard(item_id, expr_id) self.endpoints_with_dirty_policy.add(item_id) def _on_object_started(self, endpoint_id, obj): """ Callback from a LocalEndpoint to report that it has started. Overrides ReferenceManager._on_object_started """ ep = self.endpoints_by_id.get(endpoint_id) obj.on_endpoint_update(ep, async=True) self._update_tiered_policy(endpoint_id) @actor_message() def on_datamodel_in_sync(self): if not self._data_model_in_sync: _log.info("%s: First time we've been in-sync with the datamodel," "sending snapshot to DispatchChains and FIPManager.", self) self._data_model_in_sync = True # Tell the dispatch chains about the local endpoints in advance so # that we don't flap the dispatch chain at start-of-day. Note: # the snapshot may contain information that is ahead of the # state that our individual LocalEndpoint actors are sending to the # DispatchChains actor. That is OK! 
The worst that can happen is # that a LocalEndpoint undoes part of our update and then goes on # to re-apply the update when it catches up to the snapshot. local_ifaces = frozenset(self.endpoint_id_by_iface_name.keys()) self.dispatch_chains.apply_snapshot(local_ifaces, async=True) self._update_dirty_policy() nat_maps = {} for ep_id, ep in self.endpoints_by_id.iteritems(): if ep_id in self.local_endpoint_ids: nat_map = ep.get(nat_key(self.ip_type), None) if nat_map: nat_maps[ep_id] = nat_map self.fip_manager.apply_snapshot(nat_maps, async=True) @actor_message() def on_endpoint_update(self, endpoint_id, endpoint, force_reprogram=False): """ Event to indicate that an endpoint has been updated (including creation or deletion). :param EndpointId endpoint_id: The endpoint ID in question. :param dict[str]|NoneType endpoint: Dictionary of all endpoint data or None if the endpoint is to be deleted. """ if endpoint_id.host != self.config.HOSTNAME: _log.debug("Skipping endpoint %s; not on our host.", endpoint_id) return if self._is_starting_or_live(endpoint_id): # Local endpoint thread is running; tell it of the change. _log.info("Update for live endpoint %s", endpoint_id) self.objects_by_id[endpoint_id].on_endpoint_update( endpoint, force_reprogram=force_reprogram, async=True) old_ep = self.endpoints_by_id.pop(endpoint_id, {}) # Interface name shouldn't change but popping it now is correct for # deletes and we add it back in below on create/modify. old_iface_name = old_ep.get("name") self.endpoint_id_by_iface_name.pop(old_iface_name, None) if endpoint is None: # Deletion. Remove from the list. 
_log.info("Endpoint %s deleted", endpoint_id) if endpoint_id in self.local_endpoint_ids: self.decref(endpoint_id) self.local_endpoint_ids.remove(endpoint_id) self._label_inherit_idx.on_item_update(endpoint_id, None, None) assert endpoint_id not in self.pol_ids_by_ep_id else: # Creation or modification _log.info("Endpoint %s modified or created", endpoint_id) self.endpoints_by_id[endpoint_id] = endpoint self.endpoint_id_by_iface_name[endpoint["name"]] = endpoint_id if endpoint_id not in self.local_endpoint_ids: # This will trigger _on_object_activated to pass the endpoint # we just saved off to the endpoint. _log.debug("Endpoint wasn't known before, increffing it") self.local_endpoint_ids.add(endpoint_id) self.get_and_incref(endpoint_id) self._label_inherit_idx.on_item_update( endpoint_id, endpoint.get("labels", {}), endpoint.get("profile_ids", []) ) self._update_dirty_policy() @actor_message() def on_interface_update(self, name, iface_up): """ Called when an interface is created or changes state. The interface may be any interface on the host, not necessarily one managed by any endpoint of this server. """ try: endpoint_id = self.endpoint_id_by_iface_name[name] except KeyError: _log.debug("Update on interface %s that we do not care about", name) else: _log.info("Endpoint %s received interface update for %s", endpoint_id, name) if self._is_starting_or_live(endpoint_id): # LocalEndpoint is running, so tell it about the change. 
ep = self.objects_by_id[endpoint_id] ep.on_interface_update(iface_up, async=True) def _update_dirty_policy(self): if not self._data_model_in_sync: _log.debug("Datamodel not in sync, postponing update to policy") return _log.debug("Endpoints with dirty policy: %s", self.endpoints_with_dirty_policy) while self.endpoints_with_dirty_policy: ep_id = self.endpoints_with_dirty_policy.pop() if self._is_starting_or_live(ep_id): self._update_tiered_policy(ep_id) def _update_tiered_policy(self, ep_id): """ Sends an updated list of tiered policy to an endpoint. Recalculates the list. :param ep_id: ID of the endpoint to send an update to. """ _log.debug("Updating policies for %s from %s", ep_id, self.pol_ids_by_ep_id) # Order the profiles by tier and profile order, using the name of the # tier and profile as a tie-breaker if the orders are the same. profiles = [] for pol_id in self.pol_ids_by_ep_id.iter_values(ep_id): try: tier_order = self.tier_orders[pol_id.tier] except KeyError: _log.warn("Ignoring profile %s because its tier metadata is " "missing.") continue profile_order = self.profile_orders[pol_id] profiles.append((tier_order, pol_id.tier, profile_order, pol_id.policy_id, pol_id)) profiles.sort() # Convert to an ordered dict from tier to list of profiles. pols_by_tier = OrderedDict() for _, tier, _, _, pol_id in profiles: pols_by_tier.setdefault(tier, []).append(pol_id) endpoint = self.objects_by_id[ep_id] endpoint.on_tiered_policy_update(pols_by_tier, async=True)
class EndpointManager(ReferenceManager): def __init__(self, config, ip_type, iptables_updater, workload_disp_chains, host_disp_chains, rules_manager, fip_manager, status_reporter): super(EndpointManager, self).__init__(qualifier=ip_type) # Configuration and version to use self.config = config self.ip_type = ip_type self.ip_version = futils.IP_TYPE_TO_VERSION[ip_type] # Peers/utility classes. self.iptables_updater = iptables_updater self.workload_disp_chains = workload_disp_chains self.host_disp_chains = host_disp_chains self.rules_mgr = rules_manager self.status_reporter = status_reporter self.fip_manager = fip_manager # All endpoint dicts that are on this host. self.endpoints_by_id = {} # Dict that maps from interface name ("tap1234") to endpoint ID. self.endpoint_id_by_iface_name = {} # Cache of IPs applied to host endpoints. (I.e. any interfaces that # aren't workload interfaces.) self.host_ep_ips_by_iface = {} # Host interface dicts by ID. We'll resolve these with the IPs above # and inject the (resolved) ones as endpoints. self.host_eps_by_id = {} # Cache of interfaces that we've resolved and injected as endpoints. self.resolved_host_eps = {} # Set of endpoints that are live on this host. I.e. ones that we've # increffed. self.local_endpoint_ids = set() # Index tracking what policy applies to what endpoints. self.policy_index = LabelValueIndex() self.policy_index.on_match_started = self.on_policy_match_started self.policy_index.on_match_stopped = self.on_policy_match_stopped self._label_inherit_idx = LabelInheritanceIndex(self.policy_index) # Tier orders by tier ID. We use this to look up the order when we're # sorting the tiers. self.tier_orders = {} # Cache of the current ordering of tier IDs. self.tier_sequence = [] # And their associated orders. self.profile_orders = {} # Set of profile IDs to apply to each endpoint ID. 
self.pol_ids_by_ep_id = MultiDict() self.endpoints_with_dirty_policy = set() self._data_model_in_sync = False self._iface_poll_greenlet = gevent.Greenlet(self._interface_poll_loop) self._iface_poll_greenlet.link_exception(self._on_worker_died) def _on_actor_started(self): _log.info("Endpoint manager started, spawning interface poll worker.") self._iface_poll_greenlet.start() def _create(self, combined_id): """ Overrides ReferenceManager._create() """ if isinstance(combined_id, WloadEndpointId): return WorkloadEndpoint(self.config, combined_id, self.ip_type, self.iptables_updater, self.workload_disp_chains, self.rules_mgr, self.fip_manager, self.status_reporter) elif isinstance(combined_id, ResolvedHostEndpointId): return HostEndpoint(self.config, combined_id, self.ip_type, self.iptables_updater, self.host_disp_chains, self.rules_mgr, self.fip_manager, self.status_reporter) else: raise RuntimeError("Unknown ID type: %s" % combined_id) @actor_message() def on_tier_data_update(self, tier, data): """ Message received when the metadata for a policy tier is updated in etcd. :param str tier: The name of the tier. :param dict|NoneType data: The dict or None, for a deletion. """ _log.debug("Data for policy tier %s updated to %s", tier, data) # Currently, the only data we care about is the order. 
order = None if data is None else data["order"] if self.tier_orders.get(tier) == order: _log.debug("No change, ignoring") return if order is not None: self.tier_orders[tier] = order else: del self.tier_orders[tier] new_tier_sequence = sorted(self.tier_orders.iterkeys(), key=lambda k: (self.tier_orders[k], k)) if self.tier_sequence != new_tier_sequence: _log.info("Sequence of profile tiers changed, refreshing all " "endpoints") self.tier_sequence = new_tier_sequence self.endpoints_with_dirty_policy.update( self.endpoints_by_id.keys()) self._update_dirty_policy() @actor_message() def on_prof_labels_set(self, profile_id, labels): _log.debug("Profile labels updated for %s: %s", profile_id, labels) # Defer to the label index, which will call us back synchronously # with any match changes. self._label_inherit_idx.on_parent_labels_update(profile_id, labels) # Process any match changes that we've recorded in the callbacks. self._update_dirty_policy() @actor_message() def on_policy_selector_update(self, policy_id, selector_or_none, order_or_none): _log.debug("Policy %s selector updated to %s (%s)", policy_id, selector_or_none, order_or_none) # Defer to the label index, which will call us back synchronously # via on_policy_match_started and on_policy_match_stopped. self.policy_index.on_expression_update(policy_id, selector_or_none) # Before we update the policies, check if the order has changed, # which would mean we need to refresh all endpoints with this policy # too. if order_or_none != self.profile_orders.get(policy_id): if order_or_none is not None: self.profile_orders[policy_id] = order_or_none else: del self.profile_orders[policy_id] self.endpoints_with_dirty_policy.update( self.policy_index.matches_by_expr_id.iter_values(policy_id)) # Finally, flush any updates to our waiting endpoints. self._update_dirty_policy() def on_policy_match_started(self, expr_id, item_id): """Called by the label index when a new match is started. 
Records the update but processing is deferred to the next call to self._update_dirty_policy(). """ _log.info("Policy %s now applies to endpoint %s", expr_id, item_id) self.pol_ids_by_ep_id.add(item_id, expr_id) self.endpoints_with_dirty_policy.add(item_id) def on_policy_match_stopped(self, expr_id, item_id): """Called by the label index when a match stops. Records the update but processing is deferred to the next call to self._update_dirty_policy(). """ _log.info("Policy %s no longer applies to endpoint %s", expr_id, item_id) self.pol_ids_by_ep_id.discard(item_id, expr_id) self.endpoints_with_dirty_policy.add(item_id) def _on_object_started(self, endpoint_id, obj): """ Callback from a LocalEndpoint to report that it has started. Overrides ReferenceManager._on_object_started """ ep = self.endpoints_by_id.get(endpoint_id) obj.on_endpoint_update(ep, async=True) self._update_tiered_policy(endpoint_id) @actor_message() def on_datamodel_in_sync(self): if not self._data_model_in_sync: _log.info( "%s: First time we've been in-sync with the datamodel," "sending snapshot to DispatchChains and FIPManager.", self) self._data_model_in_sync = True # Tell the dispatch chains about the local endpoints in advance so # that we don't flap the dispatch chain at start-of-day. Note: # the snapshot may contain information that is ahead of the # state that our individual LocalEndpoint actors are sending to the # DispatchChains actor. That is OK! The worst that can happen is # that a LocalEndpoint undoes part of our update and then goes on # to re-apply the update when it catches up to the snapshot. 
workload_ifaces = set() host_eps = set() for if_name, ep_id in self.endpoint_id_by_iface_name.iteritems(): if isinstance(ep_id, WloadEndpointId): workload_ifaces.add(if_name) else: host_eps.add(if_name) self.workload_disp_chains.apply_snapshot( frozenset(workload_ifaces), async=True) self.host_disp_chains.apply_snapshot(frozenset(host_eps), async=True) self._update_dirty_policy() nat_maps = {} for ep_id, ep in self.endpoints_by_id.iteritems(): if ep_id in self.local_endpoint_ids: nat_map = ep.get(nat_key(self.ip_type), None) if nat_map: nat_maps[ep_id] = nat_map self.fip_manager.apply_snapshot(nat_maps, async=True) @actor_message() def on_host_ep_update(self, combined_id, data): if combined_id.host != self.config.HOSTNAME: _log.debug("Skipping endpoint %s; not on our host.", combined_id) return if data is not None: self.host_eps_by_id[combined_id] = data else: self.host_eps_by_id.pop(combined_id, None) self._resolve_host_eps() @actor_message() def on_endpoint_update(self, endpoint_id, endpoint, force_reprogram=False): """ Event to indicate that an endpoint has been updated (including creation or deletion). :param EndpointId endpoint_id: The endpoint ID in question. :param dict[str]|NoneType endpoint: Dictionary of all endpoint data or None if the endpoint is to be deleted. """ if endpoint_id.host != self.config.HOSTNAME: _log.debug("Skipping endpoint %s; not on our host.", endpoint_id) return old_ep = self.endpoints_by_id.get(endpoint_id, {}) old_iface_name = old_ep.get("name") new_iface_name = (endpoint or {}).get("name") if (old_iface_name is not None and new_iface_name is not None and old_iface_name != new_iface_name): # Special-case: if the interface name of an active endpoint # changes we need to clean up routes and iptables and start from # scratch. Force that through the deletion path so that we don't # introduce any more complexity in LocalEndpoint. _log.info( "Name of interface for endpoint %s changed from %s " "to %s. 
Forcing a delete/re-add.", endpoint_id, old_iface_name, new_iface_name) self._on_endpoint_update_internal(endpoint_id, None, force_reprogram) self._on_endpoint_update_internal(endpoint_id, endpoint, force_reprogram) def _on_endpoint_update_internal(self, endpoint_id, endpoint, force_reprogram=False): """Handles a single update or deletion of an endpoint. Increfs/decrefs the actor as appropriate and forwards on the update if the endpoint is active. :param EndpointId endpoint_id: The endpoint ID in question. :param dict[str]|NoneType endpoint: Dictionary of all endpoint data or None if the endpoint is to be deleted. """ if self._is_starting_or_live(endpoint_id): # Local endpoint thread is running; tell it of the change. _log.info("Update for live endpoint %s", endpoint_id) self.objects_by_id[endpoint_id].on_endpoint_update( endpoint, force_reprogram=force_reprogram, async=True) old_ep = self.endpoints_by_id.pop(endpoint_id, {}) # Interface name shouldn't change but popping it now is correct for # deletes and we add it back in below on create/modify. old_iface_name = old_ep.get("name") self.endpoint_id_by_iface_name.pop(old_iface_name, None) if endpoint is None: # Deletion. Remove from the list. _log.info("Endpoint %s deleted", endpoint_id) if endpoint_id in self.local_endpoint_ids: self.decref(endpoint_id) self.local_endpoint_ids.remove(endpoint_id) self._label_inherit_idx.on_item_update(endpoint_id, None, None) assert endpoint_id not in self.pol_ids_by_ep_id else: # Creation or modification _log.info("Endpoint %s modified or created", endpoint_id) self.endpoints_by_id[endpoint_id] = endpoint self.endpoint_id_by_iface_name[endpoint["name"]] = endpoint_id if endpoint_id not in self.local_endpoint_ids: # This will trigger _on_object_activated to pass the endpoint # we just saved off to the endpoint. 
_log.debug("Endpoint wasn't known before, increffing it") self.local_endpoint_ids.add(endpoint_id) self.get_and_incref(endpoint_id) self._label_inherit_idx.on_item_update( endpoint_id, endpoint.get("labels", {}), endpoint.get("profile_ids", [])) self._update_dirty_policy() @actor_message() def on_interface_update(self, name, iface_up): """ Called when an interface is created or changes state. The interface may be any interface on the host, not necessarily one managed by any endpoint of this server. """ try: endpoint_id = self.endpoint_id_by_iface_name[name] except KeyError: _log.debug("Update on interface %s that we do not care about", name) else: _log.info("Endpoint %s received interface update for %s", endpoint_id, name) if self._is_starting_or_live(endpoint_id): # LocalEndpoint is running, so tell it about the change. ep = self.objects_by_id[endpoint_id] ep.on_interface_update(iface_up, async=True) def _interface_poll_loop(self): """Greenlet: Polls host endpoints for changes to their IP addresses. Sends updates to the EndpointManager via the _on_iface_ips_update() message. If polling is disabled, then it reads the interfaces once and then stops. """ known_interfaces = {} while True: known_interfaces = self._poll_interfaces(known_interfaces) if self.config.HOST_IF_POLL_INTERVAL_SECS <= 0: _log.info("Host interface polling disabled, stopping after " "initial read. Further changes to host endpoint " "IPs will be ignored.") break gevent.sleep(self.config.HOST_IF_POLL_INTERVAL_SECS) def _poll_interfaces(self, known_interfaces): """Does a single poll of the host interfaces, looking for IP changes. Sends updates to the EndpointManager via the _on_iface_ips_update() message. This is broken out form the loop above to make it easier to test. :param known_interfaces: :return: """ # We only care about host interfaces, not workload ones. exclude_prefixes = self.config.IFACE_PREFIX # Get the IPs for each interface. 
ips_by_iface = devices.list_ips_by_iface(self.ip_type) for iface, ips in ips_by_iface.items(): ignore_iface = any( iface.startswith(prefix) for prefix in exclude_prefixes) if ignore_iface: # Ignore non-host interfaces. ips_by_iface.pop(iface) else: # Compare with the set of IPs that were there before. # We pop interfaces that we see so that we can clean up # deletions below. old_ips = known_interfaces.pop(iface, None) if old_ips != ips: _log.debug("IPs of interface %s changed to %s", iface, ips) self._on_iface_ips_update(iface, ips, async=True) # Clean up deletions. Anything left in known_interfaces has # been deleted. for iface, ips in known_interfaces.iteritems(): self._on_iface_ips_update(iface, None, async=True) # Update our cache of known interfaces for the next loop. return ips_by_iface @actor_message() def _on_iface_ips_update(self, iface_name, ip_addrs): """Message sent by _poll_interface_ips when it detects a change. :param iface_name: Name of the interface that has been updated. :param ip_addrs: set of IP addresses, or None if the interface no longer exists (or has no IPs). """ _log.info("Interface %s now has IPs %s", iface_name, ip_addrs) if ip_addrs is not None: self.host_ep_ips_by_iface[iface_name] = ip_addrs else: self.host_ep_ips_by_iface.pop(iface_name, None) # Since changes to IPs can change which host endpoint objects apply to # which interfaces, we need to resolve IPs and host endpoints. self._resolve_host_eps() def _resolve_host_eps(self): """Resolves the host endpoint data we've learned from etcd with IP addresses and interface names learned from the kernel. Host interfaces that have matching IPs get combined with interface name learned from the kernel and updated via on_endpoint_update(). In the case where multiple interfaces have the same IP address, a copy of the host endpoint will be resolved with each interface. """ # Invert the interface name to IP mapping to allow us to do an IP to # interface name lookup. 
iface_names_by_ip = defaultdict(set) for iface, ips in self.host_ep_ips_by_iface.iteritems(): for ip in ips: iface_names_by_ip[ip].add(iface) # Iterate over the host endpoints, looking for corresponding IPs. resolved_ifaces = {} iface_name_to_id = {} # For repeatability, we sort the endpoint data. We don't care what # the sort order is, only that it's stable so we just use the repr() # of the ID. for combined_id, host_ep in sorted(self.host_eps_by_id.iteritems(), key=lambda h: repr(h[0])): addrs_key = "expected_ipv%s_addrs" % self.ip_version if "name" in host_ep: # This interface has an explicit name in the data so it's # already resolved. resolved_id = combined_id.resolve(host_ep["name"]) resolved_ifaces[resolved_id] = host_ep elif addrs_key in host_ep: # No explicit name, look for an interface with a matching IP. expected_ips = IPSet(host_ep[addrs_key]) for ip, iface_names in sorted(iface_names_by_ip.iteritems()): if ip in expected_ips: # This endpoint matches the IP, loop over the (usually # one) interface with that IP. Sort the names to avoid # non-deterministic behaviour if there are multiple # conflicting matches. _log.debug("Host endpoint %s matches interfaces: %s", combined_id, iface_names) for iface_name in sorted(iface_names): # Check for conflicting matches. prev_match = iface_name_to_id.get(iface_name) if prev_match == combined_id: # Already matched this interface by a different # IP address. continue elif prev_match is not None: # Already matched a different interface. # First match wins. _log.warn( "Interface %s matched with multiple " "entries in datamodel; using %s", iface_name, prev_match) continue else: # Else, this is the first match, record it. iface_name_to_id[iface_name] = combined_id # Got a match. Since it's possible to match # multiple interfaces by IP, we add the interface # name into the ID to disambiguate. 
resolved_id = combined_id.resolve(iface_name) resolved_data = host_ep.copy() resolved_data["name"] = iface_name resolved_ifaces[resolved_id] = resolved_data # Fire in deletions for interfaces that no longer resolve. for resolved_id in self.resolved_host_eps.keys(): if resolved_id not in resolved_ifaces: _log.debug("%s no longer matches", resolved_id) self.on_endpoint_update(resolved_id, None) # Fire in the updates for the new data. for resolved_id, data in resolved_ifaces.iteritems(): if self.resolved_host_eps.get(resolved_id) != data: _log.debug("Updating data for %s", resolved_id) self.on_endpoint_update(resolved_id, data) # Update the cache so we can calculate deltas next time. self.resolved_host_eps = resolved_ifaces def _update_dirty_policy(self): if not self._data_model_in_sync: _log.debug("Datamodel not in sync, postponing update to policy") return _log.debug("Endpoints with dirty policy: %s", self.endpoints_with_dirty_policy) while self.endpoints_with_dirty_policy: ep_id = self.endpoints_with_dirty_policy.pop() if self._is_starting_or_live(ep_id): self._update_tiered_policy(ep_id) def _update_tiered_policy(self, ep_id): """ Sends an updated list of tiered policy to an endpoint. Recalculates the list. :param ep_id: ID of the endpoint to send an update to. """ _log.debug("Updating policies for %s from %s", ep_id, self.pol_ids_by_ep_id) # Order the profiles by tier and profile order, using the name of the # tier and profile as a tie-breaker if the orders are the same. profiles = [] for pol_id in self.pol_ids_by_ep_id.iter_values(ep_id): try: tier_order = self.tier_orders[pol_id.tier] except KeyError: _log.warn( "Ignoring policy %s because its tier metadata is " "missing.", pol_id) continue profile_order = self.profile_orders[pol_id] profiles.append((tier_order, pol_id.tier, profile_order, pol_id.policy_id, pol_id)) profiles.sort() # Convert to an ordered dict from tier to list of profiles. 
pols_by_tier = OrderedDict() for _, tier, _, _, pol_id in profiles: pols_by_tier.setdefault(tier, []).append(pol_id) endpoint = self.objects_by_id[ep_id] endpoint.on_tiered_policy_update(pols_by_tier, async=True) def _on_worker_died(self, watch_greenlet): """ Greenlet: spawned by the gevent Hub if our worker thread dies. """ _log.critical("Worker greenlet died: %s; exiting.", watch_greenlet) sys.exit(1)
class EndpointManager(ReferenceManager): def __init__(self, config, ip_type, iptables_updater, workload_disp_chains, host_disp_chains, rules_manager, fip_manager, status_reporter): super(EndpointManager, self).__init__(qualifier=ip_type) # Configuration and version to use self.config = config self.ip_type = ip_type self.ip_version = futils.IP_TYPE_TO_VERSION[ip_type] # Peers/utility classes. self.iptables_updater = iptables_updater self.workload_disp_chains = workload_disp_chains self.host_disp_chains = host_disp_chains self.rules_mgr = rules_manager self.status_reporter = status_reporter self.fip_manager = fip_manager # All endpoint dicts that are on this host. self.endpoints_by_id = {} # Dict that maps from interface name ("tap1234") to endpoint ID. self.endpoint_id_by_iface_name = {} # Cache of IPs applied to host endpoints. (I.e. any interfaces that # aren't workload interfaces.) self.host_ep_ips_by_iface = {} # Host interface dicts by ID. We'll resolve these with the IPs above # and inject the (resolved) ones as endpoints. self.host_eps_by_id = {} # Cache of interfaces that we've resolved and injected as endpoints. self.resolved_host_eps = {} # Set of endpoints that are live on this host. I.e. ones that we've # increffed. self.local_endpoint_ids = set() # Index tracking what policy applies to what endpoints. self.policy_index = LabelValueIndex() self.policy_index.on_match_started = self.on_policy_match_started self.policy_index.on_match_stopped = self.on_policy_match_stopped self._label_inherit_idx = LabelInheritanceIndex(self.policy_index) # Tier orders by tier ID. We use this to look up the order when we're # sorting the tiers. self.tier_orders = {} # Cache of the current ordering of tier IDs. self.tier_sequence = [] # And their associated orders. self.profile_orders = {} # Set of profile IDs to apply to each endpoint ID. 
self.pol_ids_by_ep_id = MultiDict() self.endpoints_with_dirty_policy = set() self._data_model_in_sync = False self._iface_poll_greenlet = gevent.Greenlet(self._interface_poll_loop) self._iface_poll_greenlet.link_exception(self._on_worker_died) def _on_actor_started(self): _log.info("Endpoint manager started, spawning interface poll worker.") self._iface_poll_greenlet.start() def _create(self, combined_id): """ Overrides ReferenceManager._create() """ if isinstance(combined_id, WloadEndpointId): return WorkloadEndpoint(self.config, combined_id, self.ip_type, self.iptables_updater, self.workload_disp_chains, self.rules_mgr, self.fip_manager, self.status_reporter) elif isinstance(combined_id, ResolvedHostEndpointId): return HostEndpoint(self.config, combined_id, self.ip_type, self.iptables_updater, self.host_disp_chains, self.rules_mgr, self.fip_manager, self.status_reporter) else: raise RuntimeError("Unknown ID type: %s" % combined_id) @actor_message() def on_tier_data_update(self, tier, data): """ Message received when the metadata for a policy tier is updated in etcd. :param str tier: The name of the tier. :param dict|NoneType data: The dict or None, for a deletion. """ _log.debug("Data for policy tier %s updated to %s", tier, data) # Currently, the only data we care about is the order. 
order = None if data is None else data["order"] if self.tier_orders.get(tier) == order: _log.debug("No change, ignoring") return if order is not None: self.tier_orders[tier] = order else: del self.tier_orders[tier] new_tier_sequence = sorted(self.tier_orders.iterkeys(), key=lambda k: (self.tier_orders[k], k)) if self.tier_sequence != new_tier_sequence: _log.info("Sequence of profile tiers changed, refreshing all " "endpoints") self.tier_sequence = new_tier_sequence self.endpoints_with_dirty_policy.update( self.endpoints_by_id.keys() ) self._update_dirty_policy() @actor_message() def on_prof_labels_set(self, profile_id, labels): _log.debug("Profile labels updated for %s: %s", profile_id, labels) # Defer to the label index, which will call us back synchronously # with any match changes. self._label_inherit_idx.on_parent_labels_update(profile_id, labels) # Process any match changes that we've recorded in the callbacks. self._update_dirty_policy() @actor_message() def on_policy_selector_update(self, policy_id, selector_or_none, order_or_none): _log.debug("Policy %s selector updated to %s (%s)", policy_id, selector_or_none, order_or_none) # Defer to the label index, which will call us back synchronously # via on_policy_match_started and on_policy_match_stopped. self.policy_index.on_expression_update(policy_id, selector_or_none) # Before we update the policies, check if the order has changed, # which would mean we need to refresh all endpoints with this policy # too. if order_or_none != self.profile_orders.get(policy_id): if order_or_none is not None: self.profile_orders[policy_id] = order_or_none else: del self.profile_orders[policy_id] self.endpoints_with_dirty_policy.update( self.policy_index.matches_by_expr_id.iter_values(policy_id) ) # Finally, flush any updates to our waiting endpoints. self._update_dirty_policy() def on_policy_match_started(self, expr_id, item_id): """Called by the label index when a new match is started. 
Records the update but processing is deferred to the next call to self._update_dirty_policy(). """ _log.info("Policy %s now applies to endpoint %s", expr_id, item_id) self.pol_ids_by_ep_id.add(item_id, expr_id) self.endpoints_with_dirty_policy.add(item_id) def on_policy_match_stopped(self, expr_id, item_id): """Called by the label index when a match stops. Records the update but processing is deferred to the next call to self._update_dirty_policy(). """ _log.info("Policy %s no longer applies to endpoint %s", expr_id, item_id) self.pol_ids_by_ep_id.discard(item_id, expr_id) self.endpoints_with_dirty_policy.add(item_id) def _on_object_started(self, endpoint_id, obj): """ Callback from a LocalEndpoint to report that it has started. Overrides ReferenceManager._on_object_started """ ep = self.endpoints_by_id.get(endpoint_id) obj.on_endpoint_update(ep, async=True) self._update_tiered_policy(endpoint_id) @actor_message() def on_datamodel_in_sync(self): if not self._data_model_in_sync: _log.info("%s: First time we've been in-sync with the datamodel," "sending snapshot to DispatchChains and FIPManager.", self) self._data_model_in_sync = True # Tell the dispatch chains about the local endpoints in advance so # that we don't flap the dispatch chain at start-of-day. Note: # the snapshot may contain information that is ahead of the # state that our individual LocalEndpoint actors are sending to the # DispatchChains actor. That is OK! The worst that can happen is # that a LocalEndpoint undoes part of our update and then goes on # to re-apply the update when it catches up to the snapshot. 
workload_ifaces = set() host_eps = set() for if_name, ep_id in self.endpoint_id_by_iface_name.iteritems(): if isinstance(ep_id, WloadEndpointId): workload_ifaces.add(if_name) else: host_eps.add(if_name) self.workload_disp_chains.apply_snapshot( frozenset(workload_ifaces), async=True ) self.host_disp_chains.apply_snapshot( frozenset(host_eps), async=True ) self._update_dirty_policy() nat_maps = {} for ep_id, ep in self.endpoints_by_id.iteritems(): if ep_id in self.local_endpoint_ids: nat_map = ep.get(nat_key(self.ip_type), None) if nat_map: nat_maps[ep_id] = nat_map self.fip_manager.apply_snapshot(nat_maps, async=True) @actor_message() def on_host_ep_update(self, combined_id, data): if data is not None: self.host_eps_by_id[combined_id] = data else: self.host_eps_by_id.pop(combined_id, None) self._resolve_host_eps() @actor_message() def on_endpoint_update(self, endpoint_id, endpoint, force_reprogram=False): """ Event to indicate that an endpoint has been updated (including creation or deletion). :param EndpointId endpoint_id: The endpoint ID in question. :param dict[str]|NoneType endpoint: Dictionary of all endpoint data or None if the endpoint is to be deleted. """ if endpoint_id.host != self.config.HOSTNAME: _log.debug("Skipping endpoint %s; not on our host.", endpoint_id) return if self._is_starting_or_live(endpoint_id): # Local endpoint thread is running; tell it of the change. _log.info("Update for live endpoint %s", endpoint_id) self.objects_by_id[endpoint_id].on_endpoint_update( endpoint, force_reprogram=force_reprogram, async=True) old_ep = self.endpoints_by_id.pop(endpoint_id, {}) # Interface name shouldn't change but popping it now is correct for # deletes and we add it back in below on create/modify. old_iface_name = old_ep.get("name") self.endpoint_id_by_iface_name.pop(old_iface_name, None) if endpoint is None: # Deletion. Remove from the list. 
_log.info("Endpoint %s deleted", endpoint_id) if endpoint_id in self.local_endpoint_ids: self.decref(endpoint_id) self.local_endpoint_ids.remove(endpoint_id) self._label_inherit_idx.on_item_update(endpoint_id, None, None) assert endpoint_id not in self.pol_ids_by_ep_id else: # Creation or modification _log.info("Endpoint %s modified or created", endpoint_id) self.endpoints_by_id[endpoint_id] = endpoint self.endpoint_id_by_iface_name[endpoint["name"]] = endpoint_id if endpoint_id not in self.local_endpoint_ids: # This will trigger _on_object_activated to pass the endpoint # we just saved off to the endpoint. _log.debug("Endpoint wasn't known before, increffing it") self.local_endpoint_ids.add(endpoint_id) self.get_and_incref(endpoint_id) self._label_inherit_idx.on_item_update( endpoint_id, endpoint.get("labels", {}), endpoint.get("profile_ids", []) ) self._update_dirty_policy() @actor_message() def on_interface_update(self, name, iface_up): """ Called when an interface is created or changes state. The interface may be any interface on the host, not necessarily one managed by any endpoint of this server. """ try: endpoint_id = self.endpoint_id_by_iface_name[name] except KeyError: _log.debug("Update on interface %s that we do not care about", name) else: _log.info("Endpoint %s received interface update for %s", endpoint_id, name) if self._is_starting_or_live(endpoint_id): # LocalEndpoint is running, so tell it about the change. ep = self.objects_by_id[endpoint_id] ep.on_interface_update(iface_up, async=True) def _interface_poll_loop(self): """Greenlet: Polls host endpoints for changes to their IP addresses. Sends updates to the EndpointManager via the _on_iface_ips_update() message. If polling is disabled, then it reads the interfaces once and then stops. 
""" known_interfaces = {} while True: known_interfaces = self._poll_interfaces(known_interfaces) if self.config.HOST_IF_POLL_INTERVAL_SECS <= 0: _log.info("Host interface polling disabled, stopping after " "initial read. Further changes to host endpoint " "IPs will be ignored.") break gevent.sleep(self.config.HOST_IF_POLL_INTERVAL_SECS) def _poll_interfaces(self, known_interfaces): """Does a single poll of the host interfaces, looking for IP changes. Sends updates to the EndpointManager via the _on_iface_ips_update() message. This is broken out form the loop above to make it easier to test. :param known_interfaces: :return: """ # We only care about host interfaces, not workload ones. exclude_prefix = self.config.IFACE_PREFIX # Get the IPs for each interface. ips_by_iface = devices.list_ips_by_iface(self.ip_type) for iface, ips in ips_by_iface.items(): if iface.startswith(exclude_prefix): # Ignore non-host interfaces. ips_by_iface.pop(iface) else: # Compare with the set of IPs that were there before. # We pop interfaces that we see so that we can clean up # deletions below. old_ips = known_interfaces.pop(iface, None) if old_ips != ips: _log.debug("IPs of interface %s changed to %s", iface, ips) self._on_iface_ips_update(iface, ips, async=True) # Clean up deletions. Anything left in known_interfaces has # been deleted. for iface, ips in known_interfaces.iteritems(): self._on_iface_ips_update(iface, None, async=True) # Update our cache of known interfaces for the next loop. return ips_by_iface @actor_message() def _on_iface_ips_update(self, iface_name, ip_addrs): """Message sent by _poll_interface_ips when it detects a change. :param iface_name: Name of the interface that has been updated. :param ip_addrs: set of IP addresses, or None if the interface no longer exists (or has no IPs). 
""" _log.info("Interface %s now has IPs %s", iface_name, ip_addrs) if ip_addrs is not None: self.host_ep_ips_by_iface[iface_name] = ip_addrs else: self.host_ep_ips_by_iface.pop(iface_name, None) # Since changes to IPs can change which host endpoint objects apply to # which interfaces, we need to resolve IPs and host endpoints. self._resolve_host_eps() def _resolve_host_eps(self): """Resolves the host endpoint data we've learned from etcd with IP addresses and interface names learned from the kernel. Host interfaces that have matching IPs get combined with interface name learned from the kernel and updated via on_endpoint_update(). In the case where multiple interfaces have the same IP address, a copy of the host endpoint will be resolved with each interface. """ # Invert the interface name to IP mapping to allow us to do an IP to # interface name lookup. iface_names_by_ip = defaultdict(set) for iface, ips in self.host_ep_ips_by_iface.iteritems(): for ip in ips: iface_names_by_ip[ip].add(iface) # Iterate over the host endpoints, looking for corresponding IPs. resolved_ifaces = {} iface_name_to_id = {} # For repeatability, we sort the endpoint data. We don't care what # the sort order is, only that it's stable so we just use the repr() # of the ID. for combined_id, host_ep in sorted(self.host_eps_by_id.iteritems(), key=lambda h: repr(h[0])): addrs_key = "expected_ipv%s_addrs" % self.ip_version if "name" in host_ep: # This interface has an explicit name in the data so it's # already resolved. resolved_id = combined_id.resolve(host_ep["name"]) resolved_ifaces[resolved_id] = host_ep elif addrs_key in host_ep: # No explicit name, look for an interface with a matching IP. expected_ips = IPSet(host_ep[addrs_key]) for ip, iface_names in sorted(iface_names_by_ip.iteritems()): if ip in expected_ips: # This endpoint matches the IP, loop over the (usually # one) interface with that IP. 
Sort the names to avoid # non-deterministic behaviour if there are multiple # conflicting matches. _log.debug("Host endpoint %s matches interfaces: %s", combined_id, iface_names) for iface_name in sorted(iface_names): # Check for conflicting matches. prev_match = iface_name_to_id.get(iface_name) if prev_match == combined_id: # Already matched this interface by a different # IP address. continue elif prev_match is not None: # Already matched a different interface. # First match wins. _log.warn("Interface %s matched with multiple " "entries in datamodel; using %s", iface_name, prev_match) continue else: # Else, this is the first match, record it. iface_name_to_id[iface_name] = combined_id # Got a match. Since it's possible to match # multiple interfaces by IP, we add the interface # name into the ID to disambiguate. resolved_id = combined_id.resolve(iface_name) resolved_data = host_ep.copy() resolved_data["name"] = iface_name resolved_ifaces[resolved_id] = resolved_data # Fire in deletions for interfaces that no longer resolve. for resolved_id in self.resolved_host_eps.keys(): if resolved_id not in resolved_ifaces: _log.debug("%s no longer matches", resolved_id) self.on_endpoint_update(resolved_id, None) # Fire in the updates for the new data. for resolved_id, data in resolved_ifaces.iteritems(): if self.resolved_host_eps.get(resolved_id) != data: _log.debug("Updating data for %s", resolved_id) self.on_endpoint_update(resolved_id, data) # Update the cache so we can calculate deltas next time. 
self.resolved_host_eps = resolved_ifaces def _update_dirty_policy(self): if not self._data_model_in_sync: _log.debug("Datamodel not in sync, postponing update to policy") return _log.debug("Endpoints with dirty policy: %s", self.endpoints_with_dirty_policy) while self.endpoints_with_dirty_policy: ep_id = self.endpoints_with_dirty_policy.pop() if self._is_starting_or_live(ep_id): self._update_tiered_policy(ep_id) def _update_tiered_policy(self, ep_id): """ Sends an updated list of tiered policy to an endpoint. Recalculates the list. :param ep_id: ID of the endpoint to send an update to. """ _log.debug("Updating policies for %s from %s", ep_id, self.pol_ids_by_ep_id) # Order the profiles by tier and profile order, using the name of the # tier and profile as a tie-breaker if the orders are the same. profiles = [] for pol_id in self.pol_ids_by_ep_id.iter_values(ep_id): try: tier_order = self.tier_orders[pol_id.tier] except KeyError: _log.warn("Ignoring profile %s because its tier metadata is " "missing.") continue profile_order = self.profile_orders[pol_id] profiles.append((tier_order, pol_id.tier, profile_order, pol_id.policy_id, pol_id)) profiles.sort() # Convert to an ordered dict from tier to list of profiles. pols_by_tier = OrderedDict() for _, tier, _, _, pol_id in profiles: pols_by_tier.setdefault(tier, []).append(pol_id) endpoint = self.objects_by_id[ep_id] endpoint.on_tiered_policy_update(pols_by_tier, async=True) def _on_worker_died(self, watch_greenlet): """ Greenlet: spawned by the gevent Hub if our worker thread dies. """ _log.critical("Worker greenlet died: %s; exiting.", watch_greenlet) sys.exit(1)
class EndpointManager(ReferenceManager): def __init__(self, config, ip_type, iptables_updater, dispatch_chains, rules_manager, fip_manager, status_reporter): super(EndpointManager, self).__init__(qualifier=ip_type) # Configuration and version to use self.config = config self.ip_type = ip_type self.ip_version = futils.IP_TYPE_TO_VERSION[ip_type] # Peers/utility classes. self.iptables_updater = iptables_updater self.dispatch_chains = dispatch_chains self.rules_mgr = rules_manager self.status_reporter = status_reporter self.fip_manager = fip_manager # All endpoint dicts that are on this host. self.endpoints_by_id = {} # Dict that maps from interface name ("tap1234") to endpoint ID. self.endpoint_id_by_iface_name = {} # Set of endpoints that are live on this host. I.e. ones that we've # increffed. self.local_endpoint_ids = set() # Index tracking what policy applies to what endpoints. self.policy_index = LabelValueIndex() self.policy_index.on_match_started = self.on_policy_match_started self.policy_index.on_match_stopped = self.on_policy_match_stopped self._label_inherit_idx = LabelInheritanceIndex(self.policy_index) # Tier orders by tier ID. We use this to look up the order when we're # sorting the tiers. self.tier_orders = {} # Cache of the current ordering of tier IDs. self.tier_sequence = [] # And their associated orders. self.profile_orders = {} # Set of profile IDs to apply to each endpoint ID. self.pol_ids_by_ep_id = MultiDict() self.endpoints_with_dirty_policy = set() self._data_model_in_sync = False def _create(self, combined_id): """ Overrides ReferenceManager._create() """ return LocalEndpoint(self.config, combined_id, self.ip_type, self.iptables_updater, self.dispatch_chains, self.rules_mgr, self.fip_manager, self.status_reporter) @actor_message() def on_tier_data_update(self, tier, data): """ Message received when the metadata for a policy tier is updated in etcd. :param str tier: The name of the tier. 
:param dict|NoneType data: The dict or None, for a deletion. """ _log.debug("Data for policy tier %s updated to %s", tier, data) # Currently, the only data we care about is the order. order = None if data is None else data["order"] if self.tier_orders.get(tier) == order: _log.debug("No change, ignoring") return if order is not None: self.tier_orders[tier] = order else: del self.tier_orders[tier] new_tier_sequence = sorted(self.tier_orders.iterkeys(), key=lambda k: (self.tier_orders[k], k)) if self.tier_sequence != new_tier_sequence: _log.info("Sequence of profile tiers changed, refreshing all " "endpoints") self.tier_sequence = new_tier_sequence self.endpoints_with_dirty_policy.update( self.endpoints_by_id.keys()) self._update_dirty_policy() @actor_message() def on_prof_labels_set(self, profile_id, labels): _log.debug("Profile labels updated for %s: %s", profile_id, labels) # Defer to the label index, which will call us back synchronously # with any match changes. self._label_inherit_idx.on_parent_labels_update(profile_id, labels) # Process any match changes that we've recorded in the callbacks. self._update_dirty_policy() @actor_message() def on_policy_selector_update(self, policy_id, selector_or_none, order_or_none): _log.debug("Policy %s selector updated to %s (%s)", policy_id, selector_or_none, order_or_none) # Defer to the label index, which will call us back synchronously # via on_policy_match_started and on_policy_match_stopped. self.policy_index.on_expression_update(policy_id, selector_or_none) # Before we update the policies, check if the order has changed, # which would mean we need to refresh all endpoints with this policy # too. if order_or_none != self.profile_orders.get(policy_id): if order_or_none is not None: self.profile_orders[policy_id] = order_or_none else: del self.profile_orders[policy_id] self.endpoints_with_dirty_policy.update( self.policy_index.matches_by_expr_id.iter_values(policy_id)) # Finally, flush any updates to our waiting endpoints. 
self._update_dirty_policy() def on_policy_match_started(self, expr_id, item_id): """Called by the label index when a new match is started. Records the update but processing is deferred to the next call to self._update_dirty_policy(). """ _log.info("Policy %s now applies to endpoint %s", expr_id, item_id) self.pol_ids_by_ep_id.add(item_id, expr_id) self.endpoints_with_dirty_policy.add(item_id) def on_policy_match_stopped(self, expr_id, item_id): """Called by the label index when a match stops. Records the update but processing is deferred to the next call to self._update_dirty_policy(). """ _log.info("Policy %s no longer applies to endpoint %s", expr_id, item_id) self.pol_ids_by_ep_id.discard(item_id, expr_id) self.endpoints_with_dirty_policy.add(item_id) def _on_object_started(self, endpoint_id, obj): """ Callback from a LocalEndpoint to report that it has started. Overrides ReferenceManager._on_object_started """ ep = self.endpoints_by_id.get(endpoint_id) obj.on_endpoint_update(ep, async=True) self._update_tiered_policy(endpoint_id) @actor_message() def on_datamodel_in_sync(self): if not self._data_model_in_sync: _log.info( "%s: First time we've been in-sync with the datamodel," "sending snapshot to DispatchChains and FIPManager.", self) self._data_model_in_sync = True # Tell the dispatch chains about the local endpoints in advance so # that we don't flap the dispatch chain at start-of-day. Note: # the snapshot may contain information that is ahead of the # state that our individual LocalEndpoint actors are sending to the # DispatchChains actor. That is OK! The worst that can happen is # that a LocalEndpoint undoes part of our update and then goes on # to re-apply the update when it catches up to the snapshot. 
local_ifaces = frozenset(self.endpoint_id_by_iface_name.keys()) self.dispatch_chains.apply_snapshot(local_ifaces, async=True) self._update_dirty_policy() nat_maps = {} for ep_id, ep in self.endpoints_by_id.iteritems(): if ep_id in self.local_endpoint_ids: nat_map = ep.get(nat_key(self.ip_type), None) if nat_map: nat_maps[ep_id] = nat_map self.fip_manager.apply_snapshot(nat_maps, async=True) @actor_message() def on_endpoint_update(self, endpoint_id, endpoint, force_reprogram=False): """ Event to indicate that an endpoint has been updated (including creation or deletion). :param EndpointId endpoint_id: The endpoint ID in question. :param dict[str]|NoneType endpoint: Dictionary of all endpoint data or None if the endpoint is to be deleted. """ if endpoint_id.host != self.config.HOSTNAME: _log.debug("Skipping endpoint %s; not on our host.", endpoint_id) return if self._is_starting_or_live(endpoint_id): # Local endpoint thread is running; tell it of the change. _log.info("Update for live endpoint %s", endpoint_id) self.objects_by_id[endpoint_id].on_endpoint_update( endpoint, force_reprogram=force_reprogram, async=True) old_ep = self.endpoints_by_id.pop(endpoint_id, {}) # Interface name shouldn't change but popping it now is correct for # deletes and we add it back in below on create/modify. old_iface_name = old_ep.get("name") self.endpoint_id_by_iface_name.pop(old_iface_name, None) if endpoint is None: # Deletion. Remove from the list. 
_log.info("Endpoint %s deleted", endpoint_id) if endpoint_id in self.local_endpoint_ids: self.decref(endpoint_id) self.local_endpoint_ids.remove(endpoint_id) self._label_inherit_idx.on_item_update(endpoint_id, None, None) assert endpoint_id not in self.pol_ids_by_ep_id else: # Creation or modification _log.info("Endpoint %s modified or created", endpoint_id) self.endpoints_by_id[endpoint_id] = endpoint self.endpoint_id_by_iface_name[endpoint["name"]] = endpoint_id if endpoint_id not in self.local_endpoint_ids: # This will trigger _on_object_activated to pass the endpoint # we just saved off to the endpoint. _log.debug("Endpoint wasn't known before, increffing it") self.local_endpoint_ids.add(endpoint_id) self.get_and_incref(endpoint_id) self._label_inherit_idx.on_item_update( endpoint_id, endpoint.get("labels", {}), endpoint.get("profile_ids", [])) self._update_dirty_policy() @actor_message() def on_interface_update(self, name, iface_up): """ Called when an interface is created or changes state. The interface may be any interface on the host, not necessarily one managed by any endpoint of this server. """ try: endpoint_id = self.endpoint_id_by_iface_name[name] except KeyError: _log.debug("Update on interface %s that we do not care about", name) else: _log.info("Endpoint %s received interface update for %s", endpoint_id, name) if self._is_starting_or_live(endpoint_id): # LocalEndpoint is running, so tell it about the change. 
ep = self.objects_by_id[endpoint_id] ep.on_interface_update(iface_up, async=True) def _update_dirty_policy(self): if not self._data_model_in_sync: _log.debug("Datamodel not in sync, postponing update to policy") return _log.debug("Endpoints with dirty policy: %s", self.endpoints_with_dirty_policy) while self.endpoints_with_dirty_policy: ep_id = self.endpoints_with_dirty_policy.pop() if self._is_starting_or_live(ep_id): self._update_tiered_policy(ep_id) def _update_tiered_policy(self, ep_id): """ Sends an updated list of tiered policy to an endpoint. Recalculates the list. :param ep_id: ID of the endpoint to send an update to. """ _log.debug("Updating policies for %s from %s", ep_id, self.pol_ids_by_ep_id) # Order the profiles by tier and profile order, using the name of the # tier and profile as a tie-breaker if the orders are the same. profiles = [] for pol_id in self.pol_ids_by_ep_id.iter_values(ep_id): try: tier_order = self.tier_orders[pol_id.tier] except KeyError: _log.warn("Ignoring profile %s because its tier metadata is " "missing.") continue profile_order = self.profile_orders[pol_id] profiles.append((tier_order, pol_id.tier, profile_order, pol_id.policy_id, pol_id)) profiles.sort() # Convert to an ordered dict from tier to list of profiles. pols_by_tier = OrderedDict() for _, tier, _, _, pol_id in profiles: pols_by_tier.setdefault(tier, []).append(pol_id) endpoint = self.objects_by_id[ep_id] endpoint.on_tiered_policy_update(pols_by_tier, async=True)