def handle_endpointdestroyed(self, message, sock):
    """
    Handles an ENDPOINTDESTROYED message.

    ENDPOINTDESTROYED is an active notification that an endpoint is going
    away. A known endpoint is removed from ``self.endpoints``, its ACL
    subscription is dropped and ``endpoint.remove()`` is called.

    A reply is always sent on the EP REP socket so that the requester is
    never left waiting:

    - RC_SUCCESS when the endpoint was removed;
    - RC_NOTEXIST when the endpoint ID is not known;
    - RC_INVALID when the message carries no ``endpoint_id`` field.

    :param message: Received message; ``message.fields`` is expected to
        hold an ``endpoint_id`` entry.
    :param sock: Socket the message arrived on. Not used here; the reply
        goes out on the EP REP socket looked up in ``self.sockets``.
    """
    log.debug("Received endpoint destroy: %s", message.fields)

    try:
        delete_id = message.fields['endpoint_id']
    except KeyError:
        log.error("Received endpoint destroy with no endpoint_id: %s",
                  message.fields)
        fields = {
            "rc": RC_INVALID,
            "message": "Missing \"endpoint_id\" field",
        }
    else:
        try:
            endpoint = self.endpoints.pop(delete_id)
        except KeyError:
            log.error("Received destroy for absent endpoint %s", delete_id)
            fields = {
                "rc": RC_NOTEXIST,
                "message": "Endpoint %s does not exist" % delete_id,
            }
        else:
            # Unsubscribe endpoint, then remove its programming.
            acl_sub = self.sockets[Socket.TYPE_ACL_SUB]
            acl_sub._zmq.setsockopt(zmq.UNSUBSCRIBE,
                                    delete_id.encode('utf-8'))
            endpoint.remove()
            fields = {"rc": RC_SUCCESS, "message": ""}

    # Send the response, whatever the outcome was.
    reply_sock = self.sockets[Socket.TYPE_EP_REP]
    reply_sock.send(Message(Message.TYPE_EP_RM, fields))
def _create_endpoint(self, endpoint_id, mac, interface):
    """
    Builds and registers an endpoint that Felix has just been told about.

    The new Endpoint is stored in ``self.endpoints`` under its ID, the ACL
    SUB socket is subscribed to that ID, and a TYPE_GET_ACL request for the
    endpoint is handed to ``send_request`` for the ACL REQ socket.

    Only call this for an endpoint that Felix does not already know about;
    an existing entry with the same ID would simply be overwritten.

    :returns: The newly created Endpoint.
    """
    log.debug("Create endpoint %s", endpoint_id)

    new_endpoint = Endpoint(endpoint_id, mac, interface, self.iface_prefix)
    self.endpoints[endpoint_id] = new_endpoint

    # Subscribe first, so that updates published after our request is
    # issued are not missed.
    acl_sub = self.sockets[Socket.TYPE_ACL_SUB]
    acl_sub.subscribe(endpoint_id.encode('utf-8'))

    # Now ask for the current ACL state of this endpoint.
    acl_request = Message(
        Message.TYPE_GET_ACL,
        {'endpoint_id': endpoint_id, 'issued': futils.time_ms()})
    self.send_request(acl_request, Socket.TYPE_ACL_REQ)

    return new_endpoint
def resync_endpoints(self):
    """
    Starts a total resynchronisation of endpoint state.

    Called both periodically and during initialisation. A fresh resync ID
    is generated, the resync counters are reset, every known endpoint is
    flagged as pending resync, the ACL REQ socket's queue is emptied and a
    TYPE_RESYNC request is handed to ``send_request``.
    """
    new_resync_id = str(uuid.uuid4())
    self.resync_id = new_resync_id
    self.resync_recd = 0
    self.resync_expected = None

    # Log the version here so that it shows up periodically, in case we
    # have to debug from logs that do not cover Felix starting.
    version = pkg_resources.get_distribution('calico')
    log.info("Do total resync - ID : %s (version: %s)",
             new_resync_id, version)

    # Every endpoint now expects to be resynchronized.
    for known_endpoint in self.endpoints.values():
        known_endpoint.pending_resync = True

    # ACLs are about to be requested for all endpoints as well, so anything
    # still queued on the ACL request socket can go.
    acl_req = self.sockets[Socket.TYPE_ACL_REQ]
    acl_req.clear_queue()

    # Send the RESYNCSTATE message.
    resync_request = Message(
        Message.TYPE_RESYNC,
        {
            'resync_id': self.resync_id,
            'issued': futils.time_ms(),
            'hostname': self.hostname,
        })
    self.send_request(resync_request, Socket.TYPE_EP_REQ)
def resync_endpoints(self):
    """
    Starts a total resynchronisation of endpoint state.

    Called both periodically and during initialisation. A fresh resync ID
    is generated, the resync counters are reset, every known endpoint is
    flagged as pending resync, both outgoing queues are emptied and a
    TYPE_RESYNC request is handed to ``send_request``.
    """
    self.resync_id = str(uuid.uuid4())
    self.resync_recd = 0
    self.resync_expected = None

    # Pass the ID as a logging argument rather than pre-formatting it, so
    # the string is only built if the record is actually emitted.
    log.info("Do total resync - ID : %s", self.resync_id)

    # Mark all the endpoints as expecting to be resynchronized.
    for ep in self.endpoints.values():
        ep.pending_resync = True

    # Anything queued up to send is superseded by the resync. ACLs are
    # about to be requested for all endpoints too, so that queue is
    # cleared as well.
    self.endpoint_queue.clear()
    self.acl_queue.clear()

    # Send the RESYNCSTATE message.
    fields = {
        'resync_id': self.resync_id,
        'issued': futils.time_ms(),
        'hostname': self.hostname,
    }
    self.send_request(Message(Message.TYPE_RESYNC, fields),
                      Socket.TYPE_EP_REQ)
def handle_heartbeat(self, message, sock):
    """
    Handles a HEARTBEAT request or response.

    A heartbeat seen on the EP REQ or ACL REQ socket is a response to one
    we sent and is only logged. Otherwise the socket must be the EP REP
    socket, and a HEARTBEAT message is sent back on it.
    """
    request_types = (Socket.TYPE_EP_REQ, Socket.TYPE_ACL_REQ)

    if sock.type in request_types:
        log.debug("Received heartbeat response on %s socket", sock.type)
        return

    # Anything else has to be a heartbeat request from the peer.
    assert (sock.type == Socket.TYPE_EP_REP)
    log.debug("Received heartbeat message on EP REP socket")
    reply = Message(Message.TYPE_HEARTBEAT, {})
    sock.send(reply)
def resync_acls(self):
    """
    Initiates a full ACL resynchronisation procedure.

    The ACL queue is emptied, then one TYPE_GET_ACL request per known
    endpoint ID is handed to ``send_request`` for the ACL REQ socket.
    """
    # ACL resynchronization involves requesting ACLs for all endpoints
    # for which we have an ID.
    self.acl_queue.clear()

    # Only the IDs are needed, so iterate the keys directly.
    for endpoint_id in self.endpoints:
        fields = {
            'endpoint_id': endpoint_id,
            'issued': futils.time_ms(),
        }
        self.send_request(Message(Message.TYPE_GET_ACL, fields),
                          Socket.TYPE_ACL_REQ)
def resync_acls(self):
    """
    Initiates a full ACL resynchronisation procedure.

    The ACL REQ socket's queue is emptied, then one TYPE_GET_ACL request
    per known endpoint ID is handed to ``send_request`` for that socket.
    """
    # ACL resynchronization involves requesting ACLs for all endpoints
    # for which we have an ID. That means any queued requests are really
    # no longer relevant as they are duplicates.
    self.sockets[Socket.TYPE_ACL_REQ].clear_queue()

    # Only the IDs are needed, so iterate the keys directly.
    for endpoint_id in self.endpoints:
        fields = {
            'endpoint_id': endpoint_id,
            'issued': futils.time_ms(),
        }
        self.send_request(Message(Message.TYPE_GET_ACL, fields),
                          Socket.TYPE_ACL_REQ)
def handle_endpointdestroyed(self, message, sock):
    """
    Handles an ENDPOINTDESTROYED message.

    ENDPOINTDESTROYED is an active notification that an endpoint is going
    away. A known endpoint is dropped from ``self.endpoints``, its ACL
    subscription is cancelled and its programming is removed. A response
    is always sent back on ``sock`` with one of RC_SUCCESS, RC_NOTEXIST
    (unknown endpoint) or RC_INVALID (malformed request).
    """
    log.debug("Received endpoint destroy: %s", message.fields)

    # Outcome to report; success unless something below says otherwise.
    rc, text = RC_SUCCESS, ""

    try:
        # The endpoint ID is mandatory.
        if 'endpoint_id' not in message.fields:
            raise InvalidRequest("Missing \"endpoint_id\" field",
                                 message.fields)
        delete_id = message.fields['endpoint_id']

        try:
            # Stop managing this endpoint.
            endpoint = self.endpoints.pop(delete_id)
        except KeyError:
            log.error("Received destroy for absent endpoint %s", delete_id)
            rc = RC_NOTEXIST
            text = "Endpoint %s does not exist" % delete_id
        else:
            # No more ACL information is wanted for this endpoint.
            acl_sub = self.sockets[Socket.TYPE_ACL_SUB]
            acl_sub.unsubscribe(delete_id.encode('utf-8'))

            # Tear down whatever was programmed for it.
            endpoint.remove(self.iptables_state)

    except InvalidRequest as error:
        rc, text = RC_INVALID, error.message
        log.error(
            "Got invalid ENDPOINTDESTROYED message : %s, "
            "request fields %s", error.message, error.fields)

    # Send the response.
    response = Message(Message.TYPE_EP_RM, {"rc": rc, "message": text})
    sock.send(response)
def handle_endpointupdated(self, message, sock):
    """
    Handles an ENDPOINTUPDATED message.

    The logic closely mirrors ENDPOINTCREATED, except that an unknown
    endpoint is never created here: it is reported back as RC_NOTEXIST.
    A response is always sent on ``sock`` with RC_SUCCESS, RC_NOTEXIST or
    RC_INVALID (malformed request).
    """
    log.debug("Received endpoint update: %s", message.fields)

    # Outcome to report; success unless something below says otherwise.
    rc, text = RC_SUCCESS, ""

    try:
        # The endpoint ID is mandatory.
        if 'endpoint_id' not in message.fields:
            raise InvalidRequest("Missing \"endpoint_id\" field",
                                 message.fields)
        endpoint_id = message.fields['endpoint_id']

        try:
            endpoint = self.endpoints[endpoint_id]
        except KeyError:
            log.error("Received update for absent endpoint %s", endpoint_id)
            rc = RC_NOTEXIST
            text = "Endpoint %s does not exist" % endpoint_id
        else:
            # Apply the new state; this can fail with InvalidRequest.
            self._update_endpoint(endpoint, message.fields)

    except InvalidRequest as error:
        rc, text = RC_INVALID, error.message
        log.error(
            "Got invalid ENDPOINTUPDATED message : %s, "
            "request fields %s", error.message, error.fields)

    # Send the response.
    response = Message(Message.TYPE_EP_UP, {"rc": rc, "message": text})
    sock.send(response)
def handle_endpointupdated(self, message, sock):
    """
    Handles an ENDPOINTUPDATED message.

    This has very similar logic to ENDPOINTCREATED, but does not actually
    create new endpoints. A reply is always sent on the EP REP socket:

    - RC_SUCCESS when the endpoint was updated;
    - RC_NOTEXIST when the endpoint ID is not known;
    - RC_INVALID when ``endpoint_id`` is missing or ``_update_endpoint``
      rejects the request with InvalidRequest.

    :param message: Received message; ``message.fields`` is expected to
        hold an ``endpoint_id`` entry.
    :param sock: Socket the message arrived on. Not used here; the reply
        goes out on the EP REP socket looked up in ``self.sockets``.
    """
    log.debug("Received endpoint update: %s", message.fields)

    try:
        # Get the endpoint ID from the message.
        endpoint_id = message.fields['endpoint_id']
    except KeyError:
        log.error("Received endpoint update with no endpoint_id: %s",
                  message.fields)
        fields = {
            "rc": RC_INVALID,
            "message": "Missing \"endpoint_id\" field",
        }
    else:
        try:
            # Only the lookup is guarded by KeyError, so that a KeyError
            # raised inside _update_endpoint is not misreported as an
            # unknown endpoint.
            endpoint = self.endpoints[endpoint_id]
        except KeyError:
            log.error("Received update for absent endpoint %s", endpoint_id)
            fields = {
                "rc": RC_NOTEXIST,
                "message": "Endpoint %s does not exist" % endpoint_id,
            }
        else:
            try:
                # Update the endpoint state; this can fail.
                self._update_endpoint(endpoint, message.fields)
            except InvalidRequest as error:
                # Invalid request fields. Return an error.
                fields = {
                    "rc": RC_INVALID,
                    "message": error.value,
                }
            else:
                fields = {
                    "rc": RC_SUCCESS,
                    "message": "",
                }

    # Now we send the response.
    reply_sock = self.sockets[Socket.TYPE_EP_REP]
    reply_sock.send(Message(Message.TYPE_EP_UP, fields))
def handle_endpointcreated(self, message, sock):
    """
    Handles an ENDPOINTCREATED message.

    ENDPOINTCREATED arrives in two cases: as part of a state
    resynchronization, or to tell Felix about a new endpoint to manage.
    The mandatory fields are checked, the endpoint is created if it is not
    yet known and then updated, resync progress is tracked, and a response
    (RC_SUCCESS or RC_INVALID) is always sent back on ``sock``.
    """
    log.debug("Received endpoint create: %s", message.fields)

    # Mandatory fields with the complaint raised when one is absent, in
    # the order in which they are checked.
    mandatory = (
        ('endpoint_id', "Missing \"endpoint_id\" field"),
        ('mac', "Missing \"mac\" field"),
        ('resync_id', "Missing \"resync_id\" field"),
        ('interface_name', "Missing \"interface_name\" field"),
    )

    # Default to success.
    response = {"rc": RC_SUCCESS, "message": ""}

    try:
        values = {}
        for name, complaint in mandatory:
            try:
                values[name] = message.fields[name]
            except KeyError:
                raise InvalidRequest(complaint, message.fields)

        endpoint_id = values['endpoint_id']
        mac = values['mac']
        resync_id = values['resync_id']
        interface = values['interface_name']

        if not interface.startswith(self.iface_prefix):
            raise InvalidRequest(
                "Interface \"%s\" does not start with \"%s\"" %
                (interface, self.iface_prefix),
                message.fields)

        endpoint = self.endpoints.get(endpoint_id)

        if endpoint is None:
            # New endpoint.
            endpoint = self._create_endpoint(endpoint_id, mac, interface)
        elif resync_id is None:
            # Known endpoint, and not a resync; accept, but log.
            log.warning(
                "Received endpoint creation for existing endpoint %s",
                endpoint_id)
        else:
            # Known endpoint seen again as part of a resync.
            endpoint.pending_resync = False

        # Update the endpoint state; this can fail with InvalidRequest.
        self._update_endpoint(endpoint, message.fields)

        part_of_resync = bool(resync_id)

        if part_of_resync and resync_id == self.resync_id:
            # Part of the most recent resync. Count this ENDPOINTCREATED
            # and, if it is the last one expected, complete the resync.
            self.resync_recd += 1

            if self.resync_expected is not None:
                log.debug(
                    "Received ENDPOINTCREATED for resync, %d out of %d",
                    self.resync_recd, self.resync_expected)
            else:
                # resync_expected not set - resync response pending
                log.debug(
                    "Received ENDPOINTCREATED number %d for resync "
                    "before resync response", self.resync_recd)

            if self.resync_recd == self.resync_expected:
                self.complete_endpoint_resync(True)

        elif part_of_resync:
            # ENDPOINTCREATED for the wrong resync. This can happen
            # (perhaps we restarted during a resync and are seeing
            # messages from that old resync). Log it though, since this
            # is very unusual and strange.
            log.warning(
                "Received ENDPOINTCREATED for %s with invalid "
                "resync %s (expected %s)",
                endpoint_id, resync_id, self.resync_id)

    except InvalidRequest as error:
        response = {
            "rc": RC_INVALID,
            "message": error.message,
        }
        log.error(
            "Got invalid ENDPOINTCREATED message : %s, "
            "request fields %s", error.message, error.fields)

    # Send the response.
    sock.send(Message(Message.TYPE_EP_CR, response))
def run(self):
    """
    Executes one iteration of the main agent loop.

    In order, one iteration:

    1. polls every socket for up to ``config.EP_RETRY_INT_MS`` and passes
       each received message to the handler registered for its type;
    2. closes and recreates any socket that has timed out, noting which
       resync that loss requires and flushing the matching send queue;
    3. sends one queued message, or a heartbeat if a keepalive is due, on
       each of the EP REQ and ACL REQ sockets when no request is
       outstanding;
    4. starts an endpoint resync (periodic or forced) or, failing that, an
       ACL resync if one is needed;
    5. retries programming of every endpoint listed in ``self.ep_retry``.
    """
    endpoint_resync_needed = False
    acl_resync_needed = False

    # Issue a poll request on all active sockets. It is easier just to
    # poll all of them, even if we expect nothing on some.
    poller = zmq.Poller()
    for sock in self.sockets.values():
        poller.register(sock._zmq, zmq.POLLIN)

    polled_sockets = dict(poller.poll(self.config.EP_RETRY_INT_MS))

    # Get all the sockets with activity.
    active_sockets = (s for s in self.sockets.values()
                      if s._zmq in polled_sockets
                      and polled_sockets[s._zmq] == zmq.POLLIN)

    # For each active socket, pull the message off and handle it.
    for sock in active_sockets:
        message = sock.receive()
        if message is not None:
            self.handlers[message.type](message, sock)

    for sock in self.sockets.values():
        # See if anything else is required on this socket. A timed out
        # socket needs to be reconnected, and whatever API it belongs to
        # needs to be resynchronised.
        if sock.timed_out():
            log.warning("Socket %s timed out", sock.type)
            sock.close()

            # If we lost the connection on which we would receive
            # ENDPOINTCREATED messages, we need to trigger a total
            # endpoint resync, and similarly for ACLs if we have lost the
            # connection on which we would receive ACLUPDATE messages.
            if sock.type == Socket.TYPE_EP_REP:
                endpoint_resync_needed = True
            elif sock.type == Socket.TYPE_ACL_SUB:
                acl_resync_needed = True

            # Flush the message queue.
            if sock.type == Socket.TYPE_EP_REQ:
                self.endpoint_queue.clear()
            elif sock.type == Socket.TYPE_ACL_REQ:
                self.acl_queue.clear()

            # Recreate the socket.
            sock.communicate(self.hostname, self.zmq_context)

            # If this is the ACL SUB socket, then subscribe for all
            # endpoints.
            if sock.type == Socket.TYPE_ACL_SUB:
                for endpoint_id in self.endpoints:
                    sock._zmq.setsockopt(zmq.SUBSCRIBE,
                                         endpoint_id.encode('utf-8'))

    # If we have any queued messages to send, we should do so.
    endpoint_socket = self.sockets[Socket.TYPE_EP_REQ]
    acl_socket = self.sockets[Socket.TYPE_ACL_REQ]

    if self.endpoint_queue and not endpoint_socket.request_outstanding:
        message = self.endpoint_queue.pop()
        endpoint_socket.send(message)
    elif (endpoint_socket.keepalive_due() and
          not endpoint_socket.request_outstanding):
        endpoint_socket.send(Message(Message.TYPE_HEARTBEAT, {}))

    if self.acl_queue and not acl_socket.request_outstanding:
        message = self.acl_queue.pop()
        acl_socket.send(message)
    elif (acl_socket.keepalive_due() and
          not acl_socket.request_outstanding):
        acl_socket.send(Message(Message.TYPE_HEARTBEAT, {}))

    # Now, check if we need to resynchronize and do it. The periodic
    # resync is only considered while self.resync_id is None.
    if (self.resync_id is None and
            (futils.time_ms() - self.resync_time >
             self.config.RESYNC_INT_SEC * 1000)):
        # Time for a total resync of all endpoints
        endpoint_resync_needed = True

    if endpoint_resync_needed:
        self.resync_endpoints()
    elif acl_resync_needed:
        # Note that an endpoint resync implicitly involves an ACL resync,
        # so there is no point in triggering one when an endpoint resync
        # has just started (as opposed to when we are in the middle of an
        # endpoint resync and just lost our connection).
        self.resync_acls()

    # Finally, retry any endpoints which need retrying. We remove them
    # from ep_retry if they no longer exist or if the retry succeeds; the
    # simplest way to do this is to copy the list, clear ep_retry then add
    # them back if necessary.
    retry_list = list(self.ep_retry)
    self.ep_retry.clear()

    # The loop variable is deliberately not called "uuid", so that it
    # cannot be confused with the uuid module used elsewhere in this file.
    for retry_id in retry_list:
        if retry_id in self.endpoints:
            endpoint = self.endpoints[retry_id]
            log.debug("Retry program of %s", endpoint.suffix)
            # A true result from program_endpoint() is treated as
            # "failed again".
            if endpoint.program_endpoint():
                # Failed again - put back on list
                self.ep_retry.add(retry_id)
            else:
                # Programmed OK, so apply any ACLs we might have.
                endpoint.update_acls()
        else:
            log.debug("No retry programming %s - no longer exists",
                      retry_id)
def handle_endpointcreated(self, message, sock):
    """
    Handles an ENDPOINTCREATED message.

    ENDPOINTCREATED can be received in two cases: either as part of a
    state resynchronization, or to notify Felix of a new endpoint to
    manage.

    A reply is always sent on the EP REP socket: RC_SUCCESS when the
    endpoint state was applied, RC_INVALID when a mandatory field
    (``endpoint_id`` or ``mac``) is missing or when ``_update_endpoint``
    rejects the request. A request with a missing mandatory field is
    rejected before any state is touched and is not counted towards a
    resync.

    :param message: Received message; ``message.fields`` carries
        ``endpoint_id``, ``mac`` and optionally ``resync_id``.
    :param sock: Socket the message arrived on. Not used here; the reply
        goes out on the EP REP socket looked up in ``self.sockets``.
    """
    log.debug("Received endpoint create: %s", message.fields)

    reply_sock = self.sockets[Socket.TYPE_EP_REP]

    # Firewall missing mandatory fields: answer with an error instead of
    # failing with an unhandled KeyError and leaving the request
    # unanswered.
    for name in ('endpoint_id', 'mac'):
        if name not in message.fields:
            text = "Missing \"%s\" field" % name
            log.error(
                "Got invalid ENDPOINTCREATED message : %s, "
                "request fields %s", text, message.fields)
            fields = {"rc": RC_INVALID, "message": text}
            reply_sock.send(Message(Message.TYPE_EP_CR, fields))
            return

    endpoint_id = message.fields['endpoint_id']
    mac = message.fields['mac']
    resync_id = message.fields.get('resync_id')

    # First, check whether we know about this endpoint already. If we do,
    # we should raise a warning log unless we're in the middle of a resync.
    endpoint = self.endpoints.get(endpoint_id)

    if endpoint is not None and resync_id is None:
        log.warning("Received endpoint creation for existing endpoint %s",
                    endpoint_id)
    elif endpoint is not None and resync_id is not None:
        endpoint.pending_resync = False
    elif endpoint is None:
        # NOTE(review): the _create_endpoint visible elsewhere in this
        # file also takes an interface argument - confirm this call
        # matches the intended version of that method.
        endpoint = self._create_endpoint(endpoint_id, mac)

    try:
        # Update the endpoint state; this can fail.
        self._update_endpoint(endpoint, message.fields)
    except InvalidRequest as error:
        # Invalid request fields. Return an error.
        fields = {
            "rc": RC_INVALID,
            "message": error.value,
        }
    else:
        fields = {
            "rc": RC_SUCCESS,
            "message": "",
        }

    # Now we send the response.
    reply_sock.send(Message(Message.TYPE_EP_CR, fields))

    if resync_id:
        # This endpoint created was part of a resync.
        if resync_id == self.resync_id:
            # It was part of the most recent resync. Increment how many
            # ENDPOINTCREATED requests we have received, and if this is
            # the last one expected, complete the resync.
            self.resync_recd += 1

            if self.resync_expected is None:
                # resync_expected not set - resync response pending
                log.debug(
                    "Received ENDPOINTCREATED number %d for resync "
                    "before resync response", self.resync_recd)
            else:
                log.debug(
                    "Received ENDPOINTCREATED for resync, %d out of %d",
                    self.resync_recd, self.resync_expected)

            if self.resync_recd == self.resync_expected:
                self.complete_endpoint_resync(True)
        else:
            # We just got an ENDPOINTCREATED for the wrong resync. This
            # can happen (perhaps we restarted during a resync and are
            # seeing messages from that old resync). Log it though, since
            # this is very unusual and strange.
            log.warning(
                "Received ENDPOINTCREATED for %s with invalid "
                "resync %s (expected %s)",
                endpoint_id, resync_id, self.resync_id)