class Peer (object):
    """Drive one BGP neighbor session for the reactor.

    A Peer owns the finite state machine of a single neighbor: it connects
    (or accepts an incoming connection), exchanges OPEN messages, runs the
    main update/keepalive loop and handles teardown/restart.  All the long
    running work is written as generators yielding ACTION values so the
    reactor can cooperatively schedule many peers at once.
    """

    def __init__ (self, neighbor, reactor):
        # environment.settings() raises RuntimeError when the environment
        # was not initialised (e.g. under some tests); use safe defaults then
        try:
            self.logger = Logger()
            # We only to try to connect via TCP once
            self.once = environment.settings().tcp.once
            self.bind = True if environment.settings().tcp.bind else False
        except RuntimeError:
            self.logger = FakeLogger()
            self.once = False
            self.bind = True

        now = time.time()

        self.reactor = reactor
        self.neighbor = neighbor
        # The next restart neighbor definition
        self._neighbor = None

        self.proto = None
        self.fsm = FSM(self, FSM.IDLE)
        # session statistics; 'complete' stays 0 until ESTABLISHED is reached
        self.stats = {
            'fsm': self.fsm,
            'creation': now,
            'reset': now,
            'complete': 0,
        }
        # None: ready to (re)start, False: stopped for good, else: running generator
        self.generator = None

        # The peer should restart after a stop
        self._restart = True
        # The peer was restarted (to know what kind of open to send for graceful restart)
        self._restarted = FORCE_GRACEFUL
        # We want to remove routes which are not in the configuration anymore after a signal to reload
        self._reconfigure = True
        # We want to send all the known routes
        self._resend_routes = SEND.DONE
        # We have been asked to teardown the session with this code
        self._teardown = None

        self._delay = Delay()
        self.recv_timer = None

    def id (self):
        """Return a short unique identifier for this peer (used in logs)."""
        return 'peer-%s' % self.neighbor.uid

    def _reset (self, message='', error=''):
        """Return the peer to IDLE, closing any protocol session.

        Depending on self._restart, either prepare for a later reconnection
        (generator set to None) or mark the peer as done (generator False).
        """
        self.fsm.change(FSM.IDLE)
        self.stats = {
            'fsm': self.fsm,
            'creation': self.stats['creation'],
            'reset': time.time(),
            'complete': 0,
        }
        if self.proto:
            # formatting may fail when `error` holds undecodable bytes
            try:
                message = u"peer reset, message [{0}] error[{1}]".format(message, error)
            except UnicodeDecodeError as msg_err:
                message = u"peer reset, message [{0}] error[{1}]".format(message, msg_err)
            self.proto.close(message)
        self._delay.increase()

        self.proto = None

        if not self._restart or self.neighbor.generated:
            self.generator = False
            return

        self.generator = None
        self._teardown = None
        self.neighbor.rib.reset()

        # If we are restarting, and the neighbor definition is different, update the neighbor
        if self._neighbor:
            self.neighbor = self._neighbor
            self._neighbor = None

    def _stop (self, message):
        """Close the protocol session (if any), reporting `message` as the reason."""
        self.generator = None
        if self.proto:
            self.proto.close('stop, message [%s]' % message)
        self.proto = None

    # logging

    def me (self, message):
        """Prefix a log line with this peer's address and ASN."""
        return "peer %s ASN %-7s %s" % (self.neighbor.peer_address, self.neighbor.peer_as, message)

    # control

    def stop (self):
        """Stop the peer for good: no restart, back to IDLE, drop the RIB cache."""
        self._teardown = 3
        self._restart = False
        self._restarted = False
        self._delay.reset()
        self.fsm.change(FSM.IDLE)
        self.stats = {
            'fsm': self.fsm,
            'creation': self.stats['creation'],
            'reset': time.time(),
            # fix: 'complete' was dropped here, which made cli_data() raise
            # KeyError on self.stats['complete'] after a stop
            'complete': 0,
        }
        self.neighbor.rib.uncache()

    def remove (self):
        """Tear down the session: the neighbor was removed from the configuration."""
        self._stop("removed")
        self.stop()

    def shutdown (self):
        """Tear down the session: the daemon is shutting down."""
        self._stop("shutting down")
        self.stop()

    def resend (self):
        """Ask the main loop to re-announce all known routes."""
        self._resend_routes = SEND.NORMAL
        self._delay.reset()

    def reestablish (self, restart_neighbor=None):
        """Tear the session down and re-establish it (optionally with a new neighbor definition)."""
        # we want to tear down the session and re-establish it
        self._teardown = 3
        self._restart = True
        self._restarted = True
        self._resend_routes = SEND.NORMAL
        self._neighbor = restart_neighbor
        self._delay.reset()

    def reconfigure (self, restart_neighbor=None):
        """Apply a configuration reload without restarting the session."""
        # we want to update the route which were in the configuration file
        self._reconfigure = True
        self._resend_routes = SEND.NORMAL
        # fix: self._neighbor was assigned twice with the same value
        self._neighbor = restart_neighbor

    def teardown (self, code, restart=True):
        """Request the main loop to close the session with notification `code`."""
        self._restart = restart
        self._teardown = code
        self._delay.reset()

    # sockets we must monitor

    def sockets (self):
        """Return the list of file descriptors the reactor should watch."""
        if self.proto:
            fd = self.proto.fd()
            if fd:
                return [fd]
        return []

    def handle_connection (self, connection):
        """Handle an incoming TCP connection for this neighbor.

        Implements the RFC 4271 section 6.8 collision-resolution rules and
        either rejects the connection with a notification or adopts it as
        the peer's protocol session.  Returns None when accepted.
        """
        self.logger.debug("state machine for the peer is %s" % self.fsm.name(), self.id())

        # if the other side fails, we go back to idle
        if self.fsm == FSM.ESTABLISHED:
            self.logger.debug('we already have a peer in state established for %s' % connection.name(), self.id())
            return connection.notification(6, 7, 'could not accept the connection, already established')

        # 6.8 The convention is to compare the BGP Identifiers of the peers
        # involved in the collision and to retain only the connection initiated
        # by the BGP speaker with the higher-valued BGP Identifier.
        # FSM.IDLE , FSM.ACTIVE , FSM.CONNECT , FSM.OPENSENT , FSM.OPENCONFIRM , FSM.ESTABLISHED

        if self.fsm == FSM.OPENCONFIRM:
            # We cheat: we are not really reading the OPEN, we use the data we have instead
            # it does not matter as the open message will be the same anyway
            local_id = self.neighbor.router_id.pack()
            remote_id = self.proto.negotiated.received_open.router_id.pack()

            if remote_id < local_id:
                self.logger.debug('closing incoming connection as we have an outgoing connection with higher router-id for %s' % connection.name(), self.id())
                return connection.notification(6, 7, 'could not accept the connection, as another connection is already in open-confirm and will go through')

        # accept the connection
        if self.proto:
            self.logger.debug('closing outgoing connection as we have another incoming on with higher router-id for %s' % connection.name(), self.id())
            self.proto.close('closing outgoing connection as we have another incoming on with higher router-id')

        self.proto = Protocol(self).accept(connection)
        self.generator = None
        # Let's make sure we do some work with this connection
        self._delay.reset()
        return None

    def established (self):
        """Return True when the session reached the ESTABLISHED state."""
        return self.fsm == FSM.ESTABLISHED

    def negotiated_families (self):
        """Return a human-readable summary of the negotiated (or configured) families."""
        if self.proto:
            families = ["%s/%s" % (x[0], x[1]) for x in self.proto.negotiated.families]
        else:
            families = ["%s/%s" % (x[0], x[1]) for x in self.neighbor.families()]

        if len(families) > 1:
            return "[ %s ]" % " ".join(families)
        elif len(families) == 1:
            return families[0]
        return ''

    def _connect (self):
        """Generator: attempt the outgoing TCP connection, yielding ACTION values."""
        proto = Protocol(self)
        generator = proto.connect()

        connected = False
        try:
            for connected in generator:
                if connected:
                    break
                if self._teardown:
                    raise Stop()
                # we want to come back as soon as possible
                yield ACTION.LATER
            self.proto = proto
        except Stop:
            # Connection failed
            if not connected and self.proto:
                self.proto.close('connection to %s:%d failed' % (self.neighbor.peer_address, self.neighbor.connect))

            # A connection arrived before we could establish !
            if not connected or self.proto:
                yield ACTION.NOW
                raise Interrupted()

    def _send_open (self):
        """Generator: send our OPEN, yielding ACTION.NOW while it is in flight, then the OPEN itself."""
        message = Message.CODE.NOP
        for message in self.proto.new_open():
            if ordinal(message.TYPE) == Message.CODE.NOP:
                yield ACTION.NOW
        yield message

    def _read_open (self):
        """Generator: read the peer's OPEN (bounded by a timer), then yield the OPEN itself."""
        wait = environment.settings().bgp.openwait
        opentimer = ReceiveTimer(self.proto.connection.session, wait, 1, 1, 'waited for open too long, we do not like stuck in active')
        # Only yield if we have not the open, otherwise the reactor can run the other connection
        # which would be bad as we need to do the collission check without going to the other peer
        for message in self.proto.read_open(self.neighbor.peer_address.top()):
            opentimer.check_ka(message)
            # XXX: FIXME: change the whole code to use the ord and not the chr version
            # Only yield if we have not the open, otherwise the reactor can run the other connection
            # which would be bad as we need to do the collission check
            if ordinal(message.TYPE) == Message.CODE.NOP:
                # If a peer does not reply to OPEN message, or not enough bytes
                # yielding ACTION.NOW can cause ExaBGP to busy spin trying to
                # read from peer. See GH #723 .
                yield ACTION.LATER
        yield message

    def _send_ka (self):
        """Generator: send the initial KEEPALIVE confirming our OPEN."""
        for message in self.proto.new_keepalive('OPENCONFIRM'):
            yield ACTION.NOW

    def _read_ka (self):
        """Generator: wait for the peer's KEEPALIVE confirming its OPEN."""
        # Start keeping keepalive timer
        for message in self.proto.read_keepalive():
            self.recv_timer.check_ka_timer(message)
            yield ACTION.NOW

    def _establish (self):
        """Generator: run the FSM from ACTIVE to ESTABLISHED (connect, OPEN exchange, KA exchange)."""
        # try to establish the outgoing connection
        self.fsm.change(FSM.ACTIVE)

        if not self.proto:
            for action in self._connect():
                if action in ACTION.ALL:
                    yield action
        self.fsm.change(FSM.CONNECT)

        # normal sending of OPEN first ...
        if self.neighbor.local_as:
            for sent_open in self._send_open():
                if sent_open in ACTION.ALL:
                    yield sent_open
            self.proto.negotiated.sent(sent_open)
            self.fsm.change(FSM.OPENSENT)

        # read the peer's open
        for received_open in self._read_open():
            if received_open in ACTION.ALL:
                yield received_open
        self.proto.negotiated.received(received_open)
        self.proto.connection.msg_size = self.proto.negotiated.msg_size

        # if we mirror the ASN, we need to read first and send second
        if not self.neighbor.local_as:
            for sent_open in self._send_open():
                if sent_open in ACTION.ALL:
                    yield sent_open
            self.proto.negotiated.sent(sent_open)
            self.fsm.change(FSM.OPENSENT)

        self.proto.validate_open()
        self.fsm.change(FSM.OPENCONFIRM)

        self.recv_timer = ReceiveTimer(self.proto.connection.session, self.proto.negotiated.holdtime, 4, 0)
        for action in self._send_ka():
            yield action
        for action in self._read_ka():
            yield action
        self.fsm.change(FSM.ESTABLISHED)
        self.stats['complete'] = time.time()

        # let the caller know that we were sucesfull
        yield ACTION.NOW

    def _main (self):
        """yield True if we want to come back to it asap, None if nothing urgent, and False if stopped"""
        if self._teardown:
            raise Notify(6, 3)

        self.neighbor.rib.incoming.clear()

        include_withdraw = False

        # Announce to the process BGP is up
        self.logger.notice('connected to %s with %s' % (self.id(), self.proto.connection.name()), 'reactor')
        self.stats['up'] = self.stats.get('up', 0) + 1
        if self.neighbor.api['neighbor-changes']:
            try:
                self.reactor.processes.up(self.neighbor)
            except ProcessError:
                # Can not find any better error code than 6,0 !
                # XXX: We can not restart the program so this will come back again and again - FIX
                # XXX: In the main loop we do exit on this kind of error
                raise Notify(6, 0, 'ExaBGP Internal error, sorry.')

        send_eor = not self.neighbor.manual_eor
        new_routes = None
        self._resend_routes = SEND.NORMAL
        send_families = []

        # Every last asm message should be re-announced on restart
        for family in self.neighbor.asm:
            if family in self.neighbor.families():
                self.neighbor.messages.appendleft(self.neighbor.asm[family])

        operational = None
        refresh = None
        command_eor = None
        number = 0
        refresh_enhanced = True if self.proto.negotiated.refresh == REFRESH.ENHANCED else False

        send_ka = KA(self.proto.connection.session, self.proto)

        while not self._teardown:
            for message in self.proto.read_message():
                self.recv_timer.check_ka(message)

                if send_ka() is not False:
                    # we need and will send a keepalive
                    while send_ka() is None:
                        yield ACTION.NOW

                # Received update
                if message.TYPE == Update.TYPE:
                    number += 1
                    self.logger.debug('<< UPDATE #%d' % number, self.id())

                    for nlri in message.nlris:
                        self.neighbor.rib.incoming.update_cache(Change(nlri, message.attributes))
                        self.logger.debug(LazyFormat(' UPDATE #%d nlri ' % number, nlri, str), self.id())

                elif message.TYPE == RouteRefresh.TYPE:
                    if message.reserved == RouteRefresh.request:
                        self._resend_routes = SEND.REFRESH
                        send_families.append((message.afi, message.safi))

                # SEND OPERATIONAL
                if self.neighbor.operational:
                    if not operational:
                        new_operational = self.neighbor.messages.popleft() if self.neighbor.messages else None
                        if new_operational:
                            operational = self.proto.new_operational(new_operational, self.proto.negotiated)

                    if operational:
                        try:
                            six.next(operational)
                        except StopIteration:
                            operational = None
                # make sure that if some operational message are received via the API
                # that we do not eat memory for nothing
                elif self.neighbor.messages:
                    self.neighbor.messages.popleft()

                # SEND REFRESH
                if self.neighbor.route_refresh:
                    if not refresh:
                        new_refresh = self.neighbor.refresh.popleft() if self.neighbor.refresh else None
                        if new_refresh:
                            refresh = self.proto.new_refresh(new_refresh)

                    if refresh:
                        try:
                            six.next(refresh)
                        except StopIteration:
                            refresh = None

                # Take the routes already sent to that peer and resend them
                if self._reconfigure:
                    self._reconfigure = False

                    # we are here following a configuration change
                    if self._neighbor:
                        # see what changed in the configuration
                        self.neighbor.rib.outgoing.replace(self._neighbor.backup_changes, self._neighbor.changes)
                        # do not keep the previous routes in memory as they are not useful anymore
                        self._neighbor.backup_changes = []

                # Take the routes already sent to that peer and resend them
                if self._resend_routes != SEND.DONE:
                    enhanced = True if refresh_enhanced and self._resend_routes == SEND.REFRESH else False
                    self._resend_routes = SEND.DONE
                    self.neighbor.rib.outgoing.resend(send_families, enhanced)
                    send_families = []

                # Need to send update
                if not new_routes and self.neighbor.rib.outgoing.pending():
                    # XXX: in proto really. hum to think about ?
                    new_routes = self.proto.new_update(include_withdraw)

                if new_routes:
                    try:
                        # only process a bounded batch per iteration so one busy
                        # peer can not starve the reactor
                        for _ in range(25):
                            # This can raise a NetworkError
                            six.next(new_routes)
                    except StopIteration:
                        new_routes = None
                        include_withdraw = True

                elif send_eor:
                    send_eor = False
                    for _ in self.proto.new_eors():
                        yield ACTION.NOW
                    self.logger.debug('>> EOR(s)', self.id())

                # SEND MANUAL KEEPALIVE (only if we have no more routes to send)
                elif not command_eor and self.neighbor.eor:
                    new_eor = self.neighbor.eor.popleft()
                    command_eor = self.proto.new_eors(new_eor.afi, new_eor.safi)

                if command_eor:
                    try:
                        six.next(command_eor)
                    except StopIteration:
                        command_eor = None

                if new_routes or message.TYPE != NOP.TYPE:
                    yield ACTION.NOW
                elif self.neighbor.messages or operational:
                    yield ACTION.NOW
                elif self.neighbor.eor or command_eor:
                    yield ACTION.NOW
                else:
                    yield ACTION.LATER

                # read_message will loop until new message arrives with NOP
                if self._teardown:
                    break

        # If graceful restart, silent shutdown
        if self.neighbor.graceful_restart and self.proto.negotiated.sent_open.capabilities.announced(Capability.CODE.GRACEFUL_RESTART):
            self.logger.error('closing the session without notification', self.id())
            self.proto.close('graceful restarted negotiated, closing without sending any notification')
            raise NetworkError('closing')

        # notify our peer of the shutdown
        raise Notify(6, self._teardown)

    def _run (self):
        """yield True if we want the reactor to give us back the hand with the same peer loop, None if we do not have any more work to do"""
        try:
            for action in self._establish():
                yield action

            for action in self._main():
                yield action

        # CONNECTION FAILURE
        except NetworkError as network:
            # we tried to connect once, it failed and it was not a manual request, we stop
            if self.once and not self._teardown:
                self.logger.debug('only one attempt to connect is allowed, stopping the peer', self.id())
                self.stop()

            self._reset('closing connection', network)
            return

        # NOTIFY THE PEER OF AN ERROR
        except Notify as notify:
            if self.proto:
                try:
                    generator = self.proto.new_notification(notify)
                    try:
                        while True:
                            six.next(generator)
                            yield ACTION.NOW
                    except StopIteration:
                        pass
                except (NetworkError, ProcessError):
                    self.logger.error('Notification not sent', self.id())
                self._reset('notification sent (%d,%d)' % (notify.code, notify.subcode), notify)
            else:
                self._reset()
            return

        # THE PEER NOTIFIED US OF AN ERROR
        except Notification as notification:
            # we tried to connect once, it failed and it was not a manual request, we stop
            if self.once and not self._teardown:
                self.logger.debug('only one attempt to connect is allowed, stopping the peer', self.id())
                self.stop()

            self._reset(
                'notification received (%d,%d)' % (
                    notification.code,
                    notification.subcode),
                notification
            )
            return

        # RECEIVED a Message TYPE we did not expect
        except Message as message:
            self._reset('unexpected message received', message)
            return

        # PROBLEM WRITING TO OUR FORKED PROCESSES
        except ProcessError as process:
            self._reset('process problem', process)
            return

        # ....
        except Interrupted as interruption:
            self._reset('connection received before we could fully establish one')
            return

        # UNHANDLED PROBLEMS
        except Exception as exc:
            # Those messages can not be filtered in purpose
            self.logger.debug('\n'.join([
                NO_PANIC,
                '',
                '',
                str(type(exc)),
                str(exc),
                trace(),
                FOOTER
            ]), 'reactor')
            self._reset()
            return

    # loop

    def run (self):
        """Single reactor tick for this peer: advance the session generator one step."""
        if self.reactor.processes.broken(self.neighbor):
            # XXX: we should perhaps try to restart the process ??
            self.logger.error('ExaBGP lost the helper process for this peer - stopping', 'process')
            if self.reactor.processes.terminate_on_error:
                self.reactor.api_shutdown()
            else:
                self.stop()
            return True

        if self.generator:
            try:
                # This generator only stops when it raises
                # otherwise return one of the ACTION
                return six.next(self.generator)
            except StopIteration:
                # Trying to run a closed loop, no point continuing
                self.generator = None
                if self._restart:
                    return ACTION.LATER
                return ACTION.CLOSE

        elif self.generator is None:
            if self.fsm in [FSM.OPENCONFIRM, FSM.ESTABLISHED]:
                self.logger.debug('stopping, other connection is established', self.id())
                self.generator = False
                return ACTION.LATER
            if self._delay.backoff():
                return ACTION.LATER
            if self._restart:
                self.logger.debug('initialising connection to %s' % self.id(), 'reactor')
                self.generator = self._run()
                # make sure we go through a clean loop
                return ACTION.LATER

        return ACTION.CLOSE

    def cli_data (self):
        """Return a dict describing this peer's state for the CLI/API."""
        def tri (value):
            # three-state flag: None (unknown) / True / False
            if value is None:
                return None
            return True if value else False

        peer = defaultdict(lambda: None)

        have_peer = self.proto is not None
        have_open = self.proto and self.proto.negotiated.received_open

        if have_peer:
            peer.update({
                'multi-session': self.proto.negotiated.multisession,
                'operational': self.proto.negotiated.operational,
            })

        if have_open:
            capa = self.proto.negotiated.received_open.capabilities
            peer.update({
                'router-id': self.proto.negotiated.received_open.router_id,
                'hold-time': self.proto.negotiated.received_open.hold_time,
                'asn4': self.proto.negotiated.asn4,
                'route-refresh': capa.announced(Capability.CODE.ROUTE_REFRESH),
                'multi-session': capa.announced(Capability.CODE.MULTISESSION) or capa.announced(Capability.CODE.MULTISESSION_CISCO),
                'add-path': capa.announced(Capability.CODE.ADD_PATH),
                'extended-message': capa.announced(Capability.CODE.EXTENDED_MESSAGE),
                'graceful-restart': capa.announced(Capability.CODE.GRACEFUL_RESTART),
            })

        capabilities = {
            'asn4': (tri(self.neighbor.asn4), tri(peer['asn4'])),
            'route-refresh': (tri(self.neighbor.route_refresh), tri(peer['route-refresh'])),
            'multi-session': (tri(self.neighbor.multisession), tri(peer['multi-session'])),
            'operational': (tri(self.neighbor.operational), tri(peer['operational'])),
            'add-path': (tri(self.neighbor.add_path), tri(peer['add-path'])),
            'extended-message': (tri(self.neighbor.extended_message), tri(peer['extended-message'])),
            'graceful-restart': (tri(self.neighbor.graceful_restart), tri(peer['graceful-restart'])),
        }

        families = {}
        for family in self.neighbor.families():
            if have_open:
                common = True if family in self.proto.negotiated.families else False
                # fix: was `receive(*family) and receive(*family)` (the same call
                # twice, equivalent to one call)
                # NOTE(review): upstream intent was likely send() and receive();
                # confirm against the Negotiated/AddPath API before changing further
                addpath = self.proto.negotiated.addpath.receive(*family)
            else:
                common = None
                addpath = None if family in self.neighbor.addpaths() else False
            families[family] = (True, common, addpath)

        messages = {}
        total_sent = 0
        total_rcvd = 0
        for message in ('open', 'notification', 'keepalive', 'update', 'refresh'):
            sent = self.stats.get('send-%s' % message, 0)
            rcvd = self.stats.get('receive-%s' % message, 0)
            total_sent += sent
            total_rcvd += rcvd
            messages[message] = (sent, rcvd)
        messages['total'] = (total_sent, total_rcvd)

        return {
            'down': int(self.stats['reset'] - self.stats['creation']),
            'duration': int(time.time() - self.stats['complete']) if self.stats['complete'] else 0,
            'local-address': str(self.neighbor.local_address),
            'peer-address': str(self.neighbor.peer_address),
            'local-as': int(self.neighbor.local_as),
            'peer-as': int(self.neighbor.peer_as),
            'local-id': str(self.neighbor.router_id),
            # fix: was `peer['peer-id']`, a key which is never written, so the
            # value was always None even once the peer's OPEN was received
            'peer-id': None if peer['router-id'] is None else str(peer['router-id']),
            'local-hold': int(self.neighbor.hold_time),
            'peer-hold': None if peer['hold-time'] is None else int(peer['hold-time']),
            'state': self.fsm.name(),
            'capabilities': capabilities,
            'families': families,
            'messages': messages,
        }
class Connection(object):
    """Non-blocking TCP transport for a single BGP session.

    Wraps a socket (``self.io``) with generator-based read/write helpers
    which yield instead of blocking, so the reactor can interleave many
    sessions on one thread.
    """

    # set by subclasses (e.g. 'incoming'/'outgoing'); used in logs and numbering
    direction = 'undefined'
    # class-level per-direction connection counter, shared by all instances
    identifier = {}

    def __init__(self, afi, peer, local):
        # maximum BGP message size accepted before negotiation updates it
        self.msg_size = ExtendedMessage.INITIAL_SIZE

        # peer and local are strings of the IP
        try:
            # environment.settings() raises RuntimeError when not initialised
            self.defensive = environment.settings().debug.defensive
            self.logger = Logger()
        except RuntimeError:
            self.defensive = True
            self.logger = FakeLogger()

        self.afi = afi
        self.peer = peer
        self.local = local

        self.io = None               # the underlying socket, None until connected
        self.established = False
        self._rpoller = {}           # cached read poller, keyed by current socket
        self._wpoller = {}           # cached write poller, keyed by current socket

        self.id = self.identifier.get(self.direction, 1)

    def success(self):
        """Record a successful connection: bump and return the per-direction counter."""
        identifier = self.identifier.get(self.direction, 1) + 1
        self.identifier[self.direction] = identifier
        return identifier

    # Just in case ..
    def __del__(self):
        if self.io:
            self.close()
            self.logger.warning('connection to %s closed' % self.peer, self.session())

    def name(self):
        """Return a full session description for log messages."""
        return "%s-%d %s-%s" % (self.direction, self.id, self.local, self.peer)

    def session(self):
        """Return the short session tag used as a logging source."""
        return "%s-%d" % (self.direction, self.id)

    def fd(self):
        """Return the socket's file descriptor, or -1 when unusable."""
        if self.io:
            return self.io.fileno()
        # the socket is closed (fileno() == -1) or not open yet (io is None)
        return -1

    def close(self):
        """Close the socket, swallowing any error except KeyboardInterrupt."""
        try:
            self.logger.warning('%s, closing connection' % self.name(), source=self.session())
            if self.io:
                self.io.close()
                self.io = None
        except KeyboardInterrupt as exc:
            raise exc
        except Exception:
            self.io = None

    def reading(self):
        """Poll (without blocking) whether the socket is readable.

        Also returns True on HUP/ERR/NVAL so the caller attempts a read and
        discovers the failure; the cached poller is dropped in that case.
        """
        poller = self._rpoller.get(self.io, None)
        if poller is None:
            # (re)build the poller for the current socket and cache it
            poller = select.poll()
            poller.register(
                self.io,
                select.POLLIN | select.POLLPRI | select.POLLHUP | select.POLLNVAL | select.POLLERR)
            self._rpoller = {self.io: poller}

        ready = False
        for _, event in poller.poll(0):
            if event & select.POLLIN or event & select.POLLPRI:
                ready = True
            elif event & select.POLLHUP or event & select.POLLERR or event & select.POLLNVAL:
                self._rpoller = {}
                ready = True
        return ready

    def writing(self):
        """Poll (without blocking) whether the socket is writable.

        Mirror of reading(): error conditions also report True so the caller
        performs the write and sees the failure.
        """
        poller = self._wpoller.get(self.io, None)
        if poller is None:
            # (re)build the poller for the current socket and cache it
            poller = select.poll()
            poller.register(
                self.io,
                select.POLLOUT | select.POLLHUP | select.POLLNVAL | select.POLLERR)
            self._wpoller = {self.io: poller}

        ready = False
        for _, event in poller.poll(0):
            if event & select.POLLOUT:
                ready = True
            elif event & select.POLLHUP or event & select.POLLERR or event & select.POLLNVAL:
                self._wpoller = {}
                ready = True
        return ready

    def _reader(self, number):
        """Generator: read exactly `number` bytes from the socket.

        Yields b'' while data is incomplete (letting the reactor run other
        peers), then yields the full payload once and returns.  Raises
        NotConnected / LostConnection / TooSlowError / NetworkError on failure.
        """
        # The function must not be called if it does not return with no data with a smaller size as parameter
        if not self.io:
            self.close()
            raise NotConnected('Trying to read on a closed TCP connection')
        if number == 0:
            yield b''
            return

        while not self.reading():
            yield b''
        data = b''
        reported = ''
        # outer loop exists only so the try/except is set up once (it is costly)
        while True:
            try:
                while True:
                    # defensive mode randomly injects EAGAIN to exercise error paths
                    if self.defensive and random.randint(0, 2):
                        raise socket.error(errno.EAGAIN, 'raising network error on purpose')

                    read = self.io.recv(number)
                    if not read:
                        self.close()
                        self.logger.warning(
                            '%s %s lost TCP session with peer' % (self.name(), self.peer),
                            self.session())
                        raise LostConnection(
                            'the TCP connection was closed by the remote end')
                    data += read

                    number -= len(read)
                    if not number:
                        self.logger.debug(
                            LazyFormat('received TCP payload', data), self.session())
                        yield data
                        return

                    yield b''
            except socket.timeout as exc:
                self.close()
                self.logger.warning(
                    '%s %s peer is too slow' % (self.name(), self.peer), self.session())
                raise TooSlowError(
                    'Timeout while reading data from the network (%s)' % errstr(exc))
            except socket.error as exc:
                if exc.args[0] in error.block:
                    message = '%s %s blocking io problem mid-way through reading a message %s, trying to complete' % (
                        self.name(), self.peer, errstr(exc))
                    # only log the first occurrence of the same condition
                    if message != reported:
                        reported = message
                        self.logger.debug(message, self.session())
                    yield b''
                elif exc.args[0] in error.fatal:
                    self.close()
                    raise LostConnection('issue reading on the socket: %s' % errstr(exc))
                # what error could it be !
                else:
                    self.logger.critical(
                        '%s %s undefined error reading on socket' % (self.name(), self.peer),
                        self.session())
                    raise NetworkError(
                        'Problem while reading data from the network (%s)' % errstr(exc))

    def writer(self, data):
        """Generator: write `data` to the socket.

        Yields False while bytes remain unsent, True once everything was
        written.  Raises NetworkError / LostConnection on failure.
        """
        if not self.io:
            # XXX: FIXME: Make sure it does not hold the cleanup during the closing of the peering session
            yield True
            return
        while not self.writing():
            yield False
        self.logger.debug(LazyFormat('sending TCP payload', data), self.session())
        # The first while is here to setup the try/catch block once as it is very expensive
        while True:
            try:
                while True:
                    # defensive mode randomly injects EAGAIN to exercise error paths
                    if self.defensive and random.randint(0, 2):
                        raise socket.error(errno.EAGAIN, 'raising network error on purpose')

                    # we can not use sendall as in case of network buffer filling
                    # it does raise and does not let you know how much was sent
                    number = self.io.send(data)
                    if not number:
                        self.close()
                        self.logger.warning(
                            '%s %s lost TCP connection with peer' % (self.name(), self.peer),
                            self.session())
                        raise LostConnection('lost the TCP connection')

                    data = data[number:]
                    if not data:
                        yield True
                        return
                    yield False
            except socket.error as exc:
                if exc.args[0] in error.block:
                    self.logger.debug(
                        '%s %s blocking io problem mid-way through writing a message %s, trying to complete'
                        % (self.name(), self.peer, errstr(exc)),
                        self.session())
                    yield False
                elif exc.errno == errno.EPIPE:
                    # The TCP connection is gone.
                    self.close()
                    raise NetworkError('Broken TCP connection')
                elif exc.args[0] in error.fatal:
                    self.close()
                    self.logger.critical(
                        '%s %s problem sending message (%s)' % (self.name(), self.peer, errstr(exc)),
                        self.session())
                    raise NetworkError(
                        'Problem while writing data to the network (%s)' % errstr(exc))
                # what error could it be !
                else:
                    self.logger.critical(
                        '%s %s undefined error writing on socket' % (self.name(), self.peer),
                        self.session())
                    yield False

    def reader(self):
        """Generator: read and frame one BGP message.

        Yields (length, msg_type, header, body, error) tuples; incomplete
        reads yield all-empty tuples so the reactor can reschedule.  A
        NotifyError in the last slot signals a malformed header/length.
        """
        # _reader returns the whole number requested or nothing and then stops
        for header in self._reader(Message.HEADER_LEN):
            if not header:
                yield 0, 0, b'', b'', None

        if not header.startswith(Message.MARKER):
            report = 'The packet received does not contain a BGP marker'
            yield 0, 0, header, b'', NotifyError(1, 1, report)
            return

        msg = ordinal(header[18])
        length = unpack('!H', header[16:18])[0]

        if length < Message.HEADER_LEN or length > self.msg_size:
            report = '%s has an invalid message length of %d' % (
                Message.CODE.name(msg), length)
            yield length, 0, header, b'', NotifyError(1, 2, report)
            return

        validator = Message.Length.get(msg, lambda _: _ >= 19)
        if not validator(length):
            # MUST send the faulty length back
            report = '%s has an invalid message length of %d' % (
                Message.CODE.name(msg), length)
            yield length, 0, header, b'', NotifyError(1, 2, report)
            return

        number = length - Message.HEADER_LEN
        if not number:
            yield length, msg, header, b'', None
            return

        for body in self._reader(number):
            if not body:
                yield 0, 0, b'', b'', None

        yield length, msg, header, body, None
class Peer(object): def __init__(self, neighbor, reactor): try: self.logger = Logger() # We only to try to connect via TCP once self.once = environment.settings().tcp.once self.bind = True if environment.settings().tcp.bind else False except RuntimeError: self.logger = FakeLogger() self.once = False self.bind = True now = time.time() self.reactor = reactor self.neighbor = neighbor # The next restart neighbor definition self._neighbor = None self.proto = None self.fsm = FSM(self, FSM.IDLE) self.stats = { 'fsm': self.fsm, 'creation': now, 'complete': now, } self.generator = None # The peer should restart after a stop self._restart = True # The peer was restarted (to know what kind of open to send for graceful restart) self._restarted = FORCE_GRACEFUL # We want to remove routes which are not in the configuration anymote afte a signal to reload self._reconfigure = True # We want to send all the known routes self._resend_routes = SEND.DONE # We have been asked to teardown the session with this code self._teardown = None self._delay = Delay() self.recv_timer = None def id(self): return 'peer-%s' % self.neighbor.uid def _reset(self, message='', error=''): self.fsm.change(FSM.IDLE) self.stats = { 'fsm': self.fsm, 'creation': self.stats['creation'], 'complete': self.stats['creation'], } if self.proto: self.proto.close(u"peer reset, message [{0}] error[{1}]".format( message, error)) self._delay.increase() self.proto = None if not self._restart or self.neighbor.generated: self.generator = False return self.generator = None self._teardown = None self.neighbor.rib.reset() # If we are restarting, and the neighbor definition is different, update the neighbor if self._neighbor: self.neighbor = self._neighbor self._neighbor = None def _stop(self, message): self.generator = False self.proto.close('stop, message [%s]' % message) self.proto = None # logging def me(self, message): return "peer %s ASN %-7s %s" % (self.neighbor.peer_address, self.neighbor.peer_as, message) # control def 
stop(self): self._teardown = 3 self._restart = False self._restarted = False self._delay.reset() self.fsm.change(FSM.IDLE) self.stats = { 'fsm': self.fsm, 'creation': self.stats['creation'], 'reset': time.time(), } self.neighbor.rib.uncache() def resend(self): self._resend_routes = SEND.NORMAL self._delay.reset() def reestablish(self, restart_neighbor=None): # we want to tear down the session and re-establish it self._teardown = 3 self._restart = True self._restarted = True self._resend_routes = SEND.NORMAL self._neighbor = restart_neighbor self._delay.reset() def reconfigure(self, restart_neighbor=None): # we want to update the route which were in the configuration file self._reconfigure = True self._neighbor = restart_neighbor self._resend_routes = SEND.NORMAL self._neighbor = restart_neighbor def teardown(self, code, restart=True): self._restart = restart self._teardown = code self._delay.reset() # sockets we must monitor def sockets(self): ios = [] if self.proto and self.proto.connection and self.proto.connection.io: ios.append(self.proto.connection.io) return ios def handle_connection(self, connection): # if the other side fails, we go back to idle if self.fsm == FSM.ESTABLISHED: self.logger.debug( 'we already have a peer in state established for %s' % connection.name(), self.id()) return connection.notification( 6, 7, b'could not accept the connection, already established') # 6.8 The convention is to compare the BGP Identifiers of the peers # involved in the collision and to retain only the connection initiated # by the BGP speaker with the higher-valued BGP Identifier. 
# FSM.IDLE , FSM.ACTIVE , FSM.CONNECT , FSM.OPENSENT , FSM.OPENCONFIRM , FSM.ESTABLISHED if self.fsm == FSM.OPENCONFIRM: # We cheat: we are not really reading the OPEN, we use the data we have instead # it does not matter as the open message will be the same anyway local_id = self.neighbor.router_id.pack() remote_id = self.proto.negotiated.received_open.router_id.pack() if remote_id < local_id: self.logger.debug( 'closing incoming connection as we have an outgoing connection with higher router-id for %s' % connection.name(), self.id()) return connection.notification( 6, 7, b'could not accept the connection, as another connection is already in open-confirm and will go through' ) # accept the connection if self.proto: self.proto.close( 'closing outgoing connection as we have another incoming on with higher router-id' ) self.proto = Protocol(self).accept(connection) self.generator = None # Let's make sure we do some work with this connection self._delay.reset() return None def established(self): return self.fsm == FSM.ESTABLISHED def negotiated_families(self): if self.proto: families = [ "%s/%s" % (x[0], x[1]) for x in self.proto.negotiated.families ] else: families = [ "%s/%s" % (x[0], x[1]) for x in self.neighbor.families() ] if len(families) > 1: return "[ %s ]" % " ".join(families) elif len(families) == 1: return families[0] return '' def _connect(self): proto = Protocol(self) generator = proto.connect() connected = False try: while not connected: if self._teardown: raise StopIteration() connected = six.next(generator) # we want to come back as soon as possible yield ACTION.LATER self.proto = proto except StopIteration: # Connection failed if not connected and self.proto: self.proto.close( 'connection to %s:%d failed' % (self.neighbor.peer_address, self.neighbor.connect)) # A connection arrived before we could establish ! 
if not connected or self.proto: yield ACTION.NOW raise Interrupted() def _send_open(self): message = Message.CODE.NOP for message in self.proto.new_open(): if ordinal(message.TYPE) == Message.CODE.NOP: yield ACTION.NOW yield message def _read_open(self): wait = environment.settings().bgp.openwait opentimer = ReceiveTimer( self.proto.connection.session, wait, 1, 1, 'waited for open too long, we do not like stuck in active') # Only yield if we have not the open, otherwise the reactor can run the other connection # which would be bad as we need to do the collission check without going to the other peer for message in self.proto.read_open(self.neighbor.peer_address.top()): opentimer.check_ka(message) # XXX: FIXME: change the whole code to use the ord and not the chr version # Only yield if we have not the open, otherwise the reactor can run the other connection # which would be bad as we need to do the collission check if ordinal(message.TYPE) == Message.CODE.NOP: # If a peer does not reply to OPEN message, or not enough bytes # yielding ACTION.NOW can cause ExaBGP to busy spin trying to # read from peer. See GH #723 . yield ACTION.LATER yield message def _send_ka(self): for message in self.proto.new_keepalive('OPENCONFIRM'): yield ACTION.NOW def _read_ka(self): # Start keeping keepalive timer for message in self.proto.read_keepalive(): self.recv_timer.check_ka_timer(message) yield ACTION.NOW def _establish(self): # try to establish the outgoing connection self.fsm.change(FSM.ACTIVE) if not self.proto: for action in self._connect(): if action in ACTION.ALL: yield action self.fsm.change(FSM.CONNECT) # normal sending of OPEN first ... 
if self.neighbor.local_as: for sent_open in self._send_open(): if sent_open in ACTION.ALL: yield sent_open self.proto.negotiated.sent(sent_open) self.fsm.change(FSM.OPENSENT) # read the peer's open for received_open in self._read_open(): if received_open in ACTION.ALL: yield received_open self.proto.negotiated.received(received_open) self.proto.connection.msg_size = self.proto.negotiated.msg_size # if we mirror the ASN, we need to read first and send second if not self.neighbor.local_as: for sent_open in self._send_open(): if sent_open in ACTION.ALL: yield sent_open self.proto.negotiated.sent(sent_open) self.fsm.change(FSM.OPENSENT) self.proto.validate_open() self.fsm.change(FSM.OPENCONFIRM) self.recv_timer = ReceiveTimer(self.proto.connection.session, self.proto.negotiated.holdtime, 4, 0) for action in self._send_ka(): yield action for action in self._read_ka(): yield action self.fsm.change(FSM.ESTABLISHED) self.stats['complete'] = time.time() # let the caller know that we were sucesfull yield ACTION.NOW def _main(self): """yield True if we want to come back to it asap, None if nothing urgent, and False if stopped""" if self._teardown: raise Notify(6, 3) self.neighbor.rib.incoming.clear() include_withdraw = False # Announce to the process BGP is up self.logger.notice( 'connected to %s with %s' % (self.id(), self.proto.connection.name()), 'reactor') self.stats['up'] = self.stats.get('up', 0) + 1 if self.neighbor.api['neighbor-changes']: try: self.reactor.processes.up(self.neighbor) except ProcessError: # Can not find any better error code than 6,0 ! 
# XXX: We can not restart the program so this will come back again and again - FIX # XXX: In the main loop we do exit on this kind of error raise Notify(6, 0, 'ExaBGP Internal error, sorry.') send_eor = not self.neighbor.manual_eor new_routes = None self._resend_routes = SEND.NORMAL send_families = [] # Every last asm message should be re-announced on restart for family in self.neighbor.asm: if family in self.neighbor.families(): self.neighbor.messages.appendleft(self.neighbor.asm[family]) operational = None refresh = None command_eor = None number = 0 refresh_enhanced = True if self.proto.negotiated.refresh == REFRESH.ENHANCED else False send_ka = KA(self.proto.connection.session, self.proto) while not self._teardown: for message in self.proto.read_message(): self.recv_timer.check_ka(message) if send_ka() is not False: # we need and will send a keepalive while send_ka() is None: yield ACTION.NOW # Received update if message.TYPE == Update.TYPE: number += 1 self.logger.debug('<< UPDATE #%d' % number, self.id()) for nlri in message.nlris: self.neighbor.rib.incoming.update_cache( Change(nlri, message.attributes)) self.logger.debug( LazyFormat(' UPDATE #%d nlri ' % number, nlri, str), self.id()) elif message.TYPE == RouteRefresh.TYPE: if message.reserved == RouteRefresh.request: self._resend_routes = SEND.REFRESH send_families.append((message.afi, message.safi)) # SEND OPERATIONAL if self.neighbor.operational: if not operational: new_operational = self.neighbor.messages.popleft( ) if self.neighbor.messages else None if new_operational: operational = self.proto.new_operational( new_operational, self.proto.negotiated) if operational: try: six.next(operational) except StopIteration: operational = None # make sure that if some operational message are received via the API # that we do not eat memory for nothing elif self.neighbor.messages: self.neighbor.messages.popleft() # SEND REFRESH if self.neighbor.route_refresh: if not refresh: new_refresh = 
self.neighbor.refresh.popleft( ) if self.neighbor.refresh else None if new_refresh: refresh = self.proto.new_refresh(new_refresh) if refresh: try: six.next(refresh) except StopIteration: refresh = None # Take the routes already sent to that peer and resend them if self._reconfigure: self._reconfigure = False # we are here following a configuration change if self._neighbor: # see what changed in the configuration self.neighbor.rib.outgoing.replace( self._neighbor.backup_changes, self._neighbor.changes) # do not keep the previous routes in memory as they are not useful anymore self._neighbor.backup_changes = [] # Take the routes already sent to that peer and resend them if self._resend_routes != SEND.DONE: enhanced = True if refresh_enhanced and self._resend_routes == SEND.REFRESH else False self._resend_routes = SEND.DONE self.neighbor.rib.outgoing.resend(send_families, enhanced) send_families = [] # Need to send update if not new_routes and self.neighbor.rib.outgoing.pending(): # XXX: in proto really. hum to think about ? 
new_routes = self.proto.new_update(include_withdraw) if new_routes: try: for _ in range(25): # This can raise a NetworkError six.next(new_routes) except StopIteration: new_routes = None include_withdraw = True elif send_eor: send_eor = False for _ in self.proto.new_eors(): yield ACTION.NOW self.logger.debug('>> EOR(s)', self.id()) # SEND MANUAL KEEPALIVE (only if we have no more routes to send) elif not command_eor and self.neighbor.eor: new_eor = self.neighbor.eor.popleft() command_eor = self.proto.new_eors(new_eor.afi, new_eor.safi) if command_eor: try: six.next(command_eor) except StopIteration: command_eor = None if new_routes or message.TYPE != NOP.TYPE: yield ACTION.NOW elif self.neighbor.messages or operational: yield ACTION.NOW elif self.neighbor.eor or command_eor: yield ACTION.NOW else: yield ACTION.LATER # read_message will loop until new message arrives with NOP if self._teardown: break # If graceful restart, silent shutdown if self.neighbor.graceful_restart and self.proto.negotiated.sent_open.capabilities.announced( Capability.CODE.GRACEFUL_RESTART): self.logger.error('closing the session without notification', self.id()) self.proto.close( 'graceful restarted negotiated, closing without sending any notification' ) raise NetworkError('closing') # notify our peer of the shutdown raise Notify(6, self._teardown) def _run(self): """yield True if we want the reactor to give us back the hand with the same peer loop, None if we do not have any more work to do""" try: for action in self._establish(): yield action for action in self._main(): yield action # CONNECTION FAILURE except NetworkError as network: # we tried to connect once, it failed and it was not a manual request, we stop if self.once and not self._teardown: self.logger.debug( 'only one attempt to connect is allowed, stopping the peer', self.id()) self.stop() self._reset('closing connection', network) return # NOTIFY THE PEER OF AN ERROR except Notify as notify: if self.proto: try: generator = 
self.proto.new_notification(notify) try: while True: six.next(generator) yield ACTION.NOW except StopIteration: pass except (NetworkError, ProcessError): self.logger.error('Notification not sent', self.id()) self._reset( 'notification sent (%d,%d)' % (notify.code, notify.subcode), notify) else: self._reset() return # THE PEER NOTIFIED US OF AN ERROR except Notification as notification: # we tried to connect once, it failed and it was not a manual request, we stop if self.once and not self._teardown: self.logger.debug( 'only one attempt to connect is allowed, stopping the peer', self.id()) self.stop() self._reset( 'notification received (%d,%d)' % (notification.code, notification.subcode), notification) return # RECEIVED a Message TYPE we did not expect except Message as message: self._reset('unexpected message received', message) return # PROBLEM WRITING TO OUR FORKED PROCESSES except ProcessError as process: self._reset('process problem', process) return # .... except Interrupted as interruption: self._reset( 'connection received before we could fully establish one') return # UNHANDLED PROBLEMS except Exception as exc: # Those messages can not be filtered in purpose self.logger.debug( '\n'.join([ NO_PANIC, '', '', str(type(exc)), str(exc), trace(), FOOTER ]), 'reactor') self._reset() return # loop def run(self): if self.reactor.processes.broken(self.neighbor): # XXX: we should perhaps try to restart the process ?? 
self.logger.error( 'ExaBGP lost the helper process for this peer - stopping', 'process') if self.reactor.processes.terminate_on_error: self.reactor.api_shutdown() else: self.stop() return True if self.generator: try: # This generator only stops when it raises # otherwise return one of the ACTION return six.next(self.generator) except StopIteration: # Trying to run a closed loop, no point continuing self.generator = None if self._restart: return ACTION.LATER return ACTION.CLOSE elif self.generator is None: if self.fsm in [FSM.OPENCONFIRM, FSM.ESTABLISHED]: self.logger.debug('stopping, other connection is established', self.id()) self.generator = False return ACTION.LATER if self._delay.backoff(): return ACTION.LATER if self._restart: self.logger.debug('initialising connection to %s' % self.id(), 'reactor') self.generator = self._run() return ACTION.LATER # make sure we go through a clean loop return ACTION.CLOSE def cli_data(self): def tri(value): if value is None: return None return True if value else False peer = defaultdict(lambda: None) have_peer = self.proto is not None have_open = self.proto and self.proto.negotiated.received_open if have_peer: peer.update({ 'multi-session': self.proto.negotiated.multisession, 'operational': self.proto.negotiated.operational, }) if have_open: capa = self.proto.negotiated.received_open.capabilities peer.update({ 'router-id': self.proto.negotiated.received_open.router_id, 'hold-time': self.proto.negotiated.received_open.hold_time, 'asn4': self.proto.negotiated.asn4, 'route-refresh': capa.announced(Capability.CODE.ROUTE_REFRESH), 'multi-session': capa.announced(Capability.CODE.MULTISESSION) or capa.announced(Capability.CODE.MULTISESSION_CISCO), 'add-path': capa.announced(Capability.CODE.ADD_PATH), 'extended-message': capa.announced(Capability.CODE.EXTENDED_MESSAGE), 'graceful-restart': capa.announced(Capability.CODE.GRACEFUL_RESTART), }) capabilities = { 'asn4': (tri(self.neighbor.asn4), tri(peer['asn4'])), 'route-refresh': 
(tri(self.neighbor.route_refresh), tri(peer['route-refresh'])), 'multi-session': (tri(self.neighbor.multisession), tri(peer['multi-session'])), 'operational': (tri(self.neighbor.operational), tri(peer['operational'])), 'add-path': (tri(self.neighbor.add_path), tri(peer['add-path'])), 'extended-message': (tri(self.neighbor.extended_message), tri(peer['extended-message'])), 'graceful-restart': (tri(self.neighbor.graceful_restart), tri(peer['graceful-restart'])), } families = {} for family in self.neighbor.families(): if have_open: common = True if family in self.proto.negotiated.families else False addpath = self.proto.negotiated.addpath.receive( *family) and self.proto.negotiated.addpath.receive(*family) else: common = None addpath = None if family in self.neighbor.addpaths() else False families[family] = (True, common, addpath) messages = {} total_sent = 0 total_rcvd = 0 for message in ('open', 'notification', 'keepalive', 'update', 'refresh'): sent = self.stats.get('send-%s' % message, 0) rcvd = self.stats.get('receive-%s' % message, 0) total_sent += sent total_rcvd += rcvd messages[message] = (sent, rcvd) messages['total'] = (total_sent, total_rcvd) return { 'duration': int(time.time() - self.stats['complete']) if self.stats['complete'] else 0, 'local-address': str(self.neighbor.local_address), 'peer-address': str(self.neighbor.peer_address), 'local-as': int(self.neighbor.local_as), 'peer-as': int(self.neighbor.peer_as), 'local-id': str(self.neighbor.router_id), 'peer-id': None if peer['peer-id'] is None else str(peer['router-id']), 'local-hold': int(self.neighbor.hold_time), 'peer-hold': None if peer['hold-time'] is None else int(peer['hold-time']), 'state': self.fsm.name(), 'capabilities': capabilities, 'families': families, 'messages': messages, }
class Protocol(object):
    """BGP wire protocol for one peer: owns the TCP connection and encodes/decodes messages."""
    decode = True

    def __init__(self, peer):
        try:
            self.logger = Logger()
        except RuntimeError:
            self.logger = FakeLogger()
        self.peer = peer
        self.neighbor = peer.neighbor
        self.negotiated = Negotiated(self.neighbor)
        self.connection = None

        # destination port: explicit neighbor config wins, then the environment
        # overrides, then the IANA-assigned BGP port 179
        if self.neighbor.connect:
            self.port = self.neighbor.connect
        elif os.environ.get('exabgp.tcp.port', '').isdigit():
            self.port = int(os.environ.get('exabgp.tcp.port'))
        elif os.environ.get('exabgp_tcp_port', '').isdigit():
            self.port = int(os.environ.get('exabgp_tcp_port'))
        else:
            self.port = 179

        from exabgp.configuration.environment import environment
        self.log_routes = peer.neighbor.adj_rib_in or environment.settings().log.routes

    def fd(self):
        """Return the connection's file descriptor, or None when not connected."""
        if self.connection is None:
            return None
        return self.connection.fd()

    # XXX: we use self.peer.neighbor.peer_address when we could use self.neighbor.peer_address
    def me(self, message):
        """Prefix a log message with the peer address/AS."""
        return "%s/%s %s" % (self.peer.neighbor.peer_address, self.peer.neighbor.peer_as, message)

    def accept(self, incoming):
        """Adopt an already-established incoming connection; returns self for chaining."""
        self.connection = incoming

        if self.peer.neighbor.api['neighbor-changes']:
            self.peer.reactor.processes.connected(self.peer.neighbor)

        # very important - as we use this function on __init__
        return self

    def connect(self):
        """Generator establishing the outgoing TCP connection; yields False until connected, then True."""
        # allows to test the protocol code using modified StringIO with a extra 'pending' function
        if not self.connection:
            local = self.neighbor.md5_ip.top() if not self.neighbor.auto_discovery else None
            peer = self.neighbor.peer_address.top()
            afi = self.neighbor.peer_address.afi
            md5 = self.neighbor.md5_password
            md5_base64 = self.neighbor.md5_base64
            ttl_out = self.neighbor.ttl_out
            self.connection = Outgoing(afi, peer, local, self.port, md5, md5_base64, ttl_out)
            if not self.connection.init:
                yield False
                return
            if not local:
                self.neighbor.local_address = IP.create(self.connection.local)
                if self.neighbor.router_id is None and self.neighbor.local_address.afi == AFI.ipv4:
                    self.neighbor.router_id = self.neighbor.local_address

        for connected in self.connection.establish():
            if not connected:
                yield False
                continue
            if self.peer.neighbor.api['neighbor-changes']:
                self.peer.reactor.processes.connected(self.peer.neighbor)
            yield True
            return

    def close(self, reason='protocol closed, reason unspecified'):
        """Close the connection (if any) and notify the API processes."""
        if self.connection:
            self.logger.debug(reason, self.connection.session())

            # FIX: capture the session id now - self.connection is cleared below
            # and the original code dereferenced it again in the except handler,
            # raising AttributeError instead of logging.
            session = self.connection.session()

            # must be first otherwise we could have a loop caused by the raise in the below
            self.connection.close()
            self.connection = None

            self.peer.stats['down'] = self.peer.stats.get('down', 0) + 1
            try:
                if self.peer.neighbor.api['neighbor-changes']:
                    self.peer.reactor.processes.down(self.peer.neighbor, reason)
            except ProcessError:
                self.logger.debug('could not send notification of neighbor close to API', session)

    def _to_api(self, direction, message, raw):
        """Forward a sent/received message to the API processes per the neighbor's api flags."""
        packets = self.neighbor.api['%s-packets' % direction]
        parsed = self.neighbor.api['%s-parsed' % direction]
        consolidate = self.neighbor.api['%s-consolidate' % direction]
        negotiated = self.negotiated if self.neighbor.api['negotiated'] else None

        if consolidate:
            if packets:
                self.peer.reactor.processes.message(self.peer.neighbor, direction, message, negotiated, raw[:19], raw[19:])
            else:
                self.peer.reactor.processes.message(self.peer.neighbor, direction, message, negotiated, b'', b'')
        else:
            if packets:
                self.peer.reactor.processes.packets(self.peer.neighbor, direction, int(message.ID), negotiated, raw[:19], raw[19:])
            if parsed:
                self.peer.reactor.processes.message(message.ID, self.peer.neighbor, direction, message, negotiated, b'', b'')

    def write(self, message, negotiated=None):
        """Serialise and send a Message object; yields the writer's booleans."""
        raw = message.message(negotiated)

        code = 'send-%s' % Message.CODE.short(message.ID)
        self.peer.stats[code] = self.peer.stats.get(code, 0) + 1
        if self.neighbor.api.get(code, False):
            self._to_api('send', message, raw)

        for boolean in self.connection.writer(raw):
            yield boolean

    def send(self, raw):
        """Send an already-serialised message (raw bytes, header included)."""
        code = 'send-%s' % Message.CODE.short(ordinal(raw[18]))
        self.peer.stats[code] = self.peer.stats.get(code, 0) + 1
        if self.neighbor.api.get(code, False):
            message = Update.unpack_message(raw[19:], self.negotiated)
            self._to_api('send', message, raw)

        for boolean in self.connection.writer(raw):
            yield boolean

    # Read from network .......................................................

    def read_message(self):
        """Generator yielding decoded messages (or NOP while waiting) from the connection."""
        # This will always be defined by the loop but scope leaking upset scrutinizer/pylint
        msg_id = None

        packets = self.neighbor.api['receive-packets']
        consolidate = self.neighbor.api['receive-consolidate']
        parsed = self.neighbor.api['receive-parsed']

        body, header = b'', b''  # just because pylint/pylama are getting more clever

        for length, msg_id, header, body, notify in self.connection.reader():
            # internal issue
            if notify:
                code = 'receive-%s' % Message.CODE.NOTIFICATION.SHORT
                if self.neighbor.api.get(code, False):
                    if consolidate:
                        self.peer.reactor.processes.notification(self.peer.neighbor, 'receive', notify.code, notify.subcode, str(notify), None, header, body)
                    elif parsed:
                        self.peer.reactor.processes.notification(self.peer.neighbor, 'receive', notify.code, notify.subcode, str(notify), None, b'', b'')
                    elif packets:
                        self.peer.reactor.processes.packets(self.peer.neighbor, 'receive', msg_id, None, header, body)
                # XXX: is notify not already Notify class ?
                raise Notify(notify.code, notify.subcode, str(notify))

            if not length:
                yield _NOP
                continue

            self.logger.debug('<< message of type %s' % Message.CODE.name(msg_id), self.connection.session())

            code = 'receive-%s' % Message.CODE.short(msg_id)
            self.peer.stats[code] = self.peer.stats.get(code, 0) + 1
            for_api = self.neighbor.api.get(code, False)

            if for_api and packets and not consolidate:
                negotiated = self.negotiated if self.neighbor.api.get('negotiated', False) else None
                self.peer.reactor.processes.packets(self.peer.neighbor, 'receive', msg_id, negotiated, header, body)

            if msg_id == Message.CODE.UPDATE:
                if not self.neighbor.adj_rib_in and not (for_api or self.log_routes) and not (parsed or consolidate):
                    yield _UPDATE
                    return

            try:
                message = Message.unpack(msg_id, body, self.negotiated)
            except (KeyboardInterrupt, SystemExit, Notify):
                raise
            except Exception as exc:
                self.logger.debug('could not decode message "%d"' % msg_id, self.connection.session())
                self.logger.debug('%s' % str(exc), self.connection.session())
                self.logger.debug(traceback.format_exc(), self.connection.session())
                raise Notify(1, 0, 'can not decode update message of type "%d"' % msg_id)
                # raise Notify(5,0,'unknown message received')

            if message.TYPE == Update.TYPE:
                if Attribute.CODE.INTERNAL_TREAT_AS_WITHDRAW in message.attributes:
                    for nlri in message.nlris:
                        nlri.action = IN.WITHDRAWN

            if for_api:
                negotiated = self.negotiated if self.neighbor.api.get('negotiated', False) else None
                if consolidate:
                    self.peer.reactor.processes.message(msg_id, self.neighbor, 'receive', message, negotiated, header, body)
                elif parsed:
                    self.peer.reactor.processes.message(msg_id, self.neighbor, 'receive', message, negotiated, b'', b'')

            if message.TYPE == Notification.TYPE:
                raise message

            if message.TYPE == Update.TYPE and Attribute.CODE.INTERNAL_DISCARD in message.attributes:
                yield _NOP
            else:
                yield message

    def validate_open(self):
        """Check the negotiated session parameters; raise Notify on error, warn on family mismatch."""
        error = self.negotiated.validate(self.neighbor)
        if error is not None:
            raise Notify(*error)

        if self.neighbor.api['negotiated']:
            self.peer.reactor.processes.negotiated(self.peer.neighbor, self.negotiated)

        if self.negotiated.mismatch:
            self.logger.warning('--------------------------------------------------------------------', self.connection.session())
            self.logger.warning('the connection can not carry the following family/families', self.connection.session())
            for reason, (afi, safi) in self.negotiated.mismatch:
                self.logger.warning(' - %s is not configured for %s/%s' % (reason, afi, safi), self.connection.session())
            self.logger.warning('therefore no routes of this kind can be announced on the connection', self.connection.session())
            self.logger.warning('--------------------------------------------------------------------', self.connection.session())

    def read_open(self, ip):
        """Read messages until the peer's OPEN arrives; raise Notify if the first real message is not an OPEN."""
        for received_open in self.read_message():
            if received_open.TYPE == NOP.TYPE:
                yield received_open
            else:
                break

        if received_open.TYPE != Open.TYPE:
            raise Notify(5, 1, 'The first packet received is not an open message (%s)' % received_open)

        self.logger.debug('<< %s' % received_open, self.connection.session())
        yield received_open

    def read_keepalive(self):
        """Read messages until a KEEPALIVE arrives; raise Notify otherwise."""
        for message in self.read_message():
            if message.TYPE == NOP.TYPE:
                yield message
            else:
                break

        if message.TYPE != KeepAlive.TYPE:
            raise Notify(5, 2)

        yield message

    #
    # Sending message to peer
    #

    def new_open(self):
        """Build and send our OPEN message; yields NOP while writing, then the OPEN."""
        if self.neighbor.local_as:
            local_as = self.neighbor.local_as
        elif self.negotiated.received_open:
            local_as = self.negotiated.received_open.asn
        else:
            raise RuntimeError('no ASN available for the OPEN message')

        sent_open = Open(
            Version(4),
            local_as,
            self.neighbor.hold_time,
            self.neighbor.router_id,
            Capabilities().new(self.neighbor, self.peer._restarted)
        )

        # we do not buffer open message in purpose
        for _ in self.write(sent_open):
            yield _NOP

        self.logger.debug('>> %s' % sent_open, self.connection.session())
        yield sent_open

    def new_keepalive(self, comment=''):
        """Send a KEEPALIVE; yields NOP while writing, then the keepalive."""
        keepalive = KeepAlive()

        for _ in self.write(keepalive):
            yield _NOP

        self.logger.debug('>> KEEPALIVE%s' % (' (%s)' % comment if comment else ''), self.connection.session())
        yield keepalive

    def new_notification(self, notification):
        """Send a NOTIFICATION; yields NOP while writing, then the notification."""
        for _ in self.write(notification):
            yield _NOP
        self.logger.debug(
            '>> NOTIFICATION (%d,%d,"%s")' % (notification.code, notification.subcode, notification.data),
            self.connection.session())
        yield notification

    def new_update(self, include_withdraw):
        """Send pending RIB updates; yields NOP per write, then _UPDATE."""
        updates = self.neighbor.rib.outgoing.updates(self.neighbor.group_updates)
        number = 0
        for update in updates:
            for message in update.messages(self.negotiated, include_withdraw):
                number += 1
                for boolean in self.send(message):
                    # boolean is a transient network error we already announced
                    yield _NOP
        if number:
            self.logger.debug('>> %d UPDATE(s)' % number, self.connection.session())
        yield _UPDATE

    def new_eor(self, afi, safi):
        """Send one End-Of-RIB marker for the given family."""
        eor = EOR(afi, safi)
        for _ in self.write(eor):
            yield _NOP
        self.logger.debug('>> EOR %s %s' % (afi, safi), self.connection.session())
        yield eor

    def new_eors(self, afi=AFI.undefined, safi=SAFI.undefined):
        """Send End-Of-RIB for the given family, or all negotiated families by default."""
        # Send EOR to let our peer know he can perform a RIB update
        if self.negotiated.families:
            families = self.negotiated.families if (afi, safi) == (AFI.undefined, SAFI.undefined) else [(afi, safi), ]
            for eor_afi, eor_safi in families:
                for _ in self.new_eor(eor_afi, eor_safi):
                    yield _
        else:
            # If we are not sending an EOR, send a keepalive as soon as when finished
            # So the other routers knows that we have no (more) routes to send ...
            # (is that behaviour documented somewhere ??)
            for eor in self.new_keepalive('EOR'):
                yield _NOP
            yield _UPDATE

    def new_operational(self, operational, negotiated):
        """Send an OPERATIONAL message; yields NOP while writing, then the message."""
        for _ in self.write(operational, negotiated):
            yield _NOP
        self.logger.debug('>> OPERATIONAL %s' % str(operational), self.connection.session())
        yield operational

    def new_refresh(self, refresh):
        """Send a ROUTE-REFRESH message; yields NOP while writing, then the message."""
        for _ in self.write(refresh, None):
            yield _NOP
        self.logger.debug('>> REFRESH %s' % str(refresh), self.connection.session())
        yield refresh
class Protocol (object):
    """BGP wire protocol for one peer (older variant of the class above).

    NOTE(review): this is a second definition of ``Protocol`` - if it lives in
    the same module as the earlier one it shadows it; confirm this duplication
    is intentional (it looks like two file versions concatenated).
    """
    decode = True

    def __init__ (self, peer):
        try:
            self.logger = Logger()
        except RuntimeError:
            self.logger = FakeLogger()
        self.peer = peer
        self.neighbor = peer.neighbor
        self.negotiated = Negotiated(self.neighbor)
        self.connection = None

        # destination port: explicit neighbor config, then environment, then 179
        if self.neighbor.connect:
            self.port = self.neighbor.connect
        elif os.environ.get('exabgp.tcp.port', '').isdigit():
            self.port = int(os.environ.get('exabgp.tcp.port'))
        elif os.environ.get('exabgp_tcp_port', '').isdigit():
            self.port = int(os.environ.get('exabgp_tcp_port'))
        else:
            self.port = 179

        # XXX: FIXME: check the the -19 is correct (but it is harmless)
        # The message size is the whole BGP message _without_ headers
        self.message_size = Message.MAX_LEN - Message.HEADER_LEN

        from exabgp.configuration.environment import environment
        self.log_routes = peer.neighbor.adj_rib_in or environment.settings().log.routes

    # XXX: we use self.peer.neighbor.peer_address when we could use self.neighbor.peer_address
    def me (self, message):
        """Prefix a log message with the peer address/AS."""
        return "%s/%s %s" % (self.peer.neighbor.peer_address, self.peer.neighbor.peer_as, message)

    def accept (self, incoming):
        """Adopt an already-established incoming connection; returns self for chaining."""
        self.connection = incoming

        if self.peer.neighbor.api['neighbor-changes']:
            self.peer.reactor.processes.connected(self.peer.neighbor)

        # very important - as we use this function on __init__
        return self

    def connect (self):
        """Generator establishing the outgoing TCP connection; yields False until connected, then True."""
        # allows to test the protocol code using modified StringIO with a extra 'pending' function
        if not self.connection:
            local = self.neighbor.md5_ip.top() if not self.neighbor.auto_discovery else None
            peer = self.neighbor.peer_address.top()
            afi = self.neighbor.peer_address.afi
            md5 = self.neighbor.md5_password
            md5_base64 = self.neighbor.md5_base64
            ttl_out = self.neighbor.ttl_out
            self.connection = Outgoing(afi, peer, local, self.port, md5, md5_base64, ttl_out)
            if not local and self.connection.init:
                self.neighbor.local_address = IP.create(self.connection.local)
                if self.neighbor.router_id is None and self.neighbor.local_address.afi == AFI.ipv4:
                    self.neighbor.router_id = self.neighbor.local_address

        try:
            generator = self.connection.establish()
            while True:
                connected = six.next(generator)
                if not connected:
                    yield False
                    continue
                if self.peer.neighbor.api['neighbor-changes']:
                    self.peer.reactor.processes.connected(self.peer.neighbor)
                yield True
                return
        except StopIteration:
            # close called by the caller
            # self.close('could not connect to remote end')
            yield False
            return

    def close (self, reason='protocol closed, reason unspecified'):
        """Close the connection (if any) and notify the API processes."""
        if self.connection:
            self.logger.debug(reason, self.connection.session())

            # FIX: capture the session id now - self.connection is cleared below
            # and the original code dereferenced it again in the except handler,
            # raising AttributeError instead of logging.
            session = self.connection.session()

            # must be first otherwise we could have a loop caused by the raise in the below
            self.connection.close()
            self.connection = None

            self.peer.stats['down'] = self.peer.stats.get('down', 0) + 1
            try:
                if self.peer.neighbor.api['neighbor-changes']:
                    self.peer.reactor.processes.down(self.peer.neighbor, reason)
            except ProcessError:
                self.logger.debug('could not send notification of neighbor close to API', session)

    def _to_api (self, direction, message, raw):
        """Forward a sent/received message to the API processes per the neighbor's api flags."""
        packets = self.neighbor.api['%s-packets' % direction]
        parsed = self.neighbor.api['%s-parsed' % direction]
        consolidate = self.neighbor.api['%s-consolidate' % direction]
        negotiated = self.negotiated if self.neighbor.api['negotiated'] else None

        if consolidate:
            if packets:
                self.peer.reactor.processes.message(self.peer.neighbor, direction, message, negotiated, raw[:19], raw[19:])
            else:
                self.peer.reactor.processes.message(self.peer.neighbor, direction, message, negotiated, b'', b'')
        else:
            if packets:
                self.peer.reactor.processes.packets(self.peer.neighbor, direction, int(message.ID), negotiated, raw[:19], raw[19:])
            if parsed:
                self.peer.reactor.processes.message(message.ID, self.peer.neighbor, direction, message, negotiated, b'', b'')

    def write (self, message, negotiated=None):
        """Serialise and send a Message object; yields the writer's booleans."""
        raw = message.message(negotiated)

        code = 'send-%s' % Message.CODE.short(message.ID)
        self.peer.stats[code] = self.peer.stats.get(code, 0) + 1
        if self.neighbor.api.get(code, False):
            self._to_api('send', message, raw)

        for boolean in self.connection.writer(raw):
            yield boolean

    def send (self, raw):
        """Send an already-serialised message (raw bytes, header included)."""
        code = 'send-%s' % Message.CODE.short(ordinal(raw[18]))
        self.peer.stats[code] = self.peer.stats.get(code, 0) + 1
        if self.neighbor.api.get(code, False):
            message = Update.unpack_message(raw[19:], self.negotiated)
            self._to_api('send', message, raw)

        for boolean in self.connection.writer(raw):
            yield boolean

    # Read from network .......................................................

    def read_message (self):
        """Generator yielding decoded messages (or NOP while waiting) from the connection."""
        # This will always be defined by the loop but scope leaking upset scrutinizer/pylint
        msg_id = None

        packets = self.neighbor.api['receive-packets']
        consolidate = self.neighbor.api['receive-consolidate']
        parsed = self.neighbor.api['receive-parsed']

        body, header = b'', b''  # just because pylint/pylama are getting more clever

        for length, msg_id, header, body, notify in self.connection.reader():
            # internal issue
            if notify:
                code = 'receive-%s' % Message.CODE.NOTIFICATION.SHORT
                if self.neighbor.api.get(code, False):
                    if consolidate:
                        self.peer.reactor.processes.notification(self.peer.neighbor, 'receive', notify.code, notify.subcode, str(notify), None, header, body)
                    elif parsed:
                        self.peer.reactor.processes.notification(self.peer.neighbor, 'receive', notify.code, notify.subcode, str(notify), None, b'', b'')
                    elif packets:
                        self.peer.reactor.processes.packets(self.peer.neighbor, 'receive', msg_id, None, header, body)
                # XXX: is notify not already Notify class ?
                raise Notify(notify.code, notify.subcode, str(notify))

            if not length:
                yield _NOP
                continue

            self.logger.debug('<< message of type %s' % Message.CODE.name(msg_id), self.connection.session())

            code = 'receive-%s' % Message.CODE.short(msg_id)
            self.peer.stats[code] = self.peer.stats.get(code, 0) + 1
            for_api = self.neighbor.api.get(code, False)

            if for_api and packets and not consolidate:
                negotiated = self.negotiated if self.neighbor.api.get('negotiated', False) else None
                self.peer.reactor.processes.packets(self.peer.neighbor, 'receive', msg_id, negotiated, header, body)

            if msg_id == Message.CODE.UPDATE:
                if not self.neighbor.adj_rib_in and not (for_api or self.log_routes) and not (parsed or consolidate):
                    yield _UPDATE
                    return

            try:
                message = Message.unpack(msg_id, body, self.negotiated)
            except (KeyboardInterrupt, SystemExit, Notify):
                raise
            except Exception as exc:
                self.logger.debug('could not decode message "%d"' % msg_id, self.connection.session())
                self.logger.debug('%s' % str(exc), self.connection.session())
                self.logger.debug(traceback.format_exc(), self.connection.session())
                raise Notify(1, 0, 'can not decode update message of type "%d"' % msg_id)
                # raise Notify(5,0,'unknown message received')

            if message.TYPE == Update.TYPE:
                if Attribute.CODE.INTERNAL_TREAT_AS_WITHDRAW in message.attributes:
                    for nlri in message.nlris:
                        nlri.action = IN.WITHDRAWN

            if for_api:
                negotiated = self.negotiated if self.neighbor.api.get('negotiated', False) else None
                if consolidate:
                    self.peer.reactor.processes.message(msg_id, self.neighbor, 'receive', message, negotiated, header, body)
                elif parsed:
                    self.peer.reactor.processes.message(msg_id, self.neighbor, 'receive', message, negotiated, b'', b'')

            if message.TYPE == Notification.TYPE:
                raise message

            if message.TYPE == Update.TYPE and Attribute.CODE.INTERNAL_DISCARD in message.attributes:
                yield _NOP
            else:
                yield message

    def validate_open (self):
        """Check the negotiated session parameters; raise Notify on error, warn on family mismatch."""
        error = self.negotiated.validate(self.neighbor)
        if error is not None:
            raise Notify(*error)

        if self.neighbor.api['negotiated']:
            self.peer.reactor.processes.negotiated(self.peer.neighbor, self.negotiated)

        if self.negotiated.mismatch:
            self.logger.warning('--------------------------------------------------------------------', self.connection.session())
            self.logger.warning('the connection can not carry the following family/families', self.connection.session())
            for reason, (afi, safi) in self.negotiated.mismatch:
                self.logger.warning(' - %s is not configured for %s/%s' % (reason, afi, safi), self.connection.session())
            self.logger.warning('therefore no routes of this kind can be announced on the connection', self.connection.session())
            self.logger.warning('--------------------------------------------------------------------', self.connection.session())

    def read_open (self, ip):
        """Read messages until the peer's OPEN arrives; raise Notify if the first real message is not an OPEN."""
        for received_open in self.read_message():
            if received_open.TYPE == NOP.TYPE:
                yield received_open
            else:
                break

        if received_open.TYPE != Open.TYPE:
            raise Notify(5, 1, 'The first packet received is not an open message (%s)' % received_open)

        self.logger.debug('<< %s' % received_open, self.connection.session())
        yield received_open

    def read_keepalive (self):
        """Read messages until a KEEPALIVE arrives; raise Notify otherwise."""
        for message in self.read_message():
            if message.TYPE == NOP.TYPE:
                yield message
            else:
                break

        if message.TYPE != KeepAlive.TYPE:
            raise Notify(5, 2)

        yield message

    #
    # Sending message to peer
    #

    def new_open (self):
        """Build and send our OPEN message; yields NOP while writing, then the OPEN."""
        if self.neighbor.local_as:
            local_as = self.neighbor.local_as
        elif self.negotiated.received_open:
            local_as = self.negotiated.received_open.asn
        else:
            raise RuntimeError('no ASN available for the OPEN message')

        sent_open = Open(
            Version(4),
            local_as,
            self.neighbor.hold_time,
            self.neighbor.router_id,
            Capabilities().new(self.neighbor, self.peer._restarted)
        )

        # we do not buffer open message in purpose
        for _ in self.write(sent_open):
            yield _NOP

        self.logger.debug('>> %s' % sent_open, self.connection.session())
        yield sent_open

    def new_keepalive (self, comment=''):
        """Send a KEEPALIVE; yields NOP while writing, then the keepalive."""
        keepalive = KeepAlive()

        for _ in self.write(keepalive):
            yield _NOP

        self.logger.debug('>> KEEPALIVE%s' % (' (%s)' % comment if comment else ''), self.connection.session())
        yield keepalive

    def new_notification (self, notification):
        """Send a NOTIFICATION; yields NOP while writing, then the notification."""
        for _ in self.write(notification):
            yield _NOP
        self.logger.debug(
            '>> NOTIFICATION (%d,%d,"%s")' % (notification.code, notification.subcode, notification.data),
            self.connection.session())
        yield notification

    def new_update (self, include_withdraw):
        """Send pending RIB updates; yields NOP per write, then _UPDATE."""
        updates = self.neighbor.rib.outgoing.updates(self.neighbor.group_updates)
        number = 0
        for update in updates:
            for message in update.messages(self.negotiated, include_withdraw):
                number += 1
                for boolean in self.send(message):
                    # boolean is a transient network error we already announced
                    yield _NOP
        if number:
            self.logger.debug('>> %d UPDATE(s)' % number, self.connection.session())
        yield _UPDATE

    def new_eor (self, afi, safi):
        """Send one End-Of-RIB marker for the given family."""
        eor = EOR(afi, safi)
        for _ in self.write(eor):
            yield _NOP
        self.logger.debug('>> EOR %s %s' % (afi, safi), self.connection.session())
        yield eor

    def new_eors (self, afi=AFI.undefined, safi=SAFI.undefined):
        """Send End-Of-RIB for the given family, or all negotiated families by default."""
        # Send EOR to let our peer know he can perform a RIB update
        if self.negotiated.families:
            families = self.negotiated.families if (afi, safi) == (AFI.undefined, SAFI.undefined) else [(afi, safi), ]
            for eor_afi, eor_safi in families:
                for _ in self.new_eor(eor_afi, eor_safi):
                    yield _
        else:
            # If we are not sending an EOR, send a keepalive as soon as when finished
            # So the other routers knows that we have no (more) routes to send ...
            # (is that behaviour documented somewhere ??)
            for eor in self.new_keepalive('EOR'):
                yield _NOP
            yield _UPDATE

    def new_operational (self, operational, negotiated):
        """Send an OPERATIONAL message; yields NOP while writing, then the message."""
        for _ in self.write(operational, negotiated):
            yield _NOP
        self.logger.debug('>> OPERATIONAL %s' % str(operational), self.connection.session())
        yield operational

    def new_refresh (self, refresh):
        """Send a ROUTE-REFRESH message; yields NOP while writing, then the message."""
        for _ in self.write(refresh, None):
            yield _NOP
        self.logger.debug('>> REFRESH %s' % str(refresh), self.connection.session())
        yield refresh
class Connection (object):
    """Non-blocking TCP transport for one BGP session.

    Wraps a socket (``self.io``) with generator-based read/write helpers so
    the reactor can interleave many sessions: each generator yields control
    whenever the socket would block, and resumes where it left off.
    """

    # 'incoming' or 'outgoing' - set by subclasses
    direction = 'undefined'
    # class-level counter of connections per direction, shared by all instances
    identifier = {}

    def __init__ (self, afi, peer, local):
        # peer and local are strings of the IP
        try:
            # defensive mode randomly injects EAGAIN to exercise the
            # blocking-IO recovery paths
            self.defensive = environment.settings().debug.defensive
            self.logger = Logger()
        except RuntimeError:
            # no environment configured (e.g. unit tests) - use safe defaults
            self.defensive = True
            self.logger = FakeLogger()

        self.afi = afi
        self.peer = peer
        self.local = local

        self.io = None
        self.established = False

        self.id = self.identifier.get(self.direction, 1)

    def success (self):
        """Bump and persist the per-direction connection counter; return the new value."""
        identifier = self.identifier.get(self.direction, 1) + 1
        self.identifier[self.direction] = identifier
        return identifier

    # Just in case ..
    def __del__ (self):
        # last-resort cleanup if the connection was never explicitly closed
        if self.io:
            self.logger.warning('connection to %s closed' % self.peer, self.session())
            self.close()

    def name (self):
        """Human-readable session name: direction, id and both endpoints."""
        return "%s-%d %s-%s" % (self.direction, self.id, self.local, self.peer)

    def session (self):
        """Short session tag used as the logging source."""
        return "%s-%d" % (self.direction, self.id)

    def close (self):
        """Close the underlying socket, swallowing everything but Ctrl-C."""
        try:
            self.logger.warning('%s, closing connection' % self.name(), source=self.session())
            if self.io:
                self.io.close()
                self.io = None
        except KeyboardInterrupt as exc:
            raise exc
        except Exception:
            # best-effort close: any socket error here is irrelevant
            pass

    def reading (self):
        """Return True when the socket is readable (zero-timeout select).

        Returns False on a blocking-class select error; raises NetworkError
        (after closing) on anything else.
        """
        while True:
            try:
                r, _, _ = select.select([self.io,], [], [], 0)
            except select.error as exc:
                if exc.args[0] not in error.block:
                    self.close()
                    self.logger.warning('%s %s errno %s on socket' % (self.name(), self.peer, errno.errorcode[exc.args[0]]), self.session())
                    raise NetworkError('errno %s on socket' % errno.errorcode[exc.args[0]])
                return False
            return r != []

    def writing (self):
        """Return True when the socket is writable (zero-timeout select).

        Mirror of reading(): False on blocking errno, NetworkError otherwise.
        """
        while True:
            try:
                _, w, _ = select.select([], [self.io,], [], 0)
            except select.error as exc:
                if exc.args[0] not in error.block:
                    self.close()
                    self.logger.warning('%s %s errno %s on socket' % (self.name(), self.peer, errno.errorcode[exc.args[0]]), self.session())
                    raise NetworkError('errno %s on socket' % errno.errorcode[exc.args[0]])
                return False
            return w != []

    def _reader (self, number):
        """Generator: read exactly `number` bytes from the socket.

        Yields b'' every time the data is not yet complete (so the reactor
        can schedule other work) and yields the full buffer exactly once
        before returning.

        :raises NotConnected: the socket is already closed
        :raises LostConnection: the remote end closed the TCP session
        :raises TooSlowError: socket timeout while reading
        :raises NetworkError: unclassified socket error
        """
        # The function must not be called if it does not return with no data with a smaller size as parameter
        if not self.io:
            self.close()
            raise NotConnected('Trying to read on a closed TCP connection')
        if number == 0:
            yield b''
            return

        while not self.reading():
            yield b''
        data = b''
        reported = ''
        while True:
            try:
                while True:
                    if self.defensive and random.randint(0, 2):
                        # fault injection (defensive mode): pretend the read would block
                        raise socket.error(errno.EAGAIN, 'raising network error on purpose')
                    read = self.io.recv(number)
                    if not read:
                        # recv() returning b'' means the peer closed the connection
                        self.close()
                        self.logger.warning('%s %s lost TCP session with peer' % (self.name(), self.peer), self.session())
                        raise LostConnection('the TCP connection was closed by the remote end')
                    data += read
                    number -= len(read)
                    if not number:
                        # complete: emit the payload once, then finish the generator
                        self.logger.debug(LazyFormat('received TCP payload', data), self.session())
                        yield data
                        return
                    yield b''
            except socket.timeout as exc:
                self.close()
                self.logger.warning('%s %s peer is too slow' % (self.name(), self.peer), self.session())
                raise TooSlowError('Timeout while reading data from the network (%s)' % errstr(exc))
            except socket.error as exc:
                if exc.args[0] in error.block:
                    message = '%s %s blocking io problem mid-way through reading a message %s, trying to complete' % (self.name(), self.peer, errstr(exc))
                    if message != reported:
                        # log each distinct blocking condition only once
                        reported = message
                        self.logger.debug(message, self.session())
                    yield b''
                elif exc.args[0] in error.fatal:
                    self.close()
                    raise LostConnection('issue reading on the socket: %s' % errstr(exc))
                # what error could it be !
                else:
                    self.logger.critical('%s %s undefined error reading on socket' % (self.name(), self.peer), self.session())
                    raise NetworkError('Problem while reading data from the network (%s)' % errstr(exc))

    def writer (self, data):
        """Generator: write `data` to the socket, handling partial sends.

        Yields False while bytes remain unsent and True exactly once when
        the whole buffer is on the wire (or the socket is already gone).

        :raises LostConnection: send() returned 0 (connection lost)
        :raises NetworkError: broken pipe or fatal socket error
        """
        if not self.io:
            # XXX: FIXME: Make sure it does not hold the cleanup during the closing of the peering session
            yield True
            return
        while not self.writing():
            yield False
        self.logger.debug(LazyFormat('sending TCP payload', data), self.session())
        # The first while is here to setup the try/catch block once as it is very expensive
        while True:
            try:
                while True:
                    if self.defensive and random.randint(0, 2):
                        # fault injection (defensive mode): pretend the write would block
                        raise socket.error(errno.EAGAIN, 'raising network error on purpose')
                    # we can not use sendall as in case of network buffer filling
                    # it does raise and does not let you know how much was sent
                    number = self.io.send(data)
                    if not number:
                        self.close()
                        self.logger.warning('%s %s lost TCP connection with peer' % (self.name(), self.peer), self.session())
                        raise LostConnection('lost the TCP connection')
                    # keep only the bytes the kernel did not accept
                    data = data[number:]
                    if not data:
                        yield True
                        return
                    yield False
            except socket.error as exc:
                if exc.args[0] in error.block:
                    self.logger.debug(
                        '%s %s blocking io problem mid-way through writing a message %s, trying to complete' % (
                            self.name(),
                            self.peer,
                            errstr(exc)
                        ),
                        self.session()
                    )
                    yield False
                elif exc.errno == errno.EPIPE:
                    # The TCP connection is gone.
                    self.close()
                    raise NetworkError('Broken TCP connection')
                elif exc.args[0] in error.fatal:
                    self.close()
                    self.logger.critical('%s %s problem sending message (%s)' % (self.name(), self.peer, errstr(exc)), self.session())
                    raise NetworkError('Problem while writing data to the network (%s)' % errstr(exc))
                # what error could it be !
                else:
                    self.logger.critical('%s %s undefined error writing on socket' % (self.name(), self.peer), self.session())
                    yield False

    def reader (self):
        """Generator: read and frame one BGP message from the wire.

        Yields 5-tuples ``(length, msg_type, header, body, error)``:
        all-zero/empty tuples while data is incomplete, then exactly one
        final tuple with either the framed message or a NotifyError
        describing the protocol violation (bad marker, bad length).
        """
        # _reader returns the whole number requested or nothing and then stops
        for header in self._reader(Message.HEADER_LEN):
            if not header:
                yield 0, 0, b'', b'', None

        if not header.startswith(Message.MARKER):
            report = 'The packet received does not contain a BGP marker'
            yield 0, 0, header, b'', NotifyError(1, 1, report)
            return

        # header layout: 16-byte marker, 2-byte length, 1-byte type
        msg = ordinal(header[18])
        length = unpack('!H', header[16:18])[0]

        if length < Message.HEADER_LEN or length > Message.MAX_LEN:
            report = '%s has an invalid message length of %d' % (Message.CODE.name(msg), length)
            yield length, 0, header, b'', NotifyError(1, 2, report)
            return

        # per-type length validation, defaulting to "at least a full header"
        validator = Message.Length.get(msg, lambda _: _ >= 19)
        if not validator(length):
            # MUST send the faulty length back
            report = '%s has an invalid message length of %d' % (Message.CODE.name(msg), length)
            yield length, 0, header, b'', NotifyError(1, 2, report)
            return

        number = length - Message.HEADER_LEN
        if not number:
            # header-only message (e.g. KEEPALIVE)
            yield length, msg, header, b'', None
            return

        for body in self._reader(number):
            if not body:
                yield 0, 0, b'', b'', None

        yield length, msg, header, body, None