class Peer(object):
    """State holder and generator-based driver for one configured BGP neighbor.

    Two connection attempts may exist at once: 'in' (accepted from the
    remote side) and 'out' (initiated locally).  The per-direction data
    lives in ``self._[direction]`` under the keys:

    * 'state'     -- one of the STATE constants
    * 'enabled'   -- value 'generator' is reset to when the direction restarts
    * 'proto'     -- the Protocol object for the direction, or None
    * 'code'      -- bound method producing the handshake generator
    * 'generator' -- False (direction down), None (must be re-created) or
                     the generator currently being run
    """

    def __init__(self, neighbor, reactor):
        """Record the neighbor/reactor and initialise both directions to idle.

        neighbor -- the neighbor definition this peer talks to
        reactor  -- the owning reactor (used here for ``reactor.processes``)
        """
        try:
            self.logger = Logger()
            # We only try to connect via TCP once
            self.once = environment.settings().tcp.once
            self.bind = True if environment.settings().tcp.bind else False
        except RuntimeError:
            # NOTE(review): self.bind is left unset on this path -- confirm
            # that nothing reads it when the environment is not available.
            self.logger = FakeLogger()
            self.once = True

        self.reactor = reactor
        self.neighbor = neighbor
        # The next restart neighbor definition
        self._neighbor = None

        # The peer should restart after a stop
        self._restart = True
        # The peer was restarted (to know what kind of open to send for graceful restart)
        self._restarted = FORCE_GRACEFUL
        self._reset_skip()

        # We want to remove routes which are not in the configuration anymore after a signal to reload
        self._reconfigure = True
        # We want to send all the known routes
        self._resend_routes = SEND.DONE
        # We have new routes for the peers
        self._have_routes = True

        # We have been asked to teardown the session with this code
        self._teardown = None

        self._ = {'in': {}, 'out': {}}

        self._['in']['state'] = STATE.IDLE
        self._['out']['state'] = STATE.IDLE

        # value to reset 'generator' to
        self._['in']['enabled'] = False
        self._['out']['enabled'] = None if not self.neighbor.passive else False

        # the networking code
        self._['out']['proto'] = None
        self._['in']['proto'] = None

        # the handshake code to run for each direction
        self._['out']['code'] = self._connect
        self._['in']['code'] = self._accept

        # the generator used by the main code
        # * False, the generator for this direction is down
        # * Generator, the code to run to connect or accept the connection
        # * None, the generator must be re-created
        self._['in']['generator'] = self._['in']['enabled']
        self._['out']['generator'] = self._['out']['enabled']

    def _reset(self, direction, message='', error=''):
        """Put ``direction`` back to idle after a failure or a notification.

        When the peer is allowed to restart, the protocol (if any) is closed,
        the generator is reset to its 'enabled' value, the pending teardown is
        cleared, the back-off is increased, the RIB is reset and a pending
        neighbor definition replaces the current one.  Otherwise the direction
        is simply marked as down.
        """
        self._[direction]['state'] = STATE.IDLE

        if self._restart:
            if self._[direction]['proto']:
                self._[direction]['proto'].close('%s loop, peer reset, message [%s] error[%s]' % (direction, message, str(error)))
            self._[direction]['proto'] = None
            self._[direction]['generator'] = self._[direction]['enabled']

            self._teardown = None
            self._more_skip(direction)
            self.neighbor.rib.reset()

            # If we are restarting, and the neighbor definition is different, update the neighbor
            if self._neighbor:
                self.neighbor = self._neighbor
                self._neighbor = None
        else:
            self._[direction]['generator'] = False
            self._[direction]['proto'] = None

    def _stop(self, direction, message):
        """Mark ``direction`` as down and close its protocol, if it has one.

        The 'state' entry is intentionally left untouched (as before); because
        of that this method can be reached again for a direction whose proto
        was already cleared, so the close is guarded against None.
        """
        self._[direction]['generator'] = False
        proto = self._[direction]['proto']
        if proto:
            proto.close('%s loop, stop, message [%s]' % (direction, message))
        self._[direction]['proto'] = None

    # connection delay

    def _reset_skip(self):
        """Cancel any connection back-off."""
        # We are currently not skipping connection attempts
        self._skip_time = time.time()
        # when we can not connect to a peer how many time (in loop) should we back-off
        self._next_skip = 0

    def _more_skip(self, direction):
        """Increase the back-off for outgoing connections (capped at 60)."""
        if direction != 'out':
            return
        self._skip_time = time.time() + self._next_skip
        self._next_skip = int(1 + self._next_skip * 1.2)
        if self._next_skip > 60:
            self._next_skip = 60

    # logging

    def me(self, message):
        """Return ``message`` prefixed with the peer address and ASN."""
        return "peer %s ASN %-7s %s" % (self.neighbor.peer_address, self.neighbor.peer_as, message)

    # control

    def stop(self):
        """Request a teardown (code 3) without restart."""
        self._teardown = 3
        self._restart = False
        self._restarted = False
        self._reset_skip()

    def resend(self):
        """Ask the main loop to resend the routes already sent to the peer."""
        self._resend_routes = SEND.NORMAL
        self._reset_skip()

    def send_new(self, changes=None, update=None):
        """Flag that there may be new routes to send.

        changes -- when given, replaces the content of the outgoing RIB
        update  -- explicit value for the 'have routes' flag; when None the
                   neighbor's ``flush`` setting is used
        """
        if changes:
            self.neighbor.rib.outgoing.replace(changes)
        self._have_routes = self.neighbor.flush if update is None else update

    def reestablish(self, restart_neighbor=None):
        """Tear the session down (code 3) and let it re-establish.

        restart_neighbor -- neighbor definition to switch to on reset, if any
        """
        # we want to tear down the session and re-establish it
        self._teardown = 3
        self._restart = True
        self._restarted = True
        self._resend_routes = SEND.NORMAL
        self._neighbor = restart_neighbor
        self._reset_skip()

    def reconfigure(self, restart_neighbor=None):
        """Ask the main loop to apply a configuration change without teardown.

        restart_neighbor -- the new neighbor definition, if any
        """
        # we want to update the route which were in the configuration file
        self._reconfigure = True
        self._neighbor = restart_neighbor
        self._resend_routes = SEND.NORMAL

    def teardown(self, code, restart=True):
        """Request a teardown with ``code``; ``restart`` controls reconnection."""
        self._restart = restart
        self._teardown = code
        self._reset_skip()

    # sockets we must monitor

    def sockets(self):
        """Return the io objects of every direction which has a live connection."""
        ios = []
        for direction in ['in', 'out']:
            proto = self._[direction]['proto']
            if proto and proto.connection and proto.connection.io:
                ios.append(proto.connection.io)
        return ios

    def incoming(self, connection):
        """Attach an accepted ``connection`` to the 'in' direction.

        Returns False (and keeps the current one) when an incoming protocol
        already exists, True once the new connection has been registered.
        """
        # if the other side fails, we go back to idle
        if self._['in']['proto'] not in (True, False, None):
            self.logger.network('we already have a peer at this address')
            return False

        self._['in']['proto'] = Protocol(self).accept(connection)
        # Let's make sure we do some work with this connection
        self._['in']['generator'] = None
        self._['in']['state'] = STATE.CONNECT
        return True

    def established(self):
        """Return True when either direction is in the ESTABLISHED state."""
        return self._['in']['state'] == STATE.ESTABLISHED or self._['out']['state'] == STATE.ESTABLISHED

    def detailed_link_status(self):
        """Return the name of the highest state reached by either direction."""
        state_tbl = {
            STATE.IDLE: "Idle",
            STATE.ACTIVE: "Active",
            STATE.CONNECT: "Connect",
            STATE.OPENSENT: "OpenSent",
            STATE.OPENCONFIRM: "OpenConfirm",
            STATE.ESTABLISHED: "Established",
        }
        return state_tbl[max(self._["in"]["state"], self._["out"]["state"])]

    def negotiated_families(self):
        """Return the address families as text.

        Uses the families negotiated on the outgoing protocol when there is
        one, the configured neighbor families otherwise.  Several families
        are rendered as ``[ a/b c/d ]``, one as ``a/b`` and none as ``''``.
        """
        if self._['out']['proto']:
            families = ["%s/%s" % (x[0], x[1]) for x in self._['out']['proto'].negotiated.families]
        else:
            families = ["%s/%s" % (x[0], x[1]) for x in self.neighbor.families()]

        if len(families) > 1:
            return "[ %s ]" % " ".join(families)
        elif len(families) == 1:
            return families[0]

        return ''

    def _accept(self):
        """Generator running the OPEN/KEEPALIVE handshake on the 'in' direction.

        Yields ACTION values for the reactor.  Raises Interrupted (with
        ``direction = 'in'``) when a collision with the outgoing connection
        is resolved against the incoming one.
        """
        # we can do this as Protocol is a mutable object
        proto = self._['in']['proto']

        # send OPEN
        message = Message.CODE.NOP

        for message in proto.new_open(self._restarted):
            if ord(message.TYPE) == Message.CODE.NOP:
                yield ACTION.NOW

        proto.negotiated.sent(message)

        self._['in']['state'] = STATE.OPENSENT

        # Read OPEN
        wait = environment.settings().bgp.openwait
        opentimer = ReceiveTimer(self.me, wait, 1, 1, 'waited for open too long, we do not like stuck in active')
        # Only yield if we have not the open, otherwise the reactor can run the other connection
        # which would be bad as we need to do the collision check without going to the other peer
        for message in proto.read_open(self.neighbor.peer_address.ip):
            opentimer.check_ka(message)
            if ord(message.TYPE) == Message.CODE.NOP:
                yield ACTION.LATER

        self._['in']['state'] = STATE.OPENCONFIRM
        proto.negotiated.received(message)
        proto.validate_open()

        if self._['out']['state'] == STATE.OPENCONFIRM:
            self.logger.network('incoming connection finds the outgoing connection is in openconfirm')
            local_id = self.neighbor.router_id.packed
            remote_id = proto.negotiated.received_open.router_id.packed

            if local_id < remote_id:
                self.logger.network('closing the outgoing connection')
                self._stop('out', 'collision local id < remote id')
                yield ACTION.LATER
            else:
                self.logger.network('aborting the incoming connection')
                stop = Interrupted()
                stop.direction = 'in'
                raise stop

        # Send KEEPALIVE
        for message in self._['in']['proto'].new_keepalive('OPENCONFIRM'):
            yield ACTION.NOW

        # Start keeping keepalive timer
        self.recv_timer = ReceiveTimer(self.me, proto.negotiated.holdtime, 4, 0)
        # Read KEEPALIVE
        for message in proto.read_keepalive():
            self.recv_timer.check_ka(message)
            yield ACTION.NOW

        self._['in']['state'] = STATE.ESTABLISHED
        # let the caller know that we were successful
        yield ACTION.NOW

    def _connect(self):
        """Generator establishing the outgoing connection and its handshake.

        Yields ACTION values for the reactor.  Raises Interrupted (with
        ``direction = 'out'``) when the connection fails, when an incoming
        connection showed up first, or when a collision is resolved against
        the outgoing connection.
        """
        # try to establish the outgoing connection

        proto = Protocol(self)
        generator = proto.connect()

        connected = False
        try:
            while not connected:
                if self._teardown:
                    # handled by the except clause just below
                    raise StopIteration()
                connected = generator.next()
                # we want to come back as soon as possible
                yield ACTION.LATER
        except StopIteration:
            # Connection failed
            if not connected:
                proto.close('connection to peer failed', self._['in']['state'] != STATE.ESTABLISHED)
            # A connection arrived before we could establish !
            if not connected or self._['in']['proto']:
                stop = Interrupted()
                stop.direction = 'out'
                yield ACTION.NOW
                raise stop

        self._['out']['state'] = STATE.CONNECT
        self._['out']['proto'] = proto

        # send OPEN
        # Only yield if we have not the open, otherwise the reactor can run the other connection
        # which would be bad as we need to set the state without going to the other peer
        message = Message.CODE.NOP

        for message in proto.new_open(self._restarted):
            if ord(message.TYPE) == Message.CODE.NOP:
                yield ACTION.NOW

        proto.negotiated.sent(message)

        self._['out']['state'] = STATE.OPENSENT

        # Read OPEN
        wait = environment.settings().bgp.openwait
        opentimer = ReceiveTimer(self.me, wait, 1, 1, 'waited for open too long, we do not like stuck in active')

        for message in self._['out']['proto'].read_open(self.neighbor.peer_address.ip):
            opentimer.check_ka(message)
            # XXX: FIXME: change the whole code to use the ord and not the chr version
            # Only yield if we have not the open, otherwise the reactor can run the other connection
            # which would be bad as we need to do the collision check
            if ord(message.TYPE) == Message.CODE.NOP:
                yield ACTION.LATER

        self._['out']['state'] = STATE.OPENCONFIRM
        proto.negotiated.received(message)
        proto.validate_open()

        if self._['in']['state'] == STATE.OPENCONFIRM:
            self.logger.network('outgoing connection finds the incoming connection is in openconfirm')
            local_id = self.neighbor.router_id.packed
            remote_id = proto.negotiated.received_open.router_id.packed

            if local_id < remote_id:
                self.logger.network('aborting the outgoing connection')
                stop = Interrupted()
                stop.direction = 'out'
                raise stop
            else:
                self.logger.network('closing the incoming connection')
                self._stop('in', 'collision local id < remote id')
                yield ACTION.LATER

        # Send KEEPALIVE
        for message in proto.new_keepalive('OPENCONFIRM'):
            yield ACTION.NOW

        # Start keeping keepalive timer
        self.recv_timer = ReceiveTimer(self.me, proto.negotiated.holdtime, 4, 0)
        # Read KEEPALIVE
        for message in self._['out']['proto'].read_keepalive():
            self.recv_timer.check_ka(message)
            yield ACTION.NOW

        self._['out']['state'] = STATE.ESTABLISHED
        # let the caller know that we were successful
        yield ACTION.NOW

    def _main(self, direction):
        """Generator running the established session for ``direction``.

        Yields ACTION.NOW while there is pending work (routes, received
        messages, operational messages, EORs) and ACTION.LATER otherwise.

        Raises Notify when a teardown was requested or the helper processes
        could not be told the session is up, and NetworkError('closing') for
        the silent shutdown used when graceful restart was announced.
        """
        if self._teardown:
            raise Notify(6, 3)

        proto = self._[direction]['proto']

        # Announce to the process BGP is up
        self.logger.network('Connected to peer %s (%s)' % (self.neighbor.name(), direction))
        if self.neighbor.api['neighbor-changes']:
            try:
                self.reactor.processes.up(self)
            except ProcessError:
                # Can not find any better error code than 6,0 !
                # XXX: We can not restart the program so this will come back again and again - FIX
                # XXX: In the main loop we do exit on this kind of error
                raise Notify(6, 0, 'ExaBGP Internal error, sorry.')

        send_eor = not self.neighbor.manual_eor
        new_routes = None
        self._resend_routes = SEND.NORMAL
        send_families = []

        # Every last asm message should be re-announced on restart
        for family in self.neighbor.asm:
            if family in self.neighbor.families():
                self.neighbor.messages.appendleft(self.neighbor.asm[family])

        operational = None
        refresh = None
        command_eor = None
        number = 0
        refresh_enhanced = True if proto.negotiated.refresh == REFRESH.ENHANCED else False

        self.send_ka = KA(self.me, proto)

        while not self._teardown:
            for message in proto.read_message():
                self.recv_timer.check_ka(message)

                if self.send_ka() is not False:
                    # we need and will send a keepalive
                    while self.send_ka() is None:
                        yield ACTION.NOW

                # Received update
                if message.TYPE == Update.TYPE:
                    number += 1

                    self.logger.routes(LazyFormat(self.me('<< UPDATE (%d)' % number), message.attributes, lambda _: "%s%s" % (' attributes' if _ else '', _)))

                    for nlri in message.nlris:
                        self.neighbor.rib.incoming.insert_received(Change(nlri, message.attributes))
                        self.logger.routes(LazyFormat(self.me('<< UPDATE (%d) nlri ' % number), nlri, str))

                elif message.TYPE == RouteRefresh.TYPE:
                    if message.reserved == RouteRefresh.request:
                        self._resend_routes = SEND.REFRESH
                        send_families.append((message.afi, message.safi))

                # SEND OPERATIONAL
                if self.neighbor.operational:
                    if not operational:
                        new_operational = self.neighbor.messages.popleft() if self.neighbor.messages else None
                        if new_operational:
                            operational = proto.new_operational(new_operational, proto.negotiated)

                    if operational:
                        try:
                            operational.next()
                        except StopIteration:
                            operational = None

                # SEND REFRESH
                if self.neighbor.route_refresh:
                    if not refresh:
                        new_refresh = self.neighbor.refresh.popleft() if self.neighbor.refresh else None
                        if new_refresh:
                            refresh = proto.new_refresh(new_refresh)

                    if refresh:
                        try:
                            refresh.next()
                        except StopIteration:
                            refresh = None

                # Apply a pending configuration change to the outgoing RIB
                if self._reconfigure:
                    self._reconfigure = False

                    # we are here following a configuration change
                    if self._neighbor:
                        # see what changed in the configuration
                        self.neighbor.rib.outgoing.replace(self._neighbor.backup_changes, self._neighbor.changes)
                        # do not keep the previous routes in memory as they are not useful anymore
                        self._neighbor.backup_changes = []

                    self._have_routes = True

                # Take the routes already sent to that peer and resend them
                if self._resend_routes != SEND.DONE:
                    enhanced = True if refresh_enhanced and self._resend_routes == SEND.REFRESH else False
                    self._resend_routes = SEND.DONE
                    self.neighbor.rib.outgoing.resend(send_families, enhanced)
                    self._have_routes = True
                    send_families = []

                # Need to send update
                if self._have_routes and not new_routes:
                    self._have_routes = False
                    # XXX: in proto really. hum to think about ?
                    new_routes = proto.new_update()

                if new_routes:
                    try:
                        # send at most 20 chunks before giving the hand back
                        count = 20
                        while count:
                            # This can raise a NetworkError
                            new_routes.next()
                            count -= 1
                    except StopIteration:
                        new_routes = None

                elif send_eor:
                    send_eor = False
                    for _ in proto.new_eors():
                        yield ACTION.NOW
                    self.logger.message(self.me('>> EOR(s)'))

                # SEND MANUAL KEEPALIVE (only if we have no more routes to send)
                elif not command_eor and self.neighbor.eor:
                    new_eor = self.neighbor.eor.popleft()
                    command_eor = proto.new_eors(new_eor.afi, new_eor.safi)

                if command_eor:
                    try:
                        command_eor.next()
                    except StopIteration:
                        command_eor = None

                if new_routes or message.TYPE != NOP.TYPE:
                    yield ACTION.NOW
                elif self.neighbor.messages or operational:
                    yield ACTION.NOW
                elif self.neighbor.eor or command_eor:
                    yield ACTION.NOW
                else:
                    yield ACTION.LATER

                # read_message will loop until new message arrives with NOP
                if self._teardown:
                    break

        # If graceful restart, silent shutdown
        if self.neighbor.graceful_restart and proto.negotiated.sent_open.capabilities.announced(Capability.CODE.GRACEFUL_RESTART):
            self.logger.network('Closing the session without notification', 'error')
            proto.close('graceful restarted negotiated, closing without sending any notification')
            raise NetworkError('closing')

        # notify our peer of the shutdown
        raise Notify(6, self._teardown)

    def _run(self, direction):
        """Generator driving ``direction``: handshake first, then the main loop.

        Re-yields every ACTION produced by the handshake code and by _main().
        A NetworkError resets the direction (and stops the peer when only one
        connection attempt is allowed); a Notify is sent on every direction
        which has a protocol before the direction is reset.
        """
        try:
            for action in self._[direction]['code']():
                yield action

            for action in self._main(direction):
                yield action

        # CONNECTION FAILURE
        except NetworkError as network:
            self._reset(direction, 'closing connection', network)

            # we tried to connect once, it failed, we stop
            if self.once:
                self.logger.network('only one attempt to connect is allowed, stopping the peer')
                self.stop()
            return

        # NOTIFY THE PEER OF AN ERROR
        except Notify as notify:
            # note: the loop variable deliberately reuses (and overwrites) ``direction``
            for direction in ['in', 'out']:
                if self._[direction]['proto']:
                    try:
                        generator = self._[direction]['proto'].new_notification(notify)
                        try:
                            # give the notification at most 20 rounds to go out
                            maximum = 20
                            while maximum:
                                generator.next()
                                maximum -= 1
                                yield ACTION.NOW if maximum > 10 else ACTION.LATER
                        except StopIteration:
                            pass
                    except (NetworkError, ProcessError):
                        self.logger.network(self.me('NOTIFICATION NOT SENT'), 'error')
                    self._reset(direction, 'notification sent (%d,%d)' % (notify.code, notify.subcode), notify)
                else:
                    self._reset(direction)
            return
class Peer(object):
    """State holder and generator-based driver for one configured BGP neighbor.

    Two connection attempts may exist at once: 'in' (accepted from the
    remote side) and 'out' (initiated locally).  The per-direction data
    lives in ``self._[direction]`` under the keys:

    * 'state'     -- one of the STATE constants
    * 'enabled'   -- value 'generator' is reset to when the direction restarts
    * 'proto'     -- the Protocol object for the direction, or None
    * 'code'      -- bound method producing the handshake generator
    * 'generator' -- False (direction down), None (must be re-created) or
                     the generator currently being run
    """

    def __init__(self, neighbor, reactor):
        """Record the neighbor/reactor and initialise both directions to idle.

        neighbor -- the neighbor definition this peer talks to
        reactor  -- the owning reactor (used here for ``reactor.processes``)
        """
        try:
            self.logger = Logger()
            # We only try to connect via TCP once
            self.once = environment.settings().tcp.once
            self.bind = True if environment.settings().tcp.bind else False
        except RuntimeError:
            # NOTE(review): self.bind is left unset on this path -- confirm
            # that nothing reads it when the environment is not available.
            self.logger = FakeLogger()
            self.once = True

        self.reactor = reactor
        self.neighbor = neighbor
        # The next restart neighbor definition
        self._neighbor = None

        # The peer should restart after a stop
        self._restart = True
        # The peer was restarted (to know what kind of open to send for graceful restart)
        self._restarted = FORCE_GRACEFUL
        self._reset_skip()

        # We want to send all the known routes
        self._resend_routes = SEND.done
        # We have new routes for the peers
        self._have_routes = True

        # We have been asked to teardown the session with this code
        self._teardown = None

        self._ = {'in': {}, 'out': {}}

        self._['in']['state'] = STATE.idle
        self._['out']['state'] = STATE.idle

        # value to reset 'generator' to
        self._['in']['enabled'] = False
        self._['out']['enabled'] = None if not self.neighbor.passive else False

        # the networking code
        self._['out']['proto'] = None
        self._['in']['proto'] = None

        # the handshake code to run for each direction
        self._['out']['code'] = self._connect
        self._['in']['code'] = self._accept

        # the generator used by the main code
        # * False, the generator for this direction is down
        # * Generator, the code to run to connect or accept the connection
        # * None, the generator must be re-created
        self._['in']['generator'] = self._['in']['enabled']
        self._['out']['generator'] = self._['out']['enabled']

        # the keepalive generator, created by _main() once a session is up
        self._generator_keepalive = None

    def _reset(self, direction, message='', error=''):
        """Put ``direction`` back to idle after a failure or a notification.

        When the peer is allowed to restart, the protocol (if any) is closed,
        the generator is reset to its 'enabled' value, the pending teardown is
        cleared, the back-off is increased, the RIB is reset and a pending
        neighbor definition replaces the current one.  Otherwise the direction
        is simply marked as down.
        """
        self._[direction]['state'] = STATE.idle

        if self._restart:
            if self._[direction]['proto']:
                self._[direction]['proto'].close('%s loop reset %s %s' % (direction, message, str(error)))
            self._[direction]['proto'] = None
            self._[direction]['generator'] = self._[direction]['enabled']

            self._teardown = None
            self._more_skip(direction)
            self.neighbor.rib.reset()

            # If we are restarting, and the neighbor definition is different, update the neighbor
            if self._neighbor:
                self.neighbor = self._neighbor
                self._neighbor = None
        else:
            self._[direction]['generator'] = False
            self._[direction]['proto'] = None

    def _stop(self, direction, message):
        """Mark ``direction`` as down and close its protocol, if it has one.

        The 'state' entry is intentionally left untouched (as before); because
        of that this method can be reached again for a direction whose proto
        was already cleared, so the close is guarded against None.
        """
        self._[direction]['generator'] = False
        proto = self._[direction]['proto']
        if proto:
            proto.close('%s loop stop %s' % (direction, message))
        self._[direction]['proto'] = None

    # connection delay

    def _reset_skip(self):
        """Cancel any connection back-off."""
        # We are currently not skipping connection attempts
        self._skip_time = time.time()
        # when we can not connect to a peer how many time (in loop) should we back-off
        self._next_skip = 0

    def _more_skip(self, direction):
        """Increase the back-off for outgoing connections (capped at 60)."""
        if direction != 'out':
            return
        self._skip_time = time.time() + self._next_skip
        self._next_skip = int(1 + self._next_skip * 1.2)
        if self._next_skip > 60:
            self._next_skip = 60

    # logging

    def me(self, message):
        """Return ``message`` prefixed with the peer address and ASN."""
        return "peer %s ASN %-7s %s" % (self.neighbor.peer_address, self.neighbor.peer_as, message)

    def _output(self, direction, message):
        """Return me(message) prefixed with the connection name of ``direction``.

        The direction must currently have a protocol with a connection.
        """
        return "%s %s" % (self._[direction]['proto'].connection.name(), self.me(message))

    def _log(self, direction):
        """Return a one-argument formatter bound to ``direction`` (see _output)."""
        def inner(message):
            return self._output(direction, message)
        return inner

    # control

    def stop(self):
        """Request a teardown (code 3) without restart."""
        self._teardown = 3
        self._restart = False
        self._restarted = False
        self._reset_skip()

    def resend(self):
        """Ask the main loop to resend the routes already sent to the peer."""
        self._resend_routes = SEND.normal
        self._reset_skip()

    def send_new(self, changes=None, update=None):
        """Flag that there may be new routes to send.

        changes -- when given, replaces the content of the outgoing RIB
        update  -- explicit value for the 'have routes' flag; when None the
                   neighbor's ``flush`` setting is used
        """
        if changes:
            self.neighbor.rib.outgoing.replace(changes)
        self._have_routes = self.neighbor.flush if update is None else update

    def restart(self, restart_neighbor=None):
        """Tear the session down (code 3) and let it re-establish.

        restart_neighbor -- neighbor definition to switch to on reset, if any
        """
        # we want to tear down the session and re-establish it
        self._teardown = 3
        self._restart = True
        self._restarted = True
        self._resend_routes = SEND.normal
        self._neighbor = restart_neighbor
        self._reset_skip()

    def teardown(self, code, restart=True):
        """Request a teardown with ``code``; ``restart`` controls reconnection."""
        self._restart = restart
        self._teardown = code
        self._reset_skip()

    # sockets we must monitor

    def sockets(self):
        """Return the io objects of every direction which has a live connection."""
        ios = []
        for direction in ['in', 'out']:
            proto = self._[direction]['proto']
            if proto and proto.connection and proto.connection.io:
                ios.append(proto.connection.io)
        return ios

    def incoming(self, connection):
        """Attach an accepted ``connection`` to the 'in' direction.

        Returns False (and keeps the current one) when an incoming protocol
        already exists, True once the new connection has been registered.
        """
        # if the other side fails, we go back to idle
        if self._['in']['proto'] not in (True, False, None):
            self.logger.network('we already have a peer at this address')
            return False

        self._['in']['proto'] = Protocol(self).accept(connection)
        # Let's make sure we do some work with this connection
        self._['in']['generator'] = None
        self._['in']['state'] = STATE.connect
        return True

    def established(self):
        """Return True when either direction is in the established state."""
        return self._['in']['state'] == STATE.established or self._['out']['state'] == STATE.established

    def _accept(self):
        """Generator running the OPEN/KEEPALIVE handshake on the 'in' direction.

        Yields ACTION values for the reactor.  Raises Interrupted (with
        ``direction = 'in'``) when a collision with the outgoing connection
        is resolved against the incoming one.
        """
        # we can do this as Protocol is a mutable object
        proto = self._['in']['proto']

        # send OPEN
        for message in proto.new_open(self._restarted):
            if ord(message.TYPE) == Message.Type.NOP:
                yield ACTION.immediate

        proto.negotiated.sent(message)

        self._['in']['state'] = STATE.opensent

        # Read OPEN
        wait = environment.settings().bgp.openwait
        opentimer = Timer(self._log('in'), wait, 1, 1, 'waited for open too long, we do not like stuck in active')
        # Only yield if we have not the open, otherwise the reactor can run the other connection
        # which would be bad as we need to do the collision check without going to the other peer
        for message in proto.read_open(self.neighbor.peer_address.ip):
            opentimer.tick(message)
            if ord(message.TYPE) == Message.Type.NOP:
                yield ACTION.later

        self._['in']['state'] = STATE.openconfirm
        proto.negotiated.received(message)
        proto.validate_open()

        if self._['out']['state'] == STATE.openconfirm:
            self.logger.network('incoming connection finds the outgoing connection is in openconfirm')
            local_id = self.neighbor.router_id.packed
            remote_id = proto.negotiated.received_open.router_id.packed

            if local_id < remote_id:
                self.logger.network('closing the outgoing connection')
                self._stop('out', 'collision local id < remote id')
                yield ACTION.later
            else:
                self.logger.network('aborting the incoming connection')
                stop = Interrupted()
                stop.direction = 'in'
                raise stop

        # Send KEEPALIVE
        for message in self._['in']['proto'].new_keepalive('OPENCONFIRM'):
            yield ACTION.immediate

        # Start keeping keepalive timer
        self.timer = Timer(self._log('in'), proto.negotiated.holdtime, 4, 0)
        # Read KEEPALIVE
        for message in proto.read_keepalive('ESTABLISHED'):
            self.timer.tick(message)
            yield ACTION.later

        self._['in']['state'] = STATE.established
        # let the caller know that we were successful
        yield ACTION.immediate

    def _connect(self):
        """Generator establishing the outgoing connection and its handshake.

        Yields ACTION values for the reactor.  Raises Interrupted (with
        ``direction = 'out'``) when the connection fails, when an incoming
        connection showed up first, or when a collision is resolved against
        the outgoing connection.
        """
        # try to establish the outgoing connection

        proto = Protocol(self)
        generator = proto.connect()

        connected = False
        try:
            while not connected:
                connected = generator.next()
                # we want to come back as soon as possible
                yield ACTION.later
        except StopIteration:
            # Connection failed
            if not connected:
                proto.close('connection to peer failed')
            # A connection arrived before we could establish !
            if not connected or self._['in']['proto']:
                stop = Interrupted()
                stop.direction = 'out'
                raise stop

        self._['out']['state'] = STATE.connect
        self._['out']['proto'] = proto

        # send OPEN
        # Only yield if we have not the open, otherwise the reactor can run the other connection
        # which would be bad as we need to set the state without going to the other peer
        for message in proto.new_open(self._restarted):
            if ord(message.TYPE) == Message.Type.NOP:
                yield ACTION.immediate

        proto.negotiated.sent(message)

        self._['out']['state'] = STATE.opensent

        # Read OPEN
        wait = environment.settings().bgp.openwait
        opentimer = Timer(self._log('out'), wait, 1, 1, 'waited for open too long, we do not like stuck in active')

        for message in self._['out']['proto'].read_open(self.neighbor.peer_address.ip):
            opentimer.tick(message)
            # XXX: FIXME: change the whole code to use the ord and not the chr version
            # Only yield if we have not the open, otherwise the reactor can run the other connection
            # which would be bad as we need to do the collision check
            if ord(message.TYPE) == Message.Type.NOP:
                yield ACTION.later

        self._['out']['state'] = STATE.openconfirm
        proto.negotiated.received(message)
        proto.validate_open()

        if self._['in']['state'] == STATE.openconfirm:
            self.logger.network('outgoing connection finds the incoming connection is in openconfirm')
            local_id = self.neighbor.router_id.packed
            remote_id = proto.negotiated.received_open.router_id.packed

            if local_id < remote_id:
                self.logger.network('aborting the outgoing connection')
                stop = Interrupted()
                stop.direction = 'out'
                raise stop
            else:
                self.logger.network('closing the incoming connection')
                self._stop('in', 'collision local id < remote id')
                yield ACTION.later

        # Send KEEPALIVE
        for message in proto.new_keepalive('OPENCONFIRM'):
            yield ACTION.immediate

        # Start keeping keepalive timer
        self.timer = Timer(self._log('out'), self._['out']['proto'].negotiated.holdtime, 4, 0)
        # Read KEEPALIVE
        for message in self._['out']['proto'].read_keepalive('ESTABLISHED'):
            self.timer.tick(message)
            yield ACTION.immediate

        self._['out']['state'] = STATE.established
        # let the caller know that we were successful
        yield ACTION.immediate

    def _keepalive(self, direction):
        """Generator sending keepalives on ``direction`` when the timer asks for it.

        The first value produced is the string 'ready'; afterwards it yields:

        * True  if we just sent the keepalive
        * None  if we are working as we should
        * False if something went wrong (the generator then ends)
        """
        yield 'ready'

        need_keepalive = False
        generator = None
        last = NOP

        while not self._teardown:
            # SEND KEEPALIVES
            need_keepalive |= self.timer.keepalive()

            if need_keepalive and not generator:
                proto = self._[direction]['proto']
                if not proto:
                    yield False
                    break
                generator = proto.new_keepalive()
                need_keepalive = False

            if generator:
                try:
                    last = generator.next()
                    if last.TYPE == KeepAlive.TYPE:
                        # close the generator and raise a StopIteration
                        generator.next()
                    yield None
                except (NetworkError, ProcessError):
                    yield False
                    break
                except StopIteration:
                    generator = None
                    if last.TYPE != KeepAlive.TYPE:
                        self._generator_keepalive = False
                        yield False
                        break
                    yield True
            else:
                yield None

    def keepalive(self):
        """Advance the keepalive generator one step and return what it produced.

        When there is no usable generator (or it is exhausted) the result is
        True only if ``self._generator_keepalive`` is None.
        """
        generator = self._generator_keepalive
        if generator:
            # XXX: CRITICAL : this code needs the same exception than the one protecting the main loop
            try:
                return generator.next()
            except StopIteration:
                pass
        return self._generator_keepalive is None

    def _main(self, direction):
        """Generator running the established session for ``direction``.

        Yields ACTION.immediate while there is pending work (routes, received
        messages, queued neighbor messages) and ACTION.later otherwise.

        Raises Notify when a teardown was requested or the helper processes
        could not be told the session is up, and NetworkError('closing') for
        the silent shutdown used when graceful restart was announced.
        """
        if self._teardown:
            raise Notify(6, 3)

        proto = self._[direction]['proto']

        # Initialise the keepalive
        self._generator_keepalive = self._keepalive(direction)

        # Announce to the process BGP is up
        self.logger.network('Connected to peer %s (%s)' % (self.neighbor.name(), direction))
        if self.neighbor.api.neighbor_changes:
            try:
                self.reactor.processes.up(self)
            except ProcessError:
                # Can not find any better error code than 6,0 !
                # XXX: We can not restart the program so this will come back again and again - FIX
                # XXX: In the main loop we do exit on this kind of error
                raise Notify(6, 0, 'ExaBGP Internal error, sorry.')

        send_eor = True
        new_routes = None
        self._resend_routes = SEND.normal
        send_families = []

        # Every last asm message should be re-announced on restart
        for family in self.neighbor.asm:
            if family in self.neighbor.families():
                self.neighbor.messages.appendleft(self.neighbor.asm[family])

        counter = Counter(self.logger, self._log(direction))
        operational = None
        refresh = None
        number = 0

        while not self._teardown:
            for message in proto.read_message():
                # Update timer
                self.timer.tick(message)

                # Give information on the number of routes seen
                counter.display()

                # Received update
                if message.TYPE == Update.TYPE:
                    counter.increment(len(message.nlris))
                    number += 1

                    self.logger.routes(LazyFormat(self.me('<< UPDATE (%d)' % number), lambda _: "%s%s" % (' attributes' if _ else '', _), message.attributes))

                    for nlri in message.nlris:
                        self.neighbor.rib.incoming.insert_received(Change(nlri, message.attributes))
                        self.logger.routes(LazyFormat(self.me('<< UPDATE (%d) nlri ' % number), str, nlri))

                elif message.TYPE == RouteRefresh.TYPE:
                    if message.reserved == RouteRefresh.request:
                        self._resend_routes = SEND.refresh
                        send_families.append((message.afi, message.safi))

                # SEND OPERATIONAL
                if self.neighbor.operational:
                    if not operational:
                        new_operational = self.neighbor.messages.popleft() if self.neighbor.messages else None
                        if new_operational:
                            operational = proto.new_operational(new_operational, proto.negotiated)

                    if operational:
                        try:
                            operational.next()
                        except StopIteration:
                            operational = None

                # SEND REFRESH
                if self.neighbor.route_refresh:
                    if not refresh:
                        new_refresh = self.neighbor.refresh.popleft() if self.neighbor.refresh else None
                        if new_refresh:
                            enhanced_negotiated = True if proto.negotiated.refresh == REFRESH.enhanced else False
                            refresh = proto.new_refresh(new_refresh, enhanced_negotiated)

                    if refresh:
                        try:
                            refresh.next()
                        except StopIteration:
                            refresh = None

                # Take the routes already sent to that peer and resend them
                if self._resend_routes != SEND.done:
                    enhanced_refresh = True if self._resend_routes == SEND.refresh and proto.negotiated.refresh == REFRESH.enhanced else False
                    self._resend_routes = SEND.done
                    self.neighbor.rib.outgoing.resend(send_families, enhanced_refresh)
                    self._have_routes = True
                    send_families = []

                # Need to send update
                if self._have_routes and not new_routes:
                    self._have_routes = False
                    # XXX: in proto really. hum to think about ?
                    new_routes = proto.new_update()

                if new_routes:
                    try:
                        # send at most 20 chunks before giving the hand back
                        count = 20
                        while count:
                            # This can raise a NetworkError
                            new_routes.next()
                            count -= 1
                    except StopIteration:
                        new_routes = None

                elif send_eor:
                    send_eor = False
                    for eor in proto.new_eors():
                        yield ACTION.immediate
                    self.logger.message(self.me('>> EOR(s)'))

                # Go to other Peers
                yield ACTION.immediate if new_routes or message.TYPE != NOP.TYPE or self.neighbor.messages else ACTION.later

                # read_message will loop until new message arrives with NOP
                if self._teardown:
                    break

        # If graceful restart, silent shutdown
        if self.neighbor.graceful_restart and proto.negotiated.sent_open.capabilities.announced(CapabilityID.GRACEFUL_RESTART):
            self.logger.network('Closing the session without notification', 'error')
            proto.close('graceful restarted negotiated, closing without sending any notification')
            raise NetworkError('closing')

        # notify our peer of the shutdown
        raise Notify(6, self._teardown)

    def _run(self, direction):
        """Generator driving ``direction``: handshake first, then the main loop.

        Re-yields every ACTION produced by the handshake code and by _main().
        A NetworkError resets the direction (and stops the peer when only one
        connection attempt is allowed); a Notify is sent on every direction
        which has a protocol before the direction is reset.
        """
        try:
            for action in self._[direction]['code']():
                yield action

            for action in self._main(direction):
                yield action

        # CONNECTION FAILURE
        except NetworkError as e:
            self._reset(direction, 'closing connection', e)

            # we tried to connect once, it failed, we stop
            if self.once:
                self.logger.network('only one attempt to connect is allowed, stopping the peer')
                self.stop()
            return

        # NOTIFY THE PEER OF AN ERROR
        except Notify as n:
            # note: the loop variable deliberately reuses (and overwrites) ``direction``
            for direction in ['in', 'out']:
                if self._[direction]['proto']:
                    try:
                        generator = self._[direction]['proto'].new_notification(n)
                        try:
                            # give the notification at most 20 rounds to go out
                            maximum = 20
                            while maximum:
                                generator.next()
                                maximum -= 1
                                yield ACTION.immediate if maximum > 10 else ACTION.later
                        except StopIteration:
                            pass
                    except (NetworkError, ProcessError):
                        self.logger.network(self._output(direction, 'NOTIFICATION NOT SENT'), 'error')
                    self._reset(direction, 'notification sent (%d,%d)' % (n.code, n.subcode), n)
                else:
                    self._reset(direction)
            return
class Peer(object):
    """One BGP peer, tracking an incoming ('in') and an outgoing ('out') session.

    Per-direction data lives in ``self._[direction]`` under the keys
    'state', 'enabled', 'proto', 'code' and 'generator'.
    """

    def __init__(self, neighbor, reactor):
        """Store the neighbor/reactor and initialise both directions as idle."""
        try:
            self.logger = Logger()
            # We only to try to connect via TCP once
            self.once = environment.settings().tcp.once
            self.bind = True if environment.settings().tcp.bind else False
        except RuntimeError:
            self.logger = FakeLogger()
            self.once = True

        self.reactor = reactor
        self.neighbor = neighbor
        # The next restart neighbor definition
        self._neighbor = None

        # The peer should restart after a stop
        self._restart = True
        # The peer was restarted (to know what kind of open to send for graceful restart)
        self._restarted = FORCE_GRACEFUL
        self._reset_skip()

        # We want to send all the known routes
        self._resend_routes = True
        # We have new routes for the peers
        self._have_routes = True

        # We have been asked to teardown the session with this code
        self._teardown = None

        self._ = {'in': {}, 'out': {}}

        self._['in']['state'] = STATE.idle
        self._['out']['state'] = STATE.idle

        # value to reset 'generator' to
        self._['in']['enabled'] = False
        self._['out']['enabled'] = None if not self.neighbor.passive else False

        # the networking code
        self._['out']['proto'] = None
        self._['in']['proto'] = None

        # the networking code
        self._['out']['code'] = self._connect
        self._['in']['code'] = self._accept

        # the generator used by the main code
        # * False, the generator for this direction is down
        # * Generator, the code to run to connect or accept the connection
        # * None, the generator must be re-created
        self._['in']['generator'] = self._['in']['enabled']
        self._['out']['generator'] = self._['out']['enabled']

    def _reset(self, direction, message='', error=''):
        """Put `direction` back to idle, closing its protocol if we restart.

        `message` and `error` are only used to build the close reason.
        """
        slot = self._[direction]
        slot['state'] = STATE.idle

        if not self._restart:
            slot['generator'] = False
            slot['proto'] = None
            return

        if slot['proto']:
            slot['proto'].close('%s loop reset %s %s' % (direction, message, str(error)))
        slot['proto'] = None
        slot['generator'] = slot['enabled']
        self._teardown = None
        self._more_skip(direction)

        # If we are restarting, and the neighbor definition is different, update the neighbor
        if self._neighbor:
            self.neighbor = self._neighbor
            self._neighbor = None

    # connection delay

    def _reset_skip(self):
        """Clear the connection back-off."""
        # We are currently not skipping connection attempts
        self._skip_time = time.time()
        # when we can not connect to a peer how many time (in loop) should we back-off
        self._next_skip = 0

    def _more_skip(self, direction):
        """Grow the back-off for the outgoing direction, capped at 60."""
        if direction != 'out':
            return
        self._skip_time = time.time() + self._next_skip
        self._next_skip = min(60, int(1 + self._next_skip * 1.2))

    # logging

    def me(self, message):
        """Prefix `message` with the peer address and ASN."""
        return "peer %s ASN %-7s %s" % (self.neighbor.peer_address, self.neighbor.peer_as, message)

    def _output(self, direction, message):
        """Prefix `me(message)` with the connection name of `direction`."""
        return "%s %s" % (self._[direction]['proto'].connection.name(), self.me(message))

    def _log(self, direction):
        """Return a one-argument formatter bound to `direction`."""
        def inner(message):
            return self._output(direction, message)
        return inner

    # control

    def stop(self):
        """Request a teardown (code 3) with no restart."""
        self._teardown = 3
        self._restart = False
        self._restarted = False
        self._reset_skip()

    def resend(self):
        """Ask the main loop to resend the known routes."""
        self._resend_routes = True
        self._reset_skip()

    def send_new(self):
        """Flag that new routes are waiting to be sent."""
        self._have_routes = True

    def restart(self, restart_neighbor=None):
        """Request a teardown (code 3) followed by a restart."""
        # we want to tear down the session and re-establish it
        self._teardown = 3
        self._restart = True
        self._restarted = True
        self._resend_routes = True
        self._neighbor = restart_neighbor
        self._reset_skip()

    def teardown(self, code, restart=True):
        """Request a teardown with `code`; `restart` selects whether to come back."""
        self._restart = restart
        self._teardown = code
        self._reset_skip()

    # sockets we must monitor

    def sockets(self):
        """Return the io objects of every direction that has a live connection."""
        ios = []
        for direction in ['in', 'out']:
            proto = self._[direction]['proto']
            if proto and proto.connection and proto.connection.io:
                ios.append(proto.connection.io)
        return ios

    def incoming(self, connection):
        """Attach an accepted `connection`; return False if one is already attached."""
        # if the other side fails, we go back to idle
        if self._['in']['proto'] not in (True, False, None):
            self.logger.network('we already have a peer at this address')
            return False

        self._['in']['proto'] = Protocol(self).accept(connection)
        # Let's make sure we do some work with this connection
        self._['in']['generator'] = None
        self._['in']['state'] = STATE.connect
        return True

    def _accept(self):
        """Generator driving the incoming session from OPEN to established."""
        # we can do this as Protocol is a mutable object
        proto = self._['in']['proto']

        # send OPEN
        for message in proto.new_open(self._restarted):
            if ord(message.TYPE) == Message.Type.NOP:
                yield ACTION.immediate

        proto.negotiated.sent(message)

        self._['in']['state'] = STATE.opensent

        # Read OPEN
        wait = environment.settings().bgp.openwait
        opentimer = Timer(self._log('in'), wait, 1, 1,
                          'waited for open too long, we do not like stuck in active')
        # Only yield if we have not the open, otherwise the reactor can run the other connection
        # which would be bad as we need to do the collision check without going to the other peer
        for message in proto.read_open(self.neighbor.peer_address.ip):
            opentimer.tick(message)
            if ord(message.TYPE) == Message.Type.NOP:
                yield ACTION.later

        self._['in']['state'] = STATE.openconfirm
        proto.negotiated.received(message)
        proto.validate_open()

        if self._['out']['state'] == STATE.openconfirm:
            self.logger.network('incoming connection finds the outgoing connection is in openconfirm')
            local_id = self.neighbor.router_id.packed
            remote_id = proto.negotiated.received_open.router_id.packed

            if local_id < remote_id:
                self.logger.network('closing the outgoing connection')
                self._reset('out', 'collision local id < remote id')
                yield ACTION.immediate
            else:
                self.logger.network('aborting the incoming connection')
                stop = Interrupted()
                stop.direction = 'in'
                raise stop

        # Send KEEPALIVE
        for message in self._['in']['proto'].new_keepalive('OPENCONFIRM'):
            yield ACTION.immediate

        # Start keeping keepalive timer
        self.timer = Timer(self._log('in'), proto.negotiated.holdtime, 4, 0)
        # Read KEEPALIVE
        for message in proto.read_keepalive('ESTABLISHED'):
            self.timer.tick(message)
            yield ACTION.later

        self._['in']['state'] = STATE.established
        # let the caller know that we were successful
        yield ACTION.immediate

    def _connect(self):
        """Generator driving the outgoing session from connect to established."""
        # try to establish the outgoing connection
        proto = Protocol(self)
        generator = proto.connect()

        connected = False
        try:
            while not connected:
                connected = next(generator)
                # we want to come back as soon as possible
                yield ACTION.immediate
        except StopIteration:
            # NOTE(review): the original indentation was lost; both checks below
            # are reconstructed as part of this handler - confirm.
            # Connection failed
            if not connected:
                proto.close('connection to peer failed')
            # A connection arrived before we could establish !
            if not connected or self._['in']['proto']:
                stop = Interrupted()
                stop.direction = 'out'
                raise stop

        self._['out']['state'] = STATE.connect
        self._['out']['proto'] = proto

        # send OPEN
        # Only yield if we have not the open, otherwise the reactor can run the other connection
        # which would be bad as we need to set the state without going to the other peer
        for message in proto.new_open(self._restarted):
            if ord(message.TYPE) == Message.Type.NOP:
                yield ACTION.immediate

        proto.negotiated.sent(message)

        self._['out']['state'] = STATE.opensent

        # Read OPEN
        wait = environment.settings().bgp.openwait
        opentimer = Timer(self._log('out'), wait, 1, 1,
                          'waited for open too long, we do not like stuck in active')
        for message in self._['out']['proto'].read_open(self.neighbor.peer_address.ip):
            opentimer.tick(message)
            # XXX: FIXME: change the whole code to use the ord and not the chr version
            # Only yield if we have not the open, otherwise the reactor can run the other connection
            # which would be bad as we need to do the collision check
            if ord(message.TYPE) == Message.Type.NOP:
                yield ACTION.later

        self._['out']['state'] = STATE.openconfirm
        proto.negotiated.received(message)
        proto.validate_open()

        if self._['in']['state'] == STATE.openconfirm:
            self.logger.network('outgoing connection finds the incoming connection is in openconfirm')
            local_id = self.neighbor.router_id.packed
            remote_id = proto.negotiated.received_open.router_id.packed

            if local_id < remote_id:
                self.logger.network('aborting the outgoing connection')
                stop = Interrupted()
                stop.direction = 'out'
                raise stop
            else:
                self.logger.network('closing the incoming connection')
                # this branch runs when local_id >= remote_id
                self._reset('in', 'collision local id >= remote id')
                yield ACTION.immediate

        # Send KEEPALIVE
        for message in proto.new_keepalive('OPENCONFIRM'):
            yield ACTION.immediate

        # Start keeping keepalive timer
        self.timer = Timer(self._log('out'), self._['out']['proto'].negotiated.holdtime, 4, 0)
        # Read KEEPALIVE
        for message in self._['out']['proto'].read_keepalive('ESTABLISHED'):
            self.timer.tick(message)
            yield ACTION.later

        self._['out']['state'] = STATE.established
        # let the caller know that we were successful
        yield ACTION.immediate

    def _main(self, direction):
        """Generator running the established session of `direction`.

        Yields ACTION.immediate when there is more work pending and
        ACTION.later otherwise. Leaves by raising NetworkError (silent
        close when graceful restart was announced) or Notify.
        """
        if self._teardown:
            raise Notify(6, 3)

        proto = self._[direction]['proto']

        # Announce to the process BGP is up
        self.logger.network('Connected to peer %s (%s)' % (self.neighbor.name(), direction))
        if self.neighbor.api.neighbor_changes:
            try:
                self.reactor.processes.up(self.neighbor.peer_address)
            except ProcessError:
                # Can not find any better error code than 6,0 !
                # XXX: We can not restart the program so this will come back again and again - FIX
                # XXX: In the main loop we do exit on this kind of error
                raise Notify(6, 0, 'ExaBGP Internal error, sorry.')

        send_eor = True
        new_routes = None
        self._resend_routes = True

        # Every last asm message should be re-announced on restart
        for family in self.neighbor.asm:
            if family in self.neighbor.families():
                self.neighbor.messages.appendleft(self.neighbor.asm[family])

        counter = Counter(self.logger, self._log(direction))
        need_keepalive = False
        keepalive = None
        operational = None

        while not self._teardown:
            for message in proto.read_message():
                # Update timer
                self.timer.tick(message)

                # Give information on the number of routes seen
                counter.display()

                # Received update
                if message.TYPE == Update.TYPE:
                    counter.increment(len(message.nlris))

                    for nlri in message.nlris:
                        self.neighbor.rib.incoming.insert_received(Change(nlri, message.attributes))
                        self.logger.routes(LazyFormat(self.me(''), str, nlri))

                # SEND KEEPALIVES
                need_keepalive |= self.timer.keepalive()
                if need_keepalive and not keepalive:
                    keepalive = proto.new_keepalive()
                    need_keepalive = False

                if keepalive:
                    try:
                        next(keepalive)
                    except StopIteration:
                        keepalive = None

                # SEND OPERATIONAL
                if self.neighbor.operational:
                    if not operational:
                        new_operational = self.neighbor.messages.popleft() if self.neighbor.messages else None
                        if new_operational:
                            operational = proto.new_operational(new_operational)

                    if operational:
                        try:
                            next(operational)
                        except StopIteration:
                            operational = None

                # Take the routes already sent to that peer and resend them
                if self._resend_routes:
                    self._resend_routes = False
                    self.neighbor.rib.outgoing.resend_known()
                    self._have_routes = True

                # Need to send update
                if self._have_routes and not new_routes:
                    self._have_routes = False
                    # XXX: in proto really. hum to think about ?
                    new_routes = proto.new_update()

                if new_routes:
                    try:
                        # send at most 20 steps per pass so other peers get a turn
                        count = 20
                        while count:
                            # This can raise a NetworkError
                            next(new_routes)
                            count -= 1
                    except StopIteration:
                        new_routes = None

                elif send_eor:
                    send_eor = False
                    for eor in proto.new_eors():
                        yield ACTION.immediate
                    self.logger.message(self.me('>> EOR(s)'))

                # Go to other Peers
                yield ACTION.immediate if new_routes or message.TYPE != NOP.TYPE else ACTION.later

                # read_message will loop until new message arrives with NOP
                if self._teardown:
                    break

        # If graceful restart, silent shutdown
        if self.neighbor.graceful_restart and proto.negotiated.sent_open.capabilities.announced(
                CapabilityID.GRACEFUL_RESTART):
            self.logger.network('Closing the session without notification', 'error')
            proto.close('graceful restarted negotiated, closing without sending any notification')
            raise NetworkError('closing')

        # notify our peer of the shutdown
        raise Notify(6, self._teardown)

    def _run(self, direction):
        """Generator chaining the connect/accept code and `_main` for `direction`.

        Yields the actions produced by both; on NetworkError or Notify it
        resets the affected direction(s) and returns.
        """
        try:
            for action in self._[direction]['code']():
                yield action

            for action in self._main(direction):
                yield action

        # CONNECTION FAILURE
        except NetworkError as e:
            self._reset(direction, 'closing connection', e)

            # we tried to connect once, it failed, we stop
            if self.once:
                self.logger.network('only one attempt to connect is allowed, stoping the peer')
                self.stop()

            return

        # NOTIFY THE PEER OF AN ERROR
        except Notify as n:
            for direction in ['in', 'out']:
                if self._[direction]['proto']:
                    try:
                        self._[direction]['proto'].new_notification(n)
                    except (NetworkError, ProcessError):
                        # 'error' is the log level, not an argument of _output()
                        self.logger.network(self._output(direction, 'NOTIFICATION NOT SENT'), 'error')
                    self._reset(direction, 'notification sent (%d,%d)' % (n.code, n.subcode), n)
                else:
                    self._reset(direction)
            return
class Peer (object): def __init__ (self, neighbor, reactor): try: self.logger = Logger() # We only to try to connect via TCP once self.once = environment.settings().tcp.once self.bind = True if environment.settings().tcp.bind else False except RuntimeError: self.logger = FakeLogger() self.once = False self.bind = True now = time.time() self.reactor = reactor self.neighbor = neighbor # The next restart neighbor definition self._neighbor = None self.proto = None self.fsm = FSM(self,FSM.IDLE) self.stats = { 'fsm': self.fsm, 'creation': now, 'complete': now, } self.generator = None # The peer should restart after a stop self._restart = True # The peer was restarted (to know what kind of open to send for graceful restart) self._restarted = FORCE_GRACEFUL # We want to remove routes which are not in the configuration anymote afte a signal to reload self._reconfigure = True # We want to send all the known routes self._resend_routes = SEND.DONE # We have new routes for the peers self._have_routes = True # We have been asked to teardown the session with this code self._teardown = None self._delay = Delay() self.recv_timer = None def _reset (self, message='',error=''): self.fsm.change(FSM.IDLE) self.stats = { 'fsm': self.fsm, 'creation': self.stats['creation'], 'complete': self.stats['creation'], } if self.proto: self.proto.close(u"peer reset, message [{0}] error[{1}]".format(message, error)) self._delay.increase() self.proto = None if not self._restart or self.neighbor.generated: self.generator = False return self.generator = None self._teardown = None self.neighbor.rib.reset() # If we are restarting, and the neighbor definition is different, update the neighbor if self._neighbor: self.neighbor = self._neighbor self._neighbor = None def _stop (self, message): self.generator = False self.proto.close('stop, message [%s]' % message) self.proto = None # logging def me (self, message): return "peer %s ASN %-7s %s" % (self.neighbor.peer_address,self.neighbor.peer_as,message) # 
control def stop (self): self._teardown = 3 self._restart = False self._restarted = False self._delay.reset() self.fsm.change(FSM.IDLE) self.stats = { 'fsm': self.fsm, 'creation': self.stats['creation'], 'reset': time.time(), } self.neighbor.rib.uncache() def resend (self): self._resend_routes = SEND.NORMAL self._delay.reset() def schedule_rib_check (self, changes=None, update=None): if changes: self.neighbor.rib.outgoing.replace(changes) self._have_routes = self.neighbor.flush if update is None else update def reestablish (self, restart_neighbor=None): # we want to tear down the session and re-establish it self._teardown = 3 self._restart = True self._restarted = True self._resend_routes = SEND.NORMAL self._neighbor = restart_neighbor self._delay.reset() def reconfigure (self, restart_neighbor=None): # we want to update the route which were in the configuration file self._reconfigure = True self._neighbor = restart_neighbor self._resend_routes = SEND.NORMAL self._neighbor = restart_neighbor def teardown (self, code, restart=True): self._restart = restart self._teardown = code self._delay.reset() # sockets we must monitor def sockets (self): ios = [] if self.proto and self.proto.connection and self.proto.connection.io: ios.append(self.proto.connection.io) return ios def handle_connection (self, connection): # if the other side fails, we go back to idle if self.fsm == FSM.ESTABLISHED: self.logger.network('we already have a peer in state established for %s' % connection.name()) return connection.notification(6,7,b'could not accept the connection, already established') # 6.8 The convention is to compare the BGP Identifiers of the peers # involved in the collision and to retain only the connection initiated # by the BGP speaker with the higher-valued BGP Identifier. 
# FSM.IDLE , FSM.ACTIVE , FSM.CONNECT , FSM.OPENSENT , FSM.OPENCONFIRM , FSM.ESTABLISHED if self.fsm == FSM.OPENCONFIRM: # We cheat: we are not really reading the OPEN, we use the data we have instead # it does not matter as the open message will be the same anyway local_id = self.neighbor.router_id.pack() remote_id = self.proto.negotiated.received_open.router_id.pack() if remote_id < local_id: self.logger.network('closing incoming connection as we have an outgoing connection with higher router-id for %s' % connection.name()) return connection.notification(6,7,b'could not accept the connection, as another connection is already in open-confirm and will go through') # accept the connection if self.proto: self.proto.close('closing outgoing connection as we have another incoming on with higher router-id') self.proto = Protocol(self).accept(connection) self.generator = None # Let's make sure we do some work with this connection self._delay.reset() return None def established (self): return self.fsm == FSM.ESTABLISHED def negotiated_families(self): if self.proto: families = ["%s/%s" % (x[0], x[1]) for x in self.proto.negotiated.families] else: families = ["%s/%s" % (x[0], x[1]) for x in self.neighbor.families()] if len(families) > 1: return "[ %s ]" % " ".join(families) elif len(families) == 1: return families[0] return '' def _connect (self): proto = Protocol(self) generator = proto.connect() connected = False try: while not connected: if self._teardown: raise StopIteration() connected = six.next(generator) # we want to come back as soon as possible yield ACTION.LATER self.proto = proto except StopIteration: # Connection failed if not connected and self.proto: self.proto.close('connection to %s:%d failed' % (self.neighbor.peer_address,self.neighbor.connect)) # A connection arrived before we could establish ! 
if not connected or self.proto: yield ACTION.NOW raise Interrupted() def _send_open (self): message = Message.CODE.NOP for message in self.proto.new_open(): if ordinal(message.TYPE) == Message.CODE.NOP: yield ACTION.NOW yield message def _read_open (self): wait = environment.settings().bgp.openwait opentimer = ReceiveTimer(self.proto.connection.session,wait,1,1,'waited for open too long, we do not like stuck in active') # Only yield if we have not the open, otherwise the reactor can run the other connection # which would be bad as we need to do the collission check without going to the other peer for message in self.proto.read_open(self.neighbor.peer_address.top()): opentimer.check_ka(message) # XXX: FIXME: change the whole code to use the ord and not the chr version # Only yield if we have not the open, otherwise the reactor can run the other connection # which would be bad as we need to do the collission check if ordinal(message.TYPE) == Message.CODE.NOP: yield ACTION.NOW yield message def _send_ka (self): for message in self.proto.new_keepalive('OPENCONFIRM'): yield ACTION.NOW def _read_ka (self): # Start keeping keepalive timer for message in self.proto.read_keepalive(): self.recv_timer.check_ka(message) yield ACTION.NOW def _establish (self): # try to establish the outgoing connection self.fsm.change(FSM.ACTIVE) if not self.proto: for action in self._connect(): if action in ACTION.ALL: yield action self.fsm.change(FSM.CONNECT) # normal sending of OPEN first ... 
if self.neighbor.local_as: for sent_open in self._send_open(): if sent_open in ACTION.ALL: yield sent_open self.proto.negotiated.sent(sent_open) self.fsm.change(FSM.OPENSENT) # read the peer's open for received_open in self._read_open(): if received_open in ACTION.ALL: yield received_open self.proto.negotiated.received(received_open) # if we mirror the ASN, we need to read first and send second if not self.neighbor.local_as: for sent_open in self._send_open(): if sent_open in ACTION.ALL: yield sent_open self.proto.negotiated.sent(sent_open) self.fsm.change(FSM.OPENSENT) self.proto.validate_open() self.fsm.change(FSM.OPENCONFIRM) self.recv_timer = ReceiveTimer(self.proto.connection.session,self.proto.negotiated.holdtime,4,0) for action in self._send_ka(): yield action for action in self._read_ka(): yield action self.fsm.change(FSM.ESTABLISHED) self.stats['complete'] = time.time() # let the caller know that we were sucesfull yield ACTION.NOW def _main (self): """yield True if we want to come back to it asap, None if nothing urgent, and False if stopped""" if self._teardown: raise Notify(6,3) include_withdraw = False # Announce to the process BGP is up self.logger.network('Connected to peer %s' % self.neighbor.name()) self.stats['up'] = self.stats.get('up',0) + 1 if self.neighbor.api['neighbor-changes']: try: self.reactor.processes.up(self.neighbor) except ProcessError: # Can not find any better error code than 6,0 ! 
# XXX: We can not restart the program so this will come back again and again - FIX # XXX: In the main loop we do exit on this kind of error raise Notify(6,0,'ExaBGP Internal error, sorry.') send_eor = not self.neighbor.manual_eor new_routes = None self._resend_routes = SEND.NORMAL send_families = [] # Every last asm message should be re-announced on restart for family in self.neighbor.asm: if family in self.neighbor.families(): self.neighbor.messages.appendleft(self.neighbor.asm[family]) operational = None refresh = None command_eor = None number = 0 refresh_enhanced = True if self.proto.negotiated.refresh == REFRESH.ENHANCED else False send_ka = KA(self.proto.connection.session,self.proto) while not self._teardown: for message in self.proto.read_message(): self.recv_timer.check_ka(message) if send_ka() is not False: # we need and will send a keepalive while send_ka() is None: yield ACTION.NOW # Received update if message.TYPE == Update.TYPE: number += 1 self.logger.routes('%s << UPDATE #%d' % (self.proto.connection.session(),number)) for nlri in message.nlris: self.neighbor.rib.incoming.update_cache(Change(nlri,message.attributes)) self.logger.routes(LazyFormat('<< UPDATE #%d nlri ' % number,nlri,str),source=self.proto.connection.session()) elif message.TYPE == RouteRefresh.TYPE: if message.reserved == RouteRefresh.request: self._resend_routes = SEND.REFRESH send_families.append((message.afi,message.safi)) # SEND OPERATIONAL if self.neighbor.operational: if not operational: new_operational = self.neighbor.messages.popleft() if self.neighbor.messages else None if new_operational: operational = self.proto.new_operational(new_operational,self.proto.negotiated) if operational: try: six.next(operational) except StopIteration: operational = None # make sure that if some operational message are received via the API # that we do not eat memory for nothing elif self.neighbor.messages: self.neighbor.messages.popleft() # SEND REFRESH if self.neighbor.route_refresh: if not 
refresh: new_refresh = self.neighbor.refresh.popleft() if self.neighbor.refresh else None if new_refresh: refresh = self.proto.new_refresh(new_refresh) if refresh: try: six.next(refresh) except StopIteration: refresh = None # Take the routes already sent to that peer and resend them if self._reconfigure: self._reconfigure = False # we are here following a configuration change if self._neighbor: # see what changed in the configuration self.neighbor.rib.outgoing.replace(self._neighbor.backup_changes,self._neighbor.changes) # do not keep the previous routes in memory as they are not useful anymore self._neighbor.backup_changes = [] self._have_routes = True # Take the routes already sent to that peer and resend them if self._resend_routes != SEND.DONE: enhanced = True if refresh_enhanced and self._resend_routes == SEND.REFRESH else False self._resend_routes = SEND.DONE self.neighbor.rib.outgoing.resend(send_families,enhanced) self._have_routes = True send_families = [] # Need to send update if self._have_routes and not new_routes: self._have_routes = False # XXX: in proto really. hum to think about ? 
new_routes = self.proto.new_update(include_withdraw) if new_routes: try: count = 20 while count: # This can raise a NetworkError six.next(new_routes) count -= 1 except StopIteration: new_routes = None include_withdraw = True elif send_eor: send_eor = False for _ in self.proto.new_eors(): yield ACTION.NOW self.logger.message('>> EOR(s)') # SEND MANUAL KEEPALIVE (only if we have no more routes to send) elif not command_eor and self.neighbor.eor: new_eor = self.neighbor.eor.popleft() command_eor = self.proto.new_eors(new_eor.afi,new_eor.safi) if command_eor: try: six.next(command_eor) except StopIteration: command_eor = None if new_routes or message.TYPE != NOP.TYPE: yield ACTION.NOW elif self.neighbor.messages or operational: yield ACTION.NOW elif self.neighbor.eor or command_eor: yield ACTION.NOW else: yield ACTION.LATER # read_message will loop until new message arrives with NOP if self._teardown: break # If graceful restart, silent shutdown if self.neighbor.graceful_restart and self.proto.negotiated.sent_open.capabilities.announced(Capability.CODE.GRACEFUL_RESTART): self.logger.network('Closing the session without notification','error') self.proto.close('graceful restarted negotiated, closing without sending any notification') raise NetworkError('closing') # notify our peer of the shutdown raise Notify(6,self._teardown) def _run (self): """yield True if we want the reactor to give us back the hand with the same peer loop, None if we do not have any more work to do""" try: for action in self._establish(): yield action for action in self._main(): yield action # CONNECTION FAILURE except NetworkError as network: # we tried to connect once, it failed and it was not a manual request, we stop if self.once and not self._teardown: self.logger.network('only one attempt to connect is allowed, stopping the peer') self.stop() self._reset('closing connection',network) return # NOTIFY THE PEER OF AN ERROR except Notify as notify: if self.proto: try: generator = 
self.proto.new_notification(notify) try: while True: six.next(generator) yield ACTION.NOW except StopIteration: pass except (NetworkError,ProcessError): self.logger.network('NOTIFICATION NOT SENT','error') self._reset('notification sent (%d,%d)' % (notify.code,notify.subcode),notify) else: self._reset() return # THE PEER NOTIFIED US OF AN ERROR except Notification as notification: # we tried to connect once, it failed and it was not a manual request, we stop if self.once and not self._teardown: self.logger.network('only one attempt to connect is allowed, stopping the peer') self.stop() self._reset( 'notification received (%d,%d)' % ( notification.code, notification.subcode), notification ) return # RECEIVED a Message TYPE we did not expect except Message as message: self._reset('unexpected message received',message) return # PROBLEM WRITING TO OUR FORKED PROCESSES except ProcessError as process: self._reset('process problem',process) return # .... except Interrupted as interruption: self._reset('connection received before we could fully establish one') return # UNHANDLED PROBLEMS except Exception as exc: # Those messages can not be filtered in purpose self.logger.raw('\n'.join([ NO_PANIC, '', '', str(type(exc)), str(exc), trace(), FOOTER ])) self._reset() return # loop def run (self): if self.reactor.processes.broken(self.neighbor): # XXX: we should perhaps try to restart the process ?? 
self.logger.processes('ExaBGP lost the helper process for this peer - stopping','error') if self.reactor.processes.terminate_on_error: self.reactor.api_shutdown() else: self.stop() return True if self.generator: try: # This generator only stops when it raises # otherwise return one of the ACTION return six.next(self.generator) except StopIteration: # Trying to run a closed loop, no point continuing self.generator = None if self._restart: return ACTION.LATER return ACTION.CLOSE elif self.generator is None: if self.fsm in [FSM.OPENCONFIRM,FSM.ESTABLISHED]: self.logger.network('stopping, other connection is established','debug') self.generator = False return ACTION.LATER if self._delay.backoff(): return ACTION.LATER if self._restart: self.logger.network('initialising connection to %s' % self.neighbor.name(),'debug') self.generator = self._run() return ACTION.LATER # make sure we go through a clean loop return ACTION.CLOSE def cli_data (self): def tri (value): if value is None: return None return True if value else False peer = defaultdict(lambda: None) have_peer = self.proto is not None have_open = self.proto and self.proto.negotiated.received_open if have_peer: peer.update({ 'multi-session': self.proto.negotiated.multisession, 'operational': self.proto.negotiated.operational, }) if have_open: capa = self.proto.negotiated.received_open.capabilities peer.update({ 'router-id': self.proto.negotiated.received_open.router_id, 'hold-time': self.proto.negotiated.received_open.hold_time, 'asn4': self.proto.negotiated.asn4, 'route-refresh': capa.announced(Capability.CODE.ROUTE_REFRESH), 'multi-session': capa.announced(Capability.CODE.MULTISESSION) or capa.announced(Capability.CODE.MULTISESSION_CISCO), 'add-path': capa.announced(Capability.CODE.ADD_PATH), 'graceful-restart': capa.announced(Capability.CODE.GRACEFUL_RESTART), }) capabilities = { 'asn4': (tri(self.neighbor.asn4), tri(peer['asn4'])), 'route-refresh': (tri(self.neighbor.route_refresh),tri(peer['route-refresh'])), 
'multi-session': (tri(self.neighbor.multisession), tri(peer['multi-session'])), 'operational': (tri(self.neighbor.operational), tri(peer['operational'])), 'add-path': (tri(self.neighbor.add_path),tri(peer['add-path'])), 'graceful-restart': (tri(self.neighbor.graceful_restart),tri(peer['graceful-restart'])), } families = {} for family in self.neighbor.families(): if have_open: common = True if family in self.proto.negotiated.families else False addpath = self.proto.negotiated.addpath.receive(*family) and self.proto.negotiated.addpath.receive(*family) else: common = False addpath = False families[family] = (True,common if have_open else None,addpath) messages = {} total_sent = 0 total_rcvd = 0 for message in ('open','notification','keepalive','update','refresh'): sent = self.stats.get('send-%s' % message,0) rcvd = self.stats.get('receive-%s' % message,0) total_sent += sent total_rcvd += rcvd messages[message] = (sent, rcvd) messages['total'] = (total_sent, total_rcvd) return { 'duration': int(time.time() - self.stats['complete']) if self.stats['complete'] else 0, 'local-address': str(self.neighbor.local_address), 'peer-address': str(self.neighbor.peer_address), 'local-as': int(self.neighbor.local_as), 'peer-as': int(self.neighbor.peer_as), 'local-id': str(self.neighbor.router_id), 'peer-id': None if peer['peer-id'] is None else str(peer['router-id']), 'local-hold': int(self.neighbor.hold_time), 'peer-hold': None if peer['hold-time'] is None else int(peer['hold-time']), 'state': self.fsm.name(), 'capabilities': capabilities, 'families': families, 'messages': messages, }
class Peer(object):
    """Drive the incoming and the outgoing BGP connection to one neighbor.

    Each connection is described by a Direction object (self._incoming and
    self._outgoing) carrying a name, the generator factory to run
    (`code`), an FSM, the Protocol instance (`proto`) and a `generator`
    slot which run() interprets as:

    * a generator: the loop to resume on the next call to run()
    * None:        the loop must be (re)created
    * False:       the direction is down and must be left alone
    """

    def __init__(self, neighbor, reactor):
        """Store the neighbor/reactor and create both idle directions."""
        try:
            self.logger = Logger()
            # We only to try to connect via TCP once
            self.once = environment.settings().tcp.once
            self.bind = True if environment.settings().tcp.bind else False
        except RuntimeError:
            # fall back to defaults when the settings/logger raise RuntimeError
            self.logger = FakeLogger()
            self.once = False
            self.bind = True

        self.reactor = reactor
        self.neighbor = neighbor
        # The next restart neighbor definition
        self._neighbor = None

        # The peer should restart after a stop
        self._restart = True
        # The peer was restarted (to know what kind of open to send for graceful restart)
        self._restarted = FORCE_GRACEFUL

        # We want to remove routes which are not in the configuration anymore after a signal to reload
        self._reconfigure = True
        # We want to send all the known routes
        self._resend_routes = SEND.DONE
        # We have new routes for the peers
        self._have_routes = True

        # We have been asked to teardown the session with this code
        self._teardown = None

        self._delay = Delay()
        self.recv_timer = None

        # NOTE(review): the positional arguments appear to be
        # name, code, fsm, proto, enabled, generator - confirm against Direction
        self._incoming = Direction('in', self._accept, FSM(FSM.IDLE), None, False, False)

        # a passive neighbor never starts the outgoing loop (False), otherwise
        # None asks run() to create it
        self._outgoing = Direction(
            'out', self._connect, FSM(FSM.IDLE), None,
            None if not self.neighbor.passive else False,
            None if not self.neighbor.passive else False)

        self._incoming.opposite = self._outgoing
        self._outgoing.opposite = self._incoming

    def _reset(self, direction, message='', error=''):
        """Bring `direction` back to IDLE after its loop ended.

        When the peer must not restart the direction is simply disabled.
        Otherwise the protocol is closed (message and error are only used in
        the close reason), the generator slot is restored to
        `direction.enabled`, the pending teardown is cleared, the RIB is reset
        and a pending neighbor definition (self._neighbor) becomes current.
        """
        direction.fsm.change(FSM.IDLE)

        if not self._restart:
            direction.generator = False
            direction.proto = None
            return

        if direction.proto:
            direction.proto.close(
                u"{0} loop, peer reset, message [{1}] error[{2}]".format(
                    direction.name, message, error))
        direction.proto = None
        direction.generator = direction.enabled

        self._teardown = None
        # only failed outgoing attempts lengthen the reconnection delay
        if direction.name == 'out':
            self._delay.increase()
        self.neighbor.rib.reset()

        # If we are restarting, and the neighbor definition is different, update the neighbor
        if self._neighbor:
            self.neighbor = self._neighbor
            self._neighbor = None

    def _stop(self, direction, message):
        """Disable `direction` and close its protocol, if it still has one.

        The FSM of the direction is left untouched, so a direction which was
        already stopped (proto is None) may be asked to stop again by a later
        collision check; closing is therefore guarded instead of raising
        AttributeError on None.
        """
        direction.generator = False
        if direction.proto:
            direction.proto.close('%s loop, stop, message [%s]' % (direction.name, message))
        direction.proto = None

    # logging

    def me(self, message):
        """Return `message` prefixed with the peer address and ASN."""
        return "peer %s ASN %-7s %s" % (self.neighbor.peer_address, self.neighbor.peer_as, message)

    # control

    def stop(self):
        """Request a teardown (code 3) with no restart afterwards."""
        self._teardown = 3
        self._restart = False
        self._restarted = False
        self._delay.reset()

    def resend(self):
        """Ask the main loop to resend the routes."""
        self._resend_routes = SEND.NORMAL
        self._delay.reset()

    def send_new(self, changes=None, update=None):
        """Replace the outgoing changes (when given) and flag routes to send.

        `update` overrides the neighbor's `flush` setting when it is not None.
        """
        if changes:
            self.neighbor.rib.outgoing.replace(changes)
        self._have_routes = self.neighbor.flush if update is None else update

    def reestablish(self, restart_neighbor=None):
        """Tear the session down (code 3) and re-establish it.

        `restart_neighbor`, when given, replaces the neighbor on reset.
        """
        # we want to tear down the session and re-establish it
        self._teardown = 3
        self._restart = True
        self._restarted = True
        self._resend_routes = SEND.NORMAL
        self._neighbor = restart_neighbor
        self._delay.reset()

    def reconfigure(self, restart_neighbor=None):
        """Apply a configuration reload without tearing the session down."""
        # we want to update the route which were in the configuration file
        self._reconfigure = True
        self._neighbor = restart_neighbor
        self._resend_routes = SEND.NORMAL

    def teardown(self, code, restart=True):
        """Request a teardown with `code`, restarting afterwards if `restart`."""
        self._restart = restart
        self._teardown = code
        self._delay.reset()

    # sockets we must monitor

    def sockets(self):
        """Return the io objects of the connections which currently have one."""
        ios = []
        for proto in (self._incoming.proto, self._outgoing.proto):
            if proto and proto.connection and proto.connection.io:
                ios.append(proto.connection.io)
        return ios

    def incoming(self, connection):
        """Attach an accepted `connection` to the incoming direction.

        Returns False when an incoming protocol is already in place, True
        once the connection was accepted and the incoming loop scheduled.
        """
        # if the other side fails, we go back to idle
        if self._incoming.proto not in (True, False, None):
            self.logger.network('we already have a peer at this address')
            return False

        # self._incoming.fsm.change(FSM.ACTIVE)
        self._incoming.proto = Protocol(self).accept(connection)
        # Let's make sure we do some work with this connection
        self._incoming.generator = None
        return True

    def established(self):
        """Return True when either direction reached ESTABLISHED."""
        return self._incoming.fsm == FSM.ESTABLISHED or self._outgoing.fsm == FSM.ESTABLISHED

    def detailed_link_status(self):
        """Return the display name of the higher of the two FSM states."""
        state_tbl = {
            FSM.IDLE: "Idle",
            FSM.ACTIVE: "Active",
            FSM.CONNECT: "Connect",
            FSM.OPENSENT: "OpenSent",
            FSM.OPENCONFIRM: "OpenConfirm",
            FSM.ESTABLISHED: "Established"
        }
        return state_tbl[max(self._incoming.fsm.state, self._outgoing.fsm.state)]

    def negotiated_families(self):
        """Return the families as text: 'a/b', '[ a/b c/d ]' or ''.

        The negotiated families of the outgoing protocol are used when there
        is one, the configured families of the neighbor otherwise.
        """
        if self._outgoing.proto:
            families = [
                "%s/%s" % (x[0], x[1])
                for x in self._outgoing.proto.negotiated.families
            ]
        else:
            families = [
                "%s/%s" % (x[0], x[1]) for x in self.neighbor.families()
            ]

        if len(families) > 1:
            return "[ %s ]" % " ".join(families)
        elif len(families) == 1:
            return families[0]

        return ''

    def _accept(self):
        """Generator taking the incoming connection up to ESTABLISHED.

        Yields ACTION.NOW / ACTION.LATER while the OPEN and KEEPALIVE exchange
        progresses; raises Interrupted(self._incoming) when the collision
        check decides the incoming connection must be dropped.
        """
        self._incoming.fsm.change(FSM.CONNECT)

        # we can do this as Protocol is a mutable object
        proto = self._incoming.proto

        # send OPEN
        message = Message.CODE.NOP

        for message in proto.new_open(self._restarted):
            if ord(message.TYPE) == Message.CODE.NOP:
                yield ACTION.NOW

        proto.negotiated.sent(message)

        self._incoming.fsm.change(FSM.OPENSENT)

        # Read OPEN
        wait = environment.settings().bgp.openwait
        opentimer = ReceiveTimer(
            self.me, wait, 1, 1,
            'waited for open too long, we do not like stuck in active')
        # Only yield if we have not the open, otherwise the reactor can run the other connection
        # which would be bad as we need to do the collision check without going to the other peer
        for message in proto.read_open(self.neighbor.peer_address.top()):
            opentimer.check_ka(message)
            if ord(message.TYPE) == Message.CODE.NOP:
                yield ACTION.LATER

        self._incoming.fsm.change(FSM.OPENCONFIRM)
        proto.negotiated.received(message)
        proto.validate_open()

        if self._outgoing.fsm == FSM.OPENCONFIRM:
            self.logger.network(
                'incoming connection finds the outgoing connection is in openconfirm'
            )
            local_id = self.neighbor.router_id.pack()
            remote_id = proto.negotiated.received_open.router_id.pack()

            # the connection initiated by the lower identifier is the one dropped
            if local_id < remote_id:
                self.logger.network('closing the outgoing connection')
                self._stop(self._outgoing, 'collision local id < remote id')
                yield ACTION.LATER
            else:
                self.logger.network('aborting the incoming connection')
                raise Interrupted(self._incoming)

        # Send KEEPALIVE
        for message in self._incoming.proto.new_keepalive('OPENCONFIRM'):
            yield ACTION.NOW

        # Start keeping keepalive timer
        self.recv_timer = ReceiveTimer(self.me, proto.negotiated.holdtime, 4, 0)
        # Read KEEPALIVE
        for message in proto.read_keepalive():
            self.recv_timer.check_ka(message)
            yield ACTION.NOW

        self._incoming.fsm.change(FSM.ESTABLISHED)
        # let the caller know that we were successful
        yield ACTION.NOW

    def _connect(self):
        """Generator taking the outgoing connection up to ESTABLISHED.

        Yields ACTION.NOW / ACTION.LATER while connecting and exchanging OPEN
        and KEEPALIVE; raises Interrupted(self._outgoing) when the connection
        fails, when an incoming connection showed up first, or when the
        collision check decides the outgoing connection must be dropped.
        """
        # try to establish the outgoing connection

        self._outgoing.fsm.change(FSM.CONNECT)

        proto = Protocol(self)
        generator = proto.connect()

        connected = False
        try:
            while not connected:
                if self._teardown:
                    # caught just below, within this same generator
                    raise StopIteration()
                connected = six.next(generator)
                # we want to come back as soon as possible
                yield ACTION.LATER
        except StopIteration:
            # Connection failed
            if not connected:
                proto.close('connection to %s:%d failed' %
                            (self.neighbor.peer_address, proto.port))
            # A connection arrived before we could establish !
            if not connected or self._incoming.proto:
                yield ACTION.NOW
                raise Interrupted(self._outgoing)

        self._outgoing.proto = proto

        # send OPEN
        # Only yield if we have not the open, otherwise the reactor can run the other connection
        # which would be bad as we need to set the state without going to the other peer
        message = Message.CODE.NOP
        for message in proto.new_open(self._restarted):
            if ord(message.TYPE) == Message.CODE.NOP:
                yield ACTION.NOW

        proto.negotiated.sent(message)

        self._outgoing.fsm.change(FSM.OPENSENT)

        # Read OPEN
        wait = environment.settings().bgp.openwait
        opentimer = ReceiveTimer(
            self.me, wait, 1, 1,
            'waited for open too long, we do not like stuck in active')
        for message in self._outgoing.proto.read_open(
                self.neighbor.peer_address.top()):
            opentimer.check_ka(message)
            # XXX: FIXME: change the whole code to use the ord and not the chr version
            # Only yield if we have not the open, otherwise the reactor can run the other connection
            # which would be bad as we need to do the collision check
            if ord(message.TYPE) == Message.CODE.NOP:
                yield ACTION.LATER

        self._outgoing.fsm.change(FSM.OPENCONFIRM)
        proto.negotiated.received(message)
        proto.validate_open()

        if self._incoming.fsm == FSM.OPENCONFIRM:
            self.logger.network(
                'outgoing connection finds the incoming connection is in openconfirm'
            )
            local_id = self.neighbor.router_id.pack()
            remote_id = proto.negotiated.received_open.router_id.pack()

            if local_id < remote_id:
                self.logger.network('aborting the outgoing connection')
                raise Interrupted(self._outgoing)
            else:
                self.logger.network('closing the incoming connection')
                # this branch runs when the local id is NOT lower; the close
                # reason used to claim 'local id < remote id'
                self._stop(self._incoming, 'collision local id >= remote id')
                yield ACTION.LATER

        # Send KEEPALIVE
        for message in proto.new_keepalive('OPENCONFIRM'):
            yield ACTION.NOW

        # Start keeping keepalive timer
        self.recv_timer = ReceiveTimer(self.me, proto.negotiated.holdtime, 4, 0)
        # Read KEEPALIVE
        for message in self._outgoing.proto.read_keepalive():
            self.recv_timer.check_ka(message)
            yield ACTION.NOW

        self._outgoing.fsm.change(FSM.ESTABLISHED)
        # let the caller know that we were successful
        yield ACTION.NOW

    def _main(self, direction):
        """Generator running the established session of `direction`.

        Yields ACTION.NOW when there is more work to do right away and
        ACTION.LATER when nothing is pending.  Never returns normally: it
        raises Notify(6, 3) when a teardown was requested before starting,
        and once a teardown is requested while running it raises either
        NetworkError('closing') (graceful restart announced: silent close) or
        Notify(6, <teardown code>).
        """
        if self._teardown:
            raise Notify(6, 3)

        proto = direction.proto

        include_withdraw = False

        # Announce to the process BGP is up
        self.logger.network('Connected to peer %s (%s)' %
                            (self.neighbor.name(), direction.name))
        if self.neighbor.api['neighbor-changes']:
            try:
                self.reactor.processes.up(self.neighbor)
            except ProcessError:
                # Can not find any better error code than 6,0 !
                # XXX: We can not restart the program so this will come back again and again - FIX
                # XXX: In the main loop we do exit on this kind of error
                raise Notify(6, 0, 'ExaBGP Internal error, sorry.')

        send_eor = not self.neighbor.manual_eor
        new_routes = None
        self._resend_routes = SEND.NORMAL
        send_families = []

        # Every last asm message should be re-announced on restart
        for family in self.neighbor.asm:
            if family in self.neighbor.families():
                self.neighbor.messages.appendleft(self.neighbor.asm[family])

        # generators of messages being sent, None when nothing is in progress
        operational = None
        refresh = None
        command_eor = None
        number = 0
        refresh_enhanced = True if proto.negotiated.refresh == REFRESH.ENHANCED else False

        send_ka = KA(self.me, proto)

        while not self._teardown:
            for message in proto.read_message():
                self.recv_timer.check_ka(message)

                if send_ka() is not False:
                    # we need and will send a keepalive
                    while send_ka() is None:
                        yield ACTION.NOW

                # Received update
                if message.TYPE == Update.TYPE:
                    number += 1

                    self.logger.routes(
                        LazyFormat(
                            self.me('<< UPDATE (%d)' % number),
                            message.attributes,
                            lambda _: "%s%s" % (' attributes' if _ else '', _)))

                    for nlri in message.nlris:
                        self.neighbor.rib.incoming.insert_received(
                            Change(nlri, message.attributes))
                        self.logger.routes(
                            LazyFormat(
                                self.me('<< UPDATE (%d) nlri ' % number),
                                nlri, str))

                elif message.TYPE == RouteRefresh.TYPE:
                    if message.reserved == RouteRefresh.request:
                        self._resend_routes = SEND.REFRESH
                        send_families.append((message.afi, message.safi))

                # SEND OPERATIONAL
                if self.neighbor.operational:
                    if not operational:
                        new_operational = self.neighbor.messages.popleft(
                        ) if self.neighbor.messages else None
                        if new_operational:
                            operational = proto.new_operational(
                                new_operational, proto.negotiated)

                    if operational:
                        try:
                            six.next(operational)
                        except StopIteration:
                            operational = None
                # make sure that if some operational message are received via the API
                # that we do not eat memory for nothing
                elif self.neighbor.messages:
                    self.neighbor.messages.popleft()

                # SEND REFRESH
                if self.neighbor.route_refresh:
                    if not refresh:
                        new_refresh = self.neighbor.refresh.popleft(
                        ) if self.neighbor.refresh else None
                        if new_refresh:
                            refresh = proto.new_refresh(new_refresh)

                    if refresh:
                        try:
                            six.next(refresh)
                        except StopIteration:
                            refresh = None

                # Apply the route changes brought by a configuration reload
                if self._reconfigure:
                    self._reconfigure = False

                    # we are here following a configuration change
                    if self._neighbor:
                        # see what changed in the configuration
                        self.neighbor.rib.outgoing.replace(
                            self._neighbor.backup_changes,
                            self._neighbor.changes)
                        # do not keep the previous routes in memory as they are not useful anymore
                        self._neighbor.backup_changes = []

                    self._have_routes = True

                # Take the routes already sent to that peer and resend them
                if self._resend_routes != SEND.DONE:
                    enhanced = True if refresh_enhanced and self._resend_routes == SEND.REFRESH else False
                    self._resend_routes = SEND.DONE
                    self.neighbor.rib.outgoing.resend(send_families, enhanced)
                    self._have_routes = True
                    send_families = []

                # Need to send update
                if self._have_routes and not new_routes:
                    self._have_routes = False
                    # XXX: in proto really. hum to think about ?
                    new_routes = proto.new_update(include_withdraw)

                if new_routes:
                    try:
                        # advance the update generator at most 20 steps per pass
                        count = 20
                        while count:
                            # This can raise a NetworkError
                            six.next(new_routes)
                            count -= 1
                    except StopIteration:
                        new_routes = None
                        include_withdraw = True

                elif send_eor:
                    send_eor = False
                    for _ in proto.new_eors():
                        yield ACTION.NOW
                    self.logger.message(self.me('>> EOR(s)'))

                # SEND MANUAL EOR (only if we have no more routes to send)
                elif not command_eor and self.neighbor.eor:
                    new_eor = self.neighbor.eor.popleft()
                    command_eor = proto.new_eors(new_eor.afi, new_eor.safi)

                if command_eor:
                    try:
                        six.next(command_eor)
                    except StopIteration:
                        command_eor = None

                if new_routes or message.TYPE != NOP.TYPE:
                    yield ACTION.NOW
                elif self.neighbor.messages or operational:
                    yield ACTION.NOW
                elif self.neighbor.eor or command_eor:
                    yield ACTION.NOW
                else:
                    yield ACTION.LATER

                # read_message will loop until new message arrives with NOP
                if self._teardown:
                    break

        # If graceful restart, silent shutdown
        if self.neighbor.graceful_restart and proto.negotiated.sent_open.capabilities.announced(
                Capability.CODE.GRACEFUL_RESTART):
            self.logger.network('Closing the session without notification',
                                'error')
            proto.close(
                'graceful restarted negotiated, closing without sending any notification'
            )
            raise NetworkError('closing')

        # notify our peer of the shutdown
        raise Notify(6, self._teardown)

    def _run(self, direction):
        """Generator wrapping the whole life of one connection.

        Relays every action yielded by direction.code() (connection setup)
        and then by _main(direction).  Every exception ending the session is
        handled here and finishes with a _reset(); a Notify is first sent to
        the peer as a NOTIFICATION when the direction still has a protocol.
        """
        try:
            for action in direction.code():
                yield action

            for action in self._main(direction):
                yield action

        # CONNECTION FAILURE
        except NetworkError as network:
            # we tried to connect once, it failed and it was not a manual request, we stop
            if self.once and not self._teardown:
                self.logger.network(
                    'only one attempt to connect is allowed, stopping the peer'
                )
                self.stop()

            self._reset(direction, 'closing connection', network)
            return

        # NOTIFY THE PEER OF AN ERROR
        except Notify as notify:
            if direction.proto:
                try:
                    generator = direction.proto.new_notification(notify)
                    try:
                        # give up after 20 attempts at pushing the notification
                        maximum = 20
                        while maximum:
                            six.next(generator)
                            maximum -= 1
                            yield ACTION.NOW if maximum > 10 else ACTION.LATER
                    except StopIteration:
                        pass
                except (NetworkError, ProcessError):
                    self.logger.network(self.me('NOTIFICATION NOT SENT'), 'error')
                self._reset(
                    direction,
                    'notification sent (%d,%d)' % (notify.code, notify.subcode),
                    notify)
            else:
                self._reset(direction)
            return

        # THE PEER NOTIFIED US OF AN ERROR
        except Notification as notification:
            # we tried to connect once, it failed and it was not a manual request, we stop
            if self.once and not self._teardown:
                self.logger.network(
                    'only one attempt to connect is allowed, stopping the peer'
                )
                self.stop()

            self._reset(
                direction,
                'notification received (%d,%d)' % (notification.code, notification.subcode),
                notification)
            return

        # RECEIVED a Message TYPE we did not expect
        except Message as message:
            self._reset(direction, 'unexpected message received', message)
            return

        # PROBLEM WRITING TO OUR FORKED PROCESSES
        except ProcessError as process:
            self._reset(direction, 'process problem', process)
            return

        # the connection lost the collision check or could not be set up
        except Interrupted as interruption:
            self._reset(interruption.direction)
            return

        # UNHANDLED PROBLEMS
        except Exception as exc:
            # Those messages can not be filtered in purpose
            self.logger.raw('\n'.join([
                no_panic,
                self.me(''), '',
                str(type(exc)),
                str(exc),
                trace(), footer
            ]))
            self._reset(direction)
            return

    # loop

    def run(self):
        """Advance both directions by one step and tell the reactor what next.

        Returns True after stopping the peer because its helper process is
        broken; otherwise ACTION.NOW if any loop asked to be called back
        immediately, else ACTION.LATER, or ACTION.CLOSE when the peer must not
        restart and no loop ran.
        """
        if self.reactor.processes.broken(self.neighbor):
            # XXX: we should perhaps try to restart the process ??
            self.logger.processes(
                'ExaBGP lost the helper process for this peer - stopping',
                'error')
            self.stop()
            return True

        back = ACTION.LATER if self._restart else ACTION.CLOSE

        for direction in (self._incoming, self._outgoing):
            if direction.generator:
                try:
                    # This generator only stops when it raises
                    r = six.next(direction.generator)

                    # if r is ACTION.NOW: status = 'immediately'
                    # elif r is ACTION.LATER: status = 'next second'
                    # elif r is ACTION.CLOSE: status = 'stop'
                    # else: status = 'buggy'
                    # self.logger.network('%s loop %11s, state is %s' % (direction.name,status,direction.fsm),'debug')

                    # ACTION.NOW from either direction wins over ACTION.LATER
                    if r == ACTION.NOW:
                        back = ACTION.NOW
                    elif r == ACTION.LATER:
                        back = ACTION.LATER if back != ACTION.NOW else ACTION.NOW
                except StopIteration:
                    # Trying to run a closed loop, no point continuing
                    direction.generator = direction.enabled

            elif direction.generator is None:
                if direction.opposite.fsm in [
                        FSM.OPENCONFIRM, FSM.ESTABLISHED
                ]:
                    self.logger.network(
                        '%s loop, stopping, other one is established' %
                        direction.name, 'debug')
                    direction.generator = False
                    continue
                if direction.name == 'out' and self._delay.backoff():
                    self.logger.network(
                        '%s loop, skipping, not time yet' % direction.name,
                        'debug')
                    back = ACTION.LATER
                    continue
                if self._restart:
                    self.logger.network(
                        '%s loop, intialising' % direction.name, 'debug')
                    direction.generator = self._run(direction)
                    back = ACTION.LATER  # make sure we go through a clean loop

        return back