def neighbor_rib_out_withdraw(self, peer_name):
    peer = self._peers.get(peer_name, None)
    if not peer:
        log.critical('could not find referenced peer', 'reactor')
        return
    peer.neighbor.rib.outgoing.withdraw(None, peer.neighbor['capability']['route-refresh'])
def daemonise(self):
    if not self.daemonize:
        return

    logging = getenv().log
    if logging.enable and logging.destination.lower() in ('stdout', 'stderr'):
        log.critical('ExaBGP can not fork when logs are going to %s' % logging.destination.lower(), 'daemon')
        return

    def fork_exit():
        try:
            pid = os.fork()
            if pid > 0:
                os._exit(0)
        except OSError as exc:
            log.critical('can not fork, errno %d : %s' % (exc.errno, exc.strerror), 'daemon')

    # do not detach if we are already supervised or run by an init-like process
    if self._is_socket(sys.__stdin__.fileno()) or os.getppid() == 1:
        return

    fork_exit()
    os.setsid()
    fork_exit()
    self.silence()
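# Illustrative sketch, not part of the original source: daemonise() above relies on
# the classic double fork + setsid() detach.  Everything below (including the
# function names) is hypothetical and only demonstrates the technique.
def _example_double_fork_detach():
    import os

    def fork_and_exit_parent():
        # the parent exits immediately, only the child keeps running
        if os.fork() > 0:
            os._exit(0)

    fork_and_exit_parent()  # detach from the process that launched us
    os.setsid()             # start a new session, dropping the controlling terminal
    fork_and_exit_parent()  # guarantee we can never re-acquire a controlling terminal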
def writer(self, data):
    if not self.io:
        # XXX: FIXME: Make sure it does not hold the cleanup during the closing of the peering session
        yield True
        return
    while not self.writing():
        yield False
    logfunc.debug(lazyformat('sending TCP payload', data), self.session())
    # The first while is here to setup the try/catch block once as it is very expensive
    while True:
        try:
            while True:
                if self.defensive and random.randint(0, 2):
                    raise socket.error(errno.EAGAIN, 'raising network error on purpose')

                # we can not use sendall as in case of network buffer filling
                # it does raise and does not let you know how much was sent
                number = self.io.send(data)
                if not number:
                    self.close()
                    log.warning('%s %s lost TCP connection with peer' % (self.name(), self.peer), self.session())
                    raise LostConnection('lost the TCP connection')

                data = data[number:]
                if not data:
                    yield True
                    return
                yield False
        except socket.error as exc:
            if exc.args[0] in error.block:
                log.debug(
                    '%s %s blocking io problem mid-way through writing a message %s, trying to complete'
                    % (self.name(), self.peer, errstr(exc)),
                    self.session(),
                )
                yield False
            elif exc.errno == errno.EPIPE:
                # The TCP connection is gone.
                self.close()
                raise NetworkError('Broken TCP connection')
            elif exc.args[0] in error.fatal:
                self.close()
                log.critical('%s %s problem sending message (%s)' % (self.name(), self.peer, errstr(exc)), self.session())
                raise NetworkError('Problem while writing data to the network (%s)' % errstr(exc))
            # what error could it be !
            else:
                log.critical('%s %s undefined error writing on socket' % (self.name(), self.peer), self.session())
                yield False
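# Illustrative sketch, not part of the original source: writer() is a generator
# which yields False while the socket would block and True once the whole
# payload has been sent.  A hypothetical caller could drain it like this:
def _example_drive_writer(connection, payload):
    for sent in connection.writer(payload):
        if sent:
            # True: every byte was written, the message is on the wire
            return True
        # False: the kernel buffer is full; a real caller would yield back
        # to the event loop here instead of looping immediately
    return False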
def neighbor_rib(self, peer_name, rib_name, advertised=False):
    peer = self._peers.get(peer_name, None)
    if not peer:
        log.critical('could not find referenced peer', 'reactor')
        return []
    families = None
    if advertised:
        families = peer.proto.negotiated.families if peer.proto else []
    rib = peer.neighbor.rib.outgoing if rib_name == 'out' else peer.neighbor.rib.incoming
    return list(rib.cached_changes(families))
def _reader(self, number):
    # The function must not be called if it does not return with no data with a smaller size as parameter
    if not self.io:
        self.close()
        raise NotConnected('Trying to read on a closed TCP connection')
    if number == 0:
        yield b''
        return

    while not self.reading():
        yield b''
    data = b''
    reported = ''
    while True:
        try:
            while True:
                if self.defensive and random.randint(0, 2):
                    raise socket.error(errno.EAGAIN, 'raising network error on purpose')

                read = self.io.recv(number)
                if not read:
                    self.close()
                    log.warning('%s %s lost TCP session with peer' % (self.name(), self.peer), self.session())
                    raise LostConnection('the TCP connection was closed by the remote end')
                data += read

                number -= len(read)
                if not number:
                    logfunc.debug(lazyformat('received TCP payload', data), self.session())
                    yield data
                    return
                yield b''
        except socket.timeout as exc:
            self.close()
            log.warning('%s %s peer is too slow' % (self.name(), self.peer), self.session())
            raise TooSlowError('Timeout while reading data from the network (%s)' % errstr(exc))
        except socket.error as exc:
            if exc.args[0] in error.block:
                message = '%s %s blocking io problem mid-way through reading a message %s, trying to complete' % (
                    self.name(),
                    self.peer,
                    errstr(exc),
                )
                if message != reported:
                    reported = message
                    log.debug(message, self.session())
                yield b''
            elif exc.args[0] in error.fatal:
                self.close()
                raise LostConnection('issue reading on the socket: %s' % errstr(exc))
            # what error could it be !
            else:
                log.critical('%s %s undefined error reading on socket' % (self.name(), self.peer), self.session())
                raise NetworkError('Problem while reading data from the network (%s)' % errstr(exc))
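# Illustrative sketch, not part of the original source: _reader(n) yields b''
# while bytes are still outstanding and finally yields the complete n-byte
# payload.  A hypothetical caller reading a BGP message header (19 bytes)
# could look like this:
def _example_read_header(connection):
    for data in connection._reader(19):
        if data:
            return data  # the full 19-byte header has arrived
        # b'': nothing more is available yet; a real caller would yield back
        # to the event loop here instead of looping immediately
    return b''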
def shutdown(self):
    """Terminate all the current BGP connections"""
    log.critical('performing shutdown', 'reactor')
    if self.listener:
        self.listener.stop()
        self.listener = None
    for key in self._peers.keys():
        self._peers[key].shutdown()
    self.asynchronous.clear()
    self.processes.terminate()
    self.daemon.removepid()
    self._stopping = True
def listen_on(self, local_addr, remote_addr, port, md5_password, md5_base64, ttl_in):
    try:
        if not remote_addr:
            remote_addr = IP.create('0.0.0.0') if local_addr.ipv4() else IP.create('::')
        self._listen(local_addr, remote_addr, port, md5_password, md5_base64, ttl_in)
        log.debug(
            'listening for BGP session(s) on %s:%d%s' % (local_addr, port, ' with MD5' if md5_password else ''),
            'network',
        )
        return True
    except NetworkError as exc:
        if os.geteuid() != 0 and port <= 1024:
            log.critical('can not bind to %s:%d, you may need to run ExaBGP as root' % (local_addr, port), 'network')
        else:
            log.critical('can not bind to %s:%d (%s)' % (local_addr, port, str(exc)), 'network')
        log.critical('unset exabgp.tcp.bind if you do not want to listen for incoming connections', 'network')
        log.critical('and check that no other daemon is already binding to port %d' % port, 'network')
        return False
def _listen(self, local_ip, peer_ip, local_port, md5, md5_base64, ttl_in):
    self.serving = True

    for sock, (local, port, peer, md) in self._sockets.items():
        if local_ip.top() != local:
            continue
        if local_port != port:
            continue
        MD5(sock, peer_ip.top(), 0, md5, md5_base64)
        if ttl_in:
            MIN_TTL(sock, peer_ip, ttl_in)
        return

    try:
        sock = self._new_socket(local_ip)
        # MD5 must match the peer side of the TCP, not the local one
        MD5(sock, peer_ip.top(), 0, md5, md5_base64)
        if ttl_in:
            MIN_TTL(sock, peer_ip, ttl_in)
        try:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            if local_ip.ipv6():
                sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
        except (socket.error, AttributeError):
            pass
        sock.setblocking(0)
        # s.settimeout(0.0)
        sock.bind((local_ip.top(), local_port))
        sock.listen(self._backlog)
        self._sockets[sock] = (local_ip.top(), local_port, peer_ip.top(), md5)
    except socket.error as exc:
        if exc.args[0] == errno.EADDRINUSE:
            raise BindingError(
                'could not listen on %s:%d, the port may already be in use by another application'
                % (local_ip, local_port)
            )
        elif exc.args[0] == errno.EADDRNOTAVAIL:
            raise BindingError('could not listen on %s:%d, this is an invalid address' % (local_ip, local_port))
        raise NetworkError(str(exc))
    except NetworkError as exc:
        log.critical(str(exc), 'network')
        raise exc
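# Illustrative sketch, not part of the original source: the core socket setup
# performed by _listen() above, stripped of the MD5/TTL options.  The address,
# port and backlog below are arbitrary example values.
def _example_listening_socket(ip='127.0.0.1', port=1790, backlog=200):
    import socket

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)  # allow quick restarts
    sock.setblocking(False)  # accept() must never block the event loop
    sock.bind((ip, port))
    sock.listen(backlog)
    return sock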
def incoming(self):
    if not self.serving:
        return False

    peer_connected = False

    for sock in self._sockets:
        if sock in self._accepted:
            continue
        try:
            io, _ = sock.accept()
            self._accepted[sock] = io
            peer_connected = True
        except socket.error as exc:
            if exc.errno in error.block:
                continue
            log.critical(str(exc), 'network')

    return peer_connected
def _connected(self):
    try:
        for sock, io in list(self._accepted.items()):
            del self._accepted[sock]
            if sock.family == socket.AF_INET:
                local_ip = io.getpeername()[0]  # local_ip,local_port
                remote_ip = io.getsockname()[0]  # remote_ip,remote_port
            elif sock.family == socket.AF_INET6:
                local_ip = io.getpeername()[0]  # local_ip,local_port,local_flow,local_scope
                remote_ip = io.getsockname()[0]  # remote_ip,remote_port,remote_flow,remote_scope
            else:
                raise AcceptError('unexpected address family (%d)' % sock.family)
            fam = self._family_AFI_map[sock.family]
            yield Incoming(fam, remote_ip, local_ip, io)
    except NetworkError as exc:
        log.critical(str(exc), 'network')
def sigusr1(self, signum, frame):
    log.critical('SIGUSR1 received', 'reactor')
    if self.received:
        log.critical('ignoring - still handling previous signal', 'reactor')
        return
    log.critical('scheduling reload of configuration', 'reactor')
    self.received = self.RELOAD
    self.number = signum

def sigalrm(self, signum, frame):
    log.critical('SIGALRM received', 'reactor')
    if self.received:
        log.critical('ignoring - still handling previous signal', 'reactor')
        return
    log.critical('scheduling restart', 'reactor')
    self.received = self.RESTART
    self.number = signum

def sighup(self, signum, frame):
    log.critical('SIGHUP received', 'reactor')
    if self.received:
        log.critical('ignoring - still handling previous signal', 'reactor')
        return
    log.critical('scheduling shutdown', 'reactor')
    self.received = self.SHUTDOWN
    self.number = signum
def cmdline(cmdarg):
    env = getenv()

    # Must be done before setting the logger as it modifies its behaviour
    if cmdarg.verbose:
        env.log.all = True
        env.log.level = syslog.LOG_DEBUG

    log.init()

    if cmdarg.pdb:
        env.debug.pdb = True

    if cmdarg.verbose:
        env.log.parser = True

    for configuration in cmdarg.configuration:
        log.notice(f'loading {configuration}', 'configuration')
        location = getconf(configuration)
        if not location:
            log.critical(f'{configuration} is not an exabgp config file', 'configuration')
            sys.exit(1)

        config = Reactor([location]).configuration

        if not config.reload():
            log.critical(f'{configuration} is not a valid config file', 'configuration')
            sys.exit(1)
        log.info('\u2713 loading', 'configuration')

        if cmdarg.neighbor:
            log.notice('checking neighbors', 'configuration')
            for name, neighbor in config.neighbors.items():
                reparsed = neighbor.string()
                for line in reparsed.split('\n'):
                    log.debug(line, 'configuration')
                log.info(f'\u2713 neighbor {name.split()[1]}', 'configuration')

        if cmdarg.route:
            log.notice('checking routes', 'configuration')
            if not check_generation(config.neighbors):
                log.critical(f'{configuration} has an invalid route', 'configuration')
                sys.exit(1)
            log.info('\u2713 routes', 'configuration')
def _start(self, process):
    if not self._restart.get(process, True):
        return

    try:
        if process in self._process:
            log.debug('process already running', 'process')
            return

        if process not in self._configuration:
            log.debug('can not start process, no configuration for it', 'process')
            return

        # Prevent some weird termcap data to be created at the start of the PIPE
        # \x1b[?1034h (no-eol) (esc)
        os.environ['TERM'] = 'dumb'

        configuration = self._configuration[process]

        run = configuration.get('run', '')
        if run:
            api = configuration.get('encoder', '')
            self._encoder[process] = Response.Text(text_version) if api == 'text' else Response.JSON(json_version)

            self._process[process] = subprocess.Popen(
                run,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                preexec_fn=preexec_helper,
                # This flag exists for python 2.7.3 in the documentation but not on my Mac
                # creationflags=subprocess.CREATE_NEW_PROCESS_GROUP
            )
            self._update_fds()
            fcntl.fcntl(self._process[process].stdout.fileno(), fcntl.F_SETFL, os.O_NONBLOCK)

            log.debug('forked process %s' % process, 'process')

            self._restart[process] = self._configuration[process]['respawn']
            around_now = int(time.time()) & self.respawn_timemask
            if process in self._respawning:
                if around_now in self._respawning[process]:
                    self._respawning[process][around_now] += 1
                    # we are respawning too fast
                    if self._respawning[process][around_now] > self.respawn_number:
                        log.critical(
                            'too many deaths for %s (%d), terminating program' % (process, self.respawn_number),
                            'process',
                        )
                        raise ProcessError()
                else:
                    # reset, it has been a long time since the last respawn
                    self._respawning[process] = {around_now: 1}
            else:
                # record the respawn
                self._respawning[process] = {around_now: 1}

    except (subprocess.CalledProcessError, OSError, ValueError) as exc:
        self._broken.append(process)
        log.debug('could not start process %s' % process, 'process')
        log.debug('reason: %s' % str(exc), 'process')
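# Illustrative sketch, not part of the original source: the respawn throttling
# above buckets restarts by masking the low bits of the clock, so only deaths
# happening "around now" are counted.  The mask and limit below are hypothetical.
def _example_respawn_throttle(history, respawn_timemask=~0x7F, respawn_number=5):
    import time

    bucket = int(time.time()) & respawn_timemask  # ~0x7F clears the low 7 bits: ~2 minute wide buckets
    history[bucket] = history.get(bucket, 0) + 1
    # True means the helper died too many times within the same bucket
    return history[bucket] > respawn_number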
def cmdline(cmdarg):
    if not os.path.isfile(ENVFILE):
        comment = 'environment file missing\ngenerate it using "exabgp env --fi > %s"' % ENVFILE
    else:
        comment = ''

    env = getenv()

    # Must be done before setting the logger as it modifies its behaviour
    if cmdarg.debug:
        env.log.all = True
        env.log.level = syslog.LOG_DEBUG

    log.init()

    if cmdarg.profile:
        env.profile.enable = True
        env.profile.file = cmdarg.profile

    if cmdarg.once:
        env.tcp.once = True

    if cmdarg.pdb:
        env.debug.pdb = True

    if cmdarg.test:
        env.log.parser = True

    if cmdarg.memory:
        env.debug.memory = True

    if env.cache.attributes:
        Attribute.caching = env.cache.attributes

    configurations = []
    for configuration in cmdarg.configuration:
        location = getconf(configuration)
        if not location:
            log.critical(f'{configuration} is not an exabgp config file', 'configuration')
            sys.exit(1)
        configurations.append(configuration)

    delay = cmdarg.signal
    _delayed_signal(delay, signal.SIGUSR1)

    if env.debug.rotate or len(configurations) == 1:
        run(comment, configurations, cmdarg.validate)

    if not (env.log.destination in ('syslog', 'stdout', 'stderr') or env.log.destination.startswith('host:')):
        log.error('can not log to files when running multiple configurations (as we fork)', 'configuration')
        sys.exit(1)

    try:
        # run each configuration in its own process
        pids = []
        for configuration in configurations:
            pid = os.fork()
            if pid == 0:
                run(comment, [configuration], cmdarg.validate, os.getpid())
            else:
                pids.append(pid)

        # If we get a ^C / SIGTERM, ignore it and just continue waiting for our child processes
        signal.signal(signal.SIGINT, signal.SIG_IGN)

        # wait for the forked processes
        for pid in pids:
            os.waitpid(pid, 0)
    except OSError as exc:
        log.critical('can not fork, errno %d : %s' % (exc.errno, exc.strerror), 'reactor')
        sys.exit(1)
def messages(self, negotiated, include_withdraw=True):
    # sort the nlris

    nlris = []
    mp_nlris = {}

    for nlri in sorted(self.nlris):
        if nlri.family() not in negotiated.families:
            continue

        add_v4 = nlri.afi == AFI.ipv4
        add_v4 = add_v4 and nlri.safi in [SAFI.unicast, SAFI.multicast]

        del_v4 = add_v4 and nlri.action == OUT.WITHDRAW

        if del_v4:
            nlris.append(nlri)
            continue

        add_v4 = add_v4 and nlri.action == OUT.ANNOUNCE
        add_v4 = add_v4 and nlri.nexthop.afi == AFI.ipv4

        if add_v4:
            nlris.append(nlri)
            continue

        if nlri.nexthop.afi != AFI.undefined:
            mp_nlris.setdefault(nlri.family(), {}).setdefault(nlri.action, []).append(nlri)
            continue

        if nlri.safi in (SAFI.flow_ip, SAFI.flow_vpn):
            mp_nlris.setdefault(nlri.family(), {}).setdefault(nlri.action, []).append(nlri)
            continue

        raise ValueError("unexpected nlri definition (%s)" % nlri)

    if not nlris and not mp_nlris:
        return

    # If all we have is MP_UNREACH_NLRI, we do not need the default
    # attributes. See RFC4760 that states the following:
    #
    #   An UPDATE message that contains the MP_UNREACH_NLRI is not required
    #   to carry any other path attributes.

    include_defaults = True

    if mp_nlris and not nlris:
        for family, actions in mp_nlris.items():
            afi, safi = family
            if safi not in (SAFI.unicast, SAFI.multicast):
                break
            if set(actions.keys()) != {OUT.WITHDRAW}:
                break
        else:  # no break
            include_defaults = False

    attr = self.attributes.pack(negotiated, include_defaults)

    # Withdraws/NLRIS (IPv4 unicast and multicast)
    msg_size = negotiated.msg_size - 19 - 2 - 2 - len(attr)  # 2 bytes for each of the two prefix() header

    if msg_size < 0:
        # raise Notify(6,0,'attributes size is so large we can not even pack one NLRI')
        log.critical('attributes size is so large we can not even pack one NLRI', 'parser')
        return

    if msg_size == 0 and (nlris or mp_nlris):
        # raise Notify(6,0,'attributes size is so large we can not even pack one NLRI')
        log.critical('attributes size is so large we can not even pack one NLRI', 'parser')
        return

    withdraws = b''
    announced = b''
    for nlri in nlris:
        packed = nlri.pack(negotiated)
        if len(announced + withdraws + packed) <= msg_size:
            if nlri.action == OUT.ANNOUNCE:
                announced += packed
            elif include_withdraw:
                withdraws += packed
            continue

        if not withdraws and not announced:
            # raise Notify(6,0,'attributes size is so large we can not even pack one NLRI')
            log.critical('attributes size is so large we can not even pack one NLRI', 'parser')
            return

        if announced:
            yield self._message(Update.prefix(withdraws) + Update.prefix(attr) + announced)
        else:
            yield self._message(Update.prefix(withdraws) + Update.prefix(b'') + announced)

        if nlri.action == OUT.ANNOUNCE:
            announced = packed
            withdraws = b''
        elif include_withdraw:
            withdraws = packed
            announced = b''
        else:
            withdraws = b''
            announced = b''

    if announced or withdraws:
        if announced:
            yield self._message(Update.prefix(withdraws) + Update.prefix(attr) + announced)
        else:
            yield self._message(Update.prefix(withdraws) + Update.prefix(b'') + announced)

    for family in mp_nlris.keys():
        afi, safi = family
        mp_reach = b''
        mp_unreach = b''
        mp_announce = MPRNLRI(afi, safi, mp_nlris[family].get(OUT.ANNOUNCE, []))
        mp_withdraw = MPURNLRI(afi, safi, mp_nlris[family].get(OUT.WITHDRAW, []))

        for mprnlri in mp_announce.packed_attributes(negotiated, msg_size - len(withdraws + announced)):
            if mp_reach:
                yield self._message(Update.prefix(withdraws) + Update.prefix(attr + mp_reach) + announced)
                announced = b''
                withdraws = b''
            mp_reach = mprnlri

        if include_withdraw:
            for mpurnlri in mp_withdraw.packed_attributes(
                negotiated, msg_size - len(withdraws + announced + mp_reach)
            ):
                if mp_unreach:
                    yield self._message(
                        Update.prefix(withdraws) + Update.prefix(attr + mp_unreach + mp_reach) + announced
                    )
                    mp_reach = b''
                    announced = b''
                    withdraws = b''
                mp_unreach = mpurnlri

        yield self._message(
            Update.prefix(withdraws) + Update.prefix(attr + mp_unreach + mp_reach) + announced
        )  # yield mpr/mpur per family
        withdraws = b''
        announced = b''
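# Illustrative sketch, not part of the original source: the Update.prefix()
# calls above prepend the 2-byte length field that the BGP UPDATE body carries
# in front of the withdrawn-routes block and the path-attributes block.
# Assuming that is all prefix() does, a minimal stand-in would be:
def _example_update_prefix(data):
    from struct import pack

    return pack('!H', len(data)) + data  # big-endian 16-bit length, then the payload

# The UPDATE body (without the 19-byte message header) is then assembled as:
#   _example_update_prefix(withdrawn) + _example_update_prefix(attributes) + nlri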
def neighbor_rib_resend(self, peer_name):
    peer = self._peers.get(peer_name, None)
    if not peer:
        log.critical('could not find referenced peer', 'reactor')
        return
    peer.neighbor.rib.outgoing.resend(None, peer.neighbor['capability']['route-refresh'])
def _termination(self, reason, exit_code):
    self.exit_code = exit_code
    self.signal.received = Signal.SHUTDOWN
    log.critical(reason, 'reactor')
def run(self):
    self.daemon.daemonise()

    # Make sure we create processes once we have closed the file descriptors
    # unfortunately, this must be done before reading the configuration file
    # so we can not do it with dropped privileges
    self.processes = Processes()

    # we have to read the configuration possibly with root privileges
    # as we need the MD5 information when we bind, and root is needed
    # to bind to a port < 1024
    # this is undesirable as :
    # - handling user generated data as root should be avoided
    # - we may not be able to reload the configuration once the privileges are dropped
    # but I can not see any way to avoid it
    for ip in self._ips:
        if not self.listener.listen_on(ip, None, self._port, None, False, None):
            return self.Exit.listening

    if not self.reload():
        return self.Exit.configuration

    for neighbor in self.configuration.neighbors.values():
        if neighbor['listen']:
            if not self.listener.listen_on(
                neighbor['md5-ip'],
                neighbor['peer-address'],
                neighbor['listen'],
                neighbor['md5-password'],
                neighbor['md5-base64'],
                neighbor['incoming-ttl'],
            ):
                return self.Exit.listening

    if not self.early_drop:
        self.processes.start(self.configuration.processes)

    if not self.daemon.drop_privileges():
        log.critical('could not drop privileges to \'%s\' refusing to run as root' % self.daemon.user, 'reactor')
        log.critical('set the environment value exabgp.daemon.user to change the unprivileged user', 'reactor')
        return self.Exit.privileges

    if self.early_drop:
        self.processes.start(self.configuration.processes)

    # This is required to make sure we can write in the log location as we now have dropped root privileges
    log.init(getenv())

    if not self.daemon.savepid():
        return self.Exit.pid

    wait = getenv().tcp.delay
    if wait:
        sleeptime = (wait * 60) - int(time.time()) % (wait * 60)
        log.debug('waiting for %d seconds before connecting' % sleeptime, 'reactor')
        time.sleep(float(sleeptime))

    workers = {}
    peers = set()
    api_fds = []
    ms_sleep = int(self._sleep_time * 1000)

    while True:
        try:
            if self.signal.received:
                signaled = self.signal.received

                # report that we received a signal
                for key in self._peers:
                    if self._peers[key].neighbor.api['signal']:
                        self._peers[key].reactor.processes.signal(self._peers[key].neighbor, self.signal.number)

                self.signal.rearm()

                # we always want to exit
                if signaled == Signal.SHUTDOWN:
                    self.exit_code = self.Exit.normal
                    self.shutdown()
                    break

                # it does not matter what we did if we are restarting
                # as the peers and network stack are replaced by new ones
                if signaled == Signal.RESTART:
                    self.restart()
                    continue

                # did we complete the run of updates caused by the last SIGUSR1/SIGUSR2 ?
                if self._pending_adjribout():
                    continue

                if signaled == Signal.RELOAD:
                    self.reload()
                    self.processes.start(self.configuration.processes, False)
                    continue

                if signaled == Signal.FULL_RELOAD:
                    self.reload()
                    self.processes.start(self.configuration.processes, True)
                    continue

            if self.listener.incoming():
                # check all incoming connections
                self.asynchronous.schedule(
                    str(uuid.uuid1()), 'checking for new connection(s)', self.listener.new_connections()
                )

            sleep = ms_sleep

            # do not attempt to listen on closed sockets even if the peer is still here
            for io in list(workers.keys()):
                if io == -1:
                    self._poller.unregister(io)
                    del workers[io]

            peers = self.active_peers()
            # give a turn to all the peers
            for key in list(peers):
                peer = self._peers[key]

                # limit the number of message handling per second
                if self._rate_limited(key, peer.neighbor['rate-limit']):
                    peers.discard(key)
                    continue

                # handle the peer
                action = peer.run()

                # .run() returns an ACTION enum:
                # * immediate if it wants to be called again
                # * later if it should be called again but has no work atm
                # * close if it is finished and is closing down, or restarting
                if action == ACTION.CLOSE:
                    if key in self._peers:
                        del self._peers[key]
                    peers.discard(key)
                    # we are losing this peer, no point in scheduling more process work
                elif action == ACTION.LATER:
                    io = peer.socket()
                    if io != -1:
                        self._poller.register(
                            io, select.POLLIN | select.POLLPRI | select.POLLHUP | select.POLLNVAL | select.POLLERR
                        )
                        workers[io] = key
                    # no need to come back to it before a full cycle
                    peers.discard(key)
                elif action == ACTION.NOW:
                    sleep = 0

                if not peers:
                    break

            # read at least one message per process if there is some and parse it
            for service, command in self.processes.received():
                self.api.text(self, service, command)
                sleep = 0

            self.asynchronous.run()

            if api_fds != self.processes.fds:
                for fd in api_fds:
                    if fd == -1:
                        continue
                    if fd not in self.processes.fds:
                        self._poller.unregister(fd)
                for fd in self.processes.fds:
                    if fd == -1:
                        continue
                    if fd not in api_fds:
                        self._poller.register(
                            fd, select.POLLIN | select.POLLPRI | select.POLLHUP | select.POLLNVAL | select.POLLERR
                        )
                api_fds = self.processes.fds

            for io in self._wait_for_io(sleep):
                if io not in api_fds:
                    peers.add(workers[io])

            if self._stopping and not self._peers.keys():
                self._termination('exiting on peer termination', self.Exit.normal)

        except KeyboardInterrupt:
            self._termination('^C received', self.Exit.normal)
        except SystemExit:
            self._termination('exiting', self.Exit.normal)
        # socket.error is a subclass of IOError (so catch it first)
        except socket.error:
            self._termination('socket error received', self.Exit.socket)
        except IOError:
            self._termination('I/O Error received, most likely ^C during IO', self.Exit.io_error)
        except ProcessError:
            self._termination('Problem when sending message(s) to helper program, stopping', self.Exit.process)
        except select.error:
            self._termination('problem using select, stopping', self.Exit.select)

    return self.exit_code
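# Illustrative sketch, not part of the original source: run() above multiplexes
# peer sockets and API pipes with select.poll().  The skeleton of that pattern,
# using hypothetical names (handlers maps a file descriptor to a callable), is:
def _example_poll_loop(handlers, timeout_ms=500):
    import select

    flags = select.POLLIN | select.POLLPRI | select.POLLHUP | select.POLLNVAL | select.POLLERR
    poller = select.poll()
    for fd in handlers:
        poller.register(fd, flags)
    while handlers:
        # a real loop would also unregister dead descriptors and exit on shutdown
        for fd, _event in poller.poll(timeout_ms):
            handlers[fd]()  # give a turn to whoever owns this file descriptor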
def neighbor_rib_in_clear(self, peer_name):
    peer = self._peers.get(peer_name, None)
    if not peer:
        log.critical('could not find referenced peer', 'reactor')
        return
    peer.neighbor.rib.incoming.clear()

def handle_connection(self, peer_name, connection):
    peer = self._peers.get(peer_name, None)
    if not peer:
        log.critical('could not find referenced peer', 'reactor')
        return
    peer.handle_connection(connection)

def neighbor_ip(self, peer_name):
    peer = self._peers.get(peer_name, None)
    if not peer:
        log.critical('could not find referenced peer', 'reactor')
        return ""
    return str(peer.neighbor['peer-address'])

def neighbor_cli_data(self, peer_name):
    peer = self._peers.get(peer_name, None)
    if not peer:
        log.critical('could not find referenced peer', 'reactor')
        return ""
    return peer.cli_data()