def maybe_restart_mirroring_script() -> None:
    """Re-exec this script if one of its restart stamp files is newer than startup.

    ``<options.stamp_path>/stamps/restart_stamp`` is always consulted;
    ``tabbott_stamp`` in the same directory is consulted only when
    ``options.user`` is ``"tabbott"`` or ``"tabbott/extra"`` and the first
    stamp did not already trigger.  A stamp triggers when its mtime is greater
    than the module-level ``start_time``.

    When a restart is triggered this calls ``maybe_kill_child()``, attempts to
    cancel the zephyr subscriptions, and then replaces the current process
    with a fresh run of this file using the original ``sys.argv``.

    Raises:
        OSError: if a consulted stamp file cannot be stat'ed.
        Exception: if every re-exec attempt allowed by the backoff
            (constructed with ``maximum_retries=3``) fails.
    """
    stamps_dir = os.path.join(options.stamp_path, "stamps")

    def stamp_updated(stamp_name: str) -> bool:
        # True when the named stamp file was modified after this process started.
        return os.stat(os.path.join(stamps_dir, stamp_name)).st_mtime > start_time

    # `or`/`and` keep the original short-circuit order: the second stamp is
    # only stat'ed when the first one did not trigger and the user matches.
    restart_requested = stamp_updated("restart_stamp") or (
        options.user in ("tabbott", "tabbott/extra")
        and stamp_updated("tabbott_stamp")
    )
    if not restart_requested:
        return

    logger.warning("")
    logger.warning("zephyr mirroring script has been updated; restarting...")
    maybe_kill_child()
    try:
        zephyr._z.cancelSubs()
    except OSError:
        # A failure to cancel subscriptions does not block the restart, but it
        # is still recorded in the log.
        logger.exception("")

    retry_backoff = RandomExponentialBackoff(maximum_retries=3)
    while retry_backoff.keep_going():
        try:
            # On success execvp never returns, so there is nothing to mark as
            # succeeded on the backoff object.
            os.execvp(os.path.abspath(__file__), sys.argv)
        except Exception:
            logger.exception(
                "Error restarting mirroring script; trying again... Traceback:"
            )
            retry_backoff.fail()
    raise Exception("Failed to reload too many times, aborting!")
def zulip_to_zephyr(options: int) -> NoReturn:
    """Sync messages from zulip to zephyr; this function never returns.

    Repeatedly hands ``maybe_forward_to_zephyr`` to
    ``zulip_client.call_on_each_message``.  Any exception raised by that call
    is logged and reported to a ``RandomExponentialBackoff`` before the call
    is made again.

    Args:
        options: Not read anywhere in this function body.
            NOTE(review): the ``int`` annotation looks out of place for a
            parameter called ``options`` -- confirm against the caller.
    """
    logger.info("Starting syncing messages.")
    failure_backoff = RandomExponentialBackoff(timeout_success_equivalent=120)
    while True:
        try:
            zulip_client.call_on_each_message(maybe_forward_to_zephyr)
        except Exception:
            logger.exception("Error syncing messages:")
            failure_backoff.fail()
def process_loop(log: Optional[IO[Any]]) -> NoReturn:
    """Receive zephyr notices and relay them, forever.

    Each pass of the outer loop:

    1. waits (up to 15 seconds) in ``select`` on the descriptor returned by
       ``zephyr._z.getFD()``;
    2. drains the notice queue with non-blocking ``zephyr.receive`` calls,
       passing every notice to ``process_notice``;
    3. if more than 15 seconds have passed since the previous check, calls
       ``maybe_restart_mirroring_script()`` and, when
       ``options.forward_class_messages`` is set, ``update_subscriptions()``.

    Errors in any of these steps are logged rather than propagated.

    Args:
        log: Passed through unchanged to ``process_notice`` for every notice.
    """
    select_timeout = 15
    maintenance_interval = 15

    failed_restart_checks = 0
    last_maintenance = time.time()
    receive_backoff = RandomExponentialBackoff()

    while True:
        select.select([zephyr._z.getFD()], [], [], select_timeout)
        try:
            notice_backoff = RandomExponentialBackoff()
            # Keep pulling notices until the queue reports it is empty.
            while True:
                notice = zephyr.receive(block=False)
                receive_backoff.succeed()
                if notice is None:
                    break
                try:
                    process_notice(notice, log)
                    notice_backoff.succeed()
                except Exception:
                    logger.exception("Error relaying zephyr:")
                    notice_backoff.fail()
        except Exception:
            logger.exception("Error checking for new zephyrs:")
            receive_backoff.fail()
            # Skip the periodic checks for this pass and go back to waiting.
            continue

        if time.time() - last_maintenance <= maintenance_interval:
            continue
        last_maintenance = time.time()

        try:
            maybe_restart_mirroring_script()
            if failed_restart_checks > 0:
                logger.info(
                    "Stopped getting errors checking whether restart is required."
                )
                failed_restart_checks = 0
        except Exception:
            # Only the first five consecutive failures are logged; the counter
            # is reset by the next successful check above.
            if failed_restart_checks < 5:
                logger.exception("Error checking whether restart is required:")
                failed_restart_checks += 1

        if options.forward_class_messages:
            try:
                update_subscriptions()
            except Exception:
                logger.exception("Error updating subscriptions from Zulip:")
"""We actually want to exit, so run os._exit (so as not to be caught and restarted)""" os._exit(1) signal.signal(signal.SIGINT, die) args = [os.path.join(os.path.dirname(sys.argv[0]), "jabber_mirror_backend.py")] args.extend(sys.argv[1:]) backoff = RandomExponentialBackoff(timeout_success_equivalent=300) while backoff.keep_going(): print("Starting Jabber mirroring bot") try: ret = subprocess.call(args) except Exception: traceback.print_exc() else: if ret == 2: # Don't try again on initial configuration errors sys.exit(ret) backoff.fail() print("") print("") print( "ERROR: The Jabber mirroring bot is unable to continue mirroring Jabber.") print("Please contact [email protected] if you need assistance.") print("") sys.exit(1)
print("Starting parallel zephyr class mirroring bot") jobs = list("0123456789abcdef") def run_job(shard): # type: (str) -> int subprocess.call(args + ["--shard=%s" % (shard,)]) return 0 for (status, job) in run_parallel(run_job, jobs, threads=16): print("A mirroring shard died!") pass sys.exit(0) backoff = RandomExponentialBackoff(timeout_success_equivalent=300) while backoff.keep_going(): print("Starting zephyr mirroring bot") try: subprocess.call(args) except Exception: traceback.print_exc() backoff.fail() error_message = """ ERROR: The Zephyr mirroring bot is unable to continue mirroring Zephyrs. This is often caused by failing to maintain unexpired Kerberos tickets or AFS tokens. See https://zulipchat.com/zephyr for documentation on how to maintain unexpired Kerberos tickets and AFS tokens. """ print(error_message) sys.exit(1)