def test_store_cron_job_message_to_disk():
    """A CronItem received over UDP is processed and then persisted to disk.

    Verifies the queue is drained, the job lands in storage, and that
    ``storage.save()`` writes ``cluster_jobs.json`` under the path prefix.
    """
    tmp_dir = mkdtemp()
    ser = path.join(tmp_dir, 'cluster_jobs.json')
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        storage = Storage(path_prefix=tmp_dir)
        processor = Processor(12345, storage,
                              cron=CronTab(tab="""* * * * * command"""))
        message = CronItem(command="echo 'hello world'")
        message.append_log("test log message")
        for packet in UdpSerializer.dump(message):
            processor.queue.put_nowait(packet)
        loop.run_until_complete(asyncio.gather(processor.process()))
        loop.run_until_complete(asyncio.gather(storage.save()))
        assert processor.queue.qsize() == 0
        assert len(storage.cluster_jobs) == 1
        assert message == storage.cluster_jobs[0]
        assert exists(ser)
    finally:
        # release the loop and scratch dir even when an assertion fails,
        # so a failing test does not leak resources into later tests
        loop.close()
        shutil.rmtree(tmp_dir)
def test_store_retrieve_sorts_correctly():
    """The most recently queued Status for a node is the one reported.

    Queues ten Status messages for the same ip and checks that
    ``node_state`` reflects the last one (by timestamp).
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        storage = Storage()
        processor = Processor(12345, storage,
                              cron=CronTab(tab="""* * * * * command"""))
        ip = '127.0.0.1'
        messages = [Status(ip, 10) for _ in range(10)]
        for message in messages:
            for packet in UdpSerializer.dump(message):
                processor.queue.put_nowait(packet)
        while not processor.queue.empty():
            loop.run_until_complete(asyncio.gather(processor.process()))
        # newest message wins: its timestamp must be the recorded node state
        assert messages[-1].time == storage.node_state(ip).time
    finally:
        # close the loop even on assertion failure (original leaked it then)
        loop.close()
def test_rebalancing():
    """Re-balancing assigns unowned jobs so the cluster state check passes."""
    node_a = 'node1'
    node_b = 'node2'
    storage = Storage()
    storage.cluster_status = [Status(node_a, 0), Status(node_b, 0)]
    for cmd in ("echo 'hello world 1'", "echo 'hello world 2'"):
        storage.cluster_jobs.append(CronItem(command=cmd))
    scheduler = Scheduler(storage, 60)
    # both jobs are unassigned, so the health check must fail first
    assert not scheduler.check_cluster_state()
    scheduler.re_balance()
    # after re-balancing every job has an owner and the check passes
    assert scheduler.check_cluster_state()
def test_manual_run_is_executed_exactly_once():
    """A Run command for a job assigned to this node executes it exactly once.

    The job log must contain a single entry holding both the captured
    stdout and the exit code.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        command = "echo 'hello world'"
        cron_job = CronItem(command=command)
        cron_job.assigned_to = get_ip()
        storage = Storage()
        tab = CronTab(tab="""* * * * * command""")
        processor = Processor(12345, storage, cron=tab)
        for packet in UdpSerializer.dump(cron_job):
            processor.queue.put_nowait(packet)
        for packet in UdpSerializer.dump(Run(cron_job)):
            processor.queue.put_nowait(packet)
        loop.run_until_complete(processor.process())
        assert 1 == len(storage.cluster_jobs)
        assert command == storage.cluster_jobs[0].command
        # exactly one execution -> exactly one log line
        assert 1 == len(storage.cluster_jobs[0].log)
        log_line = storage.cluster_jobs[0].log[0]
        assert 'exit code: 0' in log_line and 'hello world' in log_line
        assert processor.queue.empty()
    finally:
        # close the loop even on assertion failure (original leaked it then)
        loop.close()
def test_add_same_job_twice_adds_cron_once():
    """Queueing an identical job twice must not duplicate it anywhere.

    Storage must hold the job once, and the crontab must contain exactly
    one matching entry.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    command = "echo 'hello world'"
    job = CronItem(command=command)
    job.assigned_to = get_ip()
    storage = Storage()
    tab = CronTab(tab="""* * * * * command""")
    processor = Processor(12345, storage, cron=tab)
    # enqueue the exact same job twice
    for _ in range(2):
        for packet in UdpSerializer.dump(job):
            processor.queue.put_nowait(packet)
    loop.run_until_complete(processor.process())
    assert 1 == len(storage.cluster_jobs)
    assert command == storage.cluster_jobs[0].command
    # the command exists in the tab, and only once
    assert None is not next(tab.find_command(command), None)
    assert 1 == len(list(tab.find_command(command)))
    loop.close()
def test_save_load():
    """A job saved to disk is restored by a fresh Storage over the same path."""
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    tmp_dir = mkdtemp()
    try:
        storage = Storage(path_prefix=tmp_dir)
        cron = crontab.CronTab()
        item = CronItem(command="echo 'hello world'", cron=cron)
        item.set_all("2 1 * * *")
        item.append_log("test log message")
        storage.cluster_jobs.append(item)
        assert 1 == len(storage.cluster_jobs)
        loop.run_until_complete(storage.save())
        # a new Storage over the same directory must load the saved job
        storage = Storage(path_prefix=tmp_dir)
        assert 1 == len(storage.cluster_jobs)
    finally:
        # the original never closed the loop (resource leak) and leaked
        # tmp_dir on assertion failure — clean up unconditionally
        loop.close()
        shutil.rmtree(tmp_dir)
def test_store_status_message():
    """A Status packet on the queue becomes the node's recorded state."""
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    storage = Storage()
    processor = Processor(12345, storage,
                          cron=CronTab(tab="""* * * * * command"""))
    ip = '127.0.0.1'
    message = Status(ip, 10)
    for packet in UdpSerializer.dump(message):
        processor.queue.put_nowait(packet)
    loop.run_until_complete(asyncio.gather(processor.process()))
    # queue fully drained and the status recorded for this node
    assert processor.queue.qsize() == 0
    assert message == storage.node_state(ip)
    loop.close()
def test_active_nodes():
    """A node with a fresh status entry is counted as active."""
    storage = Storage()
    storage.cluster_status = [Status('test', 0)]
    scheduler = Scheduler(storage, 60)
    active = list(scheduler.active_nodes())
    assert len(active) == 1
def main():
    """Entry point: parse CLI args, start the UDP processor, background
    broadcast/re-balance threads, periodic persistence, and the web UI.

    Blocks in ``loop.run_forever()`` until interrupted, then shuts the
    web server and background tasks down in order.
    """
    parser = argparse.ArgumentParser(
        description='Distributed Cronlike Scheduler')
    parser.add_argument('-l', '--log-file', default=None,
                        help='path to store logfile')
    parser.add_argument('-p', '--storage-path', default=None,
                        help='directory where to store cache')
    parser.add_argument('-u', '--udp-communication-port', type=int,
                        default=12345,
                        help='communication port (default: 12345)')
    parser.add_argument('-i', '--broadcast-interval', type=int, default=5,
                        help='interval for broadcasting data over UDP')
    parser.add_argument(
        '-c', '--cron', default=None,
        # fixed: help text was missing its closing parenthesis
        help='crontab to use (default: /etc/crontab, use `memory` to not '
             'save to file)')
    parser.add_argument('-d', '--cron-user', default=None,
                        help='user for storing cron entries')
    parser.add_argument('-w', '--web-port', type=int, default=8080,
                        help='web hosting port (default: 8080)')
    parser.add_argument(
        '-n', '--ntp-server', default='pool.ntp.org',
        help='NTP server to detect clock skew (default: pool.ntp.org)')
    parser.add_argument(
        '-s', '--node-staleness', type=int, default=180,
        # fixed typo: "defailt" -> "default"
        help='Time in seconds of non-communication for a node to be '
             'marked as stale (default: 180s)')
    parser.add_argument(
        '-x', '--hash-key', default='abracadabra',
        help="String to use for verifying UDP traffic (to disable use '')")
    parser.add_argument('-v', '--verbose', action='store_true',
                        default=False, help='verbose logging')
    args = parser.parse_args()

    # refuse to run on a badly skewed clock: cron scheduling depends on time
    if get_ntp_offset(args.ntp_server) > 60:
        exit("your clock is not in sync (check system NTP settings)")

    root_logger = logging.getLogger()
    if args.log_file:
        file_handler = logging.FileHandler(args.log_file)
        file_handler.setFormatter(logging.Formatter(log_format))
        root_logger.addHandler(file_handler)
    if args.verbose:
        root_logger.setLevel(logging.DEBUG)
    else:
        root_logger.setLevel(logging.INFO)
        logging.getLogger('aiohttp').setLevel(logging.WARNING)

    pool = ThreadPoolExecutor(4)
    storage = Storage(args.storage_path)

    # choose the crontab backing store: in-memory, a tab file with an
    # explicit user, or a tab file with the default user
    if args.cron:
        if args.cron == 'memory':
            processor = Processor(args.udp_communication_port, storage,
                                  cron=CronTab(tab="""* * * * * command"""))
        elif args.cron_user:
            processor = Processor(args.udp_communication_port, storage,
                                  cron=CronTab(tabfile=args.cron,
                                               user=args.cron_user),
                                  user=args.cron_user)
        else:
            processor = Processor(args.udp_communication_port, storage,
                                  cron=CronTab(tabfile=args.cron,
                                               user='******'),
                                  user='******')
    else:
        processor = Processor(args.udp_communication_port, storage,
                              user='******')

    # empty string disables UDP traffic verification
    hash_key = None
    if args.hash_key != '':
        hash_key = args.hash_key

    with StatusProtocolServer(processor, args.udp_communication_port) as loop:

        running = True

        scheduler = Scheduler(storage, args.node_staleness)

        def timed_broadcast():
            """ periodically broadcast system status and known jobs """
            while running:
                broadcast(
                    args.udp_communication_port,
                    UdpSerializer.dump(Status(get_ip(), get_load()),
                                       hash_key))
                for job in storage.cluster_jobs:
                    if job.assigned_to == get_ip():
                        job.pid = check_process(job.command)
                    for packet in UdpSerializer.dump(job, hash_key):
                        client(args.udp_communication_port, packet)
                time.sleep(args.broadcast_interval)

        def timed_schedule():
            """ periodically check if cluster needs re-balancing """
            while running:
                time.sleep(23)
                if not scheduler.check_cluster_state():
                    logger.info("re-balancing cluster")
                    jobs = storage.cluster_jobs.copy()
                    for packet in UdpSerializer.dump(
                            ReBalance(timestamp=datetime.now()), hash_key):
                        client(args.udp_communication_port, packet)
                    time.sleep(5)
                    for job in jobs:
                        for packet in UdpSerializer.dump(job, hash_key):
                            client(args.udp_communication_port, packet)

        async def scheduled_broadcast():
            # run the blocking broadcast loop on the thread pool
            await loop.run_in_executor(pool, timed_broadcast)

        async def scheduled_rebalance():
            # run the blocking re-balance loop on the thread pool
            await loop.run_in_executor(pool, timed_schedule)

        async def save_schedule():
            """ auto save every 100 seconds """
            while running:
                await asyncio.sleep(100)
                await storage.save()

        logger.info("setting broadcast interval to {0} seconds".format(
            args.broadcast_interval))
        loop.create_task(scheduled_broadcast())
        loop.create_task(scheduled_rebalance())
        if args.storage_path:
            loop.create_task(save_schedule())

        logger.info(
            "starting web application server on http://{0}:{1}/".format(
                get_ip(), args.web_port))

        if args.cron_user:
            s = Site(scheduler, storage, args.udp_communication_port,
                     cron=processor.cron, user=args.cron_user,
                     hash_key=hash_key)
        else:
            s = Site(scheduler, storage, args.udp_communication_port,
                     cron=processor.cron, hash_key=hash_key)
        runner = AppRunner(s.app)
        loop.run_until_complete(runner.setup())
        site_instance = TCPSite(runner, port=args.web_port)
        loop.run_until_complete(site_instance.start())

        try:
            loop.run_forever()
        except KeyboardInterrupt:
            # was a bare `except:` — narrowed so real errors still propagate
            logger.info("interrupt received")

        logger.info("stopping web application")
        loop.run_until_complete(site_instance.stop())

        # signal the background threads to exit their while-loops
        running = False

        if args.storage_path:
            loop.create_task(storage.save())

        logger.debug("waiting for background tasks to finish")
        # asyncio.Task.all_tasks() was deprecated in 3.7 and removed in 3.9;
        # asyncio.all_tasks(loop) is the supported replacement
        pending_tasks = [
            task for task in asyncio.all_tasks(loop) if not task.done()
        ]
        loop.run_until_complete(asyncio.gather(*pending_tasks))

        logger.info("elvis has left the building")