def main():
    """Dump every job arriving on a queue as pretty-printed JSON.

    The queue name is read from the first command-line argument; when that
    argument is missing or empty, the queue "default" is used.  The function
    loops until interrupted with Ctrl-C.
    """
    Disque.connect(["localhost:7711"])

    # Indexing sys.argv[1] unconditionally raises IndexError when the program
    # is started without arguments, so the "default" fallback could never be
    # reached in that case.  Check the length first.
    queues = (sys.argv[1] if len(sys.argv) > 1 else "") or "default"

    try:
        while True:
            jobs = Job.wait(queues, count=16)
            for job in jobs:
                print(json.dumps(job, sort_keys=True, indent=4))
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop watching the queue.
        pass
def listq(queues):
    """Print the statistics of the given queues, ordered by queue name.

    Connects to the local Disque node, fetches the statistics via
    ``Disque.qstat(queues)`` and hands each entry to ``print_queue``.  A
    ``KeyError`` raised while looking up or printing an entry is reported as
    an invalid queue instead of aborting the listing.
    """
    Disque.connect(["localhost:7711"])
    stats = Disque.qstat(queues)

    for queue_name in sorted(stats.keys()):
        try:
            print_queue(queue_name, stats[queue_name])
        except KeyError:
            print('invalid queue "%s"' % queue_name)
def listq(args):
    """Print the statistics of the queues named in ``args.queues``.

    Connects to the Disque node at ``args.disque_url``, fetches the
    statistics via ``Disque.qstat`` and prints one entry per queue in
    name order.  A ``KeyError`` raised while looking up or printing an entry
    is reported as an invalid queue instead of aborting the listing.
    """
    Disque.connect([args.disque_url])
    stats = Disque.qstat(args.queues)

    for queue_name in sorted(stats.keys()):
        try:
            print_queue(queue_name, stats[queue_name])
        except KeyError:
            print('invalid queue "%s"' % queue_name)
def live():
    """Relay live build status updates until the build reports "done".

    Reads the status queue name from ``sys.argv[1]`` and the PR number from
    ``sys.argv[2]``.  Every status message carrying a job has its result
    saved; failed jobs are collected (at most ``max_listed`` entries, plus a
    summary entry for the overflow) and passed along with each posted
    status.  Status posts are rate limited to one per 0.5 seconds, except
    for the final post which is always sent.
    """
    global result_dict

    Disque.connect(["localhost:7711"])
    http_root = os.environ.get("CI_BUILD_HTTP_ROOT", "")

    queue = sys.argv[1]
    prnum = sys.argv[2]

    max_listed = 20
    failure_count = 0
    failures = []
    last_post = 0

    try:
        post_status({"status": "setting up build"}, prnum, [], "")

        while True:
            for update in Job.wait(queue, count=16):
                job = update.get('job')
                filename = save_job_result(job) if job else None

                if filename and not has_passed(job):
                    failure_count += 1
                    jobname = job_name(job)

                    if jobname == "static_tests":
                        # static test failures always go to the front
                        failures = [(filename, jobname)] + failures
                    elif failure_count <= max_listed:
                        failures.append((filename, job_name(job)))

                    # drop anything beyond the limit (including a summary
                    # entry added by an earlier iteration) ...
                    failures = failures[:max_listed]

                    # ... and (re-)add the overflow summary if needed
                    if failure_count > max_listed:
                        overflow = failure_count - max_listed
                        failures.append(
                            (None, "(%s more failed jobs)" % overflow)
                        )

                if update.get("status", "") == "done":
                    post_status(None, prnum, failures, http_root)
                    return

                now = time.time()
                if now - last_post > 0.5:
                    post_status(update, prnum, failures, http_root)
                    last_post = now
    except KeyboardInterrupt:
        pass
def control(args):
    """Execute the single control action selected on the command line.

    The flags are checked in the order pause, resume, shutdown, ping, list;
    only the first one that is set is acted upon.  The first four send the
    matching command to ``args.nodes`` through ``control_cmd``; ``list``
    calls ``list_nodes()``.
    """
    Disque.connect(["localhost:7711"])

    # Extra keyword arguments for pause/shutdown.  Always empty right now:
    # the "force" option that used to fill it is disabled.
    jobargs = {}

    node_commands = (
        ("pause", jobargs),
        ("resume", {}),
        ("shutdown", jobargs),
        ("ping", {}),
    )

    for command, extra in node_commands:
        if getattr(args, command):
            control_cmd(args.nodes, command, **extra)
            return

    if args.list:
        list_nodes()
def drain(queues):
    """Remove all currently queued jobs from the given queues.

    Exits with status 1 (after printing a message) when ``queues`` is empty.
    Otherwise jobs are fetched without blocking in batches of up to 1024 and
    acknowledged with ``fast_ack`` until a fetch returns nothing.  Ctrl-C
    stops the draining silently.
    """
    if not queues:
        print("dwqm: drain: no queues given.")
        sys.exit(1)

    Disque.connect(["localhost:7711"])
    client = Disque.get()

    try:
        batch = Job.get(queues, count=1024, nohang=True)
        while batch:
            client.fast_ack(*[job.job_id for job in batch])
            batch = Job.get(queues, count=1024, nohang=True)
    except KeyboardInterrupt:
        pass
def main():
    """Entry point of the worker process.

    Parses the command line, starts ``args.jobs`` daemon worker threads and
    then loops forever in the main thread: it (re)connects to Disque when
    the connection is down and handles jobs arriving on this worker's
    control queue (``control::worker::<name>``).

    On ``KeyboardInterrupt`` or ``SystemExit`` the worker shuts down: the
    command server pool is destroyed, jobs still in the working set are
    nack'ed and the job directories are cleaned up.
    """
    global shutdown
    global verbose
    global active_event

    args = parse_args()
    verbose = args.verbose - args.quiet

    cmd_server_pool = cmdserver.CmdServerPool(args.jobs)

    signal.signal(signal.SIGTERM, sigterm_handler)

    _dir = "/tmp/dwq.%s" % str(random.random())
    gitjobdir = GitJobDir(_dir, args.jobs)

    servers = ["localhost:7711"]
    try:
        Disque.connect(servers)
        vprint(1, "dwqw: connected.")
    except Exception:
        # Best effort only: the main loop below retries the connection.
        # Deliberately not a bare "except:", which would also swallow
        # KeyboardInterrupt / SystemExit raised during startup.
        pass

    working_set = SyncSet()

    for n in range(1, args.jobs + 1):
        threading.Thread(
            target=worker,
            args=(n, cmd_server_pool, gitjobdir, args, working_set),
            daemon=True,
        ).start()

    active_event.set()

    try:
        while True:
            if not Disque.connected():
                try:
                    vprint(1, "dwqw: connecting...")
                    Disque.connect(servers)
                    vprint(1, "dwqw: connected.")
                except RedisError:
                    # server not reachable yet; retry once per second
                    time.sleep(1)
                    continue

            try:
                control_jobs = Job.get(["control::worker::%s" % args.name])
                for job in control_jobs or []:
                    handle_control_job(args, job)
            except RedisError:
                # connection trouble is handled by the reconnect logic at
                # the top of the loop
                pass

    except (KeyboardInterrupt, SystemExit):
        vprint(1, "dwqw: shutting down")
        shutdown = True
        cmd_server_pool.destroy()

        try:
            vprint(1, "dwqw: nack'ing jobs")
            jobs = working_set.empty()
            # Only talk to Disque when there is something to hand back:
            # calling nack_job() without any job id would send an
            # argument-less NACK command.
            if jobs:
                Disque.get().nack_job(*jobs)
        except RedisError as e:
            # The server may be gone during shutdown; the jobs cannot be
            # nack'ed then, but local cleanup must still happen.
            vprint(1, "dwqw: could not nack jobs: %s" % e)
        finally:
            vprint(1, "dwqw: cleaning up job directories")
            gitjobdir.cleanup()
def main():
    """Entry point of the client: queue jobs, then wait for their results.

    Jobs come either from the command given on the command line or, one per
    line, from stdin.  Unless ``--subjob`` is set, the function then waits on
    the control queue until every queued job (and every subjob reported by
    those jobs) has delivered a result, printing output/progress on the way.

    Exits with status 1 on invalid arguments, on cancellation, when more
    than ``args.maxfail`` unexpected failures occurred, or when at least one
    job failed that was not marked ``fail_ok``.
    """
    global verbose
    args = parse_args()

    # repo and commit only make sense together: XOR is true when exactly one
    # of them is missing.
    if (args.repo is None) ^ (args.commit is None):
        print("dwqc: error: both repo and commit must be specified!", file=sys.stderr)
        sys.exit(1)

    signal.signal(signal.SIGTERM, sigterm_handler)

    job_queue = args.queue

    Disque.connect([args.disque_url])

    if args.subjob:
        # In subjob mode the control queue and the parent job id are
        # inherited from the environment.
        try:
            control_queue = os.environ["DWQ_CONTROL_QUEUE"]
        except KeyError:
            print(
                "dwqc: error: --subjob specified, but DWQ_CONTROL_QUEUE unset."
            )
            sys.exit(1)

        try:
            parent_jobid = os.environ["DWQ_JOBID"]
        except KeyError:
            print("dwqc: error: --subjob specified, but DWQ_JOBID unset.")
            sys.exit(1)

    else:
        # Top-level invocation: use a private, randomly named control queue.
        control_queue = "control::%s" % random.random()
        parent_jobid = None

    verbose = args.verbose

    # start_time is only bound (and only read below) when progress or report
    # output is enabled.
    if args.progress or args.report:
        start_time = time.time()

    if args.report:
        Job.add(args.report, {"status": "collecting jobs"})

    try:
        file_data = util.gen_file_data(args.file)
    except util.GenFileDataException as e:
        print("dwqc: error processing --file argument:", e, file=sys.stderr)
        sys.exit(1)

    # Options applied to every job queued by this invocation.
    base_options = {}
    if args.exclusive_jobdir:
        base_options.update({"jobdir": "exclusive"})
    if file_data:
        base_options["files"] = file_data

    # NOTE(review): 3 and 300 are presumably the argparse defaults for
    # --tries / --timeout; the options are only sent when they differ.
    if args.tries != 3:
        if args.tries < 1:
            print("dwqc: error: --tries < 1!")
            sys.exit(1)

        base_options["max_retries"] = args.tries - 1

    if args.timeout != 300:
        if args.timeout <= 0:
            print("dwqc: error: --timeout <=0!")
            sys.exit(1)

        base_options["timeout"] = args.timeout

    result_list = []
    try:
        # ids of jobs whose result is still outstanding
        jobs = set()
        # (queue, body, control queues) tuples collected in --batch mode
        batch = []
        if args.command and not args.stdin:
            # Single job given on the command line.
            options = base_options
            queue_job(
                jobs,
                job_queue,
                create_body(args, args.command, options, parent_jobid),
                [control_queue],
            )
        else:
            jobs_read = 0
            vprint("dwqc: reading jobs from stdin")
            for line in sys.stdin:
                line = line.rstrip()
                if args.stdin and args.command:
                    # Template mode: the command line argument is a template
                    # in which ${N} is replaced by the N-th (1-based)
                    # space-separated word of the input line and ${0} by the
                    # whole line.
                    cmdargs = line.split(" ")
                    command = args.command
                    for i in range(0, len(cmdargs)):
                        command = command.replace("${%i}" % (i + 1), cmdargs[i])
                    command = command.replace("${0}", line)
                else:
                    command = line

                # Everything after "###" is parsed as a JSON object with
                # per-job options overriding the base options.
                tmp = command.split("###")
                command = tmp[0]
                options = {}
                options.update(base_options)
                if len(tmp) > 1:
                    command = command.rstrip()
                    try:
                        options.update(json.loads(tmp[1]))
                    except json.decoder.JSONDecodeError:
                        vprint("dwqc: invalid option JSON. Skipping job.", file=sys.stderr)
                        continue

                # A job may select its own queue through its options.
                _job_queue = options.get("queue", job_queue)

                if args.batch:
                    # Only collect now; the jobs are sent after stdin has
                    # been read completely.
                    batch.append((
                        _job_queue,
                        create_body(args, command, options, parent_jobid),
                        [control_queue],
                    ))
                else:
                    job_id = queue_job(
                        jobs,
                        _job_queue,
                        create_body(args, command, options, parent_jobid),
                        [control_queue],
                    )
                    vprint('dwqc: job %s command="%s" sent to queue %s.' % (job_id, command, _job_queue))
                    if args.progress:
                        print("")

                if args.progress or args.report:
                    jobs_read += 1
                    elapsed = time.time() - start_time

                    if args.progress:
                        print(
                            "\033[F\033[K[%s] %s jobs read" % (nicetime(elapsed), jobs_read),
                            end="\r",
                        )

                    # if args.report:
                    #    Job.add(args.report, { "status" : "collecting jobs", "total" : jobs_read })

        _time = ""
        if args.batch and args.stdin:
            before = time.time()
            vprint("dwqc: sending jobs")
            for _tuple in batch:
                queue_job(jobs, *_tuple)
            _time = "(took %s)" % nicetime(time.time() - before)

        if args.report:
            Job.add(args.report, {"status": "sending jobs"})

        if args.stdin:
            vprint("dwqc: all jobs sent.", _time)

        if args.subjob:
            # Subjob invocations only queue jobs; they do not wait for
            # results here.
            if args.report:
                Job.add(args.report, {"status": "done"})

            return

        if args.progress:
            vprint("")

        # results that arrived for job ids not (yet) in `jobs`, keyed by id
        unexpected = {}
        # previously unexpected results that now belong to a known subjob;
        # they are processed before waiting on the control queue again
        early_subjobs = []
        total = len(jobs)
        done = 0
        failed = 0
        failed_expected = 0
        passed = 0
        # subjob announcements, filled through dict_dictadd / dict_addset and
        # read below as subjobs[parent job id][unique] -> subjob ids
        subjobs = {}
        while jobs:
            _early_subjobs = early_subjobs or None
            early_subjobs = []

            for job in _early_subjobs or Job.wait(control_queue, count=128):
                # print(json.dumps(job, sort_keys=True, indent=4))
                subjob = job.get("subjob")
                if subjob:
                    # Announcement that a job started a subjob: remember it
                    # until the result of the parent job arrives.
                    parent = job.get("parent")
                    unique = job.get("unique")

                    _dict = dict_dictadd(subjobs, parent)
                    dict_addset(_dict, unique, subjob)

                else:
                    try:
                        job_id = job["job_id"]
                        # set.remove() raises KeyError for an id we are not
                        # waiting for; the handler at the end of this try
                        # block then stores the message in `unexpected`.
                        jobs.remove(job_id)
                        done += 1
                        # if args.progress:
                        #    vprint("\033[F\033[K", end="")
                        # vprint("dwqc: job %s done. result=%s" % (job["job_id"], job["result"]["status"]))
                        if not args.quiet:
                            if args.progress:
                                print("\033[K", end="")
                            print(job["result"]["output"], end="")
                        _has_passed = job["result"]["status"] in { 0, "0", "pass" }
                        if _has_passed:
                            passed += 1
                            handle_assets(job, args)
                        else:
                            failed += 1
                            # Failures of jobs with the "fail_ok" option set
                            # are counted separately and do not affect the
                            # exit status.
                            try:
                                if job["result"]["body"]["options"]["fail_ok"]:
                                    failed_expected += 1
                            except KeyError:
                                pass

                        if args.outfile:
                            result_list.append(job)

                        # collect subjobs started by this job instance, add to waitlist
                        unique = job["result"]["unique"]
                        _subjobs = subjobs.get(job_id, {}).get(unique, [])
                        for subjob_id in _subjobs:
                            try:
                                # result already arrived earlier: replay it
                                early_subjobs.append(unexpected.pop(subjob_id))
                            except KeyError:
                                pass
                            finally:
                                jobs.add(subjob_id)

                        total += len(_subjobs)

                        if args.progress or args.report:
                            elapsed = time.time() - start_time
                            per_job = elapsed / done
                            eta = (total - done) * per_job

                            if args.progress:
                                print(
                                    "\r\033[K[%s] %s/%s jobs done (%s passed, %s failed.) "
                                    "ETA:" % (nicetime(elapsed), done, total, passed, failed),
                                    nicetime(eta),
                                    end="\r",
                                )

                            if args.report:
                                Job.add(
                                    args.report,
                                    {
                                        "status": "working",
                                        "elapsed": elapsed,
                                        "eta": eta,
                                        "total": total,
                                        "passed": passed,
                                        "failed": failed,
                                        "job": job,
                                    },
                                )

                        if not _has_passed:
                            if failed > failed_expected:
                                if (failed - failed_expected) > args.maxfail:
                                    print(
                                        "dwqc: more than %i jobs failed. Exiting." % args.maxfail,
                                        file=sys.stderr,
                                    )
                                    # SystemExit is caught by the outer
                                    # handler, which cancels remaining jobs.
                                    sys.exit(1)
                    except KeyError:
                        # NOTE(review): besides the jobs.remove() case this
                        # also catches a KeyError from any other lookup in
                        # the try block (e.g. a missing "result" key), after
                        # `done` has already been incremented.
                        unexpected[job_id] = job

        if args.outfile:
            args.outfile.write(json.dumps(result_list))

        if args.progress:
            print("")

    except (KeyboardInterrupt, SystemExit):
        print("dwqc: cancelling...")
        Job.cancel_all(jobs)
        if args.report:
            Job.add(args.report, {"status": "canceled"})
        sys.exit(1)

    if args.report:
        Job.add(args.report, {"status": "done"})

    # `failed` / `failed_expected` were bound inside the try block; this
    # point is only reached when the wait loop completed.
    if failed > failed_expected:
        sys.exit(1)