def run_job(job_name, job, params):
    params['job_name'] = job_name
    try:
        job_type = job['job']['type']
        if job_type in JOB_TYPES:
            tasks = JOB_TYPES[job_type](job['job'], params)
        else:
            raise Exception("job type {} not recognized".format(job_type))
        if params['no_create']:
            for task_id, task in tasks:
                logger.info(
                    "Not creating task {} (--no-create):\n".format(task_id) +
                    json.dumps(task, sort_keys=True, indent=4,
                               separators=(',', ': ')))
        else:
            for task_id, task in tasks:
                create_task(get_session(), task_id, params['job_name'], task)
    except Exception:
        # report the exception, but don't fail the whole cron task, as that
        # would leave other jobs un-run. NOTE: we could report job failure to
        # a responsible person here via tc-notify
        traceback.print_exc()
        logger.error("cron job {} run failed; continuing to next job".format(
            params['job_name']))
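# Hedged sketch of the JOB_TYPES registry that run_job dispatches on: a
# mapping from job-type name to a callable that yields (task_id, task_def)
# pairs. The entry shown here is illustrative, not an exhaustive list.
#   JOB_TYPES = {
#       'decision-task': run_decision_task,
#   }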
def calculate_time(options):
    if 'TASK_ID' not in os.environ:
        # running in a development environment, so look for CRON_TIME or use
        # the current time
        if 'CRON_TIME' in os.environ:
            logger.warning("setting params['time'] based on $CRON_TIME")
            time = datetime.datetime.utcfromtimestamp(
                int(os.environ['CRON_TIME']))
        else:
            logger.warning(
                "using current time for params['time']; try setting "
                "$CRON_TIME to a timestamp")
            time = datetime.datetime.utcnow()
    else:
        # fetch this task from the queue
        res = get_session().get(
            'http://taskcluster/queue/v1/task/' + os.environ['TASK_ID'])
        if res.status_code != 200:
            try:
                logger.error(res.json()['message'])
            except Exception:
                logger.error(res.text)
            res.raise_for_status()
        # the task's `created` time is close to when the hook ran, although
        # that may be some time ago if task execution was delayed
        created = res.json()['created']
        time = datetime.datetime.strptime(created, '%Y-%m-%dT%H:%M:%S.%fZ')

    # round down to the nearest 15m
    minute = time.minute - (time.minute % 15)
    time = time.replace(minute=minute, second=0, microsecond=0)
    logger.info("calculated cron schedule time is {}".format(time))
    return time
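# A minimal, standalone sketch of the 15-minute rounding above (the real
# function also consults $TASK_ID / $CRON_TIME; this helper name is
# illustrative only):
import datetime

def _round_down_to_quarter_hour(t):
    # e.g. 12:47:23 rounds down to 12:45:00
    return t.replace(minute=t.minute - (t.minute % 15), second=0,
                     microsecond=0)

assert (_round_down_to_quarter_hour(datetime.datetime(2023, 1, 1, 12, 47, 23))
        == datetime.datetime(2023, 1, 1, 12, 45))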
def get_files_changed_pr(base_repository, pull_request_number):
    url = base_repository.replace("github.com", "api.github.com/repos")
    url += "/pulls/%s/files" % pull_request_number
    r = get_session().get(url, timeout=60)
    r.raise_for_status()
    files = [f["filename"] for f in r.json()]
    return files
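# Hedged example of the URL rewrite above; the repository and PR number are
# hypothetical:
#   get_files_changed_pr("https://github.com/mozilla/example", 123)
# fetches https://api.github.com/repos/mozilla/example/pulls/123/files and
# returns a list of changed paths, e.g. ["taskcluster/ci/config.yml"].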
def get_files_changed_push(base_repository, base_rev, head_rev):
    url = base_repository.replace("github.com", "api.github.com/repos")
    url += "/compare/"
    url += f"{base_rev}...{head_rev}"
    r = get_session().get(url, timeout=60)
    r.raise_for_status()
    files = [f["filename"] for f in r.json().get("files")]
    return files
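# The push variant uses GitHub's compare endpoint; with hypothetical revs:
#   get_files_changed_push("https://github.com/mozilla/example",
#                          "abc123", "def456")
# fetches https://api.github.com/repos/mozilla/example/compare/abc123...def456
# and lists the files changed between the two revisions.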
def cancel_all_action(parameters, graph_config, input, task_group_id, task_id,
                      task):
    session = get_session()
    own_task_id = os.environ.get('TASK_ID', '')
    with futures.ThreadPoolExecutor(CONCURRENCY) as e:
        cancels_jobs = [
            e.submit(cancel_task, t, use_proxy=True)
            for t in list_group(task_group_id, session)
            if t != own_task_id
        ]
        for job in cancels_jobs:
            job.result()
def create_task_from_def(task_id, task_def, level):
    """Create a new task from a definition rather than from a label
    that is already in the full-task-graph. The task definition will
    have {'relative-datestamp': '..'} rendered just like in a decision
    task. Use this for entirely new tasks or ones that change internals
    of the task. It is useful if you want to "edit" the full_task_graph
    and then hand it to this function. No dependencies will be scheduled.
    You must handle this yourself. Seeing how create_tasks handles it
    might prove helpful."""
    task_def['schedulerId'] = 'gecko-level-{}'.format(level)
    label = task_def['metadata']['name']
    session = get_session()
    create.create_task(session, task_id, label, task_def)
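# A hedged usage sketch: the label and payload tweak are illustrative, and
# slugid() / full_task_graph are assumed from the surrounding taskgraph code:
#   task_def = copy.deepcopy(full_task_graph.tasks['build-linux64/opt'].task)
#   task_def['payload']['env']['MOZ_EXTRA'] = '1'
#   create_task_from_def(slugid(), task_def, level='3')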
def create_task(task_id, task_def, level):
    """Create a new task. The task definition will have
    {'relative-datestamp': '..'} rendered just like in a decision task.
    Action callbacks should use this function to create new tasks, as it
    has the additional advantage of allowing easy debugging with `mach
    taskgraph action-callback --test`."""
    task_def['schedulerId'] = 'gecko-level-{}'.format(level)
    if testing:
        json.dump([task_id, task_def], sys.stdout,
                  sort_keys=True, indent=4, separators=(',', ': '))
        return
    label = task_def['metadata']['name']
    session = get_session()
    create.create_task(session, task_id, label, task_def)
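# Sketch of use from an action callback (task_def is assumed to be a fully
# rendered task definition; parameters['level'] is illustrative):
#   create_task(slugid(), task_def, parameters['level'])
# With `testing` true (e.g. under `mach taskgraph action-callback --test`),
# the [task_id, task_def] pair is dumped to stdout instead of submitted.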
def download_and_modify_image():
    # This function downloads and edits the downloaded tar file on the fly.
    # It emits chunked buffers of the edited tar file, as a generator.
    print(f"Downloading from {url}")
    # get_session() gets us a requests.Session set to retry several times.
    req = get_session().get(url, stream=True)
    req.raise_for_status()

    with zstd.ZstdDecompressor().stream_reader(req.raw) as ifh:
        tarin = tarfile.open(
            mode="r|",
            fileobj=ifh,
            bufsize=zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
        )

        # Stream through each member of the downloaded tar file individually.
        for member in tarin:
            # Non-file members only need a tar header. Emit one.
            if not member.isfile():
                yield member.tobuf(tarfile.GNU_FORMAT)
                continue

            # Open stream reader for the member
            reader = tarin.extractfile(member)

            # If member is `repositories`, we parse and possibly rewrite the
            # image tags.
            if member.name == "repositories":
                # Read and parse repositories
                repos = json.loads(reader.read())
                reader.close()

                # If there is more than one image or tag, we can't handle it
                # here.
                if len(repos.keys()) > 1:
                    raise Exception("file contains more than one image")
                info["image"] = image = list(repos.keys())[0]
                if len(repos[image].keys()) > 1:
                    raise Exception("file contains more than one tag")
                info["tag"] = tag = list(repos[image].keys())[0]
                info["layer"] = layer = repos[image][tag]

                # Rewrite the repositories file
                data = json.dumps(
                    {imageName or image: {imageTag or tag: layer}})
                reader = BytesIO(data.encode("utf-8"))
                member.size = len(data)

            # Emit the tar header for this member.
            yield member.tobuf(tarfile.GNU_FORMAT)
            # Then emit its content.
            remaining = member.size
            while remaining:
                length = min(remaining,
                             zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
                buf = reader.read(length)
                remaining -= len(buf)
                yield buf
            # Pad to fill a 512 bytes block, per tar format.
            remainder = member.size % 512
            if remainder:
                yield ("\0" * (512 - remainder)).encode("utf-8")
            reader.close()
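# For reference, the `repositories` file rewritten above looks roughly like
# (single image and tag, as enforced by the checks; names are illustrative):
#   {"example/image": {"latest": "<top-layer-id>"}}
# so the rewrite only renames the image/tag keys while keeping the layer id.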
def get_hg_file(parameters, path):
    session = get_session()
    response = session.get(parameters.file_url(path))
    response.raise_for_status()
    return response.content
def create_tasks(graph_config, taskgraph, label_to_taskid, params,
                 decision_task_id):
    taskid_to_label = {t: l for l, t in label_to_taskid.items()}

    # when running as an actual decision task, we use the decision task's
    # taskId as the taskGroupId.  The process that created the decision task
    # helpfully placed it in this same taskGroup.  If there is no $TASK_ID,
    # fall back to a slugid
    scheduler_id = '{}-level-{}'.format(graph_config['trust-domain'],
                                        params['level'])

    # Add the taskGroupId, schedulerId and optionally the decision task
    # dependency
    for task_id in taskgraph.graph.nodes:
        task_def = taskgraph.tasks[task_id].task

        # If this task has no dependencies *within* this taskgraph, make it
        # depend on this decision task.  If it has another dependency within
        # the taskgraph, then it already implicitly depends on the decision
        # task.  The result is that tasks do not start immediately.  If this
        # loop fails halfway through, none of the already-created tasks run.
        if not any(t in taskgraph.tasks
                   for t in task_def.get('dependencies', [])):
            task_def.setdefault('dependencies', []).append(decision_task_id)

        task_def['taskGroupId'] = decision_task_id
        task_def['schedulerId'] = scheduler_id

    # If `testing` is True, then run without parallelization
    concurrency = CONCURRENCY if not testing else 1
    session = get_session()
    with futures.ThreadPoolExecutor(concurrency) as e:
        fs = {}

        # We can't submit a task until its dependencies have been submitted.
        # So our strategy is to walk the graph and submit tasks once all
        # their dependencies have been submitted.
        tasklist = set(taskgraph.graph.visit_postorder())
        alltasks = tasklist.copy()

        def schedule_tasks():
            # bail out early if any futures have failed
            if any(f.done() and f.exception() for f in fs.values()):
                return

            to_remove = set()
            new = set()

            def submit(task_id, label, task_def):
                fut = e.submit(create_task, session, task_id, label, task_def)
                new.add(fut)
                fs[task_id] = fut

            for task_id in tasklist:
                task_def = taskgraph.tasks[task_id].task

                # If we haven't finished submitting all our dependencies yet,
                # come back to this later.  Some dependencies aren't in our
                # graph, so make sure to filter those out.
                deps = set(task_def.get('dependencies', [])) & alltasks
                if any((d not in fs or not fs[d].done()) for d in deps):
                    continue

                submit(task_id, taskid_to_label[task_id], task_def)
                to_remove.add(task_id)

                # Schedule tasks as many times as task_duplicates indicates
                attributes = taskgraph.tasks[task_id].attributes
                for i in range(1, attributes.get('task_duplicates', 1)):
                    # We use slugid() since we want a distinct task id
                    submit(slugid(), taskid_to_label[task_id], task_def)
            tasklist.difference_update(to_remove)

            # as each of those futures complete, try to schedule more tasks
            for f in futures.as_completed(new):
                schedule_tasks()

        # start scheduling tasks and run until everything is scheduled
        schedule_tasks()

    # check the result of each future, raising an exception if it failed
    for f in futures.as_completed(fs.values()):
        f.result()
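# Illustrative note on the scheduling above: a task whose attributes include
# {'task_duplicates': 3} is submitted three times (once under its assigned
# task_id, twice under fresh slugid()s), and a task is only submitted once
# every in-graph dependency's future has completed, so submission order
# respects the graph.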