def load(*args):
    """Test double: build and return a canned parent Job.

    A truthy first positional argument makes the returned job carry a
    sample error message and a single child id, so callers can exercise
    both the "clean parent" and "failed parent" code paths.

    :param args: optional flag; when args[0] is truthy the returned job
                 gets ``errors`` and ``children`` test data attached
    :returns: a Job instance populated with fixed test values
    """
    parent = Job(job_id="Test Parent Job Id",
                 status="Active",
                 payload=maps.NamedDict())
    # Guard with `args and` so calling load() with no arguments no
    # longer raises IndexError; it simply returns the plain parent.
    if args and args[0]:
        parent.errors = "Error Message"
        parent.children = ["Test_child"]
    return parent
def process_job(job):
    """Claim and execute a queued Tendrl job on this node.

    Workflow (order matters — etcd compare-and-swap guards each step):
      1. Skip the job if it is already locked or already
         "finished"/"processing".
      2. If this node is tagged "tendrl/monitor", time out jobs that
         have sat in "new" for more than 10 minutes (mark "failed").
      3. Route: only run the job if its payload type matches this node
         and one of its tags matches this node's tags.
      4. Lock the job, flip status "new" -> "processing" (CAS), run the
         flow, then flip "processing" -> "finished" or "failed".

    :param job: job object whose ``key`` ends in the job id; presumably
                an etcd-backed queue entry — confirm against caller.
    :raises FlowExecutionFailedError: when the final status CAS fails
            (job status was changed out from under us).
    """
    jid = job.key.split('/')[-1]
    job_status_key = "/queue/%s/status" % jid
    job_lock_key = "/queue/%s/locked_by" % jid
    # Refresh this node's cached context (tags may have changed).
    NS.node_context = NS.node_context.load()

    # Check job not already locked by some agent
    try:
        _locked_by = NS._int.client.read(job_lock_key).value
        if _locked_by:
            return
    except etcd.EtcdKeyNotFound:
        # No lock key yet -> job is unclaimed; fall through.
        pass

    # Check job not already "finished", or "processing"
    try:
        _status = NS._int.client.read(job_status_key).value
        if _status in ["finished", "processing"]:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # tendrl-node-agent tagged as tendrl/monitor will ensure
    # >10 min old "new" jobs are timed out and marked as
    # "failed" (the parent job of these jobs will also be
    # marked as "failed")
    if "tendrl/monitor" in NS.node_context.tags:
        _job_valid_until_key = "/queue/%s/valid_until" % jid
        _valid_until = None
        try:
            _valid_until = NS._int.client.read(
                _job_valid_until_key).value
        except etcd.EtcdKeyNotFound:
            pass

        if _valid_until:
            # Compare current epoch seconds against the stored deadline.
            _now_epoch = (time_utils.now() - datetime.datetime(
                1970, 1, 1).replace(tzinfo=utc)).total_seconds()
            if int(_now_epoch) >= int(_valid_until):
                # Job has "new" status since 10 minutes,
                # mark status as "failed" and Job.error =
                # "Timed out"
                try:
                    # CAS: only fail the job if it is still "new".
                    NS._int.wclient.write(job_status_key,
                                          "failed",
                                          prevValue="new")
                except etcd.EtcdCompareFailed:
                    # Someone else changed the status first; leave it.
                    pass
                else:
                    job = Job(job_id=jid).load()
                    _msg = str("Timed-out (>10min as 'new')")
                    job.errors = _msg
                    job.save()
                    return
        else:
            # First sighting by the monitor: stamp a deadline 10
            # minutes from now (epoch seconds).
            _now_plus_10 = time_utils.now() + datetime.timedelta(
                minutes=10)
            _epoch_start = datetime.datetime(1970, 1, 1).replace(
                tzinfo=utc)
            # noinspection PyTypeChecker
            _now_plus_10_epoch = (_now_plus_10 -
                                  _epoch_start).total_seconds()
            NS._int.wclient.write(_job_valid_until_key,
                                  int(_now_plus_10_epoch))

    job = Job(job_id=jid).load()
    if job.payload["type"] == NS.type and \
            job.status == "new":
        # Job routing
        # Flows created by tendrl-api use 'tags' from flow
        # definition to target jobs
        _tag_match = False
        if job.payload.get("tags", []):
            for flow_tag in job.payload['tags']:
                if flow_tag in NS.node_context.tags:
                    _tag_match = True
        if not _tag_match:
            # Not addressed to this node: log why and bail out.
            _job_tags = ", ".join(job.payload.get("tags", []))
            _msg = "Node (%s)(type: %s)(tags: %s) will not " \
                   "process job-%s (tags: %s)" % \
                   (NS.node_context.node_id, NS.type,
                    NS.node_context.tags, jid, _job_tags)
            Event(
                Message(priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": _msg}))
            return

        job_status_key = "/queue/%s/status" % job.job_id
        job_lock_key = "/queue/%s/locked_by" % job.job_id
        try:
            # Record who holds the lock, then CAS the status from
            # "new" to "processing" so exactly one agent wins.
            lock_info = dict(node_id=NS.node_context.node_id,
                             fqdn=NS.node_context.fqdn,
                             tags=NS.node_context.tags,
                             type=NS.type)
            NS._int.wclient.write(job_lock_key,
                                  json.dumps(lock_info))
            NS._int.wclient.write(job_status_key,
                                  "processing",
                                  prevValue="new")
        except etcd.EtcdCompareFailed:
            # job is already being processed by some tendrl
            # agent
            return

        the_flow = None
        try:
            # Resolve the flow class from the fully-qualified name in
            # the payload, with or without an owning object.
            current_ns, flow_name, obj_name = \
                _extract_fqdn(job.payload['run'])
            if obj_name:
                runnable_flow = current_ns.ns.get_obj_flow(
                    obj_name, flow_name)
            else:
                runnable_flow = current_ns.ns.get_flow(flow_name)

            the_flow = runnable_flow(parameters=job.payload[
                'parameters'], job_id=job.job_id)
            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Processing Job %s" %
                                            job.job_id}))

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Running Flow %s" %
                                       job.payload['run']
                        }))
            the_flow.run()
            try:
                # CAS: mark finished only if still "processing".
                NS._int.wclient.write(job_status_key,
                                      "finished",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'finished', " \
                       "current job status invalid"
                raise FlowExecutionFailedError(_msg)

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Job (%s): Finished "
                                       "Flow %s" % (
                                           job.job_id,
                                           job.payload['run'])
                        }))
        # NOTE(review): `Exception` already subsumes the two specific
        # error types; the tuple is redundant but harmless.
        except (FlowExecutionFailedError,
                AtomExecutionFailedError,
                Exception) as e:
            # NOTE(review): traceback.format_exc() takes a `limit`,
            # not an exception; passing `e` only "works" on Python 2's
            # loose comparisons — verify before porting to Python 3.
            _trace = str(traceback.format_exc(e))
            _msg = "Failure in Job %s Flow %s with error:" % \
                   (job.job_id, job.payload['run'])
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={"message": _msg + _trace,
                                          "exception": e}))
            if the_flow:
                Event(
                    Message(job_id=job.job_id,
                            flow_id=the_flow.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" +
                                                _trace}))
            else:
                Event(
                    Message(priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" +
                                                _trace}))
            try:
                # CAS: mark failed only if still "processing".
                NS._int.wclient.write(job_status_key,
                                      "failed",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'failed', current" \
                       "job status invalid"
                raise FlowExecutionFailedError(_msg)
            else:
                # Persist the traceback on the job for later
                # inspection.
                job = job.load()
                job.errors = _trace
                job.save()