示例#1
0
def load(*args):
    """Build a stub parent ``Job`` for use in place of a real load.

    The job is always created with the id "Test Parent Job Id", status
    "Active" and an empty ``maps.NamedDict`` payload. When the first
    positional argument is truthy, the stub is additionally given an
    error message and a single child entry.

    Args:
        *args: positional arguments; only ``args[0]`` is inspected.

    Returns:
        The stub ``Job`` instance.
    """
    stub_job = Job(
        job_id="Test Parent Job Id",
        status="Active",
        payload=maps.NamedDict(),
    )
    # Plain stub unless the caller's first argument is truthy.
    if not args[0]:
        return stub_job

    stub_job.errors = "Error Message"
    stub_job.children = ["Test_child"]
    return stub_job
示例#2
0
def process_job(job):
    """Claim and run one queued job on this node, if it is eligible.

    The job id is the last "/"-separated component of ``job.key``. The
    function then:

    1. Returns early when the job's ``locked_by`` key holds a truthy value
       or its status is already "finished" or "processing".
    2. On nodes tagged "tendrl/monitor", maintains the job's
       ``valid_until`` key: if it is missing it is set to now + 10 minutes
       (epoch seconds); if it has expired, the status is switched from
       "new" to "failed" and the job's errors are set to a time-out
       message.
    3. Loads the job and, when its payload type equals ``NS.type``, its
       status is "new" and at least one payload tag is present in this
       node's tags, writes the lock info and moves the status from "new"
       to "processing".
    4. Resolves the flow named by ``payload['run']``, runs it, and moves
       the status to "finished". On any failure the error is published
       through ``Event``, the status is moved to "failed" and the
       traceback is stored on the job's ``errors``.

    Args:
        job: object exposing a ``key`` attribute whose last path
            component is the job id.

    Returns:
        None.

    Raises:
        FlowExecutionFailedError: if, after a flow failure, the status
            cannot be moved from "processing" to "failed".
    """
    jid = job.key.split('/')[-1]
    job_status_key = "/queue/%s/status" % jid
    job_lock_key = "/queue/%s/locked_by" % jid
    NS.node_context = NS.node_context.load()

    # Check job not already locked by some agent
    try:
        _locked_by = NS._int.client.read(job_lock_key).value
        if _locked_by:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # Check job not already "finished", or "processing"
    try:
        _status = NS._int.client.read(job_status_key).value
        if _status in ["finished", "processing"]:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # tendrl-node-agent tagged as tendrl/monitor will ensure
    # >10 min old "new" jobs are timed out and marked as
    # "failed" (the parent job of these jobs will also be
    # marked as "failed")
    if "tendrl/monitor" in NS.node_context.tags:
        _job_valid_until_key = "/queue/%s/valid_until" % jid
        _valid_until = None
        try:
            _valid_until = NS._int.client.read(_job_valid_until_key).value
        except etcd.EtcdKeyNotFound:
            pass

        # Timezone-aware epoch origin, shared by both branches below.
        _epoch_start = datetime.datetime(1970, 1, 1).replace(tzinfo=utc)

        if _valid_until:
            _now_epoch = (time_utils.now() - _epoch_start).total_seconds()
            if int(_now_epoch) >= int(_valid_until):
                # Job has "new" status since 10 minutes,
                # mark status as "failed" and Job.error =
                # "Timed out"
                try:
                    NS._int.wclient.write(job_status_key,
                                          "failed",
                                          prevValue="new")
                except etcd.EtcdCompareFailed:
                    # Status is no longer "new"; fall through to the
                    # regular processing checks below.
                    pass
                else:
                    job = Job(job_id=jid).load()
                    _msg = str("Timed-out (>10min as 'new')")
                    job.errors = _msg
                    job.save()
                    return
        else:
            # First time a monitor node sees this job: record its deadline.
            _now_plus_10 = time_utils.now() + datetime.timedelta(minutes=10)

            # noinspection PyTypeChecker
            _now_plus_10_epoch = (_now_plus_10 - _epoch_start).total_seconds()
            NS._int.wclient.write(_job_valid_until_key,
                                  int(_now_plus_10_epoch))

    job = Job(job_id=jid).load()
    if job.payload["type"] == NS.type and \
            job.status == "new":
        # Job routing
        # Flows created by tendrl-api use 'tags' from flow
        # definition to target jobs
        # A missing or empty (or None) tag list never matches.
        _payload_tags = job.payload.get("tags") or []
        _tag_match = any(flow_tag in NS.node_context.tags
                         for flow_tag in _payload_tags)

        if not _tag_match:
            _job_tags = ", ".join(_payload_tags)
            _msg = "Node (%s)(type: %s)(tags: %s) will not " \
                   "process job-%s (tags: %s)" % \
                   (NS.node_context.node_id, NS.type,
                    NS.node_context.tags, jid,
                    _job_tags)
            Event(
                Message(priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": _msg}))
            return

        job_status_key = "/queue/%s/status" % job.job_id
        job_lock_key = "/queue/%s/locked_by" % job.job_id
        try:
            lock_info = dict(node_id=NS.node_context.node_id,
                             fqdn=NS.node_context.fqdn,
                             tags=NS.node_context.tags,
                             type=NS.type)
            NS._int.wclient.write(job_lock_key, json.dumps(lock_info))
            # Compare-and-swap: only succeeds while the status is still "new".
            NS._int.wclient.write(job_status_key,
                                  "processing",
                                  prevValue="new")
        except etcd.EtcdCompareFailed:
            # job is already being processed by some tendrl
            # agent
            return

        # Stays None if the failure happens before the flow is instantiated,
        # so the error handler knows whether a flow_id is available.
        the_flow = None
        try:
            current_ns, flow_name, obj_name = \
                _extract_fqdn(job.payload['run'])

            if obj_name:
                runnable_flow = current_ns.ns.get_obj_flow(obj_name, flow_name)
            else:
                runnable_flow = current_ns.ns.get_flow(flow_name)

            the_flow = runnable_flow(parameters=job.payload['parameters'],
                                     job_id=job.job_id)
            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Processing Job %s" % job.job_id}))

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Running Flow %s" % job.payload['run']
                        }))
            the_flow.run()
            try:
                NS._int.wclient.write(job_status_key,
                                      "finished",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'finished', " \
                       "current job status invalid"
                raise FlowExecutionFailedError(_msg)

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Job (%s):  Finished "
                            "Flow %s" % (job.job_id, job.payload['run'])
                        }))
        except (FlowExecutionFailedError, AtomExecutionFailedError,
                Exception) as e:
            # format_exc() takes an optional frame *limit*, not the
            # exception; it formats the exception currently being handled.
            _trace = traceback.format_exc()
            _msg = "Failure in Job %s Flow %s with error:" % \
                   (job.job_id, job.payload['run'])
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": _msg + _trace,
                                     "exception": e
                                 }))
            if the_flow:
                Event(
                    Message(job_id=job.job_id,
                            flow_id=the_flow.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" + _trace}))
            else:
                Event(
                    Message(priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" + _trace}))

            try:
                NS._int.wclient.write(job_status_key,
                                      "failed",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'failed', current " \
                       "job status invalid"
                raise FlowExecutionFailedError(_msg)
            else:
                # Reload before saving so the stored error is attached to
                # the job's latest persisted state.
                job = job.load()
                job.errors = _trace
                job.save()