Example 1
    def run(self):
        # Generate the journal mapping for the nodes
        mapping = utils.generate_journal_mapping(
            self.parameters['Cluster.node_configuration'],
            integration_id=self.parameters.get("TendrlContext.integration_id"))

        # Update output dict
        job = Job(job_id=self.job_id).load()
        job.output[self.__class__.__name__] = json.dumps(mapping)
        job.save()
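
The atom stores the mapping as a JSON string in job.output, keyed by the atom's class name. A caller could read it back along these lines (a minimal sketch; the class-name key "GenerateJournalMapping" and the job_id variable are assumptions for illustration, not taken from the snippet):

import json

# Hypothetical key: whatever self.__class__.__name__ evaluated to when
# the atom above ran; job_id is whatever job the atom wrote to.
job = Job(job_id=job_id).load()
raw = job.output.get("GenerateJournalMapping")
mapping = json.loads(raw) if raw else {}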
Example 2
def test_save(mock_save):
    job = Job()
    payload = maps.NamedDict()
    payload['parent'] = "Test Parent Job Id"
    job.payload = payload
    # Patch BaseObject.load so save() sees a stubbed load result instead
    # of hitting a real store; first case: status "true" with load(False)
    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(False)
        job.status = "true"
        job.save()
    # Second case: status "failed" with the same load(False) stub
    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(False)
        job.status = "failed"
        job.save()
    # Third case: status "failed" with the load(True) stub
    with patch.object(objects.BaseObject, 'load') as mock_load:
        mock_load.return_value = load(True)
        job.status = "failed"
        job.save()
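
The load(...) calls above refer to a helper defined elsewhere in the test module and not shown here. A minimal sketch of what such a helper might look like, assuming it returns a stand-in parent-job object whose state is controlled by the flag (the attribute names below are assumptions, not the project's actual fields):

import maps

def load(finished):
    # Hypothetical stub: build a dummy parent-job object whose status
    # reflects the "finished" flag passed by the test.
    parent = maps.NamedDict()
    parent.status = "finished" if finished else "new"
    return parent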
Example 3
def process_job(job):
    jid = job.key.split('/')[-1]
    job_status_key = "/queue/%s/status" % jid
    job_lock_key = "/queue/%s/locked_by" % jid
    NS.node_context = NS.node_context.load()
    # Check job not already locked by some agent
    try:
        _locked_by = NS._int.client.read(job_lock_key).value
        if _locked_by:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # Check that the job is not already "finished" or "processing"
    try:
        _status = NS._int.client.read(job_status_key).value
        if _status in ["finished", "processing"]:
            return
    except etcd.EtcdKeyNotFound:
        pass

    # The tendrl-node-agent tagged as tendrl/monitor ensures that "new"
    # jobs older than 10 minutes are timed out and marked "failed"
    # (the parent jobs of such jobs are marked "failed" as well)
    if "tendrl/monitor" in NS.node_context.tags:
        _job_valid_until_key = "/queue/%s/valid_until" % jid
        _valid_until = None
        try:
            _valid_until = NS._int.client.read(_job_valid_until_key).value
        except etcd.EtcdKeyNotFound:
            pass

        if _valid_until:
            _now_epoch = (time_utils.now() - datetime.datetime(
                1970, 1, 1).replace(tzinfo=utc)).total_seconds()
            if int(_now_epoch) >= int(_valid_until):
                # Job has been in "new" status for more than
                # 10 minutes; mark it "failed" and set
                # Job.errors to "Timed out"
                try:
                    NS._int.wclient.write(job_status_key,
                                          "failed",
                                          prevValue="new")
                except etcd.EtcdCompareFailed:
                    pass
                else:
                    job = Job(job_id=jid).load()
                    _msg = str("Timed-out (>10min as 'new')")
                    job.errors = _msg
                    job.save()
                    return
        else:
            _now_plus_10 = time_utils.now() + datetime.timedelta(minutes=10)
            _epoch_start = datetime.datetime(1970, 1, 1).replace(tzinfo=utc)

            # noinspection PyTypeChecker
            _now_plus_10_epoch = (_now_plus_10 - _epoch_start).total_seconds()
            NS._int.wclient.write(_job_valid_until_key,
                                  int(_now_plus_10_epoch))

    job = Job(job_id=jid).load()
    if job.payload["type"] == NS.type and \
            job.status == "new":
        # Job routing
        # Flows created by tendrl-api use 'tags' from flow
        # definition to target jobs
        _tag_match = False
        if job.payload.get("tags", []):
            for flow_tag in job.payload['tags']:
                if flow_tag in NS.node_context.tags:
                    _tag_match = True

        if not _tag_match:
            _job_tags = ", ".join(job.payload.get("tags", []))
            _msg = "Node (%s)(type: %s)(tags: %s) will not " \
                   "process job-%s (tags: %s)" % \
                   (NS.node_context.node_id, NS.type,
                    NS.node_context.tags, jid,
                    _job_tags)
            Event(
                Message(priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": _msg}))
            return

        job_status_key = "/queue/%s/status" % job.job_id
        job_lock_key = "/queue/%s/locked_by" % job.job_id
        try:
            lock_info = dict(node_id=NS.node_context.node_id,
                             fqdn=NS.node_context.fqdn,
                             tags=NS.node_context.tags,
                             type=NS.type)
            NS._int.wclient.write(job_lock_key, json.dumps(lock_info))
            NS._int.wclient.write(job_status_key,
                                  "processing",
                                  prevValue="new")
        except etcd.EtcdCompareFailed:
            # job is already being processed by some tendrl
            # agent
            return

        the_flow = None
        try:
            current_ns, flow_name, obj_name = \
                _extract_fqdn(job.payload['run'])

            if obj_name:
                runnable_flow = current_ns.ns.get_obj_flow(obj_name, flow_name)
            else:
                runnable_flow = current_ns.ns.get_flow(flow_name)

            the_flow = runnable_flow(parameters=job.payload['parameters'],
                                     job_id=job.job_id)
            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={"message": "Processing Job %s" % job.job_id}))

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message": "Running Flow %s" % job.payload['run']
                        }))
            the_flow.run()
            try:
                NS._int.wclient.write(job_status_key,
                                      "finished",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'finished', " \
                       "current job status invalid"
                raise FlowExecutionFailedError(_msg)

            Event(
                Message(job_id=job.job_id,
                        flow_id=the_flow.parameters['flow_id'],
                        priority="info",
                        publisher=NS.publisher_id,
                        payload={
                            "message":
                            "Job (%s):  Finished "
                            "Flow %s" % (job.job_id, job.payload['run'])
                        }))
        except (FlowExecutionFailedError, AtomExecutionFailedError,
                Exception) as e:
            _trace = traceback.format_exc()
            _msg = "Failure in Job %s Flow %s with error:" % \
                   (job.job_id, job.payload['run'])
            Event(
                ExceptionMessage(priority="error",
                                 publisher=NS.publisher_id,
                                 payload={
                                     "message": _msg + _trace,
                                     "exception": e
                                 }))
            if the_flow:
                Event(
                    Message(job_id=job.job_id,
                            flow_id=the_flow.parameters['flow_id'],
                            priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" + _trace}))
            else:
                Event(
                    Message(priority="error",
                            publisher=NS.publisher_id,
                            payload={"message": _msg + "\n" + _trace}))

            try:
                NS._int.wclient.write(job_status_key,
                                      "failed",
                                      prevValue="processing")
            except etcd.EtcdCompareFailed:
                # This should not happen!
                _msg = "Cannot mark job as 'failed', current" \
                       "job status invalid"
                raise FlowExecutionFailedError(_msg)
            else:
                job = job.load()
                job.errors = _trace
                job.save()
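
process_job expects an etcd result object representing a single /queue/<job_id> entry. A consumer loop that walks the queue directory and hands each entry to process_job could look roughly like this (a minimal sketch, assuming the same NS._int.client handle used above; the polling interval and the absence of shutdown handling are arbitrary choices for illustration):

import time
import etcd

def consume_queue():
    # Sketch: poll the /queue directory and dispatch every child entry
    # to process_job(); error handling is deliberately coarse.
    while True:
        try:
            queue = NS._int.client.read("/queue")
        except etcd.EtcdKeyNotFound:
            queue = None
        if queue is not None:
            for job in queue.leaves:
                process_job(job)
        time.sleep(5)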
Example 4
    def run(self):
        Event(
            Message(
                priority="info",
                publisher=NS.publisher_id,
                payload={
                    "message": "Generating brick mapping for gluster volume"
                },
                job_id=self.parameters["job_id"],
                flow_id=self.parameters["flow_id"],
                cluster_id=NS.tendrl_context.integration_id,
            )
        )
        brick_count = self.parameters.get('Volume.brick_count')
        subvol_size = self.parameters.get('Volume.subvol_size')
        message = ""
        # Collect up to brick_count free bricks from each selected node

        nodes = {}
        for node in self.parameters.get('Cluster.node_configuration'):
            key = "nodes/%s/NodeContext/fqdn" % node
            host = NS._int.client.read(key).value
            nodes[host] = []

        hosts = NS._int.client.read(
            '/clusters/%s/Bricks/free/' % NS.tendrl_context.integration_id
        )
        for host in hosts.leaves:
            host = host.key.split("/")[-1]
            bricks = NS._int.client.read(
                '/clusters/%s/Bricks/free/%s' % (
                    NS.tendrl_context.integration_id,
                    host
                )
            )
            for brick in bricks.leaves:
                brick = brick.key.split("/")[-1]
                if host in nodes:
                    if len(nodes[host]) < brick_count:
                        nodes[host].append(brick)

        # Build a brick list ordered so that filling sub-volumes from it
        # honours the failure domains (consecutive entries come from
        # different hosts)

        total_bricks = len(nodes) * brick_count
        brick_list = [""] * total_bricks

        counter = 0
        node_count = len(nodes)
        for key, value in nodes.items():
            if len(value) < brick_count:
                message = ("Host %s has %s bricks, which is less than "
                           "the %s bricks required per host" % (
                               key,
                               len(value),
                               brick_count
                           ))
                job = Job(job_id=self.parameters["job_id"]).load()
                res = {"message": message, "result": [[]], "optimal": False}
                job.output["GenerateBrickMapping"] = json.dumps(res)
                job.save()
                return False

            for i in range(brick_count):
                brick_list[node_count * i + counter] = value[i]
            counter += 1

        # If the total number of available bricks is less than the
        # sub-volume size, report the shortfall and return

        if len(brick_list) < subvol_size:
            message = "Total bricks available %s less than subvol_size %s" % (
                len(brick_list), subvol_size
            )
            job = Job(job_id=self.parameters["job_id"]).load()
            res = {"message": message, "result": [[]], "optimal": False}
            job.output["GenerateBrickMapping"] = json.dumps(res)
            job.save()
            return False

        # Fill the result list with bricks from brick_list, creating as
        # many full sub-volumes as the available bricks allow

        result = []
        lower_bound = 0
        upper_bound = subvol_size
        while True:
            if upper_bound > len(brick_list):
                break
            subvol = brick_list[lower_bound:upper_bound]
            result.append(subvol)
            lower_bound = upper_bound
            upper_bound += subvol_size

        # Check whether the mapping is optimal with respect to the
        # expected failure domain

        optimal = True
        if node_count < subvol_size:
            optimal = False

        # Write the result back to the job

        job = Job(job_id=self.parameters["job_id"]).load()
        res = {"message": message, "result": result, "optimal": optimal}
        job.output["GenerateBrickMapping"] = json.dumps(res)
        job.save()

        return True
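
The brick_list built above is interleaved across hosts: brick i of the host at position counter lands at index node_count * i + counter, so consecutive entries come from different hosts and a sub-volume of size node_count takes one brick per host. A small standalone illustration of that interleaving, with made-up host and brick names:

# Hypothetical data: 3 hosts with 2 free bricks each.
nodes = {"host1": ["h1b1", "h1b2"],
         "host2": ["h2b1", "h2b2"],
         "host3": ["h3b1", "h3b2"]}
brick_count = 2
node_count = len(nodes)

brick_list = [""] * (node_count * brick_count)
counter = 0
for value in nodes.values():
    for i in range(brick_count):
        brick_list[node_count * i + counter] = value[i]
    counter += 1

# brick_list is now ['h1b1', 'h2b1', 'h3b1', 'h1b2', 'h2b2', 'h3b2'],
# so a sub-volume of size 3 gets one brick from each host.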