Example #1
    def touch_control_outputs(self):
        task = self.task

        # create the parent directory
        self._outputs["submission"].parent.touch()

        # get all branch indexes and chunk them by tasks_per_job
        branch_chunks = list(
            iter_chunks(task.branch_map.keys(), task.tasks_per_job))

        # submission output
        if not self._outputs["submission"].exists():
            submission_data = self.submission_data.copy()
            # set dummy submission data
            submission_data.jobs.clear()
            for i, branches in enumerate(branch_chunks):
                job_num = i + 1
                submission_data.jobs[
                    job_num] = self.submission_data_cls.job_data(
                        branches=branches)
            self._outputs["submission"].dump(submission_data,
                                             formatter="json",
                                             indent=4)

        # status output
        if "status" in self._outputs and not self._outputs["status"].exists():
            status_data = self.status_data_cls()
            # set dummy status data
            for i, branches in enumerate(branch_chunks):
                job_num = i + 1
                status_data.jobs[job_num] = self.status_data_cls.job_data(
                    status=self.job_manager.FINISHED, code=0)
            self._outputs["status"].dump(status_data,
                                         formatter="json",
                                         indent=4)
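
The dummy submission data above is driven entirely by how the branch map is chunked: branches are grouped into blocks of tasks_per_job and the resulting jobs are numbered starting at 1. A minimal, self-contained sketch of that mapping, using a simplified stand-in for law.util.iter_chunks and a made-up 8-branch map:

from collections import OrderedDict


def iter_chunks(iterable, chunk_size):
    # simplified stand-in for law.util.iter_chunks, for illustration only
    items = list(iterable)
    for i in range(0, len(items), chunk_size):
        yield items[i:i + chunk_size]


# hypothetical workflow: 8 branches, 3 branches per job
branch_map = {b: None for b in range(8)}
tasks_per_job = 3

branch_chunks = list(iter_chunks(branch_map.keys(), tasks_per_job))
job_map = OrderedDict((i + 1, branches) for i, branches in enumerate(branch_chunks))
print(job_map)
# -> OrderedDict([(1, [0, 1, 2]), (2, [3, 4, 5]), (3, [6, 7])])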
Example #2
File: base.py Project: ucontent/law
    def query_batch(self, job_ids, threads=None, chunk_size=20, callback=None, **kwargs):
        """
        Queries the status of a batch of jobs given by *job_ids* via a thread pool of size *threads*
        which defaults to its instance attribute. When *chunk_size* is not negative, *job_ids* is
        split into chunks of that size which are passed to :py:meth:`query`. When *callback* is set,
        it is invoked after each successful job (or job chunk) status query with the job number
        (starting from 0) and the result object. All other *kwargs* are passed to :py:meth:`query`.

        This method returns a tuple containing the job status query data in a dictionary mapped to
        job ids, and a list of exceptions that occurred during status querying. An empty list means
        that no exceptions occurred.
        """
        # default arguments
        threads = threads or self.threads

        def _callback(i):
            return (lambda r: callback(i, r)) if callable(callback) else None

        # threaded processing
        pool = ThreadPool(max(threads, 1))
        gen = job_ids if chunk_size < 0 else iter_chunks(job_ids, chunk_size)
        results = [pool.apply_async(self.query, (job_id_chunk,), kwargs, callback=_callback(i))
                   for i, job_id_chunk in enumerate(gen)]
        pool.close()
        pool.join()

        # store status data per job id
        query_data, errors = {}, []
        for res in results:
            try:
                query_data.update(res.get())
            except Exception as e:
                errors.append(e)

        return query_data, errors
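
The docstring describes a generic fan-out: split the job ids into chunks, query each chunk in a thread pool, merge the per-chunk dictionaries, and collect exceptions separately. A minimal, self-contained sketch of that pattern, with a hypothetical dummy_query function standing in for a real batch-system backend:

from multiprocessing.pool import ThreadPool


def dummy_query(job_ids):
    # stand-in for a backend-specific query (a real manager would ask the
    # scheduler here); returns a dict mapping job ids to status data
    return {job_id: {"status": "running"} for job_id in job_ids}


job_ids = list(range(10))
chunk_size = 4
chunks = [job_ids[i:i + chunk_size] for i in range(0, len(job_ids), chunk_size)]

pool = ThreadPool(2)
results = [pool.apply_async(dummy_query, (chunk,)) for chunk in chunks]
pool.close()
pool.join()

query_data, errors = {}, []
for res in results:
    try:
        query_data.update(res.get())
    except Exception as e:
        errors.append(e)

print(len(query_data), errors)  # -> 10 []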
Example #3
File: base.py Project: ucontent/law
    def cleanup_batch(self, job_ids, threads=None, chunk_size=20, callback=None, **kwargs):
        """
        Cleans up a batch of jobs given by *job_ids* via a thread pool of size *threads* which
        defaults to its instance attribute. When *chunk_size* is not negative, *job_ids* is split
        into chunks of that size which are passed to :py:meth:`cleanup`. When *callback* is set, it
        is invoked after each successful job (or job chunk) cleaning with the job number (starting
        from 0) and the result object. All other *kwargs* are passed to :py:meth:`cleanup`.

        Exceptions that occurred during job cleaning are stored in a list and returned. An empty
        list means that no exceptions occurred.
        """
        # default arguments
        threads = threads or self.threads

        def _callback(i):
            return (lambda r: callback(i, r)) if callable(callback) else None

        # threaded processing
        pool = ThreadPool(max(threads, 1))
        gen = job_ids if chunk_size < 0 else iter_chunks(job_ids, chunk_size)
        results = [pool.apply_async(self.cleanup, (job_id_chunk,), kwargs, callback=_callback(i))
                   for i, job_id_chunk in enumerate(gen)]
        pool.close()
        pool.join()

        # store errors
        errors = []
        for res in results:
            try:
                res.get()
            except Exception as e:
                errors.append(e)

        return errors
Example #4
    def query_batch(self,
                    job_ids,
                    threads=None,
                    chunk_size=20,
                    callback=None,
                    **kwargs):
        # default arguments
        threads = threads or self.threads

        def _callback(i):
            return (lambda r: callback(r, i)) if callable(callback) else None

        # threaded processing
        pool = ThreadPool(max(threads, 1))
        gen = job_ids if chunk_size < 0 else iter_chunks(job_ids, chunk_size)
        results = [
            pool.apply_async(self.query, (job_id_chunk, ),
                             kwargs,
                             callback=_callback(i))
            for i, job_id_chunk in enumerate(gen)
        ]
        pool.close()
        pool.join()

        # store status data per job id
        query_data, errors = {}, []
        for res in results:
            try:
                query_data.update(res.get())
            except Exception as e:
                errors.append(e)

        return query_data, errors
Example #5
File: base.py Project: meliache/law
    def get_branch_chunks(self, chunk_size):
        """
        Returns a list of chunks of branch numbers defined in this workflow with a certain
        *chunk_size*. Example:

        .. code-block:: python

            wf = SomeWorkflowTask()  # has 8 branches
            print(wf.get_branch_chunks(3))
            # -> [[0, 1, 2], [3, 4, 5], [6, 7]]

            wf2 = SomeWorkflowTask(end_branch=5)  # has 5 branches
            print(wf2.get_branch_chunks(3))
            # -> [[0, 1, 2], [3, 4]]
        """
        if self.is_branch():
            return self.as_workflow().get_branch_chunks(chunk_size)

        # get the branch map and create chunks of its branch values
        branch_chunks = iter_chunks(self.get_branch_map().keys(), chunk_size)

        return list(branch_chunks)
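
For a simple workflow whose branch numbers are 0..7, the chunking boils down to fixed-size slicing; a quick plain-Python sketch matching the first docstring example (hypothetical 8-branch workflow, chunk_size of 3):

branches = list(range(8))
chunk_size = 3
print([branches[i:i + chunk_size] for i in range(0, len(branches), chunk_size)])
# -> [[0, 1, 2], [3, 4, 5], [6, 7]]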
Example #6
File: remote.py Project: bfis/law
    def run(self):
        """
        Actual run method that starts the processing of jobs and initiates the status polling, or
        performs job cancelling or cleaning, depending on the task parameters.
        """
        task = self.task
        self._outputs = self.output()

        # create the job dashboard interface
        self.dashboard = task.create_job_dashboard() or NoJobDashboard()

        # read submission data and reset some values
        submitted = not task.ignore_submission and self._outputs[
            "submission"].exists()
        if submitted:
            self.submission_data.update(
                self._outputs["submission"].load(formatter="json"))
            task.tasks_per_job = self.submission_data.tasks_per_job
            self.dashboard.apply_config(self.submission_data.dashboard_config)

        # store the initially complete branches
        if "collection" in self._outputs:
            collection = self._outputs["collection"]
            count, keys = collection.count(keys=True)
            self._initially_existing_branches = keys

        # cancel jobs?
        if self._cancel_jobs:
            if submitted:
                self.cancel()
            self._controlled_jobs = True

        # cleanup jobs?
        elif self._cleanup_jobs:
            if submitted:
                self.cleanup()
            self._controlled_jobs = True

        # submit and/or wait while polling
        else:
            # maybe set a tracking url
            tracking_url = self.dashboard.create_tracking_url()
            if tracking_url:
                task.set_tracking_url(tracking_url)

            # ensure the output directory exists
            if not submitted:
                self._outputs["submission"].parent.touch()

            # at this point, when the status file exists, it is considered outdated
            if "status" in self._outputs:
                self._outputs["status"].remove()

            try:
                # instantiate the configured job file factory
                self.job_file_factory = self.create_job_file_factory()

                # submit
                if not submitted:
                    # set the initial list of unsubmitted jobs
                    branches = sorted(task.branch_map.keys())
                    branch_chunks = list(
                        iter_chunks(branches, task.tasks_per_job))
                    self.submission_data.unsubmitted_jobs = OrderedDict(
                        (i + 1, branches)
                        for i, branches in enumerate(branch_chunks))
                    self.submit()

                    # sleep once to give the job interface time to register the jobs
                    post_submit_delay = self._get_task_attribute(
                        "post_submit_delay")()
                    if post_submit_delay:
                        logger.debug(
                            "sleep for {} seconds due to post_submit_delay".
                            format(post_submit_delay))
                        time.sleep(post_submit_delay)

                # start status polling when a) no_poll is not set, or b) the jobs were already
                # submitted so that failed jobs are resubmitted after a single polling iteration
                if not task.no_poll or submitted:
                    self.poll()

            finally:
                # in any event, cleanup the job file
                if self.job_file_factory:
                    self.job_file_factory.cleanup_dir(force=False)
Example #7
    def run(self):
        """
        Actual run method that starts the processing of jobs and initiates the status polling, or
        performs job cancelling or cleaning, depending on the task parameters.
        """
        task = self.task
        self._outputs = self.output()

        # create the job dashboard interface
        self.dashboard = task.create_job_dashboard() or NoJobDashboard()

        # read submission data and reset some values
        submitted = not task.ignore_submission and self._outputs[
            "submission"].exists()
        if submitted:
            self.submission_data.update(
                self._outputs["submission"].load(formatter="json"))
            task.tasks_per_job = self.submission_data.tasks_per_job
            self.dashboard.apply_config(self.submission_data.dashboard_config)
            for job_num in self.submission_data.jobs:
                self.attempts[int(job_num)] = -1

        # when the branch outputs, i.e. the "collection" exists, just create dummy control outputs
        if "collection" in self._outputs and self._outputs[
                "collection"].exists():
            self.touch_control_outputs()

        # cancel jobs?
        elif self._cancel_jobs:
            if submitted:
                self.cancel()

        # cleanup jobs?
        elif self._cleanup_jobs:
            if submitted:
                self.cleanup()

        # submit and/or wait while polling
        else:
            # maybe set a tracking url
            tracking_url = self.dashboard.create_tracking_url()
            if tracking_url:
                task.set_tracking_url(tracking_url)
                print("tracking url set to {}".format(tracking_url))

            # ensure the output directory exists
            if not submitted:
                self._outputs["submission"].parent.touch()

            # at this point, when the status file exists, it is considered outdated
            if "status" in self._outputs:
                self._outputs["status"].remove()

            try:
                self.job_file_factory = self.create_job_file_factory()

                # submit
                if not submitted:
                    # set the initial job waiting list
                    branches = sorted(task.branch_map.keys())
                    branch_chunks = list(
                        iter_chunks(branches, task.tasks_per_job))
                    self.submission_data.waiting_jobs = OrderedDict(
                        (i + 1, branches)
                        for i, branches in enumerate(branch_chunks))
                    self.submit()

                # start status polling when a) no_poll is not set, or b) the jobs were already
                # submitted so that failed jobs are resubmitted after a single polling iteration
                if not task.no_poll or submitted:
                    self.poll()

            finally:
                # finally, cleanup the job file
                if self.job_file_factory:
                    self.job_file_factory.cleanup(force=False)
Example #8
File: base.py Project: mschnepf/law
    def query_batch(self,
                    job_ids,
                    threads=None,
                    chunk_size=None,
                    callback=None,
                    **kwargs):
        """
        Queries the status of a batch of jobs given by *job_ids* via a thread pool of size *threads*
        which defaults to its instance attribute. When *chunk_size*, which defaults to
        :py:attr:`chunk_size_query`, is not negative, *job_ids* are split into chunks of that size
        which are passed to :py:meth:`query`. When *callback* is set, it is invoked after each
        successful job (or job chunk) status query with the index of the corresponding job id
        (starting at 0) and the obtained status query data or an exception if any occurred. All
        other *kwargs* are passed to :py:meth:`query`.

        This method returns a dictionary that maps job ids to either the status query data or an
        exception if any occurred.
        """
        # default arguments
        threads = max(threads or self.threads or 1, 1)

        # is chunking allowed?
        if self.chunk_size_query:
            chunk_size = max(chunk_size or self.chunk_size_query, 0)
        else:
            chunk_size = 0
        chunking = chunk_size > 0

        # build chunks (either job ids one by one, or real chunks of job ids)
        job_ids = make_list(job_ids)
        chunks = list(iter_chunks(job_ids,
                                  chunk_size)) if chunking else job_ids

        # factory to call the passed callback for each job file even when chunking
        def cb_factory(i):
            if not callable(callback):
                return None
            elif chunking:

                def wrapper(query_data):
                    offset = sum(len(chunk) for chunk in chunks[:i])
                    for j in range(len(chunks[i])):
                        data = query_data if isinstance(
                            query_data, Exception) else query_data[j]
                        callback(offset + j, data)

                return wrapper
            else:

                def wrapper(data):
                    callback(i, data)

                return wrapper

        # threaded processing
        pool = ThreadPool(threads)
        results = [
            pool.apply_async(self.query, (v, ), kwargs, callback=cb_factory(i))
            for i, v in enumerate(chunks)
        ]
        pool.close()
        pool.join()

        # store status data per job id or an exception
        query_data = {}
        if chunking:
            for i, (chunk, res) in enumerate(six.moves.zip(chunks, results)):
                data = get_async_result_silent(res)
                if isinstance(data, Exception):
                    data = {job_id: data for job_id in chunk}
                query_data.update(data)
        else:
            for job_id, res in six.moves.zip(job_ids, results):
                query_data[job_id] = get_async_result_silent(res)

        return query_data
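
The part worth highlighting is cb_factory: when chunking is active, the per-chunk callback from the thread pool is translated into one callback invocation per job id, using the summed lengths of the preceding chunks as an offset. A small standalone sketch of just that fan-out logic, with made-up chunks and a hypothetical report function in place of the user callback:

chunks = [["a", "b", "c"], ["d", "e"]]


def report(index, data):
    # hypothetical user callback: receives the flat job index and its result
    print(index, data)


def make_chunk_callback(i):
    # mirror of cb_factory's chunking branch: fan one chunk result out to
    # per-job callbacks with the correct flat index
    def wrapper(chunk_result):
        offset = sum(len(chunk) for chunk in chunks[:i])
        for j, data in enumerate(chunk_result):
            report(offset + j, data)
    return wrapper


# simulate chunk results arriving one by one
make_chunk_callback(0)(["res_a", "res_b", "res_c"])  # indices 0, 1, 2
make_chunk_callback(1)(["res_d", "res_e"])           # indices 3, 4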
Example #9
File: base.py Project: mschnepf/law
    def cleanup_batch(self,
                      job_ids,
                      threads=None,
                      chunk_size=None,
                      callback=None,
                      **kwargs):
        """
        Cleans up a batch of jobs given by *job_ids* via a thread pool of size *threads* which
        defaults to its instance attribute. When *chunk_size*, which defaults to
        :py:attr:`chunk_size_cleanup`, is not negative, *job_ids* are split into chunks of that size
        which are passed to :py:meth:`cleanup`. When *callback* is set, it is invoked after each
        successful job (or job chunk) cleaning with the index of the corresponding job id (starting
        at 0) and either *None* or an exception if any occurred. All other *kwargs* are passed to
        :py:meth:`cleanup`.

        Exceptions that occurred during job cleaning are stored in a list and returned. An empty list
        means that no exceptions occurred.
        """
        # default arguments
        threads = max(threads or self.threads or 1, 1)

        # is chunking allowed?
        if self.chunk_size_cleanup:
            chunk_size = max(chunk_size or self.chunk_size_cleanup, 0)
        else:
            chunk_size = 0
        chunking = chunk_size > 0

        # build chunks (either job ids one by one, or real chunks of job ids)
        job_ids = make_list(job_ids)
        chunks = list(iter_chunks(job_ids,
                                  chunk_size)) if chunking else job_ids

        # factory to call the passed callback for each job id even when chunking
        def cb_factory(i):
            if not callable(callback):
                return None
            elif chunking:

                def wrapper(err):
                    offset = sum(len(chunk) for chunk in chunks[:i])
                    for j in range(len(chunks[i])):
                        callback(offset + j, err)

                return wrapper
            else:

                def wrapper(err):
                    callback(i, err)

                return wrapper

        # threaded processing
        pool = ThreadPool(threads)
        results = [
            pool.apply_async(self.cleanup, (v, ),
                             kwargs,
                             callback=cb_factory(i))
            for i, v in enumerate(chunks)
        ]
        pool.close()
        pool.join()

        # store errors
        errors = list(filter(
            bool, flatten(get_async_result_silent(res) for res in results)))

        return errors
Example #10
File: base.py Project: mschnepf/law
    def submit_batch(self,
                     job_files,
                     threads=None,
                     chunk_size=None,
                     callback=None,
                     **kwargs):
        """
        Submits a batch of jobs given by *job_files* via a thread pool of size *threads* which
        defaults to its instance attribute. When *chunk_size*, which defaults to
        :py:attr:`chunk_size_submit`, is not negative, *job_files* are split into chunks of that
        size which are passed to :py:meth:`submit`. When *callback* is set, it is invoked after each
        successful job submission with the index of the corresponding job file (starting at 0) and
        either the assigned job id or an exception if any occurred. All other *kwargs* are passed to
        :py:meth:`submit`.

        The return value is a list containing the return values of the particular :py:meth:`submit`
        calls, in an order that corresponds to *job_files*. When an exception was raised during a
        submission, this exception is added to the returned list.
        """
        # default arguments
        threads = max(threads or self.threads or 1, 1)

        # is chunking allowed?
        if self.chunk_size_submit:
            chunk_size = max(chunk_size or self.chunk_size_submit, 0)
        else:
            chunk_size = 0
        chunking = chunk_size > 0

        # build chunks (either job files one by one, or real chunks of job files)
        job_files = make_list(job_files)
        chunks = list(iter_chunks(job_files,
                                  chunk_size)) if chunking else job_files

        # factory to call the passed callback for each job file even when chunking
        def cb_factory(i):
            if not callable(callback):
                return None
            elif chunking:

                def wrapper(job_ids):
                    offset = sum(len(chunk) for chunk in chunks[:i])
                    for j in range(len(chunks[i])):
                        job_id = job_ids if isinstance(
                            job_ids, Exception) else job_ids[j]
                        callback(offset + j, job_id)

                return wrapper
            else:

                def wrapper(job_id):
                    callback(i, job_id)

                return wrapper

        # threaded processing
        pool = ThreadPool(threads)
        results = [
            pool.apply_async(self.submit, (v, ),
                             kwargs,
                             callback=cb_factory(i))
            for i, v in enumerate(chunks)
        ]
        pool.close()
        pool.join()

        # store return values or errors, same length as job files, independent of chunking
        if chunking:
            outputs = []
            for i, (chunk, res) in enumerate(six.moves.zip(chunks, results)):
                job_ids = get_async_result_silent(res)
                if isinstance(job_ids, Exception):
                    job_ids = len(chunk) * [job_ids]
                outputs.extend(job_ids)
        else:
            outputs = flatten(get_async_result_silent(res) for res in results)

        return outputs
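
The closing block is what keeps the return value aligned with *job_files* regardless of chunking: when a whole chunk fails, its exception is repeated once per job file in that chunk before the results are flattened. A compact sketch of that alignment step with made-up chunks and results:

# hypothetical per-chunk submission results: the second chunk failed as a whole
chunks = [["job1.sh", "job2.sh"], ["job3.sh", "job4.sh", "job5.sh"]]
chunk_results = [["id_1", "id_2"], RuntimeError("submission failed")]

outputs = []
for chunk, result in zip(chunks, chunk_results):
    if isinstance(result, Exception):
        # broadcast the exception so there is still one entry per job file
        result = len(chunk) * [result]
    outputs.extend(result)

print(len(outputs))  # -> 5, one entry per job file, errors included in place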
Example #11
    def _build_merge_forest(self):
        # a node in the tree can be described by a tuple of integers, where each value denotes the
        # branch path to go down the tree to reach the node (e.g. (2, 0) -> 2nd branch, 0th branch),
        # so the length of the tuple defines the depth of the node via ``depth = len(node) - 1``
        # the tree itself is a dict that maps depths to lists of nodes with that depth
        # when multiple trees are used (a forest), each one handles ``n_leaves / n_trees`` leaves

        if self._forest_built:
            return

        # helper to convert nested lists of leaf number chunks into a list of nodes in the format
        # described above
        def nodify(obj, node=None, root_id=0):
            if not isinstance(obj, list):
                return []
            nodes = []
            if node is None:
                node = tuple()
            else:
                nodes.append(node)
            for i, _obj in enumerate(obj):
                nodes += nodify(_obj, node + (i if node else root_id, ))
            return nodes

        # first, determine the number of files to merge in total when not already set via params
        if self._n_leaves is None:
            # the following lines build the workflow requirements,
            # which strictly requires this task to be a workflow (and also not the forest)
            # for branches, this block is executed anyway via cached workflow properties
            if self.is_branch():
                raise Exception(
                    "number of files to merge should not be computed for a branch"
                )

            # get inputs, i.e. outputs of workflow requirements and trace actual inputs to merge
            # an integer number representing the number of inputs is also valid
            inputs = luigi.task.getpaths(self.merge_workflow_requires())
            inputs = self.trace_merge_workflow_inputs(inputs)
            self._n_leaves = inputs if isinstance(
                inputs, six.integer_types) else len(inputs)

        # infer the number of trees from the merge output
        output = self.merge_output()
        n_trees = 1 if not isinstance(output,
                                      TargetCollection) else len(output)

        if self._n_leaves < n_trees:
            raise Exception(
                "too few leaves ({}) for number of requested trees ({})".
                format(self._n_leaves, n_trees))

        # determine the number of leaves per tree
        n_min = self._n_leaves // n_trees
        n_trees_overlap = self._n_leaves % n_trees
        leaves_per_tree = n_trees_overlap * [n_min + 1] + (
            n_trees - n_trees_overlap) * [n_min]

        # build the trees
        forest = []
        for i, n_leaves in enumerate(leaves_per_tree):
            # build a nested list of leaf numbers using the merge factor
            # e.g. 9 leaves with factor 3 -> [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
            # TODO: this point defines the actual tree structure, which is bottom-up at the moment,
            # but maybe it's good to have this configurable
            nested_leaves = list(iter_chunks(n_leaves, self.merge_factor))
            while len(nested_leaves) > 1:
                nested_leaves = list(
                    iter_chunks(nested_leaves, self.merge_factor))

            # convert the list of nodes to the tree format described above
            tree = {}
            for node in nodify(nested_leaves, root_id=i):
                depth = len(node) - 1
                tree.setdefault(depth, []).append(node)

            forest.append(tree)

        # store values
        self.leaves_per_tree = leaves_per_tree
        self.merge_forest = forest
        self._forest_built = True

        # complain when the depth is too large
        if self.tree_depth > self.max_depth:
            raise ValueError("tree_depth {} exceeds maximum depth {}".format(
                self.tree_depth, self.max_depth))
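
The node encoding from the opening comment is easiest to see on a small tree: with 9 leaves and a merge factor of 3 the nested leaf list is [[[0, 1, 2], [3, 4, 5], [6, 7, 8]]], and nodify turns it into tuples whose length minus one gives the depth. A short demonstration using a copy of the nodify helper from the method above and made-up numbers:

def nodify(obj, node=None, root_id=0):
    # copied from the method above: convert nested leaf chunks into node
    # tuples whose length encodes the depth in the merge tree
    if not isinstance(obj, list):
        return []
    nodes = []
    if node is None:
        node = tuple()
    else:
        nodes.append(node)
    for i, _obj in enumerate(obj):
        nodes += nodify(_obj, node + (i if node else root_id,))
    return nodes


# 9 leaves merged with factor 3: one intermediate layer below the root
nested_leaves = [[[0, 1, 2], [3, 4, 5], [6, 7, 8]]]
tree = {}
for node in nodify(nested_leaves, root_id=0):
    tree.setdefault(len(node) - 1, []).append(node)
print(tree)
# -> {0: [(0,)], 1: [(0, 0), (0, 1), (0, 2)]}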
Example #12
    def submit(self, job_map=None):
        task = self.task

        # map branch numbers to job numbers, chunk by tasks_per_job
        if not job_map:
            branch_chunks = list(iter_chunks(task.branch_map.keys(), task.tasks_per_job))
            job_map = dict((i + 1, branches) for i, branches in enumerate(branch_chunks))

        # when only_missing is True, a job can be skipped when all its tasks are completed
        check_skip = False
        if task.only_missing and self.skipped_job_nums is None:
            self.skipped_job_nums = []
            check_skip = True

        # create job files for each chunk
        job_data = OrderedDict()
        for job_num, branches in six.iteritems(job_map):
            if check_skip and all(task.as_branch(b).complete() for b in branches):
                self.skipped_job_nums.append(job_num)
                self.submission_data.jobs[job_num] = self.submission_data_cls.job_data(
                    branches=branches)
                continue

            # create and store the job file
            job_data[job_num] = (branches, self.create_job_file(job_num, branches))

        # actual submission
        job_files = [job_file for _, job_file in six.itervalues(job_data)]
        dst_info = self.destination_info() or ""
        dst_info = dst_info and (", " + dst_info)
        task.publish_message("going to submit {} {} job(s){}".format(
            len(job_files), self.workflow_type, dst_info))

        # pass the job files to the submit_jobs method for actual submission
        job_ids = self.submit_jobs(job_files)

        # store submission data
        errors = []
        successful_job_nums = []
        for job_num, job_id in six.moves.zip(job_data, job_ids):
            if isinstance(job_id, Exception):
                errors.append((job_num, job_id))
                job_id = self.submission_data_cls.dummy_job_id
            else:
                successful_job_nums.append(job_num)
            self.submission_data.jobs[job_num] = self.submission_data_cls.job_data(job_id=job_id,
                branches=job_data[job_num][0])

        # dump the submission data to the output file
        self.dump_submission_data()

        # raise exceptions or log
        if errors:
            print("{} error(s) occured during job submission of task {}:".format(
                len(errors), task.task_id))
            tmpl = "    job {}: {}"
            for i, tpl in enumerate(errors):
                print(tmpl.format(*tpl))
                if i + 1 >= self.show_errors:
                    remaining = len(errors) - self.show_errors
                    if remaining > 0:
                        print("    ... and {} more".format(remaining))
                    break
        else:
            task.publish_message("submitted {} job(s)".format(len(job_files)) + dst_info)

        # inform the dashboard about successful submissions
        for job_num in successful_job_nums:
            job_data = self.submission_data.jobs[job_num]
            task.forward_dashboard_event(self.dashboard, job_num, job_data, "action.submit")