Example #1
    def _optimize_inline_tasks(self):
        """ Group tasks by abstraction and function and then break them into
        sub-groups and schedule the sub-groups has sub DAGs.
        """
        if CurrentScript().inline_tasks <= 1:
            return

        debug(D_NEST, 'Inlining tasks for {0}'.format(self))

        # Group tasks into bins keyed by (Abstraction, Function).
        task_dict = collections.defaultdict(list)
        for task in self.tasks:
            abstraction = task[0]
            function    = task[1]
            task_dict[(abstraction, function)].append(task)

        # For each set of tasks, split the set into small sub-groups; for each
        # sub-group, create a new InlineNest and schedule the tasks there.
        self.tasks = []
        for (abstraction, function), tasks in list(task_dict.items()):
            inline_tasks = max(CurrentScript().inline_tasks, abstraction.group)
            if inline_tasks < len(tasks):
                for group in groups(tasks, inline_tasks):
                    with InlineNest() as inline_nest:
                        for task in group:
                            inline_nest.schedule(*task)
                        inline_nest.compile()
                    with abstraction.options:
                        inline_nest()
            else:
                self.tasks.extend(tasks)
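
The `groups` helper used above is imported from the surrounding Weaver code
base and is not shown here. As a rough sketch (this implementation is an
assumption, not Weaver's actual code), the contract the optimizer relies on
is to split a sequence into sub-groups of at most `size` items:

    def groups(items, size):
        # Yield successive sub-groups of at most `size` items each.
        for i in range(0, len(items), size):
            yield items[i:i + size]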
Example #2
    def emit_task(self,
                  abstraction,
                  function,
                  command,
                  inputs,
                  outputs,
                  options,
                  symbol=None):
        """ Write task to DAG file. """
        # Track inputs and outputs.
        if self.track_imports:
            for i in inputs:
                self.inputs.add(i)

        if self.track_exports:
            for o in outputs:
                self.outputs.add(o)

        debug(
            D_ENGINE, 'Emitting {0}, [{1}], [{2}], {3}'.format(
                command, ', '.join(map(str, inputs)),
                ', '.join(map(str, outputs)), options))

        # Write task outputs and inputs
        self.dag_file.write('{0}: {1}\n'.format(' '.join(map(str, outputs)),
                                                ' '.join(map(str, inputs))))

        # Write debugging symbols if enabled
        if CurrentScript().include_symbols:
            if abstraction == SENTINEL:
                self.dag_file.write(
                    '\t'.join(['', '# SYMBOL', str(function)]) + '\n')
            else:
                self.dag_file.write('\t'.join(
                    ['', '# SYMBOL', str(abstraction)]) + '\n')

        # Write the task symbol if one is provided.
        if symbol:
            self.dag_file.write('@SYMBOL="' + symbol + '"\n')

        # Write batch options and environment variables.
        if options.local:
            self.dag_file.write('@BATCH_LOCAL=1\n')
        if options.batch:
            self.dag_file.write('@BATCH_OPTIONS={0}\n'.format(options.batch))
        if options.collect:
            self.dag_file.write('@_MAKEFLOW_COLLECT_LIST+={0}\n'.format(
                ' '.join(map(str, options.collect))))
        for k, v in list(options.environment.items()):
            self.dag_file.write('@{0}={1}\n'.format(k, v))

        # Write task command
        self.dag_file.write('\t{0}\n'.format(command))
        self.dag_file.flush()
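
For illustration, a single call with one input, one output, `options.local`
set, and debugging symbols enabled might append a Makeflow rule shaped like
this (file names, the command, and the `Map` abstraction are invented; tab
indentation is shown as spaces):

    out.dat: in.dat
        # SYMBOL  Map
    @BATCH_LOCAL=1
        ./transform in.dat out.dat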
Example #3
    def __iter__(self):
        # Generate the cache under any of the following conditions:
        #
        #   1. Cache file does not exist
        #   2. Cache file exists, is older than compile start time, and we are
        #      forced to do so
        debug(D_DATASET, 'Iterating on Dataset {0}'.format(self))
        if os.path.exists(self.cache_path):
            # If the cache file was created at or after compile start time,
            # it is still valid, so don't bother regenerating it.
            if CurrentScript().start_time <= os.stat(self.cache_path).st_ctime:
                debug(D_DATASET, 'Loading Dataset {0}'.format(self))
                return (MakeFile(f.strip(), self.nest)
                        for f in open(self.cache_path, 'r'))

            message = 'Cache file {0} already exists'.format(self.cache_path)
            if CurrentScript().force:
                warn(D_DATASET, message)
            else:
                fatal(D_DATASET, message)

        debug(D_DATASET, 'Generating Dataset {0}'.format(self))
        return self._generate()
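
The freshness rule the iterator applies can be stated on its own. A minimal
sketch, with an invented function name, of the check performed above:

    import os

    def cache_is_fresh(cache_path, start_time):
        # A cache file is valid only if it exists and was created at or
        # after the compile start time.
        return (os.path.exists(cache_path)
                and start_time <= os.stat(cache_path).st_ctime)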
Example #4
def normalize_path(path, ref_path=None):
    """ Return normalized path.

    If path is absolute or no `ref_path` is specified, then return absolute
    path.  Otherwise, return relative path.
    """
    from weaver.stack import CurrentScript

    if not CurrentScript().normalize_paths:
        return path

    if os.path.isabs(path):
        return path

    if ref_path:
        return os.path.abspath(os.path.join(ref_path, path))
    else:
        return os.path.abspath(path)
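
Illustrative behavior, assuming path normalization is enabled and the current
working directory is /home/user/project (all paths invented):

    normalize_path('/tmp/data.txt')       # -> '/tmp/data.txt' (already absolute)
    normalize_path('data.txt')            # -> '/home/user/project/data.txt'
    normalize_path('data.txt', 'inputs')  # -> '/home/user/project/inputs/data.txt'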
Example #5
    def _optimize_nested_abstractions(self):
        """ Internally, we perform inline abstractions optimization as we build
        the DAG, so we should only execute the body of this method if we want
        to automatically nest abstractions after the fact.
        """
        if not CurrentScript().nested_abstractions:
            return

        debug(D_NEST, 'Inlining Abstractions for {0}'.format(self))

        # Group tasks into bins based on Abstractions.
        task_dict = collections.defaultdict(list)
        for task in self.tasks:
            abstraction = task[0]
            task_dict[abstraction].append(task)

        # For each Abstraction, create InlineNest and schedule tasks to be
        # executed there; only do this if we have more than one Abstraction.
        self.tasks = []
        if len(task_dict) > 1:
            for abstraction, tasks in list(task_dict.items()):
                # Tasks scheduled directly by a Function carry the SENTINEL
                # placeholder as their Abstraction; simply schedule them for
                # execution in the current Nest.
                if abstraction is SENTINEL:
                    self.tasks.extend(tasks)
                    continue

                # Otherwise, create a new InlineNest and then schedule tasks to
                # run in this new Nest.
                with InlineNest() as inline_nest:
                    for task in tasks:
                        inline_nest.schedule(*task)
                    inline_nest.compile()

                # Engine is also a Function, so call it to schedule the task
                # responsible for InlineNest to run in the current Nest.
                with abstraction.options:
                    inline_nest()
        else:
            # Copy tasks from Abstractions to Nest task list.
            for tasks in list(task_dict.values()):
                self.tasks.extend(tasks)
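
A self-contained illustration of the binning step above, with invented task
tuples; in the real method each bin with a non-SENTINEL Abstraction would be
compiled into its own InlineNest:

    import collections

    tasks = [('A', 'cmd1'), ('B', 'cmd2'), ('A', 'cmd3')]
    task_dict = collections.defaultdict(list)
    for task in tasks:
        task_dict[task[0]].append(task)

    assert task_dict['A'] == [('A', 'cmd1'), ('A', 'cmd3')]
    assert task_dict['B'] == [('B', 'cmd2')]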