def _optimize_inline_tasks(self):
    """ Group tasks by Abstraction and Function, then break them into
    sub-groups and schedule the sub-groups as sub-DAGs.
    """
    if CurrentScript().inline_tasks <= 1:
        return

    debug(D_NEST, 'Inlining tasks for {0}'.format(self))

    # Group tasks into bins based on Abstraction and Function.
    task_dict = collections.defaultdict(list)
    for task in self.tasks:
        abstraction = task[0]
        function    = task[1]
        task_dict[(abstraction, function)].append(task)

    # For each set of tasks, split the set into small sub-groups; for each
    # sub-group, create a new InlineNest and schedule the tasks there.
    self.tasks = []
    for (abstraction, function), tasks in list(task_dict.items()):
        inline_tasks = max(CurrentScript().inline_tasks, abstraction.group)
        if inline_tasks < len(tasks):
            for group in groups(tasks, inline_tasks):
                with InlineNest() as inline_nest:
                    for task in group:
                        inline_nest.schedule(*task)
                    inline_nest.compile()
                with abstraction.options:
                    inline_nest()
        else:
            for task in tasks:
                self.tasks.append(task)
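# Illustrative sketch only: a plausible stand-in for the `groups` helper used
# above, assuming it yields consecutive chunks of at most `size` tasks. The
# actual helper lives elsewhere in Weaver and may differ in detail.
def _example_groups(tasks, size):
    """ Yield successive sub-groups of `tasks` with at most `size` items. """
    for i in range(0, len(tasks), size):
        yield tasks[i:i + size]

# With inline_tasks = 2, six tasks split into three sub-groups, each of which
# would be compiled into its own InlineNest (i.e. its own sub-DAG).
assert list(_example_groups(list('abcdef'), 2)) == [['a', 'b'], ['c', 'd'], ['e', 'f']]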
def emit_task(self, abstraction, function, command, inputs, outputs,
              options, symbol=None):
    """ Write task to DAG file. """
    # Track inputs and outputs.
    if self.track_imports:
        for i in inputs:
            self.inputs.add(i)

    if self.track_exports:
        for o in outputs:
            self.outputs.add(o)

    debug(D_ENGINE, 'Emitting {0}, [{1}], [{2}], {3}'.format(
        command,
        ', '.join(map(str, inputs)),
        ', '.join(map(str, outputs)),
        options))

    # Write task outputs and inputs
    self.dag_file.write('{0}: {1}\n'.format(
        ' '.join(map(str, outputs)),
        ' '.join(map(str, inputs))))

    # Write debugging symbols if enabled
    if CurrentScript().include_symbols:
        if abstraction == SENTINEL:
            self.dag_file.write(
                '\t'.join(['', '# SYMBOL', str(function)]) + '\n')
        else:
            self.dag_file.write(
                '\t'.join(['', '# SYMBOL', str(abstraction)]) + '\n')

    # If a symbol is provided
    if symbol:
        self.dag_file.write('@SYMBOL="' + symbol + '"\n')

    # Write environmental variables
    if options.local:
        self.dag_file.write('@BATCH_LOCAL=1\n')
    if options.batch:
        self.dag_file.write('@BATCH_OPTIONS={0}\n'.format(options.batch))
    if options.collect:
        self.dag_file.write('@_MAKEFLOW_COLLECT_LIST+={0}\n'.format(
            ' '.join(map(str, options.collect))))
    for k, v in list(options.environment.items()):
        self.dag_file.write('@{0}={1}\n'.format(k, v))

    # Write task command
    self.dag_file.write('\t{0}\n'.format(command))
    self.dag_file.flush()
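# For reference, a minimal sketch (hypothetical helper, not Weaver code) of
# the Makeflow rule shape that emit_task writes: "outputs: inputs" on one
# line, optional @-prefixed directive lines, then the tab-indented command.
import io

def _example_emit(dag_file, command, inputs, outputs, environment=None):
    dag_file.write('{0}: {1}\n'.format(' '.join(outputs), ' '.join(inputs)))
    for k, v in (environment or {}).items():
        dag_file.write('@{0}={1}\n'.format(k, v))
    dag_file.write('\t{0}\n'.format(command))

# Produces:
#   out.dat: in.dat
#   @PATH=/bin
#   <tab>./sim in.dat > out.dat
_buf = io.StringIO()
_example_emit(_buf, './sim in.dat > out.dat', ['in.dat'], ['out.dat'], {'PATH': '/bin'})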
def __iter__(self):
    # Generate the cache under any of the following conditions:
    #
    #   1. Cache file does not exist
    #   2. Cache file exists, is older than compile start time, and we are
    #      forced to do so
    debug(D_DATASET, 'Iterating on Dataset {0}'.format(self))

    if os.path.exists(self.cache_path):
        # If cache file is made after we started compiling, then it is
        # valid, so don't bother generating.
        if CurrentScript().start_time <= os.stat(self.cache_path).st_ctime:
            debug(D_DATASET, 'Loading Dataset {0}'.format(self))
            return (MakeFile(f.strip(), self.nest)
                    for f in open(self.cache_path, 'r'))

        message = 'Cache file {0} already exists'.format(self.cache_path)
        if CurrentScript().force:
            warn(D_DATASET, message)
        else:
            fatal(D_DATASET, message)

    debug(D_DATASET, 'Generating Dataset {0}'.format(self))
    return self._generate()
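# Minimal sketch (simplified assumption, not Weaver code) of the cache
# freshness test above: a cache file counts as valid only if it was created
# at or after the time compilation started.
import os

def _example_cache_is_valid(cache_path, start_time):
    return (os.path.exists(cache_path) and
            start_time <= os.stat(cache_path).st_ctime)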
def normalize_path(path, ref_path=None):
    """ Return normalized path.

    If path normalization is disabled, return the path unchanged. If the
    path is already absolute, return it as-is; otherwise, resolve it to an
    absolute path (relative to `ref_path` if one is specified).
    """
    from weaver.stack import CurrentScript
    if not CurrentScript().normalize_paths:
        return path

    if os.path.isabs(path):
        return path

    if ref_path:
        return os.path.abspath(os.path.join(ref_path, path))
    else:
        return os.path.abspath(path)
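# Illustrative sketch (hypothetical helper, not Weaver code) of the
# resolution rule above, with the CurrentScript() gate removed:
import os

def _example_resolve(path, ref_path=None):
    if os.path.isabs(path):
        return path
    return os.path.abspath(os.path.join(ref_path, path) if ref_path else path)

# e.g. (on POSIX) _example_resolve('/data/in.txt')         == '/data/in.txt'
#                 _example_resolve('in.txt', '/work/run1') == '/work/run1/in.txt'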
def _optimize_nested_abstractions(self):
    """ Internally, we perform inline abstractions optimization as we build
    the DAG, so we should only execute the body of this method if we want
    to automatically nest abstractions after the fact.
    """
    if not CurrentScript().nested_abstractions:
        return

    debug(D_NEST, 'Inlining Abstractions for {0}'.format(self))

    # Group tasks into bins based on Abstraction.
    task_dict = collections.defaultdict(list)
    for task in self.tasks:
        abstraction = task[0]
        task_dict[abstraction].append(task)

    # For each Abstraction, create an InlineNest and schedule its tasks to
    # be executed there; only do this if we have more than one Abstraction.
    self.tasks = []
    if len(task_dict) > 1:
        for abstraction, tasks in list(task_dict.items()):
            # For tasks scheduled directly by a Function (the Abstraction
            # slot holds the SENTINEL), simply schedule them for execution
            # in the current Nest.
            if abstraction is SENTINEL:
                self.tasks.extend(tasks)
                continue

            # Otherwise, create a new InlineNest and then schedule the
            # tasks to run in this new Nest.
            with InlineNest() as inline_nest:
                for task in tasks:
                    inline_nest.schedule(*task)
                inline_nest.compile()

            # Engine is also a Function, so call it to schedule the task
            # responsible for the InlineNest to run in the current Nest.
            with abstraction.options:
                inline_nest()
    else:
        # Copy tasks from Abstractions to the Nest task list.
        for abstraction, tasks in list(task_dict.items()):
            for task in tasks:
                self.tasks.append(task)
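# Minimal sketch (assumed simplification, not Weaver code) of the grouping
# step above: tasks are binned by their Abstraction (task[0]); bins keyed by
# the SENTINEL stay in the current Nest, while every other bin would become
# its own InlineNest sub-DAG.
import collections

def _example_bin_by_abstraction(tasks):
    bins = collections.defaultdict(list)
    for task in tasks:
        bins[task[0]].append(task)
    return bins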