def run(self, graph, ctx):
    """Type check every edge of the task graph.

    For each edge, checks whether the source output type can be cast to
    the destination input type. File types get special treatment: an
    untyped ('anyfile') end is unified with the typed end, and mismatched
    concrete file types are marked as needing a data transformation.
    Non-castable edges are reported as type errors on the pass context.

    Returns PassResult.ERROR if any type error was found, otherwise
    PassResult.CONTINUE.
    """
    Pass.run(self, graph, ctx)
    type_errors_found = False
    for tid, task in graph.tasks.items():
        for edge in task.edges:
            intype = edge.source.type
            outtype = edge.dest.type
            if is_castable(intype, outtype):
                # Handle file related types separately
                if isinstance(intype, FileType) and isinstance(
                        outtype, FileType):
                    # Check if one end of the edge is untyped but the other
                    # is not. If that's the case then we cast the untyped
                    # end to be the type of the other end. Otherwise we
                    # mark it as needing a data transformation
                    if intype.id == 'anyfile' and outtype.id != 'anyfile':
                        intype.id = outtype.id
                    elif intype.id != 'anyfile' and outtype.id == 'anyfile':
                        outtype.id = intype.id
                    else:
                        edge.needs_transform = True
            else:
                # NOTE(review): the message reports the source type as the
                # "expected" one — verify the intended wording with callers.
                ctx.errors.append(
                    "{} expected a {} got a {} from {}".format(
                        edge.dest.task_ref.name, intype.id, outtype.id,
                        edge.source.task_ref.name))
                # [FIX] This flag was previously never set, so the pass
                # reported success even after recording type errors.
                type_errors_found = True
    if type_errors_found:
        return PassResult.ERROR
    # Signal success explicitly, consistent with the other passes.
    return PassResult.CONTINUE
def run(self, graph, ctx):
    """Infer graph sources and sinks.

    A task whose inputs are all immediate values is registered as a
    source. A task with no outgoing edges is a sink: each of its outputs
    is wired to a Sink, rewriting non-local ports as local non-threaded
    ports so results are materialized locally.
    """
    Pass.run(self, graph, ctx)
    for tid, task in graph.tasks.items():
        # A task fed only by immediate (compile time) values is a source.
        if all(port.is_immediate for port in task.inputs.values()):
            graph.set_source(task)
        # No outgoing edges means this task — and all of its outputs —
        # are sinks. Make them so...
        if task.edges:
            continue
        task.is_sink = True
        backend = Backend.get_current_backend()
        for port_name, out_port in task.outputs.items():
            # If the current out-port is not a local port, make it so.
            if backend.name != 'LOCAL_NON_THREADED':
                local_backend = Backend.get_backend(
                    BackendConfig(BackendType.LOCAL_NON_THREADED,
                                  'Local Non Threaded'))
                out_port = local_backend.get_port(out_port.type,
                                                  out_port.name,
                                                  out_port.index,
                                                  out_port.task_ref)
            task.edges.append(Edge(out_port, Sink(out_port)))
    return PassResult.CONTINUE
def run(self, graph, ctx):
    """Fuse linear task chains into FusedTask container tasks.

    Regions of fusable tasks are discovered by DFS from each source;
    regions of more than one task are wrapped in a FusedTask, which
    inherits the source/sink roles of its head and tail tasks.
    """
    Pass.run(self, graph, ctx)
    candidate_regions = []
    seen = set()
    for src_name, src in graph.sources.items():
        region = [src]
        self._dfs(src, region, candidate_regions, seen)
    # Single-task regions are redundant — only fuse real chains.
    containers = [FusedTask(region)
                  for region in candidate_regions if len(region) > 1]
    # Record which container each fused task ended up in.
    for container in containers:
        for member in container.tasks:
            graph.fusee_map[member.id] = container
    for container in containers:
        graph.add_task(container)
        # A fused chain headed by a source replaces that source in the
        # graph.
        if container.head.id in graph.sources:
            graph.unset_source(container.head)
            graph.set_source(container)
        # A fused chain ending in a sink makes the container a sink too.
        if container.tail.is_sink:
            container.is_sink = True
def run(self, graph, ctx):
    """Render the graph as a dot graph and stash it on the pass context.

    Renderings are collected per pass tag under the '__dot_graph__'
    context property so several instances of this pass can coexist.
    """
    Pass.run(self, graph, ctx)
    dot_graphs = ctx.properties.get('__dot_graph__', None) or {}
    dot_graphs[self.tag] = self._generate_dot_graph(graph)
    ctx.properties['__dot_graph__'] = dot_graphs
    return PassResult.CONTINUE
def run(self, graph, ctx):
    """Recompute graph.num_tasks, counting a fused region as one task."""
    Pass.run(self, graph, ctx)
    distinct = set(graph.tasks.values())
    # Tasks swallowed by a FusedTask container must not be counted twice.
    for task in graph.tasks.values():
        if isinstance(task, FusedTask):
            for member in task.tasks:
                distinct.remove(member)
    graph.num_tasks = len(distinct)
def __init__(self, name):
    """Create the graph post-processing pass."""
    Pass.__init__(self, name)
    self.description = "Post processing the graph"
def run(self, graph, ctx):
    """Splice data transformation tasks into the graph.

    Two kinds of transforms are inserted:
      1. Edge transforms — an edge flagged with ``needs_transform`` by the
         type checker gets a transform task spliced between its endpoints.
      2. Source input transforms — a source fed a csv file argument whose
         extension does not match the declared input type gets a transform
         task prepended, which then replaces it as the graph source.
    """
    Pass.run(self, graph, ctx)
    new_tasks = []
    new_sources = []
    deleted_sources = []
    # Run edge transformations
    for tid, task in graph.tasks.items():
        # [FIX] Iterate over a snapshot: the previous code deleted from
        # task.edges by index while enumerate() was walking the same list,
        # which skips the edge after every deletion and can delete the
        # wrong edge when several edges need a transform.
        for edge in list(task.edges):
            if not edge.needs_transform:
                continue
            # If we find that we need to do a data type transformation we
            # need to splice in the transformation in between the original
            # tasks
            intype = edge.source.type
            outtype = edge.dest.type
            # [FIXME] Code debt - Currently we have two overloaded ways
            # of indexing in to the task.outputs dictionary: one with
            # indices and one with names (for named outputs), with no way
            # to distinguish them. Indexed addressing is assumed here
            # since named outputs are not supported yet; revisit when
            # named output support is added.
            tasklet = Tasklet(edge.source.task_ref, str(edge.source.index))
            args = [tasklet, intype, outtype]
            sig = inspect.signature(transform)
            new_task = Task(gen_runner(transform, sig), transform, sig,
                            args, {})
            new_task.is_transform = True
            # The input corresponds to the 'infile' parameter of the
            # transform function
            inport = new_task.inputs['infile']
            # We know this generated task only has one output
            outport = new_task.outputs['0']
            old_dest_port = edge.dest
            # Remove the old edge since the Task constructor already
            # generated a new edge from the original source to this newly
            # generated transform task.
            task.edges.remove(edge)
            # Make the original destination port of the edge the
            # destination of the outward edge of the new task.
            new_task.edges.append(Edge(outport, old_dest_port))
            new_tasks.append(new_task)
    # Run source input transformations
    for tid, source in graph.sources.items():
        for name, inport in source.inputs.items():
            # Get the actual argument value passed to this source
            arg = source._args[name]
            # Check if it looks like a csv file (a falsy extension can
            # never equal 'csv', so the previous nested `if ext:` guard
            # is folded into this comparison).
            ext = get_file_extention(arg)
            if ext == 'csv':
                intype = inport.type.id
                if intype != ext:
                    outtype = get_type(ext)
                    args = [arg, intype, outtype]
                    sig = inspect.signature(transform)
                    # Generate a new task for transforming the input to
                    # the type the original source was expecting
                    task = Task(gen_runner(transform, sig), transform,
                                sig, args, {})
                    task.is_transform = True
                    # We know this generated task only has one output
                    outport = task.outputs['0']
                    # The original input is not immediate anymore since it
                    # now receives the output of the newly generated
                    # transform task at runtime
                    inport.flip_is_immediate()
                    # Connect the out port of the new task to the in port
                    # of the old source
                    task.edges.append(Edge(outport, inport))
                    # The new task becomes a source; the old one stops
                    # being one
                    new_sources.append(task)
                    deleted_sources.append(source)
                    new_tasks.append(task)
    # Add the newly generated tasks to the graph
    for task in new_tasks:
        graph.add_task(task)
    # Mark removed sources as not sources
    for source in deleted_sources:
        graph.unset_source(source)
    # Add newly generated sources
    for source in new_sources:
        graph.set_source(source)
    return PassResult.CONTINUE
def __init__(self, name):
    """Create the data transformation insertion pass."""
    Pass.__init__(self, name)
    self.description = "Inserting data transformations"
def run(self, graph, ctx):
    """Insert staging tasks that fetch remote (FTP) source inputs.

    Edge staging is deliberately not done here: whether an intermediate
    edge needs staging depends on the runtime placement of tasks (it is
    only needed when consecutive tasks land on different nodes).
    """
    Pass.run(self, graph, ctx)
    new_tasks = []
    new_sources = []
    dropped_sources = []
    # Run source input staging
    for tid, source in graph.sources.items():
        for name, inport in source.inputs.items():
            # The actual argument value passed to this source
            arg = source._args[name]
            # Check if it looks like a URL (currently only FTP supported)
            if not arg.startswith("ftp:"):
                continue
            ext = get_file_extention(get_file_name(arg))
            intype = inport.type.id
            # Disabled extension/type mismatch warning:
            # if ext:
            #     if intype != ext:
            #         print(Colors.WARNING +
            #               """[Compiler] {} input file extention does not
            #               seem to match the declared argument type {} at
            #               {}""".format(ext, intype, source.name) +
            #               Colors.ENDC)
            args = [arg]
            sig = inspect.signature(staging)
            # Generate a new task for staging (fetching) the input
            stage_task = Task(gen_runner(staging, sig), staging, sig,
                              args, {})
            stage_task.is_staging = True
            # This generated task has exactly one output
            outport = stage_task.outputs['0']
            # The original input is not immediate anymore since it now
            # receives the staging task's output at runtime
            inport.flip_is_immediate()
            # Connect the out port of the new task to the in port of the
            # old source
            stage_task.edges.append(Edge(outport, inport))
            # The staging task replaces the old task as a graph source
            new_sources.append(stage_task)
            dropped_sources.append(source)
            new_tasks.append(stage_task)
    # Add the newly generated tasks to the graph
    for task in new_tasks:
        graph.add_task(task)
    # Mark removed sources as not sources
    for source in dropped_sources:
        graph.unset_source(source)
    # Add newly generated sources
    for source in new_sources:
        graph.set_source(source)
    return PassResult.CONTINUE
def __init__(self, name):
    """Create the staging insertion pass."""
    Pass.__init__(self, name)
    self.description = "Inserting staging operations"
def __init__(self, name, tag=""):
    """Create the dot graph generation pass.

    The optional tag distinguishes multiple dot graph snapshots taken at
    different points in the pass pipeline.
    """
    Pass.__init__(self, name, tag)
    self.description = "Generating the dot graph"
    # Dot renderings captured before/after other passes, if any.
    self.pre_graph = None
    self.post_graph = None
def __init__(self, name):
    """Create the type checking pass."""
    Pass.__init__(self, name)
    self.description = "Running the type checker"
def __init__(self, name):
    """Create the task fusion optimizer pass."""
    Pass.__init__(self, name)
    self.description = "Running the task fusion optimizer"