def unparse_tasklet(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView, state_id: int, node: nodes.Node, function_stream: prettycode.CodeIOStream, callsite_stream: prettycode.CodeIOStream): # extract data state = sdfg.nodes()[state_id] tasklet = node # construct variables paths unique_name: str = "{}_{}_{}_{}".format(tasklet.name, sdfg.sdfg_id, sdfg.node_id(state), state.node_id(tasklet)) # Collect all of the input and output connectors into buses and scalars buses = {} scalars = {} for edge in state.in_edges(tasklet): arr = sdfg.arrays[edge.src.data] # catch symbolic (compile time variables) check_issymbolic([ tasklet.in_connectors[edge.dst_conn].veclen, tasklet.in_connectors[edge.dst_conn].bytes ], sdfg) # extract parameters vec_len = int( symbolic.evaluate(tasklet.in_connectors[edge.dst_conn].veclen, sdfg.constants)) total_size = int( symbolic.evaluate(tasklet.in_connectors[edge.dst_conn].bytes, sdfg.constants)) if isinstance(arr, data.Array): if self.hardware_target: raise NotImplementedError( 'Array input for hardware* not implemented') else: buses[edge.dst_conn] = (False, total_size, vec_len) elif isinstance(arr, data.Stream): buses[edge.dst_conn] = (False, total_size, vec_len) elif isinstance(arr, data.Scalar): scalars[edge.dst_conn] = (False, total_size * 8) for edge in state.out_edges(tasklet): arr = sdfg.arrays[edge.dst.data] # catch symbolic (compile time variables) check_issymbolic([ tasklet.out_connectors[edge.src_conn].veclen, tasklet.out_connectors[edge.src_conn].bytes ], sdfg) # extract parameters vec_len = int( symbolic.evaluate(tasklet.out_connectors[edge.src_conn].veclen, sdfg.constants)) total_size = int( symbolic.evaluate(tasklet.out_connectors[edge.src_conn].bytes, sdfg.constants)) if isinstance(arr, data.Array): if self.hardware_target: raise NotImplementedError( 'Array input for hardware* not implemented') else: buses[edge.src_conn] = (True, total_size, vec_len) elif isinstance(arr, data.Stream): buses[edge.src_conn] = (True, total_size, vec_len) elif isinstance(arr, data.Scalar): print('Scalar output not implemented') # generate system verilog module components parameter_string: str = self.generate_rtl_parameters(sdfg.constants) inputs, outputs = self.generate_rtl_inputs_outputs(buses, scalars) # create rtl code object (that is later written to file) self.code_objects.append( codeobject.CodeObject( name="{}".format(unique_name), code=RTLCodeGen.RTL_HEADER.format(name=unique_name, parameters=parameter_string, inputs="\n".join(inputs), outputs="\n".join(outputs)) + tasklet.code.code + RTLCodeGen.RTL_FOOTER, language="sv", target=RTLCodeGen, title="rtl", target_type="{}".format(unique_name), additional_compiler_kwargs="", linkable=True, environments=None)) if self.hardware_target: if self.vendor == 'xilinx': rtllib_config = { "name": unique_name, "buses": { name: ('m_axis' if is_output else 's_axis', vec_len) for name, (is_output, _, vec_len) in buses.items() }, "params": { "scalars": { name: total_size for name, (_, total_size) in scalars.items() }, "memory": {} }, "ip_cores": tasklet.ip_cores if isinstance( tasklet, nodes.RTLTasklet) else {}, } self.code_objects.append( codeobject.CodeObject(name=f"{unique_name}_control", code=rtllib_control(rtllib_config), language="v", target=RTLCodeGen, title="rtl", target_type="{}".format(unique_name), additional_compiler_kwargs="", linkable=True, environments=None)) self.code_objects.append( codeobject.CodeObject(name=f"{unique_name}_top", code=rtllib_top(rtllib_config), language="v", target=RTLCodeGen, title="rtl", target_type="{}".format(unique_name), additional_compiler_kwargs="", linkable=True, environments=None)) self.code_objects.append( codeobject.CodeObject(name=f"{unique_name}_package", code=rtllib_package(rtllib_config), language="tcl", target=RTLCodeGen, title="rtl", target_type="scripts", additional_compiler_kwargs="", linkable=True, environments=None)) self.code_objects.append( codeobject.CodeObject(name=f"{unique_name}_synth", code=rtllib_synth(rtllib_config), language="tcl", target=RTLCodeGen, title="rtl", target_type="scripts", additional_compiler_kwargs="", linkable=True, environments=None)) else: # self.vendor != "xilinx" raise NotImplementedError( 'Only RTL codegen for Xilinx is implemented') else: # not hardware_target # generate verilator simulation cpp code components inputs, outputs = self.generate_cpp_inputs_outputs(tasklet) valid_zeros, ready_zeros = self.generate_cpp_zero_inits(tasklet) vector_init = self.generate_cpp_vector_init(tasklet) num_elements = self.generate_cpp_num_elements(tasklet) internal_state_str, internal_state_var = self.generate_cpp_internal_state( tasklet) read_input_hs = self.generate_input_hs(tasklet) feed_elements = self.generate_feeding(tasklet, inputs) in_ptrs, out_ptrs = self.generate_ptrs(tasklet) export_elements = self.generate_exporting(tasklet, outputs) write_output_hs = self.generate_write_output_hs(tasklet) hs_flags = self.generate_hs_flags(tasklet) input_hs_toggle = self.generate_input_hs_toggle(tasklet) output_hs_toggle = self.generate_output_hs_toggle(tasklet) running_condition = self.generate_running_condition(tasklet) # add header code to stream if not self.cpp_general_header_added: sdfg.append_global_code( cpp_code=RTLCodeGen.CPP_GENERAL_HEADER_TEMPLATE.format( debug_include="// generic includes\n#include <iostream>" if self.verilator_debug else "")) self.cpp_general_header_added = True sdfg.append_global_code( cpp_code=RTLCodeGen.CPP_MODEL_HEADER_TEMPLATE.format( name=unique_name)) # add main cpp code to stream callsite_stream.write(contents=RTLCodeGen.CPP_MAIN_TEMPLATE.format( name=unique_name, inputs=inputs, outputs=outputs, num_elements=str.join('\n', num_elements), vector_init=vector_init, valid_zeros=str.join('\n', valid_zeros), ready_zeros=str.join('\n', ready_zeros), read_input_hs=str.join('\n', read_input_hs), feed_elements=str.join('\n', feed_elements), in_ptrs=str.join('\n', in_ptrs), out_ptrs=str.join('\n', out_ptrs), export_elements=str.join('\n', export_elements), write_output_hs=str.join('\n', write_output_hs), hs_flags=str.join('\n', hs_flags), input_hs_toggle=str.join('\n', input_hs_toggle), output_hs_toggle=str.join('\n', output_hs_toggle), running_condition=str.join(' && ', running_condition), internal_state_str=internal_state_str, internal_state_var=internal_state_var, debug_sim_start="std::cout << \"SIM {name} START\" << std::endl;" if self.verilator_debug else "", debug_internal_state=""" // report internal state VL_PRINTF("[t=%lu] ap_aclk=%u ap_areset=%u valid_i=%u ready_i=%u valid_o=%u ready_o=%u \\n", main_time, model->ap_aclk, model->ap_areset, model->valid_i, model->ready_i, model->valid_o, model->ready_o); VL_PRINTF("{internal_state_str}\\n", {internal_state_var}); std::cout << std::flush; """.format(internal_state_str=internal_state_str, internal_state_var=internal_state_var) if self.verilator_debug else "", debug_sim_end="std::cout << \"SIM {name} END\" << std::endl;" if self.verilator_debug else ""), sdfg=sdfg, state_id=state_id, node_id=node)
def unparse_tasklet(self, sdfg: sdfg.SDFG, dfg: state.StateSubgraphView, state_id: int, node: nodes.Node, function_stream: prettycode.CodeIOStream, callsite_stream: prettycode.CodeIOStream): # extract data state = sdfg.nodes()[state_id] tasklet = node # construct variables paths unique_name: str = "top_{}_{}_{}".format(sdfg.sdfg_id, sdfg.node_id(state), state.node_id(tasklet)) # generate system verilog module components parameter_string: str = self.generate_rtl_parameters(sdfg.constants) inputs, outputs = self.generate_rtl_inputs_outputs(sdfg, tasklet) # create rtl code object (that is later written to file) self.code_objects.append( codeobject.CodeObject( name="{}".format(unique_name), code=RTLCodeGen.RTL_HEADER.format(name=unique_name, parameters=parameter_string, inputs="\n".join(inputs), outputs="\n".join(outputs)) + tasklet.code.code + RTLCodeGen.RTL_FOOTER, language="sv", target=RTLCodeGen, title="rtl", target_type="", additional_compiler_kwargs="", linkable=True, environments=None)) # generate verilator simulation cpp code components inputs, outputs = self.generate_cpp_inputs_outputs(tasklet) vector_init = self.generate_cpp_vector_init(tasklet) num_elements = self.generate_cpp_num_elements() internal_state_str, internal_state_var = self.generate_cpp_internal_state( tasklet) # add header code to stream if not self.cpp_general_header_added: sdfg.append_global_code( cpp_code=RTLCodeGen.CPP_GENERAL_HEADER_TEMPLATE.format( debug_include="// generic includes\n#include <iostream>" if self.verilator_debug else "")) self.cpp_general_header_added = True sdfg.append_global_code( cpp_code=RTLCodeGen.CPP_MODEL_HEADER_TEMPLATE.format( name=unique_name)) # add main cpp code to stream callsite_stream.write(contents=RTLCodeGen.CPP_MAIN_TEMPLATE.format( name=unique_name, inputs=inputs, outputs=outputs, num_elements=num_elements, vector_init=vector_init, internal_state_str=internal_state_str, internal_state_var=internal_state_var, debug_sim_start="std::cout << \"SIM {name} START\" << std::endl;" if self.verilator_debug else "", debug_feed_element="std::cout << \"feed new element\" << std::endl;" if self.verilator_debug else "", debug_export_element="std::cout << \"export element\" << std::endl;" if self.verilator_debug else "", debug_internal_state=""" // report internal state VL_PRINTF("[t=%lu] clk_i=%u rst_i=%u valid_i=%u ready_i=%u valid_o=%u ready_o=%u \\n", main_time, model->clk_i, model->rst_i, model->valid_i, model->ready_i, model->valid_o, model->ready_o); VL_PRINTF("{internal_state_str}\\n", {internal_state_var}); std::cout << std::flush; """.format(internal_state_str=internal_state_str, internal_state_var=internal_state_var) if self.verilator_debug else "", debug_read_input_hs= "std::cout << \"remove read_input_hs flag\" << std::endl;" if self.verilator_debug else "", debug_output_hs= "std::cout << \"remove write_output_hs flag\" << std::endl;" if self.verilator_debug else "", debug_sim_end="std::cout << \"SIM {name} END\" << std::endl;" if self.verilator_debug else ""), sdfg=sdfg, state_id=state_id, node_id=node)
def apply(self, sdfg): if isinstance(self.subgraph[StateFusion.first_state], SDFGState): first_state: SDFGState = self.subgraph[StateFusion.first_state] second_state: SDFGState = self.subgraph[StateFusion.second_state] else: first_state: SDFGState = sdfg.node( self.subgraph[StateFusion.first_state]) second_state: SDFGState = sdfg.node( self.subgraph[StateFusion.second_state]) # Remove interstate edge(s) edges = sdfg.edges_between(first_state, second_state) for edge in edges: if edge.data.assignments: for src, dst, other_data in sdfg.in_edges(first_state): other_data.assignments.update(edge.data.assignments) sdfg.remove_edge(edge) # Special case 1: first state is empty if first_state.is_empty(): sdutil.change_edge_dest(sdfg, first_state, second_state) sdfg.remove_node(first_state) if sdfg.start_state == first_state: sdfg.start_state = sdfg.node_id(second_state) return # Special case 2: second state is empty if second_state.is_empty(): sdutil.change_edge_src(sdfg, second_state, first_state) sdutil.change_edge_dest(sdfg, second_state, first_state) sdfg.remove_node(second_state) if sdfg.start_state == second_state: sdfg.start_state = sdfg.node_id(first_state) return # Normal case: both states are not empty # Find source/sink (data) nodes first_input = [ node for node in sdutil.find_source_nodes(first_state) if isinstance(node, nodes.AccessNode) ] first_output = [ node for node in sdutil.find_sink_nodes(first_state) if isinstance(node, nodes.AccessNode) ] second_input = [ node for node in sdutil.find_source_nodes(second_state) if isinstance(node, nodes.AccessNode) ] top2 = top_level_nodes(second_state) # first input = first input - first output first_input = [ node for node in first_input if next((x for x in first_output if x.data == node.data), None) is None ] # Merge second state to first state # First keep a backup of the topological sorted order of the nodes sdict = first_state.scope_dict() order = [ x for x in reversed(list(nx.topological_sort(first_state._nx))) if isinstance(x, nodes.AccessNode) and sdict[x] is None ] for node in second_state.nodes(): if isinstance(node, nodes.NestedSDFG): # update parent information node.sdfg.parent = first_state first_state.add_node(node) for src, src_conn, dst, dst_conn, data in second_state.edges(): first_state.add_edge(src, src_conn, dst, dst_conn, data) top = top_level_nodes(first_state) # Merge common (data) nodes for node in second_input: # merge only top level nodes, skip everything else if node not in top2: continue if first_state.in_degree(node) == 0: candidates = [ x for x in order if x.data == node.data and x in top ] if len(candidates) == 0: continue elif len(candidates) == 1: n = candidates[0] else: # Choose first candidate that intersects memlets for cand in candidates: if StateFusion.memlets_intersect( first_state, [cand], False, second_state, [node], True): n = cand break else: # No node intersects, use topologically-last node n = candidates[0] sdutil.change_edge_src(first_state, node, n) first_state.remove_node(node) n.access = dtypes.AccessType.ReadWrite # Redirect edges and remove second state sdutil.change_edge_src(sdfg, second_state, first_state) sdfg.remove_node(second_state) if sdfg.start_state == second_state: sdfg.start_state = sdfg.node_id(first_state)
def unique_name(self, node: nodes.RTLTasklet, state, sdfg): return "{}_{}_{}_{}".format(node.name, sdfg.sdfg_id, sdfg.node_id(state), state.node_id(node))