def test_modelwrapper():
    """Exercise basic ModelWrapper accessors on a freshly exported LFC model."""
    net = get_test_model_trained("LFC", 1, 1)
    bo.export_finn_onnx(net, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    # right after export, not every tensor carries a shape annotation
    assert model.check_all_tensor_shapes_specified() is False
    top_input = model.graph.input[0].name
    assert model.get_tensor_shape(top_input) == [1, 1, 28, 28]
    # locate the first MatMul; its inputs are the l0 activation and weights
    first_matmul = next(
        (n for n in model.graph.node if n.op_type == "MatMul"), None
    )
    assert first_matmul is not None
    act_name = first_matmul.input[0]
    weight_name = first_matmul.input[1]
    assert weight_name != ""
    weights = model.get_initializer(weight_name)
    assert weights.shape == (784, 1024)
    # binarized layer: every weight value is either +1 or -1
    hist = Counter(weights.flatten())
    assert hist[1.0] + hist[-1.0] == 784 * 1024
    # overwrite the initializer and read it back
    fresh_weights = np.random.randn(784, 1024)
    model.set_initializer(weight_name, fresh_weights)
    assert (model.get_initializer(weight_name) == fresh_weights).all()
    assert act_name != ""
    # producer/consumer lookups around the MatMul's activation input
    assert model.find_consumer(act_name).op_type == "MatMul"
    assert model.find_producer(act_name).op_type == "Sign"
    os.remove(export_onnx_path)
def test_modelwrapper():
    """Exercise basic ModelWrapper accessors on a freshly exported LFC model.

    Fix: resolve tensors from the graph structure instead of relying on
    hard-coded, exporter-assigned tensor names ("0" for the input, "33" for
    the l0 weights, "32" for the l0 activation) — those numbers change
    whenever the export flow renumbers tensors, silently breaking the test.
    This matches how the sibling variant of this test locates the MatMul.
    """
    lfc = get_test_model_trained("LFC", 1, 1)
    bo.export_finn_onnx(lfc, (1, 1, 28, 28), export_onnx_path)
    model = ModelWrapper(export_onnx_path)
    # right after export, not every tensor carries a shape annotation
    assert model.check_all_tensor_shapes_specified() is False
    # look up the top-level input by name rather than assuming it is "0"
    inp_name = model.graph.input[0].name
    inp_shape = model.get_tensor_shape(inp_name)
    assert inp_shape == [1, 1, 28, 28]
    # find the first MatMul: input[0] is the l0 activation (was "32"),
    # input[1] is the l0 weight initializer (was "33")
    l0_mat_tensor_name = ""
    l0_inp_tensor_name = ""
    for node in model.graph.node:
        if node.op_type == "MatMul":
            l0_inp_tensor_name = node.input[0]
            l0_mat_tensor_name = node.input[1]
            break
    assert l0_mat_tensor_name != ""
    l0_weights = model.get_initializer(l0_mat_tensor_name)
    assert l0_weights.shape == (784, 1024)
    l0_weights_hist = Counter(l0_weights.flatten())
    # NOTE(review): the exact +1/-1 counts are tied to the pretrained LFC
    # checkpoint returned by get_test_model_trained — confirm if it changes
    assert l0_weights_hist[1.0] == 401311 and l0_weights_hist[-1.0] == 401505
    # overwrite the initializer and read it back
    l0_weights_rand = np.random.randn(784, 1024)
    model.set_initializer(l0_mat_tensor_name, l0_weights_rand)
    assert (model.get_initializer(l0_mat_tensor_name) == l0_weights_rand).all()
    assert l0_inp_tensor_name != ""
    # producer/consumer lookups around the MatMul's activation input
    inp_cons = model.find_consumer(l0_inp_tensor_name)
    assert inp_cons.op_type == "MatMul"
    out_prod = model.find_producer(l0_inp_tensor_name)
    assert out_prod.op_type == "Sign"
    os.remove(export_onnx_path)
def test_modelwrapper():
    """Exercise ModelWrapper graph/tensor accessors on the bundled MNIST conv model."""
    raw_m = get_data("finn.data", "onnx/mnist-conv/model.onnx")
    model = ModelWrapper(raw_m)
    # the shipped model comes fully shape-annotated
    assert model.check_all_tensor_shapes_specified() is True
    top_in = model.graph.input[0].name
    assert model.get_tensor_shape(top_in) == [1, 1, 28, 28]
    # op-type queries: two convolutions, one matmul
    convs = model.get_nodes_by_op_type("Conv")
    assert len(convs) == 2
    assert len(model.get_nodes_by_op_type("MatMul")) == 1
    conv0 = convs[0]
    conv0_in = conv0.input[0]
    conv0_w = conv0.input[1]
    conv0_out = conv0.output[0]
    for tensor_name in (conv0_in, conv0_w, conv0_out):
        assert tensor_name != "" and (tensor_name is not None)
    # initializer round-trip on the conv weights
    assert model.get_initializer(conv0_w).shape == (8, 1, 5, 5)
    fresh_w = np.random.randn(8, 1, 5, 5)
    model.set_initializer(conv0_w, fresh_w)
    assert (model.get_initializer(conv0_w) == fresh_w).all()
    # producer/consumer lookups resolve back to the conv itself
    assert model.find_consumer(conv0_in) == conv0
    assert model.find_producer(conv0_out) == conv0
    # data layout annotation round-trip (unset by default)
    assert model.get_tensor_layout(conv0_in) is None
    model.set_tensor_layout(conv0_in, DataLayout.NCHW)
    assert model.get_tensor_layout(conv0_in) == DataLayout.NCHW
    # sparsity annotation round-trip (unset by default)
    assert model.get_tensor_sparsity(conv0_in) is None
    sparsity = {"dw": {"kernel_shape": [3, 3]}}
    model.set_tensor_sparsity(conv0_in, sparsity)
    assert model.get_tensor_sparsity(conv0_in) == sparsity
def apply(self, model):
    """Generate a PYNQ Python driver for a partitioned dataflow model.

    Emits driver_base.py, driver.py, validate.py and runtime weight files
    into a fresh build directory, and stores that directory in the model
    metadata under "pynq_driver_dir". Every node in the top-level graph
    must be a StreamingDataflowPartition (i.e. CreateDataflowPartition has
    already run) and boundary partitions must hold IODMA nodes.

    Returns (model, False): the graph itself is unchanged and the
    transformation does not need to be re-applied.
    """
    # create a temporary folder for the generated driver
    pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
    model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)

    # create the base FINN driver -- same for all accels
    driver_base_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/driver_base.py"
    )
    driver_base_py = pynq_driver_dir + "/driver_base.py"
    shutil.copy(driver_base_template, driver_base_py)

    # extract input-output shapes from the graph
    # TODO convert this to an analysis pass?
    idt = []
    idma_names = []
    ishape_normal = []
    ishape_folded = []
    ishape_packed = []
    # one pass per top-level graph input: collect datatype, normal/folded/
    # packed shapes and the name of the IODMA instance feeding it
    for idma_ind, graph_in in enumerate(model.graph.input):
        i_tensor_name = graph_in.name
        # get inp tensor properties
        i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
        i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
        # go down into dataflow partition to get folded shape info etc
        # TODO consider setting these as attributes during dataflow partitioning
        i_consumer = model.find_consumer(i_tensor_name)
        assert (
            i_consumer.op_type == "StreamingDataflowPartition"
        ), """ Ensure CreateDataflowPartition called before driver creation."""
        first_df_model = ModelWrapper(getCustomOp(i_consumer).get_nodeattr("model"))
        assert (
            first_df_model.graph.node[0].op_type == "IODMA"
        ), "First partition must hold input IODMA"
        # the folded input shape lives on the first compute node in the
        # partition *after* the IODMA partition, so follow the successor edge
        successors = model.find_direct_successors(i_consumer)
        # index of the edge feeding the successor, to pick the right subgraph input
        successor_input_num = list(successors[0].input).index(i_consumer.output[0])
        successor_sdp = getCustomOp(successors[0])
        successor_df_model = ModelWrapper(successor_sdp.get_nodeattr("model"))
        first_node = successor_df_model.find_consumer(
            successor_df_model.graph.input[successor_input_num].name
        )
        i_tensor_shape_folded = tuple(
            getCustomOp(first_node).get_folded_input_shape()
        )
        # generate dummy folded i/o tensors and their packed versions
        # (packing a dummy tensor is how the packed shape is discovered)
        i_tensor_dummy_folded = gen_finn_dt_tensor(
            i_tensor_dt, i_tensor_shape_folded
        )
        i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
            i_tensor_dummy_folded, i_tensor_dt
        )
        i_tensor_shape_packed = i_tensor_dummy_packed.shape
        # append all input tensor info to relevant lists
        idt.append("DataType['%s']" % i_tensor_dt.name)
        ishape_normal.append(i_tensor_shape_normal)
        ishape_folded.append(i_tensor_shape_folded)
        ishape_packed.append(i_tensor_shape_packed)
        idma_names.append(getCustomOp(i_consumer).get_nodeattr("instance_name"))

    odt = []
    odma_names = []
    oshape_normal = []
    oshape_folded = []
    oshape_packed = []
    # mirror of the input pass for each top-level graph output
    for odma_ind, graph_out in enumerate(model.graph.output):
        o_tensor_name = graph_out.name
        # get inp tensor properties
        o_tensor_dt = model.get_tensor_datatype(o_tensor_name)
        o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
        # go down into IODMA partition to get folded shape info etc
        # TODO consider setting these as attributes during dataflow partitioning
        o_producer = model.find_producer(o_tensor_name)
        assert (
            o_producer.op_type == "StreamingDataflowPartition"
        ), """ Ensure CreateDataflowPartition called before driver creation."""
        df_model = ModelWrapper(getCustomOp(o_producer).get_nodeattr("model"))
        assert (
            df_model.graph.node[-1].op_type == "IODMA"
        ), "Partition must hold output IODMA"
        # folded output shape comes from the last compute node in the
        # predecessor partition, so follow the predecessor edge
        predecessors = model.find_direct_predecessors(o_producer)
        predecessor_output_num = list(predecessors[0].output).index(
            o_producer.input[0]
        )
        predecessor_sdp = getCustomOp(predecessors[0])
        predecessor_df_model = ModelWrapper(predecessor_sdp.get_nodeattr("model"))
        last_node = predecessor_df_model.find_producer(
            predecessor_df_model.graph.output[predecessor_output_num].name
        )
        o_tensor_shape_folded = tuple(
            getCustomOp(last_node).get_folded_output_shape()
        )
        # pack a dummy tensor to discover the packed output shape
        o_tensor_dummy_folded = gen_finn_dt_tensor(
            o_tensor_dt, o_tensor_shape_folded
        )
        o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
            o_tensor_dummy_folded, o_tensor_dt
        )
        o_tensor_shape_packed = o_tensor_dummy_packed.shape
        # append all output tensor info to relevant lists
        odt.append("DataType['%s']" % o_tensor_dt.name)
        oshape_normal.append(o_tensor_shape_normal)
        oshape_folded.append(o_tensor_shape_folded)
        oshape_packed.append(o_tensor_shape_packed)
        odma_names.append(getCustomOp(o_producer).get_nodeattr("instance_name"))

    # generate external weights npy files
    weights_dir = pynq_driver_dir + "/runtime_weights"
    os.makedirs(weights_dir)
    idma_idx = 0
    ext_weight_dma_cnt = 0
    for node in model.graph.node:
        assert (
            node.op_type == "StreamingDataflowPartition"
        ), "CreateDataflowPartition needs to be applied before driver generation"
        # a partition may have no graph inputs at all (weight-only stream)
        if len(node.input) > 0:
            producer = model.find_producer(node.input[0])
            init_tensor = model.get_initializer(node.input[0])
        else:
            producer = None
            init_tensor = None
        if producer is None:
            # input dma?
            sdp_inst = getCustomOp(node)
            idma_name = sdp_inst.get_nodeattr("instance_name")
            df_model = ModelWrapper(sdp_inst.get_nodeattr("model"))
            assert df_model.graph.node[0].op_type == "IODMA"
            iodma_node = getCustomOp(df_model.graph.node[0])
            if iodma_node.get_nodeattr("burstMode") == "wrap":
                # input weights dma? save the weights as an external-layout
                # .npy next to the driver, named after the DMA instance
                init_tensor = df_model.get_initializer(
                    iodma_node.onnx_node.input[0]
                )
                ext_weight_dma_cnt += 1
                w_dtype = df_model.get_tensor_datatype(
                    iodma_node.onnx_node.input[0]
                )
                init_external_tensor = to_external_tensor(init_tensor, w_dtype)
                np.save(
                    weights_dir + "/" + idma_name + ".npy", init_external_tensor
                )
            idma_idx += 1

    # fill in the driver template by plain string substitution on $...$ markers
    driver_py = pynq_driver_dir + "/driver.py"
    driver = template_driver.pynq_driver_template
    driver = driver.replace("$PLATFORM$", self.platform)
    driver = driver.replace("$INPUT_FINN_DATATYPE$", str(idt).replace('"', ""))
    driver = driver.replace("$INPUT_SHAPE_NORMAL$", str(ishape_normal))
    driver = driver.replace("$INPUT_SHAPE_FOLDED$", str(ishape_folded))
    driver = driver.replace("$INPUT_SHAPE_PACKED$", str(ishape_packed))
    driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(odt).replace('"', ""))
    driver = driver.replace("$OUTPUT_SHAPE_NORMAL$", str(oshape_normal))
    driver = driver.replace("$OUTPUT_SHAPE_FOLDED$", str(oshape_folded))
    driver = driver.replace("$OUTPUT_SHAPE_PACKED$", str(oshape_packed))
    driver = driver.replace("$INPUT_DMA_NAME$", "%s" % str(idma_names))
    driver = driver.replace("$OUTPUT_DMA_NAME$", "%s" % str(odma_names))
    driver = driver.replace("$NUM_INPUTS$", str(len(idma_names)))
    driver = driver.replace("$NUM_OUTPUTS$", str(len(odma_names)))
    driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))

    with open(driver_py, "w") as f:
        f.write(driver)

    # add validate.py to run full top-1 test (only for suitable networks)
    validate_py = pynq_driver_dir + "/validate.py"
    validate_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/validate.py"
    )
    shutil.copy(validate_template, validate_py)

    # copy all the dependencies into the driver folder
    # driver imports utils/data_packing and core/datatype
    # both of which are in finn-base
    # e.g. /workspace/finn-base/src/finn/util/data_packing.py
    dpk_root = dpk.__file__
    # e.g. /workspace/finn-base/src/finn/util
    dpk_root = dpk_root.replace("data_packing.py", "")
    # e.g. /workspace/finn-base/src/finn/core/datatype.py
    dtp_root = dtp.__file__
    # e.g. /workspace/finn-base/src/finn/core
    dtp_root = dtp_root.replace("datatype.py", "")
    shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
    shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")

    # generate weight files for runtime-writable layers
    for sdp_ind, sdp_node in enumerate(model.graph.node):
        assert sdp_node.op_type == "StreamingDataflowPartition"
        # get dataflow model
        sdp_node = getCustomOp(sdp_node)
        dataflow_model_filename = sdp_node.get_nodeattr("model")
        dataflow_model = ModelWrapper(dataflow_model_filename)
        rt_layer_ind = 0
        for node in dataflow_model.graph.node:
            if node.op_type in ["StreamingFCLayer_Batch", "Thresholding_Batch"]:
                node_inst = getCustomOp(node)
                is_rt_weights = node_inst.get_nodeattr("runtime_writeable_weights")
                if is_rt_weights == 1:
                    # weight file name encodes partition index, layer index
                    # and node name so the driver can map it back
                    fcl_w = dataflow_model.get_initializer(node.input[1])
                    w_filename = weights_dir + "/%d_%d_%s.dat" % (
                        sdp_ind,
                        rt_layer_ind,
                        node.name,
                    )
                    node_inst.make_weight_file(
                        fcl_w, "decoupled_runtime", w_filename
                    )
                    rt_layer_ind += 1
            elif node.op_type == "StreamingDataflowPartition":
                warnings.warn(
                    """Nested StreamingDataflowPartition are not supported """
                )
            else:
                continue

    return (model, False)
def apply(self, model):
    """Generate a PYNQ Python driver for a single-input/single-output model.

    Emits driver_base.py, driver.py, validate.py and runtime weight files
    into a fresh build directory, and stores that directory in the model
    metadata under "pynq_driver_dir".

    Fixes applied (both consistent with the newer multi-I/O variant of
    this transformation):
    * guard against partition nodes with no inputs before indexing
      node.input[0] (previously an IndexError for weight-only partitions);
    * the output-partition IODMA check now inspects graph.node[-1], as the
      assert message ("Last partition must hold output IODMA") intends,
      instead of graph.node[0].

    Returns (model, False): the graph itself is unchanged and the
    transformation does not need to be re-applied.
    """
    # create a temporary folder for the generated driver
    pynq_driver_dir = make_build_dir(prefix="pynq_driver_")
    model.set_metadata_prop("pynq_driver_dir", pynq_driver_dir)

    # create the base FINN driver -- same for all accels
    driver_base_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/driver_base.py")
    driver_base_py = pynq_driver_dir + "/driver_base.py"
    shutil.copy(driver_base_template, driver_base_py)

    # extract input-output shapes from the graph
    # TODO convert this to an analysis pass?
    i_tensor_name = model.graph.input[0].name
    o_tensor_name = model.graph.output[0].name
    i_tensor_shape_normal = tuple(model.get_tensor_shape(i_tensor_name))
    o_tensor_shape_normal = tuple(model.get_tensor_shape(o_tensor_name))
    i_tensor_dt = model.get_tensor_datatype(i_tensor_name)
    o_tensor_dt = model.get_tensor_datatype(o_tensor_name)

    first_node = model.find_consumer(i_tensor_name)
    last_node = model.find_producer(o_tensor_name)
    if first_node.op_type == "StreamingDataflowPartition":
        # IODMAs and dataflow partitions have already been created
        # extract folded i/o shapes from IODMA consumer/producer
        first_df_model = ModelWrapper(
            getCustomOp(first_node).get_nodeattr("model"))
        assert (first_df_model.graph.node[0].op_type == "IODMA"
                ), "First partition must hold input IODMA"
        # folded input shape lives on the first compute node of the
        # partition that follows the input-IODMA partition
        successors = model.find_direct_successors(first_node)
        successor_sdp = getCustomOp(successors[0])
        successor_df_model = ModelWrapper(successor_sdp.get_nodeattr("model"))
        first_node = successor_df_model.find_consumer(
            successor_df_model.graph.input[0].name)
        last_df_model = ModelWrapper(
            getCustomOp(last_node).get_nodeattr("model"))
        # fix: check the *last* node, matching the assert message
        assert (last_df_model.graph.node[-1].op_type == "IODMA"
                ), "Last partition must hold output IODMA"
        # folded output shape comes from the last compute node of the
        # partition preceding the output-IODMA partition
        predecessors = model.find_direct_predecessors(last_node)
        predecessor_sdp = getCustomOp(predecessors[0])
        predecessor_df_model = ModelWrapper(
            predecessor_sdp.get_nodeattr("model"))
        last_node = predecessor_df_model.find_producer(
            predecessor_df_model.graph.output[0].name)
    # else: transformation called before IODMA/SDP creation (legacy flow)
    # can access folded i/o shapes directly
    i_tensor_shape_folded = tuple(
        getCustomOp(first_node).get_folded_input_shape())
    o_tensor_shape_folded = tuple(
        getCustomOp(last_node).get_folded_output_shape())

    # generate dummy folded i/o tensors and their packed versions
    # (packing a dummy tensor is how the packed shape is discovered)
    i_tensor_dummy_folded = gen_finn_dt_tensor(i_tensor_dt,
                                               i_tensor_shape_folded)
    o_tensor_dummy_folded = gen_finn_dt_tensor(o_tensor_dt,
                                               o_tensor_shape_folded)
    i_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
        i_tensor_dummy_folded, i_tensor_dt)
    o_tensor_dummy_packed = dpk.finnpy_to_packed_bytearray(
        o_tensor_dummy_folded, o_tensor_dt)
    i_tensor_shape_packed = i_tensor_dummy_packed.shape
    o_tensor_shape_packed = o_tensor_dummy_packed.shape

    # generate external weights npy files
    weights_dir = pynq_driver_dir + "/runtime_weights"
    os.makedirs(weights_dir)
    idma_idx = 0
    ext_weight_dma_cnt = 0
    for node in model.graph.node:
        assert (
            node.op_type == "StreamingDataflowPartition"
        ), "CreateDataflowPartition needs to be applied before driver generation"
        # fix: a partition may have no graph inputs at all; treat it like a
        # producer-less node instead of crashing on node.input[0]
        if len(node.input) > 0:
            producer = model.find_producer(node.input[0])
            init_tensor = model.get_initializer(node.input[0])
        else:
            producer = None
            init_tensor = None
        if producer is None:  # input dma?
            idma_name = "idma" + str(idma_idx)
            if init_tensor is not None:  # input weights dma?
                # save the weights as an external-layout .npy next to the
                # driver, named after the DMA instance
                ext_weight_dma_cnt += 1
                w_dtype = model.get_tensor_datatype(node.input[0])
                init_external_tensor = to_external_tensor(
                    init_tensor, w_dtype)
                np.save(weights_dir + "/" + idma_name + ".npy",
                        init_external_tensor)
            else:
                # NOTE(review): net_input_name stays unbound if every
                # producer-less partition carries an initializer; the later
                # template substitution would then raise NameError
                net_input_name = idma_name
            idma_idx += 1

    # fill in the driver template by plain string substitution on $...$ markers
    driver_py = pynq_driver_dir + "/driver.py"
    driver = template_driver.pynq_driver_template

    def mss(x, batch_var_name="1"):
        # "make shape string"
        # for a shape like (1, ...) emit a string (N, ...)
        # where N is the default value for batch_var_name
        # this lets the driver work with a batch of samples at once
        ret = str(x)
        ret = ret.replace("(1,", "(%s," % batch_var_name)
        ret = ret.replace("[1,", "[%s," % batch_var_name)
        return ret

    driver = driver.replace("$PLATFORM$", self.platform)
    driver = driver.replace("$INPUT_FINN_DATATYPE$", str(i_tensor_dt))
    driver = driver.replace("$INPUT_SHAPE_NORMAL$", mss(i_tensor_shape_normal))
    driver = driver.replace("$INPUT_SHAPE_FOLDED$", mss(i_tensor_shape_folded))
    driver = driver.replace("$INPUT_SHAPE_PACKED$", mss(i_tensor_shape_packed))
    driver = driver.replace("$OUTPUT_FINN_DATATYPE$", str(o_tensor_dt))
    driver = driver.replace("$OUTPUT_SHAPE_NORMAL$",
                            mss(o_tensor_shape_normal))
    driver = driver.replace("$OUTPUT_SHAPE_FOLDED$",
                            mss(o_tensor_shape_folded))
    driver = driver.replace("$OUTPUT_SHAPE_PACKED$",
                            mss(o_tensor_shape_packed))
    driver = driver.replace("$INPUT_DMA_NAME$", "'%s'" % net_input_name)
    driver = driver.replace("$EXT_WEIGHT_NUM$", str(ext_weight_dma_cnt))

    with open(driver_py, "w") as f:
        f.write(driver)

    # add validate.py to run full top-1 test (only for suitable networks)
    validate_py = pynq_driver_dir + "/validate.py"
    validate_template = pk.resource_filename(
        "finn.qnn-data", "templates/driver/validate.py")
    shutil.copy(validate_template, validate_py)

    # copy all the dependencies into the driver folder
    # driver imports utils/data_packing and core/datatype
    # both of which are in finn-base
    # e.g. /workspace/finn-base/src/finn/util/data_packing.py
    dpk_root = dpk.__file__
    # e.g. /workspace/finn-base/src/finn/util
    dpk_root = dpk_root.replace("data_packing.py", "")
    # e.g. /workspace/finn-base/src/finn/core/datatype.py
    dtp_root = dtp.__file__
    # e.g. /workspace/finn-base/src/finn/core
    dtp_root = dtp_root.replace("datatype.py", "")
    shutil.copytree(dpk_root, pynq_driver_dir + "/finn/util")
    shutil.copytree(dtp_root, pynq_driver_dir + "/finn/core")

    # generate weight files for runtime-writable layers
    for sdp_ind, sdp_node in enumerate(model.graph.node):
        assert sdp_node.op_type == "StreamingDataflowPartition"
        # get dataflow model
        sdp_node = getCustomOp(sdp_node)
        dataflow_model_filename = sdp_node.get_nodeattr("model")
        dataflow_model = ModelWrapper(dataflow_model_filename)
        rt_layer_ind = 0
        for node in dataflow_model.graph.node:
            if node.op_type in [
                    "StreamingFCLayer_Batch", "Thresholding_Batch"
            ]:
                node_inst = getCustomOp(node)
                is_rt_weights = node_inst.get_nodeattr(
                    "runtime_writeable_weights")
                if is_rt_weights == 1:
                    # weight file name encodes partition index, layer index
                    # and node name so the driver can map it back
                    fcl_w = dataflow_model.get_initializer(node.input[1])
                    w_filename = weights_dir + "/%d_%d_%s.dat" % (
                        sdp_ind,
                        rt_layer_ind,
                        node.name,
                    )
                    node_inst.make_weight_file(fcl_w, "decoupled_runtime",
                                               w_filename)
                    rt_layer_ind += 1
            elif node.op_type == "StreamingDataflowPartition":
                warnings.warn(
                    """Nested StreamingDataflowPartition are not supported """)
            else:
                continue

    return (model, False)