def test_streamline_fc(size, wbits, abits): if size == "LFC" and wbits == 2 and abits == 2: pytest.skip("No LFC-w2a2 present at the moment") if wbits > abits: pytest.skip("No wbits > abits cases at the moment") nname = "%s_%dW%dA" % (size, wbits, abits) finn_onnx = export_onnx_path + "/%s.onnx" % nname fc = get_test_model_trained(size, wbits, abits) bo.export_finn_onnx(fc, (1, 1, 28, 28), finn_onnx) model = ModelWrapper(finn_onnx) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(RemoveStaticGraphInputs()) # load one of the test vectors raw_i = get_data("finn", "data/onnx/mnist-conv/test_data_set_0/input_0.pb") input_tensor = onnx.load_tensor_from_string(raw_i) # run using FINN-based execution input_dict = {"global_in": nph.to_array(input_tensor)} expected_ctx = oxe.execute_onnx(model, input_dict, True) expected = expected_ctx[model.graph.output[0].name] model = model.transform(Streamline()) model = model.transform(RemoveUnusedTensors()) assert len(model.graph.initializer) == 11 assert len(model.graph.value_info) == 21 assert len(model.graph.quantization_annotation) == 20 produced_ctx = oxe.execute_onnx(model, input_dict, True) produced = produced_ctx[model.graph.output[0].name] assert np.isclose(expected, produced, atol=1e-3).all()
def step_streamline(model: ModelWrapper, cfg: DataflowBuildConfig): """Run streamlining on given model. Streamlining involves moving floating point scale/shift parameters around, collapsing adjacent ones into a single parameter, then absorbing the scale/shift into the following `MultiThreshold` node. Streamlining requires careful topology design and cannot be applied to all topologies. """ model = model.transform(absorb.AbsorbSignBiasIntoMultiThreshold()) model = model.transform(Streamline()) need_lowering = len(model.get_nodes_by_op_type("Conv")) > 0 if need_lowering: model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) # absorb final add-mul nodes into TopK model = model.transform(absorb.AbsorbScalarMulAddIntoTopK()) model = model.transform(InferDataLayouts()) model = model.transform(RemoveUnusedTensors()) if VerificationStepType.STREAMLINED_PYTHON in cfg._resolve_verification_steps( ): verify_step(model, cfg, "streamlined_python", need_parent=False) return model
def test_streamline_cnv(size, wbits, abits): if wbits > abits: pytest.skip("No wbits > abits cases at the moment") nname = "%s_%dW%dA" % (size, wbits, abits) finn_onnx = export_onnx_path + "/%s.onnx" % nname fc = get_test_model_trained(size, wbits, abits) bo.export_finn_onnx(fc, (1, 3, 32, 32), finn_onnx) model = ModelWrapper(finn_onnx) model = model.transform(InferShapes()) model = model.transform(FoldConstants()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(RemoveStaticGraphInputs()) # load one of the test vectors fn = pk.resource_filename("finn", "data/cifar10/cifar10-test-data-class3.npz") input_tensor = np.load(fn)["arr_0"].astype(np.float32) input_tensor = input_tensor / 255 assert input_tensor.shape == (1, 3, 32, 32) # run using FINN-based execution input_dict = {"global_in": input_tensor} expected_ctx = oxe.execute_onnx(model, input_dict, True) expected = expected_ctx[model.graph.output[0].name] # model.save("orig_cnv.onnx") model = model.transform(Streamline()) model = model.transform(RemoveUnusedTensors()) assert len(model.graph.initializer) == 21 assert len(model.graph.value_info) == 43 # model.save("streamlined_cnv.onnx") assert len(model.graph.node) == 23 produced_ctx = oxe.execute_onnx(model, input_dict, True) produced = produced_ctx[model.graph.output[0].name] assert np.isclose(expected, produced, atol=1e-3).all() assert model.graph.node[0].op_type == "MultiThreshold" assert np.argmax(produced) == 3
def test_end2end_mobilenet_create_dataflow_partition(): model = load_test_checkpoint_or_skip(build_dir + "/end2end_mobilenet_folded.onnx") parent_model = model.transform(CreateDataflowPartition()) parent_model.save(build_dir + "/end2end_mobilenet_dataflow_parent.onnx") sdp_node = parent_model.get_nodes_by_op_type("StreamingDataflowPartition")[0] sdp_node = getCustomOp(sdp_node) dataflow_model_filename = sdp_node.get_nodeattr("model") dataflow_model = load_test_checkpoint_or_skip(dataflow_model_filename) dataflow_model = dataflow_model.transform(RemoveUnusedTensors()) dataflow_model.save(build_dir + "/end2end_mobilenet_dataflow_model.onnx")
def cleanup(self): "Run cleanup transformations on the model." transformed_model = self cleanup_transforms = [ RemoveUnusedTensors(), RemoveStaticGraphInputs(), SortGraph(), ] for trn in cleanup_transforms: transformed_model = transformed_model.transform( trn, cleanup=False, make_deepcopy=False) return transformed_model
def apply(self, model): _check_vitis_envvars() # first infer layouts model = model.transform(InferDataLayouts()) # prepare at global level, then break up into kernels prep_transforms = [InsertIODMA(512), InsertDWC()] for trn in prep_transforms: model = model.transform(trn) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) model = model.transform(Floorplan(floorplan=self.floorplan_file)) model = model.transform(CreateDataflowPartition()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) # Build each kernel individually sdp_nodes = model.get_nodes_by_op_type("StreamingDataflowPartition") for sdp_node in sdp_nodes: sdp_node = getCustomOp(sdp_node) dataflow_model_filename = sdp_node.get_nodeattr("model") kernel_model = ModelWrapper(dataflow_model_filename) kernel_model = kernel_model.transform(InsertFIFO()) kernel_model = kernel_model.transform(RemoveUnusedTensors()) kernel_model = kernel_model.transform(GiveUniqueNodeNames()) kernel_model.save(dataflow_model_filename) kernel_model = kernel_model.transform( PrepareIP(self.fpga_part, self.period_ns)) kernel_model = kernel_model.transform(HLSSynthIP()) kernel_model = kernel_model.transform( CreateStitchedIP(self.fpga_part, self.period_ns, sdp_node.onnx_node.name, True)) kernel_model = kernel_model.transform( CreateVitisXO(sdp_node.onnx_node.name)) kernel_model.set_metadata_prop("platform", "alveo") kernel_model.save(dataflow_model_filename) # Assemble design from kernels if self.enable_link: model = model.transform( VitisLink( self.platform, round(1000 / self.period_ns), strategy=self.strategy, enable_debug=self.enable_debug, )) # set platform attribute for correct remote execution model.set_metadata_prop("platform", "alveo") # create driver model = model.transform(MakePYNQDriver(platform="alveo")) return (model, False)
def inference_cost(model_filename, *, output_json=None, output_onnx=None, preprocess=True, discount_sparsity=True): """Print the inference cost estimate metric for given ONNX model. Supports the Quant op for weight/activation quantization. :param model_filename: Filename for ONNX model :param output_json: Optional JSON filename to save the inference cost dict :param output_onnx: Optional ONNX filename to save the final model after any preprocessing :param preprocess: If set, run preprocessing steps such as shape inference, datatype inference and constant folding. Strongly recommended. :param discount_sparsity: If set, will discount op cost of MAC ops with a constant zero weight, and the mem cost of constant zero weights. """ print("Inference cost for " + model_filename) model = ModelWrapper(model_filename) if preprocess: qnt_nodes = model.get_nodes_by_op_type("Quant") for qnt_node in qnt_nodes: qnt_node.domain = "finn.custom_op.general" model = model.transform(InferShapes()) model = model.transform(GiveUniqueParameterTensors()) model = model.transform(InferDataTypes()) model = model.transform(FoldConstants()) model = model.transform(RemoveUnusedTensors()) model = model.transform(RemoveStaticGraphInputs()) model = model.transform(InferDataTypes()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(GiveReadableTensorNames()) if output_onnx is not None: model.save(output_onnx) ret = model.analysis(lambda x: infca.inference_cost(x, discount_sparsity)) bops = compute_bops(ret) mem_w_bits = compute_mem_bits(ret, "mem_w") mem_o_bits = compute_mem_bits(ret, "mem_o") ret["total_bops"] = bops ret["total_mem_w_bits"] = mem_w_bits ret["total_mem_o_bits"] = mem_o_bits if "unsupported" in ret: ret["unsupported"] = str(ret["unsupported"]) print(json.dumps(ret, sort_keys=True, indent=2)) if output_json is not None: with open(output_json, "w") as f: json.dump(ret, f, sort_keys=True, indent=2)
def step_resnet50_streamline(model: ModelWrapper, cfg: DataflowBuildConfig): for iter_id in range(4): model = step_resnet50_streamline_linear(model, cfg) model = step_resnet50_streamline_nonlinear(model, cfg) # big loop tidy up model = model.transform(RemoveUnusedTensors()) model = model.transform(GiveReadableTensorNames()) model = model.transform(InferDataTypes()) model = model.transform(SortGraph()) model = model.transform(DoubleToSingleFloat()) return model
def streamline(model, binary=True): log("Streamline transformations launched") model = model.transform(MoveScalarLinearPastInvariants()) model = model.transform(Streamline()) # Absorb add and mul in thresholds model = model.transform(absorb.AbsorbAddIntoMultiThreshold()) model = model.transform(absorb.AbsorbMulIntoMultiThreshold()) # Absorb add-mul in top-k model = model.transform(absorb.AbsorbScalarMulAddIntoTopK()) model = model.transform(RoundAndClipThresholds()) # Tidy-up model = model.transform(InferDataLayouts()) model = model.transform(RemoveUnusedTensors()) log("Streamline transformations completed") save(model, "3_streamlined") return model
def test_streamline(self, topology, wbits, abits): prev_chkpt_name = get_checkpoint_name(topology, wbits, abits, "pre_post") model = load_test_checkpoint_or_skip(prev_chkpt_name) # move past any reshapes to be able to streamline input scaling model = model.transform(MoveScalarLinearPastInvariants()) model = model.transform(Streamline()) if "fc" not in topology: model = model.transform(LowerConvsToMatMul()) model = model.transform(MakeMaxPoolNHWC()) model = model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) model = model.transform(ConvertBipolarMatMulToXnorPopcount()) model = model.transform(Streamline()) # absorb final add-mul nodes into TopK model = model.transform(absorb.AbsorbScalarMulAddIntoTopK()) model = model.transform(InferDataLayouts()) model = model.transform(RemoveUnusedTensors()) model.save(get_checkpoint_name(topology, wbits, abits, "streamline"))
def step_resnet50_convert_to_hls(model: ModelWrapper, cfg: DataflowBuildConfig): model.set_tensor_datatype(model.graph.input[0].name, DataType["UINT8"]) model = model.transform(InferDataLayouts()) try: from finn.transformation.fpgadataflow.infer_doublepacked_dsp import InferDoublePackedConv model = model.transform(InferDoublePackedConv([1])) except: print( " FINN Experimental not available. Using non-packed convolution ") model = model.transform(DoubleToSingleFloat()) model = model.transform(InferDataTypes()) model = model.transform(SortGraph()) to_hls_transformations = [ to_hls.InferAddStreamsLayer, LowerConvsToMatMul, to_hls.InferChannelwiseLinearLayer, to_hls.InferPool_Batch, AbsorbTransposeIntoMultiThreshold, RoundAndClipThresholds, to_hls.InferQuantizedStreamingFCLayer, to_hls.InferThresholdingLayer, AbsorbConsecutiveTransposes, to_hls.InferConvInpGen, to_hls.InferDuplicateStreamsLayer, to_hls.InferLabelSelectLayer ] for trn in to_hls_transformations: model = model.transform(trn()) model = model.transform(InferDataLayouts()) model = model.transform(GiveUniqueNodeNames()) model = model.transform(InferDataTypes()) model = model.transform(RemoveCNVtoFCFlatten()) model = model.transform(GiveReadableTensorNames()) model = model.transform(RemoveUnusedTensors()) model = model.transform(SortGraph()) return model
def apply(self, model): graph_modified = False invalid_nodes = _find_invalid_nodes(model) if len(invalid_nodes) > 0: warnings.warn("Transformation is not applied,\ found unsupported nodes in the graph: {}.".format( invalid_nodes)) return (model, graph_modified) # Infer the shapes of each tensor, remove unused tensors # and give each tensor a readable name model = model.transform(InferShapes()) model = model.transform(RemoveUnusedTensors()) # This list contains all nodes with initializers that need to be converted nodes_with_initializers = ["Mul", "Conv", "Add", "Div", "Reshape"] # Obtain a list of initializer names (used to filter out only value infos) initializers_names = [x.name for x in model.graph.initializer] all_tensors = {} # Extract the inputs all_tensors = { **all_tensors, **{ x.name: [ x.type.tensor_type.elem_type, model.get_tensor_shape(x.name) ] for x in model.graph.input }, } # Extract only the output tensors all_tensors = { **all_tensors, **{ x.name: [ x.type.tensor_type.elem_type, model.get_tensor_shape(x.name) ] for x in model.graph.value_info if x.name not in initializers_names }, } # Extract only initializers from nodes that are relevant for conversion all_tensors = { **all_tensors, **{ x.name: [x.data_type, x.dims] for x in model.graph.initializer if model.find_consumers(x.name)[0].op_type in nodes_with_initializers }, } # Extract the outputs all_tensors = { **all_tensors, **{ x.name: [ x.type.tensor_type.elem_type, model.get_tensor_shape(x.name) ] for x in model.graph.output }, } # The list below contains tensor names that are the output of nodes that # reduce the tensor's dimension. The shape of these tensors also needs # to be extended tensors_reduced_dimension = [] for n in model.graph.node: node_op_type = n.op_type input_shape = model.get_tensor_shape(n.input[0]) # Find tensors that are the output of nodes that reduce the dimension if node_op_type == "ArgMax": keep_dims = get_by_name(n.attribute, "keepdims", "name").i if len(input_shape) == 3 and keep_dims == 0: node_out = n.output for n_o in node_out: tensors_reduced_dimension.append(n_o) # Each node from the list of supported nodes is made compatible # with 4D tensors if node_op_type == "Transpose": perm = get_by_name(n.attribute, "perm", "name").ints if ( len(perm) == 3 ): # Meaning that the transpose operation was on a 3D tensor perm.append(3) # append 4th dimension elif node_op_type in ["ArgMax", "LogSoftMax", "TopK", "Flatten"]: axis = get_by_name(n.attribute, "axis", "name") if len(input_shape) == 3 and axis.i < 0: axis.i = 3 + axis.i # count dimensions from the front elif node_op_type == "Conv": dilations = get_by_name(n.attribute, "dilations", "name").ints kernel_shape = get_by_name(n.attribute, "kernel_shape", "name").ints pads = get_by_name(n.attribute, "pads", "name").ints strides = get_by_name(n.attribute, "strides", "name").ints if len(dilations) == 1: # we must add another dimension to it dilations.append( 1 ) # only equal dilation value along each spatial axis is supported if len(kernel_shape ) == 1: # we must add another dimension to it kernel_shape.append(1) if ( len(pads) == 2 ): # pads = [x1_begin, x1_end] --> [x1_begin, x2_begin, x1_end, x2_end] pads.insert(1, 0) pads.append(0) if len(strides) == 1: # strides = [stride_h, stride_w] strides.append(1) elif node_op_type == "MaxPool": kernel_shape = get_by_name(n.attribute, "kernel_shape", "name").ints pads = get_by_name(n.attribute, "pads", "name").ints strides = get_by_name(n.attribute, "strides", "name").ints if len(kernel_shape ) == 1: # we must add another dimension to it kernel_shape.append(1) if ( len(pads) == 2 ): # pads = [x1_begin, x1_end] --> [x1_begin, x2_begin, x1_end, x2_end] pads.insert(1, 0) pads.append(0) if len(strides) == 1: # strides = [stride_h, stride_w] strides.append(1) # Change format of each input/value_info/output tensor for k, v in all_tensors.items(): tensor_type = v[0] shape = v[1] # Add extra dimension for tensors that either: # 1) Have 3 dimensions ( (N,C,H) -> (N,C,H,1) ) # 2) Come after operations that reduce their dimension: e.g. {Argmax, ...} if len(shape) == 3 or k in tensors_reduced_dimension: shape.append(1) model.set_tensor_shape(k, shape, tensor_type) return (model, graph_modified)
def test_convert_to_hls_conv_fc_transition(conv_config, depthwise, use_reshape): np.random.seed(0) idt = DataType["UINT4"] odt = DataType["UINT4"] conv_weight_dt = DataType["INT4"] fc_weight_dt = DataType["INT4"] input_shape, kernel_shape, stride, pad = conv_config kernel_size_h, kernel_size_w = kernel_shape input_size_h, input_size_w = input_shape stride_h, stride_w = stride pad_h, pad_w = pad in_chn = 4 fc_filters = 16 if depthwise is True: group = out_chn = in_chn conv_param_shape = [out_chn, 1, kernel_size_h, kernel_size_w] else: group = 1 out_chn = 8 conv_param_shape = [out_chn, in_chn, kernel_size_h, kernel_size_w] output_size_h = compute_conv_output_dim(input_size_h, kernel_size_h, stride_h, 2 * pad_h) output_size_w = compute_conv_output_dim(input_size_w, kernel_size_w, stride_w, 2 * pad_w) input_shape = [1, in_chn, input_size_h, input_size_w] fc_param_shape = [out_chn * output_size_h * output_size_w, fc_filters] output_shape = [1, fc_filters] conv_config = {} conv_config["dilations"] = [1, 1] conv_config["group"] = group conv_config["kernel_shape"] = [kernel_size_h, kernel_size_w] conv_config["pads"] = [pad_h, pad_w, pad_h, pad_w] conv_config["strides"] = [stride_h, stride_w] global_in = helper.make_tensor_value_info("global_in", TensorProto.FLOAT, input_shape) global_out = helper.make_tensor_value_info("global_out", TensorProto.FLOAT, output_shape) value_info = [ helper.make_tensor_value_info("conv_param", TensorProto.FLOAT, conv_param_shape), helper.make_tensor_value_info("thres1_param", TensorProto.FLOAT, (out_chn, 15)), helper.make_tensor_value_info("matmul_param", TensorProto.FLOAT, fc_param_shape), helper.make_tensor_value_info("thres2_param", TensorProto.FLOAT, (fc_filters, 15)), helper.make_tensor_value_info("reshape_shape", TensorProto.INT64, []), ] if use_reshape: flatten_node = helper.make_node("Reshape", ["thres1_out", "reshape_shape"], ["flatten_out"]) else: flatten_node = helper.make_node("Flatten", ["thres1_out"], ["flatten_out"], axis=1) modelproto = helper.make_model( helper.make_graph( name="test", inputs=[global_in], outputs=[global_out], value_info=value_info, nodes=[ helper.make_node("Conv", ["global_in", "conv_param"], ["conv_out"], **conv_config), helper.make_node( "MultiThreshold", ["conv_out", "thres1_param"], ["thres1_out"], domain="finn.custom_op.general", out_dtype="UINT4", ), flatten_node, helper.make_node("MatMul", ["flatten_out", "matmul_param"], ["matmul_out"]), helper.make_node( "MultiThreshold", ["matmul_out", "thres2_param"], ["global_out"], domain="finn.custom_op.general", out_dtype="UINT4", ), ], )) model = ModelWrapper(modelproto) model.set_tensor_datatype("global_in", idt) model.set_tensor_layout("global_in", DataLayout.NCHW) model.set_tensor_datatype("global_out", odt) model.set_tensor_datatype("conv_param", conv_weight_dt) model.set_tensor_datatype("matmul_param", fc_weight_dt) model.set_tensor_datatype("thres1_param", DataType["INT32"]) model.set_tensor_datatype("thres2_param", DataType["INT32"]) model.set_initializer("conv_param", gen_finn_dt_tensor(conv_weight_dt, conv_param_shape)) model.set_initializer("thres1_param", get_multithreshold_rand_params(out_chn, 15, seed=0)) model.set_initializer( "thres2_param", get_multithreshold_rand_params(fc_filters, 15, seed=0)) model.set_initializer("matmul_param", gen_finn_dt_tensor(fc_weight_dt, fc_param_shape)) model.set_initializer("reshape_shape", np.array([1, -1])) model = model.transform(InferShapes()) model = model.transform(InferDataTypes()) model = model.transform(InferDataLayouts()) # streamlining new_model = model.transform(MoveScalarLinearPastInvariants()) new_model = new_model.transform(Streamline()) new_model = new_model.transform(LowerConvsToMatMul()) new_model = new_model.transform(absorb.AbsorbTransposeIntoMultiThreshold()) new_model = new_model.transform(Streamline()) new_model = new_model.transform(InferDataLayouts()) new_model = new_model.transform(RemoveUnusedTensors()) # convert_to_hls if depthwise is True: new_model = new_model.transform(to_hls.InferVVAU()) new_model = new_model.transform(to_hls.InferQuantizedStreamingFCLayer()) new_model = new_model.transform(to_hls.InferThresholdingLayer()) new_model = new_model.transform(to_hls.InferConvInpGen()) new_model = new_model.transform(to_hls.InferStreamingMaxPool()) new_model = new_model.transform(RemoveCNVtoFCFlatten()) new_model = new_model.transform(absorb.AbsorbConsecutiveTransposes()) new_model = new_model.transform(GiveUniqueNodeNames()) new_model = new_model.transform(InferDataLayouts()) # prepare cppsim new_model = new_model.transform(PrepareCppSim()) new_model = new_model.transform(CompileCppSim()) new_model = new_model.transform(SetExecMode("cppsim")) # check for correct execution x = gen_finn_dt_tensor(idt, input_shape) inp_dict = {model.graph.input[0].name: x} assert oxe.compare_execution(model, new_model, inp_dict) num_transpose = len(new_model.get_nodes_by_op_type("Transpose")) num_flatten = len(new_model.get_nodes_by_op_type("Flatten")) num_reshape = len(new_model.get_nodes_by_op_type("Reshape")) # check if transpose->flatten was removed assert num_transpose == 1 and num_flatten == 0 and num_reshape == 0