def get_test_files(name):
    """Download and unpack a test archive, then load its model path and data.

    The archive registered under ``name`` in ``URLS`` is downloaded into
    ``CURR_PATH`` and extracted there. Every sub-directory of the extracted
    ``<CURR_PATH>/<name>`` folder is treated as one test case that holds a
    serialized ``TensorProto`` in ``input_0.pb`` and another in
    ``output_0.pb``.

    Parameters
    ----------
    name : str
        Key into ``URLS``; also the name of the directory that is scanned
        for test cases after extraction.

    Returns
    -------
    tuple
        ``(model_path, inputs, outputs)``. ``model_path`` points at
        ``model.onnx`` inside the data directory; ``inputs`` and ``outputs``
        are lists of numpy arrays with one entry per test case, in the same
        (sorted by directory name) order.
    """
    def _load_tensor(pb_path):
        # Deserialize one TensorProto file into a numpy array.
        proto = TensorProto()
        with open(pb_path, 'rb') as proto_file:
            proto.ParseFromString(proto_file.read())
        return numpy_helper.to_array(proto)

    curr_path = str(CURR_PATH)
    tar_name = download(URLS.get(name), dirname=curr_path)
    tar_path = os.path.join(curr_path, tar_name)
    # The context manager closes the archive even if extraction fails.
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; confirm the download source is trusted.
    with tarfile.open(tar_path, "r:*") as tar:
        tar.extractall(path=curr_path)

    data_dir = os.path.join(curr_path, name)
    model_path = os.path.join(data_dir, 'model.onnx')

    inputs = []
    outputs = []
    # os.listdir() order is arbitrary; sort so results are reproducible.
    for entry in sorted(os.listdir(data_dir)):
        case_dir = os.path.join(data_dir, entry)
        # Only directories are test cases (this skips e.g. model.onnx).
        if not os.path.isdir(case_dir):
            continue
        inputs.append(_load_tensor(os.path.join(case_dir, 'input_0.pb')))
        outputs.append(_load_tensor(os.path.join(case_dir, 'output_0.pb')))

    return model_path, inputs, outputs
Exemplo n.º 2
0
    def test_make_tensor(self):  # type: () -> None
        """helper.make_tensor must round-trip float lists, raw bytes and strings."""
        expected = np.random.randn(2, 3).astype(np.float32)
        flat = expected.reshape(6)

        # Float payload supplied as a plain Python list.
        from_list = helper.make_tensor(
            name='test',
            data_type=TensorProto.FLOAT,
            dims=(2, 3),
            vals=flat.tolist(),
        )
        self.assertEqual(from_list.name, 'test')
        np.testing.assert_equal(expected, numpy_helper.to_array(from_list))

        # Same payload supplied through the raw_data field.
        from_bytes = helper.make_tensor(
            name='test',
            data_type=TensorProto.FLOAT,
            dims=(2, 3),
            vals=flat.tobytes(),
            raw=True,
        )
        np.testing.assert_equal(expected, numpy_helper.to_array(from_bytes))

        # String tensors keep their byte strings in string_data.
        encoded_names = [
            text.encode('ascii') for text in ['Amy', 'Billy', 'Cindy', 'David']
        ]
        string_tensor = helper.make_tensor(
            name='test',
            data_type=TensorProto.STRING,
            dims=(2, 2),
            vals=encoded_names,
            raw=False,
        )
        self.assertEqual(encoded_names, list(string_tensor.string_data))
Exemplo n.º 3
0
 def _parse_array(self, tensor_proto):
     """Convert a TensorProto into an ``nd.array``.

     A proto that declares no dims is wrapped into a one-element array;
     otherwise the data is reshaped to the declared dims.
     """
     try:
         from onnx.numpy_helper import to_array
     except ImportError:
         raise ImportError("Onnx and protobuf need to be installed. "
                           + "Instructions to install - https://github.com/onnx/onnx")
     dims = tuple(tensor_proto.dims)
     if len(dims) == 0:
         # Scalar parameter stored without any dims.
         return nd.array(np.array([to_array(tensor_proto)]))
     return nd.array(to_array(tensor_proto).reshape(dims))
Exemplo n.º 4
0
def run_generated_test(model_file, data_dir, device='CPU'):
    """Run a model through the c2 backend and compare with stored outputs.

    Inputs are read from ``input_<i>.pb`` and expected outputs from
    ``output_<i>.pb`` inside ``data_dir``; the number of each is taken from
    how many files match the corresponding glob pattern.
    """
    def _load_numbered(glob_pattern, file_template):
        # Count the matching files, then load them by consecutive index.
        count = len(glob.glob(os.path.join(data_dir, glob_pattern)))
        return [
            numpy_helper.to_array(load_tensor_as_numpy_array(
                os.path.join(data_dir, file_template.format(idx))))
            for idx in range(count)
        ]

    model = onnx.load(model_file)
    inputs = _load_numbered("input_*.pb", "input_{}.pb")
    outputs = _load_numbered("output_*.pb", "output_{}.pb")
    backend_rep = c2.prepare(model, device=device)
    assert_similar(outputs, backend_rep.run(inputs))
Exemplo n.º 5
0
    def prepare(
        cls,
        model,  # type: ModelProto
        device,  # type: singa device
        **kwargs  # type: Any
    ):  # type: (...) -> Optional[BackendRep]
        """
        Build a SingaBackendRep, materialising every Constant node as a tensor.

        Args:
            model: onnx model proto
            device: singa device
        Return:
            SingaBackendRep instance
        """
        super(SingaBackend, cls).prepare(model, device, **kwargs)
        constants = {}
        for node in model.graph.node:
            if node.op_type != "Constant":
                continue
            attrs = node.attribute
            value = helper.get_attribute_value(attrs[0])
            if len(attrs) == 3:
                # The second and third attributes carry the gradient flags.
                requires_grad = helper.get_attribute_value(attrs[1])
                stores_grad = helper.get_attribute_value(attrs[2])
            else:
                requires_grad = True
                stores_grad = True
            # Constants are keyed by the name of the node's first output.
            constants[node.output[0]] = tensor.Tensor(
                device=device,
                data=numpy_helper.to_array(value),
                requires_grad=requires_grad,
                stores_grad=stores_grad,
            )

        return SingaBackendRep(model, device, constants)
Exemplo n.º 6
0
 def _parse_array(self, tensor_proto):
     """Convert a TensorProto into a ``tvm.nd.array`` shaped by its dims."""
     try:
         from onnx.numpy_helper import to_array
     except ImportError as err:
         raise ImportError("Unable to import onnx which is required {}".format(err))
     values = to_array(tensor_proto)
     shape = tuple(tensor_proto.dims)
     return tvm.nd.array(values.reshape(shape))
Exemplo n.º 7
0
 def _parse_array(self, tensor_proto):
     """Convert a TensorProto into an ``nd.array`` shaped by its dims."""
     try:
         from onnx.numpy_helper import to_array
     except ImportError:
         raise ImportError("Onnx and protobuf need to be installed. "
                           + "Instructions to install - https://github.com/onnx/onnx")
     values = to_array(tensor_proto)
     shape = tuple(tensor_proto.dims)
     return nd.array(values.reshape(shape))
Exemplo n.º 8
0
    def test_fuse_bn_into_conv_simple(self):  # type: () -> None
        """fuse_bn_into_conv must fold BatchNormalization into the Conv.

        Checked for float and double tensors: a single Conv node with two
        initializers remains, and they match the analytically folded
        weight and bias.
        """
        type_pairs = (
            (TensorProto.FLOAT, np.float32),
            (TensorProto.DOUBLE, np.float64),
        )
        for tensor_type, np_type in type_pairs:
            conv = helper.make_node("Conv", ["X", "W", "B"], ["Y"])
            bn = helper.make_node("BatchNormalization", ["Y", "scale", "b", "mean", "var"], ["Z"])

            # Draw order is W, B, scale, b, mean, var.
            W = np.random.randn(3, 2, 5, 5).astype(np_type) + 2
            B, scale, b, mean = (
                np.random.randn(3,).astype(np_type) + 2 for _ in range(4)
            )
            var = np.abs(np.random.randn(3,).astype(np_type)) + 2

            named_params = [('W', W), ('B', B), ('scale', scale), ('b', b), ('mean', mean), ('var', var)]
            initializers = []
            for param_name, values in named_params:
                initializers.append(
                    helper.make_tensor(param_name, tensor_type, values.shape, values.tobytes(), raw=True))

            input_shapes = [
                ("X", (5, 2, 28, 28)),
                ("W", (3, 2, 5, 5)),
                ("B", (3,)),
                ("scale", (3,)),
                ("b", (3,)),
                ("mean", (3,)),
                ("var", (3,)),
            ]
            graph_inputs = [
                helper.make_tensor_value_info(input_name, tensor_type, dims)
                for input_name, dims in input_shapes
            ]
            graph_outputs = [helper.make_tensor_value_info("Z", tensor_type, (3,))]
            intermediate = [helper.make_tensor_value_info("Y", tensor_type, (3,))]
            graph = helper.make_graph(
                [conv, bn],
                "test",
                graph_inputs,
                graph_outputs,
                initializer=initializers,
                value_info=intermediate,
            )
            optimized_model = self._optimized(graph, ["fuse_bn_into_conv"])

            fused_graph = optimized_model.graph
            self.assertEqual(len(fused_graph.node), 1)
            self.assertEqual(fused_graph.node[0].op_type, 'Conv')
            self.assertEqual(len(fused_graph.initializer), 2)
            new_W = numpy_helper.to_array(fused_graph.initializer[0])
            new_b = numpy_helper.to_array(fused_graph.initializer[1])

            # Per-output-channel factor applied by the folded BatchNormalization.
            factor = scale / np.sqrt(var + 1e-5)
            np.testing.assert_almost_equal((B - mean) * factor + b, new_b)
            np.testing.assert_almost_equal(W * factor[:, np.newaxis, np.newaxis, np.newaxis], new_W)
Exemplo n.º 9
0
 def _test_numpy_helper_int_type(self, dtype):  # type: (np.number) -> None
     """Round-trip a random (13, 37) integer array of ``dtype`` through numpy_helper."""
     limits = np.iinfo(dtype)
     original = np.random.randint(
         limits.min, limits.max, dtype=dtype, size=(13, 37))
     proto = numpy_helper.from_array(original, "test")
     self.assertEqual(proto.name, "test")
     recovered = numpy_helper.to_array(proto)
     np.testing.assert_equal(original, recovered)
def get_model_input(model_dir):
    """Collect the sample inputs stored under ``model_dir``.

    Two layouts are read: ``test_data_*.npz`` archives (their ``inputs``
    entry) and ``test_data_set*`` directories holding ``input_<i>.pb``
    serialized TensorProtos.

    Returns:
        list: the collected inputs.
    """
    import onnx
    from onnx import numpy_helper

    model_inputs = []
    npz_pattern = os.path.join(model_dir, 'test_data_*.npz')
    for npz_path in glob.glob(npz_pattern):
        # Each archive replaces whatever was collected so far.
        archive = np.load(npz_path, encoding='bytes')
        model_inputs = list(archive['inputs'])

    set_pattern = os.path.join(model_dir, "test_data_set*")
    for data_set_dir in glob.glob(set_pattern):
        pb_count = len(glob.glob(os.path.join(data_set_dir, 'input_*.pb')))
        for idx in range(pb_count):
            pb_path = os.path.join(data_set_dir, 'input_{}.pb'.format(idx))
            proto = onnx.TensorProto()
            with open(pb_path, 'rb') as pb_file:
                proto.ParseFromString(pb_file.read())
            model_inputs.append(numpy_helper.to_array(proto))

    return model_inputs
Exemplo n.º 11
0
    def getInputNodes(self, nodeName):
        """Get names of nodes that are inputs to the given node

        Inputs that appear in the outputs of some node of the graph are
        returned. Inputs that instead match a graph initializer by name are
        converted to numpy arrays and stored in self.constantMap.

        Args:
            nodeName (str): Name of node

        Returns:
            (list of str): Names of nodes that are inputs to the given node

        :meta private:
        """
        producedInputs = []
        for inputName in self.getNode(nodeName).input:
            if any(inputName in other.output for other in self.graph.node):
                producedInputs.append(inputName)
                continue
            matching = [
                init for init in self.graph.initializer
                if init.name == inputName
            ]
            if matching:
                # Only the first initializer with this name is recorded.
                self.constantMap[inputName] = numpy_helper.to_array(matching[0])
        return producedInputs
Exemplo n.º 12
0
    def fuse_add_bias_skip_layer_norm(self):
        """Fold a bias Add that feeds SkipLayerNormalization into the node itself.

        For every SkipLayerNormalization node with exactly four inputs whose
        parent path is ``Add <- MatMul`` (reached through input 0 or 1), and
        whose Add has a one-dimensional initializer input, the Add and the
        original node are replaced by a new five-input SkipLayerNormalization
        node in the ``com.microsoft`` domain. The new node takes the other
        skip input, the MatMul output, the original inputs 2 and 3, and the
        bias initializer.

        Nodes are collected first and removed/added once all candidates have
        been examined.
        """
        input_name_to_nodes = self.input_name_to_nodes()
        output_name_to_node = self.output_name_to_node()
        nodes_to_remove = []
        nodes_to_add = []

        skip_layer_norm_nodes = self.get_nodes_by_op_type(
            "SkipLayerNormalization")
        for node in skip_layer_norm_nodes:
            if len(node.input) != 4:
                continue

            return_indice = []
            nodes = self.match_parent_path(node, ['Add', 'MatMul'],
                                           [None, None], None, return_indice)
            if nodes is None:
                continue
            assert len(return_indice) == 2
            add_input_index = return_indice[0]
            if add_input_index >= 2:
                continue

            (add, matmul) = nodes

            # bias should be one dimension
            # Reset per node: without this, an Add with no initializer input
            # either raised UnboundLocalError (first node) or silently reused
            # the bias found for a previous node.
            bias_index = -1
            bias_weight = None
            for i, input_name in enumerate(add.input):
                initializer = self.get_initializer(input_name)
                if initializer is None:
                    continue
                bias_index = i
                bias_weight = numpy_helper.to_array(initializer)
                break
            if bias_weight is None:
                logger.debug("Bias weight not found")
                continue
            if len(bias_weight.shape) != 1:
                logger.debug("Bias weight is not 1D")
                continue

            subgraph_nodes = [node, add]
            if not self.is_safe_to_fuse_nodes(subgraph_nodes, [node.output[0]],
                                              input_name_to_nodes,
                                              output_name_to_node):
                logger.debug(
                    "Skip fusing SkipLayerNormalization with Bias since it is not safe"
                )
                continue

            nodes_to_remove.extend(subgraph_nodes)
            new_node = onnx.helper.make_node(
                "SkipLayerNormalization",
                inputs=[
                    # add_input_index is 0 or 1 here, so this picks the
                    # input of the node that is NOT fed by the Add.
                    node.input[1 - add_input_index], matmul.output[0],
                    node.input[2], node.input[3], add.input[bias_index]
                ],
                outputs=node.output,
                name=self.create_node_name("SkipLayerNormalization",
                                           "SkipLayerNorm_AddBias_"))
            new_node.domain = "com.microsoft"
            nodes_to_add.append(new_node)

        if len(nodes_to_add) > 0:
            logger.info(
                f"Fused SkipLayerNormalization with Bias count:{len(nodes_to_add)}"
            )

        self.remove_nodes(nodes_to_remove)
        self.add_nodes(nodes_to_add)
Exemplo n.º 13
0
    def fuse_reshape(self):
        """Replace dynamically computed Reshape shapes with constant tensors.

        A Reshape is rewritten when its shape input comes from a Concat with
        three or four inputs where:

        * inputs 0 and 1 are ``Unsqueeze(Gather(Shape(x)))`` with gather
          indices 0 and 1 (each becomes ``0`` in the new shape),
        * input 2 (and 3, if present) is either an initializer, whose values
          are copied into the new shape, or a subgraph derived from
          ``Shape(x)`` (``Mul`` for input 2, ``Div`` for input 3), which
          becomes ``-1``; at most one ``-1`` is produced,
        * every matched Shape node reads the same tensor as the Reshape.

        The Concat and the matched parent paths are removed and a Constant
        node holding the int64 shape is added in their place.
        """
        # NOTE(review): the result is unused; the call is kept in case
        # self.nodes() has side effects - confirm and remove if it does not.
        nodes = self.nodes()
        input_name_to_nodes = self.input_name_to_nodes()
        output_name_to_node = self.output_name_to_node()

        nodes_to_remove = []
        nodes_to_add = []

        def constant_dims(initializer):
            # numpy_helper.to_array always returns an ndarray (never a list),
            # so flatten it into plain ints: a scalar contributes one dim and
            # a 1-D tensor contributes all of its dims. Appending the array
            # itself only worked for single-element tensors.
            return numpy_helper.to_array(initializer).reshape(-1).tolist()

        for reshape_node in self.get_nodes_by_op_type('Reshape'):
            if reshape_node.input[1] not in output_name_to_node:
                continue
            concat_node = output_name_to_node[reshape_node.input[1]]
            if concat_node.op_type != 'Concat' or len(
                    concat_node.input) < 3 or len(concat_node.input) > 4:
                continue

            path0 = self.match_parent_path(concat_node,
                                           ['Unsqueeze', 'Gather', 'Shape'],
                                           [0, 0, 0], output_name_to_node)
            if path0 is None:
                continue
            (unsqueeze_0, gather_0, shape_0) = path0

            path1 = self.match_parent_path(concat_node,
                                           ['Unsqueeze', 'Gather', 'Shape'],
                                           [1, 0, 0], output_name_to_node)
            if path1 is None:
                continue
            (unsqueeze_1, gather_1, shape_1) = path1

            # A 0 in a Reshape shape copies that dimension from the input,
            # so gathering dim 0 / dim 1 of the input shape maps to 0.
            shape = []
            gather_value = self.get_constant_value(gather_0.input[1])
            if gather_value == 0:
                shape.append(0)

            gather_value = self.get_constant_value(gather_1.input[1])
            if gather_value == 1:
                shape.append(0)

            if len(shape) != 2:
                continue

            path2 = []
            path3 = []
            shape_nodes = [shape_0, shape_1]
            if len(concat_node.input) == 3 and self.get_initializer(
                    concat_node.input[2]) is None:
                path2 = self.match_parent_path(
                    concat_node, ['Unsqueeze', 'Mul', 'Gather', 'Shape'],
                    [2, 0, 0, 0], output_name_to_node)
                if path2 is None:
                    path2 = self.match_parent_path(
                        concat_node,
                        ['Unsqueeze', 'Mul', 'Squeeze', 'Slice', 'Shape'],
                        [2, 0, 0, 0, 0], output_name_to_node
                    )  # GPT2 exported by PyTorch 1.4 with opset_version=11
                    if path2 is None:
                        continue

                path3 = self.match_parent_path(
                    concat_node, ['Unsqueeze', 'Mul', 'Gather', 'Shape'],
                    [2, 0, 1, 0], output_name_to_node)
                if path3 is None:
                    path3 = self.match_parent_path(
                        concat_node,
                        ['Unsqueeze', 'Mul', 'Squeeze', 'Slice', 'Shape'],
                        [2, 0, 1, 0, 0], output_name_to_node
                    )  # GPT2 exported by PyTorch 1.4 with opset_version=11
                    if path3 is None:
                        continue

                shape_nodes.extend([path2[-1], path3[-1]])
                shape.append(-1)
            elif (len(concat_node.input) > 2):
                concat_2 = self.get_initializer(concat_node.input[2])
                if concat_2 is None:
                    continue
                shape.extend(constant_dims(concat_2))

            if len(concat_node.input) == 4 and self.get_initializer(
                    concat_node.input[3]) is None:
                # Only one dimension of a Reshape shape may be -1.
                if -1 in shape:
                    continue

                path2 = self.match_parent_path(
                    concat_node, ['Unsqueeze', 'Div', 'Gather', 'Shape'],
                    [3, 0, 0, 0], output_name_to_node)
                if path2 is None:
                    path2 = self.match_parent_path(
                        concat_node,
                        ['Unsqueeze', 'Div', 'Squeeze', 'Slice', 'Shape'],
                        [3, 0, 0, 0, 0], output_name_to_node
                    )  # GPT2 exported by PyTorch 1.4 with opset_version=11
                    if path2 is None:
                        continue
                shape_nodes.extend([path2[-1]])
                shape.append(-1)
            elif (len(concat_node.input) > 3):
                concat_3 = self.get_initializer(concat_node.input[3])
                if concat_3 is None:
                    continue
                shape.extend(constant_dims(concat_3))

            # Every Shape node must read the tensor that is being reshaped;
            # otherwise the 0 / -1 placeholders would not be equivalent.
            root_input = reshape_node.input[0]
            if any(shape_node.input[0] != root_input
                   for shape_node in shape_nodes):
                continue

            shape_value = np.asarray(shape, dtype=np.int64)

            constant_shape_name = self.create_node_name(
                'Constant', 'constant_shape')
            new_node = onnx.helper.make_node('Constant',
                                             inputs=[],
                                             outputs=[constant_shape_name],
                                             value=onnx.helper.make_tensor(
                                                 name='const_tensor',
                                                 data_type=TensorProto.INT64,
                                                 dims=shape_value.shape,
                                                 vals=shape_value))
            reshape_node.input[1] = constant_shape_name
            reshape_node.name = self.create_node_name('Reshape',
                                                      'Reshape_Fuse')
            nodes_to_remove.extend([concat_node])
            nodes_to_remove.extend(path0)
            nodes_to_remove.extend(path1)
            nodes_to_remove.extend(path2)
            nodes_to_remove.extend(path3)
            nodes_to_add.append(new_node)

        logger.info(f"Fused Reshape count:{len(nodes_to_add)}")

        self.remove_nodes(nodes_to_remove)
        self.add_nodes(nodes_to_add)
Exemplo n.º 14
0
def convert_tensor(tensor):
    """Rescale a tensor to the [-127, 127] range and cast it in place.

    The tensor's values are divided by their largest absolute value,
    multiplied by 127, cast to ``dest_np_type`` and written back into
    ``tensor`` under the same name.

    Empty tensors and all-zero tensors have no usable scale; they are only
    cast, instead of raising in ``np.max`` or dividing by zero.

    Args:
        tensor: TensorProto to convert; modified in place.
    """
    a = numpy_helper.to_array(tensor)
    if a.size:
        peak = max(abs(np.max(a)), abs(np.min(a)))
        # peak == 0 means every element is zero: scaling would produce NaN.
        if peak != 0:
            a = a / peak * 127
    a = a.astype(dest_np_type)
    tensor.CopyFrom(numpy_helper.from_array(a, name=tensor.name))
def test_attention_streamingmemory(tmpdir):
    """Check that all execution variants train the attention model identically.

    A stack of matmul + attention layers is trained under a list of option
    dictionaries (``test_variants``). The first variant is the reference:
    for every other variant the returned anchors and the initializers of the
    saved ONNX model must be close to those of the reference run.

    Args:
        tmpdir: directory object supporting ``/``; the trained models are
            written into it as ``streamingmemory_attention_<index>.onnx``.
    """
    # Fixed seed so every variant sees the same weights and input data.
    np.random.seed(0XDEAD1337)
    batches_per_step = 5
    batch_size = 8
    hidden_size = 16
    sequence_length = 8
    attention_heads = 4
    # NOTE(review): true division, so this is the float 4.0 - confirm that
    # attention_onnx accepts a float here.
    qkv_length = hidden_size / attention_heads

    input_shape = [batch_size * sequence_length, hidden_size]
    mask_shape = [batch_size, 1, 1, sequence_length]

    qkv_data = np.random.normal(
        0, 0.02, [hidden_size, hidden_size * 3]).astype(np.float32)

    # One mask per batch of the step: positions r < i map to 0 and all
    # other positions to -1000.
    r = np.arange(0, sequence_length)
    r = np.reshape(batch_size * [r], mask_shape)
    masks = []
    for i in range(batches_per_step):
        masks.append(np.less(r, i).astype(np.float32))
    mask_data = (1 - np.stack(masks)) * -1000.0

    input_data = np.random.normal(0, 0.02, [batches_per_step] +
                                  input_shape).astype(np.float32)

    def run_test(index, options):
        """Build, train and save the model for one option dictionary.

        Args:
            index: position of the variant; used in the saved file name.
            options: dict of settings read below ("stages", "numLayers",
                "phasedExecution", "outlining", "explicitRecomputation",
                "aliasZeroCopy", "batchSerialize", "replication" and the
                optional keys checked with ``in``).

        Returns:
            The anchor arrays after training; with replication > 1 they are
            reduced/reshaped first (see the end of this function).
        """
        # NOTE(review): true division, so this is a float - confirm that
        # attention_onnx accepts it.
        per_replica_batch_size = batch_size / options["replication"]
        # Each replica gets an equal share of the leading dimension.
        model_input_shape = input_shape[:]
        model_input_shape[0] = int(model_input_shape[0] /
                                   options["replication"])
        model_mask_shape = mask_shape[:]
        model_mask_shape[0] = int(model_mask_shape[0] / options["replication"])

        # Default phase stride derived from the stage count; an explicit,
        # truthy "stride" option overrides it.
        stride = 2 // options["stages"]
        if "stride" in options and options["stride"]:
            stride = options["stride"]

        builder = popart.Builder(opsets={
            "ai.onnx": 9,
            "ai.onnx.ml": 1,
            "ai.graphcore": 1
        })

        mask = builder.addInputTensor(
            popart.TensorInfo("FLOAT", model_mask_shape), "mask")
        x_in = builder.addInputTensor(
            popart.TensorInfo("FLOAT", model_input_shape), "x_in")

        anchors = {}
        x = x_in
        for i in range(options["numLayers"]):
            # Every layer starts from the same qkv_data values; the gradient
            # of each layer's weight is anchored.
            qkv = builder.addInitializedInputTensor(qkv_data, f"qkv_{i}")
            anchors[popart.reservedGradientPrefix() +
                    qkv] = popart.AnchorReturnType("All")

            # Phased execution cycles layers over the stages; otherwise
            # each layer gets its own virtual graph.
            vgid = (i % options["stages"]) if options["phasedExecution"] else i

            with builder.virtualGraph(vgid), builder.executionPhase(i *
                                                                    stride):
                x = builder.aiOnnx.matmul([x, qkv])
                x = attention_onnx(builder, x, mask, per_replica_batch_size,
                                   sequence_length, hidden_size,
                                   attention_heads, qkv_length)

        # The loss is placed on the same virtual graph / phase as the
        # last layer.
        vgid = ((options["numLayers"] - 1) % options["stages"]
                ) if options["phasedExecution"] else options["numLayers"] - 1

        with builder.virtualGraph(vgid), builder.executionPhase(
            (options["numLayers"] - 1) * stride):
            l1 = builder.aiGraphcore.l1loss([x], 0.2, popart.ReductionType.Sum)

        proto = builder.getModelProto()

        # Remember which anchors are gradients before the output anchor
        # is added.
        gradient_keys = list(anchors.keys())
        anchors[x] = popart.AnchorReturnType("All")

        dataFlow = popart.DataFlow(batches_per_step, anchors)

        opts = popart.SessionOptions()
        opts.executionPhaseSettings.stages = options["stages"]

        opts.executionPhaseSettings.phases = (
            options["numLayers"] * stride if options["phasedExecution"] else 0)
        opts.enableOutlining = options["outlining"]

        if "phaseSchedule" in options:
            opts.executionPhaseSettings.schedule = options["phaseSchedule"]

        # Phased execution currently does its own recompute annotations
        opts.autoRecomputation = (popart.RecomputationType.Standard
                                  if options["explicitRecomputation"] else
                                  popart.RecomputationType.NoRecompute)

        opts.outlineThreshold = -np.inf
        opts.enableOutliningCopyCostPruning = False
        opts.virtualGraphMode = (popart.VirtualGraphMode.ExecutionPhases
                                 if options["phasedExecution"] else
                                 popart.VirtualGraphMode.Manual)
        opts.explicitRecomputation = options["explicitRecomputation"]
        opts.aliasZeroCopy = options["aliasZeroCopy"]

        opts.batchSerializationSettings.factor = options["batchSerialize"]
        if "batchSchedule" in options:
            opts.batchSerializationSettings.batchSchedule = options[
                "batchSchedule"]
        if "batchConcat" in options:
            # Do not concatenate the batch across phases and virtual graphs
            # (causes more, smaller transfers but allows for individual
            # sub-batch elements to be transferred)
            opts.batchSerializationSettings.concatOnVirtualGraphChange = options[
                "batchConcat"]
            opts.batchSerializationSettings.concatOnExecutionPhaseChange = options[
                "batchConcat"]
            # Wait with loading activations until they are required
            opts.executionPhaseSettings.activationIOSchedule = popart.ExecutionPhaseIOSchedule.OnDemand

        # "tensorLocationSettings" applies to all four tensor kinds; a
        # separate "weightTensorLocationSettings" then overrides the weights.
        if "tensorLocationSettings" in options and options[
                "tensorLocationSettings"]:
            opts.activationTensorLocationSettings = options[
                "tensorLocationSettings"]
            opts.weightTensorLocationSettings = options[
                "tensorLocationSettings"]
            opts.optimizerStateTensorLocationSettings = options[
                "tensorLocationSettings"]
            opts.accumulatorTensorLocationSettings = options[
                "tensorLocationSettings"]
        if "weightTensorLocationSettings" in options and options[
                "weightTensorLocationSettings"]:
            opts.weightTensorLocationSettings = options[
                "weightTensorLocationSettings"]
        if options["replication"] > 1:
            opts.replicatedGraphCount = options["replication"]
            opts.enableReplicatedGraphs = True
        if "ioTiles" in options:
            opts.numIOTiles = options["ioTiles"]

        pat = popart.Patterns(popart.PatternsLevel.Default)
        # Device count: one per stage when phased, otherwise one per layer
        # plus one; multiplied by the replication factor.
        if options["phasedExecution"]:
            numIpus = options["stages"]
        else:
            numIpus = options["numLayers"] + 1
        if options["replication"] > 1:
            numIpus = numIpus * options["replication"]
        device = tu.create_test_device(numIpus,
                                       pattern=popart.SyncPattern.Full)

        session = popart.TrainingSession(fnModel=proto,
                                         dataFlow=dataFlow,
                                         userOptions=opts,
                                         loss=l1,
                                         optimizer=popart.ConstSGD(0.1),
                                         patterns=pat,
                                         deviceInfo=device)

        session.prepareDevice()

        session.weightsFromHost()

        # From here on `anchors` holds the result arrays, no longer the
        # AnchorReturnType configuration.
        anchors = session.initAnchorArrays()
        for k, v in anchors.items():
            print(f"anchor_before {k}={v.shape}")

        inputs = {x_in: input_data, mask: mask_data}
        stepio = popart.PyStepIO(inputs, anchors)

        # Ten training steps on the same data.
        for __ in range(10):
            session.run(stepio)

        session.modelToHost(
            str(tmpdir / f"streamingmemory_attention_{index}.onnx"))

        if options["replication"] > 1:
            for k, v in anchors.items():
                if k in gradient_keys:
                    # The gradient anchors will have an additional replication axis.
                    anchors[k] = np.sum(v, 1 if batches_per_step > 1 else 0)
                else:
                    # Output tensor needs reshaping.
                    anchors[k] = np.reshape(anchors[k], [
                        batches_per_step, sequence_length * batch_size,
                        hidden_size
                    ])
            for k, v in anchors.items():
                print(f"anchor_after {k}={v.shape}")

        return anchors

    test_results = []

    # AliasZeroCopy only supported with explicit recomputation, but not with
    # standard recomputation
    # Phased execution only supported with explicit recomputation, but not with
    # standard recomputation

    test_variants = []

    # Off-chip storage, loaded and stored through the compute tiles.
    defaultOffChip = popart.TensorLocationSettings(
        location=popart.TensorLocation(
            storage=popart.TensorStorage.OffChip,
            loadTileSet=popart.TileSet.Compute,
            storageTileSet=popart.TileSet.Compute,
            replicatedTensorSharding=popart.ReplicatedTensorSharding.Off),
        minElementsForOffChip=0,
        minElementsForReplicatedTensorSharding=2)

    # Off-chip storage, loaded and stored through the IO tiles.
    ioOffChip = popart.TensorLocationSettings(
        location=popart.TensorLocation(
            storage=popart.TensorStorage.OffChip,
            loadTileSet=popart.TileSet.IO,
            storageTileSet=popart.TileSet.IO,
            replicatedTensorSharding=popart.ReplicatedTensorSharding.Off),
        minElementsForOffChip=0,
        minElementsForReplicatedTensorSharding=2)

    # Ground truth variant
    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": False,
        "outlining": False,
        "explicitRecomputation": False,
        "aliasZeroCopy": False,
        "batchSerialize": 1,
        "replication": 1,
    })

    # Same as the ground truth, but with batch serialization factor 4.
    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": False,
        "outlining": False,
        "explicitRecomputation": False,
        "aliasZeroCopy": False,
        "batchSerialize": 4,
        "replication": 1,
    })

    # Phased execution with off-chip tensors; the next variants additionally
    # switch on outlining, explicit recomputation and aliasZeroCopy in turn.
    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": False,
        "explicitRecomputation": False,
        "aliasZeroCopy": False,
        "batchSerialize": 1,
        "replication": 1,
        "tensorLocationSettings": defaultOffChip,
    })

    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": True,
        "explicitRecomputation": False,
        "aliasZeroCopy": False,
        "batchSerialize": 1,
        "replication": 1,
        "tensorLocationSettings": defaultOffChip,
    })

    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": True,
        "explicitRecomputation": True,
        "aliasZeroCopy": False,
        "batchSerialize": 1,
        "replication": 1,
        "tensorLocationSettings": defaultOffChip,
    })

    test_variants.append({
        "stages": 2,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": True,
        "explicitRecomputation": True,
        "aliasZeroCopy": True,
        "batchSerialize": 1,
        "replication": 1,
        "tensorLocationSettings": defaultOffChip,
    })

    # Test batch serialized single device per replica execution, where all
    # streaming memory traffic goes through IO tiles, and activations are
    # stored and loaded one-by-one
    test_variants.append({
        "stages": 1,
        "stride": 4,
        "numLayers": 3,
        "phasedExecution": True,
        "outlining": True,
        "explicitRecomputation": True,
        "aliasZeroCopy": True,
        "batchSerialize": 4,
        "batchConcat": False,
        "replication": 2,
        "tensorLocationSettings": ioOffChip,
        "ioTiles": 192
    })

    # Test batch serialized single device per replica execution, where all
    # streaming memory traffic goes through IO tiles, and loading of the next
    # phase happens before storing the current phase
    test_variants.append({
        "stages": 1,
        "stride": 1,
        "numLayers": 3,
        "phasedExecution": True,
        "phaseSchedule": popart.ExecutionPhaseSchedule.BatchClusteredIO,
        "outlining": False,
        "explicitRecomputation": True,
        "aliasZeroCopy": True,
        "batchSerialize": 4,
        "batchConcat": True,
        "replication": 2,
        "tensorLocationSettings": ioOffChip,
        "ioTiles": 192
    })

    # Test a variety of batch serialisation schedules.
    for batchSchedule in [
            popart.BatchSerializationBatchSchedule.Scheduler,
            popart.BatchSerializationBatchSchedule.Isomorphic,
            popart.BatchSerializationBatchSchedule.OverlapOnIo,
            popart.BatchSerializationBatchSchedule.OverlapOnCompute,
    ]:

        test_variants.append({
            "stages": 1,
            "stride": 4,
            "numLayers": 3,
            "phasedExecution": True,
            "outlining": False,
            "explicitRecomputation": True,
            "aliasZeroCopy": True,
            "batchSerialize": 4,
            "batchSchedule": batchSchedule,
            "batchConcat": False,
            "replication": 2,
            "tensorLocationSettings": ioOffChip,
            "ioTiles": 192
        })

    # Test replicated tensor sharding + on chip (no outlining).
    test_variants.append({
        "stages":
        2,
        "numLayers":
        3,
        "phasedExecution":
        True,
        "outlining":
        False,
        "explicitRecomputation":
        False,
        "aliasZeroCopy":
        False,
        "batchSerialize":
        1,
        "replication":
        2,
        "tensorLocationSettings":
        defaultOffChip,
        "weightTensorLocationSettings":
        popart.TensorLocationSettings(location=popart.TensorLocation(
            storage=popart.TensorStorage.OnChip,
            loadTileSet=popart.TileSet.Compute,
            storageTileSet=popart.TileSet.Compute,
            replicatedTensorSharding=popart.ReplicatedTensorSharding.On),
                                      minElementsForOffChip=0,
                                      minElementsForReplicatedTensorSharding=2)
    })

    # Test replicated tensor sharding + off chip (no outlining).
    test_variants.append({
        "stages":
        2,
        "numLayers":
        3,
        "phasedExecution":
        True,
        "outlining":
        False,
        "explicitRecomputation":
        False,
        "aliasZeroCopy":
        False,
        "batchSerialize":
        1,
        "replication":
        2,
        "tensorLocationSettings":
        defaultOffChip,
        "weightTensorLocationSettings":
        popart.TensorLocationSettings(location=popart.TensorLocation(
            storage=popart.TensorStorage.OffChip,
            loadTileSet=popart.TileSet.Compute,
            storageTileSet=popart.TileSet.Compute,
            replicatedTensorSharding=popart.ReplicatedTensorSharding.On),
                                      minElementsForOffChip=0,
                                      minElementsForReplicatedTensorSharding=2)
    })

    # Run every variant; index 0 is the ground truth.
    index = 0
    for test_option in test_variants:
        print(f"Running {index}: {test_option}")
        test_results.append(run_test(index, test_option))
        index += 1

    gt_onnx = onnx.load(str(tmpdir / f"streamingmemory_attention_0.onnx"))

    for i in range(1, index):
        print(f"Testing run {i}: {test_variants[i]}")
        # Anchors (weight gradients and the final output) must match the
        # ground truth.
        for key in test_results[0].keys():
            assert np.all(
                np.isclose(test_results[0][key],
                           test_results[i][key],
                           equal_nan=False))

        # The initializers of the saved model must match as well; they are
        # compared by position.
        val_onnx = onnx.load(
            str(tmpdir / f"streamingmemory_attention_{i}.onnx"))
        for j in range(len(gt_onnx.graph.initializer)):
            print(f"Checking initializer {j}")
            gt = gt_onnx.graph.initializer[j]
            gt = numpy_helper.to_array(gt)
            val = val_onnx.graph.initializer[j]
            val = numpy_helper.to_array(val)
            assert np.allclose(gt, val, equal_nan=False)
Exemplo n.º 16
0
 def test_bool(self):
     """Round-trip a random boolean array through numpy_helper."""
     # The np.bool alias was removed from NumPy; the builtin bool is the
     # same dtype and works on every NumPy version.
     a = np.random.randint(2, size=(13, 37)).astype(bool)
     tensor_def = numpy_helper.from_array(a, "test")
     self.assertEqual(tensor_def.name, "test")
     a_recover = numpy_helper.to_array(tensor_def)
     np.testing.assert_equal(a, a_recover)
Exemplo n.º 17
0
# Load the sample inputs and the reference outputs that ship with the model.
# Each data set lives in a directory named "<test_data_dir>_<index>".

test_data_dir = 'test_data_set'
test_data_num = 3

import glob
import os

# Read input_0.pb from every data-set directory.
inputs = []
for i in range(test_data_num):
    input_file = os.path.join(test_data_dir + '_{}'.format(i), 'input_0.pb')
    with open(input_file, 'rb') as f:
        serialized = f.read()
    tensor = onnx.TensorProto()
    tensor.ParseFromString(serialized)
    inputs.append(numpy_helper.to_array(tensor))

print('Loaded {} inputs successfully.'.format(test_data_num))

# Read output_0.pb from every data-set directory.
ref_outputs = []
for i in range(test_data_num):
    output_file = os.path.join(test_data_dir + '_{}'.format(i), 'output_0.pb')
    with open(output_file, 'rb') as f:
        serialized = f.read()
    tensor = onnx.TensorProto()
    tensor.ParseFromString(serialized)
    ref_outputs.append(numpy_helper.to_array(tensor))

print('Loaded {} reference outputs successfully.'.format(test_data_num))
Exemplo n.º 18
0
def convert_operations(onnx_model, batch_dim=0):
    """
    Convert onnx model operations. Yields onnx's operator_id, operator_name and
    converted pytorch operator.

    Parameters
    ----------
    onnx_model: onnx.ModelProto
        Loaded onnx model. NOTE: the graph is modified in place when a MatMul
        with initializer weights is immediately followed by an Add that has an
        initializer input: the Add is folded into the generated ``nn.Linear``
        as its bias, the MatMul takes over the Add's outputs and the Add node
        is removed from ``onnx_model.graph.node``.
    batch_dim: int
        Usually 0 for computer vision models and 1 for NLP models.

    Returns
    -------
    iterator: (op_id, op_name, op)

    Raises
    ------
    NotImplementedError
        If an op_type has no explicit conversion and no function of the same
        (lower-cased) name exists in ``torch``, or if a Reshape node takes its
        target shape from something other than a graph initializer.
    """
    weights = {tensor.name: tensor for tensor in onnx_model.graph.initializer}

    for i, node in enumerate(onnx_model.graph.node):
        # extract only useful inputs
        params = [
            weights[par_name] for par_name in node.input if par_name in weights
        ]

        if node.op_type == "Conv":
            op = convert_layer(node, "Conv", params)
        elif node.op_type == "Relu":
            op = nn.ReLU(inplace=True)
        elif node.op_type == "LeakyRelu":
            op = nn.LeakyReLU(**extract_attributes(node), inplace=True)
        elif node.op_type == "Sigmoid":
            op = nn.Sigmoid()
        elif node.op_type == "MaxPool":
            op = convert_layer(node, "MaxPool")
        elif node.op_type == "AveragePool":
            op = convert_layer(node, "AvgPool")
        elif node.op_type == "Flatten":
            op = Flatten(**extract_attributes(node))
        elif node.op_type == "Gemm":
            op = convert_linear_layer(node, params)
            op.feature_dim = batch_dim + 1  # Necessary for transformers
        elif node.op_type == "BatchNormalization":
            op = convert_batch_norm_layer(node, params=params)
        elif node.op_type == "InstanceNormalization":
            op = convert_instance_norm_layer(node, params=params)
        elif node.op_type == "Concat":
            op = Concat(**extract_attributes(node))
        elif node.op_type == "Constant":
            # How Constant ops should be handled is still an open question;
            # for now the constant value is wrapped in a callable.
            op = value_wrapper(
                torch.from_numpy(extract_attributes(node)["constant"]))
        elif node.op_type == "Reshape":
            # The target shape has to be known at conversion time, i.e. it
            # must be stored as a graph initializer.
            shape_proto = next(
                (tensor for tensor in onnx_model.graph.initializer
                 if tensor.name == node.input[1]), None)
            if shape_proto is None:
                # Previously this fell through to ``tuple(None)`` and died
                # with an uninformative TypeError.
                raise NotImplementedError(
                    "Conversion not implemented for Reshape with a "
                    "non-initializer shape input (node output={}).".format(
                        node.output[0]))
            op = Reshape(tuple(numpy_helper.to_array(shape_proto)))
        elif node.op_type == "Shape":
            op = Shape()
        elif node.op_type == "Gather":
            op = Gather(**extract_attributes(node))
        elif node.op_type == "Squeeze":
            op = Squeeze(**extract_attributes(node))
        elif node.op_type == "Unsqueeze":
            op = partial(torch.unsqueeze, **extract_attributes(node))
        elif node.op_type == "ConstantOfShape":
            op = ConstantOfShape(**extract_attributes(node))
        elif node.op_type == "Slice":
            op = Slice(**extract_attributes(node))
        elif node.op_type == "Cast":
            op = Cast(**extract_attributes(node))
        elif node.op_type == "Where":
            op = Where()
        elif node.op_type == "Equal":
            op = torch.eq
        elif node.op_type == "Mul":
            op = Mul(**extract_attributes(node))
        elif node.op_type == "Div":
            op = torch.true_divide
        elif node.op_type == "MatMul":
            if params:
                weight = torch.from_numpy(numpy_helper.to_array(params[0]))
                op = nn.Linear(weight.shape[0], weight.shape[1], bias=False)
                op.weight.data = weight.t()

                # check if next node Add to add bias; a MatMul that is the
                # last node of the graph has no successor to inspect.
                # NOTE(review): the Add is fused without checking that it
                # actually consumes this MatMul's output - confirm.
                if i + 1 < len(onnx_model.graph.node):
                    next_node = onnx_model.graph.node[i + 1]
                    next_params = [
                        weights[par_name] for par_name in next_node.input
                        if par_name in weights
                    ]
                    if next_params and next_node.op_type == "Add":
                        bias = torch.from_numpy(
                            numpy_helper.to_array(next_params[0]))
                        op.bias = nn.Parameter(bias)
                        node.output.pop()
                        node.output.extend(next_node.output)
                        onnx_model.graph.node.pop(i + 1)  # remove next node
            else:
                op = Matmul()
        elif node.op_type == "Sub":
            op = torch.sub
        elif node.op_type == "Pow":
            op = torch.pow
        elif node.op_type == "Sqrt":
            op = torch.sqrt
        elif node.op_type == "Softmax":
            op = nn.Softmax(dim=1)
        elif node.op_type == "Transpose":
            op = partial(torch.Tensor.permute, **extract_attributes(node))
        elif node.op_type == "Split":
            kwargs = extract_attributes(node)
            # if the split_size_or_sections is not in node attributes,
            # the number_of_splits becomes the number of node outputs
            if "split_size_or_sections" not in kwargs:
                kwargs["number_of_splits"] = len(node.output)
            op = Split(**kwargs)
        elif node.op_type == "ReduceMean":
            kwargs = dict(keepdim=True)
            kwargs.update(extract_attributes(node))
            op = partial(torch.mean, **kwargs)
        elif node.op_type == "Add":
            op = Add()
        elif node.op_type == "GlobalAveragePool":
            op = GlobalAveragePool()
        elif node.op_type == "ConvTranspose":
            op = convert_layer(node, "ConvTranspose", params)
        elif node.op_type == "Identity":
            op = nn.Identity()
        elif node.op_type == "Resize":
            op = Resize(**extract_attributes(node))
        elif node.op_type == "Upsample":
            op = Upsample(**extract_attributes(node))
        elif node.op_type == "OneHot":
            op = OneHot(**extract_attributes(node))
        elif node.op_type == "Pad":
            op = Pad(**extract_attributes(node))
        elif node.op_type == "Clip":
            op = Clamp(**extract_attributes(node))
        elif node.op_type == "Tanh":
            op = torch.tanh
        elif node.op_type == "Erf":
            op = torch.erf
        elif node.op_type == "Log":
            op = torch.log
        elif node.op_type == "Exp":
            op = torch.exp
        elif node.op_type == "LRN":
            op = nn.LocalResponseNorm(**extract_attributes(node))
        elif node.op_type == "Dropout":
            op = nn.Dropout(p=1.0)
        else:
            # Last resort: use the torch function whose name matches the
            # lower-cased ONNX op type, if there is one.
            op = getattr(torch, node.op_type.lower(), None)
            if op is None:
                raise NotImplementedError(
                    "Conversion not implemented for op_type={}.".format(
                        node.op_type))
            else:
                print("Automatic inference of operator: {}".format(
                    node.op_type.lower()))

        op_name = "{}_{}".format(node.op_type, node.output[0])
        op_id = node.output[0]
        yield op_id, op_name, op
Exemplo n.º 19
0
def from_onnx(filename,
              value_dtypes=None,
              default_placeholder_dtype=dtype_list.int32,
              default_variable_dtype=dtype_list.int32,
              default_constant_dtype=dtype_list.int32,
              default_operator_dtype=dtype_list.int32,
              default_scale_dtype=dtype_list.int32,
              default_bias_dtype=dtype_list.int32,
              onnx_input_layout='NCHW',
              onnx_filter_layout='OIHW',
              disable_fusion=False):
    """
    Convert ONNX model to NNgen model

    Parameters
    ----------
    filename : str
        File name of ONNX model

    value_dtypes : dict
        dtype_info dictionary by name

    default_placeholder_dtype : nngen.dtype_info
        Default dtype for placeholder

    default_variable_dtype : nngen.dtype_info
        Default dtype for variable

    default_constant_dtype : nngen.dtype_info
        Default dtype for constant

    default_operator_dtype : nngen.dtype_info
        Default dtype for operator

    default_scale_dtype : nngen.dtype_info
        Default dtype for scale

    default_bias_dtype : nngen.dtype_info
        Default dtype for bias

    onnx_input_layout : str
        Layout of ONNX input values

    onnx_filter_layout : str
        Layout of ONNX filter (weight) values

    disable_fusion : bool
        Disable operator fusion

    Returns
    -------
    outputs : collections.OrderedDict
        Dict of output values

    placeholders : collections.OrderedDict
        Dictionary of placeholders

    variables : collections.OrderedDict
        Dictionary of variables

    constants : collections.OrderedDict
        Dictionary of constants

    operators : collections.OrderedDict
        Dictionary of operators

    Raises
    ------
    ImportError
        If the ``onnx`` package cannot be imported.
    """

    # onnx is an optional dependency, so it is imported lazily. Only
    # ImportError is translated; anything else (including KeyboardInterrupt)
    # propagates unchanged instead of being masked by a bare ``except``.
    try:
        import onnx
        from onnx import numpy_helper
    except ImportError as err:
        raise ImportError('onnx is required.') from err

    if value_dtypes is None:
        value_dtypes = {}

    # load model
    model = onnx.load(filename)

    # input/output node dict
    input_nodes = collections.OrderedDict()
    output_nodes = collections.OrderedDict()

    for input_var in model.graph.input:
        input_nodes[input_var.name] = input_var

    for output_var in model.graph.output:
        output_nodes[output_var.name] = output_var

    # variable ndarray dict
    variable_values = collections.OrderedDict()

    for weight in model.graph.initializer:
        variable_values[weight.name] = numpy_helper.to_array(weight)

    # constant ndarray dict
    constant_values = collections.OrderedDict()

    for node in model.graph.node:
        if node.op_type == 'Constant':
            name = util.get_name(node)
            # the tensor payload is read from the node's first attribute
            constant_values[name] = numpy_helper.to_array(node.attribute[0].t)

    # placeholders
    placeholders = _to_placeholders(input_nodes, output_nodes,
                                    variable_values, constant_values,
                                    value_dtypes,
                                    default_placeholder_dtype,
                                    default_variable_dtype,
                                    default_constant_dtype,
                                    default_operator_dtype)

    # variables
    variables = _to_variables(input_nodes, output_nodes,
                              variable_values, constant_values,
                              value_dtypes,
                              default_placeholder_dtype,
                              default_variable_dtype,
                              default_constant_dtype,
                              default_operator_dtype)

    # constants: the ndarrays collected above are used directly, no wrapper
    # objects are built for them
    constants = constant_values

    # producer/consumer table
    # producers: node name -> names of the values the node reads
    # consumers: value name -> names of the nodes that read it
    producers = collections.defaultdict(list)
    consumers = collections.defaultdict(list)

    for node in model.graph.node:
        node_name = util.get_name(node)
        for arg in node.input:
            if arg not in producers[node_name]:
                producers[node_name].append(arg)
            if node_name not in consumers[arg]:
                consumers[arg].append(node_name)

    # operators
    operators = collections.OrderedDict()
    visitor = _OperatorVisitor(model,
                               placeholders, variables, constants, operators,
                               producers, consumers,
                               value_dtypes,
                               default_placeholder_dtype, default_variable_dtype,
                               default_constant_dtype, default_operator_dtype,
                               default_scale_dtype, default_bias_dtype,
                               onnx_input_layout, onnx_filter_layout,
                               disable_fusion)

    # from here on use the visitor's own tables
    placeholders = visitor.placeholders
    variables = visitor.variables
    constants = visitor.constants
    operators = visitor.operators

    # visit the graph starting from every model output
    for name in output_nodes:
        visitor.visit(name)

    # outputs: look each model output up in the tables, operators first
    outputs = collections.OrderedDict()

    for name in output_nodes:
        if name in operators:
            outputs[name] = operators[name]
        elif name in placeholders:
            outputs[name] = placeholders[name]
        elif name in variables:
            outputs[name] = variables[name]
        elif name in constants:
            outputs[name] = constants[name]

    return outputs, placeholders, variables, constants, operators
Exemplo n.º 20
0
def prepare_model(model):
    """
    Walk an ONNX graph once and collect shape, constant and connectivity tables.

    Initializers and ``Constant`` nodes are converted with ``nchw_to_nhwc``
    and recorded as constants. Shapes are then propagated node by node, and
    Add/Sub/Mul/Gather/Shape/Unsqueeze/Concat/Expand results are folded into
    constants when all of their operands are already known.

    Parameters
    ----------
    model
        Loaded ONNX model; ``model.graph.initializer``, ``model.graph.input``
        and ``model.graph.node`` are read.

    Return
    ------
    (shape_map, constants_map, output_node_map, input_node_map, placeholdernames)
        shape_map : dict
            tensor name -> shape of that tensor.
        constants_map : dict
            tensor name -> value known at preparation time.
        output_node_map : dict
            name of a node's first output -> that node.
        input_node_map : dict
            tensor name -> the graph input entry, overwritten by the last node
            that consumes the tensor.
        placeholdernames : list
            names of all entries of ``model.graph.input``.

    Raises
    ------
    AssertionError
        If the graph contains an operation type that is not handled here.
    """
    shape_map = {}
    constants_map = {}
    output_node_map = {}
    input_node_map = {}

    for initial in model.graph.initializer:
        const = nchw_to_nhwc(numpy_helper.to_array(initial)).copy()
        constants_map[initial.name] = const
        shape_map[initial.name] = const.shape

    placeholdernames = []
    for graph_input in model.graph.input:
        placeholdernames.append(graph_input.name)
        # graph inputs that are also initializers already have a shape
        if graph_input.name not in shape_map:
            shape_map[graph_input.name] = onnxshape_to_intlist(
                graph_input.type.tensor_type.shape)
            input_node_map[graph_input.name] = graph_input

    for node in model.graph.node:
        output_node_map[node.output[0]] = node
        for input_name in node.input:
            input_node_map[input_name] = node

        if node.op_type == "Constant":
            const = nchw_to_nhwc(numpy_helper.to_array(node.attribute[0].t))
            constants_map[node.output[0]] = const
            shape_map[node.output[0]] = const.shape

        elif node.op_type in ["MatMul", "Gemm"]:
            transA = 0
            transB = 0
            for attribute in node.attribute:
                if attribute.name == 'transA':
                    transA = attribute.i
                elif attribute.name == 'transB':
                    transB = attribute.i
            M = shape_map[node.input[0]][transA]
            if len(shape_map[node.input[1]]) == 1 and transB == 0:
                N = 1
            else:
                N = shape_map[node.input[1]][1 - transB]
            shape_map[node.output[0]] = [M, N]

        elif node.op_type in ["Add", "Sub", "Mul"]:
            shape_map[node.output[0]] = shape_map[node.input[0]]
            # fold the operation when both operands are known constants
            if (node.input[0] in constants_map
                    and node.input[1] in constants_map):
                lhs = constants_map[node.input[0]]
                rhs = constants_map[node.input[1]]
                if node.op_type == "Add":
                    result = np.add(lhs, rhs)
                elif node.op_type == "Sub":
                    result = np.subtract(lhs, rhs)
                else:  # "Mul"
                    result = np.multiply(lhs, rhs)
                constants_map[node.output[0]] = result

        elif node.op_type in ["Conv", "MaxPool", "AveragePool"]:
            output_shape = []
            input_shape = shape_map[node.input[0]]

            # pooling nodes carry no filter, so their kernel shape must come
            # from the 'kernel_shape' attribute
            require_kernel_shape = node.op_type in ["MaxPool", "AveragePool"]
            if not require_kernel_shape:
                filter_shape = shape_map[node.input[1]]
                kernel_shape = filter_shape[1:-1]

            # NOTE: the 'auto_pad' and 'group' attributes are not taken into
            # account by this shape computation.
            strides = [1, 1]
            padding = [0, 0, 0, 0]
            dilations = [1, 1]
            ceil_mode = 0
            for attribute in node.attribute:
                if attribute.name == 'strides':
                    strides = attribute.ints
                elif attribute.name == 'pads':
                    padding = attribute.ints
                elif attribute.name == 'kernel_shape':
                    kernel_shape = attribute.ints
                elif attribute.name == 'dilations':
                    dilations = attribute.ints
                elif attribute.name == 'ceil_mode':
                    ceil_mode = attribute.i

            effective_kernel_shape = [(kernel_shape[i] - 1) * dilations[i] + 1
                                      for i in range(len(kernel_shape))]

            output_shape.append(input_shape[0])

            rounding = np.ceil if ceil_mode == 1 else np.floor
            for i in range(len(kernel_shape)):
                effective_input_size = input_shape[1 + i]
                effective_input_size += padding[i]
                effective_input_size += padding[i + len(kernel_shape)]
                strided_kernel_positions = int(
                    rounding(
                        (effective_input_size - effective_kernel_shape[i])
                        / float(strides[i])))
                output_shape.append(1 + strided_kernel_positions)

            # last dimension: unchanged for pooling, filter count for Conv
            if require_kernel_shape:
                output_shape.append(input_shape[1])
            else:
                output_shape.append(filter_shape[0])

            shape_map[node.output[0]] = output_shape

        elif node.op_type in ["Relu", "Sigmoid", "Tanh", "Softmax"]:
            shape_map[node.output[0]] = shape_map[node.input[0]]

        # Gather is for the moment solely for shapes
        elif node.op_type == "Gather":
            axis = 0
            for attribute in node.attribute:
                axis = attribute.i
            if (node.input[0] in constants_map
                    and node.input[1] in constants_map):
                data = constants_map[node.input[0]]
                indexes = constants_map[node.input[1]]
                constants_map[node.output[0]] = np.take(data, indexes, axis)

            if node.input[0] in shape_map and node.input[1] in shape_map:
                data_shape = shape_map[node.input[0]]
                indices_shape = shape_map[node.input[1]]
                r = len(data_shape)
                q = len(indices_shape)
                out_rank = q + r - 1
                if out_rank == 0:
                    shape_map[node.output[0]] = indices_shape
                else:
                    # output shape =
                    #   data_shape[:axis] + indices_shape + data_shape[axis+1:]
                    output_shape = []
                    for i in range(out_rank):
                        if i < axis:
                            output_shape.append(data_shape[i])  # i < axis < r
                        elif i < axis + q:
                            # these dimensions come from the indices tensor
                            # (previously read from the data shape by mistake)
                            output_shape.append(
                                indices_shape[i - axis])  # i - axis < q
                        else:
                            output_shape.append(
                                data_shape[i - q + 1])  # i - q + 1 < r
                    shape_map[node.output[0]] = output_shape

        elif node.op_type == "Shape":
            if node.input[0] in shape_map:
                constants_map[node.output[0]] = shape_map[node.input[0]]
                shape_map[node.output[0]] = [len(shape_map[node.input[0]])]

        elif node.op_type == "Reshape":
            if node.input[1] in constants_map:
                target = constants_map[node.input[1]]
                total = 1
                replace_index = -1
                for index, dim in enumerate(target):
                    if dim == -1:
                        replace_index = index
                    else:
                        total *= dim

                if replace_index != -1:
                    # resolve the -1 placeholder in place; integer division
                    # keeps the inferred dimension an integer
                    target[replace_index] = np.prod(
                        shape_map[node.input[0]]) // total

                if len(target) == 4:
                    # 4-d targets are stored with entry 1 moved to the end
                    shape_map[node.output[0]] = [
                        target[0], target[2], target[3], target[1]
                    ]
                else:
                    shape_map[node.output[0]] = target

        elif node.op_type == "Unsqueeze":
            if node.input[0] in shape_map:
                axis = node.attribute[0].ints
                output_shape = list(shape_map[node.input[0]])
                if node.input[0] in constants_map:
                    constants_map[node.output[0]] = constants_map[
                        node.input[0]]
                for i in axis:
                    output_shape.insert(i, 1)
                    if node.input[0] in constants_map:
                        constants_map[node.output[0]] = np.expand_dims(
                            constants_map[node.output[0]], axis=i)
                shape_map[node.output[0]] = output_shape

        elif node.op_type == "Concat":
            axis = node.attribute[0].i
            if all(name in constants_map for name in node.input):
                constants_map[node.output[0]] = np.concatenate(
                    [constants_map[name] for name in node.input], axis=axis)

            if all(name in shape_map for name in node.input):
                new_axis_size = sum(
                    shape_map[name][axis] for name in node.input)
                first_shape = shape_map[node.input[0]]
                shape_map[node.output[0]] = [
                    first_shape[i] if i != axis else new_axis_size
                    for i in range(len(first_shape))
                ]

        elif node.op_type == "Expand":
            if node.input[1] in constants_map:
                target = constants_map[node.input[1]]
                if len(target) == 4:
                    shape_map[node.output[0]] = [
                        target[0], target[2], target[3], target[1]
                    ]
                else:
                    shape_map[node.output[0]] = target

                # broadcast the input constant to the expanded shape
                result = np.zeros(
                    shape_map[node.output[0]]) + constants_map[node.input[0]]
                constants_map[node.output[0]] = result
        else:
            # raised explicitly so the check is not stripped under ``-O``
            raise AssertionError("Operations of type " + node.op_type +
                                 " are not yet supported.")

    return shape_map, constants_map, output_node_map, input_node_map, placeholdernames
Exemplo n.º 21
0
        total_existing_data_set = 0
        print('Verifying model {} with existing test data...'.format(onnx_model_name))
        for f in glob.glob(os.path.join(onnx_model_dir, '*.npz')):
            test_data = np.load(f, encoding='bytes')
            inputs = list(test_data['inputs'])
            ref_outputs = list(test_data['outputs'])
            onnx_verify(onnx_model, inputs, ref_outputs)
            total_existing_data_set += 1
        for f in glob.glob(os.path.join(onnx_model_dir, 'test_data_set*')):
            inputs = []
            inputs_num = len(glob.glob(os.path.join(f, 'input_*.pb')))
            for i in range(inputs_num):
                tensor = onnx.TensorProto()
                with open(os.path.join(f, 'input_{}.pb'.format(i)), 'rb') as pf:
                    tensor.ParseFromString(pf.read())
                inputs.append(numpy_helper.to_array(tensor))
            ref_outputs = []
            ref_outputs_num = len(glob.glob(os.path.join(f, 'output_*.pb')))
            for i in range(ref_outputs_num):
                tensor = onnx.TensorProto()
                with open(os.path.join(f, 'output_{}.pb'.format(i)), 'rb') as pf:
                    tensor.ParseFromString(pf.read())
                ref_outputs.append(numpy_helper.to_array(tensor))
            onnx_verify(onnx_model, inputs, ref_outputs)
            total_existing_data_set += 1

        starting_index = 0
        while os.path.exists(os.path.join(onnx_model_dir, 'test_data_set_{}'.format(starting_index))):
            starting_index += 1

        if total_existing_data_set == 0 and add_test_data == 0:
Exemplo n.º 22
0
 def _asarray(proto):
     """Turn a tensor proto into an array via ``jnp.asarray``.

     The proto is decoded with ``numpy_helper.to_array`` and reshaped to the
     dimensions listed in ``proto.dims`` before being handed to ``jnp``.
     """
     decoded = numpy_helper.to_array(proto)
     target_shape = tuple(proto.dims)
     return jnp.asarray(decoded.reshape(target_shape))
Exemplo n.º 23
0
    def convert_model_float32_to_float16(self, cast_input_output=True):
        """Convert the float32 tensors of the graph to float16.

        With an ``onnxconverter_common`` release newer than 1.7.0 the work is
        delegated to ``oc.float16.convert_float_to_float16``. Otherwise the
        initializers, Constant/ConstantOfShape values and Cast targets are
        rewritten here, and the graph inputs/outputs are either converted too
        or wrapped in Cast nodes.
        For decoder model with past_key_values, it is recommended to set
        cast_input_output=False for better performance.

        Args:
            cast_input_output (bool, optional): keep the data type of inputs
                and outputs, adding Cast nodes that convert float32 inputs to
                float16 and float16 outputs back to float32. Defaults to True.
        """
        from packaging.version import Version
        import onnxconverter_common as oc
        if Version(oc.__version__) > Version("1.7.0"):
            self.model = oc.float16.convert_float_to_float16(self.model, keep_io_types=cast_input_output)
            return

        graph = self.model.graph

        # Re-encode every float32 initializer as float16, keeping its name.
        for initializer in graph.initializer:
            if initializer.data_type != TensorProto.FLOAT:
                continue
            half_values = numpy_helper.to_array(initializer).astype(np.float16)
            initializer.CopyFrom(numpy_helper.from_array(half_values, initializer.name))

        for node in graph.node:
            # Float32 tensors embedded in constant-producing nodes.
            if node.op_type in ['Constant', 'ConstantOfShape']:
                for att in node.attribute:
                    if att.name != 'value' or att.t.data_type != TensorProto.FLOAT:
                        continue
                    half_tensor = numpy_helper.from_array(numpy_helper.to_array(att.t).astype(np.float16))
                    att.CopyFrom(helper.make_attribute("value", half_tensor))
            # Casts that targeted float32 now target float16.
            if node.op_type == 'Cast':
                for att in node.attribute:
                    if att.name == 'to' and att.i == TensorProto.FLOAT:
                        att.CopyFrom(helper.make_attribute("to", int(TensorProto.FLOAT16)))

        if not cast_input_output:
            self.change_input_output_float32_to_float16()
            return

        # From here on the graph inputs and outputs keep their data types.
        # Float32 inputs get a Cast node that feeds a float16 copy to the graph.
        for input_value_info in graph.input:
            if input_value_info.type.tensor_type.elem_type != TensorProto.FLOAT:
                continue

            if self.get_initializer(input_value_info.name) is not None:
                # for compatibility for old converter/exporter
                input_value_info.type.tensor_type.elem_type = TensorProto.FLOAT16
                continue

            cast_input = input_value_info.name
            cast_output = input_value_info.name + '_float16'
            self.replace_input_of_all_nodes(cast_input, cast_output)
            cast_node = helper.make_node('Cast', inputs=[cast_input], outputs=[cast_output])
            cast_node.attribute.extend([helper.make_attribute("to", int(TensorProto.FLOAT16))])
            self.add_node(cast_node)

        # Float32 outputs get a Cast node that converts the float16 result back.
        for output_value_info in graph.output:
            if output_value_info.type.tensor_type.elem_type != TensorProto.FLOAT:
                continue

            cast_output = output_value_info.name
            cast_input = output_value_info.name + '_float16'
            self.replace_output_of_all_nodes(cast_output, cast_input)
            self.replace_input_of_all_nodes(cast_output, cast_input)
            cast_node = helper.make_node('Cast', inputs=[cast_input], outputs=[cast_output])
            cast_node.attribute.extend([helper.make_attribute("to", int(TensorProto.FLOAT))])
            self.add_node(cast_node)
Exemplo n.º 24
0
    def create_attention_node(self, mask_index, q_matmul, k_matmul, v_matmul,
                              q_add, k_add, v_add, input, output):
        """Add a fused 'Attention' node built from separate Q/K/V projections.

        The weights (second input of each MatMul) and biases (second input of
        each Add) are looked up as initializers, merged into one
        (hidden_size, 3 * hidden_size) weight and one (3 * hidden_size,) bias,
        registered as initializers and graph inputs, and wired into a new
        ``com.microsoft`` Attention node that reads ``input`` and
        ``mask_index`` and writes ``output``.
        """
        hidden_size = self.hidden_size

        # Look up all six initializers first: weights, then biases.
        weight_protos = [
            self.get_initializer(matmul.input[1])
            for matmul in (q_matmul, k_matmul, v_matmul)
        ]
        bias_protos = [
            self.get_initializer(add.input[1])
            for add in (q_add, k_add, v_add)
        ]

        weight_arrays = []
        for proto in weight_protos:
            array = numpy_helper.to_array(proto)
            assert array.shape == (hidden_size, hidden_size)
            weight_arrays.append(array)

        # Stacking on axis -2 interleaves the rows of Q, K and V so that the
        # flattened data has the (hidden_size, 3 * hidden_size) layout.
        qkv_weight = np.stack(tuple(weight_arrays), axis=-2)

        bias_arrays = []
        for proto in bias_protos:
            array = numpy_helper.to_array(proto)
            assert array.shape == (hidden_size, )
            bias_arrays.append(array)

        qkv_bias = np.stack(tuple(bias_arrays), axis=-2)

        attention_node_name = self.create_node_name('Attention')

        # Merged weight: initializer plus matching graph input.
        weight_dims = [hidden_size, 3 * hidden_size]
        weight = onnx.helper.make_tensor(
            name=attention_node_name + '_qkv_weight',
            data_type=TensorProto.FLOAT,
            dims=weight_dims,
            vals=qkv_weight.flatten().tolist())
        self.add_initializer(weight)
        self.add_input(
            onnx.helper.make_tensor_value_info(
                weight.name, TensorProto.FLOAT,
                [hidden_size, 3 * hidden_size]))

        # Merged bias: initializer plus matching graph input.
        bias = onnx.helper.make_tensor(
            name=attention_node_name + '_qkv_bias',
            data_type=TensorProto.FLOAT,
            dims=[3 * hidden_size],
            vals=qkv_bias.flatten().tolist())
        self.add_initializer(bias)
        self.add_input(
            onnx.helper.make_tensor_value_info(
                bias.name, TensorProto.FLOAT, [3 * hidden_size]))

        node_inputs = [
            input,
            attention_node_name + '_qkv_weight',
            attention_node_name + '_qkv_bias',
            mask_index,
        ]
        attention_node = onnx.helper.make_node('Attention',
                                               inputs=node_inputs,
                                               outputs=[output],
                                               name=attention_node_name)
        attention_node.domain = "com.microsoft"
        num_heads_attr = onnx.helper.make_attribute("num_heads",
                                                    self.num_heads)
        attention_node.attribute.extend([num_heads_attr])

        self.add_node(attention_node)
Exemplo n.º 25
0
 def _test_numpy_helper_float_type(self, dtype):  # type: (np.number) -> None
     """Check that a random ``dtype`` array survives a TensorProto round trip."""
     original = np.random.rand(13, 37).astype(dtype)
     proto = numpy_helper.from_array(original, "test")
     self.assertEqual(proto.name, "test")
     # Converting back must reproduce the source array exactly.
     np.testing.assert_equal(original, numpy_helper.to_array(proto))
Exemplo n.º 26
0
 def test_bool(self):  # type: () -> None
     """Check that a random boolean array survives a TensorProto round trip."""
     # ``np.bool`` (a plain alias of the builtin ``bool``) was removed in
     # NumPy 1.24; ``np.bool_`` names the same dtype on every release.
     a = np.random.randint(2, size=(13, 37)).astype(np.bool_)
     tensor_def = numpy_helper.from_array(a, "test")
     self.assertEqual(tensor_def.name, "test")
     a_recover = numpy_helper.to_array(tensor_def)
     np.testing.assert_equal(a, a_recover)
Exemplo n.º 27
0
    def fuse_embedding(self, node, output_name_to_node):
        """Try to fuse the embedding subgraph that feeds a LayerNormalization.

        The method looks for word, position and segment embedding initializers
        upstream of ``node`` and, when all three are found, hands their names
        to ``create_embedding_subgraph``.

        Args:
            node: a node whose ``op_type`` is ``'LayerNormalization'``.
            output_name_to_node: mapping passed through to the parent-matching
                helpers.

        Returns:
            True when the embedding subgraph was created, False when any part
            of the pattern could not be matched.

        Raises:
            AssertionError: if ``node`` is not a LayerNormalization node.
        """
        assert node.op_type == 'LayerNormalization'
        logger.debug(
            f"start fusing embedding from node with output={node.output[0]}..."
        )
        word_embed_path = self.match_parent_path(node,
                                                 ['Add', 'Add', 'Gather'],
                                                 [0, 0, 0],
                                                 output_name_to_node)
        if word_embed_path is None:
            logger.debug("failed to match word_embed_path")
            return False

        skip_node, add_node, gather_node = word_embed_path

        # --- word embedding: the table gathered by the innermost Gather ---
        word_initializer = self.get_initializer(gather_node.input[0])
        if word_initializer is None:
            logger.debug("failed to get word initializer")
            return False

        temp = numpy_helper.to_array(word_initializer)
        if len(temp.shape) == 2:
            logger.info("Found word embedding. name:{}, shape:{}".format(
                word_initializer.name, temp.shape))
            word_embedding = word_initializer.name
        else:
            logger.info(
                "Failed to find word embedding. name:{}, shape:{}".format(
                    word_initializer.name, temp.shape))
            return False

        # --- position embedding: either a constant added directly, or a
        # table reached through Gather <- Slice ---
        pos_initializer = self.get_initializer(add_node.input[1])
        if pos_initializer is not None:
            temp = numpy_helper.to_array(pos_initializer)
            if len(temp.shape) == 3 and temp.shape[0] == 1:
                # Drop the leading unit axis and store the 2-D table under a
                # fixed initializer name.
                tensor = numpy_helper.from_array(
                    temp.reshape((temp.shape[1], temp.shape[2])),
                    "position_embedding")
                self.add_initializer(tensor)
                logger.info(
                    "Found position embedding. name:{}, shape:{}".format(
                        pos_initializer.name, temp.shape[1:]))
                position_embedding = "position_embedding"
            else:
                logger.info(
                    "Failed to find position embedding. name:{}, shape:{}".
                    format(pos_initializer.name, temp.shape))
                return False
        else:
            pos_embed_path = self.match_parent_path(add_node,
                                                    ['Gather', 'Slice'],
                                                    [1, 1],
                                                    output_name_to_node)
            if pos_embed_path is None:
                logger.debug("failed to match pos_embed_path")
                return False

            pos_gather, _ = pos_embed_path
            pos_initializer = self.get_initializer(pos_gather.input[0])
            if pos_initializer is None:
                logger.debug("failed to get pos initializer")
                return False

            temp = numpy_helper.to_array(pos_initializer)
            if len(temp.shape) == 2:
                # This branch found the position table; the message used to
                # say "word embedding" by mistake.
                logger.info(
                    "Found position embedding. name:{}, shape:{}".format(
                        pos_initializer.name, temp.shape))
                position_embedding = pos_initializer.name
            else:
                logger.info(
                    "Failed to find position embedding. name:{}, shape:{}".
                    format(pos_initializer.name, temp.shape))
                return False

        # --- segment embedding: Gather on input 1 of the outer Add ---
        gather = self.get_parent(skip_node, 1, output_name_to_node)
        if gather is None or gather.op_type != "Gather":
            logger.debug("failed to get gather")
            return False

        segment_initializer = self.get_initializer(gather.input[0])
        if segment_initializer is None:
            logger.debug("failed to get segment initializer")
            return False

        temp = numpy_helper.to_array(segment_initializer)
        if len(temp.shape) == 2:
            logger.info("Found segment embedding. name:{}, shape:{}".format(
                segment_initializer.name, temp.shape))
            segment_embedding = segment_initializer.name
        else:
            logger.info(
                "Failed to find segment embedding. name:{}, shape:{}".format(
                    segment_initializer.name, temp.shape))
            return False

        logger.info("Create Embedding node")
        self.create_embedding_subgraph(node, word_embedding, segment_embedding,
                                       position_embedding)
        return True
Exemplo n.º 28
0
def boxprop(box, model):
    """Replay the layers of an ONNX ``model`` on ``box``.

    Every graph initializer is converted to a NumPy array, then the nodes are
    visited in graph order and translated into calls on ``box`` (``relu``,
    ``tanh``, ``conv2d``, ``convTranspose2d``, ``batchNorm2d``, ``maxpool2d``,
    ``linear``).  Layer parameters are located by the last dot-separated
    component of each node input name (``weight``, ``bias``, ``running_mean``,
    ``running_var``).

    Raises:
        ValueError: for an op type that is neither handled nor ignored.
    """
    resources = {
        initial.name: numpy_helper.to_array(initial)
        for initial in model.graph.initializer
    }

    # Ops that are skipped without touching the box.
    ignored_ops = ('Constant', 'Reshape', 'Concat', 'Unsqueeze', 'Shape',
                   'Gather')

    def params_by_suffix(node, *suffixes):
        """Map each requested suffix to its matching initializer (or None)."""
        found = dict.fromkeys(suffixes)
        for input_name in node.input:
            suffix = input_name.split('.')[-1]
            if suffix in found:
                found[suffix] = resources[input_name]
        return found

    def leading_ints(node, settings):
        """Replace ``settings`` entries by the first int of same-named attributes."""
        for attr in node.attribute:
            if attr.name in settings:
                settings[attr.name] = attr.ints[0]
        return settings

    for node in model.graph.node:
        op = node.op_type

        if op == 'Relu':
            box.relu()

        elif op == 'Tanh':
            box.tanh()

        elif op == 'Conv':
            params = params_by_suffix(node, 'weight', 'bias')
            weight = params['weight']
            c_out = weight.shape[0]
            settings = leading_ints(
                node, {'kernel_shape': 1, 'strides': 1, 'pads': 0})
            box.conv2d(weight, c_out, settings['kernel_shape'],
                       settings['strides'], settings['pads'], params['bias'])

        elif op == 'ConvTranspose':
            weight = params_by_suffix(node, 'weight')['weight']
            c_out = weight.shape[1]
            kernel_size = [0, 0]
            settings = {'strides': 1, 'pads': 0}
            for attr in node.attribute:
                if attr.name == 'kernel_shape':
                    # Unlike Conv, the full two-entry kernel shape is kept.
                    for axis, extent in enumerate(attr.ints):
                        kernel_size[axis] = extent
                elif attr.name in settings:
                    settings[attr.name] = attr.ints[0]
            box.convTranspose2d(weight, c_out, kernel_size,
                                settings['strides'], settings['pads'])

        elif op == 'BatchNormalization':
            params = params_by_suffix(node, 'weight', 'bias', 'running_mean',
                                      'running_var')
            eps = 0
            for attr in node.attribute:
                if attr.name == 'epsilon':
                    eps = attr.f
            box.batchNorm2d(params['running_mean'], params['running_var'],
                            eps, params['weight'], params['bias'])

        elif op == 'MaxPool':
            settings = leading_ints(node, {'kernel_shape': 1})
            box.maxpool2d(settings['kernel_shape'])

        elif op == 'Gemm':
            params = params_by_suffix(node, 'weight', 'bias')
            box.linear(params['weight'], params['bias'])

        elif op not in ignored_ops:
            raise ValueError('Cannot handle layer of type ' + node.op_type)
Exemplo n.º 29
0
def quantitize_graph(g, verbose=False):
    """Replace large float32 initializers of graph ``g`` by 8-bit versions.

    Each float32 initializer with at least 32 elements is quantized, the
    nodes produced by ``_compose_quantitize`` are placed in front of the
    existing nodes, and every consumer of a quantized weight is rewired to
    the name returned by ``_compose_quantitize``.  The graph is modified in
    place.

    Args:
        g: graph exposing ``initializer``, ``input`` and ``node`` sequences.
        verbose: when true, log per-weight quantization error statistics.

    Returns:
        The same graph object ``g``.
    """
    helper_weights = []
    quantized = []
    helper_nodes = []
    renamed = {}
    quantized_positions = []

    for position, weight in enumerate(g.initializer):
        # Only float32 tensors with at least 32 elements are quantized.
        if weight.data_type != onnx_pb.TensorProto.FLOAT:
            continue
        values = numpy_helper.to_array(weight)
        if values.size < 32:
            continue

        quantized_positions.append(position)
        name = weight.name
        if verbose:
            logger.info("quantitizing %s", name)
        w_quant, zp, scale = eight_bit_quantitize(values)
        quantized_weight = numpy_helper.from_array(w_quant, name=name)
        if verbose:
            # Report the share of elements whose absolute error exceeds a
            # given percentage of the original magnitude.
            restored = eight_bit_dequantitize(w_quant, zp, scale)
            abs_error = np.abs(restored - values)
            summary = {}
            for percent in [1.0, 5.0, 10.0, 20.0]:
                share = np.sum(
                    abs_error > np.abs(percent * values / 100.)) / values.size
                summary["> " + str(percent) + "%"] = "{:.2f}".format(
                    100. * share)
            logger.info("above_rtol: %s", str(summary))
            logger.info("raw:   %s", stats(values))
            logger.info("quant: %s", stats(restored))
        renamed[name] = _compose_quantitize(helper_nodes, helper_weights, zp,
                                            scale, name)
        quantized.append(quantized_weight)

    # Drop the original float initializers, back to front so that the
    # remaining positions stay valid.
    for position in reversed(quantized_positions):
        del g.initializer[position]

    # Register the helper tensors first, then the quantized weights.
    g.initializer.extend(helper_weights)
    g.initializer.extend(quantized)

    # Graph inputs that describe a quantized weight get a fresh value info
    # carrying the new element type.
    by_name = {w.name: w for w in quantized}
    fresh_inputs = []
    stale_positions = []
    for position, graph_input in enumerate(g.input):
        w = by_name.get(graph_input.name)
        if w is None:
            continue
        fresh_inputs.append(
            helper.make_tensor_value_info(w.name, w.data_type, w.dims))
        stale_positions.append(position)
    for position in reversed(stale_positions):
        del g.input[position]

    # Helper tensors become graph inputs as well.
    fresh_inputs.extend(
        helper.make_tensor_value_info(w.name, w.data_type, w.dims)
        for w in helper_weights)
    g.input.extend(fresh_inputs)

    # Point every consumer of a quantized weight at its replacement name.
    for node in g.node:
        for slot, input_name in enumerate(node.input):
            replacement = renamed.get(input_name)
            if replacement is not None:
                node.input[slot] = replacement

    # The generated helper nodes go in front of the existing nodes.
    helper_nodes.extend(g.node)
    del g.node[:]
    g.node.extend(helper_nodes)
    return g
Exemplo n.º 30
0
import onnx
from onnx import numpy_helper

def _read_tensor_proto(path):
    """Return the TensorProto parsed from the serialized file at ``path``."""
    proto = onnx.TensorProto()
    with open(path, 'rb') as stream:
        proto.ParseFromString(stream.read())
    return proto


# Reference input tensor and its NumPy view.
itensor = _read_tensor_proto('input_0.pb')
npitensor = numpy_helper.to_array(itensor)

# Reference output tensor and its NumPy view.
otensor = _read_tensor_proto('output_0.pb')
npotensor = numpy_helper.to_array(otensor)

# No-op kept from the original script (presumably a breakpoint anchor).
pass

Exemplo n.º 31
0
# Preprocessing: create a Numpy array
numpy_array = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=float)
# From NumPy 1.14 on, ``legacy='1.13'`` is passed so the printed form stays
# the same as on older releases, which do not accept that argument.
if LooseVersion(numpy.version.version) < LooseVersion('1.14'):
    print('Original Numpy array:\n{}\n'.format(
        numpy.array2string(numpy_array)))
else:
    print('Original Numpy array:\n{}\n'.format(
        numpy.array2string(numpy_array, legacy='1.13')))

# Convert the Numpy array to a TensorProto
tensor = numpy_helper.from_array(numpy_array)
print('TensorProto:\n{}'.format(tensor))

# Convert the TensorProto to a Numpy array
new_array = numpy_helper.to_array(tensor)
# Print the recovered array (not the original one) so the output really
# demonstrates the round trip.
if LooseVersion(numpy.version.version) < LooseVersion('1.14'):
    print('After round trip, Numpy array:\n{}\n'.format(
        numpy.array2string(new_array)))
else:
    print('After round trip, Numpy array:\n{}\n'.format(
        numpy.array2string(new_array, legacy='1.13')))

# Save the TensorProto
with open(os.path.join('resources', 'tensor.pb'), 'wb') as f:
    f.write(tensor.SerializeToString())

# Load the TensorProto
new_tensor = onnx.TensorProto()
with open(os.path.join('resources', 'tensor.pb'), 'rb') as f:
    new_tensor.ParseFromString(f.read())
Exemplo n.º 32
0
def from_onnx(onnx_string_or_file):
    """
    Build a CrypTen model (or a single module) from serialized ONNX.

    ``onnx_string_or_file`` is either a file-like object (anything with a
    ``seek`` attribute) or an ONNX Protobuf string.  The ONNX graph must have
    exactly one non-parameter input and exactly one output.  When the
    resulting graph holds a single module, that module is returned instead
    of the graph.

    Raises ``ValueError`` for an operator that has no CrypTen counterpart.
    """
    # File-like inputs are rewound and loaded directly; anything else is
    # treated as a serialized protobuf string.
    if hasattr(onnx_string_or_file, "seek"):
        onnx_string_or_file.seek(0)
        onnx_model = onnx.load(onnx_string_or_file)
    else:
        onnx_model = onnx.load_model_from_string(onnx_string_or_file)
    onnx_graph = onnx_model.graph

    # Initializers become torch tensors keyed by name.
    all_parameters = {}
    for tensor in onnx_graph.initializer:
        all_parameters[tensor.name] = torch.from_numpy(
            numpy_helper.to_array(tensor))

    # Graph inputs that are really parameters do not count as model inputs.
    input_names = [
        value.name for value in onnx_graph.input
        if value.name not in all_parameters
    ]
    output_names = [value.name for value in onnx_graph.output]
    assert len(input_names) == 1, "number of inputs should be 1"
    assert len(output_names) == 1, "number of outputs should be 1"

    conv_by_rank = {1: Conv1d, 2: Conv2d}

    crypten_model = Graph(input_names[0], output_names[0])
    for node in onnx_graph.node:
        node_output_name = list(node.output)[0]
        all_input_names = list(node.input)  # still includes parameters

        # Parameters consumed by this node, keyed by their short name.
        parameters = {}
        for name in all_input_names:
            if name in all_parameters and name not in input_names:
                parameters[get_parameter_name(name)] = all_parameters[name]
        data_input_names = [
            name for name in all_input_names
            if get_parameter_name(name) not in parameters
        ]
        attributes = {
            attr.name: get_attribute_value(attr) for attr in node.attribute
        }

        # Resolve the CrypTen class; Conv depends on the kernel rank.
        op_type = node.op_type
        if op_type == "Conv":
            dims = len(attributes["kernel_shape"])
            if dims not in conv_by_rank:
                raise ValueError("CrypTen does not support op Conv%dd." % dims)
            cls = conv_by_rank[dims]
        elif op_type in ONNX_TO_CRYPTEN:
            cls = ONNX_TO_CRYPTEN[op_type]
        else:
            raise ValueError("CrypTen does not support op %s." % op_type)

        crypten_module = cls.from_onnx(parameters=parameters,
                                       attributes=attributes)
        crypten_model.add_module(node_output_name, crypten_module,
                                 data_input_names)

    # A graph holding a single module is unwrapped to that module.
    modules = list(crypten_model.modules())
    if len(modules) == 1:
        return modules[0]
    return crypten_model
Exemplo n.º 33
0
        name = 'var' + name
    return name


def _pb_sort_key(path):
    """Sort key ordering ``<prefix>_<N>.pb`` files by their numeric index."""
    stem = os.path.splitext(os.path.basename(path))[0]
    index = stem.rsplit('_', 1)[-1]
    try:
        return (0, int(index), path)
    except ValueError:
        # Files without a numeric index go after the numbered ones, by name.
        return (1, 0, path)


data_dir = os.path.dirname(sys.argv[1])
input_names = sys.argv[2].split(',')
output_names = sys.argv[3].split(',')
# Any fourth command-line argument turns on squeezing of leading unit axes.
squeeze_data = len(sys.argv) > 4

# Load inputs.  glob() returns matches in arbitrary order, so the files are
# sorted by index to make the loaded order deterministic (input_0, input_1,
# ..., input_10).
inputs = []
for fn in sorted(glob(os.path.join(data_dir, 'input_*.pb')),
                 key=_pb_sort_key):
    tensor = onnx.TensorProto()
    with open(fn, 'rb') as f:
        tensor.ParseFromString(f.read())
    tensor = numpy_helper.to_array(tensor)
    # Drop leading unit axes until at most four dimensions remain.
    while squeeze_data and tensor.ndim > 4 and tensor.shape[0] == 1:
        tensor = tensor.squeeze(0)
    inputs.append(tensor)

# Load outputs, in index order as well.
outputs = []
for fn in sorted(glob(os.path.join(data_dir, 'output_*.pb')),
                 key=_pb_sort_key):
    tensor = onnx.TensorProto()
    with open(fn, 'rb') as f:
        tensor.ParseFromString(f.read())
    tensor = numpy_helper.to_array(tensor)
    # Drop leading unit axes until at most two dimensions remain.
    while squeeze_data and tensor.ndim > 2 and tensor.shape[0] == 1:
        tensor = tensor.squeeze(0)
    outputs.append(tensor)
Exemplo n.º 34
0
def test_onnx_model(model_name, round_trip):
    """Evaluate an ONNX model and compare against its reference outputs.

    The model is loaded from ``<onnx_base_dir>/<model_name>/model.onnx``;
    with ``round_trip`` set it is first saved and reloaded.  Every
    sub-directory of the model directory is treated as one data set made of
    ``input_<i>.pb`` and ``output_<i>.pb`` files.

    Args:
        model_name: name of the model directory under ``onnx_base_dir``.
        round_trip: whether to re-save and reload the model before testing.
    """
    if model_name in skip_model_names and not round_trip:
        pytest.skip('Skip onnx model test. ')
    if model_name in skip_round_trip_model_names and round_trip:
        pytest.skip('Skip onnx model round trip test. ')

    model_dir = os.path.join(onnx_base_dir, model_name)
    model = C.Function.load(os.path.join(model_dir, 'model.onnx'),
                            format=C.ModelFormat.ONNX)

    if round_trip:
        resave_model_path = 'model_resave.onnx'
        model.save(resave_model_path, format=C.ModelFormat.ONNX)
        model = C.Function.load(resave_model_path, format=C.ModelFormat.ONNX)

    data_dirs = [
        os.path.join(model_dir, entry) for entry in os.listdir(model_dir)
        if os.path.isdir(os.path.join(model_dir, entry))
    ]
    for data_dir in data_dirs:
        inputs = []
        ref_outputs = []
        tensor = onnx.TensorProto()

        # Count the matching files, then rebuild the names by index so that
        # the tensors are loaded in numeric order.
        input_filenames = [
            filename for filename in os.listdir(data_dir)
            if input_filename_pattern.match(filename)
        ]
        input_files_sorted = [
            os.path.join(data_dir, 'input_{:d}.pb'.format(i))
            for i in range(len(input_filenames))
        ]
        output_filenames = [
            filename for filename in os.listdir(data_dir)
            if output_filename_pattern.match(filename)
        ]
        output_files_sorted = [
            os.path.join(data_dir, 'output_{:d}.pb'.format(i))
            for i in range(len(output_filenames))
        ]

        for input_file in input_files_sorted:
            with open(input_file, 'rb') as f:
                tensor.ParseFromString(f.read())
            inputs.append(numpy_helper.to_array(tensor))

        for output_file in output_files_sorted:
            with open(output_file, 'rb') as f:
                tensor.ParseFromString(f.read())
            ref_outputs.append(numpy_helper.to_array(tensor))

        cntk_input = {
            model.arguments[i]: value
            for i, value in enumerate(inputs)
        }
        cntk_res = [model.eval(cntk_input)]

        # ``np.bool`` was removed in NumPy 1.24; ``np.bool_`` compares equal
        # to the same dtype on every release.
        if ref_outputs[0].dtype == np.bool_:
            cntk_res = [cntk_res[0].astype("bool")]

        outputs = list(cntk_res)

        np.testing.assert_equal(len(ref_outputs), len(outputs))
        for ref_output, output in zip(ref_outputs, outputs):
            np.testing.assert_equal(ref_output.dtype, output.dtype)
            np.testing.assert_allclose(ref_output,
                                       output,
                                       rtol=1e-3,
                                       atol=1e-4)
Exemplo n.º 35
0
def _main_predict(args) -> None:
    inputs = {}
    if args.pb_in:
        for path, input_name in zip(args.pb_in, args.pb_in_names):
            with open(path, 'rb') as fp:
                tensor = onnx.TensorProto()
                tensor.ParseFromString(fp.read())
            print('Using {} as "{}" input'.format(path, input_name))
            inputs[input_name] = numpy_helper.to_array(tensor)
    elif args.json_in:
        with open(args.json_in) as fp:
            obj = json.load(fp)
        for input_name, obj in obj.items():
            arr = np.asarray(obj['values'], dtype=obj['type'])
            inputs[input_name] = arr
    else:
        raise NotImplementedError

    if args.verbose:
        print('Inputs:')
        for input_name, arr in inputs.items():
            print(' {}: dtype={} shape={}'.format(input_name, arr.dtype,
                                                  arr.shape))

    enclave_signing_key = None
    if args.enclave_signing_key_file:
        with open(args.enclave_signing_key_file) as fp:
            enclave_signing_key = fp.read()

    if args.enclave_model_hash_file:
        with open(args.enclave_model_hash_file) as f:
            enclave_model_hash = f.read()
    else:
        enclave_model_hash = args.enclave_model_hash

    c = Client(url=args.url,
               auth=get_auth(args),
               enclave_signing_key=enclave_signing_key,
               enclave_hash=args.enclave_hash,
               enclave_model_hash=enclave_model_hash,
               enclave_allow_debug=args.enclave_allow_debug)

    try:
        outputs = c.predict(inputs)
    except Exception as e:
        if args.verbose:
            raise
        else:
            print(f'{C.FAIL}{C.BOLD}ERROR: {e}{C.END}')
            sys.exit(1)

    if args.verbose:
        print('Outputs:')
        for output_name, arr in outputs.items():
            print(' {}: dtype={} shape={}'.format(output_name, arr.dtype,
                                                  arr.shape))

    if args.pb_out:
        os.makedirs(args.pb_out, exist_ok=True)
        for i, (output_name, arr) in enumerate(outputs.items()):
            filename = 'output_{}.pb'.format(i)
            print('Saving "{}" output as {}'.format(output_name, filename))
            path = os.path.join(args.pb_out, filename)
            tensor = numpy_helper.from_array(arr, output_name)
            with open(path, 'wb') as fp:
                fp.write(tensor.SerializeToString())

    if args.json_out:
        print('Saving inference results to {}'.format(args.json_out))
        with open(args.json_out, 'w') as fp:
            json.dump(outputs, fp, cls=NumpyEncoder, sort_keys=True)
Exemplo n.º 36
0
    def from_onnx(graph):  # type: (GraphProto) -> Graph
        """Build a ``Graph`` from an ONNX ``GraphProto``.

        Initializers are converted to NumPy arrays and attached to the nodes
        that consume them, nodes are linked to their parents and children
        through the tensor names they share, and the known static shapes
        from ``value_info``, ``input`` and ``output`` are collected.
        """
        input_tensors = {}
        for initializer in graph.initializer:
            input_tensors[initializer.name] = numpy_helper.to_array(initializer)

        nodes_ = []
        nodes_by_input = {}  # type: Dict[Text, List[Node]]
        nodes_by_output = {}
        for onnx_node in graph.node:
            node_ = Node.from_onnx(onnx_node)
            for input_ in node_.inputs:
                if input_ in input_tensors:
                    # Constant input: attach the array to the node itself.
                    node_.input_tensors[input_] = input_tensors[input_]
                else:
                    # Runtime input: remember the node as a consumer.
                    nodes_by_input.setdefault(input_, []).append(node_)
            for output_ in node_.outputs:
                nodes_by_output[output_] = node_
            nodes_.append(node_)

        # Graph inputs backed by an initializer are not real inputs.
        inputs = [
            _input_from_onnx_input(i) for i in graph.input
            if i.name not in input_tensors
        ]
        outputs = [_input_from_onnx_input(o) for o in graph.output]

        # Wire producers and consumers together.
        for node_ in nodes_:
            for input_ in node_.inputs:
                if input_ in nodes_by_output:
                    node_.parents.append(nodes_by_output[input_])
            for output_ in node_.outputs:
                if output_ in nodes_by_input:
                    node_.children.extend(nodes_by_input[output_])

        # Dictionary to hold the "value_info" field from ONNX graph
        shape_dict = {}  # type: Dict[Text,Tuple[int,...]]
        for value_infos in (graph.value_info, graph.input, graph.output):
            for value_info in value_infos:
                shape = tuple(
                    int(dim.dim_value)
                    for dim in value_info.type.tensor_type.shape.dim)
                # Entries without any dimension are left out.
                if shape:
                    shape_dict[value_info.name] = shape

        return Graph(nodes_, inputs, outputs, shape_dict)
Exemplo n.º 37
0
    def fuse_attention(self):
        """Fuse matched self-attention subgraphs into single Attention nodes.

        Starting from every SkipLayerNormalization / LayerNormalization node,
        the method walks the parent paths for the qkv, v, qk, q, k and mask
        branches.  When all of them match and ``check_attention_input``
        accepts the q/k/v MatMul nodes together with the root ``parent``, an
        attention node is created through
        ``self.attention_fusion.create_attention_node`` and the matched nodes
        are scheduled for removal.  At the end the scheduled nodes are
        removed, ``update_graph`` is called and the number of fused nodes is
        logged.
        """
        output_name_to_node = self.output_name_to_node()

        nodes_to_remove = []
        attention_count = 0

        start_nodes = []
        skip_layer_norm_nodes = self.get_nodes_by_op_type(
            "SkipLayerNormalization")
        layer_norm_nodes = self.get_nodes_by_op_type("LayerNormalization")
        # Sometimes SkipLayerNormalization cannot be fused because the Add
        # before the LayerNormalization has an output used by nodes outside
        # the pattern.  Conceptually the Add before a LayerNormalization is
        # treated like a SkipLayerNormalization node since they share the
        # same pattern.
        start_nodes.extend(skip_layer_norm_nodes)
        start_nodes.extend(layer_norm_nodes)

        for normalize_node in start_nodes:
            # SkipLayerNormalization has two inputs, and one of them is the root input for attention.
            if normalize_node.op_type == 'LayerNormalization':
                # For a plain LayerNormalization, continue from the Add that
                # feeds its first input; skip the node when there is none.
                add_before_layernorm = self.match_parent(
                    normalize_node, 'Add', 0)
                if add_before_layernorm is not None:
                    normalize_node = add_before_layernorm
                else:
                    continue
            # The root of the attention block is looked up on input 1 first,
            # then on input 0.
            parent = self.get_parent(normalize_node, 1)
            if parent is None or parent.op_type not in [
                    "SkipLayerNormalization", "LayerNormalization", "Reshape"
            ]:
                parent = self.get_parent(normalize_node, 0)
                if parent is None or parent.op_type not in [
                        "SkipLayerNormalization", "LayerNormalization",
                        "Reshape"
                ]:
                    logger.debug("Failed to match parent of normalize_node")
                    continue

            # Output projection path; three alternative layouts are tried in
            # turn (two MatMul-based, one Einsum-based).
            qkv_nodes = self.match_parent_path(
                normalize_node,
                ['Add', 'MatMul', 'Reshape', 'Transpose', 'MatMul'],
                [0, 0, 0, 0, 0])
            if qkv_nodes is None:
                qkv_nodes = self.match_parent_path(
                    normalize_node,
                    ['MatMul', 'Reshape', 'Transpose', 'MatMul'], [1, 0, 0, 0])
                if qkv_nodes is None:
                    qkv_nodes = self.match_parent_path(
                        normalize_node, ['Add', 'Einsum', 'Einsum'], [0, 0, 0])
                    if qkv_nodes is None:
                        logger.debug("Failed to match qkv nodes")
                        continue

            # The last matched node is the one whose parents are searched for
            # the v branch (input 1) and the softmax branch (input 0).
            matmul_qkv = qkv_nodes[-1]
            v_nodes = self.match_parent_path(
                matmul_qkv, ['Transpose', 'Reshape', 'Add', 'MatMul'],
                [1, 0, 0, 0])
            if v_nodes is None:
                v_nodes = self.match_parent_path(matmul_qkv, ['Add', 'Einsum'],
                                                 [1, 0])
                if v_nodes is None:
                    logger.debug("Failed to match v path")
                    continue

            # In both layouts the last two matched nodes are the bias Add and
            # the projection (MatMul or Einsum).
            add_v = v_nodes[-2]
            matmul_v = v_nodes[-1]
            qk_nodes = self.match_parent_path(
                matmul_qkv, ['Softmax', 'Add', "Mul", 'MatMul'], [0, 0, 0, 0])
            if qk_nodes is None:
                qk_nodes = self.match_parent_path(matmul_qkv,
                                                  ['Softmax', 'Add', 'Einsum'],
                                                  [0, 0, 0])
                if qk_nodes is None:
                    logger.debug("Failed to match qk_paths")
                    continue
            matmul_qk = qk_nodes[-1]

            q_nodes = self.match_parent_path(
                matmul_qk, ['Transpose', 'Reshape', 'Add', 'MatMul'],
                [0, 0, 0, 0])
            if q_nodes is None:
                q_nodes = self.match_parent_path(matmul_qk, ['Add', 'Einsum'],
                                                 [0, 0])
                if q_nodes is None:
                    logger.debug("Failed to match q path")
                    continue
            add_q = q_nodes[-2]
            matmul_q = q_nodes[-1]

            k_nodes = self.match_parent_path(
                matmul_qk, ['Transpose', 'Reshape', 'Add', 'MatMul'],
                [1, 0, 0, 0])
            if k_nodes is None:
                k_nodes = self.match_parent_path(matmul_qk,
                                                 ['Mul', 'Add', 'Einsum'],
                                                 [1, 0, 0])
                if k_nodes is None:
                    logger.debug("Failed to match k path")
                    continue
            add_k = k_nodes[-2]
            matmul_k = k_nodes[-1]

            # qk_nodes[1] is the Add that follows the Softmax in both matched
            # layouts; the mask path is searched from there.
            mask_nodes = self.match_mask_path(qk_nodes[1])

            if mask_nodes is None:
                logger.debug("Cannot find mask_nodes.")
                continue

            if not self.has_constant_input(mask_nodes[1], 1):
                logger.debug(
                    "Sub node expected to have an input with constant value 1.0."
                )
                continue

            # add a squeeze node to convert a 3-d mask to 2-d
            squeeze_node = self.match_parent_path(
                mask_nodes[-1], ['Squeeze'], [0]) or self.match_parent_path(
                    mask_nodes[-1], ['Expand'], [0])
            # NOTE(review): the Squeeze node name is a fixed string, so every
            # insertion reuses it - confirm that only one can be added per
            # model.
            squeeze_node_name = "Squeeze_3d_to_2d_mask"
            squeeze_output_name = squeeze_node_name + "_output"
            if squeeze_node is None and len(
                    mask_nodes) == 5 and self.find_graph_input(
                        mask_nodes[-1].input[0]) is None:
                mask_input = mask_nodes[-1].input[1]
                self.add_node(
                    helper.make_node("Squeeze", [mask_input],
                                     [squeeze_output_name],
                                     squeeze_node_name,
                                     axes=[1]))
                mask_nodes[-1].input[0] = squeeze_output_name

            is_same_root = self.check_attention_input(matmul_q, matmul_k,
                                                      matmul_v, parent,
                                                      output_name_to_node)
            if is_same_root:
                mask_index = self.attention_mask.process_mask(
                    mask_nodes[-1].input[0])
                logger.debug("Create an Attention node.")
                # For tf models, q and v are flipped.
                # NOTE(review): the call below actually swaps the q and k
                # arguments (matmul_k/add_k are passed first) - confirm which
                # pair the comment above is meant to describe.
                attention_node = self.attention_fusion.create_attention_node(
                    mask_index, matmul_k, matmul_q, matmul_v, add_k, add_q,
                    add_v, self.num_heads, self.hidden_size, parent.output[0],
                    qkv_nodes[2].output[0])
                if attention_node is None:
                    continue

                if qkv_nodes[1].op_type == 'Einsum':
                    # add reshape before einsum
                    # The new shape is [0, 0, num_heads, hidden_size / num_heads],
                    # stored as raw int64 bytes.
                    tensor = helper.make_tensor(
                        name=qkv_nodes[1].name + "_newshape",
                        data_type=TensorProto.INT64,
                        dims=[4],
                        vals=np.int64([[
                            0, 0, self.num_heads,
                            int(self.hidden_size / self.num_heads)
                        ]]).tobytes(),
                        raw=True)
                    self.add_initializer(tensor)
                    reshape_ = helper.make_node(
                        "Reshape",
                        inputs=[
                            attention_node.output[0],
                            qkv_nodes[1].name + "_newshape"
                        ],
                        outputs=[qkv_nodes[1].name + "_reshape_output"],
                        name=qkv_nodes[1].name + "_reshape")
                    qkv_nodes[1].input[
                        0] = qkv_nodes[1].name + "_reshape_output"
                    self.add_node(reshape_)
                if parent.op_type == 'Reshape':
                    # Temporary work around: we require the skiplayernorm and attention op be fed with 3-d input
                    # The second entry of the Reshape's shape initializer is
                    # reused as the last dimension of the new [1, -1, hidden]
                    # shape.
                    hidden_size = numpy_helper.to_array(
                        self.get_initializer(parent.input[1]))[1]
                    tensor = helper.make_tensor(
                        name=parent.name + "_modified",
                        data_type=TensorProto.INT64,
                        dims=[3],
                        vals=np.int64([[1, -1, hidden_size]]).tobytes(),
                        raw=True)
                    self.add_initializer(tensor)
                    parent.input[1] = parent.name + "_modified"

                self.add_node(attention_node)
                attention_count += 1

                # The first two qkv nodes are kept; everything else that was
                # matched is scheduled for removal.
                nodes_to_remove.extend(qkv_nodes[2:])
                nodes_to_remove.extend(qk_nodes)
                nodes_to_remove.extend(q_nodes)
                nodes_to_remove.extend(k_nodes)
                nodes_to_remove.extend(v_nodes)
                nodes_to_remove.extend(mask_nodes)
            else:
                logger.debug("Root node not matched.")
                continue
        self.remove_nodes(nodes_to_remove)
        self.update_graph()
        logger.info(f"Fused Attention count:{attention_count}")
Exemplo n.º 38
0
from yolo3 import YoloBody


def get_anchors(anchors_path):
    """Load anchor values from the first line of a comma-separated text file.

    Only the first line of the file is read. Its comma-separated fields are
    parsed as floats, arranged into an array of shape ``(-1, 3, 2)`` and the
    order along the first axis is reversed.

    Args:
        anchors_path: Path of the text file holding the anchor values.

    Returns:
        A NumPy array of shape ``(N, 3, 2)`` with the first axis reversed
        relative to the order in the file.
    """
    with open(anchors_path) as anchor_file:
        first_line = anchor_file.readline()
    values = np.array([float(field) for field in first_line.split(',')])
    grouped = values.reshape([-1, 3, 2])
    # Reverse only the first axis; the inner two axes keep their order.
    return grouped[::-1, :, :]



# Class count handed to YoloBody together with the anchors.
num_classes = 3
# Raw string: the backslashes are Windows path separators, not escape
# sequences ('\P', '\E', '\y' are invalid escapes and warn on newer Pythons).
anchors_path = r'Practice\Pytorch\EPT\yolo_anchors.txt'
anchors      = get_anchors(anchors_path)


model = YoloBody(anchors, num_classes)

onnx_model = onnx.load(r'Practice\Pytorch\EPT\Yolov3_55.onnx')

graph = onnx_model.graph
# Initializer name -> NumPy array, for every initializer stored in the graph.
initalizers = dict()

for init in graph.initializer:
    initalizers[init.name] = numpy_helper.to_array(init)

# Overwrite each PyTorch parameter with the ONNX initializer of the same name.
# A parameter name missing from the ONNX graph raises KeyError here.
for name, p in model.named_parameters():
    p.data = (torch.from_numpy(initalizers[name])).data

torch.save(model.state_dict(), '999.pth')
Exemplo n.º 39
0
 def _test_numpy_helper_float_type(self, dtype):
     """Check that a random 13x37 array of ``dtype`` survives a TensorProto round trip.

     The array is converted with ``numpy_helper.from_array`` under the name
     "test", the name is verified, and the array recovered with
     ``numpy_helper.to_array`` must equal the original.
     """
     original = np.random.rand(13, 37).astype(dtype)
     proto = numpy_helper.from_array(original, "test")
     self.assertEqual(proto.name, "test")
     recovered = numpy_helper.to_array(proto)
     np.testing.assert_equal(original, recovered)
Exemplo n.º 40
0
    def fuse(self, reshape_node, input_name_to_nodes, output_name_to_node):
        """Replace a Concat-built Reshape target shape with a fixed shape list.

        The second input of ``reshape_node`` must be produced by a four-input
        ``Concat`` whose input 0 is reached through ``Unsqueeze <- Gather <-
        Shape``. Two layouts are accepted:

        * Concat input 1 is not such a chain: inputs 1, 2 and 3 must be
          one-element initializers with values ``-1``, ``> 0`` and ``> 0``,
          and the Reshape must be fed by ``Add <- MatMul``.
        * Concat input 1 is also an ``Unsqueeze <- Gather <- Shape`` chain:
          inputs 2 and 3 must be positive one-element initializers, and the
          Reshape must be fed by ``Mul <- Add <- MatMul``.

        Every matched ``Shape`` node has to read the same tensor as input 0 of
        the matched ``MatMul``. When all checks pass,
        ``self.replace_reshape_node`` is called with the collected shape list;
        otherwise the method returns without calling it.

        Args:
            reshape_node: The Reshape node to examine.
            input_name_to_nodes: Not used by this method.
            output_name_to_node: Mapping from a tensor name to the node that
                produces it.

        Returns:
            None.
        """
        shape_input = reshape_node.input[1]
        if shape_input not in output_name_to_node:
            return

        concat_node = output_name_to_node[shape_input]
        if not (concat_node.op_type == 'Concat'
                and len(concat_node.input) == 4):
            return

        def read_unit_dims(input_indices):
            # Load the Concat inputs at ``input_indices`` as arrays. Gives None
            # unless each one is an initializer holding exactly one element.
            protos = [
                self.model.get_initializer(concat_node.input[index])
                for index in input_indices
            ]
            if any(proto is None for proto in protos):
                return None
            arrays = [numpy_helper.to_array(proto) for proto in protos]
            if any(len(array) != 1 for array in arrays):
                return None
            return arrays

        dim0_path = self.model.match_parent_path(
            concat_node, ['Unsqueeze', 'Gather', 'Shape'], [0, 0, 0],
            output_name_to_node)
        if dim0_path is None:
            return
        _, gather_0, shape_0 = dim0_path

        shape = []
        # A 0 entry is emitted when the Gather picks index 0 of the Shape output.
        if self.model.get_constant_value(gather_0.input[1]) == 0:
            shape.append(0)

        dim1_path = self.model.match_parent_path(
            concat_node, ['Unsqueeze', 'Gather', 'Shape'], [1, 0, 0],
            output_name_to_node)

        if dim1_path is None:
            # Concat inputs 1..3 must all be constants: (-1, positive, positive).
            unit_dims = read_unit_dims((1, 2, 3))
            if unit_dims is None:
                return
            dim_1, dim_2, dim_3 = unit_dims
            if not (dim_1[0] == -1 and dim_2[0] > 0 and dim_3[0] > 0):
                return
            for unit_dim in unit_dims:
                shape.extend(unit_dim)

            gemm_path = self.model.match_parent_path(reshape_node,
                                                     ['Add', 'MatMul'], [0, 1],
                                                     output_name_to_node)
            if gemm_path is None:
                return

            root_input = gemm_path[-1].input[0]
            if shape_0.input[0] != root_input:
                return
        else:
            _, gather_1, shape_1 = dim1_path
            # Second entry is 0 as well when the Gather picks index 1.
            if self.model.get_constant_value(gather_1.input[1]) == 1:
                shape.append(0)

            # Only Concat inputs 2 and 3 are constants here; both must be positive.
            unit_dims = read_unit_dims((2, 3))
            if unit_dims is None:
                return
            dim_2, dim_3 = unit_dims
            if not (dim_2[0] > 0 and dim_3[0] > 0):
                return
            for unit_dim in unit_dims:
                shape.extend(unit_dim)

            gemm_path = self.model.match_parent_path(reshape_node,
                                                     ['Mul', 'Add', 'MatMul'],
                                                     [0, 0, 1],
                                                     output_name_to_node)
            if gemm_path is None:
                return

            root_input = gemm_path[-1].input[0]
            if (shape_0.input[0] != root_input
                    or shape_1.input[0] != root_input):
                return

        self.replace_reshape_node(shape, reshape_node, concat_node)
Exemplo n.º 41
0
    def resize_model(self):
        """Rewrite the wrapped ONNX graph from ``old_parameters`` to ``new_parameters``.

        Reads the module-level ``old_parameters`` / ``new_parameters``
        dictionaries (keys ``seq_len``, ``num_heads``, ``size_per_head``,
        ``hidden_size``, ``word_dict_size``, ``max_word_position``) and, in
        order:

        1. sets dimension 1 of every graph input that equals the old sequence
           length to the new one;
        2. replaces one-element and scalar initializers whose value equals an
           old size (for scalars also ``sqrt(size_per_head)`` and its
           reciprocal) with the corresponding new value;
        3. records initializers whose shape contains an old size, redirects
           their consumers to ``<name>_resize`` and appends the tensor
           returned by ``self.resize_weight``;
        4. re-creates every ``Split`` node with three ``hidden_size`` chunks on
           axis 2, appends replacement ``Constant`` nodes, and names every
           non-Constant node ``<op_type>_<index>``;
        5. patches dimensions 2 and 4 of the graph inputs after the first, and
           dimension 2 (plus 4 for later ones) of the graph outputs.

        Progress is reported with ``print``. The graph is modified in place and
        nothing is returned.
        """
        graph = self.model.graph
        initializers = graph.initializer

        for graph_input in graph.input:
            input_shape = graph_input.type.tensor_type.shape
            if input_shape.dim[1].dim_value == old_parameters["seq_len"]:
                print("input", graph_input.name, input_shape)
                input_shape.dim[1].dim_value = new_parameters["seq_len"]
                print("=>", input_shape)

        old_hidden = old_parameters["hidden_size"]
        new_hidden = new_parameters["hidden_size"]
        old_head_size = old_parameters["size_per_head"]
        new_head_size = new_parameters["size_per_head"]

        # (old value, new value) pairs, tested in this order; the first match wins.
        size_replacements = [
            (old_parameters["num_heads"], new_parameters["num_heads"]),
            (old_parameters["seq_len"], new_parameters["seq_len"]),
            (old_head_size, new_head_size),
            (old_hidden, new_hidden),
            (4 * old_hidden, 4 * new_hidden),
            (3 * old_hidden, 3 * new_hidden),
        ]
        # Scalars are additionally checked against the attention scaling factors.
        scalar_replacements = size_replacements + [
            (1.0 / np.sqrt(old_head_size), 1.0 / np.sqrt(new_head_size)),
            (np.sqrt(old_head_size), np.sqrt(new_head_size)),
        ]
        supported_dtypes = {
            TensorProto.FLOAT: np.float32,
            TensorProto.INT32: np.int32,
            TensorProto.INT64: np.int64,
        }

        reshapes = {}
        for initializer in initializers:
            tensor = numpy_helper.to_array(initializer)
            dtype = supported_dtypes.get(initializer.data_type)
            if dtype is None:
                # Report the offending ONNX type and keep the tensor's own dtype
                # rather than reusing whatever an earlier initializer had.
                print("data type not supported by this tool:",
                      initializer.data_type)
                dtype = tensor.dtype

            if len(tensor.shape) == 1 and tensor.shape[0] == 1:
                for old_value, new_value in size_replacements:
                    if tensor == old_value:
                        print(
                            "initializer type={}".format(
                                initializer.data_type), initializer.name,
                            old_value, "=>[", new_value, "]")
                        initializer.CopyFrom(
                            numpy_helper.from_array(
                                np.asarray([new_value], dtype=dtype),
                                initializer.name))
                        break
            elif len(tensor.shape) == 0:
                for old_value, new_value in scalar_replacements:
                    if tensor == old_value:
                        print(
                            "initializer type={}".format(
                                initializer.data_type), initializer.name,
                            old_value, "=>", new_value)
                        initializer.CopyFrom(
                            numpy_helper.from_array(
                                np.asarray(new_value, dtype=dtype),
                                initializer.name))
                        break

            # ``tensor`` still holds the values read before any replacement above.
            new_shape = []
            shape_changed = False
            for dim in tensor.shape:
                if dim == old_hidden:
                    new_shape.append(new_hidden)
                    shape_changed = True
                elif dim == 4 * old_hidden:
                    new_shape.append(4 * new_hidden)
                    shape_changed = True
                elif dim == 3 * old_hidden:
                    new_shape.append(3 * new_hidden)
                    shape_changed = True
                # Membership test, unlike the other keys: presumably
                # word_dict_size is a collection of sizes — TODO confirm.
                elif dim in old_parameters["word_dict_size"]:
                    new_shape.append(new_parameters["word_dict_size"])
                    shape_changed = True
                elif dim == old_parameters["max_word_position"]:
                    new_shape.append(new_parameters["max_word_position"])
                    shape_changed = True
                else:
                    new_shape.append(dim)
            if shape_changed:
                reshapes[initializer.name] = new_shape
                print("initializer", initializer.name, tensor.shape, "=>",
                      new_shape)

        for initializer_name in reshapes:
            self.replace_input_of_all_nodes(initializer_name,
                                            initializer_name + '_resize')
            tensor = self.resize_weight(initializer_name,
                                        reshapes[initializer_name])
            self.model.graph.initializer.extend([tensor])

        # Add node name, replace split node attribute.
        nodes_to_add = []
        nodes_to_remove = []
        new_split = [new_hidden, new_hidden, new_hidden]
        for i, node in enumerate(graph.node):
            if node.op_type == "Split":
                nodes_to_add.append(
                    onnx.helper.make_node('Split',
                                          node.input,
                                          node.output,
                                          name="Split_{}".format(i),
                                          axis=2,
                                          split=list(new_split)))
                nodes_to_remove.append(node)
                print("update split", list(new_split))
            if node.op_type == "Constant":
                # NOTE(review): the replaced Constant node is never queued in
                # nodes_to_remove, so the old and new node share an output name
                # — confirm this is intended.
                for att in node.attribute:
                    if att.name == 'value':
                        constant_value = numpy_helper.to_array(att.t)
                        if constant_value == old_parameters["num_heads"]:
                            nodes_to_add.append(
                                onnx.helper.make_node(
                                    'Constant',
                                    inputs=node.input,
                                    outputs=node.output,
                                    value=onnx.helper.make_tensor(
                                        name=att.t.name,
                                        data_type=TensorProto.INT64,
                                        dims=[],
                                        vals=[new_parameters["num_heads"]])))
                            print("constant", att.t.name,
                                  old_parameters["num_heads"], "=>",
                                  new_parameters["num_heads"])
                        if constant_value == np.sqrt(old_head_size):
                            nodes_to_add.append(
                                onnx.helper.make_node(
                                    'Constant',
                                    inputs=node.input,
                                    outputs=node.output,
                                    value=onnx.helper.make_tensor(
                                        name=att.t.name,
                                        data_type=TensorProto.FLOAT,
                                        dims=[],
                                        vals=[np.sqrt(new_head_size)])))
                            print("constant", att.t.name,
                                  np.sqrt(old_head_size), "=>",
                                  np.sqrt(new_head_size))
            else:
                node.name = node.op_type + "_" + str(i)
        for node in nodes_to_remove:
            graph.node.remove(node)
        graph.node.extend(nodes_to_add)

        for i, graph_input in enumerate(self.model.graph.input):
            if i > 0:
                dims = graph_input.type.tensor_type.shape.dim
                dims[2].dim_value = new_parameters["num_heads"]
                dims[4].dim_value = new_head_size

        for i, output in enumerate(self.model.graph.output):
            dims = output.type.tensor_type.shape.dim
            if i == 0:
                dims[2].dim_value = new_hidden
            if i > 0:
                dims[2].dim_value = new_parameters["num_heads"]
                dims[4].dim_value = new_head_size
Exemplo n.º 42
0
    def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
        """Match a GPT-2 style attention subgraph above ``normalize_node`` and fuse it.

        The method walks up from ``normalize_node`` and matches, in turn, the
        output projection path, the V path, the fully connected (QKV) path,
        the QK/softmax path with its mask, and the Q and K paths. The mask
        initializer must have shape 1x1xWxW and be either all ones or lower
        triangular. Past and present state tensors are then matched, and
        ``self.create_attention_node`` is called. Any failed match returns
        without changing the graph.

        On success ``self.prune_graph`` is set to True so the now unused
        nodes are removed later.

        Args:
            normalize_node: Node whose parents are matched against the
                attention pattern.
            input_name_to_nodes: Mapping passed to ``self.match_present``.
            output_name_to_node: Mapping from a tensor name to the node that
                produces it.

        Returns:
            None.
        """
        return_indice = []
        qkv_nodes = self.model.match_parent_path(
            normalize_node,
            ['Add', 'Reshape', 'Gemm', 'Reshape', 'Reshape', 'Transpose', 'MatMul'],
            [0,      None,      0,     0,          0,         0,           0],
            output_name_to_node=output_name_to_node,
            return_indice=return_indice
            ) # yapf: disable
        if qkv_nodes is None:
            return
        (add_qkv, reshape_qkv, gemm_qkv, reshape_1, reshape_2, transpose_qkv,
         matmul_qkv) = qkv_nodes

        # The Add input on the other side of the matched Reshape branch.
        another_input = add_qkv.input[1 - return_indice[0]]

        v_nodes = self.model.match_parent_path(
            matmul_qkv, ['Concat', 'Transpose', 'Reshape', 'Split'],
            [1, 1, 0, 0])
        if v_nodes is None:
            logger.debug("fuse_attention: failed to match v path")
            return
        (concat_v, transpose_v, reshape_v, split_fc) = v_nodes

        fc_nodes = self.model.match_parent_path(
            split_fc, ['Reshape', 'Gemm', 'Reshape', 'LayerNormalization'],
            [0, 0, 0, 0], output_name_to_node)
        if fc_nodes is None:
            # Alternative layout: the projection is MatMul + Add instead of Gemm.
            fc_nodes = self.model.match_parent_path(
                split_fc, ['Add', 'MatMul', 'LayerNormalization'],
                [0, None, 0], output_name_to_node)
            if fc_nodes is None:
                logger.debug("fuse_attention: failed to match fc path")
                return
            fc_weight = fc_nodes[1].input[1]
            i, _ = self.model.get_constant_input(fc_nodes[0])
            fc_bias = fc_nodes[0].input[i]
        else:
            fc_weight = fc_nodes[1].input[1]
            fc_bias = fc_nodes[1].input[2]

        layernorm_before_attention = fc_nodes[-1]

        if another_input not in layernorm_before_attention.input:
            logger.debug(
                "Add and LayerNormalization shall have one same input")
            return

        is_unidirectional = True
        slice_mask = None
        input_mask_nodes = None
        concat_k_to_match = None
        qk_nodes = self.model.match_parent_path(
            matmul_qkv, ['Softmax', 'Sub', 'Mul', 'Div', 'MatMul'],
            [0, 0, 0, 0, 0])
        if qk_nodes is not None:
            (softmax_qk, sub_qk, mul_qk, div_qk, matmul_qk) = qk_nodes
            mask_nodes = self.model.match_parent_path(
                sub_qk,
                ['Mul', 'Sub', 'Slice', 'Slice', 'Unsqueeze', 'Sub', 'Squeeze', 'Slice', 'Shape', 'Div'],
                [1,      0,     1,       0,       1,           0,     0,         0,       0,       0])  # yapf: disable
            if mask_nodes is None:
                logger.debug(
                    "fuse_attention: failed to match unidirectional mask path")
                return
            div_mask = mask_nodes[-1]
            slice_mask = mask_nodes[3]

            if div_qk != div_mask:
                logger.debug("fuse_attention: skip since div_qk != div_mask")
                return
        else:
            # New pattern for gpt2 from PyTorch 1.5.0 and Transformers 2.9.0.
            i, qk_nodes, _ = self.model.match_parent_paths(
                matmul_qkv,
                [(['Softmax', 'Where', 'Div', 'MatMul'], [0, 0, 1, 0]),
                 (['Softmax', 'Add', 'Where', 'Div', 'MatMul'
                   ], [0, 0, None, 1, 0])], output_name_to_node)
            if qk_nodes is None:
                logger.debug("fuse_attention: failed to match qk nodes")
                return

            where_qk = qk_nodes[-3]
            div_qk = qk_nodes[-2]
            matmul_qk = qk_nodes[-1]

            if i == 1:
                # Second pattern: an Add applies an input attention mask.
                add_qk = qk_nodes[1]
                _, input_mask_nodes, _ = self.model.match_parent_paths(
                    add_qk,
                    [
                        (['Mul', 'Sub', 'Cast', 'Unsqueeze', 'Unsqueeze', 'Reshape'], [None, 0, 1, 0, 0, 0]),
                        (['Mul', 'Sub', 'Unsqueeze', 'Unsqueeze', 'Reshape'], [None, 0, 1, 0, 0]),
                        (['Mul', 'Sub', 'Unsqueeze', 'Unsqueeze'], [None, 0, 1, 0]),  # useless cast and reshape are removed.
                    ],
                    output_name_to_node)  # yapf: disable
                if input_mask_nodes is None:
                    logger.debug(
                        "fuse_attention: failed to match input attention mask path"
                    )
                    return

            mask_nodes = self.model.match_parent_path(
                where_qk,
                ['Cast', 'Slice', 'Slice', 'Unsqueeze', 'Sub', 'Squeeze', 'Slice', 'Shape'],
                [ 0,     0,       0,       1,           0,     0,         0,       0],
                output_name_to_node)  # yapf: disable
            if mask_nodes is None:
                # TODO: match mask path for GPT2LMHeadModel_BeamSearchStep.
                logger.debug("fuse_attention: failed to match mask path")
                return

            slice_mask = mask_nodes[2]

            div_or_concat = self.model.get_parent(mask_nodes[-1], 0,
                                                  output_name_to_node)
            if div_or_concat is None:
                # The Shape node has no producing parent to compare against;
                # skip fusion instead of failing on a missing node.
                logger.debug("fuse_attention: failed to match mask path")
                return
            if div_or_concat.op_type == "Div":
                div_mask = div_or_concat
                if div_qk != div_mask:
                    logger.debug(
                        "fuse_attention: skip since div_qk != div_mask")
                    return
            elif div_or_concat.op_type == "Concat":
                concat_k_to_match = div_or_concat
            else:
                # NOTE(review): this branch logs a failure but does not return,
                # so fusion continues — confirm this is intended.
                logger.debug("fuse_attention: failed to match mask path")

        # Validate that the mask data is either lower triangular (unidirectional) or all ones
        mask_initializer = self.model.get_initializer(slice_mask.input[0])
        if mask_initializer is None:
            # Without a constant mask the checks below cannot run; skip fusion.
            logger.debug(
                "fuse_attention: skip since mask is not an initializer")
            return
        mask_data = numpy_helper.to_array(mask_initializer)
        if not (len(mask_data.shape) == 4 and mask_data.shape[:2] == (1, 1)
                and mask_data.shape[2] == mask_data.shape[3]):
            logger.debug(
                "fuse_attention: skip since mask shape is not 1x1xWxW")
            return
        if np.allclose(mask_data, np.ones_like(mask_data)):
            is_unidirectional = False
        elif not np.allclose(mask_data, np.tril(np.ones_like(mask_data))):
            logger.debug(
                "fuse_attention: skip since mask is neither lower triangular nor ones"
            )
            return

        q_nodes = self.model.match_parent_path(
            matmul_qk, ['Transpose', 'Reshape', 'Split'], [0, 0, 0])
        if q_nodes is None:
            logger.debug("fuse_attention: failed to match q path")
            return
        (transpose_q, reshape_q, split_q) = q_nodes
        # Q, K and V must all come from the same Split node.
        if split_fc != split_q:
            logger.debug("fuse_attention: skip since split_fc != split_q")
            return

        k_nodes = self.model.match_parent_path(
            matmul_qk, ['Concat', 'Transpose', 'Reshape', 'Split'],
            [1, 1, 0, 0])
        if k_nodes is None:
            # This pattern is from pytorch 1.7.1 and transformers 4.6.1
            k_nodes = self.model.match_parent_path(
                matmul_qk,
                ['Transpose', 'Concat', 'Transpose', 'Reshape', 'Split'],
                [1, 0, 1, 0, 0])
            if k_nodes is None:
                logger.debug("fuse_attention: failed to match k path")
                return
            (_, concat_k, transpose_k, reshape_k, split_k) = k_nodes
        else:
            (concat_k, transpose_k, reshape_k, split_k) = k_nodes
        if split_fc != split_k:
            logger.debug("fuse_attention: skip since split_fc != split_k")
            return

        if concat_k_to_match and concat_k != concat_k_to_match:
            logger.debug(
                "fuse_attention: skip since concat_k != concat_k_to_match")
            return

        attention_mask_input_name = ''
        if input_mask_nodes is not None:
            input_name = input_mask_nodes[-1].input[0]
            attention_mask_input_name = self.cast_attention_mask(input_name)

        # Match past and present paths
        past = self.match_past_pattern_1(concat_k, concat_v, output_name_to_node) or \
               self.match_past_pattern_2(concat_k, concat_v, output_name_to_node)
        if past is None:
            logger.info("fuse_attention: failed to match past path")
            return
        if not self.model.find_graph_input(past):
            logger.debug("past is not graph input.")
            # For GPT2LMHeadModel_BeamSearchStep, there is an extra Gather node to select beam index so it is not graph input.

        present = self.match_present(concat_v, input_name_to_nodes)
        if present is None:
            logger.info("fuse_attention: failed to match present path")
            return
        if not self.model.find_graph_output(present):
            logger.info("expect present to be graph output")
            return

        self.create_attention_node(fc_weight, fc_bias, gemm_qkv, past, present,
                                   layernorm_before_attention.output[0],
                                   reshape_qkv.output[0],
                                   attention_mask_input_name,
                                   is_unidirectional)

        # we rely on prune_graph() to clean old subgraph nodes:
        # qk_nodes + q_nodes + k_nodes + v_nodes + mask_nodes + [reshape_qkv, transpose_qkv, matmul_qkv]
        self.prune_graph = True
Exemplo n.º 43
0
    def process_embedding(self):
        """
        Detect the word, segment and position embeddings and fuse them.

        For each LayerNormalization node the path ``Add <- Reshape <- Slice``
        is matched. The Slice must read a 2-D initializer, which is taken as
        the position embedding. The first parent of that Add must be another
        Add; its parent subgraphs must yield exactly two 2-D initializers.
        The one whose first dimension is 2 is taken as the segment embedding,
        the other as the word embedding. On success the embedding subgraph is
        created, the graph is pruned, and the search stops.
        """
        logger.info("start processing embedding layer...")
        output_name_to_node = self.output_name_to_node()

        for layer_norm_node in self.get_nodes_by_op_type("LayerNormalization"):
            pos_embed_path = self.match_parent_path(
                layer_norm_node, ['Add', 'Reshape', 'Slice'], [0, 1, 0],
                output_name_to_node)
            if pos_embed_path is None:
                continue

            add_node, _, slice_node = pos_embed_path
            initializer = self.get_initializer(slice_node.input[0])
            if initializer is None:
                continue

            table = numpy_helper.to_array(initializer)
            if len(table.shape) != 2:
                # A non 2-D table aborts the whole search, not just this node.
                logger.info(
                    "Failed to find position embedding. name:{}, shape:{}".
                    format(initializer.name, table.shape))
                return
            logger.info("Found position embedding. name:{}, shape:{}".format(
                initializer.name, table.shape))
            position_embedding = initializer.name

            first_parent = self.get_parent(add_node, 0, output_name_to_node)
            if first_parent is None or first_parent.op_type != "Add":
                continue

            embeddings = self.get_2d_initializers_from_parent_subgraphs(
                first_parent)
            if len(embeddings) != 2:
                logger.warning(
                    "Failed to find two embeddings (word and segment) from Add node. Found {}"
                    .format(embeddings))
                return

            word_embedding = None
            segment_embedding = None
            for name, shape in embeddings.items():
                # A table with two rows is treated as the segment embedding.
                if shape[0] == 2:
                    segment_embedding = name
                    logger.info("Found segment embedding. name:{}, shape:{}".
                                format(name, shape))
                else:
                    word_embedding = name
                    logger.info(
                        "Found words embedding. name:{}, shape:{}".format(
                            name, shape))

            if word_embedding is None or segment_embedding is None:
                logger.info("Failed to find both word and segment embedding")
                return

            logger.info("Create Embedding node")
            self.create_embedding_subgraph(layer_norm_node, word_embedding,
                                           segment_embedding,
                                           position_embedding)
            # Prune graph to remove those original embedding nodes.
            self.prune_graph()
            return
Exemplo n.º 44
0
def read_pb(filename):
    """Read a serialized ``TensorProto`` file and return it as a NumPy array.

    Args:
        filename: Path of the binary protobuf file to read.

    Returns:
        The array produced by ``numpy_helper.to_array`` for the parsed tensor.
    """
    with open(filename, 'rb') as pb_file:
        serialized = pb_file.read()
    proto = onnx.TensorProto()
    proto.ParseFromString(serialized)
    return numpy_helper.to_array(proto)