Example #1
def fuse_linear_ops(graph: Graph):
    """
    This function makes fusing of linear operations (Mul,Add) to Convolution/FC.
    """
    fuse_count = 0

    # Fusion in backward direction
    nodes = graph.pseudo_topological_sort()
    for node in nodes:
        is_fused = False

        # Fuse Mul into Convolution/FC (here the Mul follows the Convolution/FC)
        if node.soft_get('op') == 'Mul' and get_value_in_port(
                node) is not None and node.has_and_set('can_be_fused'):
            fuse_nodes = backward_bfs(
                node, [], ['Convolution', 'Deconvolution', 'MatMul'])
            is_fused = _fuse_mul(graph, node, fuse_nodes)

        fuse_count += is_fused

    # Fusion in forward direction
    nodes = graph.pseudo_topological_sort(reverse=True)
    for node in nodes:
        is_fused = False

        # Fuse Mul into Convolution/FC (here the Mul precedes the Convolution/FC)
        if node.soft_get('op') == 'Mul' and get_value_in_port(
                node) is not None and node.has_and_set('can_be_fused'):
            fuse_nodes = forward_bfs(
                node, [], ['Convolution', 'Deconvolution', 'MatMul'])
            is_fused = _fuse_mul(graph, node, fuse_nodes, False)

        fuse_count += is_fused

    log.debug("Fused {} nodes".format(fuse_count))
Example #2
    def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
        quantize = match['quantize']
        preop = match['preop']

        tensor_port, value_port = get_tensor_in_port(preop), get_value_in_port(
            preop)

        if value_port is None or value_port.data.get_value() is None:
            log.debug(
                'MulQuantizeFuse: cannot fuse because Mul op has dynamic inputs'
            )
            return

        mul_val = value_port.data.get_value()
        # A non-positive scale cannot be folded into the thresholds (it would flip or
        # collapse the quantization range), so skip fusing
        if np.any(mul_val <= 0):
            return

        # The values on FakeQuantize input ports 1 and 2 are modified in place, so the data
        # nodes feeding those ports must not have other consumers (at most the same FakeQuantize
        # consuming them on ports 1 and 2). Duplicate the FakeQuantize in_port 1, 2 data if needed
        resolve_shared_inputs(node=quantize, port_ids_to_duplicate=[1, 2])

        # TODO: need some special processing for values that exactly equal to threshold

        quantize.in_port(1).data.set_value(
            quantize.in_port(1).data.get_value() / mul_val)
        if quantize.in_node(1).id != quantize.in_node(2).id:
            quantize.in_port(2).data.set_value(
                quantize.in_port(2).data.get_value() / mul_val)

        # Reconnect the Mul input source directly to FakeQuantize, as the Mul is no longer needed here
        in_mul_connection = quantize.in_port(0).get_source().node.in_port(
            0).get_connection()
        quantize.in_port(0).disconnect()
        in_mul_connection.add_destination(quantize.in_port(0))
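
A rough self-check of the identity MulQuantizeFuse relies on, written with a simplified per-tensor reference FakeQuantize based on the operation spec (illustrative only; it ignores the exact-threshold corner case mentioned in the TODO above). For a positive scale s, FakeQuantize(x * s, in_low, in_high, ...) equals FakeQuantize(x, in_low / s, in_high / s, ...); the Add case in the next example uses the analogous shift of the input thresholds instead.

import numpy as np

def fake_quantize_ref(x, in_low, in_high, out_low, out_high, levels=256):
    # Simplified reference FakeQuantize: element-wise, per-tensor thresholds
    q = np.round((x - in_low) / (in_high - in_low) * (levels - 1))
    y = q / (levels - 1) * (out_high - out_low) + out_low
    return np.where(x <= in_low, out_low, np.where(x > in_high, out_high, y))

x = np.random.rand(3, 5)
s = 2.0                                   # positive Mul constant (mul_val)
in_low, in_high, out_low, out_high = 0.0, 1.0, 0.0, 1.0

original = fake_quantize_ref(x * s, in_low, in_high, out_low, out_high)
fused = fake_quantize_ref(x, in_low / s, in_high / s, out_low, out_high)
assert np.allclose(original, fused)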
Example #3
    def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
        quantize = match['quantize']
        preop = match['preop']

        # Do not fuse if the Add is fed directly by a Convolution/Deconvolution/MatMul
        for i in [0, 1]:
            if preop.in_port(i).get_source().node.soft_get('type') in [
                    'Convolution', 'Deconvolution', 'MatMul'
            ]:
                return

        tensor_port, value_port = get_tensor_in_port(preop), get_value_in_port(
            preop)

        if value_port is None or value_port.data.get_value() is None:
            log.debug(
                'AddQuantizeFuse: cannot fuse because Add op has dynamic inputs'
            )
            return

        # The values on FakeQuantize input ports 1 and 2 are modified in place, so the data
        # nodes feeding those ports must not have other consumers (at most the same FakeQuantize
        # consuming them on ports 1 and 2). Duplicate the FakeQuantize in_port 1, 2 data if needed
        resolve_shared_inputs(node=quantize, port_ids_to_duplicate=[1, 2])

        quantize.in_port(1).data.set_value(
            quantize.in_port(1).data.get_value() - value_port.data.get_value())
        if quantize.in_node(1).id != quantize.in_node(2).id:
            quantize.in_port(2).data.set_value(
                quantize.in_port(2).data.get_value() -
                value_port.data.get_value())

        # Reconnect the Add input source directly to FakeQuantize, as the Add is no longer needed here
        in_add_connection = quantize.in_port(0).get_source().node.in_port(
            0).get_connection()
        quantize.in_port(0).disconnect()
        in_add_connection.add_destination(quantize.in_port(0))
Example #4
    def mark_eltwise_node(self, node, feature_channel=None):
        """
        Check whether an elementwise node can be fused and/or converted to ScaleShift and
        reset its 'can_be_fused'/'can_be_scaleshift' flags when its input shapes do not allow it.
        """
        tensor_port, value_port = get_tensor_in_port(node), get_value_in_port(
            node)
        if tensor_port is None or value_port is None:
            self.set_flags_to_false(node,
                                    ['can_be_fused', 'can_be_scaleshift'])
            return

        connected_in_ports = {
            idx: port
            for idx, port in node.in_ports().items()
            if not port.disconnected()
        }
        if len(connected_in_ports) != 2:
            return

        tensor_shape = tensor_port.data.get_shape()
        out_shape = node.out_port(0).data.get_shape()
        assert tensor_shape is not None and out_shape is not None
        if not np.array_equal(tensor_shape, out_shape):
            # ScaleShift operation doesn't support broadcasting
            self.set_flags_to_false(node,
                                    ['can_be_fused', 'can_be_scaleshift'])
            return

        value_shape = value_port.data.get_shape()
        assert value_shape is not None
        assert len(value_shape) <= len(tensor_shape), \
            "No broadcasting was done for elementwise node {} due to previous checks in the EltwiseChecker class, " \
            "but the constant input rank is larger than the tensor input rank, which is inconsistent".format(node.name)

        # if both tensors are 0D they cannot be converted to scaleshift
        if len(tensor_shape) == 0 and len(value_shape) == 0:
            self.set_flags_to_false(node, ['can_be_scaleshift'])
            return

        broadcasted_value_shape = shape_insert(
            value_shape, 0, [1] * (len(tensor_shape) - len(value_shape)))

        feature_dim = min(1, tensor_shape.size -
                          1) if node.graph.graph['layout'] == 'NCHW' else -1
        if feature_channel is not None:
            feature_dim = feature_channel
        ones = np.ones(len(tensor_shape))
        possible_shape = ones.copy()
        np.put(possible_shape, feature_dim, tensor_shape.item(feature_dim))

        if not np.array_equal(broadcasted_value_shape, ones) and \
                not np.array_equal(broadcasted_value_shape, possible_shape):
            # ScaleShift weights should have [1,C,1,1]-like or [1,1,1,1]-like shape
            self.set_flags_to_false(node,
                                    ['can_be_fused', 'can_be_scaleshift'])
            return

        if len(tensor_shape) not in [2, 4, 5]:
            # ScaleShift operation is supported for 2D, 4D or 5D tensor inputs
            self.set_flags_to_false(node, ['can_be_scaleshift'])
            return
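
A concrete walk-through of the ScaleShift shape check with hypothetical NCHW shapes: the constant, left-padded with ones up to the tensor rank (mirroring the shape_insert call above), must be either all ones or [1, C, 1, 1]-like.

import numpy as np

tensor_shape = np.array([1, 64, 28, 28])   # NCHW data input of the elementwise node
value_shape = np.array([64, 1, 1])         # constant input of the elementwise node

# Left-pad the constant shape with ones up to the tensor rank
broadcasted_value_shape = np.insert(
    value_shape, 0, [1] * (len(tensor_shape) - len(value_shape)))    # -> [1, 64, 1, 1]

feature_dim = 1                            # channel axis for NCHW
possible_shape = np.ones(len(tensor_shape), dtype=np.int64)
possible_shape[feature_dim] = tensor_shape[feature_dim]              # -> [1, 64, 1, 1]

# The constant is per-channel, so the node keeps its ScaleShift flags
assert np.array_equal(broadcasted_value_shape, possible_shape)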
Example #5
def fuse_mul_add_sequence(graph: Graph):
    """
    This function finds first valid Mul/Add node and pass it to fuse_linear_sequence where full sequence will be found
    """
    while True:
        is_fused = False
        for node in graph.pseudo_topological_sort():
            # the node may already have been removed by a previous fusion in this pass
            if node.id in graph:
                if node.soft_get('op') in ['Mul', 'Add'] and get_value_in_port(node) is not None and \
                        node.soft_get('can_be_fused') is True:
                    is_fused |= _fuse_linear_sequence(graph, node)
        if not is_fused:
            break
Example #6
def mark_fusable_muls_on_weights(graph):
    """
    Mark Mul nodes whose single consumer is a Convolution/Deconvolution/MatMul and whose
    constant input has exactly one non-trivial axis as fusable.
    """
    for node in graph.get_op_nodes(op='Mul'):
        children = node.out_port(0).get_destinations()
        if len(children) > 1 or children[0].node.soft_get('type') not in [
                'Convolution', 'Deconvolution', 'MatMul'
        ]:
            continue
        value_in_port = get_value_in_port(node)
        if value_in_port is None:
            continue
        value_shape = value_in_port.data.get_shape()
        non_one_axis = np.argwhere(value_shape != 1)
        if non_one_axis.size != 1:
            continue
        non_one_axis = non_one_axis.item(0)
        node['can_be_fused'] = True
        EltwiseChecker().mark_eltwise_node(node, non_one_axis)
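
A tiny illustration (hypothetical shape) of the "single non-one axis" condition that selects the feature_channel passed to mark_eltwise_node:

import numpy as np

value_shape = np.array([1, 1, 1, 16])      # per-channel scale on a weights path
non_one_axis = np.argwhere(value_shape != 1)
assert non_one_axis.size == 1              # exactly one non-trivial axis -> Mul is fusable
assert non_one_axis.item(0) == 3           # this axis becomes the feature_channel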
Example #7
def _fuse_linear_sequence(graph: Graph, start_node: Node):
    """
    This function finds the sequence of Mul/Add operations and replaces this sequence with two ops (Mul->Add).
    :param graph:
    :param start_node: The first operation of the sequence
    """
    fnodes = [start_node]
    while True:
        node = fnodes[-1]
        destinations = node.out_port(0).get_destinations()
        if len(destinations) != 1:
            break
        dst_node = destinations[0].node
        if dst_node.soft_get('op') in ['Mul', 'Add'] and get_value_in_port(dst_node) is not None and \
                dst_node.soft_get('can_be_fused') is True:
            fnodes.append(dst_node)
        else:
            break

    # Nothing to do: a single node, or a sequence that is already a minimal Mul->Add pair
    if len(fnodes) == 1 or (len(fnodes) == 2 and fnodes[0].op == 'Mul' and fnodes[1].op == 'Add'):
        return False

    input_shape = get_tensor_in_port(start_node).data.get_shape()

    init_dims_cnt = len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 1

    first_value = get_value_in_port(fnodes[0]).data.get_value()
    if not isinstance(first_value, np.ndarray):
        first_value = mo_array(first_value)
    first_value_type = first_value.dtype

    mul = np.ones([1 for x in range(init_dims_cnt)], dtype=first_value_type)
    add = np.zeros([1 for x in range(init_dims_cnt)], dtype=first_value_type)

    first_mul_name = None
    first_add_name = None

    for node in fnodes:
        const_port_value = get_value_in_port(node).data.get_value()
        if node.op == 'Mul':
            if first_mul_name is None:
                first_mul_name = node.name
            mul = mul * const_port_value
            add = add * const_port_value
        elif node.op == 'Add':
            if first_add_name is None:
                first_add_name = node.name
            add = add + const_port_value

    # If mul is scalar we broadcast it to biases shape
    if mul.shape != add.shape and len(mul.shape) == 1 and mul.shape[0] == 1:
        mul = mo_array([mul[0] for x in range(add.shape[0])])

    assert (compatible_shapes(get_tensor_in_port(fnodes[0]).data.get_shape(), fnodes[-1].out_port(0).data.get_shape()))

    mul_op = Mul(graph, dict(name='{}/Fused_Mul_'.format(first_mul_name or '')))
    add_op = Add(graph, dict(name='{}/Fused_Add_'.format(first_add_name or '')))

    in_port = get_tensor_in_port(fnodes[0])
    out_port = fnodes[-1].out_port(0)

    """
    Four cases considered below:
        1. Mul and Add have valid values (mul value != 1 and add value != 0)
        2. Only Mul has valid values, so we add only Mul node
        3. Only Add has valid values, so we add only Add node
        4. When Mul and Add has not valid values we just merge two data nodes
    """
    if any([x != 0 for x in np.nditer(add)]) and any([x != 1 for x in np.nditer(mul)]):
        #  Const\    Const\
        #  ----->Mul------>Add-->
        mul_const = Const(graph, dict(name="data_mul_", value=mo_array(mul))).create_node()
        add_const = Const(graph, dict(name="data_add_", value=mo_array(add))).create_node()

        mul_node = mul_op.create_node()
        add_node = add_op.create_node()

        in_port.get_connection().set_destination(mul_node.in_port(0))
        mul_const.out_port(0).connect(mul_node.in_port(1))

        mul_node.out_port(0).connect(add_node.in_port(0))
        add_const.out_port(0).connect(add_node.in_port(1))
        out_port.get_connection().set_source(add_node.out_port(0))
    elif any([x != 1 for x in np.nditer(mul)]):
        #  Const\
        #  ----->Mul-->
        mul_const = Const(graph, dict(name="data_mul_", value=mo_array(mul))).create_node()
        mul_node = mul_op.create_node()

        in_port.get_connection().set_destination(mul_node.in_port(0))
        mul_const.out_port(0).connect(mul_node.in_port(1))
        out_port.get_connection().set_source(mul_node.out_port(0))
    elif any([x != 0 for x in np.nditer(add)]):
        #  Const\
        #  ----->Add-->
        add_const = Const(graph, dict(name="data_add_", value=mo_array(add))).create_node()
        add_node = add_op.create_node()

        in_port.get_connection().set_destination(add_node.in_port(0))
        add_const.out_port(0).connect(add_node.in_port(1))
        out_port.get_connection().set_source(add_node.out_port(0))
    else:
        source_node = in_port.get_source()
        in_port.disconnect()
        out_port.get_connection().set_source(source_node)

    # Remove fused nodes
    for node in fnodes:
        graph.remove_node(node.id)

    log.debug('Fused {} operations'.format(len(fnodes)))
    return True
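
A small sketch (plain NumPy, illustrative constants) of how the accumulation loop above collapses an arbitrary Mul/Add chain into a single Mul followed by a single Add:

import numpy as np

x = np.random.rand(1, 3, 4, 4)
sequence = [('Mul', 2.0), ('Add', 0.5), ('Mul', 0.25), ('Add', -1.0)]

# Apply the chain node by node
y_chain = x
for op, c in sequence:
    y_chain = y_chain * c if op == 'Mul' else y_chain + c

# Fold it the way the loop above does: Mul rescales both accumulators, Add only shifts
mul, add = 1.0, 0.0
for op, c in sequence:
    if op == 'Mul':
        mul, add = mul * c, add * c
    else:
        add = add + c

assert np.allclose(y_chain, x * mul + add)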
Example #8
def _fuse_mul(graph: Graph,
              node: Node,
              fuse_nodes: list,
              backward: bool = True):
    """
    This function takes Mul node and array of convolution/fc nodes for further fusion
    Parameters
    ----------
    x : bool
        If backward is False, that means that Convolution/FC goes after Mul node
        else means that Mul goes after Convolutions/FC
        :param backward:
        :param fuse_nodes:
        :param node:
        :param graph:
    """
    is_fused = False
    const_port, tensor_port = get_value_in_port(node), get_tensor_in_port(node)

    if const_port is None or tensor_port is None:
        log.warning(
            'Cannot do fuse_mul for node {} because this node has wrong inputs'
            .format(node.id))
        return False

    for fuse_node in fuse_nodes:
        if fuse_node.soft_get('can_be_fused') is False:
            log.warning(
                'Node {} can\'t be used in fusing because attr can_be_fused = False'
                .format(fuse_node.name))
            return False

        if len(fuse_node.in_ports()) < 2:
            log.warning('Node {} has no weights node'.format(fuse_node.name))
            return False

        if not backward and not fuse_node.has_valid('layout'):
            log.warning('Node {} has no layout attr'.format(fuse_node.name))
            return False

        weights_port = fuse_node.in_port(1)
        if not weights_port.data.has_valid('output_channel_dim') or \
                not weights_port.data.has_valid('input_channel_dim'):
            log.warning(
                'Cannot do fuse_mul for node {} because there is no field '
                'output_channel_dim and/or input_channel_dim in weights.'.format(
                    fuse_node.soft_get('name')))
            return False

        inp_ch = weights_port.data.get_attr('input_channel_dim')
        out_ch = weights_port.data.get_attr('output_channel_dim')
        if max(inp_ch, out_ch) >= len(weights_port.data.get_shape()):
            log.warning('Node {} has wrong weights shape'.format(
                fuse_node.name))
            return False

    for fuse_node in fuse_nodes:
        weights_port = fuse_node.in_port(1)
        value = mo_array(const_port.data.get_value())

        value = np.squeeze(value)

        # TODO : ch_dim should be equal to node.in_node(1).value.shape
        # We will multiply weights according output/input channel dimension
        ch_dim = weights_port.data.get_attr(
            'output_channel_dim' if backward else 'input_channel_dim')
        shape = mo_array([weights_port.data.get_shape()[ch_dim]])

        # Scalar broadcast
        if value.size == 1:
            value = np.full(shape, value.item(), dtype=value.dtype)

        # Common broadcast for forward fusion
        if not backward:
            cnt = shape[-1] / value.shape[0]
            if fuse_node.layout == 'NCHW':
                tmp = mo_array([], dtype=value.dtype)
                for val in value:
                    tmp = np.concatenate((tmp, np.repeat(val, cnt)))
                value = mo_array(tmp)
            else:
                value = np.tile(value, int(cnt))

        # Expand dims for multiplication (ex. [38] to [38, 1, 1])
        wdims_number = weights_port.data.get_attr('dims_number')
        for x in range(wdims_number - ch_dim - 1):
            shape = np.append(shape, 1)

        mul_val = mo_array(value)
        # If the value fails to reshape to the provided shape, skip fusing.
        # This can happen in case of group != 1 of the convolution.
        try:
            value = np.reshape(value, shape)
        except ValueError:
            log.error(
                "Cannot fuse const from {} to {}. Reshape failed. Skipping.".
                format(node.soft_get('name', node.id),
                       fuse_node.soft_get('name', fuse_node.id)),
                extra={'is_warning': True})
            return False

        # Weights multiplication
        mul_name = node.name + '_copy'
        mul_const = Const(graph, {
            'value': value,
            'name': mul_name + '/const'
        }).create_node()
        w_mul = node.copy_node({
            'name': mul_name,
            'in_ports_count': len(node.in_ports()),
            'out_ports_count': len(node.out_ports()),
            'can_be_fused': False
        })
        w_mul.in_port(const_port.idx).connect(mul_const.out_port(0))
        w_const = weights_port.get_source()
        weights_port.get_connection().set_source(w_mul.out_port(0))
        w_const.connect(w_mul.in_port(tensor_port.idx))

        fuse_node_in_data = fuse_node.in_node(weights_port.idx)
        w_const_out_data = w_const.node.out_node(w_const.idx)

        # During this reconnection new data node name is copied from the data node
        # outgoing from w_const port. Duplicate names of data nodes lead to appearing
        # of duplicate op node names after constant folding. So we should manually
        # set a unique name for the new data node.
        if fuse_node_in_data.soft_get('name') == w_const_out_data.soft_get('name') and \
                fuse_node_in_data.soft_get('name', None) is not None:
            fuse_node.in_node(
                weights_port.idx)['name'] = graph.unique_id(mul_name)

        # If we fuse in the backward direction we should also multiply the biases, if they exist
        if backward and len(fuse_node.in_ports()) == 3 and not fuse_node.in_port(2).disconnected() and \
                not fuse_node.has_and_set('shape_input'):
            conv_bias = fuse_node.in_port(2)
            conv_bias.data.set_value(conv_bias.data.get_value() *
                                     np.squeeze(mul_val))

        mul_const.infer(mul_const)
        w_mul.infer(w_mul)

        log.debug('Fused: {} to {}'.format(node.name, fuse_node.name))
        is_fused = True

    if is_fused:
        # Delete Mul node
        producer_port = tensor_port.get_source()
        tensor_port.disconnect()
        const_port.disconnect()
        # the Mul multiplication now happens on the weights before the convolution, so the
        # Convolution output corresponds to the original Mul output
        node.out_port(0).get_connection().set_source(producer_port, "dest")

    return is_fused
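
A NumPy sketch of the weights-side multiplication for the backward case (illustrative shapes, not MO API code): the per-output-channel scale is reshaped so it multiplies whole output-channel slices of the Convolution weights, and any bias would be scaled by the same values, since (Conv(x, W) + b) * s == Conv(x, W * s) + b * s.

import numpy as np

weights = np.random.rand(16, 8, 3, 3)        # Convolution weights: [C_out, C_in, kH, kW]
scale = np.random.rand(16)                   # per-output-channel Mul constant

ch_dim = 0                                   # output_channel_dim for this weights layout
dims_number = weights.ndim                   # 4
shape = [weights.shape[ch_dim]] + [1] * (dims_number - ch_dim - 1)   # -> [16, 1, 1, 1]

fused_weights = weights * scale.reshape(shape)

# Spot-check one spatial tap as a matmul over channels: scaling the output equals
# convolving with the scaled weights
x = np.random.rand(8)                        # C_in values at one spatial position
out = weights[:, :, 1, 1] @ x
assert np.allclose(out * scale, fused_weights[:, :, 1, 1] @ x)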