Example #1
def test_scope_name_workarounds():
    class ModelWithGemms(nn.Module):
        def __init__(self):
            super(ModelWithGemms, self).__init__()
            self.drop1 = nn.Dropout()
            self.fc1 = nn.Linear(100, 50)
            self.relu1 = nn.ReLU(inplace=True)
            self.drop2 = nn.Dropout()
            self.fc2 = nn.Linear(50, 25)
            self.relu2 = nn.ReLU(inplace=True)
            self.fc3 = nn.Linear(25, 1)
            self.drop3 = nn.Dropout()

        def forward(self, x):
            x = self.drop1(x)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.drop2(x)
            x = self.fc2(x)
            x = self.relu2(x)
            x = self.fc3(x)
            x = self.drop3(x)
            return x

    m = ModelWithGemms()
    dummy_input = distiller.get_dummy_input(input_shape=(1, 100))
    expected_types = ('Gemm', 'Relu', 'Gemm', 'Relu', 'Gemm')

    # We have workarounds for 2 issues:
    #   1. GEMM ops get the scope name of the op that came before them
    #   2. Ops that come before a dropout op get the scope name of the dropout op
    # If both conditions apply, empirically it's issue #2 that manifests

    # For the model above we expect the ops in the graph to be named (in order):
    #   'fc1', 'relu1', 'fc2', 'relu2', 'fc3'
    # (note that dropout ops are dropped)
    #
    # But without our workarounds in place, we'll get:
    #   'drop1', 'drop2', 'drop2__1', 'relu2', 'drop3'
    #
    # What happens is:
    #   * 'fc1' - issue #1 applies, so 'fc1' --> 'drop1'
    #   * 'relu1' - issue #2 applies, so 'relu1' --> 'drop2'
    #   * 'fc2' - issue #1 applies, so 'fc2' --> 'drop2__1' ('__1' suffix because 'drop2' already exists)
    #   * 'relu2' should be ok as-is
    #   * 'fc3' is susceptible to both issues - it's a GEMM op AND it comes before a dropout. As mentioned above,
    #     issue #2 "wins", so 'fc3' --> 'drop3'

    # We test without the workarounds to check whether the issues still exist. New PyTorch
    # versions may fix them, in which case we can remove the workarounds.
    sg = SummaryGraph(m, dummy_input, apply_scope_name_workarounds=False)
    names, types = zip(*[(op_name, op['type']) for op_name, op in sg.ops.items()])
    assert names == ('drop1', 'drop2', 'drop2__1', 'relu2', 'drop3')
    assert types == expected_types

    # Now test with the workarounds
    sg = SummaryGraph(m, dummy_input)
    names, types = zip(*[(op_name, op['type']) for op_name, op in sg.ops.items()])
    assert names == ('fc1', 'relu1', 'fc2', 'relu2', 'fc3')
    assert types == expected_types
Example #2
def test_scope_name_workarounds():
    class ModelWithGemms(nn.Module):
        def __init__(self):
            super(ModelWithGemms, self).__init__()
            self.drop1 = nn.Dropout()
            self.fc1 = nn.Linear(100, 50)
            self.relu1 = nn.ReLU(inplace=True)
            self.drop2 = nn.Dropout()
            self.fc2 = nn.Linear(50, 25)
            self.relu2 = nn.ReLU(inplace=True)
            self.fc3 = nn.Linear(25, 1)
            self.drop3 = nn.Dropout()

        def forward(self, x):
            x = self.drop1(x)
            x = self.fc1(x)
            x = self.relu1(x)
            x = self.drop2(x)
            x = self.fc2(x)
            x = self.relu2(x)
            x = self.fc3(x)
            x = self.drop3(x)
            return x

    m = ModelWithGemms()
    dummy_input = distiller.get_dummy_input(input_shape=(1, 100))
    expected_types = ('Gemm', 'Relu', 'Gemm', 'Relu', 'Gemm')

    # We have a workaround for the following issue:
    # (used to be 2 issues but one got fixed in PyTorch 1.2)
    #   * Ops that come before a dropout op get the scope name of the dropout op

    # For the model above we expect the ops in the graph to be named (in order):
    #   'fc1', 'relu1', 'fc2', 'relu2', 'fc3'
    # (note that dropout ops are dropped)
    #
    # But since 'relu1' and 'fc3' come before a dropout op, without the workaround in place we'll get:
    #   'fc1', 'drop2', 'fc2', 'relu2', 'drop3'

    # We test without the workaround to check whether the issue still exists. New PyTorch
    # versions may fix it, in which case we can remove the workaround.
    sg = SummaryGraph(m, dummy_input, apply_scope_name_workarounds=False)
    names, types = zip(*[(op_name, op['type'])
                         for op_name, op in sg.ops.items()])
    assert names == ('fc1', 'drop2', 'fc2', 'relu2', 'drop3')
    assert types == expected_types

    # Now test with the workarounds
    sg = SummaryGraph(m, dummy_input)
    names, types = zip(*[(op_name, op['type'])
                         for op_name, op in sg.ops.items()])
    assert names == ('fc1', 'relu1', 'fc2', 'relu2', 'fc3')
    assert types == expected_types
Example #3
def test_fuse_modules_with_pre_exist_adj_map():
    model = WrappedSequential(DummyA(), DummyB(), DummyD())
    with pytest.raises(ValueError):
        mt.fuse_modules(model,
                        types_sequence,
                        fuse_fn,
                        dummy_input=None,
                        adjacency_map=None)

    dummy_input = torch.randn(10, 10)
    sg = SummaryGraph(deepcopy(model), dummy_input)
    adj_map = sg.adjacency_map()

    fused_dummy_input = mt.fuse_modules(deepcopy(model),
                                        types_sequence,
                                        fuse_fn,
                                        dummy_input=dummy_input,
                                        adjacency_map=None)
    compare_models(fused_dummy_input, fused_reference())

    fused_pre_sg = mt.fuse_modules(deepcopy(model),
                                   types_sequence,
                                   fuse_fn,
                                   dummy_input=None,
                                   adjacency_map=adj_map)
    compare_models(fused_pre_sg, fused_reference())

    fused_both = mt.fuse_modules(deepcopy(model),
                                 types_sequence,
                                 fuse_fn,
                                 dummy_input=dummy_input,
                                 adjacency_map=adj_map)
    compare_models(fused_both, fused_reference())
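Since building a SummaryGraph requires tracing the model with a dummy input, a precomputed adjacency map can be shared across fusion passes instead of re-tracing each time. A minimal sketch, reusing the types_sequence and fuse_fn pytest fixtures from the test above:

sg = SummaryGraph(deepcopy(model), dummy_input)
adj_map = sg.adjacency_map()
# One traced adjacency map can back any number of fusion attempts on copies of the model:
for _ in range(3):
    fused = mt.fuse_modules(deepcopy(model), types_sequence, fuse_fn,
                            dummy_input=None, adjacency_map=adj_map)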
Example #4
def test_gemm_nodes_scope_names():
    class ModelWithGemms(nn.Module):
        def __init__(self):
            super(ModelWithGemms, self).__init__()
            self.drop1 = nn.Dropout()
            self.fc1 = nn.Linear(100, 50)
            self.relu1 = nn.ReLU(inplace=True)
            self.drop2 = nn.Dropout()
            self.fc2 = nn.Linear(50, 25)
            self.relu2 = nn.ReLU(inplace=True)
            self.fc3 = nn.Linear(25, 1)

        def forward(self, x):
            # Isn't this pretty...
            return self.fc3(self.relu2(self.fc2(self.drop2(self.relu1(self.fc1(self.drop1(x)))))))

    m = ModelWithGemms()
    sg = SummaryGraph(m, distiller.get_dummy_input(input_shape=(1, 100)))

    # For the model above we expect the ops to be named (in order):
    #   'drop1', 'fc1', 'relu1', 'drop2', 'fc2', 'relu2', 'fc3'
    # But without our workaround in place, they'll be named:
    #   'drop1', 'drop1__1', 'relu1', 'drop2', 'drop2__1', 'relu2', 'relu2__1'
    # (that is - each FC node gets the name of the node that precedes it)
    names, types = zip(*[(op_name, op['type']) for op_name, op in sg.ops.items()])
    assert names == ('drop1', 'fc1', 'relu1', 'drop2', 'fc2', 'relu2', 'fc3')
    assert types == ('Dropout', 'Gemm', 'Relu', 'Dropout', 'Gemm', 'Relu', 'Gemm')
Example #5
def create_graph(dataset, arch):
    dummy_input = get_input(dataset)
    assert dummy_input is not None, "Unsupported dataset ({}) - aborting draw operation".format(dataset)

    model = create_model(False, dataset, arch, parallel=False)
    assert model is not None
    return SummaryGraph(model, dummy_input)
Example #6
def named_params_layers_test_aux(dataset, arch, dataparallel: bool):
    model = create_model(False, dataset, arch, parallel=dataparallel)
    sgraph = SummaryGraph(model, get_input(dataset))
    sgraph_layer_names = set(k for k, i, j in sgraph.named_params_layers())
    for layer_name in sgraph_layer_names:
        assert sgraph.find_op(layer_name) is not None, \
            '{} was not found in summary graph'.format(layer_name)
Example #7
def test_named_params_layers(dataset, arch, parallel):
    model = create_model(False, dataset, arch, parallel=parallel)
    sgraph = SummaryGraph(model, distiller.get_dummy_input(dataset))
    sgraph_layer_names = set(k for k, i, j in sgraph.named_params_layers())
    for layer_name in sgraph_layer_names:
        assert sgraph.find_op(layer_name) is not None, \
            '{} was not found in summary graph'.format(layer_name)
Example #8
def test_weights_size_attr(dataset, arch, parallel):
    model = create_model(False, dataset, arch, parallel=parallel)
    sgraph = SummaryGraph(model, distiller.get_dummy_input(dataset))

    distiller.assign_layer_fq_names(model)
    for name, mod in model.named_modules():
        if isinstance(mod, (nn.Conv2d, nn.Linear)):
            op = sgraph.find_op(name)
            assert op is not None
            assert op['attrs']['weights_vol'] == distiller.volume(mod.weight)
Example #9
def test_merge_pad_avgpool():
    class ModelWithAvgPool(nn.Module):
        def __init__(self):
            super(ModelWithAvgPool, self).__init__()
            self.conv = nn.Conv2d(3, 10, 5)
            self.avgpool = nn.AvgPool2d(2)

        def forward(self, x):
            return self.avgpool(self.conv(x))

    m = ModelWithAvgPool()
    sg = SummaryGraph(m, distiller.get_dummy_input(input_shape=(1, 3, 50, 50)))

    avgpool_ops = [op_name for op_name in sg.ops if 'avgpool' in op_name]
    assert len(avgpool_ops) == 1
    assert sg.ops[avgpool_ops[0]]['name'] == 'avgpool'
    assert sg.ops[avgpool_ops[0]]['type'] == 'AveragePool'
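A note on the test above: the traced graph may lower AvgPool2d into a Pad op feeding an AveragePool op; the assertions check that SummaryGraph merges that pair back into a single node which keeps the module's scope name ('avgpool') and the 'AveragePool' ONNX type.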
Example #10
def collect_conv_details(model,
                         dataset,
                         perform_thinning,
                         layers_to_prune=None):
    dummy_input = distiller.get_dummy_input(dataset)
    g = SummaryGraph(model, dummy_input)
    conv_layers = OrderedDict()
    total_macs = 0
    total_params = 0
    for layer_id, (name, m) in enumerate(model.named_modules()):
        if isinstance(m, torch.nn.Conv2d):
            conv = SimpleNamespace()
            conv.t = len(conv_layers)
            conv.k = m.kernel_size[0]
            conv.stride = m.stride

            # Use the SummaryGraph to obtain some other details of the model
            conv_op = g.find_op(normalize_module_name(name))
            assert conv_op is not None

            conv.weights_vol = conv_op['attrs']['weights_vol']
            total_params += conv.weights_vol
            conv.macs = conv_op['attrs']['MACs']
            conv_pname = name + ".weight"
            conv_p = distiller.model_find_param(model, conv_pname)
            if not perform_thinning:
                #conv.macs *= distiller.density_ch(conv_p)  # Channel pruning
                conv.macs *= distiller.density_3D(conv_p)  # Filter pruning
            total_macs += conv.macs

            conv.ofm_h = g.param_shape(conv_op['outputs'][0])[2]
            conv.ofm_w = g.param_shape(conv_op['outputs'][0])[3]
            conv.ifm_h = g.param_shape(conv_op['inputs'][0])[2]
            conv.ifm_w = g.param_shape(conv_op['inputs'][0])[3]

            conv.name = name
            conv.id = layer_id
            if layers_to_prune is None or name in layers_to_prune:
                conv_layers[len(conv_layers)] = conv
    return conv_layers, total_macs, total_params
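A minimal usage sketch for collect_conv_details (the CIFAR-10 architecture name is an assumption; any model containing Conv2d layers works):

model = create_model(False, 'cifar10', 'resnet20_cifar', parallel=False)  # arch name assumed
conv_layers, total_macs, total_params = collect_conv_details(model, 'cifar10',
                                                             perform_thinning=False)
for conv in conv_layers.values():
    print('{}: k={} MACs={:.0f}'.format(conv.name, conv.k, conv.macs))
print('total MACs: {:.0f}, total params: {}'.format(total_macs, total_params))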
Example #11
def create_graph(dataset, arch, parallel=False):
    dummy_input = distiller.get_dummy_input(dataset)
    model = create_model(False, dataset, arch, parallel)
    assert model is not None
    return SummaryGraph(model, dummy_input)
Example #12
def test_adjacency_map(parallel, dedicated_modules):
    class TestModel(nn.Module):
        def __init__(self):
            super(TestModel, self).__init__()
            self.conv = nn.Conv2d(3, 10, 5)
            self.bn = nn.BatchNorm2d(10)
            self.post_conv_bn = nn.ModuleList([nn.Tanh(), nn.ReLU()])

        def forward(self, x):
            res = self.conv(x)
            y = self.bn(res)
            for m in self.post_conv_bn:
                y = m(y)
            return y + res

    def check_adj_entry(actual, expected):
        assert actual.op_meta == expected.op_meta
        assert actual.predecessors == expected.predecessors
        assert actual.successors == expected.successors

    prefix = 'module.' if parallel else ''

    m = TestModel()
    if parallel:
        m = nn.DataParallel(m)
    sg = SummaryGraph(m, distiller.get_dummy_input(input_shape=(1, 3, 10, 10)))
    adj_map = sg.adjacency_map(dedicated_modules_only=dedicated_modules)

    if dedicated_modules:
        assert len(adj_map) == 4
    else:
        assert len(adj_map) == 5

    conv_op_meta = OpSimpleMetadata(prefix + 'conv', 'Conv')
    bn_op_meta = OpSimpleMetadata(prefix + 'bn', 'BatchNormalization')
    tanh_op_meta = OpSimpleMetadata(prefix + 'post_conv_bn.0', 'Tanh')
    relu_op_meta = OpSimpleMetadata(prefix + 'post_conv_bn.1', 'Relu')
    add_op_meta = OpSimpleMetadata('top_level_op', 'Add')

    name = conv_op_meta.name
    assert name in adj_map
    expected = AdjacentsEntry(conv_op_meta)
    expected.successors = [bn_op_meta] if dedicated_modules else [
        bn_op_meta, add_op_meta
    ]
    check_adj_entry(adj_map[name], expected)

    name = bn_op_meta.name
    assert name in adj_map
    expected = AdjacentsEntry(bn_op_meta)
    expected.predecessors = [conv_op_meta]
    expected.successors = [tanh_op_meta]
    check_adj_entry(adj_map[name], expected)

    name = tanh_op_meta.name
    assert name in adj_map
    expected = AdjacentsEntry(tanh_op_meta)
    expected.predecessors = [bn_op_meta]
    expected.successors = [relu_op_meta]
    check_adj_entry(adj_map[name], expected)

    name = relu_op_meta.name
    assert name in adj_map
    expected = AdjacentsEntry(relu_op_meta)
    expected.predecessors = [tanh_op_meta]
    expected.successors = [] if dedicated_modules else [add_op_meta]
    check_adj_entry(adj_map[name], expected)

    name = add_op_meta.name
    if dedicated_modules:
        assert name not in adj_map
    else:
        assert name in adj_map
        expected = AdjacentsEntry(add_op_meta)
        expected.predecessors = [relu_op_meta, conv_op_meta]
        check_adj_entry(adj_map[name], expected)
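For reference, a compact way to dump the same adjacency map that the test above checks entry by entry (only calls already shown in this test are used):

m = TestModel()
sg = SummaryGraph(m, distiller.get_dummy_input(input_shape=(1, 3, 10, 10)))
for name, entry in sg.adjacency_map().items():
    print('{}: preds={} succs={}'.format(name,
                                         [p.name for p in entry.predecessors],
                                         [s.name for s in entry.successors]))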
Example #13
def get_network_details(model, dataset, dependency_type, layers_to_prune=None):
    def make_conv(model, conv_module, g, name, seq_id, layer_id):
        conv = SimpleNamespace()
        conv.name = name
        conv.id = layer_id
        conv.t = seq_id
        conv.k = conv_module.kernel_size[0]
        conv.stride = conv_module.stride

        # Use the SummaryGraph to obtain some other details of the model
        conv_op = g.find_op(normalize_module_name(name))
        assert conv_op is not None

        conv.weights_vol = conv_op['attrs']['weights_vol']
        conv.macs = conv_op['attrs']['MACs']
        conv.n_ofm = conv_op['attrs']['n_ofm']
        conv.n_ifm = conv_op['attrs']['n_ifm']
        conv_pname = name + ".weight"
        conv_p = distiller.model_find_param(model, conv_pname)
        conv.ofm_h = g.param_shape(conv_op['outputs'][0])[2]
        conv.ofm_w = g.param_shape(conv_op['outputs'][0])[3]
        conv.ifm_h = g.param_shape(conv_op['inputs'][0])[2]
        conv.ifm_w = g.param_shape(conv_op['inputs'][0])[3]
        return conv

    def make_fc(model, fc_module, g, name, seq_id, layer_id):
        fc = SimpleNamespace()
        fc.name = name
        fc.id = layer_id
        fc.t = seq_id

        # Use the SummaryGraph to obtain some other details of the model
        fc_op = g.find_op(normalize_module_name(name))
        assert fc_op is not None

        fc.weights_vol = fc_op['attrs']['weights_vol']
        fc.macs = fc_op['attrs']['MACs']
        fc.n_ofm = fc_op['attrs']['n_ofm']
        fc.n_ifm = fc_op['attrs']['n_ifm']
        fc_pname = name + ".weight"
        fc_p = distiller.model_find_param(model, fc_pname)
        return fc

    dummy_input = distiller.get_dummy_input(dataset)
    g = SummaryGraph(model, dummy_input)
    all_layers = OrderedDict()
    pruned_indices = list()
    dependent_layers = set()
    total_macs = 0
    total_params = 0
    layers = OrderedDict({mod_name: m for mod_name, m in model.named_modules() 
                          if isinstance(m, (torch.nn.Conv2d, torch.nn.Linear))})
    for layer_id, (name, m) in enumerate(layers.items()):
        if isinstance(m, torch.nn.Conv2d):
            conv = make_conv(model, m, g, name, seq_id=len(pruned_indices), layer_id=layer_id)
            all_layers[layer_id] = conv
            total_params += conv.weights_vol
            total_macs += conv.macs

            if layers_to_prune is None or name in layers_to_prune:
                pruned_indices.append(layer_id)
                # Find the data-dependent layers of this convolution
                from utils.data_dependencies import find_dependencies
                conv.dependencies = list()
                find_dependencies(dependency_type, g, all_layers, name, conv.dependencies)
                dependent_layers.add(tuple(conv.dependencies))
        elif isinstance(m, torch.nn.Linear):
            fc = make_fc(model, m, g, name, seq_id=len(pruned_indices), layer_id=layer_id)
            all_layers[layer_id] = fc
            total_macs += fc.macs
            total_params += fc.weights_vol
 
    def convert_layer_names_to_indices(layer_names):
        """Args:
            layer_names - list of layer names
        Returns:
            list of layer indices
        """
        layer_indices = [index for name in layer_names
                         for index, layer in all_layers.items()
                         if layer.name == name[0]]
        return layer_indices

    dependent_indices = convert_layer_names_to_indices(dependent_layers)
    return all_layers, pruned_indices, dependent_indices, total_macs, total_params
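An illustrative call (the model and dataset names are assumptions; passing an empty layers_to_prune list skips the dependency analysis, so dependency_type is not exercised):

model = create_model(False, 'imagenet', 'resnet18', parallel=False)  # arch name assumed
all_layers, pruned_idx, dependent_idx, total_macs, total_params = \
    get_network_details(model, 'imagenet', dependency_type=None, layers_to_prune=[])
print('{} layers, {:.0f} MACs, {} params'.format(len(all_layers), total_macs, total_params))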
Example #14
def layers_topological_order(model, dummy_input, recurrent=False):
    """
    Prepares an ordered list of layers to quantize sequentially, sorted by their topological
    order in the graph.
    Args:
        model (nn.Module): the model to quantize.
        dummy_input (torch.Tensor): an input to be passed through the model.
        recurrent (bool): whether the model might have recurrent connections.
    """
    class _OpRank:
        def __init__(self, adj_entry, rank=None):
            self.adj_entry = adj_entry
            self._rank = rank or 0

        @property
        def rank(self):
            return self._rank

        @rank.setter
        def rank(self, val):
            self._rank = max(val, self._rank)

        def __repr__(self):
            return '_OpRank(\'%s\' | %d)' % (self.adj_entry.op_meta.name,
                                             self.rank)

    adj_map = SummaryGraph(model, dummy_input).adjacency_map()
    ranked_ops = {k: _OpRank(v, 0) for k, v in adj_map.items()}

    def _recurrent_ancestor(ranked_ops_dict, dest_op_name, src_op_name):
        def _is_descendant(parent_op_name, dest_op_name):
            successors_names = [
                op.name for op in adj_map[parent_op_name].successors
            ]
            if dest_op_name in successors_names:
                return True
            for succ_name in successors_names:
                if _is_descendant(succ_name, dest_op_name):
                    return True
            return False

        return _is_descendant(dest_op_name, src_op_name) and \
            (0 < ranked_ops_dict[dest_op_name].rank < ranked_ops_dict[src_op_name].rank)

    def rank_op(ranked_ops_dict, op_name, rank):
        ranked_ops_dict[op_name].rank = rank
        for child_op in adj_map[op_name].successors:
            # In recurrent models: if a successor is also an ancestor - we don't increment its rank.
            if not recurrent or not _recurrent_ancestor(
                    ranked_ops_dict, child_op.name, op_name):
                rank_op(ranked_ops_dict, child_op.name,
                        ranked_ops_dict[op_name].rank + 1)

    roots = [k for k, v in adj_map.items() if len(v.predecessors) == 0]
    for root_op_name in roots:
        rank_op(ranked_ops, root_op_name, 0)

    # Sort the ops by ascending topological rank
    ret = sorted(ranked_ops.keys(), key=lambda k: ranked_ops[k].rank)

    # Check that only the actual roots have a rank of 0
    assert {k for k in ret if ranked_ops[k].rank == 0} <= set(roots)
    return ret
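A minimal sketch of calling layers_topological_order on a toy feed-forward model (layer sizes are arbitrary):

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 8, 3), nn.ReLU())
order = layers_topological_order(model, torch.randn(1, 3, 32, 32))
print(order)  # op names sorted by ascending topological rank, roots first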