Example #1
    def __init__(self, config=None):
        super().__init__()

        defaults = dict(
            device="cpu",
            input_size=1024,
            num_classes=12,
            boost_strength=1.5,
            boost_strength_factor=0.9,
            k_inference_factor=1.5,
            duty_cycle_period=1000,
            use_kwinners=True,
            hidden_neurons_fc=207,
        )
        defaults.update(config or {})
        self.__dict__.update(defaults)
        self.device = torch.device(self.device)

        # hidden layers
        conv_layers = [
            *self._conv_block(1, 12, percent_on=0.095),  # 28x28 -> 14x14
            *self._conv_block(12, 12, percent_on=0.125),  # 10x10 -> 5x5
            Flatten(),
        ]

        linear_layers = [
            # *self._linear_block(1600, 1500, percent_on= 0.067),
            *self._linear_block(300, self.hidden_neurons_fc, percent_on=0.1),
            nn.Linear(self.hidden_neurons_fc, self.num_classes),
        ]

        self.features = nn.Sequential(*conv_layers)
        self.classifier = nn.Sequential(*linear_layers)
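A minimal, dependency-free sketch of the defaults/config override pattern used in the constructor above (and in several of the constructors below); keys supplied in config win, everything else keeps its default:

defaults = dict(device="cpu", input_size=1024, num_classes=12)
config = dict(num_classes=10)
defaults.update(config or {})        # config values override the defaults
print(defaults["num_classes"])       # 10
print(defaults["device"])            # 'cpu' (untouched default)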
    def __init__(self,
                 block_config=None,
                 depth=100,
                 growth_rate=12,
                 reduction=0.5,
                 num_classes=10,
                 bottleneck_size=4,
                 avg_pool_size=8):
        super(DenseNetCIFAR, self).__init__()

        # Compute blocks from depth
        if block_config is None:
            layers = (depth - 4) // 6
            block_config = (layers,) * 3

        # First convolution
        num_features = growth_rate * 2
        self.add_module("conv", nn.Conv2d(in_channels=3,
                                          out_channels=num_features,
                                          kernel_size=3,
                                          padding=1,
                                          bias=False))

        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers=num_layers,
                                num_input_features=num_features,
                                bn_size=bottleneck_size,
                                growth_rate=growth_rate,
                                drop_rate=0)
            self.add_module("block{0}".format(i + 1), block)

            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                out_features = math.floor(num_features * reduction)
                trans = _Transition(num_input_features=num_features,
                                    num_output_features=out_features)
                self.add_module("transition{0}".format(i + 1), trans)
                num_features = out_features

        # Final batch norm
        self.add_module("norm", nn.BatchNorm2d(num_features))
        self.add_module("relu", nn.ReLU(inplace=True))
        self.add_module("avg_pool", nn.AvgPool2d(kernel_size=avg_pool_size))

        # classifier layer
        outputs = int(num_features * 16 / (avg_pool_size * avg_pool_size))
        self.add_module("flatten", Flatten())
        self.add_module("classifier", nn.Linear(outputs, num_classes))

        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
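A quick illustration of the depth-to-block_config mapping computed in the constructor above; plain arithmetic, no torch required:

for depth in (40, 100, 190):
    layers = (depth - 4) // 6
    print(depth, (layers,) * 3)      # e.g. 100 -> (16, 16, 16)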
Example #3
    def __init__(self,
                 cnn_out_channels=(64, 64),
                 cnn_percent_on=(0.095, 0.125),
                 linear_units=1000,
                 linear_percent_on=0.1,
                 linear_weight_sparsity=0.4,
                 boost_strength=1.5,
                 boost_strength_factor=0.9,
                 k_inference_factor=1.5,
                 duty_cycle_period=1000):
        super(GSCSparseCNN, self).__init__(
            OrderedDict([
                # First Sparse CNN layer
                ("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5)),
                ("cnn1_batchnorm",
                 nn.BatchNorm2d(cnn_out_channels[0], affine=False)),
                ("cnn1_maxpool", nn.MaxPool2d(2)),
                ("cnn1_kwinner",
                 KWinners2d(channels=cnn_out_channels[0],
                            percent_on=cnn_percent_on[0],
                            k_inference_factor=k_inference_factor,
                            boost_strength=boost_strength,
                            boost_strength_factor=boost_strength_factor,
                            duty_cycle_period=duty_cycle_period)),

                # Second Sparse CNN layer
                ("cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1],
                                   5)),
                ("cnn2_batchnorm",
                 nn.BatchNorm2d(cnn_out_channels[1], affine=False)),
                ("cnn2_maxpool", nn.MaxPool2d(2)),
                ("cnn2_kwinner",
                 KWinners2d(channels=cnn_out_channels[1],
                            percent_on=cnn_percent_on[1],
                            k_inference_factor=k_inference_factor,
                            boost_strength=boost_strength,
                            boost_strength_factor=boost_strength_factor,
                            duty_cycle_period=duty_cycle_period)),
                ("flatten", Flatten()),

                # Sparse Linear layer
                ("linear",
                 SparseWeights(nn.Linear(25 * cnn_out_channels[1],
                                         linear_units),
                               weight_sparsity=linear_weight_sparsity)),
                ("linear_bn", nn.BatchNorm1d(linear_units, affine=False)),
                ("linear_kwinner",
                 KWinners(n=linear_units,
                          percent_on=linear_percent_on,
                          k_inference_factor=k_inference_factor,
                          boost_strength=boost_strength,
                          boost_strength_factor=boost_strength_factor,
                          duty_cycle_period=duty_cycle_period)),

                # Classifier
                ("output", nn.Linear(linear_units, 12)),
                ("softmax", nn.LogSoftmax(dim=1))
            ]))
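A rough shape check for the model above. The 25 * cnn_out_channels[1] term in the linear layer implies a 5x5 feature map after the second max pool, which is consistent with a 1x32x32 input (the 32x32 size is an assumption here, not something stated in the code):

def conv5_then_pool2(side):
    # a 5x5 convolution with no padding, followed by a 2x2 max pool
    return (side - 5 + 1) // 2

side = 32
for _ in range(2):                   # the two cnn/maxpool stages above
    side = conv5_then_pool2(side)
print(side, side * side)             # 5 25 -> matches 25 * cnn_out_channels[1]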
Example #4
def simple_conv_net():
    return torch.nn.Sequential(
        torch.nn.Conv2d(1, 3, 5),
        torch.nn.MaxPool2d(2),
        torch.nn.ReLU(),
        Flatten(),
        torch.nn.Linear(111, 3),
        torch.nn.ReLU(),
        torch.nn.Linear(3, 2)
    )
Example #5
    def __init__(self,
                 cnn_out_channels=(32, 64),
                 cnn_percent_on=(0.087, 0.293),
                 linear_units=700,
                 linear_percent_on=0.143,
                 linear_weight_sparsity=0.3,
                 boost_strength=1.5,
                 boost_strength_factor=0.85,
                 k_inference_factor=1.5,
                 duty_cycle_period=1000):
        super(MNISTSparseCNN, self).__init__(
            OrderedDict([
                # First Sparse CNN layer
                ("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5)),
                ("cnn1_maxpool", nn.MaxPool2d(2)),
                ("cnn1_kwinner",
                 KWinners2d(channels=cnn_out_channels[0],
                            percent_on=cnn_percent_on[0],
                            k_inference_factor=k_inference_factor,
                            boost_strength=boost_strength,
                            boost_strength_factor=boost_strength_factor,
                            duty_cycle_period=duty_cycle_period)),

                # Second Sparse CNN layer
                ("cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1],
                                   5)),
                ("cnn2_maxpool", nn.MaxPool2d(2)),
                ("cnn2_kwinner",
                 KWinners2d(channels=cnn_out_channels[1],
                            percent_on=cnn_percent_on[1],
                            k_inference_factor=k_inference_factor,
                            boost_strength=boost_strength,
                            boost_strength_factor=boost_strength_factor,
                            duty_cycle_period=duty_cycle_period)),
                ("flatten", Flatten()),

                # Sparse Linear layer
                ("linear",
                 SparseWeights(nn.Linear(16 * cnn_out_channels[1],
                                         linear_units),
                               weight_sparsity=linear_weight_sparsity)),
                ("linear_kwinner",
                 KWinners(n=linear_units,
                          percent_on=linear_percent_on,
                          k_inference_factor=k_inference_factor,
                          boost_strength=boost_strength,
                          boost_strength_factor=boost_strength_factor,
                          duty_cycle_period=duty_cycle_period)),

                # Classifier
                ("output", nn.Linear(linear_units, 10)),
                ("softmax", nn.LogSoftmax(dim=1))
            ]))
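A minimal usage sketch for the model above. It assumes nupic.torch is installed and that this class is exported as nupic.torch.models.MNISTSparseCNN; adjust the import if it lives elsewhere in your checkout:

import torch
from nupic.torch.models import MNISTSparseCNN

model = MNISTSparseCNN()
model.eval()                          # KWinners switches to its inference-time k
with torch.no_grad():
    log_probs = model(torch.randn(2, 1, 28, 28))
print(log_probs.shape)                # torch.Size([2, 10]) of log-probabilities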
Example #6
    def __init__(self, config=None):
        super(VGG19, self).__init__()

        defaults = dict(
            device="cuda",
            input_size=784,
            num_classes=10,
            hidden_sizes=[4000, 1000, 4000],
            batch_norm=False,
            dropout=0.3,
            bias=False,
            init_weights=True,
            kwinners=False,
            percent_on=0.3,
        )
        defaults.update(config or {})
        self.__dict__.update(defaults)
        self.device = torch.device(self.device)

        # choose k-winners or regular (ReLU) non-linearities
        if self.kwinners:
            self.pool_func = lambda: nn.AvgPool2d(kernel_size=2, stride=2)
            self.nonlinear_func = self._kwinners
        else:
            self.pool_func = lambda: nn.MaxPool2d(kernel_size=2, stride=2)
            self.nonlinear_func = lambda fout: nn.ReLU()

        # initialize network
        layers = [
            *self._conv_block(3, 64),
            *self._conv_block(64, 64, pool=True),  # 16x16
            *self._conv_block(64, 128),
            *self._conv_block(128, 128, pool=True),  # 8x8
            *self._conv_block(128, 256),
            *self._conv_block(256, 256),
            *self._conv_block(256, 256),
            *self._conv_block(256, 256, pool=True),  # 4x4
            *self._conv_block(256, 512),
            *self._conv_block(512, 512),
            *self._conv_block(512, 512),
            *self._conv_block(512, 512, pool=True),  # 2x2
            *self._conv_block(512, 512),
            *self._conv_block(512, 512),
            *self._conv_block(512, 512),
            *self._conv_block(512, 512, pool=True),  # 1x1
        ]
        layers.append(Flatten())
        layers.append(nn.Linear(512, self.num_classes))
        self.classifier = nn.Sequential(*layers)

        if self.init_weights:
            self._initialize_weights()
Example #7
    def __init__(self, config=None):
        super(ResNet, self).__init__()

        # update config
        defaults = dict(
            depth=50,
            num_classes=10,
            percent_on_k_winner=1.0,
            boost_strength=1.4,
            boost_strength_factor=0.7,
            k_inference_factor=1.0,
        )
        defaults.update(config or {})
        self.__dict__.update(defaults)

        # validate k-winners parameters (per-layer lists are not supported)
        for attr in [
                "percent_on_k_winner",
                "boost_strength",
                "boost_strength_factor",
                "k_inference_factor",
        ]:
            if isinstance(self.__dict__[attr], list):
                raise ValueError(
                    "ResNet currently supports only a single percentage of "
                    "activations for KWinners layers")

        if self.percent_on_k_winner < 0.5:
            self.activation_func = lambda out: self._kwinners(out)
        else:
            self.activation_func = lambda _: nn.ReLU()

        self.in_planes = 64
        # TODO: analyze what these attributes are used for in torchvision:
        # self.groups, self.base_width

        block, num_blocks = self._config_layers()

        self.features = nn.Sequential(
            conv7x7(3, 64, stride=2),
            nn.BatchNorm2d(64),
            self.activation_func(64),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            self._make_layer(block, 64, num_blocks[0], stride=1),
            self._make_layer(block, 128, num_blocks[1], stride=2),
            self._make_layer(block, 256, num_blocks[2], stride=2),
            self._make_layer(block, 512, num_blocks[3], stride=2),
            nn.AdaptiveAvgPool2d(1),
            Flatten(),  # TODO: see if I still need it
        )
        self.classifier = nn.Linear(512 * block.expansion, self.num_classes)
Example #8
    def __init__(self,
                 input_size=28 * 28,
                 n_hidden_units=1000,
                 n_classes=10,
                 is_sparse=False,
                 sparsity=(0.75, 0.85),
                 percent_on=0.1):
        """
        Initialize a 2-layer MLP
        :param input_size: number of input features to the MLP
        :type input_size: int
        :param n_hidden_units: number of units in each of the two hidden layers
        :type n_hidden_units: int
        :param n_classes: number of output units
        :type n_classes: int
        :param is_sparse: whether or not to initialize the sparse network instead of a
        dense one
        :type is_sparse: bool
        :param sparsity: a 2-element list/tuple specifying the sparsity in each of the
        hidden layers
        :type sparsity: list/tuple of float
        :param percent_on: fraction of units that remain active in each KWinners
        layer (only applies to sparse networks)
        :type percent_on: float
        """
        super().__init__()

        self.is_sparse = is_sparse
        self.flatten = Flatten()
        self.n_classes = n_classes

        self.fc1 = torch.nn.Linear(input_size, n_hidden_units)
        self.fc2 = torch.nn.Linear(n_hidden_units, n_hidden_units)
        self.fc3 = torch.nn.Linear(n_hidden_units, n_classes)

        if is_sparse:
            self.fc1_sparsity, self.fc2_sparsity = sparsity
            self.percent_on = percent_on

            self.fc1 = SparseWeights(self.fc1, sparsity=self.fc1_sparsity)
            self.kw1 = KWinners(n=n_hidden_units,
                                percent_on=percent_on,
                                boost_strength=0.0)

            self.fc2 = SparseWeights(self.fc2, sparsity=self.fc2_sparsity)
            self.kw2 = KWinners(n=n_hidden_units,
                                percent_on=percent_on,
                                boost_strength=0.0)
Example #9
    def __init__(self, config=None):
        super(VGG19Heb, self).__init__()

        defaults = dict(
            device="cuda",
            input_size=784,
            num_classes=10,
            hidden_sizes=[4000, 1000, 4000],
            batch_norm=False,
            dropout=0.3,
            bias=False,
            init_weights=True,
            kwinners=False,
            percent_on=0.3,
            boost_strength=1.4,
            boost_strength_factor=0.7,
            hebbian_learning=True,
        )
        defaults.update(config or {})
        self.__dict__.update(defaults)
        self.device = torch.device(self.device)

        # choose k-winners or regular (ReLU) non-linearities
        if self.kwinners:
            self.pool_func = lambda: nn.AvgPool2d(kernel_size=2, stride=2)
            self.nonlinear_func = self._kwinners
        else:
            self.pool_func = lambda: nn.MaxPool2d(kernel_size=2, stride=2)
            self.nonlinear_func = lambda fout: nn.ReLU()

        # initialize network
        layers = [
            *self._conv_block(3, 64, pool=True),  # 16x16
            *self._conv_block(64, 64, pool=True),  # 8x8
            *self._conv_block(64, 128, pool=True),  # 4x4
            *self._conv_block(128, 256, pool=True),  # 2x2
            *self._conv_block(256, 512, pool=True),  # 1x1
        ]
        layers.append(Flatten())
        layers.append(nn.Linear(512, self.num_classes))
        self.classifier = nn.Sequential(*layers)

        # track the activations
        # should reset at the end of each round, done in the model
        self.correlations = []

        if self.init_weights:
            self._initialize_weights()
Example #10
    def __init__(self, config=None):
        super(GSCHebDepreciated, self).__init__()

        defaults = dict(
            input_size=1024,
            num_classes=12,
            boost_strength=1.5,
            boost_strength_factor=0.9,
            k_inference_factor=1.5,
            duty_cycle_period=1000,
            use_kwinners=True,
            hidden_neurons_fc=1000,
        )
        defaults.update(config or {})
        self.__dict__.update(defaults)
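        # NOTE: `device` and `model` are not part of the defaults above; they are
        # expected to be supplied via `config` for the lines below to work.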
        self.device = torch.device(self.device)

        if self.model == "DSNNMixedHeb":
            self.hebbian_learning = True
        else:
            self.hebbian_learning = False

        # hidden layers
        conv_layers = [
            *self._conv_block(1, 64, percent_on=0.095),  # 28x28 -> 14x14
            *self._conv_block(64, 64, percent_on=0.125),  # 10x10 -> 5x5
        ]
        linear_layers = [
            Flatten(),
            # *self._linear_block(1600, 1500, percent_on= 0.067),
            *self._linear_block(1600, self.hidden_neurons_fc, percent_on=0.1),
            nn.Linear(self.hidden_neurons_fc, self.num_classes),
        ]

        # flat list of layers (redundant with the modules below) to ease traversal
        self.layers = conv_layers + linear_layers
        self.features = nn.Sequential(*conv_layers)
        self.classifier = nn.Sequential(*linear_layers)

        # track correlations
        self.correlations = []
Example #11
    def __init__(self, config=None):
        super().__init__()

        defaults = dict(
            device="cpu",
            input_size=784,
            num_classes=10,
            hidden_sizes=[100, 100, 100],
            batch_norm=False,
            dropout=False,
            use_kwinners=False,
            hebbian_learning=False,
            bias=True,
        )
        defaults.update(config or {})
        self.__dict__.update(defaults)
        self.device = torch.device(self.device)

        # decide which activation function to use
        if self.use_kwinners:
            self.activation_func = self._kwinners
        else:
            self.activation_func = lambda _: nn.ReLU()

        layers = [Flatten()]
        # add the first layer
        layers.extend(self._linear_block(self.input_size,
                                         self.hidden_sizes[0]))
        # all hidden layers
        for i in range(1, len(self.hidden_sizes)):
            layers.extend(
                self._linear_block(self.hidden_sizes[i - 1],
                                   self.hidden_sizes[i]))
        # last layer
        layers.append(
            nn.Linear(self.hidden_sizes[-1], self.num_classes, bias=self.bias))

        # create the layers
        self.classifier = nn.Sequential(*layers)
    def __init__(
        self,
        dpc=3,
        cnn_w_sparsity=0.05,
        linear_w_sparsity=0.5,
        cat_w_sparsity=0.01,
        n_classes=4,
    ):
        super(ToyNetwork, self).__init__()
        conv_channels = 128
        self.n_classes = n_classes
        self.conv1 = SparseWeights2d(
            nn.Conv2d(
                in_channels=1,
                out_channels=conv_channels,
                kernel_size=10,
                padding=0,
                stride=1,
            ),
            cnn_w_sparsity,
        )
        self.kwin1 = KWinners2d(conv_channels, percent_on=0.1)
        self.bn = nn.BatchNorm2d(conv_channels, affine=False)
        self.mp1 = nn.MaxPool2d(kernel_size=2)
        self.flatten = Flatten()

        self.d1 = DendriteLayer(
            in_dim=int(conv_channels / 64) * 7744,
            out_dim=1000,
            dendrites_per_neuron=dpc,
        )

        self.linear = SparseWeights(nn.Linear(1000, n_classes + 1),
                                    linear_w_sparsity)

        self.cat = SparseWeights(nn.Linear(n_classes + 1, 1000 * dpc),
                                 cat_w_sparsity)
    def __init__(
        self,
        in_channels=1,
        cnn_out_channels=2,
        linear_units=3,
        sparse_weights=False,
    ):
        super(SimpleCNN, self).__init__()
        if sparse_weights:
            self.add_module(
                "cnn1_sparse",
                SparseWeights2d(nn.Conv2d(in_channels, cnn_out_channels, 5),
                                0.5))
        else:
            self.add_module("cnn1", nn.Conv2d(in_channels, cnn_out_channels,
                                              5))
        self.add_module("cnn1_batchnorm",
                        nn.BatchNorm2d(cnn_out_channels, affine=False))
        self.add_module("cnn1_maxpool", nn.MaxPool2d(2))
        self.add_module("cnn1_relu", nn.ReLU())

        # Linear layer
        self.add_module("flatten", Flatten())
        if sparse_weights:
            self.add_module(
                "linear_sparse",
                SparseWeights(nn.Linear(196 * cnn_out_channels, linear_units),
                              0.5))
        else:
            self.add_module("linear",
                            nn.Linear(196 * cnn_out_channels, linear_units))
        self.add_module("linear_bn", nn.BatchNorm1d(linear_units,
                                                    affine=False))
        self.add_module("linear_relu", nn.ReLU())

        # Classifier layer with 12 classes
        self.add_module("output", nn.Linear(linear_units, 12))
Example #14
    def __init__(self, config=None):
        super().__init__()

        defaults = dict(
            device="cpu",
            input_size=1024,
            num_classes=12,
            boost_strength=[1.5, 1.5, 1.5],
            boost_strength_factor=[0.9, 0.9, 0.9],
            duty_cycle_period=1000,
            k_inference_factor=1.5,
            percent_on_k_winner=[0.095, 0.125, 0.1],
            hidden_neurons_conv=[64, 64],
            hidden_neurons_fc=1000,
            batch_norm=True,
            dropout=False,
            bias=True,
        )
        defaults.update(config or {})
        self.__dict__.update(defaults)
        self.device = torch.device(self.device)

        kwargs = dict(bias=self.bias, batch_norm=self.batch_norm, dropout=self.dropout)

        # decide which activation function to use for conv
        self.activation_funcs = []
        for layer, hidden_size in enumerate(self.hidden_neurons_conv):
            if self.percent_on_k_winner[layer] < 0.5:
                self.activation_funcs.append(
                    KWinners2d(
                        hidden_size,
                        percent_on=self.percent_on_k_winner[layer],
                        boost_strength=self.boost_strength[layer],
                        boost_strength_factor=self.boost_strength_factor[layer],
                        k_inference_factor=self.k_inference_factor,
                    )
                )
            else:
                self.activation_funcs.append(nn.ReLU())

        # decide which activation to use for linear
        if self.percent_on_k_winner[-1] < 0.5:
            linear_activation = KWinners(
                self.hidden_neurons_fc,
                percent_on=self.percent_on_k_winner[-1],
                boost_strength=self.boost_strength[-1],
                boost_strength_factor=self.boost_strength_factor[-1],
                k_inference_factor=self.k_inference_factor,
            )
        else:
            linear_activation = nn.ReLU()

        # conv layers
        conv_layers = [
            # 28x28 -> 14x14
            *self._conv_block(1, self.hidden_neurons_conv[0], self.activation_funcs[0]),
            # 10x10 -> 5x5
            *self._conv_block(
                self.hidden_neurons_conv[0],
                self.hidden_neurons_conv[1],
                self.activation_funcs[1],
            ),
            Flatten(),
        ]
        linear_layers = [
            DSLinearBlock(
                self.hidden_neurons_conv[1] * 25,
                self.hidden_neurons_fc,
                activation_func=linear_activation,
                batch_norm_affine=False,
                config=config,
                **kwargs,
            ),
            DSLinearBlock(self.hidden_neurons_fc, self.num_classes, config=config),
        ]

        self.features = nn.Sequential(*conv_layers)
        self.classifier = nn.Sequential(*linear_layers)
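The activation-selection rule used in the constructor above, shown in isolation: a layer gets a KWinners activation only when its percent_on is below 0.5, otherwise a plain ReLU:

for percent_on in (0.095, 0.125, 0.1, 1.0):
    print(percent_on, "KWinners" if percent_on < 0.5 else "ReLU")
# 0.095 KWinners / 0.125 KWinners / 0.1 KWinners / 1.0 ReLU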
Example #15
    def __init__(
        self,
        input_shape=(1, 32, 32),
        cnn_out_channels=(64, 64),
        cnn_activity_percent_on=(0.1, 0.1),
        cnn_weight_percent_on=(1.0, 1.0),
        linear_n=(1000, ),
        linear_activity_percent_on=(0.1, ),
        linear_weight_percent_on=(0.4, ),
        use_dendrites=False,
        dendrites_per_cell=5,
        num_classes=10,
        boost_strength=1.67,
        boost_strength_factor=0.9,
        duty_cycle_period=1000,
        k_inference_factor=1.5,
        use_batch_norm=True,
        dropout=0.0,
        activation_fct_before_max_pool=False,
        consolidated_sparse_weights=False,
        use_kwinners_local=False,
        use_softmax=True,
    ):
        super(LeSparseNet, self).__init__()
        # Add CNN Layers
        current_input_shape = input_shape
        cnn_layers = len(cnn_out_channels)
        self.dpc = dendrites_per_cell
        for i in range(cnn_layers):
            in_channels, height, width = current_input_shape

            # We only do consolidated weights for the second CNN layer
            csw = (i == 1) and consolidated_sparse_weights
            add_sparse_cnn_layer(
                network=self,
                suffix=i + 1,
                in_channels=in_channels,
                out_channels=cnn_out_channels[i],
                use_batch_norm=use_batch_norm,
                weight_sparsity=cnn_weight_percent_on[i],
                percent_on=cnn_activity_percent_on[i],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                duty_cycle_period=duty_cycle_period,
                activation_fct_before_max_pool=activation_fct_before_max_pool,
                use_kwinners_local=use_kwinners_local,
                consolidated_sparse_weights=csw,
            )

            # Compute next layer input shape
            wout = (width - 5) + 1
            maxpool_width = wout // 2
            current_input_shape = (cnn_out_channels[i], maxpool_width,
                                   maxpool_width)

        # Flatten CNN output before passing to linear layer
        self.add_module("flatten", Flatten())

        # Add Linear layers
        input_size = np.prod(current_input_shape)
        for i in range(len(linear_n)):
            if use_dendrites and i == 0:
                add_sparse_dendrite_layer(
                    network=self,
                    suffix=i + 1,
                    in_dim=input_size,
                    out_dim=linear_n[i],
                    dendrites_per_neuron=self.dpc,
                    use_batch_norm=use_batch_norm,
                    weight_sparsity=linear_weight_percent_on[i],
                    percent_on=linear_activity_percent_on[i],
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                    duty_cycle_period=duty_cycle_period,
                )
            else:
                add_sparse_linear_layer(
                    network=self,
                    suffix=i + 1,
                    input_size=input_size,
                    linear_n=linear_n[i],
                    dropout=dropout,
                    use_batch_norm=use_batch_norm,
                    weight_sparsity=linear_weight_percent_on[i],
                    percent_on=linear_activity_percent_on[i],
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                    duty_cycle_period=duty_cycle_period,
                    consolidated_sparse_weights=consolidated_sparse_weights,
                )
            input_size = linear_n[i]

        if use_softmax:
            self.add_module("softmax", nn.LogSoftmax(dim=1))
Example #16
    def __init__(self,
                 depth=50,
                 num_classes=1000,
                 conv_layer=nn.Conv2d,
                 conv_args=None,
                 linear_layer=nn.Linear,
                 linear_args=None,
                 act_layer=default_activation_layer,
                 act_args=None,
                 norm_layer=nn.BatchNorm2d,
                 norm_args=None,
                 deprecated_compatibility_mode=False):
        """
        :param conv_layer:
            A conv2d layer that receives the arguments of a nn.Conv2d and custom
            conv_args
        :type conv_layer: callable

        :param conv_args:
            A dictionary specifying extra kwargs for the conv_layer, possibly
            assigning different args to each layer.
        :type conv_args: dict or None

        :param linear_layer:
            A linear layer that receives the arguments of a nn.Linear and custom
            linear_args
        :type linear_layer: callable

        :param linear_args:
            A dictionary specifying extra kwargs for the linear_layer, possibly
            assigning different args to each layer.
        :type linear_args: dict or None

        :param act_layer:
            An activation layer that receives the number of input channels and
            custom act_args
        :type act_layer: callable

        :param act_args:
            A dictionary specifying extra kwargs for the act_layer, possibly
            assigning different args to each layer.
        :type act_args: dict or None

        :param norm_layer:
            A normalization layer that receives the arguments of nn.BatchNorm2d
            and custom norm_args
        :type norm_layer: callable

        :param norm_args:
            A dictionary specifying extra kwargs for the norm_layer, possibly
            assigning different args to each layer.
        :type norm_args: dict or None

        :param deprecated_compatibility_mode:
            Enables behavior required by SparseResNet
        :type deprecated_compatibility_mode: bool
        """

        super().__init__()

        assert str(depth) in cf_dict, "Resnet depth should be in {}".format(
            ",".join(cf_dict.keys()))
        block, num_blocks = cf_dict[str(depth)]

        conv_args = expand_args(conv_args, num_blocks, block.conv_keys)
        norm_args = expand_args(norm_args, num_blocks, block.norm_keys)
        act_args = expand_args(act_args, num_blocks, block.act_keys)
        linear_args = linear_args or {}

        if not deprecated_compatibility_mode:
            # Previous models expect to receive the kernel size in the
            # activation layer. Do this in the Bottleneck code, but discard it
            # by default.
            act_layer = discard_kernel_size(act_layer)

        self.quant = QuantStub()

        features = [
            # stem
            ("stem", conv_layer(3, 64, kernel_size=7, stride=2,
                                padding=3, bias=False, **conv_args["stem"])),
            ("bn_stem", norm_layer(64, **norm_args["stem"])),
            ("act_stem", act_layer(64, **act_args["stem"])),
            ("pool_stem", nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]

        # Track the previous out_channels during initialization.
        self.in_planes = 64
        features += [
            # groups 1 to 4
            ("group1", self._make_group(
                block, 64, num_blocks[0], stride=1,
                conv_layer=conv_layer, conv_args=conv_args["filters64"],
                act_layer=act_layer, act_args=act_args["filters64"],
                norm_layer=norm_layer, norm_args=norm_args["filters64"])),
            ("group2", self._make_group(
                block, 128, num_blocks[1], stride=2,
                conv_layer=conv_layer, conv_args=conv_args["filters128"],
                act_layer=act_layer, act_args=act_args["filters128"],
                norm_layer=norm_layer, norm_args=norm_args["filters128"])),
            ("group3", self._make_group(
                block, 256, num_blocks[2], stride=2,
                conv_layer=conv_layer, conv_args=conv_args["filters256"],
                act_layer=act_layer, act_args=act_args["filters256"],
                norm_layer=norm_layer, norm_args=norm_args["filters256"])),
            ("group4", self._make_group(
                block, 512, num_blocks[3], stride=2,
                conv_layer=conv_layer, conv_args=conv_args["filters512"],
                act_layer=act_layer, act_args=act_args["filters512"],
                norm_layer=norm_layer, norm_args=norm_args["filters512"])),
            ("avg_pool", nn.AdaptiveAvgPool2d(1)),
            ("flatten", Flatten()),
        ]
        self.features = nn.Sequential(OrderedDict(features))
        del self.in_planes

        # last output layer
        self.classifier = linear_layer(
            512 * block.expansion,
            num_classes,
            **linear_args
        )

        self.dequant = DeQuantStub()
Example #17
    def __init__(
        self,
        cnn_out_channels=(32, 64, 32),
        cnn_percent_on=(0.095, 0.125, 0.0925),
        linear_units=1600,
        linear_percent_on=0.1,
        linear_weight_sparsity=0.4,
        boost_strength=1.5,
        boost_strength_factor=0.9,
        k_inference_factor=1.5,
        duty_cycle_period=1000,
    ):
        super(GSCSparseFullCNN, self).__init__()
        # input_shape = (1, 32, 32)
        # First Sparse CNN layer
        self.add_module("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5))
        self.add_module("cnn1_batchnorm",
                        nn.BatchNorm2d(cnn_out_channels[0], affine=False))
        self.add_module("cnn1_maxpool", nn.MaxPool2d(2))
        self.add_module(
            "cnn1_kwinner",
            KWinners2d(
                channels=cnn_out_channels[0],
                percent_on=cnn_percent_on[0],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                duty_cycle_period=duty_cycle_period,
            ),
        )

        # Second Sparse CNN layer
        self.add_module("cnn2",
                        nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5))
        self.add_module("cnn2_batchnorm",
                        nn.BatchNorm2d(cnn_out_channels[1], affine=False))
        self.add_module("cnn2_maxpool", nn.MaxPool2d(2))
        self.add_module(
            "cnn2_kwinner",
            KWinners2d(
                channels=cnn_out_channels[1],
                percent_on=cnn_percent_on[1],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                duty_cycle_period=duty_cycle_period,
            ),
        )

        # # Third Sparse CNN layer
        # self.add_module("cnn3",
        #                 nn.Conv2d(cnn_out_channels[1], cnn_out_channels[2], 5))
        # self.add_module("cnn3_batchnorm",
        #                 nn.BatchNorm2d(cnn_out_channels[2], affine=False))
        # # self.add_module("cnn3_maxpool", nn.MaxPool2d(2))
        # self.add_module("cnn3_kwinner", KWinners2d(
        #     channels=cnn_out_channels[2],
        #     percent_on=cnn_percent_on[2],
        #     k_inference_factor=k_inference_factor,
        #     boost_strength=boost_strength,
        #     boost_strength_factor=boost_strength_factor,
        #     duty_cycle_period=duty_cycle_period))

        self.add_module("flatten", Flatten())

        # # Sparse Linear layer
        # self.add_module("linear", SparseWeights(
        #     nn.Linear(25 * cnn_out_channels[1], linear_units),
        #     weight_sparsity=linear_weight_sparsity))
        # self.add_module("linear_bn", nn.BatchNorm1d(linear_units, affine=False))
        # self.add_module("linear_kwinner", KWinners(
        #     n=linear_units,
        #     percent_on=linear_percent_on,
        #     k_inference_factor=k_inference_factor,
        #     boost_strength=boost_strength,
        #     boost_strength_factor=boost_strength_factor,
        #     duty_cycle_period=duty_cycle_period))

        # Classifier
        self.add_module("output", nn.Linear(1600, 12))
        self.add_module("softmax", nn.LogSoftmax(dim=1))
Example #18
def setup_model(in_features, out_features):
    return torch.nn.Sequential(Flatten(),
                               torch.nn.Linear(in_features, out_features))
    def __init__(
        self,
        input_shape=(1, 32, 32),
        cnn_out_channels=(64, 64),
        cnn_activity_percent_on=(0.1, 0.1),
        cnn_weight_percent_on=(1.0, 1.0),
        linear_n=(1000, ),
        linear_activity_percent_on=(0.1, ),
        linear_weight_percent_on=(0.4, ),
        num_classes=10,
        temperature=10.0,
        eval_temperature=1.0,
        temperature_decay_rate=0.99,
        k_inference_factor=1.5,
        use_batch_norm=True,
        dropout=0.0,
        activation_fct_before_max_pool=False,
        consolidated_sparse_weights=False,
        use_softmax=True,
    ):
        super(SampledKWinnerLeSparseNet, self).__init__()

        # Add CNN Layers
        current_input_shape = input_shape
        cnn_layers = len(cnn_out_channels)
        for i in range(cnn_layers):
            in_channels, height, width = current_input_shape

            # We only do consolidated weights for the second CNN layer
            csw = (i == 1) and consolidated_sparse_weights
            add_sparse_cnn_layer(
                network=self,
                suffix=i + 1,
                in_channels=in_channels,
                out_channels=cnn_out_channels[i],
                use_batch_norm=use_batch_norm,
                weight_sparsity=cnn_weight_percent_on[i],
                percent_on=cnn_activity_percent_on[i],
                k_inference_factor=k_inference_factor,
                temperature=temperature,
                eval_temperature=eval_temperature,
                temperature_decay_rate=temperature_decay_rate,
                activation_fct_before_max_pool=activation_fct_before_max_pool,
                consolidated_sparse_weights=csw)

            # Compute next layer input shape
            wout = (width - 5) + 1
            maxpool_width = wout // 2
            current_input_shape = (cnn_out_channels[i], maxpool_width,
                                   maxpool_width)

        # Flatten CNN output before passing to linear layer
        self.add_module("flatten", Flatten())

        # Add Linear layers
        input_size = np.prod(current_input_shape)
        for i in range(len(linear_n)):
            add_sparse_linear_layer(
                network=self,
                suffix=i + 1,
                input_size=input_size,
                linear_n=linear_n[i],
                dropout=dropout,
                use_batch_norm=use_batch_norm,
                weight_sparsity=linear_weight_percent_on[i],
                percent_on=linear_activity_percent_on[i],
                k_inference_factor=k_inference_factor,
                temperature=temperature,
                eval_temperature=eval_temperature,
                temperature_decay_rate=temperature_decay_rate,
                consolidated_sparse_weights=consolidated_sparse_weights,
            )
            input_size = linear_n[i]

        # Classifier
        self.add_module("output", nn.Linear(input_size, num_classes))
        if use_softmax:
            self.add_module("softmax", nn.LogSoftmax(dim=1))
    def __init__(self, config):
        """Called once at the beginning of each experiment."""
        super(MNISTSparseExperiment, self).__init__()
        self.start_time = time.time()
        self.logger = get_logger(config["name"], config.get("verbose", 2))
        self.logger.debug("Config: %s", config)

        # Setup random seed
        seed = config["seed"]
        set_random_seed(seed)

        self.data_dir = config["data_dir"]
        self.batch_size = config["batch_size"]
        self.test_batch_size = config["test_batch_size"]
        self.first_epoch_batch_size = config["first_epoch_batch_size"]
        self.validation = config.get("validation", 50000.0 / 60000.0)
        self.learning_rate_factor = config["learning_rate_factor"]
        self.lr_scheduler_params = config.get("lr_scheduler_params", None)

        self._configure_dataloaders()

        # Configure Model
        cnn_input_shape = config.get("cnn_input_shape", (1, 28, 28))
        linear_n = config["linear_n"]
        linear_percent_on = config["linear_percent_on"]
        cnn_out_channels = config["cnn_out_channels"]
        cnn_percent_on = config["cnn_percent_on"]
        boost_strength = config["boost_strength"]
        weight_sparsity = config["weight_sparsity"]
        cnn_weight_sparsity = config["cnn_weight_sparsity"]
        boost_strength_factor = config["boost_strength_factor"]
        k_inference_factor = config["k_inference_factor"]
        use_batch_norm = config["use_batch_norm"]
        dropout = config.get("dropout", 0.0)

        model = nn.Sequential()

        # Add CNN Layers
        input_shape = cnn_input_shape
        cnn_layers = len(cnn_out_channels)
        if cnn_layers > 0:
            for i in range(cnn_layers):
                in_channels, height, width = input_shape
                add_sparse_cnn_layer(
                    network=model,
                    suffix=i + 1,
                    in_channels=in_channels,
                    out_channels=cnn_out_channels[i],
                    use_batch_norm=use_batch_norm,
                    weight_sparsity=cnn_weight_sparsity,
                    percent_on=cnn_percent_on[i],
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                )

                # Feed this layer output into next layer input
                in_channels = cnn_out_channels[i]

                # Compute next layer input shape
                wout = (width - 5) + 1
                maxpool_width = wout // 2
                input_shape = (in_channels, maxpool_width, maxpool_width)

        # Flatten CNN output before passing to linear layer
        model.add_module("flatten", Flatten())

        # Add Linear layers
        input_size = np.prod(input_shape)
        for i in range(len(linear_n)):
            add_sparse_linear_layer(
                network=model,
                suffix=i + 1,
                input_size=input_size,
                linear_n=linear_n[i],
                dropout=dropout,
                use_batch_norm=False,
                weight_sparsity=weight_sparsity,
                percent_on=linear_percent_on[i],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
            )
            input_size = linear_n[i]

        # Output layer
        model.add_module("output", nn.Linear(input_size, 10))
        model.add_module("softmax", nn.LogSoftmax(dim=1))

        if torch.cuda.is_available():
            self.device = torch.device("cuda")
            model = model.cuda()
        else:
            self.device = torch.device("cpu")

        if torch.cuda.device_count() > 1:
            self.logger.debug("Using %d GPUs", torch.cuda.device_count())
            model = torch.nn.DataParallel(model)

        self.model = model
        self.logger.debug("Model: %s", self.model)
        self.learning_rate = config["learning_rate"]
        self.momentum = config["momentum"]

        self.batches_in_epoch = config["batches_in_epoch"]
        self.batches_in_first_epoch = config["batches_in_first_epoch"]
        self.config = config

        self.optimizer = self._create_optimizer(name=config["optimizer"],
                                                model=self.model)
        self.lr_scheduler = self._create_learning_rate_scheduler(
            name=config.get("lr_scheduler", None), optimizer=self.optimizer)
Example #21
    def __init__(
        self,
        input_shape,
        block_sizes,
        cnn_out_channels,
        cnn_kernel_sizes,
        cnn_weight_sparsity,
        cnn_percent_on,
        linear_units,
        linear_weight_sparsity,
        linear_percent_on,
        k_inference_factor,
        boost_strength,
        boost_strength_factor,
        use_max_pooling,
        num_classes,
    ):
        super(VGGSparseNet, self).__init__()
        in_channels, h, w = input_shape
        output_size = h * w
        output_units = output_size * in_channels
        for l, block_size in enumerate(block_sizes):
            for b in range(block_size):
                self._add_cnn_layer(
                    index_str=str(l) + "_" + str(b),
                    in_channels=in_channels,
                    out_channels=cnn_out_channels[l],
                    kernel_size=cnn_kernel_sizes[l],
                    percent_on=cnn_percent_on[l],
                    weight_sparsity=cnn_weight_sparsity[l],
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                    add_pooling=b == block_size - 1,
                    use_max_pooling=use_max_pooling,
                )
                in_channels = cnn_out_channels[l]
            output_size = int(output_size / 4)
            output_units = output_size * in_channels

        # Flatten CNN output before passing to linear layer
        self.add_module("flatten", Flatten())

        # Linear layer
        input_size = output_units
        for l, linear_n in enumerate(linear_units):
            linear = nn.Linear(input_size, linear_n)
            if linear_weight_sparsity[l] < 1.0:
                self.add_module(
                    "linear_" + str(l),
                    SparseWeights(linear, linear_weight_sparsity[l]),
                )
            else:
                self.add_module("linear_" + str(l), linear)

            if linear_percent_on[l] < 1.0:
                self.add_module(
                    "kwinners_linear_" + str(l),
                    KWinners(
                        n=linear_n,
                        percent_on=linear_percent_on[l],
                        k_inference_factor=k_inference_factor,
                        boost_strength=boost_strength,
                        boost_strength_factor=boost_strength_factor,
                    ),
                )
            else:
                self.add_module("Linear_ReLU_" + str(l), nn.ReLU())

            input_size = linear_n

        # Output layer
        self.add_module("output", nn.Linear(input_size, num_classes))

        self._initialize_weights()
Example #22
    def __init__(self, config=None):
        super(ResNet, self).__init__()

        # update config
        defaults = dict(
            depth=50,
            num_classes=1000,
            linear_sparse_weights_type="SparseWeights",
            conv_sparse_weights_type="SparseWeights2d",
            defaults_sparse=False,
            layer_params_type=None,  # Sub-classed from `LayerParams`.
            # To be passed to layer_params_type:
            layer_params_kwargs=None,
            linear_params_func=None,
            conv_params_func=None,
            activation_params_func=None,
        )
        defaults.update(config or {})
        self.__dict__.update(defaults)
        if isinstance(self.linear_sparse_weights_type, str):
            self.linear_sparse_weights_type = getattr(
                nupic_modules, self.linear_sparse_weights_type)
        if isinstance(self.conv_sparse_weights_type, str):
            self.conv_sparse_weights_type = getattr(
                nupic_modules, self.conv_sparse_weights_type)

        if self.defaults_sparse:
            if self.conv_params_func is None:
                self.conv_params_func = auto_sparse_conv_params
            if self.activation_params_func is None:
                self.activation_params_func = auto_sparse_activation_params

        if not hasattr(self, "sparse_params"):
            self.sparse_params = default_resnet_params(
                *cf_dict[str(self.depth)],
                layer_params_type=self.layer_params_type,
                layer_params_kwargs=self.layer_params_kwargs,
                linear_params_func=self.linear_params_func,
                conv_params_func=self.conv_params_func,
                activation_params_func=self.activation_params_func,
            )

        self.in_planes = 64

        block, num_blocks = self._config_layers()

        self.features = nn.Sequential(
            # stem
            conv_layer(
                "7x7",
                3,
                64,
                self.sparse_params["stem"],
                sparse_weights_type=self.conv_sparse_weights_type,
                stride=2,
            ),
            nn.BatchNorm2d(64),
            activation_layer(64, self.sparse_params["stem"], kernel_size=7),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
            # groups 1 to 4
            self._make_group(
                block, 64, num_blocks[0], self.sparse_params["filters64"], stride=1
            ),
            self._make_group(
                block, 128, num_blocks[1], self.sparse_params["filters128"], stride=2
            ),
            self._make_group(
                block, 256, num_blocks[2], self.sparse_params["filters256"], stride=2
            ),
            self._make_group(
                block, 512, num_blocks[3], self.sparse_params["filters512"], stride=2
            ),
            nn.AdaptiveAvgPool2d(1),
            Flatten(),
        )

        # last output layer
        self.classifier = linear_layer(
            512 * block.expansion,
            self.num_classes,
            self.sparse_params["linear"],
            self.linear_sparse_weights_type,
        )
    def __init__(self, config=None):

        config = config or {}
        defaults = dict(
            input_size=(1, 32, 32),
            l0_strength=7e-4,
            l2_strength=0,
            droprate_init=0.5,
            temperature=2 / 3,
            learn_weight=True,
            num_classes=12,
            cnn_out_channels=(64, 64),
            kernel_size=5,
            linear_units=1000,
            maxpool_stride=2,
        )
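        # Note: because of the `or` below, falsy config values (0, False, "", None)
        # fall back to the corresponding default.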
        new_defaults = {
            k: (config.get(k, None) or v)
            for k, v in defaults.items()
        }
        self.__dict__.update(new_defaults)

        feature_map_sidelength = ((
            ((self.input_size[1] - self.kernel_size + 1) / self.maxpool_stride)
            - self.kernel_size + 1) / self.maxpool_stride)
        assert (feature_map_sidelength == int(feature_map_sidelength))
        feature_map_sidelength = int(feature_map_sidelength)

        l0_strengths = [self.l0_strength] * 4

        super().__init__(
            OrderedDict([

                # -------------
                # Conv Block
                # -------------
                ("cnn1",
                 HardConcreteGatedConv2d(self.input_size[0],
                                         self.cnn_out_channels[0],
                                         self.kernel_size,
                                         droprate_init=self.droprate_init,
                                         temperature=self.temperature,
                                         l2_strength=self.l2_strength,
                                         l0_strength=l0_strengths[0],
                                         learn_weight=self.learn_weight)),
                ("cnn1_bn",
                 nn.BatchNorm2d(self.cnn_out_channels[0], affine=False)),
                ("cnn1_maxpool", nn.MaxPool2d(self.maxpool_stride)),
                ("cnn1_relu", nn.ReLU()),

                # -------------
                # Conv Block
                # -------------
                ("cnn2",
                 HardConcreteGatedConv2d(self.cnn_out_channels[0],
                                         self.cnn_out_channels[1],
                                         self.kernel_size,
                                         droprate_init=self.droprate_init,
                                         temperature=self.temperature,
                                         l2_strength=self.l2_strength,
                                         l0_strength=l0_strengths[1],
                                         learn_weight=self.learn_weight)),
                ("cnn2_bn",
                 nn.BatchNorm2d(self.cnn_out_channels[1], affine=False)),
                ("cnn2_maxpool", nn.MaxPool2d(self.maxpool_stride)),
                ("cnn2_relu", nn.ReLU()),
                ("flatten", Flatten()),

                # -------------
                # Linear Block
                # -------------
                ("fc1",
                 HardConcreteGatedLinear(
                     (feature_map_sidelength**2) * self.cnn_out_channels[1],
                     self.linear_units,
                     droprate_init=self.droprate_init,
                     l2_strength=self.l2_strength,
                     l0_strength=l0_strengths[2],
                     temperature=self.temperature,
                     learn_weight=self.learn_weight)),
                ("fc1_bn", nn.BatchNorm1d(self.linear_units, affine=False)),
                ("fc1_relu", nn.ReLU()),

                # -------------
                # Output Layer
                # -------------
                ("fc2",
                 HardConcreteGatedLinear(self.linear_units,
                                         self.num_classes,
                                         droprate_init=self.droprate_init,
                                         l2_strength=self.l2_strength,
                                         l0_strength=l0_strengths[3],
                                         temperature=self.temperature,
                                         learn_weight=self.learn_weight)),
            ]))
Example #24
    def __init__(self, num_classes=1001, width_mult=1.0):
        """Inspired by
        https://github.com/kuangliu/pytorch-cifar/blob/master/models/mobilenet.py.

        :param num_classes: Number of output classes (10 for CIFAR10)
        :param width_mult: Width multiplier, used to thin the network
        """
        super(MobileNetV1, self).__init__()

        # Check for CIFAR10
        if num_classes == 10:
            first_stride = 1
            avgpool_size = 2
        else:
            first_stride = 2
            avgpool_size = 7

        # First 3x3 convolution layer
        self.conv = nn.Sequential(
            nn.Conv2d(
                in_channels=3,
                out_channels=int(32 * width_mult),
                kernel_size=3,
                stride=first_stride,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm2d(int(32 * width_mult)),
            nn.ReLU(True),
        )

        # Depthwise Separable Convolution layers
        self.deepwise = nn.Sequential(
            separable_convolution2d(
                in_channels=32, out_channels=64, stride=1, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=64, out_channels=128, stride=2, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=128, out_channels=128, stride=1, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=128, out_channels=256, stride=2, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=256, out_channels=256, stride=1, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=256, out_channels=512, stride=2, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=512, out_channels=512, stride=1, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=512, out_channels=512, stride=1, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=512, out_channels=512, stride=1, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=512, out_channels=512, stride=1, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=512, out_channels=512, stride=1, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=512, out_channels=1024, stride=2, width_mult=width_mult
            ),
            separable_convolution2d(
                in_channels=1024, out_channels=1024, stride=1, width_mult=width_mult
            ),
        )

        # Classifier
        self.classifier = nn.Sequential(
            nn.AvgPool2d(avgpool_size),
            Flatten(),
            nn.Linear(in_features=int(1024 * width_mult), out_features=num_classes),
        )
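The forward pass for MobileNetV1 is not shown in this snippet; a minimal sketch, assuming the three stages above are simply applied in order, would be:

    def forward(self, x):
        # Hypothetical forward (not part of the original snippet): stem conv,
        # then the depthwise-separable stack, then avgpool/flatten/linear.
        x = self.conv(x)
        x = self.deepwise(x)
        return self.classifier(x)

With num_classes=10 (CIFAR-10), the stem conv uses stride 1 and the four stride-2 separable blocks reduce a 32x32 input to 2x2, which the 2x2 average pool collapses before the final linear layer, so MobileNetV1(num_classes=10)(torch.randn(2, 3, 32, 32)) would yield a (2, 10) tensor.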
    def _setup(self, config):
        l0_strength = config["l0_strength"]
        l2_strength = config["l2_strength"]

        data_path = os.path.expanduser("~/nta/datasets")
        batch_size = 100
        transform = transforms.Compose([transforms.ToTensor()])
        self.train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_path,
                           train=True,
                           download=True,
                           transform=transform),
            batch_size=batch_size,
            shuffle=True,
            num_workers=4,
            pin_memory=torch.cuda.is_available())
        self.val_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_path, train=False, transform=transform),
            batch_size=batch_size,
            num_workers=4,
            pin_memory=torch.cuda.is_available())
        num_classes = 10
        input_size = (1, 28, 28)

        conv_dims = (20, 50)
        fc_dims = 500

        l0_strengths = (l0_strength, l0_strength, l0_strength, l0_strength)

        kernel_sidelength = 5
        maxpool_stride = 2
        feature_map_sidelength = (
            ((input_size[1] - kernel_sidelength + 1) / maxpool_stride)
            - kernel_sidelength + 1
        ) / maxpool_stride
        assert feature_map_sidelength == int(feature_map_sidelength)
        feature_map_sidelength = int(feature_map_sidelength)

        model_type = config["model_type"]
        learn_weight = config["learn_weight"]
        if model_type == "HardConcrete":
            temperature = 2 / 3
            self.model = nn.Sequential(
                OrderedDict([
                    ("cnn1",
                     HardConcreteGatedConv2d(input_size[0],
                                             conv_dims[0],
                                             kernel_sidelength,
                                             droprate_init=0.5,
                                             temperature=temperature,
                                             l2_strength=l2_strength,
                                             l0_strength=l0_strengths[0],
                                             learn_weight=learn_weight)),
                    ("cnn1_relu", nn.ReLU()),
                    ("cnn1_maxpool", nn.MaxPool2d(maxpool_stride)),
                    ("cnn2",
                     HardConcreteGatedConv2d(conv_dims[0],
                                             conv_dims[1],
                                             kernel_sidelength,
                                             droprate_init=0.5,
                                             temperature=temperature,
                                             l2_strength=l2_strength,
                                             l0_strength=l0_strengths[1],
                                             learn_weight=learn_weight)),
                    ("cnn2_relu", nn.ReLU()),
                    ("cnn2_maxpool", nn.MaxPool2d(maxpool_stride)),
                    ("flatten", Flatten()),
                    ("fc1",
                     HardConcreteGatedLinear(
                         (feature_map_sidelength**2) * conv_dims[1],
                         fc_dims,
                         droprate_init=0.5,
                         l2_strength=l2_strength,
                         l0_strength=l0_strengths[2],
                         temperature=temperature,
                         learn_weight=learn_weight)),
                    ("fc1_relu", nn.ReLU()),
                    ("fc2",
                     HardConcreteGatedLinear(fc_dims,
                                             num_classes,
                                             droprate_init=0.5,
                                             l2_strength=l2_strength,
                                             l0_strength=l0_strengths[3],
                                             temperature=temperature,
                                             learn_weight=learn_weight)),
                ]))
        elif model_type == "Binary":
            self.model = nn.Sequential(
                OrderedDict([
                    ("cnn1",
                     BinaryGatedConv2d(input_size[0],
                                       conv_dims[0],
                                       kernel_sidelength,
                                       droprate_init=0.5,
                                       l2_strength=l2_strength,
                                       l0_strength=l0_strengths[0],
                                       learn_weight=learn_weight)),
                    ("cnn1_relu", nn.ReLU()),
                    ("cnn1_maxpool", nn.MaxPool2d(maxpool_stride)),
                    ("cnn2",
                     BinaryGatedConv2d(conv_dims[0],
                                       conv_dims[1],
                                       kernel_sidelength,
                                       droprate_init=0.5,
                                       l2_strength=l2_strength,
                                       l0_strength=l0_strengths[1],
                                       learn_weight=learn_weight)),
                    ("cnn2_relu", nn.ReLU()),
                    ("cnn2_maxpool", nn.MaxPool2d(maxpool_stride)),
                    ("flatten", Flatten()),
                    ("fc1",
                     BinaryGatedLinear(
                         (feature_map_sidelength**2) * conv_dims[1],
                         fc_dims,
                         droprate_init=0.5,
                         l2_strength=l2_strength,
                         l0_strength=l0_strengths[2],
                         learn_weight=learn_weight)),
                    ("fc1_relu", nn.ReLU()),
                    ("fc2",
                     BinaryGatedLinear(fc_dims,
                                       num_classes,
                                       droprate_init=0.5,
                                       l2_strength=l2_strength,
                                       l0_strength=l0_strengths[3],
                                       learn_weight=learn_weight)),
                ]))
        else:
            raise ValueError("Unrecognized model type: {}".format(model_type))

        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(device)
        self.device = device

        self.loglike = nn.CrossEntropyLoss().to(self.device)
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          config["lr"])
Example #26
    def __init__(
        self,
        cnn_out_channels=(64, 64),
        cnn_percent_on=(0.095, 0.125),
        cnn_weight_sparsity=None,
        linear_units=1000,
        linear_percent_on=0.1,
        linear_weight_sparsity=None,
        temperature=10.0,
        eval_temperature=1.0,
        temperature_decay_rate=0.99,
        k_inference_factor=1.0,
        cnn_sparsity=(0.5, 0.8),
        linear_sparsity=0.9,
    ):
        super(SampledKWinnerGSCSparseCNN, self).__init__()

        if cnn_weight_sparsity is not None:
            warnings.warn(
                "Parameter `cnn_weight_sparsity` is deprecated. Use "
                "`cnn_sparsity` instead.",
                DeprecationWarning,
            )
            cnn_sparsity = (1.0 - cnn_weight_sparsity[0], 1.0 - cnn_weight_sparsity[1])

        if linear_weight_sparsity is not None:
            warnings.warn(
                "Parameter `linear_weight_sparsity` is deprecated. Use "
                "`linear_sparsity` instead.",
                DeprecationWarning,
            )
            linear_sparsity = 1.0 - linear_weight_sparsity

        # input_shape = (1, 32, 32)
        # First Sparse CNN layer
        if cnn_sparsity[0] > 0:
            self.add_module(
                "cnn1",
                SparseWeights2d(
                    nn.Conv2d(1, cnn_out_channels[0], 5), sparsity=cnn_sparsity[0]
                ),
            )
        else:
            self.add_module("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5))
        self.add_module(
            "cnn1_batchnorm", nn.BatchNorm2d(cnn_out_channels[0], affine=False)
        )
        self.add_module(
            "cnn1_kwinner",
            SampledKWinners2d(
                percent_on=cnn_percent_on[0],
                k_inference_factor=k_inference_factor,
                temperature=temperature,
                eval_temperature=eval_temperature,
                temperature_decay_rate=temperature_decay_rate,
                relu=False,
            ),
        )
        self.add_module("cnn1_maxpool", nn.MaxPool2d(2))

        # Second Sparse CNN layer
        if cnn_sparsity[1] > 0:
            self.add_module(
                "cnn2",
                SparseWeights2d(
                    nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5),
                    sparsity=cnn_sparsity[1],
                ),
            )
        else:
            self.add_module(
                "cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5)
            )
        self.add_module(
            "cnn2_batchnorm", nn.BatchNorm2d(cnn_out_channels[1], affine=False)
        )
        self.add_module(
            "cnn2_kwinner",
            SampledKWinners2d(
                percent_on=cnn_percent_on[1],
                k_inference_factor=k_inference_factor,
                temperature=temperature,
                eval_temperature=eval_temperature,
                temperature_decay_rate=temperature_decay_rate,
                relu=False,
            ),
        )
        self.add_module("cnn2_maxpool", nn.MaxPool2d(2))

        self.add_module("flatten", Flatten())

        # Sparse Linear layer
        self.add_module(
            "linear",
            SparseWeights(
                nn.Linear(25 * cnn_out_channels[1], linear_units),
                sparsity=linear_sparsity,
            ),
        )
        self.add_module("linear_bn", nn.BatchNorm1d(linear_units, affine=False))
        self.add_module(
            "linear_kwinner",
            SampledKWinners(
                percent_on=linear_percent_on,
                k_inference_factor=k_inference_factor,
                temperature=temperature,
                eval_temperature=eval_temperature,
                temperature_decay_rate=temperature_decay_rate,
                relu=False,
            ),
        )

        # Classifier
        self.add_module("output", nn.Linear(linear_units, 12))
        self.add_module("softmax", nn.LogSoftmax(dim=1))
Example #27
    def __init__(self,
                 cnn_out_channels=(64, 64),
                 cnn_percent_on=(0.095, 0.125),
                 cnn_weight_sparsity=(0.5, 0.2),
                 linear_units=1000,
                 linear_percent_on=0.1,
                 linear_weight_sparsity=0.1,
                 boost_strength=1.5,
                 boost_strength_factor=0.9,
                 k_inference_factor=1.0,
                 duty_cycle_period=1000,
                 kwinner_local=False):
        super(GSCSparseCNN, self).__init__()
        # input_shape = (1, 32, 32)
        # First Sparse CNN layer
        if cnn_weight_sparsity[0] < 1.0:
            self.add_module(
                "cnn1",
                SparseWeights2d(nn.Conv2d(1, cnn_out_channels[0], 5),
                                weight_sparsity=cnn_weight_sparsity[0]))
        else:
            self.add_module("cnn1", nn.Conv2d(1, cnn_out_channels[0], 5))
        self.add_module("cnn1_batchnorm",
                        nn.BatchNorm2d(cnn_out_channels[0], affine=False))
        self.add_module(
            "cnn1_kwinner",
            KWinners2d(
                channels=cnn_out_channels[0],
                percent_on=cnn_percent_on[0],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                duty_cycle_period=duty_cycle_period,
                local=kwinner_local,
            ))
        self.add_module("cnn1_maxpool", nn.MaxPool2d(2))

        # Second Sparse CNN layer
        if cnn_weight_sparsity[1] < 1.0:
            self.add_module(
                "cnn2",
                SparseWeights2d(nn.Conv2d(cnn_out_channels[0],
                                          cnn_out_channels[1], 5),
                                weight_sparsity=cnn_weight_sparsity[1]))
        else:
            self.add_module(
                "cnn2", nn.Conv2d(cnn_out_channels[0], cnn_out_channels[1], 5))
        self.add_module("cnn2_batchnorm",
                        nn.BatchNorm2d(cnn_out_channels[1], affine=False))
        self.add_module(
            "cnn2_kwinner",
            KWinners2d(
                channels=cnn_out_channels[1],
                percent_on=cnn_percent_on[1],
                k_inference_factor=k_inference_factor,
                boost_strength=boost_strength,
                boost_strength_factor=boost_strength_factor,
                duty_cycle_period=duty_cycle_period,
                local=kwinner_local,
            ))
        self.add_module("cnn2_maxpool", nn.MaxPool2d(2))

        self.add_module("flatten", Flatten())

        # Sparse Linear layer
        self.add_module(
            "linear",
            SparseWeights(nn.Linear(25 * cnn_out_channels[1], linear_units),
                          weight_sparsity=linear_weight_sparsity))
        self.add_module("linear_bn", nn.BatchNorm1d(linear_units,
                                                    affine=False))
        self.add_module(
            "linear_kwinner",
            KWinners(n=linear_units,
                     percent_on=linear_percent_on,
                     k_inference_factor=k_inference_factor,
                     boost_strength=boost_strength,
                     boost_strength_factor=boost_strength_factor,
                     duty_cycle_period=duty_cycle_period))

        # Classifier
        self.add_module("output", nn.Linear(linear_units, 12))
        self.add_module("softmax", nn.LogSoftmax(dim=1))
    def __init__(self, config):
        """Called once at the beginning of each experiment."""
        self.start_time = time.time()
        self.logger = get_logger(config["name"], config.get("verbose", 2))
        self.logger.debug("Config: %s", config)

        # Setup random seed
        seed = config["seed"]
        set_random_seed(seed)

        # Get our directories correct
        self.data_dir = config["data_dir"]

        # Configure Model
        self.model_type = config["model_type"]
        self.num_classes = 12
        self.log_interval = config["log_interval"]
        self.batches_in_epoch = config["batches_in_epoch"]
        self.batch_size = config["batch_size"]
        self.background_noise_dir = config["background_noise_dir"]
        self.noise_values = [0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
        cnn_input_shape = config.get("cnn_input_shape", (1, 32, 32))
        linear_n = config["linear_n"]
        linear_percent_on = config["linear_percent_on"]
        cnn_out_channels = config["cnn_out_channels"]
        cnn_percent_on = config["cnn_percent_on"]
        boost_strength = config["boost_strength"]
        weight_sparsity = config["weight_sparsity"]
        cnn_weight_sparsity = config["cnn_weight_sparsity"]
        boost_strength_factor = config["boost_strength_factor"]
        k_inference_factor = config["k_inference_factor"]
        use_batch_norm = config["use_batch_norm"]
        dropout = config.get("dropout", 0.0)

        self.load_datasets()

        model = nn.Sequential()

        if self.model_type == "cnn":
            # Add CNN Layers
            input_shape = cnn_input_shape
            cnn_layers = len(cnn_out_channels)
            if cnn_layers > 0:
                for i in range(cnn_layers):
                    in_channels, height, width = input_shape
                    add_sparse_cnn_layer(
                        network=model,
                        suffix=i + 1,
                        in_channels=in_channels,
                        out_channels=cnn_out_channels[i],
                        use_batch_norm=use_batch_norm,
                        weight_sparsity=cnn_weight_sparsity,
                        percent_on=cnn_percent_on[i],
                        k_inference_factor=k_inference_factor,
                        boost_strength=boost_strength,
                        boost_strength_factor=boost_strength_factor,
                    )

                    # Feed this layer output into next layer input
                    in_channels = cnn_out_channels[i]

                    # Compute next layer input shape
                    wout = (width - 5) + 1
                    maxpool_width = wout // 2
                    input_shape = (in_channels, maxpool_width, maxpool_width)

            # Flatten CNN output before passing to linear layer
            model.add_module("flatten", Flatten())

            # Add Linear layers
            input_size = np.prod(input_shape)
            for i in range(len(linear_n)):
                add_sparse_linear_layer(
                    network=model,
                    suffix=i + 1,
                    input_size=input_size,
                    linear_n=linear_n[i],
                    dropout=dropout,
                    use_batch_norm=use_batch_norm,
                    weight_sparsity=weight_sparsity,
                    percent_on=linear_percent_on[i],
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                )
                input_size = linear_n[i]

            # Output layer
            model.add_module(
                "output", nn.Linear(input_size, self.num_classes)
            )
            model.add_module("softmax", nn.LogSoftmax(dim=1))

        elif self.model_type == "resnet9":
            model = resnet9(
                num_classes=self.num_classes, in_channels=1
            )

        elif self.model_type == "gsc_sparse_cnn":
            model = GSCSparseCNN()

        elif self.model_type == "gsc_super_sparse_cnn":
            model = GSCSuperSparseCNN()

        else:
            raise RuntimeError("Unknown model type")

        self.use_cuda = torch.cuda.is_available()
        self.logger.debug("use_cuda %s", self.use_cuda)
        if self.use_cuda:
            self.device = torch.device("cuda")
            model = model.cuda()
        else:
            self.device = torch.device("cpu")

        self.logger.debug("device %s", self.device)
        if torch.cuda.device_count() > 1:
            self.logger.debug("Using %s GPUs", torch.cuda.device_count())
            model = torch.nn.DataParallel(model)

        self.model = model
        self.logger.debug("Model: %s", self.model)
        self.learning_rate = config["learning_rate"]
        self.optimizer = self.create_optimizer(config, self.model)
        self.lr_scheduler = self.create_learning_rate_scheduler(config, self.optimizer)
Example #29
    def _create_vgg_model(self):
        """
        block_sizes = [1,1,1] - number of CNN layers in each block
        cnn_out_channels = [c1, c2, c3] - number of out_channels in each layer of this block
        cnn_kernel_size = [k1, k2, k3] - kernel_size in each layer of this block
        cnn_weight_sparsity = [w1, w2, w3] - weight sparsity of each layer of this block
        cnn_percent_on = [p1, p2, p3] - percent_on in each layer of this block
        """
        # Here we require exactly 3 blocks
        # assert(len(self.block_sizes) == 3)

        # Create simple CNN model, with options for sparsity
        self.model = nn.Sequential()

        in_channels = 3
        output_size = 32 * 32
        output_units = output_size * in_channels
        for ly, block_size in enumerate(self.block_sizes):
            for b in range(block_size):
                self._add_cnn_layer(
                    index_str=str(ly) + "_" + str(b),
                    in_channels=in_channels,
                    out_channels=self.cnn_out_channels[ly],
                    kernel_size=self.cnn_kernel_sizes[ly],
                    percent_on=self.cnn_percent_on[ly],
                    weight_sparsity=self.cnn_weight_sparsity[ly],
                    add_pooling=b == block_size - 1,
                )
                in_channels = self.cnn_out_channels[ly]
            output_size = int(output_size / 4)
            output_units = output_size * in_channels

        # Flatten CNN output before passing to linear layer
        self.model.add_module("flatten", Flatten())

        # Linear layer
        input_size = output_units
        for ly, linear_n in enumerate(self.linear_n):
            linear = nn.Linear(input_size, linear_n)
            if self.linear_weight_sparsity[ly] < 1.0:
                self.model.add_module(
                    "linear_" + str(ly),
                    SparseWeights(linear, self.linear_weight_sparsity[ly]),
                )
            else:
                self.model.add_module("linear_" + str(ly), linear)

            if self.linear_percent_on[ly] < 1.0:
                self.model.add_module(
                    "kwinners_linear_" + str(ly),
                    KWinners(
                        n=linear_n,
                        percent_on=self.linear_percent_on[ly],
                        k_inference_factor=self.k_inference_factor,
                        boost_strength=self.boost_strength,
                        boost_strength_factor=self.boost_strength_factor,
                    ),
                )
            else:
                self.model.add_module("Linear_ReLU_" + str(ly), nn.ReLU())

            input_size = self.linear_n[ly]

        # Output layer
        self.model.add_module("output", nn.Linear(input_size,
                                                  self.output_size))

        print(self.model)

        self.model.to(self.device)

        self._initialize_weights()
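The output_size bookkeeping above divides by 4 once per block because each block's last layer adds a pooling step (add_pooling is True only there) that presumably halves both spatial dimensions. Assuming the 32x32 input implied by output_size = 32 * 32 and the three blocks the commented-out assert expects, the per-channel cell counts work out as:

output_size = 32 * 32             # 1024 cells per channel at the input
sizes = []
for _ in range(3):                # three blocks, one pooling step each
    output_size = int(output_size / 4)
    sizes.append(output_size)
assert sizes == [256, 64, 16]
# flatten size fed to the first linear layer: 16 * cnn_out_channels[-1]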
Example #30
    def _setup(self, config):

        # Get trial parameters
        seed = config["seed"]
        datadir = config["datadir"]
        batch_size = config["batch_size"]
        test_batch_size = config["test_batch_size"]
        first_epoch_batch_size = config["first_epoch_batch_size"]
        in_channels, h, w = config["c1_input_shape"]
        learning_rate = config["learning_rate"]
        momentum = config["momentum"]
        weight_sparsity = config["weight_sparsity"]
        boost_strength = config["boost_strength"]
        boost_strength_factor = config["boost_strength_factor"]
        n = config["n"]
        percent_on = config["percent_on"]
        cnn_percent_on = config["cnn_percent_on"]
        k_inference_factor = config["k_inference_factor"]
        kernel_size = config["kernel_size"]
        out_channels = config["out_channels"]
        output_size = config["output_size"]
        cnn_output_len = out_channels * ((w - kernel_size + 1) // 2)**2

        torch.manual_seed(seed)
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
            torch.cuda.manual_seed(seed)
        else:
            self.device = torch.device("cpu")

        xforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        train_dataset = datasets.MNIST(datadir, train=True, transform=xforms)
        test_dataset = datasets.MNIST(datadir, train=False, transform=xforms)

        self.train_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=batch_size,
                                                        shuffle=True)
        self.test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=test_batch_size, shuffle=True)
        self.first_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=first_epoch_batch_size, shuffle=True)

        # Create simple sparse model
        self.model = nn.Sequential()

        # CNN layer
        self.model.add_module(
            "cnn",
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
            ),
        )

        if cnn_percent_on < 1.0:
            self.model.add_module(
                "kwinners_cnn",
                KWinners2d(
                    percent_on=cnn_percent_on,
                    channels=out_channels,
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                ),
            )
        else:
            self.model.add_module("ReLU_cnn", nn.ReLU())

        self.model.add_module("maxpool", nn.MaxPool2d(kernel_size=2))

        # Flatten max pool output before passing to linear layer
        self.model.add_module("flatten", Flatten())

        # Linear layer
        linear = nn.Linear(cnn_output_len, n)
        if weight_sparsity < 1.0:
            self.model.add_module("sparse_linear",
                                  SparseWeights(linear, weight_sparsity))
        else:
            self.model.add_module("linear", linear)

        if percent_on < 1.0:
            self.model.add_module(
                "kwinners_kinear",
                KWinners(
                    n=n,
                    percent_on=percent_on,
                    k_inference_factor=k_inference_factor,
                    boost_strength=boost_strength,
                    boost_strength_factor=boost_strength_factor,
                ),
            )
        else:
            self.model.add_module("Linear_ReLU", nn.ReLU())

        # Output layer
        self.model.add_module("fc", nn.Linear(n, output_size))
        self.model.add_module("softmax", nn.LogSoftmax(dim=1))

        self.model.to(self.device)
        self.optimizer = optim.SGD(self.model.parameters(),
                                   lr=learning_rate,
                                   momentum=momentum)
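A minimal sketch of one training step for the model built above. The helper method and the NLLLoss criterion (which pairs with the model's final LogSoftmax layer) are illustrative additions, not part of the original _setup.

    def _train_one_step(self):
        # Hypothetical helper: run a single SGD step on one batch from the
        # training loader, using NLLLoss on the model's log-probabilities.
        criterion = nn.NLLLoss()
        self.model.train()
        data, target = next(iter(self.train_loader))
        data, target = data.to(self.device), target.to(self.device)
        self.optimizer.zero_grad()
        loss = criterion(self.model(data), target)
        loss.backward()
        self.optimizer.step()
        return loss.item()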