def __init__(self, initial_sparsity=0.98, sparse=True, no_batch_norm=False):
    super(mnist_mlp, self).__init__()
    # Biases are only enabled when batch norm is off; BN's learned shift
    # makes per-layer biases redundant otherwise.
    self.fc1 = DynamicLinear(784, 300, initial_sparsity, bias=no_batch_norm, sparse=sparse)
    self.fc_int = DynamicLinear(300, 100, initial_sparsity, bias=no_batch_norm, sparse=sparse)
    #self.fc2 = DynamicLinear(100, 10, min(0.5, initial_sparsity), bias=False, sparse=sparse)
    self.fc2 = DynamicLinear(100, 10, initial_sparsity, bias=no_batch_norm, sparse=sparse)

    if no_batch_norm:
        # Identity stand-ins so forward() can call self.bn* unconditionally.
        self.bn1 = lambda x: x
        self.bn2 = lambda x: x
        self.bn3 = lambda x: x
    else:
        self.bn1 = nn.BatchNorm1d(300)
        self.bn2 = nn.BatchNorm1d(100)
        self.bn3 = nn.BatchNorm1d(10)
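# A minimal sketch of a matching forward pass (assumed, not from the source):
# the ReLU activation choice and the input flattening are assumptions, and
# torch.nn.functional is assumed imported as F.
def forward(self, x):
    x = x.view(-1, 784)                   # flatten 28x28 MNIST images
    x = F.relu(self.bn1(self.fc1(x)))
    x = F.relu(self.bn2(self.fc_int(x)))
    x = self.bn3(self.fc2(x))             # logits; the loss applies softmax
    return x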
def __init__(self, num_classes=10, depth=19, init_weights=True, cfg=None, affine=True,
             batchnorm=True, initial_sparsity_conv=0.95, initial_sparsity_fc=0.95,
             sub_kernel_granularity=4, sparse=True):
    super(VGG, self).__init__()
    self.initial_sparsity_conv = initial_sparsity_conv
    self.initial_sparsity_fc = initial_sparsity_fc
    self.sparse = sparse
    self.sub_kernel_granularity = sub_kernel_granularity

    if cfg is None:
        cfg = defaultcfg[depth]

    self._AFFINE = affine
    self.feature = self.make_layers(cfg, batchnorm)
    self.num_classes = num_classes
    # The dense nn.Linear classifier is replaced by a sparse DynamicLinear.
    #self.classifier = nn.Linear(cfg[-1], num_classes)
    self.classifier = DynamicLinear(cfg[-1], num_classes,
                                    initial_sparsity=self.initial_sparsity_fc,
                                    sparse=self.sparse)
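# Hypothetical usage sketch (assumed, not from the source): defaultcfg is
# expected to map depth -> a channel configuration consumed by make_layers,
# as in standard VGG code. The CIFAR-sized 32x32 input is an assumption.
import torch
model = VGG(num_classes=10, depth=19, batchnorm=True,
            initial_sparsity_conv=0.9, initial_sparsity_fc=0.9, sparse=True)
logits = model(torch.randn(2, 3, 32, 32))   # -> shape (2, 10)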
def __init__(self, block, num_blocks, num_classes=10, widen_factor=1,
             initial_sparsity_conv=0.95, initial_sparsity_fc=0.95,
             sub_kernel_granularity=4, sparse=True):
    super(ResNet32, self).__init__()
    _outputs = [32, 64, 128]
    self.widen_factor = widen_factor
    self.initial_sparsity_conv = initial_sparsity_conv
    self.initial_sparsity_fc = initial_sparsity_fc
    self.sub_kernel_granularity = sub_kernel_granularity
    self.sparse = sparse

    self.in_planes = _outputs[0]
    # The stem conv stays dense; the residual stages are built by _make_layer.
    self.conv1 = nn.Conv2d(3, _outputs[0], kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(_outputs[0])
    self.layer1 = self._make_layer(block, _outputs[0], num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, _outputs[1], num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, _outputs[2], num_blocks[2], stride=2)
    #self.linear = nn.Linear(_outputs[2], num_classes)
    self.linear = DynamicLinear(_outputs[2], num_classes,
                                initial_sparsity=self.initial_sparsity_fc,
                                sparse=self.sparse)
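# Hypothetical constructor call (assumed, not from the source): a standard
# CIFAR ResNet-32 uses three stages of 5 basic blocks (depth = 6*5 + 2);
# the BasicBlock class name is an assumption about the repo.
model = ResNet32(BasicBlock, [5, 5, 5], num_classes=10,
                 initial_sparsity_conv=0.9, initial_sparsity_fc=0.9,
                 sparse=True)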
def __init__(self, block, layers, num_classes=1000, widen_factor=1,
             vanilla_downsample=True, vanilla_conv1=True, vanilla_conv3=True,
             initial_sparsity_conv=0.5, initial_sparsity_fc=0.95,
             sub_kernel_granularity=4, sparse=True):
    self.inplanes = np.round(64 * widen_factor).astype('int32')
    super(ResNet, self).__init__()
    self.widen_factor = widen_factor
    self.vanilla_conv1 = vanilla_conv1
    self.vanilla_conv3 = vanilla_conv3
    self.vanilla_downsample = vanilla_downsample
    self.initial_sparsity_conv = initial_sparsity_conv
    self.initial_sparsity_fc = initial_sparsity_fc
    self.sub_kernel_granularity = sub_kernel_granularity
    self.sparse = sparse

    # Channel width of the first stage, scaled by the widen factor.
    base_width = np.round(64 * widen_factor).astype('int32')
    self.conv1 = nn.Conv2d(3, base_width, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(base_width)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, base_width, layers[0])
    self.layer2 = self._make_layer(block, base_width * 2, layers[1], stride=2)
    self.layer3 = self._make_layer(block, base_width * 4, layers[2], stride=2)
    self.layer4 = self._make_layer(block, base_width * 8, layers[3], stride=2)
    self.avgpool = nn.AvgPool2d(7, stride=1)
    self.fc = DynamicLinear(base_width * block.expansion * 8, num_classes,
                            initial_sparsity=self.initial_sparsity_fc, sparse=sparse)

    # He-style initialization: conv weights ~ N(0, sqrt(2/fan_out));
    # sparse dynamic convs initialize their underlying parameter tensor instead.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, DynamicConv2d):
            n = m.kernel_size * m.kernel_size * m.n_output_maps
            if m.sparse:
                m.d_tensor.s_tensor.data.normal_(0, math.sqrt(2. / n))
            else:
                m.d_tensor.bank.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
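# Hypothetical ResNet-50-style instantiation (assumed, not from the source):
# a Bottleneck block with expansion = 4 is assumed to be defined in the repo.
# AvgPool2d(7) after the stride-2 stem, the stride-2 maxpool, and three
# stride-2 stages implies 224x224 ImageNet inputs (224 / 2^5 = 7).
import torch
model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=1000, widen_factor=1,
               initial_sparsity_conv=0.5, initial_sparsity_fc=0.95, sparse=True)
logits = model(torch.randn(1, 3, 224, 224))   # -> shape (1, 1000)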