def model():
    model = MetaSequential(
        MetaLinear(2, 3, bias=True),
        nn.ReLU(),
        MetaLinear(3, 1, bias=False))
    return model
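# Hedged usage sketch (not part of the original snippets): it assumes the
# torchmeta convention that a MetaModule forward accepts an optional
# ``params`` OrderedDict and falls back to the module's own parameters when
# ``params`` is None. Assumed imports: torch, collections.OrderedDict, and the
# torchmeta modules used above.
def example_forward_with_params():
    net = model()
    inputs = torch.randn(4, 2)
    # Default forward, equivalent to a plain nn.Sequential
    outputs = net(inputs)
    # Functional forward with an explicit copy of the meta-parameters
    params = OrderedDict(net.meta_named_parameters())
    outputs_functional = net(inputs, params=params)
    return outputs, outputs_functional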
def __init__(self):
    super(MIL, self).__init__()
    h1, w1 = 200, 200
    h2, w2 = calcHW(h1, w1, kernel_size=8, stride=2)
    h3, w3 = calcHW(h2, w2, kernel_size=4, stride=2)
    h4, w4 = calcHW(h3, w3, kernel_size=4, stride=2)

    self.features = MetaSequential(
        MetaConv2d(in_channels=3, out_channels=32, kernel_size=8, stride=2),
        MetaLayerNorm([32, h2, w2]),
        nn.ReLU(),
        MetaConv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2),
        MetaLayerNorm([64, h3, w3]),
        nn.ReLU(),
        MetaConv2d(in_channels=64, out_channels=64, kernel_size=4, stride=2),
        MetaLayerNorm([64, h4, w4]),
        nn.ReLU(),
        SpatialSoftmax(h4, w4))
    self.policy = MetaSequential(
        MetaLinear(2 * 64 + 3, 128),
        nn.ReLU(),
        MetaLinear(128, 128),
        nn.ReLU(),
        MetaLinear(128, 128),
        nn.ReLU(),
        MetaLinear(128, 4),
    )
def __init__(self, out_features, in_channels=1, hidden_size=32, mid_feats=256,
             feature_size=25088):
    super(MetaMNISTConvModel, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size
    self.feature_size = feature_size
    self.mid_feats = mid_feats

    self.features = MetaSequential(
        OrderedDict([
            ('layer1', conv_block(in_channels, hidden_size, kernel_size=3,
                                  stride=1, padding=1, bias=True)),
            ('layer2', conv_block(hidden_size, hidden_size, kernel_size=3,
                                  stride=1, padding=1, bias=True)),
        ]))
    self.classifier_first = MetaLinear(feature_size, mid_feats, bias=True)
    self.classifier = MetaLinear(mid_feats, out_features, bias=True)
def __init__(self, l_obs, n_action, l1=64, l2=64):
    super(MetaNet_PG, self).__init__()
    self.l_obs = l_obs
    self.n_action = n_action
    self.l1 = l1
    self.l2 = l2

    self.actor_net = MetaSequential(
        MetaLinear(self.l_obs, self.l1),
        nn.ReLU(),
        MetaLinear(self.l1, self.l2),
        nn.ReLU(),
        MetaLinear(self.l2, self.n_action),
        nn.Sigmoid(),
        MetaLinear(self.n_action, self.n_action))
def __init__(self, in_channels, hidden1_size=40, hidden2_size=80):
    super(RegressionNeuralNetwork, self).__init__()
    self.in_channels = in_channels
    self.hidden1_size = hidden1_size
    self.hidden2_size = hidden2_size

    self.regressor = MetaSequential(
        MetaLinear(in_channels, hidden1_size),
        nn.ReLU(),
        MetaLinear(hidden1_size, hidden2_size),
        nn.ReLU(),
        MetaLinear(hidden2_size, hidden1_size),
        nn.ReLU(),
        MetaLinear(hidden1_size, 1))
def __init__(self, in_features, out_features, hidden_sizes):
    super(MetaMLPModel, self).__init__()
    self.in_features = in_features
    self.out_features = out_features
    self.hidden_sizes = hidden_sizes

    layer_sizes = [in_features] + hidden_sizes
    self.features = MetaSequential(OrderedDict([
        ('layer{0}'.format(i + 1), MetaSequential(OrderedDict([
            ('linear', MetaLinear(hidden_size, layer_sizes[i + 1], bias=True)),
            ('relu', nn.ReLU())
        ]))) for (i, hidden_size) in enumerate(layer_sizes[:-1])]))
    self.classifier = MetaLinear(hidden_sizes[-1], out_features, bias=True)
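# Hedged usage sketch (assumption, not from the original snippet): it assumes
# the accompanying forward chains ``self.features`` and ``self.classifier``,
# as in torchmeta's MetaMLPModel. With hidden_sizes=[40, 40] the linear layers
# map 4 -> 40 -> 40 -> 2.
def example_mlp_parameter_names():
    model = MetaMLPModel(in_features=4, out_features=2, hidden_sizes=[40, 40])
    # meta_named_parameters() lists only the meta-learnable parameters,
    # e.g. 'features.layer1.linear.weight', ..., 'classifier.bias'
    return [name for name, _ in model.meta_named_parameters()]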
def test_ridge_regression_requires_grad(reg_lambda, use_woodbury, scale, bias):
    # Numpy
    num_classes = 3
    embeddings_np = np.random.randn(5, 7).astype(np.float32)
    targets_np = np.random.randint(0, num_classes, size=(5,))

    # PyTorch
    embeddings_th = torch.as_tensor(embeddings_np).requires_grad_()
    targets_th = torch.as_tensor(targets_np)

    model = MetaLinear(7, 3, bias=bias)
    solution = ridge_regression(embeddings_th, targets_th, reg_lambda,
                                num_classes, use_woodbury=use_woodbury,
                                scale=scale, bias=bias)

    params = OrderedDict([('weight', solution.weight)])
    if bias:
        params['bias'] = solution.bias

    # Compute loss on test/query samples
    test_embeddings = torch.randn(11, 7)
    test_logits = model(test_embeddings, params=params)
    test_targets = torch.randint(num_classes, size=(11,))
    loss = F.cross_entropy(test_logits, test_targets)

    # Backpropagation
    loss.backward()
    assert embeddings_th.grad is not None
def __init__(self, in_dim, out_dim, num_layers=3, hidden_size=64,
             nonlinearity="relu"):
    super(MultiLayerPerceptron, self).__init__()
    self.in_dim = in_dim
    self.hidden_size = hidden_size
    self.out_dim = out_dim

    if nonlinearity == "relu":
        self.activation = nn.ReLU
    elif nonlinearity == "swish":
        self.activation = swish  # assumes a `swish` module is defined elsewhere
    elif nonlinearity == "sigmoid":
        self.activation = nn.Sigmoid
    else:
        raise ValueError("Unknown nonlinearity: {}".format(nonlinearity))

    self.layer_list = [
        nn.Flatten(),
        nn.Linear(in_dim, hidden_size),
        self.activation()
    ]
    for _ in range(num_layers):
        self.layer_list.extend(
            [nn.Linear(hidden_size, hidden_size), self.activation()])
    # Should be able to add variable layers
    self.features = MetaSequential(*self.layer_list)
    self.classifier = MetaLinear(hidden_size, out_dim)
def __init__(self, out_features, in_channels=1, hidden_size=64,
             feature_size=64):
    super(MetaToyConvModel, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size
    self.feature_size = feature_size

    self.features = MetaSequential(
        OrderedDict([
            ('layer1', conv_block(in_channels, hidden_size, kernel_size=3,
                                  stride=1, padding=1, bias=True)),
            ('layer2', conv_block(hidden_size, hidden_size, kernel_size=3,
                                  stride=1, padding=1, bias=True)),
            # ('layer3', conv_block(hidden_size, hidden_size, kernel_size=3,
            #                       stride=1, padding=1, bias=True)),
            # ('layer4', conv_block(hidden_size, hidden_size, kernel_size=3,
            #                       stride=1, padding=1, bias=True))
        ]))
    self.classifier = MetaLinear(feature_size, out_features, bias=True)
def test_metasequential_params():
    meta_model = MetaSequential(
        nn.Linear(2, 3, bias=True),
        nn.ReLU(),
        MetaLinear(3, 5, bias=True))
    model = nn.Sequential(
        nn.Linear(2, 3, bias=True),
        nn.ReLU(),
        nn.Linear(3, 5, bias=True))

    # Set same weights for both models (first layer)
    weight0 = torch.randn(3, 2)
    meta_model[0].weight.data.copy_(weight0)
    model[0].weight.data.copy_(weight0)
    bias0 = torch.randn(3)
    meta_model[0].bias.data.copy_(bias0)
    model[0].bias.data.copy_(bias0)

    params = OrderedDict()
    params['2.weight'] = torch.randn(5, 3)
    model[2].weight.data.copy_(params['2.weight'])
    params['2.bias'] = torch.randn(5)
    model[2].bias.data.copy_(params['2.bias'])

    inputs = torch.randn(5, 2)
    outputs_torchmeta = meta_model(inputs, params=params)
    outputs_nn = model(inputs)
    np.testing.assert_equal(outputs_torchmeta.detach().numpy(),
                            outputs_nn.detach().numpy())
def __init__(self, in_channels, out_features, hidden_size=64, fc_in_size=None,
             conv_kernel=[3, 3, 3, 3], strides=None):
    super(ConvolutionalNeuralNetwork, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size

    if strides is None:
        strides = [None] * len(conv_kernel)
    else:
        assert len(strides) == len(conv_kernel)

    self.features = MetaSequential(
        conv3x3(in_channels, hidden_size, conv_kernel[0], strides[0]),
    )
    for k in range(1, len(conv_kernel)):
        self.features.add_module(
            'block_' + str(k),
            conv3x3(hidden_size, hidden_size, conv_kernel[k], strides[k]))

    if fc_in_size is None:
        fc_in_size = hidden_size
    self.classifier = MetaLinear(fc_in_size, out_features)
def __init__(self, out_features=10, input_size=100, hidden_size1=300,
             hidden_size2=200):
    super(MLP, self).__init__()
    self.out_features = out_features

    self.features = MetaSequential(
        nn.Linear(input_size, hidden_size1), nn.ReLU(),
        nn.Linear(hidden_size1, hidden_size2), nn.ReLU(),
        nn.Linear(hidden_size2, hidden_size1), nn.ReLU(),
        nn.Linear(hidden_size1, hidden_size2), nn.ReLU(),
        nn.Linear(hidden_size2, hidden_size1), nn.ReLU(),
        nn.Linear(hidden_size1, hidden_size2), nn.ReLU(),
        nn.Linear(hidden_size2, hidden_size1), nn.ReLU())
    self.classifier = MetaLinear(hidden_size1, out_features)
def __init__(self, backbone, in_features, num_ways):
    super(ConvolutionalNeuralNetwork, self).__init__()
    self.in_features = in_features
    self.num_ways = num_ways
    self.encoder = get_meta_backbone(backbone)
    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    self.classifier = MetaLinear(in_features, num_ways)  # 1600
def __init__(self, nc, num_classes, block, num_blocks):
    super(ResNet, self).__init__()
    self.in_planes = 64

    self.conv1 = MetaConv2d(nc, 64, kernel_size=3, stride=1, padding=1,
                            bias=False)
    self.bn1 = MetaBatchNorm2d(64)
    self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
    self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
    self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
    self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
    self.linear = MetaLinear(512 * block.expansion, num_classes)
def __init__(self, model_dict, dataset, device):
    super().__init__()
    self.dataset = dataset
    self.model_dict = model_dict

    if self.model_dict['name'] == 'resnet18':
        if self.model_dict['pretrained']:
            self.net = models.resnet18(pretrained=True)
            self.net.fc = nn.Linear(512, self.dataset.n_classes)
        else:
            self.net = models.resnet18(num_classes=self.dataset.n_classes)
    elif self.model_dict['name'] == 'resnet18_meta':
        if self.model_dict.get('pretrained', True):
            self.net = resnet_meta.resnet18(pretrained=True)
            self.net.fc = MetaLinear(512, self.dataset.n_classes)
        else:
            self.net = resnet_meta.resnet18(
                num_classes=self.dataset.n_classes)
    elif self.model_dict['name'] == 'resnet18_meta_2':
        self.net = resnet_meta_2.ResNet18(nc=3,
                                          nclasses=self.dataset.n_classes)
    elif self.model_dict['name'] == 'resnet18_meta_old':
        self.net = resnet_meta_old.ResNet18(
            nc=3, nclasses=self.dataset.n_classes)
    else:
        raise ValueError('network %s does not exist' % model_dict['name'])

    if device.type == 'cuda':
        self.net = DataParallel(self.net)
    self.net.to(device)

    # set optimizer
    self.opt_dict = model_dict['opt']
    self.lr_init = self.opt_dict['lr']
    if self.model_dict['opt']['name'] == 'sps':
        n_batches_per_epoch = 120
        self.opt = sps.Sps(self.net.parameters(),
                           n_batches_per_epoch=n_batches_per_epoch,
                           c=0.5,
                           adapt_flag='smooth_iter',
                           eps=0,
                           eta_max=None)
    else:
        self.opt = optim.SGD(self.net.parameters(),
                             lr=self.opt_dict['lr'],
                             momentum=self.opt_dict['momentum'],
                             weight_decay=self.opt_dict['weight_decay'])

    # variables
    self.device = device
def __init__(self, in_channels, out_features, hidden_size=64):
    super(ConvolutionalNeuralNetwork, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size

    self.features = MetaSequential(
        conv3x3(in_channels, hidden_size),
        conv3x3(hidden_size, hidden_size),
        conv3x3(hidden_size, hidden_size),
        conv3x3(hidden_size, hidden_size))
    self.classifier = MetaLinear(hidden_size, out_features)
def __init__(self, blocks, keep_prob=1.0, avg_pool=False, drop_rate=0.0,
             dropblock_size=5, out_features=5, wh_size=None):
    self.inplanes = 3
    super(ResNet, self).__init__()

    self.layer1 = self._make_layer(blocks[0], 64, stride=2,
                                   drop_rate=drop_rate, drop_block=True,
                                   block_size=dropblock_size)
    self.layer2 = self._make_layer(blocks[1], 128, stride=2,
                                   drop_rate=drop_rate, drop_block=True,
                                   block_size=dropblock_size)
    self.layer3 = self._make_layer(blocks[2], 256, stride=2,
                                   drop_rate=drop_rate, drop_block=True,
                                   block_size=dropblock_size)
    self.layer4 = self._make_layer(blocks[3], 512, stride=2,
                                   drop_rate=drop_rate, drop_block=True,
                                   block_size=dropblock_size)
    if avg_pool:
        self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.keep_prob = keep_prob
    self.keep_avg_pool = avg_pool
    self.dropout = nn.Dropout(p=1 - self.keep_prob, inplace=False)
    self.drop_rate = drop_rate
    self.classifier = MetaLinear(512 * wh_size * wh_size, out_features)

    for m in self.modules():
        if isinstance(m, MetaConv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                    nonlinearity='leaky_relu')
        elif isinstance(m, MetaBatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)
def __init__(self, in_channels, out_features, hidden_size=64):
    super(ConvolutionalNeuralNetwork, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size

    self.features = nn.Sequential(
        conv3x3(in_channels, hidden_size),
        conv3x3(hidden_size, hidden_size),
        conv3x3(hidden_size, hidden_size),
        conv3x3(hidden_size, hidden_size))
    # Only the last (linear) layer is used for adaptation in ANIL
    self.classifier = MetaLinear(hidden_size, out_features)
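# Hedged ANIL-style inner-loop sketch (assumption, not from the original
# snippet): only the MetaLinear head is adapted, while the plain nn.Sequential
# body keeps its meta-trained weights. It assumes a forward that pools the
# conv features down to `hidden_size` dimensions before the head; `model` is
# an instance of the class above, and the usual imports (torch, F,
# OrderedDict) are in scope.
def anil_inner_step(model, support_inputs, support_targets, step_size=0.4):
    head_params = OrderedDict(model.classifier.meta_named_parameters())
    pooled = model.features(support_inputs).mean([-2, -1])
    logits = model.classifier(pooled, params=head_params)
    inner_loss = F.cross_entropy(logits, support_targets)
    # Differentiate only w.r.t. the head parameters (ANIL adapts the head only)
    grads = torch.autograd.grad(inner_loss, head_params.values(),
                                create_graph=True)
    return OrderedDict(
        (name, param - step_size * grad)
        for (name, param), grad in zip(head_params.items(), grads))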
def test_ridge_regression(reg_lambda, use_woodbury, scale, bias):
    # Numpy
    num_classes = 3
    embeddings_np = np.random.randn(5, 7).astype(np.float32)
    targets_np = np.random.randint(0, num_classes, size=(5,))

    # PyTorch
    embeddings_th = torch.as_tensor(embeddings_np)
    targets_th = torch.as_tensor(targets_np)

    model = MetaLinear(7, 3, bias=bias)
    solution = ridge_regression(embeddings_th, targets_th, reg_lambda,
                                num_classes, use_woodbury=use_woodbury,
                                scale=scale, bias=bias)

    assert solution.weight.shape == (3, 7)
    if bias:
        assert solution.bias is not None
        assert solution.bias.shape == (3,)
    else:
        assert solution.bias is None

    # Optimality criterion
    # Check if the gradient of the L2-regularized MSE at the solution
    # is close to 0
    params = OrderedDict([('weight', solution.weight.requires_grad_())])
    if bias:
        params['bias'] = solution.bias.requires_grad_()
    logits = model(embeddings_th, params=params)
    targets_binary = F.one_hot(targets_th, num_classes=num_classes).float()

    # Least-square
    loss = F.mse_loss(logits, targets_binary, reduction='sum')
    if scale:
        loss /= embeddings_th.size(0)

    # L2-regularization
    loss += reg_lambda * torch.sum(solution.weight ** 2)
    if bias:
        loss += reg_lambda * torch.sum(solution.bias ** 2)
    loss.backward()

    np.testing.assert_allclose(solution.weight.grad.numpy(), 0., atol=1e-4)
    if bias:
        np.testing.assert_allclose(solution.bias.grad.numpy(), 0., atol=1e-4)
def __init__(self, channels, init_block_channels, bottleneck, conv1_stride,
             in_channels=3, in_size=(224, 224), num_classes=1000, mode='',
             linear=True):
    super(ResNet, self).__init__()
    self.in_size = in_size
    self.num_classes = num_classes
    self.mode = mode
    self.linear = linear

    self.features = MetaSequential()
    self.features.add_module(
        "init_block",
        ResInitBlock(in_channels=in_channels,
                     out_channels=init_block_channels,
                     mode=self.mode))
    in_channels = init_block_channels
    for i, channels_per_stage in enumerate(channels):
        stage = MetaSequential()
        for j, out_channels in enumerate(channels_per_stage):
            stride = 2 if (j == 0) and (i != 0) else 1
            stage.add_module(
                "unit{}".format(j + 1),
                ResUnit(in_channels=in_channels,
                        out_channels=out_channels,
                        stride=stride,
                        bottleneck=bottleneck,
                        conv1_stride=conv1_stride,
                        mode=self.mode))
            in_channels = out_channels
        self.features.add_module("stage{}".format(i + 1), stage)
    self.features.add_module("final_pool", nn.AdaptiveAvgPool2d(1))
    # self.features.add_module("final_pool", nn.AvgPool2d(kernel_size=7, stride=1))

    if self.mode == 'maml':
        self.output = MetaLinear(in_features=in_channels,
                                 out_features=num_classes)
    else:
        self.output = nn.Linear(in_features=in_channels,
                                out_features=num_classes)

    self._init_params()
def __init__(self, in_channels, input_dim, out_features, hidden_size=64):
    super(ConvolutionalNeuralNetwork, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features

    self.features = MetaSequential(
        conv3x3(in_channels, hidden_size),
        conv3x3(hidden_size, hidden_size),
        conv3x3(hidden_size, hidden_size),
        conv3x3(hidden_size, hidden_size))

    # Assumes each of the 4 conv blocks halves the spatial resolution
    i = 4
    idim = input_dim
    for x in range(i):
        idim = math.floor(idim / 2)
    self.linear_input = hidden_size * idim * idim
    self.classifier = MetaLinear(self.linear_input, out_features)
def __init__(self, in_channels, out_features, hidden_size=64, feature_size=64,
             embedding=False):
    super(MetaConvModel, self).__init__()
    self.in_channels = in_channels
    self.out_features = out_features
    self.hidden_size = hidden_size
    self.feature_size = feature_size
    self.embedding = embedding

    self.features = MetaSequential(
        OrderedDict([('layer1', conv_block(in_channels, hidden_size)),
                     ('layer2', conv_block(hidden_size, hidden_size)),
                     ('layer3', conv_block(hidden_size, hidden_size)),
                     ('layer4', conv_block(hidden_size, hidden_size))]))
    self.classifier = MetaLinear(feature_size, out_features)
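# Hedged MAML-style adaptation sketch (assumption, not from the original
# snippet): it relies on torchmeta's gradient_update_parameters helper and on
# a forward of the form model(inputs, params=...), as in the torchmeta MAML
# example. The model instance and support tensors are placeholders.
from torchmeta.utils.gradient_based import gradient_update_parameters

def maml_inner_step(model, support_inputs, support_targets, step_size=0.4):
    logits = model(support_inputs)
    inner_loss = F.cross_entropy(logits, support_targets)
    model.zero_grad()
    # One gradient step on a functional copy of the parameters; the returned
    # OrderedDict can be passed back as `params` for the query-set forward pass.
    adapted_params = gradient_update_parameters(model, inner_loss,
                                                step_size=step_size,
                                                first_order=False)
    return adapted_params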
def test_metasequential():
    meta_model = MetaSequential(
        nn.Linear(2, 3, bias=True),
        nn.ReLU(),
        MetaLinear(3, 5, bias=True))
    model = nn.Sequential(
        nn.Linear(2, 3, bias=True),
        nn.ReLU(),
        nn.Linear(3, 5, bias=True))

    assert isinstance(meta_model, MetaModule)
    assert isinstance(meta_model, nn.Sequential)

    params = OrderedDict(meta_model.meta_named_parameters())
    assert set(params.keys()) == set(['2.weight', '2.bias'])

    # Set same weights for both models
    weight0 = torch.randn(3, 2)
    meta_model[0].weight.data.copy_(weight0)
    model[0].weight.data.copy_(weight0)
    bias0 = torch.randn(3)
    meta_model[0].bias.data.copy_(bias0)
    model[0].bias.data.copy_(bias0)

    weight2 = torch.randn(5, 3)
    meta_model[2].weight.data.copy_(weight2)
    model[2].weight.data.copy_(weight2)
    bias2 = torch.randn(5)
    meta_model[2].bias.data.copy_(bias2)
    model[2].bias.data.copy_(bias2)

    inputs = torch.randn(5, 2)
    outputs_torchmeta = meta_model(inputs, params=None)
    outputs_nn = model(inputs)
    np.testing.assert_equal(outputs_torchmeta.detach().numpy(),
                            outputs_nn.detach().numpy())
def __init__(self, in_dims, out_dims, hidden_size=100):
    super(PolicyNetwork, self).__init__()
    self.in_dims = in_dims
    self.out_dims = out_dims * 2  # diagonal Gaussian distribution
    self.hidden_size = hidden_size

    fc1 = MetaLinear(in_dims, hidden_size)
    fc2 = MetaLinear(hidden_size, hidden_size)
    fc3 = MetaLinear(hidden_size, self.out_dims)
    self.features = MetaSequential(
        fc1,
        nn.Tanh(),
        fc2,
        nn.Tanh(),
        fc3,
        nn.Tanh(),
    )

    self.activation = {}

    def get_activation(name):
        def hook(model, input, output):
            self.activation[name] = output.detach()
        return hook

    fc1.register_forward_hook(get_activation('fc1'))
    fc2.register_forward_hook(get_activation('fc2'))
    fc3.register_forward_hook(get_activation('fc3'))
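# Hedged usage sketch (assumption, not from the original snippet): after any
# forward pass through self.features, the hooks registered above populate
# self.activation with the detached outputs of fc1/fc2/fc3, regardless of
# whether explicit `params` were supplied. `policy` is an instance of the
# class above; `observations` is a batch of observation tensors.
def example_read_activations(policy, observations):
    _ = policy.features(observations)
    return {name: policy.activation[name].shape
            for name in ('fc1', 'fc2', 'fc3')}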
def model():
    model = MetaLinear(3, 1, bias=False)
    model.weight.data = torch.tensor([[2., 3., 5.]])
    return model
def __init__(self, node_embedding_dim, hidden_dim, num_classes):
    super(GraphClassificationOutputModule, self).__init__()
    self.linear1 = MetaLinear(node_embedding_dim, hidden_dim)
    self.linear2 = MetaLinear(hidden_dim, num_classes)
def __init__(self, node_embedding_dim):
    super(LinkPredictionOutputModule, self).__init__()
    self.linear_a = MetaLinear(node_embedding_dim, node_embedding_dim)
    self.linear = MetaLinear(2 * node_embedding_dim, 1)
def __init__(self, node_embedding_dim, num_classes):
    super(NodeClassificationOutputModule, self).__init__()
    self.linear = MetaLinear(node_embedding_dim, num_classes)
def linear_model():
    return MetaLinear(2, 1)
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
             groups=1, width_per_group=64, replace_stride_with_dilation=None,
             norm_layer=None):
    super(ResNet, self).__init__()
    if norm_layer is None:
        norm_layer = MetaBatchNorm2d
    self._norm_layer = norm_layer

    self.inplanes = 64
    self.dilation = 1
    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(
                             replace_stride_with_dilation))
    self.groups = groups
    self.base_width = width_per_group
    self.conv1 = MetaConv2d(3, self.inplanes, kernel_size=7, stride=2,
                            padding=3, bias=False)
    # self.conv1 = MetaConv2d(3, self.inplanes, kernel_size=3, stride=1,
    #                         padding=1, bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                   dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                   dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                   dilate=replace_stride_with_dilation[2])
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = MetaLinear(512 * block.expansion, num_classes)

    for m in self.modules():
        if isinstance(m, MetaConv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out',
                                    nonlinearity='relu')
        elif isinstance(m, (MetaBatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Zero-initialize the last BN in each residual branch, so that the
    # residual branch starts with zeros, and each residual block behaves
    # like an identity. This improves the model by 0.2~0.3% according to
    # https://arxiv.org/abs/1706.02677
    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                nn.init.constant_(m.bn3.weight, 0)
            elif isinstance(m, BasicBlock):
                nn.init.constant_(m.bn2.weight, 0)