def receptive_field_for(self, input_field=None):
    """
    Compute the receptive field of this module.

    The field is propagated through ``self.hidden``; when a skip branch
    exists its field is recorded under the ``'skip'`` key of the result's
    hidden info (the returned field itself is the hidden-branch field).
    """
    import netharn as nh
    rf = nh.ReceptiveFieldFor(self.hidden)(input_field)
    if self.skip:
        # Record the skip branch's field for introspection / debugging
        rf.hidden['skip'] = nh.ReceptiveFieldFor(self.skip)(rf)
    return rf
def _debug_hidden(self, input_shape, n=5):
    """ Print internal shape and field info """
    import netharn as nh
    # Run both analyzers over the branch and dump their hidden state to
    # depth ``n`` — output-shape first, receptive-field second.
    for analyzer in (nh.OutputShapeFor, nh.ReceptiveFieldFor):
        info = analyzer(self.branch)(input_shape=input_shape)
        print(ub.repr2(info.hidden.shallow(n), nl=-1, dtype=False, si=True))
def test_convT_rf():
    """
    Check that ConvTranspose2d layers invert the receptive-field geometry
    of a matching stack of Conv2d layers.

    CommandLine:
        xdoctest -m ~/code/netharn/tests/test_receptive_feild.py test_convT_rf
    """
    # Test that we always invert whatever weird crazy thing we do
    import netharn as nh
    rng = np.random.RandomState(3668028386)
    ntrials = 100
    M = 9
    for _ in ub.ProgIter(range(ntrials), desc='testing rand convT instances'):
        depth = rng.randint(1, M)
        params = []
        for i in range(depth):
            k = rng.randint(0, 1 + M // 2) * 2 + 1   # odd kernel size
            s = rng.randint(1, 1 + M)                # stride
            d = rng.randint(1, 1 + M)                # dilation
            p = rng.randint(0, 1 + M)                # padding
            # BUG FIX: ``p`` was previously not stored in ``params``, so
            # every layer silently reused the padding drawn on the *last*
            # iteration of this loop. Store it per-layer so each conv and
            # its matching transposed conv use their own sampled padding.
            params.append((i, (k, s, d, p)))
        # Construct a series of forward convolutions and the transpose
        # convolutions that should "invert" them. Assert that the strides
        # and crop of the RF are the same on every layer. Furthermore the
        # RF size should strictly increase.
        layers = ub.odict()
        for i, (k, s, d, p) in params:
            key = 'c{}'.format(i)
            conv = nn.Conv2d(1, 1, kernel_size=k, stride=s, padding=p,
                             dilation=d)
            layers[key] = conv
        for i, (k, s, d, p) in reversed(params):
            key = 'c{}T'.format(i)
            convT = nn.ConvTranspose2d(1, 1, kernel_size=k, stride=s,
                                       padding=p, dilation=d)
            layers[key] = convT
        module = nn.Sequential(layers)
        field = nh.ReceptiveFieldFor(module)()
        input_rf = nh.ReceptiveFieldFor.input()
        symmetric = [('input', input_rf)] + list(field.hidden.items())
        # RF size must be monotonically non-decreasing through the stack
        for a, b in ub.iter_window(symmetric, 2):
            k1, v1 = a
            k2, v2 = b
            assert np.all(v1['shape'] <= v2['shape']), 'v1={} v2={}'.format(
                v1, v2)
        # Stride and crop must be mirror-symmetric: layer i from the front
        # matches layer i from the back (each convT undoes its conv).
        for a, b in zip(symmetric, symmetric[::-1]):
            k1, v1 = a
            k2, v2 = b
            assert np.all(v1['stride'] == v2['stride']), 'v1={} v2={}'.format(
                v1, v2)
            assert np.all(v1['crop'] == v2['crop']), 'v1={} v2={}'.format(
                v1, v2)
def receptive_field_for(self, input_field=None):
    """
    Delegate receptive-field computation to the wrapped ``self.module``.
    """
    import netharn as nh
    rf_computer = nh.ReceptiveFieldFor(self.module)
    return rf_computer(input_field)
def __init__(self, branch='resnet50', input_shape=(1, 3, 416, 416),
             norm_desc=False, desc_size=1024, hidden_channels=3, dropout=0,
             norm='batch', noli='relu', residual=False, bias=False):
    """
    Build a descriptor network from a (possibly pretrained) ResNet branch.

    Args:
        branch (str | torchvision.models.ResNet): 'resnet50'/None selects a
            pretrained torchvision resnet50; otherwise must already be a
            ResNet instance (other architectures raise ValueError).
        input_shape (tuple): (B, C, H, W) expected input shape; C may
            differ from 3 (conv1 is rebuilt to match, see below).
        norm_desc (bool): whether to normalize descriptors.
        desc_size (int): output descriptor dimension.
        hidden_channels, dropout, norm, noli, residual, bias: forwarded to
            the MLP head that replaces the final fc layer.

    Note:
        * i have found norm_desc to be generally unhelpful.

    Example:
        >>> from netharn.models.descriptor_network import *
        >>> import netharn as nh
        >>> input_shape = (4, 3, 32, 32)
        >>> self = DescriptorNetwork(input_shape=input_shape)
        >>> nh.OutputShapeFor(self)._check_consistency(input_shape)
        {'dvecs': (4, 1024)}
    """
    import netharn as nh
    super(DescriptorNetwork, self).__init__()
    pretrained = True
    if branch is None or branch == 'resnet50':
        self.branch = torchvision.models.resnet50(pretrained=pretrained)
    else:
        self.branch = branch
    if not isinstance(self.branch, torchvision.models.ResNet):
        raise ValueError('can only accept resnet at the moment')
    self.norm_desc = norm_desc

    # number of input channels (from the NCHW input_shape)
    self.in_channels = input_shape[1]
    self.out_channels = desc_size

    # If the requested input channel count differs from the branch's
    # stem conv, rebuild conv1 with the same hyperparameters but the new
    # in_channels, then transplant as much of the pretrained weight as
    # possible (leftover weights get Kaiming-normal init).
    if self.branch.conv1.in_channels != self.in_channels:
        prev = self.branch.conv1
        cls = prev.__class__
        self.branch.conv1 = cls(
            in_channels=self.in_channels,
            out_channels=prev.out_channels,
            kernel_size=prev.kernel_size,
            stride=prev.stride,
            padding=prev.padding,
            dilation=prev.dilation,
            groups=prev.groups,
            bias=prev.bias,
        )
        if pretrained:
            nh.initializers.functional.load_partial_state(
                self.branch.conv1,
                prev.state_dict(),
                leftover=nh.initializers.KaimingNormal(),
                verbose=0,
            )

    # Note the advanced usage of output-shape-for
    # Receptive-field sanity check: disabled (``0 and``) because a newer
    # torchvision version broke the introspection it relies on.
    if 0 and __debug__:
        # new torchvision broke this
        branch_field = nh.ReceptiveFieldFor(self.branch)(
            input_shape=input_shape)
        prepool_field = branch_field.hidden.shallow(1)['layer4']
        input_dims = np.array(input_shape[-2:])
        rf_stride = prepool_field['stride']
        if np.any(input_dims < rf_stride // 2):
            msg = ('Network is too deep OR input is to small. '
                   'rf_stride={} but input_dims={}'.format(
                       rf_stride, input_dims))
            self._debug_hidden(input_shape, n=2)
            print(msg)
            import warnings
            warnings.warn(msg)
            raise Exception(msg)

    # Infer the feature-map shape coming out of layer4 (before pooling)
    branch_shape = nh.OutputShapeFor(self.branch)(input_shape)
    prepool_shape = branch_shape.hidden.shallow(1)['layer4']

    # replace the last layer of resnet with a linear embedding to learn the
    # LP distance between pairs of images.
    # Also need to replace the pooling layer in case the input has a
    # different size.
    self.prepool_shape = prepool_shape
    pool_channels = prepool_shape[1]
    pool_dims = prepool_shape[2:]
    if np.all(np.array(pool_dims) == 1):
        # Feature map is already 1x1: pooling would be a no-op
        self.branch.avgpool = layers.Identity()
    else:
        # Pool the whole spatial extent down to 1x1
        self.branch.avgpool = torch.nn.AvgPool2d(pool_dims, stride=1)

    # Check that the modification to the layer fixed the size
    postbranch_shape = nh.OutputShapeFor(self.branch)(input_shape)
    postpool_shape = postbranch_shape.hidden.shallow(1)['layer4']

    assert np.all(np.array(prepool_shape[1:]) > 0)
    assert np.all(np.array(postpool_shape[1:]) > 0)

    # Replace the final linear layer with an MLP head
    self.branch.fc = layers.MultiLayerPerceptronNd(
        dim=0, in_channels=pool_channels, hidden_channels=hidden_channels,
        out_channels=desc_size, bias=bias, dropout=dropout, norm=norm,
        noli=noli, residual=residual)