def __init__(self, input_channels, depth=50, encoder_feature_size=1024,
             encoded_image_size=1):
    """Build a ResNet-backed image encoder.

    Args:
        input_channels: Channel count of the input images; the stock ResNet
            stem conv is replaced so inputs need not be 3-channel RGB.
        depth: ResNet depth key looked up in the module-level ``depth_map``
            (e.g. 18/34/50/...).
        encoder_feature_size: Output dimension of the final linear projection.
        encoded_image_size: Side length of the adaptive-pool output grid.
            Default 1 preserves the original hard-coded ``(1, 1)`` pooling.
    """
    super(Encoder, self).__init__()
    # Bug fixes vs. original: `encoded_image_size` was read but never
    # defined (NameError), and `self.depth` was read before any assignment
    # (AttributeError). Both are set explicitly here.
    self.depth = depth
    self.enc_image_size = encoded_image_size
    resnet = depth_map[depth](pretrained=True)
    # Swap the stem conv to accept arbitrary input channel counts while
    # preserving every other stem hyperparameter.
    resnet.conv1 = nn.Conv2d(
        input_channels,
        resnet.conv1.out_channels,
        kernel_size=resnet.conv1.kernel_size,
        stride=resnet.conv1.stride,
        padding=resnet.conv1.padding,
        bias=False)
    # Basic-block resnets (depth < 50) end in 512 channels; bottleneck
    # variants (50/101/152) end in 2048.
    backbone_out_features = 512 if self.depth < 50 else 2048
    # Drop the backbone's own avgpool and fc; keep the conv trunk only.
    modules = list(resnet.children())[:-2]
    self.resnet = nn.Sequential(*modules)
    self.adaptive_pool = nn.AdaptiveAvgPool2d(
        (encoded_image_size, encoded_image_size))
    self.fc = nn.Linear(backbone_out_features, encoder_feature_size, bias=False)
def __init__(
    self,
    embedded_size: int,
    dropout_cnn: float,
    dropout_cnn_out: float,
    resnet: int,
    pretrained_resnet: bool,
):
    """Build a ResNet feature extractor with per-layer dropout.

    Args:
        embedded_size: Dimension of the output embedding.
        dropout_cnn: Dropout probability inserted after every retained
            backbone child module.
        dropout_cnn_out: Dropout probability applied to the final features.
        resnet: ResNet depth selector passed to ``get_resnet``.
        pretrained_resnet: Whether to load pretrained backbone weights.
    """
    super(EncoderCNN, self).__init__()
    # Fix vs. original: the int parameter `resnet` was shadowed by the
    # backbone module object; bind the module to a distinct local name so
    # both values stay readable. Public parameter names are unchanged.
    backbone: models.resnet.ResNet = get_resnet(resnet, pretrained_resnet)
    # Keep everything except the last child (the backbone's fc layer),
    # interleaving a Dropout after each retained module.
    trunk: List[nn.Module] = list(backbone.children())[:-1]
    modules_dropout: List[Union[nn.Module, nn.Dropout]] = []
    for layer in trunk:
        modules_dropout.append(layer)
        modules_dropout.append(nn.Dropout(dropout_cnn))
    self.resnet: nn.Module = nn.Sequential(*modules_dropout)
    # Project backbone features down/up to the embedding size; the original
    # fc's in_features is still readable on the intact backbone object.
    self.fc: nn.Linear = nn.Linear(backbone.fc.in_features, embedded_size)
    self.dropout: nn.Dropout = nn.Dropout(p=dropout_cnn_out)
    self.bn: nn.BatchNorm1d = nn.BatchNorm1d(embedded_size, momentum=0.01)