def __init__(self, jigsaw_classes=1000, n_classes=100, domains=3, dropout=True):
    """Assemble the Caffe-style AlexNet layers plus an Info_Dropout module.

    Args:
        jigsaw_classes: Output width of ``jigsaw_classifier``.
        n_classes: Output width of ``class_classifier``.
        domains: Accepted for interface compatibility; not used by this
            constructor.
        dropout: When true, ``drop6``/``drop7`` are ``nn.Dropout()``;
            otherwise they are ``Id()`` placeholders.
    """
    super(AlexNetCaffe, self).__init__()
    print("Using Caffe AlexNet")

    def make_drop():
        # One fresh module per call so drop6 and drop7 are distinct objects.
        if dropout:
            return nn.Dropout()
        return Id()

    # conv1/relu1 live in their own container, apart from the remaining
    # convolutional stack.
    stem = OrderedDict()
    stem["conv1"] = nn.Conv2d(3, 96, kernel_size=11, stride=4)
    stem["relu1"] = nn.ReLU(inplace=True)
    self.features_1 = nn.Sequential(stem)

    # Constructed with the same channel/kernel/stride arguments as conv1.
    # NOTE(review): how it is combined with features_1 is decided outside
    # this constructor — confirm in forward().
    self.infodrop_1 = Info_Dropout(3, 96, kernel_size=11, stride=4)

    # Remaining convolutional stack, starting at pool1.
    trunk = OrderedDict()
    trunk["pool1"] = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
    trunk["norm1"] = nn.LocalResponseNorm(5, 1.e-4, 0.75)
    trunk["conv2"] = nn.Conv2d(96, 256, kernel_size=5, padding=2, groups=2)
    trunk["relu2"] = nn.ReLU(inplace=True)
    trunk["pool2"] = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
    trunk["norm2"] = nn.LocalResponseNorm(5, 1.e-4, 0.75)
    trunk["conv3"] = nn.Conv2d(256, 384, kernel_size=3, padding=1)
    trunk["relu3"] = nn.ReLU(inplace=True)
    trunk["conv4"] = nn.Conv2d(384, 384, kernel_size=3, padding=1, groups=2)
    trunk["relu4"] = nn.ReLU(inplace=True)
    trunk["conv5"] = nn.Conv2d(384, 256, kernel_size=3, padding=1, groups=2)
    trunk["relu5"] = nn.ReLU(inplace=True)
    trunk["pool5"] = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
    self.features = nn.Sequential(trunk)

    # Two fully connected layers; fc6 takes a flattened 256 x 6 x 6 input.
    head = OrderedDict()
    head["fc6"] = nn.Linear(256 * 6 * 6, 4096)
    head["relu6"] = nn.ReLU(inplace=True)
    head["drop6"] = make_drop()
    head["fc7"] = nn.Linear(4096, 4096)
    head["relu7"] = nn.ReLU(inplace=True)
    head["drop7"] = make_drop()
    self.classifier = nn.Sequential(head)

    # Two linear output heads, each taking a 4096-wide input.
    self.jigsaw_classifier = nn.Linear(4096, jigsaw_classes)
    self.class_classifier = nn.Linear(4096, n_classes)
def __init__(self, jigsaw_classes=1000, n_classes=100, dropout=True):
    """Build the Caffe-style AlexNet with a jigsaw head and a class head.

    Args:
        jigsaw_classes: Output width of ``jigsaw_classifier``.
        n_classes: Output width of ``class_classifier``.
        dropout: When true, ``drop6``/``drop7`` are ``nn.Dropout()``;
            otherwise they are ``Id()`` placeholders.
    """
    super().__init__()
    print("Using Caffe AlexNet")

    # Width of the fully connected layers. Every layer below that consumes
    # or produces this width refers to it, so it has one source of truth.
    self.fc_size = 4096

    # conv1 uses stride 2 here and there is no pool5 after relu5 (it was
    # commented out in the original code); fc6 still expects a flattened
    # 256 x 6 x 6 feature map.
    # NOTE(review): presumably sized for small per-tile inputs — confirm
    # against forward().
    self.features = nn.Sequential(
        OrderedDict([
            ("conv1", nn.Conv2d(3, 96, kernel_size=11, stride=2)),
            ("relu1", nn.ReLU(inplace=True)),
            ("pool1", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)),
            ("norm1", nn.LocalResponseNorm(5, 1.e-4, 0.75)),
            ("conv2", nn.Conv2d(96, 256, kernel_size=5, padding=2, groups=2)),
            ("relu2", nn.ReLU(inplace=True)),
            ("pool2", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)),
            ("norm2", nn.LocalResponseNorm(5, 1.e-4, 0.75)),
            ("conv3", nn.Conv2d(256, 384, kernel_size=3, padding=1)),
            ("relu3", nn.ReLU(inplace=True)),
            ("conv4", nn.Conv2d(384, 384, kernel_size=3, padding=1, groups=2)),
            ("relu4", nn.ReLU(inplace=True)),
            ("conv5", nn.Conv2d(384, 256, kernel_size=3, padding=1, groups=2)),
            ("relu5", nn.ReLU(inplace=True)),
        ]))

    self.classifier = nn.Sequential(
        OrderedDict([
            ("fc6", nn.Linear(256 * 6 * 6, self.fc_size)),
            ("relu6", nn.ReLU(inplace=True)),
            ("drop6", nn.Dropout() if dropout else Id()),
            ("fc7", nn.Linear(self.fc_size, self.fc_size)),
            ("relu7", nn.ReLU(inplace=True)),
            ("drop7", nn.Dropout() if dropout else Id()),
        ]))

    # The jigsaw head consumes 9 * fc_size inputs.
    # NOTE(review): presumably the fc7 features of nine tiles concatenated
    # — confirm in forward().
    # Both heads stay wrapped in nn.Sequential so their parameter names
    # keep the "<head>.0.weight" / "<head>.0.bias" form.
    self.jigsaw_classifier = nn.Sequential(
        nn.Linear(9 * self.fc_size, jigsaw_classes),
    )
    self.class_classifier = nn.Sequential(
        nn.Linear(self.fc_size, n_classes),
    )

    # Re-initialise every Linear layer: Xavier-uniform weights with gain
    # 0.1 and zero biases. Convolutions keep their default initialisation.
    for m in self.modules():
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight, .1)
            nn.init.constant_(m.bias, 0.)
def __init__(self, n_classes=100, domains=3, dropout=True):
    """Build the Caffe-style AlexNet backbone plus extra learnable tensors.

    Args:
        n_classes: Size of the last dimension of ``sms`` and ``sm_biases``.
        domains: Accepted for interface compatibility; not used by this
            constructor (see the review note on ``embs`` below).
        dropout: When true, ``drop6``/``drop7`` are ``nn.Dropout()``;
            otherwise they are ``Id()`` placeholders.
    """
    super(AlexNetCaffe, self).__init__()
    print("Using Caffe AlexNet")
    self.features = nn.Sequential(
        OrderedDict([
            ("conv1", nn.Conv2d(3, 96, kernel_size=11, stride=4)),
            ("relu1", nn.ReLU(inplace=True)),
            ("pool1", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)),
            ("norm1", nn.LocalResponseNorm(5, 1.e-4, 0.75)),
            ("conv2", nn.Conv2d(96, 256, kernel_size=5, padding=2, groups=2)),
            ("relu2", nn.ReLU(inplace=True)),
            ("pool2", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)),
            ("norm2", nn.LocalResponseNorm(5, 1.e-4, 0.75)),
            ("conv3", nn.Conv2d(256, 384, kernel_size=3, padding=1)),
            ("relu3", nn.ReLU(inplace=True)),
            ("conv4", nn.Conv2d(384, 384, kernel_size=3, padding=1, groups=2)),
            ("relu4", nn.ReLU(inplace=True)),
            ("conv5", nn.Conv2d(384, 256, kernel_size=3, padding=1, groups=2)),
            ("relu5", nn.ReLU(inplace=True)),
            ("pool5", nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)),
        ]))
    self.classifier = nn.Sequential(
        OrderedDict([
            ("fc6", nn.Linear(256 * 6 * 6, 4096)),
            ("relu6", nn.ReLU(inplace=True)),
            ("drop6", nn.Dropout() if dropout else Id()),
            ("fc7", nn.Linear(4096, 4096)),
            ("relu7", nn.ReLU(inplace=True)),
            ("drop7", nn.Dropout() if dropout else Id()),
        ]))

    classes = n_classes
    K = 2

    # The original code always created these tensors on 'cuda', which made
    # the model impossible to construct without a GPU. Falling back to CPU
    # keeps behaviour identical when CUDA is available; as registered
    # parameters they still follow later .to()/.cuda() calls on the module.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # K weight matrices of shape (4096, classes) and K matching bias rows,
    # both drawn from N(0, 1e-3).
    # NOTE(review): the names suggest K softmax layers on top of the
    # 4096-wide features — confirm in forward().
    self.sms = torch.nn.Parameter(
        torch.normal(0., 1e-3, size=[K, 4096, classes], dtype=torch.float, device=device),
        requires_grad=True)
    self.sm_biases = torch.nn.Parameter(
        torch.normal(0., 1e-3, size=[K, classes], dtype=torch.float, device=device),
        requires_grad=True)

    # NOTE(review): the first dimension is hard-coded to 3, which equals
    # the default of the otherwise unused `domains` argument. If this is a
    # per-domain table it should probably be sized by `domains` — confirm
    # against callers and saved checkpoints before changing.
    self.embs = torch.nn.Parameter(
        torch.normal(mean=0., std=1e-1, size=[3, K - 1], dtype=torch.float, device=device),
        requires_grad=True)

    # Learnable scalar (size=[] yields a 0-dim tensor), drawn from N(0, 1e-4).
    self.cs_wt = torch.nn.Parameter(
        torch.normal(mean=0., std=1e-4, size=[], dtype=torch.float, device=device),
        requires_grad=True)