import timm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# GeM, ArcFace, Circle, build_resnet_backbone, weights_init_classifier and
# weights_init_kaiming are project-local helpers defined elsewhere in the repo.


def __init__(self, num_classes=3, num_aux=23, feat_dim=512, cos_layer=True,
             dropout=0., m=0.5, image_net='tf_efficientnet_b3_ns', pretrained=True):
    super().__init__()
    self.feat_dim = feat_dim
    self.cos_layer = cos_layer
    backbone = timm.create_model(image_net, pretrained=pretrained)
    # EfficientNet feature extractor: stem -> blocks -> head conv
    self.base = nn.Sequential(backbone.conv_stem, backbone.bn1, backbone.act1,
                              backbone.blocks, backbone.conv_head,
                              backbone.bn2, backbone.act2)
    self.pool = GeM(p=3.0, freeze_p=True)
    self.dropout = nn.Dropout(p=dropout)
    features_num = backbone.num_features * backbone.global_pool.feat_mult()
    # self.neck = nn.Sequential(
    #     nn.BatchNorm1d(features_num),
    #     nn.Linear(features_num, feat_dim, bias=False),
    #     nn.ReLU(inplace=True),
    #     nn.BatchNorm1d(feat_dim),
    #     nn.Linear(feat_dim, feat_dim, bias=False)
    # )
    self.neck = nn.Linear(features_num, feat_dim, bias=False)
    self.bottleneck = nn.BatchNorm1d(feat_dim)
    self.head = nn.Linear(feat_dim, num_aux)   # auxiliary classification head
    self.num_classes = num_classes
    if self.cos_layer:
        print('using cosine layer')
        self.arcface = ArcFace(self.feat_dim, self.num_classes, s=30.0, m=m)
    else:
        self.classifier = nn.Linear(self.feat_dim, self.num_classes, bias=False)
        self.classifier.apply(weights_init_classifier)
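# The forward pass is not included in this snippet. A minimal sketch of what
# it could look like for this constructor, assuming ArcFace takes (features,
# labels) at training time; the names `feat` and `aux_logits` are illustrative:
def forward(self, x, labels=None):
    x = self.base(x)                    # backbone feature map [B, C, H, W]
    x = self.pool(x).flatten(1)         # GeM pooling -> [B, C]
    x = self.dropout(x)
    feat = self.neck(x)                 # project to feat_dim
    feat_bn = self.bottleneck(feat)     # BNNeck-style batch norm
    aux_logits = self.head(feat_bn)     # num_aux auxiliary logits
    if self.cos_layer and labels is not None:
        cls_logits = self.arcface(feat_bn, labels)   # margin-based logits
    elif not self.cos_layer:
        cls_logits = self.classifier(feat_bn)
    else:
        cls_logits = None               # inference: embedding only
    return cls_logits, aux_logits, feat_bn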
def __init__(self, num_classes=3, feat_dim=512, cos_layer=True, dropout=0.,
             image_net='resnet50', pretrained=True):
    super().__init__()
    self.EX = 4  # ResNet bottleneck expansion factor
    self.feat_dim = feat_dim
    self.cos_layer = cos_layer
    backbone = timm.create_model(image_net, pretrained=pretrained)
    # ResNet feature extractor up to and including layer4
    self.base = nn.Sequential(backbone.conv1, backbone.bn1, backbone.act1,
                              backbone.maxpool, backbone.layer1, backbone.layer2,
                              backbone.layer3, backbone.layer4)
    self.pool = GeM(p=3.0, freeze_p=True)
    self.dropout = nn.Dropout(p=dropout)
    self.fc = nn.Linear(2048, self.feat_dim)  # 2048 = 512 * self.EX for resnet50
    self.num_classes = num_classes
    if self.cos_layer:
        print('using cosine layer')
        self.arcface = ArcFace(self.feat_dim, self.num_classes, s=30.0, m=0.30)
    else:
        self.classifier = nn.Linear(self.feat_dim, self.num_classes, bias=False)
        self.classifier.apply(weights_init_classifier)
    # BNNeck: batch norm with a frozen bias, Kaiming-initialized
    self.bottleneck = nn.BatchNorm1d(self.feat_dim)
    self.bottleneck.bias.requires_grad_(False)
    self.bottleneck.apply(weights_init_kaiming)
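# GeM is used above but not defined in these snippets. A common implementation
# of generalized-mean pooling looks roughly like this (a sketch, not
# necessarily the project's exact layer); with freeze_p=True the exponent p
# stays a fixed constant instead of a learnable parameter:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GeM(nn.Module):
    def __init__(self, p=3.0, eps=1e-6, freeze_p=True):
        super().__init__()
        self.p = p if freeze_p else nn.Parameter(torch.ones(1) * p)
        self.eps = eps

    def forward(self, x):
        # clamp keeps pow() well-defined, then generalized mean over H x W
        pooled = F.avg_pool2d(x.clamp(min=self.eps).pow(self.p),
                              (x.size(-2), x.size(-1)))
        return pooled.pow(1.0 / self.p)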
def __init__(self, num_classes=3, num_aux=23, feat_dim=512, metric_loss='ArcFace',
             dropout=0., m=0.5, depth=50, pretrained=True,
             path='../input/backbone_pretrained/resnet101_ibn_a.pth.tar'):
    super().__init__()
    self.feat_dim = feat_dim
    self.metric_loss = metric_loss
    self.base = build_resnet_backbone(depth, pretrained, path)
    self.pool = GeM(p=3.0, freeze_p=True)
    self.dropout = nn.Dropout(p=dropout)
    self.neck = nn.Linear(2048, feat_dim, bias=False)
    self.bottleneck = nn.BatchNorm1d(feat_dim)
    self.head = nn.Linear(feat_dim, num_aux)
    self.num_classes = num_classes
    if self.metric_loss == 'ArcFace':
        print('using cosine layer')
        self.metric = ArcFace(self.feat_dim, self.num_classes, s=30.0, m=m)
    elif self.metric_loss == 'Circle':
        print('using circle layer')
        self.metric = Circle(self.feat_dim, self.num_classes, s=128.0, m=m)
    elif self.metric_loss == 'Softmax':
        self.classifier = nn.Linear(self.feat_dim, self.num_classes, bias=False)
        self.classifier.apply(weights_init_classifier)
    else:
        raise ValueError('Please select one from ["ArcFace", "Circle", "Softmax"]')
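# ArcFace and Circle are project layers not shown here. For reference, a
# typical additive angular-margin (ArcFace) head matching the
# (in_features, out_features, s, m) signature used above can be sketched as:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class ArcFace(nn.Module):
    def __init__(self, in_features, out_features, s=30.0, m=0.50):
        super().__init__()
        self.s, self.m = s, m
        self.weight = nn.Parameter(torch.empty(out_features, in_features))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, feats, labels):
        # cosine similarity between L2-normalized features and class weights
        cosine = F.linear(F.normalize(feats), F.normalize(self.weight))
        sine = torch.sqrt((1.0 - cosine.pow(2)).clamp(0, 1))
        phi = cosine * math.cos(self.m) - sine * math.sin(self.m)  # cos(theta + m)
        one_hot = F.one_hot(labels, cosine.size(1)).float()
        # apply the margin only to the target-class logit, then scale by s
        return self.s * (one_hot * phi + (1.0 - one_hot) * cosine)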
# learning_rate, weight_decay, batch_size, milestones and data are assumed
# to be defined earlier in the script.
net = resnet_face50()
device = torch.device("cuda:0")
if torch.cuda.device_count() > 1:
    devices_ids = list(range(torch.cuda.device_count()))
    print(devices_ids)
    net = nn.DataParallel(net, device_ids=devices_ids)
    print("Let's use %d/%d GPUs!" % (len(devices_ids), torch.cuda.device_count()))
net.to(device)

data_loader = DataLoader(dataset=data, batch_size=batch_size, shuffle=True,
                         num_workers=4, pin_memory=True)
arcFace = ArcFace(512, data.person).to(device)
criterion = nn.CrossEntropyLoss().to(device)
# Optimize the backbone and the ArcFace weight matrix jointly
optimizer = optim.Adam([{'params': net.parameters()},
                        {'params': arcFace.parameters()}],
                       lr=learning_rate, weight_decay=weight_decay)
# optimizer = optim.SGD([{'params': net.parameters()},
#                        {'params': arcFace.parameters()}],
#                       lr=learning_rate, weight_decay=weight_decay,
#                       momentum=momentum)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones,
                                           gamma=0.1)
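# The training loop itself is not included in the snippet. A minimal sketch of
# one consistent with the setup above (num_epochs is an assumed name): the
# backbone produces 512-d embeddings, the ArcFace head turns them into
# margin-adjusted logits, and CrossEntropyLoss is applied to those logits.
for epoch in range(num_epochs):
    net.train()
    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)
        embeddings = net(images)               # [B, 512] features
        logits = arcFace(embeddings, labels)   # margin-scaled logits
        loss = criterion(logits, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    scheduler.step()   # MultiStepLR decays lr by 0.1 at each milestone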
# Some Args setting
net = Net()
device = torch.device("cuda:0")
if torch.cuda.device_count() > 1:
    devices_ids = [0]   # restrict DataParallel to GPU 0 even when more are visible
    net = nn.DataParallel(net, device_ids=devices_ids)
    print("Let's use %d/%d GPUs!" % (len(devices_ids), torch.cuda.device_count()))
net.to(device)

data_loader = DataLoader(dataset=data, batch_size=batch_size, shuffle=True,
                         num_workers=4, pin_memory=True)
arcFace = ArcFace(640, data.type).to(device)
# criterion = nn.CrossEntropyLoss(weight=torch.from_numpy(np.array([1, 7])).float()).to(device)
criterion = nn.BCEWithLogitsLoss().to(device)
# Only the backbone is optimized here; the ArcFace head is left out
optimizer = optim.Adam([{'params': net.parameters()}],
                       # {'params': arcFace.parameters()}],
                       lr=learning_rate, weight_decay=weight_decay)
# optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[1000, 2000],
                                           gamma=0.1, last_epoch=-1)
print(net.parameters())   # note: prints the parameter generator object, not the tensors
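# Unlike CrossEntropyLoss, BCEWithLogitsLoss expects float targets with the
# same shape as the logits, so one training step for this setup would look
# roughly like the following (a sketch under that assumption; the ArcFace head
# built above is excluded from the optimizer and unused here):
for images, targets in data_loader:
    images = images.to(device)
    targets = targets.float().to(device)   # float targets, shape == logits
    logits = net(images)
    loss = criterion(logits, targets)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()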