def __init__(
    self,
    model,
    margin=0.2,
    lr=1e-3,
    lr_patience=2,
    lr_decay_ratio=0.5,
    memory_batch_max_num=2048,
):
    super().__init__()
    self.save_hyperparameters()
    self.model = model
    self.margin = margin
    self.lr = lr
    self.lr_patience = lr_patience
    self.lr_decay_ratio = lr_decay_ratio
    self.memory_batch_max_num = memory_batch_max_num
    self.loss_func = losses.CrossBatchMemory(
        losses.ContrastiveLoss(pos_margin=1, neg_margin=0, distance=CosineSimilarity()),
        self.model.feature_dim,
        memory_size=self.memory_batch_max_num,
        miner=miners.MultiSimilarityMiner(epsilon=self.margin),
    )
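# Hedged usage sketch (not part of the original snippet): how this loss might be
# invoked in a LightningModule training step. The batch structure and the name
# `training_step` are assumptions for illustration.
def training_step(self, batch, batch_idx):
    images, labels = batch
    embeddings = self.model(images)
    # CrossBatchMemory runs its internal miner against the embedding queue and
    # enqueues the current batch automatically.
    return self.loss_func(embeddings, labels)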
def __init__(self, margin: float = 0.2, type_of_triplets: str = "semihard") -> None:
    self.distance = distances.CosineSimilarity()
    self.reducer = reducers.ThresholdReducer(low=0)
    self.loss_fn = losses.TripletMarginLoss(
        margin=margin, distance=self.distance, reducer=self.reducer
    )
    # Note: `type_of_triplets` is unused here; it belongs to TripletMarginMiner,
    # whereas MultiSimilarityMiner selects pairs by the epsilon margin alone.
    self.miner = miners.MultiSimilarityMiner(epsilon=margin, distance=self.distance)
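# Hedged sketch (assumed, not from the original class): wiring the miner's hard
# pairs into the triplet loss. The method name and arguments are illustrative.
def forward(self, embeddings, labels):
    hard_pairs = self.miner(embeddings, labels)
    return self.loss_fn(embeddings, labels, hard_pairs)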
def test_distributed_loss(self):
    for world_size in range(1, 5):
        batch_size = 20
        lr = 1
        inputs = [torch.randn(batch_size, 10) for _ in range(world_size)]
        labels = [torch.randint(low=0, high=2, size=(batch_size,)) for _ in range(world_size)]
        original_model = ToyMpModel().to(self.device)
        model = ToyMpModel().to(self.device)
        model.load_state_dict(original_model.state_dict())

        optimizer = optim.SGD(original_model.parameters(), lr=lr)
        optimizer.zero_grad()
        all_inputs = torch.cat(inputs, dim=0).to(self.device)
        all_labels = torch.cat(labels, dim=0).to(self.device)
        all_outputs = original_model(all_inputs)
        original_loss_fn = losses.ContrastiveLoss()
        original_miner_fn = miners.MultiSimilarityMiner()
        correct_indices_tuple = original_miner_fn(all_outputs, all_labels)
        correct_loss = original_loss_fn(all_outputs, all_labels, correct_indices_tuple)
        correct_loss.backward()
        optimizer.step()

        # need to make separate copy to do test properly
        loss_fn = losses.ContrastiveLoss()
        miner_fn = miners.MultiSimilarityMiner()

        mp.spawn(
            single_process_function,
            args=(
                world_size,
                lr,
                model,
                inputs,
                labels,
                loss_fn,
                miner_fn,
                original_model,
                original_loss_fn,
                original_miner_fn,
                correct_loss.detach(),
                correct_indices_tuple,
                self.device,
            ),
            nprocs=world_size,
            join=True,
        )
def __init__(self):
    super(MultiSimilarityLoss, self).__init__()
    self.thresh = 0.5
    self.epsilon = 0.1
    self.scale_pos = 2
    self.scale_neg = 50
    self.miner = miners.MultiSimilarityMiner(epsilon=self.epsilon)
    # Positional args map to MultiSimilarityLoss(alpha, beta, base).
    self.loss_func = losses.MultiSimilarityLoss(self.scale_pos, self.scale_neg, self.thresh)
def __init__(self, configer):
    super(CircleLoss, self).__init__()
    self.params_dict = dict()
    if 'circle_loss' in configer.get('loss', 'params'):
        self.params_dict = configer.get('loss', 'params')['circle_loss']

    loss_function = losses.CircleLoss(
        m=self.params_dict['m'],
        gamma=self.params_dict['gamma'],
        triplets_per_anchor=self.params_dict['triplets_per_anchor'])
    self.miner = miners.MultiSimilarityMiner(
        epsilon=0.1) if self.params_dict['miner'] else None
    self.loss_function = losses.CrossBatchMemory(
        loss_function,
        self.params_dict['feat_dim'],
        memory_size=self.params_dict['memory_size'],
        miner=self.miner) if self.params_dict['xbm'] else loss_function
def __init__(self, thresh=0.5, epsilon=0.1, scale_pos=2, scale_neg=50):
    super(MultiSimilarityLoss, self).__init__()
    # Original defaults, kept for reference:
    # self.thresh = 0.5
    # self.epsilon = 0.1
    # self.scale_pos = 2
    # self.scale_neg = 50
    # Note: the constructor arguments are ignored; the hard-coded tuned values
    # below overwrite them.
    self.thresh = 0.77
    self.epsilon = 0.39
    self.scale_pos = 17.97
    self.scale_neg = 75.66
    self.miner = miners.MultiSimilarityMiner(epsilon=self.epsilon)
    self.loss_func = losses.MultiSimilarityLoss(self.scale_pos, self.scale_neg, self.thresh)
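# Hedged sketch (illustrative, not from the original class): a typical forward
# for this wrapper, assuming it receives (embeddings, labels).
def forward(self, embeddings, labels):
    hard_pairs = self.miner(embeddings, labels)
    return self.loss_func(embeddings, labels, hard_pairs)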
def __init__(self, configer):
    super(MultiSimilarityLoss, self).__init__()
    self.params_dict = dict()
    if 'multi_similarity_loss' in configer.get('loss', 'params'):
        self.params_dict = configer.get('loss', 'params')['multi_similarity_loss']

    loss_function = losses.MultiSimilarityLoss(
        alpha=self.params_dict['alpha'],
        beta=self.params_dict['beta'],
        base=self.params_dict['base'])
    self.miner = miners.MultiSimilarityMiner(
        epsilon=0.1) if self.params_dict['miner'] else None
    self.loss_function = losses.CrossBatchMemory(
        loss_function,
        self.params_dict['feat_dim'],
        memory_size=self.params_dict['memory_size'],
        miner=self.miner) if self.params_dict['xbm'] else loss_function
def __init__(self, configer):
    super(TripletMarginLoss, self).__init__()
    self.params_dict = dict()
    if 'triplet_margin_loss' in configer.get('loss', 'params'):
        self.params_dict = configer.get('loss', 'params')['triplet_margin_loss']

    loss_function = losses.TripletMarginLoss(
        margin=self.params_dict['margin'],
        distance_norm=self.params_dict['distance_norm'],
        power=self.params_dict['power'],
        swap=self.params_dict['swap'],
        smooth_loss=self.params_dict['smooth_loss'],
        avg_non_zero_only=self.params_dict['avg_non_zero_only'],
        triplets_per_anchor=self.params_dict['triplets_per_anchor'])
    self.miner = miners.MultiSimilarityMiner(
        epsilon=0.1) if self.params_dict['miner'] else None
    self.loss_function = losses.CrossBatchMemory(
        loss_function,
        self.params_dict['feat_dim'],
        memory_size=self.params_dict['memory_size'],
        miner=self.miner) if self.params_dict['xbm'] else loss_function
train_dataset = datasets.CIFAR100(root="CIFAR100_Dataset", train=True,
                                  transform=train_transform, download=True)
val_dataset = datasets.CIFAR100(root="CIFAR100_Dataset", train=False,
                                transform=val_transform, download=True)

# Set the loss function
metric_loss = losses.TripletMarginLoss(margin=0.01)
synth_loss = losses.AngularLoss(alpha=35)
g_adv_loss = losses.AngularLoss(alpha=35)

# Set the mining function
miner = miners.MultiSimilarityMiner(epsilon=0.1)

# Set the dataloader sampler
sampler = samplers.MPerClassSampler(train_dataset.targets, m=4)

# Set other training parameters
batch_size = 32
num_epochs = 2
iterations_per_epoch = 100

# Set up your models, optimizers, loss functions etc.
models = {"trunk": trunk, "embedder": embedder, "generator": generator}
optimizers = {
    "trunk_optimizer": trunk_optimizer,
    "embedder_optimizer": embedder_optimizer,
def train(args):
    ### Prepare Dataset
    if args.data_format == "coco":  # TODO
        from datasets.cocodataset import COCODetection
        trainset = COCODetection(image_path=args.train_images,
                                 info_file=args.train_info,
                                 transform=SSDAugmentation(MEANS))
        valset = None
    else:
        from datasets.customdataset import CustomDataset
        trainset = CustomDataset(
            image_path=args.train_imgdir,
            info_file=args.train_annofile,
            num_class=args.num_class,
            transform=transforms["train"])
        valset = CustomDataset(
            image_path=args.val_imgdir,
            info_file=args.val_annofile,
            num_class=args.num_class,
            transform=transforms["val"])
    trainloader = DataLoader(trainset, args.train_batch_size, shuffle=True,
                             num_workers=args.num_workers)
    valloader = DataLoader(valset, args.val_batch_size, shuffle=False,
                           num_workers=args.num_workers)
    # batch_num = len(trainloader)

    ### Init Model
    model = getattr(models, args.backbone)(
        pretrained=args.backbone_pretrained and not args.pretrained_model)
    model = EmbClsNet(model, args.num_class)

    ### Pretrained Model
    if args.pretrained_model:
        model.load_state_dict(torch.load(args.pretrained_model))

    ### Data parallel
    IS_DP_AVAILABLE = False
    try:
        devices = list(map(int, args.devices.strip().split(",")))
        if len(devices) >= 2:
            IS_DP_AVAILABLE = True
    except ValueError:
        logging.warning(f"Format of args.devices is invalid. {args.devices}")
    if IS_DP_AVAILABLE:
        model = torch.nn.DataParallel(model)
    if args.cuda:
        model = model.cuda()

    ### Init Optimizer
    # optimizer = optim.SGD(model.parameters(), lr=args.start_lr,
    #                       momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer = optim.RMSprop(model.parameters(), lr=args.start_lr, alpha=0.9,
                              eps=1e-08, momentum=args.momentum,
                              weight_decay=args.weight_decay)

    ### Init Triplet Loss
    loss_func = tripletloss.TripletMarginLoss(
        triplets_per_anchor=args.triplets_per_anchor, margin=args.triplet_margin)
    if args.mining:
        miner = tripletminer.MultiSimilarityMiner(epsilon=args.mining_epsilon)

    interval = -1
    for epoch in range(args.max_epoch):
        for batch_idx, batch_data in enumerate(trainloader):
            interval += 1
            imgs, labels = batch_data
            optimizer.zero_grad()
            embeddings, scores = model(imgs)
            # Metric Learning
            if args.mining:
                hard_pairs = miner(embeddings, labels)
                loss = loss_func(embeddings, labels, hard_pairs)
            else:
                loss = loss_func(embeddings, labels)
            loss.backward()
            optimizer.step()

            # Print Loss
            if interval % args.print_interval == 0:
                logging.info(f"[{epoch:4d}/{args.max_epoch:4d}] {interval:7d} "
                             f"Triplet Loss: {loss}")

            # Validation
            if interval % args.val_interval == 0 and interval != 0:
                logging.info(f"[{epoch}/{args.max_epoch}] Starting Validating..")
                for valbatch_idx, valbatch_data in enumerate(valloader):
                    val_imgs, val_labels = valbatch_data
                    val_embeddings, val_scores = model(val_imgs)
                    # Metric Learning
                    val_loss = loss_func(val_embeddings, val_labels)
                    cls_acc = calculate_class_accuracy(val_scores, val_labels)
                    if valbatch_idx % args.val_print_interval == 0:
                        logging.info(f"[{epoch:4d}/{args.max_epoch:4d}] {interval:7d} "
                                     f"Triplet Loss: {val_loss} Cls Acc: {cls_acc}")
def __init__(self):
    self.loss = torch.nn.MSELoss()
    self.npair_loss = losses.NPairsLoss(l2_reg_weight=0.02)
    self.miner = miners.MultiSimilarityMiner(epsilon=0.1)
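# Hedged sketch (not in the original snippet): one way the mined pairs could feed
# the N-pairs loss; the method name and signature are illustrative.
def metric_loss(self, embeddings, labels):
    pairs = self.miner(embeddings, labels)
    return self.npair_loss(embeddings, labels, pairs)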
def __init__(self,
             num_classes=101,
             embedding_size=512,
             trunk_architecture="efficientnet-b0",
             trunk_optim="RMSprop",
             embedder_optim="RMSprop",
             classifier_optim="RMSprop",
             trunk_lr=1e-4,
             embedder_lr=1e-3,
             classifier_lr=1e-3,
             weight_decay=1.5e-6,
             trunk_decay=0.98,
             embedder_decay=0.93,
             classifier_decay=0.93,
             log_train=True,
             gpu_id=0):
    """
    Inputs:
        num_classes        int:   Number of classes (for the classifier purely)
        embedding_size     int:   Size of the embedding space output by the embedder
        trunk_architecture str:   Passed to self.get_trunk(); either efficientnet-b{i},
                                  resnet-18/50, or mobilenet
        trunk_optim        optim: Which optimizer to use, such as AdamW
        embedder_optim     optim: Which optimizer to use, such as AdamW
        classifier_optim   optim: Which optimizer to use, such as AdamW
        trunk_lr           float: Learning rate for the trunk optimizer
        embedder_lr        float: Learning rate for the embedder optimizer
        classifier_lr      float: Learning rate for the classifier optimizer
        weight_decay       float: Weight decay for all 3 optimizers
        trunk_decay        float: Scheduler multiplier y_{t+1} <- trunk_decay * y_{t}
        embedder_decay     float: Scheduler multiplier y_{t+1} <- embedder_decay * y_{t}
        classifier_decay   float: Scheduler multiplier y_{t+1} <- classifier_decay * y_{t}
        log_train          bool:  Whether or not to save training logs
        gpu_id             int:   Currently only used to track GPU usage
    """
    self.gpu_id = gpu_id
    # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.device = torch.device("cuda")
    self.pretrained = False  # used to load the indices for train/val data for now
    self.log_train = log_train

    # build three-stage network
    self.num_classes = num_classes
    self.embedding_size = embedding_size
    self.MLP_neurons = 2048  # output size of the trunk; also used inside the embedder/classifier MLPs
    self.get_trunk(trunk_architecture)
    self.trunk = nn.DataParallel(self.trunk.to(self.device))
    self.embedder = nn.DataParallel(
        Network(layer_sizes=[self.MLP_neurons, self.embedding_size],
                neuron_fc=self.MLP_neurons).to(self.device))
    self.classifier = nn.DataParallel(
        Network(layer_sizes=[self.embedding_size, self.num_classes],
                neuron_fc=self.MLP_neurons).to(self.device))

    # build optimizers
    self.trunk_optimizer = self.get_optimizer(trunk_optim,
                                              self.trunk.parameters(),
                                              lr=trunk_lr,
                                              weight_decay=weight_decay)
    self.embedder_optimizer = self.get_optimizer(embedder_optim,
                                                 self.embedder.parameters(),
                                                 lr=embedder_lr,
                                                 weight_decay=weight_decay)
    self.classifier_optimizer = self.get_optimizer(classifier_optim,
                                                   self.classifier.parameters(),
                                                   lr=classifier_lr,
                                                   weight_decay=weight_decay)

    # build schedulers
    self.trunk_scheduler = ExponentialLR(self.trunk_optimizer, gamma=trunk_decay)
    self.embedder_scheduler = ExponentialLR(self.embedder_optimizer, gamma=embedder_decay)
    self.classifier_scheduler = ExponentialLR(self.classifier_optimizer, gamma=classifier_decay)

    # build pair-based losses and the miner
    self.triplet = losses.TripletMarginLoss(margin=0.2).to(self.device)
    self.multisimilarity = losses.MultiSimilarityLoss(alpha=2, beta=50, base=1).to(self.device)
    self.miner = miners.MultiSimilarityMiner(epsilon=0.1)

    # build proxy anchor loss
    self.proxy_anchor = Proxy_Anchor(nb_classes=num_classes,
                                     sz_embed=embedding_size,
                                     mrg=0.2,
                                     alpha=32).to(self.device)
    self.proxy_optimizer = AdamW(self.proxy_anchor.parameters(),
                                 lr=trunk_lr * 10,
                                 weight_decay=1.5e-6)
    self.proxy_scheduler = ExponentialLR(self.proxy_optimizer, gamma=0.8)

    # finally, cross-entropy loss
    self.crossentropy = torch.nn.CrossEntropyLoss().to(self.device)
    # log some of this information
    self.model_params = {
        "Trunk_Model": trunk_architecture,
        "Optimizers": [
            str(self.trunk_optimizer),
            str(self.embedder_optimizer),
            str(self.classifier_optimizer)
        ],
        "Embedder": str(self.embedder),
        "Embedding_Dimension": str(embedding_size),
        "Weight_Decay": weight_decay,
        "Scheduler_Decays": [trunk_decay, embedder_decay, classifier_decay],
        "Embedding_Size": embedding_size,
        "Learning_Rates": [trunk_lr, embedder_lr, classifier_lr],
        "Miner": str(self.miner)
    }
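# Hedged sketch (illustrative, not from the original class): how the miner and the
# pair-based losses built above are typically combined on a batch of embeddings.
def pair_based_losses(self, embeddings, labels):
    pairs = self.miner(embeddings, labels)
    triplet_loss = self.triplet(embeddings, labels, pairs)
    multisim_loss = self.multisimilarity(embeddings, labels, pairs)
    return triplet_loss, multisim_loss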
def train_model(model, model_test, criterion, optimizer, scheduler, num_epochs=25):
    since = time.time()
    # best_model_wts = model.state_dict()
    # best_acc = 0.0
    warm_up = 0.1  # We start from 0.1 * lr
    warm_iteration = round(dataset_sizes['satellite'] / opt.batchsize) * opt.warm_epoch  # first 5 epochs
    if opt.arcface:
        criterion_arcface = losses.ArcFaceLoss(num_classes=opt.nclasses, embedding_size=512)
    if opt.cosface:
        criterion_cosface = losses.CosFaceLoss(num_classes=opt.nclasses, embedding_size=512)
    if opt.circle:
        criterion_circle = CircleLoss(m=0.25, gamma=32)  # gamma = 64 may lead to a better result.
    if opt.triplet:
        miner = miners.MultiSimilarityMiner()
        criterion_triplet = losses.TripletMarginLoss(margin=0.3)
    if opt.lifted:
        criterion_lifted = losses.GeneralizedLiftedStructureLoss(neg_margin=1, pos_margin=0)
    if opt.contrast:
        criterion_contrast = losses.ContrastiveLoss(pos_margin=0, neg_margin=1)
    if opt.sphere:
        criterion_sphere = losses.SphereFaceLoss(num_classes=opt.nclasses, embedding_size=512, margin=4)

    for epoch in range(num_epochs - start_epoch):
        epoch = epoch + start_epoch
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train']:
            if phase == 'train':
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0
            running_corrects2 = 0.0
            running_corrects3 = 0.0
            # Iterate over data.
            for data, data2, data3, data4 in zip(dataloaders['satellite'], dataloaders['street'],
                                                 dataloaders['drone'], dataloaders['google']):
                # get the inputs
                inputs, labels = data
                inputs2, labels2 = data2
                inputs3, labels3 = data3
                inputs4, labels4 = data4
                now_batch_size, c, h, w = inputs.shape
                if now_batch_size < opt.batchsize:  # skip the last batch
                    continue
                if use_gpu:
                    inputs = Variable(inputs.cuda().detach())
                    inputs2 = Variable(inputs2.cuda().detach())
                    inputs3 = Variable(inputs3.cuda().detach())
                    labels = Variable(labels.cuda().detach())
                    labels2 = Variable(labels2.cuda().detach())
                    labels3 = Variable(labels3.cuda().detach())
                    if opt.extra_Google:
                        inputs4 = Variable(inputs4.cuda().detach())
                        labels4 = Variable(labels4.cuda().detach())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                if phase == 'val':
                    with torch.no_grad():
                        outputs, outputs2 = model(inputs, inputs2)
                else:
                    if opt.views == 2:
                        outputs, outputs2 = model(inputs, inputs2)
                    elif opt.views == 3:
                        if opt.extra_Google:
                            outputs, outputs2, outputs3, outputs4 = model(inputs, inputs2, inputs3, inputs4)
                        else:
                            outputs, outputs2, outputs3 = model(inputs, inputs2, inputs3)

                return_feature = (opt.arcface or opt.cosface or opt.circle or opt.triplet
                                  or opt.contrast or opt.lifted or opt.sphere)

                if opt.views == 2:
                    _, preds = torch.max(outputs.data, 1)
                    _, preds2 = torch.max(outputs2.data, 1)
                    loss = criterion(outputs, labels) + criterion(outputs2, labels2)
                elif opt.views == 3:
                    if return_feature:
                        logits, ff = outputs
                        logits2, ff2 = outputs2
                        logits3, ff3 = outputs3
                        fnorm = torch.norm(ff, p=2, dim=1, keepdim=True)
                        fnorm2 = torch.norm(ff2, p=2, dim=1, keepdim=True)
                        fnorm3 = torch.norm(ff3, p=2, dim=1, keepdim=True)
                        ff = ff.div(fnorm.expand_as(ff))  # 8*512 tensor
                        ff2 = ff2.div(fnorm2.expand_as(ff2))
                        ff3 = ff3.div(fnorm3.expand_as(ff3))
                        loss = criterion(logits, labels) + criterion(logits2, labels2) + criterion(logits3, labels3)
                        _, preds = torch.max(logits.data, 1)
                        _, preds2 = torch.max(logits2.data, 1)
                        _, preds3 = torch.max(logits3.data, 1)
                        # Multiple perspectives are combined to compute the loss;
                        # pass '--loss_merge' in run.sh to enable this.
                        if opt.loss_merge:
                            ff_all = torch.cat((ff, ff2, ff3), dim=0)
                            labels_all = torch.cat((labels, labels2, labels3), dim=0)
                        if opt.extra_Google:
                            logits4, ff4 = outputs4
                            fnorm4 = torch.norm(ff4, p=2, dim=1, keepdim=True)
                            ff4 = ff4.div(fnorm4.expand_as(ff4))
                            loss = (criterion(logits, labels) + criterion(logits2, labels2)
                                    + criterion(logits3, labels3) + criterion(logits4, labels4))
                            if opt.loss_merge:
                                ff_all = torch.cat((ff_all, ff4), dim=0)
                                labels_all = torch.cat((labels_all, labels4), dim=0)
                        if opt.arcface:
                            if opt.loss_merge:
                                loss += criterion_arcface(ff_all, labels_all)
                            else:
                                loss += (criterion_arcface(ff, labels) + criterion_arcface(ff2, labels2)
                                         + criterion_arcface(ff3, labels3))  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_arcface(ff4, labels4)  # /now_batch_size
                        if opt.cosface:
                            if opt.loss_merge:
                                loss += criterion_cosface(ff_all, labels_all)
                            else:
                                loss += (criterion_cosface(ff, labels) + criterion_cosface(ff2, labels2)
                                         + criterion_cosface(ff3, labels3))  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_cosface(ff4, labels4)  # /now_batch_size
                        if opt.circle:
                            if opt.loss_merge:
                                loss += criterion_circle(*convert_label_to_similarity(ff_all, labels_all)) / now_batch_size
                            else:
                                loss += (criterion_circle(*convert_label_to_similarity(ff, labels)) / now_batch_size
                                         + criterion_circle(*convert_label_to_similarity(ff2, labels2)) / now_batch_size
                                         + criterion_circle(*convert_label_to_similarity(ff3, labels3)) / now_batch_size)
                                if opt.extra_Google:
                                    loss += criterion_circle(*convert_label_to_similarity(ff4, labels4)) / now_batch_size
                        if opt.triplet:
                            if opt.loss_merge:
                                hard_pairs_all = miner(ff_all, labels_all)
                                loss += criterion_triplet(ff_all, labels_all, hard_pairs_all)
                            else:
                                hard_pairs = miner(ff, labels)
                                hard_pairs2 = miner(ff2, labels2)
                                hard_pairs3 = miner(ff3, labels3)
                                loss += (criterion_triplet(ff, labels, hard_pairs)
                                         + criterion_triplet(ff2, labels2, hard_pairs2)
                                         + criterion_triplet(ff3, labels3, hard_pairs3))  # /now_batch_size
                                if opt.extra_Google:
                                    hard_pairs4 = miner(ff4, labels4)
                                    loss += criterion_triplet(ff4, labels4, hard_pairs4)
                        if opt.lifted:
                            if opt.loss_merge:
                                loss += criterion_lifted(ff_all, labels_all)
                            else:
                                loss += (criterion_lifted(ff, labels) + criterion_lifted(ff2, labels2)
                                         + criterion_lifted(ff3, labels3))  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_lifted(ff4, labels4)
                        if opt.contrast:
                            if opt.loss_merge:
                                loss += criterion_contrast(ff_all, labels_all)
                            else:
                                loss += (criterion_contrast(ff, labels) + criterion_contrast(ff2, labels2)
                                         + criterion_contrast(ff3, labels3))  # /now_batch_size
                                if opt.extra_Google:
                                    loss += criterion_contrast(ff4, labels4)
                        if opt.sphere:
                            if opt.loss_merge:
                                loss += criterion_sphere(ff_all, labels_all) / now_batch_size
                            else:
                                loss += (criterion_sphere(ff, labels) / now_batch_size
                                         + criterion_sphere(ff2, labels2) / now_batch_size
                                         + criterion_sphere(ff3, labels3) / now_batch_size)
                                if opt.extra_Google:
                                    loss += criterion_sphere(ff4, labels4) / now_batch_size
                    else:
                        _, preds = torch.max(outputs.data, 1)
                        _, preds2 = torch.max(outputs2.data, 1)
                        _, preds3 = torch.max(outputs3.data, 1)
                        if opt.loss_merge:
                            outputs_all = torch.cat((outputs, outputs2, outputs3), dim=0)
                            labels_all = torch.cat((labels, labels2, labels3), dim=0)
                            if opt.extra_Google:
                                outputs_all = torch.cat((outputs_all, outputs4), dim=0)
                                labels_all = torch.cat((labels_all, labels4), dim=0)
                            loss = 4 * criterion(outputs_all, labels_all)
                        else:
                            loss = criterion(outputs, labels) + criterion(outputs2, labels2) + criterion(outputs3, labels3)
                            if opt.extra_Google:
                                loss += criterion(outputs4, labels4)

                # backward + optimize only if in training phase
                if epoch < opt.warm_epoch and phase == 'train':
                    warm_up = min(1.0, warm_up + 0.9 / warm_iteration)
                    loss *= warm_up

                if phase == 'train':
                    if fp16:  # we use the optimizer (amp) to backward the loss
                        with amp.scale_loss(loss, optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()
                    optimizer.step()
                    ##########
                    if opt.moving_avg < 1.0:
                        update_average(model_test, model, opt.moving_avg)

                # statistics
                if int(version[0]) > 0 or int(version[2]) > 3:  # for the new versions like 0.4.0, 0.5.0 and 1.0.0
                    running_loss += loss.item() * now_batch_size
                else:  # for the old versions like 0.3.0 and 0.3.1
                    running_loss += loss.data[0] * now_batch_size
                running_corrects += float(torch.sum(preds == labels.data))
                running_corrects2 += float(torch.sum(preds2 == labels2.data))
                if opt.views == 3:
                    running_corrects3 += float(torch.sum(preds3 == labels3.data))

            epoch_loss = running_loss / dataset_sizes['satellite']
            epoch_acc = running_corrects / dataset_sizes['satellite']
            epoch_acc2 = running_corrects2 / dataset_sizes['satellite']

            if opt.views == 2:
                print('{} Loss: {:.4f} Satellite_Acc: {:.4f} Street_Acc: {:.4f}'.format(
                    phase, epoch_loss, epoch_acc, epoch_acc2))
            elif opt.views == 3:
                epoch_acc3 = running_corrects3 / dataset_sizes['satellite']
                print('{} Loss: {:.4f} Satellite_Acc: {:.4f} Street_Acc: {:.4f} Drone_Acc: {:.4f}'.format(
                    phase, epoch_loss, epoch_acc, epoch_acc2, epoch_acc3))

            y_loss[phase].append(epoch_loss)
            y_err[phase].append(1.0 - epoch_acc)

            # deep copy the model
            if phase == 'train':
                scheduler.step()
            last_model_wts = model.state_dict()
            if epoch % 20 == 19:
                save_network(model, opt.name, epoch)
            # draw_curve(epoch)

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    # print('Best val Acc: {:4f}'.format(best_acc))
    # save_network(model_test, opt.name + 'adapt', epoch)

    return model
def _init_state_test2(self) -> None:
    """
    Initialize the state and load it from an existing checkpoint if any
    """
    torch.manual_seed(0)
    np.random.seed(0)

    print("Create data loaders", flush=True)
    Input_size_Image = self._train_cfg.input_size
    Test_size = Input_size_Image
    print("Input size : " + str(Input_size_Image))
    print("Test size : " + str(Input_size_Image))
    print("Initial LR :" + str(self._train_cfg.lr))

    transf = get_transforms(input_size=Input_size_Image, test_size=Test_size,
                            kind='full', crop=True, need=('train', 'val'),
                            backbone=None)
    transform_train = transf['train']
    transform_test = transf['val']

    self.train_set = datasets.ImageFolder(self._train_cfg.imnet_path + '/train',
                                          transform=transform_train)
    self.test_set = datasets.ImageFolder(self._train_cfg.imnet_path + '/val',
                                         transform=transform_test)
    self.train_dataset = self.train_set
    self.val_dataset = self.test_set
    # self.train_dataset = ClassDisjointMURA(self.train_set, transform_train)
    # self.val_dataset = ClassDisjointMURA(self.test_set, transform_test)

    print(f"Total batch_size: {self._train_cfg.batch_per_gpu * self._train_cfg.num_tasks}",
          flush=True)

    print("Create distributed model", flush=True)
    model = models.resnet152(pretrained=False)
    num_ftrs = model.fc.in_features
    model.fc = common_functions.Identity()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = torch.nn.DataParallel(model.to(device))
    embedder = torch.nn.DataParallel(MLP([num_ftrs, 512]).to(device))

    # Set optimizers
    trunk_optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=0.0001)
    embedder_optimizer = torch.optim.Adam(embedder.parameters(), lr=0.0001, weight_decay=0.0001)

    # Set the loss function
    loss = losses.TripletMarginLoss(margin=0.1)

    # Set the mining function
    miner = miners.MultiSimilarityMiner(epsilon=0.1)

    # Set the dataloader sampler
    self.sampler = samplers.MPerClassSampler(self.train_dataset.targets, m=4,
                                             length_before_new_iter=len(self.train_dataset))

    # Package the above stuff into dictionaries.
    self.models_dict = {"trunk": model, "embedder": embedder}
    self.optimizers = {
        "trunk_optimizer": trunk_optimizer,
        "embedder_optimizer": embedder_optimizer
    }
    self.loss_funcs = {"metric_loss": loss}
    self.mining_funcs = {"tuple_miner": miner}
def train(train_data, test_data, save_model, num_epochs, lr, embedding_size, batch_size):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set trunk model and replace the softmax layer with an identity function
    trunk = torchvision.models.resnet18(pretrained=True)
    trunk_output_size = trunk.fc.in_features
    trunk.fc = common_functions.Identity()
    trunk = torch.nn.DataParallel(trunk.to(device))

    # Set embedder model. This takes in the output of the trunk and outputs
    # `embedding_size`-dimensional embeddings.
    embedder = torch.nn.DataParallel(MLP([trunk_output_size, embedding_size]).to(device))

    # Set optimizers
    trunk_optimizer = torch.optim.Adam(trunk.parameters(), lr=lr / 10, weight_decay=0.0001)
    embedder_optimizer = torch.optim.Adam(embedder.parameters(), lr=lr, weight_decay=0.0001)

    # Set the loss function
    loss = losses.TripletMarginLoss(margin=0.1)

    # Set the mining function
    miner = miners.MultiSimilarityMiner(epsilon=0.1)

    # Set the dataloader sampler
    sampler = samplers.MPerClassSampler(train_data.targets, m=4,
                                        length_before_new_iter=len(train_data))

    save_dir = os.path.join(
        save_model,
        ''.join(str(lr).split('.')) + '_' + str(batch_size) + '_' + str(embedding_size))
    os.makedirs(save_dir, exist_ok=True)

    # Package the above stuff into dictionaries.
    models = {"trunk": trunk, "embedder": embedder}
    optimizers = {
        "trunk_optimizer": trunk_optimizer,
        "embedder_optimizer": embedder_optimizer
    }
    loss_funcs = {"metric_loss": loss}
    mining_funcs = {"tuple_miner": miner}

    record_keeper, _, _ = logging_presets.get_record_keeper(
        os.path.join(save_dir, "example_logs"),
        os.path.join(save_dir, "example_tensorboard"))
    hooks = logging_presets.get_hook_container(record_keeper)
    dataset_dict = {"val": test_data, "train": train_data}
    model_folder = "example_saved_models"

    def visualizer_hook(umapper, umap_embeddings, labels, split_name, keyname, *args):
        logging.info("UMAP plot for the {} split and label set {}".format(split_name, keyname))
        label_set = np.unique(labels)
        num_classes = len(label_set)
        fig = plt.figure(figsize=(20, 15))
        # `keyname` substituted here for `num_embeddings`, which was undefined in this scope
        plt.title(str(split_name) + '_' + str(keyname))
        plt.gca().set_prop_cycle(
            cycler("color",
                   [plt.cm.nipy_spectral(i) for i in np.linspace(0, 0.9, num_classes)]))
        for i in range(num_classes):
            idx = labels == label_set[i]
            plt.plot(umap_embeddings[idx, 0], umap_embeddings[idx, 1], ".", markersize=1)
        plt.show()

    # Create the tester
    tester = testers.GlobalEmbeddingSpaceTester(
        end_of_testing_hook=hooks.end_of_testing_hook,
        visualizer=umap.UMAP(),
        visualizer_hook=visualizer_hook,
        dataloader_num_workers=32,
        accuracy_calculator=AccuracyCalculator(k="max_bin_count"))

    end_of_epoch_hook = hooks.end_of_epoch_hook(tester, dataset_dict, model_folder,
                                                test_interval=1, patience=1)

    trainer = trainers.MetricLossOnly(
        models,
        optimizers,
        batch_size,
        loss_funcs,
        mining_funcs,
        train_data,
        sampler=sampler,
        dataloader_num_workers=32,
        end_of_iteration_hook=hooks.end_of_iteration_hook,
        end_of_epoch_hook=end_of_epoch_hook)

    trainer.train(num_epochs=num_epochs)

    if save_model is not None:
        torch.save(models["trunk"].state_dict(), os.path.join(save_dir, 'trunk.pth'))
        torch.save(models["embedder"].state_dict(), os.path.join(save_dir, 'embedder.pth'))
        print('Model saved in ', save_dir)
def __init__(
    self,
    h5fn,
    mode="eval",
    graph="HAND",
    embed_size: int = 64,
    loss="nsm",
    batch_size=32,
    lr=0.001,
    no_aug=False,
    vocab=None,
    include_score=False,
    body_labels=None,
    tester_device=None,
    **kwargs,
):
    from argparse import Namespace

    super().__init__()
    if isinstance(h5fn, dict):
        # FIXME: This seems to be the only way to get checkpoint loading
        # working from the hand checkpoint. There must be a better way...
        self.hparams = Namespace(**h5fn)
        assert self.hparams.skel_graph_name == "HAND"
        graph = self.hparams.skel_graph_name
        self.skel_graph = SkeletonReducer(EMBED_SKELS[graph])
        self.num_classes = 58 - 5
    else:
        self.data_path = h5fn
        self.hparams.skel_graph_name = graph
        self.skel_graph = SkeletonReducer(EMBED_SKELS[graph])
    self.tester_device = tester_device
    self.hparams.embed_size = embed_size
    self.hparams.loss_name = loss
    print("MetGcnLit", self.data_path)
    if graph == "HAND":
        self.dataset = HandSkeletonDataset(self.data_path, vocab=vocab)
    else:
        self.dataset = BodySkeletonDataset(
            self.data_path,
            vocab=vocab,
            skel_graph=self.skel_graph,
            body_labels=body_labels,
        )
    self.hparams.mode = mode
    assert self.hparams.mode in ("eval", "prod")
    self.batch_size = batch_size
    self.hparams.lr = lr
    self.hparams.no_aug = no_aug
    self.hparams.include_score = include_score
    if self.hparams.mode == "prod":
        self.num_classes = self.dataset.num_classes()
    else:
        self.num_classes = (
            self.dataset.num_classes() - self.dataset.num_left_out()
        )
    self.gcn = FlexStGcn(
        3 if self.hparams.include_score else 2,
        self.hparams.embed_size,
        GraphAdapter(self.skel_graph.dense_skel),
    )
    # Parameters are based on ones that seem reasonable given
    # https://github.com/KevinMusgrave/powerful-benchmarker
    # Could optimise...
    if self.hparams.loss_name == "nsm":
        self.loss = losses.NormalizedSoftmaxLoss(
            num_classes=self.num_classes,
            embedding_size=self.hparams.embed_size,
            temperature=0.07,
        )
    else:
        assert self.hparams.loss_name == "msl"
        self.loss = losses.MultiSimilarityLoss(alpha=10, beta=50, base=0.7)
    self.miner = miners.MultiSimilarityMiner(epsilon=0.5)
    self.val_tester = None
    self.test_tester = None
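# Hedged sketch (illustrative, not from the original module): a training step that
# feeds mined pairs into the configured loss. The batch structure is an assumption.
def training_step(self, batch, batch_idx):
    skels, labels = batch
    embeddings = self.gcn(skels)
    pairs = self.miner(embeddings, labels)
    # pytorch-metric-learning losses accept an optional indices_tuple from a miner.
    return self.loss(embeddings, labels, pairs)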
def train_app(cfg):
    print(cfg.pretty())
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set trunk model and replace the softmax layer with an identity function
    trunk = torchvision.models.__dict__[cfg.model.model_name](pretrained=cfg.model.pretrained)
    # trunk = models.resnet18(pretrained=True)
    # trunk = models.alexnet(pretrained=True)
    # trunk = models.resnet50(pretrained=True)
    # trunk = models.resnet152(pretrained=True)
    # trunk = models.wide_resnet50_2(pretrained=True)
    # trunk = EfficientNet.from_pretrained('efficientnet-b2')
    trunk_output_size = trunk.fc.in_features
    trunk.fc = Identity()
    trunk = torch.nn.DataParallel(trunk.to(device))
    embedder = torch.nn.DataParallel(MLP([trunk_output_size, cfg.embedder.size]).to(device))
    classifier = torch.nn.DataParallel(MLP([cfg.embedder.size, cfg.embedder.class_out_size])).to(device)

    # Set optimizers
    if cfg.optimizer.name == "sdg":  # (sic) key as defined in the config
        trunk_optimizer = torch.optim.SGD(trunk.parameters(), lr=cfg.optimizer.lr,
                                          momentum=cfg.optimizer.momentum,
                                          weight_decay=cfg.optimizer.weight_decay)
        embedder_optimizer = torch.optim.SGD(embedder.parameters(), lr=cfg.optimizer.lr,
                                             momentum=cfg.optimizer.momentum,
                                             weight_decay=cfg.optimizer.weight_decay)
        classifier_optimizer = torch.optim.SGD(classifier.parameters(), lr=cfg.optimizer.lr,
                                               momentum=cfg.optimizer.momentum,
                                               weight_decay=cfg.optimizer.weight_decay)
    elif cfg.optimizer.name == "rmsprop":
        trunk_optimizer = torch.optim.RMSprop(trunk.parameters(), lr=cfg.optimizer.lr,
                                              momentum=cfg.optimizer.momentum,
                                              weight_decay=cfg.optimizer.weight_decay)
        embedder_optimizer = torch.optim.RMSprop(embedder.parameters(), lr=cfg.optimizer.lr,
                                                 momentum=cfg.optimizer.momentum,
                                                 weight_decay=cfg.optimizer.weight_decay)
        classifier_optimizer = torch.optim.RMSprop(classifier.parameters(), lr=cfg.optimizer.lr,
                                                   momentum=cfg.optimizer.momentum,
                                                   weight_decay=cfg.optimizer.weight_decay)

    # Set the datasets
    data_dir = os.environ["DATASET_FOLDER"] + "/" + cfg.dataset.data_dir
    print("Data dir: " + data_dir)
    train_dataset, val_dataset, val_samples_dataset = get_datasets(data_dir, cfg, mode=cfg.mode.type)
    print("Trainset: ", len(train_dataset), "Testset: ", len(val_dataset),
          "Samplesset: ", len(val_samples_dataset))

    # Set the loss function
    if cfg.embedder_loss.name == "margin_loss":
        loss = losses.MarginLoss(margin=cfg.embedder_loss.margin,
                                 nu=cfg.embedder_loss.nu,
                                 beta=cfg.embedder_loss.beta)
    if cfg.embedder_loss.name == "triplet_margin":
        loss = losses.TripletMarginLoss(margin=cfg.embedder_loss.margin)
    if cfg.embedder_loss.name == "multi_similarity":
        loss = losses.MultiSimilarityLoss(alpha=cfg.embedder_loss.alpha,
                                          beta=cfg.embedder_loss.beta,
                                          base=cfg.embedder_loss.base)

    # Set the classification loss:
    classification_loss = torch.nn.CrossEntropyLoss()

    # Set the mining function
    if cfg.miner.name == "triplet_margin":
        # miner = miners.TripletMarginMiner(margin=0.2)
        miner = miners.TripletMarginMiner(margin=cfg.miner.margin)
    if cfg.miner.name == "multi_similarity":
        miner = miners.MultiSimilarityMiner(epsilon=cfg.miner.epsilon)
        # miner = miners.MultiSimilarityMiner(epsilon=0.05)

    batch_size = cfg.trainer.batch_size
    num_epochs = cfg.trainer.num_epochs
    iterations_per_epoch = cfg.trainer.iterations_per_epoch

    # Set the dataloader sampler
    sampler = samplers.MPerClassSampler(train_dataset.targets, m=4,
                                        length_before_new_iter=len(train_dataset))

    # Package the above stuff into dictionaries.
models = {"trunk": trunk, "embedder": embedder, "classifier": classifier} optimizers = {"trunk_optimizer": trunk_optimizer, "embedder_optimizer": embedder_optimizer, "classifier_optimizer": classifier_optimizer} loss_funcs = {"metric_loss": loss, "classifier_loss": classification_loss} mining_funcs = {"tuple_miner": miner} # We can specify loss weights if we want to. This is optional loss_weights = {"metric_loss": cfg.loss.metric_loss, "classifier_loss": cfg.loss.classifier_loss} schedulers = { #"metric_loss_scheduler_by_epoch": torch.optim.lr_scheduler.StepLR(classifier_optimizer, cfg.scheduler.step_size, gamma=cfg.scheduler.gamma), "embedder_scheduler_by_epoch": torch.optim.lr_scheduler.StepLR(embedder_optimizer, cfg.scheduler.step_size, gamma=cfg.scheduler.gamma), "classifier_scheduler_by_epoch": torch.optim.lr_scheduler.StepLR(classifier_optimizer, cfg.scheduler.step_size, gamma=cfg.scheduler.gamma), "trunk_scheduler_by_epoch": torch.optim.lr_scheduler.StepLR(embedder_optimizer, cfg.scheduler.step_size, gamma=cfg.scheduler.gamma), } experiment_name = "%s_model_%s_cl_%s_ml_%s_miner_%s_mix_ml_%02.2f_mix_cl_%02.2f_resize_%d_emb_size_%d_class_size_%d_opt_%s_lr_%02.2f_m_%02.2f_wd_%02.2f"%(cfg.dataset.name, cfg.model.model_name, "cross_entropy", cfg.embedder_loss.name, cfg.miner.name, cfg.loss.metric_loss, cfg.loss.classifier_loss, cfg.transform.transform_resize, cfg.embedder.size, cfg.embedder.class_out_size, cfg.optimizer.name, cfg.optimizer.lr, cfg.optimizer.momentum, cfg.optimizer.weight_decay) record_keeper, _, _ = logging_presets.get_record_keeper("logs/%s"%(experiment_name), "tensorboard/%s"%(experiment_name)) hooks = logging_presets.get_hook_container(record_keeper) dataset_dict = {"samples": val_samples_dataset, "val": val_dataset} model_folder = "example_saved_models/%s/"%(experiment_name) # Create the tester tester = OneShotTester( end_of_testing_hook=hooks.end_of_testing_hook, #size_of_tsne=20 ) #tester.embedding_filename=data_dir+"/embeddings_pretrained_triplet_loss_multi_similarity_miner.pkl" tester.embedding_filename=data_dir+"/"+experiment_name+".pkl" end_of_epoch_hook = hooks.end_of_epoch_hook(tester, dataset_dict, model_folder) trainer = trainers.TrainWithClassifier(models, optimizers, batch_size, loss_funcs, mining_funcs, train_dataset, sampler=sampler, lr_schedulers=schedulers, dataloader_num_workers = cfg.trainer.batch_size, loss_weights=loss_weights, end_of_iteration_hook=hooks.end_of_iteration_hook, end_of_epoch_hook=end_of_epoch_hook ) trainer.train(num_epochs=num_epochs) tester = OneShotTester()