def init_network(self):
    netG = models.__dict__[cfg.MODEL.NET](pretrained=True)
    netE = models.classifier.Classifier(class_num=cfg.MODEL.CLASS_NUM,
                                        distributed=self.distributed)
    self.load_checkpoint(netG, netE)

    if self.distributed:
        # sync_netG = nn.SyncBatchNorm.convert_sync_batchnorm(netG)
        # sync_netE = nn.SyncBatchNorm.convert_sync_batchnorm(netE)
        sync_netG = netG
        sync_netE = netE
        self.netG = torch.nn.parallel.DistributedDataParallel(
            sync_netG.to(self.device),
            device_ids=[self.args.local_rank],
            output_device=self.args.local_rank)
        self.netE = torch.nn.parallel.DistributedDataParallel(
            sync_netE.to(self.device),
            device_ids=[self.args.local_rank],
            output_device=self.args.local_rank)
    else:
        self.netG = torch.nn.DataParallel(netG).cuda()
        self.netE = torch.nn.DataParallel(netE).cuda()

    self.optim = models.optimizer.Optimizer(self.netG, self.netE)

    self.cross_ent = losses.create('CrossEntropy').cuda()
    if cfg.LOSSES.MMD_WEIGHT > 0:
        self.mmd = losses.create('MMD').cuda()
    if cfg.LOSSES.TPN_TASK_WEIGHT > 0:
        self.tpn_task = losses.create('TpnTask').cuda()
    if cfg.LOSSES.TRG_GXENT_WEIGHT > 0:
        self.trg_xent = losses.create('GeneralEntropy').cuda()
    if cfg.LOSSES.SYMM_XENT_WEIGHT > 0:
        self.symm_xent_loss = losses.create('SymmEntropy').cuda()
def setup_network(self):
    # model = models.create(cfg.MODEL.TYPE, args)
    model = models.create('XTransformer', args, submodel=submodel)

    if self.distributed:
        # broadcast_buffers=False should be removed if we update BatchNorm stats
        self.model = torch.nn.parallel.DistributedDataParallel(
            model.to(self.device),
            device_ids=[self.args.local_rank],
            output_device=self.args.local_rank,
            broadcast_buffers=False)
    else:
        # self.model = torch.nn.DataParallel(model).cuda()  # strange
        self.model = model.cuda()  # strange

    if self.args.resume > 0:
        self.model.load_state_dict(
            torch.load(self.snapshot_path("caption_model", self.args.resume),
                       map_location=lambda storage, loc: storage))

    # self.optim = Optimizer(self.model)
    self.optim = build_optimizer(args, model)
    self.xe_criterion = losses.create(cfg.LOSSES.XE_TYPE).cuda()
    self.rl_criterion = losses.create(cfg.LOSSES.RL_TYPE).cuda()
def __init__(self, in_dim=[2048], out_dim=1000, **kwargs):
    super(Bilinear, self).__init__()
    fc1s = []
    fc2s = []
    for indim in in_dim:
        fc1 = nn.Sequential(
            nn.Linear(indim, out_dim),
            nn.BatchNorm1d(out_dim, affine=True),
            nn.ReLU(inplace=True),
            nn.Dropout()
        )
        fc2 = nn.Sequential(
            nn.Linear(out_dim, out_dim),
            nn.BatchNorm1d(out_dim, affine=True),
            nn.ReLU(inplace=True),
            # nn.Dropout()
        )
        fc1s.append(fc1)
        fc2s.append(fc2)
    self.fc1s = nn.ModuleList(fc1s)
    self.fc2s = nn.ModuleList(fc2s)
    self.fc3 = nn.Linear(out_dim, cfg.MODEL.CLASS_NUM)

    self.xent_loss = losses.create('CrossEntropy')
    if cfg.LOSSES.TRG_GXENT_WEIGHT > 0:
        self.gxent_loss = losses.create('GeneralEntropy')
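# NOTE: the forward pass of Bilinear is not included in this section. The
# sketch below is only a guess at how the per-branch heads might be combined
# (sum of branch outputs, then the shared fc3); it is an assumption, not the
# source's actual forward:
def forward(self, feats):
    # feats: list of per-branch feature tensors, one per entry of in_dim
    out = 0
    for feat, fc1, fc2 in zip(feats, self.fc1s, self.fc2s):
        out = out + fc2(fc1(feat))  # assumed fusion by summation
    return self.fc3(out)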
def initialize_criterion(self, args):
    if args.loss == 'triplet_no_hard_mining' or args.loss == 'triplet':
        self.criterion = losses.create(args.loss,
                                       margin=args.margin,
                                       num_instances=args.num_instances).cuda()
    elif args.loss == 'center_triplet':
        self.criterion = losses.create(args.loss).cuda()
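# NOTE: every snippet in this section calls a `losses.create(name, **kwargs)`
# factory that is not shown here. A minimal registry sketch, assuming each
# loss is a torch.nn.Module registered under a string key (the decorator name
# `register_loss` is an assumption, not from the source):
_LOSS_REGISTRY = {}

def register_loss(name):
    """Register a loss class under a string key."""
    def wrapper(cls):
        _LOSS_REGISTRY[name] = cls
        return cls
    return wrapper

def create(name, *args, **kwargs):
    """Instantiate the loss registered under `name`, forwarding constructor
    keyword arguments such as margin= or num_instances=."""
    if name not in _LOSS_REGISTRY:
        raise KeyError('Unknown loss: {}'.format(name))
    return _LOSS_REGISTRY[name](*args, **kwargs)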
def __init__(self, in_dim=2048, out_dim=1000, **kwargs):
    super(Basic, self).__init__()
    self.fc1 = nn.Sequential(
        nn.Linear(in_dim, out_dim),
        nn.BatchNorm1d(out_dim, affine=True),
        nn.ReLU(inplace=True),
        nn.Dropout())
    self.fc2 = nn.Sequential(
        nn.Linear(out_dim, out_dim),
        nn.BatchNorm1d(out_dim, affine=True),
        nn.ReLU(inplace=True),
        nn.Dropout())

    self.xent_loss = losses.create('CrossEntropy')
    if cfg.LOSSES.TRG_GXENT_WEIGHT > 0:
        self.gxent_loss = losses.create('GeneralEntropy')
def main(args):
    # s_ = time.time()
    save_dir = args.save_dir  # directory where checkpoints are saved
    mkdir_if_missing(save_dir)  # create the save directory if it is missing (utils helper)

    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)  # print the hyper-parameters of the current run
    start = 0

    # build the model; pretrained=True would load existing pre-trained weights
    # through the factory functions in models/
    model = models.create(args.net, pretrained=False, model_path=None, normalized=True)
    model = torch.nn.DataParallel(model)  # data-parallel training across GPUs
    model = model.cuda()                  # move to GPU
    print('initial model is saved at %s' % save_dir)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, margin_same=args.margin_same,
                              margin_diff=args.margin_diff).cuda()  # TWContrastiveLoss

    # dataset; set_name is "test" or "train"
    data = DataSet.create(name=args.data, root=args.data_root, set_name=args.set_name)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size, shuffle=True,
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    use_gpu = torch.cuda.is_available()  # assumed definition; used below but not defined in the snippet
    losses_ = []  # per-epoch training losses; appended below but never initialized in the snippet
    for epoch in range(start, 50):  # args.epochs
        L = train(epoch=epoch, model=model, criterion=criterion,
                  optimizer=optimizer, train_loader=train_loader, args=args)
        losses_.append(L)
        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
                fpath=osp.join(args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))

    # added: plot the per-epoch loss curve
    batch_nums = range(1, len(losses_) + 1)
    import matplotlib.pyplot as plt
    plt.plot(batch_nums, losses_)
    plt.show()
def setModel(self):
    print('Setting model')
    from models import resnet_50
    self.model = resnet_50.resnet_50(pretrained=True,
                                     num_class=self.classSize,
                                     saliency=self.saliency,
                                     pool_type=self.pool_type,
                                     is_train=True,
                                     scale=self.scale)
    self.model = self.model.to(self.gpuid)
    self.criterion = losses.create(self.loss).to(self.gpuid)
    self.center_criterion = nn.MSELoss()
    self.optimizer = optim.SGD(self.model.parameters(), lr=self.init_lr,
                               momentum=0.9, weight_decay=0.00005)
    # self.optimizer = optim.SGD(self.model.parameters(), lr=self.init_lr, momentum=0.9)
    return
def init_network(self):
    netG = models.__dict__[cfg.MODEL.NET](pretrained=True)
    netE = models.classifier.Classifier(class_num=cfg.MODEL.CLASS_NUM,
                                        distributed=self.distributed).cuda()
    self.load_checkpoint(netG, netE)

    if self.distributed:
        sync_netG = nn.SyncBatchNorm.convert_sync_batchnorm(netG)
        sync_netE = nn.SyncBatchNorm.convert_sync_batchnorm(netE)
        self.netG = torch.nn.parallel.DistributedDataParallel(
            sync_netG.to(self.device),
            device_ids=[self.args.local_rank],
            output_device=self.args.local_rank)
        self.netE = torch.nn.parallel.DistributedDataParallel(
            sync_netE.to(self.device),
            device_ids=[self.args.local_rank],
            output_device=self.args.local_rank)
    else:
        self.netG = torch.nn.DataParallel(netG).cuda()
        self.netE = torch.nn.DataParallel(netE).cuda()

    self.optim = models.optimizer.Optimizer(self.netG, self.netE)

    if cfg.LOSSES.LABEL_SMOOTH > 0:
        self.cross_ent = losses.create('SmoothCrossEntropy').cuda()
    else:
        self.cross_ent = losses.create('CrossEntropy').cuda()
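# NOTE: both init_network variants above assume the process group has already
# been initialized and that args.local_rank was provided by the distributed
# launcher. A minimal setup sketch (argument and variable names here are
# assumptions, not taken from the source):
import argparse
import torch
import torch.distributed as dist

parser = argparse.ArgumentParser()
parser.add_argument('--local_rank', type=int, default=0)
args = parser.parse_args()

torch.cuda.set_device(args.local_rank)  # one process per GPU
dist.init_process_group(backend='nccl', init_method='env://')
device = torch.device('cuda', args.local_rank)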
def transfer_feature(model, model_old, train_loader, class_mean, args,
                     task_id, num_class_per_task, test_loader):
    generator_old = Generator()
    generator_old = generator_old.cuda()
    generator_current = Generator()
    generator_current = generator_current.cuda()

    parameters_to_optimize = list()
    parameters_to_optimize += list(generator_old.parameters())
    parameters_to_optimize += list(generator_current.parameters())
    optimizer = torch.optim.Adam(parameters_to_optimize, lr=0.002, betas=(0.9, 0.999))
    criterion = losses.create('triplet', margin=0, num_instances=8).cuda()

    class_mean_best = []
    if args.data == 'cifar100':
        epoch = 50
    else:
        epoch = 100

    for j in range(epoch):
        class_mean_epoch = class_mean.copy()
        for i, data in enumerate(train_loader, 0):
            (x_input_current, labels_current, x_input_old, labels_old,
             class_mean_old_idx, class_label_old_idx) = data
            x_input_current = Variable(x_input_current.cuda())
            x_input_old = Variable(x_input_old.cuda())

            optimizer.zero_grad()
            re_current = generator_current(x_input_current)
            x_fake_current = re_current + x_input_current
            re_old = generator_old(x_input_old)
            x_fake_old = re_old + x_input_old

            g_l1 = torch.sum(torch.abs(x_fake_current - x_fake_old))

            class_mean_old_idx = class_mean_old_idx.cuda()
            re_class_mean_old_idx = generator_old(class_mean_old_idx)
            class_mean_old_idx = re_class_mean_old_idx + class_mean_old_idx

            g_tri_1, inter_, dist_ap, dist_an = criterion(x_fake_current, labels_current)
            g_tri_2, inter_, dist_ap, dist_an = criterion(x_fake_old, labels_old)
            g_tri_old, inter_, dist_ap, dist_an = criterion(class_mean_old_idx,
                                                            class_label_old_idx)
            if args.data == 'cifar100':
                g_tri = g_tri_1 * 200 + g_tri_2 * 100 + g_tri_old * 100
            else:
                g_tri = g_tri_1 * 1000 + g_tri_2 * 100 + g_tri_old * 100

            g_loss = g_l1 + g_tri
            print('epoch: %d, g_loss: %.3f, g_l1_loss: %.3f, g_tri_loss: %.3f'
                  % (j, g_loss, g_l1, g_tri))
            g_loss.backward()
            optimizer.step()

        generator_old.eval()
        generator_current.eval()

        # map the stored class means of the old tasks through the old generator
        old_embedding_all = []
        for idx in range(int(args.base + (task_id - 1) * num_class_per_task)):
            input = class_mean_epoch[idx]
            input = torch.from_numpy(input)
            input = input.cuda()
            old_embedding = generator_old(input) + input
            old_embedding = old_embedding.data.cpu()
            old_embedding_all.append(old_embedding.numpy())
        class_mean_epoch[:int(args.base + (task_id - 1) * num_class_per_task)] = old_embedding_all

        # map the class means of the current task through the current generator
        old_embedding_all = []
        for idx in range(int(num_class_per_task)):
            input = class_mean_epoch[args.base + (task_id - 1) * num_class_per_task + idx]
            input = torch.from_numpy(input)
            input = input.cuda()
            old_embedding = generator_current(input) + input
            old_embedding = old_embedding.data.cpu()
            old_embedding_all.append(old_embedding.numpy())
        class_mean_epoch[int(args.base + (task_id - 1) * num_class_per_task):] = old_embedding_all

        val_embeddings_cl, val_labels_cl = extract_features_val(generator_current, test_loader)
        acc_ave = computer_acc(class_mean_epoch, task_id, val_labels_cl, val_embeddings_cl)
        generator_old.train()
        generator_current.train()

        result.write("the current results is %.3f\n" % acc_ave)
        print('the current results')
        print(acc_ave)

    torch.save(generator_old,
               os.path.join(save_path, 'task_' + str(task_id) + '_old' + '.t7'))
    torch.save(generator_current,
               os.path.join(save_path, 'task_' + str(task_id) + '_current' + '.t7'))
    return class_mean
def main(args):
    s_ = time.time()
    # save training logs
    log_dir = args.log_dir
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net)
        model = load_parameter(model)
    else:
        # resume model
        print('Resume from model at Epoch %d' % args.start)
        model = torch.load(args.r)

    model = model.cuda()
    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set.union(
        set(map(id, model.Embedding.parameters())),
        set(map(id, model.attention_blocks.parameters())))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.0},
        {'params': new_params, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    if args.loss == 'bin':
        criterion = losses.create(args.loss, margin=args.margin,
                                  alpha=args.alpha).cuda()
    else:
        criterion = losses.create(args.loss).cuda()
    # the divergence regularizer is used for every loss in the loop below, so
    # create it unconditionally (the original only created it for 'bin', which
    # raised a NameError for other losses)
    Div = losses.create('div').cuda()

    data = DataSet.create(args.data, root=None)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, num_workers=args.nThreads)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    for epoch in range(args.start, args.epochs):
        epoch_list.append(epoch)
        running_loss = 0.0
        divergence = 0.0
        running_pos = 0.0
        running_neg = 0.0
        if epoch == 2:
            param_groups[0]['lr_mult'] = 0.1

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable; labels is a cuda.LongTensor Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            embed_feat = model(inputs)
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            div = Div(embed_feat)
            loss_ = loss + args.theta * div

            if not type(loss) == torch.Tensor:
                print('Loss is not a tensor; skipping backward for this batch')
                continue

            loss_.backward()
            optimizer.step()

            running_loss += loss.item()
            divergence += div.item()
            running_neg += dist_an
            running_pos += dist_ap
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA-HA-AH-AH-AH-AH --')

        loss_list.append(running_loss)
        pos_list.append(running_pos / i)
        neg_list.append(running_neg / i)
        print('[Epoch %05d]\t Loss: %.2f \t Divergence: %.2f \t Accuracy: %.2f \t Pos-Dist: %.2f \t Neg-Dist: %.2f'
              % (epoch + 1, running_loss, divergence, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))

    np.savez(os.path.join(log_dir, "result.npz"), epoch=epoch_list,
             loss=loss_list, pos=pos_list, neg=neg_list)
    t = time.time() - s_
    print('training takes %.2f hour' % (t / 3600))
else:
    print('initialize the network randomly ----------- hello wangxiaowu! come on!!')
    model.load_state_dict(model_dict)

# os.mkdir(log_dir)
model = model.cuda()
torch.save(model, os.path.join(log_dir, 'model.pkl'))
print('initial model is saved at %s' % log_dir)
print('the margin of ------------ loss function is ----------%f' % args.m)

if args.m == 0.5 and args.nums is None:
    print('-------------use default margin -----------------')
    criterion = losses.create(args.loss).cuda()
elif args.nums is None:
    criterion = losses.create(args.loss, margin=args.m).cuda()
else:
    nums = chars2nums(args.nums)
    print('-------------use nums -----------------', nums)
    criterion = losses.create(args.loss, nums=nums).cuda()

# fine-tune the model: the learning rate for pre-trained parameters is 1/10
new_param_ids = set(map(id, model.Embed.parameters()))
new_params = [p for p in model.parameters() if id(p) in new_param_ids]
base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
param_groups = [{
    'params': base_params,
    'lr_mult': args.base
def train_fun(args, train_loader, feat_loader, current_task, fisher={}, prototype={}):
    log_dir = args.log_dir
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    model = models.create(args.net, Embed_dim=args.dim)

    # load part of the model
    if args.method == 'Independent' or current_task == 0:
        model_dict = model.state_dict()
        if args.net == 'resnet32':
            pickle.load = partial(pickle.load, encoding="latin1")
            pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
            pretrained_dict = torch.load(
                'pretrained_models/Finetuning_0_task_0_200_model_task2_cifar100_seed1993.pkl',
                map_location=lambda storage, loc: storage,
                pickle_module=pickle)
            pretrained_dict = pretrained_dict.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_dict.items()
                               if k in model_dict and 'fc' not in k}
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
        elif args.net == 'resnet18' and args.data == 'imagenet_sub':
            pickle.load = partial(pickle.load, encoding="latin1")
            pickle.Unpickler = partial(pickle.Unpickler, encoding="latin1")
            pretrained_dict = torch.load(
                'pretrained_models/Finetuning_0_task_0_200_model_task2_imagenet_sub_seed1993.pkl',
                map_location=lambda storage, loc: storage,
                pickle_module=pickle)
            pretrained_dict = pretrained_dict.state_dict()
            pretrained_dict = {k: v for k, v in pretrained_dict.items()
                               if k in model_dict and 'fc' not in k}
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
        else:
            print('Oops! That is not a valid model.')

    if args.method != 'Independent' and current_task > 0:
        model = torch.load(os.path.join(
            log_dir, args.method + '_' + args.exp + '_task_' +
            str(current_task - 1) + '_%d_model.pkl' % int(args.epochs - 1)))
        model_old = deepcopy(model)
        model_old.eval()
        model_old = freeze_model(model_old)

    model = model.cuda()
    torch.save(model, os.path.join(
        log_dir, args.method + '_' + args.exp + '_task_' +
        str(current_task) + '_pre_model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.1},
        {'params': new_params, 'lr_mult': 1.0}]

    criterion = losses.create(args.loss, margin=args.margin,
                              num_instances=args.num_instances).cuda()
    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=200, gamma=0.1)

    if args.data == 'cifar100' or args.data == 'imagenet_sub':
        if current_task > 0:
            model.eval()

    for epoch in range(args.start, args.epochs):
        running_loss = 0.0
        running_lwf = 0.0
        scheduler.step()

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            _, embed_feat = model(inputs)

            if current_task == 0:
                loss_aug = 0 * torch.sum(embed_feat)
            else:
                if args.method == 'Finetuning' or args.method == 'Independent':
                    loss_aug = 0 * torch.sum(embed_feat)
                elif args.method == 'LwF':
                    _, embed_feat_old = model_old(inputs)
                    loss_aug = args.tradeoff * \
                        torch.sum((embed_feat - embed_feat_old).pow(2)) / 2.
                elif args.method == 'EWC' or args.method == 'MAS':
                    loss_aug = 0
                    for (name, param), (_, param_old) in zip(
                            model.named_parameters(), model_old.named_parameters()):
                        loss_aug += args.tradeoff * \
                            torch.sum(fisher[name] * (param_old - param).pow(2)) / 2.

            if args.loss == 'MSLoss':
                loss = criterion(embed_feat, labels)
                inter_ = 0
                dist_ap = 0
                dist_an = 0
            else:
                loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            loss += loss_aug

            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            running_lwf += loss_aug.item()
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')

        print('[Epoch %05d]\t Total Loss: %.3f \t LwF Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1, running_loss, running_lwf, inter_, dist_ap, dist_an))
        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(
                log_dir, args.method + '_' + args.exp + '_task_' +
                str(current_task) + '_%d_model.pkl' % epoch))

    if args.method == 'EWC' or args.method == 'MAS':
        fisher = fisher_matrix_diag(model, criterion, train_loader,
                                    number_samples=500)
    return fisher
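# NOTE: fisher_matrix_diag is called above but not defined in this section.
# A minimal sketch of a diagonal Fisher/importance estimate via squared
# gradients; the body is an assumption that mirrors the call signature above
# and assumes the tuple-returning criterion and the `_, embed_feat` model
# output used in train_fun:
def fisher_matrix_diag(model, criterion, train_loader, number_samples=500):
    fisher = {n: torch.zeros_like(p) for n, p in model.named_parameters()}
    seen = 0
    for inputs, labels in train_loader:
        if seen >= number_samples:
            break
        inputs, labels = inputs.cuda(), labels.cuda()
        model.zero_grad()
        _, embed_feat = model(inputs)
        loss = criterion(embed_feat, labels)[0]
        loss.backward()
        for n, p in model.named_parameters():
            if p.grad is not None:
                # accumulate squared gradients as the importance estimate
                fisher[n] += p.grad.detach() ** 2 * inputs.size(0)
        seen += inputs.size(0)
    for n in fisher:
        fisher[n] /= float(max(seen, 1))
    return fisher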
def main(args):
    # save training logs
    log_dir = os.path.join('checkpoints', args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        # load part of the model
        model_dict = model.state_dict()
        # print(model_dict)
        if args.net == 'bn':
            pretrained_dict = torch.load('pretrained_models/bn_inception-239d2248.pth')
        else:
            pretrained_dict = torch.load('pretrained_models/inception_v3_google-1a9a5a14.pth')
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
    else:
        # resume model
        model = torch.load(args.r)

    model = model.cuda()
    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.1},
        {'params': new_params, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()

    data = DataSet.create(args.data, root=None, test=False)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, num_workers=args.nThreads)

    for epoch in range(args.start, args.epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            embed_feat = model(inputs)
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            if args.orth > 0:
                loss = orth_reg(model, loss, cof=args.orth)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')

        print('[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1, running_loss, inter_, dist_ap, dist_an))
        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))
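# NOTE: several snippets in this section attach an 'lr_mult' key to their
# optimizer parameter groups. torch.optim.Adam stores unknown group keys but
# never reads them, so some helper has to fold the multiplier into each
# group's actual lr. A minimal sketch of such a helper (the name
# adjust_learning_rate is an assumption, not from the source):
def adjust_learning_rate(optimizer, base_lr):
    """Set each group's lr to base_lr scaled by its 'lr_mult' (default 1.0)."""
    for group in optimizer.param_groups:
        group['lr'] = base_lr * group.get('lr_mult', 1.0)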
print('dimension of the embedding space is %d' % args.dim)
print('log dir is: %s' % args.log_dir)

# load fine-tuned models
if args.r is not None:
    model = torch.load(args.r)
else:
    model = models.create(args.net, Embed_dim=args.dim,
                          num_class=num_class, pretrain=True)
# visualize the network
model = model.cuda()

criterion = losses.create(args.loss).cuda()
param_groups = model.parameters()
learn_rate = args.lr
# optimizer = optim.Adam(param_groups, lr=learn_rate,
#                        weight_decay=args.weight_decay)
optimizer = optim.SGD(param_groups, lr=learn_rate,
                      momentum=0.9, weight_decay=0.00005)

# get train_loader
if 'mxnet' in args.net:
    normalize = transforms.Normalize(mean=[123, 117, 104], std=[1, 1, 1])
else:
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
def main(args):
    # s_ = time.time()
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)
    # model_frozen is used below, so it must be created here as well
    # (its creation was commented out in the source)
    model_frozen = models.create(args.net, pretrained=True, dim=args.dim)

    # for vgg and densenet
    if args.resume is None:
        model_dict = model.state_dict()
    else:
        # resume model
        print('load model from {}'.format(args.resume))
        model_dict = model.state_dict()
        model_dict_frozen = model_frozen.state_dict()
        chk_pt = torch.load(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        pretrained_dict = {k: v for k, v in weight.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        pretrained_dict_frozen = {k: v for k, v in weight.items()
                                  if k in model_dict_frozen}
        model_dict_frozen.update(pretrained_dict_frozen)
        model_frozen.load_state_dict(model_dict_frozen)
        model_frozen.eval()

    model = torch.nn.DataParallel(model)
    model = model.cuda()
    model_frozen = torch.nn.DataParallel(model_frozen)
    model_frozen = model_frozen.cuda()

    # freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids_fc_layer = set(map(id, model.module.fc_layer.parameters()))
    new_param_ids = new_param_ids_fc_layer
    new_params_fc = [p for p in model.module.parameters()
                     if id(p) in new_param_ids_fc_layer]
    base_params = [p for p in model.module.parameters()
                   if id(p) not in new_param_ids]
    # freeze the whole frozen model (equivalent to learning_rate = 0.0)
    frozen_params = [p for p in model_frozen.module.parameters()]
    for p in frozen_params:
        p.requires_grad = False

    # if fine-tuning the base network, use lr_mult 0.1;
    # with lr_mult 0.0 the base network is not updated
    param_groups = [
        {'params': base_params, 'lr_mult': 0.1},
        {'params': new_params_fc, 'lr_mult': 1.0}]

    print('initial model is saved at %s' % save_dir)
    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)

    criterion_loss = losses.create(args.loss, margin=args.margin,
                                   alpha=args.alpha, base=args.loss_base).cuda()
    CE_loss = nn.CrossEntropyLoss().cuda()
    l2_loss = L2Norm().cuda()
    similarity_loss = Similarity_preserving().cuda()
    criterion = [criterion_loss, CE_loss, l2_loss, similarity_loss]
    # Decor_loss = losses.create('decor').cuda()

    data = DataSet.create(args.data, ratio=args.ratio, width=args.width,
                          origin_width=args.origin_width, root=args.data_root)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    # save the train information
    best_accuracy = 0
    model_list = [model, model_frozen]

    if args.Incremental_flag == False:
        print("###################### This is non-incremental learning! ########################")
    elif args.Incremental_flag == True:
        print("######################### This is incremental learning! #########################")
    else:
        raise NotImplementedError()

    for epoch in range(start, args.epochs):
        accuracy = train(epoch=epoch, model=model_list, criterion=criterion,
                         optimizer=optimizer, train_loader=train_loader, args=args)
        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                # save the parameters from the updated model
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            is_best = accuracy > best_accuracy
            best_accuracy = max(accuracy, best_accuracy)
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best,
                fpath=osp.join(args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))
def main(args):
    num_class_dict = {'cub': int(100), 'car': int(98)}

    # save training logs
    log_dir = os.path.join(args.checkpoints, args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        # load part of the model
        model_dict = model.state_dict()
        # print(model_dict)
        if args.net == 'bn':
            pretrained_dict = torch.load('pretrained_models/bn_inception-239d2248.pth')
        else:
            pretrained_dict = torch.load('pretrained_models/inception_v3_google-1a9a5a14.pth')
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict)

        # orth init
        if args.init == 'orth':
            print('initialize the FC layer orthogonally')
            _, _, v = torch.svd(model_dict['Embed.linear.weight'])
            model_dict['Embed.linear.weight'] = v.t()
        # zero bias
        model_dict['Embed.linear.bias'] = torch.zeros(args.dim)
        model.load_state_dict(model_dict)
    else:
        # resume model
        model = torch.load(args.r)

    model = model.cuda()

    # compute the cluster centers for each class here
    def normalize(x):
        norm = x.norm(dim=1, p=2, keepdim=True)
        x = x.div(norm.expand_as(x))
        return x

    data = DataSet.create(args.data, root=None, test=False)

    if args.center_init == 'cluster':
        data_loader = torch.utils.data.DataLoader(
            data.train, batch_size=args.BatchSize, shuffle=False, drop_last=False)
        features, labels = extract_features(model, data_loader,
                                            print_freq=32, metric=None)
        features = [feature.resize_(1, args.dim) for feature in features]
        features = torch.cat(features)
        features = features.numpy()
        labels = np.array(labels)
        centers, center_labels = cluster_(features, labels, n_clusters=args.n_cluster)
        center_labels = [int(center_label) for center_label in center_labels]
        centers = Variable(torch.FloatTensor(centers).cuda(), requires_grad=True)
        center_labels = Variable(torch.LongTensor(center_labels)).cuda()
        print(40 * '#', '\n Clustering Done')
    else:
        center_labels = int(args.n_cluster) * list(range(num_class_dict[args.data]))
        center_labels = Variable(torch.LongTensor(center_labels).cuda())
        centers = normalize(torch.rand(num_class_dict[args.data] * args.n_cluster, args.dim))
        centers = Variable(centers.cuda(), requires_grad=True)

    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.Embed.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.1},
        {'params': new_params, 'lr_mult': 1.0},
        {'params': centers, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    cluster_counter = np.zeros([num_class_dict[args.data], args.n_cluster])
    criterion = losses.create(args.loss, alpha=args.alpha, centers=centers,
                              center_labels=center_labels,
                              cluster_counter=cluster_counter).cuda()

    # random sampling to generate mini-batches
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize, shuffle=True, drop_last=False)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    # _mask = Variable(torch.ByteTensor(np.ones([2, 4]))).cuda()
    dtype = torch.ByteTensor
    _mask = torch.ones(int(num_class_dict[args.data]), args.n_cluster).type(dtype)
    _mask = Variable(_mask).cuda()

    for epoch in range(args.start, args.epochs):
        epoch_list.append(epoch)
        running_loss = 0.0
        running_pos = 0.0
        running_neg = 0.0
        to_zero(cluster_counter)

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable; labels is a cuda.LongTensor Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            # centers.zero_grad()
            embed_feat = model(inputs)

            # update network weights
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels, _mask)
            loss.backward()
            optimizer.step()
            centers.data = normalize(centers.data)

            running_loss += loss.item()
            running_neg += dist_an
            running_pos += dist_ap
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')
            if i % 10 == 9:
                print('[Epoch %05d Iteration %2d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
                      % (epoch + 1, i + 1, loss.item(), inter_, dist_ap, dist_an))
                # cluster membership counters
                print(cluster_counter)

        loss_list.append(running_loss)
        pos_list.append(running_pos / i)
        neg_list.append(running_neg / i)

        # update the _mask to silence clusters with only one or no member
        # _mask = Variable(torch.FloatTensor(cluster_counter) > 1).cuda()
        # cluster_distribution = torch.sum(_mask, 1).cpu().data.numpy().tolist()
        # print(cluster_distribution)
        # print('[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
        #       % (epoch + 1, running_loss, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))

    np.savez(os.path.join(log_dir, "result.npz"), epoch=epoch_list,
             loss=loss_list, pos=pos_list, neg=neg_list)
def main(args):
    # s_ = time.time()
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    # sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    writer = SummaryWriter('log/' + args.log_name)
    display(args)
    start = 0

    model = models.create(args.net, pretrain=True, dim=args.dim)
    # for vgg and densenet
    if args.resume is None:
        model_dict = model.state_dict()
    else:
        # resume model
        print('load model from {}'.format(args.resume))
        model = load_checkpoint(args.resume, args)
        start = 80

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze vgg layers
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, margin=args.margin,
                              alpha=args.alpha, beta=args.beta).cuda()

    data = DataSet.create(args.data)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, pin_memory=True)

    # save the train information
    for epoch in range(start, args.epochs):
        # if epoch == 5:
        #     optimizer = torch.optim.Adam(model.parameters(), lr=args.lr/100)
        #     print(args.lr/100)
        train(writer, epoch=epoch, model=model, criterion=criterion,
              optimizer=optimizer, train_loader=train_loader, args=args)
        if epoch == 800:
            optimizer = torch.optim.Adam(model.parameters(), lr=args.lr / 10,
                                         weight_decay=args.weight_decay)
        if (epoch + 1) % args.save_step == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
                fpath=osp.join(args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))
def main(args):
    # s_ = time.time()
    print(torch.cuda.get_device_properties(device=0).total_memory)
    torch.cuda.empty_cache()
    print(args)

    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    num_txt = len(glob.glob(save_dir + "/*.txt"))
    sys.stdout = logging.Logger(os.path.join(save_dir, "log_" + str(num_txt) + ".txt"))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=args.pretrained, dim=args.dim,
                          self_supervision_rot=args.self_supervision_rot)

    all_pretrained = glob.glob(save_dir + "/*.pth.tar")
    if (args.resume is None) or (len(all_pretrained) == 0):
        model_dict = model.state_dict()
    else:
        # resume model from the latest checkpoint in save_dir
        all_pretrained_epochs = sorted(
            [int(x.split("/")[-1][6:-8]) for x in all_pretrained])
        args.resume = os.path.join(
            save_dir, "ckp_ep" + str(all_pretrained_epochs[-1]) + ".pth.tar")
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    fake_centers_dir = os.path.join(args.save_dir, "fake_center.npy")
    if np.sum(["train_1.txt" in x for x in glob.glob(args.save_dir + "/**/*")]) == 0:
        if args.rot_only:
            create_fake_labels(None, None, args)
        else:
            data = dataset.Dataset(args.data, ratio=args.ratio, width=args.width,
                                   origin_width=args.origin_width, root=args.data_root,
                                   self_supervision_rot=0, mode="test",
                                   rot_bt=args.rot_bt, corruption=args.corruption,
                                   args=args)
            fake_train_loader = torch.utils.data.DataLoader(
                data.train, batch_size=100, shuffle=False, drop_last=False,
                pin_memory=True, num_workers=args.nThreads)
            train_feature, train_labels = extract_features(
                model, fake_train_loader, print_freq=1e5, metric=None,
                pool_feature=args.pool_feature, org_feature=True)
            create_fake_labels(train_feature, train_labels, args)
            del train_feature
        fake_centers = "k-means++"
        torch.cuda.empty_cache()
    elif os.path.exists(fake_centers_dir):
        fake_centers = np.load(fake_centers_dir)
    else:
        fake_centers = "k-means++"
    time.sleep(60)
    model.train()

    # freeze BN
    if (args.freeze_BN is True) and (args.pretrained):
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.module.classifier.parameters()))
    new_rot_param_ids = set()
    if args.self_supervision_rot:
        new_rot_param_ids = set(map(id, model.module.classifier_rot.parameters()))
        print(new_rot_param_ids)
    new_params = [p for p in model.module.parameters() if id(p) in new_param_ids]
    new_rot_params = [p for p in model.module.parameters() if id(p) in new_rot_param_ids]
    base_params = [p for p in model.module.parameters()
                   if (id(p) not in new_param_ids) and (id(p) not in new_rot_param_ids)]
    param_groups = [
        {'params': base_params},
        {'params': new_params},
        {'params': new_rot_params, 'lr': args.rot_lr}]

    print('initial model is saved at %s' % save_dir)
    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, margin=args.margin, alpha=args.alpha,
                              beta=args.beta, base=args.loss_base).cuda()

    data = dataset.Dataset(args.data, ratio=args.ratio, width=args.width,
                           origin_width=args.origin_width, root=args.save_dir,
                           self_supervision_rot=args.self_supervision_rot,
                           rot_bt=args.rot_bt, corruption=1, args=args)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    # save the train information
    for epoch in range(start, args.epochs):
        train(epoch=epoch, model=model, criterion=criterion,
              optimizer=optimizer, train_loader=train_loader, args=args)

        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
                fpath=osp.join(args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))

        if ((epoch + 1) % args.up_step == 0) and (not args.rot_only):
            # rewrite train_1.txt file
            data = dataset.Dataset(args.data, ratio=args.ratio, width=args.width,
                                   origin_width=args.origin_width, root=args.data_root,
                                   self_supervision_rot=0, mode="test",
                                   rot_bt=args.rot_bt, corruption=args.corruption,
                                   args=args)
            fake_train_loader = torch.utils.data.DataLoader(
                data.train, batch_size=args.batch_size, shuffle=False,
                drop_last=False, pin_memory=True, num_workers=args.nThreads)
            train_feature, train_labels = extract_features(
                model, fake_train_loader, print_freq=1e5, metric=None,
                pool_feature=args.pool_feature, org_feature=(args.dim % 64 != 0))
            fake_centers = create_fake_labels(train_feature, train_labels, args,
                                              init_centers=fake_centers)
            del train_feature
            torch.cuda.empty_cache()
            time.sleep(60)
            np.save(fake_centers_dir, fake_centers)

            # reload data
            data = dataset.Dataset(args.data, ratio=args.ratio, width=args.width,
                                   origin_width=args.origin_width, root=args.save_dir,
                                   self_supervision_rot=args.self_supervision_rot,
                                   rot_bt=args.rot_bt, corruption=1, args=args)
            train_loader = torch.utils.data.DataLoader(
                data.train, batch_size=args.batch_size,
                sampler=FastRandomIdentitySampler(data.train,
                                                  num_instances=args.num_instances),
                drop_last=True, pin_memory=True, num_workers=args.nThreads)

            # test on testing data
            # extract_recalls(data=args.data, data_root=args.data_root, width=args.width,
            #                 net=args.net, checkpoint=None, dim=args.dim,
            #                 batch_size=args.batch_size, nThreads=args.nThreads,
            #                 pool_feature=args.pool_feature,
            #                 gallery_eq_query=args.gallery_eq_query, model=model)
            model.train()
            if (args.freeze_BN is True) and (args.pretrained):
                print(40 * '#', '\n BatchNorm frozen')
                model.apply(set_bn_eval)
def main(args):
    s_ = time.time()
    # save training logs
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)
    if args.r is None:
        model_dict = model.state_dict()
        # orthogonal init
        if args.init == 'orth':
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = torch.nn.init.orthogonal_(w)
        else:
            print('initialize the FC layer kai-ming-ly')
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = torch.nn.init.kaiming_normal_(w)
        # zero bias
        model_dict['classifier.0.bias'] = torch.zeros(args.dim)
        model.load_state_dict(model_dict)
    else:
        # resume model
        chk_pt = load_checkpoint(args.r)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze BN
    if args.BN == 1:
        print(40 * '#', 'BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.module.classifier.parameters()))
    new_params = [p for p in model.module.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.module.parameters() if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.0},
        {'params': new_params, 'lr_mult': 1.0}]

    print('initial model is saved at %s' % save_dir)
    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.loss == 'center-nca':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'cluster-nca':
        criterion = losses.create(args.loss, alpha=args.alpha, beta=args.beta).cuda()
    elif args.loss == 'neighbour':
        criterion = losses.create(args.loss, k=args.k, margin=args.margin).cuda()
    elif args.loss == 'nca':
        criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()
    elif args.loss == 'triplet':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'bin' or args.loss == 'ori_bin':
        criterion = losses.create(args.loss, margin=args.margin, alpha=args.alpha)
    else:
        criterion = losses.create(args.loss).cuda()
    # Decor_loss = losses.create('decor').cuda()

    data = DataSet.create(args.data, root=None)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize,
        sampler=FastRandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    for epoch in range(start, args.epochs):
        epoch_list.append(epoch)
        running_loss = 0.0
        running_pos = 0.0
        running_neg = 0.0

        if epoch == 1:
            optimizer.param_groups[0]['lr_mult'] = 0.1
        if (epoch == 1000 and args.data == 'car') or \
           (epoch == 550 and args.data == 'cub') or \
           (epoch == 100 and args.data in ['shop', 'jd']):
            param_groups = [
                {'params': base_params, 'lr_mult': 0.1},
                {'params': new_params, 'lr_mult': 1.0}]
            optimizer = torch.optim.Adam(param_groups, lr=0.1 * args.lr,
                                         weight_decay=args.weight_decay)

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable; labels is a cuda.LongTensor Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            embed_feat = model(inputs)
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            # decor_loss = Decor_loss(embed_feat)
            # loss += args.theta * decor_loss

            if not type(loss) == torch.Tensor:
                print('Loss is not a tensor; skipping backward for this batch')
                continue

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_neg += dist_an
            running_pos += dist_ap
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA-HA-AH-AH-AH-AH --')

        loss_list.append(running_loss)
        pos_list.append(running_pos / (i + 1))
        neg_list.append(running_neg / (i + 1))
        print('[Epoch %03d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1, running_loss / (i + 1), inter_, dist_ap, dist_an))

        if (epoch + 1) % args.save_step == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
                fpath=osp.join(args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))

    np.savez(os.path.join(save_dir, "result.npz"), epoch=epoch_list,
             loss=loss_list, pos=pos_list, neg=neg_list)
    t = time.time() - s_
    print('training takes %.2f hour' % (t / 3600))
def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    dir = '%s_%s_dis_%s_%s_%s_%0.2f_%s' % (args.data, args.loss, args.net,
                                           args.TNet, args.Ttype, args.lamda, args.lr)
    log_dir = os.path.join('checkpoints', dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    # Teacher network
    if args.r is None:
        Network_T = args.TNet
        model_T = models.create(Network_T, Embed_dim=args.dim)
        model_dict_T = model_T.state_dict()
        if args.data == 'cub':
            model_T = torch.load('checkpoints/cub_Tmodel.pkl')
        elif args.data == 'car':
            model_T = torch.load('checkpoints/car_Tmodel.pkl')
        elif args.data == 'product':
            model_T = torch.load('checkpoints/product_Tmodel.pkl')
    else:
        model_T = torch.load(args.r)
    model_T = model_T.cuda()
    model_T.eval()

    # Student network
    if args.r is None:
        model = models.create(args.net, Embed_dim=args.dim)
        model_dict = model.state_dict()
        if args.net == 'bn':
            pretrained_dict = torch.load('pretrained_models/bn_inception-239d2248.pth')
        elif args.net == 'resnet101':
            pretrained_dict = torch.load('pretrained_models/resnet101-5d3b4d8f.pth')
        elif args.net == 'resnet50':
            pretrained_dict = torch.load('pretrained_models/resnet50-19c8e357.pth')
        elif args.net == 'resnet34':
            pretrained_dict = torch.load('pretrained_models/resnet34-333f7ec4.pth')
        elif args.net == 'resnet18':
            pretrained_dict = torch.load('pretrained_models/resnet18-5c106cde.pth')
        elif args.net == 'inception':
            pretrained_dict = torch.load('pretrained_models/inception_v3_google-1a9a5a14.pth')
        else:
            print('Oops! That is not a valid model.')
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
    else:
        model = torch.load(args.r)

    if args.continue_train:
        model = torch.load(log_dir + '/%d_model.pkl' % (args.start))

    model = model.cuda()
    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    new_param_ids = set(map(id, model.Embed.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.1},
        {'params': new_params, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    if args.loss == 'knnsoftmax':
        criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()
    else:
        criterion = losses.create(args.loss).cuda()

    data = DataSet.create(args.data, root=None, test=False)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, num_workers=args.nThreads)

    loss_log = [[] for _ in range(3)]
    loss_dis = [[] for _ in range(3)]

    for epoch in range(args.start, args.epochs):
        running_loss = 0.0
        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            embed_feat = model(inputs)
            embed_feat_T = model_T(inputs)

            loss_net, inter_, dist_ap, dist_an, dis_pos, dis_neg, dis = criterion(
                embed_feat, labels)
            loss_net_T, inter_T, dist_ap_T, dist_an_T, dis_pos_T, dis_neg_T, dis_T = criterion(
                embed_feat_T, labels)

            lamda = args.lamda
            if args.Ttype == 'relative':
                loss_dis[0].append(torch.mean(torch.norm(dis - dis_T, p=2)).item())
                loss_dis[1].append(0.0)
                loss_dis[2].append(0.0)
                loss_distillation = 0.0 * torch.mean(
                    F.pairwise_distance(embed_feat, embed_feat_T))
                loss_distillation += torch.mean(torch.norm(dis - dis_T, p=2))
                loss = loss_net + lamda * loss_distillation
            elif args.Ttype == 'absolute':
                loss_dis[0].append(0.0)
                loss_dis[1].append(0.0)
                loss_dis[2].append(
                    torch.mean(F.pairwise_distance(embed_feat, embed_feat_T)).item())
                loss_distillation = torch.mean(
                    F.pairwise_distance(embed_feat, embed_feat_T))
                loss = loss_net + lamda * loss_distillation
            else:
                print('This type does not exist')

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            loss_log[0].append(loss.item())
            loss_log[1].append(loss_net.item())
            loss_log[2].append(lamda * loss_distillation.item())
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA')

        print('[Epoch %05d]\t Loss_net: %.3f \t Loss_distillation: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1, loss_net, lamda * loss_distillation, inter_, dist_ap, dist_an))
        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))

    # plot loss curves
    line1, = plt.plot(loss_log[0], 'r-', label="Total loss")
    line2, = plt.plot(loss_log[1], 'b-', label="KNNsoftmax loss")
    line3, = plt.plot(loss_log[2], 'g--', label="Distillation loss")
    plt.title('%s_%s_dis_%s_%s_%s_%0.2f' % (args.data, args.loss, args.net,
                                            args.TNet, args.Ttype, args.lamda))
    plt.legend([line1, line2, line3],
               ['Total loss', 'Contrastive loss', 'Distance loss'])
    plt.savefig('./fig/%s_%s_dis_%s_%s_%s_%0.2f.jpg'
                % (args.data, args.loss, args.net, args.TNet, args.Ttype, args.lamda))
def main(args):
    # s_ = time.time()
    save_dir = args.save_dir
    mkdir_if_missing(save_dir)
    sys.stdout = logging.Logger(os.path.join(save_dir, 'log.txt'))
    display(args)
    start = 0

    model = models.create(args.net, pretrained=True, dim=args.dim)
    # for vgg and densenet
    if args.resume is None:
        model_dict = model.state_dict()
    else:
        # resume model
        print('load model from {}'.format(args.resume))
        chk_pt = load_checkpoint(args.resume)
        weight = chk_pt['state_dict']
        start = chk_pt['epoch']
        model.load_state_dict(weight)

    model = torch.nn.DataParallel(model)
    model = model.cuda()

    # freeze BN
    if args.freeze_BN is True:
        print(40 * '#', '\n BatchNorm frozen')
        model.apply(set_bn_eval)
    else:
        print(40 * '#', 'BatchNorm NOT frozen')

    # Fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.module.classifier.parameters()))
    new_params = [p for p in model.module.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.module.parameters() if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.0},
        {'params': new_params, 'lr_mult': 1.0}]

    print('initial model is saved at %s' % save_dir)
    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)
    criterion = losses.create(args.loss, margin=args.margin,
                              alpha=args.alpha, base=args.loss_base).cuda()
    # Decor_loss = losses.create('decor').cuda()

    data = DataSet.create(args.data, ratio=args.ratio, width=args.width,
                          origin_width=args.origin_width, root=args.data_root)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.batch_size,
        sampler=FastRandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, pin_memory=True, num_workers=args.nThreads)

    # save the train information
    for epoch in range(start, args.epochs):
        train(epoch=epoch, model=model, criterion=criterion,
              optimizer=optimizer, train_loader=train_loader, args=args)
        if epoch == 1:
            optimizer.param_groups[0]['lr_mult'] = 0.1
        if (epoch + 1) % args.save_step == 0 or epoch == 0:
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': (epoch + 1),
            }, is_best=False,
                fpath=osp.join(args.save_dir, 'ckp_ep' + str(epoch + 1) + '.pth.tar'))
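# NOTE: RandomIdentitySampler / FastRandomIdentitySampler appear throughout
# these snippets but are not defined in this section. The triplet-style losses
# above rely on each batch containing `num_instances` samples per identity.
# A minimal sketch under the assumption that data_source yields (item, label)
# pairs; real implementations read a precomputed label list instead of
# iterating the dataset:
import numpy as np
from collections import defaultdict
from torch.utils.data.sampler import Sampler

class RandomIdentitySampler(Sampler):
    def __init__(self, data_source, num_instances=4):
        self.index_dic = defaultdict(list)
        for index, (_, label) in enumerate(data_source):
            self.index_dic[label].append(index)
        self.labels = list(self.index_dic.keys())
        self.num_instances = num_instances

    def __len__(self):
        return len(self.labels) * self.num_instances

    def __iter__(self):
        indices = []
        for label in np.random.permutation(self.labels):
            idxs = self.index_dic[label]
            # sample with replacement if an identity has too few images
            replace = len(idxs) < self.num_instances
            chosen = np.random.choice(idxs, size=self.num_instances, replace=replace)
            indices.extend(chosen.tolist())
        return iter(indices)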
def main(args):
    # save training logs
    log_dir = os.path.join(args.checkpoints, args.log_dir)
    mkdir_if_missing(log_dir)
    sys.stdout = logging.Logger(os.path.join(log_dir, 'log.txt'))
    display(args)

    if args.r is None:
        # load part of the model
        model = models.create(args.net, pretrained=True)
        model.features = torch.nn.Sequential(
            model.features,
            torch.nn.MaxPool2d(7),
            # torch.nn.BatchNorm2d(512),
            torch.nn.Dropout(p=0.01))
        model.classifier = torch.nn.Sequential(torch.nn.Linear(512, args.dim))
        # take the state dict after the new head is attached so the
        # classifier.0.* keys exist
        model_dict = model.state_dict()
        # print(model_dict)

        # orth init
        if args.init == 'orth':
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = torch.nn.init.orthogonal_(w)
        else:
            print('initialize the FC layer kaiming-ly')
            w = model_dict['classifier.0.weight']
            model_dict['classifier.0.weight'] = torch.nn.init.kaiming_normal_(w)
        # zero bias
        model_dict['classifier.0.bias'] = torch.zeros(args.dim)
        # apply the re-initialized head
        model.load_state_dict(model_dict)
    else:
        # resume model
        print('Resume from model at Epoch %d' % args.start)
        model = torch.load(args.r)

    model = model.cuda()
    torch.save(model, os.path.join(log_dir, 'model.pkl'))
    print('initial model is saved at %s' % log_dir)

    # fine-tune the model: the learning rate for pre-trained parameters is 1/10
    new_param_ids = set(map(id, model.classifier.parameters()))
    new_params = [p for p in model.parameters() if id(p) in new_param_ids]
    base_params = [p for p in model.parameters() if id(p) not in new_param_ids]
    param_groups = [
        {'params': base_params, 'lr_mult': 0.1},
        {'params': new_params, 'lr_mult': 1.0}]

    optimizer = torch.optim.Adam(param_groups, lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.loss == 'center-nca':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'cluster-nca':
        criterion = losses.create(args.loss, alpha=args.alpha, beta=args.beta).cuda()
    elif args.loss == 'neighbour':
        criterion = losses.create(args.loss, k=args.k, margin=args.margin).cuda()
    elif args.loss == 'nca':
        criterion = losses.create(args.loss, alpha=args.alpha, k=args.k).cuda()
    elif args.loss == 'triplet':
        criterion = losses.create(args.loss, alpha=args.alpha).cuda()
    elif args.loss == 'bin':
        criterion = losses.create(args.loss, margin=args.margin)
    else:
        criterion = losses.create(args.loss).cuda()

    if args.data == 'shop':
        data = DataSet.create(args.data, root=None, gallery=False, query=False)
    else:
        data = DataSet.create(args.data, root=None, test=False)
    train_loader = torch.utils.data.DataLoader(
        data.train, batch_size=args.BatchSize,
        sampler=RandomIdentitySampler(data.train, num_instances=args.num_instances),
        drop_last=True, num_workers=args.nThreads)

    # save the train information
    epoch_list = list()
    loss_list = list()
    pos_list = list()
    neg_list = list()

    for epoch in range(args.start, args.epochs):
        epoch_list.append(epoch)
        running_loss = 0.0
        running_pos = 0.0
        running_neg = 0.0

        if epoch == 1500:
            optimizer = torch.optim.Adam(param_groups, lr=0.1 * args.lr,
                                         weight_decay=args.weight_decay)

        for i, data in enumerate(train_loader, 0):
            inputs, labels = data
            # wrap them in Variable; labels is a cuda.LongTensor Variable
            inputs = Variable(inputs.cuda())
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            embed_feat = model(inputs)
            loss, inter_, dist_ap, dist_an = criterion(embed_feat, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_neg += dist_an
            running_pos += dist_ap
            if epoch == 0 and i == 0:
                print(50 * '#')
                print('Train Begin -- HA-HA-HA-HA-AH-AH-AH-AH --')

        loss_list.append(running_loss)
        pos_list.append(running_pos / i)
        neg_list.append(running_neg / i)
        print('[Epoch %05d]\t Loss: %.3f \t Accuracy: %.3f \t Pos-Dist: %.3f \t Neg-Dist: %.3f'
              % (epoch + 1, running_loss, inter_, dist_ap, dist_an))

        if epoch % args.save_step == 0:
            torch.save(model, os.path.join(log_dir, '%d_model.pkl' % epoch))

    np.savez(os.path.join(log_dir, "result.npz"), epoch=epoch_list,
             loss=loss_list, pos=pos_list, neg=neg_list)