def ewc_process(epochs, importance, use_cuda=True, weight=None):
    model = MLP(hidden_size)
    if torch.cuda.is_available() and use_cuda:
        model.cuda()
    optimizer = optim.SGD(params=model.parameters(), lr=lr)

    loss, acc = {}, {}
    for task in range(num_task):
        loss[task] = []
        acc[task] = []

        if task == 0:
            # Either warm-start from pre-trained weights or train the first
            # task without any EWC penalty.
            if weight:
                model.load_state_dict(weight)
            else:
                for _ in tqdm(range(epochs)):
                    loss[task].append(
                        normal_train(model, optimizer, train_loader[task]))
                    acc[task].append(test(model, test_loader[task]))
        else:
            # Build a replay sample from all previous tasks to estimate the
            # Fisher information for the EWC penalty.
            old_tasks = []
            for sub_task in range(task):
                old_tasks = old_tasks + train_loader[sub_task].dataset.get_sample(sample_size)
            old_tasks = random.sample(old_tasks, k=sample_size)

            for _ in tqdm(range(epochs)):
                loss[task].append(
                    ewc_train(model, optimizer, train_loader[task],
                              EWC(model, old_tasks), importance))
                for sub_task in range(task + 1):
                    acc[sub_task].append(test(model, test_loader[sub_task]))

    return loss, acc

def ewc_process_without_split(train_loader, test_loader, labels, online=False,
                              result_file='./ewc_without_split.txt'):
    gpu = torch.device('cuda:0')
    model = model_retrieval().cuda(gpu)
    optimizer = optim.SGD(params=model.parameters(), lr=args.first_lr)
    ewcs = []

    for task in range(args.num_task):
        print('Training Task {}... Labels: {}'.format(task, labels[task]))
        if task == 0:
            # First task: plain training, no EWC penalty yet.
            for iteration in range(args.iterations):
                loss = normal_train(model, labels[task], optimizer, train_loader[task], gpu)
                print('Iteration: {}\tLoss: {}'.format(iteration, loss))
                acc = test_model(model, labels[task], test_loader[task], gpu)
                print('Test Task: {}\tAccuracy: {}'.format(task, acc))
                with open(result_file, 'a') as f:
                    # Columns: training task, iteration, loss, test task, accuracy
                    f.write('{}\t{}\t{}\t{}\t{}\n'.format(task, iteration, loss, task, acc))
        else:
            # Later tasks: drop back to the regular learning rate and add the
            # EWC penalties of all consolidated tasks (or only the most recent
            # one in the online variant).
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr
            for iteration in range(args.iterations):
                if online:
                    loss = ewc_train(model, labels[task], optimizer, train_loader[task],
                                     ewcs[-1:], args.lam, gpu)
                else:
                    loss = ewc_train(model, labels[task], optimizer, train_loader[task],
                                     ewcs, args.lam, gpu)
                print('Iteration: {}\tLoss: {}'.format(iteration, loss))
                for sub_task in range(task + 1):
                    acc = test_model(model, labels[sub_task], test_loader[sub_task], gpu)
                    print('Test Task: {}\tAccuracy: {}'.format(sub_task, acc))
                    with open(result_file, 'a') as f:
                        # Columns: training task, iteration, loss, test task, accuracy
                        f.write('{}\t{}\t{}\t{}\t{}\n'.format(task, iteration, loss, sub_task, acc))
        # Consolidate the task just trained before moving on.
        ewcs.append(EWC(model, train_loader[task], gpu))

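# --- Illustrative sketch (not the repo's implementation) --------------------
# What an `ewc_train` step compatible with the calls above could look like:
# cross-entropy on the current task plus one quadratic EWC penalty per entry
# of `ewcs` (online EWC passes `ewcs[-1:]`, so only the latest one applies).
# Assumes each EWC object exposes a `penalty(model)` method; the `label`
# argument, which the repo uses to restrict the logits, is ignored here for
# brevity. `ewc_train_sketch` is a hypothetical name.
import torch.nn.functional as F

def ewc_train_sketch(model, label, optimizer, loader, ewcs, lam, gpu):
    model.train()
    epoch_loss = 0.0
    for data, target in loader:
        data, target = data.cuda(gpu), target.cuda(gpu)
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        for ewc in ewcs:
            loss = loss + lam * ewc.penalty(model)  # elastic penalty per consolidated task
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(loader)
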
def standard_process(train_loader, test_loader, labels, result_file='./standard.txt'):
    gpu = torch.device('cuda:0')
    new_model = model_retrieval().cuda(gpu)
    models = [copy.deepcopy(new_model) for _ in range(args.num_task)]
    temp_model = copy.deepcopy(new_model)
    cut_idx = generate_cut_layer(args.split, temp_model)
    optimizers = [optim.SGD(params=models[idx].parameters(), lr=args.lr)
                  for idx in range(args.num_task)]

    for task in range(args.num_task):
        print('Training Task {}... Labels: {}'.format(task, labels[task]))
        # `model` is a per-task working copy: from the second task on, it takes
        # the previous task's parameters from the cut layer onward.
        model = models[task] if task == 0 else models_copy(models[task], models[task - 1], cut_idx)
        optimizer = optimizers[task]
        if task == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.first_lr
        for iteration in range(args.iterations):
            loss = normal_train(model, labels[task], optimizer, train_loader[task], gpu)
            print('Iteration: {}\tLoss: {}'.format(iteration, loss))
            for sub_task in range(task + 1):
                # Assemble an evaluation model: device half from models[sub_task],
                # server half (from cut_idx on) from the model being trained.
                temp_model = copy.deepcopy(models[sub_task])
                temp_model = models_copy(temp_model, model, cut_idx)
                for i in range(task + 1):
                    acc = test_model(temp_model, labels[i], test_loader[i], gpu)
                    print('Device Task: {}\tTest Task: {}\tAccuracy: {}'.format(sub_task, i, acc))
                    with open(result_file, 'a') as f:
                        # Current server parameters (task) + device parameters (sub_task),
                        # evaluated on test task i.
                        f.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(task, iteration, loss, sub_task, i, acc))

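# --- Illustrative sketch (not the repo's implementation) --------------------
# `generate_cut_layer` and `models_copy` are helpers defined elsewhere; from
# their usage above, a plausible reading is that `generate_cut_layer` turns the
# --split argument into a parameter index, and `models_copy(dst, src, cut_idx)`
# overwrites dst's parameters from cut_idx onward with src's, so dst keeps its
# own device (front) half and takes the server (back) half from src. The
# `_sketch` name marks this as a hypothetical stand-in.
import torch

def models_copy_sketch(dst, src, cut_idx):
    with torch.no_grad():
        for idx, (p_dst, p_src) in enumerate(zip(dst.parameters(), src.parameters())):
            if idx >= cut_idx:  # server half only
                p_dst.copy_(p_src)
    return dst
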
def standard_process(epochs, use_cuda=True, weight=True):
    model = classifier()  # TODO: fix params; MLP(hidden_size)
    if torch.cuda.is_available() and use_cuda:
        model.cuda()
    # Multi-GPU training via nn.DataParallel is possible here but disabled:
    # if torch.cuda.device_count() > 1:
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     model = nn.DataParallel(model)
    # model.to(device)
    optimizer = optim.SGD(params=model.parameters(), lr=lr)

    loss, acc = {}, {}
    for task in range(num_task):
        loss[task] = []
        acc[task] = []
        for _ in tqdm(range(epochs)):
            loss[task].append(normal_train(model, optimizer, train_loader[task]))
        for sub_task in range(task + 1):
            acc[sub_task].append(test(model, test_loader[sub_task]))
        # Keep the first task's weights so an EWC run can warm-start from them.
        if task == 0 and weight:
            weight = model.state_dict()
    return loss, acc, weight

def our_process(train_loader, test_loader, labels, online=False, result_file='./our_process.txt'):
    gpu = torch.device('cuda:0')
    new_model = model_retrieval().cuda(gpu)
    models = [copy.deepcopy(new_model) for _ in range(args.num_task)]
    temp_model = copy.deepcopy(new_model)
    cut_idx = generate_cut_layer(args.split, temp_model)
    optimizers = [optim.SGD(params=models[idx].parameters(), lr=args.lr)
                  for idx in range(args.num_task)]
    ewcs = []
    if_freeze = 0

    for task in range(args.num_task):
        print('Training Task {}... Labels: {}'.format(task, labels[task]))
        # `model` is a per-task working copy: from the second task on, it takes
        # the previous task's parameters from the cut layer onward (see models_copy).
        model = models[task] if task == 0 else models_copy(models[task], models[task - 1], cut_idx)
        optimizer = optimizers[task]
        if task == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.first_lr
            for iteration in range(args.iterations):
                loss = normal_train(model, labels[task], optimizer, train_loader[task], gpu)
                print('Iteration: {}\tLoss: {}'.format(iteration, loss))
                acc = test_model(model, labels[task], test_loader[task], gpu)
                print('Device Task: {}\tTest Task: {}\tAccuracy: {}'.format(task, task, acc))
                with open(result_file, 'a') as f:
                    f.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(task, iteration, loss, task, task, acc))
        else:
            for iteration in range(args.iterations):
                # our_train extends ewc_train with two extra arguments: cut_idx
                # and if_freeze. The online variant passes ewcs[-1:], i.e. a
                # list holding only the most recent EWC object.
                if online:
                    loss = our_train(model, labels[task], optimizer, train_loader[task],
                                     ewcs[-1:], args.lam, gpu, cut_idx, if_freeze)
                else:
                    loss = our_train(model, labels[task], optimizer, train_loader[task],
                                     ewcs, args.lam, gpu, cut_idx, if_freeze)
                # If the loss drops below the threshold, set if_freeze = 1 and
                # pass it into the next our_train call.
                if_freeze = 1 if loss < args.threshold else 0
                print('Iteration: {}\tLoss: {}\tif freeze: {}'.format(iteration, loss, if_freeze))
                for sub_task in range(task + 1):  # loop over device models
                    # temp_model: device (front) half from models[sub_task],
                    # server (back) half from the current model.
                    temp_model = copy.deepcopy(models[sub_task])
                    temp_model = models_copy(temp_model, model, cut_idx)
                    for i in range(task + 1):  # loop over test tasks
                        acc = test_model(temp_model, labels[i], test_loader[i], gpu)
                        print('Device Task: {}\tTest Task: {}\tAccuracy: {}'.format(sub_task, i, acc))
                        with open(result_file, 'a') as f:
                            # Current server parameters (task) + device parameters (sub_task),
                            # evaluated on test task i.
                            f.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(task, iteration, loss, sub_task, i, acc))
        ewcs.append(splitEWC(model, train_loader[task], cut_idx, gpu))

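# --- Illustrative sketch (not the repo's implementation) --------------------
# A hedged guess at the freezing step behind `our_train`'s extra (cut_idx,
# if_freeze) arguments: once the loss falls below args.threshold, the device
# (front) half of the network stops receiving gradient updates, so only the
# server half keeps adapting. `apply_freeze_sketch` is a hypothetical helper.
def apply_freeze_sketch(model, cut_idx, if_freeze):
    for idx, param in enumerate(model.parameters()):
        if idx < cut_idx:
            # if_freeze == 1 -> requires_grad = False for the front half;
            # if_freeze == 0 -> thaw it again.
            param.requires_grad = not if_freeze
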
def standard_process(epochs, use_cuda=True, weight=True):
    model = MLP(hidden_size)
    if torch.cuda.is_available() and use_cuda:
        model.cuda()
    optimizer = optim.SGD(params=model.parameters(), lr=lr)

    loss, acc = {}, {}
    for task in range(num_task):
        loss[task] = []
        acc[task] = []
        for _ in tqdm(range(epochs)):
            loss[task].append(normal_train(model, optimizer, train_loader[task]))
        for sub_task in range(task + 1):
            acc[sub_task].append(test(model, test_loader[sub_task]))
        # Keep the first task's weights so an EWC run can warm-start from them.
        if task == 0 and weight:
            weight = model.state_dict()
    return loss, acc, weight

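# Example of how these entry points chain together: standard_process returns
# the weights saved after task 0, and ewc_process (below) can warm-start from
# them and skip retraining the first task. Illustrative call only; the epoch
# and importance values are arbitrary.
#
#     loss_std, acc_std, weight = standard_process(epochs=50)
#     loss_ewc, acc_ewc = ewc_process(epochs=50, importance=1000, weight=weight)
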
def ewc_process(epochs, importance, use_cuda=True, weight=None):
    model = classifier()  # TODO: fix params; MLP(hidden_size)
    if torch.cuda.is_available() and use_cuda:
        model.cuda()
    # Multi-GPU training via nn.DataParallel is possible here but disabled:
    # if torch.cuda.device_count() > 1:
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     model = nn.DataParallel(model)
    # model.to(device)
    optimizer = optim.SGD(params=model.parameters(), lr=lr)

    loss, acc = {}, {}
    for task in range(num_task):
        loss[task] = []
        acc[task] = []

        if task == 0:
            # Either warm-start from pre-trained weights or train the first
            # task without any EWC penalty.
            if weight:
                model.load_state_dict(weight)
            else:
                for _ in tqdm(range(epochs)):
                    loss[task].append(normal_train(model, optimizer, train_loader[task]))
                    acc[task].append(test(model, test_loader[task]))
        else:
            # Build a replay sample from all previous tasks to estimate the
            # Fisher information for the EWC penalty.
            old_tasks = []
            for sub_task in range(task):
                old_tasks = old_tasks + train_loader[sub_task].dataset.get_sample(sample_size)
            old_tasks = random.sample(old_tasks, k=sample_size)

            for _ in tqdm(range(epochs)):
                loss[task].append(
                    ewc_train(model, optimizer, train_loader[task],
                              EWC(model, old_tasks), importance))
                for sub_task in range(task + 1):
                    acc[sub_task].append(test(model, test_loader[sub_task]))

    return loss, acc

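# --- Illustrative sketch (not the repo's implementation) --------------------
# The EWC(model, old_tasks) helper used above, in the spirit of Kirkpatrick
# et al. (2017): snapshot the parameters after a task, estimate a diagonal
# Fisher from replayed samples, and penalize movement away from the snapshot.
# Assumes `old_tasks` is a list of input tensors already on the model's
# device, as returned by dataset.get_sample(sample_size); `EWCSketch` is a
# hypothetical name.
import torch
import torch.nn.functional as F

class EWCSketch:
    def __init__(self, model, old_tasks):
        self.params = {n: p.clone().detach()
                       for n, p in model.named_parameters() if p.requires_grad}
        self.fisher = {n: torch.zeros_like(p) for n, p in self.params.items()}
        model.eval()
        for x in old_tasks:
            model.zero_grad()
            output = model(x.unsqueeze(0))
            # Squared gradients of the log-likelihood at the model's own
            # prediction approximate the diagonal of the Fisher information.
            loss = F.nll_loss(F.log_softmax(output, dim=1), output.max(1)[1])
            loss.backward()
            for n, p in model.named_parameters():
                if n in self.fisher and p.grad is not None:
                    self.fisher[n] += p.grad.detach() ** 2 / len(old_tasks)

    def penalty(self, model):
        loss = 0.0
        for n, p in model.named_parameters():
            if n in self.fisher:
                loss = loss + (self.fisher[n] * (p - self.params[n]) ** 2).sum()
        return loss
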
def ewc_process(train_loader, test_loader, labels, class_incremental, online=False,
                result_file='./ewc.txt'):
    gpu = torch.device('cuda:0')
    new_model = model_retrieval().cuda(gpu)
    models = [copy.deepcopy(new_model) for _ in range(args.num_task)]
    cur_label = []
    temp_model = copy.deepcopy(new_model)
    cut_idx = generate_cut_layer(args.split, temp_model)
    optimizers = [optim.SGD(params=models[idx].parameters(), lr=args.lr)
                  for idx in range(args.num_task)]
    ewcs = []

    for task in range(args.num_task):
        print('Training Task {}... Labels: {}'.format(task, labels[task]))
        # `model` is a per-task working copy: from the second task on, it takes
        # the previous task's parameters from the cut layer onward.
        model = models[task] if task == 0 else models_copy(models[task], models[task - 1], cut_idx)
        optimizer = optimizers[task]
        # Class-incremental: labels accumulate across tasks; otherwise train on
        # the current task's labels only.
        cur_label = cur_label + labels[task] if class_incremental else labels[task]
        if task == 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.first_lr
            for iteration in range(args.iterations):
                loss = normal_train(model, cur_label, optimizer, train_loader[task], gpu)
                print('Iteration: {}\tLoss: {}'.format(iteration, loss))
                acc = test_model(model, cur_label, test_loader[task], gpu)
                print('Device Task: {}\tTest Task: {}\tAccuracy: {}'.format(task, task, acc))
                with open(result_file, 'a') as f:
                    f.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(task, iteration, loss, task, task, acc))
                if iteration % 20 == 0:
                    # Decay the learning rate every 20 iterations.
                    for param_group in optimizer.param_groups:
                        param_group['lr'] *= 0.95
        else:
            for iteration in range(args.iterations):
                # The online variant passes ewcs[-1:], i.e. a list holding only
                # the most recent EWC object; otherwise one penalty per
                # consolidated task is applied.
                if online:
                    loss = ewc_train(model, cur_label, optimizer, train_loader[task],
                                     ewcs[-1:], args.lam, gpu)
                else:
                    loss = ewc_train(model, cur_label, optimizer, train_loader[task],
                                     ewcs, args.lam, gpu)
                print('Iteration: {}\tLoss: {}'.format(iteration, loss))
                for sub_task in range(task + 1):  # loop over device models
                    # temp_model: device (front) half from models[sub_task],
                    # server (back) half from the current model.
                    temp_model = copy.deepcopy(models[sub_task])
                    temp_model = models_copy(temp_model, model, cut_idx)
                    for i in range(task + 1):  # loop over test tasks
                        # Use a local variable so the training label list is
                        # not clobbered in the non-incremental case.
                        eval_label = cur_label if class_incremental else labels[i]
                        acc = test_model(temp_model, eval_label, test_loader[i], gpu)
                        print('Device Task: {}\tTest Task: {}\tAccuracy: {}'.format(sub_task, i, acc))
                        with open(result_file, 'a') as f:
                            # Current server parameters (task) + device parameters (sub_task),
                            # evaluated on test task i.
                            f.write('{}\t{}\t{}\t{}\t{}\t{}\n'.format(task, iteration, loss, sub_task, i, acc))
                if iteration % 20 == 0:
                    for param_group in optimizer.param_groups:
                        param_group['lr'] *= 0.95
        ewcs.append(splitEWC(model, train_loader[task], cut_idx, gpu))

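# --- Illustrative sketch (not the repo's implementation) --------------------
# splitEWC(model, loader, cut_idx, gpu) presumably differs from plain EWC only
# in scope: the Fisher estimate and the penalty cover just the parameters from
# cut_idx onward (the server half), leaving the device half unregularized. A
# hedged sketch of that restriction, given precomputed `fisher` and parameter
# snapshot `old_params` dicts keyed by parameter name:
def split_penalty_sketch(fisher, old_params, model, cut_idx):
    loss = 0.0
    for idx, (n, p) in enumerate(model.named_parameters()):
        if idx >= cut_idx and n in fisher:  # server half only
            loss = loss + (fisher[n] * (p - old_params[n]) ** 2).sum()
    return loss
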