def train(data_loader):
    """Run one CTC-training epoch over `data_loader`.

    Relies on module-level globals: `model`, `criterion`, `optimizer`,
    `converter`, `util`, and the pre-allocated `image`/`text`/`length`
    buffers.  Returns the mean per-batch loss for the epoch.
    """
    total_loss = 0
    model.train()
    data_loader = tqdm(data_loader)
    for idx, (cpu_images, cpu_texts) in enumerate(data_loader):
        batch_size = cpu_images.size(0)
        # Copy the batch into the reusable device-side buffers.
        util.loadData(image, cpu_images)
        t, l = converter.encode(cpu_texts)
        util.loadData(text, t)
        util.loadData(length, l)
        output = model(image)
        # CTC needs the (constant) output time length for every sample.
        output_size = Variable(torch.LongTensor([output.size(0)] * batch_size))
        loss = criterion(output, text, output_size, length) / batch_size
        optimizer.zero_grad()
        loss.backward()
        clipping_value = 1.0
        torch.nn.utils.clip_grad_norm_(model.parameters(), clipping_value)
        # Skip both the update AND the bookkeeping when the loss diverged.
        # Previously a NaN/Inf loss was still added to `total_loss`, which
        # poisoned the running average and the returned epoch loss.
        if not (torch.isnan(loss) or torch.isinf(loss)):
            optimizer.step()
            total_loss += loss.item()
        if idx % 1000 == 0 and idx != 0:
            print('{} index: {}/{}(~{}%) loss: {}'.format(
                datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), idx,
                len(data_loader), round(idx * 100 / len(data_loader)),
                total_loss / idx))
    return total_loss / len(data_loader)
def train():
    """One epoch of AWD-LSTM-style language-model training with
    randomly varied BPTT windows (AR/TAR regularization included).

    Uses module-level globals: `model`, `optimizer`, `criterion`,
    `train_data`, `batching`, `epoch`, and the hyperparameter constants.
    """
    total_loss = 0
    # Bug fix: the timer was bound to `begin_t`, but the logging branch
    # below read `start_time` — a guaranteed NameError at the first log.
    start_time = time.time()
    hidden = model.init_hidden(BATCH_SIZE)
    batch, i = 0, 0
    while i < train_data.size(0) - 1 - 1:
        # Occasionally halve the BPTT target, then sample a window length.
        bptt = BPTT if np.random.random() < 0.95 else BPTT / 2.
        # Prevent excessively small or negative sequence lengths.
        seq_len = max(5, int(np.random.normal(bptt, 5)))
        # Scale the learning rate with the window so short windows do not
        # receive disproportionately large updates; restored after the step.
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * seq_len / BPTT
        model.train()
        data, targets = batching.get_batch(train_data, i, seq_len=seq_len)
        # Detach the hidden state so backprop stops at the batch boundary
        # instead of reaching back to the start of the dataset.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden, rnn_hs, dropped_rnn_hs = model(data, hidden, return_h=True)
        raw_loss = criterion(output.view(-1, num_tokens), targets)
        loss = raw_loss
        # Activation Regularization (AR).
        loss = loss + sum(ALPHA * dropped_rnn_h.pow(2).mean()
                          for dropped_rnn_h in dropped_rnn_hs[-1:])
        # Temporal Activation Regularization (TAR, "slowness").
        loss = loss + sum(BETA * (rnn_h[1:] - rnn_h[:-1]).pow(2).mean()
                          for rnn_h in rnn_hs[-1:])
        loss.backward()
        # clip_grad_norm_ (in-place) replaces the deprecated clip_grad_norm.
        torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRADIENTS)
        optimizer.step()
        # Accumulate a plain float; `total_loss[0]` indexing of a 0-dim
        # tensor is deprecated/removed in modern PyTorch.
        total_loss += raw_loss.item()
        optimizer.param_groups[0]['lr'] = lr2
        if batch % DEUBG_LOG_INTERVAL == 0 and batch > 0:
            cur_loss = total_loss / DEUBG_LOG_INTERVAL
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, len(train_data) // BPTT,
                      optimizer.param_groups[0]['lr'],
                      elapsed * 1000 / DEUBG_LOG_INTERVAL, cur_loss,
                      math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
        batch += 1
        i += seq_len
def main(_):
    """Entry point: run the model routine selected by the --mode flag."""
    mode = tf.flags.FLAGS.mode
    # Dispatch table instead of an if/elif chain; unknown modes fall
    # through to the same ValueError as before.
    dispatch = {
        'train': model.train,
        'test': model.test,
        'predict': model.predict,
    }
    try:
        action = dispatch[mode]
    except KeyError:
        raise ValueError('--mode {} was not found.'.format(mode))
    action()
def train(epoch):
    """Train for one epoch; returns (iters, train_losses, val_losses,
    val_accuracies) traces collected at every log interval.

    Uses module-level globals: `model`, `optimizer`, `criterion`,
    `train_loader`, `args`, `lr`, `evaluate`.
    """
    # Bug fix: the `global` statement must precede every use of the names
    # in this scope — the original declared `global lr, optimizer` inside
    # the decay branch AFTER `optimizer.zero_grad()` etc., which is a
    # SyntaxError in Python 3 ("used prior to global declaration").
    global lr, optimizer
    iters = []
    lrs = []
    train_losses = []
    val_losses = []
    val_accuracies = []
    model.train()
    # train loop
    for batch_idx, batch in enumerate(train_loader):
        # prepare data
        images = Variable(batch[0])
        targets = Variable(batch[1])
        if args.cuda:
            images, targets = images.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Live visualization — only meaningful with batch size 1.
        if args.vis and batch_idx % args.log_interval == 0 and images.shape[0] == 1:
            cv2.imshow('output: ', outputs.cpu().data.numpy()[0][0])
            cv2.imshow('target: ', targets.cpu().data.numpy()[0][0])
            cv2.waitKey(10)
        # Learning rate decay, applied once per qualifying epoch.
        if epoch % args.step_interval == 0 and epoch != 1 and batch_idx == 0:
            if args.lr_decay != 1:
                lr *= args.lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                print('Learning rate decayed to %f.' % lr)
        if batch_idx % args.log_interval == 0:
            val_loss, val_acc = evaluate('val', n_batches=80)
            train_loss = loss.item()
            iters.append(len(train_loader.dataset) * (epoch - 1) + batch_idx)
            lrs.append(lr)
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            examples_this_epoch = batch_idx * len(images)
            epoch_progress = 100. * batch_idx / len(train_loader)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\t'
                  'Train Loss: {:.6f}\tVal Loss: {:.6f}\tVal Acc: {}'.format(
                      epoch, examples_this_epoch, len(train_loader.dataset),
                      epoch_progress, train_loss, val_loss, val_acc))
    return iters, train_losses, val_losses, val_accuracies
def train(epoch):
    """Train for one epoch; returns (iters, train_losses, val_losses,
    val_accuracies) traces collected at every log interval.

    Uses module-level globals: `model`, `optimizer`, `criterion`,
    `train_loader`, `args`, `lr`, `evaluate`.
    """
    # Bug fix: `global lr, optimizer` was declared inside the decay branch
    # after `optimizer` had already been used in this scope — in Python 3
    # that is a SyntaxError ("used prior to global declaration").  The
    # declaration must come first.
    global lr, optimizer
    iters = []
    lrs = []
    train_losses = []
    val_losses = []
    val_accuracies = []
    model.train()
    # train loop
    for batch_idx, batch in enumerate(train_loader):
        # prepare data
        images = Variable(batch[0])
        targets = Variable(batch[1])
        if args.cuda:
            images, targets = images.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Live visualization — only meaningful with batch size 1.
        if args.vis and batch_idx % args.log_interval == 0 and images.shape[0] == 1:
            cv2.imshow('output: ', outputs.cpu().data.numpy()[0][0])
            cv2.imshow('target: ', targets.cpu().data.numpy()[0][0])
            cv2.waitKey(10)
        # Learning rate decay, applied once per qualifying epoch.
        if epoch % args.step_interval == 0 and epoch != 1 and batch_idx == 0:
            if args.lr_decay != 1:
                lr *= args.lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                print('Learning rate decayed to %f.' % lr)
        if batch_idx % args.log_interval == 0:
            val_loss, val_acc = evaluate('val', n_batches=80)
            train_loss = loss.item()
            iters.append(len(train_loader.dataset) * (epoch - 1) + batch_idx)
            lrs.append(lr)
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            val_accuracies.append(val_acc)
            examples_this_epoch = batch_idx * len(images)
            epoch_progress = 100. * batch_idx / len(train_loader)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\t'
                  'Train Loss: {:.6f}\tVal Loss: {:.6f}\tVal Acc: {}'.format(
                      epoch, examples_this_epoch, len(train_loader.dataset),
                      epoch_progress, train_loss, val_loss, val_acc))
    return iters, train_losses, val_losses, val_accuracies
def train(model, device, train_loader, optimizer, epoch):
    """Train `model` for a single epoch over `train_loader`,
    logging the running loss every 30 batches.
    """
    model.train()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        preds = model(inputs)
        batch_loss = loss_func(preds, labels)
        batch_loss.backward()
        optimizer.step()
        # Periodic progress report.
        if (step + 1) % 30 == 0:
            seen = step * len(inputs)
            total = len(train_loader.dataset)
            pct = 100. * step / len(train_loader)
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, seen, total, pct, batch_loss.item()))
def train():
    """One epoch of factored language-model training over shuffled
    fixed-length windows of the training data.

    Uses module-level globals: `model`, `optimizer`, `criterion`,
    `train_data`, `myFactorsInfo`, `epoch`, and the hyperparameter
    constants.
    """
    model.train()  # Turn on training mode which enables dropout.
    total_loss = 0
    start_time = time.time()
    ntokens = myFactorsInfo.getFactorVocabSize(WORD_FACTOR)  # vocabulary size
    hidden = model.init_hidden(BATCH_SIZE)
    # Bug fix: np.random.shuffle mutates its argument in place and needs a
    # mutable sequence — a bare range() raises TypeError under Python 3.
    trainOrder = list(range(0, train_data.size()[1] - 1, MAX_SEQ_LEN))
    np.random.shuffle(trainOrder)
    for batch, i in enumerate(trainOrder):  # (batch#, batch starting index)
        data, targets = get_batch(train_data, i, myFactorsInfo)
        # Detach the hidden state from how it was previously produced;
        # otherwise backprop would reach back to the start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        # In-place clip_grad_norm_ replaces the deprecated clip_grad_norm;
        # guards against exploding gradients in RNNs/LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRADIENTS)
        optimizer.step()
        # Accumulate a plain float; `total_loss[0]` indexing of a 0-dim
        # tensor is deprecated/removed in modern PyTorch.
        total_loss += loss.item()
        if batch % DEUBG_LOG_INTERVAL == 0 and batch > 0:
            cur_loss = total_loss / DEUBG_LOG_INTERVAL
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches | lr ADAM | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, train_data.size()[1] // MAX_SEQ_LEN,
                      elapsed * 1000 / DEUBG_LOG_INTERVAL, cur_loss,
                      math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
def score():
    """Average node-classification accuracy over several random splits.

    For each dataset name the loader randomly splits node IDs 2:8 into
    train/test; we train, predict the held-out labels, and average the
    accuracy over all trials and datasets.  Returns that mean accuracy.
    """
    # Renamed from `time` (shadowed the stdlib module name).
    n_trials = 5
    meanAcc = 0
    names = ["CORA", "CITESEER", "PUBMED"]
    for name in names:
        for i in range(n_trials):
            # trainData / testData: dicts of numpy arrays
            #   {node (N,1), edge (M,2), node_attr (N,D), ID, [label]}
            # testLabel: numpy array (N2, 1); N1 + N2 = N.
            trainData, testData, testLabel = loadData(name)
            trainedModel = model.train(trainData)
            # NOTE(review): the original predicts via the module-level
            # `model`, not `trainedModel`; preserved here — confirm intent.
            predictedLabel = model.test(testData)
            # Bug fix: the accumulator was written `meanACC` while being
            # initialized as `meanAcc` — a NameError at runtime, and the
            # final average then divided the wrong (zero) variable.
            meanAcc += accuracy(testLabel, predictedLabel)
    meanAcc = meanAcc * 1.0 / n_trials / len(names)
    return meanAcc


if __name__ == '__main__':
    score()
def train(stochastic):
    """One epoch of language-model training; switches the global
    optimizer to ASGD when `stochastic` is False (AWD-LSTM recipe).
    """
    global optimizer
    if MODEL_TYPE == "QRNN":
        model.reset()
    # Idiom fix: never compare to False with ==.
    if not stochastic:
        optimizer = torch.optim.ASGD(model.parameters(), lr=INITIAL_LEARNING_RATE,
                                     t0=0, lambd=0., weight_decay=WEIGHT_DECAY)
    total_loss = 0
    begin_t = time.time()
    hidden = model.init_hidden(BATCH_SIZE)
    i = 0
    while i < train_data.size(0) - 2:
        # Occasionally halve the BPTT target, then sample a window length.
        prob = 0.95
        rand_prob = np.random.random()
        if rand_prob < prob:
            bptt = BPTT
        else:
            bptt = BPTT / 2
        s = 5
        window = max(s, int(np.random.normal(bptt, s)))
        window = min(window, BPTT + 10)  # cap very long windows (OOM guard)
        # Scale lr with the window length; restored after the step.
        lr2 = optimizer.param_groups[0]['lr']
        optimizer.param_groups[0]['lr'] = lr2 * window / BPTT
        model.train()
        X, Y = batching.get_batch(train_data, i, seq_len=window)
        # Detach the hidden state so backprop stops at the batch boundary.
        hidden = repackage_hidden(hidden)
        optimizer.zero_grad()
        output, hidden = model(X, hidden)
        loss_base = criterion(output.view(-1, num_tokens), Y)
        ar_loss = ALPHA * model.ar_fragment    # activation regularization
        tar_loss = BETA * model.tar_fragment   # temporal activation regularization
        loss = loss_base + ar_loss + tar_loss
        loss.backward()
        # In-place clip_grad_norm_ replaces the deprecated clip_grad_norm.
        torch.nn.utils.clip_grad_norm_(model.parameters(), CLIP_GRADIENTS)
        optimizer.step()
        # Accumulate a plain float instead of a tensor (`.data`).
        total_loss += loss_base.item()
        optimizer.param_groups[0]['lr'] = lr2
        i += window
def train():
    """One epoch of detection training (RPN + ROI heads); appends the
    per-epoch average of each loss component to the module-level
    loss-history lists.

    Uses module-level globals: `model`, `optimizer`, `train_loader`,
    `trainset`, `device`, `epoch`, and the *_loss history lists.
    """
    running_loss_sum = 0
    running_loss_cls1 = 0
    running_loss_reg1 = 0
    running_loss_cls2 = 0
    running_loss_reg2 = 0
    model.train()
    for i, (image, targets) in enumerate(train_loader):
        name = targets[0]["name"]
        image = image[0].to(device=device)
        targets = [{
            "boxes": targets[0]["boxes"].to(device=device),
            "labels": targets[0]["labels"].to(device=device),
            "name": name
        }]
        loss = model(image, targets)
        loss_sum = sum(lss for lss in loss.values())
        # Bug fix: accumulate plain floats.  Summing the loss tensors
        # themselves kept every batch's autograd graph alive for the whole
        # epoch — a steadily growing memory leak.
        running_loss_sum += loss_sum.item()
        running_loss_cls1 += loss["loss_objectness"].item()
        running_loss_cls2 += loss["loss_classifier"].item()
        running_loss_reg1 += loss["loss_rpn_box_reg"].item()
        running_loss_reg2 += loss["loss_box_reg"].item()
        optimizer.zero_grad()
        loss_sum.backward()
        optimizer.step()
        print(
            f"Epoch: {epoch}, iteration: {i} of {len(trainset)}, loss: {loss_sum}, image: {name}"
        )
    training_loss_sum.append(running_loss_sum / len(trainset))
    rpn_cls_loss.append(running_loss_cls1 / len(trainset))
    roi_cls_loss.append(running_loss_cls2 / len(trainset))
    rpn_reg_loss.append(running_loss_reg1 / len(trainset))
    roi_reg_loss.append(running_loss_reg2 / len(trainset))
# --- module-level training-script setup and loop ---
train_dataset = dataset.MyDataset()
validate_dataset = dataset.MyDataset()
criticer = torch.nn.MSELoss()  # regression criterion
model = model.Model()
optimizer = optim.Adam(model.parameters(), lr=config.lr)
if config.gpu >= 0:
    model.cuda(config.gpu)
max_loss = 0
no_gain = 0
global_step = 0
train_num = len(train_dataset)
model.train()
for epoch in range(config.epoch_num):
    # A fresh, reshuffled loader per epoch.
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True)
    for step, (traindata, trainlabel) in enumerate(train_loader):
        traindata = Variable(traindata).float()
        trainlabel = Variable(trainlabel).float()
        if config.gpu >= 0:
            traindata = traindata.cuda(config.gpu)
            trainlabel = trainlabel.cuda(config.gpu)
        pred = model(traindata)
        loss = criticer(pred, trainlabel)
        optimizer.zero_grad()
        loss.backward()
        # NOTE(review): no optimizer.step() is visible after backward() in
        # this chunk — confirm it exists later in the file, otherwise the
        # weights are never actually updated.
def train(model, criterion, converter, device, train_datasets, valid_datasets=None, pretrain=False):
    """Train the CRNN on one named sub-dataset with 4-batch gradient
    accumulation; periodically validates and saves checkpoints.

    Args:
        model: CRNN to train (moved onto `device`).
        criterion: CTC loss.
        converter: text <-> label-sequence codec with encode()/decode().
        device: torch device (string or torch.device).
        train_datasets / valid_datasets: dicts mapping name -> Dataset.
        pretrain: kept for interface compatibility; pretrained-weight
            loading is currently disabled.
    """
    print('Device:', device)
    model = model.to(device)
    if pretrain:
        # Pretrained-weight loading intentionally disabled (was a large
        # commented-out block in the original).
        pass
    dataset_name = 'symbol'
    # Per-dataset batch sizes.
    batch_dict = {
        'print_word': 32,
        'hand_num': 48,
        'print_num': 48,
        'symbol': 64,
        'hand_word': 64,
        'seal': 64,
        'catword': 32
    }
    dataset = train_datasets.get(dataset_name)
    dataloader = DataLoader(dataset, batch_size=batch_dict.get(dataset_name),
                            shuffle=True, num_workers=4, drop_last=False)
    lr = 1e-3
    optimizer = optim.Adam(model.parameters(), lr)
    optimizer.zero_grad()
    batch_cnt = 0
    for epoch in range(config.epochs):
        epoch_loss = 0
        model.train()
        train_acc = 0
        train_acc_cnt = 0
        for i, (img, label, _) in enumerate(dataloader):
            n_correct = 0
            batch_cnt += 1
            train_acc_cnt += 1
            img = img.to(device)
            text, length = converter.encode(label)
            preds = model(img)
            # CTC needs the output time length for each sample in the batch.
            preds_size = torch.IntTensor([preds.size(0)] * img.size(0))
            preds = preds.to('cpu')
            loss = criterion(preds, text, preds_size, length)
            # Greedy decode for the accuracy metric.
            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
            list1 = list(label)
            for pred, target in zip(sim_preds, list1):
                if pred == target:
                    n_correct += 1
            loss.backward()
            # Gradient accumulation: step once every 4 batches.  Bug fix —
            # the original condition `if (i + 1) % 4:` was inverted: it
            # stepped on every batch EXCEPT the 4th, defeating the
            # accumulation and skipping exactly the intended updates.
            if (i + 1) % 4 == 0:
                optimizer.step()
                optimizer.zero_grad()
            epoch_loss += loss.item()
            train_acc += n_correct / len(list1)
            if (i + 1) % 4 == 0:
                print("epoch: {:<3d}, dataset:{:<8}, batch: {:<3d}, batch loss: {:4f}, epoch loss: {:4f}, acc: {}". \
                      format(epoch, dataset_name, i, loss.item(), epoch_loss, n_correct / len(list1)))
        print('==========train_average_acc is: {:.3f}'.format(train_acc / train_acc_cnt))
        # Validate every 3rd epoch.
        if epoch % 3 == 0:
            dataset_names = [dataset_name]
            accs, valid_losses = valid(model, criterion, converter, device,
                                       valid_datasets, dataset_names)
            acc, valid_loss = accs.get(dataset_name), valid_losses.get(dataset_name)
            print('========== valid acc: ', acc, ' ============valid loss: ', valid_loss)
        # Checkpoint every 3rd epoch, and additionally whenever the
        # training accuracy crosses 0.95.
        if epoch % 3 == 0:
            state_dict = model.state_dict()
            torch.save(
                state_dict,
                '/root/last_dataset/crnn_char_pths/catword_lr3_epoch_{}_acc{:4f}.pth'
                .format(epoch + 1, train_acc / train_acc_cnt))
        if train_acc / train_acc_cnt > 0.95:
            state_dict = model.state_dict()
            torch.save(
                state_dict,
                '/root/last_dataset/crnn_char_pths/catword_lr3_epoch{}_acc{:4f}.pth'
                .format(epoch + 1, train_acc / train_acc_cnt))
def train_model(model):
    """Convenience wrapper: train `model` on a fresh 15k-sample
    training set and return whatever `train` returns.
    """
    samples = training_set(size=15000)
    return train(model, samples)
def train():
    """Full training loop: logs images/graph/scalars/histograms to
    TensorBoard, validates every `args.log_interval` batches, and saves
    checkpoints and metric traces.

    Uses module-level globals: `args`, `model`, `optimizer`, `criterion`,
    `train_loader`, `device`, `lr`, `backup_dir`, `evaluate`.
    """
    tb = SummaryWriter(comment=f"LR_{args.lr}_BS_{args.batch_size}")
    images, labels = next(iter(train_loader))
    grid = torchvision.utils.make_grid(images)
    tb.add_image("image", grid)
    tb.add_graph(model.to(device=device), images.to(device=device))
    print("Batch Size: {} Learning Rate: {}".format(args.lr, args.batch_size))
    for epoch in range(1, args.epochs + 1):
        t1 = time.time()
        # Per-epoch metric traces.  (Dead code removed: the original first
        # bound `batch_metrics = defaultdict(list)` and immediately
        # overwrote it with this literal dict.)
        batch_metrics = {
            "iters": [],
            "lrs": [],
            "train_losses": [],
            "val_losses": [],
            "val_accuracies": [],
        }
        model.train()
        for batch_idx, batch in enumerate(train_loader):
            # prepare data
            images = Variable(batch[0]).to(device=device)
            targets = Variable(batch[1]).to(device=device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            # Live visualization — only meaningful with batch size 1.
            if args.vis and batch_idx % args.log_interval == 0 and images.shape[0] == 1:
                cv2.imshow("output: ", outputs.cpu().data.numpy()[0][0])
                cv2.imshow("target: ", targets.cpu().data.numpy()[0][0])
                cv2.waitKey(10)
            if batch_idx % args.log_interval == 0:
                val_loss, val_acc = evaluate("val", n_batches=args.val_size)
                train_loss = loss.item()
                batch_metrics["iters"].append(
                    len(train_loader.dataset) * (epoch - 1) + batch_idx)
                # NOTE(review): `lr` is read from module scope and never
                # assigned here — confirm it is defined at module level.
                batch_metrics["lrs"].append(lr)
                batch_metrics["train_losses"].append(train_loss)
                batch_metrics["val_losses"].append(val_loss)
                batch_metrics["val_accuracies"].append(val_acc)
                examples_this_epoch = batch_idx * len(images)
                epoch_progress = 100.0 * batch_idx / len(train_loader)
                print("Train Epoch: {} [{}/{} ({:.0f}%)]\t"
                      "Train Loss: {:.4f}\tVal Loss: {:.4}\tVal Acc: {:.4}".format(
                          epoch,
                          examples_this_epoch,
                          len(train_loader.dataset),
                          epoch_progress,
                          train_loss,
                          val_loss,
                          val_acc,
                      ))
        print(
            "epoch: {} total train_loss: {:.4f} total val_loss: {:.4f} total val_acc: {:.4f}"
            .format(
                epoch,
                sum(batch_metrics["train_losses"]),
                sum(batch_metrics["val_losses"]),
                sum(batch_metrics["val_accuracies"]) /
                len(batch_metrics["val_accuracies"]),
            ))
        if epoch % args.save_interval == 0 and args.save_model:
            save_path = os.path.join(backup_dir, "IGVCModel" + "_" + str(epoch) + ".pt")
            print("Saving model: %s" % save_path)
            torch.save(model.state_dict(), save_path)
        tb.add_scalar("train loss", sum(batch_metrics["train_losses"]), epoch)
        tb.add_scalar("val loss", sum(batch_metrics["val_losses"]), epoch)
        tb.add_scalar(
            "val_acc",
            sum(batch_metrics["val_accuracies"]) /
            len(batch_metrics["val_accuracies"]),
            epoch,
        )
        for name, weight in model.named_parameters():
            tb.add_histogram(name, weight, epoch)
            # Robustness: frozen / unused parameters have no .grad tensor,
            # and add_histogram crashes on None.
            if weight.grad is not None:
                tb.add_histogram("{}.grad".format(name), weight.grad, epoch)
        metrics_path = os.path.join(backup_dir, "metrics.npy")
        np.save(metrics_path, batch_metrics)
        t2 = time.time()
        print("training time: %.2fs" % (t2 - t1))
    tb.close()
def test(model, queryLoader, galleryLoader, device, ranks=[1, 5, 10, 20]):
    """Evaluate re-identification: extract query/gallery features, compute
    CMC ranks, and report bag-classification accuracy.

    Returns (rank-1 CMC score, bag-prediction accuracy over query+gallery).
    NOTE(review): `ranks=[...]` is a mutable default argument; it is never
    mutated here, but a tuple would be safer.  Writes ranks to the
    module-level file handle `test_f`.
    """
    with torch.no_grad():
        model.train(False)
        model.eval()
        correct = 0.
        total = 0.
        total_correct = 0.
        # ---- query set: features + bag-label accuracy ----
        qf, q_pids, q_bags = [], [], []
        for batch_idx, (img, pid, bag) in tqdm(enumerate(queryLoader)):
            #total += 1.0
            img = img.to(device=device, dtype=torch.float)
            bag = bag.to(device=device, dtype=torch.long)
            _, _, features, output_bag = model(img)
            #print(output_bag.shape)
            _, predicted = torch.max(output_bag, 1)
            #print('test_predicted:')
            #print(predicted)
            #print('test_label:')
            #print(bag)
            correct = (predicted == bag).sum()
            total += img.shape[0]
            total_correct += correct
            features = features.squeeze(0)
            features = features.data.cpu()
            qf.append(features)
            q_pids.extend(pid)
            q_bags.extend(bag)
        qf = torch.cat([x for x in qf])  #torch.stack(qf)
        q_pids = np.asarray(q_pids)
        print("Extracted features for query set, obtained {}-by-{} matrix".
              format(qf.size(0), qf.size(1)))
        # ---- gallery set: same extraction ----
        gf, g_pids, g_bags = [], [], []
        for batch_idx, (img, pid, bag) in tqdm(enumerate(galleryLoader)):
            #total += 1.0
            img = img.to(device=device, dtype=torch.float)
            bag = bag.to(device=device, dtype=torch.long)
            _, _, features, output_bag = model(img)
            _, predicted = torch.max(output_bag, 1)
            #print('test_predicted:')
            #print(predicted)
            #rint('test_label:')
            #print(bag)
            correct = (predicted == bag).sum()
            total += img.shape[0]
            total_correct += correct
            features = features.squeeze(0)
            features = features.data.cpu()
            gf.append(features)
            g_pids.extend(pid)
            g_bags.extend(bag)
        gf = torch.cat([x for x in gf])  #torch.stack(gf)
        g_pids = np.asarray(g_pids)
        qf = qf.squeeze()
        gf = gf.squeeze()  # 29102*128
        print("Extracted features for gallery set, obtained {}-by-{} matrix".
              format(gf.size(0), gf.size(1)))
        print("Computing distance matrix")
        cmc = evaluate_rank(gf, qf, g_pids, q_pids)
        '''
        m, n = qf.size(0), gf.size(0)
        distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
            torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
        distmat.addmm_(1, -2, qf, gf.t())
        distmat = distmat.numpy()
        cmc, mAP = evaluate(distmat, q_pids, g_pids)
        '''
        print("Results ----------")
        #print("mAP: {:.1%}".format(mAP))
        print("CMC curve")
        for r in ranks:
            print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
            test_f.write("Rank-{:<3}: {:.1%}\n".format(r, cmc[r - 1]))
        print("\n")
        print(total_correct.float(), total)
        print("correct_rate:", total_correct.float() * 1.0 / total)
        return cmc[0], total_correct.float() * 1.0 / total
def train(epoch, model, criterion_cont, criterion_trip, criterion_sim, criterion_l2,
          criterion_label, optimizer, scheduler, trainLoader, device, cont_iter):
    """One training epoch combining triplet, similarity, reconstruction
    and bag-classification losses; logs to the module-level `writer` and
    `train_f`.

    Returns the updated global iteration counter `cont_iter`.
    (`criterion_cont` and `criterion_sim` are accepted but unused,
    preserved for interface compatibility.)
    """
    model.train(True)
    losses = AverageMeter()
    cont_losses = AverageMeter()
    trip_losses = AverageMeter()
    sim_losses = AverageMeter()
    label_losses = AverageMeter()
    scheduler.step()
    print('lr:', optimizer.state_dict()['param_groups'][0]['lr'])
    lr = optimizer.state_dict()['param_groups'][0]['lr']
    # Once the lr has decayed far enough, reload the best checkpoint.
    if lr <= 0.0001:
        checkpoint = torch.load('./' + args.save_dir + '/best_model.pth.tar')
        model.load_state_dict(checkpoint['state_dict'])
    total = 0.0
    total_correct = 0.0
    for batch_idx, (imgs, pids, bags_label) in tqdm(enumerate(trainLoader)):
        imgs, pids, bags_label = imgs.cuda(), pids.cuda(), bags_label.cuda()
        imgs = imgs.float()
        pids = pids.long()
        bags_label = bags_label.long()
        optimizer.zero_grad()
        output_ident, output_all, ident_features, output_bag = model(imgs)
        # Bag-classification accuracy bookkeeping.
        _, predicted = torch.max(output_bag, 1)
        correct = (predicted == bags_label).sum()
        total += imgs.shape[0]
        total_correct += correct
        # For every pid, remember the index of its bag-free sample (label 0).
        dict_hasBag = dict()
        for i in range(len(pids)):
            if bags_label[i] == 0:
                dict_hasBag[pids[i].data.cpu().item()] = i
        # NOTE(review): this KeyErrors if some pid in the batch has no
        # bag-free sample — presumably the sampler guarantees one; confirm.
        imgs_proj_without_bag = torch.zeros(imgs.shape, device=device)
        for i in range(len(pids)):
            imgs_proj_without_bag[i] = imgs[dict_hasBag[pids[i].data.cpu().item()]]
        # Loss components.
        label_loss = criterion_label(output_bag, bags_label)
        trip_loss, sim_loss = criterion_trip(ident_features, pids)
        cont_loss_withoutBag = criterion_l2(output_ident, imgs_proj_without_bag)
        cont_loss_withBag = criterion_l2(output_all, imgs)
        cont_loss = cont_loss_withBag + cont_loss_withoutBag
        # Dead conditional removed: the original `if epoch < 10 ... else ...`
        # had byte-identical branches, so the weighting is unconditional.
        loss = trip_loss + sim_loss * 0.1 + cont_loss * 500 + label_loss * 0.05
        loss.backward()
        optimizer.step()
        # Loss bookkeeping / TensorBoard.
        losses.update(loss.item())
        trip_losses.update(trip_loss.item())
        sim_losses.update(sim_loss.item())
        cont_losses.update(cont_loss.item())
        label_losses.update(label_loss.item())
        writer.add_scalar("Train/Loss", losses.val, cont_iter)
        writer.add_scalar("Train/trip_Loss", trip_losses.val, cont_iter)
        writer.add_scalar("Train/sim_Loss", sim_losses.val, cont_iter)
        writer.add_scalar("Train/cont_Loss", cont_losses.val, cont_iter)
        writer.add_scalar("Train/label_loss", label_losses.val, cont_iter)
        cont_iter += 1
        if (cont_iter + 1) % 50 == 0:
            print("iter {}\t Loss {:.4f} ({:.4f}) "
                  "trip_loss {:.4f} ({:.4f}) "
                  "sim_loss {:.4f} ({:.4f}) "
                  "cont_loss {:.4f} ({:.4f})"
                  "label_loss {:.4f} ({:.4f})"
                  "total_correct_rate ({:.5f})".format(
                      cont_iter, losses.val, losses.avg, trip_losses.val,
                      trip_losses.avg, sim_losses.val, sim_losses.avg,
                      cont_losses.val, cont_losses.avg, label_losses.val,
                      label_losses.avg, total_correct.float() / total))
            train_f.write("iter {}\t Loss {:.4f} ({:.4f}) "
                          "trip_loss {:.4f} ({:.4f}) "
                          "sim_loss {:.4f} ({:.4f}) "
                          "cont_loss {:.4f} ({:.4f})"
                          "label_loss {:.4f} ({:.4f})"
                          "total_correct_rate ({:.5f})".format(
                              cont_iter, losses.val, losses.avg,
                              trip_losses.val, trip_losses.avg,
                              sim_losses.val, sim_losses.avg,
                              cont_losses.val, cont_losses.avg,
                              label_losses.val, label_losses.avg,
                              total_correct.float() * 1.0 / total))
            train_f.write('\n')
    return cont_iter