def train(train_loader, net, criterion, optimizer, epoch, train_args):
    """Run one training epoch for the VOC segmentation net.

    Args:
        train_loader: iterable of (inputs, labels) batches.
        net: segmentation network producing per-pixel class scores.
        criterion: loss function applied to (outputs, labels).
        optimizer: optimizer stepped once per batch.
        epoch: 1-based epoch index (used to derive the global iteration).
        train_args: dict; reads 'print_freq'.

    Side effects: logs 'train_loss' to the module-level tensorboard `writer`
    and prints progress every `print_freq` batches.
    """
    train_loss = AverageMeter()
    curr_iter = (epoch - 1) * len(train_loader)
    for i, data in enumerate(train_loader):
        inputs, labels = data
        # labels must be per-pixel maps matching the input's spatial dims
        assert inputs.size()[2:] == labels.size()[1:]
        N = inputs.size(0)
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()

        optimizer.zero_grad()
        outputs = net(inputs)
        assert outputs.size()[2:] == labels.size()[1:]
        assert outputs.size()[1] == voc.num_classes

        # normalize by batch size so the logged loss is per-sample
        loss = criterion(outputs, labels) / N
        loss.backward()
        optimizer.step()

        # BUG FIX: `loss.data[0]` was removed in PyTorch >= 0.5; use
        # `.item()` as the other training loops in this file already do.
        train_loss.update(loss.item(), N)

        curr_iter += 1
        writer.add_scalar('train_loss', train_loss.avg, curr_iter)

        if (i + 1) % train_args['print_freq'] == 0:
            print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (
                epoch, i + 1, len(train_loader), train_loss.avg
            ))
def validate(val_loader, model, criterion, log):
    """Evaluate `model` on `val_loader`; returns (top1.avg, losses.avg).

    Prints a summary line through `print_log`. Reads the module-level
    `args.use_cuda` flag.
    """
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    # BUG FIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
    # (`async` became a keyword); the kwarg is now `non_blocking`.
    # The deprecated `volatile=True` Variables are replaced by the
    # equivalent `torch.no_grad()` context.
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):
            if args.use_cuda:
                target = target.cuda(non_blocking=True)
                input = input.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
            # `.item()` replaces the pre-0.5 `loss.data[0]` / `prec1[0]` forms
            losses.update(loss.item(), input.size(0))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))

    print_log(' **Test** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(
        top1=top1, top5=top5, error1=100 - top1.avg), log)

    return top1.avg, losses.avg
def main(args):
    """Distribute validation files across GPUs, collect per-file results,
    and print per-class IoU plus overall mean IoU / accuracy.

    Spawns one worker process per device; workers push
    (acc, pix, intersection, union) tuples onto a shared queue.
    """
    # Parse device ids
    default_dev, *parallel_dev = parse_devices(args.devices)
    all_devs = parallel_dev + [default_dev]
    all_devs = [x.replace('gpu', '') for x in all_devs]
    all_devs = [int(x) for x in all_devs]
    nr_devs = len(all_devs)

    with open(args.list_val, 'r') as f:
        lines = f.readlines()
        nr_files = len(lines)
    if args.num_val > 0:
        nr_files = min(nr_files, args.num_val)

    nr_files_per_dev = math.ceil(nr_files / nr_devs)

    pbar = tqdm(total=nr_files)

    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()

    result_queue = Queue(500)
    procs = []
    for dev_id in range(nr_devs):
        start_idx = dev_id * nr_files_per_dev
        end_idx = min(start_idx + nr_files_per_dev, nr_files)
        proc = Process(target=worker,
                       args=(args, dev_id, start_idx, end_idx, result_queue))
        print('process:%d, start_idx:%d, end_idx:%d' % (dev_id, start_idx, end_idx))
        proc.start()
        procs.append(proc)

    # master fetches results.
    # BUG FIX: the original busy-waited with `if result_queue.empty():
    # continue`, spinning a CPU core at 100%; Queue.get() blocks until a
    # result is available and is race-free.
    processed_counter = 0
    while processed_counter < nr_files:
        (acc, pix, intersection, union) = result_queue.get()
        acc_meter.update(acc, pix)
        intersection_meter.update(intersection)
        union_meter.update(union)
        processed_counter += 1
        pbar.update(1)

    for p in procs:
        p.join()

    # epsilon guards against empty classes (zero union)
    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for i, _iou in enumerate(iou):
        print('class [{}], IoU: {}'.format(i, _iou))

    print('[Eval Summary]:')
    print('Mean IoU: {:.4}, Accuracy: {:.2f}%'
          .format(iou.mean(), acc_meter.average() * 100))

    print('Evaluation Done!')
def validate(val_loader, model, i, silence=False):
    """Run COCO bbox evaluation of `model` over `val_loader`.

    Args:
        val_loader: loader whose dataset exposes a pycocotools `coco` object.
        model: detector returning (cls_prob, bbox_pred, rois).
        i: current training iteration, reported in the summary print.
        silence: unused here; kept for interface compatibility.

    Returns:
        coco_eval.stats[0] (COCO AP @ IoU 0.50:0.95).
    """
    batch_time = AverageMeter()

    coco_gt = val_loader.dataset.coco
    coco_pred = COCO()
    # BUG FIX: `coco_gt.datasets['images']` was a typo (AttributeError) —
    # the attribute is `dataset`, as the very next line already uses.
    coco_pred.dataset['images'] = [img for img in coco_gt.dataset['images']]
    coco_pred.dataset['categories'] = copy.deepcopy(coco_gt.dataset['categories'])
    id = 0  # NOTE(review): appears unused in this function

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # BUG FIX: the loop variable was also named `i`, silently shadowing the
    # iteration number parameter that the summary print below reports.
    for batch_idx, (inputs, anns) in enumerate(val_loader):
        # forward images one by one (TODO: support batch mode later, or
        # multiprocess)
        for j, input in enumerate(inputs):
            input_anns = anns[j]  # anns of this input
            gt_bbox = np.vstack([ann['bbox'] + [ann['ordered_id']] for ann in input_anns])
            im_info = [[input.size(1), input.size(2), input_anns[0]['scale_ratio']]]
            input_var = Variable(input.unsqueeze(0), requires_grad=False).cuda()

            cls_prob, bbox_pred, rois = model(input_var, im_info)
            scores, pred_boxes = model.interpret_outputs(cls_prob, bbox_pred, rois, im_info)
            print(scores, pred_boxes)
            # for i in range(scores.shape[0]):

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    coco_pred.createIndex()
    coco_eval = COCOeval(coco_gt, coco_pred, 'bbox')
    coco_eval.params.imgIds = sorted(coco_gt.getImgIds())
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    print('iter: [{0}] '
          'Time {batch_time.avg:.3f} '
          'Val Stats: {1}'
          .format(i, coco_eval.stats, batch_time=batch_time))

    return coco_eval.stats[0]
def train(train_loader, net, criterion, optimizer, curr_epoch, train_args, val_loader, visualize):
    """Train the cityscapes PSP-style net (main + auxiliary head) until
    `train_args['max_iter']` iterations, running validation every
    `train_args['val_freq']` iterations.

    Inputs arrive as 5-D tensors of image slices; the batch is transposed so
    slices can be fed one sub-batch at a time. Polynomial LR decay is applied
    per iteration; param group 0 gets 2x the base LR.
    """
    while True:
        train_main_loss = AverageMeter()
        train_aux_loss = AverageMeter()
        curr_iter = (curr_epoch - 1) * len(train_loader)
        for i, data in enumerate(train_loader):
            # poly learning-rate schedule, recomputed every iteration
            optimizer.param_groups[0]['lr'] = 2 * train_args['lr'] * (
                1 - float(curr_iter) / train_args['max_iter']) ** train_args['lr_decay']
            optimizer.param_groups[1]['lr'] = train_args['lr'] * (
                1 - float(curr_iter) / train_args['max_iter']) ** train_args['lr_decay']

            inputs, gts, _ = data
            assert len(inputs.size()) == 5 and len(gts.size()) == 4
            # move the slice dimension first so we can iterate over slices
            inputs.transpose_(0, 1)
            gts.transpose_(0, 1)
            assert inputs.size()[3:] == gts.size()[2:]
            slice_batch_pixel_size = inputs.size(1) * inputs.size(3) * inputs.size(4)

            for inputs_slice, gts_slice in zip(inputs, gts):
                inputs_slice = Variable(inputs_slice).cuda()
                gts_slice = Variable(gts_slice).cuda()

                optimizer.zero_grad()
                outputs, aux = net(inputs_slice)
                assert outputs.size()[2:] == gts_slice.size()[1:]
                assert outputs.size()[1] == cityscapes.num_classes

                main_loss = criterion(outputs, gts_slice)
                aux_loss = criterion(aux, gts_slice)
                # standard PSPNet weighting of the auxiliary head
                loss = main_loss + 0.4 * aux_loss
                loss.backward()
                optimizer.step()

                # BUG FIX: `loss.data[0]` was removed in PyTorch >= 0.5;
                # `.item()` is the supported scalar extraction.
                train_main_loss.update(main_loss.item(), slice_batch_pixel_size)
                train_aux_loss.update(aux_loss.item(), slice_batch_pixel_size)

            curr_iter += 1
            writer.add_scalar('train_main_loss', train_main_loss.avg, curr_iter)
            writer.add_scalar('train_aux_loss', train_aux_loss.avg, curr_iter)
            writer.add_scalar('lr', optimizer.param_groups[1]['lr'], curr_iter)

            if (i + 1) % train_args['print_freq'] == 0:
                print('[epoch %d], [iter %d / %d], [train main loss %.5f], [train aux loss %.5f]. [lr %.10f]' % (
                    curr_epoch, i + 1, len(train_loader), train_main_loss.avg,
                    train_aux_loss.avg, optimizer.param_groups[1]['lr']))
            if curr_iter >= train_args['max_iter']:
                return
            if curr_iter % train_args['val_freq'] == 0:
                validate(val_loader, net, criterion, optimizer, curr_epoch, i + 1, train_args, visualize)
        curr_epoch += 1
def test(data_loader, model, opt, class_names):
    """Run inference over `data_loader`, grouping consecutive clip outputs by
    video id, and dump aggregated per-video results to
    `<opt.result_path>/<opt.test_subset>.json`.
    """
    print('test')

    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()

    end_time = time.time()
    output_buffer = []
    previous_video_id = ''
    test_results = {'results': {}}
    for i, (inputs, targets) in enumerate(data_loader):
        data_time.update(time.time() - end_time)

        # `volatile=True` Variables were removed; torch.no_grad() is the
        # supported equivalent for inference.
        with torch.no_grad():
            inputs = Variable(inputs)
            outputs = model(inputs)
            if not opt.no_softmax_in_test:
                # explicit dim: class scores are along dim 1
                outputs = F.softmax(outputs, dim=1)

        for j in range(outputs.size(0)):
            # a change of video id means the previous video is complete
            if not (i == 0 and j == 0) and targets[j] != previous_video_id:
                calculate_video_results(output_buffer, previous_video_id,
                                        test_results, class_names)
                output_buffer = []
            output_buffer.append(outputs[j].data.cpu())
            previous_video_id = targets[j]

        # periodic checkpoint of partial results
        if (i % 100) == 0:
            with open(
                    os.path.join(opt.result_path, '{}.json'.format(
                        opt.test_subset)), 'w') as f:
                json.dump(test_results, f)

        batch_time.update(time.time() - end_time)
        end_time = time.time()

        print('[{}/{}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                  i + 1, len(data_loader), batch_time=batch_time,
                  data_time=data_time))

    # BUG FIX: the last video's buffered outputs were never aggregated —
    # the flush only triggered on a video-id *change*, so the final video
    # was missing from test_results. Flush it before the final dump.
    if output_buffer:
        calculate_video_results(output_buffer, previous_video_id,
                                test_results, class_names)

    with open(
            os.path.join(opt.result_path, '{}.json'.format(opt.test_subset)),
            'w') as f:
        json.dump(test_results, f)
def train(train_loader, model, optimizer, start_iter, num_iters):
    """Train the two-stage detector (RPN + ODN) for `num_iters` iterations.

    Iterates `train_loader` starting the global counter at `start_iter`;
    images in a batch are forwarded one at a time and gradients accumulate
    until a single optimizer step per batch. Returns the average total loss.

    NOTE(review): the `.data[0]` scalar reads below are pre-0.5 PyTorch;
    newer versions require `.item()`.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_losses = AverageMeter()
    rpn_losses = AverageMeter()
    odn_losses = AverageMeter()
    rpn_ce_losses = AverageMeter()
    rpn_box_losses = AverageMeter()
    odn_ce_losses = AverageMeter()
    odn_box_losses = AverageMeter()

    # switch to train mode
    end_iter = start_iter + num_iters - 1
    model.train()

    end = time.time()
    # for i in range(start_iter, start_iter + num_iters):
    for i, (inputs, anns) in enumerate(train_loader):
        # shift the loader's local index onto the global iteration counter
        i += start_iter
        # get minibatch
        # inputs, anns = next(train_loader)
        lr = adjust_learning_rate(optimizer, args.lr, args.decay_rate,
                                  i, args.niters)  # TODO: add custom
        # measure data loading time
        data_time.update(time.time() - end)
        optimizer.zero_grad()

        # forward images one by one (TODO: support batch mode later, or
        # multiprocess)
        for j, input in enumerate(inputs):
            input_anns = anns[j]  # anns of this input
            if len(input_anns) == 0:
                # image without annotations contributes no loss
                continue
            # stack [x, y, w, h, class_id] rows for all ground-truth boxes
            gt_bbox = np.vstack([ann['bbox'] + [ann['ordered_id']]
                                 for ann in input_anns])
            im_info = [[input.size(1), input.size(2),
                        input_anns[0]['scale_ratio']]]
            input_var = torch.autograd.Variable(input.unsqueeze(0).cuda(),
                                                requires_grad=False)

            cls_prob, bbox_pred, rois = model(input_var, im_info, gt_bbox)
            loss = model.loss
            # gradients accumulate across the images of this batch
            loss.backward()

            # record loss
            total_losses.update(loss.data[0], input_var.size(0))
            rpn_losses.update(model.rpn.loss.data[0], input_var.size(0))
            rpn_ce_losses.update(
                model.rpn.cross_entropy.data[0], input_var.size(0))
            rpn_box_losses.update(
                model.rpn.loss_box.data[0], input_var.size(0))
            odn_losses.update(model.odn.loss.data[0], input_var.size(0))
            odn_ce_losses.update(
                model.odn.cross_entropy.data[0], input_var.size(0))
            odn_box_losses.update(
                model.odn.loss_box.data[0], input_var.size(0))

        # do SGD step (one step per batch, after all per-image backwards)
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.print_freq > 0 and (i + 1) % args.print_freq == 0:
            print('iter: [{0}] '
                  'Time {batch_time.val:.3f} '
                  'Data {data_time.val:.3f} '
                  'Loss {total_losses.val:.4f} '
                  'RPN {rpn_losses.val:.4f} '
                  '{rpn_ce_losses.val:.4f} '
                  '{rpn_box_losses.val:.4f} '
                  'ODN {odn_losses.val:.4f} '
                  '{odn_ce_losses.val:.4f} '
                  '{odn_box_losses.val:.4f} '
                  .format(i, batch_time=batch_time, data_time=data_time,
                          total_losses=total_losses, rpn_losses=rpn_losses,
                          rpn_ce_losses=rpn_ce_losses,
                          rpn_box_losses=rpn_box_losses,
                          odn_losses=odn_losses,
                          odn_ce_losses=odn_ce_losses,
                          odn_box_losses=odn_box_losses))

        # free batch tensors before the next load
        del inputs
        del anns
        if i == end_iter:
            break

    # epoch-range summary with running averages
    print('iter: [{0}-{1}] '
          'Time {batch_time.avg:.3f} '
          'Data {data_time.avg:.3f} '
          'Loss {total_losses.avg:.4f} '
          'RPN {rpn_losses.avg:.4f} '
          '{rpn_ce_losses.avg:.4f} '
          '{rpn_box_losses.avg:.4f} '
          'ODN {odn_losses.avg:.4f} '
          '{odn_ce_losses.avg:.4f} '
          '{odn_box_losses.avg:.4f} '
          .format(start_iter, end_iter, batch_time=batch_time,
                  data_time=data_time, total_losses=total_losses,
                  rpn_losses=rpn_losses, rpn_ce_losses=rpn_ce_losses,
                  rpn_box_losses=rpn_box_losses, odn_losses=odn_losses,
                  odn_ce_losses=odn_ce_losses,
                  odn_box_losses=odn_box_losses))

    if args.tensorboard:
        log_value('train_total_loss', total_losses.avg, end_iter)
        log_value('train_rpn_loss', rpn_losses.avg, end_iter)
        log_value('train_rpn_ce_loss', rpn_ce_losses.avg, end_iter)
        log_value('train_rpn_box_loss', rpn_box_losses.avg, end_iter)
        log_value('train_odn_loss', odn_losses.avg, end_iter)
        log_value('train_odn_ce_loss', odn_ce_losses.avg, end_iter)
        log_value('train_odn_box_loss', odn_box_losses.avg, end_iter)

    return total_losses.avg
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """Train `model` for one epoch; returns (losses.avg, top1.avg, top5.avg)."""
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            # BUG FIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
            # (`async` is a reserved keyword); the kwarg is `non_blocking`.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.2f | Top1: %.2f | Top5: %.2f' %
            (losses.avg, top1.avg, top5.avg))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    return (losses.avg, top1.avg, top5.avg)
# Select the auxiliary distillation criterion and its loss weight.
if args.other_distill is not None:
    if args.other_distill == 'AT':
        criterion = Attention()
        weight = 1000
    elif args.other_distill == 'SP':
        criterion = Similarity()
        weight = 3000

best_acc = 0
# per-epoch, per-sample counter (assumes a 50000-sample train set — CIFAR-
# sized; TODO confirm against the dataset actually used)
counter = torch.zeros(args.epoch, 50000).cuda()
epoch = 0
for epoch in range(args.epoch):
    record = {name: AverageMeter() for name in items}
    center = cal_center(val_loader, args, s_model)
    # BUG FIX: the loop variable was the censored identifier `f**k`, which
    # is a SyntaxError (`**` is not valid in an assignment target).
    for batch_idx, (x, y, k) in enumerate(train_loader):
        s_model.train()
        x = x.cuda()
        y = y.cuda()
        k = k.cuda()
        with torch.no_grad():
            s_feats, logits = s_model(x, is_feat=True)
            probs = F.softmax(logits, dim=1)
            # confidence
            conf = probs.max(dim=1)[0]
            # margin
def validate(val_loader, model, criterion, args, logger, writer, epoch, local_rank):
    """DDP validation: evaluate `model`, all-reduce loss/accuracy across
    `args.nprocs` ranks, log to tensorboard on rank 0, return top1.avg.
    """
    batch_times = AverageMeter('Time', ':6.3f')
    # ':.4e' formats the loss in scientific notation with 4 decimals
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda(local_rank, non_blocking=True)
            target = target.cuda(local_rank, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1 = accuracy(output, target, 1)

            # DDP: data synchronization — average metrics over all ranks
            dist.barrier()
            reduced_loss = reduce_mean(loss, args.nprocs)
            reduced_acc1 = reduce_mean(acc1, args.nprocs)

            losses.update(reduced_loss.item(), images.size(0))
            top1.update(reduced_acc1, images.size(0))

            # measure elapsed time
            batch_time = time.time() - end
            batch_times.update(batch_time)
            end = time.time()

            if i % args.print_freq == 0:
                ddp_print(
                    'Val epoch: [{:d}/{:d}][{:d}/{:d}]\tce_loss={:.4f}\ttop1_acc={:.4f}\tbatch_time={:6.3f}s'
                    .format(epoch, args.epochs, i, len(val_loader),
                            losses.avg, top1.avg, batch_times.avg),
                    logger, local_rank)
            # NOTE(review): this unconditional break exits after the FIRST
            # batch, so only one batch is ever validated — looks like a
            # debugging leftover; confirm before removing.
            break

    ddp_print(
        '||==> Val epoch: [{:d}/{:d}]\tce_loss={:.4f}\ttop1_acc={:.4f}\tbatch_time={:6.3f}s'
        .format(epoch, args.epochs, losses.avg, top1.avg, batch_times.avg),
        logger, local_rank)

    if args.local_rank == 0:
        # save tensorboard
        writer.add_scalar('Val_ce_loss', losses.avg, epoch)
        writer.add_scalar('Val_top1_accuracy', top1.avg, epoch)

    return top1.avg
def test(testloader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on `testloader`; returns (losses.avg, top1.avg).

    NOTE(review): `local_rank` is not defined in this function or its
    parameters — presumably a module-level global set during DDP setup;
    verify against the surrounding file. top5 is computed but not returned.
    """
    global best_acc

    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    for batch_idx, (inputs, targets) in enumerate(testloader):
        if use_cuda:
            inputs, targets = inputs.cuda(local_rank), targets.cuda(local_rank)
        inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

        # compute output (no gradients needed during evaluation)
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.data.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

    return (losses.avg, top1.avg)
def train(self, data_loader, epoch, args, result_dict):
    """Train `self.model` for one epoch with optional AMP and SWA support.

    Appends the epoch's average loss/accuracy to `result_dict['train_loss']`
    and `result_dict['train_acc']` and returns `result_dict`.
    """
    total_loss = 0
    count = 0

    losses = AverageMeter()
    top1 = AverageMeter()

    self.model.train()

    for batch_idx, (inputs, labels) in enumerate(data_loader):
        inputs, labels = inputs.cuda(), labels.cuda()

        if args.amp:
            # mixed-precision forward pass
            with torch.cuda.amp.autocast():
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)
        else:
            outputs = self.model(inputs)
            loss = self.criterion(outputs, labels)

        # collapse one-hot / soft labels to class indices for accuracy()
        if len(labels.size()) > 1:
            labels = torch.argmax(labels, axis=1)

        prec1, prec3 = accuracy(outputs.data, labels, topk=(1, 3))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))

        self.optimizer.zero_grad()
        if args.amp:
            # scaler handles gradient scaling/unscaling around the step
            self.scaler.scale(loss).backward()
            self.scaler.step(self.optimizer)
            self.scaler.update()
        else:
            loss.backward()
            self.optimizer.step()

        total_loss += loss.tolist()
        count += labels.size(0)

        if batch_idx % args.log_interval == 0:
            # pad the sample counter to the width of the dataset size
            _s = str(len(str(len(data_loader.sampler))))
            ret = [
                ('epoch: {:0>3} [{: >' + _s + '}/{} ({: >3.0f}%)]').format(
                    epoch, count, len(data_loader.sampler),
                    100 * count / len(data_loader.sampler)),
                'train_loss: {: >4.2e}'.format(total_loss / count),
                'train_accuracy : {:.2f}%'.format(top1.avg)
            ]
            print(', '.join(ret))

    # LR schedule: plain scheduler unless SWA is active; after `swa_start`
    # epochs the SWA model/scheduler take over
    if not args.decay_type == 'swa':
        self.scheduler.step()
    else:
        if epoch <= args.swa_start:
            self.scheduler.step()

    if epoch > args.swa_start and args.decay_type == 'swa':
        self.swa_model.update_parameters(self.model)
        self.swa_scheduler.step()

    result_dict['train_loss'].append(losses.avg)
    result_dict['train_acc'].append(top1.avg)

    return result_dict
def eval_model_verbose(model, test_loader, decoder, cuda, out_path,
                       item_info_array, warmups=0, meta=False):
    """
    Model evaluation -- used during inference.

    Runs `warmups` warm-up batches first, then times inference per batch,
    decodes predictions, and writes one CSV row per utterance to `out_path`.

    Returns: wer, cer, batch time meter and warm up time.
    """
    # Warming up
    end = time.time()
    total_trials = len(test_loader)
    for i, data in enumerate(test_loader):
        if i >= warmups:
            break
        sys.stdout.write("\rWarmups ({}/{}) ".format(i + 1, warmups))
        sys.stdout.flush()
        if meta:
            inputs, targets, input_percentages, target_sizes, batch_meta, item_meta = data
        else:
            inputs, targets, input_percentages, target_sizes = data
        # NOTE(review): volatile=False during inference looks unintended
        # (gradients are tracked) — confirm; left unchanged here.
        inputs = torch.autograd.Variable(inputs, volatile=False)
        # unflatten targets
        split_targets = []
        offset = 0
        for size in target_sizes:
            split_targets.append(targets[offset:offset + size])
            offset += size
        if cuda:
            inputs = inputs.cuda()
        out = model(inputs)
    warmup_time = time.time() - end
    if warmups > 0:
        # BUG FIX: the original was `print("...").format(warmup_time)`,
        # which calls .format() on print's return value (None) and raises
        # AttributeError right after warm-up.
        print("Warmed up in {}s".format(warmup_time))

    # the actual inference trial
    total_cer, total_wer = 0, 0
    word_count, char_count = 0, 0
    model.eval()
    batch_time = AverageMeter()
    # For each batch in the test_loader, make a prediction and calculate
    # the WER CER
    item_num = 1
    # NOTE(review): 'wb' + csv.DictWriter is the Python 2 convention; on
    # Python 3 this would need open(out_path, 'w', newline='').
    with open(out_path, 'wb') as f:
        csvwriter = csv.DictWriter(f, fieldnames=csv_header)
        csvwriter.writeheader()
        for i, data in enumerate(test_loader):
            batch_num = i + 1
            if meta:
                inputs, targets, input_percentages, target_sizes, batch_meta, item_meta = data
            else:
                inputs, targets, input_percentages, target_sizes = data
            inputs = torch.autograd.Variable(inputs, volatile=False)

            # unflatten targets
            split_targets = []
            offset = 0
            for size in target_sizes:
                split_targets.append(targets[offset:offset + size])
                offset += size

            if cuda:
                inputs = inputs.cuda()

            end = time.time()  # Timing start (Inference only)
            out = model(inputs)
            batch_time.update(time.time() - end)  # Timing end (Inference only)

            out = out.transpose(0, 1)  # TxNxH
            seq_length = out.size(0)
            sizes = input_percentages.mul_(int(seq_length)).int()

            # Decode the ouput to actual strings and compare to label
            # Get the LEV score and the word, char count
            decoded_output = decoder.decode(out.data, sizes)
            target_strings = decoder.process_strings(
                decoder.convert_to_strings(split_targets))

            batch_we = batch_wc = batch_ce = batch_cc = 0
            for x in range(len(target_strings)):
                this_we = decoder.wer(decoded_output[x], target_strings[x])
                this_ce = decoder.cer(decoded_output[x], target_strings[x])
                this_wc = len(target_strings[x].split())
                this_cc = len(target_strings[x])
                this_pred = decoded_output[x]
                this_true = target_strings[x]
                if item_num <= len(item_info_array):
                    item_latency = item_info_array[item_num - 1]['batch_latency']
                else:
                    item_latency = "-9999"
                out_data = [batch_num, batch_time.array[-1], batch_meta[2],
                            batch_meta[4], batch_meta[3], item_num,
                            item_latency, item_meta[x][2], item_meta[x][4],
                            item_meta[x][3], this_wc, this_cc, this_we,
                            this_ce, this_pred, this_true]
                csv_dict = {k: v for k, v in zip(csv_header, out_data)}
                csvwriter.writerow(csv_dict)
                item_num += 1
                batch_we += this_we
                batch_ce += this_ce
                batch_wc += this_wc
                batch_cc += this_cc

            total_wer += batch_we
            total_cer += batch_ce
            word_count += batch_wc
            char_count += batch_cc

            print('[{0}/{1}]\t'
                  'Batch: latency (running average) {batch_time.val:.4f} ({batch_time.avg:.3f})\t\t'
                  'WER {2:.1f} \t CER {3:.1f}'
                  .format((i + 1), total_trials,
                          batch_we / float(batch_wc),
                          batch_ce / float(batch_cc),
                          batch_time=batch_time))
            if cuda:
                torch.cuda.synchronize()
            del out

    # WER, CER (as percentages over the whole set)
    wer = total_wer / float(word_count)
    cer = total_cer / float(char_count)
    wer *= 100
    cer *= 100

    return wer, cer, batch_time, warmup_time
def train(epoch, model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu):
    """One re-id training epoch combining cross-entropy and triplet losses.

    When `args.htri_only` is set, only the triplet (htri) loss is optimized.
    """
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    # add two meters
    xent_losses = AverageMeter()
    htri_losses = AverageMeter()
    #global_losses = AverageMeter()
    #local_losses = AverageMeter()

    model.train()

    end = time.time()
    for batch_idx, (imgs, pids, _) in enumerate(trainloader):
        if use_gpu:
            imgs, pids = imgs.cuda(), pids.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        outputs, features = model(imgs)
        if args.htri_only:
            if isinstance(features, tuple):
                loss = DeepSupervision(criterion_htri, features, pids)
            else:
                loss = criterion_htri(features, pids)
        else:
            if isinstance(outputs, tuple):
                xent_loss = DeepSupervision(criterion_xent, outputs, pids)
            else:
                xent_loss = criterion_xent(outputs, pids)  # use this one

            if isinstance(features, tuple):
                htri_loss = DeepSupervision(criterion_htri, features, pids)
            else:
                htri_loss = criterion_htri(features, pids)  # use this one

            loss = xent_loss + htri_loss  # use this one
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        losses.update(loss.item(), pids.size(0))
        # BUG FIX: with args.htri_only, xent_loss/htri_loss are never
        # assigned, so the unconditional meter updates raised NameError on
        # the first batch. Only update them when they were computed.
        if not args.htri_only:
            xent_losses.update(xent_loss.item(), pids.size(0))
            htri_losses.update(htri_loss.item(), pids.size(0))

        if (batch_idx + 1) % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'CLoss {xent_loss.val:.4f} ({xent_loss.avg:.4f})\t'
                  'MLoss {htri_loss.val:.4f} ({htri_loss.avg:.4f})\t'.format(
                      epoch + 1, batch_idx + 1, len(trainloader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, xent_loss=xent_losses,
                      htri_loss=htri_losses))
def test(model, queryloader, galleryloader, use_gpu, ranks=[1, 5, 10, 20]):
    """Extract query/gallery features, compute the Euclidean distance matrix,
    and report CMC ranks plus mAP. Returns the Rank-1 score.

    NOTE: the mutable default `ranks` is only iterated, never mutated, so
    the shared-default pitfall does not bite here.
    """
    batch_time = AverageMeter()

    model.eval()

    with torch.no_grad():
        # ---- query features ----
        qf, q_pids, q_camids = [], [], []
        for batch_idx, (imgs, pids, camids) in enumerate(queryloader):
            if use_gpu:
                imgs = imgs.cuda()

            end = time.time()
            features = model(imgs)
            # only the forward pass is timed
            batch_time.update(time.time() - end)

            features = features.data.cpu()
            qf.append(features)
            q_pids.extend(pids)
            q_camids.extend(camids)
        qf = torch.cat(qf, 0)
        q_pids = np.asarray(q_pids)
        q_camids = np.asarray(q_camids)

        print("Extracted features for query set, obtained {}-by-{} matrix".format(qf.size(0), qf.size(1)))

        # ---- gallery features ----
        gf, g_pids, g_camids = [], [], []
        for batch_idx, (imgs, pids, camids) in enumerate(galleryloader):
            if use_gpu:
                imgs = imgs.cuda()

            end = time.time()
            features = model(imgs)
            batch_time.update(time.time() - end)

            features = features.data.cpu()
            gf.append(features)
            g_pids.extend(pids)
            g_camids.extend(camids)
        gf = torch.cat(gf, 0)
        g_pids = np.asarray(g_pids)
        g_camids = np.asarray(g_camids)

        print("Extracted features for gallery set, obtained {}-by-{} matrix".format(gf.size(0), gf.size(1)))

    print("==> BatchTime(s)/BatchSize(img): {:.3f}/{}".format(
        batch_time.avg, args.test_batch))

    # squared Euclidean distances via ||q||^2 + ||g||^2 - 2 q.g
    m, n = qf.size(0), gf.size(0)
    distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
        torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
    # NOTE(review): positional (beta, alpha, ...) addmm_ signature is the
    # pre-1.0 PyTorch form; newer versions expect keyword beta=/alpha=.
    distmat.addmm_(1, -2, qf, gf.t())
    distmat = distmat.numpy()

    print("Computing CMC and mAP")
    cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids,
                        use_metric_cuhk03=args.use_metric_cuhk03)

    print("Results ----------")
    print("mAP: {:.1%}".format(mAP))
    print("CMC curve")
    for r in ranks:
        print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
    print("------------------")

    return cmc[0]
def valid(valid_loader, model, criterions, logger):
    """Validate the multi-task face-attribute model.

    Args:
        valid_loader: yields (img, reg, expression, gender, glasses, race).
        model: returns (reg_out, expression_out, gender_out, glasses_out, race_out).
        criterions: (L1Loss, CrossEntropyLoss) pair.
        logger: receives the final summary status line.

    Returns:
        losses.avg — the average combined loss over the validation set.
    """
    model.eval()  # eval mode (dropout and batchnorm is NOT used)

    losses = AverageMeter()
    reg_losses = AverageMeter()
    expression_accs = AverageMeter()
    gender_accs = AverageMeter()
    glasses_accs = AverageMeter()
    race_accs = AverageMeter()

    L1Loss, CrossEntropyLoss = criterions

    # Batches
    for i, (img, reg, expression, gender, glasses, race) in enumerate(valid_loader):
        # Move to GPU, if available
        img = img.to(device)
        reg_label = reg.type(torch.FloatTensor).to(device)  # [N, 5]
        expression_label = expression.type(torch.LongTensor).to(
            device)  # [N, 3]
        gender_label = gender.type(torch.LongTensor).to(device)  # [N, 2]
        glasses_label = glasses.type(torch.LongTensor).to(device)  # [N, 3]
        race_label = race.type(torch.LongTensor).to(device)  # [N, 4]

        # Forward prop.
        reg_out, expression_out, gender_out, glasses_out, race_out = model(img)

        # Calculate loss
        reg_loss = L1Loss(reg_out, reg_label) * loss_ratio
        expression_loss = CrossEntropyLoss(expression_out, expression_label)
        gender_loss = CrossEntropyLoss(gender_out, gender_label)
        glasses_loss = CrossEntropyLoss(glasses_out, glasses_label)
        race_loss = CrossEntropyLoss(race_out, race_label)
        loss = reg_loss + expression_loss + gender_loss + glasses_loss + race_loss

        # Keep track of metrics
        losses.update(loss.item())
        reg_losses.update(reg_loss.item())
        expression_accuracy = accuracy(expression_out, expression_label)
        expression_accs.update(expression_accuracy)
        gender_accuracy = accuracy(gender_out, gender_label)
        gender_accs.update(gender_accuracy)
        glasses_accuracy = accuracy(glasses_out, glasses_label)
        glasses_accs.update(glasses_accuracy)
        race_accuracy = accuracy(race_out, race_label)
        race_accs.update(race_accuracy)

    # Print status.
    # BUG FIX: the Glasses and Race fields previously interpolated
    # `expression_acc` instead of the `glasses_acc` / `race_acc` meters
    # that were already passed to format(), so the wrong numbers were
    # reported for those two attributes.
    status = 'Validation: Loss {loss.avg:.4f}\t' \
             'Reg Loss {reg_loss.val:.4f} ({reg_loss.avg:.4f})\t' \
             'Expression Accuracy {expression_acc.val:.4f} ({expression_acc.avg:.4f})\t' \
             'Gender Accuracy {gender_acc.val:.4f} ({gender_acc.avg:.4f})\t' \
             'Glasses Accuracy {glasses_acc.val:.4f} ({glasses_acc.avg:.4f})\t' \
             'Race Accuracy {race_acc.val:.4f} ({race_acc.avg:.4f})\n'.format(
                 loss=losses, reg_loss=reg_losses,
                 expression_acc=expression_accs, gender_acc=gender_accs,
                 glasses_acc=glasses_accs, race_acc=race_accs)
    logger.info(status)

    return losses.avg
def trainModel(data_loader, model, normalizer_global, normalizer_local,
               epoch=None, evaluation=False, testing=False):
    """
    The function to train/test the model for one epoch.
    Also, writes the test results to a file 'test_results.csv' in the end

    Parameters
    ----------
    data_loader : The data iterator to generate batches
    model : The model to train (DataParallel-wrapped: helpers are reached
        through `model.module`)
    normalizer_global : The normalizer for global gdt targets
    normalizer_local : The normalizer for local lddt targets
    epoch : The current epoch
    evaluation : (bool) Denotes if the model is in eval mode
        (True for both testing and validation)
    testing : (bool) Denotes if the model is in test mode
        (True only while testing)

    Returns
    -------
    avg_errors_global : The average global MAE error
    avg_errors_local : The average local MAE error
    losses : The average MSE loss
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    avg_errors_global = AverageMeter()
    avg_errors_local = AverageMeter()

    # placeholders to store results to write to file
    if testing:
        test_targets_global = []
        test_preds_global = []
        test_targets_local = []
        test_preds_local = []
        test_cif_ids = []
        test_amino_crystal = []

    end = time.time()
    for protein_batch_iter, (input_data, batch_data, target_tuples) in enumerate(data_loader):
        batch_protein_ids = batch_data[0]
        batch_amino_crystal = batch_data[1]
        batch_size = len(batch_protein_ids)

        # measure data loading time
        data_time.update(time.time() - end)

        # move inputs and targets to cuda
        input_var, target_var = getInputs(input_data, target_tuples,
                                          normalizer_global, normalizer_local)

        if not evaluation and not testing:
            # Switch to train mode
            model.train()
            out = model(input_var)
            out = model.module.mask_remove(out)
            assert out[1].shape[0] == target_var[1].shape[
                0], "Predicted Outputs Amino & Target Outputs Amino don't match"
            # fit() runs the loss/backward bookkeeping inside the module
            model.module.fit(out, target_var, batch_protein_ids)
        else:
            # evaluate one iteration
            with torch.no_grad():
                # Switch to evaluation mode
                model.eval()
                predicted = model(input_var)
                predicted = model.module.mask_remove(predicted)
                assert predicted[1].shape[0] == target_var[1].shape[
                    0], "Predicted Outputs Amino & Target Outputs Amino don't match"
                model.module.fit(predicted, target_var, batch_protein_ids,
                                 pred=True)

        # Calculate the accuracy between the denormalized values
        model.module.accuracy[
            0] = model.module.accuracy[0] * normalizer_global.std
        model.module.accuracy[
            1] = model.module.accuracy[1] * normalizer_local.std

        # measure accuracy and record loss
        losses.update(model.module.loss.item(), batch_size)
        avg_errors_global.update(model.module.accuracy[0].item(), batch_size)
        avg_errors_local.update(model.module.accuracy[1].item(), batch_size)

        # Collect all the results that needs to be written to file
        # NOTE(review): size-1 batches are skipped — presumably because
        # .squeeze() would drop the batch dimension; confirm.
        if testing and batch_size != 1:
            test_pred_global = normalizer_global.denorm(
                model.module.outputs[0].data).squeeze().tolist()
            test_target_global = target_tuples[0].squeeze()
            test_preds_global += test_pred_global
            test_targets_global += test_target_global.tolist()
            test_amino_crystal += batch_amino_crystal.tolist()

            test_pred_local = normalizer_local.denorm(
                model.module.outputs[1].data).squeeze().tolist()
            test_target_local = target_tuples[1].squeeze().tolist()

            # regroup flat per-amino lists back into per-protein lists,
            # keyed by which crystal each amino acid belongs to
            res1, res2 = OrderedDict(), OrderedDict()
            for i, idx in enumerate(batch_amino_crystal):
                if idx not in res1:
                    res1[idx] = []
                if idx not in res2:
                    res2[idx] = []
                res1[idx].append(test_target_local[i])
                res2[idx].append(test_pred_local[i])
            test_target_local = [v for _, v in res1.items()]
            test_pred_local = [v for _, v in res2.items()]

            test_preds_local += test_pred_local
            test_targets_local += test_target_local
            test_cif_ids += batch_protein_ids

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # print progress between steps
        if protein_batch_iter % args.print_freq == 0:
            if evaluation or testing:
                print(
                    'Test: [{0}][{1}]/{2}\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'ERRG {avg_errors_global.val:.3f} ({avg_errors_global.avg:.3f})\t'
                    'ERRL {avg_errors_local.val:.3f} ({avg_errors_local.avg:.3f})'
                    .format(epoch, protein_batch_iter, len(data_loader),
                            batch_time=batch_time, loss=losses,
                            avg_errors_global=avg_errors_global,
                            avg_errors_local=avg_errors_local))
            else:
                print(
                    'Epoch: [{0}][{1}]/{2}\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'ERRG {avg_errors_global.val:.3f} ({avg_errors_global.avg:.3f})\t'
                    'ERRL {avg_errors_local.val:.3f} ({avg_errors_local.avg:.3f})'
                    .format(epoch, protein_batch_iter, len(data_loader),
                            batch_time=batch_time, data_time=data_time,
                            loss=losses,
                            avg_errors_global=avg_errors_global,
                            avg_errors_local=avg_errors_local))

        if protein_batch_iter % args.print_freq == 0:
            clearCache()

    # write results to file
    if testing:
        star_label = '**'
        with open(savepath + 'test_results.csv', 'w') as f:
            writer = csv.writer(f)
            for cif_id, targets_global, preds_global, targets_local, preds_local in zip(
                    test_cif_ids, test_targets_global, test_preds_global,
                    test_targets_local, test_preds_local):
                writer.writerow((cif_id, targets_global, preds_global,
                                 targets_local, preds_local))
    elif evaluation:
        star_label = '*'
    else:
        star_label = '##'

    print(
        ' {star} ERRG {avg_errors_global.avg:.3f} ERRL {avg_errors_local.avg:.3f} LOSS {avg_loss.avg:.3f}'
        .format(star=star_label, avg_errors_global=avg_errors_global,
                avg_errors_local=avg_errors_local, avg_loss=losses))

    return avg_errors_global.avg, avg_errors_local.avg, losses.avg
def train(train_loader, model, criterions, optimizer, epoch, logger):
    """Run one training epoch for the multi-task face-attribute model.

    :param train_loader: DataLoader yielding (img, reg, expression, gender, glasses, race)
    :param model: network returning (reg, expression, gender, glasses, race) outputs
    :param criterions: (L1Loss, CrossEntropyLoss) pair
    :param optimizer: optimizer updating the model's weights
    :param epoch: current epoch number (for logging only)
    :param logger: logger that receives the status lines
    :return: average total loss over the epoch
    """
    model.train()  # train mode (dropout and batchnorm is used)

    losses = AverageMeter()
    reg_losses = AverageMeter()
    expression_accs = AverageMeter()
    gender_accs = AverageMeter()
    glasses_accs = AverageMeter()
    race_accs = AverageMeter()

    L1Loss, CrossEntropyLoss = criterions

    # Batches
    for i, (img, reg, expression, gender, glasses, race) in enumerate(train_loader):
        # Move to GPU, if available (device, loss_ratio, grad_clip, print_freq
        # are module-level globals defined elsewhere in this file)
        img = img.to(device)
        reg_label = reg.type(torch.FloatTensor).to(device)  # [N, 5]
        expression_label = expression.type(torch.LongTensor).to(device)  # [N, 3]
        gender_label = gender.type(torch.LongTensor).to(device)  # [N, 2]
        glasses_label = glasses.type(torch.LongTensor).to(device)  # [N, 3]
        race_label = race.type(torch.LongTensor).to(device)  # [N, 4]

        # Forward prop.
        reg_out, expression_out, gender_out, glasses_out, race_out = model(img)  # embedding => [N, 17]

        # Calculate loss: weighted regression loss plus the four classification losses
        reg_loss = L1Loss(reg_out, reg_label) * loss_ratio
        expression_loss = CrossEntropyLoss(expression_out, expression_label)
        gender_loss = CrossEntropyLoss(gender_out, gender_label)
        glasses_loss = CrossEntropyLoss(glasses_out, glasses_label)
        race_loss = CrossEntropyLoss(race_out, race_label)
        loss = reg_loss + expression_loss + gender_loss + glasses_loss + race_loss

        # Back prop.
        optimizer.zero_grad()
        loss.backward()

        # Clip gradients
        clip_gradient(optimizer, grad_clip)

        # Update weights
        optimizer.step()

        # Keep track of metrics
        losses.update(loss.item())
        reg_losses.update(reg_loss.item())
        expression_accuracy = accuracy(expression_out, expression_label)
        expression_accs.update(expression_accuracy)
        gender_accuracy = accuracy(gender_out, gender_label)
        gender_accs.update(gender_accuracy)
        glasses_accuracy = accuracy(glasses_out, glasses_label)
        glasses_accs.update(glasses_accuracy)
        race_accuracy = accuracy(race_out, race_label)
        race_accs.update(race_accuracy)

        # Print status
        if i % print_freq == 0:
            # BUG FIX: the Glasses/Race fields previously formatted
            # {expression_acc}, so the logged glasses/race accuracies were
            # actually the expression accuracy. They now report their own meters.
            status = 'Epoch: [{0}][{1}/{2}]\t' \
                     'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                     'Reg Loss {reg_loss.val:.4f} ({reg_loss.avg:.4f})\t' \
                     'Expression Accuracy {expression_acc.val:.4f} ({expression_acc.avg:.4f})\t' \
                     'Gender Accuracy {gender_acc.val:.4f} ({gender_acc.avg:.4f})\t' \
                     'Glasses Accuracy {glasses_acc.val:.4f} ({glasses_acc.avg:.4f})\t' \
                     'Race Accuracy {race_acc.val:.4f} ({race_acc.avg:.4f})\t'.format(epoch, i,
                                                                                      len(train_loader),
                                                                                      loss=losses,
                                                                                      reg_loss=reg_losses,
                                                                                      expression_acc=expression_accs,
                                                                                      gender_acc=gender_accs,
                                                                                      glasses_acc=glasses_accs,
                                                                                      race_acc=race_accs)
            logger.info(status)

    return losses.avg
def validate(val_loader, model, criterion, gpu_avail, print_freq, f):
    """Run one evaluation pass over `val_loader`.

    Progress is printed every `print_freq` batches and each line is mirrored
    into the open file handle `f`. Returns the average top-1 accuracy.
    """
    batch_time, losses, top1 = AverageMeter(), AverageMeter(), AverageMeter()

    # Inference mode: dropout off, batch-norm uses running statistics.
    model.eval()

    with torch.no_grad():
        tick = time.time()
        for step, (images, labels) in enumerate(val_loader):
            if gpu_avail:
                images = images.cuda(non_blocking=True)
                labels = labels.cuda(non_blocking=True)

            # Forward pass and loss
            logits = model(images)
            loss = criterion(logits, labels)

            # Bookkeeping, weighted by the batch size
            acc1 = accuracy(logits, labels, topk=(1, ))
            n = images.size(0)
            losses.update(loss.item(), n)
            top1.update(acc1[0], n)

            # Wall-clock time per batch
            batch_time.update(time.time() - tick)
            tick = time.time()

            if step % print_freq == 0:
                # Same line goes to stdout and to the log file.
                line = ('Test: [{0}/{1}]\t'
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                        'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                            step, len(val_loader), batch_time=batch_time,
                            loss=losses, top1=top1))
                print(line)
                f.write(line + '\n')

        print(' * Acc@1 {top1.avg:.3f}'.format(top1=top1))
        f.write(' * Acc@1 {top1.avg:.3f}\n'.format(top1=top1))

    return top1.avg
def train(train_loader, model, criterion, optimizer, epoch, gpu_avail,
          print_freq, f):
    """Train `model` for one epoch over `train_loader`.

    Every `print_freq` batches a progress line is printed and also appended
    to the open file handle `f`.
    """
    batch_time, data_time = AverageMeter(), AverageMeter()
    losses, top1 = AverageMeter(), AverageMeter()

    # Training mode: enables dropout and batch-norm statistic updates.
    model.train()

    tick = time.time()
    for step, (images, labels) in enumerate(train_loader):
        # Time spent waiting on the data loader
        data_time.update(time.time() - tick)

        if gpu_avail:
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)

        # Forward pass and loss
        logits = model(images)
        loss = criterion(logits, labels)

        # Bookkeeping, weighted by the batch size
        acc1 = accuracy(logits, labels, topk=(1, ))
        n = images.size(0)
        losses.update(loss.item(), n)
        top1.update(acc1[0], n)

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Wall-clock time per batch
        batch_time.update(time.time() - tick)
        tick = time.time()

        if step % print_freq == 0:
            # Same line goes to stdout and to the log file.
            line = ('Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                        epoch, step, len(train_loader), batch_time=batch_time,
                        data_time=data_time, loss=losses, top1=top1))
            print(line)
            f.write(line + '\n')
def validate(val_loader, net, criterion, optimizer, epoch, train_args, restore, visualize):
    # Run one validation pass for the segmentation net: compute loss and
    # segmentation metrics, snapshot the model when mean IoU improves, and log
    # scalars/images to the module-level tensorboard `writer`.
    # NOTE(review): relies on module globals `writer`, `ckpt_path`, `exp_name`,
    # `voc`, `evaluate`, `check_mkdir` — defined elsewhere in this file.
    net.eval()

    val_loss = AverageMeter()
    inputs_all, gts_all, predictions_all = [], [], []

    for vi, data in enumerate(val_loader):
        inputs, gts = data
        N = inputs.size(0)
        # `volatile=True` is the pre-0.4 PyTorch way to disable autograd for inference.
        inputs = Variable(inputs, volatile=True).cuda()
        gts = Variable(gts, volatile=True).cuda()

        outputs = net(inputs)
        # per-pixel argmax over the class channel -> H x W label map
        predictions = outputs.data.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()

        # `.data[0]` is the pre-0.4 spelling of `.item()`
        val_loss.update(criterion(outputs, gts).data[0] / N, N)

        # keep only a random subset of inputs for visualization
        if random.random() > train_args['val_img_sample_rate']:
            inputs_all.append(None)
        else:
            inputs_all.append(inputs.data.squeeze_(0).cpu())
        gts_all.append(gts.data.squeeze_(0).cpu().numpy())
        predictions_all.append(predictions)

    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, voc.num_classes)

    # Snapshot + visualize only when mean IoU beats the best record so far.
    if mean_iu > train_args['best_record']['mean_iu']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
        snapshot_name = 'epoch_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
            epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc, optimizer.param_groups[1]['lr']
        )
        # NOTE(review): `param_groups[1]` assumes the optimizer was built with at
        # least two parameter groups — confirm against the optimizer construction.
        torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
        torch.save(optimizer.state_dict(), os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))

        if train_args['val_save_to_img_file']:
            to_save_dir = os.path.join(ckpt_path, exp_name, str(epoch))
            check_mkdir(to_save_dir)

        val_visual = []
        for idx, data in enumerate(zip(inputs_all, gts_all, predictions_all)):
            if data[0] is None:
                continue  # this image was not sampled for visualization
            input_pil = restore(data[0])
            gt_pil = voc.colorize_mask(data[1])
            predictions_pil = voc.colorize_mask(data[2])
            if train_args['val_save_to_img_file']:
                input_pil.save(os.path.join(to_save_dir, '%d_input.png' % idx))
                predictions_pil.save(os.path.join(to_save_dir, '%d_prediction.png' % idx))
                gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
            val_visual.extend([visualize(input_pil.convert('RGB')), visualize(gt_pil.convert('RGB')),
                               visualize(predictions_pil.convert('RGB'))])
        # input / ground-truth / prediction triplets arranged 3 per row
        val_visual = torch.stack(val_visual, 0)
        val_visual = vutils.make_grid(val_visual, nrow=3, padding=5)
        writer.add_image(snapshot_name, val_visual)

    print('--------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'],
        train_args['best_record']['acc_cls'], train_args['best_record']['mean_iu'],
        train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))
    print('--------------------------------------------------------------------')

    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)
    writer.add_scalar('lr', optimizer.param_groups[1]['lr'], epoch)

    # restore training mode before returning to the caller's train loop
    net.train()
    return val_loss.avg
def train(train_loader, decoder, criterion_ce, criterion_dis, decoder_optimizer, epoch):
    """
    Performs one epoch's training.

    :param train_loader: DataLoader for training data
    :param decoder: decoder model
    :param criterion_ce: cross entropy loss layer
    :param criterion_dis : discriminative loss layer
    :param decoder_optimizer: optimizer to update decoder's weights
    :param epoch: epoch number
    """
    # NOTE(review): `scene_graph`, `device`, `args`, `accuracy` and
    # `AverageMeter` are module-level names defined elsewhere in this file.
    decoder.train()  # train mode (dropout and batchnorm is used)

    batch_time = AverageMeter()  # forward prop. + back prop. time
    data_time = AverageMeter()  # data loading time
    losses = AverageMeter()  # loss (per word decoded)
    top5accs = AverageMeter()  # top5 accuracy

    start = time.time()

    # Batches
    for i, sample in enumerate(train_loader):
        # Sample layout depends on whether scene-graph features are used.
        if scene_graph:
            (obj, rel, caps, caplens, obj_mask, rel_mask, pair_idx) = sample
            obj = obj.to(device)
            rel = rel.to(device)
            obj_mask = obj_mask.to(device)
            rel_mask = rel_mask.to(device)
            pair_idx = pair_idx.to(device)
        else:
            (imgs, caps, caplens) = sample
            imgs = imgs.to(device)
        data_time.update(time.time() - start)

        # Move to GPU, if available
        caps = caps.to(device)
        caplens = caplens.to(device)

        # Forward prop.
        if scene_graph:
            scores, scores_d, caps_sorted, decode_lengths, sort_ind = decoder(object_features=obj,
                                                                              relation_features=rel,
                                                                              encoded_captions=caps,
                                                                              caption_lengths=caplens,
                                                                              object_mask=obj_mask,
                                                                              relation_mask=rel_mask,
                                                                              rel_pair_idx=pair_idx)
        else:
            scores, scores_d, caps_sorted, decode_lengths, sort_ind = decoder(imgs, caps, caplens)

        # Max-pooling across predicted words across time steps for discriminative supervision
        scores_d = scores_d.max(1)[0]

        # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
        targets = caps_sorted[:, 1:]
        # -1 marks positions the discriminative loss should ignore
        targets_d = torch.zeros(scores_d.size(0), scores_d.size(1)).to(device)
        targets_d.fill_(-1)

        for length in decode_lengths:
            targets_d[:, :length - 1] = targets[:, :length - 1]

        # Remove timesteps that we didn't decode at, or are pads
        # pack_padded_sequence is an easy trick to do this
        # NOTE(review): `enforce_sorted=True` requires decode_lengths to be in
        # descending order — presumably guaranteed by the decoder's sort_ind.
        scores = pack_padded_sequence(scores, decode_lengths, batch_first=True, enforce_sorted=True).data
        targets = pack_padded_sequence(targets, decode_lengths, batch_first=True, enforce_sorted=True).data
        #scores, _ = pack_padded_sequence(scores, decode_lengths, batch_first=True)
        #targets, _ = pack_padded_sequence(targets, decode_lengths, batch_first=True)

        # Calculate loss: cross-entropy plus weighted discriminative term
        loss_d = criterion_dis(scores_d, targets_d.long())
        loss_g = criterion_ce(scores, targets)
        loss = loss_g + (10 * loss_d)

        # Back prop.
        decoder_optimizer.zero_grad()
        loss.backward()

        # Clip gradients when they are getting too large
        torch.nn.utils.clip_grad_norm_(filter(lambda p: p.requires_grad, decoder.parameters()), 0.25)

        # Update weights
        decoder_optimizer.step()

        # Keep track of metrics (weighted by the number of decoded words)
        top5 = accuracy(scores, targets, 5)
        losses.update(loss.item(), sum(decode_lengths))
        top5accs.update(top5, sum(decode_lengths))
        batch_time.update(time.time() - start)
        start = time.time()

        # Print status
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data Load Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})'.format(epoch, i, len(train_loader),
                                                                          batch_time=batch_time,
                                                                          data_time=data_time,
                                                                          loss=losses,
                                                                          top5=top5accs))
def train_model(model, criterion, optimizer, log_saver, num_epochs=70):
    # Full train/test driver: runs both phases each epoch, records loss/error
    # of the final epoch into `log_saver`, and checkpoints periodically.
    # NOTE(review): relies on module globals `loaders`, `use_gpu`, `accuracy`.
    since = time.time()
    steps = 0
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)
        for phase in ['train', 'test']:
            loss_meter = AverageMeter()
            acc_meter = AverageMeter()
            if phase == 'train':
                model.train(True)  # training mode
            else:
                model.train(False)  # eval mode
            for i, data in enumerate(loaders[phase]):
                inputs, labels = data
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                if phase == 'train':
                    inputs = Variable(inputs)
                    labels = Variable(labels)
                else:
                    # `volatile=True` is the pre-0.4 PyTorch way to disable autograd
                    inputs = Variable(inputs, volatile=True)
                    labels = Variable(labels, volatile=True)
                optimizer.zero_grad()
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                    steps += 1
                # `.data[0]` is the pre-0.4 spelling of `.item()`
                loss_meter.update(loss.data[0], outputs.size(0))
                acc_meter.update(
                    accuracy(outputs.data, labels.data)[-1][0],
                    outputs.size(0))
            epoch_loss = loss_meter.avg
            # accuracy is a percentage, so error = 1 - acc/100
            epoch_error = 1 - acc_meter.avg / 100
            # history lists are only appended on the final epoch
            if phase == 'train' and epoch == num_epochs - 1:
                log_saver['train_loss'].append(epoch_loss)
                log_saver['train_error'].append(epoch_error)
            elif phase == 'test' and epoch == num_epochs - 1:
                log_saver['test_loss'].append(epoch_loss)
                log_saver['test_error'].append(epoch_error)
            print('{} Loss: {:.4f} Error: {:.4f}'.format(
                phase, epoch_loss, epoch_error))
        # checkpoint every 30 epochs and at the end; stores the whole model
        # object (not just state_dict) plus the log dict
        if epoch % 30 == 0 or epoch == num_epochs - 1:
            print('Saving..')
            state = {'net': model, 'epoch': epoch, 'log': log_saver}
            if not os.path.isdir('checkpoint_CNN'):
                os.mkdir('checkpoint_CNN')
            torch.save(
                state, './checkpoint_CNN/ckpt_epoch_{}_{}.t7'.format(
                    epoch, log_saver['num_params'][-1]))
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    return model, log_saver
def validate(val_loader, decoder, criterion_ce, criterion_dis, epoch):
    """
    Performs one epoch's validation.

    :param val_loader: DataLoader for validation data.
    :param decoder: decoder model
    :param criterion_ce: cross entropy loss layer
    :param criterion_dis : discriminative loss layer
    :param epoch: epoch number, used to name the hypotheses/references dumps
    :return: dict of COCO caption metrics plus 'loss' and 'top5'
    """
    decoder.eval()  # eval mode (no dropout or batchnorm)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top5accs = AverageMeter()

    start = time.time()

    references = list()  # references (true captions) for calculating BLEU-4 score
    hypotheses = list()  # hypotheses (predictions)

    # Batches
    with torch.no_grad():
        # for i, (imgs, caps, caplens,allcaps) in enumerate(val_loader):
        for i, sample in enumerate(val_loader):
            if i % 5 != 0:
                # only decode every 5th caption, starting from idx 0.
                # this is because the iterator iterates over all captions in the dataset, not all images.
                if i % args.print_freq_val == 0:
                    print('Validation: [{0}/{1}]\t'
                          'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})\t'.format(i, len(val_loader),
                                                                                    batch_time=batch_time,
                                                                                    loss=losses,
                                                                                    top5=top5accs))
                continue

            # Sample layout depends on whether scene-graph features are used.
            if scene_graph:
                (obj, rel, caps, caplens, orig_caps, obj_mask, rel_mask, pair_idx) = sample
                obj = obj.to(device)
                rel = rel.to(device)
                obj_mask = obj_mask.to(device)
                rel_mask = rel_mask.to(device)
                pair_idx = pair_idx.to(device)
            else:
                (imgs, caps, caplens, orig_caps) = sample
                imgs = imgs.to(device)

            # Move to device, if available
            caps = caps.to(device)
            caplens = caplens.to(device)

            # Forward prop.
            if scene_graph:
                scores, scores_d, caps_sorted, decode_lengths, sort_ind = decoder(object_features=obj,
                                                                                  relation_features=rel,
                                                                                  encoded_captions=caps,
                                                                                  caption_lengths=caplens,
                                                                                  object_mask=obj_mask,
                                                                                  relation_mask=rel_mask,
                                                                                  rel_pair_idx=pair_idx)
            else:
                scores, scores_d, caps_sorted, decode_lengths, sort_ind = decoder(imgs, caps, caplens)

            # Max-pooling across predicted words across time steps for discriminative supervision
            scores_d = scores_d.max(1)[0]

            # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
            targets = caps_sorted[:, 1:]
            # -1 marks positions the discriminative loss should ignore
            targets_d = torch.zeros(scores_d.size(0), scores_d.size(1)).to(device)
            targets_d.fill_(-1)

            for length in decode_lengths:
                targets_d[:, :length - 1] = targets[:, :length - 1]

            # Remove timesteps that we didn't decode at, or are pads
            # pack_padded_sequence is an easy trick to do this
            scores_copy = scores.clone()
            scores = pack_padded_sequence(scores, decode_lengths, batch_first=True, enforce_sorted=True).data
            targets = pack_padded_sequence(targets, decode_lengths, batch_first=True, enforce_sorted=True).data
            #scores, _ = pack_padded_sequence(scores, decode_lengths, batch_first=True)
            #targets, _ = pack_padded_sequence(targets, decode_lengths, batch_first=True)

            # Calculate loss: cross-entropy plus weighted discriminative term
            loss_d = criterion_dis(scores_d, targets_d.long())
            loss_g = criterion_ce(scores, targets)
            loss = loss_g + (10 * loss_d)

            # Keep track of metrics (weighted by the number of decoded words)
            losses.update(loss.item(), sum(decode_lengths))
            top5 = accuracy(scores, targets, 5)
            top5accs.update(top5, sum(decode_lengths))
            batch_time.update(time.time() - start)
            start = time.time()

            if i % args.print_freq_val == 0:
                print('Validation: [{0}/{1}]\t'
                      'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})\t'.format(i, len(val_loader),
                                                                                batch_time=batch_time,
                                                                                loss=losses,
                                                                                top5=top5accs))

            # Store references (true captions), and hypothesis (prediction) for each image
            # If for n images, we have n hypotheses, and references a, b, c... for each image, we need -
            # references = [[ref1a, ref1b, ref1c], [ref2a, ref2b], ...], hypotheses = [hyp1, hyp2, ...]

            # References
            assert (len(sort_ind) == 1), "Cannot have batch_size>1 for validation."
            # a reference is a list of lists:
            # [['the', 'cat', 'sat', 'on', 'the', 'mat'], ['a', 'cat', 'on', 'the', 'mat']]
            references.append(orig_caps)

            # Hypotheses
            _, preds = torch.max(scores_copy, dim=2)
            preds = preds.tolist()
            preds_idxs_no_pads = list()
            for j, p in enumerate(preds):
                preds_idxs_no_pads.append(preds[j][:decode_lengths[j]])  # remove pads
            preds_idxs_no_pads = list(map(lambda c: [w for w in c
                                                     if w not in {word_map['<start>'], word_map['<pad>']}],
                                          preds_idxs_no_pads))
            temp_preds = list()
            # remove <start> and pads and convert idxs to string
            for hyp in preds_idxs_no_pads:
                temp_preds.append([])
                for w in hyp:
                    # BUG FIX: this previously read word_map['pad'], which raises
                    # KeyError — every other use spells the pad token '<pad>'.
                    assert (not w == word_map['<pad>']), "Should have removed all pads."
                    if not w == word_map['<start>']:
                        temp_preds[-1].append(word_map_inv[w])
            preds = temp_preds
            hypotheses.extend(preds)
            assert len(references) == len(hypotheses)

    # Calculate BLEU-4 scores
    # bleu4 = corpus_bleu(references, hypotheses)
    # bleu4 = round(bleu4, 4)

    # compute the metrics: dump hypotheses/references as COCO-format JSON and
    # run the COCO caption evaluation toolkit over them
    hypotheses_file = os.path.join(args.outdir, 'hypotheses', 'Epoch{:0>3d}.Hypotheses.json'.format(epoch))
    references_file = os.path.join(args.outdir, 'references', 'Epoch{:0>3d}.References.json'.format(epoch))
    create_captions_file(range(len(hypotheses)), hypotheses, hypotheses_file)
    create_captions_file(range(len(references)), references, references_file)
    coco = COCO(references_file)
    # add the predicted results to the object
    coco_results = coco.loadRes(hypotheses_file)
    # create the evaluation object with both the ground-truth and the predictions
    coco_eval = COCOEvalCap(coco, coco_results)
    # change to use the image ids in the results object, not those from the ground-truth
    coco_eval.params['image_id'] = coco_results.getImgIds()
    # run the evaluation
    coco_eval.evaluate(verbose=False, metrics=['bleu', 'meteor', 'rouge', 'cider'])
    # Results contains: "Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4", "METEOR", "ROUGE_L", "CIDEr", "SPICE"
    results = coco_eval.eval
    results['loss'] = losses.avg
    results['top5'] = top5accs.avg
    for k, v in results.items():
        print(k+':\t'+str(v))
    # print('\n * LOSS - {loss.avg:.3f}, TOP-5 ACCURACY - {top5.avg:.3f}, BLEU-4 - {bleu}, CIDEr - {cider}\n'
    #       .format(loss=losses, top5=top5accs, bleu=round(results['Bleu_4'], 4), cider=round(results['CIDEr'], 1)))
    return results
def val_epoch(epoch, data_loader, model, criterion, opt, logger, writer):
    """Run one validation epoch.

    :param epoch: current epoch number (for logging)
    :param data_loader: validation DataLoader yielding (inputs, labels)
    :param model: network under evaluation
    :param criterion: loss function
    :param opt: options namespace; only `opt.no_cuda` is read here
    :param logger: object with a .log(dict) method
    :param writer: tensorboard SummaryWriter
    :return: average validation loss
    """
    print('validation at epoch {}'.format(epoch))

    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()
    recalls = AverageMeter()

    end_time = time.time()
    for i, (inputs, labels) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end_time)

        labels = list(map(int, labels))
        inputs = torch.unsqueeze(inputs, 1)  # add a channel dimension
        inputs = inputs.type(torch.FloatTensor)
        if not opt.no_cuda:
            # BUG FIX: `.cuda(async=True)` is a SyntaxError on Python >= 3.7
            # (`async` became a reserved keyword); PyTorch >= 0.4 spells the
            # argument `non_blocking`.
            # NOTE(review): only labels are moved to GPU here — `inputs` stays
            # on CPU; confirm the model handles this (e.g. via DataParallel).
            labels = torch.LongTensor(labels).cuda(non_blocking=True)

        with torch.no_grad():
            inputs = Variable(inputs)
            labels = Variable(labels)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            acc = calculate_accuracy(outputs, labels)
            recall = calculate_recall(outputs, labels)

        # record loss/metrics weighted by batch size
        losses.update(loss.data, inputs.size(0))
        accuracies.update(acc, inputs.size(0))
        recalls.update(recall, inputs.size(0))

        batch_time.update(time.time() - end_time)
        end_time = time.time()

        print('Epoch: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})\t'
              'Recall {recall.val:.3f} ({recall.avg:.3f})'.format(
                  epoch, i + 1, len(data_loader), batch_time=batch_time,
                  data_time=data_time, loss=losses, acc=accuracies,
                  recall=recalls))

    # averages are tensors here (loss.data was accumulated), hence .item()
    logger.log({
        'epoch': epoch,
        'loss': round(losses.avg.item(), 4),
        'acc': round(accuracies.avg.item(), 4),
        'recall': round(recalls.avg.item(), 4)
    })

    writer.add_scalar('val/loss', losses.avg, epoch)
    writer.add_scalar('val/accuracy', accuracies.avg, epoch)
    writer.add_scalar('val/recall', recalls.avg, epoch)
    return losses.avg
def train_one_epoch_mixup(train_loader, mix_loader, model, criterion, optimizer, epoch, meters, since, alpha=0.4, log=None):
    """Train for one epoch using mixup between two loaders.

    Batches from `train_loader` and `mix_loader` are blended with a
    Beta(alpha, alpha) coefficient; targets are blended with the same weight.

    :param meters: dict with history lists 'loss' and 'f1'; appended in place
    :param since: start timestamp used for the elapsed-time column
    :param alpha: Beta distribution parameter for the mixup coefficient
    :param log: file-like object the progress lines are printed to
    :return: the updated `meters` dict
    """
    losses = AverageMeter()
    f1 = AverageMeter()
    model.train()

    # Pull previous/best statistics from history for the progress line.
    if len(meters['f1']):
        previous_loss = meters['loss'][-1]
        previous_f1 = meters['f1'][-1]
        best_f1_epoch = np.argmax(meters['f1'])
        best_f1_score = meters['f1'][best_f1_epoch]
        best_loss_epoch = np.argmin(meters['loss'])
        best_loss = meters['loss'][best_loss_epoch]
    else:
        best_f1_epoch = 0
        best_f1_score = 0
        best_loss_epoch = 0
        best_loss = 0
        previous_loss = 0
        previous_f1 = 0

    for batch_id, ((x1, y1), (x2, y2)) in enumerate(zip(train_loader, mix_loader)):
        batch_x1 = x1.cuda(non_blocking=True)
        batch_x2 = x2.cuda(non_blocking=True)
        # mixup: convex combination of the two batches and of their targets
        lam = np.random.beta(alpha, alpha)
        batch_x = lam * batch_x1 + (1.0 - lam) * batch_x2
        batch_y1 = torch.Tensor(np.array(y1)).float().cuda(non_blocking=True)
        batch_y2 = torch.Tensor(np.array(y2)).float().cuda(non_blocking=True)
        batch_y = lam * batch_y1 + (1.0 - lam) * batch_y2

        output = model(batch_x)
        loss = criterion(output, batch_y)
        losses.update(loss.item(), batch_x.size(0))
        # macro F1 at fixed thresholds (labels > 0.5, sigmoid(logits) > 0.15)
        f1_batch = f1_score(batch_y.cpu() > 0.5,
                            output.sigmoid().cpu() > 0.15,
                            average='macro')
        f1.update(f1_batch, batch_x.size(0))

        optimizer.zero_grad()
        loss.backward()
        if cfg.grident_clip:
            # BUG FIX: torch.nn.utils.clip_grad_norm is deprecated (removed in
            # recent PyTorch); the in-place variant clip_grad_norm_ is the
            # supported API and behaves identically here.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
        optimizer.step()

        print('Epoch %3d\t' % epoch,
              'Batch %3d|%3d\t' % (batch_id, len(train_loader)),
              'Loss: %10.5f\t' % losses.avg,
              'Metrics|F1 Score: %10.5f\t' % f1.avg,
              'Previous Loss: %10.5f\t' % previous_loss,
              'Previous F1 Score: %10.5f\t' % previous_f1,
              'Best loss:%10.5f Epoch %3d\t' % (best_loss, best_loss_epoch),
              # BUG FIX: label typo 'Besr F1' corrected to 'Best F1'
              'Best F1:%10.5f Epoch %3d\t' % (best_f1_score, best_f1_epoch),
              'Time: %s' % time_to_str((timer() - since), 'min'),
              file=log)

    meters['loss'].append(losses.avg)
    meters['f1'].append(f1.avg)
    return meters
def train(train_loader, model, criterion, optimizer, epoch, args, logger, writer, local_rank):
    # One DDP training epoch: forward, all-reduce of loss/accuracy for logging,
    # backward/step, and tensorboard scalars written by rank 0 only.
    batch_times = AverageMeter('Time', ':6.3f')
    data_times = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')  # ':.4e' = scientific notation with 4 decimal places
    top1 = AverageMeter('Acc@1', ':6.2f')

    # switch to train mode
    model.train()

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time = time.time() - end
        data_times.update(data_time)

        images = images.cuda(local_rank, non_blocking=True)
        target = target.cuda(local_rank, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1 = accuracy(output, target, 1)

        # DDP: data synchronization — average loss/acc across processes for logging
        dist.barrier()
        reduced_loss = reduce_mean(loss, args.nprocs)
        reduced_acc1 = reduce_mean(acc1, args.nprocs)

        losses.update(reduced_loss.item(), images.size(0))
        top1.update(reduced_acc1, images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_times.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            ddp_print(
                'Train epoch: [{:d}/{:d}][{:d}/{:d}]\tlr={:.6f}\tce_loss={:.4f}\ttop1_acc={:.4f}\tdata_time={:6.3f}s'
                '\tbatch_time={:6.3f}s'.format(epoch, args.epochs, i,
                                               len(train_loader),
                                               get_learning_rate(optimizer),
                                               losses.avg, top1.avg,
                                               data_times.avg,
                                               batch_times.avg), logger,
                local_rank)
        # NOTE(review): this unconditional break ends the epoch after the first
        # batch — it looks like leftover debug/profiling code; confirm whether
        # it should be removed for real training runs.
        break

    ddp_print(
        '||==> Train epoch: [{:d}/{:d}]\tlr={:.6f}\tce_loss={:.4f}\ttop1_acc={:.4f}\tbatch_time={:6.3f}s'
        .format(epoch, args.epochs, get_learning_rate(optimizer), losses.avg,
                top1.avg, batch_times.avg), logger, local_rank)
    if args.local_rank == 0:
        # save tensorboard
        writer.add_scalar('lr', get_learning_rate(optimizer), epoch)
        writer.add_scalar('Train_ce_loss', losses.avg, epoch)
        writer.add_scalar('Train_top1_accuracy', top1.avg, epoch)
def test(val_loader, model, epoch, use_cuda):
    # Evaluate a correspondence model by propagating the first-frame labels
    # through a video: for each frame, top-k correlation scores against the
    # reference frames vote on the per-cell label map, which is then saved as
    # blended JPG/mask PNG images under args.save_path.
    # NOTE(review): relies on module globals `args`, `params`, `AverageMeter`.
    # `losses` is returned but never updated, so the return value is the
    # meter's initial average.
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model.eval()

    save_objs = args.evaluate

    import os
    if not os.path.exists(args.save_path):
        os.makedirs(
            args.save_path)  # /scratch/xiaolonw/davis_results_mask_mixfcn/')

    # save_path = '/scratch/xiaolonw/davis_results_mask_mixfcn/'
    save_path = args.save_path + '/'
    # img_path = '/scratch/xiaolonw/vlog_frames/'

    save_file = '%s/list.txt' % save_path
    fileout = open(save_file, 'w')

    end = time.time()

    # bar = Bar('Processing', max=len(val_loader))
    for batch_idx, (imgs_total, patch2_total, lbls,
                    meta) in enumerate(tqdm(val_loader)):

        # number of reference frames preceding each target frame
        finput_num_ori = params['videoLen']
        finput_num = finput_num_ori

        # measure data loading time
        data_time.update(time.time() - end)
        imgs_total = torch.autograd.Variable(imgs_total.cuda())
        # patch2_total = torch.autograd.Variable(patch2_total.cuda())

        t00 = time.time()

        bs = imgs_total.size(0)
        total_frame_num = imgs_total.size(1)
        channel_num = imgs_total.size(2)
        height_len = imgs_total.size(3)
        width_len = imgs_total.size(4)

        assert (bs == 1)  # evaluation only supports one video per batch

        folder_paths = meta['folder_path']
        gridx = int(meta['gridx'].data.cpu().numpy()[0])
        gridy = int(meta['gridy'].data.cpu().numpy()[0])
        print('gridx: ' + str(gridx) + ' gridy: ' + str(gridy))
        print('total_frame_num: ' + str(total_frame_num))

        # feature map resolution: input crop downsampled by the network stride (8)
        height_dim = int(params['cropSize'] / 8)
        width_dim = int(params['cropSize'] / 8)

        # processing labels
        lbls = lbls[0].data.cpu().numpy()
        print(lbls.shape)
        # print(patch2_total.size())

        lbls_new = []

        # collect the palette of distinct RGB label colors from frame 0;
        # index 0 is reserved for black (background)
        lbl_set = []
        lbl_set.append(np.zeros(3).astype(np.uint8))
        count_lbls = []
        count_lbls.append(0)

        for i in range(lbls.shape[0]):
            nowlbl = lbls[i].copy()
            if i == 0:
                for j in range(nowlbl.shape[0]):
                    for k in range(nowlbl.shape[1]):
                        pixellbl = nowlbl[j, k, :].astype(np.uint8)
                        flag = 0
                        for t in range(len(lbl_set)):
                            if lbl_set[t][0] == pixellbl[0] and lbl_set[t][
                                    1] == pixellbl[1] and lbl_set[t][
                                        2] == pixellbl[2]:
                                flag = 1
                                count_lbls[t] = count_lbls[t] + 1
                                break
                        if flag == 0:
                            # NOTE(review): a newly seen color starts its count
                            # at 0, so first occurrences are not counted —
                            # confirm against the >10 filter below.
                            lbl_set.append(pixellbl)
                            count_lbls.append(0)
            lbls_new.append(nowlbl)

        # drop rare colors (noise): keep only colors seen more than 10 times
        lbl_set_temp = []
        for i in range(len(lbl_set)):
            if count_lbls[i] > 10:
                lbl_set_temp.append(lbl_set[i])
        lbl_set = lbl_set_temp
        print(lbl_set)
        print(count_lbls)

        t01 = time.time()

        # one-hot encode each frame's label image over the color palette,
        # at full resolution (lbls_resize) and feature resolution (lbls_resize2)
        lbls_resize = np.zeros(
            (lbls.shape[0], lbls.shape[1], lbls.shape[2], len(lbl_set)))
        lbls_resize2 = np.zeros(
            (lbls.shape[0], height_dim, width_dim, len(lbl_set)))

        for i in range(lbls.shape[0]):
            nowlbl = lbls[i].copy()
            for j in range(nowlbl.shape[0]):
                for k in range(nowlbl.shape[1]):
                    pixellbl = nowlbl[j, k, :].astype(np.uint8)
                    for t in range(len(lbl_set)):
                        if lbl_set[t][0] == pixellbl[0] and lbl_set[t][
                                1] == pixellbl[1] and lbl_set[t][
                                    2] == pixellbl[2]:
                            lbls_resize[i, j, k, t] = 1

        for i in range(lbls.shape[0]):
            lbls_resize2[i] = cv2.resize(lbls_resize[i],
                                         (height_dim, width_dim))

        t02 = time.time()
        print(t02 - t01, 'relabel', t01 - t00, 'label')

        # print the images: undo the ImageNet normalization and save each frame
        imgs_set = imgs_total.data
        imgs_set = imgs_set.cpu().numpy()
        imgs_set = imgs_set[0]
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]

        imgs_toprint = []

        # ref image
        for t in range(imgs_set.shape[0]):
            img_now = imgs_set[t]

            for c in range(3):
                img_now[c] = img_now[c] * std[c]
                img_now[c] = img_now[c] + mean[c]

            img_now = img_now * 255
            img_now = np.transpose(img_now, (1, 2, 0))
            img_now = cv2.resize(
                img_now, (img_now.shape[0] * 2, img_now.shape[1] * 2))

            imgs_toprint.append(img_now)

            imname = save_path + str(batch_idx) + '_' + str(t) + '_frame.jpg'
            # NOTE(review): scipy.misc.imsave was removed in SciPy >= 1.2 —
            # this file pins an older SciPy; verify before upgrading.
            scipy.misc.imsave(imname, img_now)

        for t in range(finput_num_ori):
            nowlbl = lbls_new[t]
            imname = save_path + str(batch_idx) + '_' + str(t) + '_label.jpg'
            scipy.misc.imsave(imname, nowlbl)

        # now_batch_size = 4
        now_batch_size = 1  # we use one gpu for eval
        imgs_stack = []
        patch2_stack = []

        im_num = total_frame_num - finput_num_ori
        trans_out_2_set = []
        corrfeat2_set = []

        # preallocated input buffers reused across mini-batches
        imgs_tensor = torch.Tensor(now_batch_size, finput_num, 3,
                                   params['cropSize'], params['cropSize'])
        target_tensor = torch.Tensor(now_batch_size, 1, 3,
                                     params['cropSize'], params['cropSize'])

        imgs_tensor = torch.autograd.Variable(imgs_tensor.cuda())
        target_tensor = torch.autograd.Variable(target_tensor.cuda())

        t03 = time.time()

        # Phase 1: run the model over every target frame and cache the
        # correlation features on CPU.
        for iter in range(0, im_num, now_batch_size):
            # print(iter)
            startid = iter
            endid = iter + now_batch_size
            if endid > im_num:
                endid = im_num
            now_batch_size2 = endid - startid

            for i in range(now_batch_size2):
                # references = frame 0 plus a sliding window before the target
                imgs = imgs_total[:, iter + i + 1:iter + i +
                                  finput_num_ori, :, :, :]
                imgs2 = imgs_total[:, 0, :, :, :].unsqueeze(1)
                imgs = torch.cat((imgs2, imgs), dim=1)
                imgs_tensor[i] = imgs
                target_tensor[i, 0] = imgs_total[0, iter + i + finput_num_ori]

            corrfeat2_now = model(imgs_tensor, target_tensor)
            corrfeat2_now = corrfeat2_now.view(now_batch_size,
                                               finput_num_ori,
                                               corrfeat2_now.size(1),
                                               corrfeat2_now.size(2),
                                               corrfeat2_now.size(3))

            for i in range(now_batch_size2):
                corrfeat2_set.append(corrfeat2_now[i].data.cpu().numpy())

        t04 = time.time()
        print(t04 - t03, 'model forward', t03 - t02, 'image prep')

        # Phase 2: propagate labels frame by frame using the cached features.
        for iter in range(total_frame_num - finput_num_ori):
            if iter % 10 == 0:
                print(iter)

            imgs = imgs_total[:, iter + 1:iter + finput_num_ori, :, :, :]
            imgs2 = imgs_total[:, 0, :, :, :].unsqueeze(1)
            imgs = torch.cat((imgs2, imgs), dim=1)

            # trans_out_2, corrfeat2 = model(imgs, patch2)
            corrfeat2 = corrfeat2_set[iter]
            corrfeat2 = torch.from_numpy(corrfeat2)

            out_frame_num = int(finput_num)
            height_dim = corrfeat2.size(2)
            width_dim = corrfeat2.size(3)

            # (ref_frame, src_h, src_w, tgt_h, tgt_w) correlation volume
            corrfeat2 = corrfeat2.view(corrfeat2.size(0), height_dim,
                                       width_dim, height_dim, width_dim)
            corrfeat2 = corrfeat2.data.cpu().numpy()

            topk_vis = args.topk_vis
            # NOTE(review): np.int is deprecated/removed in NumPy >= 1.24.
            vis_ids_h = np.zeros((corrfeat2.shape[0], height_dim, width_dim,
                                  topk_vis)).astype(np.int)
            vis_ids_w = np.zeros((corrfeat2.shape[0], height_dim, width_dim,
                                  topk_vis)).astype(np.int)
            t05 = time.time()

            # top-k source cells per target cell via argpartition over the
            # flattened source grid
            atten1d = corrfeat2.reshape(corrfeat2.shape[0],
                                        height_dim * width_dim, height_dim,
                                        width_dim)
            ids = np.argpartition(atten1d, -topk_vis, axis=1)[:, -topk_vis:]
            # ids = np.argsort(atten1d, axis=1)[:, -topk_vis:]

            hid = ids // width_dim
            wid = ids % width_dim

            # NOTE(review): vis_ids_h receives `wid` and vis_ids_w receives
            # `hid` — looks swapped, but the corrfeat2[t, ww, hh, h, w]
            # indexing below consumes them consistently; confirm intent.
            vis_ids_h = wid.transpose(0, 2, 3, 1)
            vis_ids_w = hid.transpose(0, 2, 3, 1)
            t06 = time.time()

            img_now = imgs_toprint[iter + finput_num_ori]

            predlbls = np.zeros((height_dim, width_dim, len(lbl_set)))
            # predlbls2 = np.zeros((height_dim * width_dim, len(lbl_set)))

            # accumulate correlation-weighted label votes from each reference
            for t in range(finput_num):
                tt1 = time.time()
                h, w, k = np.meshgrid(np.arange(height_dim),
                                      np.arange(width_dim),
                                      np.arange(topk_vis),
                                      indexing='ij')
                h, w = h.flatten(), w.flatten()
                hh, ww = vis_ids_h[t].flatten(), vis_ids_w[t].flatten()

                if t == 0:
                    # reference 0 is always the first (ground-truth) frame
                    lbl = lbls_resize2[0, hh, ww, :]
                else:
                    lbl = lbls_resize2[t + iter, hh, ww, :]

                np.add.at(predlbls, (h, w),
                          lbl * corrfeat2[t, ww, hh, h, w][:, None])

            t07 = time.time()
            # print(t07-t06, 'lbl proc', t06-t05, 'argsorts')

            predlbls = predlbls / finput_num

            # min-max normalize each label channel independently
            for t in range(len(lbl_set)):
                nowt = t
                predlbls[:, :, nowt] = predlbls[:, :, nowt] - predlbls[:, :, nowt].min()
                predlbls[:, :, nowt] = predlbls[:, :, nowt] / predlbls[:, :, nowt].max()

            # feed the prediction back as the label map for later frames
            lbls_resize2[iter + finput_num_ori] = predlbls

            predlbls_cp = predlbls.copy()
            predlbls_cp = cv2.resize(predlbls_cp,
                                     (params['imgSize'], params['imgSize']))
            predlbls_val = np.zeros((params['imgSize'], params['imgSize'], 3))

            ids = np.argmax(predlbls_cp[:, :, 1:len(lbl_set)], 2)

            # argmax over channels -> palette color per pixel
            predlbls_val = np.array(lbl_set)[np.argmax(predlbls_cp, axis=-1)]
            predlbls_val = predlbls_val.astype(np.uint8)
            predlbls_val2 = cv2.resize(predlbls_val,
                                       (img_now.shape[0], img_now.shape[1]),
                                       interpolation=cv2.INTER_NEAREST)

            # activation_heatmap = cv2.applyColorMap(predlbls, cv2.COLORMAP_JET)
            # 50/50 blend of the frame and the predicted color mask
            img_with_heatmap = np.float32(img_now) * 0.5 + np.float32(
                predlbls_val2) * 0.5

            imname = save_path + str(batch_idx) + '_' + str(
                iter + finput_num_ori) + '_label.jpg'
            imname2 = save_path + str(batch_idx) + '_' + str(
                iter + finput_num_ori) + '_mask.png'

            scipy.misc.imsave(imname, np.uint8(img_with_heatmap))
            scipy.misc.imsave(imname2, np.uint8(predlbls_val))

    fileout.close()

    return losses.avg
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """Run one training epoch.

    Args:
        trainloader: iterable of (inputs, targets) mini-batches.
        model: network to optimize; put into train mode here.
        criterion: loss function.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch index (kept for API parity; unused in the body).
        use_cuda: when True, move batches to GPU `local_rank`.

    Returns:
        (average loss, average top-1 accuracy) over the epoch.
    """
    # switch to train mode
    model.train()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            # BUG FIX: `async=True` is a SyntaxError on Python >= 3.7
            # (`async` became a reserved word); PyTorch's replacement
            # keyword is `non_blocking`.
            # NOTE(review): `local_rank` is read from an enclosing scope --
            # confirm it is defined at module level.
            inputs = inputs.cuda(local_rank)
            targets = targets.cuda(local_rank, non_blocking=True)
        inputs = torch.autograd.Variable(inputs)
        targets = torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.data.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return (losses.avg, top1.avg)
def train(model, train_dataloader, lr=0.01, save_dir='./weights', num_epoches=200,
          model_name="", valid_dataloader=None, batch_num=None, train_type="clean",
          random_layer=None):
    # Full training driver: SGD (momentum + Nesterov) with cross-entropy,
    # manual LR decay and early stopping driven by the validation error.
    # Results are appended per-epoch to <model_name>_results.csv in save_dir.
    if random_layer:
        model_name = model_name + "_randomLayer"
    print(train_dataloader, valid_dataloader)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                                nesterov=True, weight_decay=0.0005)
    cost = nn.CrossEntropyLoss()
    batch_time = AverageMeter()
    losses = AverageMeter()
    error = AverageMeter()
    end = time.time()
    best_error = 1.0
    nobetter_num = 1  # epochs since the last improvement
    for epoch in range(num_epoches):
        # stop after 5 stagnant epochs; decay LR by 10x after 3
        if nobetter_num >= 5:
            print("train done .lr={},best_error={}".format(lr, best_error))
            break
        if nobetter_num >= 3:
            lr = lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        _, train_loss, train_error = train_epoch(
            model,
            train_dataloader if train_type == "clean" else combind_loader(*train_dataloader),
            optimizer, cost, epoch, num_epoches, batch_num=batch_num,
            random_layer=random_layer)
        if valid_dataloader:
            with torch.no_grad():
                _, valid_loss, valid_error = valid_epoch(
                    model,
                    valid_dataloader if train_type == "clean" else combind_loader(*valid_dataloader),
                    cost,
                    batch_num=len(valid_dataloader) if train_type == "clean" else len(valid_dataloader[0]),
                    random_layer=random_layer)
        if valid_dataloader and valid_error < best_error:
            best_error = valid_error
            # NOTE(review): `best_error` was just set to `valid_error`, so
            # `valid_error + 0.005 < best_error` is always False here and
            # `nobetter_num` is always reset to 1 in this branch -- the
            # intended +0.005 "significant improvement" threshold never
            # triggers. Confirm intent before changing.
            if valid_error + 0.005 < best_error:
                nobetter_num += 1
            else:
                nobetter_num = 1
                print('New best error: %.4f' % best_error)
                torch.save(model.state_dict(),
                           os.path.join(save_dir, model_name + '_model.dat'))
        else:
            #torch.save(model.state_dict(), os.path.join(save_dir, 'vgg16_model.dat'))
            nobetter_num += 1
        # NOTE(review): `valid_loss`/`valid_error` are undefined when
        # valid_dataloader is None -- this write would raise NameError.
        with open(os.path.join(save_dir, model_name + '_results.csv'), 'a') as f:
            f.write('%03d,%0.6f,%0.6f,%0.5f,%0.5f,\n' % (
                (epoch + 1),
                train_loss,
                train_error,
                valid_loss,
                valid_error,
            ))
def train(train_loader, model, criterion, optimizer, epoch, log):
    """Train `model` for one epoch over `train_loader`.

    Args:
        train_loader: iterable of (input, target) batches.
        model: network to optimize; set to train mode here.
        criterion: loss function.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index, used for logging only.
        log: open log file handle passed through to print_log.

    Returns:
        (average top-1 accuracy, average loss).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.use_cuda:
            # BUG FIX: `async=True` is a SyntaxError on Python >= 3.7;
            # PyTorch's replacement keyword is `non_blocking`.
            target = target.cuda(non_blocking=True)
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
        # .item() replaces loss.data[0]/prec[0] -- 0-dim tensor indexing was
        # removed in PyTorch >= 0.5.
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    # NOTE(review): placement reconstructed from collapsed source -- this
    # summary line prints once per epoch using the final batch index.
    print_log(' Epoch: [{:03d}][{:03d}/{:03d}] '
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
              'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
              'Loss {loss.val:.4f} ({loss.avg:.4f}) '
              'Prec@1 {top1.val:.3f} ({top1.avg:.3f}) '
              'Prec@5 {top5.val:.3f} ({top5.avg:.3f}) '.format(
                  epoch, i, len(train_loader), batch_time=batch_time,
                  data_time=data_time, loss=losses, top1=top1,
                  top5=top5) + time_string(), log)
    return top1.avg, losses.avg
def validate(val_loader, model, use_val_th=False):
    # Evaluate `model` over `val_loader`, accumulating per-batch labels,
    # probabilities and scores, then compute weighted accuracy metrics.
    # Returns the metrics dict `accs`. With default.generate_features_all
    # set, it instead dumps embedding features and stops via `assert 0`.
    batch_time = AverageMeter()
    # switch to evaluate mode
    model.eval()
    with torch.no_grad():
        end = time.time()
        for i, (inputs, targets, infos) in enumerate(val_loader):
            if default.generate_features_all:
                logger.info('generating features, batch %d', i)
            filenames = [info[0] for info in infos]
            lesion_idxs = [info[1] for info in infos]
            inputs = [input.cuda() for input in inputs]
            # targets is a pair: (classification targets, uncertainty targets)
            unc_targets = targets[1]
            targets = targets[0]

            # compute output
            out = model(inputs)
            # score propagation adds a refined second classification head
            if config.SCORE_PROPAGATION:
                prob_np = out['class_prob2'].detach().cpu().numpy()
                scores_np = out['class_score2'].detach().cpu().numpy()
            else:
                prob_np = out['class_prob1'].detach().cpu().numpy()
                scores_np = out['class_score1'].detach().cpu().numpy()
            target1 = targets.numpy() > 0
            # weight mask: 1 where the label is certain (uncertainty == 0)
            pred_wt = unc_targets.numpy() == 0
            if i == 0:
                # first batch initializes the accumulators
                target_all = target1
                prob_all = prob_np
                score_all = scores_np
                lesion_idx_all = lesion_idxs
                pred_wt_all = pred_wt
                if default.generate_features_all:
                    ft_all = out['emb']
            else:
                target_all = np.vstack((target_all, target1))
                prob_all = np.vstack((prob_all, prob_np))
                score_all = np.vstack((score_all, scores_np))
                pred_wt_all = np.vstack((pred_wt_all, pred_wt))
                lesion_idx_all.extend(lesion_idxs)
                if default.generate_features_all:
                    ft_all = np.vstack((ft_all, out['emb']))

        # NOTE(review): block placement reconstructed from collapsed source;
        # everything below is taken to run after the full pass.
        if default.generate_features_all:
            save_ft_to_file(ft_all)
            # deliberate hard stop once features are dumped
            assert 0, 'all features have been generated and saved.'

        if config.TEST.USE_CALIBRATED_TH:
            accs, pred_label_all = compute_all_acc_wt_th(
                target_all, prob_all, pred_wt_all, use_val_th)
        else:
            pred_label_all = score2label(prob_all, config.TEST.SCORE_PARAM)
            accs = compute_all_acc_wt(target_all, pred_label_all, prob_all,
                                      pred_wt_all)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        if i % default.frequent == 0:
            logger.info('Test: [{0}/{1}]\t'
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        '{crit} {accs:.3f}'
                        .format(
                            i, len(val_loader), batch_time=batch_time,
                            crit=config.TEST.CRITERION,
                            accs=accs[config.TEST.CRITERION]
                        ))
    print_accs(accs)
    # count of certain negative ground-truth labels per class
    accs['ex_neg'] = np.sum((target_all == 0) & pred_wt_all, axis=0)
    if use_val_th:
        # only save for test set not val set
        save_acc_to_file(accs, val_loader, 'all_terms')
    if default.mode == 'infer' and use_val_th:
        save_test_scores_to_file(score_all, pred_label_all, target_all, accs,
                                 lesion_idx_all)
    return accs
def main():
    # End-to-end CIFAR training driver: builds data loaders and model,
    # optionally resumes from a checkpoint, then alternates train/validate
    # for args.epochs epochs while recording accuracy curves.
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(os.path.join(args.save_path,
                            'log_seed_{}.txt'.format(args.manualSeed)), 'w')
    print_log('save path : {}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed: {}".format(args.manualSeed), log)
    print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
    print_log("torch version : {}".format(torch.__version__), log)
    print_log("cudnn version : {}".format(torch.backends.cudnn.version()), log)

    # Init dataset
    if not os.path.isdir(args.data_path):
        os.makedirs(args.data_path)

    # per-dataset channel statistics (scaled to [0, 1])
    if args.dataset == 'cifar10':
        mean = [x / 255 for x in [125.3, 123.0, 113.9]]
        std = [x / 255 for x in [63.0, 62.1, 66.7]]
    elif args.dataset == 'cifar100':
        mean = [x / 255 for x in [129.3, 124.1, 112.4]]
        std = [x / 255 for x in [68.2, 65.4, 70.4]]
    else:
        # NOTE: because only cifar10/cifar100 are handled here, the
        # svhn/stl10 branches below are unreachable -- this assert fires
        # first for any other dataset name.
        assert False, "Unknow dataset : {}".format(args.dataset)

    train_transform = transforms.Compose(
        [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4),
         transforms.ToTensor(), transforms.Normalize(mean, std)])
    test_transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean, std)])

    if args.dataset == 'cifar10':
        train_data = dset.CIFAR10(args.data_path, train=True,
                                  transform=train_transform, download=True)
        test_data = dset.CIFAR10(args.data_path, train=False,
                                 transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'cifar100':
        train_data = dset.CIFAR100(args.data_path, train=True,
                                   transform=train_transform, download=True)
        test_data = dset.CIFAR100(args.data_path, train=False,
                                  transform=test_transform, download=True)
        num_classes = 100
    elif args.dataset == 'svhn':
        train_data = dset.SVHN(args.data_path, split='train',
                               transform=train_transform, download=True)
        test_data = dset.SVHN(args.data_path, split='test',
                              transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'stl10':
        train_data = dset.STL10(args.data_path, split='train',
                                transform=train_transform, download=True)
        test_data = dset.STL10(args.data_path, split='test',
                               transform=test_transform, download=True)
        num_classes = 10
    elif args.dataset == 'imagenet':
        assert False, 'Do not finish imagenet code'
    else:
        assert False, 'Do not support dataset : {}'.format(args.dataset)

    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True,
        num_workers=args.workers, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    # Init model, criterion, and optimizer
    #net = models.__dict__[args.arch](num_classes).cuda()
    net = SENet34()

    # define loss function (criterion) and optimizer
    criterion = F.nll_loss
    optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'],
                                momentum=state['momentum'],
                                weight_decay=state['decay'], nesterov=True)
    if args.use_cuda:
        net.cuda()
    recorder = RecorderMeter(args.epochs)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print_log("=> loading checkpoint '{}'".format(args.resume), log)
            checkpoint = torch.load(args.resume)
            recorder = checkpoint['recorder']
            args.start_epoch = checkpoint['epoch']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print_log("=> loaded checkpoint '{}' (epoch {})"
                      .format(args.resume, checkpoint['epoch']), log)
        else:
            print_log("=> no checkpoint found at '{}'".format(args.resume), log)
    else:
        print_log("=> do not use any checkpoint for model", log)

    if args.evaluate:
        validate(test_loader, net, criterion, log)
        return

    # Main loop
    start_time = time.time()
    epoch_time = AverageMeter()
    for epoch in range(args.start_epoch, args.epochs):
        current_learning_rate = adjust_learning_rate(optimizer, epoch,
                                                     args.gammas, args.schedule)

        # ETA estimate from the running mean epoch duration
        need_hour, need_mins, need_secs = convert_secs2time(
            epoch_time.avg * (args.epochs - epoch))
        need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(
            need_hour, need_mins, need_secs)

        print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(
            time_string(), epoch, args.epochs, need_time, current_learning_rate) \
            + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(
                recorder.max_accuracy(False),
                100 - recorder.max_accuracy(False)), log)

        # train for one epoch
        train_acc, train_los = train(train_loader, net, criterion, optimizer,
                                     epoch, log)

        # evaluate on validation set
        val_acc, val_los = validate(test_loader, net, criterion, log)
        is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': net.state_dict(),
            'recorder': recorder,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.save_path, 'checkpoint.pth.tar')

        # measure elapsed time
        epoch_time.update(time.time() - start_time)
        start_time = time.time()
        recorder.plot_curve(os.path.join(args.save_path, 'curve.png'))

    log.close()
def val_epoch(epoch, data_loader, model, criterion, opt, logger):
    """Validate `model` for one epoch; returns the average loss.

    Args:
        epoch: epoch index, used for logging.
        data_loader: iterable of (inputs, targets) batches.
        model: network to evaluate; set to eval mode here.
        criterion: loss function.
        opt: options namespace; only `opt.no_cuda` is read.
        logger: object with a .log(dict) method for epoch summaries.
    """
    print('validation at epoch {}'.format(epoch))

    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    end_time = time.time()
    # torch.no_grad() replaces the deprecated volatile=True Variable flag
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(data_loader):
            data_time.update(time.time() - end_time)

            if not opt.no_cuda:
                # `async=True` is a SyntaxError on Python >= 3.7; PyTorch's
                # replacement keyword is `non_blocking`.
                targets = targets.cuda(non_blocking=True)

            outputs = model(inputs)
            loss = criterion(outputs, targets)
            acc = calculate_accuracy(outputs, targets)

            # .item() replaces loss.data[0] (removed in PyTorch >= 0.5)
            losses.update(loss.item(), inputs.size(0))
            accuracies.update(acc, inputs.size(0))

            batch_time.update(time.time() - end_time)
            end_time = time.time()

            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch, i + 1, len(data_loader),
                      batch_time=batch_time, data_time=data_time,
                      loss=losses, acc=accuracies))

    logger.log({'epoch': epoch, 'loss': losses.avg, 'acc': accuracies.avg})
    return losses.avg
def train(segmentation_module, iterator, optimizers, history, epoch, args):
    # One training epoch for the multi-task segmentation module
    # (object / part / scene / material heads). The module returns dicts of
    # per-task losses and metrics; these are averaged and displayed every
    # args.disp_iter steps, and the LR is adjusted every iteration.
    batch_time = AverageMeter()
    data_time = AverageMeter()
    names = ['object', 'part', 'scene', 'material']
    ave_losses = {n: AverageMeter() for n in names}
    ave_metric = {n: AverageMeter() for n in names}
    ave_losses['total'] = AverageMeter()

    # keep BatchNorm frozen when args.fix_bn is set
    segmentation_module.train(not args.fix_bn)

    # main loop
    tic = time.time()
    for i in range(args.epoch_iters):
        batch_data, src_idx = next(iterator)
        data_time.update(time.time() - tic)

        segmentation_module.zero_grad()

        # forward pass
        ret = segmentation_module(batch_data)

        # Backward
        loss = ret['loss']['total'].mean()
        loss.backward()
        # one optimizer per sub-network (e.g. encoder and decoder)
        for optimizer in optimizers:
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - tic)
        tic = time.time()

        # measure losses
        for name in ret['loss'].keys():
            ave_losses[name].update(ret['loss'][name].mean().item())

        # measure metrics
        # NOTE: scene metric will be much lower than benchmark
        for name in ret['metric'].keys():
            ave_metric[name].update(ret['metric'][name].mean().item())

        # calculate accuracy, and display
        if i % args.disp_iter == 0:
            loss_info = "Loss: total {:.4f}, ".format(ave_losses['total'].average())
            loss_info += ", ".join(["{} {:.2f}".format(
                n[0],
                ave_losses[n].average() if ave_losses[n].average() is not None else 0)
                for n in names])
            acc_info = "Accuracy: " + ", ".join(["{} {:4.2f}".format(
                n[0],
                ave_metric[n].average() if ave_metric[n].average() is not None else 0)
                for n in names])
            print('Epoch: [{}][{}/{}], Time: {:.2f}, Data: {:.2f}, '
                  'LR: encoder {:.6f}, decoder {:.6f}, {}, {}'
                  .format(epoch, i, args.epoch_iters,
                          batch_time.average(), data_time.average(),
                          args.running_lr_encoder, args.running_lr_decoder,
                          acc_info, loss_info))

            fractional_epoch = epoch - 1 + 1. * i / args.epoch_iters
            history['train']['epoch'].append(fractional_epoch)
            history['train']['loss'].append(loss.item())

        # adjust learning rate
        cur_iter = i + (epoch - 1) * args.epoch_iters
        adjust_learning_rate(optimizers, cur_iter, args)
def train(self, epoch):
    # One training epoch combining three data streams: a softmax
    # (cross-entropy) loader, a triplet loader, and a gallery loader whose
    # outputs are compared against query features via an entropy loss.
    batch_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()

    # NOTE(review): calling scheduler.step() before the optimizer steps is
    # the pre-PyTorch-1.1 ordering -- confirm against the installed version.
    self.scheduler.step()
    self.model.train()
    end = time.time()
    lr = self.scheduler.get_lr()[0]
    # for batch, (softmax_data, triplet_data) in enumerate(itertools.zip_longest(self.softmax_train_loader, self.triplet_train_loader)):
    for batch, (softmax_data, triplet_data, gallery_data) in enumerate(
            zip(self.softmax_train_loader, self.triplet_train_loader,
                self.gallery_loader)):
        loss = 0
        # 1st: softmax (classification) branch
        softmax_inputs, softmax_labels = softmax_data
        # move to CUDA when a GPU is available
        softmax_inputs = softmax_inputs.to(self.device) if torch.cuda.device_count() >= 1 else softmax_inputs
        softmax_labels = softmax_labels.to(self.device) if torch.cuda.device_count() >= 1 else softmax_labels
        softmax_score, softmax_outputs = self.model(softmax_inputs)
        traditional_loss = self.softmax_loss(softmax_score, softmax_outputs,
                                             softmax_labels)
        loss += traditional_loss
        # total
        losses.update(loss.item(), softmax_inputs.size(0))
        prec = (softmax_score.max(1)[1] == softmax_labels).float().mean()
        acc.update(prec, softmax_inputs.size(0))
        # 2nd: triplet branch
        triplet_inputs, triplet_labels = triplet_data
        # move to CUDA when a GPU is available
        triplet_inputs = triplet_inputs.to(self.device) if torch.cuda.device_count() >= 1 else triplet_inputs
        triplet_labels = triplet_labels.to(self.device) if torch.cuda.device_count() >= 1 else triplet_labels
        triplet_score, triplet_outputs = self.model(triplet_inputs)
        triplet_loss = self.triplet_loss(triplet_score, triplet_outputs,
                                         triplet_labels)
        loss += triplet_loss
        # 3rd: gallery/query entropy branch
        gallery_inputs, gallery_labels = gallery_data
        gallery_inputs = gallery_inputs.to(self.device) if torch.cuda.device_count() >= 1 else gallery_inputs
        gallery_score, gallery_outputs = self.model(gallery_inputs)
        query_feats = []
        # NOTE(review): `data` and `model` below are read from module scope,
        # not `self` -- confirm `model(...)` is not meant to be
        # `self.model(...)`.
        for query_inputs, query_labels in data.query_loader:
            query_inputs = query_inputs.cuda()
            query_score, query_outputs = model(query_inputs)
            query_feats.append(query_outputs)
            logger.debug('query_outputs: {}'.format(query_outputs.shape))
        query_feats = torch.cat(query_feats, dim=0)
        logger.debug('query_feats: {}'.format(query_feats.shape))
        entropy = self.entropy_loss(gallery_outputs, query_feats)
        loss += entropy
        self.optimizer.zero_grad()
        if opt.fp16:  # we use optimier to backward loss
            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
            # loss.backward(retain_graph=True)
        self.optimizer.step()
        # measure elapsed batch time
        batch_time.update(time.time() - end)
        end = time.time()
        # print timing and running results every 10 batches
        if (batch+1) % 10 == 0:
            logger.debug('Epoch: [{}][{}/{}]\t'
                         'Base_lr: [{:.2e}]\t'
                         'Time: ({batch_time.avg:.3f})\t'
                         'Loss_val: {loss.val:.4f} (Loss_avg: {loss.avg:.4f})\t'
                         'Accuray_val: {acc.val:.4f} (Accuray_avg: {acc.avg:.4f})'.format(
                             epoch, batch+1, len(self.softmax_train_loader), lr,
                             batch_time=batch_time, loss=losses, acc=acc))
    # per-epoch summary, logged and appended to the log file
    log_text = 'Epoch[{}]\tBase_lr {:.2e}\tAccuray {acc.avg:.4f}\tLoss {loss.avg:.4f}'.format(
        epoch, lr, acc=acc, loss=losses)
    logger.info(log_text)
    with open(log_file, 'a') as f:
        f.write(log_text + '\n')
        f.flush()
def main():
    # Evaluate point-localization predictions against ground truth at two
    # matching scales (sigma_s: strict / sigma_l: loose) and report
    # precision/recall/F1 per scale plus counting errors (MAE/MSE/NAE).
    cnt_errors = {
        'mae': AverageMeter(),
        'mse': AverageMeter(),
        'nae': AverageMeter(),
    }
    metrics_s = {
        'tp': AverageMeter(), 'fp': AverageMeter(), 'fn': AverageMeter(),
        'tp_c': AverageCategoryMeter(num_classes),
        'fn_c': AverageCategoryMeter(num_classes)
    }
    metrics_l = {
        'tp': AverageMeter(), 'fp': AverageMeter(), 'fn': AverageMeter(),
        'tp_c': AverageCategoryMeter(num_classes),
        'fn_c': AverageCategoryMeter(num_classes)
    }

    pred_data, gt_data = read_pred_and_gt(pred_file, gt_file)
    for i_sample in id_std:
        print(i_sample)
        # init
        gt_p, pred_p, fn_gt_index, tp_pred_index, fp_pred_index = [], [], [], [], []
        tp_s, fp_s, fn_s, tp_l, fp_l, fn_l = [0, 0, 0, 0, 0, 0]
        tp_c_s = np.zeros([num_classes])
        fn_c_s = np.zeros([num_classes])
        tp_c_l = np.zeros([num_classes])
        fn_c_l = np.zeros([num_classes])

        # case 1: no GT points -> every prediction is a false positive
        if gt_data[i_sample]['num'] == 0 and pred_data[i_sample]['num'] != 0:
            pred_p = pred_data[i_sample]['points']
            fp_pred_index = np.array(range(pred_p.shape[0]))
            fp_s = fp_pred_index.shape[0]
            fp_l = fp_pred_index.shape[0]

        # case 2: no predictions -> every GT point is a false negative
        if pred_data[i_sample]['num'] == 0 and gt_data[i_sample]['num'] != 0:
            gt_p = gt_data[i_sample]['points']
            level = gt_data[i_sample]['level']
            fn_gt_index = np.array(range(gt_p.shape[0]))
            fn_s = fn_gt_index.shape[0]
            fn_l = fn_gt_index.shape[0]
            for i_class in range(num_classes):
                fn_c_s[i_class] = (level[fn_gt_index] == i_class).sum()
                fn_c_l[i_class] = (level[fn_gt_index] == i_class).sum()

        # case 3: both exist -> match predictions to GT by Euclidean distance
        if gt_data[i_sample]['num'] != 0 and pred_data[i_sample]['num'] != 0:
            pred_p = pred_data[i_sample]['points']
            gt_p = gt_data[i_sample]['points']
            # per-point matching tolerances (column 0: small, column 1: large)
            sigma_s = gt_data[i_sample]['sigma'][:, 0]
            sigma_l = gt_data[i_sample]['sigma'][:, 1]
            level = gt_data[i_sample]['level']

            # dist
            dist_matrix = ss.distance_matrix(pred_p, gt_p, p=2)
            match_matrix = np.zeros(dist_matrix.shape, dtype=bool)

            # sigma_s and sigma_l
            tp_s, fp_s, fn_s, tp_c_s, fn_c_s = compute_metrics(
                dist_matrix, match_matrix, pred_p.shape[0], gt_p.shape[0],
                sigma_s, level)
            tp_l, fp_l, fn_l, tp_c_l, fn_c_l = compute_metrics(
                dist_matrix, match_matrix, pred_p.shape[0], gt_p.shape[0],
                sigma_l, level)

        metrics_s['tp'].update(tp_s)
        metrics_s['fp'].update(fp_s)
        metrics_s['fn'].update(fn_s)
        metrics_s['tp_c'].update(tp_c_s)
        metrics_s['fn_c'].update(fn_c_s)
        metrics_l['tp'].update(tp_l)
        metrics_l['fp'].update(fp_l)
        metrics_l['fn'].update(fn_l)
        metrics_l['tp_c'].update(tp_c_l)
        metrics_l['fn_c'].update(fn_c_l)

        # per-sample counting errors
        gt_count, pred_cnt = gt_data[i_sample]['num'], pred_data[i_sample][
            'num']
        s_mae = abs(gt_count - pred_cnt)
        s_mse = (gt_count - pred_cnt) * (gt_count - pred_cnt)
        cnt_errors['mae'].update(s_mae)
        cnt_errors['mse'].update(s_mse)
        if gt_count != 0:
            s_nae = abs(gt_count - pred_cnt) / gt_count
            cnt_errors['nae'].update(s_nae)

    # aggregate precision/recall/F1 (the 1e-20 guards divide-by-zero)
    ap_s = metrics_s['tp'].sum / (metrics_s['tp'].sum + metrics_s['fp'].sum + 1e-20)
    ar_s = metrics_s['tp'].sum / (metrics_s['tp'].sum + metrics_s['fn'].sum + 1e-20)
    f1m_s = 2 * ap_s * ar_s / (ap_s + ar_s)
    ar_c_s = metrics_s['tp_c'].sum / (metrics_s['tp_c'].sum + metrics_s['fn_c'].sum + 1e-20)
    ap_l = metrics_l['tp'].sum / (metrics_l['tp'].sum + metrics_l['fp'].sum + 1e-20)
    ar_l = metrics_l['tp'].sum / (metrics_l['tp'].sum + metrics_l['fn'].sum + 1e-20)
    f1m_l = 2 * ap_l * ar_l / (ap_l + ar_l)
    ar_c_l = metrics_l['tp_c'].sum / (metrics_l['tp_c'].sum + metrics_l['fn_c'].sum + 1e-20)

    print('-----Localization performance-----')
    print('AP_small: ' + str(ap_s))
    print('AR_small: ' + str(ar_s))
    print('F1m_small: ' + str(f1m_s))
    print('AR_small_category: ' + str(ar_c_s))
    print(' avg: ' + str(ar_c_s.mean()))
    print('AP_large: ' + str(ap_l))
    print('AR_large: ' + str(ar_l))
    print('F1m_large: ' + str(f1m_l))
    print('AR_large_category: ' + str(ar_c_l))
    print(' avg: ' + str(ar_c_l.mean()))

    mae = cnt_errors['mae'].avg
    mse = np.sqrt(cnt_errors['mse'].avg)
    nae = cnt_errors['nae'].avg
    print('-----Counting performance-----')
    print('MAE: ' + str(mae))
    print('MSE: ' + str(mse))
    print('NAE: ' + str(nae))
def valid_epoch(model, valid_dataloader, cost, print_freq=40, batch_num=None, random_layer=None):
    """Run one validation pass over `valid_dataloader`.

    Returns (average batch time, average loss, average error rate).
    Caller is expected to wrap this in torch.no_grad().
    """
    timing = AverageMeter()
    loss_meter = AverageMeter()
    err_meter = AverageMeter()

    model.eval()
    tick = time.time()
    for step, (imgs, lbls, perturb) in enumerate(valid_dataloader):
        imgs = imgs.cuda()
        lbls = lbls.cuda()
        # scale the noise input by the global phi factor
        perturb = perturb.cuda() * args.phi
        if random_layer:
            imgs = random_layer(imgs)

        logits = model(imgs, perturb)
        batch_loss = cost(logits, lbls)

        n = lbls.size(0)
        preds = logits.max(1)[1]
        wrong = torch.ne(preds.cpu(), lbls.cpu()).float().sum().item()
        err_meter.update(wrong / n, n)
        loss_meter.update(batch_loss.item(), n)

        timing.update(time.time() - tick)
        tick = time.time()

        if step % print_freq == 0:
            print('\t'.join([
                'Valid',
                'Iter: [%d/%d]' % (step + 1, batch_num),
                'Time %.3f (%.3f)' % (timing.val, timing.avg),
                'Loss %.4f (%.4f)' % (loss_meter.val, loss_meter.avg),
                'Error %.4f (%.4f)' % (err_meter.val, err_meter.avg),
            ]))
    return timing.avg, loss_meter.avg, err_meter.avg
def test(testloader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on the test set.

    Args:
        testloader: iterable of (inputs, targets) batches.
        model: network to evaluate; set to eval mode here.
        criterion: loss function.
        epoch: epoch index (kept for API parity; unused in the body).
        use_cuda: move batches to the default GPU when True.

    Returns:
        (average loss, average top-1 accuracy, average top-5 accuracy).
    """
    # removed: unused `global best_acc` declaration and `start_index` local
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    for batch_idx, (inputs, targets) in enumerate(testloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            inputs = torch.autograd.Variable(inputs)
            targets = torch.autograd.Variable(targets)

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        progress_bar(
            batch_idx, len(testloader),
            'Loss: %.2f | Top1: %.2f | Top5: %.2f' %
            (losses.avg, top1.avg, top5.avg))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
    return (losses.avg, top1.avg, top5.avg)
def train(train_loader, model, criterions, optimizer, epoch):
    """One training epoch for the multi-label lesion classifier.

    Combines weighted cross-entropy, RHEM, and triplet metric losses (plus a
    second CE term when score propagation is enabled), each scaled by its
    config.TRAIN.* weight. Logs running loss/accuracy every default.frequent
    batches.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accs = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (inputs, targets, infos) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # construct targets: classification, uncertainty, and exclusion labels
        target_clsf, target_unc, target_ex = targets
        target_conf = (target_clsf + target_ex) > 0
        # RHEM only weights labels that are confidently positive or excluded
        rhem_wt = torch.zeros_like(target_clsf).cuda()
        rhem_wt[target_conf] = 1.
        target_clsf = target_clsf.cuda()
        target_clsf_wt = 1 - target_unc.cuda()

        # run model
        inputs = [input.cuda() for input in inputs]
        out = model(inputs)

        # compute losses
        emb = out['emb']
        A, P, N = select_triplets_multilabel(emb, target_clsf)
        loss_metric = criterions['metric'](A, P, N)
        prob1 = out['class_prob1']
        loss_ce1 = criterions['wce'](prob1, target_clsf, infos, wt=target_clsf_wt)
        loss_rhem = criterions['rhem'](prob1, target_clsf, infos, wt=rhem_wt)
        if config.SCORE_PROPAGATION:
            prob2 = out['class_prob2']
            loss_ce2 = criterions['wce'](prob2, target_clsf, infos, wt=target_clsf_wt)
            sub_losses = [loss_ce1, loss_rhem, loss_metric, loss_ce2]
            wts_names = ['CE_LOSS_WT_1', 'RHEM_LOSS_WT', 'TRIPLET_LOSS_WT', 'CE_LOSS_WT_2']
        else:
            sub_losses = [loss_ce1, loss_rhem, loss_metric]
            wts_names = ['CE_LOSS_WT_1', 'RHEM_LOSS_WT', 'TRIPLET_LOSS_WT']
        loss = 0
        # FIX: getattr replaces eval('config.TRAIN.' + name1) -- same
        # attribute lookup without the arbitrary-code-execution surface.
        wts = [getattr(config.TRAIN, name1) for name1 in wts_names]
        for wt1, loss1 in zip(wts, sub_losses):
            loss += wt1 * loss1
        losses.update(loss.item())

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        clip_gradient(model, default.clip_gradient)
        optimizer.step()

        # measure accuracy on the propagated head when enabled
        if config.SCORE_PROPAGATION:
            prob_np = prob2.detach().cpu().numpy()
        else:
            prob_np = prob1.detach().cpu().numpy()
        pred_labels = score2label(prob_np, config.TEST.SCORE_PARAM)
        targets_np = target_clsf.detach().cpu().numpy()
        target_unc = target_unc.numpy()
        acc = compute_all_acc_wt(targets_np > 0, pred_labels, prob_np,
                                 target_unc == 0)[config.TEST.CRITERION]
        accs.update(acc)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % default.frequent == 0:
            crit = 'mean_pcF1' if config.TEST.CRITERION == 'mean_perclass_f1' else config.TEST.CRITERION
            msg = 'Epoch: [{0}][{1}/{2}] Time {batch_time.val:.1f} ' \
                  '({batch_time.avg:.1f}, {data_time.val:.1f})\t' \
                .format(epoch, i, len(train_loader),
                        batch_time=batch_time, data_time=data_time)
            msg += 'Loss {loss.val:.3f} ({loss.avg:.3f}){{'.format(loss=losses)
            for wt1, loss1 in zip(wts, sub_losses):
                msg += '%.3f*%.1f, ' % (loss1, wt1)
            # (dropped the unused `ms=` kwarg -- str.format ignored it)
            msg += '}}\t{crit} {accs.val:.3f} ({accs.avg:.3f})'.format(
                crit=crit, accs=accs)
            logger.info(msg)
def train_epoch(model, train_dataloader, optimizer, cost, epoch, n_epochs,
                print_freq=40, batch_num=None, random_layer=None):
    """Train `model` for one epoch.

    Args:
        model: network called as model(images, noises).
        train_dataloader: yields (images, labels, noises) batches.
        optimizer: optimizer stepped once per batch.
        cost: loss function.
        epoch, n_epochs: current / total epoch count, for logging.
        print_freq: log every `print_freq` batches.
        batch_num: total batch count, for the log line only.
        random_layer: optional callable applied to images before the model.

    Returns:
        (average batch time, average loss, average error rate).
    """
    print(random_layer)
    batch_time = AverageMeter()
    losses = AverageMeter()
    error = AverageMeter()
    end = time.time()

    model.train()
    for i, (images, labels, noises) in enumerate(train_dataloader):
        images, labels, noises = images.cuda(), labels.cuda(), noises.cuda()
        # scale the noise input by the global phi factor
        noises = noises * args.phi
        if random_layer:
            images = random_layer(images)

        optimizer.zero_grad()
        outputs = model(images, noises)
        loss = cost(outputs, labels)
        loss.backward()
        # BUG FIX: optimizer.step() was commented out, so gradients were
        # computed but the weights were never updated.
        optimizer.step()

        batch_size = labels.size(0)
        outputs = outputs.max(1)[1]
        error.update(
            torch.ne(outputs.cpu(), labels.cpu()).float().sum().item() / batch_size,
            batch_size)
        losses.update(loss.item(), batch_size)

        batch_time.update(time.time() - end)
        end = time.time()
        if i % print_freq == 0:
            res = '\t'.join([
                'Epoch: [%d/%d]' % (epoch + 1, n_epochs),
                'Iter: [%d/%d]' % (i + 1, batch_num),
                'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                'Error %.4f (%.4f)' % (error.val, error.avg),
            ])
            print(res)
    return batch_time.avg, losses.avg, error.avg
def test(val_loader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on `val_loader` with a progress bar.

    Returns (average loss, average top-1 accuracy).
    """
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=len(val_loader))
    for batch_idx, (inputs, targets) in enumerate(val_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # torch.no_grad() replaces the deprecated volatile=True flag
        with torch.no_grad():
            inputs = torch.autograd.Variable(inputs)
            targets = torch.autograd.Variable(targets)

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        # .item() replaces loss.data[0]/prec[0] (removed in PyTorch >= 0.5)
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(val_loader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    """Train `model` for one epoch using a CUDA data prefetcher.

    Args:
        train_loader: DataLoader wrapped by data_prefetcher below.
        model: network to optimize; set to train mode here.
        criterion: loss function.
        optimizer: optimizer stepped once per batch.
        epoch: epoch index (kept for API parity; unused in the body).
        use_cuda: unused -- the prefetcher already moves data to GPU.

    Returns:
        (average loss, average top-1 accuracy).
    """
    # switch to train mode
    model.train()
    torch.set_grad_enabled(True)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(train_loader))
    show_step = len(train_loader) // 10
    # prefetcher overlaps host-to-device copies with compute
    prefetcher = data_prefetcher(train_loader)
    inputs, targets = prefetcher.next()

    batch_idx = -1
    while inputs is not None:
        batch_idx += 1

        batch_size = inputs.size(0)
        # drop the final partial batch
        if batch_size < args.train_batch:
            break

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        # FIX: store Python floats via .item() -- `loss.data` and the raw
        # prec tensors kept 0-dim CUDA tensors alive inside the meters.
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        if (batch_idx) % show_step == 0:
            print(bar.suffix)
        bar.next()
        inputs, targets = prefetcher.next()
    bar.finish()
    return (losses.avg, top1.avg)
def validate(val_loader, net, criterion, optimizer, epoch, train_args, visualize):
    """Sliding-window validation pass for semantic segmentation.

    Each sample arrives pre-sliced into overlapping crops; per-slice logits are
    accumulated into a full-size buffer, averaged by overlap count, then scored
    against the ground truth with `evaluate`. Also saves checkpoints, optional
    prediction/gt images, and writes scalars to the tensorboard `writer`.

    Returns the average validation loss.
    """
    # the following code is written assuming that batch size is 1
    net.eval()
    val_loss = AverageMeter()
    # Full-resolution accumulation buffers: one (H, 2H) map per validation image,
    # where H = args['shorter_size'].
    gts_all = np.zeros((len(val_loader), args['shorter_size'], 2 * args['shorter_size']), dtype=int)
    predictions_all = np.zeros((len(val_loader), args['shorter_size'], 2 * args['shorter_size']), dtype=int)
    for vi, data in enumerate(val_loader):
        input, gt, slices_info = data
        # expected ranks: input (1, n_slices, C, h, w), gt (1, n_slices, h, w),
        # slices_info (1, n_slices, 6) — TODO confirm against the dataset class
        assert len(input.size()) == 5 and len(gt.size()) == 4 and len(slices_info.size()) == 3
        # in-place: bring the slice dimension to the front so we can iterate slices
        input.transpose_(0, 1)
        gt.transpose_(0, 1)
        slices_info.squeeze_(0)
        assert input.size()[3:] == gt.size()[2:]
        # count tracks how many slices covered each pixel (for averaging overlaps)
        count = torch.zeros(args['shorter_size'], 2 * args['shorter_size']).cuda()
        output = torch.zeros(voc.num_classes, args['shorter_size'], 2 * args['shorter_size']).cuda()
        slice_batch_pixel_size = input.size(1) * input.size(3) * input.size(4)
        for input_slice, gt_slice, info in zip(input, gt, slices_info):
            input_slice = Variable(input_slice).cuda()
            gt_slice = Variable(gt_slice).cuda()
            output_slice = net(input_slice)
            assert output_slice.size()[2:] == gt_slice.size()[1:]
            assert output_slice.size()[1] == voc.num_classes
            # info = (row_start, row_end, col_start, col_end, valid_h, valid_w)
            # — presumed layout from the indexing below; verify against the loader
            output[:, info[0]: info[1], info[2]: info[3]] += output_slice[0, :, :info[4], :info[5]].data
            gts_all[vi, info[0]: info[1], info[2]: info[3]] += gt_slice[0, :info[4], :info[5]].data.cpu().numpy()
            count[info[0]: info[1], info[2]: info[3]] += 1
            val_loss.update(criterion(output_slice, gt_slice).data[0], slice_batch_pixel_size)
        # average logits and accumulated labels over the overlap count
        output /= count
        gts_all[vi, :, :] /= count.cpu().numpy().astype(int)
        # argmax over classes gives the predicted label map
        predictions_all[vi, :, :] = output.max(0)[1].squeeze_(0).cpu().numpy()
        print('validating: %d / %d' % (vi + 1, len(val_loader)))
    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, voc.num_classes)
    # NOTE(review): best_record is overwritten unconditionally every epoch —
    # confirm whether a "only if mean_iu improved" guard was intended here.
    train_args['best_record']['val_loss'] = val_loss.avg
    train_args['best_record']['epoch'] = epoch
    train_args['best_record']['acc'] = acc
    train_args['best_record']['acc_cls'] = acc_cls
    train_args['best_record']['mean_iu'] = mean_iu
    train_args['best_record']['fwavacc'] = fwavacc
    # checkpoint filename encodes all validation metrics for this epoch
    snapshot_name = 'epoch_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc, optimizer.param_groups[1]['lr'])
    torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
    torch.save(optimizer.state_dict(), os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))
    if train_args['val_save_to_img_file']:
        to_save_dir = os.path.join(ckpt_path, exp_name, str(epoch))
        check_mkdir(to_save_dir)
    val_visual = []
    for idx, data in enumerate(zip(gts_all, predictions_all)):
        gt_pil = voc.colorize_mask(data[0])
        predictions_pil = voc.colorize_mask(data[1])
        if train_args['val_save_to_img_file']:
            predictions_pil.save(os.path.join(to_save_dir, '%d_prediction.png' % idx))
            gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
        val_visual.extend([visualize(gt_pil.convert('RGB')), visualize(predictions_pil.convert('RGB'))])
    # grid of (gt, prediction) image pairs for tensorboard
    val_visual = torch.stack(val_visual, 0)
    val_visual = vutils.make_grid(val_visual, nrow=2, padding=5)
    writer.add_image(snapshot_name, val_visual)
    print('-----------------------------------------------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))
    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'],
        train_args['best_record']['acc_cls'], train_args['best_record']['mean_iu'],
        train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))
    print('-----------------------------------------------------------------------------------------------------------')
    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)
    # restore training mode for the caller
    net.train()
    return val_loss.avg
def test(val_loader, model, criterion, epoch, use_cuda):
    """One evaluation pass over `val_loader` via data_prefetcher.

    Returns (average loss, average top-1 accuracy). The prefetcher handles
    device placement, so `use_cuda` is not consulted here.
    """
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    model.eval()  # evaluation mode: disables dropout/batch-norm updates

    end = time.time()
    bar = Bar('Processing', max=len(val_loader))
    prefetcher = data_prefetcher(val_loader)

    inputs, targets = prefetcher.next()
    batch_idx = 0
    while inputs is not None:
        # time spent waiting for the batch
        data_time.update(time.time() - end)

        # forward pass without autograd bookkeeping
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # accuracy + running averages
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        n = inputs.size(0)
        losses.update(loss.data, n)
        top1.update(prec1, n)
        top5.update(prec5, n)

        # wall-clock time for the whole batch
        batch_time.update(time.time() - end)
        end = time.time()

        # progress line
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(val_loader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()

        batch_idx += 1
        inputs, targets = prefetcher.next()

    print(bar.suffix)
    bar.finish()
    return (losses.avg, top1.avg)