def train(train_loader, net, criterion, optimizer, epoch, train_args):
    """Run one training epoch of `net` over `train_loader`.

    The criterion output is divided by the batch size before backprop; the
    running average loss is written to the global tensorboard `writer` every
    iteration and printed every `train_args['print_freq']` batches.
    Uses the legacy Variable/.data[0] API (pre-0.4 PyTorch).
    """
    train_loss = AverageMeter()
    # global iteration counter continues from previous epochs (epochs are 1-based)
    curr_iter = (epoch - 1) * len(train_loader)
    for batch_idx, (inputs, labels) in enumerate(train_loader):
        # spatial dims of inputs (N, C, H, W) must match labels (N, H, W)
        assert inputs.size()[2:] == labels.size()[1:]
        batch_size = inputs.size(0)
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()

        optimizer.zero_grad()
        outputs = net(inputs)
        assert outputs.size()[2:] == labels.size()[1:]
        assert outputs.size()[1] == voc.num_classes

        # per-sample average of the criterion value
        loss = criterion(outputs, labels) / batch_size
        loss.backward()
        optimizer.step()

        train_loss.update(loss.data[0], batch_size)

        curr_iter += 1
        writer.add_scalar('train_loss', train_loss.avg, curr_iter)

        if (batch_idx + 1) % train_args['print_freq'] == 0:
            print('[epoch %d], [iter %d / %d], [train loss %.5f]' % (
                epoch, batch_idx + 1, len(train_loader), train_loss.avg
            ))
# --- Example #2 ---
def validate(val_loader, model, criterion, log):
  losses = AverageMeter()
  top1 = AverageMeter()
  top5 = AverageMeter()

  # switch to evaluate mode
  model.eval()

  for i, (input, target) in enumerate(val_loader):
    if args.use_cuda:
      target = target.cuda(async=True)
      input = input.cuda()
    input_var = torch.autograd.Variable(input, volatile=True)
    target_var = torch.autograd.Variable(target, volatile=True)

    # compute output
    output = model(input_var)
    loss = criterion(output, target_var)

    # measure accuracy and record loss
    prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
    losses.update(loss.data[0], input.size(0))
    top1.update(prec1[0], input.size(0))
    top5.update(prec5[0], input.size(0))

  print_log('  **Test** Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f} Error@1 {error1:.3f}'.format(top1=top1, top5=top5, error1=100-top1.avg), log)

  return top1.avg, losses.avg
def main(args):
    """Multi-GPU evaluation driver.

    Shards the validation file list evenly across the parsed devices, runs
    one worker process per device, aggregates per-image (accuracy, IoU)
    results from a shared queue, then prints per-class IoU, mean IoU and
    pixel accuracy.
    """
    # Parse device ids, e.g. 'gpu0,gpu1' -> [0, 1]; last entry is the default
    default_dev, *parallel_dev = parse_devices(args.devices)
    all_devs = parallel_dev + [default_dev]
    all_devs = [int(x.replace('gpu', '')) for x in all_devs]
    nr_devs = len(all_devs)

    with open(args.list_val, 'r') as f:
        nr_files = len(f.readlines())
    if args.num_val > 0:
        nr_files = min(nr_files, args.num_val)
    # ceil so the last device gets the (possibly shorter) remainder shard
    nr_files_per_dev = math.ceil(nr_files / nr_devs)

    pbar = tqdm(total=nr_files)

    acc_meter = AverageMeter()
    intersection_meter = AverageMeter()
    union_meter = AverageMeter()

    result_queue = Queue(500)
    procs = []
    for dev_id in range(nr_devs):
        # contiguous shard of the file list for this device
        start_idx = dev_id * nr_files_per_dev
        end_idx = min(start_idx + nr_files_per_dev, nr_files)
        proc = Process(target=worker, args=(args, dev_id, start_idx, end_idx, result_queue))
        print('process:%d, start_idx:%d, end_idx:%d' % (dev_id, start_idx, end_idx))
        proc.start()
        procs.append(proc)

    # master fetches results; Queue.get() blocks until an item is available,
    # replacing the original busy-wait (empty()/continue spin that burned a
    # full CPU core while workers ran)
    processed_counter = 0
    while processed_counter < nr_files:
        (acc, pix, intersection, union) = result_queue.get()
        acc_meter.update(acc, pix)
        intersection_meter.update(intersection)
        union_meter.update(union)
        processed_counter += 1
        pbar.update(1)

    for p in procs:
        p.join()

    # epsilon avoids division by zero for classes absent from the val set
    iou = intersection_meter.sum / (union_meter.sum + 1e-10)
    for i, _iou in enumerate(iou):
        print('class [{}], IoU: {}'.format(i, _iou))

    print('[Eval Summary]:')
    print('Mean IoU: {:.4}, Accuracy: {:.2f}%'
          .format(iou.mean(), acc_meter.average()*100))

    print('Evaluation Done!')
# --- Example #4 ---
def validate(val_loader, model, i, silence=False):
    """Run COCO-style bbox evaluation of `model` at training iteration `i`
    and return coco_eval.stats[0] (mAP).

    NOTE(review): detections are printed but never added to `coco_pred`,
    so COCOeval currently scores an empty result set — confirm whether the
    result-filling code was removed intentionally.
    """
    batch_time = AverageMeter()
    coco_gt = val_loader.dataset.coco
    coco_pred = COCO()
    # fix: pycocotools COCO exposes `.dataset` (singular, as used on the next
    # line); the original `.datasets` raised AttributeError
    coco_pred.dataset['images'] = [img for img in coco_gt.dataset['images']]
    coco_pred.dataset['categories'] = copy.deepcopy(coco_gt.dataset['categories'])
    id = 0  # running annotation id (unused while the fill-in code is disabled)

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # fix: the loop variable must not be `i` — that shadowed the iteration
    # number parameter reported in the summary print below
    for batch_idx, (inputs, anns) in enumerate(val_loader):

        # forward images one by one (TODO: support batch mode later, or
        # multiprocess)
        for j, input in enumerate(inputs):
            input_anns = anns[j]  # anns of this input
            # ground-truth boxes with the class id appended as a 5th column
            gt_bbox = np.vstack([ann['bbox'] + [ann['ordered_id']] for ann in input_anns])
            im_info = [[input.size(1), input.size(2),
                        input_anns[0]['scale_ratio']]]
            input_var = Variable(input.unsqueeze(0),
                                 requires_grad=False).cuda()

            cls_prob, bbox_pred, rois = model(input_var, im_info)
            scores, pred_boxes = model.interpret_outputs(cls_prob, bbox_pred, rois, im_info)
            print(scores, pred_boxes)
            # for i in range(scores.shape[0]):

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    coco_pred.createIndex()
    coco_eval = COCOeval(coco_gt, coco_pred, 'bbox')
    coco_eval.params.imgIds = sorted(coco_gt.getImgIds())
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    print('iter: [{0}] '
          'Time {batch_time.avg:.3f} '
          'Val Stats: {1}'
          .format(i, coco_eval.stats,
                  batch_time=batch_time))

    return coco_eval.stats[0]
def train(train_loader, net, criterion, optimizer, curr_epoch, train_args, val_loader, visualize):
    """Train a segmentation net with main + auxiliary heads on Cityscapes
    until `train_args['max_iter']` iterations, using a polynomial LR decay.

    Each batch is a 5-D tensor of image slices that are iterated over
    individually (one optimizer step per slice). Validates every
    `train_args['val_freq']` iterations, logs to the global tensorboard
    `writer`. Uses the legacy Variable/.data[0] API (pre-0.4 PyTorch).
    """
    while True:  # loop over epochs; the only exit is the max_iter check below
        train_main_loss = AverageMeter()
        train_aux_loss = AverageMeter()
        curr_iter = (curr_epoch - 1) * len(train_loader)
        for i, data in enumerate(train_loader):
            # poly LR schedule; param group 0 runs at 2x the LR of group 1
            # (presumably new layers vs. pretrained backbone -- TODO confirm)
            optimizer.param_groups[0]['lr'] = 2 * train_args['lr'] * (1 - float(curr_iter) / train_args['max_iter']
                                                                      ) ** train_args['lr_decay']
            optimizer.param_groups[1]['lr'] = train_args['lr'] * (1 - float(curr_iter) / train_args['max_iter']
                                                                  ) ** train_args['lr_decay']

            inputs, gts, _ = data
            # inputs: 5-D (dims 0/1 are batch and slice), gts: 4-D
            assert len(inputs.size()) == 5 and len(gts.size()) == 4
            # swap dims 0 and 1 in place so the zip below iterates over slices
            inputs.transpose_(0, 1)
            gts.transpose_(0, 1)

            assert inputs.size()[3:] == gts.size()[2:]
            # meter weight: samples-per-slice * H * W (pixel count per slice batch)
            slice_batch_pixel_size = inputs.size(1) * inputs.size(3) * inputs.size(4)

            for inputs_slice, gts_slice in zip(inputs, gts):
                inputs_slice = Variable(inputs_slice).cuda()
                gts_slice = Variable(gts_slice).cuda()

                optimizer.zero_grad()
                outputs, aux = net(inputs_slice)
                assert outputs.size()[2:] == gts_slice.size()[1:]
                assert outputs.size()[1] == cityscapes.num_classes

                main_loss = criterion(outputs, gts_slice)
                aux_loss = criterion(aux, gts_slice)
                # auxiliary head down-weighted by 0.4
                loss = main_loss + 0.4 * aux_loss
                loss.backward()
                optimizer.step()

                train_main_loss.update(main_loss.data[0], slice_batch_pixel_size)
                train_aux_loss.update(aux_loss.data[0], slice_batch_pixel_size)

            curr_iter += 1
            writer.add_scalar('train_main_loss', train_main_loss.avg, curr_iter)
            writer.add_scalar('train_aux_loss', train_aux_loss.avg, curr_iter)
            writer.add_scalar('lr', optimizer.param_groups[1]['lr'], curr_iter)

            if (i + 1) % train_args['print_freq'] == 0:
                print('[epoch %d], [iter %d / %d], [train main loss %.5f], [train aux loss %.5f]. [lr %.10f]' % (
                    curr_epoch, i + 1, len(train_loader), train_main_loss.avg, train_aux_loss.avg,
                    optimizer.param_groups[1]['lr']))
            # hard stop once the global iteration budget is spent
            if curr_iter >= train_args['max_iter']:
                return
            if curr_iter % train_args['val_freq'] == 0:
                validate(val_loader, net, criterion, optimizer, curr_epoch, i + 1, train_args, visualize)
        curr_epoch += 1
# --- Example #6 ---
def test(data_loader, model, opt, class_names):
    """Run inference over `data_loader`, grouping per-clip outputs by video id
    and writing aggregated per-video results to
    '<opt.result_path>/<opt.test_subset>.json'.

    Here `targets` carries each clip's video id, not a class label.
    Uses the legacy volatile-Variable API (pre-0.4 PyTorch).
    """
    print('test')

    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()

    end_time = time.time()
    output_buffer = []  # per-clip outputs of the video currently being accumulated
    previous_video_id = ''
    test_results = {'results': {}}
    for i, (inputs, targets) in enumerate(data_loader):
        data_time.update(time.time() - end_time)

        inputs = Variable(inputs, volatile=True)
        outputs = model(inputs)
        if not opt.no_softmax_in_test:
            outputs = F.softmax(outputs)

        for j in range(outputs.size(0)):
            # a new video id means the previous video's buffer is complete:
            # flush it into test_results before starting the next video
            if not (i == 0 and j == 0) and targets[j] != previous_video_id:
                calculate_video_results(output_buffer, previous_video_id,
                                        test_results, class_names)
                output_buffer = []
            output_buffer.append(outputs[j].data.cpu())
            previous_video_id = targets[j]

        # periodically checkpoint partial results to disk
        if (i % 100) == 0:
            with open(
                    os.path.join(opt.result_path, '{}.json'.format(
                        opt.test_subset)), 'w') as f:
                json.dump(test_results, f)

        batch_time.update(time.time() - end_time)
        end_time = time.time()

        print('[{}/{}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                  i + 1,
                  len(data_loader),
                  batch_time=batch_time,
                  data_time=data_time))
    # final write. NOTE(review): the last video's buffer is never flushed via
    # calculate_video_results -- confirm whether that is handled elsewhere.
    with open(
            os.path.join(opt.result_path, '{}.json'.format(opt.test_subset)),
            'w') as f:
        json.dump(test_results, f)
# --- Example #7 ---
def train(train_loader, model, optimizer, start_iter, num_iters):
    """Train a two-stage detector (RPN + ODN heads) for `num_iters` iterations
    starting at global iteration `start_iter`; returns the average total loss.

    Images in a batch are forwarded one at a time and gradients accumulate
    across them before a single optimizer step. Relies on the module-level
    `args` (lr, decay_rate, niters, print_freq, tensorboard) and helpers
    `adjust_learning_rate` / `log_value`. Legacy .data[0] API (pre-0.4).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    total_losses = AverageMeter()
    rpn_losses = AverageMeter()
    odn_losses = AverageMeter()
    rpn_ce_losses = AverageMeter()
    rpn_box_losses = AverageMeter()
    odn_ce_losses = AverageMeter()
    odn_box_losses = AverageMeter()

    # switch to train mode
    end_iter = start_iter + num_iters - 1
    model.train()

    end = time.time()
    # for i in range(start_iter, start_iter + num_iters):
    for i, (inputs, anns) in enumerate(train_loader):
        i += start_iter  # shift the local batch index to the global iteration number
        # get minibatch
        # inputs, anns = next(train_loader)
        lr = adjust_learning_rate(optimizer, args.lr, args.decay_rate,
                                  i, args.niters)  # TODO: add custom
        # measure data loading time
        data_time.update(time.time() - end)

        optimizer.zero_grad()
        # forward images one by one (TODO: support batch mode later, or
        # multiprocess)
        for j, input in enumerate(inputs):
            input_anns = anns[j]  # anns of this input
            # an image without annotations contributes no loss; skip it
            if len(input_anns) == 0:
                continue
            # ground-truth boxes with the class id appended as a 5th column
            gt_bbox = np.vstack([ann['bbox'] + [ann['ordered_id']] for ann in input_anns])
            im_info= [[input.size(1), input.size(2),
                        input_anns[0]['scale_ratio']]]
            input_var= torch.autograd.Variable(input.unsqueeze(0).cuda(),
                                 requires_grad=False)

            cls_prob, bbox_pred, rois= model(input_var, im_info, gt_bbox)
            loss= model.loss
            # gradients accumulate over all images of this minibatch
            loss.backward()
            # record loss
            total_losses.update(loss.data[0], input_var.size(0))
            rpn_losses.update(model.rpn.loss.data[0], input_var.size(0))
            rpn_ce_losses.update(
                model.rpn.cross_entropy.data[0], input_var.size(0))
            rpn_box_losses.update(
                model.rpn.loss_box.data[0], input_var.size(0))
            odn_losses.update(model.odn.loss.data[0], input_var.size(0))
            odn_ce_losses.update(
                model.odn.cross_entropy.data[0], input_var.size(0))
            odn_box_losses.update(
                model.odn.loss_box.data[0], input_var.size(0))

        # do SGD step
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if args.print_freq > 0 and (i + 1) % args.print_freq == 0:
            print('iter: [{0}] '
                  'Time {batch_time.val:.3f} '
                  'Data {data_time.val:.3f} '
                  'Loss {total_losses.val:.4f} '
                  'RPN {rpn_losses.val:.4f} '
                  '{rpn_ce_losses.val:.4f} '
                  '{rpn_box_losses.val:.4f} '
                  'ODN {odn_losses.val:.4f} '
                  '{odn_ce_losses.val:.4f} '
                  '{odn_box_losses.val:.4f} '
                  .format(i, batch_time=batch_time,
                          data_time=data_time,
                          total_losses=total_losses,
                          rpn_losses=rpn_losses,
                          rpn_ce_losses=rpn_ce_losses,
                          rpn_box_losses=rpn_box_losses,
                          odn_losses=odn_losses,
                          odn_ce_losses=odn_ce_losses,
                          odn_box_losses=odn_box_losses))

        # free batch tensors before the next iteration to limit peak memory
        del inputs
        del anns
        if i == end_iter:
            break

    # epoch summary over [start_iter, end_iter]
    print('iter: [{0}-{1}] '
          'Time {batch_time.avg:.3f} '
          'Data {data_time.avg:.3f} '
          'Loss {total_losses.avg:.4f} '
          'RPN {rpn_losses.avg:.4f} '
          '{rpn_ce_losses.avg:.4f} '
          '{rpn_box_losses.avg:.4f} '
          'ODN {odn_losses.avg:.4f} '
          '{odn_ce_losses.avg:.4f} '
          '{odn_box_losses.avg:.4f} '
          .format(start_iter, end_iter,
                  batch_time=batch_time,
                  data_time=data_time,
                  total_losses=total_losses,
                  rpn_losses=rpn_losses,
                  rpn_ce_losses=rpn_ce_losses,
                  rpn_box_losses=rpn_box_losses,
                  odn_losses=odn_losses,
                  odn_ce_losses=odn_ce_losses,
                  odn_box_losses=odn_box_losses))

    if args.tensorboard:
        log_value('train_total_loss', total_losses.avg, end_iter)
        log_value('train_rpn_loss', rpn_losses.avg, end_iter)
        log_value('train_rpn_ce_loss', rpn_ce_losses.avg, end_iter)
        log_value('train_rpn_box_loss', rpn_box_losses.avg, end_iter)
        log_value('train_odn_loss', odn_losses.avg, end_iter)
        log_value('train_odn_ce_loss', odn_ce_losses.avg, end_iter)
        log_value('train_odn_box_loss', odn_box_losses.avg, end_iter)
    return total_losses.avg
# --- Example #8 ---
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda(async=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)
        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        progress_bar(
            batch_idx, len(trainloader),
            'Loss: %.2f | Top1: %.2f | Top5: %.2f' %
            (losses.avg, top1.avg, top5.avg))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    return (losses.avg, top1.avg, top5.avg)
# --- Example #9 ---
# NOTE(review): scraped fragment of a knowledge-distillation training script;
# the inner loop body is truncated below (it ends mid-computation).
if args.other_distill is not None:
    # choose the extra distillation criterion and its loss weight
    if args.other_distill == 'AT':
        criterion = Attention()
        weight = 1000
    elif args.other_distill == 'SP':
        criterion = Similarity()
        weight = 3000

best_acc = 0
# per-epoch, per-sample counter; assumes a 50000-sample train set (e.g.
# CIFAR) -- TODO confirm against the dataset used
counter = torch.zeros(args.epoch, 50000).cuda()
epoch = 0

for epoch in range(args.epoch):

    record = {name: AverageMeter() for name in items}
    center = cal_center(val_loader, args, s_model)
    # NOTE(review): the loop variable below was censored by the scraper
    # ("f**k") and is not a valid identifier -- restore the original name
    # (e.g. batch_idx) from the upstream source.
    for f**k, (x, y, k) in enumerate(train_loader):

        s_model.train()

        x = x.cuda()
        y = y.cuda()
        k = k.cuda()
        # teacher-free feature extraction: no gradients through the student here
        with torch.no_grad():
            s_feats, logits = s_model(x, is_feat=True)
        probs = F.softmax(logits, dim=1)

        # confidence
        conf = probs.max(dim=1)[0]
        # margin
# --- Example #10 ---
def validate(val_loader, model, criterion, args, logger, writer, epoch,
             local_rank):
    """DDP validation for one epoch.

    Computes loss/top-1 on this rank, all-reduces them across `args.nprocs`
    processes, logs via `ddp_print` and (on rank 0) tensorboard.
    Returns the synchronized top-1 average.
    """
    batch_times = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')  # ':.4e' = scientific notation, 4 decimals
    top1 = AverageMeter('Acc@1', ':6.2f')

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            images = images.cuda(local_rank, non_blocking=True)
            target = target.cuda(local_rank, non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1 = accuracy(output, target, 1)

            # DDP: synchronize, then average loss/accuracy over all processes
            dist.barrier()
            reduced_loss = reduce_mean(loss, args.nprocs)
            reduced_acc1 = reduce_mean(acc1, args.nprocs)

            losses.update(reduced_loss.item(), images.size(0))
            top1.update(reduced_acc1, images.size(0))

            # measure elapsed time
            batch_time = time.time() - end
            batch_times.update(batch_time)
            end = time.time()

            if i % args.print_freq == 0:
                ddp_print(
                    'Val epoch: [{:d}/{:d}][{:d}/{:d}]\tce_loss={:.4f}\ttop1_acc={:.4f}\tbatch_time={:6.3f}s'
                    .format(epoch, args.epochs, i, len(val_loader), losses.avg,
                            top1.avg, batch_times.avg), logger, local_rank)
            # fix: removed a stray `break` (debug leftover) that stopped
            # validation after the first batch, so the summary below only
            # ever reflected a single batch

        ddp_print(
            '||==> Val epoch: [{:d}/{:d}]\tce_loss={:.4f}\ttop1_acc={:.4f}\tbatch_time={:6.3f}s'
            .format(epoch, args.epochs, losses.avg, top1.avg,
                    batch_times.avg), logger, local_rank)

        if args.local_rank == 0:
            # save tensorboard
            writer.add_scalar('Val_ce_loss', losses.avg, epoch)
            writer.add_scalar('Val_top1_accuracy', top1.avg, epoch)

        return top1.avg
# --- Example #11 ---
def test(testloader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on `testloader`; returns (avg loss, avg top-1).

    Top-5 accuracy is tracked but not returned. Relies on the module-level
    `best_acc` and `local_rank` globals.
    """
    global best_acc
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # evaluation mode: disable dropout, use running batch-norm statistics
    model.eval()

    for inputs, targets in testloader:
        if use_cuda:
            inputs = inputs.cuda(local_rank)
            targets = targets.cuda(local_rank)
        inputs = torch.autograd.Variable(inputs)
        targets = torch.autograd.Variable(targets)

        # forward pass without building an autograd graph
        with torch.no_grad():
            outputs = model(inputs)
        loss = criterion(outputs, targets)

        # bookkeeping: loss and top-1/top-5 accuracy, weighted by batch size
        n = inputs.size(0)
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.data.item(), n)
        top1.update(prec1.item(), n)
        top5.update(prec5.item(), n)
    return (losses.avg, top1.avg)
# --- Example #12 ---
    def train(self, data_loader, epoch, args, result_dict):
        """Train `self.model` for one epoch, optionally with AMP and SWA.

        Appends the epoch's average loss and top-1 accuracy to
        `result_dict['train_loss']` / `result_dict['train_acc']` and
        returns the updated dict.
        """
        total_loss = 0
        count = 0

        losses = AverageMeter()
        top1 = AverageMeter()

        self.model.train()

        for batch_idx, (inputs, labels) in enumerate(data_loader):
            inputs, labels = inputs.cuda(), labels.cuda()

            # forward pass (under autocast when mixed precision is enabled)
            if args.amp:
                with torch.cuda.amp.autocast():
                    outputs = self.model(inputs)
                    loss = self.criterion(outputs, labels)
            else:
                outputs = self.model(inputs)
                loss = self.criterion(outputs, labels)

            # soft / one-hot labels -> class indices for the accuracy metric
            if len(labels.size()) > 1:
                labels = torch.argmax(labels, axis=1)

            prec1, prec3 = accuracy(outputs.data, labels, topk=(1, 3))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))

            self.optimizer.zero_grad()

            # backward + step, routed through the GradScaler when AMP is on
            if args.amp:
                self.scaler.scale(loss).backward()
                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                loss.backward()
                self.optimizer.step()

            total_loss += loss.tolist()
            count += labels.size(0)

            if batch_idx % args.log_interval == 0:
                # _s pads the sample counter to the width of the dataset size
                _s = str(len(str(len(data_loader.sampler))))
                ret = [
                    ('epoch: {:0>3} [{: >' + _s + '}/{} ({: >3.0f}%)]').format(
                        epoch, count, len(data_loader.sampler),
                        100 * count / len(data_loader.sampler)),
                    'train_loss: {: >4.2e}'.format(total_loss / count),
                    'train_accuracy : {:.2f}%'.format(top1.avg)
                ]
                print(', '.join(ret))

            # NOTE(review): the LR scheduler is stepped per batch here --
            # confirm the scheduler used is per-step (e.g. OneCycle/cosine)
            if not args.decay_type == 'swa':
                self.scheduler.step()
            else:
                if epoch <= args.swa_start:
                    self.scheduler.step()

        # after the SWA start epoch, update the averaged model instead
        if epoch > args.swa_start and args.decay_type == 'swa':
            self.swa_model.update_parameters(self.model)
            self.swa_scheduler.step()

        result_dict['train_loss'].append(losses.avg)
        result_dict['train_acc'].append(top1.avg)

        return result_dict
# --- Example #13 ---
def eval_model_verbose(model,
                       test_loader,
                       decoder,
                       cuda,
                       out_path,
                       item_info_array,
                       warmups=0,
                       meta=False):
    """
    Model evaluation -- used during inference.

    Runs `warmups` untimed batches first, then times inference over the full
    `test_loader`, decoding predictions and writing one CSV row per item
    (columns given by the module-level `csv_header`) to `out_path`.

    returns wer, cer, batch time array and warm up time
    """
    # Warming up: run (but do not score) the first `warmups` batches
    end = time.time()
    total_trials = len(test_loader)
    for i, data in enumerate(test_loader):
        if i >= warmups:
            break
        sys.stdout.write("\rWarmups ({}/{}) ".format(i+1, warmups))
        sys.stdout.flush()
        if meta:
            inputs, targets, input_percentages, target_sizes, batch_meta, item_meta = data
        else:
            inputs, targets, input_percentages, target_sizes = data
        inputs = torch.autograd.Variable(inputs, volatile=False)

        # unflatten targets
        split_targets = []
        offset = 0
        for size in target_sizes:
            split_targets.append(targets[offset:offset + size])
            offset += size

        if cuda:
            inputs = inputs.cuda()

        out = model(inputs)
    warmup_time = time.time() - end
    # fix: the original `print("...").format(warmup_time)` called .format on
    # print's return value (None) -> AttributeError on Python 3
    if warmups > 0: print("Warmed up in {}s".format(warmup_time))

    # the actual inference trial
    total_cer, total_wer = 0, 0
    word_count, char_count = 0, 0
    model.eval()
    batch_time = AverageMeter()

    # For each batch in the test_loader, make a prediction and calculate the WER CER
    item_num = 1
    # NOTE(review): 'wb' + csv.DictWriter is Python 2 style; on Python 3 this
    # needs open(out_path, 'w', newline='') -- confirm the target interpreter
    with open(out_path, 'wb') as f:
        csvwriter = csv.DictWriter(f, fieldnames=csv_header)
        csvwriter.writeheader()
        for i, data in enumerate(test_loader):
            batch_num = i + 1
            if meta:
                inputs, targets, input_percentages, target_sizes, batch_meta, item_meta = data
            else:
                inputs, targets, input_percentages, target_sizes = data

            inputs = torch.autograd.Variable(inputs, volatile=False)

            # unflatten targets (targets arrive as one flat tensor)
            split_targets = []
            offset = 0
            for size in target_sizes:
                split_targets.append(targets[offset:offset + size])
                offset += size

            if cuda:
                inputs = inputs.cuda()
            end = time.time()  # Timing start (Inference only)
            out = model(inputs)
            batch_time.update(time.time() - end)  # Timing end (Inference only)
            out = out.transpose(0, 1)  # TxNxH
            seq_length = out.size(0)
            # per-item valid output lengths, scaled from the input percentages
            sizes = input_percentages.mul_(int(seq_length)).int()

            # Decode the ouput to actual strings and compare to label
            # Get the LEV score and the word, char count
            decoded_output = decoder.decode(out.data, sizes)
            target_strings = decoder.process_strings(decoder.convert_to_strings(split_targets))
            batch_we = batch_wc = batch_ce = batch_cc = 0
            for x in range(len(target_strings)):
                this_we = decoder.wer(decoded_output[x], target_strings[x])
                this_ce = decoder.cer(decoded_output[x], target_strings[x])
                this_wc = len(target_strings[x].split())
                this_cc = len(target_strings[x])
                this_pred = decoded_output[x]
                this_true = target_strings[x]
                # sentinel latency when the reference array is shorter than
                # the number of items actually processed
                if item_num <= len(item_info_array):
                    item_latency = item_info_array[item_num - 1]['batch_latency']
                else:
                    item_latency = "-9999"

                out_data = [batch_num,
                            batch_time.array[-1],
                            batch_meta[2],
                            batch_meta[4],
                            batch_meta[3],
                            item_num,
                            item_latency,
                            item_meta[x][2],
                            item_meta[x][4],
                            item_meta[x][3],
                            this_wc, this_cc,
                            this_we, this_ce,
                            this_pred, this_true]

                csv_dict = {k:v for k, v in zip(csv_header, out_data)}
                csvwriter.writerow(csv_dict)

                item_num += 1
                batch_we += this_we
                batch_ce += this_ce
                batch_wc += this_wc
                batch_cc += this_cc

            total_wer += batch_we
            total_cer += batch_ce
            word_count += batch_wc
            char_count += batch_cc

            print('[{0}/{1}]\t'
                  'Batch: latency (running average) {batch_time.val:.4f} ({batch_time.avg:.3f})\t\t'
                  'WER {2:.1f} \t CER {3:.1f}'
                  .format((i + 1), total_trials,
                          batch_we / float(batch_wc),
                          batch_ce / float(batch_cc),
                          batch_time=batch_time))
            if cuda:
                torch.cuda.synchronize()
            del out

    # WER, CER as percentages over the whole set
    wer = total_wer / float(word_count)
    cer = total_cer / float(char_count)
    wer *= 100
    cer *= 100

    return wer, cer, batch_time, warmup_time
# --- Example #14 ---
def train(epoch, model, criterion_xent, criterion_htri, optimizer, trainloader,
          use_gpu):
    """One training epoch for a re-id model with cross-entropy + triplet loss.

    With the global args.htri_only only the triplet (htri) loss is optimized;
    otherwise the total loss is xent + htri. Progress is printed every
    args.print_freq batches.
    """
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    # add two meters
    xent_losses = AverageMeter()
    htri_losses = AverageMeter()

    model.train()

    end = time.time()
    for batch_idx, (imgs, pids, _) in enumerate(trainloader):
        if use_gpu:
            imgs, pids = imgs.cuda(), pids.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        outputs, features = model(imgs)
        if args.htri_only:
            # optimize the embedding with the triplet loss only
            if isinstance(features, tuple):
                loss = DeepSupervision(criterion_htri, features, pids)
            else:
                loss = criterion_htri(features, pids)
        else:
            if isinstance(outputs, tuple):
                xent_loss = DeepSupervision(criterion_xent, outputs, pids)
            else:
                xent_loss = criterion_xent(outputs, pids)  # use this one

            if isinstance(features, tuple):
                htri_loss = DeepSupervision(criterion_htri, features, pids)
            else:
                htri_loss = criterion_htri(features, pids)  # use this one

            loss = xent_loss + htri_loss  # use this one
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        losses.update(loss.item(), pids.size(0))
        # fix: xent_loss/htri_loss only exist on the combined-loss path;
        # updating them unconditionally raised NameError under args.htri_only
        if not args.htri_only:
            xent_losses.update(xent_loss.item(), pids.size(0))
            htri_losses.update(htri_loss.item(), pids.size(0))

        if (batch_idx + 1) % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'CLoss {xent_loss.val:.4f} ({xent_loss.avg:.4f})\t'
                  'MLoss {htri_loss.val:.4f} ({htri_loss.avg:.4f})\t'.format(
                      epoch + 1,
                      batch_idx + 1,
                      len(trainloader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      xent_loss=xent_losses,
                      htri_loss=htri_losses))
# --- Example #15 ---
def test(model, queryloader, galleryloader, use_gpu, ranks=[1, 5, 10, 20]):
    """Person re-id evaluation: extract features for query and gallery sets,
    build the pairwise squared-Euclidean distance matrix, and report
    CMC at the given `ranks` plus mAP. Returns rank-1 CMC.
    """
    batch_time = AverageMeter()

    model.eval()

    with torch.no_grad():
        # extract query features; keep pids/camids for the metric computation
        qf, q_pids, q_camids = [], [], []
        for batch_idx, (imgs, pids, camids) in enumerate(queryloader):
            if use_gpu:
                imgs = imgs.cuda()

            end = time.time()
            features = model(imgs)
            batch_time.update(time.time() - end)

            features = features.data.cpu()
            qf.append(features)
            q_pids.extend(pids)
            q_camids.extend(camids)
        qf = torch.cat(qf, 0)
        q_pids = np.asarray(q_pids)
        q_camids = np.asarray(q_camids)

        print("Extracted features for query set, obtained {}-by-{} matrix".
              format(qf.size(0), qf.size(1)))

        # same extraction for the gallery set
        gf, g_pids, g_camids = [], [], []
        for batch_idx, (imgs, pids, camids) in enumerate(galleryloader):
            if use_gpu:
                imgs = imgs.cuda()

            end = time.time()
            features = model(imgs)
            batch_time.update(time.time() - end)

            features = features.data.cpu()
            gf.append(features)
            g_pids.extend(pids)
            g_camids.extend(camids)
        gf = torch.cat(gf, 0)
        g_pids = np.asarray(g_pids)
        g_camids = np.asarray(g_camids)

        print("Extracted features for gallery set, obtained {}-by-{} matrix".
              format(gf.size(0), gf.size(1)))

    print("==> BatchTime(s)/BatchSize(img): {:.3f}/{}".format(
        batch_time.avg, args.test_batch))

    # squared Euclidean distances: |q|^2 + |g|^2 - 2*q.g, computed via the
    # legacy addmm_(beta, alpha, ...) signature (deprecated in modern torch)
    m, n = qf.size(0), gf.size(0)
    distmat = torch.pow(qf, 2).sum(dim=1, keepdim=True).expand(m, n) + \
              torch.pow(gf, 2).sum(dim=1, keepdim=True).expand(n, m).t()
    distmat.addmm_(1, -2, qf, gf.t())
    distmat = distmat.numpy()

    print("Computing CMC and mAP")
    cmc, mAP = evaluate(distmat,
                        q_pids,
                        g_pids,
                        q_camids,
                        g_camids,
                        use_metric_cuhk03=args.use_metric_cuhk03)

    print("Results ----------")
    print("mAP: {:.1%}".format(mAP))
    print("CMC curve")
    for r in ranks:
        print("Rank-{:<3}: {:.1%}".format(r, cmc[r - 1]))
    print("------------------")

    return cmc[0]
# --- Example #16 ---
def valid(valid_loader, model, criterions, logger):
    """Run one validation pass of the multi-task face-attribute model.

    Computes a weighted L1 regression loss plus four cross-entropy
    classification losses (expression / gender / glasses / race) per batch,
    accumulates running metrics, and logs a single summary line at the end.

    :param valid_loader: DataLoader yielding (img, reg, expression, gender,
        glasses, race) batches
    :param model: multi-head model returning one output tensor per task
    :param criterions: (L1Loss, CrossEntropyLoss) pair
    :param logger: logger used for the final status line
    :return: average total loss over the validation set
    """
    model.eval()  # eval mode (dropout and batchnorm is NOT used)

    losses = AverageMeter()

    reg_losses = AverageMeter()
    expression_accs = AverageMeter()
    gender_accs = AverageMeter()
    glasses_accs = AverageMeter()
    race_accs = AverageMeter()

    L1Loss, CrossEntropyLoss = criterions

    # Batches — no gradients are needed during validation, so disable
    # autograd tracking to save memory.
    with torch.no_grad():
        for i, (img, reg, expression, gender, glasses,
                race) in enumerate(valid_loader):
            # Move to GPU, if available
            img = img.to(device)
            reg_label = reg.type(torch.FloatTensor).to(device)  # [N, 5]
            expression_label = expression.type(torch.LongTensor).to(
                device)  # [N, 3]
            gender_label = gender.type(torch.LongTensor).to(device)  # [N, 2]
            glasses_label = glasses.type(torch.LongTensor).to(device)  # [N, 3]
            race_label = race.type(torch.LongTensor).to(device)  # [N, 4]

            # Forward prop.
            reg_out, expression_out, gender_out, glasses_out, race_out = model(img)

            # Calculate loss (regression term scaled by the global loss_ratio)
            reg_loss = L1Loss(reg_out, reg_label) * loss_ratio
            expression_loss = CrossEntropyLoss(expression_out, expression_label)
            gender_loss = CrossEntropyLoss(gender_out, gender_label)
            glasses_loss = CrossEntropyLoss(glasses_out, glasses_label)
            race_loss = CrossEntropyLoss(race_out, race_label)

            loss = reg_loss + expression_loss + gender_loss + glasses_loss + race_loss

            # Keep track of metrics
            losses.update(loss.item())

            reg_losses.update(reg_loss.item())
            expression_accuracy = accuracy(expression_out, expression_label)
            expression_accs.update(expression_accuracy)
            gender_accuracy = accuracy(gender_out, gender_label)
            gender_accs.update(gender_accuracy)
            glasses_accuracy = accuracy(glasses_out, glasses_label)
            glasses_accs.update(glasses_accuracy)
            race_accuracy = accuracy(race_out, race_label)
            race_accs.update(race_accuracy)

    # Print status
    # BUGFIX: the Glasses and Race fields previously reused `expression_acc`
    # in the format string, so their reported accuracies were wrong.
    status = 'Validation: Loss {loss.avg:.4f}\t' \
             'Reg Loss {reg_loss.val:.4f} ({reg_loss.avg:.4f})\t' \
             'Expression Accuracy {expression_acc.val:.4f} ({expression_acc.avg:.4f})\t' \
             'Gender Accuracy {gender_acc.val:.4f} ({gender_acc.avg:.4f})\t' \
             'Glasses Accuracy {glasses_acc.val:.4f} ({glasses_acc.avg:.4f})\t' \
             'Race Accuracy {race_acc.val:.4f} ({race_acc.avg:.4f})\n'.format(loss=losses,
                                                                              reg_loss=reg_losses,
                                                                              expression_acc=expression_accs,
                                                                              gender_acc=gender_accs,
                                                                              glasses_acc=glasses_accs,
                                                                              race_acc=race_accs)

    logger.info(status)

    return losses.avg
예제 #17
0
def trainModel(data_loader,
               model,
               normalizer_global,
               normalizer_local,
               epoch=None,
               evaluation=False,
               testing=False):
    """Train or evaluate the model for one epoch.

    In test mode the per-protein predictions are additionally written to
    'test_results.csv' under the module-level `savepath`.

    Parameters
    ----------
    data_loader : iterable
        Yields ``(input_data, batch_data, target_tuples)`` batches, where
        ``batch_data[0]`` are protein ids and ``batch_data[1]`` maps each
        amino acid to its crystal/protein index.
    model
        The model to train; wrapped (e.g. DataParallel) so that
        ``model.module`` exposes ``fit``, ``mask_remove``, ``loss``,
        ``accuracy`` and ``outputs``.
    normalizer_global
        The normalizer for global gdt targets.
    normalizer_local
        The normalizer for local lddt targets.
    epoch : int, optional
        The current epoch (used only for progress printing).
    evaluation : bool
        True when the model is in eval mode (both testing and validation).
    testing : bool
        True only while testing.

    Returns
    -------
    tuple
        ``(avg global MAE, avg local MAE, avg loss)`` over the epoch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    avg_errors_global = AverageMeter()
    avg_errors_local = AverageMeter()

    # placeholders to store results to write to file
    if testing:
        test_targets_global = []
        test_preds_global = []
        test_targets_local = []
        test_preds_local = []
        test_cif_ids = []
        test_amino_crystal = []

    end = time.time()

    for protein_batch_iter, (input_data, batch_data,
                             target_tuples) in enumerate(data_loader):
        batch_protein_ids = batch_data[0]
        batch_amino_crystal = batch_data[1]
        batch_size = len(batch_protein_ids)

        # measure data loading time
        data_time.update(time.time() - end)

        # move inputs and targets to cuda
        input_var, target_var = getInputs(input_data, target_tuples,
                                          normalizer_global, normalizer_local)

        if not evaluation and not testing:
            # Switch to train mode
            model.train()

            out = model(input_var)
            out = model.module.mask_remove(out)
            assert out[1].shape[0] == target_var[1].shape[
                0], "Predicted Outputs Amino & Target Outputs Amino don't match"
            # fit() stores loss/accuracy on model.module (read below);
            # presumably it also performs the backward/optimizer step in
            # train mode — confirm against the model implementation.
            model.module.fit(out, target_var, batch_protein_ids)
        else:
            # evaluate one iteration (no gradient bookkeeping)
            with torch.no_grad():
                # Switch to evaluation mode
                model.eval()
                predicted = model(input_var)
                predicted = model.module.mask_remove(predicted)
                assert predicted[1].shape[0] == target_var[1].shape[
                    0], "Predicted Outputs Amino & Target Outputs Amino don't match"
                # pred=True: metrics only, no weight update
                model.module.fit(predicted,
                                 target_var,
                                 batch_protein_ids,
                                 pred=True)

        # Calculate the accuracy between the denormalized values
        # (scale the normalized errors back by each target's std)
        model.module.accuracy[
            0] = model.module.accuracy[0] * normalizer_global.std
        model.module.accuracy[
            1] = model.module.accuracy[1] * normalizer_local.std

        # measure accuracy and record loss
        losses.update(model.module.loss.item(), batch_size)
        avg_errors_global.update(model.module.accuracy[0].item(), batch_size)
        avg_errors_local.update(model.module.accuracy[1].item(), batch_size)

        # Collect all the results that needs to be written to file
        # NOTE(review): batches of size 1 are silently skipped here,
        # likely because .squeeze() would drop the batch dim — confirm.
        if testing and batch_size != 1:
            test_pred_global = normalizer_global.denorm(
                model.module.outputs[0].data).squeeze().tolist()
            test_target_global = target_tuples[0].squeeze()
            test_preds_global += test_pred_global
            test_targets_global += test_target_global.tolist()

            test_amino_crystal += batch_amino_crystal.tolist()
            test_pred_local = normalizer_local.denorm(
                model.module.outputs[1].data).squeeze().tolist()
            test_target_local = target_tuples[1].squeeze().tolist()

            # Group flat per-amino local targets/predictions by their
            # crystal index so each protein gets its own list; OrderedDict
            # preserves first-seen protein order.
            res1, res2 = OrderedDict(), OrderedDict()
            for i, idx in enumerate(batch_amino_crystal):
                if idx not in res1: res1[idx] = []
                if idx not in res2: res2[idx] = []
                res1[idx].append(test_target_local[i])
                res2[idx].append(test_pred_local[i])

            test_target_local = [v for _, v in res1.items()]
            test_pred_local = [v for _, v in res2.items()]

            test_preds_local += test_pred_local
            test_targets_local += test_target_local
            test_cif_ids += batch_protein_ids

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # print progress between steps
        if protein_batch_iter % args.print_freq == 0:
            if evaluation or testing:
                print(
                    'Test: [{0}][{1}]/{2}\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'ERRG {avg_errors_global.val:.3f} ({avg_errors_global.avg:.3f})\t'
                    'ERRL {avg_errors_local.val:.3f} ({avg_errors_local.avg:.3f})'
                    .format(epoch,
                            protein_batch_iter,
                            len(data_loader),
                            batch_time=batch_time,
                            loss=losses,
                            avg_errors_global=avg_errors_global,
                            avg_errors_local=avg_errors_local))
            else:
                print(
                    'Epoch: [{0}][{1}]/{2}\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                    'ERRG {avg_errors_global.val:.3f} ({avg_errors_global.avg:.3f})\t'
                    'ERRL {avg_errors_local.val:.3f} ({avg_errors_local.avg:.3f})'
                    .format(epoch,
                            protein_batch_iter,
                            len(data_loader),
                            batch_time=batch_time,
                            data_time=data_time,
                            loss=losses,
                            avg_errors_global=avg_errors_global,
                            avg_errors_local=avg_errors_local))

        if protein_batch_iter % args.print_freq == 0:
            clearCache()

    # write results to file
    if testing:
        star_label = '**'
        with open(savepath + 'test_results.csv', 'w') as f:
            writer = csv.writer(f)
            for cif_id, targets_global, preds_global, targets_local, preds_local in zip(
                    test_cif_ids, test_targets_global, test_preds_global,
                    test_targets_local, test_preds_local):
                writer.writerow((cif_id, targets_global, preds_global,
                                 targets_local, preds_local))
    elif evaluation:
        star_label = '*'
    else:
        star_label = '##'

    # Epoch summary; the star marker distinguishes test (**) / val (*) /
    # train (##) output lines.
    print(
        ' {star} ERRG {avg_errors_global.avg:.3f} ERRL {avg_errors_local.avg:.3f} LOSS {avg_loss.avg:.3f}'
        .format(star=star_label,
                avg_errors_global=avg_errors_global,
                avg_errors_local=avg_errors_local,
                avg_loss=losses))

    return avg_errors_global.avg, avg_errors_local.avg, losses.avg
예제 #18
0
def train(train_loader, model, criterions, optimizer, epoch, logger):
    """Train the multi-task face-attribute model for one epoch.

    Computes a weighted L1 regression loss plus four cross-entropy
    classification losses (expression / gender / glasses / race),
    backpropagates with gradient clipping, and logs running metrics
    every `print_freq` batches.

    :param train_loader: DataLoader yielding (img, reg, expression, gender,
        glasses, race) batches
    :param model: multi-head model returning one output tensor per task
    :param criterions: (L1Loss, CrossEntropyLoss) pair
    :param optimizer: optimizer updating the model's weights
    :param epoch: current epoch number (for logging)
    :param logger: logger used for status lines
    :return: average total loss over the epoch
    """
    model.train()  # train mode (dropout and batchnorm is used)

    losses = AverageMeter()

    reg_losses = AverageMeter()
    expression_accs = AverageMeter()
    gender_accs = AverageMeter()
    glasses_accs = AverageMeter()
    race_accs = AverageMeter()

    L1Loss, CrossEntropyLoss = criterions

    # Batches
    for i, (img, reg, expression, gender, glasses,
            race) in enumerate(train_loader):
        # Move to GPU, if available
        img = img.to(device)
        reg_label = reg.type(torch.FloatTensor).to(device)  # [N, 5]
        expression_label = expression.type(torch.LongTensor).to(
            device)  # [N, 3]
        gender_label = gender.type(torch.LongTensor).to(device)  # [N, 2]
        glasses_label = glasses.type(torch.LongTensor).to(device)  # [N, 3]
        race_label = race.type(torch.LongTensor).to(device)  # [N, 4]

        # Forward prop.
        reg_out, expression_out, gender_out, glasses_out, race_out = model(
            img)  # embedding => [N, 17]

        # Calculate loss (regression term scaled by the global loss_ratio)
        reg_loss = L1Loss(reg_out, reg_label) * loss_ratio
        expression_loss = CrossEntropyLoss(expression_out, expression_label)
        gender_loss = CrossEntropyLoss(gender_out, gender_label)
        glasses_loss = CrossEntropyLoss(glasses_out, glasses_label)
        race_loss = CrossEntropyLoss(race_out, race_label)

        loss = reg_loss + expression_loss + gender_loss + glasses_loss + race_loss

        # Back prop.
        optimizer.zero_grad()
        loss.backward()

        # Clip gradients
        clip_gradient(optimizer, grad_clip)

        # Update weights
        optimizer.step()

        # Keep track of metrics
        losses.update(loss.item())

        reg_losses.update(reg_loss.item())
        expression_accuracy = accuracy(expression_out, expression_label)
        expression_accs.update(expression_accuracy)
        gender_accuracy = accuracy(gender_out, gender_label)
        gender_accs.update(gender_accuracy)
        glasses_accuracy = accuracy(glasses_out, glasses_label)
        glasses_accs.update(glasses_accuracy)
        race_accuracy = accuracy(race_out, race_label)
        race_accs.update(race_accuracy)

        # Print status
        if i % print_freq == 0:
            # BUGFIX: the Glasses and Race fields previously reused
            # `expression_acc` in the format string, so their reported
            # accuracies were wrong.
            status = 'Epoch: [{0}][{1}/{2}]\t' \
                     'Loss {loss.val:.4f} ({loss.avg:.4f})\t' \
                     'Reg Loss {reg_loss.val:.4f} ({reg_loss.avg:.4f})\t' \
                     'Expression Accuracy {expression_acc.val:.4f} ({expression_acc.avg:.4f})\t' \
                     'Gender Accuracy {gender_acc.val:.4f} ({gender_acc.avg:.4f})\t' \
                     'Glasses Accuracy {glasses_acc.val:.4f} ({glasses_acc.avg:.4f})\t' \
                     'Race Accuracy {race_acc.val:.4f} ({race_acc.avg:.4f})\t'.format(epoch, i,
                                                                                      len(train_loader),
                                                                                      loss=losses,
                                                                                      reg_loss=reg_losses,
                                                                                      expression_acc=expression_accs,
                                                                                      gender_acc=gender_accs,
                                                                                      glasses_acc=glasses_accs,
                                                                                      race_acc=race_accs)
            logger.info(status)

    return losses.avg
예제 #19
0
def validate(val_loader, model, criterion, gpu_avail, print_freq, f):
    """Evaluate `model` on `val_loader` and report top-1 accuracy.

    Progress lines are printed to stdout and mirrored to the open file
    handle `f`; a final summary line is emitted the same way.

    :param val_loader: DataLoader yielding (input, target) batches
    :param model: classifier producing class logits
    :param criterion: loss function, e.g. cross entropy
    :param gpu_avail: when True, move batches to the GPU
    :param print_freq: emit a progress line every `print_freq` batches
    :param f: open text file that receives a copy of every line
    :return: average top-1 accuracy over the validation set
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if gpu_avail:
                input = input.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1 = accuracy(output, target, topk=(1, ))
            losses.update(loss.item(), input.size(0))
            top1.update(acc1[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                # Build the status line once so stdout and the log file can
                # never drift apart (previously the format string was
                # duplicated for print and f.write). print() supplies the
                # trailing newline; the file copy gets it explicitly.
                msg = ('Test: [{0}/{1}]\t'
                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                       'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                           i,
                           len(val_loader),
                           batch_time=batch_time,
                           loss=losses,
                           top1=top1))
                print(msg)
                f.write(msg + '\n')

        summary = ' * Acc@1 {top1.avg:.3f}'.format(top1=top1)
        print(summary)
        f.write(summary + '\n')

    return top1.avg
예제 #20
0
def train(train_loader, model, criterion, optimizer, epoch, gpu_avail,
          print_freq, f):
    """Train `model` for one epoch over `train_loader`.

    Runs the standard forward / loss / backward / step loop, tracks timing
    and top-1 accuracy, and emits a progress line every `print_freq`
    batches to both stdout and the open file handle `f`.

    :param train_loader: DataLoader yielding (input, target) batches
    :param model: classifier producing class logits
    :param criterion: loss function, e.g. cross entropy
    :param optimizer: optimizer updating the model's weights
    :param epoch: current epoch number (for logging)
    :param gpu_avail: when True, move batches to the GPU
    :param print_freq: emit a progress line every `print_freq` batches
    :param f: open text file that receives a copy of every line
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if gpu_avail:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1 = accuracy(output, target, topk=(1, ))
        losses.update(loss.item(), input.size(0))
        top1.update(acc1[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            # Build the status line once so stdout and the log file can
            # never drift apart (previously the format string was
            # duplicated for print and f.write). print() supplies the
            # trailing newline; the file copy gets it explicitly.
            msg = ('Epoch: [{0}][{1}/{2}]\t'
                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                   'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                   'Acc@1 {top1.val:.3f} ({top1.avg:.3f})'.format(
                       epoch,
                       i,
                       len(train_loader),
                       batch_time=batch_time,
                       data_time=data_time,
                       loss=losses,
                       top1=top1))
            print(msg)
            f.write(msg + '\n')
예제 #21
0
def validate(val_loader, net, criterion, optimizer, epoch, train_args, restore, visualize):
    """Run one validation epoch for the segmentation net.

    Computes per-batch loss and per-pixel argmax predictions, evaluates
    acc / acc_cls / mean_iu / fwavacc over the whole set, snapshots net and
    optimizer state whenever mean_iu beats train_args['best_record'],
    optionally saves/visualizes sampled images, and logs all scalars to the
    module-level tensorboard `writer`. Returns the average validation loss.
    """
    net.eval()

    val_loss = AverageMeter()
    inputs_all, gts_all, predictions_all = [], [], []

    for vi, data in enumerate(val_loader):
        inputs, gts = data
        N = inputs.size(0)
        # volatile=True: pre-0.4 PyTorch inference mode (no autograd history)
        inputs = Variable(inputs, volatile=True).cuda()
        gts = Variable(gts, volatile=True).cuda()

        outputs = net(inputs)
        # argmax over the class channel -> label map on CPU as a numpy array
        predictions = outputs.data.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()

        # .data[0]: pre-0.4 scalar access; loss is divided by batch size
        val_loss.update(criterion(outputs, gts).data[0] / N, N)

        # keep only a random subsample of the inputs for visualization;
        # None placeholders preserve alignment with gts/predictions
        if random.random() > train_args['val_img_sample_rate']:
            inputs_all.append(None)
        else:
            inputs_all.append(inputs.data.squeeze_(0).cpu())
        gts_all.append(gts.data.squeeze_(0).cpu().numpy())
        predictions_all.append(predictions)

    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, voc.num_classes)

    # New best mean IoU: record metrics, snapshot weights, dump visuals
    if mean_iu > train_args['best_record']['mean_iu']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
        # NOTE(review): uses param_groups[1]['lr'] — assumes at least two
        # optimizer parameter groups; confirm against optimizer setup.
        snapshot_name = 'epoch_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
            epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc, optimizer.param_groups[1]['lr']
        )
        torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
        torch.save(optimizer.state_dict(), os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))

        if train_args['val_save_to_img_file']:
            to_save_dir = os.path.join(ckpt_path, exp_name, str(epoch))
            check_mkdir(to_save_dir)

        # Build an (input, ground truth, prediction) image triple per
        # sampled batch for the tensorboard grid.
        val_visual = []
        for idx, data in enumerate(zip(inputs_all, gts_all, predictions_all)):
            if data[0] is None:
                continue
            input_pil = restore(data[0])
            gt_pil = voc.colorize_mask(data[1])
            predictions_pil = voc.colorize_mask(data[2])
            if train_args['val_save_to_img_file']:
                input_pil.save(os.path.join(to_save_dir, '%d_input.png' % idx))
                predictions_pil.save(os.path.join(to_save_dir, '%d_prediction.png' % idx))
                gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
            val_visual.extend([visualize(input_pil.convert('RGB')), visualize(gt_pil.convert('RGB')),
                               visualize(predictions_pil.convert('RGB'))])
        val_visual = torch.stack(val_visual, 0)
        val_visual = vutils.make_grid(val_visual, nrow=3, padding=5)
        writer.add_image(snapshot_name, val_visual)

    print('--------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))

    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'], train_args['best_record']['acc_cls'],
        train_args['best_record']['mean_iu'], train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))

    print('--------------------------------------------------------------------')

    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)
    writer.add_scalar('lr', optimizer.param_groups[1]['lr'], epoch)

    # restore training mode for the caller's next training epoch
    net.train()
    return val_loss.avg
예제 #22
0
def train(train_loader, decoder, criterion_ce, criterion_dis, decoder_optimizer, epoch):
    """
    Performs one epoch's training of the caption decoder.

    Supports two input modes, selected by the module-level `scene_graph`
    flag: scene-graph batches (object/relation features plus masks and a
    relation pair index) or plain image batches.

    :param train_loader: DataLoader for training data
    :param decoder: decoder model
    :param criterion_ce: cross entropy loss layer
    :param criterion_dis: discriminative loss layer
    :param decoder_optimizer: optimizer to update decoder's weights
    :param epoch: epoch number (for logging)
    """

    decoder.train()  # train mode (dropout and batchnorm is used)

    batch_time = AverageMeter()  # forward prop. + back prop. time
    data_time = AverageMeter()  # data loading time
    losses = AverageMeter()  # loss (per word decoded)
    top5accs = AverageMeter()  # top5 accuracy

    start = time.time()

    # Batches
    for i, sample in enumerate(train_loader):
        if scene_graph:
            (obj, rel, caps, caplens, obj_mask, rel_mask, pair_idx) = sample
            obj = obj.to(device)
            rel = rel.to(device)
            obj_mask = obj_mask.to(device)
            rel_mask = rel_mask.to(device)
            pair_idx = pair_idx.to(device)
        else:
            (imgs, caps, caplens) = sample
            imgs = imgs.to(device)
        data_time.update(time.time() - start)

        # Move to GPU, if available
        caps = caps.to(device)
        caplens = caplens.to(device)
        # Forward prop.
        if scene_graph:
                scores, scores_d, caps_sorted, decode_lengths, sort_ind = decoder(object_features=obj,
                                                                                  relation_features=rel,
                                                                                  encoded_captions=caps,
                                                                                  caption_lengths=caplens,
                                                                                  object_mask=obj_mask,
                                                                                  relation_mask=rel_mask,
                                                                                  rel_pair_idx=pair_idx)
        else:
            scores, scores_d, caps_sorted, decode_lengths, sort_ind = decoder(imgs, caps, caplens)

        # Max-pooling across predicted words across time steps for discriminative supervision
        scores_d = scores_d.max(1)[0]

        # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
        targets = caps_sorted[:, 1:]
        # Discriminative targets: -1 marks positions to ignore
        targets_d = torch.zeros(scores_d.size(0), scores_d.size(1)).to(device)
        targets_d.fill_(-1)

        # NOTE(review): each iteration overwrites the same leading columns
        # for ALL rows, so the net effect is to fill up to max(length)-1
        # columns rather than per-sample lengths — confirm this is intended.
        for length in decode_lengths:
            targets_d[:, :length - 1] = targets[:, :length - 1]

        # Remove timesteps that we didn't decode at, or are pads
        # pack_padded_sequence is an easy trick to do this
        scores = pack_padded_sequence(scores, decode_lengths, batch_first=True, enforce_sorted=True).data
        targets = pack_padded_sequence(targets, decode_lengths, batch_first=True, enforce_sorted=True).data

        # Calculate loss: generative (CE) plus weighted discriminative term
        loss_d = criterion_dis(scores_d, targets_d.long())
        loss_g = criterion_ce(scores, targets)
        loss = loss_g + (10 * loss_d)

        # Back prop.
        decoder_optimizer.zero_grad()
        loss.backward()

        # Clip gradients when they are getting too large
        torch.nn.utils.clip_grad_norm_(filter(lambda p: p.requires_grad, decoder.parameters()), 0.25)

        # Update weights
        decoder_optimizer.step()

        # Keep track of metrics (weighted by number of decoded words)
        top5 = accuracy(scores, targets, 5)
        losses.update(loss.item(), sum(decode_lengths))
        top5accs.update(top5, sum(decode_lengths))
        batch_time.update(time.time() - start)

        start = time.time()

        # Print status
        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data Load Time {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})'.format(epoch, i, len(train_loader),
                                                                          batch_time=batch_time,
                                                                          data_time=data_time, loss=losses,
                                                                          top5=top5accs))
예제 #23
0
def train_model(model, criterion, optimizer, log_saver, num_epochs=70):
    """Run the full train/test loop for `num_epochs` epochs.

    Each epoch runs a 'train' phase followed by a 'test' phase over the
    module-level `loaders`. The final epoch's loss/error for each phase is
    appended to `log_saver`; a full checkpoint (model, epoch, log) is saved
    every 30 epochs and at the last epoch.

    :param model: network to train
    :param criterion: loss function
    :param optimizer: optimizer updating the model's weights
    :param log_saver: dict of metric lists; must contain 'num_params'
    :param num_epochs: total number of epochs to run
    :return: (model, log_saver) after training
    """
    since = time.time()
    steps = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        for phase in ('train', 'test'):
            training = phase == 'train'
            model.train(training)

            loss_meter = AverageMeter()
            acc_meter = AverageMeter()

            for inputs, labels in loaders[phase]:
                if use_gpu:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                # volatile=True (pre-0.4 API) only when not training
                inputs = Variable(inputs, volatile=not training)
                labels = Variable(labels, volatile=not training)

                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = criterion(outputs, labels)

                if training:
                    loss.backward()
                    optimizer.step()
                    steps += 1

                loss_meter.update(loss.data[0], outputs.size(0))
                acc_meter.update(
                    accuracy(outputs.data, labels.data)[-1][0],
                    outputs.size(0))

            epoch_loss = loss_meter.avg
            epoch_error = 1 - acc_meter.avg / 100

            # Only the last epoch's metrics are recorded in the log
            if epoch == num_epochs - 1:
                if training:
                    log_saver['train_loss'].append(epoch_loss)
                    log_saver['train_error'].append(epoch_error)
                else:
                    log_saver['test_loss'].append(epoch_loss)
                    log_saver['test_error'].append(epoch_error)

            print('{} Loss: {:.4f} Error: {:.4f}'.format(
                phase, epoch_loss, epoch_error))

        # Periodic + final checkpoint
        if epoch % 30 == 0 or epoch == num_epochs - 1:
            print('Saving..')
            state = {'net': model, 'epoch': epoch, 'log': log_saver}

            if not os.path.isdir('checkpoint_CNN'):
                os.mkdir('checkpoint_CNN')
            torch.save(
                state, './checkpoint_CNN/ckpt_epoch_{}_{}.t7'.format(
                    epoch, log_saver['num_params'][-1]))

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    return model, log_saver
예제 #24
0
def validate(val_loader, decoder, criterion_ce, criterion_dis, epoch):
    """
    Performs one epoch's validation.
    :param val_loader: DataLoader for validation data (must have batch_size 1).
    :param decoder: decoder model
    :param criterion_ce: cross entropy loss layer
    :param criterion_dis : discriminative loss layer
    :param epoch: epoch index, used only to name the hypotheses/references JSON dumps.
    :return: dict of COCO caption metrics (Bleu_1..4, METEOR, ROUGE_L, CIDEr, ...)
             extended with 'loss' and 'top5' running averages.
    """
    decoder.eval()  # eval mode (no dropout or batchnorm)

    batch_time = AverageMeter()
    losses = AverageMeter()
    top5accs = AverageMeter()

    start = time.time()

    references = list()  # references (true captions) for calculating BLEU-4 score
    hypotheses = list()  # hypotheses (predictions)

    # Batches
    with torch.no_grad():
        for i, sample in enumerate(val_loader):

            if i % 5 != 0:
                # only decode every 5th caption, starting from idx 0.
                # this is because the iterator iterates over all captions in the dataset, not all images.
                if i % args.print_freq_val == 0:
                    print('Validation: [{0}/{1}]\t'
                          'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                          'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                          'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})\t'.format(i, len(val_loader),
                                                                                    batch_time=batch_time,
                                                                                    loss=losses, top5=top5accs))
                continue

            if scene_graph:
                (obj, rel, caps, caplens, orig_caps, obj_mask, rel_mask, pair_idx) = sample
                obj = obj.to(device)
                rel = rel.to(device)
                obj_mask = obj_mask.to(device)
                rel_mask = rel_mask.to(device)
                pair_idx = pair_idx.to(device)
            else:
                (imgs, caps, caplens, orig_caps) = sample
                imgs = imgs.to(device)

            # Move to device, if available
            caps = caps.to(device)
            caplens = caplens.to(device)

            # Forward prop.
            if scene_graph:
                scores, scores_d, caps_sorted, decode_lengths, sort_ind = decoder(object_features=obj,
                                                                                  relation_features=rel,
                                                                                  encoded_captions=caps,
                                                                                  caption_lengths=caplens,
                                                                                  object_mask=obj_mask,
                                                                                  relation_mask=rel_mask,
                                                                                  rel_pair_idx=pair_idx)
            else:
                scores, scores_d, caps_sorted, decode_lengths, sort_ind = decoder(imgs, caps, caplens)

            # Max-pooling across predicted words across time steps for discriminative supervision
            scores_d = scores_d.max(1)[0]

            # Since we decoded starting with <start>, the targets are all words after <start>, up to <end>
            targets = caps_sorted[:, 1:]
            targets_d = torch.zeros(scores_d.size(0), scores_d.size(1)).to(device)
            targets_d.fill_(-1)

            for length in decode_lengths:
                targets_d[:, :length - 1] = targets[:, :length - 1]

            # Remove timesteps that we didn't decode at, or are pads
            # pack_padded_sequence is an easy trick to do this
            scores_copy = scores.clone()
            scores = pack_padded_sequence(scores, decode_lengths, batch_first=True, enforce_sorted=True).data
            targets = pack_padded_sequence(targets, decode_lengths, batch_first=True, enforce_sorted=True).data

            # Calculate loss
            loss_d = criterion_dis(scores_d, targets_d.long())
            loss_g = criterion_ce(scores, targets)
            loss = loss_g + (10 * loss_d)

            # Keep track of metrics
            losses.update(loss.item(), sum(decode_lengths))
            top5 = accuracy(scores, targets, 5)
            top5accs.update(top5, sum(decode_lengths))
            batch_time.update(time.time() - start)

            start = time.time()

            if i % args.print_freq_val == 0:
                print('Validation: [{0}/{1}]\t'
                      'Batch Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Top-5 Accuracy {top5.val:.3f} ({top5.avg:.3f})\t'.format(i, len(val_loader),
                                                                                batch_time=batch_time,
                                                                                loss=losses, top5=top5accs))

            # Store references (true captions), and hypothesis (prediction) for each image
            # If for n images, we have n hypotheses, and references a, b, c... for each image, we need -
            # references = [[ref1a, ref1b, ref1c], [ref2a, ref2b], ...], hypotheses = [hyp1, hyp2, ...]

            # References
            assert (len(sort_ind) == 1), "Cannot have batch_size>1 for validation."
            # a reference is a list of lists:
            # [['the', 'cat', 'sat', 'on', 'the', 'mat'], ['a', 'cat', 'on', 'the', 'mat']]
            references.append(orig_caps)

            # Hypotheses
            _, preds = torch.max(scores_copy, dim=2)
            preds = preds.tolist()
            # Truncate each prediction at its decode length, then strip <start>/<pad>
            # tokens in a single pass (previously the filter re-ran over the whole
            # accumulated list on every loop iteration: accidental O(n^2)).
            preds_idxs_no_pads = [p[:decode_lengths[j]] for j, p in enumerate(preds)]
            preds_idxs_no_pads = list(map(lambda c: [w for w in c if w not in {word_map['<start>'],
                                                                               word_map['<pad>']}],
                                          preds_idxs_no_pads))
            temp_preds = list()
            # remove <start> and pads and convert idxs to string
            for hyp in preds_idxs_no_pads:
                temp_preds.append([])
                for w in hyp:
                    # BUG FIX: was word_map['pad'] (KeyError) -- the pad token key is '<pad>',
                    # matching the filter above.
                    assert (not w == word_map['<pad>']), "Should have removed all pads."
                    if not w == word_map['<start>']:
                        temp_preds[-1].append(word_map_inv[w])
            preds = temp_preds
            hypotheses.extend(preds)
            assert len(references) == len(hypotheses)

    # Compute the COCO caption metrics from dumped reference/hypothesis files
    hypotheses_file = os.path.join(args.outdir, 'hypotheses', 'Epoch{:0>3d}.Hypotheses.json'.format(epoch))
    references_file = os.path.join(args.outdir, 'references', 'Epoch{:0>3d}.References.json'.format(epoch))
    create_captions_file(range(len(hypotheses)), hypotheses, hypotheses_file)
    create_captions_file(range(len(references)), references, references_file)
    coco = COCO(references_file)
    # add the predicted results to the object
    coco_results = coco.loadRes(hypotheses_file)
    # create the evaluation object with both the ground-truth and the predictions
    coco_eval = COCOEvalCap(coco, coco_results)
    # change to use the image ids in the results object, not those from the ground-truth
    coco_eval.params['image_id'] = coco_results.getImgIds()
    # run the evaluation
    coco_eval.evaluate(verbose=False, metrics=['bleu', 'meteor', 'rouge', 'cider'])
    # Results contains: "Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4", "METEOR", "ROUGE_L", "CIDEr", "SPICE"
    results = coco_eval.eval
    results['loss'] = losses.avg
    results['top5'] = top5accs.avg

    for k, v in results.items():
        print(k+':\t'+str(v))
    return results
예제 #25
0
def val_epoch(epoch, data_loader, model, criterion, opt, logger, writer):
    """Run one validation epoch.

    Computes average loss, accuracy and recall over `data_loader`, logs them
    via `logger` and `writer`, and returns the average loss.

    :param epoch: epoch index (for console/log output).
    :param data_loader: yields (inputs, labels); labels are int-convertible.
    :param model: network to evaluate (set to eval mode by caller or here).
    :param criterion: loss function.
    :param opt: options namespace; only `opt.no_cuda` is read here.
    :param logger: object with a `.log(dict)` method.
    :param writer: tensorboard SummaryWriter.
    :return: average validation loss (a 0-dim tensor; see note below).
    """
    print('validation at epoch {}'.format(epoch))
    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()
    recalls = AverageMeter()

    end_time = time.time()
    for i, (inputs, labels) in enumerate(data_loader):
        data_time.update(time.time() - end_time)
        labels = list(map(int, labels))
        # add a channel dimension and ensure float input
        inputs = torch.unsqueeze(inputs, 1)
        inputs = inputs.type(torch.FloatTensor)

        if not opt.no_cuda:
            # BUG FIX: `async=True` is a SyntaxError on Python >= 3.7 (async is
            # a keyword); `non_blocking=True` is the equivalent modern spelling.
            labels = torch.LongTensor(labels).cuda(non_blocking=True)
        with torch.no_grad():
            inputs = Variable(inputs)
            labels = Variable(labels)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            acc = calculate_accuracy(outputs, labels)
            recall = calculate_recall(outputs, labels)

        # NOTE: loss.data (a tensor) is stored on purpose -- the logger below
        # calls .item() on the running averages.
        losses.update(loss.data, inputs.size(0))
        accuracies.update(acc, inputs.size(0))
        recalls.update(recall, inputs.size(0))

        batch_time.update(time.time() - end_time)
        end_time = time.time()

        print('Epoch: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})\t'
              'Recall {recall.val:.3f} ({recall.avg:.3f})'.format(
                  epoch,
                  i + 1,
                  len(data_loader),
                  batch_time=batch_time,
                  data_time=data_time,
                  loss=losses,
                  acc=accuracies,
                  recall=recalls))

    logger.log({
        'epoch': epoch,
        'loss': round(losses.avg.item(), 4),
        'acc': round(accuracies.avg.item(), 4),
        'recall': round(recalls.avg.item(), 4)
    })
    writer.add_scalar('val/loss', losses.avg, epoch)
    writer.add_scalar('val/accuracy', accuracies.avg, epoch)
    writer.add_scalar('val/recall', recalls.avg, epoch)

    return losses.avg
예제 #26
0
def train_one_epoch_mixup(train_loader,
                          mix_loader,
                          model,
                          criterion,
                          optimizer,
                          epoch,
                          meters,
                          since,
                          alpha=0.4,
                          log=None):
    """Train `model` for one epoch with mixup augmentation.

    Pairs of batches from `train_loader` and `mix_loader` are blended with a
    Beta(alpha, alpha)-sampled coefficient; loss and macro-F1 (threshold 0.15
    on sigmoid outputs) are tracked and appended to `meters`.

    :param meters: dict with 'loss' and 'f1' history lists; updated in place.
    :param since: start timestamp used for elapsed-time printing.
    :param alpha: mixup Beta-distribution parameter.
    :param log: file-like object passed to print(file=...).
    :return: the updated `meters` dict.
    """
    losses = AverageMeter()
    f1 = AverageMeter()
    model.train()
    # Pull previous/best stats from history for progress printing.
    if len(meters['f1']):
        previous_loss = meters['loss'][-1]
        previous_f1 = meters['f1'][-1]
        best_f1_epoch = np.argmax(meters['f1'])
        best_f1_score = meters['f1'][best_f1_epoch]
        best_loss_epoch = np.argmin(meters['loss'])
        best_loss = meters['loss'][best_loss_epoch]
    else:
        best_f1_epoch = 0
        best_f1_score = 0
        best_loss_epoch = 0
        best_loss = 0
        previous_loss = 0
        previous_f1 = 0

    for batch_id, ((x1, y1), (x2,
                              y2)) in enumerate(zip(train_loader, mix_loader)):
        batch_x1 = x1.cuda(non_blocking=True)
        batch_x2 = x2.cuda(non_blocking=True)
        # mixup: convex combination of two batches (inputs and soft labels)
        lam = np.random.beta(alpha, alpha)
        batch_x = lam * batch_x1 + (1.0 - lam) * batch_x2
        batch_y1 = torch.Tensor(np.array(y1)).float().cuda(non_blocking=True)
        batch_y2 = torch.Tensor(np.array(y2)).float().cuda(non_blocking=True)
        batch_y = lam * batch_y1 + (1.0 - lam) * batch_y2
        output = model(batch_x)
        loss = criterion(output, batch_y)
        losses.update(loss.item(), batch_x.size(0))
        f1_batch = f1_score(batch_y.cpu() > 0.5,
                            output.sigmoid().cpu() > 0.15,
                            average='macro')
        f1.update(f1_batch, batch_x.size(0))
        optimizer.zero_grad()
        loss.backward()
        if cfg.grident_clip:
            # BUG FIX: torch.nn.utils.clip_grad_norm is deprecated/removed;
            # the in-place variant clip_grad_norm_ has existed since torch 0.4.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
        optimizer.step()
        print('Epoch %3d\t' % epoch,
              'Batch %3d|%3d\t' % (batch_id, len(train_loader)),
              'Loss: %10.5f\t' % losses.avg,
              'Metrics|F1 Score: %10.5f\t' % f1.avg,
              'Previous Loss: %10.5f\t' % previous_loss,
              'Previous F1 Score: %10.5f\t' % previous_f1,
              'Best loss:%10.5f Epoch %3d\t' % (best_loss, best_loss_epoch),
              # typo fix in log text: was 'Besr F1'
              'Best F1:%10.5f Epoch %3d\t' % (best_f1_score, best_f1_epoch),
              'Time: %s' % time_to_str((timer() - since), 'min'),
              file=log)

    meters['loss'].append(losses.avg)
    meters['f1'].append(f1.avg)

    return meters
예제 #27
0
def train(train_loader, model, criterion, optimizer, epoch, args, logger,
          writer, local_rank):
    """Train `model` for one epoch under DistributedDataParallel.

    Loss and top-1 accuracy are all-reduced (mean) across `args.nprocs`
    processes so every rank records the same values; tensorboard scalars are
    written by rank 0 only.

    :param local_rank: CUDA device index for this process.
    """
    batch_times = AverageMeter('Time', ':6.3f')
    data_times = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')  # ':.4e' = scientific notation, 4 decimals
    top1 = AverageMeter('Acc@1', ':6.2f')

    # switch to train mode
    model.train()
    end = time.time()

    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time = time.time() - end
        data_times.update(data_time)

        images = images.cuda(local_rank, non_blocking=True)
        target = target.cuda(local_rank, non_blocking=True)

        # compute output
        output = model(images)
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1 = accuracy(output, target, 1)

        # DDP: data synchronization
        dist.barrier()
        reduced_loss = reduce_mean(loss, args.nprocs)
        reduced_acc1 = reduce_mean(acc1, args.nprocs)

        losses.update(reduced_loss.item(), images.size(0))
        top1.update(reduced_acc1, images.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_times.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            ddp_print(
                'Train epoch: [{:d}/{:d}][{:d}/{:d}]\tlr={:.6f}\tce_loss={:.4f}\ttop1_acc={:.4f}\tdata_time={:6.3f}s'
                '\tbatch_time={:6.3f}s'.format(epoch, args.epochs, i,
                                               len(train_loader),
                                               get_learning_rate(optimizer),
                                               losses.avg, top1.avg,
                                               data_times.avg,
                                               batch_times.avg), logger,
                local_rank)
        # BUG FIX: removed a stray `break` here (debugging leftover) that
        # terminated the epoch after the first batch.

    ddp_print(
        '||==> Train epoch: [{:d}/{:d}]\tlr={:.6f}\tce_loss={:.4f}\ttop1_acc={:.4f}\tbatch_time={:6.3f}s'
        .format(epoch, args.epochs, get_learning_rate(optimizer), losses.avg,
                top1.avg, batch_times.avg), logger, local_rank)

    if args.local_rank == 0:
        # save tensorboard
        writer.add_scalar('lr', get_learning_rate(optimizer), epoch)
        writer.add_scalar('Train_ce_loss', losses.avg, epoch)
        writer.add_scalar('Train_top1_accuracy', top1.avg, epoch)
예제 #28
0
def test(val_loader, model, epoch, use_cuda):
    """Propagate segmentation labels through video frames via learned correspondences.

    For each video batch: extracts the set of label colors from the first
    frame(s), runs the model to get correlation features between reference and
    target frames, propagates label distributions with top-k attention, and
    saves frame / label / blended-mask images under args.save_path.

    NOTE(review): `losses` is created but never updated, so the return value is
    always the meter's initial average — confirm callers ignore it.
    """

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    model.eval()

    save_objs = args.evaluate

    # redundant local import — os is typically imported at module level
    import os
    if not os.path.exists(args.save_path):
        os.makedirs(
            args.save_path)  # /scratch/xiaolonw/davis_results_mask_mixfcn/')
    # save_path = '/scratch/xiaolonw/davis_results_mask_mixfcn/'
    save_path = args.save_path + '/'
    # img_path  = '/scratch/xiaolonw/vlog_frames/'
    save_file = '%s/list.txt' % save_path

    # NOTE(review): fileout is opened and closed but never written to.
    fileout = open(save_file, 'w')

    end = time.time()

    # bar = Bar('Processing', max=len(val_loader))
    for batch_idx, (imgs_total, patch2_total, lbls,
                    meta) in enumerate(tqdm(val_loader)):

        # number of reference frames at the start of the clip
        finput_num_ori = params['videoLen']
        finput_num = finput_num_ori

        # measure data loading time
        data_time.update(time.time() - end)
        imgs_total = torch.autograd.Variable(imgs_total.cuda())
        # patch2_total = torch.autograd.Variable(patch2_total.cuda())

        t00 = time.time()

        bs = imgs_total.size(0)
        total_frame_num = imgs_total.size(1)
        channel_num = imgs_total.size(2)
        height_len = imgs_total.size(3)
        width_len = imgs_total.size(4)

        # evaluation only supports batch size 1
        assert (bs == 1)

        folder_paths = meta['folder_path']
        gridx = int(meta['gridx'].data.cpu().numpy()[0])
        gridy = int(meta['gridy'].data.cpu().numpy()[0])
        print('gridx: ' + str(gridx) + ' gridy: ' + str(gridy))
        print('total_frame_num: ' + str(total_frame_num))

        # feature-map resolution: input crop downsampled by 8
        height_dim = int(params['cropSize'] / 8)
        width_dim = int(params['cropSize'] / 8)

        # processing labels
        lbls = lbls[0].data.cpu().numpy()
        print(lbls.shape)
        # print(patch2_total.size())

        lbls_new = []

        # collect the set of distinct label colors from the first frame,
        # counting pixel occurrences per color
        lbl_set = []
        lbl_set.append(np.zeros(3).astype(np.uint8))
        count_lbls = []
        count_lbls.append(0)

        for i in range(lbls.shape[0]):
            nowlbl = lbls[i].copy()
            if i == 0:
                for j in range(nowlbl.shape[0]):
                    for k in range(nowlbl.shape[1]):

                        pixellbl = nowlbl[j, k, :].astype(np.uint8)

                        flag = 0
                        for t in range(len(lbl_set)):
                            if lbl_set[t][0] == pixellbl[0] and lbl_set[t][
                                    1] == pixellbl[1] and lbl_set[t][
                                        2] == pixellbl[2]:
                                flag = 1
                                count_lbls[t] = count_lbls[t] + 1
                                break

                        if flag == 0:
                            lbl_set.append(pixellbl)
                            count_lbls.append(0)

            lbls_new.append(nowlbl)

        # drop rare colors (<= 10 pixels) — likely compression noise
        lbl_set_temp = []
        for i in range(len(lbl_set)):
            if count_lbls[i] > 10:
                lbl_set_temp.append(lbl_set[i])

        lbl_set = lbl_set_temp
        print(lbl_set)
        print(count_lbls)

        t01 = time.time()

        # one-hot label maps at image resolution and feature resolution
        lbls_resize = np.zeros(
            (lbls.shape[0], lbls.shape[1], lbls.shape[2], len(lbl_set)))
        lbls_resize2 = np.zeros(
            (lbls.shape[0], height_dim, width_dim, len(lbl_set)))

        for i in range(lbls.shape[0]):
            nowlbl = lbls[i].copy()
            for j in range(nowlbl.shape[0]):
                for k in range(nowlbl.shape[1]):

                    pixellbl = nowlbl[j, k, :].astype(np.uint8)
                    for t in range(len(lbl_set)):
                        if lbl_set[t][0] == pixellbl[0] and lbl_set[t][
                                1] == pixellbl[1] and lbl_set[t][
                                    2] == pixellbl[2]:
                            lbls_resize[i, j, k, t] = 1

        for i in range(lbls.shape[0]):
            lbls_resize2[i] = cv2.resize(lbls_resize[i],
                                         (height_dim, width_dim))

        t02 = time.time()
        print(t02 - t01, 'relabel', t01 - t00, 'label')

        # print the images

        # de-normalize frames (ImageNet mean/std) back to displayable images
        imgs_set = imgs_total.data
        imgs_set = imgs_set.cpu().numpy()
        imgs_set = imgs_set[0]
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]

        imgs_toprint = []

        # ref image
        for t in range(imgs_set.shape[0]):
            img_now = imgs_set[t]

            for c in range(3):
                img_now[c] = img_now[c] * std[c]
                img_now[c] = img_now[c] + mean[c]

            img_now = img_now * 255
            img_now = np.transpose(img_now, (1, 2, 0))
            img_now = cv2.resize(img_now,
                                 (img_now.shape[0] * 2, img_now.shape[1] * 2))

            imgs_toprint.append(img_now)

            imname = save_path + str(batch_idx) + '_' + str(t) + '_frame.jpg'
            # NOTE(review): scipy.misc.imsave was removed in SciPy >= 1.2;
            # this file requires an old SciPy (or porting to imageio.imwrite).
            scipy.misc.imsave(imname, img_now)

        for t in range(finput_num_ori):

            nowlbl = lbls_new[t]
            imname = save_path + str(batch_idx) + '_' + str(t) + '_label.jpg'
            scipy.misc.imsave(imname, nowlbl)

        # now_batch_size = 4
        now_batch_size = 1  # we use one gpu for eval

        imgs_stack = []
        patch2_stack = []

        im_num = total_frame_num - finput_num_ori

        trans_out_2_set = []
        corrfeat2_set = []

        imgs_tensor = torch.Tensor(now_batch_size, finput_num, 3,
                                   params['cropSize'], params['cropSize'])
        target_tensor = torch.Tensor(now_batch_size, 1, 3, params['cropSize'],
                                     params['cropSize'])

        imgs_tensor = torch.autograd.Variable(imgs_tensor.cuda())
        target_tensor = torch.autograd.Variable(target_tensor.cuda())

        t03 = time.time()

        # forward pass: for every target frame, build a reference stack of
        # (first frame + preceding window) and compute correlation features
        for iter in range(0, im_num, now_batch_size):

            # print(iter)

            startid = iter
            endid = iter + now_batch_size

            if endid > im_num:
                endid = im_num

            now_batch_size2 = endid - startid

            for i in range(now_batch_size2):

                imgs = imgs_total[:, iter + i + 1:iter + i +
                                  finput_num_ori, :, :, :]
                imgs2 = imgs_total[:, 0, :, :, :].unsqueeze(1)
                imgs = torch.cat((imgs2, imgs), dim=1)

                imgs_tensor[i] = imgs
                target_tensor[i, 0] = imgs_total[0, iter + i + finput_num_ori]

            corrfeat2_now = model(imgs_tensor, target_tensor)
            corrfeat2_now = corrfeat2_now.view(now_batch_size, finput_num_ori,
                                               corrfeat2_now.size(1),
                                               corrfeat2_now.size(2),
                                               corrfeat2_now.size(3))

            for i in range(now_batch_size2):
                corrfeat2_set.append(corrfeat2_now[i].data.cpu().numpy())

        t04 = time.time()
        print(t04 - t03, 'model forward', t03 - t02, 'image prep')

        # propagation pass: use top-k correlations to carry label
        # distributions from reference frames onto each target frame
        for iter in range(total_frame_num - finput_num_ori):

            if iter % 10 == 0:
                print(iter)

            imgs = imgs_total[:, iter + 1:iter + finput_num_ori, :, :, :]
            imgs2 = imgs_total[:, 0, :, :, :].unsqueeze(1)
            imgs = torch.cat((imgs2, imgs), dim=1)

            # trans_out_2, corrfeat2 = model(imgs, patch2)
            corrfeat2 = corrfeat2_set[iter]
            corrfeat2 = torch.from_numpy(corrfeat2)

            out_frame_num = int(finput_num)
            height_dim = corrfeat2.size(2)
            width_dim = corrfeat2.size(3)

            corrfeat2 = corrfeat2.view(corrfeat2.size(0), height_dim,
                                       width_dim, height_dim, width_dim)
            corrfeat2 = corrfeat2.data.cpu().numpy()

            topk_vis = args.topk_vis
            # NOTE(review): np.int was removed in NumPy >= 1.24; this needs an
            # old NumPy (or replacing np.int with int / np.int64).
            vis_ids_h = np.zeros((corrfeat2.shape[0], height_dim, width_dim,
                                  topk_vis)).astype(np.int)
            vis_ids_w = np.zeros((corrfeat2.shape[0], height_dim, width_dim,
                                  topk_vis)).astype(np.int)

            t05 = time.time()

            # top-k source positions per target position (argpartition is
            # faster than a full argsort)
            atten1d = corrfeat2.reshape(corrfeat2.shape[0],
                                        height_dim * width_dim, height_dim,
                                        width_dim)
            ids = np.argpartition(atten1d, -topk_vis, axis=1)[:, -topk_vis:]
            # ids = np.argsort(atten1d, axis=1)[:, -topk_vis:]

            hid = ids // width_dim
            wid = ids % width_dim

            # NOTE(review): hid is stored into vis_ids_w and wid into
            # vis_ids_h; the swap is mirrored when indexing corrfeat2 below,
            # so it is internally consistent — confirm before renaming.
            vis_ids_h = wid.transpose(0, 2, 3, 1)
            vis_ids_w = hid.transpose(0, 2, 3, 1)

            t06 = time.time()

            img_now = imgs_toprint[iter + finput_num_ori]

            predlbls = np.zeros((height_dim, width_dim, len(lbl_set)))
            # predlbls2 = np.zeros((height_dim * width_dim, len(lbl_set)))

            # accumulate correlation-weighted label votes from each reference
            for t in range(finput_num):

                tt1 = time.time()

                h, w, k = np.meshgrid(np.arange(height_dim),
                                      np.arange(width_dim),
                                      np.arange(topk_vis),
                                      indexing='ij')
                h, w = h.flatten(), w.flatten()

                hh, ww = vis_ids_h[t].flatten(), vis_ids_w[t].flatten()

                if t == 0:
                    # reference 0 is always the first frame's ground truth
                    lbl = lbls_resize2[0, hh, ww, :]
                else:
                    lbl = lbls_resize2[t + iter, hh, ww, :]

                np.add.at(predlbls, (h, w),
                          lbl * corrfeat2[t, ww, hh, h, w][:, None])

            t07 = time.time()
            # print(t07-t06, 'lbl proc', t06-t05, 'argsorts')

            predlbls = predlbls / finput_num

            # per-channel min-max normalization of the propagated labels
            for t in range(len(lbl_set)):
                nowt = t
                predlbls[:, :,
                         nowt] = predlbls[:, :, nowt] - predlbls[:, :,
                                                                 nowt].min()
                predlbls[:, :,
                         nowt] = predlbls[:, :, nowt] / predlbls[:, :,
                                                                 nowt].max()

            # feed the prediction back as the label map for later frames
            lbls_resize2[iter + finput_num_ori] = predlbls

            predlbls_cp = predlbls.copy()
            predlbls_cp = cv2.resize(predlbls_cp,
                                     (params['imgSize'], params['imgSize']))
            predlbls_val = np.zeros((params['imgSize'], params['imgSize'], 3))

            ids = np.argmax(predlbls_cp[:, :, 1:len(lbl_set)], 2)

            # map argmax channel back to its RGB label color
            predlbls_val = np.array(lbl_set)[np.argmax(predlbls_cp, axis=-1)]
            predlbls_val = predlbls_val.astype(np.uint8)
            predlbls_val2 = cv2.resize(predlbls_val,
                                       (img_now.shape[0], img_now.shape[1]),
                                       interpolation=cv2.INTER_NEAREST)

            # activation_heatmap = cv2.applyColorMap(predlbls, cv2.COLORMAP_JET)
            img_with_heatmap = np.float32(img_now) * 0.5 + np.float32(
                predlbls_val2) * 0.5

            imname = save_path + str(batch_idx) + '_' + str(
                iter + finput_num_ori) + '_label.jpg'
            imname2 = save_path + str(batch_idx) + '_' + str(
                iter + finput_num_ori) + '_mask.png'

            scipy.misc.imsave(imname, np.uint8(img_with_heatmap))
            scipy.misc.imsave(imname2, np.uint8(predlbls_val))

    fileout.close()

    return losses.avg
예제 #29
0
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """Train `model` for one epoch.

    :param trainloader: yields (inputs, targets) batches.
    :param use_cuda: move batches to GPU when True.
    :return: tuple (average loss, average top-1 precision).
    """
    # switch to train mode
    model.train()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    for batch_idx, (inputs, targets) in enumerate(trainloader):

        if use_cuda:
            # NOTE(review): `local_rank` is not defined in this function —
            # presumably a module-level global; confirm before multi-GPU use.
            # BUG FIX: `async=True` is a SyntaxError on Python >= 3.7;
            # `non_blocking=True` is the equivalent modern spelling.
            inputs, targets = inputs.cuda(local_rank), targets.cuda(local_rank, non_blocking=True)
        inputs, targets = torch.autograd.Variable(inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.data.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    return (losses.avg, top1.avg)
def train(model,
          train_dataloader,
          lr=0.01,
          save_dir='./weights',
          num_epoches=200,
          model_name="",
          valid_dataloader=None,
          batch_num=None,
          train_type="clean",
          random_layer=None):
    """Train `model` with SGD, early stopping on validation error.

    LR is cut 10x after 3 epochs without improvement; training stops after 5.
    Best weights are saved to `<save_dir>/<model_name>_model.dat`; per-epoch
    stats are appended to `<save_dir>/<model_name>_results.csv`.

    NOTE(review): when valid_dataloader is None, `valid_loss`/`valid_error`
    are never bound, so the CSV write below raises NameError — confirm this
    function is only ever called with a validation loader.
    """
    if random_layer:
        model_name = model_name + "_randomLayer"
    print(train_dataloader, valid_dataloader)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=lr,
                                momentum=0.9,
                                nesterov=True,
                                weight_decay=0.0005)
    cost = nn.CrossEntropyLoss()

    batch_time = AverageMeter()
    losses = AverageMeter()
    error = AverageMeter()

    end = time.time()
    best_error = 1.0
    nobetter_num = 1  # consecutive epochs without (sufficient) improvement
    for epoch in range(num_epoches):
        if nobetter_num >= 5:
            print("train done .lr={},best_error={}".format(lr, best_error))
            break
        if nobetter_num >= 3:
            # decay the learning rate after 3 stagnant epochs
            lr = lr * 0.1
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        _, train_loss, train_error = train_epoch(
            model,
            train_dataloader if train_type == "clean" else combind_loader(
                *train_dataloader),
            optimizer,
            cost,
            epoch,
            num_epoches,
            batch_num=batch_num,
            random_layer=random_layer)
        if valid_dataloader:
            with torch.no_grad():
                _, valid_loss, valid_error = valid_epoch(
                    model,
                    valid_dataloader if train_type == "clean" else
                    combind_loader(*valid_dataloader),
                    cost,
                    batch_num=len(valid_dataloader)
                    if train_type == "clean" else len(valid_dataloader[0]),
                    random_layer=random_layer)
        if valid_dataloader and valid_error < best_error:
            best_error = valid_error
            # NOTE(review): best_error was just set to valid_error, so this
            # condition can never be true — any improvement always resets
            # nobetter_num to 1, and the "marginal improvement" branch is dead
            # code. The check likely should compare against the *previous*
            # best_error; confirm intended semantics before changing.
            if valid_error + 0.005 < best_error:
                nobetter_num += 1
            else:
                nobetter_num = 1
            print('New best error: %.4f' % best_error)
            torch.save(model.state_dict(),
                       os.path.join(save_dir, model_name + '_model.dat'))
        else:
            #torch.save(model.state_dict(), os.path.join(save_dir, 'vgg16_model.dat'))
            nobetter_num += 1

        with open(os.path.join(save_dir, model_name + '_results.csv'),
                  'a') as f:
            f.write('%03d,%0.6f,%0.6f,%0.5f,%0.5f,\n' % (
                (epoch + 1),
                train_loss,
                train_error,
                valid_loss,
                valid_error,
            ))
예제 #31
0
def train(train_loader, model, criterion, optimizer, epoch, log):
  """Train `model` for one epoch; returns (top-1 avg, loss avg).

  Progress is printed once at the end of the epoch via print_log.
  """
  batch_time = AverageMeter()
  data_time = AverageMeter()
  losses = AverageMeter()
  top1 = AverageMeter()
  top5 = AverageMeter()
  # switch to train mode
  model.train()

  end = time.time()
  for i, (input, target) in enumerate(train_loader):
    # measure data loading time
    data_time.update(time.time() - end)

    if args.use_cuda:
      # BUG FIX: `async=True` is a SyntaxError on Python >= 3.7 (async is a
      # keyword); `non_blocking=True` is the equivalent modern spelling.
      target = target.cuda(non_blocking=True)
      input = input.cuda()
    input_var = torch.autograd.Variable(input)
    target_var = torch.autograd.Variable(target)

    # compute output
    output = model(input_var)
    loss = criterion(output, target_var)

    # measure accuracy and record loss
    # NOTE(review): loss.data[0] / prec1[0] indexing requires torch < 0.5;
    # on newer torch replace with loss.item() / prec1.item().
    prec1, prec5 = accuracy(output.data, target, topk=(1, 5))
    losses.update(loss.data[0], input.size(0))
    top1.update(prec1[0], input.size(0))
    top5.update(prec5[0], input.size(0))

    # compute gradient and do SGD step
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # measure elapsed time
    batch_time.update(time.time() - end)
    end = time.time()

  print_log('  Epoch: [{:03d}][{:03d}/{:03d}]   '
        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})   '
        'Data {data_time.val:.3f} ({data_time.avg:.3f})   '
        'Loss {loss.val:.4f} ({loss.avg:.4f})   '
        'Prec@1 {top1.val:.3f} ({top1.avg:.3f})   '
        'Prec@5 {top5.val:.3f} ({top5.avg:.3f})   '.format(
        epoch, i, len(train_loader), batch_time=batch_time,
        data_time=data_time, loss=losses, top1=top1, top5=top5) + time_string(), log)
  return top1.avg, losses.avg
예제 #32
0
def validate(val_loader, model, use_val_th=False):
    """Evaluate `model` over the whole validation set.

    Accumulates per-batch targets, probabilities, and raw scores into
    whole-set arrays, then computes thresholded multi-label accuracy
    metrics in a single pass at the end.

    Args:
        val_loader: yields (inputs, (targets, unc_targets), infos) batches;
            each info is a (filename, lesion_idx) pair.
        model: network returning a dict with 'class_prob1/2',
            'class_score1/2' and (optionally) 'emb' entries.
        use_val_th: when True, reuse thresholds calibrated on the validation
            set; also gates saving of per-lesion scores for the test set.

    Returns:
        dict of accuracy metrics (plus 'ex_neg' per-class negative counts).
    """
    batch_time = AverageMeter()
    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (inputs, targets, infos) in enumerate(val_loader):
            if default.generate_features_all:
                logger.info('generating features, batch %d', i)
            filenames = [info[0] for info in infos]
            lesion_idxs = [info[1] for info in infos]
            inputs = [input.cuda() for input in inputs]
            # targets is a pair: (class labels, uncertainty labels)
            unc_targets = targets[1]
            targets = targets[0]

            # compute output
            out = model(inputs)
            if config.SCORE_PROPAGATION:
                prob_np = out['class_prob2'].detach().cpu().numpy()
                scores_np = out['class_score2'].detach().cpu().numpy()
            else:
                prob_np = out['class_prob1'].detach().cpu().numpy()
                scores_np = out['class_score1'].detach().cpu().numpy()

            target1 = targets.numpy() > 0
            # only certain labels (uncertainty == 0) are weighted into accuracy
            pred_wt = unc_targets.numpy() == 0
            if i == 0:
                # first batch initializes the whole-set accumulators
                target_all = target1
                prob_all = prob_np
                score_all = scores_np
                lesion_idx_all = lesion_idxs
                pred_wt_all = pred_wt
                if default.generate_features_all:
                    ft_all = out['emb']
            else:
                # later batches are stacked row-wise onto the accumulators
                target_all = np.vstack((target_all, target1))
                prob_all = np.vstack((prob_all, prob_np))
                score_all = np.vstack((score_all, scores_np))
                pred_wt_all = np.vstack((pred_wt_all, pred_wt))
                lesion_idx_all.extend(lesion_idxs)
                if default.generate_features_all:
                    ft_all = np.vstack((ft_all, out['emb']))

        # feature-dump mode: save embeddings and abort deliberately
        if default.generate_features_all:
            save_ft_to_file(ft_all)
            assert 0, 'all features have been generated and saved.'

        if config.TEST.USE_CALIBRATED_TH:
            accs, pred_label_all = compute_all_acc_wt_th(target_all, prob_all, pred_wt_all, use_val_th)
        else:
            pred_label_all = score2label(prob_all, config.TEST.SCORE_PARAM)
            accs = compute_all_acc_wt(target_all, pred_label_all, prob_all, pred_wt_all)

        # measure elapsed time
        # NOTE(review): updated once after the loop, so this records the
        # total validation time, not a per-batch time.
        batch_time.update(time.time() - end)
        end = time.time()

        if i % default.frequent == 0:
            logger.info('Test: [{0}/{1}]\t'
                        'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                        '{crit} {accs:.3f}'
                        .format(
                   i, len(val_loader), batch_time=batch_time, crit=config.TEST.CRITERION,
                   accs=accs[config.TEST.CRITERION]
            ))

        print_accs(accs)
        # count of confidently-negative ground-truth labels per class
        accs['ex_neg'] = np.sum((target_all == 0) & pred_wt_all, axis=0)

        if use_val_th:  # only save for test set not val set
            save_acc_to_file(accs, val_loader, 'all_terms')
        if default.mode == 'infer' and use_val_th:
            save_test_scores_to_file(score_all, pred_label_all, target_all, accs, lesion_idx_all)

    return accs
예제 #33
0
def main():
  """Entry point: set up data/model/optimizer, then train and evaluate.

  Reads all configuration from the module-level `args` namespace, writes a
  per-seed log file under args.save_path, optionally resumes from a
  checkpoint, and runs the train/validate loop for args.epochs epochs.
  """
  if not os.path.isdir(args.save_path): os.makedirs(args.save_path)
  log = open(os.path.join(args.save_path, 'log_seed_{}.txt'.format(args.manualSeed)), 'w')
  print_log('save path : {}'.format(args.save_path), log)
  state = {k: v for k, v in args._get_kwargs()}
  print_log(state, log)
  print_log("Random Seed: {}".format(args.manualSeed), log)
  print_log("python version : {}".format(sys.version.replace('\n', ' ')), log)
  print_log("torch  version : {}".format(torch.__version__), log)
  print_log("cudnn  version : {}".format(torch.backends.cudnn.version()), log)

  # Init dataset
  if not os.path.isdir(args.data_path):
    os.makedirs(args.data_path)

  # per-dataset channel means/stds (0-255 scale divided down to 0-1)
  if args.dataset == 'cifar10':
    mean = [x / 255 for x in [125.3, 123.0, 113.9]]
    std = [x / 255 for x in [63.0, 62.1, 66.7]]
  elif args.dataset == 'cifar100':
    mean = [x / 255 for x in [129.3, 124.1, 112.4]]
    std = [x / 255 for x in [68.2, 65.4, 70.4]]
  else:
    # NOTE(review): svhn/stl10 are accepted below but have no stats here,
    # so they hit this assert first — confirm intended behavior.
    assert False, "Unknow dataset : {}".format(args.dataset)

  train_transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(), transforms.RandomCrop(32, padding=4), transforms.ToTensor(),
     transforms.Normalize(mean, std)])
  test_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean, std)])

  if args.dataset == 'cifar10':
    train_data = dset.CIFAR10(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR10(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'cifar100':
    train_data = dset.CIFAR100(args.data_path, train=True, transform=train_transform, download=True)
    test_data = dset.CIFAR100(args.data_path, train=False, transform=test_transform, download=True)
    num_classes = 100
  elif args.dataset == 'svhn':
    train_data = dset.SVHN(args.data_path, split='train', transform=train_transform, download=True)
    test_data = dset.SVHN(args.data_path, split='test', transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'stl10':
    train_data = dset.STL10(args.data_path, split='train', transform=train_transform, download=True)
    test_data = dset.STL10(args.data_path, split='test', transform=test_transform, download=True)
    num_classes = 10
  elif args.dataset == 'imagenet':
    assert False, 'Do not finish imagenet code'
  else:
    assert False, 'Do not support dataset : {}'.format(args.dataset)

  train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                         num_workers=args.workers, pin_memory=True)
  test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, shuffle=False,
                        num_workers=args.workers, pin_memory=True)

  # Init model, criterion, and optimizer
  # NOTE(review): args.arch is ignored — the model is hard-coded to SENet34.
  #net = models.__dict__[args.arch](num_classes).cuda()
  net = SENet34()

  # define loss function (criterion) and optimizer
  # NOTE(review): F.nll_loss expects log-probabilities from the net — confirm
  # SENet34 ends in log_softmax.
  criterion = F.nll_loss
  optimizer = torch.optim.SGD(net.parameters(), state['learning_rate'], momentum=state['momentum'],
                weight_decay=state['decay'], nesterov=True)

  if args.use_cuda: net.cuda()

  recorder = RecorderMeter(args.epochs)
  # optionally resume from a checkpoint
  if args.resume:
    if os.path.isfile(args.resume):
      print_log("=> loading checkpoint '{}'".format(args.resume), log)
      checkpoint = torch.load(args.resume)
      recorder = checkpoint['recorder']
      args.start_epoch = checkpoint['epoch']
      net.load_state_dict(checkpoint['state_dict'])
      optimizer.load_state_dict(checkpoint['optimizer'])
      print_log("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch']), log)
    else:
      print_log("=> no checkpoint found at '{}'".format(args.resume), log)
  else:
    print_log("=> do not use any checkpoint for model", log)

  # evaluation-only mode: run one validation pass and exit
  if args.evaluate:
    validate(test_loader, net, criterion, log)
    return

  # Main loop
  start_time = time.time()
  epoch_time = AverageMeter()
  for epoch in range(args.start_epoch, args.epochs):
    current_learning_rate = adjust_learning_rate(optimizer, epoch, args.gammas, args.schedule)

    # estimate remaining wall-clock time from the average epoch duration
    need_hour, need_mins, need_secs = convert_secs2time(epoch_time.avg * (args.epochs-epoch))
    need_time = '[Need: {:02d}:{:02d}:{:02d}]'.format(need_hour, need_mins, need_secs)

    print_log('\n==>>{:s} [Epoch={:03d}/{:03d}] {:s} [learning_rate={:6.4f}]'.format(time_string(), epoch, args.epochs, need_time, current_learning_rate) \
                + ' [Best : Accuracy={:.2f}, Error={:.2f}]'.format(recorder.max_accuracy(False), 100-recorder.max_accuracy(False)), log)

    # train for one epoch
    train_acc, train_los = train(train_loader, net, criterion, optimizer, epoch, log)

    # evaluate on validation set
    val_acc,   val_los   = validate(test_loader, net, criterion, log)
    is_best = recorder.update(epoch, train_los, train_acc, val_los, val_acc)

    # checkpoint every epoch; `is_best` additionally copies to the best file
    save_checkpoint({
      'epoch': epoch + 1,
      'state_dict': net.state_dict(),
      'recorder': recorder,
      'optimizer' : optimizer.state_dict(),
    }, is_best, args.save_path, 'checkpoint.pth.tar')

    # measure elapsed time
    epoch_time.update(time.time() - start_time)
    start_time = time.time()
    recorder.plot_curve( os.path.join(args.save_path, 'curve.png') )

  log.close()
예제 #34
0
def val_epoch(epoch, data_loader, model, criterion, opt, logger):
    """Run one validation epoch and log the averaged loss/accuracy.

    Args:
        epoch: current epoch index (for display/logging only).
        data_loader: iterable of (inputs, targets) batches.
        model: network under evaluation.
        criterion: loss applied to (outputs, targets).
        opt: options namespace; opt.no_cuda disables GPU transfer.
        logger: object with .log(dict) for epoch-level metrics.

    Returns:
        losses.avg: the average validation loss.
    """
    print('validation at epoch {}'.format(epoch))

    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    end_time = time.time()
    # FIX: `volatile=True` Variables were removed from torch; torch.no_grad()
    # is the supported way to disable autograd during evaluation.
    with torch.no_grad():
        for i, (inputs, targets) in enumerate(data_loader):
            data_time.update(time.time() - end_time)

            if not opt.no_cuda:
                # FIX: `async=True` is a syntax error on Python 3.7+;
                # `non_blocking=True` is the supported spelling.
                targets = targets.cuda(non_blocking=True)
            # NOTE(review): inputs are never moved to CUDA here — presumably
            # the model handles that (e.g. DataParallel); confirm upstream.
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            acc = calculate_accuracy(outputs, targets)

            # FIX: `.item()` replaces the removed `loss.data[0]` indexing.
            losses.update(loss.item(), inputs.size(0))
            accuracies.update(acc, inputs.size(0))

            batch_time.update(time.time() - end_time)
            end_time = time.time()

            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {acc.val:.3f} ({acc.avg:.3f})'.format(
                      epoch,
                      i + 1,
                      len(data_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      acc=accuracies))

    logger.log({'epoch': epoch, 'loss': losses.avg, 'acc': accuracies.avg})

    return losses.avg
def validate(val_loader, net, criterion, optimizer, epoch, train_args, restore, visualize):
    """Validate the segmentation net, snapshot on a new best mean-IoU.

    Computes per-pixel predictions over the validation set, updates the
    best-record bookkeeping in `train_args`, saves model/optimizer
    snapshots and optional visualization images when mean IoU improves,
    and writes scalars to the module-level tensorboard `writer`.

    Returns:
        val_loss.avg: average per-sample validation loss.
    """
    net.eval()

    val_loss = AverageMeter()
    inputs_all, gts_all, predictions_all = [], [], []

    # FIX: `volatile=True` Variables were removed from torch; torch.no_grad()
    # is the supported way to disable autograd during evaluation.
    with torch.no_grad():
        for vi, data in enumerate(val_loader):
            inputs, gts = data
            N = inputs.size(0)
            inputs = inputs.cuda()
            gts = gts.cuda()

            outputs = net(inputs)
            predictions = outputs.data.max(1)[1].squeeze_(1).squeeze_(0).cpu().numpy()

            # FIX: `.item()` replaces the removed `.data[0]` indexing.
            val_loss.update(criterion(outputs, gts).item() / N, N)

            # sub-sample which inputs are kept for visualization
            if random.random() > train_args['val_img_sample_rate']:
                inputs_all.append(None)
            else:
                inputs_all.append(inputs.data.squeeze_(0).cpu())
            gts_all.append(gts.data.squeeze_(0).cpu().numpy())
            predictions_all.append(predictions)

    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, voc.num_classes)

    if mean_iu > train_args['best_record']['mean_iu']:
        train_args['best_record']['val_loss'] = val_loss.avg
        train_args['best_record']['epoch'] = epoch
        train_args['best_record']['acc'] = acc
        train_args['best_record']['acc_cls'] = acc_cls
        train_args['best_record']['mean_iu'] = mean_iu
        train_args['best_record']['fwavacc'] = fwavacc
        snapshot_name = 'epoch_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
            epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc, optimizer.param_groups[1]['lr']
        )
        torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
        torch.save(optimizer.state_dict(), os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))

        if train_args['val_save_to_img_file']:
            to_save_dir = os.path.join(ckpt_path, exp_name, str(epoch))
            check_mkdir(to_save_dir)

        val_visual = []
        for idx, data in enumerate(zip(inputs_all, gts_all, predictions_all)):
            if data[0] is None:
                continue
            input_pil = restore(data[0])
            gt_pil = voc.colorize_mask(data[1])
            predictions_pil = voc.colorize_mask(data[2])
            if train_args['val_save_to_img_file']:
                input_pil.save(os.path.join(to_save_dir, '%d_input.png' % idx))
                predictions_pil.save(os.path.join(to_save_dir, '%d_prediction.png' % idx))
                gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
            val_visual.extend([visualize(input_pil.convert('RGB')), visualize(gt_pil.convert('RGB')),
                               visualize(predictions_pil.convert('RGB'))])
        # FIX: guard against an empty list — torch.stack([]) raises when the
        # random sampling kept no images this epoch.
        if val_visual:
            val_visual = torch.stack(val_visual, 0)
            val_visual = vutils.make_grid(val_visual, nrow=3, padding=5)
            writer.add_image(snapshot_name, val_visual)

    print('--------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))

    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'], train_args['best_record']['acc_cls'],
        train_args['best_record']['mean_iu'], train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))

    print('--------------------------------------------------------------------')

    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)
    writer.add_scalar('lr', optimizer.param_groups[1]['lr'], epoch)

    net.train()
    return val_loss.avg
예제 #36
0
def train(segmentation_module, iterator, optimizers, history, epoch, args):
    """Train the segmentation module for one epoch of args.epoch_iters steps.

    Pulls batches from `iterator`, backpropagates the mean total loss,
    steps every optimizer, keeps running meters of per-task losses and
    metrics, periodically prints progress and records it into `history`,
    and decays the learning rate after every iteration.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()

    names = ['object', 'part', 'scene', 'material']
    ave_losses = {task: AverageMeter() for task in names}
    ave_metric = {task: AverageMeter() for task in names}
    ave_losses['total'] = AverageMeter()

    # Keep batch-norm layers frozen when args.fix_bn is set.
    segmentation_module.train(not args.fix_bn)

    stamp = time.time()
    for step in range(args.epoch_iters):

        batch_data, src_idx = next(iterator)

        data_time.update(time.time() - stamp)

        segmentation_module.zero_grad()

        # Forward pass; the module returns per-task losses and metrics.
        ret = segmentation_module(batch_data)

        # Backward on the mean total loss, then step every optimizer.
        loss = ret['loss']['total'].mean()
        loss.backward()
        for optimizer in optimizers:
            optimizer.step()

        batch_time.update(time.time() - stamp)
        stamp = time.time()

        # Fold this iteration's losses into the running meters.
        for key in ret['loss'].keys():
            ave_losses[key].update(ret['loss'][key].mean().item())

        # Same for metrics.
        # NOTE: scene metric will be much lower than benchmark
        for key in ret['metric'].keys():
            ave_metric[key].update(ret['metric'][key].mean().item())

        # Periodic progress display and history recording.
        if step % args.disp_iter == 0:
            loss_parts = []
            for task in names:
                avg = ave_losses[task].average()
                loss_parts.append("{} {:.2f}".format(task[0], avg if avg is not None else 0))
            loss_info = "Loss: total {:.4f}, ".format(ave_losses['total'].average())
            loss_info += ", ".join(loss_parts)

            acc_parts = []
            for task in names:
                avg = ave_metric[task].average()
                acc_parts.append("{} {:4.2f}".format(task[0], avg if avg is not None else 0))
            acc_info = "Accuracy: " + ", ".join(acc_parts)

            print('Epoch: [{}][{}/{}], Time: {:.2f}, Data: {:.2f}, '
                  'LR: encoder {:.6f}, decoder {:.6f}, {}, {}'
                  .format(epoch, step, args.epoch_iters,
                          batch_time.average(), data_time.average(),
                          args.running_lr_encoder, args.running_lr_decoder,
                          acc_info, loss_info))

            fractional_epoch = epoch - 1 + 1. * step / args.epoch_iters
            history['train']['epoch'].append(fractional_epoch)
            history['train']['loss'].append(loss.item())

        # Decay the learning rate based on the global iteration count.
        cur_iter = step + (epoch - 1) * args.epoch_iters
        adjust_learning_rate(optimizers, cur_iter, args)
예제 #37
0
    def train(self, epoch):
        """Train the model for one epoch over three zipped loaders.

        Combines three loss terms per batch: a softmax (classification)
        loss, a triplet (metric) loss, and an entropy loss between gallery
        outputs and all query features. Logs running loss/accuracy and
        appends an epoch summary line to the log file.
        """
        batch_time = AverageMeter()
        losses = AverageMeter()
        acc = AverageMeter()
        # NOTE(review): scheduler.step() before the epoch is the pre-1.1
        # torch ordering — confirm against the installed torch version.
        self.scheduler.step()
        self.model.train()

        end = time.time()
        lr = self.scheduler.get_lr()[0]

        # for batch, (softmax_data, triplet_data) in enumerate(itertools.zip_longest(self.softmax_train_loader, self.triplet_train_loader)):
        # zip() stops at the shortest of the three loaders.
        for batch, (softmax_data, triplet_data, gallery_data) in enumerate(zip(self.softmax_train_loader, self.triplet_train_loader, self.gallery_loader)):
            loss = 0
            # 1st: softmax (classification) branch
            softmax_inputs, softmax_labels = softmax_data
            # move to CUDA
            softmax_inputs = softmax_inputs.to(self.device) if torch.cuda.device_count() >= 1 else softmax_inputs
            softmax_labels = softmax_labels.to(self.device) if torch.cuda.device_count() >= 1 else softmax_labels
            softmax_score, softmax_outputs = self.model(softmax_inputs)
            traditional_loss = self.softmax_loss(softmax_score, softmax_outputs, softmax_labels)
            loss += traditional_loss
            # total — meters track only the softmax branch's loss/accuracy
            losses.update(loss.item(), softmax_inputs.size(0))
            prec = (softmax_score.max(1)[1] == softmax_labels).float().mean()
            acc.update(prec, softmax_inputs.size(0))

            # 2nd: triplet (metric-learning) branch
            triplet_inputs, triplet_labels = triplet_data
            # move to CUDA
            triplet_inputs = triplet_inputs.to(self.device) if torch.cuda.device_count() >= 1 else triplet_inputs
            triplet_labels = triplet_labels.to(self.device) if torch.cuda.device_count() >= 1 else triplet_labels
            triplet_score, triplet_outputs = self.model(triplet_inputs)
            triplet_loss = self.triplet_loss(triplet_score, triplet_outputs, triplet_labels)
            loss += triplet_loss

            # 3rd: gallery/query entropy branch
            gallery_inputs, gallery_labels = gallery_data
            gallery_inputs = gallery_inputs.to(self.device) if torch.cuda.device_count() >= 1 else gallery_inputs
            gallery_score, gallery_outputs = self.model(gallery_inputs)
            query_feats = []
            # NOTE(review): `data.query_loader` and bare `model(...)` below
            # reference module-level globals, not self.model — looks like it
            # should be self.model; confirm before changing.
            for query_inputs, query_labels in data.query_loader:
                query_inputs = query_inputs.cuda()
                query_score, query_outputs = model(query_inputs)
                query_feats.append(query_outputs)
                logger.debug('query_outputs: {}'.format(query_outputs.shape))
            query_feats = torch.cat(query_feats, dim=0)
            logger.debug('query_feats: {}'.format(query_feats.shape))
            entropy = self.entropy_loss(gallery_outputs, query_feats)
            loss += entropy


            self.optimizer.zero_grad()
            if opt.fp16:  # we use optimier to backward loss
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
                # loss.backward(retain_graph=True)
            self.optimizer.step()

            # measure elapsed time per batch
            batch_time.update(time.time() - end)
            end = time.time()

            # periodic progress logging
            if (batch+1) % 10 == 0:
                logger.debug('Epoch: [{}][{}/{}]\t'
                      'Base_lr: [{:.2e}]\t'
                      'Time: ({batch_time.avg:.3f})\t'
                      'Loss_val: {loss.val:.4f}  (Loss_avg: {loss.avg:.4f})\t'
                      'Accuray_val: {acc.val:.4f}  (Accuray_avg: {acc.avg:.4f})'.format(
                       epoch, batch+1, len(self.softmax_train_loader), lr, batch_time=batch_time, loss=losses, acc=acc))

        # epoch-level summary written to the log file
        log_text = 'Epoch[{}]\tBase_lr {:.2e}\tAccuray {acc.avg:.4f}\tLoss {loss.avg:.4f}'.format(epoch, lr, acc=acc, loss=losses)
        logger.info(log_text)
        with open(log_file, 'a') as f:
            f.write(log_text + '\n')
            f.flush()
def main():
    """Score predicted points against ground truth for crowd localization.

    For every sample id in `id_std`, matches predicted points to GT points
    within small (sigma_s) and large (sigma_l) tolerance radii, accumulates
    TP/FP/FN counts (overall and per difficulty class), and also tracks
    counting errors (MAE/MSE/NAE). Prints localization precision/recall/F1
    and counting metrics at the end.
    """

    # counting-error meters
    cnt_errors = {
        'mae': AverageMeter(),
        'mse': AverageMeter(),
        'nae': AverageMeter(),
    }
    # localization meters for the small tolerance radius (sigma_s)
    metrics_s = {
        'tp': AverageMeter(),
        'fp': AverageMeter(),
        'fn': AverageMeter(),
        'tp_c': AverageCategoryMeter(num_classes),
        'fn_c': AverageCategoryMeter(num_classes)
    }
    # localization meters for the large tolerance radius (sigma_l)
    metrics_l = {
        'tp': AverageMeter(),
        'fp': AverageMeter(),
        'fn': AverageMeter(),
        'tp_c': AverageCategoryMeter(num_classes),
        'fn_c': AverageCategoryMeter(num_classes)
    }

    pred_data, gt_data = read_pred_and_gt(pred_file, gt_file)
    for i_sample in id_std:
        print(i_sample)
        # init per-sample counters
        gt_p,pred_p,fn_gt_index,tp_pred_index,fp_pred_index= [],[],[],[],[]
        tp_s, fp_s, fn_s, tp_l, fp_l, fn_l = [0, 0, 0, 0, 0, 0]
        tp_c_s = np.zeros([num_classes])
        fn_c_s = np.zeros([num_classes])
        tp_c_l = np.zeros([num_classes])
        fn_c_l = np.zeros([num_classes])

        # case 1: no GT but predictions exist — every prediction is a FP
        if gt_data[i_sample]['num'] == 0 and pred_data[i_sample]['num'] != 0:
            pred_p = pred_data[i_sample]['points']
            fp_pred_index = np.array(range(pred_p.shape[0]))
            fp_s = fp_pred_index.shape[0]
            fp_l = fp_pred_index.shape[0]

        # case 2: GT exists but no predictions — every GT point is a FN
        if pred_data[i_sample]['num'] == 0 and gt_data[i_sample]['num'] != 0:
            gt_p = gt_data[i_sample]['points']
            level = gt_data[i_sample]['level']
            fn_gt_index = np.array(range(gt_p.shape[0]))
            fn_s = fn_gt_index.shape[0]
            fn_l = fn_gt_index.shape[0]
            for i_class in range(num_classes):
                fn_c_s[i_class] = (level[fn_gt_index] == i_class).sum()
                fn_c_l[i_class] = (level[fn_gt_index] == i_class).sum()

        # case 3: both present — match by distance at both tolerance radii
        if gt_data[i_sample]['num'] != 0 and pred_data[i_sample]['num'] != 0:
            pred_p = pred_data[i_sample]['points']
            gt_p = gt_data[i_sample]['points']
            sigma_s = gt_data[i_sample]['sigma'][:, 0]
            sigma_l = gt_data[i_sample]['sigma'][:, 1]
            level = gt_data[i_sample]['level']

            # dist: pairwise Euclidean distances, preds x GTs
            dist_matrix = ss.distance_matrix(pred_p, gt_p, p=2)
            match_matrix = np.zeros(dist_matrix.shape, dtype=bool)

            # sigma_s and sigma_l
            tp_s, fp_s, fn_s, tp_c_s, fn_c_s = compute_metrics(
                dist_matrix, match_matrix, pred_p.shape[0], gt_p.shape[0],
                sigma_s, level)
            tp_l, fp_l, fn_l, tp_c_l, fn_c_l = compute_metrics(
                dist_matrix, match_matrix, pred_p.shape[0], gt_p.shape[0],
                sigma_l, level)

        # fold this sample's counts into the global meters
        metrics_s['tp'].update(tp_s)
        metrics_s['fp'].update(fp_s)
        metrics_s['fn'].update(fn_s)
        metrics_s['tp_c'].update(tp_c_s)
        metrics_s['fn_c'].update(fn_c_s)
        metrics_l['tp'].update(tp_l)
        metrics_l['fp'].update(fp_l)
        metrics_l['fn'].update(fn_l)
        metrics_l['tp_c'].update(tp_c_l)
        metrics_l['fn_c'].update(fn_c_l)

        # counting metrics compare total counts, independent of matching
        gt_count, pred_cnt = gt_data[i_sample]['num'], pred_data[i_sample][
            'num']
        s_mae = abs(gt_count - pred_cnt)
        s_mse = (gt_count - pred_cnt) * (gt_count - pred_cnt)
        cnt_errors['mae'].update(s_mae)
        cnt_errors['mse'].update(s_mse)

        if gt_count != 0:
            s_nae = abs(gt_count - pred_cnt) / gt_count
            cnt_errors['nae'].update(s_nae)

    # precision/recall/F1 at the small radius (1e-20 avoids division by zero)
    ap_s = metrics_s['tp'].sum / (metrics_s['tp'].sum + metrics_s['fp'].sum +
                                  1e-20)
    ar_s = metrics_s['tp'].sum / (metrics_s['tp'].sum + metrics_s['fn'].sum +
                                  1e-20)
    f1m_s = 2 * ap_s * ar_s / (ap_s + ar_s)
    ar_c_s = metrics_s['tp_c'].sum / (metrics_s['tp_c'].sum +
                                      metrics_s['fn_c'].sum + 1e-20)

    # precision/recall/F1 at the large radius
    ap_l = metrics_l['tp'].sum / (metrics_l['tp'].sum + metrics_l['fp'].sum +
                                  1e-20)
    ar_l = metrics_l['tp'].sum / (metrics_l['tp'].sum + metrics_l['fn'].sum +
                                  1e-20)
    f1m_l = 2 * ap_l * ar_l / (ap_l + ar_l)
    ar_c_l = metrics_l['tp_c'].sum / (metrics_l['tp_c'].sum +
                                      metrics_l['fn_c'].sum + 1e-20)

    print('-----Localization performance-----')
    print('AP_small: ' + str(ap_s))
    print('AR_small: ' + str(ar_s))
    print('F1m_small: ' + str(f1m_s))
    print('AR_small_category: ' + str(ar_c_s))
    print('    avg: ' + str(ar_c_s.mean()))
    print('AP_large: ' + str(ap_l))
    print('AR_large: ' + str(ar_l))
    print('F1m_large: ' + str(f1m_l))
    print('AR_large_category: ' + str(ar_c_l))
    print('    avg: ' + str(ar_c_l.mean()))

    # counting errors: RMSE is reported (sqrt of the mean squared error)
    mae = cnt_errors['mae'].avg
    mse = np.sqrt(cnt_errors['mse'].avg)
    nae = cnt_errors['nae'].avg

    print('-----Counting performance-----')
    print('MAE: ' + str(mae))
    print('MSE: ' + str(mse))
    print('NAE: ' + str(nae))
def valid_epoch(model,
                valid_dataloader,
                cost,
                print_freq=40,
                batch_num=None,
                random_layer=None):
    """Evaluate `model` on `valid_dataloader` with perturbation noise.

    Args:
        model: network taking (images, noises); must be on CUDA.
        valid_dataloader: yields (images, labels, noises) batches.
        cost: loss function applied to (outputs, labels).
        print_freq: print progress every this many batches.
        batch_num: total batch count for display only.
        random_layer: optional input transform applied before the forward.

    Returns:
        (batch_time.avg, losses.avg, error.avg).
    """
    batch_time = AverageMeter()
    losses = AverageMeter()
    error = AverageMeter()

    end = time.time()
    model.eval()
    # FIX: validation previously built autograd graphs for every batch;
    # torch.no_grad() avoids the wasted memory and compute.
    with torch.no_grad():
        for i, (images, labels, noises) in enumerate(valid_dataloader):
            images, labels, noises = images.cuda(), labels.cuda(), noises.cuda()
            # scale the perturbation by the global noise magnitude args.phi
            noises = noises * args.phi
            if random_layer:
                images = random_layer(images)
            outputs = model(images, noises)
            loss = cost(outputs, labels)

            batch_size = labels.size(0)
            outputs = outputs.max(1)[1]
            error.update(
                torch.ne(outputs.cpu(), labels.cpu()).float().sum().item() /
                batch_size, batch_size)
            losses.update(loss.item(), batch_size)
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:
                res = '\t'.join([
                    'Valid',
                    'Iter: [%d/%d]' % (i + 1, batch_num),
                    'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                    'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                    'Error %.4f (%.4f)' % (error.val, error.avg),
                ])
                print(res)
    return batch_time.avg, losses.avg, error.avg
예제 #40
0
def test(testloader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on `testloader`; returns (loss, top1, top5) averages.

    Args:
        testloader: iterable of (inputs, targets) batches.
        model: network under evaluation.
        criterion: loss applied to (outputs, targets).
        epoch: current epoch (unused here; kept for interface parity).
        use_cuda: move batches to GPU when True.
    """
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()
    end = time.time()
    start_index = 0
    # FIX: the original wrapped only the (no-op) Variable construction in
    # no_grad, so the forward pass still built autograd graphs; the whole
    # evaluation loop now runs gradient-free.
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))

            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))

            progress_bar(
                batch_idx, len(testloader),
                'Loss: %.2f | Top1: %.2f | Top5: %.2f' %
                (losses.avg, top1.avg, top5.avg))
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    return (losses.avg, top1.avg, top5.avg)
예제 #41
0
def train(train_loader, model, criterions, optimizer, epoch):
    """Train the lesion classifier for one epoch with a weighted multi-loss.

    Combines weighted cross-entropy, relative hard-example mining (RHEM),
    triplet metric loss, and (when score propagation is on) a second CE
    term; weights come from config.TRAIN.* constants.

    Args:
        train_loader: yields (inputs, (clsf, uncertainty, exclusive) targets, infos).
        model: network returning dict with 'emb', 'class_prob1' (and
            'class_prob2' under SCORE_PROPAGATION).
        criterions: dict with 'wce', 'rhem', and 'metric' loss callables.
        optimizer: optimizer stepped once per batch (with gradient clipping).
        epoch: current epoch index (for logging).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accs = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (inputs, targets, infos) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # construct targets: RHEM only weighs confidently-labeled samples,
        # and uncertain labels are down-weighted out of the CE loss
        target_clsf, target_unc, target_ex = targets
        target_conf = (target_clsf + target_ex) > 0
        rhem_wt = torch.zeros_like(target_clsf).cuda()
        rhem_wt[target_conf] = 1.
        target_clsf = target_clsf.cuda()
        target_clsf_wt = 1-target_unc.cuda()

        # run model
        inputs = [input.cuda() for input in inputs]
        out = model(inputs)

        # compute losses
        emb = out['emb']
        A, P, N = select_triplets_multilabel(emb, target_clsf)
        loss_metric = criterions['metric'](A, P, N)

        prob1 = out['class_prob1']
        loss_ce1 = criterions['wce'](prob1, target_clsf, infos, wt=target_clsf_wt)
        loss_rhem = criterions['rhem'](prob1, target_clsf, infos, wt=rhem_wt)
        if config.SCORE_PROPAGATION:
            prob2 = out['class_prob2']
            loss_ce2 = criterions['wce'](prob2, target_clsf, infos, wt=target_clsf_wt)

            sub_losses = [loss_ce1, loss_rhem, loss_metric, loss_ce2]
            wts_names = ['CE_LOSS_WT_1', 'RHEM_LOSS_WT', 'TRIPLET_LOSS_WT', 'CE_LOSS_WT_2']
        else:
            sub_losses = [loss_ce1, loss_rhem, loss_metric]
            wts_names = ['CE_LOSS_WT_1', 'RHEM_LOSS_WT', 'TRIPLET_LOSS_WT']

        loss = 0
        # FIX: getattr replaces eval() on a constructed string — same lookup,
        # no arbitrary-code-execution surface.
        wts = [getattr(config.TRAIN, name1) for name1 in wts_names]
        for wt1, loss1 in zip(wts, sub_losses):
            loss += wt1 * loss1

        losses.update(loss.item())

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        clip_gradient(model, default.clip_gradient)
        optimizer.step()

        # measure accuracy
        if config.SCORE_PROPAGATION:
            prob_np = prob2.detach().cpu().numpy()
        else:
            prob_np = prob1.detach().cpu().numpy()

        pred_labels = score2label(prob_np, config.TEST.SCORE_PARAM)
        targets_np = target_clsf.detach().cpu().numpy()
        target_unc = target_unc.numpy()
        # accuracy is computed only over certain labels (uncertainty == 0)
        acc = compute_all_acc_wt(targets_np > 0, pred_labels, prob_np, target_unc == 0)[config.TEST.CRITERION]

        accs.update(acc)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % default.frequent == 0:
            crit = 'mean_pcF1' if config.TEST.CRITERION == 'mean_perclass_f1' else config.TEST.CRITERION
            msg = 'Epoch: [{0}][{1}/{2}] Time {batch_time.val:.1f} ' \
                  '({batch_time.avg:.1f}, {data_time.val:.1f})\t' \
                  .format(epoch, i, len(train_loader), batch_time=batch_time, data_time=data_time)
            msg += 'Loss {loss.val:.3f} ({loss.avg:.3f}){{'.format(loss=losses)
            for wt1, loss1 in zip(wts, sub_losses):
                msg += '%.3f*%.1f, ' % (loss1, wt1)
            msg += '}}\t{crit} {accs.val:.3f} ({accs.avg:.3f})'.format(
                crit=crit, accs=accs, ms=prob_np.max())
            logger.info(msg)
def train_epoch(model,
                train_dataloader,
                optimizer,
                cost,
                epoch,
                n_epochs,
                print_freq=40,
                batch_num=None,
                random_layer=None):
    """Run one training epoch and return (avg batch time, avg loss, avg error).

    Args:
        model: network to train; assumed already on CUDA.
        train_dataloader: yields (images, labels, noises) batches.
        optimizer: optimizer stepping the model parameters.
        cost: loss function applied to (outputs, labels).
        epoch: zero-based epoch index (logging only).
        n_epochs: total number of epochs (logging only).
        print_freq: log every `print_freq` iterations.
        batch_num: total batch count for the progress line (logging only).
        random_layer: optional transform applied to the images before the
            forward pass (e.g. an input-randomization layer); skipped if None.
    """
    print(random_layer)
    batch_time = AverageMeter()
    losses = AverageMeter()
    error = AverageMeter()
    end = time.time()
    model.train()
    for i, (images, labels, noises) in enumerate(train_dataloader):
        images, labels, noises = images.cuda(), labels.cuda(), noises.cuda()
        # Scale the injected noise by the global magnitude args.phi.
        noises = noises * args.phi
        if random_layer is not None:
            images = random_layer(images)

        optimizer.zero_grad()
        outputs = model(images, noises)
        loss = cost(outputs, labels)
        loss.backward()
        # BUG FIX: optimizer.step() was commented out in the original, so
        # gradients were computed but the weights were never updated.
        optimizer.step()

        batch_size = labels.size(0)
        preds = outputs.max(1)[1]
        error.update(
            torch.ne(preds.cpu(), labels.cpu()).float().sum().item() /
            batch_size, batch_size)
        losses.update(loss.item(), batch_size)
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            res = '\t'.join([
                'Epoch: [%d/%d]' % (epoch + 1, n_epochs),
                'Iter: [%d/%d]' % (i + 1, batch_num),
                'Time %.3f (%.3f)' % (batch_time.val, batch_time.avg),
                'Loss %.4f (%.4f)' % (losses.val, losses.avg),
                'Error %.4f (%.4f)' % (error.val, error.avg),
            ])
            print(res)
    return batch_time.avg, losses.avg, error.avg
예제 #43
0
def test(val_loader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on `val_loader`; return (avg loss, avg top-1 accuracy).

    Args:
        val_loader: validation DataLoader yielding (inputs, targets).
        model: network to evaluate.
        criterion: loss function.
        epoch: current epoch index (unused here; kept for caller symmetry).
        use_cuda: move batches to GPU when True.
    """
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=len(val_loader))
    # torch.no_grad() replaces the removed Variable(..., volatile=True) API
    # (volatile was dropped in PyTorch 0.4; the old call errors on modern builds).
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(val_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss; .item() (not loss.data[0])
            # stores python floats in the meters instead of CUDA tensors
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                        batch=batch_idx + 1,
                        size=len(val_loader),
                        data=data_time.avg,
                        bt=batch_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td,
                        loss=losses.avg,
                        top1=top1.avg,
                        top5=top5.avg,
                        )
            bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
예제 #44
0
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    """Train `model` for one epoch using a CUDA data_prefetcher.

    Returns (avg loss, avg top-1 accuracy). The prefetcher overlaps
    host-to-device copies with compute, so no explicit .cuda() calls are
    needed inside the loop (`use_cuda` is kept for caller symmetry).
    """
    # switch to train mode
    model.train()
    torch.set_grad_enabled(True)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(train_loader))
    show_step = len(train_loader) // 10

    prefetcher = data_prefetcher(train_loader)
    inputs, targets = prefetcher.next()

    batch_idx = -1
    while inputs is not None:
        batch_idx += 1
        batch_size = inputs.size(0)
        # drop the final incomplete batch
        if batch_size < args.train_batch:
            break
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss.
        # BUG FIX: loss.item() instead of loss.data — storing the CUDA tensor
        # in the meter keeps device memory alive and makes .avg a tensor.
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), batch_size)
        top1.update(prec1, batch_size)
        top5.update(prec5, batch_size)

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        # echo the progress line roughly ten times per epoch
        if (batch_idx) % show_step == 0:
            print(bar.suffix)
        bar.next()

        inputs, targets = prefetcher.next()

    bar.finish()
    return (losses.avg, top1.avg)
def validate(val_loader, net, criterion, optimizer, epoch, train_args, visualize):
    # Validate a segmentation net by stitching overlapping image slices back
    # into full-size predictions, then compute metrics, save a snapshot, and
    # log visualizations. Returns the average validation loss.
    # the following code is written assuming that batch size is 1
    net.eval()

    val_loss = AverageMeter()

    # Full-size ground-truth and prediction canvases, one per validation image.
    # Canvas size comes from the global `args`; width is assumed to be twice
    # the height (shorter_size x 2*shorter_size).
    gts_all = np.zeros((len(val_loader), args['shorter_size'], 2 * args['shorter_size']), dtype=int)
    predictions_all = np.zeros((len(val_loader), args['shorter_size'], 2 * args['shorter_size']), dtype=int)
    for vi, data in enumerate(val_loader):
        # input: (1, n_slices, C, H, W), gt: (1, n_slices, H, W),
        # slices_info: (1, n_slices, 6) — per-slice placement rectangles.
        input, gt, slices_info = data
        assert len(input.size()) == 5 and len(gt.size()) == 4 and len(slices_info.size()) == 3
        # Move the slice dimension first so we can iterate slice-by-slice.
        input.transpose_(0, 1)
        gt.transpose_(0, 1)
        slices_info.squeeze_(0)
        assert input.size()[3:] == gt.size()[2:]

        # `count` tracks how many slices covered each pixel so overlapping
        # logits can be averaged below.
        count = torch.zeros(args['shorter_size'], 2 * args['shorter_size']).cuda()
        output = torch.zeros(voc.num_classes, args['shorter_size'], 2 * args['shorter_size']).cuda()

        slice_batch_pixel_size = input.size(1) * input.size(3) * input.size(4)

        for input_slice, gt_slice, info in zip(input, gt, slices_info):
            input_slice = Variable(input_slice).cuda()
            gt_slice = Variable(gt_slice).cuda()

            output_slice = net(input_slice)
            assert output_slice.size()[2:] == gt_slice.size()[1:]
            assert output_slice.size()[1] == voc.num_classes
            # info = (y0, y1, x0, x1, valid_h, valid_w): paste the valid
            # region of this slice into the full-size canvases.
            output[:, info[0]: info[1], info[2]: info[3]] += output_slice[0, :, :info[4], :info[5]].data
            # NOTE(review): gt labels are summed here and integer-divided by
            # `count` after the loop — only exact for non-overlapping or
            # identical overlapping labels; confirm against the slicing scheme.
            gts_all[vi, info[0]: info[1], info[2]: info[3]] += gt_slice[0, :info[4], :info[5]].data.cpu().numpy()

            count[info[0]: info[1], info[2]: info[3]] += 1

            # NOTE(review): .data[0] is the pre-0.4 PyTorch scalar accessor;
            # modern versions require .item() here.
            val_loss.update(criterion(output_slice, gt_slice).data[0], slice_batch_pixel_size)

        # Average logits over overlapping slices, then take the argmax class.
        output /= count
        gts_all[vi, :, :] /= count.cpu().numpy().astype(int)
        predictions_all[vi, :, :] = output.max(0)[1].squeeze_(0).cpu().numpy()

        print('validating: %d / %d' % (vi + 1, len(val_loader)))

    acc, acc_cls, mean_iu, fwavacc = evaluate(predictions_all, gts_all, voc.num_classes)

    # NOTE(review): best_record is overwritten unconditionally every epoch;
    # similar codebases guard this with `if mean_iu > best_record['mean_iu']` —
    # confirm whether the unconditional update is intended.
    train_args['best_record']['val_loss'] = val_loss.avg
    train_args['best_record']['epoch'] = epoch
    train_args['best_record']['acc'] = acc
    train_args['best_record']['acc_cls'] = acc_cls
    train_args['best_record']['mean_iu'] = mean_iu
    train_args['best_record']['fwavacc'] = fwavacc
    snapshot_name = 'epoch_%d_loss_%.5f_acc_%.5f_acc-cls_%.5f_mean-iu_%.5f_fwavacc_%.5f_lr_%.10f' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc, optimizer.param_groups[1]['lr'])
    torch.save(net.state_dict(), os.path.join(ckpt_path, exp_name, snapshot_name + '.pth'))
    torch.save(optimizer.state_dict(), os.path.join(ckpt_path, exp_name, 'opt_' + snapshot_name + '.pth'))

    if train_args['val_save_to_img_file']:
        to_save_dir = os.path.join(ckpt_path, exp_name, str(epoch))
        check_mkdir(to_save_dir)

    # Build a (gt, prediction) image grid for TensorBoard; also write PNGs
    # when val_save_to_img_file is set (to_save_dir is only defined then,
    # which matches the guarded uses below).
    val_visual = []
    for idx, data in enumerate(zip(gts_all, predictions_all)):
        gt_pil = voc.colorize_mask(data[0])
        predictions_pil = voc.colorize_mask(data[1])
        if train_args['val_save_to_img_file']:
            predictions_pil.save(os.path.join(to_save_dir, '%d_prediction.png' % idx))
            gt_pil.save(os.path.join(to_save_dir, '%d_gt.png' % idx))
        val_visual.extend([visualize(gt_pil.convert('RGB')),
                           visualize(predictions_pil.convert('RGB'))])
    val_visual = torch.stack(val_visual, 0)
    val_visual = vutils.make_grid(val_visual, nrow=2, padding=5)
    writer.add_image(snapshot_name, val_visual)

    print('-----------------------------------------------------------------------------------------------------------')
    print('[epoch %d], [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f]' % (
        epoch, val_loss.avg, acc, acc_cls, mean_iu, fwavacc))

    print('best record: [val loss %.5f], [acc %.5f], [acc_cls %.5f], [mean_iu %.5f], [fwavacc %.5f], [epoch %d]' % (
        train_args['best_record']['val_loss'], train_args['best_record']['acc'], train_args['best_record']['acc_cls'],
        train_args['best_record']['mean_iu'], train_args['best_record']['fwavacc'], train_args['best_record']['epoch']))

    print('-----------------------------------------------------------------------------------------------------------')

    writer.add_scalar('val_loss', val_loss.avg, epoch)
    writer.add_scalar('acc', acc, epoch)
    writer.add_scalar('acc_cls', acc_cls, epoch)
    writer.add_scalar('mean_iu', mean_iu, epoch)
    writer.add_scalar('fwavacc', fwavacc, epoch)

    # restore training mode for the caller's next epoch
    net.train()
    return val_loss.avg
예제 #46
0
def test(val_loader, model, criterion, epoch, use_cuda):
    """Evaluate `model` on `val_loader` using a CUDA data_prefetcher.

    Returns (avg loss, avg top-1 accuracy). The prefetcher performs the
    host-to-device copies, so `use_cuda` is unused here (kept for caller
    symmetry with the non-prefetcher variant).
    """
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=len(val_loader))

    prefetcher = data_prefetcher(val_loader)
    inputs, targets = prefetcher.next()

    batch_idx = -1
    while inputs is not None:
        batch_idx += 1
        # measure data loading time
        data_time.update(time.time() - end)

        # compute output without building the autograd graph
        with torch.no_grad():
            outputs = model(inputs)
            loss = criterion(outputs, targets)

        # measure accuracy and record loss.
        # BUG FIX: loss.item() instead of loss.data — storing the CUDA tensor
        # in the meter keeps device memory alive and makes .avg a tensor.
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1, inputs.size(0))
        top5.update(prec5, inputs.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(val_loader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()

        inputs, targets = prefetcher.next()

    print(bar.suffix)
    bar.finish()
    return (losses.avg, top1.avg)