Example #1
0
def evaluate(model, criterion, data_loader, device):
    """Evaluate a video classification model over `data_loader`.

    Runs the model under `torch.no_grad()`, accumulating loss and top-1 /
    top-5 clip accuracy into a MetricLogger.

    Args:
        model: video classifier; called as `model(video)`.
        criterion: loss function taking (output, target).
        data_loader: yields (video, target) batches.
        device: device to move batches onto.

    Returns:
        Global average top-1 clip accuracy across the whole loader.
    """
    # Fix: removed leftover per-batch debug prints (timing, target shape,
    # and the full target tensor) that flooded stdout on every iteration.
    model.eval()
    metric_logger = MetricLogger(delimiter="  ")
    header = 'Test:'
    with torch.no_grad():
        for video, target in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    print(
        ' * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}'.
        format(top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
Example #2
0
def train_one_epoch(model,
                    optimizer,
                    lr_scheduler,
                    data_loader,
                    epoch,
                    print_freq,
                    checkpoint_fn=None):
    """Train `model` for one epoch over `data_loader`.

    The model is expected to return its loss directly when called on a
    batch. Logs loss, learning rate, and batch time via a MetricLogger,
    and steps `lr_scheduler` once per batch.

    Args:
        model: module returning a scalar loss from `model(batch)`.
        optimizer: optimizer whose first param group's lr is logged.
        lr_scheduler: per-step learning-rate scheduler.
        data_loader: iterable of training batches.
        epoch: epoch index, used only for the log header.
        print_freq: logging interval (in batches).
        checkpoint_fn: optional zero-arg callable; invoked randomly on
            roughly 0.5% of steps and once more at the end of the epoch.
    """
    model.train()
    logger = MetricLogger(delimiter="  ")
    logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value}'))
    logger.add_meter('batch/s',
                     SmoothedValue(window_size=10, fmt='{value:.3f}'))

    header = 'Epoch: [{}]'.format(epoch)

    for batch in logger.log_every(data_loader, print_freq, header):
        tic = time.time()
        loss = model(batch)

        # Occasionally checkpoint mid-epoch (~0.5% of batches).
        if checkpoint_fn is not None and np.random.random() < 0.005:
            checkpoint_fn()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"])
        logger.meters['batch/s'].update(time.time() - tic)
        lr_scheduler.step()

    # Always checkpoint at the end of the epoch.
    if checkpoint_fn is not None:
        checkpoint_fn()
Example #3
0
def train_linear_one_epoch(train_loader, model, criterion, optimizer, config,
                           device, epoch=0):
    """Train a linear-probe classifier for one epoch.

    Logs loss, top-1/top-5 accuracy, and learning rate through a
    MetricLogger backed by AverageMeters.

    Fixes: the original referenced an undefined `epoch` (now a
    backward-compatible parameter defaulting to 0) and called an
    undefined `model_sl` instead of the `model` argument.

    Args:
        train_loader: yields (img, target) batches.
        model: classifier producing logits from images.
        criterion: loss on (logits, target).
        optimizer: optimizer; first param group's lr is logged.
        config: provides `config.system.print_freq` for logging.
        device: device to move batches onto.
        epoch: zero-based epoch index, used for the log header.
    """
    log_header = 'EPOCH {}'.format(epoch + 1)
    losses = AverageMeter('Loss', fmt=':.4f')
    top1 = AverageMeter('Top1', fmt=':4.2f')
    top5 = AverageMeter('Top5', fmt=':4.2f')
    lr = AverageMeter('Lr', fmt=":.4f")

    metric_logger = MetricLogger(delimeter=" | ")
    metric_logger.add_meter(losses)
    metric_logger.add_meter(top1)
    metric_logger.add_meter(top5)
    metric_logger.add_meter(lr)

    for step, (img, target) in enumerate(
            metric_logger.log_every(train_loader, config.system.print_freq,
                                    log_header)):
        img = img.to(device)
        target = target.to(device)
        logit = model(img)  # was: model_sl (undefined NameError)

        loss = criterion(logit, target)
        acc1, acc5 = accuracy(logit, target, topk=(1, 5))
        lr_ = optimizer.param_groups[0]['lr']

        metric_logger.update(Loss=loss.detach().cpu().item(),
                             Top1=acc1.detach().cpu().item(),
                             Top5=acc5.detach().cpu().item(),
                             Lr=lr_)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example #4
0
def evaluate(model, epoch, criterion, data_loader, device, writer):
    """Evaluate a video classifier and log accuracy to TensorBoard.

    Accumulates loss and top-1/top-5 clip accuracy in a MetricLogger;
    every 10 batches, writes the mean top-1 accuracy of that window to
    `writer` under 'validation accuracy'.

    Returns:
        Global average top-1 clip accuracy over the loader.
    """
    model.eval()
    metric_logger = MetricLogger(delimiter="  ")
    header = 'Test:'
    batch_idx = 0
    acc_window = 0.0  # top-1 accuracy accumulated over the last 10 batches
    with torch.no_grad():
        for video, target in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            acc_window += acc1.item()
            # Flush the 10-batch window to TensorBoard.
            if (batch_idx + 1) % 10 == 0:
                writer.add_scalar('validation accuracy',
                                  acc_window / 10,
                                  epoch * len(data_loader) + batch_idx)
                acc_window = 0.0
            batch_idx += 1
            metric_logger.update(loss=loss.item())
            metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
            metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)

    print(' * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}'
          .format(top1=metric_logger.acc1, top5=metric_logger.acc5))
    return metric_logger.acc1.global_avg
Example #5
0
def train_one_epoch(train_loader, model, criterion, optimizer, writer, epoch,
                    total_step, config):
    """Train a self-supervised model for one epoch.

    Handles two regimes: for 'byol' the model returns its loss directly;
    otherwise the model returns (logits, targets, logits_original) and a
    contrastive-style loss plus top-1/top-5 accuracy are computed.
    Per-step loss (and accuracy when available) are written to `writer`.

    Args:
        total_step: mutable counter object; `total_step.val` is advanced
            once per batch and used as the TensorBoard global step.
    """
    log_header = 'EPOCH {}'.format(epoch)
    track_acc = config.method != 'byol'  # BYOL yields no logits to score

    losses = AverageMeter('Loss', fmt=':.4f')
    if track_acc:
        top1 = AverageMeter('Acc1', fmt=':4.2f')
        top5 = AverageMeter('Acc5', fmt=':4.2f')
    lr = AverageMeter('Lr', fmt=":.6f")

    metric_logger = MetricLogger(delimeter=" | ")
    metric_logger.add_meter(losses)
    if track_acc:
        metric_logger.add_meter(top1)
        metric_logger.add_meter(top5)
    metric_logger.add_meter(lr)

    for step, (images, _) in enumerate(
            metric_logger.log_every(train_loader, config.system.print_freq,
                                    log_header)):
        total_step.val += 1
        if config.system.gpu is not None:
            images[0] = images[0].cuda(config.system.gpu, non_blocking=True)
            images[1] = images[1].cuda(config.system.gpu, non_blocking=True)

        if track_acc:
            logits, targets, logits_original = model(view_1=images[0],
                                                     view_2=images[1])
            loss = criterion(logits, targets)
            acc1, acc5 = accuracy(logits_original, targets, topk=(1, 5))
        else:
            # BYOL: the model returns per-sample losses directly.
            loss = model(view_1=images[0], view_2=images[1]).mean()

        lr_ = optimizer.param_groups[0]['lr']

        if track_acc:
            metric_logger.update(Loss=loss.detach().cpu().item(),
                                 Acc1=acc1.detach().cpu().item(),
                                 Acc5=acc5.detach().cpu().item(),
                                 Lr=lr_)
        else:
            metric_logger.update(Loss=loss.detach().cpu().item(), Lr=lr_)

        writer.add_scalar('loss', loss.detach().cpu().item(), total_step.val)
        if track_acc:
            writer.add_scalar('top1',
                              acc1.detach().cpu().item(), total_step.val)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Example #6
0
def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq):
    """Train a detection model for one epoch.

    The model returns a dict of losses which are summed for the backward
    pass; reduced (cross-process) losses are used for logging only. On
    epoch 0 a linear warmup scheduler is stepped each batch. Aborts the
    process if the reduced loss becomes non-finite.

    Returns:
        The MetricLogger holding this epoch's running statistics.
    """
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr',
                            SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    # Warm the learning rate up over (at most) the first 1000 iterations
    # of the very first epoch only.
    warmup = None
    if epoch == 0:
        warmup = warmup_lr_scheduler(optimizer,
                                     min(1000, len(data_loader) - 1),
                                     1. / 1000)

    for images, targets in metric_logger.log_every(data_loader, print_freq,
                                                   header):
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        total_loss = sum(loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_dict(loss_dict)
        total_loss_reduced = sum(loss_dict_reduced.values())
        loss_value = total_loss_reduced.item()

        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            print(loss_dict_reduced)
            sys.exit(1)

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if warmup is not None:
            warmup.step()

        metric_logger.update(loss=total_loss_reduced, **loss_dict_reduced)
        metric_logger.update(lr=optimizer.param_groups[0]["lr"])

    return metric_logger
Example #7
0
def evaluate(model, data_loader, device):
    """Run COCO-style evaluation of a detection model.

    Builds a CocoEvaluator from the loader's dataset, times the model
    forward and evaluator update per batch, then synchronizes and
    summarizes results across processes.

    Fix: the inference loop now runs under `torch.no_grad()` — the
    original built autograd graphs during evaluation, wasting memory
    (consistent with the other `evaluate` functions in this file).

    Returns:
        The populated CocoEvaluator.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    model.eval()
    metric_logger = MetricLogger(delimiter="  ")
    header = 'Test:'

    coco = get_coco_api_from_dataset(data_loader.dataset)
    iou_types = _get_iou_types(model)
    coco_evaluator = CocoEvaluator(coco, iou_types)

    with torch.no_grad():
        for images, targets in metric_logger.log_every(data_loader, 100,
                                                       header):
            images = [img.to(device) for img in images]

            # Synchronize so the forward timing is accurate on GPU.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(images)

            outputs = [{k: v.to(cpu_device)
                        for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time

            # Keyed by COCO image id, as CocoEvaluator.update expects.
            res = {
                target["image_id"].item(): output
                for target, output in zip(targets, outputs)
            }
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time,
                                 evaluator_time=evaluator_time)

    # gather the stats from all processes
    metric_logger.synchronize_between_processes()
    print("Averaged stats:", metric_logger)
    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    torch.set_num_threads(n_threads)
    return coco_evaluator
Example #8
0
def train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader,
                    device, epoch, print_freq, writer):
    """Train a video classifier for one epoch with TensorBoard logging.

    Steps `lr_scheduler` once per batch. Every 10 batches, the mean
    loss, current learning rate, and mean top-1 accuracy over that
    window are written to `writer`.
    """
    model.train()
    metric_logger = MetricLogger(delimiter="  ")
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value}'))
    metric_logger.add_meter('clips/s',
                            SmoothedValue(window_size=10, fmt='{value:.3f}'))
    loss_window = 0.0  # loss accumulated since the last TensorBoard flush
    acc_window = 0.0   # top-1 accuracy accumulated likewise
    header = 'Epoch: [{}]'.format(epoch)
    batch_idx = 0
    for video, target in metric_logger.log_every(data_loader, print_freq,
                                                 header):
        tic = time.time()
        video, target = video.to(device), target.to(device)
        output = model(video)
        loss = criterion(output, target)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        batch_size = video.shape[0]
        loss_window += loss.item()
        acc_window += acc1.item()
        # Flush 10-batch window averages to TensorBoard.
        if (batch_idx + 1) % 10 == 0:
            global_step = epoch * len(data_loader) + batch_idx
            writer.add_scalar('training loss', loss_window / 10, global_step)
            writer.add_scalar('learning rate', optimizer.param_groups[0]["lr"],
                              global_step)
            writer.add_scalar('accuracy', acc_window / 10, global_step)
            loss_window = 0.0
            acc_window = 0.0
        batch_idx += 1
        metric_logger.update(loss=loss.item(),
                             lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters['acc1'].update(acc1.item(), n=batch_size)
        metric_logger.meters['acc5'].update(acc5.item(), n=batch_size)
        metric_logger.meters['clips/s'].update(batch_size /
                                               (time.time() - tic))
        lr_scheduler.step()
Example #9
0
# Top-level training loop for a detection-style model.
# NOTE(review): `model`, `lr`, `momentum`, `weight_decay`, `step_size`,
# `gamma`, `num_epochs`, `train_data_loader`, `print_freq`, and `utils`
# are assumed to be defined earlier in the file — confirm against the
# full script.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Optimize only parameters that require gradients (e.g. frozen backbones
# are skipped automatically).
params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=lr, momentum=momentum, weight_decay=weight_decay)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)


# train
for epoch in range(num_epochs):
    metric_logger = MetricLogger(delimiter=' ')
    metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
    header = 'Epoch: [{}]'.format(epoch)

    model.train()
    for images, targets in metric_logger.log_every(train_data_loader, print_freq, header):
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # The model returns a dict of named losses; sum them for backprop.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Reduced losses are for logging across processes only; `loss_value`
        # is computed but unused here.
        loss_dict_reduced = utils.reduce_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())

        loss_value = losses_reduced.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        # NOTE(review): StepLR is stepped per batch here, not per epoch —
        # the lr decays every `step_size` batches; confirm this is intended.
        lr_scheduler.step()