Exemplo n.º 1
0
def validate(val_dataloader, model, configs):
    losses = AverageMeter('Loss', ':.4e')
    criterion = Compute_Loss(device=configs.device)
    # switch to train mode
    model.eval()
    with torch.no_grad():
        for batch_idx, batch_data in enumerate(tqdm(val_dataloader)):
            metadatas, imgs, targets = batch_data
            batch_size = imgs.size(0)
            for k in targets.keys():
                targets[k] = targets[k].to(configs.device, non_blocking=True)
            imgs = imgs.to(configs.device, non_blocking=True).float()
            outputs = model(imgs)
            total_loss, loss_stats = criterion(outputs, targets)
            # For torch.nn.DataParallel case
            if (not configs.distributed) and (configs.gpu_idx is None):
                total_loss = torch.mean(total_loss)

            if configs.distributed:
                reduced_loss = reduce_tensor(total_loss.data, configs.world_size)
            else:
                reduced_loss = total_loss.data
            losses.update(to_python_float(reduced_loss), batch_size)

    return losses.avg
Exemplo n.º 2
0
def train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch, configs, logger, tb_writer):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')

    progress = ProgressMeter(len(train_dataloader), [batch_time, data_time, losses],
                             prefix="Train - Epoch: [{}/{}]".format(epoch, configs.num_epochs))

    criterion = Compute_Loss(device=configs.device)
    num_iters_per_epoch = len(train_dataloader)
    # switch to train mode
    model.train()
    start_time = time.time()
    for batch_idx, batch_data in enumerate(tqdm(train_dataloader)):
        data_time.update(time.time() - start_time)
        metadatas, imgs, targets = batch_data
        batch_size = imgs.size(0)
        global_step = num_iters_per_epoch * (epoch - 1) + batch_idx + 1
        for k in targets.keys():
            targets[k] = targets[k].to(configs.device, non_blocking=True)
        imgs = imgs.to(configs.device, non_blocking=True).float()
        outputs = model(imgs)
        total_loss, loss_stats = criterion(outputs, targets)
        # For torch.nn.DataParallel case
        if (not configs.distributed) and (configs.gpu_idx is None):
            total_loss = torch.mean(total_loss)

        # compute gradient and perform backpropagation
        total_loss.backward()
        if global_step % configs.subdivisions == 0:
            optimizer.step()
            # zero the parameter gradients
            optimizer.zero_grad()

            # ######################### Sersy #########################################
            # Adjust learning rate
            # if configs.step_lr_in_epoch:
            #     lr_scheduler.step()
            #     if tb_writer is not None:
            #         tb_writer.add_scalar('LR', lr_scheduler.get_lr()[0], global_step)

        if configs.distributed:
            reduced_loss = reduce_tensor(total_loss.data, configs.world_size)
        else:
            reduced_loss = total_loss.data
        losses.update(to_python_float(reduced_loss), batch_size)
        # measure elapsed time
        # torch.cuda.synchronize()
        batch_time.update(time.time() - start_time)

        if tb_writer is not None:
            if (global_step % configs.tensorboard_freq) == 0:
                loss_stats['avg_loss'] = losses.avg
                tb_writer.add_scalars('Train', loss_stats, global_step)
        # Log message
        if logger is not None:
            if (global_step % configs.print_freq) == 0:
                logger.info(progress.get_message(batch_idx))

        start_time = time.time()
Exemplo n.º 3
0
def validate(val_dataloader, model, configs):
    losses = AverageMeter('Loss', ':.4e')
    criterion = Compute_Loss(device=configs.device)
    # switch to train mode
    model.eval()
    with torch.no_grad():
        for batch_idx, batch_data in enumerate(tqdm(val_dataloader)):
            metadatas, targets = batch_data
            batch_size = len(metadatas['img_path'])
            voxelinput = metadatas['voxels']
            coorinput = metadatas['coors']
            numinput = metadatas['num_points']

            for k in targets.keys():
                targets[k] = targets[k].to(configs.device, non_blocking=True)
            #imgs = imgs.to(configs.device, non_blocking=True).float()

            dtype = torch.float32
            voxelinputr = torch.tensor(voxelinput,
                                       dtype=torch.float32,
                                       device=configs.device).to(dtype)

            coorinputr = torch.tensor(coorinput,
                                      dtype=torch.int32,
                                      device=configs.device)

            numinputr = torch.tensor(numinput,
                                     dtype=torch.int32,
                                     device=configs.device)

            try:
                outputs = model(voxelinputr, coorinputr, numinputr)
            except RuntimeError as exception:
                if "out of memory" in str(exception):
                    print("WARNING: out of memory")
                    print('###############################3')
                    if hasattr(torch.cuda, 'empty_cache'):
                        torch.cuda.empty_cache()
                else:
                    print('###############################3')
                    raise exception

            #outputs = model(voxelinputr, coorinputr, numinputr)

            total_loss, loss_stats = criterion(outputs, targets)
            # For torch.nn.DataParallel case
            if (not configs.distributed) and (configs.gpu_idx is None):
                total_loss = torch.mean(total_loss)

            if configs.distributed:
                reduced_loss = reduce_tensor(total_loss.data,
                                             configs.world_size)
            else:
                reduced_loss = total_loss.data
            losses.update(to_python_float(reduced_loss), batch_size)

    return losses.avg
Exemplo n.º 4
0
def train_one_epoch(train_dataloader, model, optimizer, lr_scheduler, epoch,
                    configs, logger, tb_writer):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')

    progress = ProgressMeter(len(train_dataloader),
                             [batch_time, data_time, losses],
                             prefix="Train - Epoch: [{}/{}]".format(
                                 epoch, configs.num_epochs))

    criterion = Compute_Loss(device=configs.device)
    num_iters_per_epoch = len(train_dataloader)
    # switch to train mode
    model.train()
    start_time = time.time()
    for batch_idx, batch_data in enumerate(tqdm(train_dataloader)):
        data_time.update(time.time() - start_time)
        metadatas, targets = batch_data
        batch_size = len(metadatas['img_path'])
        '''hetmap = np.array(targets['hm_cen'][0], dtype= np.uint8) * 100
        hetmap = hetmap.transpose(1,2,0)
        hetmap = cv2.resize(hetmap,(800,800))
        global count
        hetmap = hetmap.transpose(2,0,1)
        tb_writer.add_image('traget{}'.format(count), hetmap)'''

        voxelinput = metadatas['voxels']
        coorinput = metadatas['coors']
        numinput = metadatas['num_points']

        global_step = num_iters_per_epoch * (epoch - 1) + batch_idx + 1
        for k in targets.keys():
            targets[k] = targets[k].to(configs.device, non_blocking=True)

        dtype = torch.float32
        voxelinputr = torch.tensor(voxelinput,
                                   dtype=torch.float32,
                                   device=configs.device).to(dtype)

        coorinputr = torch.tensor(coorinput,
                                  dtype=torch.int32,
                                  device=configs.device)

        numinputr = torch.tensor(numinput,
                                 dtype=torch.int32,
                                 device=configs.device)

        #print('coor. {}'.format(coorinputr.shape))

        outputs = model(voxelinputr, coorinputr, numinputr)
        #print(type(outputs))
        #outputs = outputs._asdict()
        '''outhetmap = np.array(outputs['hm_cen'][0].cpu().detach().numpy(), dtype= np.uint8) * 100
        outhetmap = outhetmap.transpose(1,2,0)
        outhetmap = cv2.resize(outhetmap,(800,800))
        outhetmap = outhetmap.transpose(2,0,1)
        tb_writer.add_image('output{}'.format(count), outhetmap)'''

        #count += 1

        #box_preds = outputs.view(batch_size, -1, 7)

        total_loss, loss_stats = criterion(outputs, targets)
        # For torch.nn.DataParallel case
        if (not configs.distributed) and (configs.gpu_idx is None):
            total_loss = torch.mean(total_loss)

        # compute gradient and perform backpropagation
        total_loss.backward()
        if global_step % configs.subdivisions == 0:
            optimizer.step()
            # zero the parameter gradients
            optimizer.zero_grad()
            # Adjust learning rate
            if configs.step_lr_in_epoch:
                lr_scheduler.step()
                if tb_writer is not None:
                    tb_writer.add_scalar('LR',
                                         lr_scheduler.get_lr()[0], global_step)

        if configs.distributed:
            reduced_loss = reduce_tensor(total_loss.data, configs.world_size)
        else:
            reduced_loss = total_loss.data
        losses.update(to_python_float(reduced_loss), batch_size)
        # measure elapsed time
        # torch.cuda.synchronize()
        batch_time.update(time.time() - start_time)

        if tb_writer is not None:
            if (global_step % configs.tensorboard_freq) == 0:
                loss_stats['avg_loss'] = losses.avg
                tb_writer.add_scalars('Train', loss_stats, global_step)
        # Log message
        if logger is not None:
            if (global_step % configs.print_freq) == 0:
                logger.info(progress.get_message(batch_idx))

        start_time = time.time()