Exemplo n.º 1
0
def experiment(exp_name, device, eval_range='all', plot=True):
    """Evaluate a saved model on the training and validation loaders.

    Loads the configuration for ``exp_name``, restores the saved weights,
    runs ``eval_batch`` on both loaders and prints the reported metrics.

    Args:
        exp_name: Experiment name handed to ``load_config``.
        device: Device the model is built on and the weights are mapped to.
        eval_range: Forwarded unchanged to ``eval_batch``.
        plot: When true (the default), draw a precision-recall curve for
            each split with ``plot_pr_curve``. When false, only the textual
            metrics are printed.
    """
    config, _, _, _ = load_config(exp_name)
    net, loss_fn = build_model(config, device, train=False)
    state_dict = torch.load(get_model_name(config), map_location=device)
    # With config['mGPUs'] set, the weights are loaded through net.module.
    if config['mGPUs']:
        net.module.load_state_dict(state_dict)
    else:
        net.load_state_dict(state_dict)
    train_loader, val_loader = get_data_loader(
        config['batch_size'],
        config['use_npy'],
        geometry=config['geometry'],
        frame_range=config['frame_range'])

    # Train set
    train_metrics, train_precisions, train_recalls, _ = eval_batch(
        config, net, loss_fn, train_loader, device, eval_range)
    print("Training mAP", train_metrics['AP'])
    if plot:
        fig_name = "PRCurve_train_" + config['name']
        legend = "AP={:.1%} @IOU=0.5".format(train_metrics['AP'])
        plot_pr_curve(train_precisions, train_recalls, legend, name=fig_name)

    # Validation set
    val_metrics, val_precisions, val_recalls, _ = eval_batch(
        config, net, loss_fn, val_loader, device, eval_range)

    print("Validation mAP", val_metrics['AP'])
    print("Net Fwd Pass Time on average {:.4f}s".format(
        val_metrics['Forward Pass Time']))
    print("Nms Time on average {:.4f}s".format(
        val_metrics['Postprocess Time']))

    if plot:
        fig_name = "PRCurve_val_" + config['name']
        legend = "AP={:.1%} @IOU=0.5".format(val_metrics['AP'])
        plot_pr_curve(val_precisions, val_recalls, legend, name=fig_name)
Exemplo n.º 2
0
def train(config_name, device):
    """Train the model described by ``config_name`` and checkpoint it.

    Each epoch runs one pass over the training loader, computes a validation
    loss with ``validate_batch`` and prints both losses. A checkpoint is
    written every ``config['save_every']`` epochs and after the last epoch.

    Args:
        config_name: Name handed to ``load_config``.
        device: Device that batches are moved to and that a resumed
            checkpoint is mapped to.
    """
    config, _, batch_size, max_epochs = load_config(config_name)
    train_data_loader, test_data_loader = get_data_loader(
        batch_size=batch_size,
        use_npy=config['use_npy'],
        frame_range=config['frame_range'])
    net, criterion, optimizer, scheduler = build_model(config,
                                                       device,
                                                       train=True)

    if config['resume_training']:
        saved_ckpt_path = get_model_name(config['old_ckpt_name'])
        net.load_state_dict(torch.load(saved_ckpt_path, map_location=device))
        print("Successfully loaded trained ckpt at {}".format(saved_ckpt_path))

    #net.backbone.conv1.register_forward_hook(printnorm)
    #net.backbone.conv2.register_backward_hook(printgradnorm)

    start_time = time.time()
    for epoch in range(max_epochs):
        # Re-enter training mode every epoch: validate_batch runs at the end
        # of each epoch and may leave the network in eval mode
        # (NOTE(review): confirm against validate_batch).
        net.train()
        train_loss = 0
        num_samples = 0
        # NOTE(review): the scheduler is stepped before any optimizer.step();
        # recent PyTorch versions expect the opposite order -- confirm the
        # intended schedule before changing it.
        scheduler.step()
        print("Learning Rate for Epoch {} is {} ".format(
            epoch + 1, scheduler.get_lr()))
        for batch, label_map in train_data_loader:
            batch = batch.to(device)
            label_map = label_map.to(device)
            optimizer.zero_grad()
            # Forward
            predictions = net(batch)
            loss = criterion(predictions, label_map)
            loss.backward()
            optimizer.step()

            train_loss += float(loss)
            num_samples += label_map.shape[0]

        # Rescale the summed per-batch losses by batch_size / num_samples;
        # an empty loader would otherwise divide by zero.
        if num_samples:
            train_loss = train_loss * batch_size / num_samples
        else:
            train_loss = float('nan')

        val_loss = validate_batch(net, criterion, batch_size, test_data_loader,
                                  device)

        print("Epoch {}|Time {:.3f}|Training Loss: {}|Validation Loss: {}".
              format(epoch + 1,
                     time.time() - start_time, train_loss, val_loss))

        if (epoch +
                1) == max_epochs or (epoch + 1) % config['save_every'] == 0:
            model_path = get_model_name(config['name'] +
                                        '__epoch{}'.format(epoch + 1))
            torch.save(net.state_dict(), model_path)
            print("Checkpoint saved at {}".format(model_path))

    print('Finished Training')
    end_time = time.time()
    elapsed_time = end_time - start_time
    print("Total time elapsed: {:.2f} seconds".format(elapsed_time))
Exemplo n.º 3
0
def experiment(config_name, device, image_id=25):
    """Run the model on one dataset sample and visualise its detections.

    The prediction is thresholded on its first channel, the remaining eight
    channels of every selected cell are read as four (x, y) corners, the
    boxes are filtered with ``non_max_suppression`` and the result is
    plotted next to the ground-truth labels.

    Args:
        config_name: Name handed to ``load_config``.
        device: Device the model and the input sample are placed on.
        image_id: Index of the dataset sample to visualise (default 25, the
            value that used to be hard-coded).
    """
    config, _, _, _ = load_config(config_name)
    net, _ = build_model(config, device, train=False)
    net.load_state_dict(
        torch.load(get_model_name(config['name']), map_location=device))
    net.set_decode(True)
    loader, _ = get_data_loader(batch_size=1,
                                use_npy=config['use_npy'],
                                frame_range=config['frame_range'])
    net.eval()

    threshold = config['cls_threshold']

    with torch.no_grad():
        sample, _ = loader.dataset[image_id]
        sample = sample.to(device)
        _, label_list = loader.dataset.get_label(image_id)

        # Forward Pass
        t_start = time.time()
        pred = net(sample.unsqueeze(0)).squeeze_(0)
        print("Forward pass time", time.time() - t_start)

        # Select all the bounding boxes with classification score above threshold
        cls_pred = pred[..., 0]
        activation = cls_pred > threshold

        num_boxes = int(activation.sum())
        if num_boxes == 0:
            print("No bounding box found")
            return

        # Channels 1..8 hold the corner coordinates of each selected box.
        # The tensors follow the model's device, so they are moved to the
        # CPU before the NumPy conversion (which fails for CUDA tensors).
        corners = torch.stack(
            [torch.masked_select(pred[..., i], activation)
             for i in range(1, 9)],
            dim=1)
        corners = corners.float().cpu().view(-1, 4, 2).numpy()

        scores = torch.masked_select(cls_pred, activation).cpu().numpy()

        # NMS
        t_start = time.time()
        selected_ids = non_max_suppression(corners, scores,
                                           config['nms_iou_threshold'])
        corners = corners[selected_ids]
        scores = scores[selected_ids]
        print("Non max suppression time:", time.time() - t_start)

        # Visualization
        input_np = sample.cpu().numpy()
        plot_bev(input_np, label_list, window_name='GT')
        plot_bev(input_np, corners, window_name='Prediction')
        plot_label_map(cls_pred.cpu().numpy())
Exemplo n.º 4
0
def quant_experiment(exp_name, device, epoch):
    """Evaluate a quantized checkpoint on the validation loader.

    The checkpoint's weights are re-quantized with the stored weight scales
    when the checkpoint has a ``'weight_scale'`` entry. When it has an
    ``'act_scale'`` entry the model is scored with ``quant_evaluation``,
    otherwise with plain ``evaluation``. The resulting metrics are printed.

    Args:
        exp_name: Experiment name handed to ``load_config``.
        device: Device used for the model and for mapping the checkpoint.
        epoch: Checkpoint selector handed to ``get_model_path``.
    """
    config = load_config(exp_name)
    config['augmentation'] = False
    num_bits = config['num_bits']
    net, loss_fn = build_model(config, device, train=False)

    model_path, exist_best_model = get_model_path(config, epoch)
    assert exist_best_model, "There is no model"

    checkpoint = torch.load(model_path, map_location=device)
    weights = checkpoint['model_state_dict']

    if 'weight_scale' in checkpoint:
        stored_scales = checkpoint['weight_scale']
        print("weight scale loaded")
        # Only the re-quantized weights are needed from here on.
        weights, _ = weights_quant_with_scale(weights, stored_scales,
                                              num_bits)
        print("weight scale completed")

    has_act_scale = 'act_scale' in checkpoint
    if has_act_scale:
        act_scale_list = checkpoint['act_scale']
        print("act scale loaded")

    target = net.module if config['mGPUs'] else net
    target.load_state_dict(weights)

    _, val_loader = get_data_loader(config)

    # Evaluation on the training split is currently disabled.

    if has_act_scale:
        val_metrics = quant_evaluation(net, loss_fn, val_loader, device,
                                       act_scale_list, num_bits)
    else:
        val_metrics = evaluation(config, net, loss_fn, val_loader, device)

    print("------Validation Result------")
    report = (
        ("Prec@1           : ", 'top1'),
        ("Prec@5           : ", 'top5'),
        ("Forward Pass Time: ", 'Forward Pass Time'),
        ("loss             : ", 'loss'),
    )
    for label, key in report:
        print(label, val_metrics[key])
Exemplo n.º 5
0
def experiment(exp_name, device, epoch):
    """Evaluate a checkpoint on the validation loader and print the metrics.

    The checkpoint may be either a dict with a ``'model_state_dict'`` entry
    or a bare state dict. In the latter case the file is rewritten after
    evaluation as a dict holding the weights and the measured top-1 score.

    Args:
        exp_name: Experiment name handed to ``load_config``.
        device: Device used for the model and for mapping the checkpoint.
        epoch: Checkpoint selector handed to ``get_model_path``.
    """
    config = load_config(exp_name)
    config['augmentation'] = False
    net, loss_fn = build_model(config, device, train=False)

    model_path, exist_best_model = get_model_path(config, epoch)
    assert exist_best_model, "There is no model"

    checkpoint = torch.load(model_path, map_location=device)
    is_wrapped = 'model_state_dict' in checkpoint
    weights = checkpoint['model_state_dict'] if is_wrapped else checkpoint

    target = net.module if config['mGPUs'] else net
    target.load_state_dict(weights)
    _, val_loader = get_data_loader(config)

    # Evaluation on the training split is currently disabled.

    val_metrics = evaluation(config, net, loss_fn, val_loader, device)
    print("------Validation Result------")
    report = (
        ("Prec@1           : ", 'top1'),
        ("Prec@5           : ", 'top5'),
        ("Forward Pass Time: ", 'Forward Pass Time'),
        ("loss             : ", 'loss'),
    )
    for label, key in report:
        print(label, val_metrics[key])

    if is_wrapped:
        return

    # Bare state dict: store it back together with its top-1 score.
    wrapped = {
        'model_state_dict': weights,
        'top1': val_metrics['top1']
    }
    torch.save(wrapped, model_path)
    print("model saved at ", model_path)
Exemplo n.º 6
0
def test(exp_name, device, image_id):
    """Evaluate the saved model on one training sample and print the results.

    Runs ``eval_one`` (with plotting enabled) on sample ``image_id`` of the
    training loader, then prints the loss, precision, recall and the
    forward-pass and NMS timings it reports.

    Args:
        exp_name: Experiment name handed to ``load_config``.
        device: Device used for the model and for mapping the checkpoint.
        image_id: Index of the sample forwarded to ``eval_one``.
    """
    config, _, _, _ = load_config(exp_name)
    net, loss_fn = build_model(config, device, train=False)
    # Other entry points in this file go through net.module when
    # config['mGPUs'] is set; do the same here. ``get`` keeps configs
    # without that key working (assumes config is a dict -- TODO confirm).
    model = net.module if config.get('mGPUs') else net
    model.load_state_dict(torch.load(get_model_name(config),
                                     map_location=device))
    model.set_decode(True)
    train_loader, _ = get_data_loader(
        1,
        config['use_npy'],
        geometry=config['geometry'],
        frame_range=config['frame_range'])
    net.eval()

    with torch.no_grad():
        num_gt, num_pred, scores, pred_image, pred_match, loss, t_forward, t_nms = \
            eval_one(net, loss_fn, config, train_loader, image_id, device, plot=True)

        # A prediction whose match entry is not -1 counts as a true positive.
        TP = int((pred_match != -1).sum())
        # Precision/recall are undefined without predictions/ground truth;
        # report NaN instead of dividing by zero.
        precision = TP / num_pred if num_pred else float('nan')
        recall = TP / num_gt if num_gt else float('nan')
        print("Loss: {:.4f}".format(loss))
        print("Precision: {:.2f}".format(precision))
        print("Recall: {:.2f}".format(recall))
        print("forward pass time {:.3f}s".format(t_forward))
        print("nms time {:.3f}s".format(t_nms))
Exemplo n.º 7
0
def train(exp_name, device):
    """Train the model for ``exp_name``, logging losses and saving checkpoints.

    Optionally resumes from a saved checkpoint, then for every epoch runs
    one pass over the training loader, logs the averaged classification and
    localisation losses every ``config['log_every']`` steps, logs the mean
    epoch loss, and saves a checkpoint every ``config['save_every']`` epochs
    and after the final epoch.

    Args:
        exp_name: Experiment name handed to ``load_config``.
        device: Device that batches are moved to and that a resumed
            checkpoint is mapped to.
    """

    def unwrap():
        # Methods of the model itself are reached through net.module when
        # config['mGPUs'] is set.
        return net.module if config['mGPUs'] else net

    # Hyperparameters
    config, _, batch_size, max_epochs = load_config(exp_name)

    # Data
    train_data_loader, _ = get_data_loader(
        batch_size,
        config['use_npy'],
        geometry=config['geometry'],
        frame_range=config['frame_range'])

    # Model, loss, optimizer and LR schedule
    net, loss_fn, optimizer, scheduler = build_model(config,
                                                     device,
                                                     train=True)

    # Loggers (the validation logger is created but not written to here)
    train_logger = get_logger(config, 'train')
    val_logger = get_logger(config, 'val')

    st_epoch = 0
    if config['resume_training']:
        saved_ckpt_path = get_model_name(config)
        unwrap().load_state_dict(
            torch.load(saved_ckpt_path, map_location=device))
        print("Successfully loaded trained ckpt at {}".format(saved_ckpt_path))
        st_epoch = config['resume_from']

    # Global step counter, continued from the resume epoch.
    step = 1 + st_epoch * len(train_data_loader)
    cls_loss = 0
    loc_loss = 0
    for epoch in range(st_epoch, max_epochs):
        epoch_num = epoch + 1
        start_time = time.time()
        train_loss = 0

        net.train()
        unwrap().set_decode(False)
        scheduler.step()

        for batch, label_map, _ in train_data_loader:
            batch = batch.to(device)
            label_map = label_map.to(device)
            optimizer.zero_grad()

            # Forward, backward, update
            predictions = net(batch)
            loss, cls, loc = loss_fn(predictions, label_map)
            loss.backward()
            optimizer.step()

            cls_loss += cls
            loc_loss += loc
            train_loss += loss.item()

            log_every = config['log_every']
            if step % log_every == 0:
                # Log the window averages, then start a fresh window.
                cls_loss = cls_loss / log_every
                loc_loss = loc_loss / log_every
                train_logger.scalar_summary('cls_loss', cls_loss, step)
                train_logger.scalar_summary('loc_loss', loc_loss, step)
                cls_loss = 0
                loc_loss = 0

            step += 1

        # Mean training loss over the batches of this epoch
        train_loss = train_loss / len(train_data_loader)
        train_logger.scalar_summary('loss', train_loss, epoch_num)
        print("Epoch {}|Time {:.3f}|Training Loss: {:.5f}".format(
            epoch_num,
            time.time() - start_time, train_loss))

        # Validation is currently disabled in this loop.

        # Checkpoint on the last epoch and on every save_every-th epoch
        if epoch_num == max_epochs or epoch_num % config['save_every'] == 0:
            model_path = get_model_name(config, epoch_num)
            torch.save(unwrap().state_dict(), model_path)
            print("Checkpoint saved at {}".format(model_path))

    print('Finished Training')
Exemplo n.º 8
0
def quant_train(exp_name, device, epoch):
    """Fine-tune a trained model while re-quantizing its weights each epoch.

    The checkpoint selected by ``epoch`` is loaded, its weights are quantized
    with ``weights_quant`` and loaded into the model. Every epoch then
    trains, validates, re-quantizes the weights and validates again. When
    the post-quantization Prec@1 beats the running best, the model and its
    weight scales are saved next to the source checkpoint under a
    ``_quant.pth`` suffix.

    Args:
        exp_name: Experiment name handed to ``load_config``.
        device: Device used for the model, the batches and for mapping the
            checkpoint.
        epoch: Checkpoint selector handed to ``get_model_path``.

    Raises:
        FileNotFoundError: If ``get_model_path`` reports that the checkpoint
            does not exist.
    """
    num_bits = 8

    config = load_config(exp_name)
    config['resume_training'] = True
    config['resume_from'] = 0
    max_epochs = config['max_epochs']

    print("make data loader")
    train_data_loader, val_data_loader = get_data_loader(config)

    net, loss_fn, optimizer, scheduler = build_model(config,
                                                     device,
                                                     train=True)
    # All state-dict traffic goes through the same object so the key names
    # match in both the single- and multi-GPU configuration.
    model = net.module if config['mGPUs'] else net

    ckpt_path, exist_model = get_model_path(config, epoch)
    if not exist_model:
        raise FileNotFoundError(
            "There is no model at {}".format(ckpt_path))

    # Load once, mapped to the target device.
    checkpoint = torch.load(ckpt_path, map_location=device)

    best_top1 = 0
    if 'top1' in checkpoint:
        best_top1 = checkpoint['top1']
        print("best top1 score is {:.3f}".format(best_top1))
        # A quantized model is accepted once it reaches 90% of that score.
        best_top1 = best_top1 * 0.9

    if 'model_state_dict' in checkpoint:
        weights = checkpoint['model_state_dict']
    else:
        weights = checkpoint
    model.load_state_dict(weights)
    print("Successfully loaded trained ckpt at {}".format(ckpt_path))

    for g in optimizer.param_groups:
        g['lr'] = config['learning_rate']

    # Start fine-tuning from quantized weights.
    quant_weights, weight_scale_list = weights_quant(weights,
                                                     num_bits=num_bits)
    model.load_state_dict(quant_weights)

    # Replaces the last four characters of the path (assumes a '.pth'-style
    # extension -- TODO confirm).
    quant_ckpt_path = ckpt_path[:-4] + '_quant.pth'

    # Per-channel (scale, shift, offset) used to re-normalise the input for
    # the 'tf' / 'tf_fused' models.
    tf_channel_params = (
        (0.229, 0.485, 123.68),
        (0.224, 0.456, 116.78),
        (0.225, 0.406, 103.94),
    )

    def run_validation(epoch_num):
        # Evaluate on the validation loader and print loss and accuracy.
        tic_val = time.time()
        metrics = evaluation(config, net, loss_fn, val_data_loader, device)
        print("Epoch {}|Time {:.3f}|Validation Loss: {:.5f}".format(
            epoch_num,
            time.time() - tic_val, metrics['loss']))
        print(
            "\033[32m validation || Prec@1: {:.3f} | Prec@5: {:.3f} \033[37m".
            format(metrics['top1'], metrics['top5']))
        return metrics

    for epoch_idx in range(max_epochs):
        start_time = time.time()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        net.train()

        print("Epoch {}, learning rate : {}".format(
            epoch_idx + 1,
            scheduler.optimizer.state_dict()['param_groups'][0]['lr']))

        # train
        start = time.time()
        for images, target in tqdm(train_data_loader):
            # measure data loading time
            data_time.update(time.time() - start)

            input_var = images.to(device)
            target_var = target.to(device)

            if config['model'] in ('tf', 'tf_fused'):
                for ch, (scale, shift, offset) in enumerate(tf_channel_params):
                    input_var[:, ch, :, :] = (input_var[:, ch, :, :] * scale +
                                              shift) * 255 - offset

            # compute output
            output = net(input_var)
            # A 1001-way output shifts every label by one
            # (presumably an extra class at index 0 -- TODO confirm).
            if output.shape[1] == 1001:
                target_var += 1
            loss = loss_fn(output, target_var)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target_var, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(prec1[0], images.size(0))
            top5.update(prec5[0], images.size(0))

            # compute gradient and optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - start)
            start = time.time()

        scheduler.step()
        print("Epoch {}|Time {:.3f}|Training Loss: {:.5f}".format(
            epoch_idx + 1,
            time.time() - start_time, losses.avg))
        print(
            "\033[32m training   || Prec@1: {:.3f} | Prec@5: {:.3f} \033[37m".
            format(top1.avg, top5.avg))

        # validation before quantize
        run_validation(epoch_idx + 1)

        # Re-quantize the freshly trained weights.
        quant_weights, weight_scale_list = weights_quant(model.state_dict(),
                                                         num_bits=num_bits)
        model.load_state_dict(quant_weights)

        # validation after quantize
        val_metrics = run_validation(epoch_idx + 1)

        # save best model
        if val_metrics['top1'] > best_top1:
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    'top1': val_metrics['top1'],
                    'weight_scale': weight_scale_list
                }, quant_ckpt_path)
            print("\033[32m Best model saved at {}. Prec@1 is {} \033[37m".
                  format(quant_ckpt_path, val_metrics['top1']))
            best_top1 = val_metrics['top1']
Exemplo n.º 9
0
def train(exp_name, device, epoch):
    """Train a classifier and keep the checkpoint with the best Prec@1.

    Every epoch runs one pass over the training loader, evaluates on the
    validation loader and overwrites the "best" checkpoint whenever the
    validation Prec@1 improves on the best score seen so far (seeded from
    an existing best checkpoint when there is one).

    Args:
        exp_name: Experiment name handed to ``load_config``.
        device: Device used for the model, the batches and for mapping
            checkpoints.
        epoch: Not read by this function; kept for interface compatibility.
    """
    config = load_config(exp_name)
    max_epochs = config['max_epochs']

    print("make data loader")
    train_data_loader, val_data_loader = get_data_loader(config)

    net, loss_fn, optimizer, scheduler = build_model(config,
                                                     device,
                                                     train=True)
    # State dicts are exchanged through net.module when mGPUs is set.
    model = net.module if config['mGPUs'] else net

    best_ckpt_path, exist_best_model = get_model_path(config, "best")
    best_top1 = 0
    if exist_best_model:
        best_checkpoint = torch.load(best_ckpt_path, map_location=device)
        # Tolerate checkpoints stored without a score.
        if 'top1' in best_checkpoint:
            best_top1 = best_checkpoint['top1']
        print("best top1 score is {:.3f}".format(best_top1))

    if config['resume_training']:
        # get_model_path returns (path, exists); only the path is loadable.
        # NOTE(review): the literal "epoch" selector is kept as written --
        # confirm it should not be the ``epoch`` argument instead.
        saved_ckpt_path, _ = get_model_path(config, "epoch")
        checkpoint = torch.load(saved_ckpt_path, map_location=device)
        model.load_state_dict(checkpoint['model_state_dict'])
        print("Successfully loaded trained ckpt at {}".format(saved_ckpt_path))
        st_epoch = config['resume_from']
    else:
        st_epoch = 0

    for g in optimizer.param_groups:
        g['lr'] = config['learning_rate']

    for epoch_idx in range(st_epoch, max_epochs):
        start_time = time.time()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()

        net.train()

        print("Epoch {}, learning rate : {}".format(
            epoch_idx + 1,
            scheduler.optimizer.state_dict()['param_groups'][0]['lr']))

        # train
        start = time.time()
        for images, target in tqdm(train_data_loader):
            # measure data loading time
            data_time.update(time.time() - start)

            input_var = images.to(device)
            target_var = target.to(device)

            # compute output
            output = net(input_var)
            loss = loss_fn(output, target_var)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target_var, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(prec1[0], images.size(0))
            top5.update(prec5[0], images.size(0))

            # compute gradient and optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - start)
            start = time.time()

        scheduler.step()
        print("Epoch {}|Time {:.3f}|Training Loss: {:.5f}".format(
            epoch_idx + 1,
            time.time() - start_time, losses.avg))
        print(
            "\033[32m training   || Prec@1: {:.3f} | Prec@5: {:.3f} \033[37m".
            format(top1.avg, top5.avg))

        # validation
        tic_val = time.time()
        val_metrics = evaluation(config, net, loss_fn, val_data_loader, device)
        print("Epoch {}|Time {:.3f}|Validation Loss: {:.5f}".format(
            epoch_idx + 1,
            time.time() - tic_val, val_metrics['loss']))
        print(
            "\033[32m validation || Prec@1: {:.3f} | Prec@5: {:.3f} \033[37m".
            format(val_metrics['top1'], val_metrics['top5']))

        # save best model
        if val_metrics['top1'] > best_top1:
            torch.save(
                {
                    'model_state_dict': model.state_dict(),
                    'top1': val_metrics['top1']
                }, best_ckpt_path)
            print("\033[32m Best model saved at {}. Prec@1 is {} \033[37m".
                  format(best_ckpt_path, val_metrics['top1']))
            best_top1 = val_metrics['top1']