Example No. 1
    mrcnn_start = time.time()
    image = skimage.io.imread(os.path.join(IMAGE_DIR, img_name))
    #image = skimage.io.imread('/home/ubuntueric/darknet_1204/ITRI_test/00094_PT.jpg')

    # Run detection
    results = model.detect([image], verbose=1)
    r = results[0]
    # Time end
    mrcnn_end = time.time()
    total_mrcnn_time = total_mrcnn_time + (mrcnn_end - mrcnn_start)

    # Visualize results
    visualize.save_image(image,
                         img_name,
                         r['rois'],
                         r['masks'],
                         r['class_ids'],
                         r['scores'],
                         coco_class_names,
                         filter_classs_names=None,
                         scores_thresh=0.1,
                         save_dir=RESULT_DIR,
                         mode=1)

fps = frame_number / total_mrcnn_time
print("\n# ------------------------------------ #")
print("# Processed Frames: %d" % frame_number)
print("# Cost Time: %.3f" % total_mrcnn_time)
print("# FPS: %.1f" % fps)
print("# ------------------------------------ #\n")
def test(test_loader, model, criterion_mse, criterion_cross_entropy,
         criterion_landmark, optimizer, epoch, use_cuda, test_batch):
    global best_loss
    
    model.eval()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    
    end = time.time()
    bar = Bar('Processing', max=len(test_loader))
    count = 0

    for batch_idx, [batch_lr_img, batch_sr_img, batch_lbl, batch_landmark] in enumerate(test_loader):
        with torch.no_grad():
    
            # measure data loading time
            data_time.update(time.time() - end)
            
            if use_cuda:
                batch_lr_img, batch_sr_img, batch_lbl, batch_landmark = batch_lr_img.cuda(), batch_sr_img.cuda(), batch_lbl.cuda(), batch_landmark.cuda()
            batch_lr_img, batch_sr_img, batch_lbl, batch_landmark = Variable(batch_lr_img), Variable(batch_sr_img), Variable(batch_lbl), Variable(batch_landmark)
            
            # compute output
            coarse_out, out_sr, out_landmark, out_lbl = model(batch_lr_img)
            loss = (7.0 * criterion_mse(out_sr, batch_sr_img) +
                    7.0 * criterion_mse(coarse_out, batch_sr_img) +
                    criterion_landmark(out_landmark, batch_landmark) +
                    criterion_cross_entropy(out_lbl, batch_lbl)) / (2.0 * test_batch)
             
            # ---------------------------------------------------------------------------
            #train coarse sr network 
            #out,coarse_out = model(batch_lr_img)
            #loss = criterion_mse(coarse_out,batch_sr_img)/(2.0*test_batch) 
            # rand_id = random.randint(0,4)
            #-------------------------------------------------------------------------
            #train prior estimation network
            # out,landmark_out,parsing_out = model(batch_sr_img)
            # loss = (criterion_landmark(landmark_out,batch_landmark)+criterion_cross_entropy(parsing_out,batch_lbl))/(2.0*test_batch)
            losses.update(loss.data, batch_lr_img.size(0))

            count += 1
            if count % 90 == 0:
                #rand_id = random.randint(0, 4)
                ## count = 0
                random_img = out_sr[0].detach().cpu().numpy()
                random_landmark = out_landmark[0].detach().cpu().numpy()
                random_parsing = out_lbl[0].max(dim=0)[1].detach().cpu().numpy()
                random_coarse = coarse_out[0].detach().cpu().numpy()
                sr_img = batch_sr_img[0].detach().cpu().numpy()
                lr_img = batch_lr_img[0].detach().cpu().numpy()

                visualize.save_image(random_coarse, random_img, random_landmark, random_parsing, lr_img, sr_img, epoch,
                                     if_train=False, count=int(count / 90))

                ##-----------------------------------------------------------------------
                ##visualize coarse network
                #random_coarse = coarse_out[0]
                #random_coarse = random_coarse.detach().cpu().numpy()
                #visualize.save_image(coarse_image=random_coarse,epoch=epoch,if_train=True,count=int(count/90))        

                # random_landmark = landmark_out[0]
                # random_parsing = parsing_out[0]
                #random_coarse = random_coarse[0].detach().cpu().numpy()
                # random_landmark = random_landmark.detach().cpu().numpy()
                # random_parsing = random_parsing.max(dim=0)[1].detach().cpu().numpy()
                # visualize.save_image(landmark=random_landmark,parsing=random_parsing,epoch=epoch,if_train=False,count=int(count/5))

            
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
        
        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Loss: {loss:.6f}'.format(
            batch=batch_idx + 1,
            size=len(test_loader),
            data=data_time.avg,
            bt=batch_time.avg,
            loss=losses.avg,
        )
        print(bar.suffix)
        bar.next()
    
    bar.finish()
    return losses.avg
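
# --------------------------------------------------------------------------
# test() above and both train() variants below rely on an AverageMeter helper
# and a Bar progress printer that are not part of this listing. A minimal
# AverageMeter sketch in the style of the common PyTorch ImageNet-example
# utility (an assumption, not the project's actual implementation):
# --------------------------------------------------------------------------
class AverageMeter(object):
    """Tracks the latest value, running sum, count and average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
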
def train(train_loader, model, criterion_mse, criterion_cross_entropy,
          criterion_landmark, criterion_mmd, overall_optim, coarse_optim,
          prior_optim, encoder_optim, decoder_optim, discriminator_optim,
          epoch, gan_epochs, use_cuda, train_batch, lr):
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    gan_losses = AverageMeter()
    coarse_losses = AverageMeter()
    prior_losses = AverageMeter()
    encoder_losses = AverageMeter()
    decoder_losses = AverageMeter()

    end = time.time()
    bar = Bar('Processing', max=len(train_loader))
    count = 0

    for batch_idx, [batch_lr_img, batch_sr_img, batch_lbl,
                    batch_landmark] in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        # pdb.set_trace()
        if use_cuda:
            batch_lr_img, batch_sr_img, batch_lbl, batch_landmark = batch_lr_img.cuda(
            ), batch_sr_img.cuda(), batch_lbl.cuda(), batch_landmark.cuda()
        batch_lr_img, batch_sr_img, batch_lbl, batch_landmark = Variable(batch_lr_img), Variable(batch_sr_img), \
                                                                Variable(batch_lbl), Variable(batch_landmark)

        out_sr, out_coarse, out_landmark, out_lbl, out_embedding1, out_embedding2 = model(
            batch_lr_img, batch_sr_img)
        gan_loss = -criterion_mmd(out_embedding1, out_embedding2)
        gan_losses.update(gan_loss.data.cpu().numpy(), batch_lr_img.size(0))
        discriminator_optim.zero_grad()
        gan_loss.backward(retain_graph=True)
        discriminator_optim.step()

        coarse_loss = 12. * criterion_mse(out_coarse, batch_sr_img)
        coarse_losses.update(coarse_loss.data.cpu().numpy())
        coarse_optim.zero_grad()
        coarse_loss.backward(retain_graph=True)
        coarse_optim.step()

        encoder_loss = 10.0 * criterion_mse(out_sr, batch_sr_img) - gan_loss
        encoder_losses.update(encoder_loss.data.cpu().numpy())
        encoder_optim.zero_grad()
        encoder_loss.backward(retain_graph=True)
        encoder_optim.step()

        prior_loss = -gan_loss + criterion_mse(
            out_sr, batch_sr_img) + criterion_landmark(
                out_landmark, batch_landmark) + 1.0 * criterion_cross_entropy(
                    out_lbl, batch_lbl)
        prior_losses.update(prior_loss.data.cpu().numpy())
        prior_optim.zero_grad()
        prior_loss.backward(retain_graph=True)
        prior_optim.step()

        decoder_loss = 10. * criterion_mse(out_sr, batch_sr_img)
        decoder_losses.update(decoder_loss.data.cpu().numpy())
        decoder_optim.zero_grad()
        decoder_loss.backward()
        decoder_optim.step()

        loss = (8. * criterion_mse(out_sr, batch_sr_img) +
                8. * criterion_mse(out_coarse, batch_sr_img) +
                criterion_landmark(out_landmark, batch_landmark) +
                criterion_cross_entropy(out_lbl, batch_lbl) + criterion_mmd(
                    out_embedding1, out_embedding2)) / (2.0 * train_batch)
        loss = loss.data.cpu().numpy()
        losses.update(loss, batch_lr_img.size(0))

        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '(Epoch: {epoch} | Learning Rate: {lr:.8f} | {batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total Loss: {loss:.6f} | ' \
                     'Gan Loss: {gan_loss:.6f} | Coarse Loss: {coarse_loss:.6f} | Encoder Loss: {encoder_loss:.6f} | Decoder Loss: {decoder_loss:.6f} | Prior Loss: {prior_loss:.6f}'.format(
            epoch=epoch,
            lr=lr,
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.avg,
            bt=batch_time.avg,
            loss=losses.avg,
            gan_loss=gan_losses.avg,
            coarse_loss=coarse_losses.avg,
            encoder_loss=encoder_losses.avg,
            decoder_loss=decoder_losses.avg,
            prior_loss=prior_losses.avg,
        )
        print(bar.suffix)

        count += 1

        if count % 300 == 0:
            ## count = 0
            # rand_id = random.randint(0, 4)
            ## pdb.set_trace()
            random_img = out_sr[0].detach().cpu().numpy()
            random_landmark = out_landmark[0].detach().cpu().numpy()
            random_parsing = out_lbl[0].max(dim=0)[1].detach().cpu().numpy()
            random_coarse = out_coarse[0].detach().cpu().numpy()
            sr_img = batch_sr_img[0].detach().cpu().numpy()
            lr_img = batch_lr_img[0].detach().cpu().numpy()
            ## pdb.set_trace()
            visualize.save_image(random_coarse,
                                 random_img,
                                 random_landmark,
                                 random_parsing,
                                 lr_img,
                                 sr_img,
                                 epoch,
                                 if_train=True,
                                 count=int(count / 300))
        bar.next()

    bar.finish()
    return losses.avg
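
# --------------------------------------------------------------------------
# Why the per-component backward calls above pass retain_graph=True: every
# sub-loss is computed from the same forward pass, so each backward() has to
# keep the shared graph's saved buffers alive for the backward passes that
# follow (only the last one, decoder_loss.backward(), may free them). A small
# self-contained illustration with hypothetical tensors:
# --------------------------------------------------------------------------
import torch

x = torch.randn(4, requires_grad=True)
h = torch.sigmoid(x)                 # shared intermediate; saves buffers for backward
loss_a = h.sum()
loss_b = (h * h).sum()

loss_a.backward(retain_graph=True)   # keep the graph for the next backward pass
loss_b.backward()                    # would fail with a RuntimeError if the graph had been freed
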
def train(train_loader, model, criterion_mse, criterion_cross_entropy,
          criterion_landmark, optimizer, epoch, use_cuda, train_batch, lr):
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    NormMS = AverageMeter()
    end = time.time()
    bar = Bar('Processing', max=len(train_loader))
    count = 0

    for batch_idx, [batch_lr_img, batch_sr_img, batch_lbl, batch_landmark] in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        # pdb.set_trace() 
        if use_cuda:
            batch_lr_img, batch_sr_img, batch_lbl, batch_landmark = batch_lr_img.cuda(), batch_sr_img.cuda(), batch_lbl.cuda(), batch_landmark.cuda()
        batch_lr_img, batch_sr_img, batch_lbl, batch_landmark = Variable(batch_lr_img), Variable(batch_sr_img), \
                                                                Variable(batch_lbl), Variable(batch_landmark)
        
        #--------------------------------------------------------------------------
        #train overall network   
        # compute output
        coarse_out, out_sr, out_landmark, out_lbl = model(batch_lr_img)
        # pdb.set_trace()
        loss = (5. * criterion_mse(out_sr, batch_sr_img) +
                5. * criterion_mse(coarse_out, batch_sr_img) +
                criterion_landmark(out_landmark, batch_landmark) +
                criterion_cross_entropy(out_lbl, batch_lbl)) / (2.0 * train_batch)
        # pdb.set_trace() 
        
        ## ---------------------------------------------------------------------------
        ##train coarse sr network 
        #out,coarse_out = model(batch_lr_img)
        #loss = (criterion_mse(coarse_out,batch_sr_img))/(2.0*train_batch)
        
        #-------------------------------------------------------------------------
        #train prior estimation network
        # out,landmark_out,parsing_out = model(batch_sr_img)
        # loss = (criterion_landmark(landmark_out,batch_landmark)+criterion_cross_entropy(parsing_out,batch_lbl))/(2.0*train_batch)
        losses.update(loss.data, batch_lr_img.size(0))
    
        # compute gradient and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
    
        # plot progress
        bar.suffix = '(Epoch: {epoch} | Learning Rate: {lr:.8f} | {batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Loss: {loss:.6f}'.format(
            epoch=epoch,
            lr=lr,
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.avg,
            bt=batch_time.avg,
            loss=losses.avg,
        )
        print(bar.suffix)
        
        count += 1
        if count % 200 == 0:
            ## count = 0
            #rand_id = random.randint(0, 4)
            ## pdb.set_trace()
            random_img = out_sr[0].detach().cpu().numpy()
            random_landmark = out_landmark[0].detach().cpu().numpy()
            random_parsing = out_lbl[0].max(dim=0)[1].detach().cpu().numpy()
            random_coarse = coarse_out[0].detach().cpu().numpy()
            sr_img = batch_sr_img[0].detach().cpu().numpy()
            lr_img = batch_lr_img[0].detach().cpu().numpy()
            ## pdb.set_trace()
            visualize.save_image(random_coarse, random_img, random_landmark,
                                 random_parsing, lr_img, sr_img, epoch,
                                 if_train=True, count=int(count / 200))
            ##-----------------------------------------------------------------------
            #visualize coarse network
            #random_coarse = coarse_out[0]
            # random_landmark = landmark_out[0]
            # random_parsing = parsing_out[0]
            # #random_coarse = random_coarse.detach().cpu().numpy()
            # random_landmark = random_landmark.detach().cpu().numpy()
            # random_parsing = random_parsing.max(dim=0)[1].detach().cpu().numpy()
            # visualize.save_image(landmark=random_landmark,parsing=random_parsing,epoch=epoch,if_train=True,count=int(count/100))
        bar.next()

    bar.finish()
    return losses.avg
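
# --------------------------------------------------------------------------
# A hedged sketch of the outer loop that typically drives the train()/test()
# pair above. The loss choices, optimizer, hyper-parameters and checkpoint
# path are assumptions; model, the data loaders, lr, epochs, use_cuda and the
# batch sizes are expected to be defined elsewhere in the project.
# --------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.optim as optim

criterion_mse = nn.MSELoss()
criterion_cross_entropy = nn.CrossEntropyLoss()
criterion_landmark = nn.MSELoss()                  # assumed landmark loss
optimizer = optim.Adam(model.parameters(), lr=lr)

best_loss = float('inf')
for epoch in range(epochs):
    train_loss = train(train_loader, model, criterion_mse,
                       criterion_cross_entropy, criterion_landmark,
                       optimizer, epoch, use_cuda, train_batch, lr)
    val_loss = test(test_loader, model, criterion_mse,
                    criterion_cross_entropy, criterion_landmark,
                    optimizer, epoch, use_cuda, test_batch)
    if val_loss < best_loss:                       # keep the best checkpoint
        best_loss = val_loss
        torch.save(model.state_dict(), 'best_model.pth')  # hypothetical path
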
Example No. 5
def main(conf: DictConfig):
    print(conf.pretty())

    if 'seed' in conf and conf.seed:
        torch.manual_seed(conf.seed)
    if 'gpu' in conf:
        torch.cuda.set_device(conf.gpu)

    device = torch.device('cuda')

    out_dir = conf.out.get('dir', os.path.join(os.getcwd(), 'test_prediction'))
    make_dir(out_dir)

    dl = create_data_loader(conf.data)

    model = instantiate(conf.model).to(device)
    assert conf.model.params.bench_name == "predict"
    state_dict = torch.load(conf.model.weights)
    model.model.load_state_dict(state_dict)
    model.eval()
    model.requires_grad_(False)

    num_images_to_save = conf.out.num_images
    image_dir = conf.out.get('image_dir', os.path.join(os.getcwd(), 'images'))
    save_images = num_images_to_save > 0
    if save_images:
        if not os.path.isabs(image_dir):
            image_dir = os.path.join(out_dir, image_dir)
            make_dir(image_dir)
        logging.info("Saving images to {}".format(image_dir))

    min_score = conf.get("min_score", -1)
    use_tta = conf.tta.enabled
    iou_threshold = conf.tta.iou_threshold
    skip_threshold = conf.tta.skip_threshold
    mean, std = mean_std_tensors(conf.data, device)

    files = os.listdir(conf.data.params.image_dir)
    df = pd.DataFrame(np.empty((len(files), 2)),
                      columns=["image_id", "PredictionString"])
    i_image = 0
    s_image = 0

    for images, image_ids, metadata in tqdm(dl, desc="Predict"):
        images_gpu = images.to(device).float().sub_(mean).div_(std)
        img_scale = metadata['img_scale'].to(dtype=torch.float, device=device)
        assert len(metadata['img_size']) == 2
        img_size = torch.stack(metadata['img_size'],
                               dim=1).to(dtype=torch.float, device=device)

        predictions = model(images_gpu, img_scale, img_size).cpu()
        assert len(image_ids) == len(predictions)
        del images_gpu

        if use_tta:
            from utils.tta import combine_tta, ensemble_predictions
            predictions[:, :, [2, 3]] += predictions[:, :, [0, 1]]
            predictions = [predictions]

            for tta in combine_tta(1024):
                images_tta = tta(images)
                images_gpu = images_tta.to(device).float().sub_(mean).div_(std)
                predictions_tta = model(images_gpu, img_scale, img_size).cpu()
                N = images_gpu.size(0)
                boxes_tta = predictions_tta[..., :4].reshape(-1, 4)
                boxes_tta = tta.prepare_boxes(boxes_tta, box_format="coco")
                boxes_tta = tta.decode(boxes_tta)
                predictions_tta[:, :, :4] = torch.tensor(boxes_tta).reshape(
                    N, -1, 4)
                predictions.append(predictions_tta)
                del images_gpu, images_tta, predictions_tta, boxes_tta

            predictions = ensemble_predictions(
                predictions,
                iou_threshold=iou_threshold,
                skip_box_threshold=skip_threshold)

        del img_size
        if save_images:
            images = images.permute(0, 2, 3, 1)
            images = images.cpu().numpy().astype(np.uint8).copy()
            img_scale = img_scale.cpu().numpy()
        else:
            images, img_scale = None, None

        for j, image_id in enumerate(image_ids):
            scores_i = predictions[j][:, 4]
            pred_i = predictions[j][scores_i >= min_score]

            df.iloc[i_image, 0] = image_id
            df.iloc[i_image, 1] = stringify(pred_i)
            i_image += 1

            if save_images and s_image < num_images_to_save:
                image = images[j]
                boxes = pred_i[:, :4] / img_scale[j]
                draw_bboxes(image, boxes, (0, 255, 0), box_format='coco')
                path = os.path.join(image_dir, '%s.png' % image_id)
                save_image(image, path)
                s_image += 1
                del image, boxes

            del pred_i, scores_i

        del predictions

    logging.info("Saving {} to {}".format(conf.out.file, out_dir))
    path = os.path.join(out_dir, conf.out.file)
    df.to_csv(path, index=False)
    print("DONE")