Exemplo n.º 1
0
def get_corners(org_img, sol, s, part_path='', draw=False):
    """Run the start-of-line network on one image and keep confident points.

    Args:
        org_img: Image as a NumPy array with three axes. Axes are reversed
            (``transpose([2, 1, 0])``) and a leading batch axis is added
            before the array is handed to ``sol``.
            NOTE(review): presumably an OpenCV ``H x W x C`` image -- confirm
            against the caller.
        sol: Callable (the network) invoked with the normalised batch tensor.
            Its output is indexed as ``[batch, prediction, field]``.
        s: Not read by this function; kept so existing callers keep working.
        part_path: Fragment inserted into the visualisation file name.
        draw: When true, the predictions are drawn on a copy of the image
            and written to ``visuals/<part_path>_predictions.png``.

    Returns:
        ``(points, confidence)`` as NumPy arrays: ``points`` holds columns
        1 and 2 of every prediction whose column 0 exceeds 0.1, and
        ``confidence`` holds column 0 of those same predictions.
    """
    batch = org_img.transpose([2, 1, 0])[None, ...].astype(np.float32)
    # Map pixel values from [0, 255] to roughly [-1, 1].
    img = torch.from_numpy(batch) / 128.0 - 1.0

    # Only move to the GPU when one exists so CPU-only hosts do not crash.
    if torch.cuda.is_available():
        img = img.cuda()

    # ``volatile=True`` is a no-op on PyTorch >= 0.4; ``no_grad`` is the
    # supported way to run inference without building an autograd graph.
    with torch.no_grad():
        predictions = sol(img)

    if draw:
        # Undo the normalisation to recover a drawable uint8 image.
        canvas = img[0].data.cpu().numpy().transpose([2, 1, 0])
        canvas = ((canvas + 1) * 128).astype(np.uint8).copy()
        canvas = drawing.draw_sol_torch(predictions, canvas)
        cv2.imwrite("visuals/{}_predictions.png".format(part_path), canvas)

    predictions = predictions.data.cpu().numpy()
    # Drop predictions with confidence of 0.1 or less; the boolean mask also
    # flattens the batch axis, leaving a 2-D (prediction, field) array.
    predictions = predictions[predictions[:, :, 0] > 0.1]

    # Per the original author: each row is confidence followed by two (x, y)
    # pairs; only the first pair is used since the second is probably a
    # repeat.
    confidence = predictions[:, 0]
    return predictions[:, 1:3], confidence
Exemplo n.º 2
0
    def get_predictions(self, org_img, s, write_path=''):
        """Run ``self.network`` on one image and return points and confidences.

        Args:
            org_img: Image as a NumPy array with three axes. Axes are
                reversed (``transpose([2, 1, 0])``) and a leading batch axis
                is added before the array is passed to the network.
                NOTE(review): presumably an OpenCV ``H x W x C`` image --
                confirm against the caller.
            s: Not read by this method; kept for interface compatibility.
            write_path: Destination for a visualisation image. An empty
                string (the default) or ``None`` disables the visualisation.

        Returns:
            ``(points, confidence)`` as NumPy arrays: ``points`` is columns
            1 and 2 of every prediction row, ``confidence`` is column 0.
            No confidence threshold is applied here.
        """
        img = org_img.transpose([2, 1, 0])[None, ...]
        # Map pixel values from [0, 255] to roughly [-1, 1].
        img = torch.from_numpy(img.astype(np.float32)) / 128.0 - 1.0

        if torch.cuda.is_available():
            img = img.cuda()

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            predictions = self.network(img)

        # Truthiness instead of ``is not ''``: identity comparison against a
        # string literal is unreliable and a SyntaxWarning on Python 3.8+.
        if write_path:
            # Undo the normalisation to recover a drawable uint8 image.
            canvas = img[0].data.cpu().numpy().transpose([2, 1, 0])
            canvas = ((canvas + 1) * 128).astype(np.uint8).copy()
            canvas = drawing.draw_sol_torch(predictions, canvas)
            cv2.imwrite(write_path, canvas)

        predictions = predictions.squeeze(0).data.cpu().numpy()

        # Per the original author: each row is confidence followed by two
        # (x, y) pairs; only the first pair is used since the second is
        # probably a repeat.
        confidence = predictions[:, 0]
        return predictions[:, 1:3], confidence
Exemplo n.º 3
0
def train(config):
    """Run one pass over the SOL training loader and return the mean loss.

    ``config`` is not read here. The function relies on names defined
    outside this block: ``pretrain_config``, ``solf``, ``optimizer``,
    ``dtype``, ``alignment_loss``, ``alpha_alignment`` and ``alpha_backprop``.

    For every batch the predictions are drawn on the first image of the
    batch and written to ``snapshots/sol_train/<step>.png`` before the
    optimizer step.

    Returns:
        The summed per-batch loss divided by the number of batches.
    """
    file_list = load_file_list(pretrain_config['training_set'])
    sol_cfg = pretrain_config['sol']

    dataset = SolDataset(
        file_list,
        rescale_range=sol_cfg['training_rescale_range'],
        transform=CropTransform(sol_cfg['crop_params']))

    loader = DataLoader(
        dataset,
        batch_size=sol_cfg['batch_size'],
        shuffle=True,
        num_workers=0,
        collate_fn=sol.sol_dataset.collate)

    # Cap the number of batches drawn per call.
    batches_per_epoch = int(sol_cfg['images_per_epoch'] /
                            sol_cfg['batch_size'])
    loader = DatasetWrapper(loader, batches_per_epoch)

    if not os.path.exists("snapshots/sol_train"):
        os.makedirs("snapshots/sol_train")

    solf.train()
    total_loss = 0.0
    n_batches = 0.0

    for step_i, batch in enumerate(loader):
        img = Variable(batch['img'].type(dtype), requires_grad=False)

        # A batch without ground-truth positions carries None here; the
        # alignment loss is expected to accept None as-is.
        gt = batch['sol_gt']
        sol_gt = (None if gt is None
                  else Variable(gt.type(dtype), requires_grad=False))

        predictions = solf(img)
        loss = alignment_loss(predictions, sol_gt, batch['label_sizes'],
                              alpha_alignment, alpha_backprop)

        # Undo the input normalisation so the first image can be drawn on.
        canvas = img[0].data.cpu().numpy().transpose([2, 1, 0])
        canvas = ((canvas + 1) * 128).astype(np.uint8).copy()
        canvas = drawing.draw_sol_torch(predictions, canvas)
        cv2.imwrite("snapshots/sol_train/{}.png".format(step_i), canvas)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.data.cpu().numpy()
        n_batches += 1

        # Drop the references before collecting so the memory can be freed.
        predictions = loss = None
        gc.collect()

    return total_loss / n_batches
Exemplo n.º 4
0
    def val():
        """Evaluate the SOL network on the validation set; return mean loss.

        Relies on names from the enclosing scope: ``config``,
        ``pretrain_config``, ``solf``, ``dtype``, ``alignment_loss``,
        ``alpha_alignment`` and ``alpha_backprop``.

        For every batch the predictions are drawn on the first image and
        written to ``snapshots/<prefix>_val/<step>.png``, where ``<prefix>``
        is the part of ``config['evaluation']['output_path']`` before the
        first underscore.

        Returns:
            The summed per-batch loss divided by the number of batches.
        """
        test_set_list = load_file_list(pretrain_config['validation_set'])
        test_dataset = SolDataset(
            test_set_list,
            rescale_range=pretrain_config['sol']['validation_rescale_range'],
            transform=None)
        test_dataloader = DataLoader(test_dataset,
                                     batch_size=1,
                                     shuffle=False,
                                     num_workers=0,
                                     collate_fn=sol.sol_dataset.collate,
                                     pin_memory=True)

        writep = config['evaluation']['output_path'].split('_')[0]
        writep = 'snapshots/{}_val'.format(writep)
        if not os.path.exists(writep):
            os.makedirs(writep)

        solf.eval()
        sum_loss = 0.0
        steps = 0.0

        for step_i, x in enumerate(test_dataloader):
            img = x['img'].type(dtype)

            # Reset on every iteration. Without this, a batch that has no
            # ground truth would either hit an unbound name (first batch) or
            # silently reuse the previous batch's ground truth in the loss.
            sol_gt = None
            if x['sol_gt'] is not None:
                # None means there are no GT positions in the image; the
                # alignment loss is expected to handle None correctly.
                sol_gt = x['sol_gt'].type(dtype)

            predictions = solf(img)
            loss = alignment_loss(predictions, sol_gt, x['label_sizes'],
                                  alpha_alignment, alpha_backprop)

            # Undo the input normalisation and write a visualisation image.
            org_img = img[0].data.cpu().numpy().transpose([2, 1, 0])
            org_img = ((org_img + 1) * 128).astype(np.uint8)
            org_img = org_img.copy()
            org_img = drawing.draw_sol_torch(predictions, org_img)
            cv2.imwrite(os.path.join(writep, "{}.png".format(step_i)), org_img)

            sum_loss += loss.data.cpu().numpy()
            steps += 1

            # Drop the references before collecting so memory can be freed.
            predictions = None
            loss = None

            gc.collect()

        return sum_loss / steps
Exemplo n.º 5
0
def main():
  """Draw SOL predictions for every readable image in the validation folder.

  Reads the YAML config named by ``sys.argv[1]``, builds the model with
  ``continuous_state.init_model``, and for each file in
  ``config['pretraining']['validation_set']['img_folder']`` that OpenCV can
  decode: resizes it to the configured width (keeping the aspect ratio),
  runs the network, and writes the image with predictions drawn on it to
  ``data/<prefix>_val/<file name>``. ``<prefix>`` is the part of
  ``config['evaluation']['output_path']`` before the first underscore.
  """
  with open(sys.argv[1]) as f:
    # safe_load: plain ``yaml.load`` without a Loader can construct
    # arbitrary objects and is rejected outright by PyYAML >= 6.
    config = yaml.safe_load(f)

  pretrain_config = config['pretraining']
  eval_folder = pretrain_config['validation_set']['img_folder']

  solf = continuous_state.init_model(config)

  if torch.cuda.is_available():
    print("Using GPU")
    solf.cuda()
    dtype = torch.cuda.FloatTensor
  else:
    print("Warning: Not using a GPU, untested")
    dtype = torch.FloatTensor

  # Inference only: switch to eval mode, as the validation routine does.
  solf.eval()

  writep = config['evaluation']['output_path'].split('_')[0]
  writep = 'data/{}_val'.format(writep)
  if not os.path.exists(writep):
    os.makedirs(writep)

  # The target width is the same for every image, so look it up once.
  target_dim1 = pretrain_config['sol']['validation_rescale_range'][0]

  for fil in os.listdir(eval_folder):
    org_img = cv2.imread(os.path.join(eval_folder, fil), cv2.IMREAD_COLOR)
    if org_img is None:
      # Not a decodable image; skip it.
      continue

    # Scale the height by the same factor as the width.
    target_dim0 = int(org_img.shape[0] / float(org_img.shape[1]) * target_dim1)
    org_img = cv2.resize(org_img, (target_dim1, target_dim0),
                         interpolation=cv2.INTER_CUBIC)

    img = org_img.transpose([2, 1, 0])[None, ...]
    img = img.astype(np.float32)
    # Map pixel values from [0, 255] to roughly [-1, 1].
    img = torch.from_numpy(img) / 128.0 - 1.0
    img = Variable(img.type(dtype), requires_grad=False)

    predictions = solf(img)

    # Undo the normalisation to recover a drawable uint8 image.
    org_img = img[0].data.cpu().numpy().transpose([2, 1, 0])
    org_img = ((org_img + 1) * 128).astype(np.uint8).copy()
    org_img = drawing.draw_sol_torch(predictions, org_img)
    predictions = None
    cv2.imwrite(os.path.join(writep, fil), org_img)
Exemplo n.º 6
0
def val(config):
    """Write SOL prediction visualisations for the listed validation images.

    Relies on names defined outside this block: ``pretrain_config``,
    ``eval_folder``, ``solf`` and ``dtype``. The JSON file at
    ``pretrain_config['validation_set']['file_list']`` is loaded and element
    ``[1]`` of each entry is used as the image file name inside
    ``eval_folder``. Images OpenCV cannot read are skipped. Results go to
    ``snapshots/<prefix>_val/<file name>``, where ``<prefix>`` is the part
    of ``config['evaluation']['output_path']`` before the first underscore.
    Nothing is returned.
    """
    out_prefix = config['evaluation']['output_path'].split('_')[0]
    out_dir = 'snapshots/{}_val'.format(out_prefix)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    with open(pretrain_config['validation_set']['file_list'], 'r') as f:
        evals = json.load(f)

    solf.eval()

    for entry in evals:
        file_name = entry[1]
        frame = cv2.imread(os.path.join(eval_folder, file_name),
                           cv2.IMREAD_COLOR)
        if frame is None:
            continue

        # Resize to the configured width, scaling the height to match.
        width = pretrain_config['sol']['validation_rescale_range'][0]
        height = int(frame.shape[0] / float(frame.shape[1]) * width)
        frame = cv2.resize(frame, (width, height),
                           interpolation=cv2.INTER_CUBIC)

        # Reverse the axes, add a batch axis, and map [0, 255] to about
        # [-1, 1].
        tensor = torch.from_numpy(
            frame.transpose([2, 1, 0])[None, ...].astype(np.float32))
        tensor = tensor / 128.0 - 1.0
        img = Variable(tensor.type(dtype), requires_grad=False)

        predictions = solf(img)

        # Undo the normalisation to recover a drawable uint8 image.
        canvas = img[0].data.cpu().numpy().transpose([2, 1, 0])
        canvas = ((canvas + 1) * 128).astype(np.uint8).copy()
        canvas = drawing.draw_sol_torch(predictions, canvas)
        cv2.imwrite(os.path.join(out_dir, file_name), canvas)
        predictions = None