Example #1
# Shared imports assumed by the snippets below (not shown in the original examples);
# T here is taken to be detectron2's transform module, which provides ScaleTransform.
import os

import numpy as np
from PIL import Image
from detectron2.data import transforms as T


def img_rescale(PNG_img, height, width):
    """
    This function rescales an image to designated height and width,
    returns new img as np array, and save the resacled image as
    .png file in the directory ./scaled_input_image as scaled_PNG_img.
    
    Inputs:
        img - .png image files
        height - expected height after conversion in unit of pixels
        width - expected width after conversion in unit of pixels
    
    Output:
        new_img - an nd array of the rescaled image 
    """
    if not os.path.exists("scaled_input_image"):
        os.mkdir(
            "scaled_input_image")  # new directory to store the scaled images
    img = Image.open(PNG_img)
    img_array = np.array(img)
    scale = T.ScaleTransform(
        np.shape(img_array)[0],
        np.shape(img_array)[1], height, width)
    new_img = scale.apply_image(img_array, "bilinear")
    print("original image shape:", img_array.shape,
          "is scaled to new image shape", new_img.shape)
    img2save = Image.fromarray(new_img)
    img2save.save("./scaled_input_image/scaled_" + PNG_img)
    #plt.imshow(new_img)
    return new_img
Example #2
def img_rescale(PNG_img, height, width):
    """
    This function rescales an image to designated height and width,
    returns new img as np array, and save the resacled image as
    .png file in the directory ./scaled_input_image as scaled_PNG_img.
    
    Inputs:
        img - .png image files
        height - expected height after conversion in unit of pixels
        width - expected width after conversion in unit of pixels
    
    Output:
        new_img - an nd array of the rescaled image
        A rescaled .png image saved in ./scaled_input_image directory
    """
    # Assert input is PNG
    assert os.path.splitext(
        PNG_img
    )[1] == ".png", "Input has the wrong file type. A .png image is required."
    if not os.path.exists("scaled_input_image"):
        os.mkdir(
            "scaled_input_image")  # new directory to store the scaled images
    img = Image.open(PNG_img)
    img_array = np.array(img)
    scale = T.ScaleTransform(
        np.shape(img_array)[0],
        np.shape(img_array)[1], height, width)
    new_img = scale.apply_image(img_array, "bilinear")
    print("original image shape:", img_array.shape,
          "is scaled to new image shape", new_img.shape)
    img2save = Image.fromarray(new_img)
    img_name = os.path.split(PNG_img)[-1]
    img2save.save("./scaled_input_image/scaled_" + img_name)
    return new_img
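A minimal usage sketch for img_rescale, assuming the imports shown in Example #1 and a file named input.png in the working directory (both assumptions, not part of the original snippets):

rescaled = img_rescale("input.png", height=256, width=256)
# e.g. (256, 256, 3) for an RGB input; the call also writes ./scaled_input_image/scaled_input.png
print(rescaled.shape)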
Example #3
def img_mask_rescale(img, polygons, height, width):
    """
    Rescale image and masks to designated height and width.
    Return new img as np array, new maks as a list of np array
    """
    scale = T.ScaleTransform(np.shape(img)[0], np.shape(img)[1], height, width)
    new_img = scale.apply_image(img, "bilinear")
    new_poly = scale.apply_polygons(polygons)
    return new_img, new_poly
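A short usage sketch for img_mask_rescale; the dummy image and polygon below are illustrative assumptions:

import numpy as np
from detectron2.data import transforms as T

img = np.zeros((300, 400, 3), dtype=np.uint8)  # hypothetical 300x400 RGB image
polygons = [np.array([[10.0, 10.0], [100.0, 10.0], [50.0, 80.0]])]  # one triangle, (x, y) pairs
new_img, new_polys = img_mask_rescale(img, polygons, height=600, width=800)
print(new_img.shape)  # (600, 800, 3)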
Example #4
    def test_transform_RLE_resize(self):
        transforms = T.TransformList(
            [T.HFlipTransform(400), T.ScaleTransform(300, 400, 400, 400, "bilinear")]
        )
        mask = np.zeros((300, 400), order="F").astype("uint8")
        mask[:, :200] = 1

        anno = {
            "bbox": np.asarray([10, 10, 200, 300]),
            "bbox_mode": BoxMode.XYXY_ABS,
            "segmentation": mask_util.encode(mask[:, :, None])[0],
            "category_id": 3,
        }
        output = detection_utils.transform_instance_annotations(
            copy.deepcopy(anno), transforms, (400, 400)
        )

        inst = detection_utils.annotations_to_instances(
            [output, output], (400, 400), mask_format="bitmask"
        )
        self.assertTrue(isinstance(inst.gt_masks, BitMasks))
Example #5
  def train_mapper(self,dataset_dict):#,dataset_used):
    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    # Create a copy of the dataset dict
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below


    ##### Image Transformations #####
    # Read in the image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # fileName = dataset_dict["file_name"]

    
    ## Crop to bounding box ##
    # Crop for all but comparison
    if(self.dataset_used != "comparison" and self.is_crop_to_bbox):
      # Get the bounding box
      bbox = ((dataset_dict["annotations"])[0])["bbox"]
      xmin,ymin,xmax,ymax = bbox
      w = xmax-xmin
      h = ymax-ymin

      # IsCropToBBox = True
      # if(IsCropToBBox):
      # Nudge the crop to be slightly outside of the bounding box
      nudgedXMin = xmin-15
      nudgedYMin = ymin-15
      nudgedW = w+50
      nudgedH = h+50

      # If the bounding boxes go outside of the image dimensions, fix this
      imageHeight = image.shape[0]
      imageWidth  = image.shape[1]
      if(nudgedXMin < 0): nudgedXMin = 0
      if(nudgedYMin < 0): nudgedYMin = 0
      if(nudgedXMin+nudgedW >= imageWidth):  nudgedW = imageWidth-1
      if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1

      # Apply the crop
      cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH)
      image = cropT.apply_image(image)
      
      transforms = T.TransformList([cropT])
    # Comparison has a bbox the size of the image, so don't bother cropping
    else:
      # scaled between 0.5 and 1; shifted up to 0.5 in each dimension
      # randomExtant = T.RandomExtent( (0.5,1),(0.5,0.5) )
      # transforms = T.TransformList([randomExtant])
      transforms = T.TransformList([])

    # Apply the crop to the bbox as well
    # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"]  = image.shape[1]
    
    # Add to the list of transforms
    
    # else:
    #   nudgedH = dataset_dict["height"]
    #   nudgedW = dataset_dict["width"]
      

    ## Scale the image size ##
    # thresholdDimension = 1000
    # if(dataset_used == "large"):
      # thresholdDimension = 500
    # thresholdDimension = 800
    # thresholdDimension = 600
    thresholdDimension = self.threshold_dimension

    currWidth  = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # NOTE: YOLO input size must be multiple of 32
    if(self.modelLink in ["VGG19_BN","YOLOV3"]):
      vgg_im_size = thresholdDimension
      # Apply the scaling transform
      scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") 
      image = scaleT.apply_image(image.copy())

      # Apply the scaling to the bbox
      # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

      # Add this to the list of transforms
      transforms = transforms + scaleT

      # Set the dimensions
      dataset_dict["height"] = image.shape[0]
      dataset_dict["width"]  = image.shape[1]
    else:  # Downscale only past this threshold
      if(currHeight > thresholdDimension or currWidth > thresholdDimension):
        myNewH = 0
        myNewW = 0
        # Scale the longest dimension to threshold, other in proportion
        if(currHeight > currWidth): 
          myNewH = thresholdDimension
          ratio = currHeight/float(myNewH)
          myNewW = currWidth/float(ratio)
          myNewW = int(round(myNewW))
          # myNewW = 800
        else:
          # myNewH = 800
          
          myNewW = thresholdDimension
          ratio = currWidth/float(myNewW)
          myNewH = currHeight/float(ratio)
          myNewH = int(round(myNewH))
 

        # Apply the scaling transform
        if(self.fixed_wh):
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        else:
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        image = scaleT.apply_image(image.copy())

        # Apply the scaling to the bbox
        # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

        # Add this to the list of transforms
        transforms = transforms + scaleT

        # Set the dimensions
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"]  = image.shape[1]
    
    ## Apply a random flip ##
    image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    transforms = transforms + tfms

    # Apply Other Transforms ##
    # Standard random image mods
    if(self.dataset_used != "comparison"):
      image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image)
    # More extreme for comparison set
    else:
      image, tfms = T.apply_transform_gens([T.RandomBrightness(0.2,1.8),T.RandomContrast(0.2,1.8),T.RandomSaturation(0.3,1.7),T.RandomLighting(1.5)], image)
    transforms = transforms + tfms

    ## Apply random affine (actually just a shear) ##
    # Pass in the image size
    PILImage = Image.fromarray(image)

    # Standard affine
    if(self.dataset_used != "comparison"):
      shear_range = 8
      angle_range = 30
      # rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-8,8))
      # rand_angle = np.random.uniform(-30,30)
    # More extreme random affine for comparison
    else:
      shear_range = 50
      angle_range = 30
      # rand_shear = (np.random.uniform(-30,30),np.random.uniform(-30,30))
      # rand_angle = np.random.uniform(-70,70)

    rand_shear = (np.random.uniform(-shear_range,shear_range),np.random.uniform(-shear_range,shear_range))
    rand_angle = np.random.uniform(-angle_range,angle_range)

    RandAffT = RandomAffineTransform(PILImage.size,shear=rand_shear,angle=rand_angle)
    # Apply affine to image
    image = RandAffT.apply_image(image.copy())
    # Append to transforms
    transforms = transforms + RandAffT

    ##### END Image Transformations #####

    # Keep these in for now I suppose
    if(image.shape[0] == 0):
      raise ValueError("image shape[0] is 0!: " + str(image.shape) + " " + dataset_dict["file_name"])
    if(image.shape[1] == 0):
      raise ValueError("image shape[1] is 0!: " + str(image.shape) + " " + dataset_dict["file_name"])

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))


    # Do remainder of dictionary
    classID = ((dataset_dict["annotations"])[0])["category_id"]
    dataset_dict["classID"] = classID

    # bboxes
    # if(self.dataset_used != "comparison"):
    annos = \
    [
      utils.transform_instance_annotations(obj, transforms, image.shape[:2])
      for obj in dataset_dict.pop("annotations")
      if obj.get("iscrowd", 0) == 0
    ]

    # transformNames = [transforms.__name__ for x in transforms]
    # transformNames = ", ".join(transformNames)

    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)
    # # no bboxes
    # else:
    #   instances = Instances(  (dataset_dict["height"],dataset_dict["width"])  )
    #   instances.gt_classes = torch.tensor([dataset_dict["classID"]])
    #   dataset_dict["instances"] = instances

    dataset_dict["transforms"] = transforms

    return dataset_dict
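A sketch of how a train-time mapper like the one above is commonly wired into detectron2's data loading; the instance name mapper_obj and the cfg object are assumptions, not part of the original code:

from detectron2.data import build_detection_train_loader

# mapper_obj is assumed to be an instance of the class that defines train_mapper above
train_loader = build_detection_train_loader(cfg, mapper=mapper_obj.train_mapper)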
Example #6
  def test_mapper(self,dataset_dict):#,dataset_used):
    # If we're mapping at test time
    if(self.is_test_time_mapping):
      return self.train_mapper(dataset_dict)



    # Implement a mapper, similar to the default DatasetMapper, but with your own customizations
    # Create a copy of the dataset dict
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below


    ##### Image Transformations #####
    # Read in the image
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    # fileName = dataset_dict["file_name"]

    
    ## Crop to bounding box ##
    if(self.dataset_used != "comparison" and self.is_crop_to_bbox):
      # Get the bounding box
      bbox = ((dataset_dict["annotations"])[0])["bbox"]
      xmin,ymin,xmax,ymax = bbox
      w = xmax-xmin
      h = ymax-ymin

      # IsCropToBBox = True
      # if(IsCropToBBox):
      # Nudge the crop to be slightly outside of the bounding box
      nudgedXMin = xmin-15
      nudgedYMin = ymin-15
      nudgedW = w+50
      nudgedH = h+50

      # If the bounding boxes go outside of the image dimensions, fix this
      imageHeight = image.shape[0]
      imageWidth  = image.shape[1]
      if(nudgedXMin < 0): nudgedXMin = 0
      if(nudgedYMin < 0): nudgedYMin = 0
      if(nudgedXMin+nudgedW >= imageWidth):  nudgedW = imageWidth-1
      if(nudgedYMin+nudgedH >= imageHeight): nudgedH = imageHeight-1

      # Apply the crop
      cropT = T.CropTransform(nudgedXMin,nudgedYMin,nudgedW,nudgedH)
      image = cropT.apply_image(image)
      
      transforms = T.TransformList([cropT])
    # else:
      # nudgedH = dataset_dict["height"]
      # nudgedW = dataset_dict["width"]
    else:
      transforms = T.TransformList([])

      
    # Apply the crop to the bbox as well
    # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

    dataset_dict["height"] = image.shape[0]
    dataset_dict["width"]  = image.shape[1]
    
    # Add to the list of transforms
    
    # else:
    #   nudgedH = dataset_dict["height"]
    #   nudgedW = dataset_dict["width"]
      

    ## Scale the image size ##
    # thresholdDimension = 1000
    # if(dataset_used == "large"):
      # thresholdDimension = 500
    # thresholdDimension = 800
    # thresholdDimension = 600
    thresholdDimension = self.threshold_dimension

    currWidth  = dataset_dict["width"]
    currHeight = dataset_dict["height"]

    # The way I've done VGG and YOLO means they need the same size images
    if(self.modelLink in ["VGG19_BN","YOLOV3"]):
      vgg_im_size = thresholdDimension
      # Apply the scaling transform
      scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=vgg_im_size,new_w=vgg_im_size,interp="nearest") 
      image = scaleT.apply_image(image.copy())

      # Apply the scaling to the bbox
      # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

      # Add this to the list of transforms
      transforms = transforms + scaleT

      # Set the dimensions
      dataset_dict["height"] = image.shape[0]
      dataset_dict["width"]  = image.shape[1]
    # Not VGG or YOLO: downscale only past this threshold
    else:
      if(currHeight > thresholdDimension or currWidth > thresholdDimension):
        myNewH = 0
        myNewW = 0
        # Scale the longest dimension to the threshold, the other in proportion
        if(currHeight > currWidth): 
          myNewH = thresholdDimension
          ratio = currHeight/float(myNewH)
          myNewW = currWidth/float(ratio)
          myNewW = int(round(myNewW))
          # myNewW = 800
        else:
          # myNewH = 800
          myNewW = thresholdDimension
          ratio = currWidth/float(myNewW)
          myNewH = currHeight/float(ratio)
          myNewH = int(round(myNewH))

        # Apply the scaling transform
        # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        # scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        if(self.fixed_wh):
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewH,new_w=myNewW,interp="nearest") 
        else:
          scaleT = T.ScaleTransform(h=currHeight,w=currWidth,new_h=myNewW,new_w=myNewH,interp="nearest") 
        image = scaleT.apply_image(image.copy())

        # Apply the scaling to the bbox
        # THIS IS HANDLED IN annotations_to_instances, so long as this is appended to the list of transforms

        # Add this to the list of transforms
        transforms = transforms + scaleT

        # Set the dimensions
        dataset_dict["height"] = image.shape[0]
        dataset_dict["width"]  = image.shape[1]
    
    ## Apply a random flip ##
    # image, tfms = T.apply_transform_gens([T.RandomFlip()], image)
    # transforms = transforms + tfms

    # Apply Other Transforms ##
    # image, tfms = T.apply_transform_gens([T.RandomBrightness(0.4,1.6),T.RandomContrast(0.4,1.6),T.RandomSaturation(0.5,1.5),T.RandomLighting(1.2)], image)
    # transforms = transforms + tfms

    ## Apply random affine (actually just a shear) ##
    # Pass in the image size
    # PILImage = Image.fromarray(image)
    # RandAffT = RandomAffineTransform(PILImage.size)
    # Apply affine to image
    # image = RandAffT.apply_image(image.copy())
    # Append to transforms
    # transforms = transforms + RandAffT

    ##### END Image Transformations #####

    # Keep these in for now I suppose
    if(image.shape[0] == 0):
      raise ValueError("image shape[0] is 0!: " + str(image.shape) + " " + dataset_dict["file_name"])
    if(image.shape[1] == 0):
      raise ValueError("image shape[1] is 0!: " + str(image.shape) + " " + dataset_dict["file_name"])

    # Set the image in the dictionary
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))


    # Do remainder of dictionary
    classID = ((dataset_dict["annotations"])[0])["category_id"]
    dataset_dict["classID"] = classID

    annos = \
    [
      utils.transform_instance_annotations(obj, transforms, image.shape[:2])
      for obj in dataset_dict.pop("annotations")
      if obj.get("iscrowd", 0) == 0
    ]

    # transformNames = [transforms.__name__ for x in transforms]
    # transformNames = ", ".join(transformNames)

    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = utils.filter_empty_instances(instances)

    dataset_dict["transforms"] = transforms

    return dataset_dict

  # # Small mappers
  # def small_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"small")

  # def small_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"small")
  #   else:
  #     return self.train_mapper(dataset_dict,"small")

  # # Large mappers
  # def large_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"large")

  # def large_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"large")
  #   else:
  #     return self.train_mapper(dataset_dict,"large")

  # # Full mappers
  # def full_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"full")

  # def full_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"full")
  #   else:
  #     return self.train_mapper(dataset_dict,"full")

  # # Comparison mappers
  # def comparison_train_mapper(self,dataset_dict):
  #   return self.train_mapper(dataset_dict,"comparison")

  # def comparison_test_mapper(self,dataset_dict):
  #   if(self.is_test_time_mapping):
  #     return self.test_mapper(dataset_dict,"comparison")
  #   else:
  #     return self.train_mapper(dataset_dict,"comparison")
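A matching sketch for evaluation, again with hypothetical names (cfg, "my_dataset_val", mapper_obj); build_detection_test_loader accepts a per-sample mapper in the same way:

from detectron2.data import build_detection_test_loader

val_loader = build_detection_test_loader(cfg, "my_dataset_val", mapper=mapper_obj.test_mapper)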