def _preprocess_img(img_bytes):
    """Load and transform image from its raw bytes to a keras-friendly np array.

    If the image cannot be read, it prints an error message to stderr and
    returns an array of all zeros.

    Arguments
        img_bytes: a Bytes object containing the bytes for a single image.

    Returns
        img: numpy array with shape (224, 224, 3).
    """
    # Read from bytes to numpy array.
    try:
        img = imread(BytesIO(img_bytes))
        # Explicit check rather than `assert`: assertions are stripped under
        # `python -O`, which would let a non-array value reach `resize`.
        if not isinstance(img, np.ndarray):
            raise ValueError(
                "decoder returned {} instead of a numpy array".format(
                    type(img).__name__))
    except (ValueError, AssertionError) as ex:
        print("Error reading image, returning zeros:", ex, file=stderr)
        return np.zeros((224, 224, 3), dtype=np.uint8)

    # Extremely fast resize using lycon library.
    img = resize(img, 224, 224, interpolation=0)

    # Regular image: return.
    if img.shape[-1] == 3:
        return img

    # Grayscale image: repeat up to 3 channels.
    if len(img.shape) == 2:
        return np.repeat(img[:, :, np.newaxis], 3, -1)

    # Other image: repeat first channel 3 times.
    return np.repeat(img[:, :, :1], 3, -1)
def test_resize(self):
    """Compare ``lycon.resize`` against ``cv2.resize`` element by element.

    A random RGB image is tried in its own dtype plus float32, float64 and
    int16, at three target sizes (unchanged, 3x larger, half size) and with
    every member of ``lycon.Interpolation``.
    """
    base = random_rgb_image()
    candidates = [base] + [
        base.astype(dtype) for dtype in (np.float32, np.float64, np.int16)
    ]

    base_hw = base.shape[:2]
    target_shapes = [
        # No change
        base_hw,
        # Upsample
        tuple(int(v) for v in np.array(base_hw) * 3),
        # Downsample
        tuple(int(v) for v in np.array(base_hw) / 2),
    ]

    for candidate in candidates:
        for (height, width) in target_shapes:
            for mode in lycon.Interpolation:
                expected = cv2.resize(candidate, (width, height),
                                      interpolation=mode)
                actual = lycon.resize(candidate,
                                      width=width,
                                      height=height,
                                      interpolation=mode)
                message = 'Mismatch for dtype={}, interp={}, size=({}, {})'.format(
                    candidate.dtype, mode, width, height)
                np.testing.assert_array_equal(expected, actual,
                                              err_msg=message)
def preprocess_frame(frame, FRAME_SIZE):
    """Shrink a frame and collapse its last axis into 8-bit grayscale.

    Arguments
        frame: image array handed to ``lycon.resize``.
        FRAME_SIZE: indexable whose item 0 is the target width and item 1
            the target height.

    Returns
        The resized frame averaged over its last axis, cast to ``np.uint8``.
    """
    target_width, target_height = FRAME_SIZE[0], FRAME_SIZE[1]
    small = lycon.resize(frame,
                         width=target_width,
                         height=target_height,
                         interpolation=lycon.Interpolation.NEAREST)
    channel_mean = small.mean(axis=-1)
    return channel_mean.astype(np.uint8)
def load(self, fn):
    """Load an image, resize it to a square and rescale its values.

    Arguments
        fn: path of the image file, passed to ``lycon.load``.

    Returns
        A float32 array of side ``args.image_size`` computed as
        ``pixel / 127.5 - 1`` (i.e. [-1, 1] for 8-bit input), or ``None``
        when the loader returned ``None``.
    """
    img = lycon.load(fn)
    # The failure check must precede the resize; previously the resize ran
    # first, so a failed load reached `lycon.resize` before being reported.
    if img is None:
        print('failed to load', fn)
        return None
    img = lycon.resize(img, args.image_size, args.image_size)
    return (img.astype(np.float32) / 127.5) - 1
def preprocess(self, observation):
    """Resize an observation and reshape it to ``self.INPUT_SHAPE``.

    The target width and height are ``self.FRAME_SIZE[0]`` and
    ``self.FRAME_SIZE[1]``; nearest-neighbour interpolation is used.
    """
    width, height = self.FRAME_SIZE[0], self.FRAME_SIZE[1]
    resized = lycon.resize(observation,
                           width=width,
                           height=height,
                           interpolation=lycon.Interpolation.NEAREST)
    return resized.reshape(self.INPUT_SHAPE)
def pre_processing(self, img_matrix):
    """Turn an image array into a normalised channels-first float tensor.

    The image is resized to ``self.image_size`` on both sides, its axes are
    reordered with ``(2, 0, 1)``, values are divided by 255, and each slice
    along the first axis is normalised in place with the matching entries of
    ``self.mean`` and ``self.std``.
    """
    side = self.image_size
    resized = lycon.resize(img_matrix, side, side)
    channels_first = resized.transpose((2, 0, 1))
    tensor = torch.from_numpy(channels_first).float().div(255)
    # Each `channel` is a view into `tensor`, so the in-place ops update it.
    for channel, mean, std in zip(tensor, self.mean, self.std):
        channel.sub_(mean)
        channel.div_(std)
    return tensor
def resize_and_write(im, side, out_filepath):
    """Write ``im`` to ``out_filepath``, first forcing it to ``side`` x ``side``.

    The image is resized with cubic interpolation only when its first two
    dimensions are not already both equal to ``side``.
    """
    already_sized = im.shape[0] == side and im.shape[1] == side
    if not already_sized:
        # Resize to (side, side)
        im = lycon.resize(im,
                          width=side,
                          height=side,
                          interpolation=lycon.Interpolation.CUBIC)
    imageio.imwrite(out_filepath, im)
    print('Wrote `%s`.' % out_filepath)
def convert_string_to_np_array(filename, img_height, img_width):
    """Load every image named in a listing file, together with its label.

    Each non-blank line of ``filename`` is split on single spaces: field 0
    is the image path and field 1 the label.

    Arguments
        filename: path of the text listing.
        img_height: height every image is resized to.
        img_width: width every image is resized to.

    Returns
        ``(images, labels)`` as numpy arrays. Labels are the raw label
        strings without the line terminator.
    """
    train_data, labels = [], []
    with open(filename, 'r') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline) carry no entry.
            if not line.strip():
                continue
            # Split once and drop the line terminator so that a label in the
            # last field does not end with "\n".
            fields = line.rstrip('\r\n').split(" ")
            image = lycon.resize(lycon.load(fields[0]),
                                 width=img_width,
                                 height=img_height,
                                 interpolation=lycon.Interpolation.CUBIC)
            train_data.append(image)
            labels.append(fields[1])
    return np.array(train_data), np.array(labels)
def preprocess_image(image):
    """Crop rows 50..139 of ``image`` and resize the crop to 200 x 66.

    Cubic interpolation is used; the result has width 200 and height 66.
    """
    target_width, target_height = 200, 66
    cropped = image[50:140, :, :]
    return lycon.resize(cropped,
                        width=target_width,
                        height=target_height,
                        interpolation=lycon.Interpolation.CUBIC)
def benchmark_resize(img):
    """Benchmark resizing ``img`` with several libraries and interpolations.

    Two target sizes are tried (double and half the input size). For each
    interpolation name, every library that has an entry in the table below
    contributes one ``(label, thunk)`` pair, and the pairs are handed to
    ``benchmark``.
    """
    h, w = img.shape[:2]
    new_sizes = [(2 * w, 2 * h), (int(w / 2), int(h / 2))]

    # interpolation name -> library tag -> that library's mode constant.
    interpolations = {
        'nearest': {
            'Lycon': lycon.Interpolation.NEAREST,
            'OpenCV': cv2.INTER_NEAREST,
            'PIL': PIL.Image.NEAREST,
            'SKImage': 0
        },
        'bilinear': {
            'Lycon': lycon.Interpolation.LINEAR,
            'OpenCV': cv2.INTER_LINEAR,
            'PIL': PIL.Image.BILINEAR,
            'SKImage': 1
        },
        'bicubic': {
            'Lycon': lycon.Interpolation.CUBIC,
            'OpenCV': cv2.INTER_CUBIC,
            'PIL': PIL.Image.BICUBIC,
            'SKImage': 3
        },
        'lanczos': {
            'Lycon': lycon.Interpolation.LANCZOS,
            'OpenCV': cv2.INTER_LANCZOS4,
            'PIL': PIL.Image.LANCZOS,
        },
        'area': {
            'Lycon': lycon.Interpolation.AREA,
            'OpenCV': cv2.INTER_AREA,
        }
    }

    for w, h in new_sizes:
        for interp in interpolations:
            modes = interpolations[interp]

            def msg(tag):
                return '[RESIZE ({} - {} x {})] {}'.format(interp, w, h, tag)

            def op(tag, func):
                # Libraries without this interpolation are skipped.
                if tag not in modes:
                    return None
                return (msg(tag), lambda: func(modes[tag]))

            def with_lycon(i):
                return lycon.resize(img, width=w, height=h, interpolation=i)

            def with_opencv(i):
                return cv2.resize(img, (w, h), interpolation=i)

            def with_pil(i):
                return np.asarray(PIL.Image.fromarray(img).resize((w, h), i))

            def with_skimage(i):
                return skimage.transform.resize(img, (h, w), order=i)

            candidates = [
                op('Lycon', with_lycon),
                op('OpenCV', with_opencv),
                op('PIL', with_pil),
                op('SKImage', with_skimage),
            ]
            benchmark(*[entry for entry in candidates if entry is not None])
def convert_string_to_np_array(self, attr):
    """Load the images and labels listed for the train or the test split.

    Arguments
        attr: ``'train'`` to read ``self.train_txt`` or ``'test'`` to read
            ``self.test_txt``.

    Returns
        ``(images, labels)`` as numpy arrays. Every image is resized to the
        height and width reported by ``self.get_input_shape()``; labels are
        the raw label strings without the line terminator.

    Raises
        ValueError: if ``attr`` is neither ``'train'`` nor ``'test'``.
    """
    types = ['train', 'test']
    if attr not in types:
        raise ValueError("Invalid input. Expected one of: %s" % types)

    # Compare by value: `is` tests object identity and is only accidentally
    # true for equal strings.
    filename = self.train_txt if attr == 'train' else self.test_txt

    train_data, labels = [], []
    height, width, _ = self.get_input_shape()
    with open(filename, 'r') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline) carry no entry.
            if not line.strip():
                continue
            # Split once and drop the line terminator so that a label in the
            # last field does not end with "\n".
            fields = line.rstrip('\r\n').split(" ")
            image = lycon.resize(lycon.load(fields[0]),
                                 width=width,
                                 height=height,
                                 interpolation=lycon.Interpolation.CUBIC)
            train_data.append(image)
            labels.append(fields[1])
    return np.array(train_data), np.array(labels)
def get_data_gen(
    base_image_dir: pathlib.Path, data_path: pathlib.Path, size: int,
    seen_set: Set[int]
) -> Iterable[Tuple[str, int, np.ndarray, Tuple[int, int], List[float], float,
                    int]]:
    """Yield one resized logo crop per unseen annotation in ``data_path``.

    Annotations whose ``id`` is in ``seen_set``, or whose image file does not
    exist under ``base_image_dir``, are skipped. Images whose last dimension
    is not 3 are converted to RGB through PIL before cropping.

    Yields
        ``(barcode, image_id, crop, (width, height), bounding_box, score,
        logo_id)`` where ``crop`` has been resized to ``size`` x ``size``
        with cubic interpolation and ``(width, height)`` is the crop's
        resolution before resizing.
    """
    for annotation in iter_jsonl(data_path):
        logo_id = annotation["id"]
        if logo_id in seen_set:
            continue

        image_meta = annotation["image_prediction"]["image"]
        barcode = image_meta["barcode"]
        image_id = int(image_meta["image_id"])

        file_path = base_image_dir / generate_image_path(
            barcode, str(image_id))
        if not file_path.is_file():
            continue

        pixels = lycon.load(str(file_path))
        assert pixels is not None

        if pixels.shape[-1] != 3:
            pixels = np.array(Image.fromarray(pixels).convert("RGB"))
        assert pixels.shape[-1] == 3

        bounding_box = annotation["bounding_box"]
        score = annotation["score"]

        crop = crop_image(pixels, bounding_box)
        crop_height = int(crop.shape[0])
        crop_width = int(crop.shape[1])

        resized_crop = lycon.resize(
            crop,
            width=size,
            height=size,
            interpolation=lycon.Interpolation.CUBIC,
        )
        yield (
            barcode,
            image_id,
            resized_crop,
            (crop_width, crop_height),
            bounding_box,
            score,
            logo_id,
        )
def __getitem__(self, index):
    """Return ``(path, tensor)`` for the image at ``index`` (wrapping around).

    The image is padded to a square with value 127.5, resized to
    ``self.img_size`` with nearest-neighbour interpolation, moved to
    channels-first order and divided by 255.
    """
    img_path = self.files[index % len(self.files)]

    # Extract image
    pixels = np.array(Image.open(img_path))
    squared, _ = pad_to_square(pixels, 127.5)

    # Resize
    side = self.img_size
    resized = lycon.resize(squared,
                           height=side,
                           width=side,
                           interpolation=lycon.Interpolation.NEAREST)

    # Channels-first, then as a pytorch tensor scaled by 1/255
    channels_first = np.transpose(resized, (2, 0, 1))
    tensor = torch.from_numpy(channels_first).float() / 255.0

    return img_path, tensor
def convert_string_to_np_array(filename, img_height, img_width):
    """Load every image named in a listing file into one uint8 array.

    Each non-blank line of ``filename`` is split on single spaces: field 0
    is the image path and field 1 the label. The output buffer is allocated
    once, sized from the number of entries and the shape of the first
    resized image, instead of from names that are not parameters of this
    function.

    Arguments
        filename: path of the text listing.
        img_height: height every image is resized to.
        img_width: width every image is resized to.

    Returns
        ``(data, labels)``: ``data`` is a uint8 array with one resized image
        per entry, ``labels`` an array of the label strings without the line
        terminator. For a listing with no entries ``data`` has shape
        ``(0, img_height, img_width, 3)``.
    """
    with open(filename, 'r') as f:
        entries = [
            line.rstrip('\r\n').split(" ") for line in f if line.strip()
        ]

    labels = []
    data = None
    for i, fields in enumerate(entries):
        image = lycon.resize(lycon.load(fields[0]),
                             width=img_width,
                             height=img_height,
                             interpolation=lycon.Interpolation.CUBIC)
        if data is None:
            # Pre-allocate so the images are not held twice in memory.
            data = np.empty((len(entries),) + image.shape, dtype=np.uint8)
        data[i, ...] = image
        labels.append(fields[1])
        print(i)

    if data is None:
        # No image to take the channel count from; three is assumed.
        data = np.empty((0, img_height, img_width, 3), dtype=np.uint8)
    return data, np.array(labels)
def training_generator(training_data, batch_size):
    """Endlessly yield ``(inputs, targets)`` batches of preprocessed images.

    Every key of ``training_data`` is appended to the module-level
    ``recordings_dir`` and loaded; rows 50..139 are kept, the crop is resized
    to 200 x 66 with cubic interpolation and rescaled with
    ``pixel / 127.5 - 1``. The mapped value is used as the target.

    Arguments
        training_data: mapping of image file name -> label.
        batch_size: number of samples per yielded batch (at least 1).

    Yields
        ``(np.ndarray, np.ndarray)`` holding exactly ``batch_size`` samples.
        Samples left over at the end of one pass are completed with samples
        from the next pass.

    Raises
        ValueError: if ``batch_size`` is below 1 or ``training_data`` is
            empty (the loop could never produce a batch).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")
    if not training_data:
        raise ValueError("training_data is empty")

    inputs_batch = []
    targets_batch = []
    while True:
        for k, v in training_data.items():
            decoded_image = lycon.load(recordings_dir + k)
            decoded_image = decoded_image[50:140, :, :]
            decoded_image = lycon.resize(
                decoded_image,
                width=200,
                height=66,
                interpolation=lycon.Interpolation.CUBIC)
            # Divide first so the arithmetic happens in floating point:
            # `2 * array` on a uint8 array wraps around before the division.
            decoded_image = decoded_image / 127.5 - 1
            inputs_batch.append(decoded_image)
            targets_batch.append(v)
            # Counting buffered samples (not the position in the current
            # pass) keeps every batch at exactly `batch_size` samples.
            if len(inputs_batch) == batch_size:
                yield (np.asarray(inputs_batch), np.asarray(targets_batch))
                inputs_batch.clear()
                targets_batch.clear()
def transform(self, o):
    """Resize ``o`` to this object's width, height and interpolation mode."""
    resize_options = dict(width=self.width,
                          height=self.height,
                          interpolation=self.interpolation)
    return lycon.resize(o, **resize_options)
# Output size in pixels; `args.tiny` selects a third of the default size.
WINDOW_W = 200 if args.tiny else 600
WINDOW_H = 150 if args.tiny else 450

# Configuration strings for the renderer (presumably Panda3D PRC settings —
# confirm); they are issued before BeautyApp() is constructed below.
loadPrcFileData('', 'window-type offscreen')
loadPrcFileData('', 'sync-video 0')
loadPrcFileData('', 'load-file-type p3assimp')
loadPrcFileData('', 'win-size {w} {h}'.format(w=WINDOW_W, h=WINDOW_H))

styler = ImageStyler(args.vgg_ckpt_path, args.decoder_ckpt_path)
styles = [
    imageio.imread('pytorch-AdaIN/input/style/en_campo_gris.jpg'),
    imageio.imread('pytorch-AdaIN/input/style/la_muse.jpg'),
]  # assumption: order is (1) less beautiful, (2) more beautiful

# Bring every style image to the window size, replacing it in the list.
for i in range(len(styles)):
    styles[i] = lycon.resize(styles[i],
                             width=WINDOW_W,
                             height=WINDOW_H,
                             interpolation=lycon.Interpolation.CUBIC)

app = BeautyApp()
window_name = 'IN PURSUIT OF BEAUTY'
output_window = OutputWindow(window_name)

frames = 99999
# Per-step position and yaw increments; each enabled option below doubles
# them.
pos_step = 0.2
yaw_step = 0.5
if not args.no_style:
    pos_step *= 2  # stylization is slow
    yaw_step *= 2  # so: speed things up
if args.tiny:
    pos_step *= 2
    yaw_step *= 2
def __getitem__(self, index):
    """Return ``(img_path, input_img, filled_labels)`` for one sample.

    The image is loaded, expanded to three channels if it is not 3-D, padded
    to a square with value 127.5, resized to ``self.img_size`` and converted
    to a channels-first float tensor divided by 255.

    The label file is read as rows of five numbers. Columns 1-4 are treated
    as a box centre and size relative to the unpadded image and are
    rewritten in place:

    * training: centre and size relative to the padded image;
    * otherwise: corner coordinates ``(x1, y1, x2, y2)`` in pixels of the
      resized image.

    Column 0 is passed through untouched (presumably the class id —
    confirm). ``filled_labels`` always has ``self.max_objects`` rows; unused
    rows are zero and extra labels are dropped.
    """
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    img = lycon.load(img_path)

    # Handles images with less than three channels
    if len(img.shape) != 3:
        img = np.expand_dims(img, -1)
        img = np.repeat(img, 3, -1)

    h, w, _ = img.shape
    img, pad = pad_to_square(img, 127.5)
    padded_h, padded_w, _ = img.shape

    # Resize to target shape
    img = lycon.resize(img, height=self.img_size, width=self.img_size)

    # Channels-first and normalize
    input_img = torch.from_numpy(img).float().permute((2, 0, 1)) / 255.0

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()
    labels = None
    if os.path.exists(label_path):
        labels = np.loadtxt(label_path).reshape(-1, 5)

        # Extract coordinates for unpadded + unscaled image
        x1 = w * (labels[:, 1] - labels[:, 3] / 2)
        y1 = h * (labels[:, 2] - labels[:, 4] / 2)
        x2 = w * (labels[:, 1] + labels[:, 3] / 2)
        y2 = h * (labels[:, 2] + labels[:, 4] / 2)

        # Adjust for added padding. Padding moves the image content by the
        # leading (left / top) amount only, so both corners of a box shift
        # by that same amount; the trailing pad must not be added to x2/y2.
        x1 += pad[1][0]
        y1 += pad[0][0]
        x2 += pad[1][0]
        y2 += pad[0][0]

        if self.is_training:
            # Returns (x, y, w, h)
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h
        else:
            # Returns (x1, y1, x2, y2)
            labels[:, 1] = x1 * (self.img_size / padded_w)
            labels[:, 2] = y1 * (self.img_size / padded_h)
            labels[:, 3] = x2 * (self.img_size / padded_w)
            labels[:, 4] = y2 * (self.img_size / padded_h)

    # Fill matrix
    filled_labels = np.zeros((self.max_objects, 5))
    if labels is not None:
        labels = labels[:self.max_objects]
        filled_labels[:len(labels)] = labels
    filled_labels = torch.from_numpy(filled_labels)

    return img_path, input_img, filled_labels
def imresize(img, width, height):
    """Resize ``img`` to ``width`` x ``height`` with linear interpolation."""
    mode = lycon.Interpolation.LINEAR
    return lycon.resize(img, width=width, height=height, interpolation=mode)
# Timing script: style transfer at full resolution, then on 4x-subsampled
# inputs, then upscaling that smaller result back to the content image size.
styler = ImageStyler(args.vgg_ckpt_path, args.decoder_ckpt_path)
content = imageio.imread('pytorch-AdaIN/input/content/cornell.jpg')
style = imageio.imread(
    'pytorch-AdaIN/input/style/woman_with_hat_matisse.jpg')

# 1) Full-resolution transfer.
start_time = time.time()
output = styler.transfer(content, style)
print('time elapsed: %fs' % (time.time() - start_time))
# The (1, 2, 0) transpose moves the first axis of the squeezed output last
# before writing (presumably channels-first -> channels-last — confirm).
imageio.imwrite('output.png', np.transpose(output.squeeze(), (1, 2, 0)))
print('wrote `output.png`')

# 2) Same transfer on inputs that keep every 4th row and column.
content_downscaled = content[::4, ::4]
style_downscaled = style[::4, ::
                         4]  # aliasing, but w/e we're on a time budget here
imageio.imwrite('input_downscaled.png', content_downscaled)
start_time = time.time()
output = styler.transfer(content_downscaled, style_downscaled)
print('time elapsed: %fs' % (time.time() - start_time))
output = np.transpose(output.squeeze(), (1, 2, 0)).numpy()
imageio.imwrite('output_downscaled.png', output)
print('wrote `output_downscaled.png`')

# 3) Cubic upscale of the small result to the content image's width/height.
start_time = time.time()
output = lycon.resize(output,
                      width=content.shape[1],
                      height=content.shape[0],
                      interpolation=lycon.Interpolation.CUBIC)
print('time elapsed: %fs' % (time.time() - start_time))
imageio.imwrite('output_downscaled_upscaled.png', output)
print('wrote `output_downscaled_upscaled.png`')
def __getitem__(self, idx):
    """Build the masked input, the isolated box content and the target image.

    Row ``idx`` of ``self.right_result_ts`` supplies the file name, two ids
    and a box ``(x1, y1, x2, y2)`` that is clamped to [0, 1] and scaled by
    ``self.size``. The image is resized to ``self.size`` x ``self.size``.

    Three arrays are derived from it:

    * ``chip``: zeros everywhere except inside the box, where the image is
      copied;
    * ``input``: the image with the box zeroed out, with the box mask
      concatenated in front of its channels;
    * ``gt``: the resized image itself.

    When ``self.DA and self.training`` all three are reduced to 256 x 256,
    either by a shared random crop (box fits in 256 x 256) or by resizing.
    A random horizontal flip is applied during training. All three outputs
    are channels-first float tensors divided by 255.

    Returns
        ``(input, chip_only, gt)`` when ``self.training`` is truthy,
        otherwise ``(input, chip_only, gt, pid, vid, full_path, filename)``.
    """
    filename = self.right_result_ts.loc[idx, 'filename']
    pid = self.right_result_ts.loc[idx, 'pid']
    vid = self.right_result_ts.loc[idx, 'id']

    # filename=self.filenames_ts[idx]
    # pid=self.pids_ts[idx]
    # vid=self.vids_ts[idx]

    # day_time=self.right_result_ts.loc[idx,'daytime']
    x1, y1, x2, y2 = self.right_result_ts.loc[
        idx, 'x1'], self.right_result_ts.loc[
            idx, 'y1'], self.right_result_ts.loc[
                idx, 'x2'], self.right_result_ts.loc[idx, 'y2']
    # x1,y1,x2,y2=self.x1_ts[idx],self.y1_ts[idx],self.x2_ts[idx],self.y2_ts[idx]
    # Clamp the relative box to the unit square.
    x1, y1, x2, y2 = max(0, x1), max(0, y1), min(1, x2), min(1, y2)
    # w,h=self.right_result_ts.loc[idx,'width'],self.right_result_ts.loc[idx,'height']
    # w,h=self.width_ts[idx],self.height_ts[idx]

    # Box in pixels of the resized image, rounded outwards.
    bb_x1, bb_y1, bb_x2, bb_y2 = int(floor(x1 * self.size)), int(
        floor(y1 * self.size)), int(ceil(x2 * self.size)), int(
            ceil(y2 * self.size))
    bb_x1, bb_y1, bb_x2, bb_y2 = max(
        bb_x1, 0), max(bb_y1, 0), min(bb_x2, self.size - 1), min(
            bb_y2, self.size - 1
        )  # since we are limiting our input image to self.sizexself.size
    # Inclusive pixel extents of the box.
    bb_w, bb_h = (bb_x2 - bb_x1) + 1, (bb_y2 - bb_y1) + 1
    condition = not (bb_w > 256 or bb_h > 256
                     )  # check if the car mask itself is more than 256

    input = lycon.load(os.path.join(self.complete_images_path, filename))
    # input_resized=resize(input,(self.size,self.size))
    input_resized = lycon.resize(input,
                                 width=self.size,
                                 height=self.size,
                                 interpolation=lycon.Interpolation.LINEAR)

    # chip: black canvas holding only the pixels inside the box.
    chip = np.zeros((self.size, self.size, 3), dtype=np.float32)
    chip[bb_y1:(bb_y2 + 1),
         bb_x1:(1 + bb_x2), :] = input_resized[bb_y1:(bb_y2 + 1),
                                               bb_x1:(1 + bb_x2), :]
    if (self.DA and self.training):
        if (condition):
            # Random top-left corner (p, q) of a 256 x 256 window; the same
            # p and q are reused for input_new, mask and gt below.
            p, q = random.randint(max(0, bb_x2 - 256),
                                  min(bb_x1, 255)), random.randint(
                                      max(0, bb_y2 - 256), min(bb_y1, 255))
            # print((p,q),(bb_x1,bb_y1),(bb_x2,bb_y2))
            chip_da = np.zeros((256, 256, 3), dtype=np.float32)
            chip_da = chip[q:q + 256, p:p + 256, :]
            chip = chip_da
        else:
            chip = lycon.resize(chip, width=256, height=256)

    # One flip decision shared by chip, input_new and gt.
    # NOTE(review): `mask` is not flipped in the visible code although
    # `input_new` is — confirm whether that is intended.
    horizontal_flip_parameter = random.randint(0, 1)
    if (horizontal_flip_parameter and self.training):
        chip = np.flip(chip, axis=1).copy()

    # lycon.save('chip_'+str(idx)+'.jpg',chip)
    chip = torch.from_numpy(chip)
    chip = chip.type(torch.FloatTensor)
    chip_only = chip.permute(2, 0, 1)

    # mask is 1 inside the box, 0 elsewhere; inv_mask is its complement.
    box = np.ones((bb_h, bb_w, 1), dtype=np.float32)
    mask = np.zeros((self.size, self.size, 1), dtype=np.float32)
    mask[bb_y1:(bb_y2 + 1), bb_x1:(1 + bb_x2), :] = box
    inv_mask = np.ones((self.size, self.size, 1), dtype=np.float32) - mask
    # Image with the box region zeroed out.
    input_new = input_resized * inv_mask

    if (self.DA and self.training):
        if (condition):
            input_da = np.zeros((256, 256, 3), dtype=np.float32)
            input_da = input_new[q:q + 256, p:p + 256, :]
            input_new = input_da

            mask_da = np.zeros((256, 256, 1), dtype=np.float32)
            mask_da = mask[q:q + 256, p:p + 256, :]
            mask = mask_da
        else:
            input_new = lycon.resize(input_new, width=256, height=256)
            mask = lycon.resize(mask, width=256, height=256)
            mask = np.expand_dims(
                mask,
                axis=2)  # need to add an extra dimension after resizing

    if (horizontal_flip_parameter and self.training):
        input_new = np.flip(input_new, axis=1).copy()

    # lycon.save('input_'+str(idx)+'.jpg',input_new)
    input = torch.from_numpy(input_new)
    input = input.type(torch.FloatTensor)
    # Mask goes in front of the image channels (last axis), then the last
    # axis is moved first.
    input = torch.cat(
        (torch.from_numpy(mask).type(torch.FloatTensor), input), dim=2)
    input = input.permute(2, 0, 1)

    gt = input_resized
    if (self.DA and self.training):
        if (condition):
            gt_da = np.zeros((256, 256, 3), dtype=np.float32)
            gt_da = gt[q:q + 256, p:p + 256, :]
            gt = gt_da
        else:
            gt = lycon.resize(gt, width=256, height=256)

    if (horizontal_flip_parameter and self.training):
        gt = np.flip(gt, axis=1).copy()

    # lycon.save('gt_'+str(idx)+'.png',gt)
    gt = torch.from_numpy(gt)
    gt = gt.type(torch.FloatTensor)
    gt = gt.permute(2, 0, 1)

    input /= 255.0
    chip_only /= 255.0
    gt /= 255.0

    if (self.training):
        return input, chip_only, gt
    else:
        return input, chip_only, gt, pid, vid, os.path.join(
            self.complete_images_path, filename), filename


# Disabled smoke test for the dataset, kept for reference:
# cnt=0
# train_dataset=NYC3dcars(root_dir='/vulcan/scratch/koutilya/NYC3dcars',training=1,DA=True)
# print(train_dataset.__len__())
# # val_dataset=NYC3dcars()
# print(val_dataset.__len__())
# train_dataloader=DataLoader(train_dataset,batch_size=32,shuffle=False)
# # val_dataloader=DataLoader(val_dataset,batch_size=10,shuffle=False)
# start=time.time()
# for i,data in enumerate(train_dataloader):
#     if(i==1):
#         break
#     input,new_chip,gt=data #,pid,vid,path,filename
#     print('Time to get entire batch: ' + str(time.time()-start))
#     start=time.time()
#     cnt+=torch.sum(count)
#     print('Iteration: '+str(i)+' cnt: '+ str(cnt))
#     print(input.shape,new_chip.shape,gt.shape)
#     print(h)
#     print(len(set(list(path))))
#     # # print(filename[0])
#     # # k=input[0,1:,:,:].permute(1,2,0).cpu().numpy()
#     # # print(k.shape)
#     # # imsave('test_input.png',k)
#     print(i)
# train_dataset[61]
def __getitem__(self, index):
    """Return ``(img_path, img, boxes)`` for one sample.

    The image is loaded, expanded to three channels if it is not 3-D, padded
    to a square with value 127.5, resized to ``self.img_size`` and converted
    to a channels-first float tensor divided by 255.

    The label file is read as rows of five numbers. Columns 1-4 are treated
    as a box centre and size relative to the unpadded image and are
    rewritten in place:

    * training: centre and size relative to the padded image;
    * otherwise: corner coordinates ``(x1, y1, x2, y2)`` in pixels of the
      resized image.

    ``boxes`` has six columns: column 0 is left at zero and columns 1-5 hold
    the label rows. When there is no label file a single all-zero row is
    returned.
    """
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    img = lycon.load(img_path)

    # Handles images with less than three channels
    if len(img.shape) != 3:
        img = np.expand_dims(img, -1)
        img = np.repeat(img, 3, -1)

    h, w, _ = img.shape
    img, pad = pad_to_square(img, 127.5)
    padded_h, padded_w, _ = img.shape

    # Resize to target shape
    img = lycon.resize(img, height=self.img_size, width=self.img_size)

    # Channels-first and normalize
    img = torch.from_numpy(img).float().permute((2, 0, 1)) / 255.0

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()
    labels = None
    if os.path.exists(label_path):
        labels = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))

        # Extract coordinates for unpadded + unscaled image
        x1 = w * (labels[:, 1] - labels[:, 3] / 2)
        y1 = h * (labels[:, 2] - labels[:, 4] / 2)
        x2 = w * (labels[:, 1] + labels[:, 3] / 2)
        y2 = h * (labels[:, 2] + labels[:, 4] / 2)

        # Adjust for added padding. Padding moves the image content by the
        # leading (left / top) amount only, so both corners of a box shift
        # by that same amount; the trailing pad must not be added to x2/y2.
        x1 += pad[1][0]
        y1 += pad[0][0]
        x2 += pad[1][0]
        y2 += pad[0][0]

        if self.is_training:
            # Returns (x, y, w, h)
            labels[:, 1] = ((x1 + x2) / 2) / padded_w
            labels[:, 2] = ((y1 + y2) / 2) / padded_h
            labels[:, 3] *= w / padded_w
            labels[:, 4] *= h / padded_h
        else:
            # Returns (x1, y1, x2, y2)
            labels[:, 1] = x1 * (self.img_size / padded_w)
            labels[:, 2] = y1 * (self.img_size / padded_h)
            labels[:, 3] = x2 * (self.img_size / padded_w)
            labels[:, 4] = y2 * (self.img_size / padded_h)

    # Apply augmentations
    if self.augment:
        if np.random.random() < 0.5:
            img, labels = horisontal_flip(img, labels)

    # Add dummy label if there are none
    num_labels = 1 if labels is None else len(labels)
    boxes = torch.zeros((num_labels, 6))
    if labels is not None:
        boxes[:, 1:] = labels

    return img_path, img, boxes
def __getitem__(self, index):
    """Return ``(img_path, input_img, filled_labels)`` for oriented boxes.

    The image is loaded, expanded to three channels if it is not 3-D, padded
    to a square with value 127.5, resized to ``self.img_size`` and converted
    to a channels-first float tensor scaled by 1/255.

    The label file (first row skipped) holds six unnormalised numbers per
    object:

    * the object number
    * x, y: the centre of the oriented box (x from the left limit, y from
      the upper limit)
    * length (longest dimension)
    * width (shortest dimension)
    * orientation in degrees (- 0, / 45, \\ -45, | -90 or 90)

    Returned rows are ``label, x, y, width, length, theta``: the centre is
    divided by the padded image size, width and length by the padded image
    diagonal, and theta by 90. ``filled_labels`` always has
    ``self.max_objects`` rows; unused rows are zero and the whole matrix is
    zero when there is no label file.
    """
    # ---------
    #  Image
    # ---------
    img_path = self.img_files[index % len(self.img_files)].rstrip()
    img = lycon.load(img_path)

    # Handles images with less than three channels
    if len(img.shape) != 3:
        img = np.expand_dims(img, -1)
        img = np.repeat(img, 3, -1)

    img, pad = pad_to_square(img, 127.5)
    padded_h, padded_w, _ = img.shape

    # Resize to target shape
    img = lycon.resize(img, height=self.img_size, width=self.img_size)

    # Channels-first and normalize
    input_img = torch.from_numpy(img / 255).float().permute((2, 0, 1))

    # ---------
    #  Label
    # ---------
    label_path = self.label_files[index % len(self.img_files)].rstrip()
    labels = None
    if os.path.exists(label_path):
        # reshape(-1, 6) always yields a 2-D array, even for one object.
        labels = np.loadtxt(label_path, delimiter=' ',
                            skiprows=1).reshape(-1, 6)

        # Half-extent vectors of the box along its two sides; the second
        # side is at 90 degrees to the first.
        # TODO : the theory behind the equations
        theta = np.radians(labels[:, 5])
        theta_perp = np.radians(90 + labels[:, 5])
        side_a_x = labels[:, 3] * np.cos(theta) / 2.0
        side_a_y = labels[:, 3] * np.sin(theta) / 2.0
        side_b_x = labels[:, 4] * np.cos(theta_perp) / 2.0
        side_b_y = labels[:, 4] * np.sin(theta_perp) / 2.0

        # The 4 vertices of the box on the unpadded + unscaled image. They
        # are all computed before columns 1 and 2 are overwritten below.
        center_x = labels[:, 1]
        center_y = labels[:, 2]
        p1_x = center_x + side_a_x + side_b_x
        p1_y = center_y - side_a_y - side_b_y
        p2_x = center_x - side_a_x + side_b_x
        p2_y = center_y + side_a_y - side_b_y
        p3_x = center_x - side_a_x - side_b_x
        p3_y = center_y + side_a_y + side_b_y
        p4_x = center_x + side_a_x - side_b_x
        p4_y = center_y - side_a_y + side_b_y

        # Adjust for added padding
        p1_x += pad[1][0]
        p2_x += pad[1][0]
        p3_x += pad[1][0]
        p4_x += pad[1][0]
        p1_y += pad[0][0]
        p2_y += pad[0][0]
        p3_y += pad[0][0]
        p4_y += pad[0][0]

        # Centre of the padded box, normalised to [0, 1]
        labels[:, 1] = ((p1_x + p2_x + p3_x + p4_x) / 4) / padded_w
        labels[:, 2] = ((p1_y + p2_y + p3_y + p4_y) / 4) / padded_h

        # Side lengths after padding, normalised with the image diagonal
        diagonal_length = np.sqrt(padded_h**2 + padded_w**2)
        dim1 = np.sqrt((p2_x - p1_x)**2 + (p2_y - p1_y)**2)
        dim2 = np.sqrt((p3_x - p2_x)**2 + (p3_y - p2_y)**2)
        labels[:, 3] = np.min([dim1, dim2], axis=0) / diagonal_length  # width
        labels[:, 4] = np.max([dim1, dim2], axis=0) / diagonal_length  # length

        # normalize theta [-1, 1]
        labels[:, 5] /= 90

    # Fill matrix
    filled_labels = torch.zeros(
        (self.max_objects, 6))  # label, x, y, w, le, theta
    if labels is not None:
        # Convert only when labels exist: torch.from_numpy(None) raises.
        labels = torch.from_numpy(labels)[:self.max_objects]
        filled_labels[:len(labels)] = labels

    return img_path, input_img, filled_labels
def preprocess(self, observation):
    """Shrink an observation and collapse its last axis into 8-bit grayscale.

    The target width and height are ``self.OBSERVATION_SHAPE[0]`` and
    ``self.OBSERVATION_SHAPE[1]``; nearest-neighbour interpolation is used.
    """
    width, height = self.OBSERVATION_SHAPE[0], self.OBSERVATION_SHAPE[1]
    small = lycon.resize(observation,
                         width=width,
                         height=height,
                         interpolation=lycon.Interpolation.NEAREST)
    channel_mean = small.mean(axis=-1)
    return channel_mean.astype(np.uint8)
def preprocess(self, image):
    """Resize ``image`` with ``self.w`` and ``self.h`` and divide it by 255."""
    resized = lycon.resize(image, self.w, self.h)
    return resized / 255
# Main interaction loop: render, preprocess the observation, feed it to `h`,
# then act on the output state with the highest value.
for t in range(10000):
    env.render()

    # Scale pixel values by 1/255.
    obs = obs.astype(
        dtype=np.float32
    ) / 255.0  #np.swapaxes(obs.astype(dtype=np.float32) / 255.0, 0, 1)

    # Keep a window of (about) minSize columns centred on maxSize // 2 along
    # the second axis.
    obs = obs[:, maxSize // 2 - minSize // 2:maxSize // 2 + minSize // 2, :]

    obsPrev = copy(obs)

    # Average the first three channels into a single plane.
    obs = (obs[:, :, 0] + obs[:, :, 1] + obs[:, :, 2]) / 3.0

    obs = lycon.resize(obs,
                       width=imageSize[0],
                       height=imageSize[1],
                       interpolation=lycon.Interpolation.CUBIC)

    # The flattened observation is multiplied by 6.0 before being passed on.
    h.step(cs, (obs * 6.0).ravel().tolist(), reward)

    # Arg-max over the numActions output states starting at index aStart.
    maxValue = -99999.0
    action = 0

    for i in range(numActions):
        value = h.getOutputStates()[aStart + i]

        if value > maxValue:
            maxValue = value
            action = i

    obs, reward, done, info = env.step(action)
# Cut every image in images_dir into side x side tiles on a grid with spacing
# `step` and write the tiles to output_dir.
os.makedirs(output_dir)
print('Created output directory: %s.' % output_dir)

total_ims_written = 0
for filename in os.listdir(images_dir):
    # Files ending in '.csv' are not processed.
    if filename.endswith('.csv'):
        continue
    filepath = os.path.join(images_dir, filename)
    im = imageio.imread(filepath)
    h, w = im.shape[:2]

    # Optional global rescale before tiling.
    if init_rescale != 1.0:
        h = int(h * init_rescale)
        w = int(w * init_rescale)
        im = lycon.resize(im,
                          width=w,
                          height=h,
                          interpolation=lycon.Interpolation.CUBIC)

    # With no_boundary set, tile origins start one `step` in from the
    # top/left and stop one `step` earlier at the bottom/right.
    y0_start = step if no_boundary else 0
    x0_start = step if no_boundary else 0
    y0_end = h - side - step if no_boundary else h - side
    x0_end = w - side - step if no_boundary else w - side

    # NOTE(review): ims_written is not incremented in the visible code, so
    # every tile of an image would get the same '00000_' prefix — confirm
    # whether the increment lies beyond this excerpt.
    ims_written = 0
    for y0 in range(y0_start, y0_end, step):
        for x0 in range(x0_start, x0_end, step):
            sub_im = im[y0:y0 + side, x0:x0 + side]
            out_filepath = os.path.join(
                output_dir,
                str(ims_written).zfill(5) + '_' + filename)
            resize_and_write(sub_im, side, out_filepath)
def __getitem__(self, index):
    """Generate one batch of data.

    Images of the batch are loaded and divided by 255. An image is resized
    (area interpolation, largest side becoming ``self.input_dims[0]``) unless
    ``self.preserve_size`` is set and the image already fits. Each image is
    then centred on a canvas of shape ``self.input_dims`` filled with
    ``self.padding_value(img)``.

    Returns
        ``(images, encoded_labels)``: a float32 array of the prepared images
        (augmented when ``self.augment == True``) and the output of
        ``self.class_encoder.fit_transform`` on the batch labels.
    """
    # pick indexes of images for this batch
    first = index * self.batch_size
    last = (index + 1) * self.batch_size
    batch_indexes = self.indexes[first:last]

    # select and load images from this batch
    paths = [self.img_paths[i] for i in batch_indexes]
    originals = [lycon.load(p) / 255 for p in paths]

    # resize images to the input dimension of the network
    side = self.input_dims[0]
    prepared = []
    for picture in originals:
        rows, cols = picture.shape[0:2]
        longest = max(rows, cols)

        # if size is not preserved or image is larger than the input size,
        # resize so that the largest dimension equals the input size
        needs_resize = (not self.preserve_size) or (longest > side)
        if needs_resize:
            picture = lycon.resize(picture,
                                   height=max(rows * side // longest, 1),
                                   width=max(cols * side // longest, 1),
                                   interpolation=lycon.Interpolation.AREA)
            rows, cols = picture.shape[0:2]

        # square canvas of the desired dimension, filled with padding value
        canvas = np.full(self.input_dims, self.padding_value(picture))

        # number of pixels left blank before the image on each axis
        top = int((side - rows) / 2)
        left = int((side - cols) / 2)

        # replace pixels by input image
        canvas[top:top + rows, left:left + cols] = picture
        prepared.append(canvas)

    # convert to array of images
    batch_images = np.array(prepared, dtype='float32')

    # augment images
    if self.augment == True:
        batch_images = self.augmenter(batch_images)

    # extract and encode the labels corresponding to the selected indexes
    label_rows = [[self.labels[i]] for i in batch_indexes]
    encoded_labels = self.class_encoder.fit_transform(label_rows)

    # return reshaped images and labels
    return batch_images, encoded_labels
def read_video_file_rand256_NL_SomethingV1(filename,
                                           num_frames=10,
                                           H=240,
                                           W=320,
                                           duration=-1,
                                           center=True,
                                           crop=True,
                                           crop_dim=224,
                                           smaller_side=256,
                                           sample_offset=False):
    """Read a clip, resize it, sample frames uniformly and optionally crop.

    Every frame is resized (cubic) so that its smaller side equals
    ``smaller_side`` while the aspect ratio is kept. ``num_frames`` frames
    are then picked uniformly over the whole clip; with ``sample_offset``
    the picks are shifted by half a sampling interval and clipped to the
    last frame. ``H``, ``W`` and ``duration`` are accepted but not used.

    Returns
        A uint8 array of the sampled frames: cropped to ``crop_dim`` x
        ``crop_dim`` (centre crop if ``center``, otherwise a random
        position) when ``crop`` is set, the full resized frames otherwise.
    """
    clip = read_video_from_images(filename)

    # resize smaller side to a fixed size, keeping the aspect ratio
    src_height, src_width = clip.shape[1], clip.shape[2]
    ratio = src_width / src_height
    if src_width < src_height:
        new_width = smaller_side
        new_height = int(new_width / ratio)
    else:
        new_height = smaller_side
        new_width = int(ratio * new_height)

    resized = [
        lycon.resize(frame,
                     width=new_width,
                     height=new_height,
                     interpolation=lycon.Interpolation.CUBIC)
        for frame in clip
    ]

    # sample num_frames frames uniformly from the entire clip
    total = len(resized)
    if sample_offset:
        # select the frames with minimum overlap with the plain sampling
        #   normal sampling:  |   |   |   |   |   |   |
        #   offset sampling:    |   |   |   |   |   |   |
        # repeat the last frame if necessary
        shift = int(np.round(0.5 * total / num_frames))
        picks = np.clip(
            np.linspace(shift, total - 1 + shift, num_frames, dtype='int'),
            0, total - 1)
    else:
        picks = np.linspace(0, total - 1, num_frames, dtype='int')

    sampled = np.array([resized[i] for i in picks])
    n_sampled = sampled.shape[0]
    real_height = sampled.shape[1]
    real_width = sampled.shape[2]

    canvas = np.zeros([n_sampled, new_height, new_width, 3], dtype=np.uint8)
    canvas[:n_sampled, :real_height, :real_width] = sampled

    # top-left corner of the patch: centred, or drawn at random
    if center:
        top = canvas.shape[1] // 2 - (crop_dim // 2)
        left = canvas.shape[2] // 2 - (crop_dim // 2)
    else:
        top = int(np.random.rand() * (canvas.shape[1] - crop_dim))
        left = int(np.random.rand() * (canvas.shape[2] - crop_dim))

    if not crop:
        return canvas
    return canvas[:, top:top + crop_dim, left:left + crop_dim]