def myResize(image, size):
    # Use bilinear interpolation when shrinking, bicubic when enlarging.
    if np.max(image.shape) >= size:
        interp = "bilinear"
    else:
        interp = "bicubic"
    output = np.zeros((size, size))
    curr_y, curr_x = image.shape
    if curr_y > curr_x:  # the image is tall: fit the height, pad the width
        ratio = size * 1. / curr_y
        xsize = int(ratio * curr_x)
        offset = (size - xsize) // 2
        im2 = resize(image, (size, xsize), interp=interp)
        output[:, offset:offset + xsize] = im2
    else:  # the image is wide: fit the width, pad the height
        ratio = size * 1. / curr_x
        ysize = int(ratio * curr_y)
        offset = (size - ysize) // 2
        im2 = resize(image, (ysize, size), interp=interp)
        output[offset:offset + ysize, :] = im2
    return output
def myResize(image, size):
    # Variant of myResize that shrinks the content to 75% of the target size
    # and centres it, leaving a uniform border around the resized image.
    output = np.zeros((size, size))
    difference = int(size - size * 0.75) // 2
    size = int(size * 0.75)
    curr_y, curr_x = image.shape
    if curr_y > curr_x:  # the image is tall: fit the height, pad the width
        ratio = size * 1. / curr_y
        xsize = int(ratio * curr_x)
        offset = (size - xsize) // 2
        im2 = resize(image, (size, xsize), interp="bicubic")
        output[difference:difference + size,
               difference + offset:difference + offset + xsize] = im2
    else:  # the image is wide: fit the width, pad the height
        ratio = size * 1. / curr_x
        ysize = int(ratio * curr_y)
        offset = (size - ysize) // 2
        im2 = resize(image, (ysize, size), interp="bicubic")
        output[difference + offset:difference + offset + ysize,
               difference:difference + size] = im2
    return output
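# Hedged usage sketch (not from the original source): both myResize variants
# above take a 2-D grayscale array and return a (size, size) canvas with the
# aspect-ratio-preserving resize centred in it (the second variant additionally
# leaves a border). Assumes numpy as np and scipy.misc.imresize as resize, as
# elsewhere in this file.
example_img = np.random.randint(0, 256, (30, 60)).astype(np.uint8)
squared = myResize(example_img, 100)
print(squared.shape)  # (100, 100)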
def encode(images, locations, length, n_patches):
    # Cut n_patches square glimpses of side `length` around each location and
    # stack them into an (N, n_patches, length, length) array.
    N, H, V = images.shape
    # Map normalised locations to pixel coordinates.
    locations[:, 0] = locations[:, 0] * H + H // 2
    locations[:, 1] = locations[:, 1] * V + V // 2
    d = length // 2
    # Pad with edge values so glimpses near the border stay in bounds.
    images = np.pad(images, ((0, 0), (d, d), (d, d)), mode='edge')
    locations += d
    encoded = []
    for i in range(N):
        h_center, v_center = locations[i]
        h_from = h_center - d
        h_to = h_center + d
        v_from = v_center - d
        v_to = v_center + d
        image = images[i]
        l = length
        patches = []
        for p in range(n_patches):
            patch = image[h_from:h_to, v_from:v_to]
            resized = resize(patch, (length, length))
            reshaped = resized.reshape((1, length, length))
            patches.append(reshaped)
            l *= 2
        concatenated = np.concatenate(patches)
        reshaped = concatenated.reshape((1, n_patches, length, length))
        encoded.append(reshaped)
    return np.concatenate(encoded)
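# Illustrative sketch (not from the original source): encoding four random
# 28x28 images at their centres with two 8x8 patches each yields an array of
# shape (4, 2, 8, 8). Assumes numpy as np and the encode() defined above.
example_images = np.random.rand(4, 28, 28)
example_locations = np.zeros((4, 2), dtype=int)  # (0, 0) maps to the image centre
glimpses = encode(example_images, example_locations, length=8, n_patches=2)
print(glimpses.shape)  # (4, 2, 8, 8)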
def _process_frame_green(frame):
    obs = frame[0:84, :, :].astype(np.float) / 255.0
    obs = resize(obs, (64, 64))
    obs = ((1.0 - obs) * 255).round().astype(np.uint8)
    return obs[:, :, 1]  # green channel
def _process_frame(frame):
    obs = np.array(frame[0:400, :, :]).astype(np.float) / 255.0
    obs = np.array(resize(obs, (SCREEN_Y, SCREEN_X)))
    obs = ((1.0 - obs) * 255).round().astype(np.uint8)
    return obs
def __init__(self, path='data/omniglot.npy', batch_size=128, image_size=32):
    """
    path: path to omniglot.npy file produced by "data/setup_omniglot.py" script
    batch_size: the output is (2 * batch_size, 1, image_size, image_size)
                X[i] & X[i + batch_size] are the pair
    image_size: size of the image
    data_split: in number of alphabets, e.g. [30, 10] means out of 50 Omniglot
                alphabets, 30 are for training, 10 for validation and the
                remaining 10 for testing
    within_alphabet: for the verification task, when 2 characters are sampled
                to form a pair, this flag specifies whether they should be
                from the same alphabet/language
    ---------------------
    Data Augmentation Parameters:
        flip: flip both images in a pair
        scale: x would scale the image by + or - x%
        rotation_deg
        shear_deg
        translation_px: in both x and y directions
    """
    chars = np.load(path)

    # resize the images
    resized_chars = np.zeros((1623, 20, image_size, image_size), dtype='uint8')
    for i in range(1623):
        for j in range(20):
            resized_chars[i, j] = resize(chars[i, j], (image_size, image_size))
    chars = resized_chars

    self.mean_pixel = chars.mean() / 255.0  # used later for mean subtraction

    # starting index of each alphabet in a list of chars
    a_start = [0, 20, 49, 75, 116, 156, 180, 226, 240, 266, 300, 333, 355,
               381, 424, 448, 496, 518, 534, 586, 633, 673, 699, 739, 780,
               813, 827, 869, 892, 909, 964, 984, 1010, 1036, 1062, 1088,
               1114, 1159, 1204, 1245, 1271, 1318, 1358, 1388, 1433, 1479,
               1507, 1530, 1555, 1597]

    # size of each alphabet (num of chars)
    a_size = [20, 29, 26, 41, 40, 24, 46, 14, 26, 34, 33, 22, 26, 43, 24, 48,
              22, 16, 52, 47, 40, 26, 40, 41, 33, 14, 42, 23, 17, 55, 20, 26,
              26, 26, 26, 26, 45, 45, 41, 26, 47, 40, 30, 45, 46, 28, 23, 25,
              42, 26]

    # each alphabet/language has a different number of characters.
    # in order to uniformly sample all characters, we need to weigh the
    # probability of sampling an alphabet by its size. p is that probability
    def size2p(size):
        s = np.array(size).astype('float64')
        return s / s.sum()
    self.size2p = size2p

    self.data = chars
    self.a_start = a_start
    self.a_size = a_size
    self.image_size = image_size
    self.batch_size = batch_size

    flip = True
    scale = 0.2
    rotation_deg = 20
    shear_deg = 10
    translation_px = 5
    self.augmentor = ImageAugmenter(image_size, image_size,
                                    hflip=flip, vflip=flip,
                                    scale_to_percent=1.0 + scale,
                                    rotation_deg=rotation_deg,
                                    shear_deg=shear_deg,
                                    translation_x_px=translation_px,
                                    translation_y_px=translation_px)
def preprocess(self, obs):
    obs = obs.astype(np.float32) / 255.0
    obs = np.array(resize(obs, (64, 64)))
    obs = ((1.0 - obs) * 255).round().astype(np.uint8)
    return obs
def __init__(self, path=os.path.join('font_imgs', 'font_imgs.npy'),
             batch_size=64, image_size=32):
    """
    batch_size: the output is (2 * batch_size, 1, image_size, image_size)
                X[i] & X[i + batch_size] are the pair
    image_size: size of the image
    data_split: in number of alphabets, e.g. [30, 10] means out of 50 Omniglot
                alphabets, 30 are for training, 10 for validation and the
                remaining 10 for testing
    within_alphabet: for the verification task, when 2 characters are sampled
                to form a pair, this flag specifies whether they should be
                from the same alphabet/language
    ---------------------
    Data Augmentation Parameters:
        flip: flip both images in a pair
        scale: x would scale the image by + or - x%
        rotation_deg
        shear_deg
        translation_px: in both x and y directions
    """
    num_chars_in_font = 62         # num of chars used
    num_fonts = 65                 # num fonts
    num_chars_instances = 65 * 62  # 4030 char instances = num_chars_in_font * num_fonts
    num_samples_per_char = 100     # samples per char

    chars = np.load(path)

    # resize the images
    resized_chars = np.zeros((num_chars_instances, num_samples_per_char,
                              image_size, image_size), dtype='uint8')
    for i in range(num_chars_instances):
        for j in range(num_samples_per_char):
            resized_chars[i, j] = resize(chars[i, j], (image_size, image_size))
    chars = resized_chars

    self.mean_pixel = chars.mean() / 255.0  # used later for mean subtraction

    # starting index of each font in the list of chars
    a_start = []
    for i in range(num_fonts):
        a_start.append(i * num_chars_in_font)

    # size of each font (num of chars)
    a_size = [62] * num_fonts

    # each alphabet/language has a different number of characters.
    # in order to uniformly sample all characters, we need to weigh the
    # probability of sampling an alphabet by its size. p is that probability
    def size2p(size):
        s = np.array(size).astype('float64')
        return s / s.sum()
    self.size2p = size2p

    self.num_samples_per_char = num_samples_per_char
    self.num_chars_in_font = num_chars_in_font
    self.num_fonts = num_fonts
    self.data = chars
    self.a_start = a_start
    self.a_size = a_size
    self.image_size = image_size
    self.batch_size = batch_size
def __getitem__(self, index):
    item = [(self.hr[index],
             resize(self.lr[i][index], self.scale * 100, interp='cubic'))
            for i, _ in enumerate(self.lr)]
    # return [(self.transform(hr), self.transform(imresize(lr, 400, interp='cubic'))) for hr, lr in item]
    return [self.transform(hr, lr) for hr, lr in item]
def _process_frame(frame):
    obs = frame[0:84, :, :].astype(np.float) / 255.0
    obs = resize(obs, (SCREEN_X, SCREEN_Y))
    obs = ((1.0 - obs) * 255).round().astype(np.uint8)
    return obs
def _process_frame(frame):
    obs = frame[0:84, :, :].astype(np.float) / 255.0  # 96 x 96
    obs = resize(obs, (64, 64))
    obs = ((1.0 - obs) * 255).round().astype(np.uint8)
    return obs
def preprocess_frame(self, obs):
    obs = np.array(obs[0:400, :, :]).astype(np.float) / 255.0
    obs = np.array(resize(obs, (self.frame_shape[0], self.frame_shape[1])))
    obs = ((1.0 - obs) * 255).round().astype(np.uint8)
    return obs
import os
import sys
import time

import cv2
import numpy as np
from scipy.misc import imresize as resize

try:
    input_img_fname = sys.argv[1]
    output_fname = sys.argv[2]
except:
    print("Wrong input format. Try Pixels11.py img.jpg out.txt")
    sys.exit(1)

image = cv2.imread(input_img_fname, 0)
h, w = image.shape
n_patches = h // w

patches_flat = np.ndarray((n_patches, 11 * 11), dtype=np.float32)
t = time.time()
for i in range(n_patches):
    patch = image[i * w:(i + 1) * w, 0:w]
    patch = resize(patch, (11, 11))
    # normalise the patch to zero mean / unit std, then to unit L2 norm
    std1 = patch.std()
    if std1 < 0.000001:
        std1 = 1.
    patch = (patch - patch.mean()) / std1
    norm1 = np.linalg.norm(patch.flatten(), 2)
    if norm1 < 0.000000001:
        norm1 = 1.0
    patch = patch / norm1
    patches_flat[i, :] = patch.flatten()

np.savetxt(output_fname, patches_flat, delimiter=' ', fmt='%10.7f')
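# Illustrative follow-up (not part of the original script): the saved text
# file can be read back into an (n_patches, 121) array and reshaped to the
# original 11x11 patches.
loaded = np.loadtxt(output_fname)
loaded = loaded.reshape(-1, 11, 11)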
with torch.no_grad():
    # from pyglet.window import key
    action = np.array([0.0, 0.0, 0.0])
    viewer = None
    env = CarRacing()
    # env.render()
    if viewer is None:
        viewer = rendering.SimpleImageViewer()

    # sample a latent vector and decode it into an initial observation
    z = torch.randn(64)
    z = z.cuda().view(1, -1).detach()
    obs = model.module.decoder(z)
    img = obs.squeeze().data.cpu().numpy().astype('float32').transpose([1, 2, 0])
    img = np.array(img)
    img = resize(img, (int(np.round(SCREEN_Y * FACTOR)), int(np.round(SCREEN_X * FACTOR))))
    viewer.imshow(img)
    # time.sleep(10)
    # exit()

    total_reward = 0.0
    steps = 0
    restart = False
    while True:
        obs = obs.view(1, 3, 64, 64)
        mu_c, var_c = model.module.encoder(obs)
        mu, sigma = mu_c, var_c
        epsilon = torch.randn_like(sigma)
        z = mu + sigma * epsilon
        z = z.cuda().view(obs.shape[0], -1).detach()
        # z = torch.randn(64)
def resize_images(images, size=280):
    return np.array([resize(image, (size, size)) for image in images])
def load_image(path):
    image = imread(os.path.join(os.curdir, path))
    image = resize(image, (32, 32))
    return image.astype('float32') / 255.