def main():
    """
    Push two sample images through each model component once and print
    the shape of every intermediate output.
    """
    # load images
    sample_paths = [data_dir + './lenna.jpg', data_dir + './cat.jpg']
    imgs = torch.cat([
        torch.from_numpy(img2array(p, desired_size=[512, 512], expand=True))
        for p in sample_paths
    ])

    # unpacking doubles as a check that the batch is 4-dimensional
    B, H, W, C = imgs.shape

    loc = torch.Tensor([[-1., 1.], [-1., 1.]])
    imgs = Variable(imgs)
    loc = Variable(loc)

    # glimpse network: images + locations -> glimpse features
    sensor = glimpse_network(h_g=128, h_l=128, g=64, k=3, s=2, c=3)
    g_t = sensor(imgs, loc)

    # core network: glimpse features + zero initial hidden state
    rnn = core_network(input_size=256, hidden_size=256)
    h_0 = Variable(torch.zeros(g_t.shape[0], 256))
    h_t = rnn(g_t, h_0)

    # the three heads below all read the same hidden state
    classifier = action_network(256, 10)
    a_t = classifier(h_t)

    loc_net = location_network(256, 2, 0.11)
    mu, l_t = loc_net(h_t)

    base = baseline_network(256, 1)
    b_t = base(h_t)

    print("g_t: {}".format(g_t.shape))
    print("h_t: {}".format(h_t.shape))
    print("l_t: {}".format(l_t.shape))
    print("a_t: {}".format(a_t.shape))
    print("b_t: {}".format(b_t.shape))
def loadImages(path, batch_size, extension=".png"):
    """
    Load a shuffled batch of image files into a single numpy array.

    :param path: folder prefix; it is concatenated directly with
        "*" + extension, so it should end with a path separator
    :param batch_size: maximum number of files to load
    :param extension: file suffix to match
    :return numpy array: the per-file ``img2array`` results concatenated
        along axis 0 (documented as Samples * height * width * channels)
    """
    candidates = glob.glob(path + "*" + extension)
    # the return value is discarded, so shuffle is expected to reorder the
    # list in place before the first batch_size entries are taken
    shuffle(candidates)
    batch = [
        img2array(name, DIMS, expand=True)
        for name in candidates[:batch_size]
    ]
    return np.concatenate(batch, axis=0)
def main():
    """
    Visual check of the retina on two sample images.

    With TEST_GLIMPSE set, draws the grid of glimpses returned by
    ``retina.foveate``. With TEST_BOUNDING set, overlays three boxes of
    doubling size (starting at 64) on each image at the sampled location.
    Figures are written under ``plot_dir`` when SAVE is set.
    """
    # load images
    sample_paths = [data_dir + './lenna.jpg', data_dir + './cat.jpg']
    tensors = [
        torch.from_numpy(img2array(p, desired_size=[512, 512], expand=True))
        for p in sample_paths
    ]
    imgs = Variable(torch.cat(tensors))

    # one random 2-d location per image, filled uniformly between -1 and 1
    loc = Variable(torch.Tensor(2, 2).uniform_(-1, 1))

    if TEST_GLIMPSE:
        ret = retina(g=64, k=3, s=2)
        glimpse = ret.foveate(imgs, loc).data.numpy()
        print("Glimpse: {}".format(glimpse.shape))

        # the first two axes of the result drive the subplot grid
        rows = glimpse.shape[0]
        cols = glimpse.shape[1]
        fig, axs = plt.subplots(nrows=rows, ncols=cols, figsize=(5, 2))
        for r in range(rows):
            for c in range(cols):
                cell = axs[r, c]
                cell.imshow(glimpse[r, c, :])
                cell.get_xaxis().set_visible(False)
                cell.get_yaxis().set_visible(False)

        if SAVE:
            plt.savefig(
                plot_dir + 'glimpses.png', format='png', dpi=300,
                bbox_inches='tight'
            )

    if TEST_BOUNDING:
        fig, ax = plt.subplots(nrows=1, ncols=2)
        coords = denormalize(imgs.shape[1], loc.data)
        imgs = imgs.data.numpy()
        for idx, image in enumerate(imgs):
            panel = ax[idx]
            panel.imshow(image)

            # three boxes at the same location, each twice the previous size
            size = 64
            for _ in range(3):
                panel.add_patch(
                    bounding_box(
                        coords[idx, 0], coords[idx, 1], size, color='r'
                    )
                )
                size *= 2

            panel.get_xaxis().set_visible(False)
            panel.get_yaxis().set_visible(False)

        if SAVE:
            plt.savefig(
                plot_dir + 'bbox.png', format='png', dpi=300,
                bbox_inches='tight'
            )

    plt.show()
def main():
    """
    Extract glimpses at the centre of two sample images with ``Retina`` and
    show, for each image, its patches merged into one picture.
    """
    # paths
    data_dir = "../data/"

    # load images and move the last axis to position 1
    sample_paths = [data_dir + "./lenna.jpg", data_dir + "./cat.jpg"]
    stacked = torch.cat([
        torch.from_numpy(img2array(p, desired_size=[512, 512], expand=True))
        for p in sample_paths
    ])
    imgs = stacked.permute(0, 3, 1, 2)

    # fixed location (0, 0) for both images
    loc = torch.from_numpy(np.array([[0.0, 0.0], [0.0, 0.0]]))

    num_patches = 5
    scale = 2
    patch_size = 10

    ret = Retina(g=patch_size, k=num_patches, s=scale)
    glimpse = ret.foveate(imgs, loc).data.numpy()

    # split out one axis per patch, then move the size-3 axis to the end
    glimpse = np.reshape(glimpse, [2, num_patches, 3, patch_size, patch_size])
    glimpse = np.transpose(glimpse, [0, 1, 3, 4, 2])

    merged = []
    for patches in glimpse:
        tiles = [array2img(patch) for patch in patches]
        merged.append(reduce(merge_images, tiles))

    # back to float arrays scaled by 1/255 for imshow
    merged = [np.asarray(m, dtype="float32") / 255.0 for m in merged]

    fig, axs = plt.subplots(nrows=2, ncols=1)
    for idx, _ in enumerate(axs.flat):
        panel = axs[idx]
        panel.imshow(merged[idx])
        panel.get_xaxis().set_visible(False)
        panel.get_yaxis().set_visible(False)
    plt.show()
def main():
    """
    Run one ``RecurrentAttention`` step on two sample images and print the
    shapes of the two tensors it returns.
    """
    # load images
    sample_paths = [data_dir + './lenna.jpg', data_dir + './cat.jpg']
    batch = Variable(torch.cat([
        torch.from_numpy(img2array(p, desired_size=[512, 512], expand=True))
        for p in sample_paths
    ]))

    B, H, W, C = batch.shape

    # random previous location, zero previous hidden state
    l_t_prev = Variable(torch.Tensor(B, 2).uniform_(-1, 1))
    h_t_prev = Variable(torch.zeros(B, 256))

    ram = RecurrentAttention(64, 3, 2, 3, 128, 128, 256, 10, 0.11)
    h_t, l_t = ram(batch, l_t_prev, h_t_prev)

    print("h_t: {}".format(h_t.shape))
    print("l_t: {}".format(l_t.shape))
def main():
    """
    Extract glimpses at the centre of two sample images with ``retina`` and
    show, for each image, its patches merged into one picture.
    """
    # load images and move the last axis to position 1
    sample_paths = [data_dir + './lenna.jpg', data_dir + './cat.jpg']
    stacked = Variable(torch.cat([
        torch.from_numpy(img2array(p, desired_size=[512, 512], expand=True))
        for p in sample_paths
    ]))
    imgs = stacked.permute(0, 3, 1, 2)

    # fixed location (0, 0) for both images
    loc = Variable(torch.from_numpy(np.array([[0., 0.], [0., 0.]])))

    ret = retina(g=64, k=3, s=2)
    glimpse = ret.foveate(imgs, loc).data.numpy()

    # presumably (image, patch, channel, height, width) -- the sizes mirror
    # g=64 and k=3 above; TODO confirm against retina.foveate
    glimpse = np.reshape(glimpse, [2, 3, 3, 64, 64])
    # move axis 2 to the end
    glimpse = np.transpose(glimpse, [0, 1, 3, 4, 2])

    merged = []
    for patches in glimpse:
        tiles = [array2img(patch) for patch in patches]
        merged.append(reduce(merge_images, tiles))

    # back to float arrays scaled by 1/255 for imshow
    merged = [np.asarray(m, dtype='float32') / 255.0 for m in merged]

    fig, axs = plt.subplots(nrows=2, ncols=1)
    for idx, _ in enumerate(axs.flat):
        panel = axs[idx]
        panel.imshow(merged[idx])
        panel.get_xaxis().set_visible(False)
        panel.get_yaxis().set_visible(False)
    plt.show()
import numpy as np
from PIL import Image
import tensorflow as tf
from utils import img2array, array2img
from stn import spatial_transformer_network as transformer

# Script setup for a spatial-transformer demo: loads four images, stacks them
# into one batch and prepares an identity affine transform plus the input
# placeholder for the graph.

# size passed to img2array for every image
DIMS = (600, 600)
data_dir = './data/'

# load 4 cat images
# NOTE(review): expand=True presumably adds a leading batch axis so that the
# axis-0 concatenation below yields a 4-d array -- confirm in utils.img2array
img1 = img2array(data_dir + 'cat1.jpg', DIMS, expand=True)  # , view=True)
img2 = img2array(data_dir + 'cat2.jpg', DIMS, expand=True)
img3 = img2array(data_dir + 'cat3.jpg', DIMS, expand=True)
img4 = img2array(data_dir + 'cat4.jpg', DIMS, expand=True)

# stack the four images along axis 0 and read the batch dimensions back
input_img = np.concatenate([img1, img2, img3, img4], axis=0)
B, H, W, C = input_img.shape
print("Input Img Shape: {}".format(input_img.shape))

# identity transform (2 x 3 affine matrix)
theta = np.array([[1., 0, 0], [0, 1., 0]])

# input placeholder; the first dimension is left unspecified
x = tf.placeholder(tf.float32, [None, H, W, C])

with tf.variable_scope('spatial_transformer'):
    # cast to float32 and flatten the 2 x 3 matrix into 6 values
    theta = theta.astype('float32')
    theta = theta.flatten()

    # define loc net weight and bias
    # number of values in one image
    loc_in = H * W * C
image = cv2.imread(img_path) c0 = cv2.calcHist([image], [0], None, [256], [0.0,255.0]) c1 = cv2.calcHist([image], [1], None, [256], [0.0,255.0]) c2 = cv2.calcHist([image], [2], None, [256], [0.0,255.0]) return np.concatenate((c0,c1,c2)).reshape(768,) # 把图片转化成数值: X_img = [] X_colors = [] X_isbns = [] Y = [] for isbn in pos_list: try: img_path = '../covers/%s.jpg'%isbn X_img.append(img2array(img_path,img_h,img_w)) X_colors.append(getColorHist(img_path)) Y.append(1) X_isbns.append(isbn) except Exception as e: print('Error for picture: %s.jpg'%isbn,e) for isbn in neg_list: try: img_path = '../covers/%s.jpg'%isbn X_img.append(img2array(img_path,img_h,img_w)) X_colors.append(getColorHist(img_path)) Y.append(0) X_isbns.append(isbn) except Exception as e: print('Error for picture: %s.jpg'%isbn,e)
import sys

# make the parent directory importable before the project imports below
sys.path.append("..")

import torch

import model
import utils

if __name__ == "__main__":
    # Smoke test: run one RecurrentAttention step on two sample images and
    # check the shapes of the returned hidden state and location.

    # paths
    plot_dir = "../plots/"
    data_dir = "../data/"

    # load images and move the last axis to position 1
    paths = [data_dir + "./lenna.jpg", data_dir + "./cat.jpg"]
    imgs = torch.cat([
        torch.from_numpy(
            utils.img2array(p, desired_size=[512, 512], expand=True)
        )
        for p in paths
    ]).permute((0, 3, 1, 2))

    B, C, H, W = imgs.shape

    # random previous location, zero previous hidden state
    l_t_prev = torch.FloatTensor(B, 2).uniform_(-1, 1)
    h_t_prev = torch.zeros(B, 256)

    ram = model.RecurrentAttention(64, 3, 2, C, 128, 128, 0.11, 256, 10)
    h_t, l_t, _, _ = ram(imgs, l_t_prev, h_t_prev)

    assert h_t.shape == (B, 256)
    assert l_t.shape == (B, 2)
def convert(target_image, input_img, originals, batch_size=32, iterations=300):
    """
    Train a convnet to learn transformation parameters for a target
    transformation, then apply the learned transform to a folder of images.
    The parameters are supplied to the STN.

    :param target_image: indexable whose first element is the target image;
        that element is repeated ``batch_size`` times as the training target
    :param input_img: not read by the body -- training batches are always
        loaded from "./mask/" via ``loadImages``
    :param originals: path prefix globbed as ``originals + "*.png"``; every
        match is transformed and written to "Results/<file name>"
    :param batch_size: number of masks loaded per training step
    :param iterations: maximum number of training steps; training stops
        early once the loss drops below 0.005
    """
    import os  # local import: only needed to derive the output file name

    H, W, C = (DIMS[0], DIMS[1], 3)
    target_batch = [target_image[0]] * batch_size

    try:
        x = tf.placeholder(tf.float32, [None, H, W, C])
        food_x = tf.placeholder(tf.float32, [None, H, W, C])
        target = tf.placeholder(tf.float32, [None, H, W, C])

        with tf.variable_scope('spatial_transformer'):
            # initial affine parameters: scale 1.1 on both axes, no shift
            theta = np.array([1.1, .0, .0, .0, 1.1, .0]).astype('float32')

            # Conv layers; note the localisation net reads the *target*
            # placeholder, and all three layers use 64 filters
            h0 = lrelu(conv2d(target, 64, name='t_h0_conv'))
            h1 = lrelu(instance_norm(conv2d(h0, 64, name='t_h1_conv'), 't_bn1'))
            h2 = lrelu(instance_norm(conv2d(h1, 64, name='t_h2_conv'), 't_bn2'))

            # Fully connected layer: flatten everything but the batch axis
            shape = h2.get_shape().as_list()
            h2_flat = tf.reshape(
                h2, [-1, reduce(lambda a, b: a * b, shape[1:])])
            l1 = linear(h2_flat, 512, scope="l1")

            # zero weights + theta bias: before any training step the
            # predicted parameters equal theta for every input
            W_loc = tf.Variable(tf.zeros([l1.get_shape()[-1], 6]), name='W_fc1')
            b_loc = tf.Variable(initial_value=theta, name='b_loc')

            # tie everything together: the same predicted parameters warp
            # both the training masks (x) and the images to convert (food_x)
            fc_loc = tf.matmul(l1, W_loc) + b_loc
            h_trans = transformer(x, fc_loc, [H, W])
            f_trans = transformer(food_x, fc_loc, [H, W])

        loss = tf.losses.huber_loss(target, h_trans)
        optim = tf.train.AdamOptimizer(1e-07).minimize(loss)

        tfconfig = tf.ConfigProto(allow_soft_placement=True)
        tfconfig.gpu_options.allow_growth = True

        # the context manager closes the session; no explicit close() needed
        with tf.Session(config=tfconfig) as sess:
            sess.run(tf.global_variables_initializer())

            for step in range(iterations):
                # only the loss is read back; the other tensors the original
                # fetched were never used
                _, batch_loss = sess.run(
                    [optim, loss],
                    feed_dict={
                        x: loadImages("./mask/", batch_size),
                        target: target_batch
                    })
                if step % 10 == 0:
                    print(batch_loss)
                if batch_loss < 0.005:
                    break

            # Transform and save the cropped images.
            # NOTE(review): nothing here creates "Results/" -- presumably it
            # must exist beforehand.
            for filename in glob.glob(originals + "*.png"):
                [warped] = sess.run(
                    [f_trans],
                    feed_dict={
                        food_x: img2array(filename, DIMS, expand=True),
                        target: [target_batch[0]]
                    })
                # basename instead of stripping a hard-coded "./masked/"
                # prefix, so any ``originals`` folder maps to a flat name
                imageio.imwrite(
                    "Results/" + os.path.basename(filename),
                    (warped[0] * 255).astype(np.uint8))
    finally:
        # reset even when training fails, so a later call does not build on
        # top of this call's graph
        tf.reset_default_graph()
_, fc_o, l, w, b, y = sess.run( [optim, fc_loc, loss, W_loc, b_loc, h_trans], feed_dict={ x: loadImages("./mask/", batch_size), target: target_batch }) if i % 10 == 0: print(l) if l < 0.005: break # Transform and save the cropped images: filenames = glob.glob(originals + "*.png") for filename in filenames: [y] = sess.run( [f_trans], feed_dict={ food_x: img2array(filename, DIMS, expand=True), target: [target_batch[0]] }) imageio.imwrite("Results/" + filename.replace("./masked/", ""), (y[0] * 255).astype(np.uint8)) sess.close() tf.reset_default_graph() print("Loading images") target = img2array("./target.jpg", DIMS, expand=True) print("Building net") convert(target, "./mask/", "./masked/")
#for i in range (0, S-batch_size, batch_size): for i in range(iterations): _, fc_o, l, w, b, y = sess.run([optim, fc_loc, loss, W_loc, b_loc, h_trans], feed_dict={x:loadImages("./square/", batch_size), target:target_batch}) if i%10 == 0: print(l) if l < 0.05: break # Transform and save the cropped images: filenames = glob.glob(originals+"**/*.jpg") print(filenames) random.shuffle(filenames) print(filenames) for filename in filenames[:3]: [y] = sess.run([f_trans], feed_dict={food_x:img2array(filename, DIMS, expand=True), target:[target_batch[0]]}) imageio.imwrite("Results/"+filename.replace(".jpg", str(time.time()).replace(".", "")+'.jpg'), (y[0]*255).astype(np.uint8)) sess.close() tf.reset_default_graph() print("Loading images") #target = img2array("./target.jpg", DIMS, expand=True) filenames = glob.glob("./mask/*.png") for filename in filenames: target = img2array(filename, DIMS, expand=True) print("Building net") #folders = ['alc/', 'food/', 'gambling/', 'other/'] #folders = ['food/'] #for folder in folders: convert(target, "./ads/")