def get_random_cached_bottlenecks(image_lists, how_many, category,
                                  bottleneck_dir, image_dir, bottle_func,
                                  architecture='inception_v3'):
    """Retrieves bottleneck values for cached images.

    If no distortions are being applied, this function can retrieve the
    cached bottleneck values directly from disk for images. It either picks
    a random set of images from the specified category or walks every image
    of that category.

    Args:
        image_lists: Mapping of label name -> mapping of category -> list of
            image names.
        how_many: If non-negative, the number of random samples to draw
            (labels and images are drawn independently, so repeats are
            possible). If negative, every image of ``category`` is used.
        category: Key selecting which image list of each label to read.
        bottleneck_dir: Root directory of the cached bottleneck files.
        image_dir: Root directory of the source images.
        bottle_func: Callable forwarded to get_or_create_bottleneck to
            compute a bottleneck on a cache miss.
        architecture: Architecture name forwarded to
            get_or_create_bottleneck (part of the cache file name).

    Returns:
        A tuple ``(bottlenecks, ground_truths, filenames)`` of numpy arrays.
        ``ground_truths`` holds the integer index of each sample's label in
        ``image_lists.keys()`` order (not a one-hot vector).
    """
    label_names = list(image_lists.keys())
    class_count = len(label_names)
    bottlenecks = []
    ground_truths = []
    filenames = []

    def collect(label_index, label_name, image_index):
        """Fetch one sample and append it to the three result lists."""
        image_name = get_image_path(image_lists, label_name, image_index,
                                    image_dir, category)
        # `architecture` must be passed by keyword: the next positional
        # parameter of get_or_create_bottleneck is `distorted`, so a
        # positional architecture string would switch distortion on.
        bottleneck = get_or_create_bottleneck(
            image_lists, label_name, image_index, image_dir, category,
            bottleneck_dir, bottle_func, architecture=architecture)
        bottlenecks.append(bottleneck)
        ground_truths.append(label_index)
        filenames.append(image_name)

    if how_many >= 0:
        # Retrieve a random sample of bottlenecks.
        for _ in range(how_many):
            label_index = random.randrange(class_count)
            # The index may exceed the number of images of the label;
            # presumably get_image_path wraps it around -- TODO confirm.
            image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
            collect(label_index, label_names[label_index], image_index)
    else:
        # Retrieve all bottlenecks.
        for label_index, label_name in enumerate(label_names):
            for image_index, _ in enumerate(image_lists[label_name][category]):
                collect(label_index, label_name, image_index)

    return np.array(bottlenecks), np.array(ground_truths), np.array(filenames)
def get_or_create_bottleneck(image_lists, label_name, image_index, image_dir,
                             category, bottleneck_dir, bottle_func,
                             distorted=False, architecture='inception_v3'):
    """Return the bottleneck for one image, computing and caching it if needed.

    The cache file lives at the path get_image_path yields under
    ``bottleneck_dir``, suffixed with ``_<architecture>.npy``. When that file
    cannot be loaded, the bottleneck is computed with ``bottle_func``, saved
    to the cache file and then read back.

    Args:
        image_lists: Image list structure forwarded to get_image_path.
        label_name: Label whose image is requested.
        image_index: Index of the image, forwarded to get_image_path.
        image_dir: Root directory of the source images.
        category: Category forwarded to get_image_path.
        bottleneck_dir: Root directory of the cached bottleneck files.
        bottle_func: Callable mapping a loaded image to bottleneck values.
        distorted: When true, the image name is treated as a distorted
            variant (``<original>_<n>.jpg``): the original image is loaded
            and passed through distort_image before ``bottle_func``.
        architecture: Architecture name used in the cache file name.

    Returns:
        The bottleneck values as loaded by ``np.load`` from the cache file.
    """
    target_size = (IM_WIDTH, IM_HEIGHT)
    image_file = get_image_path(image_lists, label_name, image_index,
                                image_dir, category)
    bottle_file = get_image_path(image_lists, label_name, image_index,
                                 bottleneck_dir, category) + \
        '_' + architecture + '.npy'

    try:
        return np.load(bottle_file)
    except Exception as e:
        # Any load failure (missing or unreadable file) is treated as a
        # cache miss and falls through to regeneration.
        print('Bottleneck not found, creating bottleneck...\n{}'.format(e))

    # np.save does not create missing directories.
    cache_dir = os.path.dirname(bottle_file)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    if distorted:
        # Strip only the trailing "_<n>.jpg" distortion suffix. The raw,
        # escaped and anchored pattern keeps names such as "foo_12.jpg_0.jpg"
        # from being cut down to "foo".
        original_file = re.sub(r'_\d+\.jpg$', '', image_file)
        img = image.load_img(original_file, target_size=target_size)
        img = distort_image(img, ROTATION_RANGE, WIDTH_SHIFT_RANGE,
                            BRIGHTNESS_RANGE, SHEAR_RANGE,
                            CHANNEL_SHIFT_RANGE, HORIZONTAL_FLIP)
    else:
        img = image.load_img(image_file, target_size=target_size)

    np.save(bottle_file, bottle_func(img))
    # Read the file back so a fresh result is exactly what later cache hits
    # will return.
    return np.load(bottle_file)
def feed_data(image_lists, category, image_dir, generator=False,
              how_many=None):
    """Load preprocessed images and one-hot labels for one category.

    Args:
        image_lists: Mapping of label name -> mapping of category -> list of
            image names.
        category: Key selecting which image list of each label to read.
        image_dir: Root directory of the source images.
        generator: When true, return a batch iterator instead of loading the
            whole category at once.
        how_many: Number of random samples per batch; must be an integer
            when ``generator`` is true, unused otherwise.

    Returns:
        With ``generator`` false: a tuple ``(inputs, truths)`` of numpy
        arrays covering every image of ``category``; each row of ``truths``
        is a one-hot vector with one entry per label.
        With ``generator`` true: an iterator that never raises
        StopIteration and whose every ``next()`` yields such a tuple for
        ``how_many`` randomly drawn samples.
    """
    label_names = list(image_lists.keys())
    class_count = len(label_names)
    target_size = (IM_WIDTH, IM_HEIGHT)

    def load_example(label_index, image_index):
        """Return (preprocessed image, one-hot label) for one sample."""
        path = get_image_path(image_lists, label_names[label_index],
                              image_index, image_dir, category)
        img = image.load_img(path, target_size=target_size)
        inp = preprocess_img(img, expand_dim=False)
        y = np.zeros(class_count)
        y[label_index] = 1
        return inp, y

    if generator:
        class TrainBatchGen:
            """Iterator producing random batches of ``how_many`` samples."""

            def __init__(self, image_lists, category, image_dir, how_many):
                self.image_lists = image_lists
                self.category = category
                self.image_dir = image_dir
                self.how_many = how_many

            def __iter__(self):
                return self

            def __next__(self):
                # Use fresh lists for every batch; accumulating into lists
                # shared across calls would make each batch contain all
                # previous batches as well.
                batch_inputs, batch_truths = [], []
                for _ in range(self.how_many):
                    label_index = random.randrange(class_count)
                    # The index may exceed the number of images of the
                    # label; presumably get_image_path wraps it around --
                    # TODO confirm.
                    image_index = random.randrange(
                        MAX_NUM_IMAGES_PER_CLASS + 1)
                    inp, y = load_example(label_index, image_index)
                    batch_inputs.append(inp)
                    batch_truths.append(y)
                return (np.array(batch_inputs), np.array(batch_truths))

        return TrainBatchGen(image_lists, category, image_dir, how_many)

    inputs, truths = [], []
    for label_index, label_name in enumerate(label_names):
        for image_index, _ in enumerate(image_lists[label_name][category]):
            inp, y = load_example(label_index, image_index)
            inputs.append(inp)
            truths.append(y)
    return (np.array(inputs), np.array(truths))
def __next__(self):
    """Draw ``how_many`` random samples and return them as numpy arrays.

    Each sample is a preprocessed image plus a one-hot label vector of
    length ``class_count``. Samples are appended to ``inputs`` and
    ``truths``, which are defined outside this method.

    NOTE(review): because those two lists are not reset here, they keep
    growing across calls if they are shared between calls -- confirm that
    this is intended.
    """
    for _ in range(how_many):
        cls_idx = random.randrange(class_count)
        cls_name = list(image_lists.keys())[cls_idx]
        img_idx = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)

        img_path = get_image_path(image_lists, cls_name, img_idx,
                                  image_dir, category)
        loaded = image.load_img(img_path, target_size=target_size)
        inputs.append(preprocess_img(loaded, expand_dim=False))

        # One-hot encode the sampled label.
        one_hot = np.zeros(class_count)
        one_hot[cls_idx] = 1
        truths.append(one_hot)

    batch_x = np.array(inputs)
    batch_y = np.array(truths)
    return (batch_x, batch_y)
def cache_distort_bottlenecks(image_lists, bottle_func,
                              architecture='inception_v3'):
    """Generate distorted variants of the training images in image_lists.

    Walks every label and the 'training', 'testing' and 'validation'
    categories, loads the sampled images and, for training images only,
    distorts each one ``ARGS.times_per_image`` times. The original image and
    its first distortion are written to ``ARGS.image_dir + '_distorted'`` so
    the result can be inspected.

    As written, ``bottle_func`` and ``architecture`` are not used, and the
    returned dictionary is never filled in.

    Args:
        image_lists: Mapping of label name -> mapping of category -> list of
            image names.
        bottle_func: Unused in this function body.
        architecture: Unused in this function body.

    Returns:
        An empty dictionary.
    """
    distorted_image_lists = {}
    target_size = (IM_WIDTH, IM_HEIGHT)
    tally = {'created': 0, 'skipped': 0}

    def count(count_type='create'):
        """Bump one counter and report progress every 100 items."""
        if count_type == 'skipped':
            tally['skipped'] += 1
            if tally['skipped'] % 100 == 0:
                print('{} existing bottlenecks skipped.'.format(
                    tally['skipped']))
            return
        tally['created'] += 1
        if tally['created'] % 100 == 0:
            print('{} bottlenecks created.'.format(tally['created']))

    def print_total():
        """Report the final value of both counters."""
        print('{} bottlenecks created in total.'.format(tally['created']))
        print('{} existing bottlenecks skipped in total.'.format(
            tally['skipped']))

    def distort_training_image(img, image_name):
        """Distort one image ARGS.times_per_image times, counting each."""
        for i in range(ARGS.times_per_image):
            distorted_image_name = image_name + '_{}.jpg'.format(i)
            distorted_image = distort_image(img, ROTATION_RANGE,
                                            WIDTH_SHIFT_RANGE,
                                            BRIGHTNESS_RANGE, SHEAR_RANGE,
                                            CHANNEL_SHIFT_RANGE,
                                            HORIZONTAL_FLIP)
            if i == 0:
                # Keep the original and its first distortion on disk as a
                # visual sample.
                label_image_dir = ARGS.image_dir + '_distorted'
                ensure_dir_exists(label_image_dir)
                img.save(os.path.join(label_image_dir, image_name))
                distorted_image.save(
                    os.path.join(label_image_dir, distorted_image_name))
            count()

    for label_name in image_lists.keys():
        print('Current label: {}'.format(label_name))
        per_category = image_lists[label_name]
        for category in ('training', 'testing', 'validation'):
            names = per_category[category]
            print('Current category: {}'.format(category))
            for image_index, image_name in enumerate(names):
                # DEBUG ONLY: only about 2 in 1000 images are processed.
                if random.randrange(1000) < 998:
                    continue
                source_path = get_image_path(image_lists, label_name,
                                             image_index, ARGS.image_dir,
                                             category)
                img = image.load_img(source_path, target_size=target_size)
                count()
                # Only training images get distorted variants.
                if category == 'training':
                    distort_training_image(img, image_name)

    print_total()
    return distorted_image_lists