from utils.preprocessing import preprocess_captions, preprocess_image, repeat_imgs
from pycocotools.coco import COCO
# Refexp is the COCO-style wrapper from the Google Refexp toolbox; adjust the
# import path to wherever the toolbox lives in your checkout.
from refexp import Refexp
import numpy as np
import cPickle as pickle

if __name__ == '__main__':
    # Specify dataset paths.
    refexp_filename = 'google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'
    coco_filename = 'external/coco/annotations/instances_train2014.json'
    datasetDir = 'external/coco/'
    datasetType = 'train2014'

    # Create Refexp instance.
    refexp = Refexp(refexp_filename, coco_filename)

    # Get image ids of all images containing human beings.
    categoryIds = refexp.getCatIds(catNms=['person'])
    imgIds = refexp.getImgIds(catIds=categoryIds)
    nImgsAvailable = len(imgIds)

    # Select 2 random images: sample indices from the full range of available
    # images, not just from the first nImgs of them.
    nImgsDesired = 2
    nImgs = min(nImgsDesired, nImgsAvailable)
    np.random.seed(0)
    randImgIndices = np.random.choice(nImgsAvailable, size=nImgs, replace=False)
    randImgIds = [imgIds[int(idx)] for idx in randImgIndices]
    coco_imgs = refexp.loadImgs(randImgIds)

    # The actual images as numpy arrays.
    images = [preprocess_image('%s/images/%s/%s' % (datasetDir, datasetType, img['file_name']))
              for img in coco_imgs]
    images = np.squeeze(np.asarray(images))
# Imports needed by this function. START_TOKEN, STOP_TOKEN and
# partial_captions_and_next_words are assumed to live in utils.preprocessing;
# adjust the import paths to match the project layout.
import numpy as np
import cPickle as pickle
from keras.preprocessing.text import text_to_word_sequence
from refexp import Refexp
from utils.preprocessing import START_TOKEN, STOP_TOKEN, partial_captions_and_next_words


def preprocess_refexp_images(stream_num, stream_size, word_to_idx, max_cap_len,
                             coco_dir, category_names=[],
                             out_file='../keras_vgg_19/savedoc', NO_PADDING=False):
    coco_filename = coco_dir + '/annotations/instances_train2014.json'
    refexp_filename = '../google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'
    refexp = Refexp(refexp_filename, coco_filename)

    # Choose categories/images.
    catIds = refexp.getCatIds(catNms=category_names)
    imgIds = list(set(refexp.getImgIds(catIds=catIds)))
    annIds = refexp.getAnnIds(imgIds=imgIds)
    anns = refexp.loadAnns(ids=annIds)

    # Collect all refexp ids, bboxes and image ids in these annotations,
    # skipping captions with weird (non-ASCII) symbols.
    refIds = []
    bboxes = []
    refImgIds = []
    for ann in anns:
        for ref_id in ann['refexp_ids']:
            if ref_id not in [146, 400, 923, 21409, 35384, 38589, 46630,
                              47673, 65639, 70715, 82742]:
                refIds.append(ref_id)
                bboxes.append(ann['bbox'])
                refImgIds.append(ann['image_id'])

    # Get caption sequences, with added start and stop tokens.
    captions = [refexp.dataset['refexps'][ref_id]['raw'].encode('ascii')
                for ref_id in refIds]
    caption_seqs = [[START_TOKEN] + text_to_word_sequence(c) + [STOP_TOKEN]
                    for c in captions]
    caption_lengths = [len(seq) for seq in caption_seqs]

    # Filter out the long captions.
    refImgIds = [img_id for i, img_id in enumerate(refImgIds)
                 if caption_lengths[i] <= max_cap_len]
    bboxes = [bbox for i, bbox in enumerate(bboxes)
              if caption_lengths[i] <= max_cap_len]
    caption_seqs = [seq for i, seq in enumerate(caption_seqs)
                    if caption_lengths[i] <= max_cap_len]
    # Do not move this before the other filter steps!
    caption_lengths = [l for l in caption_lengths if l <= max_cap_len]

    total_num_partial_captions = sum(caption_lengths)

    # Repeat image id and bounding box for each partial caption.
    repeated_ids = [[img_id] * (l - 1)
                    for img_id, l in zip(refImgIds, caption_lengths)]
    image_ids = [img_id for rep_id in repeated_ids for img_id in rep_id]
    repeated_bboxes = [[bbox] * (l - 1)
                       for bbox, l in zip(bboxes, caption_lengths)]
    cap_bbox = [bbox for rep_bbox in repeated_bboxes for bbox in rep_bbox]

    partial_caps, next_words = partial_captions_and_next_words(
        caption_seqs, word_to_idx, max_cap_len)  # was: preprocess_captions(caption_seqs, word_to_idx, max_cap_len)

    print(len(image_ids), len(partial_caps), len(cap_bbox))
    assert len(image_ids) == len(partial_caps)
    assert len(image_ids) == len(cap_bbox)

    '''
    # Determine how many (partial caption, image) examples to take to obtain
    # `num_imgs_to_sample` total distinct images (including all partial captions)
    if num_caps_to_sample < total_num_images:
        number_of_items = 0
        for i, l in enumerate(caption_lengths):
            if i >= num_caps_to_sample:
                break
            number_of_items += l
    else:
        print total_num_images, ' were requested, but only ', num_caps_to_sample, \
            ' are available in this category. Processing all images in the category...'
        number_of_items = len(partial_caps)
    '''

    X = [0, 0]
    # TODO: handle the case where you request indices out of range.
    number_of_items = min(stream_size, total_num_partial_captions)
    start = (stream_num - 1) * stream_size
    end = stream_num * stream_size
    ids_and_bboxes = zip(image_ids, cap_bbox)
    X[0] = ids_and_bboxes[start:end]
    X[1] = np.asarray(partial_caps[start:end])
    y = np.asarray(next_words[start:end])
    out = X, y

    with open(out_file, 'wb') as handle:
        pickle.dump(out, handle)
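
# Example usage (a sketch, not part of the original script): `word_to_idx` and
# max_cap_len=15 below are placeholder assumptions and would normally come from
# the project's own caption-preprocessing / vocabulary-building step.
#
#   word_to_idx = {START_TOKEN: 1, STOP_TOKEN: 2, 'the': 3}  # toy vocabulary
#   preprocess_refexp_images(stream_num=1, stream_size=10000,
#                            word_to_idx=word_to_idx, max_cap_len=15,
#                            coco_dir='external/coco',
#                            category_names=['person'],
#                            out_file='../keras_vgg_19/savedoc')
#
# The pickle written to `out_file` then holds X = [(image_id, bbox) pairs,
# partial-caption arrays] and y = next-word array for the requested stream slice.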