Example #1
import numpy as np

from utils.preprocessing import preprocess_captions, preprocess_image, repeat_imgs
from pycocotools.coco import COCO
from refexp import Refexp  # Refexp class from the Google Refexp toolbox; adjust the import path to your setup
import cPickle as pickle

if __name__ == '__main__':
    # Specify datasets path.
    refexp_filename = 'google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'
    coco_filename = 'external/coco/annotations/instances_train2014.json'
    datasetDir = 'external/coco/'
    datasetType = 'train2014'

    # Create Refexp instance.
    refexp = Refexp(refexp_filename, coco_filename)

    # Get image ids of all images containing human beings
    categoryIds = refexp.getCatIds(catNms=['person'])
    imgIds = refexp.getImgIds(catIds=categoryIds)
    nImgsAvailable = len(imgIds)

    # Select 2 random images
    nImgsDesired = 2
    nImgs = min(nImgsDesired, nImgsAvailable)
    np.random.seed(0)
    # sample image indices from all available images, not just the first nImgs
    randImgIndices = np.random.choice(nImgsAvailable, size=nImgs, replace=False)
    randImgIds = [imgIds[int(idx)] for idx in randImgIndices]

    coco_imgs = refexp.loadImgs(randImgIds)

    # The actual images as numpy arrays
    images = [preprocess_image('%s/images/%s/%s' % (datasetDir, datasetType, img['file_name'])) for img in coco_imgs]
    images = np.squeeze(np.asarray(images))
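
The preprocess_image helper above comes from the project's own utils.preprocessing module and is not shown here. Purely as an illustration, a helper of this kind for a VGG-style network might look like the sketch below; the 224x224 target size, the Keras calls, and the function name are assumptions, not the project's actual implementation.

import numpy as np
from keras.preprocessing import image as keras_image
from keras.applications.vgg19 import preprocess_input

# Hypothetical sketch of a preprocess_image-style helper (not the project's code):
# load an image from disk, resize it to the network input size, add a batch
# dimension and apply the standard VGG channel-mean preprocessing.
def preprocess_image_sketch(path, target_size=(224, 224)):
    img = keras_image.load_img(path, target_size=target_size)  # PIL image
    arr = keras_image.img_to_array(img)                        # (224, 224, 3) float array
    arr = np.expand_dims(arr, axis=0)                           # (1, 224, 224, 3)
    return preprocess_input(arr)                                 # subtract ImageNet channel means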
Example #2
import numpy as np
import cPickle as pickle

from keras.preprocessing.text import text_to_word_sequence
from refexp import Refexp  # Refexp class from the Google Refexp toolbox; adjust the import path to your setup

# START_TOKEN, STOP_TOKEN and partial_captions_and_next_words are assumed to be
# defined elsewhere in this module.


def preprocess_refexp_images(stream_num,
                             stream_size,
                             word_to_idx,
                             max_cap_len,
                             coco_dir,
                             category_names=[],
                             out_file='../keras_vgg_19/savedoc',
                             NO_PADDING=False):

    coco_filename = coco_dir + '/annotations/instances_train2014.json'
    refexp_filename = '../google_refexp_dataset_release/google_refexp_train_201511_coco_aligned.json'

    refexp = Refexp(refexp_filename, coco_filename)

    # choose categories/images
    catIds = refexp.getCatIds(catNms=category_names)
    imgIds = list(set(refexp.getImgIds(catIds=catIds)))
    annIds = refexp.getAnnIds(imgIds=imgIds)
    anns = refexp.loadAnns(ids=annIds)

    refIds = []
    bboxes = []
    refImgIds = []
    # get all refexp ids and bboxes and imageids in these annotations, except for captions with weird symbols
    for ann in anns:
        for ref_id in ann['refexp_ids']:
            if ref_id not in [
                    146, 400, 923, 21409, 35384, 38589, 46630, 47673, 65639,
                    70715, 82742
            ]:
                refIds.append(ref_id)
                bboxes.append(ann['bbox'])
                refImgIds.append(ann['image_id'])

    # get caption sequences, with added start and stop tokens
    captions = [
        refexp.dataset['refexps'][ref_id]['raw'].encode('ascii')
        for ref_id in refIds
    ]
    caption_seqs = [[START_TOKEN] + text_to_word_sequence(c) + [STOP_TOKEN]
                    for c in captions]
    caption_lengths = [len(seq) for seq in caption_seqs]

    # filter out the long captions
    refImgIds = [
        img_id for i, img_id in enumerate(refImgIds)
        if caption_lengths[i] <= max_cap_len
    ]
    bboxes = [
        bbox for i, bbox in enumerate(bboxes)
        if caption_lengths[i] <= max_cap_len
    ]
    caption_seqs = [
        seq for i, seq in enumerate(caption_seqs)
        if caption_lengths[i] <= max_cap_len
    ]
    # do not move this step before the other filter steps above!
    caption_lengths = [l for l in caption_lengths if l <= max_cap_len]
    # each caption of length l yields l - 1 (partial caption, next word) pairs
    total_num_partial_captions = sum(l - 1 for l in caption_lengths)

    # repeat image id and bounding box for each partial caption
    repeated_ids = [[img_id] * (l - 1)
                    for img_id, l in zip(refImgIds, caption_lengths)]
    image_ids = [img_id for rep_id in repeated_ids for img_id in rep_id]
    repeated_bboxes = [[bbox] * (l - 1)
                       for bbox, l in zip(bboxes, caption_lengths)]
    cap_bbox = [bbox for rep_bbox in repeated_bboxes for bbox in rep_bbox]

    partial_caps, next_words = partial_captions_and_next_words(
        caption_seqs, word_to_idx, max_cap_len
    )  #preprocess_captions(caption_seqs, word_to_idx, max_cap_len)

    print(len(image_ids), len(partial_caps), len(cap_bbox))
    assert (len(image_ids) == len(partial_caps))
    assert (len(image_ids) == len(cap_bbox))
    '''
    # Determine how many (partial caption, image) examples to take to obtain
    # `num_imgs_to_sample` total distinct images (including all partial captions)
    if num_caps_to_sample < total_num_images:
        number_of_items = 0
        for i, l in enumerate(caption_lengths):
            if i >= num_caps_to_sample:
                break
            number_of_items += l
    else:
        print total_num_images, ' were requested, but only ', num_caps_to_sample, \
            ' are available in this category. Processing all images in the category...'
        number_of_items = len(partial_caps)
    '''

    X = [0, 0]
    # Take the `stream_num`-th chunk of `stream_size` examples, clamping the end
    # index so that a partially filled final chunk does not run out of range.
    start = (stream_num - 1) * stream_size
    end = min(stream_num * stream_size, total_num_partial_captions)
    ids_and_bboxes = list(zip(image_ids, cap_bbox))
    X[0] = ids_and_bboxes[start:end]
    X[1] = np.asarray(partial_caps)[start:end]
    y = np.asarray(next_words)[start:end]
    out = X, y

    # pickle the (inputs, targets) pair to disk; binary mode is required for pickle
    with open(out_file, 'wb') as handle:
        pickle.dump(out, handle)
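
partial_captions_and_next_words is called above but not defined in this snippet. A minimal sketch of what such a helper could do follows, assuming word_to_idx maps tokens to integer ids and prefixes are zero-padded to max_cap_len; the padding convention, unknown-word handling, and function name are assumptions rather than the original implementation.

import numpy as np

# Hypothetical sketch (not the original helper): for each caption sequence, emit
# one example per prefix, i.e. the partial caption as padded word indices
# together with the index of the word that follows it.
def partial_captions_and_next_words_sketch(caption_seqs, word_to_idx, max_cap_len):
    partial_caps, next_words = [], []
    for seq in caption_seqs:
        idxs = [word_to_idx.get(w, 0) for w in seq]  # unknown words map to 0 (an assumption)
        for i in range(1, len(idxs)):
            prefix = idxs[:i]
            padded = prefix + [0] * (max_cap_len - len(prefix))  # zero-pad to fixed length
            partial_caps.append(padded)
            next_words.append(idxs[i])
    return np.asarray(partial_caps), np.asarray(next_words)

# A call to preprocess_refexp_images might then look like the following
# (the vocabulary, stream parameters, and paths are placeholders):
# preprocess_refexp_images(stream_num=1, stream_size=1000, word_to_idx=word_to_idx,
#                          max_cap_len=20, coco_dir='external/coco',
#                          category_names=['person'], out_file='refexp_stream_1.pkl')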