Example #1
    def __init__(self, coco_root=path["COCO_ROOT"], vg_path_ann=path["VG_ANN"],
                 path_vg_img=path["VG_IMAGE"],
                 coco_json_file_path=path["COCO_RESTVAL_SPLIT"],
                 word_dict_path=path["WORD_DICT"], image=True, transform=None):
        self.transform = transform
        self.image = image

        path_params = os.path.join(word_dict_path, 'utable.npy')
        # utable.npy stores pickled Python objects, so allow_pickle is needed on recent NumPy
        self.params = np.load(path_params, encoding='latin1', allow_pickle=True)
        self.dico = _load_dictionary(word_dict_path)

        self.path_vg_img = path_vg_img

        ids = vg.get_all_image_data(vg_path_ann)
        regions = vg.get_all_region_descriptions(vg_path_ann)

        annFile = os.path.join(coco_root, "annotations/captions_val2014.json")
        coco = COCO(annFile)
        ids_val_coco = list(coco.imgs.keys())

        # Uncomment the following block to evaluate only on the validation set of the Rest/Val split
        # with open(coco_json_file_path, 'r') as f: # coco_json_file_path = "/home/wp01/users/engilbergem/dev/trunk/CPLApplications/deep/PytorchApplications/coco/dataset.json"
        #     datas = json.load(f)
        # ids_val_coco = [x['cocoid'] for x in datas["images"] if x["split"] == "val"]  # list(coco.imgs.keys())

        self.data = [x for x in zip(ids, regions) if x[0].coco_id in ids_val_coco]
        self.imgs_paths = [x[0].id for x in self.data]
        self.nb_regions = [len(y[1]) for y in self.data]  # region descriptions per image
        self.captions = [x.phrase for y in self.data for x in y[1]]
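A minimal usage sketch for this dataset, assuming the enclosing class is named VgCaptionDataset (the name is hypothetical; the excerpt only shows __init__) and that the path dict points at local COCO and Visual Genome data:

    dataset = VgCaptionDataset(image=False)  # VgCaptionDataset is an assumed name
    print(len(dataset.captions), 'region captions across', len(dataset.data), 'images')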
Example #2
File: space.py  Project: sefeoglu/cogsys
def get_next_local(ids=None):
    images = vgl.get_all_image_data(DATA_DIR)
    all_regions = vgl.get_all_region_descriptions(DATA_DIR)  # slow
    if ids is None:
        # Image ids are 1-based, so include the last image as well
        ids = list(range(1, len(images) + 1))

    for image_id in ids:  # image_id avoids shadowing the id() builtin
        image = images[image_id - 1]
        regions = all_regions[image_id - 1]
        graph = vgl.get_scene_graph(image_id,
                                    images=DATA_DIR,
                                    image_data_dir=DATA_DIR + '/by-id/',
                                    synset_file=DATA_DIR + '/synsets.json')
        yield image, regions, graph
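A minimal usage sketch for the generator above, assuming DATA_DIR points at a local Visual Genome download as in the rest of the module:

    for image, regions, graph in get_next_local(ids=[1, 2, 3]):
        # One image's metadata, its region descriptions, and its scene graph
        print(image.id, len(regions), len(graph.objects))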
Example #3
    def __init__(me, ids, data_dir='./data/'):
        '''data_dir: [string] Gets created and overwritten with 2.3GB of cached data.'''
        me.data_dir = data_dir
        me.download_dataset(me.data_dir)

        if ids is None:
            ids = me.get_all_image_ids()
        me.ids = ids

        all_regions = vgl.get_all_region_descriptions(data_dir)  # slow
        # r here is the list of all regions in one image; keep only the
        # images whose ids were requested (ids must be resolved first,
        # otherwise `in ids` fails when ids is None)
        me.regions = {
            r[0].image.id: r
            for r in all_regions if r[0].image.id in ids
        }
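A minimal usage sketch; the enclosing class name is not shown in the excerpt, so RegionCache below is hypothetical:

    cache = RegionCache(ids=[1, 2, 3])
    total = sum(len(regions) for regions in cache.regions.values())
    print(total, 'region descriptions cached for', len(cache.ids), 'images')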
Example #4
    def process_dataset(self):
        """Load, transform and split dataset."""
        if not osp.exists(self.DATA_FOLDER):
            os.makedirs(self.DATA_FOLDER)

        obj_idx, img_regions = self.filter_regions()
        num_images = len(img_regions)

        print('Loading region bounding boxes...')
        region_descriptions = vg.get_all_region_descriptions(self.data_root)
        bar = progressbar.ProgressBar()
        for region_group in bar(region_descriptions):
            for region in region_group:
                # Attach the full region annotation to its (image id, region id)
                # slot, keeping the category assigned by filter_regions()
                if region.image.id in img_regions:
                    if region.id in img_regions[region.image.id]:
                        cat = img_regions[region.image.id][region.id]
                        img_regions[region.image.id][region.id] = (region, cat)

        print('Splitting dataset...')
        # Train and val each get a SPLIT_PROPORTION share; the remainder is the test set
        num_images_split = int(np.ceil(num_images * self.SPLIT_PROPORTION))

        image_id = np.array(list(img_regions.keys()))
        idx_perm = np.random.permutation(num_images)

        train_id = image_id[idx_perm[:num_images_split]]
        val_id = image_id[idx_perm[num_images_split:num_images_split * 2]]
        test_id = image_id[idx_perm[num_images_split * 2:]]

        train_images = [list(img_regions[img].values()) for img in train_id]
        val_images = [list(img_regions[img].values()) for img in val_id]
        test_images = [list(img_regions[img].values()) for img in test_id]

        train_file_path = osp.join(self.DATA_FOLDER, self.TRAIN_FILE)
        val_file_path = osp.join(self.DATA_FOLDER, self.VAL_FILE)
        test_file_path = osp.join(self.DATA_FOLDER, self.TEST_FILE)
        obj_idx_file_path = osp.join(self.DATA_FOLDER, self.OBJ_IDX_FILE)

        print('Saving data...')
        torch.save(train_images, train_file_path)
        torch.save(val_images, val_file_path)
        torch.save(test_images, test_file_path)
        torch.save(obj_idx, obj_idx_file_path)
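The splits above are written with torch.save, so they can be restored with torch.load; a minimal sketch (the concrete file names depend on DATA_FOLDER and the *_FILE constants, so the path below is an assumption):

    import torch

    # Each entry holds the (region, category) pairs of one image,
    # as assembled by process_dataset above.
    train_images = torch.load('data/train.pth')  # assumed path
    print(len(train_images), 'training images')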
Example #5
def vg_noun_sequences(opt):  # naming this function vg() would shadow the visual_genome.local import
    all_images = vg.get_all_image_data(dir)  # dir is the module-level dataset path (see Example #6)
    all_discriptions = vg.get_all_region_descriptions(dir)

    all_noun_sequenses_vg = []
    for i, discriptions in enumerate(all_discriptions):
        noun_sequenses_vg = {}
        noun_sequenses_vg['id'] = all_images[i].id
        noun_sequenses_vg['coco_id'] = all_images[i].coco_id
        sequenses = []
        for region in discriptions:
            splits = region.phrase.lower().split()
            e_words_stem_tag = make_stemwords(splits)  # add tag
            nouns = pickup_classnoun(e_words_stem_tag,
                                     None)  # pickup noun and make seq
            #         print('----------------------------------------')
            #         print(splits)
            #         print(e_words_stem_tag)
            #         print(nouns)
            sequenses.append(nouns)
        noun_sequenses_vg['sequences'] = sequenses
        #     all_noun_sequenses_vg.append(noun_sequenses_vg)
        all_noun_sequenses_vg.append(noun_sequenses_vg)
        if i % 1000 == 0:
            print('{}/{} ({:.2f}%) completed!'.format(
                i, len(all_images), i * 100 / len(all_images)))

    for i in range(len(all_noun_sequenses_vg)):
        all_noun_sequenses_vg[i]['S'] = []
        all_noun_sequenses_vg[i]['rate'] = []  # initialize before appending below
        discriptions = all_discriptions[i]
        for j in range(len(discriptions)):
            S = discriptions[j].width * discriptions[j].height  # region area in pixels
            all_noun_sequenses_vg[i]['S'].append(S)
            all_noun_sequenses_vg[i]['rate'].append(S)

    save_path = '/mnt/poplin/share/dataset/visualgenome/all_noun_sequenses_vg_mod.json'
    with open(save_path, 'w') as f:
        json.dump(all_noun_sequenses_vg, f)
Example #6
sys.path.append('/home/nakamura/project/python3_selfsequential')

from torchvision import transforms as trn
preprocess = trn.Compose([
    #trn.ToTensor(),
    trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

from misc.resnet_utils import myResnet
import misc.resnet as resnet

sys.path.append('/home/nakamura/project/python3_selfsequential/vg')
import visual_genome.local as vg
dir = '/mnt/poplin/share/dataset/visualgenome'  # dataset root; note this shadows the dir() builtin
all_images = vg.get_all_image_data(dir)
all_discriptions = vg.get_all_region_descriptions(dir)
print('vg_loaded!')

import numpy as np
import torch
import pdb
import copy


# NMS (non-maximum suppression) function
def nms_cpu(dets, thresh):
    dets = np.array(dets)
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
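    # The example is truncated at this point. A standard greedy NMS
    # completion, assuming a [x1, y1, x2, y2, score] layout for dets
    # (a sketch, not the original author's code), would continue:
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # box indices by descending score
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the highest-scoring box with the rest
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes whose overlap with the kept box is <= thresh
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep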
Example #7
    def process_dataset(self):
        try:
            os.makedirs(osp.join(self.data_path, self.top_folder))
            os.makedirs(osp.join(self.data_path, self.processed_folder))
        except OSError as e:
            if e.errno == errno.EEXIST:
                pass
            else:
                raise

        # print("Generating top images set...")
        # img_top_ids = self.get_top_images()
        self.region_objects, self.obj_idx = self.__load_region_objects()

        print("Processing region descriptions...")
        region_descriptions_full = vg.get_all_region_descriptions(
            data_dir=self.root)

        # Flatten the per-image groups into a single list of regions
        region_descriptions = []
        for region_group in region_descriptions_full:
            region_descriptions += region_group

        # del region_descriptions_full

        corpus_path = osp.join(self.data_path, self.processed_folder,
                               self.corpus_file)

        if not osp.exists(corpus_path):
            print("Generating text corpus...")
            corpus = Corpus()
            for i, region in enumerate(region_descriptions):
                print("Processing region: {0}".format(i))
                corpus.add_to_corpus(region.phrase)

            corpus.dictionary.add_word('<unk>')
            print("Saving corpus to file...")
            with open(corpus_path, 'wb') as f:
                torch.save(corpus, f)

        # print("Selecting region descriptions from top images...")
        # regions = []
        # bar = progressbar.ProgressBar()
        # for region in bar(region_descriptions_full):
        #     # print("Processing region: {0}".format(i))
        #     if region[0].image.id in img_top_ids:
        #         regions += region
        regions, regions_objects = self.__filter_regions_by_class(
            region_descriptions)

        print("Splitting region descriptions...")
        train_prop = int(np.ceil(len(regions) * 0.6))       # 60% train
        val_train_prop = int(np.ceil(len(regions) * 0.15))  # 15% val; remainder is test

        regions = np.array(regions)
        np.random.shuffle(regions)

        train_regions = regions[:train_prop].tolist()
        val_regions = regions[train_prop:train_prop + val_train_prop].tolist()
        test_regions = regions[train_prop + val_train_prop:].tolist()

        print("Saving train text corpus...")
        train_text_path = osp.join(self.data_path, self.top_folder,
                                   self.train_text_file)
        with codecs.open(train_text_path, 'w', 'utf-8') as f:
            for region in train_regions:
                f.write(region.phrase + '\n')

        print("Saving validation text corpus...")
        val_text_path = osp.join(self.data_path, self.top_folder,
                                 self.val_text_file)
        with codecs.open(val_text_path, 'w', 'utf-8') as f:
            for region in val_regions:
                f.write(region.phrase + '\n')

        print("Saving test text corpus...")
        test_text_path = osp.join(self.data_path, self.top_folder,
                                  self.test_text_file)
        with codecs.open(test_text_path, 'w', 'utf-8') as f:
            for region in test_regions:
                f.write(region.phrase + '\n')

        print("Saving training regions...")
        train_file = osp.join(self.data_path, self.top_folder,
                              self.region_train_file)
        with open(train_file, 'wb') as f:
            torch.save(train_regions, f)

        print("Saving validation regions...")
        val_file = osp.join(self.data_path, self.top_folder,
                            self.region_val_file)
        with open(val_file, 'wb') as f:
            torch.save(val_regions, f)

        print("Saving testing regions...")
        test_file = osp.join(self.data_path, self.top_folder,
                             self.region_test_file)
        with open(test_file, 'wb') as f:
            torch.save(test_regions, f)

        print("Saving dataset objects per region...")
        regions_obj_file = osp.join(self.data_path, self.top_folder,
                                    self.region_objects_file)
        with open(regions_obj_file, 'wb') as f:
            torch.save(regions_objects, f)

        print("Saving object to index map...")
        obj_idx_path = osp.join(self.data_path, self.top_folder,
                                self.obj_idx_file)
        with open(obj_idx_path, 'wb') as f:
            torch.save(self.obj_idx, f)

        print("Done!")