Example #1
    def __init__(self, **kwargs):

        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        split = kwargs.get('split')

        data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                             split + '.json')

        with open(data_json, 'r') as f:
            self.data = json.load(f)

        # only use the questions having exactly one bbox as answer
        datanew = []
        for ent in self.data:
            gtbox = ent['gtbox']
            if len(gtbox) == 1 and len(gtbox[0]) != 0:
                datanew.append(ent)
        self.data = datanew

        dictfile = kwargs.get('dictionaryfile')
        self.dictionary = Dictionary.load_from_file(dictfile)
        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.spatial = True
        self.image_features_path_coco = kwargs.get('coco_bottomup')
        self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
        print("Dataset [{}] loaded....".format(dataset))
        print("Split [{}] has {} ref exps.".format(split, len(self.data)))
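The id_to_index helper called above is not shown in the snippet. Below is a minimal sketch of what it might look like, assuming the HDF5 feature file exposes an 'ids' dataset that lists the COCO image id stored in each row; both the body and the dataset name are assumptions, not the repository's actual code.

import h5py

def id_to_index(self, features_path):
    # Map each COCO image id to its row index in the HDF5 feature file.
    # 'ids' is a hypothetical dataset name; the real file layout may differ.
    with h5py.File(features_path, 'r') as f:
        ids = f['ids'][:]
    return {int(coco_id): i for i, coco_id in enumerate(ids)}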
Example #2
    def __init__(self, **kwargs):

        file = kwargs.get('file')

        self.isnms = kwargs.get('isnms')
        self.trainembd = kwargs.get('trainembd')

        # 6 position-encoded vectors, as used by irls
        self.spatial = True

        with open(file, 'rb') as f:
            self.data = pickle.load(f)

        if self.trainembd:
            self.dictionary = Dictionary.load_from_file(
                kwargs.get('dictionaryfile'))

        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.pool_features_path_coco = kwargs.get('coco_pool_features')
        self.pool_features_path_genome = kwargs.get('genome_pool_features')
        self.poolcoco_id_to_index = self._poolcreate_coco_id_to_index(
            self.pool_features_path_coco)
        self.poolcoco_id_to_index_gen = self._poolcreate_coco_id_to_index(
            self.pool_features_path_genome)

        self.image_features_path_coco = kwargs.get('coco_bottomup')
        self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
        self.image_features_path_genome = kwargs.get('genome_bottomup')
        self.genome_id_to_index = self.id_to_index(
            self.image_features_path_genome)
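The comment in this example mentions six position-encoded vectors but the snippet never shows how they are built. One common 6-d box encoding in the referring-expression literature normalizes the corners and the box size by the image dimensions; the sketch below is an assumed reference, not necessarily the encoding this code uses.

import numpy as np

def spatial_feature(box, img_w, img_h):
    # Assumed 6-d encoding: normalized corners plus width/height ratios.
    x1, y1, x2, y2 = box
    return np.array([x1 / img_w, y1 / img_h,
                     x2 / img_w, y2 / img_h,
                     (x2 - x1) / img_w, (y2 - y1) / img_h],
                    dtype=np.float32)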
Example #3
    def __init__(self, **kwargs):

        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        split = kwargs.get('split')

        data_json = osp.join('cache/prepro', "vqd" + "_" + splitBy,
                             split + '.json')

        with open(data_json, 'r') as f:
            self.data = json.load(f)

        # only keep entries with 1 or more gt boxes
        if dataset == 'vqd1':
            print('VQDv1 1box loaded .......')

            # only use the questions having exactly one bbox as answer
            datanew = []
            for ent in self.data:
                gtbox = ent['gtbox']
                if len(gtbox) == 1 and len(gtbox[0]) != 0:
                    datanew.append(ent)
            self.data = datanew

        dictfile = kwargs.get('dictionaryfile')
        self.dictionary = Dictionary.load_from_file(dictfile)
        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.spatial = True
        self.image_features_path_coco = kwargs.get('vqd_detfeats').format(
            split)
        self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
        print("Dataset [{}] loaded....".format(dataset))
        print("Split [{}] has {} ref exps.".format(split, len(self.data)))

        cocoids = set(self.coco_id_to_index)
        if kwargs.get('istrain'):
            cocoids.remove(81768)
        # some question ids do not exist
        datanew = []
        for ent in self.data:
            # some image ids are not in the dataset
            if ent['image_id'] in cocoids:
                datanew.append(ent)
        self.data = datanew
Example #4
    def __init__(self, **kwargs):

        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        split = kwargs.get('split')

        data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                             split + '.json')

        with open(data_json, 'r') as f:
            self.data = json.load(f)

        dictfile = kwargs.get('dictionaryfile')
        self.dictionary = Dictionary.load_from_file(dictfile)
        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.spatial = True
        print("Dataset [{}] loaded....".format(dataset))
        print("Split [{}] has {} ref exps.".format(split, len(self.data)))
Example #5
    def __init__(self, **kwargs):

        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        split = kwargs.get('split')

        data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                             split + '.json')

        with open(data_json, 'r') as f:
            self.data = json.load(f)

        dictfile = kwargs.get('dictionaryfile')
        self.dictionary = Dictionary.load_from_file(dictfile)
        if kwargs.get('testrun'):
            self.data = self.data[:32]

        self.spatial = True
        feats_use = '{}_{}_det_feats.h5'.format(dataset, splitBy)
        self.image_features_path_coco = osp.join(kwargs.get('refcoco_frcnn'),
                                                 feats_use)
        self.coco_id_to_index = self.id_to_index(self.image_features_path_coco)
        print("Dataset [{}] loaded....".format(dataset))
        print("Split [{}] has {} ref exps.".format(split, len(self.data)))
Example #6
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = [float(val) for val in vals[1:]]
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    ds = 'Ourdb'
    d = create_dictionary(ds)
    d.dump_to_file('data/dictionary.pickle')

    d = Dictionary.load_from_file('data/dictionary.pickle')
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_init_%dd.npy' % emb_dim, weights)
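A typical downstream use of the saved matrix, sketched under the assumption that the model is built with PyTorch (the file name matches the np.save call above):

import numpy as np
import torch
import torch.nn as nn

weights = np.load('data/glove6b_init_300d.npy')
# Copy the GloVe matrix into an embedding layer as its initial weights.
emb = nn.Embedding(weights.shape[0], weights.shape[1])
emb.weight.data.copy_(torch.from_numpy(weights))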
Example #7
    import os.path as osp
    import json

    for ds in config.dataset:
        kwargs = {**config.global_config, **config.dataset[ds]}
        data_root = kwargs.get('data_root')
        dataset = kwargs.get('dataset')
        splitBy = kwargs.get('splitBy')
        splits = kwargs.get('splits')
        data = []
        for split in splits + ['train']:
            data_json = osp.join('cache/prepro', dataset + "_" + splitBy,
                                 split + '.json')
            with open(data_json, 'r') as f:
                d = json.load(f)
                data.extend(d)

        d = create_dictionary(data, dataset=dataset)
        basedir = os.path.dirname(kwargs['dictionaryfile'].format(dataset))
        if not os.path.exists(basedir):
            os.mkdir(basedir)
        d.dump_to_file(kwargs['dictionaryfile'].format(dataset))
        d = Dictionary.load_from_file(kwargs['dictionaryfile'].format(dataset))
        emb_dim = 300
        glove = 'glove/glove.6B.%dd.txt' % emb_dim
        embedding_basedir = os.path.dirname(kwargs['glove'])
        glove_file = embedding_basedir.format(glove)
        weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
        np.save(
            os.path.join(embedding_basedir.format(ds),
                         'glove6b_init_%dd.npy' % emb_dim), weights)
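The Dictionary class used throughout these examples is never shown. Here is a minimal sketch of the interface the snippets rely on (idx2word, dump_to_file, load_from_file); the pickled on-disk layout is an assumption.

import pickle

class Dictionary:
    def __init__(self, word2idx=None, idx2word=None):
        self.word2idx = word2idx if word2idx is not None else {}
        self.idx2word = idx2word if idx2word is not None else []

    @classmethod
    def load_from_file(cls, path):
        # Assumed on-disk layout: a pickled (word2idx, idx2word) pair.
        with open(path, 'rb') as f:
            word2idx, idx2word = pickle.load(f)
        return cls(word2idx, idx2word)

    def dump_to_file(self, path):
        with open(path, 'wb') as f:
            pickle.dump((self.word2idx, self.idx2word), f)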