Exemplo n.º 1
0
 def __init__(self, dataset_path: Path, ids: List[str], preload: bool):
     """Set up the feature directory and optionally preload all features.

     Args:
         dataset_path: Root directory of the dataset. It is converted
             with ``Path``, so a plain string is accepted as well.
         ids: Ids of the items to preload; only read when ``preload``
             is true.
         preload: If true, load every id through ``self.load_from_file``
             and keep the results in ``self.cached_data``.
     """
     self.dataset_path = Path(dataset_path)
     # Build from the normalized attribute rather than the raw argument:
     # the raw argument may be a ``str``, which does not support ``/``.
     self.features_path = (self.dataset_path / "features" /
                           "ICEP_V3_global_pool_skip_8_direct_resize")
     # ``None`` means nothing was preloaded.
     self.cached_data = None
     if preload:
         self.cached_data = {}
         for id_ in tqdm(ids, desc="preload videos"):
             np_array = self.load_from_file(id_)
             # NOTE(review): presumably copies the array into memory that
             # can be shared between processes -- confirm in ``utils``.
             self.cached_data[id_] = utils.make_shared_array(np_array)
Exemplo n.º 2
0
 def __init__(self, dataset_path: Path, dataset_features: str,
              ids: List[str], preload: bool):
     """Locate the video feature HDF5 file and optionally preload it.

     Args:
         dataset_path: Root directory of the dataset. It is converted
             with ``Path``, so a plain string is accepted as well.
         dataset_features: Name inserted into the file name
             ``video_feat_{dataset_features}.h5``.
         ids: Keys to read from the HDF5 file; only read when
             ``preload`` is true.
         preload: If true, read every id from the file and keep the
             results in ``self.cached_data``.
     """
     self.dataset_path = Path(dataset_path)
     # Build from the normalized attribute rather than the raw argument:
     # the raw argument may be a ``str``, which does not support ``/``.
     self.h5_path = self.dataset_path / f"video_feat_{dataset_features}.h5"
     # ``None`` means nothing was preloaded.
     self.cached_data = None
     if preload:
         self.cached_data = {}
         # The context manager closes the file even when a lookup fails;
         # previously the handle was left open for the process lifetime.
         with h5py.File(self.h5_path, "r") as h5file:
             for id_ in tqdm(ids, desc="preload videos"):
                 np_array = h5file[id_]
                 # NOTE(review): assumes make_shared_array copies the
                 # dataset contents eagerly, so the result stays valid
                 # after the file is closed -- confirm in ``utils``.
                 self.cached_data[id_] = utils.make_shared_array(np_array)
Exemplo n.º 3
0
 def __init__(self,
              dataset_path: Path,
              ids: List[str],
              metadata_name: str = "default",
              preload: bool = True):
     """Load text lengths and optionally preload text features.

     Args:
         dataset_path: Root directory of the dataset. It is converted
             with ``Path``, so a plain string is accepted as well.
         ids: Keys to read from the HDF5 file; only read when
             ``preload`` is true.
         metadata_name: Name inserted into the file names
             ``text_{metadata_name}.h5`` and
             ``text_lens_{metadata_name}.json``.
         preload: If true, read every id from the HDF5 file and keep
             the results in ``self.cached_data``.
     """
     dataset_path = Path(dataset_path)
     self.h5_path = dataset_path / f"text_{metadata_name}.h5"
     lens_file = dataset_path / f"text_lens_{metadata_name}.json"
     # Close the JSON file deterministically instead of relying on
     # garbage collection of the anonymous file object.
     with lens_file.open("rt", encoding="utf8") as lens_fh:
         self.lens = json.load(lens_fh)
     # ``None`` means nothing was preloaded.
     self.cached_data = None
     if preload:
         self.cached_data = {}
         # The context manager closes the file even when a lookup fails.
         with h5py.File(self.h5_path, "r") as h5file:
             for id_ in tqdm(ids, desc="preload text"):
                 np_array = h5file[id_]
                 # NOTE(review): assumes make_shared_array copies the
                 # dataset contents eagerly, so the result stays valid
                 # after the file is closed -- confirm in ``utils``.
                 self.cached_data[id_] = utils.make_shared_array(np_array)
Exemplo n.º 4
0
    def __init__(self):
        """Load the vocabulary and its precomputed embedding matrix.

        Reads ``glove_vocab/vocab.json`` into a ``Vocab`` and the first
        array stored in ``glove_vocab/precomp_anet_w2v_total.npz`` into
        ``self.shared_array``. Both paths are relative to the current
        working directory.

        Raises:
            AssertionError: If the number of rows in the embedding matrix
                differs from ``len(self.vocab)``.
        """
        glove_path = Path("glove_vocab")

        self.vocab = Vocab()
        self.vocab.load(glove_path / "vocab.json")

        mapping_path = glove_path / "precomp_anet_w2v_total.npz"
        # Close the archive once the array has been read into memory.
        with np.load(str(mapping_path)) as npz_file:
            np_arr = npz_file[npz_file.files[0]]
        # ``np.float`` was an alias of the builtin ``float`` (float64) and
        # was removed in NumPy 1.24; ``np.float64`` is the same dtype.
        np_arr = np_arr.astype(np.float64)
        # Check consistency before building the shared copy.
        assert np_arr.shape[0] == len(self.vocab), (
            "embedding matrix rows do not match vocabulary size")
        # NOTE(review): presumably copies the array into memory that can
        # be shared between processes -- confirm in ``utils``.
        self.shared_array = utils.make_shared_array(np_arr)
        # NOTE(review): hard-coded; presumably equals np_arr.shape[1] --
        # confirm against the .npz contents.
        self.feature_dim = 300