Пример #1
0
    def read_sents(self, filename, filter_ids=None):
        """Lazily yield one ``ArrayInput`` per array stored in an npz archive.

        Entries are visited in ascending order of the integer that follows the
        last underscore of their key. Each array is optionally transposed
        (``self.transpose``) and then sliced with
        ``self.feat_from:self.feat_to:self.feat_skip`` on the first axis and
        ``:self.timestep_truncate:self.timestep_skip`` on the second.

        Args:
            filename: Path handed to ``np.load``.
            filter_ids: Optional positions into the sorted key list. When
                given, only those entries are read (still in ascending key
                order) and ``np.load`` is called with ``mmap_mode="r"``.

        Yields:
            ``ArrayInput`` wrapping the processed array of each entry.
        """
        # Open the archive as a context manager so it is closed even when the
        # consumer abandons the generator early or an entry fails to load; a
        # trailing close() call would only run after full exhaustion.
        with np.load(filename,
                     mmap_mode=None if filter_ids is None else "r") as npzFile:
            npzKeys = sorted(npzFile.files,
                             key=lambda x: int(x.split('_')[-1]))
            if filter_ids is not None:
                npzKeys = [npzKeys[i] for i in filter_ids]
                # Restore ascending key order regardless of filter_ids order.
                npzKeys.sort(key=lambda x: int(x.split('_')[-1]))
            for idx, key in enumerate(npzKeys):
                inp = npzFile[key]
                if self.transpose:
                    inp = inp.transpose()

                sub_inp = inp[self.feat_from:self.feat_to:self.feat_skip, :self.
                              timestep_truncate:self.timestep_skip]
                if sub_inp.size < inp.size:
                    # The slice dropped elements: copy it into a fresh array so
                    # the yielded value is not a view onto the larger source
                    # array (presumably to let that array be released).
                    inp = np.empty_like(sub_inp)
                    np.copyto(inp, sub_inp)
                else:
                    inp = sub_inp

                # Progress report after every 1000th entry.
                if idx % 1000 == 999:
                    logger.info(
                        f"Read {idx+1} lines ({float(idx+1)/len(npzKeys)*100:.2f}%) of {filename} at {key}"
                    )
                yield ArrayInput(inp)
Пример #2
0
 def read_sents(self, filename, filter_ids=None):
   """Lazily yield one ``ArrayInput`` per array stored in an npz archive.

   Keys are ordered by the integer that follows their last underscore. Each
   array is transposed first when ``self.transpose`` is truthy.

   Args:
     filename: Path handed to ``np.load``.
     filter_ids: Optional positions into the sorted key list. When given, only
       those entries are read, in the order listed in ``filter_ids``, and
       ``np.load`` is called with ``mmap_mode="r"``.

   Yields:
     ``ArrayInput`` wrapping each (optionally transposed) array.
   """
   # Open the archive as a context manager so it is closed even when the
   # consumer abandons the generator early or an entry fails to load; a
   # trailing close() call would only run after full exhaustion.
   with np.load(filename, mmap_mode=None if filter_ids is None else "r") as npzFile:
     npzKeys = sorted(npzFile.files, key=lambda x: int(x.split('_')[-1]))
     if filter_ids is not None:
       npzKeys = [npzKeys[i] for i in filter_ids]
     for idx, key in enumerate(npzKeys):
       inp = npzFile[key]
       if self.transpose:
         inp = inp.transpose()
       # Progress report after every 1000th entry.
       if idx % 1000 == 999:
         logger.info(f"Read {idx+1} lines ({float(idx+1)/len(npzKeys)*100:.2f}%) of {filename} at {key}")
       yield ArrayInput(inp)
Пример #3
0
  def read_sents(self, filename, filter_ids=None):
    """Lazily yield one ``ArrayInput`` per dataset of an HDF5 file.

    Dataset names are interpreted as integers and visited in ascending
    numeric order. Each dataset is read fully, optionally transposed
    (``self.transpose``), then sliced with
    ``self.feat_from:self.feat_to:self.feat_skip`` on the first axis and
    ``:self.timestep_truncate:self.timestep_skip`` on the second.

    Args:
      filename: Path opened read-only with ``h5py.File``.
      filter_ids: Optional positions into the sorted key list; when given,
        only those datasets are read (still in ascending numeric order).

    Yields:
      ``ArrayInput`` wrapping the processed array of each dataset.
    """
    with h5py.File(filename, "r") as hf:
      h5_keys = sorted(hf.keys(), key=int)
      if filter_ids is not None:
        # Select the requested positions, then restore numeric order.
        h5_keys = sorted((h5_keys[i] for i in filter_ids), key=int)

      for idx, key in enumerate(h5_keys):
        loaded = hf[key][:]
        oriented = loaded.transpose() if self.transpose else loaded

        feat_window = slice(self.feat_from, self.feat_to, self.feat_skip)
        time_window = slice(None, self.timestep_truncate, self.timestep_skip)
        selected = oriented[feat_window, time_window]

        if selected.size >= oriented.size:
          # Nothing was dropped; pass the slice through unchanged.
          result = selected
        else:
          # Copy the reduced slice into a fresh array of its own.
          result = np.empty_like(selected)
          np.copyto(result, selected)

        # Progress report after every 1000th dataset.
        if (idx + 1) % 1000 == 0:
          logger.info(f"Read {idx+1} lines ({float(idx+1)/len(h5_keys)*100:.2f}%) of {filename} at {key}")
        yield ArrayInput(result)