def get_homogenous_batch_iter(self):
    """Yield mini-batches whose samples have similar lengths.

    Repeatedly pools up to ``k_batches`` chunks obtained from
    ``PytablesBitextIterator.next(self)``, orders the pooled samples by
    ``max(len(x_i), len(y_i))`` and yields them in consecutive slices of
    at most ``self.batch_size`` samples.

    Yields:
        A two-element list ``[x_batch, y_batch]``; both inner lists hold
        the same number of samples and are never empty.

    The generator finishes once the underlying iterator raises
    ``StopIteration`` (after the samples pooled so far were yielded), or
    as soon as a pooling round collects no samples at all.
    """
    # Number of chunks pooled together before sorting by length.
    k_batches = 10
    batch_size = self.batch_size
    end_of_iter = False

    while True:
        x = []
        y = []
        for _ in range(k_batches):
            try:
                dx, dy = PytablesBitextIterator.next(self)
            except StopIteration:
                # Keep what was pooled so far; stop after yielding it.
                end_of_iter = True
                break
            if dx is None or dy is None:
                break
            # NOTE(review): assumes dx/dy are lists of sequences -- confirm
            # against PytablesBitextIterator.next.
            x += dx
            y += dy

        if not x or not y:
            # A bare ``return`` ends the generator.  ``raise StopIteration``
            # inside a generator turns into RuntimeError under PEP 479.
            return

        # Row 0: source lengths, row 1: target lengths.  List
        # comprehensions keep this working where ``map`` is lazy.
        lens = numpy.asarray([[len(s) for s in x], [len(s) for s in y]])
        if k_batches > 1:
            order = numpy.argsort(lens.max(axis=0))
        else:
            order = numpy.arange(len(x))

        for k in range(k_batches):
            start = k * batch_size
            # ``>=`` rather than ``>``: when the pool size is an exact
            # multiple of batch_size the slice at ``start == len(order)``
            # is empty and must not be yielded as a batch.
            if start >= len(order):
                break
            indices = order[start:start + batch_size]
            yield [[x[ii] for ii in indices], [y[ii] for ii in indices]]

        if end_of_iter:
            return
def get_homogenous_batch_iter(self):
    """Yield mini-batches whose samples have similar lengths.

    Repeatedly pools up to ``k_batches`` chunks obtained from
    ``PytablesBitextIterator.next(self)``, orders the pooled samples by
    ``max(len(x_i), len(y_i))`` and yields them in consecutive slices of
    at most ``self.batch_size`` samples.

    Yields:
        A two-element list ``[x_batch, y_batch]``; both inner lists hold
        the same number of samples and are never empty.

    The generator finishes once the underlying iterator raises
    ``StopIteration`` (after the samples pooled so far were yielded), or
    as soon as a pooling round collects no samples at all.
    """
    # Number of chunks pooled together before sorting by length.
    k_batches = 10
    batch_size = self.batch_size
    end_of_iter = False

    while True:
        x = []
        y = []
        for _ in range(k_batches):
            try:
                dx, dy = PytablesBitextIterator.next(self)
            except StopIteration:
                # Keep what was pooled so far; stop after yielding it.
                end_of_iter = True
                break
            # Identity test: ``== None`` is evaluated elementwise (and is
            # ambiguous in a boolean context) for array-like values.
            if dx is None or dy is None:
                break
            # NOTE(review): assumes dx/dy are lists of sequences -- confirm
            # against PytablesBitextIterator.next.
            x += dx
            y += dy

        if not x or not y:
            # A bare ``return`` ends the generator.  ``raise StopIteration``
            # inside a generator turns into RuntimeError under PEP 479.
            return

        # Row 0: source lengths, row 1: target lengths.  List
        # comprehensions keep this working where ``map`` is lazy.
        lens = numpy.asarray([[len(s) for s in x], [len(s) for s in y]])
        if k_batches > 1:
            order = numpy.argsort(lens.max(axis=0))
        else:
            order = numpy.arange(len(x))

        for k in range(k_batches):
            start = k * batch_size
            # ``>=`` rather than ``>``: when the pool size is an exact
            # multiple of batch_size the slice at ``start == len(order)``
            # is empty and must not be yielded as a batch.
            if start >= len(order):
                break
            indices = order[start:start + batch_size]
            yield [[x[ii] for ii in indices], [y[ii] for ii in indices]]

        if end_of_iter:
            return
def load_data(batch_size=128, path_src=None, path_trg=None):
    """Create the data iterator(s) for the dataset.

    Args:
        batch_size: Forwarded as the first argument of
            ``PytablesBitextIterator``.
        path_src: Path of the source-side HDF5 file.  Defaults to the
            bundled ``bin_test.fr.h5``.
        path_trg: Path of the target-side HDF5 file.  Defaults to the
            bundled ``bin_test.en.h5``.

    Returns:
        A ``(train, valid, test)`` tuple.  Only ``train`` is built;
        ``valid`` and ``test`` are always ``None``.
    """
    data_dir = '../data/vocab_and_data_small_europarl_v7_enfr/'
    if path_src is None:
        path_src = data_dir + 'bin_test.fr.h5'
    if path_trg is None:
        path_trg = data_dir + 'bin_test.en.h5'

    # Parenthesised single-argument form prints the same text whether
    # ``print`` is a statement (Python 2) or a function (Python 3).
    print('... initializing data iterators')

    # The target path is passed before the source path, as in the
    # original call.  NOTE(review): confirm this matches the parameter
    # order expected by PytablesBitextIterator.
    train = PytablesBitextIterator(batch_size, path_trg, path_src,
                                   use_infinite_loop=False)
    valid = None
    test = None
    return train, valid, test
def __init__(self, *args, **kwargs):
    """Initialise the base iterator and reset the batch generator slot.

    All positional and keyword arguments are forwarded unchanged to
    ``PytablesBitextIterator.__init__``.
    """
    PytablesBitextIterator.__init__(self, *args, **kwargs)
    # Starts out unset.  NOTE(review): presumably filled later with a
    # batch generator -- confirm against the code that reads it.
    self.batch_iter = None