import numpy as np

# `Util.array_pad` and `self.load_dataset` are assumed to be defined elsewhere in this repo.

def get_batch(self, batch_size, vocab_size, max_nsteps, start=False,
              data_dir=None, dataset_name=None):
    # On the first call, (re)initialize the iterator over the dataset.
    if start:
        self.data_iterator = self.load_dataset(data_dir, dataset_name, vocab_size)

    # One-hot answer targets: one row per example in the batch.
    target_outputs = np.zeros([batch_size, vocab_size])
    inputs, nstarts, answers = [], [], []
    data_idx, data_max_idx = 0, 0

    for example_id in np.arange(batch_size):
        try:
            (_, document, question, answer, _), data_idx, data_max_idx = \
                next(self.data_iterator)
        except StopIteration:
            break

        # Concatenate document and question token ids, separated by a 0 token.
        data = [int(d) for d in document.split()] + [0] + \
               [int(q) for q in question.split()]

        # Skip examples longer than the maximum number of unrolled steps.
        if len(data) > max_nsteps:
            continue

        inputs.append(data)
        # Index of the last token of this example (where the answer is read out).
        nstarts.append(len(inputs[-1]) - 1)
        target_outputs[example_id][int(answer)] = 1

    if len(inputs) > 0:
        # Pad every example with zeros up to max_nsteps.
        inputs = Util.array_pad(inputs, max_nsteps, pad=0)
        nstarts = [[nstart, idx, 0] for idx, nstart in enumerate(nstarts)]

    return inputs, nstarts, target_outputs, data_idx, data_max_idx
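
# Illustrative usage sketch (not from the original source): `reader`, the data
# directory "data", and the dataset name "cnn" are assumptions for demonstration.
# get_batch is a method, so it must be called on an instance of the class that
# also provides load_dataset.
#
#   inputs, nstarts, targets, data_idx, data_max_idx = reader.get_batch(
#       batch_size=32, vocab_size=10000, max_nsteps=1000,
#       start=True, data_dir="data", dataset_name="cnn")
#
#   # Subsequent calls with start=False continue from the same iterator:
#   inputs, nstarts, targets, data_idx, data_max_idx = reader.get_batch(
#       batch_size=32, vocab_size=10000, max_nsteps=1000)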