示例#1
0
    def get_batch(self,
                  batch_size,
                  vocab_size,
                  max_nsteps,
                  start=False,
                  data_dir=None,
                  dataset_name=None):
        """Assemble one batch of (document + question) sequences and targets.

        Up to ``batch_size`` records are pulled from ``self.data_iterator``.
        Each record is unpacked as
        ``((_, document, question, answer, _), data_idx, data_max_idx)``,
        where ``document`` and ``question`` are whitespace-separated integer
        token strings and ``answer`` is convertible to an integer index.

        Args:
            batch_size: Maximum number of records to draw; also the number of
                rows allocated in ``target_outputs``.
            vocab_size: Number of columns in ``target_outputs``; also passed
                to ``self.load_dataset``.
            max_nsteps: Maximum sequence length. Longer examples are dropped
                (they still consume one of the ``batch_size`` draws).
            start: When truthy, ``self.data_iterator`` is (re)created from
                ``self.load_dataset(data_dir, dataset_name, vocab_size)``
                before reading.
            data_dir: Forwarded to ``self.load_dataset`` when ``start`` is
                truthy.
            dataset_name: Forwarded to ``self.load_dataset`` when ``start``
                is truthy.

        Returns:
            A tuple ``(inputs, nstarts, target_outputs, data_idx,
            data_max_idx)``:

            * ``inputs`` -- result of ``Util.array_pad(sequences, max_nsteps,
              pad=0)``, or an empty list when no example was kept.
            * ``nstarts`` -- one ``[last_position, row, 0]`` triple per kept
              example, where ``last_position`` is ``len(sequence) - 1`` and
              ``row`` is the example's row in ``inputs``; an empty list when
              no example was kept.
            * ``target_outputs`` -- ``(batch_size, vocab_size)`` array with a
              1 at ``[row, answer]`` for every kept example. Rows beyond the
              number of kept examples stay all-zero.
            * ``data_idx``, ``data_max_idx`` -- values yielded with the last
              record read (both 0 if the iterator was already exhausted).

        Raises:
            IndexError: If ``int(answer)`` is outside ``[-vocab_size,
                vocab_size)``.
            ValueError: If a document/question token or the answer is not a
                valid integer literal.
        """
        if start:
            self.data_iterator = self.load_dataset(data_dir, dataset_name,
                                                   vocab_size)
        target_outputs = np.zeros([batch_size, vocab_size])
        inputs, nstarts = [], []

        data_idx, data_max_idx = 0, 0
        for _draw in range(batch_size):
            try:
                (_, document, question, answer,
                 _), data_idx, data_max_idx = next(self.data_iterator)
            except StopIteration:
                # Dataset exhausted: return whatever has been collected.
                break

            # Document tokens, a 0 separator, then the question tokens once.
            # (The previous nested comprehension repeated the whole question
            # len(question) times.)
            data = [int(d) for d in document.split()] + [0] + \
                   [int(q) for q in question.split()]

            if len(data) > max_nsteps:
                continue

            # Use the row the example occupies in `inputs`, not the draw
            # counter, so targets stay aligned with inputs and nstarts even
            # after over-long examples have been skipped.
            row = len(inputs)
            inputs.append(data)
            nstarts.append(len(data) - 1)
            target_outputs[row][int(answer)] = 1

        if inputs:
            # NOTE(review): Util.array_pad presumably pads every sequence to
            # max_nsteps with 0 -- confirm against its definition.
            inputs = Util.array_pad(inputs, max_nsteps, pad=0)
            nstarts = [[nstart, idx, 0] for idx, nstart in enumerate(nstarts)]

        return inputs, nstarts, target_outputs, data_idx, data_max_idx