def __next__(self):
    """Return the next mini-batch as a pair of time-major LongTensors.

    Sentences in the batch are length-sorted (descending) and padded at
    the tail, then transposed so time is the leading dimension.
    Raises StopIteration once the dataset is exhausted, after calling
    _reorder() to prepare the next epoch.
    """
    if self._idx >= len(self.dataset):
        # Exhausted: reshuffle/reorder for the next pass, then stop.
        self._reorder()
        raise StopIteration()
    start, end = self._idx, self._idx + self.batch_size
    src, tgt = sort(*zip(*self.dataset[start:end]), order='descend')
    src = torch.LongTensor(pad_sequences(src, padding='post')).t()
    tgt = torch.LongTensor(pad_sequences(tgt, padding='post')).t()
    self._idx = end
    return src, tgt
def __next__(self):
    """Return the next mini-batch as a pair of int32 TF tensors.

    Sentences in the batch are length-sorted (descending) and padded at
    the tail. Raises StopIteration once the dataset is exhausted, after
    calling _reorder() to prepare the next epoch.
    """
    if self._idx >= len(self.dataset):
        self._reorder()  # reshuffle/reorder before the next epoch
        raise StopIteration()
    lo = self._idx
    hi = lo + self.batch_size
    src, tgt = zip(*self.dataset[lo:hi])
    src, tgt = sort(src, tgt, order='descend')
    src = tf.convert_to_tensor(pad_sequences(src, padding='post'),
                               dtype=tf.int32)
    tgt = tf.convert_to_tensor(pad_sequences(tgt, padding='post'),
                               dtype=tf.int32)
    self._idx = hi
    return src, tgt
Load data
'''
# Parallel EN/JA corpus as id sequences plus vocab sizes and word<->id
# maps — presumably the book's data helper; TODO confirm return layout.
(x_train, y_train), \
(x_test, y_test), \
(num_x, num_y), \
(w2i_x, w2i_y), (i2w_x, i2w_y) = \
    load_small_parallel_enja(to_ja=True, add_bos=False)

# Carve the last 20% of the training data off as a validation split.
N = len(x_train)
train_size = int(N * 0.8)
valid_size = N - train_size
(x_train, y_train), (x_valid, y_valid) = \
    (x_train[:train_size], y_train[:train_size]), \
    (x_train[train_size:], y_train[train_size:])

# Sort each split in parallel — presumably by sentence length so that
# per-batch padding stays small; verify against the `sort` helper.
x_train, y_train = sort(x_train, y_train)
x_valid, y_valid = sort(x_valid, y_valid)
x_test, y_test = sort(x_test, y_test)

# NOTE(review): the split sizes computed above are overwritten here with
# hard-coded subset sizes — looks like a deliberate down-sampling to
# shorten the run; confirm this is intended and not leftover debug code.
train_size = 40000
valid_size = 200
test_size = 10
x_train, y_train = x_train[:train_size], y_train[:train_size]
x_valid, y_valid = x_valid[:valid_size], y_valid[:valid_size]
x_test, y_test = x_test[:test_size], y_test[:test_size]

'''
Build model
'''
input_dim = num_x  # source-side vocabulary size
hidden_dim = 256  # hidden state width for the model defined below