# Example no. 1
  def build_graph(self):
    """Build the input pipeline portion of the TF graph.

    Reads batched (source, target) sequence pairs from files matching
    ``data_dir/file_pattern``, creates an initializable iterator over the
    dataset, and publishes the resulting tensors (plus their non-padded
    lengths) into ``self._input_tensors``.
    """
    pattern = os.path.join(self.params['data_dir'],
                           self.params['file_pattern'])
    self.batched_dataset = _read_and_batch_from_files(
      file_pattern=pattern,
      batch_size=self.params['batch_size'],
      max_length=self.params['max_length'],
      num_cpu_cores=self.params.get('num_cpu_cores', 2),
      shuffle=self.params['shuffle'],
      repeat=self.params['repeat'],
      num_workers=self._num_workers,
      worker_id=self._worker_id,
      batch_in_tokens=self.params.get('batch_in_tokens', True),
      pad2eight=self.params.get('pad_data_to_eight', False))

    self._iterator = self.batched_dataset.make_initializable_iterator()
    x, y = self.iterator.get_next()

    # Sequence lengths = number of non-zero entries per row (0 is the pad id
    # here, judging by this computation — confirm against the file reader).
    len_x = tf.count_nonzero(x, axis=1, dtype=tf.int32)
    len_y = tf.count_nonzero(y, axis=1, dtype=tf.int32)

    # Sources are always exposed; targets only exist in train/eval modes.
    self._input_tensors['source_tensors'] = [x, len_x]
    if self.params['mode'] in ('train', 'eval'):
      self._input_tensors['target_tensors'] = [y, len_y]
# Example no. 2
  def build_graph(self):
    """Build the input pipeline portion of the TF graph.

    Reads batched (source, target) sequence pairs from files matching
    ``data_dir/file_pattern``, optionally zero-pads both batch and time
    dimensions up to a multiple of 8 (``m_padding``), and publishes the
    resulting tensors (plus their non-padded lengths) into
    ``self._input_tensors``.
    """
    pattern = os.path.join(self.params['data_dir'],
                           self.params['file_pattern'])
    self.batched_dataset = _read_and_batch_from_files(
      file_pattern=pattern,
      batch_size=self.params['batch_size'],
      max_length=self.params['max_length'],
      num_cpu_cores=self.params.get('num_cpu_cores', 2),
      shuffle=self.params['shuffle'],
      repeat=self.params['repeat'],
      num_workers=self._num_workers,
      worker_id=self._worker_id)

    self._iterator = self.batched_dataset.make_initializable_iterator()
    x, y = self.iterator.get_next()

    if self.params.get('m_padding', False):
      # "Magic padding": round both dimensions of x and y up to a multiple
      # of 8 (presumably for tensor-core-friendly shapes — confirm intent).
      def _round_up_to_eight(t, axis):
        # Zero-pad 2-D tensor `t` along `axis` so its size becomes a
        # multiple of 8; leave it untouched when it already is one.
        size = tf.shape(t)[axis]
        pads = [[0, 0], [0, 0]]
        pads[axis] = [0, 8 - size % 8]
        return tf.cond(tf.equal(size % 8, 0),
                       true_fn=lambda: t,
                       false_fn=lambda: tf.pad(t, paddings=pads))

      # Same order as before: time axis first, then batch axis.
      x = _round_up_to_eight(x, 1)
      y = _round_up_to_eight(y, 1)
      x = _round_up_to_eight(x, 0)
      y = _round_up_to_eight(y, 0)

    # Sequence lengths = number of non-zero entries per row; zero padding
    # added above therefore does not change the reported lengths.
    len_x = tf.count_nonzero(x, axis=1, dtype=tf.int32)
    len_y = tf.count_nonzero(y, axis=1, dtype=tf.int32)

    # Sources are always exposed; targets only exist in train/eval modes.
    self._input_tensors['source_tensors'] = [x, len_x]
    if self.params['mode'] in ('train', 'eval'):
      self._input_tensors['target_tensors'] = [y, len_y]