Example #1
    def test_get_results_for_epoch(self):
        # this will take all gpu memory, but that's probably fine for tests
        gpus = get_available_gpus()
        length_list = []
        for num_gpus in [1, 2, 3]:
            # skip configurations that need more GPUs than are available
            if num_gpus > len(gpus):
                continue
            for bs in [1, 2, 3, 5, 7]:
                # cap the total batch size across GPUs
                if bs * num_gpus > 10:
                    continue
                with tf.Graph().as_default() as g:
                    self.eval_config['batch_size_per_gpu'] = bs
                    self.eval_config['num_gpus'] = num_gpus
                    model = base_model(params=self.eval_config,
                                       mode="infer",
                                       hvd=None)
                    model.compile()
                    # stub out inference so get_results_for_epoch returns the
                    # raw input batches unchanged
                    model.infer = lambda inputs, outputs: inputs
                    model.finalize_inference = lambda results: results

                    with self.test_session(g, use_gpu=True) as sess:
                        sess.run(tf.global_variables_initializer())
                        inputs_per_batch = get_results_for_epoch(
                            model, sess, False, "infer")
                        length = np.hstack([
                            inp['source_tensors'][1]
                            for inp in inputs_per_batch
                        ])
                        ids = np.hstack(
                            [inp['source_ids'] for inp in inputs_per_batch])
                        length_list.append(length[np.argsort(ids)])

        # every (num_gpus, batch_size) combination must have seen the same data
        for i in range(len(length_list) - 1):
            npt.assert_allclose(length_list[i], length_list[i + 1])
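The np.argsort(ids) step above restores a canonical sample order, so that runs which split the data into different batch shapes stay comparable. In isolation (a minimal sketch, not from the repository):

import numpy as np

ids = np.array([2, 0, 1])         # sample ids in the order batches produced them
lengths = np.array([30, 10, 20])  # per-sample lengths, same order as ids
print(lengths[np.argsort(ids)])   # [10 20 30] -- lengths in canonical id order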
Example #2
  def test_get_batches_for_epoch(self):
    # this will take all gpu memory, but that's probably fine for tests
    gpus = get_available_gpus()
    length_list = []
    for num_gpus in [1, 2, 3]:
      if num_gpus > len(gpus):
        continue
      for bs in [1, 2, 3, 5, 7]:
        if bs * num_gpus > 10:
          continue
        with tf.Graph().as_default() as g:
          self.eval_config['batch_size_per_gpu'] = bs
          self.eval_config['num_gpus'] = num_gpus
          model = base_model(params=self.eval_config, mode="eval", hvd=None)
          model.compile()
          # stub out evaluation so get_results_for_epoch returns the raw
          # input batches unchanged
          model.evaluate = lambda inputs, outputs: inputs
          model.finalize_evaluation = lambda results: results

          with self.test_session(g, use_gpu=True) as sess:
            sess.run(tf.global_variables_initializer())
            inputs_per_batch = get_results_for_epoch(model, sess, False, "eval")
            length_list.append(np.hstack([inp['source_tensors'][1]
                                          for inp in inputs_per_batch]))

    for i in range(len(length_list) - 1):
      npt.assert_allclose(length_list[i], length_list[i + 1])
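Both snippets above are methods of a tf.test.TestCase subclass and rely on names imported elsewhere in the test module. A plausible import block follows; the open_seq2seq module paths are an assumption based on the package layout visible in these snippets, not confirmed by them:

import numpy as np
import numpy.testing as npt
import tensorflow as tf

# assumed locations -- adjust to the actual repository layout
from open_seq2seq.utils.utils import get_available_gpus, get_results_for_epoch
# `base_model` and `self.eval_config` are expected to be supplied by the
# concrete TestCase subclass or its setUp().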
Example #3
    def infer_test(self):
        train_config, infer_config = self.prepare_config()
        train_config['num_epochs'] = 250
        infer_config['batch_size_per_gpu'] = 4

        with tf.Graph().as_default() as g:
            with self.test_session(g, use_gpu=True) as sess:
                # throwaway session, used only to query the available GPUs
                gpus = get_available_gpus()

        if len(gpus) > 1:
            infer_config['num_gpus'] = 2
        else:
            infer_config['num_gpus'] = 1

        with tf.Graph().as_default():
            # pylint: disable=not-callable
            train_model = self.base_model(params=train_config,
                                          mode="train",
                                          hvd=None)
            train_model.compile()
            train(train_model, None)

        with tf.Graph().as_default():
            # pylint: disable=not-callable
            infer_model = self.base_model(params=infer_config,
                                          mode="infer",
                                          hvd=None)
            infer_model.compile()

            print(train_model.params['logdir'])
            output_file = os.path.join(train_model.params['logdir'],
                                       'infer_out.csv')
            infer(
                infer_model,
                tf.train.latest_checkpoint(train_model.params['logdir']),
                output_file,
            )
            pred_csv = pd.read_csv(output_file)
            true_csv = pd.read_csv(
                'open_seq2seq/test_utils/toy_speech_data/toy_data.csv')
            # DataFrame.as_matrix() is deprecated; .values is the equivalent
            for pred_row, true_row in zip(pred_csv.values, true_csv.values):
                # checking file name
                self.assertEqual(pred_row[0], true_row[0])
                # checking prediction: no more than 5 chars difference
                self.assertLess(levenshtein(pred_row[-1], true_row[-1]), 5)
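The levenshtein helper used above is not shown in the snippet. A minimal stand-in (the repository's own implementation may differ) is the classic dynamic-programming edit distance:

def levenshtein(a, b):
    # prev[j] = edit distance between the processed prefix of a and b[:j]
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        cur = [i]
        for j, cb in enumerate(b, start=1):
            cur.append(min(prev[j] + 1,                  # deletion
                           cur[j - 1] + 1,               # insertion
                           prev[j - 1] + (ca != cb)))    # substitution
        prev = cur
    return prev[-1]

assert levenshtein("kitten", "sitting") == 3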
Example #4
  def test_infer(self):
    train_config, infer_config = self.prepare_config()
    train_config['num_epochs'] = 200
    infer_config['batch_size_per_gpu'] = 4

    with tf.Graph().as_default() as g:
      with self.test_session(g, use_gpu=True) as sess:
        gpus = get_available_gpus()

    if len(gpus) > 1:
      infer_config['num_gpus'] = 2
    else:
      infer_config['num_gpus'] = 1

    with tf.Graph().as_default():
      train_model = base_model(params=train_config, mode="train", hvd=None)
      train_model.compile()
      train(train_model, None)

    with tf.Graph().as_default():
      infer_model = base_model(params=infer_config, mode="infer", hvd=None)
      infer_model.compile()

      print(train_model.params['logdir'])
      output_file = os.path.join(train_model.params['logdir'], 'infer_out.csv')
      infer(
        infer_model,
        tf.train.latest_checkpoint(train_model.params['logdir']),
        output_file,
      )
      pred_csv = pd.read_csv(output_file)
      true_csv = pd.read_csv(
          'open_seq2seq/test_utils/toy_speech_data/toy_data.csv')
      # DataFrame.as_matrix() is deprecated; .values is the equivalent
      for pred_row, true_row in zip(pred_csv.values, true_csv.values):
        # checking file name
        self.assertEqual(pred_row[0], true_row[0])
        # checking prediction: expect an exact match
        self.assertEqual(pred_row[-1], true_row[-1])
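All four methods belong to tf.test.TestCase subclasses, so the usual entry point for running them is the standard TensorFlow test main. Note that Example #3's infer_test does not start with test_, so test discovery skips it; it is presumably a helper meant to be called from concrete subclasses, which also supply self.base_model and self.prepare_config. A minimal runner sketch:

import tensorflow as tf

if __name__ == '__main__':
    tf.test.main()  # discovers and runs the test_* methods in this module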