Example #1
  def testGeneratorInputFnWithXAsNonGeneratorFunction(self):
    x = np.arange(32, 36)
    with self.cached_session():
      with self.assertRaisesRegexp(TypeError, 'x must be generator function'):
        failing_input_fn = generator_io.generator_input_fn(
            x, batch_size=2, shuffle=False, num_epochs=1)
        failing_input_fn()
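For contrast, here is a minimal sketch (not part of the test suite) of an x that this check accepts: a zero-argument generator function whose generator yields dicts of NumPy arrays. The import path follows the TF 1.x contrib layout used by the other examples on this page.

import numpy as np
from tensorflow.contrib.learn.python.learn.learn_io import generator_io

def valid_generator():
  # every yielded dict uses the same keys and array shapes
  for index in range(4):
    yield {'a': np.ones(1) * index, 'label': np.ones(1) * index - 32}

# No TypeError here: x is a generator function rather than an ndarray.
input_fn = generator_io.generator_input_fn(
    valid_generator, target_key='label', batch_size=2, shuffle=False,
    num_epochs=1)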
Example #2
  def testGeneratorInputFnWithMismatchinGeneratorKeys(self):
    def generator():
      index = 0
      yield {'a': np.ones(1) * index,
             'b': np.ones(1) * index + 32,
             'label': np.ones(1) * index - 32}
      index = 1
      yield {'a': np.ones(1) * index,
             'c': np.ones(1) * index + 32,
             'label': np.ones(1) * index - 32}

    with self.test_session() as session:
      input_fn = generator_io.generator_input_fn(
        generator, target_key=None, batch_size=2, shuffle=False, num_epochs=1)
      features = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      with self.assertRaises(errors.OutOfRangeError):
        session.run([features])

      with self.assertRaisesRegex(KeyError, 'key mismatch between dicts emitted'
                                            ' by GenFunExpected'):
        coord.request_stop()
        coord.join(threads)
Example #3
  def testGeneratorInputFnWithDifferentDimensionsOfFeatures(self):

    def generator():
      for index in range(100):
        yield {
            'a': np.ones((10, 10)) * index,
            'b': np.ones((5, 5)) * index + 32,
            'label': np.ones((3, 3)) * index - 32
        }

    with self.cached_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator,
          target_key='label',
          batch_size=2,
          shuffle=False,
          num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'],
                          np.vstack((np.zeros((10, 10)), np.ones(
                              (10, 10)))).reshape(2, 10, 10))
      self.assertAllEqual(res[0]['b'],
                          np.vstack((np.zeros((5, 5)), np.ones(
                              (5, 5)))).reshape(2, 5, 5) + 32)
      self.assertAllEqual(res[1],
                          np.vstack((np.zeros((3, 3)), np.ones(
                              (3, 3)))).reshape(2, 3, 3) - 32)

      coord.request_stop()
      coord.join(threads)
Example #4
  def testGeneratorInputFnWithBatchLargerthanData(self):

    def generator():
      for index in range(2):
        yield {
            'a': np.ones(1) * index,
            'b': np.ones(1) * index + 32,
            'label': np.ones(1) * index - 32
        }

    with self.cached_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator, target_key=None, batch_size=4, shuffle=False, num_epochs=1)
      features = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run(features)
      self.assertAllEqual(res['a'], np.asarray([0, 1, 0, 1]).reshape(-1, 1))
      self.assertAllEqual(res['b'], np.asarray([32, 33, 32, 33]).reshape(-1, 1))
      self.assertAllEqual(res['label'],
                          np.asarray([-32, -31, -32, -31]).reshape(-1, 1))

      with self.assertRaises(errors.OutOfRangeError):
        session.run([features])

      coord.request_stop()
      coord.join(threads)
Example #5
def predictor():
    # now we want to predict!
    submission = dict()
    paths = ['./audio/recording.wav']

    test_input_fn = generator_input_fn(
        x=test_data_generator(paths),
        batch_size=hparams.batch_size,
        shuffle=False,
        num_epochs=1,
        queue_capacity=10 * hparams.batch_size,
        num_threads=1)  # the input_fn consumed by model.predict below

    model = create_model(config=run_config, hparams=hparams)
    it = model.predict(input_fn=test_input_fn)

    # last batch will contain padding, so remove duplicates
    for t in tqdm(it):
        fname, label = t['sample'].decode(), id2name[t['label']]
        submission[fname] = label
    os.system('clear')
    for fname, label in submission.items():
        if label == 'stop':
            print('Exiting...')
            os.system('rm -rf ./audio/*')
            return True
        print('You said : {}\n'.format(label))
        return False
Example #6
    def testGeneratorSingleInputFn(self):
        def generator():
            for index in range(2):
                yield {'a': np.ones(1) * index}

        with self.test_session() as session:
            input_fn = generator_io.generator_input_fn(generator,
                                                       target_key=None,
                                                       batch_size=2,
                                                       shuffle=False,
                                                       num_epochs=1)
            features = input_fn()

            coord = coordinator.Coordinator()
            threads = queue_runner_impl.start_queue_runners(session,
                                                            coord=coord)

            res = session.run([features])
            self.assertAllEqual(res[0]['a'], np.asarray([0, 1]).reshape(-1, 1))

            session.run([features])
            with self.assertRaises(errors.OutOfRangeError):
                session.run([features])

            coord.request_stop()
            coord.join(threads)
Example #7
  def testGeneratorInputFnWithDifferentDimensionsOfFeatures(self):

    def generator():
      for index in range(100):
        yield {
            'a': np.ones((10, 10)) * index,
            'b': np.ones((5, 5)) * index + 32,
            'label': np.ones((3, 3)) * index - 32
        }

    with self.cached_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator,
          target_key='label',
          batch_size=2,
          shuffle=False,
          num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'],
                          np.vstack((np.zeros((10, 10)), np.ones(
                              (10, 10)))).reshape(2, 10, 10))
      self.assertAllEqual(res[0]['b'],
                          np.vstack((np.zeros((5, 5)), np.ones(
                              (5, 5)))).reshape(2, 5, 5) + 32)
      self.assertAllEqual(res[1],
                          np.vstack((np.zeros((3, 3)), np.ones(
                              (3, 3)))).reshape(2, 3, 3) - 32)

      coord.request_stop()
      coord.join(threads)
Example #8
  def testGeneratorInputFnLabelDict(self):

    def generator():
      for index in range(2):
        yield {'a': np.ones(1) * index,
               'b': np.ones(1) * index + 32,
               'label': np.ones(1) * index - 32,
               'label2': np.ones(1) * index - 64,
               }

    with self.test_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator, target_key=['label', 'label2'], batch_size=2,
          shuffle=False, num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], np.asarray([0, 1]).reshape(-1, 1))
      self.assertAllEqual(res[0]['b'], np.asarray([32, 33]).reshape(-1, 1))
      self.assertAllEqual(res[1]['label'], np.asarray([-32, -31]).reshape(-1, 1))
      self.assertAllEqual(res[1]['label2'], np.asarray([-64, -63]).reshape(-1, 1))

      session.run([features])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features, target])

      coord.request_stop()
      coord.join(threads)
Example #9
  def testGeneratorInputFn(self):

    def generator():
      for index in range(2):
        yield {
            'a': np.ones(1) * index,
            'b': np.ones(1) * index + 32,
            'label': np.ones(1) * index - 32
        }

    with self.cached_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator,
          target_key='label',
          batch_size=2,
          shuffle=False,
          num_epochs=1)
      features, target = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      res = session.run([features, target])
      self.assertAllEqual(res[0]['a'], np.asarray([0, 1]).reshape(-1, 1))
      self.assertAllEqual(res[0]['b'], np.asarray([32, 33]).reshape(-1, 1))
      self.assertAllEqual(res[1], np.asarray([-32, -31]).reshape(-1, 1))

      session.run([features])
      with self.assertRaises(errors.OutOfRangeError):
        session.run([features, target])

      coord.request_stop()
      coord.join(threads)
Example #10
  def testGeneratorInputFnWithXAsNonGeneratorFunction(self):
    x = np.arange(32, 36)
    with self.test_session():
      with self.assertRaisesRegexp(TypeError, 'x must be generator function'):
        failing_input_fn = generator_io.generator_input_fn(
            x, batch_size=2, shuffle=False, num_epochs=1)
        failing_input_fn()
Example #11
  def testGeneratorInputFnWithMismatchinGeneratorKeys(self):

    def generator():
      index = 0
      yield {
          'a': np.ones(1) * index,
          'b': np.ones(1) * index + 32,
          'label': np.ones(1) * index - 32
      }
      index = 1
      yield {
          'a': np.ones(1) * index,
          'c': np.ones(1) * index + 32,
          'label': np.ones(1) * index - 32
      }

    with self.cached_session() as session:
      input_fn = generator_io.generator_input_fn(
          generator, target_key=None, batch_size=2, shuffle=False, num_epochs=1)
      features = input_fn()

      coord = coordinator.Coordinator()
      threads = queue_runner_impl.start_queue_runners(session, coord=coord)

      with self.assertRaises(errors.OutOfRangeError):
        session.run([features])

      with self.assertRaisesRegex(KeyError, 'key mismatch between dicts emitted'
                                  ' by GenFunExpected'):
        coord.request_stop()
        coord.join(threads)
Example #12
  def testGeneratorInputFnWithXAsNonGeneratorYieldingDicts(self):
    def generator():
      yield np.arange(32, 36)
    with self.test_session():
      with self.assertRaisesRegexp(TypeError, r"x\(\) must yield dict"):
        failing_input_fn = generator_io.generator_input_fn(
            generator, batch_size=2, shuffle=False, num_epochs=1)
        failing_input_fn()
Example #13
    def testGeneratorInputFnWithXAsNonGenerator(self):
        def generator():
            return np.arange(32, 36)

        with self.test_session():
            with self.assertRaisesRegexp(TypeError, r'x\(\) must be generator'):
                failing_input_fn = generator_io.generator_input_fn(
                    generator, batch_size=2, shuffle=False, num_epochs=1)
                failing_input_fn()
Example #14
  def testGeneratorInputFnWithXAsNonGenerator(self):

    def generator():
      return np.arange(32, 36)

    with self.cached_session():
      with self.assertRaisesRegexp(TypeError, r'x\(\) must be generator'):
        failing_input_fn = generator_io.generator_input_fn(
            generator, batch_size=2, shuffle=False, num_epochs=1)
        failing_input_fn()
Example #15
def main(args):
    # restore the saved configs and the vocabulary
    with open(os.path.join(args.modeldir, 'hparams.json'), 'r') as fin:
        params = json.load(fin)

    with open(os.path.join(args.modeldir, 'vocab.json'), 'r') as fin:
        vocab = json.load(fin)
        vocab = {int(k): v for k, v in vocab.items()}

    hparams = tf.contrib.training.HParams(**params)
    # still the same workaround needed on some machines
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    run_config = tf.estimator.RunConfig(
        model_dir=args.modeldir, session_config=session_config)

    # create the model
    model = base.create_model(config=run_config, hparams=hparams)

    # prepare the test data from sample_submission
    df = pd.read_csv(os.path.join(args.datadir, 'sample_submission.csv'))
    df.label = 0
    df.fname = [
        os.path.join(args.datadir, 'audio_test', _)
        for _ in df.fname.values]

    # predict processes one example at a time anyway, so let's drop batching;
    # this way we can work with whole recordings
    # NB: worth checking that pad_value works correctly
    test_input_fn = generator_input_fn(
        x=utils.fast_datagenerator(df, params, 'test'),
        batch_size=1,
        shuffle=False,
        num_epochs=1,
        queue_capacity=hparams.batch_size,
        num_threads=1,
        pad_value=0.0,
    )

    it = model.predict(input_fn=test_input_fn)  # this is an iterator

    # the part below is a bit messy; refactor it and add fold information if needed
    submission = dict()
    for output in tqdm(it):
        path = output['fname'].decode()
        fname = os.path.basename(path)
        # up to three labels may be predicted for each recording
        predicted = " ".join([vocab[i] for i in output['top3']])
        submission[fname] = predicted

    with open(os.path.join(args.modeldir, 'submission.csv'), 'w') as fout:
        fout.write('fname,label\n')
        for fname, pred in submission.items():
            fout.write("{},{}\n".format(fname, pred))
Example #16
    def get_test_input_function(self):
        val_input_fn = generator_input_fn(
            x=self.data_generator(self._dataset.get_test_files(), 1, 'test'),
            target_key=None,
            batch_size=1,
            shuffle=False,
            num_epochs=1,
            queue_capacity=3 * self._batch_size + 10,
            num_threads=1,
        )

        return val_input_fn
Example #17
    def get_val_input_fn(self):
        val_input_fn = generator_input_fn(
            x=self.data_generator(self._dataset.get_val_files(),
                                  self._batch_size, 'val'),
            target_key=None,
            batch_size=self._batch_size,
            shuffle=True,
            num_epochs=1,
            queue_capacity=3 * self._batch_size + 10,
            num_threads=1,
        )

        return val_input_fn
Example #18
    def get_val_input_fn(self):
        val_input_fn = generator_input_fn(
            x=self.data_generator(self._audio_preprocessor.get_val_files(),
                                  None, 'val'),
            target_key=self._feature_type.TARGET,
            batch_size=self._batch_size,
            shuffle=True,
            num_epochs=1,
            queue_capacity=3 * self._batch_size + 10,
            num_threads=1,
        )

        return val_input_fn
Example #19
    def _get_test_input_function(self):

        test_wav_files_path = self.test_wav_files_path
        val_input_fn = generator_input_fn(
            x=self.data_generator(test_wav_files_path, None, 1, 'test'),
            target_key=None,
            batch_size=1,
            shuffle=False,
            num_epochs=1,
            queue_capacity=2000,
            num_threads=1,
        )

        return val_input_fn
Example #20
  def testGeneratorInputFNWithTargetLabelListNotString(self):
    def generator():
      for index in range(2):
        yield {'a': np.ones((10, 10)) * index,
               'b': np.ones((5, 5)) * index + 32,
               'label': np.ones((3, 3)) * index - 32}

    y = ["label", np.arange(10)]
    with self.test_session():
      with self.assertRaisesRegexp(TypeError, 'target_key must be str or'
                                              ' Container of str'):
        failing_input_fn = generator_io.generator_input_fn(
          generator, target_key=y, batch_size=2, shuffle=False, num_epochs=1)
        failing_input_fn()
Example #21
    def get_train_input_fn(self):
        train_input_fn = generator_input_fn(
            x=self.data_generator(self._audio_preprocessor.get_train_files(),
                                  None, 'train'),
            target_key=self._feature_type.
            TARGET,  # you could leave target_key in features, so labels in model_handler will be empty
            batch_size=self._batch_size,
            shuffle=True,
            num_epochs=self._num_epochs,
            queue_capacity=3 * self._batch_size + 10,
            num_threads=1,
        )

        return train_input_fn
Example #22
  def testGeneratorInputFNWithTargetLabelNotInDict(self):
    def generator():
      for index in range(2):
        yield {'a': np.ones((10, 10)) * index,
               'b': np.ones((5, 5)) * index + 32,
               'label': np.ones((3, 3)) * index - 32}

    y = ["label", "target"]
    with self.test_session():
      with self.assertRaisesRegexp(KeyError,
                                   'target_key not in yielded dict'):
        failing_input_fn = generator_io.generator_input_fn(
          generator, target_key=y, batch_size=2, shuffle=False, num_epochs=1)
        failing_input_fn()
Example #23
    def get_train_input_fn(self):
        train_input_fn = generator_input_fn(
            x=self.data_generator(self._dataset.get_train_files(),
                                  self._batch_size, 'train'),
            target_key=
            None,  # you could leave target_key in features, so labels in model_handler will be empty
            batch_size=self._batch_size,
            shuffle=True,
            num_epochs=1,
            queue_capacity=3 * self._batch_size + 10,
            num_threads=1,
        )

        return train_input_fn
Example #24
def get_data_generator(code, hparams):
    # it's a magic function :)

    train_input_fn = generator_input_fn(
        x=data_generator_train(code),
        target_key=
        'target',  # you could leave target_key in features, so labels in model_handler will be empty
        batch_size=hparams.batch_size,
        shuffle=True,
        num_epochs=None,
        queue_capacity=3 * hparams.batch_size + 10,
        num_threads=10,
    )

    val_input_fn = generator_input_fn(
        x=data_generator_val(code),
        target_key='target',
        batch_size=hparams.batch_size,
        shuffle=True,
        num_epochs=None,
        queue_capacity=3 * hparams.batch_size + 10,
        num_threads=1,
    )
    return train_input_fn, val_input_fn
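One way the two returned input_fns might be consumed, sketched below; create_model, run_config, code, and hparams.max_steps are assumed to come from the surrounding project (as in Example #35), and some stop criterion is needed because num_epochs=None keeps the training queue running indefinitely.

import tensorflow as tf

train_input_fn, val_input_fn = get_data_generator(code, hparams)

est = create_model(config=run_config, hparams=hparams)  # assumed project helper
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn,
                                    max_steps=hparams.max_steps)  # hypothetical hparam
eval_spec = tf.estimator.EvalSpec(input_fn=val_input_fn)
tf.estimator.train_and_evaluate(est, train_spec, eval_spec)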
Example #25
    def _get_train_input_fn(self):
        train_wav_files_path = self.train_wav_files_path
        labels = self.train_labels

        train_input_fn = generator_input_fn(
            x=self.data_generator(train_wav_files_path, labels,
                                  self._batch_size, 'train'),
            target_key=
            None,  # you could leave target_key in features, so labels in model_handler will be empty
            batch_size=self._batch_size,
            shuffle=True,
            num_epochs=1,
            queue_capacity=2000,
            num_threads=3,
        )

        return train_input_fn
Example #26
    def _get_val_input_fn(self):

        train_wav_files_path = self.val_wav_files_path
        labels = self.val_labels

        val_input_fn = generator_input_fn(
            x=self.data_generator(train_wav_files_path, labels,
                                  self._batch_size, 'val'),
            target_key=None,
            batch_size=self._batch_size,
            shuffle=True,
            num_epochs=1,
            queue_capacity=2000,
            num_threads=3,
        )

        return val_input_fn
Example #27
    def get_val_input_fn(self):
        val_input_fn = generator_input_fn(
            x=self.get_data(
                candidates=self._audio_preprocessor.get_val_files(),
                how_many=-1,
                offset=0,
                audio_sampling_settings=self._audio_sampling_settings,
                background_frequency=BACKGROUND_FREQUENCY,
                background_volume_range=BACKGROUND_VOLUME,
                time_shift=TIME_SHIFT_MS,
                mode="validation",
                sess=self._tf_sess),
            target_key=self._feature_type.TARGET,
            batch_size=self._batch_size,
            shuffle=True,
            num_epochs=1,
            queue_capacity=3 * self._batch_size + 10,
            num_threads=1,
        )

        return val_input_fn
Example #28
    def get_train_input_fn(self):
        train_input_fn = generator_input_fn(
            x=self.get_data(
                candidates=self._audio_preprocessor.get_train_files(),
                how_many=-1,
                offset=0,
                audio_sampling_settings=self._audio_sampling_settings,
                background_frequency=BACKGROUND_FREQUENCY,
                background_volume_range=BACKGROUND_VOLUME,
                time_shift=TIME_SHIFT_MS,
                mode="training",
                sess=self._tf_sess),
            target_key=self._feature_type.
            TARGET,  # you could leave target_key in features, so labels in model_handler will be empty
            batch_size=self._batch_size,
            shuffle=True,
            num_epochs=self._num_epochs,
            queue_capacity=3 * self._batch_size + 10,
            num_threads=1,
        )

        return train_input_fn
Example #29
def get_test_data_generator(code, hparams):
    def test_data_generator(code):
        def generator():
            scaler = StandardScaler()
            print("loading val data set")
            df = pd.read_hdf('../hdf_201709/%s.h5' % code,
                             'table').reset_index()
            scaler.fit(df.iloc[0:1000, 2:])
            df = pd.read_hdf('../hdf_201710/%s.h5' % code,
                             'table').reset_index()
            for i in range(120, 100000):
                cur_time = df.loc[i, '시간'][11:16]
                if cur_time > "15:20": continue
                if cur_time < "09:02": continue
                if df.iloc[i][2] < df.iloc[i + 60][2]:
                    pred = 1
                elif df.iloc[i][2] > df.iloc[i + 60][2]:
                    pred = -1
                else:
                    pred = 0
                yield dict(target=np.int32(pred),
                           data=scaler.transform(
                               np.array(df.iloc[i - 120:i, 2:])).reshape(
                                   120, 67, 1).astype(np.float32),
                           cur=df.iloc[i, 2],
                           future=df.iloc[i + 60, 2],
                           buy=df.iloc[i, 27],
                           sell=df.iloc[i, 29])

        return generator

    test_input_fn = generator_input_fn(x=test_data_generator(code),
                                       batch_size=hparams.batch_size,
                                       shuffle=False,
                                       num_epochs=1,
                                       queue_capacity=10 * hparams.batch_size,
                                       num_threads=1)
    return test_input_fn
Example #30
)

hparams = tf.contrib.training.HParams(**params)
OUTDIR = './model-3'
directory = os.path.join(OUTDIR, 'eval')
if not os.path.exists(directory):
    os.makedirs(directory)
#os.makedirs(os.path.join(OUTDIR, 'eval'))
model_dir = OUTDIR

run_config = tf.contrib.learn.RunConfig(model_dir=model_dir)
from tensorflow.contrib.learn.python.learn.learn_io.generator_io import generator_input_fn
test_input_fn = generator_input_fn(
    x=test_data_generator(paths),
    batch_size=hparams.batch_size,
    shuffle=False,
    num_epochs=1,
    queue_capacity=10 * hparams.batch_size,
    num_threads=1,
)

# it's a magic function :)
#from tensorflow.contrib.learn.python.learn.learn_io.generator_io import generator_input_fn

model = create_model(config=run_config, hparams=hparams)
it = model.predict(input_fn=test_input_fn)

to_predict = 'yes no up down left right on off stop go'.split()
#print(it.get_shape().as_list())
with open(os.path.join(model_dir, 'submission1.csv'), 'w') as fout:
    fout.write('fname,label\n')
    submission = dict()
Example #31
            fname=path,
            desired_samples=16000,
            fg_vol=1,
            bg_data=[],
            bg_vol=0,
            clip_min=-1.0,
            clip_max=1.0,
            time_shift_samples=0,
        )
        result[FINGERPRINT_KEY] = getMfcc(
            getTransformedAudioLocal(**audio_options))
        yield result

test_input_fn = generator_input_fn(
    x=test_data_generator,
    batch_size=TEST_BATCH_SIZE,
    shuffle=False,
    num_epochs=1,
    queue_capacity=10 * TEST_BATCH_SIZE,
    num_threads=1,
)

model = create_estimator(
            config=RunConfig(model_dir=model_dir), 
            hparams=HParams(**params),
        )
it = model.predict(input_fn=test_input_fn)

submission = dict()
for t in tqdm(it):
    fname, label = t['fname'].decode(), id2name[t['label']]
    submission[fname] = label
Example #32
model_dir = OUTDIR

run_config = tf.contrib.learn.RunConfig(model_dir=model_dir)

# **Let's run training!**

# In[ ]:

# it's a magic function :)
from tensorflow.contrib.learn.python.learn.learn_io.generator_io import generator_input_fn

train_input_fn = generator_input_fn(
    x=data_generator(trainset, hparams, 'train'),
    target_key=
    'target',  # you could leave target_key in features, so labels in model_handler will be empty
    batch_size=hparams.batch_size,
    shuffle=True,
    num_epochs=None,
    queue_capacity=3 * hparams.batch_size + 10,
    num_threads=1,
)

val_input_fn = generator_input_fn(
    x=data_generator(valset, hparams, 'val'),
    target_key='target',
    batch_size=hparams.batch_size,
    shuffle=True,
    num_epochs=None,
    queue_capacity=3 * hparams.batch_size + 10,
    num_threads=1,
)
Example #33
                    yield result
            except Exception as err:
                print(err, label_id, uid, fname)

    return generator


##=========================================================
## Actual computations start here
##=========================================================
train_meta_list, val_meta_list = get_metadata_lists(DATADIR)
train_input_fn = generator_input_fn(
    x=data_generator_fn(train_meta_list, 'train'),
    target_key=TARGET_KEY,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_epochs=None,
    queue_capacity=3 * BATCH_SIZE + 10,
    num_threads=1,
)
val_input_fn = generator_input_fn(
    x=data_generator_fn(val_meta_list),
    target_key=TARGET_KEY,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_epochs=None,
    queue_capacity=3 * BATCH_SIZE + 10,
    num_threads=1,
)

def main(_):
    # We want to see all the logging messages for this tutorial.
    tf.logging.set_verbosity(tf.logging.INFO)

    # Start a new TensorFlow session.
    sess = tf.InteractiveSession()

    # Begin by making sure we have the training data we need. If you already have
    # training data of your own, use `--data_url= ` on the command line to avoid
    # downloading.
    model_settings = models.prepare_model_settings(
        len(new_input_data.prepare_words_list(FLAGS.wanted_words.split(','))),
        FLAGS.sample_rate, FLAGS.clip_duration_ms, FLAGS.window_size_ms,
        FLAGS.window_stride_ms, FLAGS.dct_coefficient_count)

    POSSIBLE_LABELS = new_input_data.prepare_words_list(
        FLAGS.wanted_words.split(','))
    params = dict(
        # seed=2018,
        # batch_size=FLAGS.batch_size,
        # keep_prob=0.5,
        # learning_rate=0.0002,
        # clip_gradients=15.0,
        # use_batch_norm=True,
        # num_classes=len(POSSIBLE_LABELS)
    )

    hparams = tf.contrib.training.HParams(**params)
    run_config = tf.contrib.learn.RunConfig()
    run_config = run_config.replace(model_dir=FLAGS.train_dir)

    audio_processor2 = prediction_input_data.AudioProcessor(
        FLAGS.data_dir, FLAGS.prediction_data_dir, model_settings)

    set_size = audio_processor2.set_size()
    print('prediction data size: ', set_size)

    def prediction_data_generator():
        def generator():
            for i in xrange(0, set_size, FLAGS.prediction_batch_size):
                # Pull the audio samples we'll use for testing.
                fname, fingerprints = \
                  audio_processor2.get_data(FLAGS.prediction_batch_size, i,
                                            model_settings, 0.0, 0.0, 0, sess)

                yield dict(fname=fname, fingerprint_input=fingerprints)

        return generator

    test_input_fn = generator_input_fn(
        x=prediction_data_generator(),
        batch_size=FLAGS.prediction_batch_size,
        queue_capacity=10 * FLAGS.prediction_batch_size,
    )

    def model_fn(features, labels, mode, params):
        """Model function for Estimator."""
        logits = models.create_model(tf.cast(features['fingerprint_input'],
                                             tf.float32),
                                     model_settings,
                                     FLAGS.model_architecture,
                                     is_training=False)

        # Provide an estimator spec for `ModeKeys.PREDICT`.
        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {
                'fname': tf.cast(features['fname'], tf.float32),
                'label': tf.argmax(logits, axis=-1)
            }
            specs = dict(mode=mode, predictions=predictions)
        return tf.estimator.EstimatorSpec(**specs)

    def get_estimator(config, hparams):
        """Return the model as a Tensorflow Estimator object.
    Args:
       run_config (RunConfig): Configuration for Estimator run.
       params (HParams): hyperparameters.
    """
        return tf.estimator.Estimator(model_fn=model_fn,
                                      config=config
                                      # params=hparams,
                                      )

    estimator = get_estimator(config=run_config, hparams=hparams)
    it = estimator.predict(input_fn=test_input_fn)

    id2name = {i: name for i, name in enumerate(POSSIBLE_LABELS)}
    # last batch will contain padding, so remove duplicates
    submission = dict()
    for t in tqdm(it):
        fname, label = t['fname'].decode(), id2name[t['label']]
        # print("fname >>> : ", fname, ", ", "label >>> : ", label)
        submission[fname] = label

    # make submission.csv
    fout = open(os.path.join(FLAGS.result_dir, 'submission.csv'),
                'w',
                encoding='utf-8',
                newline='')
    writer = csv.writer(fout)
    writer.writerow(['fname', 'label'])
    for key in sorted(submission.keys()):
        writer.writerow([key, submission[key]])
    fout.close()
Example #35
def main(args):
    # just create two directories for the current experiment: exp and exp/eval
    try:
        os.makedirs(os.path.join(args.outdir, 'eval'))
    except OSError:
        pass

    df = pd.read_csv(os.path.join(args.datadir, 'train.csv'))
    labels = sorted(set(df.label.values))

    label2id = {label: i for i, label in enumerate(labels)}
    id2label = {i: label for label, i in label2id.items()}

    df['label'] = [label2id[_] for _ in df.label.values]
    df['fname'] = [
        os.path.join(args.datadir, 'audio_train', _) for _ in df.fname.values
    ]

    # todo: sort out the input data format and tune the split procedure;
    # you could add folds, balance the classes, or split on the manual-labeling flag
    idx = np.arange(len(df))
    idx_train, idx_val = train_test_split(idx,
                                          test_size=0.33,
                                          random_state=2018,
                                          shuffle=True)
    df_train, df_val = df.iloc[idx_train], df.iloc[idx_val]

    params = dict(num_classes=len(labels))
    params.update(**args.__dict__)

    hparams = tf.contrib.training.HParams(**params)

    # save two files: first, the model parameters; handy once the parameters define the network architecture
    with open(os.path.join(args.outdir, 'hparams.json'), 'w') as fout:
        json.dump(params, fout, indent=2)

    # the label vocabulary for the inverse mapping
    with open(os.path.join(args.outdir, 'vocab.json'), 'w') as fout:
        json.dump(id2label, fout, indent=2)

    # a small odd workaround, not needed on every machine;
    # on some of them it fixes a strange CUDNN error
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    run_config = tf.estimator.RunConfig(model_dir=args.outdir,
                                        session_config=session_config)

    # It is marked as deprecated, but no simple replacement has turned up yet; if you find one, let me know :)
    train_input_fn = generator_input_fn(
        x=utils.fast_datagenerator(df_train, hparams, 'train'),
        target_key='target',
        batch_size=hparams.batch_size,
        shuffle=True,
        num_epochs=10,
        queue_capacity=3 * hparams.batch_size,
        num_threads=1,
    )

    val_input_fn = generator_input_fn(
        x=utils.fast_datagenerator(df_val, hparams, 'val'),
        target_key='target',
        batch_size=hparams.batch_size,
        shuffle=False,
        num_epochs=None,
        queue_capacity=3 * hparams.batch_size,
        num_threads=1,
    )

    # create the model and train it
    est = base.create_model(config=run_config, hparams=hparams)

    train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
    eval_spec = tf.estimator.EvalSpec(input_fn=val_input_fn)

    tf.estimator.train_and_evaluate(est, train_spec, eval_spec)