def setUp(self):
    """Reset the ``service`` module's globals to a fixed configuration.

    Replaces ``service.FLAGS`` with an attribute object built from a
    hard-coded set of flag values, rebuilds ``service.model`` from six of
    those flags, and resets ``service.grouped_registerations`` to an
    empty dict.
    """
    # Single source for the sample rate used by the derived sample counts.
    sample_rate = 16000

    flag_values = {
        'model_dir': './tmp_model',
        'data_dir': './data/online',
        'filters': [64, 128, 256, 512],
        'blocks': 3,
        'kernel_size': 3,
        'strides': 2,
        'embedding_size': 512,
        'encoder': 'rescnn',
        'sample_rate': sample_rate,
        # Durations below are given in milliseconds and converted by
        # from_ms_to_samples.
        'desired_samples': from_ms_to_samples(sample_rate, 1000),
        'window_size_samples': from_ms_to_samples(sample_rate, 30.0),
        'window_stride_samples': from_ms_to_samples(sample_rate, 10.0),
        'magnitude_squared': True,
        'dct_coefficient_count': 64,
        'batch_size': 10,
        'threshold': 0.2,
        'normalize_frames': False,
    }
    service.FLAGS = _to_object_with_attributes(flag_values)

    # Only this subset of the flags is forwarded to the model as params.
    flags = service.FLAGS
    model_param_names = (
        'filters',
        'blocks',
        'kernel_size',
        'strides',
        'embedding_size',
        'encoder',
    )
    service.model = create_model(
        model_dir=flags.model_dir,
        params={name: getattr(flags, name) for name in model_param_names},
    )
    service.grouped_registerations = {}
示例#2
0
def main(_):
    """Train the model on the wav files listed in ``train_labels``.

    Reads the file/label listing from ``<data_dir>/train_labels``, resolves
    every wav file under ``<data_dir>/train``, builds the model (with a
    mirrored distribution strategy when more than one GPU is requested) and
    trains it for ``FLAGS.num_steps`` steps.

    Args:
        _: Ignored positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Load the training listing and turn each entry into a path below
    # <data_dir>/train.
    data_dir = FLAGS.data_dir
    file_names, labels, label_to_id = get_file_and_labels(
        os.path.join(data_dir, 'train_labels'))
    wav_paths = [os.path.join(data_dir, 'train', name) for name in file_names]

    train_num_classes = len(label_to_id)

    # Default to no explicit run configuration; only multi-GPU runs need one.
    run_config = None
    if FLAGS.num_gpus > 1:
        # MirroredStrategy performs in-graph replication with synchronous
        # training across the GPUs of a single machine: the model variables
        # are copied to every device and gradients are combined with
        # all-reduce before being applied, which keeps the copies in sync.
        # Reference: https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/distribute
        strategy = tf.contrib.distribute.MirroredStrategy()
        run_config = tf.estimator.RunConfig(train_distribute=strategy)

    # 'num_classes' is inserted first so that a same-named flag, if any,
    # still takes precedence.
    params = {'num_classes': train_num_classes}
    params.update(FLAGS.__dict__)
    model = create_model(config=run_config,
                         model_dir=FLAGS.model_dir,
                         params=params)

    def train_input_fn():
        # All flags are forwarded as keyword arguments to the input builder.
        return get_input_function(
            wav_files=wav_paths, labels=labels, is_training=True,
            **FLAGS.__dict__)

    model.train(train_input_fn, steps=FLAGS.num_steps)
示例#3
0
def main(_):
    """Create the model and serve ``_application`` over HTTP forever.

    Rebinds the module-level ``model`` and ``grouped_registerations``
    globals, then starts a server on ``FLAGS.host``:``FLAGS.port``.

    Args:
        _: Ignored positional argument.
    """
    global model, grouped_registerations

    tf.logging.set_verbosity(tf.logging.INFO)

    # The model receives a shallow copy of every flag as its params.
    model = create_model(model_dir=FLAGS.model_dir,
                         params=dict(FLAGS.__dict__))
    grouped_registerations = {}

    port = FLAGS.port
    server = make_server(host=FLAGS.host, port=port, app=_application)
    tf.logging.info("serving http on port {0}...".format(port))
    server.serve_forever()
示例#4
0
def main(_):
    """Evaluate verification and identification on the evaluation set.

    Loads the evaluation listing and the group / enrollment / verification /
    identification configuration files from ``FLAGS.data_dir``, computes
    embeddings for every wav file, builds registerations from the enrolled
    files and logs the false accept rate, false reject rate, error rate and
    threshold for both tasks.

    Args:
        _: Ignored positional argument.
    """
    # Show INFO-level messages so the evaluation report is visible.
    tf.logging.set_verbosity(tf.logging.INFO)

    data_dir = FLAGS.data_dir

    def in_data_dir(name):
        # Resolve a file or directory name relative to the data directory.
        return os.path.join(data_dir, name)

    # Load the evaluation listing and resolve each wav file under
    # <data_dir>/eval.
    file_names, label_ids, label_to_id = get_file_and_labels(
        in_data_dir('eval_labels'))
    wav_files = [os.path.join(data_dir, 'eval', name) for name in file_names]

    raw_groups = _get_groups(in_data_dir('groups_config'))
    raw_enrollments = get_enrollments(in_data_dir('enrollment_config'))
    raw_verifications = _get_to_be_verified(
        in_data_dir('verification_config'))
    raw_identifications = _get_to_be_identified(
        in_data_dir('identification_config'))
    file_id_to_index = _get_file_id_to_index(wav_files)

    # TODO validate configurations
    # Translate the configurations: wav file id --> index and
    # label id --> label index.
    groups = {
        group_id: [label_to_id[label] for label in members]
        for group_id, members in raw_groups.items()
    }
    enrollments = [file_id_to_index[file_id] for file_id in raw_enrollments]
    to_be_verified = [
        (file_id_to_index[file_id], label_to_id[label])
        for file_id, label in raw_verifications
    ]
    to_be_identified = [
        (file_id_to_index[file_id], group_id)
        for file_id, group_id in raw_identifications
    ]

    # Every flag is forwarded both as model params and as keyword arguments
    # to the embedding extraction.
    model = create_model(model_dir=FLAGS.model_dir,
                         params=dict(FLAGS.__dict__))
    embeddings = get_embeddings(model, wav_files=wav_files, **FLAGS.__dict__)

    enrolled_embeddings = [embeddings[index] for index in enrollments]
    enrolled_labels = [label_ids[index] for index in enrollments]
    registerations = get_registerations(enrolled_embeddings, enrolled_labels)

    fa_rate, fr_rate, error_rate, threshold = _evaluate_verification(
        embeddings, label_ids, registerations, to_be_verified,
        FLAGS.threshold)

    report_template = (
        'false accept rate:{}\n'
        'false reject rate:{}\n'
        'error rate:{}\n'
        'threshold:{}'
    )

    tf.logging.info('verification performance')
    tf.logging.info(
        report_template.format(fa_rate, fr_rate, error_rate, threshold))

    # Identification reuses the threshold returned by the verification
    # evaluation (per the original author, the one corresponding to the EER).
    fa_rate, fr_rate, error_rate, threshold = _evaluate_identification(
        embeddings, label_ids, registerations, to_be_identified, groups,
        threshold)
    tf.logging.info('identification performance')
    tf.logging.info(
        report_template.format(fa_rate, fr_rate, error_rate, threshold))
示例#5
0
# Load config
ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
# Read the static configuration inside a context manager so the file handle
# is closed deterministically instead of being left to the garbage collector.
# NOTE(review): 'config.json' is resolved against the current working
# directory, not ROOT_PATH -- confirm this is intended.
with open('config.json', 'r') as _config_file:
    STATIC_CONFIG = dict(json.load(_config_file))
RUNTIME_CONFIG = {"root_path": ROOT_PATH}
# Runtime values are merged last, so they win over same-named static entries.
CONFIG = {**STATIC_CONFIG, **RUNTIME_CONFIG}

# This is the class that holds the mappings of characters to numbers and vice versa
# The input pipeline uses this to map characters in text to indexes into embedding matrix
vocabulary = Vocabulary()

# Define input pipeline
next_training_batch = train_input_fn(vocabulary, CONFIG)

# Create the training model. Inference model is created with same function in the synthesize_results.py file
mel_outputs, residual_mels = create_model(next_training_batch,
                                          CONFIG,
                                          is_training=True)

# Define loss. Loss is a tensor containing the value to minimize.
loss = composite_loss(mel_outputs, residual_mels,
                      next_training_batch['targets'])

# Create optimizer instance
opt = tf.train.AdamOptimizer()

# minimize() does not return a tensor with a value; it returns the op that
# performs one update step on the variables when run.
# Calling minimize() is equivalent to calling compute_gradients() followed by
# apply_gradients(). To modify the gradients before they are applied (for
# example, gradient clipping), call those two methods separately instead.
opt_op = opt.minimize(loss)