def setUp(self):
    """Install fixed flags, a model and an empty registration store on `service`.

    The flag values are hard-coded here; the model is built by `create_model`
    from the encoder-related subset of those flags.
    """
    flag_values = {
        'model_dir': './tmp_model',
        'data_dir': './data/online',
        'filters': [64, 128, 256, 512],
        'blocks': 3,
        'kernel_size': 3,
        'strides': 2,
        'embedding_size': 512,
        'encoder': 'rescnn',
        'sample_rate': 16000,
        'desired_samples': from_ms_to_samples(16000, 1000),
        'window_size_samples': from_ms_to_samples(16000, 30.0),
        'window_stride_samples': from_ms_to_samples(16000, 10.0),
        'magnitude_squared': True,
        'dct_coefficient_count': 64,
        'batch_size': 10,
        'threshold': 0.2,
        'normalize_frames': False,
    }
    flags = _to_object_with_attributes(flag_values)
    service.FLAGS = flags

    # Only these flags are forwarded to the model as params.
    model_param_names = (
        'filters',
        'blocks',
        'kernel_size',
        'strides',
        'embedding_size',
        'encoder',
    )
    model_params = {name: getattr(flags, name) for name in model_param_names}
    service.model = create_model(model_dir=flags.model_dir, params=model_params)

    service.grouped_registerations = {}
def main(_):
    """Train the model on the files listed in `<data_dir>/train_labels`.

    Args:
        _: Unused positional argument.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    # Resolve the training files and their labels.
    labels_path = os.path.join(FLAGS.data_dir, 'train_labels')
    wav_files, labels, label_to_id = get_file_and_labels(labels_path)
    wav_files = [
        os.path.join(FLAGS.data_dir, 'train', file_name)
        for file_name in wav_files
    ]
    train_num_classes = len(label_to_id)

    # No RunConfig unless more than one GPU is requested.
    run_config = None
    if FLAGS.num_gpus > 1:
        # MirroredStrategy performs in-graph replication with synchronous
        # training across the GPUs of a single machine: every device holds a
        # copy of the model variables, and gradients are combined with
        # all-reduce before being applied so the copies stay in sync.
        # Reference: https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/distribute
        strategy = tf.contrib.distribute.MirroredStrategy()
        run_config = tf.estimator.RunConfig(train_distribute=strategy)

    # Flags are merged last, so a flag named 'num_classes' would win over the
    # value derived from the label file.
    model_params = {'num_classes': train_num_classes}
    model_params.update(FLAGS.__dict__)
    model = create_model(
        config=run_config,
        model_dir=FLAGS.model_dir,
        params=model_params)

    def train_input_fn():
        # Evaluated lazily when the estimator asks for input.
        return get_input_function(
            wav_files=wav_files,
            labels=labels,
            is_training=True,
            **FLAGS.__dict__)

    model.train(train_input_fn, steps=FLAGS.num_steps)
def main(_):
    """Build the model, reset the registration store and serve HTTP forever.

    Rebinds the module-level `model` and `grouped_registerations` names, then
    blocks in `serve_forever()`.

    Args:
        _: Unused positional argument.
    """
    global model, grouped_registerations

    tf.logging.set_verbosity(tf.logging.INFO)

    # Hand the model a shallow copy of the flag values as its params.
    model_params = dict(FLAGS.__dict__)
    model = create_model(model_dir=FLAGS.model_dir, params=model_params)
    grouped_registerations = {}

    server = make_server(host=FLAGS.host, port=FLAGS.port, app=_application)
    tf.logging.info("serving http on port {0}...".format(FLAGS.port))
    server.serve_forever()
def main(_):
    """Evaluate verification and identification on the files in `<data_dir>/eval`.

    Reads the label file and the group / enrollment / verification /
    identification configs from `FLAGS.data_dir`, computes embeddings for every
    eval file, then logs the rates returned by the verification and
    identification evaluations.

    Args:
        _: Unused positional argument.
    """
    # Show INFO-level log messages.
    tf.logging.set_verbosity(tf.logging.INFO)

    def data_path(name):
        # Path of an entry directly under the data directory.
        return os.path.join(FLAGS.data_dir, name)

    # Evaluation files and the raw configurations.
    wav_files, label_ids, label_to_id = get_file_and_labels(data_path('eval_labels'))
    wav_files = [
        os.path.join(FLAGS.data_dir, 'eval', file_name)
        for file_name in wav_files
    ]
    groups = _get_groups(data_path('groups_config'))
    enrollments = get_enrollments(data_path('enrollment_config'))
    to_be_verified = _get_to_be_verified(data_path('verification_config'))
    to_be_identified = _get_to_be_identified(data_path('identification_config'))
    file_id_to_index = _get_file_id_to_index(wav_files)

    # TODO validate configurations

    # Translate the configurations: wav file id --> index, label --> label id.
    groups = {
        group_id: [label_to_id[label] for label in members]
        for group_id, members in groups.items()
    }
    enrollments = [file_id_to_index[file_id] for file_id in enrollments]
    to_be_verified = [
        (file_id_to_index[file_id], label_to_id[label])
        for file_id, label in to_be_verified
    ]
    to_be_identified = [
        (file_id_to_index[file_id], group_id)
        for file_id, group_id in to_be_identified
    ]

    model = create_model(model_dir=FLAGS.model_dir, params=dict(FLAGS.__dict__))
    embeddings = get_embeddings(model, wav_files=wav_files, **FLAGS.__dict__)

    # Registrations are built from the enrolled files only.
    enrolled_embeddings = [embeddings[index] for index in enrollments]
    enrolled_labels = [label_ids[index] for index in enrollments]
    registerations = get_registerations(enrolled_embeddings, enrolled_labels)

    report_template = ('false accept rate:{}\n'
                       'false reject rate:{}\n'
                       'error rate:{}\n'
                       'threshold:{}')

    fa_rate, fr_rate, error_rate, threshold = _evaluate_verification(
        embeddings, label_ids, registerations, to_be_verified, FLAGS.threshold)
    tf.logging.info('verification performance')
    tf.logging.info(report_template.format(fa_rate, fr_rate, error_rate, threshold))

    # The threshold returned by the verification step (per the original note,
    # the one corresponding to the EER) is reused for identification.
    fa_rate, fr_rate, error_rate, threshold = _evaluate_identification(
        embeddings, label_ids, registerations, to_be_identified, groups, threshold)
    tf.logging.info('identification performance')
    tf.logging.info(report_template.format(fa_rate, fr_rate, error_rate, threshold))
# Load config
ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
# Read the static configuration inside a context manager so the file handle is
# closed as soon as parsing finishes instead of being left open.
# NOTE(review): 'config.json' is resolved against the current working
# directory, not ROOT_PATH -- confirm that this is intended.
with open('config.json', 'r') as config_file:
    STATIC_CONFIG = dict(json.load(config_file))
RUNTIME_CONFIG = {"root_path": ROOT_PATH}
# Runtime entries come last, so they win over static entries with the same key.
CONFIG = {**STATIC_CONFIG, **RUNTIME_CONFIG}

# This is the class that holds the mappings of characters to numbers and vice versa.
# The input pipeline uses this to map characters in text to indexes into the embedding matrix.
vocabulary = Vocabulary()

# Define input pipeline
next_training_batch = train_input_fn(vocabulary, CONFIG)

# Create the training model. The inference model is created with the same
# function in the synthesize_results.py file.
mel_outputs, residual_mels = create_model(next_training_batch, CONFIG, is_training=True)

# Define loss. Loss is a tensor containing the value to minimize.
loss = composite_loss(mel_outputs, residual_mels, next_training_batch['targets'])

# Create optimizer instance
opt = tf.train.AdamOptimizer()

# Build the training op. Unlike most ops, which return tensors, minimize()
# returns another op. Calling minimize() is equivalent to calling
# compute_gradients() followed by apply_gradients(). To modify the gradients
# before they are applied (for example, gradient clipping), call those two
# methods separately instead of minimize().
opt_op = opt.minimize(loss)