def write_training_result(global_step: int, id: List[int], text: List[str],
                          predicted_mel: List[np.ndarray],
                          ground_truth_mel: List[np.ndarray],
                          mel_length: List[int],
                          alignment: List[np.ndarray],
                          filename: str):
    """Pack one batch of training outputs into a ``tf.train.Example`` and write it.

    Every array is stored as its raw byte buffer. The element dtype is NOT
    recorded in the example, so a reader has to know it in order to decode
    the buffers; only the lengths/widths listed below are stored.

    Args:
        global_step: Stored under ``'global_step'``.
        id: Per-example integer ids, stored under ``'id'``. (The name shadows
            the builtin; it is kept because it is part of the call signature.)
        text: Per-example text, handed to ``bytes_feature`` unchanged.
            NOTE(review): presumably already bytes, or encoded inside
            ``bytes_feature`` -- confirm against the helper.
        predicted_mel: Predicted arrays; each ``shape[0]`` is stored under
            ``'predicted_mel_length'``.
        ground_truth_mel: Ground-truth arrays (indexed as at least 2-D).
            Each ``shape[0]`` is stored under ``'mel_length'`` and the first
            array's ``shape[1]`` under ``'mel_width'``. Its ``len`` is stored
            as ``'batch_size'``.
        mel_length: Stored under ``'mel_length_without_padding'``.
        alignment: Alignment arrays (indexed as at least 3-D). ``shape[1]``
            and ``shape[2]`` are stored as ``'alignment_source_length'`` and
            ``'alignment_target_length'`` respectively.
        filename: Forwarded to ``write_tfrecord`` together with the example.

    Raises:
        IndexError: If ``ground_truth_mel`` is empty (the width is read from
            its first element).
    """
    batch_size = len(ground_truth_mel)

    # ndarray.tostring() is a deprecated alias of tobytes(); tobytes() yields
    # exactly the same bytes and is the supported spelling.
    raw_predicted_mel = [m.tobytes() for m in predicted_mel]
    raw_ground_truth_mel = [m.tobytes() for m in ground_truth_mel]

    # The width is taken from the first ground-truth array only.
    mel_width = ground_truth_mel[0].shape[1]
    padded_mel_length = [m.shape[0] for m in ground_truth_mel]
    predicted_mel_length = [m.shape[0] for m in predicted_mel]

    raw_alignment = [a.tobytes() for a in alignment]
    alignment_source_length = [a.shape[1] for a in alignment]
    alignment_target_length = [a.shape[2] for a in alignment]

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'global_step': int64_feature([global_step]),
            'batch_size': int64_feature([batch_size]),
            'id': int64_feature(id),
            'text': bytes_feature(text),
            'predicted_mel': bytes_feature(raw_predicted_mel),
            'ground_truth_mel': bytes_feature(raw_ground_truth_mel),
            'mel_length': int64_feature(padded_mel_length),
            'mel_length_without_padding': int64_feature(mel_length),
            'predicted_mel_length': int64_feature(predicted_mel_length),
            'mel_width': int64_feature([mel_width]),
            'alignment': bytes_feature(raw_alignment),
            'alignment_source_length': int64_feature(alignment_source_length),
            'alignment_target_length': int64_feature(alignment_target_length),
        }))
    write_tfrecord(example, filename)
def write_preprocessed_target_data(_id: int, key: str, mel: np.ndarray, filename: str):
    """Pack one preprocessed target array into a ``tf.train.Example`` and write it.

    The array is stored as its raw byte buffer; its dtype is not recorded,
    only ``len(mel)`` and ``mel.shape[1]``.

    Args:
        _id: Stored under ``'id'``.
        key: Stored UTF-8 encoded under ``'key'``.
        mel: Array indexed as at least 2-D. ``len(mel)`` is stored under
            ``'target_length'`` and ``mel.shape[1]`` under ``'mel_width'``.
        filename: Forwarded to ``write_tfrecord`` together with the example.
    """
    # ndarray.tostring() is a deprecated alias of tobytes(); same bytes.
    raw_mel = mel.tobytes()
    example = tf.train.Example(features=tf.train.Features(
        feature={
            'id': int64_feature([_id]),
            'key': bytes_feature([key.encode('utf-8')]),
            'mel': bytes_feature([raw_mel]),
            'target_length': int64_feature([len(mel)]),
            'mel_width': int64_feature([mel.shape[1]]),
        }))
    write_tfrecord(example, filename)
def write_preprocessed_source_data(_id: int, key: str, source: np.ndarray, text,
                                   phones: np.ndarray, phone_txt,
                                   speaker_id, age, gender, filename: str):
    """Pack one preprocessed source record into a ``tf.train.Example`` and write it.

    Arrays are stored as raw byte buffers; their dtypes are not recorded,
    only their lengths.

    Args:
        _id: Stored under ``'id'``.
        key: Stored UTF-8 encoded under ``'key'``.
        source: Array stored under ``'source'``; ``len(source)`` goes to
            ``'source_length'``.
        text: Stored UTF-8 encoded under ``'text'`` (must provide
            ``.encode``).
        phones: Array stored under ``'phone'`` with ``len(phones)`` under
            ``'phone_length'``. ``None`` is replaced by an empty int64
            array, giving empty bytes and a length of 0.
        phone_txt: Stored UTF-8 encoded under ``'phone_txt'``. ``None`` is
            replaced by the empty string.
        speaker_id: Stored under ``'speaker_id'``.
        age: Stored under ``'age'``.
        gender: Stored under ``'gender'``.
        filename: Forwarded to ``write_tfrecord`` together with the example.
    """
    # ndarray.tostring() is a deprecated alias of tobytes(); same bytes.
    raw_source = source.tobytes()

    # Optional phone data: substitute empty placeholders so that the record
    # always carries the same set of features.
    phones = phones if phones is not None else np.empty([0], dtype=np.int64)
    phone_txt = phone_txt if phone_txt is not None else ''

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'id': int64_feature([_id]),
            'key': bytes_feature([key.encode('utf-8')]),
            'source': bytes_feature([raw_source]),
            'source_length': int64_feature([len(source)]),
            'text': bytes_feature([text.encode('utf-8')]),
            'phone': bytes_feature([phones.tobytes()]),
            'phone_length': int64_feature([len(phones)]),
            'phone_txt': bytes_feature([phone_txt.encode('utf-8')]),
            'speaker_id': int64_feature([speaker_id]),
            'age': int64_feature([age]),
            'gender': int64_feature([gender]),
        }))
    write_tfrecord(example, filename)
def write_postnet_training_result(global_step: int, ids: List[str],
                                  predicted_spec: List[np.ndarray],
                                  ground_truth_spec: List[np.ndarray],
                                  spec_length: List[int],
                                  filename: str):
    """Pack one batch of post-net outputs into a ``tf.train.Example`` and write it.

    Every array is stored as its raw byte buffer. The element dtype is NOT
    recorded in the example; only the lengths/width listed below are stored.

    Args:
        global_step: Stored under ``'global_step'``.
        ids: Per-example string ids, stored UTF-8 encoded under ``'id'``.
        predicted_spec: Predicted arrays; each ``shape[0]`` is stored under
            ``'predicted_spec_length'``.
        ground_truth_spec: Ground-truth arrays (indexed as at least 2-D).
            Each ``shape[0]`` is stored under ``'spec_length'`` and the first
            array's ``shape[1]`` under ``'spec_width'``. Its ``len`` is
            stored as ``'batch_size'``.
        spec_length: Stored under ``'spec_length_without_padding'``.
        filename: Forwarded to ``write_tfrecord`` together with the example.

    Raises:
        IndexError: If ``ground_truth_spec`` is empty (the width is read
            from its first element).
    """
    batch_size = len(ground_truth_spec)

    # ndarray.tostring() is a deprecated alias of tobytes(); tobytes() yields
    # exactly the same bytes and is the supported spelling.
    raw_predicted_spec = [m.tobytes() for m in predicted_spec]
    raw_ground_truth_spec = [m.tobytes() for m in ground_truth_spec]

    # The width is taken from the first ground-truth array only.
    spec_width = ground_truth_spec[0].shape[1]
    padded_spec_length = [m.shape[0] for m in ground_truth_spec]
    predicted_spec_length = [m.shape[0] for m in predicted_spec]

    ids_bytes = [s.encode("utf-8") for s in ids]

    example = tf.train.Example(features=tf.train.Features(
        feature={
            'global_step': int64_feature([global_step]),
            'batch_size': int64_feature([batch_size]),
            'id': bytes_feature(ids_bytes),
            'predicted_spec': bytes_feature(raw_predicted_spec),
            'ground_truth_spec': bytes_feature(raw_ground_truth_spec),
            'spec_length': int64_feature(padded_spec_length),
            'spec_length_without_padding': int64_feature(spec_length),
            'predicted_spec_length': int64_feature(predicted_spec_length),
            'spec_width': int64_feature([spec_width]),
        }))
    write_tfrecord(example, filename)