def test_generate_dict_textgrid(
    multilingual_ipa_tg_corpus_dir,
    english_g2p_model,
    generated_dir,
    temp_dir,
    g2p_config_path,
):
    """Run the ``g2p`` command on the TextGrid corpus fixture and load the result.

    The test is skipped when ``G2P_DISABLED`` is set. Otherwise it invokes
    ``run_g2p`` with the English G2P model fixture, checks that the output
    file was created, and verifies that the resulting dictionary has at
    least one entry in its word mapping.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    dict_path = os.path.join(generated_dir, "tg_g2pped.dict")

    # Positional arguments first, followed by the option flags.
    positional = ["g2p", english_g2p_model, multilingual_ipa_tg_corpus_dir, dict_path]
    options = [
        "-t", temp_dir,
        "-q",
        "--clean",
        "--debug",
        "--config_path", g2p_config_path,
    ]
    parsed, extras = parser.parse_known_args(positional + options)
    run_g2p(parsed, extras)

    assert os.path.exists(dict_path)

    dictionary = MultispeakerDictionary(
        dictionary_path=dict_path, temporary_directory=temp_dir
    )
    dictionary.dictionary_setup()
    assert len(dictionary.word_mapping()) > 0
def test_generate_pretrained_threshold(
    english_g2p_model, basic_corpus_dir, temp_dir, generated_dir
):
    """Run the ``g2p`` command with ``--g2p_threshold 0.95`` and load the result.

    The test is skipped when ``G2P_DISABLED`` is set. Otherwise it invokes
    ``run_g2p`` on the basic corpus fixture, checks that the output file
    exists, and verifies that ``word_mapping(1)`` of the loaded dictionary
    is non-empty.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    generated_path = os.path.join(generated_dir, "g2p_out.txt")

    # Positional arguments first, followed by the option flags.
    positional = ["g2p", english_g2p_model, basic_corpus_dir, generated_path]
    options = [
        "-t", temp_dir,
        "-q",
        "--clean",
        "--g2p_threshold", "0.95",
    ]
    parsed, extras = parser.parse_known_args(positional + options)
    run_g2p(parsed, extras)

    assert os.path.exists(generated_path)

    dictionary = MultispeakerDictionary(generated_path, temporary_directory=temp_dir)
    dictionary.dictionary_setup()
    assert len(dictionary.word_mapping(1)) > 0
def test_generate_dict_text_only(
    basic_split_dir,
    basic_g2p_model_path,
    g2p_basic_output,
    temp_dir,
    g2p_config_path,
):
    """Run the ``g2p`` command on the second entry of ``basic_split_dir``.

    The test is skipped when ``G2P_DISABLED`` is set. Otherwise it invokes
    ``run_g2p`` with the basic G2P model fixture, checks that
    ``g2p_basic_output`` was created, and verifies that the resulting
    dictionary has at least one entry in its word mapping.
    """
    if G2P_DISABLED:
        pytest.skip("No Pynini found")

    # Index 1 of the split-directory fixture is used as the input directory.
    input_dir = basic_split_dir[1]

    positional = ["g2p", basic_g2p_model_path, input_dir, g2p_basic_output]
    options = [
        "-t", temp_dir,
        "-q",
        "--clean",
        "--debug",
        "--config_path", g2p_config_path,
    ]
    parsed, extras = parser.parse_known_args(positional + options)
    run_g2p(parsed, extras)

    assert os.path.exists(g2p_basic_output)

    dictionary = MultispeakerDictionary(
        dictionary_path=g2p_basic_output, temporary_directory=temp_dir
    )
    dictionary.dictionary_setup()
    assert len(dictionary.word_mapping()) > 0
def test_classify(
    basic_corpus_dir,
    sick_dict_path,
    english_ivector_model,
    generated_dir,
    transcription_acoustic_model,
    transcription_language_model,
    temp_dir,
):
    """Invoke ``run_classify_speakers`` on the basic corpus fixture.

    The command names the ivector model by the literal ``'english_ivector'``
    and writes to ``classify_test`` under ``generated_dir``. The test makes
    no assertions of its own; it passes as long as the call does not raise.

    Several fixtures are requested but not referenced in the body
    (presumably for their setup side effects -- confirm before removing).
    """
    destination = os.path.join(generated_dir, "classify_test")

    # Positional arguments first, followed by the option flags.
    positional = ["classify_speakers", basic_corpus_dir, "english_ivector", destination]
    options = ["-t", temp_dir, "-q", "--clean", "-d", "--disable_mp"]

    # Unrecognised arguments are not forwarded to the runner.
    parsed, _ = parser.parse_known_args(positional + options)
    run_classify_speakers(parsed)
def test_create_segments(
    basic_corpus_dir,
    sick_dict_path,
    english_acoustic_model,
    generated_dir,
    transcription_acoustic_model,
    transcription_language_model,
    temp_dir,
    basic_segment_config,
):
    """Invoke ``run_create_segments`` on the basic corpus fixture.

    Output goes to ``segment_output`` under ``generated_dir`` and the
    configuration comes from the ``basic_segment_config`` fixture. The test
    makes no assertions of its own; it passes as long as the call does not
    raise.

    Several fixtures are requested but not referenced in the body
    (presumably for their setup side effects -- confirm before removing).
    """
    destination = os.path.join(generated_dir, "segment_output")

    # Positional arguments first, followed by the option flags.
    positional = ["create_segments", basic_corpus_dir, destination]
    options = ["-t", temp_dir, "-q", "--clean", "-d", "--config", basic_segment_config]

    # Unrecognised arguments are not forwarded to the runner.
    parsed, _ = parser.parse_known_args(positional + options)
    run_create_segments(parsed)