def test_split_by_length_of_utterances(benchmark):
    corpus = resources.generate_corpus(
        179, (250, 500), (1, 9), (0, 6), (1, 20), random.Random(x=234)
    )
    splitter = subset.Splitter(corpus, random_seed=324)
    benchmark(run, splitter)
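# Not shown above: the module imports and the `run` helper. A minimal sketch,
# assuming `run` simply drives Splitter.split_by_length_of_utterances; the
# proportions mapping below is a hypothetical choice, not taken from the source.
import random

from audiomate.corpus import subset

from bench import resources


def run(splitter):
    # Split into hypothetical train/test partitions by total utterance length.
    splitter.split_by_length_of_utterances(proportions={'train': 0.8, 'test': 0.2})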
def test_subview(benchmark):
    corpus = resources.generate_corpus(
        200, (5, 10), (1, 5), (0, 6), (1, 20), random.Random(x=234)
    )
    random.seed(200)
    filtered_utts = random.choices(list(corpus.utterances.keys()), k=20000)
    filters = [subset.MatchingUtteranceIdxFilter(filtered_utts)]
    benchmark(run, corpus, filters)
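# Not shown above: the module imports and the `run` helper. A plausible sketch,
# assuming the benchmark measures building a Subview from the filter criteria;
# touching `num_utterances` afterwards is an illustrative assumption.
import random

from audiomate.corpus import subset

from bench import resources


def run(corpus, filters):
    subview = subset.Subview(corpus, filter_criteria=filters)
    return subview.num_utterances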
def test_kaldi_write(benchmark, tmp_path):
    corpus = resources.generate_corpus(
        200, (5, 10), (1, 5), (0, 6), (1, 20), random.Random(x=234)
    )
    benchmark(run, corpus, str(tmp_path))
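# Not shown above: the module imports and the `run` helper. A minimal sketch,
# assuming the benchmark times serializing the corpus in Kaldi format via
# audiomate.corpus.io.KaldiWriter:
import random

from audiomate.corpus import io

from bench import resources


def run(corpus, path):
    io.KaldiWriter().save(corpus, path)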
import random

import audiomate

from bench import resources


def run(source_corpus):
    audiomate.Corpus.from_corpus(source_corpus)


def test_from_corpus(benchmark):
    source_corpus = resources.generate_corpus(
        200, (5, 5), (5, 5), (4, 4), (4, 4), random.Random(x=234)
    )
    benchmark(run, source_corpus)


if __name__ == '__main__':
    source_corpus = resources.generate_corpus(
        200, (5, 10), (1, 5), (0, 6), (1, 20), random.Random(x=234)
    )
    audiomate.Corpus.from_corpus(source_corpus)
def test_merge_corpus(benchmark):
    target_corpus = audiomate.Corpus()
    merge_corpus = resources.generate_corpus(
        200, (5, 10), (1, 5), (0, 6), (1, 20), random.Random(x=234)
    )
    benchmark(run, target_corpus, merge_corpus)
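# Not shown above: the module imports and the `run` helper. A minimal sketch,
# assuming the benchmark measures Corpus.merge_corpus, i.e. merging the tracks,
# issuers and utterances of the generated corpus into the empty target corpus:
import random

import audiomate

from bench import resources


def run(target_corpus, merge_corpus):
    target_corpus.merge_corpus(merge_corpus)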