Example #1
def main(args):
    print(create_args_str(args))
    lang, audio, trans, keras, ds, ds_alpha, ds_trie, lm, vocab, target_dir, normalize, gpu = setup(args)
    print(f'all artifacts will be saved to {target_dir}')

    lm = load_lm(lm) if lm else None
    vocab = load_vocab(vocab) if vocab else None

    audio_bytes, sample_rate, transcript, language = preprocess(audio, trans, lang, norm_transcript=normalize)
    voiced_segments = vad(audio_bytes, sample_rate)
    df_alignments = pipeline(voiced_segments=voiced_segments, sample_rate=sample_rate, transcript=transcript,
                             language=language,
                             ds_path=ds, ds_alpha_path=ds_alpha, ds_trie_path=ds_trie,
                             keras_path=keras, lm=lm, vocab=vocab,
                             force_realignment=args.force_realignment, align_endings=args.align_endings,
                             target_dir=target_dir)

    df_stats = calculate_stats(df_alignments, ds, transcript)
    create_demo_files(target_dir, audio, transcript, df_alignments, df_stats)

    print()
    print_dataframe(df_stats)
    print()

    stats_csv = join(target_dir, 'stats.csv')
    print(f'Saving stats to {stats_csv}')
    df_stats.to_csv(stats_csv)
Example #2
def main():
    print(create_args_str(args))
    print(
        f'Processing files from {args.source} and saving them in {args.target}'
    )
    corpus, corpus_file = create_corpus(args.source, args.target, args.limit)
    print(f'Done! Corpus with {len(corpus)} entries saved to {corpus_file}')
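In Examples #2 to #4, main reads args without receiving it as a parameter, so the argument namespace is presumably parsed at module level. The following is a minimal sketch of how that wiring could look for Example #2; the flag names mirror the attributes accessed above (args.source, args.target, args.limit) and are assumptions, as the real script may define them differently.

# Hedged sketch: module-level argument parsing so that main() can read `args`
# as a global. Flag names are taken from the attribute accesses in Example #2
# and are illustrative only.
import argparse

parser = argparse.ArgumentParser(description='create a corpus from raw source files')
parser.add_argument('--source', required=True, help='directory containing the raw input files')
parser.add_argument('--target', required=True, help='directory where the corpus will be saved')
parser.add_argument('--limit', type=int, default=None, help='optional cap on the number of corpus entries')
args = parser.parse_args()

if __name__ == '__main__':
    main()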
Example #3
def main(date_time):
    print(create_args_str(args))

    target_dir = setup(date_time)
    print()
    print(f'all output will be written to {target_dir}')
    print()

    print(f'creating {args.optimizer.upper()} optimizer for model')
    if args.optimizer == 'adam':
        opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.01, epsilon=1e-8)
    else:
        opt = SGD(lr=args.learning_rate, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    model = create_model(target_dir, opt, args.dropouts, args.language)

    train_model(model, args.language, target_dir, args.minutes)
Example #4
def main():
    print(create_args_str(args))
    target_dir = setup(args)
    print()
    print(f'all output will be written to {target_dir}')
    print()

    opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
    model = load_keras_model(args.model_dir, opt)
    model.summary()

    lm, vocab = None, None
    if args.lm:
        lm = load_lm(args.lm)
        vocab = load_vocab(args.lm_vocab)

    test_model(model, args.test_files, args.test_batches, args.batch_size,
               args.language, lm, vocab, target_dir)
Example #5
def main(args):
    print(create_args_str(args))

    target_dir, corpus_id, force, synthesize, min_dur, max_dur, precompute_features = setup(
        args)

    corpus = get_corpus(args.source_dir, args.language)
    corpus.summary()

    print(
        f'processing {corpus.name} corpus and saving split segments in {target_dir}'
    )
    csv_train, csv_dev, csv_test = extract_segments(target_dir, corpus_id,
                                                    corpus, synthesize,
                                                    min_dur, max_dur, force)
    print(f'done! All files are in {target_dir}')

    corpus = DeepSpeechCorpus(args.language, csv_train, csv_dev, csv_test)
    corpus.summary()

    if precompute_features:
        print('pre-computing features')
        compute_features(csv_train, csv_dev, csv_test, target_dir, force)
Example #6
def main(args):
    print(create_args_str(args))
    target_dir, keras_path, lm_path, vocab_path, gpu = setup(args)
    print(f'all results will be written to {target_dir}')

    lm = load_lm(lm_path) if lm_path else None
    vocab = load_vocab(vocab_path) if vocab_path else None

    corpus = get_corpus('rl', 'de')
    corpus.summary()
    test_entries = list(set((segment.entry for segment in corpus.test_set())))
    # add 6 entries from PodClub corpus
    corpus = get_corpus('pc', 'de')
    corpus.summary()
    test_entries += [
        corpus['record1058'], corpus['record1063'], corpus['record1076'],
        corpus['record1523'], corpus['record1548'], corpus['record1556']
    ]
    stats = []
    for i, entry in enumerate(test_entries):
        print(f'entry {i + 1}/{len(test_entries)}')
        audio_file = entry.audio_path
        sample_rate = entry.rate
        with open(entry.transcript_path, encoding='utf-8') as f:
            transcript = f.read()
            if args.norm_transcript:
                transcript = normalize(transcript, 'de')

        demo_id = splitext(basename(audio_file))[0]
        target_dir_entry = join(target_dir, demo_id)
        if not exists(target_dir_entry):
            makedirs(target_dir_entry)

        voiced_segments = [
            Voice(s.audio, s.rate, s.start_frame, s.end_frame) for s in entry
        ]
        df_alignments = pipeline(voiced_segments=voiced_segments,
                                 sample_rate=sample_rate,
                                 transcript=transcript,
                                 language='de',
                                 keras_path=keras_path,
                                 lm=lm,
                                 vocab=vocab,
                                 force_realignment=args.force_realignment,
                                 align_endings=args.align_endings,
                                 target_dir=target_dir_entry)

        df_stats = calculate_stats(df_alignments, keras_path, transcript)

        # calculate average similarity between Keras alignments and the original alignments
        original_alignments = [s.transcript for s in entry.segments]
        av_similarity = np.mean([
            levenshtein_similarity(ka, oa)
            for (ka,
                 oa) in zip(df_alignments['alignment'], original_alignments)
        ])
        df_stats['similarity'] = av_similarity
        create_demo_files(target_dir_entry, audio_file, transcript,
                          df_alignments, df_stats)

        stats.append(df_stats)

    df_keras = pd.concat(stats)
    csv_keras = join(target_dir, 'performance.csv')
    df_keras.to_csv(csv_keras)
    print(f'summary saved to {csv_keras}')

    visualize_pipeline_performance(csv_keras, csv_ds=None, silent=True)
    update_index(target_dir,
                 lang='de',
                 num_aligned=len(test_entries),
                 df_keras=df_keras,
                 keras_path=keras_path,
                 lm_path=lm_path,
                 vocab_path=vocab_path)
    K.clear_session()
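Examples #6 and #7 rely on a levenshtein_similarity helper that is not shown here. Below is a minimal sketch of what such a helper could look like, assuming it normalises the edit distance to a score in [0, 1]; the project's actual implementation may work on word level or use an external library.

# Hedged sketch of a character-level, normalised Levenshtein similarity.
# Returns 1.0 for identical strings and 0.0 for completely dissimilar ones.
def levenshtein_similarity(a: str, b: str) -> float:
    if not a and not b:
        return 1.0
    # classic dynamic-programming edit distance, keeping only two rows
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        curr = [i]
        for j, cb in enumerate(b, start=1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return 1.0 - prev[-1] / max(len(a), len(b))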
Example #7
def main(args):
    print(create_args_str(args))
    demo_files, target_dir, keras_path, ds_path, ds_alpha, ds_trie, lm_path, vocab_path, normalize, gpu = setup(
        args)
    num_files = len(demo_files)
    print(
        f'Processing {num_files} audio/transcript samples. All results will be written to {target_dir}'
    )

    lm = load_lm(lm_path) if lm_path else None
    vocab = load_vocab(vocab_path) if vocab_path else None

    stats_keras, stats_ds = [], []
    for i, (audio, transcript) in enumerate(demo_files):
        print(
            '-----------------------------------------------------------------'
        )
        print(f'{i + 1}/{num_files}: Evaluating pipeline on {audio}')
        print(
            '-----------------------------------------------------------------'
        )
        demo_id = splitext(basename(audio))[0]
        target_dir_ds = join(target_dir, demo_id + '_ds')
        target_dir_keras = join(target_dir, demo_id + '_keras')

        audio_bytes, sample_rate, transcript, language = preprocess(
            audio, transcript, 'en', norm_transcript=normalize)
        voiced_segments = vad(audio_bytes, sample_rate)

        df_alignments_ds = pipeline(voiced_segments=voiced_segments,
                                    sample_rate=sample_rate,
                                    transcript=transcript,
                                    language='en',
                                    ds_path=ds_path,
                                    ds_alpha_path=ds_alpha,
                                    ds_trie_path=ds_trie,
                                    lm_path=lm_path,
                                    force_realignment=args.force_realignment,
                                    align_endings=args.align_endings,
                                    target_dir=target_dir_ds)
        df_stats_ds = calculate_stats(df_alignments_ds, ds_path, transcript)

        df_alignments_keras = pipeline(
            voiced_segments=voiced_segments,
            sample_rate=sample_rate,
            transcript=transcript,
            language='en',
            keras_path=keras_path,
            lm=lm,
            vocab=vocab,
            force_realignment=args.force_realignment,
            align_endings=args.align_endings,
            target_dir=target_dir_keras)
        df_stats_keras = calculate_stats(df_alignments_keras, keras_path,
                                         transcript)

        # average similarity between Keras and DeepSpeech alignments
        av_similarity = np.mean([
            levenshtein_similarity(al_keras, al_ds)
            for (al_keras, al_ds) in zip(df_alignments_keras['alignment'],
                                         df_alignments_ds['alignment'])
        ])

        df_stats_ds['similarity'] = av_similarity
        df_stats_keras['similarity'] = av_similarity
        stats_ds.append(df_stats_ds)
        stats_keras.append(df_stats_keras)

        create_demo_files(target_dir_ds, audio, transcript, df_alignments_ds,
                          df_stats_ds)
        create_demo_files(target_dir_keras, audio, transcript,
                          df_alignments_keras, df_stats_keras)

    df_keras = pd.concat(stats_keras)
    csv_keras = join(target_dir, 'performance_keras.csv')
    df_keras.to_csv(csv_keras)

    df_ds = pd.concat(stats_ds)
    csv_ds = join(target_dir, 'performance_ds.csv')
    df_ds.to_csv(csv_ds)
    print(f'summaries saved to {csv_keras} and {csv_ds}')

    visualize_pipeline_performance(csv_keras, csv_ds, silent=True)
    update_index(target_dir,
                 lang='en',
                 num_aligned=len(demo_files),
                 df_keras=df_keras,
                 keras_path=keras_path,
                 df_ds=df_ds,
                 ds_path=ds_path,
                 lm_path=lm_path,
                 vocab_path=vocab_path)

    print(f'Done! Demos have been saved to {target_dir}')
Example #8
def main(args):
    print(create_args_str(args))
    csv_ds, csv_keras, silent = setup(args)
    visualize_pipeline_performance(csv_keras, csv_ds, silent)
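The last snippet only wires the CSVs produced by Example #7 into visualize_pipeline_performance. A hedged usage sketch follows, assuming setup(args) simply returns the two CSV paths and the silent flag; the argument names and directory below are placeholders, not part of the original scripts.

# Hedged usage sketch: point the visualization entry point at the files
# written by Example #7. Argument names are illustrative assumptions.
from argparse import Namespace
from os.path import join

target_dir = 'demo_output'  # placeholder for the directory written by Example #7
args = Namespace(csv_keras=join(target_dir, 'performance_keras.csv'),
                 csv_ds=join(target_dir, 'performance_ds.csv'),
                 silent=True)
main(args)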