Exemplo n.º 1
0
def create_bundle(
    alphabet_path,
    lm_path,
    vocab_path,
    package_path,
    force_utf8,
    default_alpha,
    default_beta,
):
    words = set()
    vocab_looks_char_based = True
    with open(vocab_path) as fin:
        for line in fin:
            for word in line.split():
                words.add(word.encode("utf-8"))
                if len(word) > 1:
                    vocab_looks_char_based = False
    print("{} unique words read from vocabulary file.".format(len(words)))
    print("{} like a character based model.".format(
        "Looks" if vocab_looks_char_based else "Doesn't look"))

    if force_utf8 != None:  # pylint: disable=singleton-comparison
        use_utf8 = force_utf8.value
        print("Forcing UTF-8 mode = {}".format(use_utf8))
    else:
        use_utf8 = vocab_looks_char_based

    if use_utf8:
        serialized_alphabet = UTF8Alphabet().serialize()
    else:
        serialized_alphabet = Alphabet(alphabet_path).serialize()

    alphabet = NativeAlphabet()
    err = alphabet.deserialize(serialized_alphabet, len(serialized_alphabet))
    if err != 0:
        print("Error loading alphabet: {}".format(err))
        sys.exit(1)

    scorer = Scorer()
    scorer.set_alphabet(alphabet)
    scorer.set_utf8_mode(use_utf8)
    scorer.reset_params(default_alpha, default_beta)
    scorer.load_lm(lm_path)
    scorer.fill_dictionary(list(words))
    shutil.copy(lm_path, package_path)
    scorer.save_dictionary(package_path, True)  # append, not overwrite
    print("Package created in {}".format(package_path))
Exemplo n.º 2
0
def initialize_globals():
    c = AttrDict()

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    # Set default checkpoint dir
    if not FLAGS.checkpoint_dir:
        FLAGS.checkpoint_dir = xdg.save_data_path(
            os.path.join('deepspeech', 'checkpoints'))

    if FLAGS.load not in ['last', 'best', 'init', 'auto', 'transfer']:
        FLAGS.load = 'auto'

    # Set default summary dir
    if not FLAGS.summary_dir:
        FLAGS.summary_dir = xdg.save_data_path(
            os.path.join('deepspeech', 'summaries'))

    # Standard session configuration that'll be used for all new sessions.
    c.session_config = tfv1.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_placement,
        inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
        intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads,
        gpu_options=tfv1.GPUOptions(allow_growth=FLAGS.use_allow_growth))

    # CPU device
    c.cpu_device = '/cpu:0'

    # Available GPU devices
    c.available_devices = get_available_gpus(c.session_config)

    # If there is no GPU available, we fall back to CPU based operation
    if not c.available_devices:
        c.available_devices = [c.cpu_device]

    if FLAGS.utf8:
        c.alphabet = UTF8Alphabet()
    else:
        c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))

    # Geometric Constants
    # ===================

    # For an explanation of the meaning of the geometric constants, please refer to
    # doc/Geometry.md

    # Number of MFCC features
    c.n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    c.n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    c.n_hidden = FLAGS.n_hidden

    c.n_hidden_1 = c.n_hidden

    c.n_hidden_2 = c.n_hidden

    c.n_hidden_5 = c.n_hidden

    # LSTM cell state dimension
    c.n_cell_dim = c.n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    c.n_hidden_3 = c.n_cell_dim

    # Units in the sixth layer = number of characters in the target language plus one
    c.n_hidden_6 = c.alphabet.size() + 1  # +1 for CTC blank label

    # Size of audio window in samples
    if (FLAGS.feature_win_len * FLAGS.audio_sample_rate) % 1000 != 0:
        log_error(
            '--feature_win_len value ({}) in milliseconds ({}) multiplied '
            'by --audio_sample_rate value ({}) must be an integer value. Adjust '
            'your --feature_win_len value or resample your audio accordingly.'
            ''.format(FLAGS.feature_win_len, FLAGS.feature_win_len / 1000,
                      FLAGS.audio_sample_rate))
        sys.exit(1)

    c.audio_window_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_len /
                                                        1000)

    # Stride for feature computations in samples
    if (FLAGS.feature_win_step * FLAGS.audio_sample_rate) % 1000 != 0:
        log_error(
            '--feature_win_step value ({}) in milliseconds ({}) multiplied '
            'by --audio_sample_rate value ({}) must be an integer value. Adjust '
            'your --feature_win_step value or resample your audio accordingly.'
            ''.format(FLAGS.feature_win_step, FLAGS.feature_win_step / 1000,
                      FLAGS.audio_sample_rate))
        sys.exit(1)

    c.audio_step_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_step /
                                                      1000)

    if FLAGS.one_shot_infer:
        if not os.path.exists(FLAGS.one_shot_infer):
            log_error(
                'Path specified in --one_shot_infer is not a valid file.')
            sys.exit(1)

    ConfigSingleton._config = c  # pylint: disable=protected-access
Exemplo n.º 3
0
import os
from ds_ctcdecoder import ctc_beam_search_decoder_batch, Scorer
from util.text import Alphabet, UTF8Alphabet
alphabet = UTF8Alphabet()
scorer = Scorer(0.75, 1.85,
                "/content/DeepSpeech-Indo/data/lm/kenlm_tamil.scorer",
                alphabet)
print(scorer)