Python Dictionary.Dictionary примеры использования

Язык программирования: Python

Пространство имен/Пакет: wav2letter.common

Класс/Тип: Dictionary

Метод/Функция: Dictionary

Примеров на hotexamples.com: 3

Python Dictionary.Dictionary — найдено 3 примера. Это лучшие примеры кода на Python для wav2letter.common.Dictionary.Dictionary, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать / Скрыть

Dictionary(3)

add_entry(2)

get_entry(2)

get_index(2)

index_size(2)

addEntry(1)

getEntry(1)

getIndex(1)

indexSize(1)

Пример #1

Показать файл

Файл: wl_decoder.py Проект: facebookresearch/libri-light

    def __init__(
        self,
        lm_weight=2.0,
        lexicon_path="WER_data/lexicon.txt",
        token_path="WER_data/letters.lst",
        lm_path="WER_data/4-gram.bin",
    ):
        """Set up the dictionaries, language model, lexicon trie and options.

        The lexicon is read with ``load_words`` and turned into a word
        dictionary with ``create_word_dict``.  A token ``Dictionary`` is
        built from ``token_path`` and a ``KenLM`` model from ``lm_path``.
        Every spelling of every lexicon word is then inserted into a
        ``Trie`` together with the score the language model assigns to the
        word from its start state, and the trie is smeared with
        ``SmearingMode.MAX``.

        Args:
            lm_weight: Forwarded to ``DecoderOptions`` as ``lm_weight``.
            lexicon_path: Path handed to ``load_words``.
            token_path: Path handed to the token ``Dictionary``.
            lm_path: Path handed to ``KenLM``.

        Attributes set:
            token_dict, lm, sil_idx, unk_idx, blank_idx, trie, opts.
        """
        spellings_by_word = load_words(lexicon_path)
        words = create_word_dict(spellings_by_word)

        tokens = Dictionary(token_path)
        self.token_dict = tokens
        self.lm = KenLM(lm_path, words)

        # Indices of the special symbols: "|" and "<unk>" are looked up as-is,
        # while "#" is first appended to the token dictionary.
        self.sil_idx = tokens.get_index("|")
        self.unk_idx = words.get_index("<unk>")
        tokens.add_entry("#")
        self.blank_idx = tokens.get_index("#")

        self.trie = Trie(tokens.index_size(), self.sil_idx)
        initial_state = self.lm.start(start_with_nothing=False)

        for entry, entry_spellings in spellings_by_word.items():
            word_idx = words.get_index(entry)
            # Only the score is needed; the resulting LM state is discarded.
            _next_state, entry_score = self.lm.score(initial_state, word_idx)
            for tokens_of_spelling in entry_spellings:
                # max_reps should be 1; 0 is used on purpose to match the
                # DecoderTest bug.
                token_idxs = tkn_to_idx(tokens_of_spelling, tokens, max_reps=0)
                self.trie.insert(token_idxs, word_idx, entry_score)

        self.trie.smear(SmearingMode.MAX)

        decoder_settings = {
            "beam_size": 2500,
            "beam_threshold": 100.0,
            "lm_weight": lm_weight,
            "word_score": 2.0,
            "unk_score": -math.inf,
            "log_add": False,
            "sil_weight": -1,
            "criterion_type": CriterionType.CTC,
        }
        self.opts = DecoderOptions(**decoder_settings)

Пример #2

Показать файл

Файл: prog_w2l.py Проект: BasRizk/Wav2letter-Evaluation

        return np.frombuffer(file.read(N * N * 4), dtype=np.float32)


def assert_near(x, y, tol):
    """Assert that ``x`` and ``y`` differ by at most ``tol``.

    Raises:
        AssertionError: if ``abs(x - y)`` is greater than ``tol``.
    """
    distance = abs(x - y)
    assert distance <= tol


# load test files
# Every fixture below is read from the directory ``testing_data_path``.

# T and N as stored in TN.bin (presumably the number of time frames and the
# number of tokens -- confirm against load_TN).
T, N = load_TN(os.path.join(testing_data_path, "TN.bin"))
# Contents of emission.bin, as returned by load_emissions.
emissions = load_emissions(os.path.join(testing_data_path, "emission.bin"))
# Contents of transition.bin, as returned by load_transitions.
transitions = load_transitions(
    os.path.join(testing_data_path, "transition.bin"))
# Lexicon read from words.lst, and the word dictionary derived from it.
lexicon = loadWords(os.path.join(testing_data_path, "words.lst"))
wordDict = createWordDict(lexicon)
# Token dictionary read from letters.lst, extended with the extra entry "1"
# (presumably a repetition symbol -- TODO confirm against the acoustic model).
tokenDict = Dictionary(os.path.join(testing_data_path, "letters.lst"))
tokenDict.addEntry("1")
# Language model loaded from lm.arpa, tied to the word dictionary above.
lm = KenLM(os.path.join(testing_data_path, "lm.arpa"), wordDict)

# test LM

#sentence = ["the", "cat", "sat", "on", "the", "mat"]
#lm_state = lm.start(False)
#total_score = 0
#lm_score_target = [-1.05971, -4.19448, -3.33383, -2.76726, -1.16237, -4.64589]
#for i in range(len(sentence)):
#    lm_state, lm_score = lm.score(lm_state, wordDict.getIndex(sentence[i]))
#    assert_near(lm_score, lm_score_target[i], 1e-5)
#    total_score += lm_score
#lm_state, lm_score = lm.finish(lm_state)
#total_score += lm_score

Пример #3

Показать файл

    # load test files
    # load time and number of tokens for dumped acoustic scores
    T, N = load_tn(os.path.join(data_path, "TN.bin"))
    # load emissions [Batch=1, Time, Ntokens]
    emissions = load_emissions(os.path.join(data_path, "emission.bin"))
    # load transitions (from ASG loss optimization) [Ntokens, Ntokens]
    transitions = load_transitions(os.path.join(data_path, "transition.bin"))
    # load lexicon file, which defines spelling of words
    # the format word and its tokens spelling separated by the spaces,
    # for example for letters tokens with ASG loss:
    # ann a n 1 |
    lexicon = load_words(os.path.join(data_path, "words.lst"))
    # read lexicon and store it in the w2l dictionary
    word_dict = create_word_dict(lexicon)
    # create w2l dict with tokens set (letters in this example)
    token_dict = Dictionary(os.path.join(data_path, "letters.lst"))
    # add repetition symbol as soon as we have ASG acoustic model
    token_dict.add_entry("1")
    # create Kenlm language model
    lm = KenLM(os.path.join(data_path, "lm.arpa"), word_dict)

    # test LM
    sentence = ["the", "cat", "sat", "on", "the", "mat"]
    # start LM with nothing, get its current state
    lm_state = lm.start(False)
    total_score = 0
    lm_score_target = [
        -1.05971, -4.19448, -3.33383, -2.76726, -1.16237, -4.64589
    ]
    # iterate over words in the sentence
    for i in range(len(sentence)):