Exemplo n.º 1
0
def sentence_to_ids(phoneme, target_dict):
    """
    Map a space-delimited phoneme string to ids and batch the result.

    :param phoneme: phoneme string; tokens are obtained with ``split(" ")``,
        so consecutive spaces produce empty-string tokens
    :param target_dict: mapping from token to id; its ``UNK`` entry is looked
        up for every token and used when the token itself is missing
    :return: ``None`` when ``phoneme`` is ``None`` or empty, otherwise the
        value returned by ``process_batch_data`` for the one-sequence batch
    """
    if phoneme is None:
        return None
    if len(phoneme) == 0:
        return None

    token_ids = []
    for piece in phoneme.split(" "):
        # The UNK fallback is evaluated eagerly, exactly once per token.
        token_ids.append(target_dict.get(piece, target_dict[UNK]))

    # Single sequence in the batch, paired with one empty companion list.
    return process_batch_data([token_ids], [[]], target_dict)
Exemplo n.º 2
0
def sentence_to_ids(sentence, target_dict):
    """
    Clean and tokenize a sentence, map its tokens to ids, and batch the result.

    :param sentence: input sentence
    :param target_dict: mapping from token to id; its ``UNK`` entry is looked
        up for every token and used when the token itself is missing
    :return: ``None`` when ``sentence`` is ``None`` or empty, otherwise the
        value returned by ``process_batch_data`` for the one-sequence batch
    """
    if sentence is None:
        return None
    if len(sentence) == 0:
        return None

    cleaned = cleanup_sentence(sentence, only_alphanumeric=False)

    token_ids = []
    for piece in word_tokenize(cleaned):
        # The UNK fallback is evaluated eagerly, exactly once per token.
        token_ids.append(target_dict.get(piece, target_dict[UNK]))

    # Single sequence in the batch, paired with one empty companion list.
    return process_batch_data([token_ids], [[]], target_dict)
Exemplo n.º 3
0
def sentence_to_ids(sentence, source_dict, language, lower, keep_number):
    """
    Clean a source sentence, map its whitespace-split tokens to ids, and
    batch the result.

    :param sentence: input sentence
    :param source_dict: mapping from token to id; its ``UNK`` entry is looked
        up for every token and used when the token itself is missing
    :param language: source language, forwarded to ``cleanup_sentence``
    :param lower: boolean, convert sentence to lowercase or not (forwarded to
        ``cleanup_sentence``)
    :param keep_number: boolean, whether to keep the number characters
        (forwarded to ``cleanup_sentence``)
    :return: ``None`` when ``sentence`` is ``None`` or empty, otherwise the
        value returned by ``process_batch_data`` for the one-sequence batch
    """
    if sentence is None:
        return None
    if len(sentence) == 0:
        return None

    cleaned = cleanup_sentence(sentence, language, lower, keep_number)

    token_ids = []
    for piece in cleaned.split():
        # The UNK fallback is evaluated eagerly, exactly once per token.
        token_ids.append(source_dict.get(piece, source_dict[UNK]))

    # Single sequence in the batch, paired with one empty companion list.
    return process_batch_data([token_ids], [[]], source_dict)
Exemplo n.º 4
0
    def words_to_indices(self, words):
        """
        Convert input words into batched word/char indices for inference.

        :param words: input words; iterated twice (once for characters, once
            for word lookup)
        :return: value returned by ``process_batch_data`` for the single-item
            batch of word indices and per-word character indices
        """
        # Character ids come from the raw words, before word_convert is applied.
        per_word_char_ids = []
        for raw_word in words:
            char_ids = []
            for symbol in raw_word:
                if symbol in self.char_dict:
                    char_ids.append(self.char_dict[symbol])
                else:
                    char_ids.append(self.char_dict[UNK])
            per_word_char_ids.append(char_ids)

        # Convert every word first, then look each converted form up.
        converted_words = [word_convert(raw_word) for raw_word in words]
        word_ids = []
        for entry in converted_words:
            if entry in self.word_dict:
                word_ids.append(self.word_dict[entry])
            else:
                word_ids.append(self.word_dict[UNK])

        return process_batch_data([word_ids], [per_word_char_ids])