Python PrefixTree примеры использования

Язык программирования: Python

Пространство имен/Пакет: prefix_tree

Класс/Тип: PrefixTree

Примеров на hotexamples.com: 4

Python PrefixTree - 4 примера найдено. Это лучшие примеры Python кода для prefix_tree.PrefixTree, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

PrefixTree(1)

calculate_probs(1)

get_continuations(1)

get_prefixes(1)

insert(1)

prefix_entropies(1)

prefix_frequencies(1)

prefix_probabilities(1)

prefix_surprisals(1)

Пример #1

Показать файл

Файл: segment_surprisal_tree.py Проект: LauraGwilliams/SpeechStatistics

    def read_elp(self):
        """Build the prefix tree and pronunciation table from the ELP CSV.

        Reads the file at ``paths.elp`` with ``csv.DictReader``. Every row's
        ``'Pron'`` value is translated with ``self.elp2cmu`` and stored in
        ``self.pronunciations`` under the row's ``'Word'``. Rows whose
        frequency field is neither ``'0'`` nor ``'NULL'`` are additionally
        inserted into ``self.tree`` (with a trailing ``'#'`` end marker),
        weighted by that frequency. ``calculate_probs()`` is called on the
        tree once all rows have been processed.
        """
        self.tree = PrefixTree()
        self.pronunciations = {}

        # A context manager guarantees the file handle is released even if a
        # row fails to parse; the previous bare open() never closed it.
        with open(paths.elp) as elp_file:
            for row in csv.DictReader(elp_file):
                pron = self.elp2cmu.translate(row['Pron'])
                freq = row[self.frequency_field]
                if freq not in ('0', 'NULL'):
                    self.tree.insert(tuple(pron + ['#']), float(freq),
                                     row['Word'])

                # Recorded for every row, including zero-frequency ones.
                self.pronunciations[row['Word']] = pron

        self.tree.calculate_probs()

Пример #2

Показать файл

Файл: segment_surprisal_tree.py Проект: kylejwilliams/BP_amb_phon

class SegmentSurprisalTree(object):
    """Per-segment statistics for words, backed by a ``PrefixTree``.

    ``read_elp`` must be called before any of the query methods, since it
    creates ``self.tree`` and ``self.pronunciations``.
    """

    # Name of the CSV column read as the word frequency in ``read_elp``.
    frequency_field = 'SUBTLWF'

    def __init__(self):
        self.elp2cmu = ELP2CMU()

    def read_elp(self):
        """Build the prefix tree and pronunciation table from the ELP CSV.

        Every row's ``'Pron'`` value is translated with ``self.elp2cmu`` and
        stored in ``self.pronunciations`` under the row's ``'Word'``. Rows
        whose frequency field is neither ``'0'`` nor ``'NULL'`` are also
        inserted into ``self.tree`` with a trailing ``'#'`` end marker.
        """
        self.tree = PrefixTree()
        self.pronunciations = {}

        # Context manager closes the file; the bare open() used to leak it.
        with open(paths.elp) as elp_file:
            for row in csv.DictReader(elp_file):
                pron = self.elp2cmu.translate(row['Pron'])
                freq = row[self.frequency_field]
                if freq not in ('0', 'NULL'):
                    self.tree.insert(tuple(pron + ['#']), float(freq),
                                     row['Word'])

                self.pronunciations[row['Word']] = pron

        self.tree.calculate_probs()

    def _pron_tuple(self, word, with_end):
        """Return the stored pronunciation of *word* as a tuple.

        A ``'#'`` end marker is appended when *with_end* is true. The
        original expression ``pron + ['#'] if with_end else []`` parsed as
        ``(pron + ['#']) if with_end else []`` and therefore produced an
        empty tuple for ``with_end=False``; the parentheses below fix that.

        Raises ``KeyError`` if *word* has no stored pronunciation.
        """
        pron = self.pronunciations[word]
        return tuple(pron + (['#'] if with_end else []))

    def surprisals(self, word, with_end=True):
        """Return ``self.tree.prefix_surprisals`` for *word*'s segments."""
        return self.tree.prefix_surprisals(self._pron_tuple(word, with_end))

    def entropies(self, word, with_end=True):
        """Return ``self.tree.prefix_entropies`` for *word*'s segments."""
        return self.tree.prefix_entropies(self._pron_tuple(word, with_end))

    def probabilities(self, word, with_end=True):
        """Return ``self.tree.prefix_probabilities`` for *word*'s segments."""
        return self.tree.prefix_probabilities(
            self._pron_tuple(word, with_end))

    def frequencies(self, word, with_end=True):
        """Return ``self.tree.prefix_frequencies`` for *word*'s segments."""
        return self.tree.prefix_frequencies(self._pron_tuple(word, with_end))

    def node_frequencies(self, word, with_end=True):
        """Return ``self.tree.prefix_frequencies`` for *word*'s segments.

        NOTE(review): this currently delegates to the same tree method as
        ``frequencies``; confirm whether a distinct node-level query was
        intended.
        """
        return self.tree.prefix_frequencies(self._pron_tuple(word, with_end))

Пример #3

Показать файл

Файл: segment_surprisal_tree.py Проект: kylejwilliams/BP_amb_phon

    def read_elp(self):
        """Load the ELP word list into ``self.tree`` and ``self.pronunciations``.

        The CSV at ``paths.elp`` is read row by row. For each row the
        ``'Pron'`` field is translated via ``self.elp2cmu.translate`` and
        remembered under the row's ``'Word'``. A row is inserted into the
        prefix tree (pronunciation plus a ``'#'`` terminator, weighted by the
        float value of ``self.frequency_field``) unless that field is ``'0'``
        or ``'NULL'``. After the last row, ``calculate_probs()`` is invoked
        on the tree.
        """
        self.tree = PrefixTree()
        self.pronunciations = {}

        # The file is opened in a ``with`` block so it is always closed;
        # previously the handle passed to DictReader was never released.
        with open(paths.elp) as handle:
            for entry in csv.DictReader(handle):
                word = entry['Word']
                pron = self.elp2cmu.translate(entry['Pron'])
                raw_frequency = entry[self.frequency_field]

                if raw_frequency not in ('0', 'NULL'):
                    self.tree.insert(tuple(pron + ['#']),
                                     float(raw_frequency), word)

                # Pronunciations are kept even for skipped rows.
                self.pronunciations[word] = pron

        self.tree.calculate_probs()

Пример #4

Показать файл

Файл: segment_surprisal_tree.py Проект: LauraGwilliams/SpeechStatistics

class SegmentSurprisalTree(object):
    """Per-segment statistics for words, backed by a ``PrefixTree``.

    ``read_elp`` must be called before any query method, since it creates
    ``self.tree`` and ``self.pronunciations``.
    """

    # Name of the CSV column read as the word frequency in ``read_elp``.
    frequency_field = 'SUBTLWF'

    def __init__(self):
        self.elp2cmu = ELP2CMU()

    def read_elp(self):
        """Build the prefix tree and pronunciation table from the ELP CSV.

        Every row's ``'Pron'`` value is translated with ``self.elp2cmu`` and
        stored in ``self.pronunciations`` under the row's ``'Word'``. Rows
        whose frequency field is neither ``'0'`` nor ``'NULL'`` are also
        inserted into ``self.tree`` with a trailing ``'#'`` end marker.
        """
        self.tree = PrefixTree()
        self.pronunciations = {}

        # Context manager closes the file; the bare open() used to leak it.
        with open(paths.elp) as elp_file:
            for row in csv.DictReader(elp_file):
                pron = self.elp2cmu.translate(row['Pron'])
                freq = row[self.frequency_field]
                if freq not in ('0', 'NULL'):
                    self.tree.insert(tuple(pron + ['#']), float(freq),
                                     row['Word'])

                self.pronunciations[row['Word']] = pron

        self.tree.calculate_probs()

    def _pron_tuple(self, word, with_end):
        """Return the stored pronunciation of *word* as a tuple.

        A ``'#'`` end marker is appended when *with_end* is true. The
        original expression ``pron + ['#'] if with_end else []`` parsed as
        ``(pron + ['#']) if with_end else []`` and therefore produced an
        empty tuple for ``with_end=False``; the parentheses below fix that.

        Raises ``KeyError`` if *word* has no stored pronunciation.
        """
        pron = self.pronunciations[word]
        return tuple(pron + (['#'] if with_end else []))

    def surprisals(self, word, with_end=True):
        """Return ``self.tree.prefix_surprisals`` for *word*'s segments."""
        return self.tree.prefix_surprisals(self._pron_tuple(word, with_end))

    def entropies(self, word, with_end=True):
        """Return ``self.tree.prefix_entropies`` for *word*'s segments."""
        return self.tree.prefix_entropies(self._pron_tuple(word, with_end))

    def probabilities(self, word, with_end=True):
        """Return ``self.tree.prefix_probabilities`` for *word*'s segments."""
        return self.tree.prefix_probabilities(
            self._pron_tuple(word, with_end))

    def frequencies(self, word, with_end=True):
        """Return ``self.tree.prefix_frequencies`` for *word*'s segments."""
        return self.tree.prefix_frequencies(self._pron_tuple(word, with_end))

    def node_frequencies(self, word, with_end=True):
        """Return ``self.tree.prefix_frequencies`` for *word*'s segments.

        NOTE(review): this currently delegates to the same tree method as
        ``frequencies``; confirm whether a distinct node-level query was
        intended.
        """
        return self.tree.prefix_frequencies(self._pron_tuple(word, with_end))

    def get_word_continuations(self, word):
        """Map each segment position of *word* to its continuation values.

        Returns a dict keyed ``'<index>_<segment>'`` where ``index`` counts
        the pairs yielded by ``self.tree.get_prefixes`` from zero. Each value
        is the list of second elements of the items returned by
        ``self.tree.get_continuations`` for that prefix (presumably the
        continuation frequencies - confirm against ``PrefixTree``).

        Raises ``NotImplementedError`` if *word* has no stored pronunciation
        (exception type kept for backward compatibility).
        """
        # Only a failed lookup means "word not found"; a bare ``except``
        # would also mask unrelated errors such as a missing table.
        try:
            word_pron = self.pronunciations[word]
        except (KeyError, TypeError):
            raise NotImplementedError("Word '%s' not found in corpus." % word)

        # Tree entries are stored with the '#' end marker, so always add it.
        word_tuple = tuple(word_pron + ['#'])

        continuation_dict = {}
        prefixes = self.tree.get_prefixes(word_tuple)
        for phoneme_count, (phoneme_iter, prefix) in enumerate(prefixes):
            cont_freqs = [x[1] for x in self.tree.get_continuations(prefix)]
            key = '%s_%s' % (phoneme_count, phoneme_iter)
            continuation_dict[key] = cont_freqs

        return continuation_dict

    def get_uniqueness_point(self, word):
        """Return ``(n, key)`` for the first position with one continuation.

        ``key`` is the ``'<index>_<segment>'`` key from
        ``get_word_continuations`` and ``n`` is its rank in index order.

        Raises ``ValueError`` if no position has exactly one continuation,
        and ``NotImplementedError`` if *word* is unknown.
        """
        continuation_dict = self.get_word_continuations(word)

        # Order keys by their numeric index. A plain string sort would put
        # '10_x' before '2_y', and ``dict.keys().sort()`` does not exist on
        # Python 3 at all.
        phoneme_list = sorted(continuation_dict,
                              key=lambda k: int(k.split('_', 1)[0]))

        for n, phoneme in enumerate(phoneme_list):
            # Exactly one continuation marks the uniqueness point.
            if len(continuation_dict[phoneme]) == 1:
                return n, phoneme

        raise ValueError("No uniqueness point found.")