Примеры использования SimpleMarkovClassifier в Python

Язык программирования: Python

Пространство имен/Пакет: mdp.nodes

Примеров на hotexamples.com: 5

Python SimpleMarkovClassifier — найдено 5 примеров. Это лучшие примеры Python-кода для mdp.nodes.SimpleMarkovClassifier, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

SimpleMarkovClassifier(2)

prob(2)

train(2)

Пример #1

Показать файл

Файл: word_generator.py Проект: lijielife/mdp-docs

    def __init__(self, dictionary, correlation, verbose=False):
        """Store the settings, then create and train the classifier.

        :param dictionary: stored unchanged as ``self._dictionary``.
        :param correlation: stored unchanged as ``self._correlation``.
        :param verbose: when true, print the class docstring before
            training and the transition probabilities after it.
        """
        self._correlation = correlation
        self._dictionary = dictionary
        self._verbose = verbose

        # A single-argument, parenthesised print produces the same output
        # with the Python 2 print statement and the Python 3 print function.
        if self._verbose:
            print(self.__doc__)

        self.mc = SimpleMarkovClassifier(dtype="unicode")

        # Training runs as part of construction, before __init__ returns.
        self.trainSimpleMarkovClassifier()
        if self._verbose:
            self.print_transition_probabilities()

Пример #2

Показать файл

Файл: word_generator.py Проект: akatumba/mdp-docs

    def __init__(self, dictionary, correlation, verbose=False):
        """Keep the configuration and train a fresh classifier immediately.

        :param dictionary: stored unchanged as ``self._dictionary``.
        :param correlation: stored unchanged as ``self._correlation``.
        :param verbose: when true, the class docstring is printed before
            training and the transition probabilities are printed after it.
        """
        self._correlation = correlation
        self._dictionary = dictionary
        self._verbose = verbose

        if self._verbose:
            # Parenthesised single-argument form: identical output under
            # Python 2, and valid syntax under Python 3 as well.
            print(self.__doc__)

        self.mc = SimpleMarkovClassifier(dtype="unicode")

        # The classifier is trained here, as part of construction.
        self.trainSimpleMarkovClassifier()
        if self._verbose:
            self.print_transition_probabilities()

Пример #3

Показать файл

def testSimpleMarkovClassifier():
    """Train a SimpleMarkovClassifier on single letters and check its output.

    The classifier learns, word by word, which character follows which.
    The test then verifies that every learned distribution sums to one, that
    the number of non-zero transitions matches the number of distinct
    character pairs in the text, and that only the expected characters
    follow the letter ``e``.
    """
    tolerance = 1e-5
    text = "after the letter e follows either space or the letters r t or i"
    mc = SimpleMarkovClassifier(dtype="c")

    for raw_word in text.split():
        token = raw_word.lower()

        # Sample i is the character preceding position i (a space before the
        # first letter); label i is the character at position i, with a
        # trailing space as the label after the last letter.
        preceding = list(zip(" " + token))
        following = list(token + " ")

        mc.train(mdp.numx.array(preceding), following)

    assert mc.input_dim == 1

    observed_transitions = 0
    learned = mc.features
    for feature, count in list(learned.items()):
        if not count:
            continue

        total = 0
        for distribution in mc.prob(mdp.numx.array([feature])):
            for value in distribution.values():
                total += value
                if value:
                    observed_transitions += 1

        assert abs(total - 1.0) < tolerance

    # Expected number of distinct (next, previous) character pairs. Joining
    # the words with two spaces produces a (space, space) pair that never
    # occurs in the training data, so it is removed from the set.
    spaced = "  ".join(text.split())
    expected_pairs = set(zip(spaced + " ", " " + spaced))
    expected_pairs.discard((' ', ' '))
    assert observed_transitions == len(expected_pairs)

    allowed_after_e = [' ', 'r', 't', 'i']
    after_e = mc.prob(mdp.numx.array([['e']]))[0]
    total = 0
    for letter, probability in list(after_e.items()):
        total += probability
        if probability > tolerance:
            assert letter in allowed_after_e

    assert abs(total - 1.0) < tolerance

Пример #4

Показать файл

Файл: word_generator.py Проект: lijielife/mdp-docs

class DictionaryDemo(object):
    """This demo generates words from a selected dictionary by calculating
    the transition probabilities from two consecutive letters to the next.
    """
    # NOTE: the docstring above is printed verbatim in verbose mode (see
    # __init__), so it is part of the program's output.

    # Upper bound on the number of letters drawn for one generated word.
    _MAX_LETTERS = 50

    def __init__(self, dictionary, correlation, verbose=False):
        """Store the settings, then create and train the classifier.

        :param dictionary: path of the dictionary file to learn from.
        :param correlation: number of preceding characters used as the
            feature for predicting the next character.
        :param verbose: when true, print the class docstring, a start
            message and the learned transition probabilities.
        """
        self._correlation = correlation
        self._dictionary = dictionary
        self._verbose = verbose

        # Single-argument print(...) behaves identically on Python 2 and 3.
        if self._verbose:
            print(self.__doc__)

        self.mc = SimpleMarkovClassifier(dtype="unicode")

        self.trainSimpleMarkovClassifier()
        if self._verbose:
            self.print_transition_probabilities()

    @staticmethod
    def _count_lines(fname):
        """Return the number of lines in the file *fname* (0 when empty)."""
        # Binary mode: only newlines are counted, so no decoding can fail.
        with open(fname, "rb") as handle:
            return sum(1 for _ in handle)

    def trainSimpleMarkovClassifier(self):
        """Train ``self.mc`` on the words of the dictionary file.

        For every line, punctuation is replaced by spaces, the text is
        lower-cased and only the first whitespace-separated token is used;
        lines without any token are skipped. Each character of the word
        (plus a terminating space) becomes a label whose feature is the
        tuple of the ``correlation`` characters preceding it, padded with
        spaces at the start of the word.

        An empty dictionary file is skipped without training.
        """
        regex = re.compile('[%s]' % re.escape(string.punctuation))

        # The context manager closes the file even if training raises.
        with codecs.open(self._dictionary, "r", "latin-1") as dictfile:
            if self._verbose:
                print("Start learning from ‘%s’." % self._dictionary)

            total = self._count_lines(self._dictionary)
            if not total:
                # Nothing to learn from an empty file.
                return

            for _num, line in mdp.utils.progressinfo(enumerate(dictfile),
                                                     total):
                # remove punctuation and keep the first token only
                tokens = regex.sub(' ', line).lower().strip().split()
                if not tokens:
                    continue
                word = tokens[0]

                shifted_words = [
                    " " * i + word for i in range(self._correlation, 0, -1)
                ]
                # list(...) so the array is built from a real sequence on
                # Python 3 too, where zip returns an iterator.
                words = list(zip(*shifted_words))
                labels = list(word + " ")
                self.mc.train(mdp.numx.array(words), labels)

    def print_transition_probabilities(self):
        """Print every non-zero learned transition, one per line.

        Spaces in features and labels are shown as underscores.
        """
        print("Transition probabilities:")
        features = self.mc.features
        # Iterate over a snapshot so the loop does not depend on whether
        # querying the classifier touches its feature table.
        for feature, count in list(features.items()):
            if not count:
                continue
            prob = self.mc.prob(mdp.numx.array([feature]))
            for p in prob:
                for k, v in p.items():
                    if v:
                        print("%s -> %s ( %s )" % (
                            "".join(feature).replace(" ", "_"),
                            k.replace(" ", "_"),
                            "%7.3f %%" % (v * 100),
                        ))

    def get_words(self, num_words):
        """Generate and print *num_words* words, one per line.

        Each word starts from ``correlation`` spaces. The next letter is
        drawn with ``weighted_choice`` from the classifier's probabilities
        for the last ``correlation`` letters. Generation stops when
        ``weighted_choice`` returns ``None`` or after ``_MAX_LETTERS``
        letters. The leading spaces are part of the printed string.
        """
        for _ in range(num_words):
            features = [" "] * (self._correlation)
            for __ in range(self._MAX_LETTERS):  # have a maximum length
                f = mdp.numx.array([features[-self._correlation:]])
                new_f = weighted_choice(self.mc.prob(f)[0], True)
                if new_f is None:
                    break
                features.append(new_f)
            print("".join(features))

Пример #5

Показать файл

Файл: word_generator.py Проект: akatumba/mdp-docs

class DictionaryDemo(object):
    """This demo generates words from a selected dictionary by calculating
    the transition probabilities from two consecutive letters to the next.
    """
    # NOTE: the docstring above is printed verbatim in verbose mode (see
    # __init__), so it is part of the program's output.

    # Upper bound on the number of letters drawn for one generated word.
    _MAX_LETTERS = 50

    def __init__(self, dictionary, correlation, verbose=False):
        """Remember the configuration and train a new classifier.

        :param dictionary: path of the dictionary file to learn from.
        :param correlation: number of preceding characters used as the
            feature for predicting the next character.
        :param verbose: when true, print the class docstring, a start
            message and the learned transition probabilities.
        """
        self._correlation = correlation
        self._dictionary = dictionary
        self._verbose = verbose

        # Single-argument print(...) behaves identically on Python 2 and 3.
        if self._verbose:
            print(self.__doc__)

        self.mc = SimpleMarkovClassifier(dtype="unicode")

        self.trainSimpleMarkovClassifier()
        if self._verbose:
            self.print_transition_probabilities()

    @staticmethod
    def _count_lines(fname):
        """Return how many lines the file *fname* contains (0 when empty)."""
        # Binary mode: only newlines are counted, so no decoding can fail.
        with open(fname, "rb") as handle:
            return sum(1 for _ in handle)

    def trainSimpleMarkovClassifier(self):
        """Train ``self.mc`` on the words of the dictionary file.

        For every line, punctuation is replaced by spaces, the text is
        lower-cased and only the first whitespace-separated token is used;
        lines without any token are skipped. Each character of the word
        (plus a terminating space) becomes a label whose feature is the
        tuple of the ``correlation`` characters preceding it, padded with
        spaces at the start of the word.

        An empty dictionary file is skipped without training.
        """
        regex = re.compile('[%s]' % re.escape(string.punctuation))

        # The context manager closes the file even if training raises.
        with codecs.open(self._dictionary, "r", "latin-1") as dictfile:
            if self._verbose:
                print("Start learning from ‘%s’." % self._dictionary)

            total = self._count_lines(self._dictionary)
            if not total:
                # Nothing to learn from an empty file.
                return

            for _num, line in mdp.utils.progressinfo(enumerate(dictfile),
                                                     total):
                # remove punctuation and keep the first token only
                tokens = regex.sub(' ', line).lower().strip().split()
                if not tokens:
                    continue
                word = tokens[0]

                shifted_words = [" " * i + word
                                 for i in range(self._correlation, 0, -1)]
                # list(...) so the array is built from a real sequence on
                # Python 3 too, where zip returns an iterator.
                words = list(zip(*shifted_words))
                labels = list(word + " ")
                self.mc.train(mdp.numx.array(words), labels)

    def print_transition_probabilities(self):
        """Print every non-zero learned transition, one per line.

        Spaces in features and labels are shown as underscores.
        """
        print("Transition probabilities:")
        features = self.mc.features
        # Iterate over a snapshot so the loop does not depend on whether
        # querying the classifier touches its feature table.
        for feature, count in list(features.items()):
            if not count:
                continue
            prob = self.mc.prob(mdp.numx.array([feature]))
            for p in prob:
                for k, v in p.items():
                    if v:
                        print("%s -> %s ( %s )" % (
                            "".join(feature).replace(" ", "_"),
                            k.replace(" ", "_"),
                            "%7.3f %%" % (v * 100),
                        ))

    def get_words(self, num_words):
        """Generate and print *num_words* words, one per line.

        Each word starts from ``correlation`` spaces. The next letter is
        drawn with ``weighted_choice`` from the classifier's probabilities
        for the last ``correlation`` letters. Generation stops when
        ``weighted_choice`` returns ``None`` or after ``_MAX_LETTERS``
        letters. The leading spaces are part of the printed string.
        """
        for _ in range(num_words):
            features = [" "] * (self._correlation)
            for __ in range(self._MAX_LETTERS):  # have a maximum length
                f = mdp.numx.array([features[-self._correlation:]])
                new_f = weighted_choice(self.mc.prob(f)[0], True)
                if new_f is None:
                    break
                features.append(new_f)
            print("".join(features))