示例#1
0
def modelOutput(trainFile, testFile, modelType):
    """
    Train a classifier on ``trainFile``, classify every instance of
    ``testFile`` and print a three-part report to stdout.

    output is:
        (naive bayes) variable name | 'class'
        (tan) variable name | name of its parents
    # empty
    followed by:
        predict class | actual class | posterior probability (12 digits after decimal point)
    # empty
    followed by:
        The number of the test-set examples that were correctly classified.

    Args:
        trainFile: handed to ``data_provider`` to obtain the training data.
        testFile: handed to ``data_provider`` to obtain the test instances.
        modelType: ``'n'`` builds a ``Bayes`` model, ``'t'`` builds a ``TAN``
            model.

    Raises:
        SystemExit: with exit status 1 (and a message on stderr) when
            ``modelType`` is neither ``'n'`` nor ``'t'``.
    """
    import sys

    # Reject an unknown model type before any file is read, and exit with a
    # non-zero status so calling scripts can detect the failure.
    if modelType not in ('n', 't'):
        sys.stderr.write('model type should be [n] or [t] !!!\n')
        sys.exit(1)

    attributes, labels, instances = data_provider(trainFile)
    modelClass = Bayes if modelType == 'n' else TAN
    model = modelClass(attributes, labels, instances)

    # Only the instances of the test set are used below.
    _, _, testInstances = data_provider(testFile)

    # Single-argument print(...) calls emit the same text on Python 2 and 3.

    # format output part1: attribute name | 'class'
    model.printTree()
    print('')

    correctClassCnt = 0
    for test in testInstances:
        result = model.classify(test)
        # A prediction counts as correct when the first two fields of the
        # result agree (predicted vs. actual class per the report layout).
        if result[0] == result[1]:
            correctClassCnt += 1
        # format output part2: predict class | actual class | posterior probability
        print(formatOutput(result))
    print('')

    # format output part3: correctly classified number of test instances
    print(correctClassCnt)
示例#2
0
class Sentiment:
    """Sentiment scorer that pairs a word segmenter with a Bayes classifier.

    Documents are segmented, stripped of stop words and then passed to the
    classifier, which is trained on the labels ``'neg'`` and ``'pos'``.
    """

    def __init__(self):
        """Create the classifier and load the segmenter from 'seg.pickle'."""
        self.classifier = Bayes()
        self.seg = Seg()
        self.seg.load('seg.pickle')

    def save(self, fname):
        """Delegate saving to the classifier, passing ``fname`` through."""
        self.classifier.save(fname)

    def load(self, fname):
        """Replace the classifier with the value of ``classifier.load(fname)``."""
        restored = self.classifier.load(fname)
        self.classifier = restored

    def handle(self, doc):
        """Segment ``doc`` and return its words with stop words removed."""
        return self.filter_stop(self.seg.seg(doc))

    def train(self, neg_docs, pos_docs):
        """Train the classifier on negative and positive example documents.

        Each document becomes a ``[words, label]`` pair; all ``'neg'`` pairs
        precede the ``'pos'`` pairs in the list given to the classifier.
        """
        negative = [[self.handle(doc), 'neg'] for doc in neg_docs]
        positive = [[self.handle(doc), 'pos'] for doc in pos_docs]
        self.classifier.train(negative + positive)

    def classify(self, doc):
        """Return a score for ``doc`` derived from the classifier's verdict.

        The classifier yields ``(label, prob)``. ``prob`` is returned as is
        when the label is ``'pos'``; otherwise ``1 - prob`` is returned.
        """
        label, prob = self.classifier.classify(self.handle(doc))
        return prob if label == 'pos' else 1 - prob

    @staticmethod
    def filter_stop(words):
        """Return a list of the items in ``words`` that are not in ``stop_words``."""
        return [word for word in words if word not in stop_words]
from bayes import Bayes

# Step 1: create an instance of the algorithm and
# name the field/column you want to classify.
instance = Bayes("Sex")

# Step 2: train the algorithm by letting it learn
# from a data set.
instance.learn("static/data_test.csv")

# Step 3: use the trained instance to classify a
# sample (in this example we look for the most
# probable sex) and print the answer.
prediction = instance.classify([6, 130, 8])
print(prediction)