Python Boolean_Model.observe_example示例

编程语言: Python

命名空间/包名称: bayes_model

类/类型: Boolean_Model

方法/功能: observe_example

hotexamples.com的示例: 2

Python Boolean_Model.observe_example - 共找到 2 个示例。以下是从开源项目中提取的、评分最高的 bayes_model.Boolean_Model.observe_example 真实 Python 用法示例。您可以对示例进行评分，帮助我们提高示例质量。

常用方法

显示隐藏

build_network(2)

observe_example(2)

set_features(2)

示例#1

显示文件

文件： NBmodel.py 项目： igumus/spamfilter

class Feature_Chooser:
    """Select the words that are used as boolean features.

    The workflow visible in this class is: ``choose`` gathers every distinct
    word from two directories, ``train`` feeds word-presence vectors to a
    ``Boolean_Model`` and keeps only the features whose two entries in
    ``model.attribute_params`` differ by more than ``threshold``, and
    ``pickle`` writes the surviving feature list to disk.
    """

    def __init__(self):
        # Candidate feature words, kept in first-seen order.
        self.features = []
        self.model = Boolean_Model()
        # Minimum |attribute[1] - attribute[0]| a feature needs to survive
        # the filtering step in train().
        self.threshold = .03

    def choose(self, spam_dir, ham_dir):
        """Collect every distinct word from both directories as a feature.

        Args:
            spam_dir: directory handed to ``get_files`` for the first pass.
            ham_dir: directory handed to ``get_files`` for the second pass.

        Words are appended to ``self.features`` in the order they are first
        seen, and the resulting list is passed to ``model.set_features``.
        """
        # Mirror the list in a set so each membership test is O(1) instead of
        # a scan of the ever-growing feature list.
        # Assumes the words yielded by munge_All_Words are hashable
        # (presumably strings) -- TODO confirm.
        seen = set(self.features)
        for directory in (spam_dir, ham_dir):
            for f in get_files(directory):
                for word in munge_All_Words(f):
                    if word not in seen:
                        seen.add(word)
                        self.features.append(word)
        print(len(self.features))
        self.model.set_features(self.features)
        print("finished choosing features")

    def train(self, spam_dir, ham_dir):
        """Fit the model on a sample of the files, then prune the features.

        Args:
            spam_dir: directory whose files are observed with label 1.
            ham_dir: directory whose files are observed with label 0.

        After ``model.build_network()``, ``self.features`` is replaced by the
        subset whose ``attribute_params`` entry satisfies
        ``abs(attribute[1] - attribute[0]) > self.threshold``.
        """
        N = 0
        for f in get_files(spam_dir):
            N += 1
            # NOTE(review): only every 23rd file (by running count) is
            # observed. This looks like deliberate subsampling to keep
            # feature selection cheap -- confirm it is intended.
            if N % 23 == 0:
                print(N)
                self.model.observe_example(self.munge(f), 1)
        for f in get_files(ham_dir):
            N += 1
            # Unlike the spam loop, progress is printed for every file here
            # (and a second time for the sampled ones).
            print(N)
            if N % 23 == 0:
                print(N)
                self.model.observe_example(self.munge(f), 0)
        self.model.build_network()
        new_features = []
        for i, attribute in enumerate(self.model.attribute_params):
            difference = attribute[1] - attribute[0]
            print("%s %s" % (attribute, difference))
            if abs(difference) > self.threshold:
                new_features.append(self.features[i])
        print(new_features)
        self.features = new_features
        print("finished training")

    def munge(self, email_file):
        """Turn a file into a 0/1 vector aligned with ``self.features``.

        Args:
            email_file: path of the file to read.

        Returns:
            A list with one int per feature: 1 if the feature appears among
            the ``\\W+``-separated tokens of the file, otherwise 0.
        """
        # The context manager closes the handle even if read() raises.
        with open(email_file, 'rb') as f:
            text = f.read()
        # A set makes each per-feature lookup O(1).
        words = set(re.split(r'\W+', text))
        return [int(token in words) for token in self.features]

    def pickle(self, features_file):
        """Serialize ``self.features`` to ``features_file`` with pickle.

        Args:
            features_file: path of the output file (overwritten).
        """
        # Close the file deterministically so the data is flushed to disk
        # before the method returns.
        with open(features_file, 'wb') as output:
            pickle.dump(self.features, output)
        print(self.features)
        print("pickled")

示例#2

显示文件

文件： NBmodel.py 项目： igumus/spamfilter

class NB_Boolean(NaiveBayesModel):
    """Naive Bayes classifier over boolean word-presence features.

    Relies on ``self.features`` being set before ``train``/``munge`` are
    called (presumably by the base class -- TODO confirm).
    """

    def classify(self, example, cost_ratio):
        """Classify a boolean feature vector.

        Args:
            example: sequence indexed like ``model.attribute_params``; an
                entry equal to 1 means the feature is present.
            cost_ratio: positive number; its log is subtracted from the
                first score before the comparison.

        Returns:
            1 if ``log_likelihood1 - log(cost_ratio) > log_likelihood2``,
            otherwise 0.

        Raises:
            ValueError: from ``math.log`` when an argument is not positive.
        """
        # NOTE(review): nothing here guards against a parameter of exactly
        # 0 or 1, which would make math.log raise. Presumably the model
        # smooths its estimates -- confirm against Boolean_Model.
        log_likelihood1 = math.log(self.model.base_param)
        log_likelihood2 = math.log(1 - self.model.base_param)
        for i, params in enumerate(self.model.attribute_params):
            if example[i] == 1:
                log_likelihood1 += math.log(params[0])
                log_likelihood2 += math.log(params[1])
            else:
                log_likelihood1 += math.log(1 - params[0])
                log_likelihood2 += math.log(1 - params[1])
        return int(log_likelihood1 - math.log(cost_ratio) > log_likelihood2)

    def train(self, spam_dir, ham_dir):
        """Fit a fresh ``Boolean_Model`` on every file in both directories.

        Args:
            spam_dir: directory whose files are observed with label 1.
            ham_dir: directory whose files are observed with label 0.
        """
        self.model = Boolean_Model()
        self.model.set_features(self.features)
        N = 0
        for f in get_files(spam_dir):
            # Progress counter; printed before it is incremented.
            print(N)
            N += 1
            self.model.observe_example(self.munge(f), 1)
        for f in get_files(ham_dir):
            print(N)
            N += 1
            self.model.observe_example(self.munge(f), 0)
        self.model.build_network()
        print("finished training")

    def munge(self, email_file):
        """Turn a file into a 0/1 vector aligned with ``self.features``.

        Args:
            email_file: path of the file to read.

        Returns:
            A list with one int per feature: 1 if the feature appears among
            the ``\\W+``-separated tokens of the file, otherwise 0.
        """
        # The context manager closes the handle even if read() raises.
        with open(email_file, 'rb') as f:
            text = f.read()
        # A set makes each per-feature lookup O(1).
        words = set(re.split(r'\W+', text))
        return [int(token in words) for token in self.features]