Exemplo n.º 1
0
    def train(self):
        """Process the pending training files and update the token counts.

        Every ``(category, file)`` pair in ``self.to_train`` is opened in
        binary mode and wrapped in an ``EmailObject``. The category is
        recorded in ``self.categories`` and, for each token produced by
        ``Tokenizer.unique_tokenizer`` from the email body, the counters
        ``self.training[category][token]``, ``self.totals['_all']`` and
        ``self.totals[category]`` are each incremented by one.

        Afterwards ``self.to_train`` is rebound to an empty container, so a
        later call does not count the same files again.
        """
        for category, path in self.to_train:
            # The handle stays open for the whole tokenization pass (the
            # EmailObject may read from it lazily) and is closed as soon as
            # this file has been counted, instead of being leaked.
            with io.open(path, 'rb') as handle:
                email = EmailObject(handle)

                self.categories.add(category)

                for token in Tokenizer.unique_tokenizer(email.body()):
                    self.training[category][token] += 1
                    self.totals['_all'] += 1
                    self.totals[category] += 1

        # NOTE(review): the queue is iterated as (category, file) pairs but is
        # reset to a dict here; kept as-is because the initialisation of
        # ``to_train`` is not visible from this method.
        self.to_train = {}
Exemplo n.º 2
0
    def score(self, email):
        """Compute an unnormalized score per category for ``email``.

        Pending training data is processed first via ``self.train()``. Each
        category starts from ``totals[category] / totals['_all']`` and is
        then multiplied, for every token of the email body, by
        ``(count + 1) / (totals[category] + 1)``, where ``count`` is the
        stored training count of that token in the category.

        :param email: object exposing ``body()``; its result is tokenized
        :return: dict mapping each category to its accumulated score
        """
        self.train()

        totals = self.totals

        # Starting weight of each category: its share of the overall total.
        scores = {}
        for category in self.categories:
            scores[category] = totals[category] / totals['_all']

        for word in Tokenizer.unique_tokenizer(email.body()):
            for category in self.categories:
                count = self.training[category][word]
                # The +1 on both sides keeps a token with a zero count from
                # collapsing the whole product to zero.
                scores[category] *= (count + 1) / (totals[category] + 1)

        return scores
    def score(self, email):
        """
        Score ``email`` against every known category.

        ``self.train()`` runs first so that queued training files are
        counted. The score of a category begins as
        ``totals[category] / totals['_all']`` and is multiplied once per
        token of the email body by
        ``(training[category][token] + 1) / (totals[category] + 1)``.

        :param email: expected to be an EmailObject; only ``body()`` is used
        :return: dict keyed by category with the accumulated score as value
        """
        self.train()

        counts = self.totals

        # Seed every category with its fraction of the overall total.
        result = dict(
            (label, counts[label] / counts['_all'])
            for label in self.categories
        )

        tokens = Tokenizer.unique_tokenizer(email.body())
        for token in tokens:
            for label in self.categories:
                seen = self.training[label][token]
                # Add-one on numerator and denominator so that a zero count
                # still contributes a non-zero factor.
                factor = (seen + 1) / (counts[label] + 1)
                result[label] = result[label] * factor

        return result