Пример #1
0
    def test_extract_events_odds(self):
        """Odds extracted from a labelled corpus should drive classification.

        Builds odds with ``Bayes.extract_events_odds`` from a corpus of 201
        'spam' strings and 200 'genuine' strings, applies them to the words
        of 'buy coffee for meeting' starting from a 0.9 spam / 0.1 genuine
        prior, and expects ``most_likely(0.8)`` to return 'genuine'.
        """
        spam_samples = ["buy viagra", "buy cialis"] * 100 + ["meeting love"]
        genuine_samples = ["meeting tomorrow", "buy milk"] * 100
        corpus = {'spam': spam_samples, 'genuine': genuine_samples}
        learned_odds = Bayes.extract_events_odds(corpus)

        classifier = Bayes({'spam': 0.9, 'genuine': 0.1})
        words = 'buy coffee for meeting'.split()
        classifier.update_from_events(words, learned_odds)
        # NOTE(review): 0.8 is presumably a minimum-confidence threshold --
        # confirm against Bayes.most_likely.
        self.assertEqual(classifier.most_likely(0.8), 'genuine')
Пример #2
0
    def test_extract_events_odds(self):
        """Corpus-derived odds are expected to outweigh a 0.9 spam prior.

        The odds come from ``Bayes.extract_events_odds`` over a small
        spam/genuine corpus; after updating on the words of the message,
        ``most_likely(0.8)`` must be 'genuine'.
        """
        odds = Bayes.extract_events_odds({
            'spam': ["buy viagra", "buy cialis"] * 100 + ["meeting love"],
            'genuine': ["meeting tomorrow", "buy milk"] * 100,
        })

        message = 'buy coffee for meeting'
        bayes = Bayes({'spam': 0.9, 'genuine': 0.1})
        bayes.update_from_events(message.split(), odds)

        verdict = bayes.most_likely(0.8)
        self.assertEqual(verdict, 'genuine')
Пример #3
0
# emails.
# NOTE(review): each tuple appears to be (genuine odds, spam odds), matching
# the order of the priors given to Bayes in the loop below -- confirm against
# Bayes.update_from_events.
words_odds = {'buy': (5, 100), 'viagra': (1, 1000), 'meeting': (15, 2)}
# Emails to be analyzed.
emails = [
    "let's schedule a meeting for tomorrow",  # 100% genuine (meeting)
    "buy some viagra",  # 100% spam (buy, viagra)
    "buy coffee for the meeting",  # buy x meeting, should be genuine
]

# Classify each email independently: a fresh Bayes instance is built per
# email, so nothing carries over from one message to the next.
for email in emails:
    # Start with priors of 90% chance being genuine, 10% spam.
    # Probabilities are normalized automatically.
    b = Bayes([('genuine', 90), ('spam', 10)])
    # Update probabilities, using the whitespace-separated words of the email
    # as events and the words_odds table to figure out the change.
    b.update_from_events(email.split(), words_odds)
    # Print the first 15 characters of the email followed by '...', then the
    # value returned by most_likely().
    print(email[:15] + '...', b.most_likely())

# Blank line to separate this section's output from what was printed before.
print('')

# Banner for the corpus-based section.
print(' -- Spam Filter With Email Corpus -- ')

# Email corpus, keyed by label. Spam: 200 emails about buying products plus
# one that mentions "meeting" (201 in total). Genuine: 200 emails about
# meetings and buying milk.
instances = {
    'spam': ["buy viagra", "buy cialis"] * 100 + ["meeting love"],
    'genuine': ["meeting tomorrow", "buy milk"] * 100
}
Пример #4
0
 def test_update_from_events(self):
     """Repeated events should each contribute their odds.

     Starting from [1, 1] and feeding event 'a' three times with odds
     (0.5, 2), the test expects the result to equal [0.5 ** 3, 2 ** 3].
     """
     odds_by_event = {'a': (0.5, 2)}
     observed = ['a'] * 3
     bayes = Bayes([1, 1])
     bayes.update_from_events(observed, odds_by_event)
     expected = [0.5 ** 3, 2 ** 3]
     self.assertEqual(bayes, expected)
Пример #5
0
 def test_update_from_events(self):
     """Expect the odds of a repeated event to be applied once per occurrence.

     Event 'a' with odds (0.5, 2) is supplied three times to a Bayes built
     from [1, 1]; the result must compare equal to [0.5**3, 2**3].
     """
     repeats = 3
     half, double = 0.5, 2
     posterior = Bayes([1, 1])
     posterior.update_from_events(['a'] * repeats, {'a': (half, double)})
     self.assertEqual(posterior, [half ** repeats, double ** repeats])
Пример #6
0
# emails.
# NOTE(review): each tuple appears to be (genuine odds, spam odds), matching
# the order of the priors given to Bayes in the loop below -- confirm against
# Bayes.update_from_events.
words_odds = {'buy': (5, 100), 'viagra': (1, 1000), 'meeting': (15, 2)}
# Emails to be analyzed.
emails = [
          "let's schedule a meeting for tomorrow", # 100% genuine (meeting)
          "buy some viagra", # 100% spam (buy, viagra)
          "buy coffee for the meeting", # buy x meeting, should be genuine
         ]

# Classify each email independently: a fresh Bayes instance is built per
# email, so nothing carries over from one message to the next.
for email in emails:
    # Start with priors of 90% chance being genuine, 10% spam.
    # Probabilities are normalized automatically.
    b = Bayes([('genuine', 90), ('spam', 10)])
    # Update probabilities, using the whitespace-separated words of the email
    # as events and the words_odds table to figure out the change.
    b.update_from_events(email.split(), words_odds)
    # Print the first 15 characters of the email followed by '...', then the
    # value returned by most_likely().
    print(email[:15] + '...', b.most_likely())

# Blank line to separate this section's output from what was printed before.
print('')

# Banner for the corpus-based section.
print(' -- Spam Filter With Email Corpus -- ')

# Email corpus, keyed by label. Spam: 200 emails about buying products plus
# one that mentions "meeting" (201 in total). Genuine: 200 emails about
# meetings and buying milk.
instances = {'spam': ["buy viagra", "buy cialis"] * 100 + ["meeting love"],
             'genuine': ["meeting tomorrow", "buy milk"] * 100}

# Use str.split to extract features/events/words from the corpus and build
# the model.