def test_extract_events_odds(self):
    # Build a per-class training corpus: each two-item list is repeated 100
    # times, and the spam class additionally gets one "meeting love" entry.
    spam_samples = ["buy viagra", "buy cialis"] * 100 + ["meeting love"]
    genuine_samples = ["meeting tomorrow", "buy milk"] * 100
    corpus = {'spam': spam_samples, 'genuine': genuine_samples}

    # Derive the event odds from the corpus using the extractor's defaults.
    learned_odds = Bayes.extract_events_odds(corpus)

    # Start from a prior of 0.9 for spam and 0.1 for genuine, then update
    # with the words of a new message.
    classifier = Bayes({'spam': 0.9, 'genuine': 0.1})
    message_words = 'buy coffee for meeting'.split()
    classifier.update_from_events(message_words, learned_odds)

    # Despite the spam-heavy prior, the message must come out as genuine
    # when most_likely is queried with 0.8.
    self.assertEqual(classifier.most_likely(0.8), 'genuine')
def test_extract_events_odds(self):
    # Training data keyed by class name. Insertion order (spam first, then
    # genuine) is kept the same as in the literal this replaces.
    training_data = {}
    training_data['spam'] = (
        ["buy viagra", "buy cialis"] * 100 + ["meeting love"]
    )
    training_data['genuine'] = ["meeting tomorrow", "buy milk"] * 100

    # Odds are extracted with the default arguments of extract_events_odds.
    extracted = Bayes.extract_events_odds(training_data)

    # Prior: 0.9 spam / 0.1 genuine.
    belief = Bayes({'spam': 0.9, 'genuine': 0.1})
    belief.update_from_events('buy coffee for meeting'.split(), extracted)

    # The updated belief, queried with 0.8, is expected to name 'genuine'.
    expected_label = 'genuine'
    self.assertEqual(belief.most_likely(0.8), expected_label)
print('') print(' -- Spam Filter With Email Corpus -- ') # Email corpus. A hundred spam emails to buy products and with the word # "meeting" thrown around. Genuine emails are about meetings and buying # milk. instances = { 'spam': ["buy viagra", "buy cialis"] * 100 + ["meeting love"], 'genuine': ["meeting tomorrow", "buy milk"] * 100 } # Use str.split to extract features/events/words from the corpus and build # the model. model = Bayes.extract_events_odds(instances, str.split) # Create a new Bayes instance with 10%/90% priors on emails being genuine. b = Bayes({'spam': .9, 'genuine': .1}) # Update beliefs with features/events/words from an email. b.update_from_events("buy coffee for meeting".split(), model) # Print the email and if it's likely spam or not. print("'buy coffee for meeting'", ':', b) print('') print(' -- Are You Cheating? -- ') results = ['heads', 'heads', 'tails', 'heads', 'heads'] events_odds = { 'heads': { 'honest': .5, 'cheating': .9
# Show the first 15 characters of the email followed by the class that the
# current belief state `b` reports as most likely.
print(email[:15] + '...', b.most_likely())

print('')
print(' -- Spam Filter With Email Corpus -- ')

# Email corpus keyed by class. The spam class holds "buy viagra" and
# "buy cialis" 100 times each plus a single "meeting love"; the genuine class
# holds "meeting tomorrow" and "buy milk" 100 times each.
instances = {
    'spam': ["buy viagra", "buy cialis"] * 100 + ["meeting love"],
    'genuine': ["meeting tomorrow", "buy milk"] * 100,
}

# Build the model from the corpus, using str.split to break every instance
# into its words (the events).
model = Bayes.extract_events_odds(instances, str.split)

# Fresh belief state with priors of 90% spam and 10% genuine.
b = Bayes({'spam': 0.9, 'genuine': 0.1})

# Update the beliefs with the words of a new email.
b.update_from_events("buy coffee for meeting".split(), model)

# Print the email text next to the resulting belief state.
print("'buy coffee for meeting'", ':', b)

print('')
print(' -- Are You Cheating? -- ')

# Observed outcomes: four heads and one tails.
results = ['heads', 'heads', 'tails', 'heads', 'heads']

# For each outcome, the odds of seeing it under each hypothesis.
events_odds = {
    'heads': {'honest': 0.5, 'cheating': 0.9},
    'tails': {'honest': 0.5, 'cheating': 0.1},
}

# Start undecided between the two hypotheses, then fold in every outcome.
b = Bayes({'cheating': 0.5, 'honest': 0.5})
b.update_from_events(results, events_odds)