def test_extract_events_odds(self):
    """Check that odds extracted from labelled samples can outweigh the prior.

    Starts from a prior of 0.9 'spam' / 0.1 'genuine', applies the odds
    extracted from the sample messages to the words of
    'buy coffee for meeting', and expects ``most_likely(0.8)`` to
    report 'genuine'.
    """
    # Training samples per label; 'spam' is listed first, as in the
    # original fixture, so the mapping keeps the same insertion order.
    spam_samples = ["buy viagra", "buy cialis"] * 100 + ["meeting love"]
    genuine_samples = ["meeting tomorrow", "buy milk"] * 100
    instances = {'spam': spam_samples, 'genuine': genuine_samples}

    odds = Bayes.extract_events_odds(instances)

    classifier = Bayes({'spam': 0.9, 'genuine': 0.1})
    words = 'buy coffee for meeting'.split()
    classifier.update_from_events(words, odds)

    # 0.8 is presumably a minimum-confidence threshold -- confirm against
    # Bayes.most_likely.
    winner = classifier.most_likely(0.8)
    self.assertEqual(winner, 'genuine')
def test_extract_events_odds(self):
    """Check that odds extracted from labelled samples can outweigh the prior.

    Starts from a prior of 0.9 'spam' / 0.1 'genuine', applies the odds
    extracted from the sample messages to the words of
    'buy coffee for meeting', and expects ``most_likely(0.8)`` to
    report 'genuine'.
    """
    # Training samples per label; 'spam' is listed first, as in the
    # original fixture, so the mapping keeps the same insertion order.
    spam_samples = ["buy viagra", "buy cialis"] * 100 + ["meeting love"]
    genuine_samples = ["meeting tomorrow", "buy milk"] * 100
    instances = {'spam': spam_samples, 'genuine': genuine_samples}

    odds = Bayes.extract_events_odds(instances)

    classifier = Bayes({'spam': 0.9, 'genuine': 0.1})
    words = 'buy coffee for meeting'.split()
    classifier.update_from_events(words, odds)

    # 0.8 is presumably a minimum-confidence threshold -- confirm against
    # Bayes.most_likely.
    winner = classifier.most_likely(0.8)
    self.assertEqual(winner, 'genuine')
# Classifies every file under "folder" as either a Python or Java file, # considering you have subdirectories with examples of each language. #print classify_folder("folder") print('') print(' == Low Level Functions == ') print(' -- Classic Cancer Test Problem --') # 1% chance of having cancer. b = Bayes([('not cancer', 0.99), ('cancer', 0.01)]) # Test positive, 9.6% false positives and 80% true positives b.update((9.6, 80)) print(b) print('Most likely:', b.most_likely()) print('') print(' -- Spam Filter With Existing Model --') # Database with number of sightings of each word in (genuine, spam) # emails. words_odds = {'buy': (5, 100), 'viagra': (1, 1000), 'meeting': (15, 2)} # Emails to be analyzed. emails = [ "let's schedule a meeting for tomorrow", # 100% genuine (meeting) "buy some viagra", # 100% spam (buy, viagra) "buy coffee for the meeting", # buy x meeting, should be genuine ] for email in emails:
def test_most_likely(self):
    """Exercise ``most_likely`` with no argument and with several cutoffs.

    With weights 9 for 'a' and 1 for 'b', the call without an argument
    and the calls with 0 and 0.89 must all return 'a', while 0.91 must
    return None.
    """
    classifier = Bayes({'a': 9, 'b': 1})

    # No argument: the heavier label is reported.
    self.assertEqual(classifier.most_likely(), 'a')

    # Cutoffs for which 'a' is still expected to be reported.
    for cutoff in (0, 0.89):
        self.assertEqual(classifier.most_likely(cutoff), 'a')

    # A cutoff of 0.91 is expected to yield no answer at all.
    self.assertIsNone(classifier.most_likely(0.91))
def test_most_likely(self):
    """Exercise ``most_likely`` with no argument and with several cutoffs.

    With weights 9 for 'a' and 1 for 'b', the call without an argument
    and the calls with 0 and 0.89 must all return 'a', while 0.91 must
    return None.
    """
    classifier = Bayes({'a': 9, 'b': 1})

    # No argument: the heavier label is reported.
    self.assertEqual(classifier.most_likely(), 'a')

    # Cutoffs for which 'a' is still expected to be reported.
    for cutoff in (0, 0.89):
        self.assertEqual(classifier.most_likely(cutoff), 'a')

    # A cutoff of 0.91 is expected to yield no answer at all.
    self.assertIsNone(classifier.most_likely(0.91))
# Classifies every file under "folder" as either a Python or Java file, # considering you have subdirectories with examples of each language. #print classify_folder("folder") print('') print(' == Low Level Functions == ') print(' -- Classic Cancer Test Problem --') # 1% chance of having cancer. b = Bayes([('not cancer', 0.99), ('cancer', 0.01)]) # Test positive, 9.6% false positives and 80% true positives b.update((9.6, 80)) print(b) print('Most likely:', b.most_likely()) print('') print(' -- Spam Filter With Existing Model --') # Database with number of sightings of each word in (genuine, spam) # emails. words_odds = {'buy': (5, 100), 'viagra': (1, 1000), 'meeting': (15, 2)} # Emails to be analyzed. emails = [ "let's schedule a meeting for tomorrow", # 100% genuine (meeting) "buy some viagra", # 100% spam (buy, viagra) "buy coffee for the meeting", # buy x meeting, should be genuine ] for email in emails: