示例#1
0
 def test_import_to_dbm(self):
     """Import a CSV file into a dbm store and verify the result.

     Writes a CSV file whose first line is "nham,nspam" and whose
     remaining lines are "word,hamcount,spamcount", runs
     sb_dbexpimp.runImport() on it, then reopens the store and checks
     that the message counts and every word's counts match the CSV.
     """
     nham, nspam = 3, 4
     csv_data = {
         "this": (2, 1),
         "is": (0, 1),
         "a": (3, 4),
         'test': (1, 1),
         "of": (1, 0),
         "the": (1, 2),
         "import": (3, 1),
     }
     # Create a CSV file to import.
     temp = open(TEMP_CSV_NAME, "wb")
     try:
         temp.write("%d,%d\n" % (nham, nspam))
         for word, (ham, spam) in csv_data.items():
             temp.write("%s,%s,%s\n" % (word, ham, spam))
     finally:
         # Release the handle even if a write fails, so the temporary
         # file is never left open.
         temp.close()
     # NOTE(review): the True flag presumably requests a fresh database;
     # the assertions below expect exactly the CSV counts.
     sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", True, TEMP_CSV_NAME)
     # Open the converted file and verify that it has all the data from
     # the CSV file (and by opening it, that it is a valid dbm file).
     bayes = open_storage(TEMP_DBM_NAME, "dbm")
     self.assertEqual(bayes.nham, nham)
     self.assertEqual(bayes.nspam, nspam)
     for word, (ham, spam) in csv_data.items():
         # Look the word up in the form produced by uquote().
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in bayes._wordinfokeys())
         wi = bayes._wordinfoget(word)
         self.assertEqual(wi.hamcount, ham)
         self.assertEqual(wi.spamcount, spam)
示例#2
0
 def test_merge_to_pickle(self):
     """Merge a CSV file into an existing pickle store and verify totals.

     Trains and stores a PickledClassifier, writes a CSV file (first
     line "nham,nspam", then "word,hamcount,spamcount" lines), runs
     sb_dbexpimp.runImport() against the same pickle, and checks that
     the reopened store holds the sum of the original and CSV counts.
     """
     # Create a pickled classifier to merge with; train it so it is
     # not empty.
     bayes = PickledClassifier(TEMP_PICKLE_NAME)
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     bayes.store()
     # Create a CSV file to import.
     nham, nspam = 3, 4
     csv_data = {
         "this": (2, 1),
         "is": (0, 1),
         "a": (3, 4),
         'test': (1, 1),
         "of": (1, 0),
         "the": (1, 2),
         "import": (3, 1),
     }
     temp = open(TEMP_CSV_NAME, "wb")
     try:
         temp.write("%d,%d\n" % (nham, nspam))
         for word, (ham, spam) in csv_data.items():
             temp.write("%s,%s,%s\n" % (word, ham, spam))
     finally:
         # Release the handle even if a write fails.
         temp.close()
     # NOTE(review): the False flag presumably means "merge into the
     # existing store"; the assertions below expect summed counts.
     sb_dbexpimp.runImport(TEMP_PICKLE_NAME, "pickle", False,
                           TEMP_CSV_NAME)
     # Reopen the store; the in-memory `bayes` still holds the
     # pre-merge values to compare against.
     bayes2 = open_storage(TEMP_PICKLE_NAME, "pickle")
     self.assertEqual(bayes2.nham, nham + bayes.nham)
     self.assertEqual(bayes2.nspam, nspam + bayes.nspam)
     # Copy the key collection before extending it, so the object
     # returned by the classifier is never mutated in place.
     words = list(bayes._wordinfokeys())
     words.extend(csv_data.keys())
     for word in words:
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in bayes2._wordinfokeys())
         # Expected count = CSV contribution (if any) + original
         # contribution (if any).
         h, s = csv_data.get(word, (0, 0))
         wi = bayes._wordinfoget(word)
         if wi:
             h += wi.hamcount
             s += wi.spamcount
         wi2 = bayes2._wordinfoget(word)
         self.assertEqual(h, wi2.hamcount)
         self.assertEqual(s, wi2.spamcount)
示例#3
0
 def test_merge_to_dbm(self):
     """Merge a CSV file into an existing dbm store and verify totals.

     Trains a DBDictClassifier, snapshots its counts, stores and closes
     it, then writes a CSV file (first line "nham,nspam", followed by
     "word,hamcount,spamcount" lines) and runs sb_dbexpimp.runImport()
     against the same dbm file.  The reopened store must hold the sum
     of the snapshot and the CSV counts.
     """
     # Create a dbm classifier to merge with.
     bayes = DBDictClassifier(TEMP_DBM_NAME)
     # Stuff some messages in it so it's not empty.
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     # Save data to check against.
     original_nham = bayes.nham
     original_nspam = bayes.nspam
     original_data = {}
     for key in bayes._wordinfokeys():
         original_data[key] = bayes._wordinfoget(key)
     # Save & Close.
     bayes.store()
     bayes.close()
     # Create a CSV file to import.
     nham, nspam = 3, 4
     csv_data = {
         "this": (2, 1),
         "is": (0, 1),
         "a": (3, 4),
         'test': (1, 1),
         "of": (1, 0),
         "the": (1, 2),
         "import": (3, 1),
     }
     temp = open(TEMP_CSV_NAME, "wb")
     try:
         temp.write("%d,%d\n" % (nham, nspam))
         for word, (ham, spam) in csv_data.items():
             temp.write("%s,%s,%s\n" % (word, ham, spam))
     finally:
         # Release the handle even if a write fails.
         temp.close()
     # NOTE(review): the False flag presumably means "merge into the
     # existing store"; the assertions below expect summed counts.
     sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", False, TEMP_CSV_NAME)
     # Open the converted file and verify that it has all the data from
     # the CSV file (and by opening it, that it is a valid dbm file),
     # and the data from the original dbm database.
     bayes2 = open_storage(TEMP_DBM_NAME, "dbm")
     self.assertEqual(bayes2.nham, nham + original_nham)
     self.assertEqual(bayes2.nspam, nspam + original_nspam)
     # list() copies the keys whether keys() returns a list or a view,
     # unlike slicing, which only works on a list.
     words = list(original_data.keys())
     words.extend(csv_data.keys())
     for word in words:
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in bayes2._wordinfokeys())
         # Expected count = CSV contribution (if any) + original
         # contribution (if any).
         h, s = csv_data.get(word, (0, 0))
         wi = original_data.get(word, None)
         if wi:
             h += wi.hamcount
             s += wi.spamcount
         wi2 = bayes2._wordinfoget(word)
         self.assertEqual(h, wi2.hamcount)
         self.assertEqual(s, wi2.spamcount)
示例#4
0
 def test_import_to_dbm(self):
     """Import a CSV file into a dbm store and verify the result.

     The CSV file starts with an "nham,nspam" line followed by one
     "word,hamcount,spamcount" line per word.  After
     sb_dbexpimp.runImport() the reopened store must report the same
     message counts and per-word counts as the CSV.
     """
     nham, nspam = 3, 4
     csv_data = {"this": (2, 1), "is": (0, 1), "a": (3, 4),
                 'test': (1, 1), "of": (1, 0), "the": (1, 2),
                 "import": (3, 1)}
     # Create a CSV file to import.
     temp = open(TEMP_CSV_NAME, "wb")
     try:
         temp.write("%d,%d\n" % (nham, nspam))
         for word, (ham, spam) in csv_data.items():
             temp.write("%s,%s,%s\n" % (word, ham, spam))
     finally:
         # Release the handle even if a write fails.
         temp.close()
     # NOTE(review): the True flag presumably requests a fresh database;
     # the assertions below expect exactly the CSV counts.
     sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", True, TEMP_CSV_NAME)
     # Reopening the store also checks that a readable dbm file was
     # produced.
     bayes = open_storage(TEMP_DBM_NAME, "dbm")
     self.assertEqual(bayes.nham, nham)
     self.assertEqual(bayes.nspam, nspam)
     for word, (ham, spam) in csv_data.items():
         # Look the word up in the form produced by uquote().
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in bayes._wordinfokeys())
         wi = bayes._wordinfoget(word)
         self.assertEqual(wi.hamcount, ham)
         self.assertEqual(wi.spamcount, spam)
 def test_merge_to_dbm(self):
     """Merge a CSV file into an existing dbm store and verify totals.

     A DBDictClassifier is trained, its counts are snapshotted, and it
     is stored and closed.  A CSV file (first line "nham,nspam", then
     "word,hamcount,spamcount" lines) is then merged into the same dbm
     file with sb_dbexpimp.runImport().  The reopened store must hold
     the snapshot counts plus the CSV counts.
     """
     # Create a dbm classifier to merge with.
     bayes = DBDictClassifier(TEMP_DBM_NAME)
     # Stuff some messages in it so it's not empty.
     bayes.learn(tokenize(spam1), True)
     bayes.learn(tokenize(good1), False)
     # Save data to check against.
     original_nham = bayes.nham
     original_nspam = bayes.nspam
     original_data = {}
     for key in bayes._wordinfokeys():
         original_data[key] = bayes._wordinfoget(key)
     # Save & Close.
     bayes.store()
     bayes.close()
     # Create a CSV file to import.
     nham, nspam = 3, 4
     csv_data = {"this": (2, 1), "is": (0, 1), "a": (3, 4),
                 'test': (1, 1), "of": (1, 0), "the": (1, 2),
                 "import": (3, 1)}
     temp = open(TEMP_CSV_NAME, "wb")
     try:
         temp.write("%d,%d\n" % (nham, nspam))
         for word, (ham, spam) in csv_data.items():
             temp.write("%s,%s,%s\n" % (word, ham, spam))
     finally:
         # Release the handle even if a write fails.
         temp.close()
     # NOTE(review): the False flag presumably means "merge into the
     # existing store"; the assertions below expect summed counts.
     sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", False, TEMP_CSV_NAME)
     # Open the converted file and verify that it has all the data from
     # the CSV file (and by opening it, that it is a valid dbm file),
     # and the data from the original dbm database.
     bayes2 = open_storage(TEMP_DBM_NAME, "dbm")
     self.assertEqual(bayes2.nham, nham + original_nham)
     self.assertEqual(bayes2.nspam, nspam + original_nspam)
     # list() copies the keys whether keys() returns a list or a view,
     # unlike slicing, which only works on a list.
     words = list(original_data.keys())
     words.extend(csv_data.keys())
     for word in words:
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in bayes2._wordinfokeys())
         # Expected count = CSV contribution (if any) + original
         # contribution (if any).
         h, s = csv_data.get(word, (0, 0))
         wi = original_data.get(word, None)
         if wi:
             h += wi.hamcount
             s += wi.spamcount
         wi2 = bayes2._wordinfoget(word)
         self.assertEqual(h, wi2.hamcount)
         self.assertEqual(s, wi2.spamcount)
 def test_import_to_dbm(self):
     """Import a CSV file into a dbm store and verify the result.

     Builds a CSV file ("nham,nspam" header line, then one
     "word,hamcount,spamcount" line per word), imports it with
     sb_dbexpimp.runImport(), and checks that the reopened store
     reports the same message counts and per-word counts.
     """
     nham, nspam = 3, 4
     csv_data = {"this": (2, 1), "is": (0, 1), "a": (3, 4),
                 'test': (1, 1), "of": (1, 0), "the": (1, 2),
                 "import": (3, 1)}
     # Create a CSV file to import.
     temp = open(TEMP_CSV_NAME, "wb")
     try:
         temp.write("%d,%d\n" % (nham, nspam))
         for word, (ham, spam) in csv_data.items():
             temp.write("%s,%s,%s\n" % (word, ham, spam))
     finally:
         # Release the handle even if a write fails.
         temp.close()
     # NOTE(review): the True flag presumably requests a fresh database;
     # the assertions below expect exactly the CSV counts.
     sb_dbexpimp.runImport(TEMP_DBM_NAME, "dbm", True, TEMP_CSV_NAME)
     # Open the converted file and verify that it has all the data from
     # the CSV file (and by opening it, that it is a valid dbm file).
     bayes = open_storage(TEMP_DBM_NAME, "dbm")
     self.assertEqual(bayes.nham, nham)
     self.assertEqual(bayes.nspam, nspam)
     for word, (ham, spam) in csv_data.items():
         # Look the word up in the form produced by uquote().
         word = sb_dbexpimp.uquote(word)
         self.assert_(word in bayes._wordinfokeys())
         wi = bayes._wordinfoget(word)
         self.assertEqual(wi.hamcount, ham)
         self.assertEqual(wi.spamcount, spam)