def drive(nsets, decision):
    """Score every message in the test sets, training as `decision` directs.

    nsets    -- number of ham/spam directory sets; sets 1..nsets are read
                from the "TestDriver" options "spam_directories" and
                "ham_directories", each used as a %-format template.
    decision -- callable object; decision(score, is_spam) is compared with
                TRAIN_AS_SPAM / TRAIN_AS_HAM to decide whether (and how) to
                train on the message, and decision.tooearly() suppresses
                cost accounting while it returns true.

    Progress is printed every 100 messages and once more at the end.
    Returns None.
    """
    print(options.display())

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i
                for i in range(1, nsets + 1)]
    hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i
               for i in range(1, nsets + 1)]

    # Each entry is (directory, filename, is_spam).
    spamfns = [(x, y, 1) for x in spamdirs for y in os.listdir(x)]
    hamfns = [(x, y, 0) for x in hamdirs for y in os.listdir(x)]

    cc = CostCounter.nodelay()

    # The entries are stored as dict keys and processed in the dict's
    # iteration order.
    # NOTE(review): on Python 2 that order is hash-based, which presumably
    # is what mixes ham and spam; on Python 3.7+ dicts keep insertion order,
    # so all spam would be processed before any ham -- confirm the intent
    # before relying on this under Python 3.
    allfns = {}
    for fn in spamfns + hamfns:
        allfns[fn] = None

    d = hammie.open('weaktest.db', False)

    progress = "%5d trained:%dH+%dS wrds:%d"
    hamtrain = 0
    spamtrain = 0
    n = 0
    for dirname, name, is_spam in allfns:
        n += 1
        m = msgs.Msg(dirname, name).guts
        if debug > 1:
            print("trained:%dH+%dS" % (hamtrain, spamtrain))

        # Score first, so the message is judged before it can be trained on.
        scr = d.score(m)
        if debug > 1:
            print("score:%.3f" % scr)

        if not decision.tooearly():
            if is_spam:
                if debug > 0:
                    print("Spam with score %.2f" % scr)
                cc.spam(scr)
            else:
                if debug > 0:
                    print("Ham with score %.2f" % scr)
                cc.ham(scr)

        de = decision(scr, is_spam)
        if de == TRAIN_AS_SPAM:
            d.train_spam(m)
            spamtrain += 1
        elif de == TRAIN_AS_HAM:
            d.train_ham(m)
            hamtrain += 1

        if n % 100 == 0:
            print(progress % (n, hamtrain, spamtrain, len(d.bayes.wordinfo)))
            print(cc)

    print("=" * 70)
    print(progress % (n, hamtrain, spamtrain, len(d.bayes.wordinfo)))
    print(cc)
def __init__(self):
    """Set up empty result collections, histograms, a classifier and a cost counter."""
    # Message collections for the three outcome categories start out empty.
    self.falsepos, self.falseneg, self.unsure = set(), set(), set()

    # One histogram each for ham and spam.
    self.global_ham_hist, self.global_spam_hist = Hist(), Hist()

    self.ntimes_finishtest_called = 0
    self.new_classifier()

    # Function-scope import, kept exactly where the original placed it
    # (after new_classifier()); the reason is not visible from this block.
    from spambayes import CostCounter
    self.cc = CostCounter.default()
def drive(nsets, decision):
    """Score every message in the test sets, training as `decision` directs.

    nsets    -- number of ham/spam directory sets; sets 1..nsets are read
                from the "TestDriver" options "spam_directories" and
                "ham_directories", each used as a %-format template.
    decision -- callable object; decision(score, is_spam) is compared with
                TRAIN_AS_SPAM / TRAIN_AS_HAM to decide whether (and how) to
                train on the message, and decision.tooearly() suppresses
                cost accounting while it returns true.

    Progress is printed every 100 messages and once more at the end.
    Returns None.
    """
    print(options.display())

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i
                for i in range(1, nsets + 1)]
    hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i
               for i in range(1, nsets + 1)]

    # Each entry is (directory, filename, is_spam).
    spamfns = [(x, y, 1) for x in spamdirs for y in os.listdir(x)]
    hamfns = [(x, y, 0) for x in hamdirs for y in os.listdir(x)]

    cc = CostCounter.nodelay()

    # The entries are stored as dict keys and processed in the dict's
    # iteration order.
    # NOTE(review): on Python 2 that order is hash-based, which presumably
    # is what mixes ham and spam; on Python 3.7+ dicts keep insertion order,
    # so all spam would be processed before any ham -- confirm the intent
    # before relying on this under Python 3.
    allfns = {}
    for fn in spamfns + hamfns:
        allfns[fn] = None

    d = hammie.open("weaktest.db", False)

    progress = "%5d trained:%dH+%dS wrds:%d"
    hamtrain = 0
    spamtrain = 0
    n = 0
    for dirname, name, is_spam in allfns:
        n += 1
        m = msgs.Msg(dirname, name).guts
        if debug > 1:
            print("trained:%dH+%dS" % (hamtrain, spamtrain))

        # Score first, so the message is judged before it can be trained on.
        scr = d.score(m)
        if debug > 1:
            print("score:%.3f" % scr)

        if not decision.tooearly():
            if is_spam:
                if debug > 0:
                    print("Spam with score %.2f" % scr)
                cc.spam(scr)
            else:
                if debug > 0:
                    print("Ham with score %.2f" % scr)
                cc.ham(scr)

        de = decision(scr, is_spam)
        if de == TRAIN_AS_SPAM:
            d.train_spam(m)
            spamtrain += 1
        elif de == TRAIN_AS_HAM:
            d.train_ham(m)
            hamtrain += 1

        if n % 100 == 0:
            print(progress % (n, hamtrain, spamtrain, len(d.bayes.wordinfo)))
            print(cc)

    print("=" * 70)
    print(progress % (n, hamtrain, spamtrain, len(d.bayes.wordinfo)))
    print(cc)