Exemplo n.º 1
0
def RandomFilter(path):
    """Label every entry of *path* at random and emit a prediction file.

    Each name returned by ``os.listdir(path)`` that does not start with
    "!" is mapped to a label drawn by ``random.choice`` from "OK" and
    "SPAM". The resulting mapping is handed to ``BaseFilter(path, ...)``,
    whose ``generate_prediction_file()`` is then called.
    """
    labels = ["OK", "SPAM"]
    guesses = {
        entry: random.choice(labels)
        for entry in os.listdir(path)
        if not entry.startswith("!")
    }
    BaseFilter(path, guesses).generate_prediction_file()
Exemplo n.º 2
0
def NaiveFilter(path):
    """Label every entry of *path* as "OK" and emit a prediction file.

    Names returned by ``os.listdir(path)`` that start with "!" are left
    out. The mapping of the remaining names to "OK" is handed to
    ``BaseFilter(path, ...)``, whose ``generate_prediction_file()`` is
    then called.
    """
    visible = (name for name in os.listdir(path) if not name.startswith("!"))
    all_ok = dict.fromkeys(visible, "OK")
    naive = BaseFilter(path, all_ok)
    naive.generate_prediction_file()
Exemplo n.º 3
0
        def test(self, path_to_test_dir):
                """Classify every email in *path_to_test_dir* and write the predictions.

                For each file name yielded by ``Corpus(path_to_test_dir).emails_as_string()``
                the message is parsed and the address extracted from its ``From`` header
                is looked up, in order, in the dicts read from ``self.path_bl``,
                ``self.path_wl``, ``self.path_ssl`` and ``self.path_hsl``. The first hit
                decides the label ('SPAM', 'OK', 'SPAM', 'OK' respectively). When no
                dict matches, the message is labelled 'SPAM' if
                ``Bayesian.bayesian_prediction`` on its text is strictly greater than
                0.485, otherwise 'OK'.

                The collected ``{file name: label}`` mapping is handed to
                ``BaseFilter(path_to_test_dir, ...)`` and ``generate_prediction_file()``
                is called on it.

                :param path_to_test_dir: directory containing the emails to classify.
                """
                predictions = {}  # {file name: 'SPAM' | 'OK'}
                bs = Bayesian.Bayesian()
                corpus = Corpus(path_to_test_dir)
                # Read the dicts from file (covers test being called before train)
                black_list_dict = methods.read_dict_from_file(self.path_bl)
                white_list_dict = methods.read_dict_from_file(self.path_wl)
                spam_subject_dict = methods.read_dict_from_file(self.path_ssl)
                ham_subject_dict = methods.read_dict_from_file(self.path_hsl)

                # The directory prefix does not change between emails, so build it once.
                mail_dir = methods.add_slash(path_to_test_dir)

                for fname, _body in corpus.emails_as_string():
                        # Parse the email; the context manager closes the file
                        # even if parsing raises.
                        with open(mail_dir + fname, 'r', encoding='utf-8') as email_as_file:
                                msg = email.message_from_file(email_as_file)

                        # Extract the sender once instead of once per branch.
                        sender = self.extract_email_adress_from_text(msg['From'])

                        if sender in black_list_dict:
                                # Sender is in the black list
                                predictions[fname] = 'SPAM'
                        elif sender in white_list_dict:
                                # Sender is in the white list
                                predictions[fname] = 'OK'
                        # NOTE(review): the two branches below were commented as
                        # "subject" checks but look up the sender address, not
                        # msg['Subject'] -- confirm which key the subject dicts use.
                        elif sender in spam_subject_dict:
                                predictions[fname] = 'SPAM'
                        elif sender in ham_subject_dict:
                                predictions[fname] = 'OK'
                        # No list matched: fall back to the Bayesian checker
                        elif bs.bayesian_prediction(methods.get_text(msg)) > 0.485:
                                predictions[fname] = 'SPAM'
                        else:
                                predictions[fname] = 'OK'

                # Generate prediction file
                bf = BaseFilter(path_to_test_dir, predictions)
                bf.generate_prediction_file()
Exemplo n.º 4
0
 def __init__(self, src, expr):
     """Initialise via BaseFilter with *src* and keep *expr* as a compiled regex.

     :param src: forwarded unchanged to ``BaseFilter.__init__``.
     :param expr: pattern passed to ``re.compile``; the compiled pattern is
         stored on ``self.expr``.
     """
     BaseFilter.__init__(self,src)
     # Compile once here so the pattern object can be reused.
     self.expr = re.compile(expr)
Exemplo n.º 5
0
 def __init__(self):
     """Run ``BaseFilter.__init__`` on this instance and set ``bayes_val`` to 0."""
     BaseFilter.__init__(self)
     # This value is only used if the filter has not been trained
     self.bayes_val = 0
Exemplo n.º 6
0
 def __init__(self):
     """Run ``BaseFilter.__init__`` on this instance and start with an empty whitelist."""
     BaseFilter.__init__(self)
     # Starts out as an empty set.
     self.whitelist = set()
Exemplo n.º 7
0
 def __init__(self):
     """Run ``BaseFilter.__init__`` on this instance and set ``bayes_val`` to 0."""
     BaseFilter.__init__(self)
     # Initial value; nothing in this initialiser changes it afterwards.
     self.bayes_val = 0
Exemplo n.º 8
0
 def __init__(self):
     """Run ``BaseFilter.__init__`` on this instance and start with an empty blacklist."""
     BaseFilter.__init__(self)
     # Starts out as an empty set.
     self.blacklist = set()
Exemplo n.º 9
0
 def __init__(self, src, expr):
     """Initialise via BaseFilter with *src* and store *expr* unchanged.

     :param src: forwarded unchanged to ``BaseFilter.__init__``.
     :param expr: stored as-is on ``self.expr`` (no compilation or
         validation happens here).
     """
     BaseFilter.__init__(self,src)
     self.expr = expr