Пример #1
0
def compute_quality_for_corpus(corpus_dir):
        """Score the predictions stored in a corpus directory against its truth file.

        Loads ``!truth.txt`` and ``!prediction.txt`` from ``corpus_dir``, feeds
        both classifications into a ``BinaryConfusionMatrix('SPAM', 'OK')`` and
        returns the tuple ``(quality_score(tp, tn, fp, fn), tp, tn, fp, fn)``.

        Args:
                corpus_dir: Directory containing the two classification files.
        """
        # Truth first, prediction second - the same order the files are read in.
        truth, prediction = [
                methods.read_classification_from_file(methods.add_slash(corpus_dir) + file_name)
                for file_name in ("!truth.txt", "!prediction.txt")
        ]

        # NOTE(review): presumably 'SPAM' is the positive tag and 'OK' the
        # negative one - confirm against BinaryConfusionMatrix.
        matrix = BinaryConfusionMatrix('SPAM', 'OK')
        matrix.compute_from_dicts(truth, prediction)

        counts = matrix.as_dict()
        fn, tn, fp, tp = (counts[key] for key in ('fn', 'tn', 'fp', 'tp'))
        return quality_score(tp, tn, fp, fn), tp, tn, fp, fn
Пример #2
0
        def train(self,path_to_truth_dir):
                """Process a labelled corpus and hand the collected lists to the file writer.

                Reads ``!truth.txt`` from ``path_to_truth_dir`` and stores the result on
                ``self.truth``. Every e-mail yielded by the corpus is then parsed with
                the standard ``email`` parser and passed, together with its file name,
                to ``extract_senders_list`` and ``check_subject``. Finally the black,
                white, spam-subject and ham-subject lists are passed with their target
                paths to ``methods.generate_file_from_dict``.

                Args:
                        path_to_truth_dir: Directory holding the e-mails and ``!truth.txt``.
                """
                corpus = Corpus(path_to_truth_dir)
                # The directory prefix never changes, so build it once instead of per e-mail.
                corpus_dir = methods.add_slash(path_to_truth_dir)

                # Keep the truth classification on the instance.
                # NOTE(review): presumably the helper methods called below read it - confirm.
                self.truth = methods.read_classification_from_file(corpus_dir + "!truth.txt")

                # Only the file name is needed; the message is re-read from disk so that
                # the e-mail parser receives a file object.
                for fname, _body in corpus.emails_as_string():
                        # The context manager closes the handle even if parsing fails;
                        # previously one file descriptor leaked per e-mail.
                        with open(corpus_dir + fname, 'r', encoding='utf-8') as email_as_file:
                                msg = email.message_from_file(email_as_file)
                        # The message is fully parsed by now, so the file may already be closed.
                        self.extract_senders_list(msg, fname)
                        self.check_subject(msg, fname)

                # Hand every collected list to the writer together with its target path.
                methods.generate_file_from_dict(self.path_bl, self.black_list)
                methods.generate_file_from_dict(self.path_wl, self.white_list)
                methods.generate_file_from_dict(self.path_ssl, self.spam_subject_list)
                methods.generate_file_from_dict(self.path_hsl, self.ham_subject_list)
Пример #3
0
 def is_spam(self,email_name):
         """Tell whether ``email_name`` is labelled ``'SPAM'`` in the truth classification.

         The classification is loaded on every call through
         ``methods.read_classification_from_file(self.path, '!truth.txt')``.

         NOTE(review): other callers of this helper pass one concatenated path;
         confirm that the two-argument form used here matches its signature.

         Args:
                 email_name: Key to look up in the loaded classification.

         Returns:
                 True when the stored label equals ``'SPAM'``, otherwise False.
         """
         labels = methods.read_classification_from_file(self.path,'!truth.txt')
         return labels[email_name] == 'SPAM'