def create_non_spam_msgs(self, *argv):
    """Initialise the non-spam word table from one message source.

    Every positional argument is forwarded unchanged to
    ``open_txt_file_from_terminal``.  Its result is run through
    ``format_string_content`` and then handed to
    ``list_to_dictionary_word_frequency_in_msg``; whatever that returns
    replaces ``self._non_spam_msgs``.  Finally the non-spam message
    counter is increased by one.
    """
    raw_content = open_txt_file_from_terminal(*argv)
    formatted_content = format_string_content(raw_content)
    # Assignment overwrites any previously stored table instead of merging.
    self._non_spam_msgs = list_to_dictionary_word_frequency_in_msg(
        formatted_content)
    self._nr_of_non_spam_msgs_total += 1
def update_non_spam_msgs(self, *argv):
    """Fold one more message source into the non-spam word table.

    Every positional argument is forwarded unchanged to
    ``open_txt_file_from_terminal``.  Its result is run through
    ``format_string_content`` and then passed, together with the current
    ``self._non_spam_msgs``, to ``update_the_frequency_dictionary_by_msg``.
    The value returned by that helper is stored back on
    ``self._non_spam_msgs`` and the non-spam message counter is increased
    by one.
    """
    raw_content = open_txt_file_from_terminal(*argv)
    formatted_content = format_string_content(raw_content)
    # The helper's return value is what gets stored, so this works whether
    # it mutates the table in place or builds a new one.
    self._non_spam_msgs = update_the_frequency_dictionary_by_msg(
        self._non_spam_msgs, formatted_content)
    self._nr_of_non_spam_msgs_total += 1
# Driver script (continued): finish feeding non-spam messages to the filter,
# load the spam messages, then report Bayes scores for every value of K.

# NOTE(review): `i` is not bound anywhere in the visible code, so this call
# is presumably the body of a `for i in range(...)` loop whose header lies
# above this chunk -- confirm, and indent it under that loop if so.
spam_filter_obj.update_non_spam_msgs('non_spam_msgs/nsm' + str(i))

# Spam side: create_spam_msgs is called with sm0, then update_spam_msgs is
# called once for each of sm1 .. sm22.
spam_filter_obj.create_spam_msgs('spam_msgs/sm0')
for i in range(1, 23):
    spam_filter_obj.update_spam_msgs('spam_msgs/sm' + str(i))

# The message under test (sm23) is the same for every K, so it is read and
# formatted once here instead of being re-opened on each loop iteration.
# NOTE(review): assumes the two helpers are deterministic for a fixed path.
list_from_new_msg = format_string_content(
    open_txt_file_from_terminal('spam_msgs/sm23'))

# Each print below passes ONE pre-joined string inside parentheses.  That
# emits exactly what the former Python 2 `print a, b` statements emitted
# (items separated by a single space, newline appended) while also being
# valid syntax on Python 3.
for i in range(spam_filter_obj._nr_of_spam_msgs_total):
    print("\nGENERATED THE INTERESTED LIST FROM SPAM DATABASE WITH K = " + str(i))

    list_from_spm = spam_filter_obj.get_less_frequent_words_in_spam(i)
    reduced_lst_spam = list(
        spam_filter_obj.get_intersection_of_spam_nonspam(list_from_spm))

    # Score computed from the reduced word list alone.
    print("Bayes applied(using reduced_lst)" + " "
          + str(spam_filter_obj.apply_bayes_thm_list_of_words(reduced_lst_spam)))
    print("the list_from_new_msg: " + " " + str(list_from_new_msg))

    # Keep only the words of the new message that also occur in the
    # reduced list; going through a set drops duplicates.
    final_lst = list(set(list_from_new_msg).intersection(reduced_lst_spam))
    print("final list" + " " + str(final_lst))

    # Score for the new message; the trailing "\n" item leaves a blank
    # line between the reports of consecutive K values.
    print(str(spam_filter_obj.apply_bayes_thm_list_of_words(final_lst))
          + " " + "\n")