class RENEGE:
    """Spam filter built on a JSON e-mail dump, the CRUD store and EmailAnalyzer."""

    def __init__(self):
        self.email_file = "800-mails.json"
        self.crud = CRUD()
        self.e_mail = EmailAnalyzer()

    def calculate_user_trust(self, user_id):
        """Return the trust score of ``user_id``, clamped to the range [0, 100].

        trust1 = (last_seen * ham) / (first_seen * (ham + spam))
        trust2 = mean ``Trust`` of the groups listed in the user's ``Groups``

        The result is the mean of trust1 and trust2, except that it is trust2
        when trust2 < 50 and 100 when trust1 > 100. A zero denominator (user
        without messages, first-seen date of 0, or no group) makes the
        corresponding term 0 instead of raising ZeroDivisionError.
        """
        first_seen = self.crud.get_user_data(user_id, "Date_of_first_seen_message")
        last_seen = self.crud.get_user_data(user_id, "Date_of_last_seen_message")
        n_ham = self.crud.get_user_data(user_id, "HamN")
        n_spam = self.crud.get_user_data(user_id, "SpamN")
        groups = self.crud.get_user_data(user_id, "Groups") or []

        # Sum the trust of every group the user is registered in.
        # NOTE(review): confirm CRUD exposes get_group_data (and not only
        # get_groups_data) -- the CRUD class is not visible from here.
        sum_trust = 0
        for group in groups:
            group_id = self.crud.get_group_id(group)
            sum_trust += self.crud.get_group_data(group_id, 'Trust')

        denominator = first_seen * (n_ham + n_spam)
        trust1 = (last_seen * n_ham) / denominator if denominator else 0
        trust2 = sum_trust / len(groups) if groups else 0

        trust = (trust1 + trust2) / 2
        if trust2 < 50:
            trust = trust2
        if trust1 > 100:
            trust = 100

        # Keep the final value inside [0, 100].
        return max(0, min(100, trust))

    def classify_emails(self):
        """Load the e-mail file and process every message in it.

        Returns:
            True when processing completes.

        Raises:
            Any exception raised while reading or processing the e-mails is
            propagated unchanged, so the caller sees the real cause.
        """
        self.process_email(self.get_email())
        return True

    def process_email(self, new_emails):
        """Register or update the sender of every e-mail in ``new_emails``.

        Args:
            new_emails: dict with a ``"dataset"`` list whose items hold a
                ``"mail"`` dict providing ``From``, ``Date`` and ``Spam``.

        Returns:
            True once every e-mail has been handled.
        """
        for entry in new_emails["dataset"]:
            mail = entry['mail']
            sender = mail['From']
            date = mail['Date']
            is_spam = mail['Spam'] == 'true'

            if self.crud.get_user_id(sender):
                self.update_user_info(sender, date, is_spam)
            else:
                self.add_user_info(sender, date)
        return True

    def update_user_info(self, new_user_email, new_user_date, new_email_spam):
        """Update the counters and trust of the user named ``new_user_email``.

        A spam message increments ``SpamN`` and lowers ``Trust`` by one (not
        below 0); a ham message increments ``HamN`` and raises ``Trust`` by
        one (not above 100). Only the first matching user is updated.

        Returns:
            True (also when no user matches).
        """
        users = self.crud.read_users_file()
        for key, user in users.items():
            if user.get("name") != new_user_email:
                continue

            # NOTE(review): this overwrites the *first* seen date on every
            # message; the last-seen date may have been intended -- confirm.
            self.crud.update_users(key, 'Date_of_first_seen_message', new_user_date)

            if new_email_spam:
                spam_n = user.get('SpamN') + 1
                trust = user.get('Trust')
                if trust > 0:
                    trust -= 1
                self.crud.update_users(key, 'SpamN', spam_n)
                self.crud.update_users(key, 'Trust', trust)
            else:
                ham_n = user.get('HamN') + 1
                trust = user.get('Trust')
                if trust < 100:
                    trust += 1
                self.crud.update_users(key, 'HamN', ham_n)
                # The trust value belongs in 'Trust'; it used to overwrite 'HamN'.
                self.crud.update_users(key, 'Trust', trust)
            break
        return True

    def add_user_info(self, user_email, user_date):
        """Create a new user through the CRUD layer."""
        self.crud.add_new_user(user_email, user_date)

    def update_group_info(self, user_group_list):
        """Set the trust of the group whose member list equals ``user_group_list``.

        Returns:
            True when a matching group was found, False otherwise.
        """
        groups = self.crud.read_groups_file()
        # The groups file may be a mapping of id -> group; iterate the records.
        records = groups.values() if isinstance(groups, dict) else groups
        for group in records:
            if group['List_of_members'] == user_group_list:
                # NOTE(review): only the in-memory record is modified and the
                # second list entry is used as the user id -- confirm both.
                group['Trust'] = self.crud.get_user_data(user_group_list[1], 'Trust')
                return True
        return False

    def get_user_email_list(self):
        """Return the unique sender addresses of the e-mail file, in first-seen order."""
        emails = self.get_email()
        senders = (mail['mail']['From'] for mail in emails['dataset'])
        return list(dict.fromkeys(senders))

    def get_email(self):
        """Read the JSON e-mail file and return its content as a dict."""
        with open(self.email_file, encoding="utf-8") as file:
            return json.load(file)
class RENEGE:
    """Spam filter built on a JSON e-mail dump, the CRUD store and EmailAnalyzer."""

    def __init__(self):
        self.email_file = "train_set.json"
        self.crud = CRUD()
        self.e_mail = EmailAnalyzer()

    def calculate_user_trust(self, user_id):
        """Return the trust score of ``user_id``, clamped to the range [0, 100].

        trust1 = (last_seen * ham) / (first_seen * (ham + spam))
        trust2 = mean ``Trust`` of the groups whose member list holds the user

        The result is the mean of trust1 and trust2, except that it is trust2
        when trust2 < 50 and 100 when trust1 > 100. A zero denominator makes
        the corresponding term 0 instead of raising ZeroDivisionError.
        """
        first_seen = self.crud.get_user_data(
            user_id, "Date_of_first_seen_message")
        last_seen = self.crud.get_user_data(
            user_id, "Date_of_last_seen_message")
        n_ham = self.crud.get_user_data(user_id, "HamN")
        n_spam = self.crud.get_user_data(user_id, "SpamN")
        user_name = self.crud.get_user_data(user_id, "name")
        groups = self.crud.read_groups_file()

        # Trust values of every group the user belongs to.
        group_trusts = [
            group["Trust"]
            for group in groups.values()
            if user_name in group["List_of_members"]
        ]

        denominator = first_seen * (n_ham + n_spam)
        trust1 = (last_seen * n_ham) / denominator if denominator else 0
        trust2 = sum(group_trusts) / len(group_trusts) if group_trusts else 0

        trust = (trust1 + trust2) / 2
        if trust2 < 50:
            trust = trust2
        if trust1 > 100:
            trust = 100

        return max(0, min(100, trust))

    def classify_emails(self):
        """Load the e-mail file and process every message in it.

        Returns:
            True when processing completes.

        Raises:
            Any exception raised while processing is reported on stdout and
            then re-raised unchanged.
        """
        try:
            self.process_email(self.get_email())
        except Exception as e:
            print("Error!", e.__class__, "occurred.")
            raise
        return True

    def process_email(self, new_emails):
        """Update users and groups for every e-mail in ``new_emails``.

        Args:
            new_emails: dict with a ``"dataset"`` list whose items hold a
                ``"mail"`` dict providing ``From``, ``Date`` and ``Spam``.
                When None, the e-mail file is read instead.

        Returns:
            True on success, False as soon as a CRUD operation fails.
        """
        # Use the data handed in by the caller; it used to be ignored and the
        # file was read a second time.
        emails = new_emails if new_emails is not None else self.get_email()
        dataset = emails["dataset"]

        print("Processing emails")
        email_count = len(dataset)

        for index, email in enumerate(dataset, start=1):
            print("\rEmail " + str(index) + "/" + str(email_count), end="")

            data = email["mail"]
            name = data["From"]
            date = data["Date"]
            is_spam = data["Spam"]

            # Look the sender up; CRUD signals an unknown user with RuntimeError.
            try:
                user_id = self.crud.get_user_id(name)
            except RuntimeError:
                if not self.crud.add_new_user(name, date):
                    return False
                user_id = self.crud.get_user_id(name)

            # Count the message as spam or ham.
            spam_increment = 1 if is_spam == "true" else 0
            if not self.update_user_info(user_id, date, spam_increment,
                                         1 - spam_increment):
                return False

            # Refresh every group the user belongs to.
            groups = self.crud.get_user_data(user_id, "Groups")
            for group_name in groups:
                try:
                    group_id = self.crud.get_group_id(group_name)
                    if not self.update_group_info(group_id, user_id):
                        return False
                except RuntimeError:
                    return False

        print("\n")
        return True

    def update_user_info(self, user_id, new_user_date, new_email_spam, new_email_ham):
        """Update the seen dates, message counters and trust of a user.

        Args:
            user_id: identifier of the user in the CRUD store.
            new_user_date: date of the message, as found in the e-mail file.
            new_email_spam: number of spam messages to add.
            new_email_ham: number of ham messages to add.

        Returns:
            True on success, False when a CRUD update fails.
        """
        # Widen the [first seen, last seen] window when the date lies outside.
        new_date = self.crud.convert_to_unix(new_user_date)
        if new_date > self.crud.get_user_data(user_id, "Date_of_last_seen_message"):
            if not self.crud.update_users(user_id, "Date_of_last_seen_message",
                                          new_user_date):
                return False
        elif new_date < self.crud.get_user_data(user_id, "Date_of_first_seen_message"):
            if not self.crud.update_users(user_id, "Date_of_first_seen_message",
                                          new_user_date):
                return False

        # Trust is the percentage of ham messages (50 while nothing is known).
        spam_n = self.crud.get_user_data(user_id, "SpamN") + new_email_spam
        ham_n = self.crud.get_user_data(user_id, "HamN") + new_email_ham
        total = spam_n + ham_n
        trust_lvl = 50
        if total != 0:
            trust_lvl = min(ham_n / total * 100, 100)

        if not self.crud.update_users(user_id, "SpamN", spam_n):
            return False
        if not self.crud.update_users(user_id, "HamN", ham_n):
            return False
        return self.crud.update_users(user_id, "Trust", trust_lvl)

    def update_group_info(self, group_id, user_id):
        """Add the user to the group if needed and refresh the group trust.

        The group trust is the mean trust of its members, capped at 100.

        Returns:
            The result of the last CRUD update, or False on failure.
        """
        try:
            users_list = self.crud.get_groups_data(group_id, "List_of_members")
            user_name = self.crud.get_user_data(user_id, "name")
            if user_name not in users_list:
                users_list.append(user_name)

            # Accumulate the trust of every member of the group.
            trust_sum = 0
            for user in users_list:
                curr_user_id = self.crud.get_user_id(user)
                trust_sum += self.crud.get_user_data(curr_user_id, "Trust")

            # users_list holds at least user_name here, so the division is
            # safe. The member count was computed but never used before,
            # leaving a sum that saturated at 100.
            trust_lvl = min(trust_sum / len(users_list), 100)

            if self.crud.update_groups(group_id, "Trust", trust_lvl):
                return self.crud.update_groups(group_id, 'List_of_members', users_list)
            return False
        except RuntimeError:
            return False

    def get_user_email_list(self):
        """Return the names (e-mail addresses) of the users known to CRUD."""
        users = self.crud.users_data
        # users_data may be a mapping of id -> user; iterate the records.
        records = users.values() if isinstance(users, dict) else users
        return [user["name"] for user in records]

    def get_email(self):
        """Read the JSON e-mail file and return its content as a dict."""
        with open(self.email_file) as email_file:
            return json.load(email_file)
class RENEGE:
    """Spam filter built on a JSON e-mail dump, the CRUD store and EmailAnalyzer."""

    def __init__(self):
        self.email_file = "1000-mails.json"
        self.crud = CRUD()
        self.e_mail = EmailAnalyzer()

    def calculate_user_trust(self, user_id):
        """Return the trust value of a specific user.

        trust1 = (last_seen * ham) / (first_seen * (ham + spam))
        trust2 = mean ``Trust`` of the groups whose member list holds the user
                 (0 when the user is in no group)

        Returns:
            trust2 when trust2 < 50, 100 when trust1 > 100, otherwise the mean
            of both clamped to [0, 100]. None (after printing a message) when
            trust1 cannot be computed because its denominator is zero.
        """
        # Trust1 inputs.
        nb_spam = self.crud.get_user_data(user_id, "SpamN")
        nb_ham = self.crud.get_user_data(user_id, "HamN")
        time_first_seen_message = self.crud.get_user_data(
            user_id, "Date_of_first_seen_message")
        time_last_seen_message = self.crud.get_user_data(
            user_id, "Date_of_last_seen_message")

        denominator = time_first_seen_message * (nb_ham + nb_spam)
        if denominator == 0:
            print("trust1 not possible, division by zero")
            return
        trust1 = (time_last_seen_message * nb_ham) / denominator

        # Trust2: group membership is decided on the user's e-mail address.
        group_list = self.crud.read_groups_file()
        user_email = self.crud.get_user_data(user_id, "name")
        group_trusts = [
            group["Trust"]
            for group in group_list.values()
            if user_email in group["List_of_members"]
        ]
        # A user without any group gets trust2 = 0 instead of a
        # ZeroDivisionError.
        trust2 = sum(group_trusts) / len(group_trusts) if group_trusts else 0

        if trust2 < 50:
            return trust2
        if trust1 > 100:
            return 100

        final_trust = (trust1 + trust2) / 2
        return max(0, min(100, final_trust))

    def classify_emails(self, spam_definition_mode):
        """Load the e-mail file and process it with the given spam mode.

        Returns:
            True on success, False when any exception occurs (the exception
            class is printed).
        """
        try:
            self.process_email(self.get_email(), spam_definition_mode)
            return True
        except Exception as e:
            print("Error!", e.__class__, "occurred.")
            return False

    def process_email(self, new_emails, mode):
        """Register every sender, then update users and the default group.

        Args:
            new_emails: dict with a ``"dataset"`` list whose items hold a
                ``"mail"`` dict providing ``From``, ``Date`` and ``Spam``.
            mode: 0 uses the ``Spam`` label of the e-mail, 1 uses is_spam1,
                2 uses is_spam2.

        Returns:
            True once every e-mail has been handled.
        """
        list_of_email_addresses = self.get_user_email_list()
        for user in list_of_email_addresses:
            # The default date is the Unix epoch.
            self.crud.add_new_user(user, "1970-01-01")
        self.crud.add_new_group('default', 50, list_of_email_addresses)

        is_spam = ''
        for email in new_emails["dataset"]:
            individual_email = email["mail"]
            email_address = individual_email["From"]
            date = time.mktime(
                datetime.datetime.strptime(individual_email["Date"],
                                           "%Y-%m-%d").timetuple())

            if mode == 0:
                is_spam = individual_email["Spam"]
            elif mode == 1:
                # Pass the inner mail dict: the helpers read "From"/"Spam".
                is_spam = self.is_spam1(individual_email)
            elif mode == 2:
                is_spam = self.is_spam2(individual_email)

            self.update_user_info(email_address, date, is_spam)
            self.update_group_info(list_of_email_addresses, 'default')
        return True

    def is_spam1(self, email):
        """Evaluate equation 1: S = P * (H * T1 + T2) + H * T2 * not T3.

        Args:
            email: the mail dict, or its ``{"mail": {...}}`` wrapper.

        Returns:
            bool, True when the e-mail is considered spam.
        """
        mail = email.get("mail", email)
        user_id = self.crud.get_user_id(mail["From"])
        trust = self.crud.get_user_data(user_id, "Trust")
        first = self.crud.get_user_data(user_id, "Date_of_first_seen_message")
        last = self.crud.get_user_data(user_id, "Date_of_last_seen_message")

        p = mail["Spam"] == "true"
        # H: the user has been active for more than 31 days.
        h = (float(last) - float(first)) / (60 * 60 * 24) > 31
        t1 = trust < 60
        t2 = trust < 70
        t3 = trust > 75
        return (p and ((h and t1) or t2)) or (h and t2 and not t3)

    def is_spam2(self, email):
        """Evaluate equation 2: S = P + not T3 * T2.

        Args:
            email: the mail dict, or its ``{"mail": {...}}`` wrapper.

        Returns:
            bool, True when the e-mail is considered spam.
        """
        mail = email.get("mail", email)
        user_id = self.crud.get_user_id(mail["From"])
        trust = self.crud.get_user_data(user_id, "Trust")

        p = mail["Spam"] == "true"
        t2 = trust < 70
        t3 = trust > 75
        return p or (not t3 and t2)

    def update_user_info(self, new_user_email, new_user_date, new_email_spam):
        """Update the seen dates, message counters and trust of a user.

        Args:
            new_user_email: address of the user.
            new_user_date: date of the message, comparable with stored dates.
            new_email_spam: spam verdict, either the string ``"true"`` or the
                boolean True; anything else counts as ham.

        Returns:
            True on success, False when the update raises.
        """
        user_id = self.crud.get_user_id(new_user_email)
        date_first_seen_message = self.crud.get_user_data(
            user_id, "Date_of_first_seen_message")
        date_last_seen_message = self.crud.get_user_data(
            user_id, "Date_of_last_seen_message")

        # is_spam1/is_spam2 yield booleans while the data set yields strings.
        is_spam = new_email_spam is True or new_email_spam == "true"

        try:
            if new_user_date < date_first_seen_message:
                self.crud.update_users(user_id, "Date_of_first_seen_message",
                                       new_user_date)
            elif new_user_date > date_last_seen_message:
                self.crud.update_users(user_id, "Date_of_last_seen_message",
                                       new_user_date)

            # Update SpamN/HamN, then the trust (percentage of ham).
            user_spamN = self.crud.get_user_data(user_id, "SpamN")
            user_hamN = self.crud.get_user_data(user_id, "HamN")
            if is_spam:
                user_spamN = user_spamN + 1
                self.crud.update_users(user_id, "SpamN", user_spamN)
            else:
                user_hamN = user_hamN + 1
                self.crud.update_users(user_id, "HamN", user_hamN)

            total_msg = user_spamN + user_hamN
            self.crud.update_users(user_id, "Trust",
                                   (user_hamN / total_msg) * 100)
            return True
        except Exception:
            return False

    def update_group_info(self, user_group_list, group_name):
        """Set the trust of ``group_name`` to the rounded mean trust of its users.

        Returns:
            True on success, False when anything raises (including an empty
            ``user_group_list``).
        """
        try:
            trust_sum = 0
            for user in user_group_list:
                user_id = self.crud.get_user_id(user)
                trust_sum += self.crud.get_user_data(user_id, "Trust")

            # New mean trust of the members.
            group_trust = round(trust_sum / len(user_group_list), 0)
            group_id = self.crud.get_group_id(group_name)
            self.crud.update_groups(group_id, "Trust", group_trust)
            return True
        except Exception:
            return False

    def get_user_email_list(self):
        """Return the unique sender addresses of the e-mail file, in first-seen order."""
        email_dict = self.get_email()
        senders = (email["mail"]["From"] for email in email_dict["dataset"])
        return list(dict.fromkeys(senders))

    def get_email(self):
        """Read the JSON e-mail file and return its content as a dict."""
        with open(self.email_file) as email_file:
            return json.load(email_file)
class RENEGE:
    """Spam filter built on a JSON e-mail dump, the CRUD store and EmailAnalyzer."""

    def __init__(self):
        self.email_file = "800-mails.json"
        self.crud = CRUD()
        self.e_mail = EmailAnalyzer()

    def calculate_user_trust(self, user_id):
        """Return the trust score of ``user_id``, clamped to the range [0, 100].

        trust1 = (last_seen * ham) / (first_seen * (ham + spam))
        trust2 = mean ``Trust`` of the groups listed in the user's ``Groups``

        The result is the mean of trust1 and trust2, except that it is trust2
        when trust2 < 50 and 100 when trust1 > 100. A zero denominator (user
        without messages, first-seen date of 0, or no group) makes the
        corresponding term 0 instead of raising ZeroDivisionError.
        """
        first_seen = self.crud.get_user_data(user_id, "Date_of_first_seen_message")
        last_seen = self.crud.get_user_data(user_id, "Date_of_last_seen_message")
        n_ham = self.crud.get_user_data(user_id, "HamN")
        n_spam = self.crud.get_user_data(user_id, "SpamN")
        groups = self.crud.get_user_data(user_id, "Groups") or []

        # Sum the trust of every group the user is registered in.
        # NOTE(review): confirm CRUD exposes get_group_data (and not only
        # get_groups_data) -- the CRUD class is not visible from here.
        sum_trust = 0
        for group in groups:
            group_id = self.crud.get_group_id(group)
            sum_trust += self.crud.get_group_data(group_id, 'Trust')

        denominator = first_seen * (n_ham + n_spam)
        trust1 = (last_seen * n_ham) / denominator if denominator else 0
        trust2 = sum_trust / len(groups) if groups else 0

        trust = (trust1 + trust2) / 2
        if trust2 < 50:
            trust = trust2
        if trust1 > 100:
            trust = 100

        # Keep the final value inside [0, 100].
        return max(0, min(100, trust))

    def classify_emails(self, calculation_mode, is_log_est, is_log_combo):
        """Load the e-mail file and process every message in it.

        Returns:
            True when processing completes.

        Raises:
            Any exception raised while reading or processing the e-mails is
            propagated unchanged, so the caller sees the real cause.
        """
        self.process_email(self.get_email(), calculation_mode,
                           is_log_est, is_log_combo)
        return True

    def process_email(self, new_emails, calculation_mode, is_log_est, is_log_combo):
        """Classify every e-mail of ``new_emails`` and update its sender.

        Args:
            new_emails: dict with a ``"dataset"`` list whose items hold a
                ``"mail"`` dict providing ``From``, ``Date``, ``Spam``,
                ``Subject`` and ``Body``.
            calculation_mode: 0 uses EmailAnalyzer.is_spam on subject/body,
                1 uses is_spam_function_one, 2 uses is_spam_function_two;
                any other value keeps the label found in the e-mail.
            is_log_est: forwarded to EmailAnalyzer.is_spam (mode 0).
            is_log_combo: forwarded to EmailAnalyzer.is_spam (mode 0).

        Returns:
            True once every e-mail has been handled.
        """
        for entry in new_emails["dataset"]:
            mail = entry['mail']
            email_adr = mail['From']
            mail_date = mail['Date']
            spam = mail['Spam'] == 'true'
            subject = mail['Subject']
            body = mail['Body']

            user_id = self.crud.get_user_id(email_adr)
            if user_id:
                # NOTE(review): an existing user is updated here and again
                # after classification below -- confirm this double update
                # is intended.
                self.update_user_info(email_adr, mail_date, spam)
            else:
                self.add_user_info(email_adr, mail_date)
                # The id only exists once the user has been created.
                user_id = self.crud.get_user_id(email_adr)

            trust = self.crud.get_user_data(user_id, "Trust")
            avg_group_trust = self._average_group_trust(email_adr)

            # Number of days between the first and the last seen message.
            user_activity = self.substract_dates(
                self.crud.get_user_data(user_id, "Date_of_last_seen_message"),
                self.crud.get_user_data(user_id, "Date_of_first_seen_message"))

            if calculation_mode == 1:
                spam = self.e_mail.is_spam_function_one(
                    spam, user_activity, trust, avg_group_trust)
            elif calculation_mode == 2:
                spam = self.e_mail.is_spam_function_two(
                    spam, trust, avg_group_trust)
            elif calculation_mode == 0:
                spam = self.e_mail.is_spam(subject, body, is_log_est,
                                           is_log_combo, 0)

            # Store the final verdict for the user.
            self.update_user_info(email_adr, mail_date, spam)
        return True

    def _average_group_trust(self, email_adr):
        """Return the mean ``Trust`` of the groups containing ``email_adr`` (0 if none)."""
        groups = self.crud.read_groups_file()
        # The groups file may be a mapping of id -> group; iterate the records.
        records = groups.values() if isinstance(groups, dict) else groups
        trusts = [group['Trust'] for group in records
                  if email_adr in group['List_of_members']]
        return sum(trusts) / len(trusts) if trusts else 0

    @staticmethod
    def _parse_ymd(value):
        """Turn a ``YYYY-MM-DD`` value into a ``date`` object."""
        parts = str(value).split("-")
        return date(int(parts[0]), int(parts[1]), int(parts[2]))

    def substract_dates(self, last_seen_msg, first_seen_msg):
        """Return the number of days from ``first_seen_msg`` to ``last_seen_msg``.

        Both arguments are expected in ``YYYY-MM-DD`` form. The result is
        negative when the last date precedes the first one.
        """
        delta = self._parse_ymd(last_seen_msg) - self._parse_ymd(first_seen_msg)
        # timedelta.days also covers a zero-day difference, whose string form
        # ("0:00:00") could not be converted to int.
        return delta.days
def test_crud_constructor(self):
    """A CRUD built with ``False`` exposes the fixture user and no group."""
    store = CRUD(False)
    # Users file: exactly the fixture user, stored under the key "1".
    self.assertEqual(store.read_users_file(), {"1": self.user})
    # Groups file: empty.
    self.assertEqual(store.read_groups_file(), {})