def parsed_mail(self):
    """Parse the original file and persist everything extracted from it.

    Builds an ``email.message.Message`` from ``self.original_file``. When
    ``self.is_mail`` accepts it, the original message is written below
    ``self.original_dir``, ``self.get_mail_parts`` is called to collect the
    metadata and the payload is passed to ``data_insertion_mysql``.

    Returns:
        dict: the mail payload; an empty dictionary if the parsed object
        is not an email.
    """
    message = message_from_file(self.original_file)
    self.mail = message
    logger.info("Getting mail.message.Message object from binary file")

    # The payload starts empty and stays empty for non-mail input.
    self.mail_payload = {}

    if not self.is_mail(message):
        logger.info("The file %s is not email", self.file_name)
        return self.mail_payload

    # Every message instance gets its own directory for the original file.
    os.makedirs(self.original_dir)
    logger.info("Created directory for original file")

    original_path = os.path.join(self.original_dir, self.file_name)
    self.original_path = original_path
    self.object_write(original_path, str(self.mail))
    self.mail_payload['Original'] = original_path

    logger.debug("Calling self.get_mail_parts method to parse mail")
    self.get_mail_parts(self.mail)

    logger.debug("Calling data_insertion_mysql func to insert %s",
                 self.file_name)
    data_insertion_mysql(self.mail_payload)
    return self.mail_payload
def _remove_parent_dir(file_path):
    """Remove the directory that contains ``file_path``.

    Returns:
        bool: True if a directory was removed, False if ``file_path`` is not
        a path (e.g. ``None``) or its directory does not exist.
    """
    try:
        file_dir = os.path.dirname(file_path)
    except (AttributeError, TypeError):
        # Python 2 raises AttributeError and Python 3 raises TypeError for
        # non-path values such as None; there is nothing to delete then.
        return False
    if not os.path.isdir(file_dir):
        return False
    shutil.rmtree(file_dir)
    logger.debug("Deleted directory %s", file_dir)
    return True


def delete_mail_files(mail):
    """Delete the files of an email from NFS.

    The file locations are taken from the ``'hidden'`` part returned by
    ``mail_modification(mail)``: every value of its ``'Metadata'`` dict and
    every value of each dict in its ``'Attachments'`` list. For each
    location the directory containing it is removed.

    A value that is not a path is skipped. Previously the first such value
    aborted the loop, leaving the remaining directories on disk.

    Args:
        mail: mail rows from database.
    """
    logger.info("Starting to delete files from NFS")
    mail_to_delete = mail_modification(mail).get('hidden') or {}

    locations = list((mail_to_delete.get('Metadata') or {}).values())
    for attachment in mail_to_delete.get('Attachments') or []:
        locations.extend(attachment.values())

    for location in locations:
        _remove_parent_dir(location)
def header_parse(self, mail):
    """Parse the email headers into ``self.mail_payload``.

    Fills the payload keys ``'FROM'`` (sender address), ``'TO'`` (list of
    recipient addresses), ``'Subject'`` and ``'Date'`` (integer timestamp).

    Args:
        mail(email.message.Message): message to parse.

    Notes:
        * Recipients are split with ``email.utils.getaddresses`` instead of
          ``str.split(',')`` so a quoted display name containing a comma
          (``"Doe, John" <john@example.com>``) is not torn apart. Entries
          without an address are dropped.
        * Missing headers no longer raise: ``'FROM'`` becomes ``''``,
          ``'TO'`` becomes ``[]``, ``'Subject'`` becomes ``'(NO subject)'``
          and ``'Date'`` becomes ``None``.
        * A date carrying a UTC offset is converted through UTC. A date
          without one is still interpreted as local time.
        * Assumes ``parse`` returns a datetime-like object, which is how the
          result was already used (``timetuple()``) - TODO confirm.
    """
    # Local imports: these names were not used by the original block.
    import calendar
    from email.utils import getaddresses

    logger.info("parsing headers of email")

    # get message header FROM
    self.mail_payload['FROM'] = parseaddr(mail.get('From') or '')[1]

    # get message header TO, falling back to Delivered-To
    recipients_header = mail.get('to')
    if recipients_header is None:
        recipients_header = mail.get('Delivered-To')
    if recipients_header is None:
        recipients = []
    else:
        recipients = [
            address
            for _, address in getaddresses([str(recipients_header)])
            if address
        ]
    self.mail_payload['TO'] = recipients

    # get message subject
    raw_subject = mail.get('subject')
    subject = '' if raw_subject is None else self.get_decoded(raw_subject)
    if not subject:
        subject = '(NO subject)'
    self.mail_payload['Subject'] = subject

    # get message date
    raw_date = mail.get('date')
    if raw_date is None:
        logger.warning("email has no Date header")
        date_timestamp = None
    else:
        parsed_date = parse(raw_date)
        utcoffset = getattr(parsed_date, 'utcoffset', None)
        if utcoffset is not None and utcoffset() is not None:
            # time.mktime() would treat the sender's wall-clock time as
            # local time, so go through UTC for timezone-aware dates.
            date_timestamp = calendar.timegm(parsed_date.utctimetuple())
        else:
            date_timestamp = int(time.mktime(parsed_date.timetuple()))
    self.mail_payload['Date'] = date_timestamp

    logger.debug("parsed headers in payload %s", self.mail_payload)
def object_write(self, file_path, file_object):
    """Write ``file_object`` to the file at ``file_path``.

    Args:
        file_path(str): path to write; the file is opened in ``'w'`` mode.
        file_object: content handed unchanged to the file's ``write``.
    """
    with open(file_path, 'w') as destination:
        destination.write(file_object)
    logger.debug("Saved file to %s", file_path)
def get_decoded(self, header_to_decode):
    """Decode an RFC 2047 encoded header (subject, attachment name) to text.

    All fragments returned by ``decode_header`` are decoded with their own
    charset and joined. Previously only the first fragment was returned and
    its charset was ignored, which truncated multi-part headers and could
    hand back undecoded bytes.

    Args:
        header_to_decode: email header to decode; may be ``None``.

    Returns:
        The decoded header text; ``''`` when the header is ``None``; the
        header unchanged when it cannot be decoded.
    """
    # Local import: make_header was not used by the original block.
    from email.header import make_header

    logger.debug("Header to decode - %s", header_to_decode)
    if header_to_decode is None:
        return ''

    try:
        header = make_header(decode_header(header_to_decode))
        # Python 2 headers render to text via __unicode__, Python 3 via str.
        to_text = getattr(header, '__unicode__', None)
        decoded = to_text() if to_text is not None else str(header)
    except (UnicodeError, LookupError):
        # Undecodable bytes or an unknown charset: keep the raw header.
        decoded = header_to_decode

    logger.debug("Decoded header - %s", decoded)
    return decoded
def update_mail(mail_id, items_to_update):
    """Update the metadata of one email.

    The requested items are split by ``key_checker`` into keys that may be
    updated and keys that may not. The accepted ones are renamed with
    ``key_reformatter`` and passed to ``db.update_mail``.

    Args:
        mail_id(int): id of email to update.
        items_to_update(dict): items to update, taken from the PUT method.

    Returns:
        dict: may contain the keys
            ``"Wrong keys"`` - rejected keys, which are not updated;
            ``"Updated"`` - the items that were updated;
            ``"Error"`` - the error value returned by the db layer.
    """
    logger.debug("Updating email with items %s", items_to_update)
    report = {}
    checked = key_checker(items_to_update)

    rejected = checked.get('impossible_keys')
    if rejected:
        report["Wrong keys"] = rejected
        logger.debug("wrong keys %s", rejected)

    accepted = checked.get('possible_keys')
    if not accepted:
        return report

    db_fields = key_reformatter(accepted)
    logger.debug("updating %s", db_fields)
    outcome = db.update_mail(mail_id, db_fields)
    if outcome is True:
        report["Updated"] = accepted
        logger.debug("Updated %s", accepted)
    elif outcome:
        # Any other truthy value is reported as the error.
        report["Error"] = outcome
        logger.warning("error occured %s", outcome)
    return report
def data_insertion_mysql(cnx, payload):
    """Insert a parsed mail payload into the database.

    One row goes into ``metadata``, one row per recipient into ``mail_to``
    and one row per attachment into ``attachment``. The function does not
    commit; that is left to the caller.

    A payload without ``'TO'`` no longer raises while inserting recipients,
    and the cursor is closed even when an insert fails.

    Args:
        cnx: opened MySQLdb connection.
        payload(dict): email payload with parsed metadata.
    """
    # payload keys which belong to the metadata table, in column order
    metadata_keys = [
        'FROM', 'Subject', 'Original', 'Text body', 'Html body', 'Date'
    ]
    metadata_insertion = ("INSERT INTO metadata"
                          "(mail_from, mail_subject, path_to_original, "
                          "path_to_text_body, path_to_html_body, mail_date) "
                          "VALUES (%s, %s, %s, %s, %s, %s)")
    mail_to_insertion = ("INSERT INTO mail_to"
                         "(mail_id, mail_recipient) "
                         "VALUES (%s, %s)")
    attachment_insertion = (
        "INSERT INTO attachment"
        "(attachment_hash, path_to_attachment_file, "
        "attachment_name, attachment_size, attachment_type, mail_id) "
        "VALUES (%s, %s, %s, %s, %s, %s)")
    # payload keys which belong to the attachment table, in column order
    attachment_keys = ['md5', 'path to attachment', 'name', 'size', 'type']

    cursor = cnx.cursor()
    try:
        metadata_params = [payload.get(key) for key in metadata_keys]
        logger.info("executing inserting to metadata table")
        cursor.execute(metadata_insertion, metadata_params)

        # id generated by the metadata insert; links the child rows to it
        mail_id = cnx.insert_id()
        logger.debug("insertion id %s", mail_id)

        for recipient in payload.get('TO') or ():
            logger.info("executing inserting to mail_to table")
            cursor.execute(mail_to_insertion, (mail_id, recipient))

        for attachment in payload.get('Attachments') or ():
            attachment_params = [
                attachment.get(key) for key in attachment_keys
            ]
            attachment_params.append(mail_id)
            logger.info("Executing inserting to attachment table")
            cursor.execute(attachment_insertion, attachment_params)
    finally:
        cursor.close()
def key_checker(to_check):
    """Split the received items into updatable and non-updatable keys.

    Only ``'date'``, ``'from'`` and ``'subject'`` may be updated. Every
    other key is removed from ``to_check`` **in place** and reported, so the
    returned ``'possible_keys'`` is the pruned input dictionary itself.

    Args:
        to_check(dict): received dictionary of items to update.

    Returns:
        dict: ``{'possible_keys': <pruned to_check>,
        'impossible_keys': <list of removed keys>}``.
    """
    logger.info("Checking keys to update")
    possible_keys = ('date', 'from', 'subject')

    # Collect the rejected keys first: deleting from a dict while iterating
    # over it raises RuntimeError on Python 3.
    impossible = [key for key in list(to_check) if key not in possible_keys]
    for key in impossible:
        del to_check[key]

    result = {'possible_keys': to_check, 'impossible_keys': impossible}
    logger.debug("Checked %s", result)
    return result
def key_reformatter(dict_to_change):
    """Rename the keys requested by the user to database column names.

    Args:
        dict_to_change(dict): dictionary with user keys (``'date'``,
            ``'from'``, ``'subject'``); any other key raises ``KeyError``.

    Returns:
        dict: the same values keyed by ``'mail_date'``, ``'mail_from'`` and
        ``'mail_subject'``.
    """
    logger.info("Reformatting leys to update")
    column_names = {
        'date': 'mail_date',
        'from': 'mail_from',
        'subject': 'mail_subject',
    }
    formatted_dict = {
        column_names[user_key]: new_value
        for user_key, new_value in dict_to_change.items()
    }
    logger.debug("Reformated keys %s", formatted_dict)
    return formatted_dict
def delete_mail(cnx, mail_id):
    """Delete the ``metadata`` row of the email with the given id.

    ``mail_id`` is passed as a query parameter instead of being formatted
    into the SQL text, so the driver escapes it.

    Args:
        cnx: opened MySQLdb connection.
        mail_id(int): id of email to delete.

    Returns:
        bool: True if ``execute`` reported a truthy result (rows affected),
        False otherwise, i.e. the email did not exist.
    """
    cursor = cnx.cursor()
    sql = "DELETE FROM metadata WHERE mail_id = %s"
    logger.info("Deleting mail with id %s", mail_id)
    deleting_result = cursor.execute(sql, (mail_id, ))
    result = bool(deleting_result)
    logger.debug("Result of deleting %s", result)
    return result
def __init__(self, original_file, file_name):
    """Store the original message file and set up its saving directories.

    A fresh UUID names the sub directory of this message instance. It is
    appended to ``ORIGINALS_DIR``, ``BODY_DIR`` and ``ATT_DIR``. No
    directory is created here.

    Args:
        original_file(open file object): original msg file.
        file_name(str): name of file.
    """
    logger.debug("Initializating EmailPayload class object")
    self.original_file = original_file
    self.file_name = file_name

    # One sub directory name shared by the three storage locations.
    self.sub_dir = str(uuid.uuid4())
    self.original_dir = os.path.join(ORIGINALS_DIR, self.sub_dir)
    self.body_dir = os.path.join(BODY_DIR, self.sub_dir)
    self.att_dir = os.path.join(ATT_DIR, self.sub_dir)

    # attachments found while parsing are collected here
    self.attachments = []
def data_selection(cnx, mail_id=None):
    """Select mail rows joined across metadata, attachment and mail_to.

    ``mail_id`` is passed as a query parameter instead of being formatted
    into the SQL text, and the ``WHERE`` clause is appended with a
    separating space.

    Args:
        cnx: opened MySQLdb connection.
        mail_id(int): select only the rows of this email; any falsy value
            (the default ``None``) selects the rows of all emails.

    Returns:
        The rows fetched with a ``DictCursor`` (one dict per joined row).
    """
    # define cursor with dictionary type
    cursor = cnx.cursor(MySQLdb.cursors.DictCursor)
    sql = ("SELECT * FROM metadata LEFT "
           "JOIN attachment ON (metadata.mail_id = attachment.mail_id) "
           "JOIN mail_to ON (mail_to.mail_id=metadata.mail_id)")
    params = None
    if mail_id:
        sql += " WHERE metadata.mail_id = %s"
        params = (mail_id, )
        logger.debug("selecting emeil with id %s", mail_id)
    cursor.execute(sql, params)
    return cursor.fetchall()
def delete_mail(mail_id):
    """Delete an email from the database and its files from NFS.

    The email is first looked up with ``db.data_selection``. Only if rows
    are found is ``db.delete_mail`` called, and only if that returns True
    are the files removed with ``delete_mail_files``.

    Args:
        mail_id(int): id of email to delete.

    Returns:
        dict: empty if the email did not exist; otherwise it may contain
            ``'Deleted'`` - message with the deleted mail id;
            ``'Error'`` - the error value returned by the db layer.
    """
    logger.info("Deleting mail with id %s", mail_id)
    report = {}

    mail = db.data_selection(mail_id)
    if not mail:
        return report

    outcome = db.delete_mail(mail_id)
    if outcome is True:
        report['Deleted'] = "Mail with id %s" % (mail_id, )
        delete_mail_files(mail)
        logger.debug("Mail with %s deleted", mail_id)
    elif outcome:
        # Any other truthy value is reported as the error.
        report['Error'] = outcome
        logger.warning("Error occured %s", outcome)
    return report
def update_mail(cnx, mail_id, to_update):
    """Update columns of the ``metadata`` row of the email with the given id.

    The new values and ``mail_id`` are passed as query parameters. Column
    names cannot be parameterized and are taken verbatim from the keys of
    ``to_update``, so those keys must come from a trusted source.

    Args:
        cnx: opened MySQLdb connection.
        mail_id(int): id of email to update.
        to_update(dict): column name -> new value.

    Returns:
        bool: True if ``execute`` reported a truthy result (rows affected),
        False otherwise. An empty ``to_update`` returns False without
        touching the database.
    """
    if not to_update:
        # "UPDATE metadata SET  WHERE ..." would be invalid SQL.
        logger.debug("result of updating is %s", False)
        return False

    cursor = cnx.cursor()
    # Freeze the column order once so names and values stay aligned.
    columns = list(to_update)
    set_expression = ",".join(column + "=%s" for column in columns)
    sql = "UPDATE metadata SET " + set_expression + " WHERE mail_id = %s"
    params = [to_update[column] for column in columns]
    params.append(mail_id)

    logger.info("Updating mail with id %s", mail_id)
    updating_result = cursor.execute(sql, params)
    result = bool(updating_result)
    logger.debug("result of updating is %s", result)
    return result
def wrapper(*args, **kwargs):
    """Run ``func`` with a fresh MySQLdb connection as its first argument.

    The connection is opened from the ``DB_SECTION`` settings of ``config``.
    It is committed when ``func`` returns, rolled back when ``func`` raises,
    and closed in every case.

    Returns:
        The result of ``func``. If an exception was raised, the last
        argument of that exception is returned instead. When the exception
        has no arguments, or its last argument is falsy, ``repr(error)`` is
        returned so callers that test the result for truthiness still see
        the failure.
    """
    logger.info("connecting to db")
    cnx = MySQLdb.connect(host=config.get(DB_SECTION, 'host'),
                          user=config.get(DB_SECTION, 'user'),
                          passwd=config.get(DB_SECTION, 'passwd'),
                          db=config.get(DB_SECTION, 'db'),
                          charset=config.get(DB_SECTION, 'charset'))
    logger.debug("Connected %s", cnx)
    try:
        logger.debug("Calling func %s ", func.__name__)
        result = func(cnx, *args, **kwargs)
        logger.info("Commiting changes")
        cnx.commit()
    except Exception as error:
        logger.info("Error occured")
        # logger.exception keeps the traceback alongside the message.
        logger.exception(error)
        logger.info("Rollbacking changes")
        cnx.rollback()
        # error.args may be empty (IndexError on [-1]) or end in a falsy
        # value, which callers would mistake for "nothing happened".
        last_arg = error.args[-1] if error.args else None
        result = last_arg or repr(error)
    finally:
        logger.debug("Closing connection %s", cnx)
        cnx.close()
    return result