def _log_message_ids(self):
    """Dump the raw fetch data of every selectable folder into a SQLite log.

    The ``data`` and ``synced`` tables are created on first use. If this
    account's email is already listed in ``synced`` the method logs that
    fact and returns without fetching anything. Otherwise, for each folder
    returned by ``_list_selectable_folders()`` it selects the folder,
    fetches the UID range starting at ``folder._get_min_mail_id() + 1``,
    pickles each fetch result into a row of ``data`` and commits once per
    folder. Finally the account is recorded in ``synced``.

    The database connection is closed on every exit path.
    """
    from engine.models.message import Message
    import cPickle as pickle
    import sqlite3

    account_email = self._imap_account.email

    conn = sqlite3.connect(
        '/home/ubuntu/production/mailx/logs/message_data.db')
    try:
        c = conn.cursor()

        c.execute(
            ' CREATE TABLE IF NOT EXISTS "data" ('
            ' "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,'
            ' "email" INTEGER NOT NULL,'
            ' "folder" TEXT NOT NULL,'
            ' "uid" INTEGER NOT NULL,'
            ' "data" TEXT NOT NULL'
            ' ); ')
        conn.commit()

        c.execute(
            ' CREATE TABLE IF NOT EXISTS "synced" ('
            ' "id" INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,'
            ' "email" INTEGER NOT NULL'
            ' ); ')
        conn.commit()

        # Accounts that were fully dumped by an earlier run.
        emails = [row[0] for row in c.execute("SELECT email FROM synced")]
        if account_email in emails:
            logger.info("_log_message_ids(): already logged %s",
                        account_email)
            return

        logger.info("_log_message_ids(): logging message data for %s",
                    account_email)

        # The descriptors only depend on the account, not on the folder.
        is_gmail = self._imap_account.is_gmail
        descriptors = Message._get_descriptors(is_gmail)

        for folder in self._list_selectable_folders():
            # The folder must be selected before fetching from it; the
            # select response itself is not needed.
            self._imap_client.select_folder(folder.name)

            uid_criteria = '%d:*' % (folder._get_min_mail_id() + 1)
            fetch_data = self._imap_client.fetch(uid_criteria, descriptors)

            # NOTE(review): rows hold pickled data; only unpickle them if
            # the database file is trusted.
            values = [(account_email, folder.name, uid,
                       pickle.dumps(fetch_data[uid]))
                      for uid in fetch_data]
            c.executemany(
                "INSERT INTO data (email, folder, uid, data) VALUES (?, ?, ?, ?)",
                values)
            conn.commit()

        # Mark the account as done only after every folder was stored.
        c.execute("INSERT into synced (email) VALUES (?)",
                  (account_email, ))
        conn.commit()
    finally:
        # Uncommitted changes are discarded on close, which is what we want
        # when an error interrupts the dump.
        conn.close()
def _save_new_messages(self, last_seen_uid, event_data_list=None,
                       new_message_ids=None, urgent=False):
    # type: (int, t.Optional[t.List[AbstractEventData]], t.Optional[t.Set[str]], bool) -> None
    """Save any messages we haven't seen before

    Fetches messages with a uid greater than ``last_seen_uid`` from the
    IMAP client, creates a ``BaseMessage`` for every message-id that is not
    stored yet for this account, and saves one ``MessageSchema`` per
    fetched uid in this folder. Messages with missing fields or without a
    parsable message-id are skipped.

    Args:
        last_seen_uid (int): the max uid we have stored, should be 0 if
            there are no messages stored.
        event_data_list (list): if given, a ``MessageArrivalData`` or
            ``MessageMovedData`` is appended for every message saved.
            Requires ``new_message_ids`` to be given as well.
        new_message_ids (set): if given, the message-id of every
            ``BaseMessage`` created here is added to it. It is also used
            to tell an arrival (id in the set) from a move (id not in the
            set). Presumably a set of message-id strings; only ``add`` and
            ``in`` are used.
        urgent (bool): if True, save only one email
    """
    # get the descriptors for the message
    is_gmail = self._imap_account.is_gmail
    descriptors = Message._get_descriptors(is_gmail)
    # fields every fetched message must contain; identical for all messages
    required_fields = ['SEQ'] + Message._get_descriptors(is_gmail, True)

    if urgent:
        uid_criteria = '%d' % (last_seen_uid + 1)
    else:
        uid_criteria = '%d:*' % (last_seen_uid + 1)

    # all the data we're iterating over
    fetch_data = self._imap_client.fetch(uid_criteria, descriptors)

    # remove the last seen uid if we can
    if last_seen_uid in fetch_data:
        del fetch_data[last_seen_uid]

    # message header -> BaseMessage relation that stores its contacts
    contact_relations = (
        ("reply-to", "reply_to"),
        ("to", "to"),
        ("cc", "cc"),
        # TODO test if bcc is working - LM (use yagmail and look at original on GMAIL)
        ("bcc", "bcc"),
    )

    # iterate over the fetched data
    for uid in fetch_data:
        # dictionary of general data about the message
        message_data = fetch_data[uid]

        # make sure all the fields we're interested in are in the message_data
        if not self._check_fields_in_fetch(required_fields, message_data):
            continue

        # dictionary of header key value pairs
        metadata = self._parse_email_header(
            message_data[Message._header_fields_key])

        # TODO currently have a bug with parsing, if encoding fails we return None
        if metadata is None or metadata.get('message-id') is None:
            continue

        self._cleanup_message_data(message_data)
        self._cleanup_metadata(metadata)

        message_id = metadata['message-id']

        try:
            base_message = BaseMessage.objects.get(
                imap_account=self._imap_account,
                message_id=message_id)  # type: BaseMessage
        except BaseMessage.DoesNotExist:
            internal_date = self._parse_header_date(
                message_data.get('INTERNALDATE', ''))
            assert internal_date is not None
            # fall back to the internal date when the date header is
            # missing or cannot be parsed
            date = self._parse_header_date(
                metadata.get('date', '')) or internal_date

            if 'from' in metadata:
                sender = self._find_or_create_contacts(metadata['from'])[0]
            else:
                sender = None

            if is_gmail:
                thread = self._find_or_create_gmail_thread(
                    message_data['X-GM-THRID'])
            else:
                thread = None

            base_message = BaseMessage(
                imap_account=self._imap_account,
                message_id=message_id,
                in_reply_to=metadata['in-reply-to'],
                references=metadata['references'],
                date=date,
                subject=metadata.get('subject', ''),
                internal_date=internal_date,
                from_m=sender,
                _thread=thread)
            base_message.save()

            if new_message_ids is not None:
                new_message_ids.add(message_id)

            # create and save the message contacts
            for header, relation in contact_relations:
                if header in metadata:
                    getattr(base_message, relation).add(
                        *self._find_or_create_contacts(metadata[header]))

        new_message = MessageSchema(base_message=base_message,
                                    imap_account=self._imap_account,
                                    flags=message_data['FLAGS'],
                                    folder=self._schema,
                                    uid=uid,
                                    msn=message_data['SEQ'])

        try:
            new_message.save()
        except Exception:
            # best effort: record why the save failed (with traceback) and
            # move on to the next message
            logger.critical("%s failed to save message %d", self, uid,
                            exc_info=True)
            logger.critical(
                "%s stored last_seen_uid %d, passed last_seen_uid %d",
                self, self._last_seen_uid, last_seen_uid)
            logger.critical("number of messages returned %d",
                            len(fetch_data))
            # to prevent dup saved email
            continue

        if event_data_list is not None:
            assert new_message_ids is not None
            message = Message(new_message, self._imap_client)
            if message_id in new_message_ids:
                # the base message was created during this sync
                event_data_list.append(MessageArrivalData(message))
                logger.info('folder {f}: uid {u}: message_arrival'.format(
                    f=self.name, u=uid))
            else:
                # the base message already existed, so this uid is treated
                # as a move
                event_data_list.append(MessageMovedData(message))
                logger.info('folder {f}: uid {u}: message_moved'.format(
                    f=self.name, u=uid))