def _get_uid_from_msgidCb(self, msgid):
    """
    Callback: resolve the UID of a message looked up by its msgid.

    Queries soledad for the headers doc first; when that fails, falls
    back to scanning the in-memory hdoc store.

    :param msgid: the message-id to look up
    :return: the UID stored in the flags doc, or None when either the
             headers doc or the flags doc cannot be found
    """
    curried = partial(
        self._soledad.get_from_index,
        fields.TYPE_MSGID_IDX,
        fields.TYPE_HEADERS_VAL,
        msgid)
    curried.expected = "hdoc"
    hdoc = try_unique_query(curried)

    # XXX this is only a quick hack to avoid regression
    # on the "multiple copies of the draft" issue, but
    # this is currently broken since it's not efficient to
    # look for this. Should lookup better.
    # FIXME!

    if hdoc is None:
        # not in soledad: scan the memory store for a matching msgid
        store = self.memstore._hdoc_store
        matching = [doc for _, doc in store.items()
                    if doc['msgid'] == msgid]
        hdoc_dict = first(matching)
    else:
        hdoc_dict = hdoc.content

    if hdoc_dict is None:
        logger.warning("Could not find hdoc for msgid %s" % (msgid,))
        return None

    msg_chash = hdoc_dict.get(fields.CONTENT_HASH_KEY)
    fdoc = self._get_fdoc_from_chash(msg_chash)
    if not fdoc:
        logger.warning("Could not find fdoc for msgid %s" % (msgid,))
        return None
    return fdoc.content.get(fields.UID_KEY, None)
def _add_message_locally(self, result):
    """
    Adds a message to local inbox and delete it from the incoming db
    in soledad.

    # XXX this comes from a gatherresult...

    :param result: gatherResults output; its first element is a tuple
                   consisting of a SoledadDocument instance containing
                   the incoming message and data, the json-encoded,
                   decrypted content of the incoming message
    :type result: list of (SoledadDocument, str)
    :return: False when the decrypted payload list is empty; otherwise
             None (the save is driven asynchronously by the deferred)
    """
    from twisted.internet import reactor
    # gatherResults hands us a list; we only care about the first item
    msgtuple = first(result)
    doc, data = msgtuple
    log.msg('adding message %s to local db' % (doc.doc_id,))

    if isinstance(data, list):
        if empty(data):
            return False
        data = data[0]

    def msgSavedCallback(result):
        # only signal + schedule deletion when the save produced a result
        if not empty(result):
            leap_events.signal(IMAP_MSG_SAVED_LOCALLY)
            # defer the deletion of the incoming doc so it runs on the
            # next reactor iteration, after the local save settled
            deferLater(reactor, 0, self._delete_incoming_message, doc)
            leap_events.signal(IMAP_MSG_DELETED_INCOMING)

    d = self._inbox.addMessage(data, flags=(self.RECENT_FLAG,),
                               notify_on_disk=True)
    d.addCallbacks(msgSavedCallback, self._errback)
def _populate_headr(self, msg, chash, subject, date):
    """
    Build and return a headers doc for the given message.

    Repeated headers are collapsed into a single string joined with
    continuation header lines.

    :param msg: the parsed message to take the headers from
    :param chash: the content-hash for the message
    :param subject: subject to use; falsy to take it from the headers
    :param date: date to use; falsy to take it from the headers
    :return: the populated headers document
    """
    headers = defaultdict(list)
    for name, value in msg.items():
        headers[name].append(value)

    # "fix" for repeated headers.
    for name, values in headers.items():
        continuation = "\n%s: " % (name,)
        headers[name] = continuation.join(values)

    lower_headers = lowerdict(headers)
    msgid = first(MSGID_RE.findall(
        lower_headers.get('message-id', '')))

    hd = self._get_empty_doc(self.HEADERS_DOC)
    hd[self.CONTENT_HASH_KEY] = chash
    hd[self.HEADERS_KEY] = headers
    hd[self.MSGID_KEY] = msgid

    if subject or self.SUBJECT_FIELD not in headers:
        hd[self.SUBJECT_KEY] = subject
    else:
        hd[self.SUBJECT_KEY] = headers[self.SUBJECT_FIELD]

    if date or self.DATE_FIELD not in headers:
        hd[self.DATE_KEY] = date
    else:
        hd[self.DATE_KEY] = headers[self.DATE_FIELD]
    return hd
def get_flags_doc(self, mbox, uid):
    """
    Return the SoledadDocument for the given mbox and uid.

    :param mbox: the mailbox
    :type mbox: str or unicode
    :param uid: the UID for the message
    :type uid: int
    :rtype: SoledadDocument or None
    """
    result = None
    try:
        flag_docs = self._soledad.get_from_index(
            fields.TYPE_MBOX_UID_IDX,
            fields.TYPE_FLAGS_VAL, mbox, str(uid))
        # The (mbox, uid) pair should be unique: only warn on actual
        # duplicates. The previous `!= 1` check also logged "More than
        # one flag doc" when ZERO docs were found, which is misleading;
        # an empty result is reported via the None return value.
        if len(flag_docs) > 1:
            logger.warning("More than one flag doc for %r:%s" %
                           (mbox, uid))
        result = first(flag_docs)
    except Exception as exc:
        # ugh! Something's broken down there!
        logger.warning("ERROR while getting flags for UID: %s" % uid)
        logger.exception(exc)
    finally:
        # NOTE: return-in-finally guarantees a value even after the
        # except path; exceptions were already swallowed above.
        return result
def _get_body_doc(self):
    """
    Return the document that keeps the body for this message.

    Tries the attached container's memory store first; falls back
    to a soledad index query on the payload hash.
    """
    hdoc_content = self.hdoc.content
    body_phash = hdoc_content.get(fields.BODY_KEY, None)
    if not body_phash:
        logger.warning("No body phash for this document!")
        return None

    # XXX get from memstore too...
    # if memstore: memstore.get_phrash
    # memstore should keep a dict with weakrefs to the
    # phash doc...
    container = self._container
    if container is not None:
        bdoc = container.memstore.get_cdoc_from_phash(body_phash)
        if not (empty(bdoc) or empty(bdoc.content)):
            return bdoc

    # no memstore, or no body doc found there
    if not self._soledad:
        logger.error("No phash in container, and no soledad found!")
        return None
    body_docs = self._soledad.get_from_index(
        fields.TYPE_P_HASH_IDX,
        fields.TYPE_CONTENT_VAL, str(body_phash))
    return first(body_docs)
def get_flags_doc(self, mbox, uid):
    """
    Return the SoledadDocument for the given mbox and uid.

    :param mbox: the mailbox
    :type mbox: str or unicode
    :param uid: the UID for the message
    :type uid: int
    :rtype: SoledadDocument or None
    """
    result = None
    try:
        flag_docs = self._soledad.get_from_index(fields.TYPE_MBOX_UID_IDX,
                                                 fields.TYPE_FLAGS_VAL,
                                                 mbox, str(uid))
        # Fix: the old `len(flag_docs) != 1` guard emitted the
        # "More than one flag doc" warning even when NO doc matched.
        # Warn only for genuine duplicates; a miss returns None.
        if len(flag_docs) > 1:
            logger.warning("More than one flag doc for %r:%s" %
                           (mbox, uid))
        result = first(flag_docs)
    except Exception as exc:
        # ugh! Something's broken down there!
        logger.warning("ERROR while getting flags for UID: %s" % uid)
        logger.exception(exc)
    finally:
        return result
def _get_headers_doc(self):
    """
    Return the document that keeps the headers for this message,
    looked up by its content-hash.
    """
    query_args = (fields.TYPE_C_HASH_IDX,
                  fields.TYPE_HEADERS_VAL,
                  str(self.chash))
    return first(self._soledad.get_from_index(*query_args))
def get_headers_doc(self, chash):
    """
    Return the document that keeps the headers for a message
    indexed by its content-hash.

    :param chash: the content-hash to retrieve the document from.
    :type chash: str or unicode
    :rtype: SoledadDocument or None
    """
    matching = self._soledad.get_from_index(
        fields.TYPE_C_HASH_IDX,
        fields.TYPE_HEADERS_VAL,
        str(chash))
    return first(matching)
def get_headers_doc(self, chash):
    """
    Return the document that keeps the headers for a message
    indexed by its content-hash.

    :param chash: the content-hash to retrieve the document from.
    :type chash: str or unicode
    :rtype: SoledadDocument or None
    """
    query = (fields.TYPE_C_HASH_IDX, fields.TYPE_HEADERS_VAL, str(chash))
    head_docs = self._soledad.get_from_index(*query)
    return first(head_docs)
def get_raw_docs(msg, parts):
    """
    Yield a raw content document (dict) for each non-multipart
    payload in the message.

    :param msg: the parsed message to walk
    :param parts: unused here; kept for interface compatibility
    """
    for payload, headers in get_payloads(msg):
        if isinstance(payload, list):
            # multipart container: its children are walked separately
            continue
        disposition = first(
            headers.get('content-disposition', '').split(';'))
        yield {
            "type": "cnt",  # type content they'll be
            "raw": payload if not DEBUG else payload[:100],
            "phash": get_hash(payload),
            "content-disposition": disposition,
            "content-type": headers.get('content-type', ''),
            "content-transfer-encoding": headers.get(
                'content-transfer-encoding', ''),
        }
def _get_flags_doc(self):
    """
    Return the document that keeps the flags for this message.

    :return: the first matching flags doc, or the initial {} when the
             underlying query raised.
    """
    result = {}
    query_args = (fields.TYPE_MBOX_UID_IDX, fields.TYPE_FLAGS_VAL,
                  self._mbox, str(self._uid))
    try:
        result = first(self._soledad.get_from_index(*query_args))
    except Exception as exc:
        # ugh! Something's broken down there!
        logger.warning("ERROR while getting flags for UID: %s"
                       % self._uid)
        logger.exception(exc)
    finally:
        return result
def _signal_fetch_to_ui(self, doclist):
    """
    Send leap events to ui.

    :param doclist: iterable with msg documents.
    :type doclist: iterable.
    :returns: doclist
    :rtype: iterable
    """
    doclist = first(doclist)  # gatherResults pass us a list
    if doclist:
        # doclist is truthy here, so it is neither None nor empty:
        # the old `is not None` ternary and the `num_mails != 0`
        # re-check were unreachable dead branches and are removed.
        fetched_ts = time.mktime(time.gmtime())
        num_mails = len(doclist)
        log.msg("there are %s mails" % (num_mails,))
        leap_events.signal(
            IMAP_FETCHED_INCOMING, str(num_mails), str(fetched_ts))
    return doclist
def _get_ctype_from_document(self, phash):
    """
    Return the content-type from the content document.

    :param phash: the payload hash to retrieve by.
    :type phash: str or unicode
    :rtype: str or unicode
    """
    cdocs = self._soledad.get_from_index(fields.TYPE_P_HASH_IDX,
                                         fields.TYPE_CONTENT_VAL,
                                         str(phash))
    cdoc = first(cdocs)
    if not cdoc:
        # Fix: the original logged this warning but then fell through
        # and dereferenced cdoc.content anyway, raising AttributeError
        # on a None cdoc. Return "" instead, mirroring the handling in
        # _get_payload_from_document.
        logger.warning("Could not find the content doc "
                       "for phash %s" % (phash, ))
        return ""
    ctype = cdoc.content.get('ctype', "")
    return ctype
def _get_payload_from_document(self, phash):
    """
    Return the message payload from the content document.

    :param phash: the payload hash to retrieve by.
    :type phash: str or unicode
    :rtype: str or unicode or None
    """
    content_docs = self._soledad.get_from_index(fields.TYPE_P_HASH_IDX,
                                                fields.TYPE_CONTENT_VAL,
                                                str(phash))
    content_doc = first(content_docs)
    if content_doc is not None:
        return content_doc.content.get(fields.RAW_KEY, "")
    logger.warning("Could not find the content doc "
                   "for phash %s" % (phash, ))
    return ""
def _build_headers_doc(msg, chash, body_phash, parts_map):
    """
    Assemble a headers document from the original parsed message,
    the content-hash, and the parts map.

    It takes into account possibly repeated headers.
    """
    headers = defaultdict(list)
    for name, value in msg.items():
        headers[name].append(value)

    # "fix" for repeated headers (as in "Received:"
    for name, values in headers.items():
        continuation = "\n%s: " % (name.lower(),)
        headers[name] = continuation.join(values)

    lower_headers = lowerdict(dict(headers))
    msgid = first(_MSGID_RE.findall(
        lower_headers.get('message-id', '')))

    wrapper = HeaderDocWrapper(
        chash=chash, headers=headers, body=body_phash, msgid=msgid)

    # copy subject/date onto the wrapper only when present
    for attr in ("subject", "date"):
        if attr in lower_headers:
            setattr(wrapper, attr, lower_headers[attr])

    hdoc = wrapper.serialize()

    # add parts map to header doc
    # (body, multi, part_map)
    for key in parts_map:
        hdoc[key] = parts_map[key]
    return stringify_parts_map(hdoc)
def _build_headers_doc(msg, chash, body_phash, parts_map):
    """
    Assemble a headers document from the original parsed message,
    the content-hash, and the parts map.

    It takes into account possibly repeated headers.
    """
    headers = defaultdict(list)
    for hname, hvalue in msg.items():
        headers[hname].append(hvalue)

    # "fix" for repeated headers (as in "Received:"
    for hname in headers:
        glue = "\n%s: " % (hname.lower(),)
        headers[hname] = glue.join(headers[hname])

    lower_headers = lowerdict(dict(headers))
    msgid = first(_MSGID_RE.findall(
        lower_headers.get('message-id', '')))

    _hdoc = HeaderDocWrapper(
        chash=chash, headers=headers, body=body_phash, msgid=msgid)

    # copy subject/date from the lowercased headers when available
    if "subject" in lower_headers:
        _hdoc.subject = lower_headers["subject"]
    if "date" in lower_headers:
        _hdoc.date = lower_headers["date"]

    hdoc = _hdoc.serialize()

    # add parts map to header doc
    # (body, multi, part_map)
    for key, value in parts_map.items():
        hdoc[key] = value
    return stringify_parts_map(hdoc)
# XXX what other ctypes should be considered body? if part.get_content_type() in ("text/plain", "text/html"): # XXX avoid hashing again return get_hash(part.get_payload()) """ On getting the raw docs, we get also some of the headers to be able to index the content. Here we remove any mutable part, as the the filename in the content disposition. """ get_raw_docs = lambda msg, parts: ( {"type": "cnt", # type content they'll be "raw": payload if not DEBUG else payload[:100], "phash": get_hash(payload), "content-disposition": first(headers.get( 'content-disposition', '').split(';')), "content-type": headers.get( 'content-type', ''), "content-transfer-encoding": headers.get( 'content-transfer-type', '')} for payload, headers in get_payloads(msg) if not isinstance(payload, list)) """ Groucho Marx: Now pay particular attention to this first clause, because it's most important. There's the party of the first part shall be known in this contract as the party of the first part. How do you like that, that's pretty neat eh? Chico Marx: No, that's no good. Groucho Marx: What's the matter with it?
'phash': get_hash(part.get_payload()) if not part.is_multipart() else None} for part in msg.walk()] """ Utility lambda functions for getting the parts vector and the payloads from the original message. """ get_parts_vector = lambda parts: (x.get('parts', 1) for x in parts) get_payloads = lambda msg: ((x.get_payload(), dict(((str.lower(k), v) for k, v in (x.items())))) for x in msg.walk()) get_body_phash_simple = lambda payloads: first( [get_hash(payload) for payload, headers in payloads if payloads]) get_body_phash_multi = lambda payloads: (first( [get_hash(payload) for payload, headers in payloads if payloads and "text/plain" in headers.get('content-type', '')]) or get_body_phash_simple(payloads)) """ On getting the raw docs, we get also some of the headers to be able to index the content. Here we remove any mutable part, as the the filename in the content disposition. """ get_raw_docs = lambda msg, parts: (