Пример #1
0
def _unique_addresses(message, fields):
  """Return the bare addresses found in the given header fields of message.

  Entries are de-duplicated on their (display name, address) pair before the
  display name is dropped, so the order of the result is unspecified and an
  address that appears with two different display names is listed twice.
  """
  headers = list(itertools.chain.from_iterable(
    message.get_all(field, []) for field in fields))
  # Encoded words are not decoded: they should only appear in the display
  # name, which is discarded here.
  return [address for _name, address in set(email.utils.getaddresses(headers))]


def parsemail(mail, logger='none'):
  """Parse a raw mail into a dictionary of metadata.

  Parameters:
    mail:   the complete raw message as a string.
    logger: name of the logger that receives the info messages.

  Returns a dict with the keys
    'type'        -- always 'mail'
    'from'        -- list of addresses from the From and Sender headers
    'to'          -- list of addresses from the To and Cc headers
    'date'        -- Date header as "YYYY-MM-DD hh:mm:ss" in UTC (only set
                     when the message has a Date header)
    'upload_date' -- current time in the same format
    'labels'      -- list of labels; contains 'unread' unless the Status
                     header contains 'R'

  Raises MailParserException when the mail is empty, when the parser reports
  defects, or when the Date header cannot be converted.
  """
  log = logging.getLogger(logger)
  if not mail:
    raise MailParserException('Empty mail')

  data = {'type': 'mail'}

  # parse mail
  parser = email.parser.Parser()
  try:
    message = parser.parsestr(mail)
  except UnicodeEncodeError:
    # NOTE(review): presumably only reachable with Python 2 unicode input;
    # retry with the latin-1 encoded form of the mail.
    message = parser.parsestr(mail.encode('latin_1'))

  # refuse mails for which the parser signalled defects
  if message.defects:
    raise MailParserException("Parser signaled defect:\n  %s" % (str(message.defects)))

  # Real lists (not lazy map objects) are stored, so that logging the
  # addresses below does not exhaust them before they are returned.
  data['from'] = _unique_addresses(message, ('from', 'sender'))
  log.info("From: %s", ' '.join(data['from']))
  data['to'] = _unique_addresses(message, ('to', 'cc'))
  log.info("To: %s", ' '.join(data['to']))

  # parse date and convert it to standard format in UTC
  date_header = message.get('Date')
  if date_header:
    try:
      # guesses format and parses 10-tuple
      parsedtime = email.utils.parsedate_tz(date_header)
      # seconds since epoch: shift the broken-down time by its UTC offset
      utc_timestamp = calendar.timegm(parsedtime[0:9]) - parsedtime[9]
      # formatted
      data['date'] = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(utc_timestamp))
    except Exception as e:
      # also covers an unparseable header (parsedate_tz returned None)
      raise MailParserException("Could not convert %s to YYYY-MM-DD hh:mm:ss\n  %s" % (date_header, str(e)))
    log.info("Date: %s", data['date'])

  # format current UTC time
  data['upload_date'] = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())
  log.info("upload-date: %s", data['upload_date'])

  # add labels: a mail counts as unread unless its Status header contains 'R'
  data['labels'] = []
  status = message.get('Status')
  if not status or 'R' not in status:
    data['labels'].append('unread')

  if config.autolabels:
    labeller = labels.Labeller(path=config.autolabels)
    labeller.check(data)
  log.info("Labels: %s", ' '.join(data['labels']))

  return data
Пример #2
0
    def get_receivers(self):
        """
        Return the bare email address of every receiver of the message.

        The TO, CC and BCC headers are read in that order; the display-name
        part of each entry is dropped.
        """
        msg = self.message
        header_values = []
        for header_name in ('to', 'cc', 'bcc'):
            header_values.extend(str(value) for value in msg.get_all(header_name, []))

        # getaddresses yields (display name, address) pairs; keep the address
        return [address for _name, address in email.utils.getaddresses(header_values)]
Пример #3
0
def extract_sender(
    message: Union[email.message.EmailMessage, email.message.Message]
) -> Optional[str]:
    """
    Extract the sender address from the message object given.

    When the message carries exactly one "Resent-Date" header, the
    "Resent-Sender" / "Resent-From" headers are used; otherwise the plain
    "Sender" / "From" headers are used. Within the chosen pair the sender
    header wins over the from header.

    Returns the first address extracted from the chosen header, or None when
    that header is missing or yields no address.

    Raises ValueError when the message has more than one "Resent-Date" header.
    """
    resent_dates = message.get_all("Resent-Date")

    if resent_dates is not None and len(resent_dates) > 1:
        raise ValueError("Message has more than one 'Resent-' header block")
    elif resent_dates:
        sender_header_name = "Resent-Sender"
        from_header_name = "Resent-From"
    else:
        sender_header_name = "Sender"
        from_header_name = "From"

    # Prefer the sender field per RFC 2822:3.6.2.
    if sender_header_name in message:
        sender_header = message[sender_header_name]
    else:
        sender_header = message[from_header_name]

    if sender_header is None:
        return None

    # A header that is present but yields no address would otherwise make the
    # [0] lookup raise IndexError; report "no sender" instead.
    # NOTE(review): assumes extract_addresses returns a sequence - confirm.
    addresses = extract_addresses(sender_header)
    return addresses[0] if addresses else None
Пример #4
0
def extract_recipients(message: email.message.Message) -> List[str]:
    """
    Collect the recipient addresses of the message object given.

    When the message carries exactly one "Resent-Date" header, the
    "Resent-To", "Resent-Cc" and "Resent-Bcc" headers are read; otherwise
    "To", "Cc" and "Bcc". Every value of each header is passed to
    extract_addresses and the results are concatenated in header order.

    Raises ValueError when the message has more than one "Resent-Date" header.
    """
    resent_dates = message.get_all("Resent-Date")
    if resent_dates is not None and len(resent_dates) > 1:
        raise ValueError("Message has more than one 'Resent-' header block")

    if resent_dates:
        header_names = ("Resent-To", "Resent-Cc", "Resent-Bcc")
    else:
        header_names = ("To", "Cc", "Bcc")

    return [
        address
        for header_name in header_names
        for header_value in message.get_all(header_name, failobj=[])
        for address in extract_addresses(header_value)
    ]
Пример #5
0
def get_mail_addresses(message, header_name):
    """
    Collect every address listed in one header of a message.

    @type message: email.message.Message
    @param message: the email message to read
    @type header_name: str
    @param header_name: the header to read, e.g. 'from', 'to', 'cc' or any
    other header holding one or more email addresses
    @rtype: list
    @returns: one C{(name, address)} tuple per entry of the header, e.g.
    C{[(u'Name', 'user@example.com'), ...]}. The name is passed through
    C{decode_mail_header}; an entry without a name uses its address as name.
    The address is replaced by C{''} unless it is US-ASCII and matches
    C{email_address_re}.
    """
    header_values = [
        _friendly_header(value) for value in message.get_all(header_name, [])]

    result = []
    for display_name, address in email.utils.getaddresses(header_values):
        # A lone string may be either the address or the name: use it for
        # both and let the address validation below sort it out.
        if address and not display_name:
            display_name = address

        # The address must be pure ASCII and look like an email address.
        if not (is_usascii(address) and email_address_re.match(address)):
            address = ''

        result.append((decode_mail_header(display_name), address))
    return result
Пример #6
0
def get_mail_addresses(message, header_name):
    """
    Return the addresses found in a single header of a message.

    @type message: email.message.Message
    @param message: the email message to inspect
    @type header_name: str
    @param header_name: name of the header, such as 'from', 'to', 'cc' or any
    other header that contains one or more email addresses
    @rtype: list
    @returns: a list of C{(name, address)} tuples, e.g.
    C{[(u'Name', 'user@example.com'), ...]}. Names go through
    C{decode_mail_header}; when an entry carries no name its address is used
    instead. Addresses that are not US-ASCII or do not match
    C{email_address_re} are returned as C{''}.
    """
    raw_headers = message.get_all(header_name, [])
    parsed = email.utils.getaddresses(
        [_friendly_header(raw) for raw in raw_headers])

    cleaned = []
    for name_part, addr_part in parsed:
        # Only one string was given: it could be the name or the address,
        # so keep it as the name too.
        name_to_decode = name_part if (name_part or not addr_part) else addr_part

        # Keep the address only if it is ASCII and matches the address regex.
        if is_usascii(addr_part) and email_address_re.match(addr_part):
            kept_address = addr_part
        else:
            kept_address = ''

        cleaned.append((decode_mail_header(name_to_decode), kept_address))
    return cleaned
Пример #7
0
def find_emailgateway_recipient(message: message.Message) -> str:
    """
    Return the first recipient address that matches the email gateway pattern.

    The pattern comes from settings.EMAIL_GATEWAY_PATTERN, where every '%s'
    stands for an arbitrary run of characters. Matching uses re.match, so it
    is anchored at the start of the address only.

    Raises ZulipEmailForwardError when no recipient address matches.
    """
    # We can't use Delivered-To alone; an X-Gm-Original-To header is more
    # accurate when present, so headers are scanned in descending priority.
    header_priority = ["X-Gm-Original-To", "Delivered-To",
                       "Resent-To", "Resent-CC", "To", "CC"]

    escaped_pieces = [re.escape(piece)
                      for piece in settings.EMAIL_GATEWAY_PATTERN.split('%s')]
    gateway_re = re.compile(".*?".join(escaped_pieces))

    header_values = []
    for header_name in header_priority:
        header_values.extend(str(value) for value in message.get_all(header_name, []))

    for _display_name, address in getaddresses(header_values):
        if gateway_re.match(address):
            return address

    raise ZulipEmailForwardError("Missing recipient in mirror email")
Пример #8
0
def find_emailgateway_recipient(message: message.Message) -> str:
    """
    Find the email gateway address among the recipients of a message.

    Recipient headers are read in descending priority order and parsed into
    bare addresses. The first address matching the regular expression built
    from settings.EMAIL_GATEWAY_PATTERN (each '%s' acts as a wildcard) is
    returned. The match is anchored at the start of the address (re.match).

    Raises ZulipEmailForwardError when none of the addresses matches.
    """
    # Delivered-To alone is not reliable; X-Gm-Original-To is more accurate
    # when it exists, hence it comes first.
    prioritized_headers = ["X-Gm-Original-To", "Delivered-To",
                           "Resent-To", "Resent-CC", "To", "CC"]

    literal_parts = settings.EMAIL_GATEWAY_PATTERN.split('%s')
    address_re = re.compile(".*?".join(re.escape(part) for part in literal_parts))

    raw_recipients = [str(value)
                      for name in prioritized_headers
                      for value in message.get_all(name, [])]

    candidates = [address for _name, address in getaddresses(raw_recipients)]
    found = next((address for address in candidates if address_re.match(address)), None)
    if found is None:
        raise ZulipEmailForwardError("Missing recipient in mirror email")
    return found
Пример #9
0
def find_emailgateway_recipient(message: message.Message) -> str:
    """
    Return the email gateway address this message was sent to.

    The first of the X-Gm-Original-To, Delivered-To and To headers that is
    present supplies the recipients. Its values are parsed into bare
    addresses, so a display name ("Name <addr>") or several comma-separated
    recipients in one header no longer end up in the returned string. The
    first address matching the regular expression built from
    settings.EMAIL_GATEWAY_PATTERN (each '%s' acts as a wildcard) is returned.

    Raises ZulipEmailForwardError when no address matches.
    """
    # Imported locally so the block does not depend on a file-level import.
    from email.utils import getaddresses

    # We can't use Delivered-To; if there is a X-Gm-Original-To
    # it is more accurate, so try to find the most-accurate
    # recipient list in descending priority order
    recipient_headers = ["X-Gm-Original-To", "Delivered-To", "To"]
    recipients = []  # type: List[Union[str, Header]]
    for recipient_header in recipient_headers:
        r = message.get_all(recipient_header, None)
        if r:
            recipients = r
            break

    pattern_parts = [re.escape(part) for part in settings.EMAIL_GATEWAY_PATTERN.split('%s')]
    match_email_re = re.compile(".*?".join(pattern_parts))

    # Match against the parsed addresses rather than the raw header text.
    parsed = getaddresses([str(recipient) for recipient in recipients])
    for _display_name, recipient_email in parsed:
        if match_email_re.match(recipient_email):
            return recipient_email

    raise ZulipEmailForwardError("Missing recipient in mirror email")
Пример #10
0
def find_emailgateway_recipient(message: message.Message) -> str:
    """
    Return the email gateway address this message was sent to.

    The first of the X-Gm-Original-To, Delivered-To and To headers that is
    present supplies the recipients. Its values are parsed into bare
    addresses, so a display name ("Name <addr>") or several comma-separated
    recipients in one header no longer end up in the returned string. The
    first address matching the regular expression built from
    settings.EMAIL_GATEWAY_PATTERN (each '%s' acts as a wildcard) is returned.

    Raises ZulipEmailForwardError when no address matches.
    """
    # Imported locally so the block does not depend on a file-level import.
    from email.utils import getaddresses

    # We can't use Delivered-To; if there is a X-Gm-Original-To
    # it is more accurate, so try to find the most-accurate
    # recipient list in descending priority order
    recipient_headers = ["X-Gm-Original-To", "Delivered-To", "To"]
    recipients = []  # type: List[Union[str, Header]]
    for recipient_header in recipient_headers:
        r = message.get_all(recipient_header, None)
        if r:
            recipients = r
            break

    pattern_parts = [re.escape(part) for part in settings.EMAIL_GATEWAY_PATTERN.split('%s')]
    match_email_re = re.compile(".*?".join(pattern_parts))

    # Match against the parsed addresses rather than the raw header text.
    parsed = getaddresses([str(recipient) for recipient in recipients])
    for _display_name, recipient_email in parsed:
        if match_email_re.match(recipient_email):
            return recipient_email

    raise ZulipEmailForwardError("Missing recipient in mirror email")
Пример #11
0
    def test_dkim_and_feedback_loop(self):
        """
        A mailing configured with DKIM and a feedback loop produces a message
        with a Feedback-ID header and two DKIM-Signature headers that both
        verify.
        """
        privkey = self._get_dkim_privkey()
        dkim_settings = {'selector': 'mail', 'domain': 'unittest.cloud-mailing.net', 'privkey': privkey}
        # Each consumer gets its own copy of the DKIM settings.
        mailing = factories.MailingFactory(
            dkim=dict(dkim_settings),
            feedback_loop={'dkim': dict(dkim_settings),
                           'sender_id': 'CloudMailing'})
        recipient = factories.RecipientFactory(mailing=mailing)

        message_str = self._customize(recipient)

        self.assertNotIn(b"\r\n", message_str)

        parser = email.parser.Parser()
        message = parser.parsestr(message_str, headersonly=False)
        # unittest assertions instead of a bare assert: they are not stripped
        # under -O and report the offending values on failure.
        self.assertIsInstance(message, email.message.Message)
        self.assertIn('Feedback-ID', message)
        # The default keeps a missing header a test failure, not a TypeError.
        self.assertEqual(2, len(message.get_all('DKIM-Signature', [])))

        # NOTE(review): presumably one signature per configured DKIM block
        # (mailing and feedback loop) - confirm.
        d = dkim.DKIM(message_str)
        self.assertTrue(d.verify(0, dnsfunc=self._get_txt))
        self.assertTrue(d.verify(1, dnsfunc=self._get_txt))