Python html2plaintext примеры использования

Язык программирования: Python

Пространство имен/Пакет: tools

Метод/Функция: html2plaintext

Примеров на hotexamples.com: 17

Python html2plaintext — найдено 17 примеров. Это лучшие примеры кода на Python для tools.html2plaintext, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество подборки.

Пример #1

Показать файл

Файл: mail_gateway.py Проект: syleam/fetchmail_parsing

    def _get_message_parts(self, message):
        """
        Parse a raw e-mail and return its main parts in a dict.

        Code from the mail_gateway module.

        :param message: raw message source, given as an ``xmlrpclib.Binary``,
            a byte string or a unicode string
        :return: dict holding, for every header present in the message, one of
            the keys ``subject``, ``content-type``, ``from``, ``to`` (taken
            from ``Delivered-To``), ``cc``, ``reply-to``, ``date``,
            ``encoding``, ``references`` and ``in-reply-to``; non-multipart
            messages also get a ``body`` key (HTML bodies are converted to
            plain text)
        """
        if isinstance(message, xmlrpclib.Binary):
            message = str(message.data)

        # Warning: message_from_string doesn't always work correctly on unicode,
        # we must use utf-8 strings here :-(
        if isinstance(message, unicode):
            message = message.encode('utf-8')

        msg_txt = email.message_from_string(message)
        msg = {}

        # (header name, key in the result, value goes through _decode_header)
        header_map = (
            ('Subject', 'subject', True),
            ('Content-Type', 'content-type', False),
            ('From', 'from', True),
            ('Delivered-To', 'to', True),
            ('CC', 'cc', True),
            ('Reply-To', 'reply-to', True),
            ('Date', 'date', True),
            ('Content-Transfer-Encoding', 'encoding', False),
            ('References', 'references', False),
            ('In-Reply-To', 'in-reply-to', False),
        )
        for header, key, needs_decoding in header_map:
            # Header names are case-insensitive: test membership on the
            # Message itself (case-insensitive) rather than on its list of
            # keys, so that e.g. "Reply-To" and "Cc" are not missed.
            if header not in msg_txt:
                continue
            value = msg_txt.get(header)
            msg[key] = self._decode_header(value) if needs_decoding else value

        # Only a single-part message has a payload that can be decoded
        # directly; multipart messages get no 'body' key here.
        if not msg_txt.is_multipart():
            encoding = msg_txt.get_content_charset()
            body = msg_txt.get_payload(decode=True)

            if 'text/html' in msg_txt.get('Content-Type', ''):
                body = tools.html2plaintext(body)

            msg['body'] = tools.ustr(body, encoding)

        return msg

Пример #2

Показать файл

Файл: mail_mail.py Проект: anilgs/openerp-addons

    def send_get_email_dict(self, cr, uid, mail, partner=None, context=None):
        """ Build the email values used to send ``mail``.

            When ``partner`` is given the email is addressed to that partner
            only; otherwise the recipients are the addresses found in
            ``mail.email_to``.

            :param browse_record mail: mail.mail browse_record
            :param browse_record partner: specific recipient partner
            :return: dict with the keys ``body``, ``body_alternative`` (plain
                text version of the body), ``subject`` and ``email_to``
        """
        html_body = self.send_get_mail_body(cr, uid, mail, partner=partner, context=context)
        mail_subject = self.send_get_mail_subject(cr, uid, mail, partner=partner, context=context)
        plain_body = tools.html2plaintext(html_body)
        if partner:
            recipients = [partner.email]
        else:
            recipients = tools.email_split(mail.email_to)
        return {
            "body": html_body,
            "body_alternative": plain_body,
            "subject": mail_subject,
            "email_to": recipients,
        }

Пример #3

Показать файл

Файл: crm_helpdesk.py Проект: ShantiSR/openerp-addons

 def message_new(self, cr, uid, msg, custom_values=None, context=None):
     """ Create a record from an incoming email.

         Overrides the mail_thread message_new that is called by the
         mailgateway through message_process. The values taken from the
         email (subject, plain text body, sender, cc) are merged into
         ``custom_values`` before delegating to the parent implementation.
     """
     if custom_values is None:
         custom_values = {}

     raw_body = msg.get('body')
     description = html2plaintext(raw_body) if raw_body else ''

     email_values = {
         'name': msg.get('subject') or _("No Subject"),
         'description': description,
         'email_from': msg.get('from'),
         'email_cc': msg.get('cc'),
         'user_id': False,
     }
     custom_values.update(email_values)
     return super(crm_helpdesk, self).message_new(
         cr, uid, msg, custom_values=custom_values, context=context)

Пример #4

Показать файл

Файл: project_issue.py Проект: anilgs/openerp-addons

    def message_new(self, cr, uid, msg, custom_values=None, context=None):
        """ Create an issue from an incoming email.

            Overrides the mail_thread message_new that is called by the
            mailgateway through message_process. The values taken from the
            email (subject, plain text body, sender, cc and, when present,
            priority) are merged into ``custom_values``, and ``state_to`` is
            set to 'draft' in ``context`` before delegating to the parent
            implementation.
        """
        if custom_values is None:
            custom_values = {}
        if context is None:
            context = {}
        context['state_to'] = 'draft'

        raw_body = msg.get('body')
        description = html2plaintext(raw_body) if raw_body else ''

        email_values = {
            'name': msg.get('subject') or _("No Subject"),
            'description': description,
            'email_from': msg.get('from'),
            'email_cc': msg.get('cc'),
            'user_id': False,
        }
        custom_values.update(email_values)

        priority = msg.get('priority')
        if priority:
            custom_values['priority'] = priority

        return super(project_issue, self).message_new(
            cr, uid, msg, custom_values=custom_values, context=context)

Пример #5

Показать файл

Файл: PSRDistance.py Проект: danielemichilli/pulsar_tools

def psrDist(psr):
    """Return a distance estimate for the pulsar ``psr``.

    ``psr`` is either a bare pulsar name or a two-word string whose first
    word must be ``PSR`` (e.g. ``"PSR <name>"``).

    For pulsars listed in ``PSRDict`` the NE2001 web form is queried and the
    three numbers found in its answer ("D = <d> kpc" followed by a
    parenthesised pair) are returned as ``(d, first - d, second - d)``.
    For any other pulsar the ``Dist`` entry returned by ``Qatnf`` is used and
    ``(float(dist[0]), dist[1])`` is returned.

    Raises ``ValueError`` when a two-word name does not start with ``PSR`` or
    when no distance can be found in the server response.
    """
    name = psr.split()
    if len(name) == 2:
        if name[0] != 'PSR':
            # A string is not a valid exception object; raise a real one.
            raise ValueError('Can only deal with radio pulsar.')
        psr = name[1]

    if psr in PSRDict:
        url = 'https://www.nrl.navy.mil/rsd/RORF/ne2001/pulsar.cgi'
        values = {'pulsar': PSRDict[psr], "frequency": 1.}
        data = urllib.urlencode(values)
        req = urllib2.Request(url, data)
        # NOTE(review): TLSv1 is obsolete and a bare SSLContext presumably
        # does not verify the server certificate -- confirm the server still
        # requires this before keeping it.
        gcontext = ssl.SSLContext(ssl.PROTOCOL_TLSv1)  # Only for gangstars
        response = urllib2.urlopen(req, context=gcontext)
        try:
            the_page = response.read()
        finally:
            # Do not leak the connection, even if reading fails.
            response.close()
        text = html2plaintext(the_page, encoding='UTF-8')
        p = re.compile(r'D = (.+) kpc\n\((.+),(.+)\)', re.U)
        m = p.search(text)
        if m is None:
            raise ValueError(
                "No distance found in the server response for %s" % psr)
        dist = [float(D) for D in m.groups()]
        return (dist[0], dist[1] - dist[0], dist[2] - dist[0])

    dist = Qatnf(psr, ['Dist'])['Dist']
    return (float(dist[0]), dist[1])

Пример #6

Показать файл

Файл: PSRDistance.py Проект: zhuww/psrtools

def psrDist(psr):
    """Return a distance estimate for the pulsar ``psr``.

    ``psr`` is either a bare pulsar name or a two-word string whose first
    word must be ``PSR`` (e.g. ``"PSR <name>"``).

    For pulsars listed in ``PSRDict`` the remote ``pulsar.cgi`` form is
    queried and the three numbers found in its answer ("D = <d> kpc"
    followed by a parenthesised pair) are returned as
    ``(d, first - d, second - d)``. For any other pulsar the ``Dist`` entry
    returned by ``Qatnf`` is used and ``(float(dist[0]), dist[1])`` is
    returned.

    Raises ``ValueError`` when a two-word name does not start with ``PSR`` or
    when no distance can be found in the server response.
    """
    name = psr.split()
    if len(name) == 2:
        if name[0] != "PSR":
            # A string is not a valid exception object; raise a real one.
            raise ValueError("Can only deal with radio pulsar.")
        psr = name[1]

    if psr in PSRDict:
        url = "http://rsd-www.nrl.navy.mil/cgi-bin/pulsar.cgi"
        values = {"pulsar": PSRDict[psr], "frequency": 1.0}
        data = urllib.urlencode(values)
        req = urllib2.Request(url, data)
        response = urllib2.urlopen(req)
        try:
            the_page = response.read()
        finally:
            # Do not leak the connection, even if reading fails.
            response.close()
        text = html2plaintext(the_page, encoding="UTF-8")
        p = re.compile(r"D = (.+) kpc\n\((.+),(.+)\)", re.U)
        m = p.search(text)
        if m is None:
            raise ValueError(
                "No distance found in the server response for %s" % psr)
        dist = [float(D) for D in m.groups()]
        return (dist[0], dist[1] - dist[0], dist[2] - dist[0])

    dist = Qatnf(psr, ["Dist"])["Dist"]
    return (float(dist[0]), dist[1])

Пример #7

Показать файл

Файл: partner.py Проект: proxly/ntm_project

    def parse_message(self, message):
        """Parse a raw e-mail into a dict of headers, body and attachments.

        :param message: raw message source, as a byte string or a unicode
            string (unicode is encoded to utf-8 before parsing)
        :return: dict with the keys ``id`` and ``message-id`` (False when the
            header is missing) and ``to``, plus, for each header present in
            the message, ``subject``, ``content-type``, ``from``, ``cc``,
            ``reply``, ``date`` (formatted as ``%Y-%m-%d %H:%M:%S``),
            ``encoding``, ``references``, ``in-reply-to`` and ``priority``.
            Non-multipart messages get a ``body`` key; multipart messages get
            ``body`` and ``attachments`` (dict mapping filename to content).
        """
        #TOCHECK: put this function in mailgateway module
        if isinstance(message, unicode):
            message = message.encode('utf-8')
        msg_txt = email.message_from_string(message)
        message_id = msg_txt.get('message-id', False)
        msg = {}
        msg['id'] = message_id
        msg['message-id'] = message_id

        # Header names are case-insensitive: membership is tested on the
        # Message itself (case-insensitive) rather than on its list of keys,
        # so that e.g. "Reply-To" and "Cc" are not missed.
        if 'Subject' in msg_txt:
            msg['subject'] = self._decode_header(msg_txt.get('Subject'))

        if 'Content-Type' in msg_txt:
            msg['content-type'] = msg_txt.get('Content-Type')

        if 'From' in msg_txt:
            msg['from'] = self._decode_header(msg_txt.get('From'))

        if 'To' in msg_txt:
            msg['to'] = self._decode_header(msg_txt.get('To'))
        else:
            msg['to'] = self._decode_header(msg_txt.get('Delivered-To'))

        if 'CC' in msg_txt:
            msg['cc'] = self._decode_header(msg_txt.get('CC'))

        if 'Reply-To' in msg_txt:
            msg['reply'] = self._decode_header(msg_txt.get('Reply-To'))

        if 'Date' in msg_txt:
            date = self._decode_header(msg_txt.get('Date'))
            msg['date'] = dateutil.parser.parse(date).strftime("%Y-%m-%d %H:%M:%S")

        if 'Content-Transfer-Encoding' in msg_txt:
            msg['encoding'] = msg_txt.get('Content-Transfer-Encoding')

        if 'References' in msg_txt:
            msg['references'] = msg_txt.get('References')

        if 'In-Reply-To' in msg_txt:
            msg['in-reply-to'] = msg_txt.get('In-Reply-To')

        if 'X-Priority' in msg_txt:
            # keep only the leading number of e.g. "3 (Normal)"
            msg['priority'] = msg_txt.get('X-Priority', '3 (Normal)').split(' ')[0]

        # Only a single-part message has a payload that can be decoded
        # directly.
        if not msg_txt.is_multipart():
            encoding = msg_txt.get_content_charset()
            body = msg_txt.get_payload(decode=True)
            msg['body'] = tools.ustr(body, encoding)

        attachments = {}
        has_plain_text = False
        if msg_txt.is_multipart() or 'multipart/alternative' in msg.get('content-type', ''):
            body = ""
            for part in msg_txt.walk():
                if part.get_content_maintype() == 'multipart':
                    continue

                encoding = part.get_content_charset()
                filename = part.get_filename()
                if part.get_content_maintype() == 'text':
                    content = part.get_payload(decode=True)
                    if filename:
                        attachments[filename] = content
                    elif not has_plain_text:
                        # main content parts should have 'text' maintype
                        # and no filename. we ignore the html part if
                        # there is already a plaintext part without filename,
                        # because presumably these are alternatives.
                        content = tools.ustr(content, encoding)
                        if part.get_content_subtype() == 'html':
                            body = tools.ustr(tools.html2plaintext(content))
                        elif part.get_content_subtype() == 'plain':
                            body = content
                            has_plain_text = True
                elif part.get_content_maintype() in ('application', 'image') \
                        or part.get_content_subtype() == 'octet-stream':
                    # The subtype is compared for equality: a membership test
                    # against the bare string 'octet-stream' would also match
                    # any of its substrings.
                    if filename:
                        attachments[filename] = part.get_payload(decode=True)
                    else:
                        res = part.get_payload(decode=True)
                        body += tools.ustr(res, encoding)

            msg['body'] = body
            msg['attachments'] = attachments
        return msg

Пример #8

Показать файл

Файл: mail_gateway.py Проект: lcrdcastro/viaweb

    def process_email(self, cr, uid, model, message, custom_values=None, attach=True, context=None):
        """Process an incoming email and create or update a record of the given OpenERP model.

        When the References/In-Reply-To headers (or, failing that, the
        subject) point to existing records of ``model``, these records are
        updated through their ``message_update()`` method; otherwise a new
        record is created. The message is then stored in the history and
        forwarded with ``email_forward()``.

        @param self: The object pointer
        @param cr: the current row, from the database cursor,
        @param uid: the current user's ID for security checks,
        @param model: OpenObject Model
        @param message: Email details, passed as a string or an xmlrpclib.Binary
        @param custom_values: optional dict of values written on a record
                              created through the model's message_new()
        @param attach: when true, attachments are stored as ir.attachment
                       records for records created without message_new()
        @param context: A standard dictionary for contextual values
                        (updated in place with the 'model' key)
        @return: id of the newly created record, or False when only existing
                 records were updated
        @raise NotImplementedError: when an existing record is referenced but
                                    the model has no message_update() method
        """

        # extract message bytes, we are forced to pass the message as binary because
        # we don't know its encoding until we parse its headers and hence can't
        # convert it to utf-8 for transport between the mailgate script and here.
        if isinstance(message, xmlrpclib.Binary):
            message = str(message.data)

        if context is None:
            context = {}

        if custom_values is None or not isinstance(custom_values, dict):
            custom_values = {}

        model_pool = self.pool.get(model)
        res_id = False

        # Create New Record into particular model
        def create_record(msg):
            att_ids = []
            if hasattr(model_pool, 'message_new'):
                res_id, att_ids = model_pool.message_new(cr, uid, msg, context=context)
                if custom_values:
                    model_pool.write(cr, uid, [res_id], custom_values, context=context)
            else:
                data = {
                    'name': msg.get('subject'),
                    'email_from': msg.get('from'),
                    'email_cc': msg.get('cc'),
                    'user_id': False,
                    'description': msg.get('body'),
                    'state' : 'draft',
                }
                data.update(self.get_partner(cr, uid, msg.get('from'), context=context))
                res_id = model_pool.create(cr, uid, data, context=context)

                if attach:
                    # 'attachments' is the dict built further down in the
                    # enclosing function, before create_record() is called.
                    for attachment in msg.get('attachments', []):
                        data_attach = {
                            'name': attachment,
                            'datas': binascii.b2a_base64(str(attachments.get(attachment))),
                            'datas_fname': attachment,
                            'description': 'Mail attachment',
                            'res_model': model,
                            'res_id': res_id,
                        }
                        att_ids.append(self.pool.get('ir.attachment').create(cr, uid, data_attach))

            return res_id, att_ids

        # Warning: message_from_string doesn't always work correctly on unicode,
        # we must use utf-8 strings here :-(
        if isinstance(message, unicode):
            message = message.encode('utf-8')
        msg_txt = email.message_from_string(message)
        message_id = msg_txt.get('message-id', False)
        msg = {}

        if not message_id:
            # Very unusual situation, be we should be fault-tolerant here
            message_id = time.time()
            msg_txt['message-id'] = message_id
            _logger.info('Message without message-id, generating a random one: %s', message_id)

        msg['id'] = message_id
        msg['message-id'] = message_id

        # Header names are case-insensitive: membership is tested on the
        # Message itself (case-insensitive) rather than on its list of keys,
        # so that e.g. "Reply-To" and "Cc" are not missed.
        if 'Subject' in msg_txt:
            msg['subject'] = self._decode_header(msg_txt.get('Subject'))

        if 'Content-Type' in msg_txt:
            msg['content-type'] = msg_txt.get('Content-Type')

        if 'From' in msg_txt:
            msg['from'] = self._decode_header(msg_txt.get('From'))

        if 'Delivered-To' in msg_txt:
            msg['to'] = self._decode_header(msg_txt.get('Delivered-To'))

        if 'CC' in msg_txt:
            msg['cc'] = self._decode_header(msg_txt.get('CC'))

        if 'Reply-To' in msg_txt:
            msg['reply'] = self._decode_header(msg_txt.get('Reply-To'))

        if 'Date' in msg_txt:
            msg['date'] = self._decode_header(msg_txt.get('Date'))

        if 'Content-Transfer-Encoding' in msg_txt:
            msg['encoding'] = msg_txt.get('Content-Transfer-Encoding')

        if 'References' in msg_txt:
            msg['references'] = msg_txt.get('References')

        if 'In-Reply-To' in msg_txt:
            msg['in-reply-to'] = msg_txt.get('In-Reply-To')

        if 'X-Priority' in msg_txt:
            # keep only the leading number of e.g. "3 (Normal)"
            msg['priority'] = msg_txt.get('X-Priority', '3 (Normal)').split(' ')[0]

        # Only a single-part message has a payload that can be decoded
        # directly.
        if not msg_txt.is_multipart():
            encoding = msg_txt.get_content_charset()
            body = msg_txt.get_payload(decode=True)
            if 'text/html' in msg_txt.get('Content-Type', ''):
                body = tools.html2plaintext(body)
            msg['body'] = tools.ustr(body, encoding)

        attachments = {}
        has_plain_text = False
        if msg_txt.is_multipart() or 'multipart/alternative' in msg.get('content-type', ''):
            body = ""
            for part in msg_txt.walk():
                if part.get_content_maintype() == 'multipart':
                    continue

                encoding = part.get_content_charset()
                filename = part.get_filename()
                if part.get_content_maintype() == 'text':
                    content = part.get_payload(decode=True)
                    if filename:
                        attachments[filename] = content
                    elif not has_plain_text:
                        # main content parts should have 'text' maintype
                        # and no filename. we ignore the html part if
                        # there is already a plaintext part without filename,
                        # because presumably these are alternatives.
                        content = tools.ustr(content, encoding)
                        if part.get_content_subtype() == 'html':
                            body = tools.ustr(tools.html2plaintext(content))
                        elif part.get_content_subtype() == 'plain':
                            body = content
                            has_plain_text = True
                elif part.get_content_maintype() in ('application', 'image') \
                        or part.get_content_subtype() == 'octet-stream':
                    # The subtype is compared for equality: a membership test
                    # against the bare string 'octet-stream' would also match
                    # any of its substrings.
                    if filename:
                        attachments[filename] = part.get_payload(decode=True)
                    else:
                        res = part.get_payload(decode=True)
                        body += tools.ustr(res, encoding)

            msg['body'] = body
            msg['attachments'] = attachments

        res_ids = []
        attachment_ids = []
        new_res_id = False
        references = msg.get('references') or msg.get('in-reply-to')
        if references:
            if '\r\n' in references:
                references = references.split('\r\n')
            else:
                references = references.split(' ')
            for ref in references:
                ref = ref.strip()
                res_id = tools.misc.reference_re.search(ref)
                if res_id:
                    res_id = res_id.group(1)
                else:
                    # Fall back on the subject; a mail without Subject header
                    # has no 'subject' key, so do not index msg directly.
                    res_id = tools.misc.res_re.search(msg.get('subject') or '')
                    if res_id:
                        res_id = res_id.group(1)
                if res_id:
                    res_id = int(res_id)
                    if model_pool.exists(cr, uid, res_id):
                        res_ids.append(res_id)
                        if hasattr(model_pool, 'message_update'):
                            model_pool.message_update(cr, uid, [res_id], {}, msg, context=context)
                        else:
                            raise NotImplementedError('model %s does not support updating records, mailgate API method message_update() is missing'%model)

        if not res_ids:
            new_res_id, attachment_ids = create_record(msg)
            res_ids = [new_res_id]

        # Store messages
        context.update({'model' : model})
        if hasattr(model_pool, 'history'):
            model_pool.history(cr, uid, res_ids, _('receive'), history=True,
                            subject = msg.get('subject'),
                            email = msg.get('to'),
                            details = msg.get('body'),
                            email_from = msg.get('from'),
                            email_cc = msg.get('cc'),
                            message_id = msg.get('message-id'),
                            references = msg.get('references', False) or msg.get('in-reply-to', False),
                            attach = attachment_ids or attachments.items(),
                            email_date = msg.get('date'),
                            context = context)
        else:
            self.history(cr, uid, model, res_ids, msg, attachment_ids, context=context)
        self.email_forward(cr, uid, model, res_ids, msg_txt)
        return new_res_id

Пример #9

Показать файл

Файл: mail_message.py Проект: zhaohuaw/openerp-ktv

    def parse_message(self, message, save_original=False):
        """Parses a string or email.message.Message representing an
           RFC-2822 email, and returns a generic dict holding the
           message details.

           :param message: the message to parse
           :type message: email.message.Message | string | unicode
           :param bool save_original: whether the returned dict
               should include an ``original`` entry with the base64
               encoded source of the message.
           :rtype: dict
           :return: A dict with the following structure, where each
                    field may not be present if missing in original
                    message::

                    { 'message-id': msg_id,
                      'subject': subject,
                      'from': from,
                      'to': to,
                      'cc': cc,
                      'headers' : { 'X-Mailer': mailer,
                                    #.. all X- headers...
                                  },
                      'subtype': msg_mime_subtype,
                      'body_text': plaintext_body,
                      'body_html': html_body,
                      'attachments': [('file1', 'bytes'),
                                      ('file2', 'bytes')],
                      # ...
                      'original': source_of_email,
                    }

                    The ``date`` entry, when present, is expressed in UTC
                    and formatted as ``%Y-%m-%d %H:%M:%S``. ``body`` and
                    ``sub_type`` duplicate ``body_text`` and ``subtype``
                    for backwards compatibility.
        """
        msg_txt = message
        if isinstance(message, str):
            msg_txt = email.message_from_string(message)

        # Warning: message_from_string doesn't always work correctly on unicode,
        # we must use utf-8 strings here :-(
        if isinstance(message, unicode):
            message = message.encode('utf-8')
            msg_txt = email.message_from_string(message)

        message_id = msg_txt.get('message-id', False)
        msg = {}

        if save_original:
            # save original, we need to be able to read the original email sometimes
            msg['original'] = message.as_string() if isinstance(message, Message) \
                                                  else message
            msg['original'] = base64.b64encode(
                msg['original'])  # binary fields are b64

        if not message_id:
            # Very unusual situation, be we should be fault-tolerant here
            message_id = time.time()
            msg_txt['message-id'] = message_id
            _logger.info(
                'Parsing Message without message-id, generating a random one: %s',
                message_id)

        msg['id'] = message_id
        msg['message-id'] = message_id

        # Header names are case-insensitive: membership is tested on the
        # Message itself (case-insensitive) rather than on its list of keys,
        # so a single check covers every spelling ("CC", "Cc", ...).
        if 'Subject' in msg_txt:
            msg['subject'] = decode(msg_txt.get('Subject'))

        if 'Content-Type' in msg_txt:
            msg['content-type'] = msg_txt.get('Content-Type')

        if 'From' in msg_txt:
            msg['from'] = decode(msg_txt.get('From') or msg_txt.get_unixfrom())

        if 'To' in msg_txt:
            msg['to'] = decode(msg_txt.get('To'))

        # Delivered-To, when present, takes precedence over To
        if 'Delivered-To' in msg_txt:
            msg['to'] = decode(msg_txt.get('Delivered-To'))

        if 'Cc' in msg_txt:
            msg['cc'] = decode(msg_txt.get('Cc'))

        if 'Reply-To' in msg_txt:
            msg['reply'] = decode(msg_txt.get('Reply-To'))

        if 'Date' in msg_txt:
            try:
                date_hdr = decode(msg_txt.get('Date'))
                parsed_date = dateutil.parser.parse(date_hdr, fuzzy=True)
                if parsed_date.utcoffset() is None:
                    # naive datetime, so we arbitrarily decide to make it
                    # UTC, there's no better choice. Should not happen,
                    # as RFC2822 requires timezone offset in Date headers.
                    stored_date = parsed_date.replace(tzinfo=pytz.utc)
                else:
                    stored_date = parsed_date.astimezone(pytz.utc)
            except Exception:
                _logger.warning(
                    'Failed to parse Date header %r in incoming mail '
                    'with message-id %r, assuming current date/time.',
                    msg_txt.get('Date'), message_id)
                # Parsed dates are stored in UTC, so the fallback must be
                # the current UTC time, not the server's local time.
                stored_date = datetime.datetime.utcnow()

            msg['date'] = stored_date.strftime("%Y-%m-%d %H:%M:%S")

        if 'Content-Transfer-Encoding' in msg_txt:
            msg['encoding'] = msg_txt.get('Content-Transfer-Encoding')

        if 'References' in msg_txt:
            msg['references'] = msg_txt.get('References')

        if 'In-Reply-To' in msg_txt:
            msg['in-reply-to'] = msg_txt.get('In-Reply-To')

        msg['headers'] = {}
        msg['subtype'] = 'plain'
        for item in msg_txt.items():
            if item[0].startswith('X-'):
                msg['headers'].update({item[0]: item[1]})
        if not msg_txt.is_multipart() or 'text/plain' in msg.get(
                'content-type', ''):
            encoding = msg_txt.get_content_charset()
            body = tools.ustr(msg_txt.get_payload(decode=True),
                              encoding,
                              errors='replace')
            if 'text/html' in msg.get('content-type', ''):
                msg['body_html'] = body
                msg['subtype'] = 'html'
                body = tools.html2plaintext(body)
            msg['body_text'] = tools.ustr(body, encoding, errors='replace')

        attachments = []
        if msg_txt.is_multipart() or 'multipart/alternative' in msg.get(
                'content-type', ''):
            body = ""
            if 'multipart/alternative' in msg.get('content-type', ''):
                msg['subtype'] = 'alternative'
            else:
                msg['subtype'] = 'mixed'
            for part in msg_txt.walk():
                if part.get_content_maintype() == 'multipart':
                    continue

                encoding = part.get_content_charset()
                filename = part.get_filename()
                if part.get_content_maintype() == 'text':
                    content = part.get_payload(decode=True)
                    # A named text part is kept as an attachment, and is
                    # still considered as body content below.
                    if filename:
                        attachments.append((filename, content))
                    content = tools.ustr(content, encoding, errors='replace')
                    if part.get_content_subtype() == 'html':
                        msg['body_html'] = content
                        msg['subtype'] = 'html'  # html version prevails
                        body = tools.ustr(tools.html2plaintext(content))
                        body = body.replace('&#13;', '')
                    elif part.get_content_subtype() == 'plain':
                        body = content
                elif part.get_content_maintype() in ('application', 'image'):
                    if filename:
                        attachments.append(
                            (filename, part.get_payload(decode=True)))
                    else:
                        res = part.get_payload(decode=True)
                        body += tools.ustr(res, encoding, errors='replace')

            msg['body_text'] = body
        msg['attachments'] = attachments

        # for backwards compatibility:
        msg['body'] = msg['body_text']
        msg['sub_type'] = msg['subtype'] or 'plain'
        return msg

Пример #10

Показать файл

Файл: mail_gateway.py Проект: lbiemans/docker

    def process_email(self,
                      cr,
                      uid,
                      model,
                      message,
                      custom_values=None,
                      attach=True,
                      context=None):
        """This function Processes email and create record for given OpenERP model
        @param self: The object pointer
        @param cr: the current row, from the database cursor,
        @param uid: the current user’s ID for security checks,
        @param model: OpenObject Model
        @param message: Email details, passed as a string or an xmlrpclib.Binary
        @param attach: Email attachments
        @param context: A standard dictionary for contextual values"""

        # extract message bytes, we are forced to pass the message as binary because
        # we don't know its encoding until we parse its headers and hence can't
        # convert it to utf-8 for transport between the mailgate script and here.
        if isinstance(message, xmlrpclib.Binary):
            message = str(message.data)

        if context is None:
            context = {}

        if custom_values is None or not isinstance(custom_values, dict):
            custom_values = {}

        model_pool = self.pool.get(model)
        res_id = False

        # Create New Record into particular model
        def create_record(msg):
            att_ids = []
            if hasattr(model_pool, 'message_new'):
                res_id, att_ids = model_pool.message_new(cr,
                                                         uid,
                                                         msg,
                                                         context=context)
                if custom_values:
                    model_pool.write(cr,
                                     uid, [res_id],
                                     custom_values,
                                     context=context)
            else:
                data = {
                    'name': msg.get('subject'),
                    'email_from': msg.get('from'),
                    'email_cc': msg.get('cc'),
                    'user_id': False,
                    'description': msg.get('body'),
                    'state': 'draft',
                }
                data.update(
                    self.get_partner(cr, uid, msg.get('from'),
                                     context=context))
                res_id = model_pool.create(cr, uid, data, context=context)

                if attach:
                    for attachment in msg.get('attachments', []):
                        data_attach = {
                            'name':
                            attachment,
                            'datas':
                            binascii.b2a_base64(
                                str(attachments.get(attachment))),
                            'datas_fname':
                            attachment,
                            'description':
                            'Mail attachment',
                            'res_model':
                            model,
                            'res_id':
                            res_id,
                        }
                        att_ids.append(
                            self.pool.get('ir.attachment').create(
                                cr, uid, data_attach))

            return res_id, att_ids

        # Warning: message_from_string doesn't always work correctly on unicode,
        # we must use utf-8 strings here :-(
        if isinstance(message, unicode):
            message = message.encode('utf-8')
        msg_txt = email.message_from_string(message)
        message_id = msg_txt.get('message-id', False)
        msg = {}

        if not message_id:
            # Very unusual situation, be we should be fault-tolerant here
            message_id = time.time()
            msg_txt['message-id'] = message_id
            _logger.info(
                'Message without message-id, generating a random one: %s',
                message_id)

        fields = msg_txt.keys()
        msg['id'] = message_id
        msg['message-id'] = message_id

        if 'Subject' in fields:
            msg['subject'] = self._decode_header(msg_txt.get('Subject'))

        if 'Content-Type' in fields:
            msg['content-type'] = msg_txt.get('Content-Type')

        if 'From' in fields:
            msg['from'] = self._decode_header(msg_txt.get('From'))

        if 'Delivered-To' in fields:
            msg['to'] = self._decode_header(msg_txt.get('Delivered-To'))

        if 'CC' in fields:
            msg['cc'] = self._decode_header(msg_txt.get('CC'))

        if 'Reply-to' in fields:
            msg['reply'] = self._decode_header(msg_txt.get('Reply-To'))

        if 'Date' in fields:
            msg['date'] = self._decode_header(msg_txt.get('Date'))

        if 'Content-Transfer-Encoding' in fields:
            msg['encoding'] = msg_txt.get('Content-Transfer-Encoding')

        if 'References' in fields:
            msg['references'] = msg_txt.get('References')

        if 'In-Reply-To' in fields:
            msg['in-reply-to'] = msg_txt.get('In-Reply-To')

        if 'X-Priority' in fields:
            msg['priority'] = msg_txt.get('X-Priority',
                                          '3 (Normal)').split(' ')[0]

        if not msg_txt.is_multipart() or 'text/plain' in msg.get(
                'Content-Type', ''):
            encoding = msg_txt.get_content_charset()
            body = msg_txt.get_payload(decode=True)
            if 'text/html' in msg_txt.get('Content-Type', ''):
                body = tools.html2plaintext(body)
            msg['body'] = tools.ustr(body, encoding)

        attachments = {}
        has_plain_text = False
        if msg_txt.is_multipart() or 'multipart/alternative' in msg.get(
                'content-type', ''):
            body = ""
            for part in msg_txt.walk():
                if part.get_content_maintype() == 'multipart':
                    continue

                encoding = part.get_content_charset()
                filename = part.get_filename()
                if part.get_content_maintype() == 'text':
                    content = part.get_payload(decode=True)
                    if filename:
                        attachments[filename] = content
                    elif not has_plain_text:
                        # main content parts should have 'text' maintype
                        # and no filename. we ignore the html part if
                        # there is already a plaintext part without filename,
                        # because presumably these are alternatives.
                        content = tools.ustr(content, encoding)
                        if part.get_content_subtype() == 'html':
                            body = tools.ustr(tools.html2plaintext(content))
                        elif part.get_content_subtype() == 'plain':
                            body = content
                            has_plain_text = True
                elif part.get_content_maintype() in ('application', 'image'):
                    if filename:
                        attachments[filename] = part.get_payload(decode=True)
                    else:
                        res = part.get_payload(decode=True)
                        body += tools.ustr(res, encoding)

            msg['body'] = body
            msg['attachments'] = attachments
        res_ids = []
        attachment_ids = []
        new_res_id = False
        if msg.get('references') or msg.get('in-reply-to'):
            references = msg.get('references') or msg.get('in-reply-to')
            if '\r\n' in references:
                references = references.split('\r\n')
            else:
                references = references.split(' ')
            for ref in references:
                ref = ref.strip()
                res_id = tools.misc.reference_re.search(ref)
                if res_id:
                    res_id = res_id.group(1)
                else:
                    res_id = tools.misc.res_re.search(msg['subject'])
                    if res_id:
                        res_id = res_id.group(1)
                if res_id:
                    res_id = int(res_id)
                    model_pool = self.pool.get(model)
                    if model_pool.exists(cr, uid, res_id):
                        res_ids.append(res_id)
                        if hasattr(model_pool, 'message_update'):
                            model_pool.message_update(cr,
                                                      uid, [res_id], {},
                                                      msg,
                                                      context=context)
                        else:
                            raise NotImplementedError(
                                'model %s does not support updating records, mailgate API method message_update() is missing'
                                % model)

        if not len(res_ids):
            new_res_id, attachment_ids = create_record(msg)
            res_ids = [new_res_id]

        # Store messages
        context.update({'model': model})
        if hasattr(model_pool, 'history'):
            model_pool.history(cr,
                               uid,
                               res_ids,
                               _('receive'),
                               history=True,
                               subject=msg.get('subject'),
                               email=msg.get('to'),
                               details=msg.get('body'),
                               email_from=msg.get('from'),
                               email_cc=msg.get('cc'),
                               message_id=msg.get('message-id'),
                               references=msg.get('references', False)
                               or msg.get('in-reply-to', False),
                               attach=attachment_ids or attachments.items(),
                               email_date=msg.get('date'),
                               context=context)
        else:
            self.history(cr,
                         uid,
                         model,
                         res_ids,
                         msg,
                         attachment_ids,
                         context=context)
        self.email_forward(cr, uid, model, res_ids, msg_txt)
        return new_res_id

Пример #11

Показать файл

Файл: mail_gateway.py Проект: goldenboy/razvoj

    def process_email(self, cr, uid, model, message, custom_values=None, attach=True, context=None):
        """Process an incoming email and create or update a record of ``model``.

        The raw message is parsed into a plain dict (headers, body and
        attachments). When its ``References``/``In-Reply-To`` headers (or,
        failing that, its subject) designate an existing record, that record
        is updated through the model's ``message_update()``; otherwise a new
        record is created. The message is then logged through ``history()``
        and passed to ``email_forward()``.

        @param self: The object pointer
        @param cr: the current row, from the database cursor,
        @param uid: the current user's ID for security checks,
        @param model: OpenObject Model
        @param message: Email details, passed as a string or an xmlrpclib.Binary
        @param custom_values: optional dict of values written on a record
            created through the model's ``message_new()``; anything that is
            not a dict is ignored
        @param attach: when true, named application/image parts are kept as
            attachments; when false their payload is appended to the body
        @param context: A standard dictionary for contextual values
        @return: id of the newly created record, or False when only existing
            records were updated
        @raise NotImplementedError: when a referenced record exists but the
            model has no ``message_update()`` method
        """

        # extract message bytes, we are forced to pass the message as binary because
        # we don't know its encoding until we parse its headers and hence can't
        # convert it to utf-8 for transport between the mailgate script and here.
        if isinstance(message, xmlrpclib.Binary):
            message = str(message.data)

        if context is None:
            context = {}

        if not isinstance(custom_values, dict):
            custom_values = {}

        model_pool = self.pool.get(model)

        # Create New Record into particular model
        def create_record(msg):
            """Create a record from ``msg``; return (record id, attachment ids)."""
            att_ids = []
            if hasattr(model_pool, "message_new"):
                res_id = model_pool.message_new(cr, uid, msg, context=context)
                if custom_values:
                    model_pool.write(cr, uid, [res_id], custom_values, context=context)
            else:
                data = {
                    "name": msg.get("subject"),
                    "email_from": msg.get("from"),
                    "email_cc": msg.get("cc"),
                    "user_id": False,
                    "description": msg.get("body"),
                    "state": "draft",
                }
                data.update(self.get_partner(cr, uid, msg.get("from"), context=context))
                res_id = model_pool.create(cr, uid, data, context=context)

                if attach:
                    # ``attachments`` is the dict filled by the enclosing
                    # function before create_record() is called.
                    for attachment in msg.get("attachments", []):
                        data_attach = {
                            "name": attachment,
                            "datas": binascii.b2a_base64(str(attachments.get(attachment))),
                            "datas_fname": attachment,
                            "description": "Mail attachment",
                            "res_model": model,
                            "res_id": res_id,
                        }
                        att_ids.append(self.pool.get("ir.attachment").create(cr, uid, data_attach))

            return res_id, att_ids

        # Warning: message_from_string doesn't always work correctly on unicode,
        # we must use utf-8 strings here :-(
        if isinstance(message, unicode):
            message = message.encode("utf-8")
        msg_txt = email.message_from_string(message)
        message_id = msg_txt.get("message-id", False)
        msg = {}

        if not message_id:
            # Very unusual situation, but we should be fault-tolerant here.
            # The generated id is stringified: header values must be strings.
            message_id = str(time.time())
            msg_txt["message-id"] = message_id
            _logger.info("Message without message-id, generating a random one: %s", message_id)

        msg["id"] = message_id
        msg["message-id"] = message_id

        # Header presence is tested on the Message object itself, which
        # matches header names case-insensitively ("Cc" as well as "CC").
        decoded_headers = (
            ("Subject", "subject"),
            ("From", "from"),
            ("Delivered-To", "to"),
            ("CC", "cc"),
            ("Reply-To", "reply"),
            ("Date", "date"),
        )
        for header, key in decoded_headers:
            if header in msg_txt:
                msg[key] = self._decode_header(msg_txt.get(header))

        raw_headers = (
            ("Content-Type", "content-type"),
            ("Content-Transfer-Encoding", "encoding"),
            ("References", "references"),
            ("In-Reply-To", "in-reply-to"),
        )
        for header, key in raw_headers:
            if header in msg_txt:
                msg[key] = msg_txt.get(header)

        if "X-Priority" in msg_txt:
            # keep only the leading token, e.g. "3" out of "3 (Normal)"
            msg["priority"] = msg_txt.get("X-Priority", "3 (Normal)").split(" ")[0]

        if not msg_txt.is_multipart() or "text/plain" in msg.get("content-type", ""):
            encoding = msg_txt.get_content_charset()
            body = msg_txt.get_payload(decode=True)
            if "text/html" in msg_txt.get("Content-Type", ""):
                body = tools.html2plaintext(body)
            msg["body"] = tools.ustr(body, encoding)

        attachments = {}
        has_plain_text = False
        if msg_txt.is_multipart() or "multipart/alternative" in msg.get("content-type", ""):
            body = ""
            for part in msg_txt.walk():
                maintype = part.get_content_maintype()
                if maintype == "multipart":
                    continue

                encoding = part.get_content_charset()
                filename = part.get_filename()
                if maintype == "text":
                    content = part.get_payload(decode=True)
                    if filename:
                        attachments[filename] = content
                    elif not has_plain_text:
                        # main content parts should have 'text' maintype
                        # and no filename. we ignore the html part if
                        # there is already a plaintext part without filename,
                        # because presumably these are alternatives.
                        content = tools.ustr(content, encoding)
                        subtype = part.get_content_subtype()
                        if subtype == "html":
                            body = tools.ustr(tools.html2plaintext(content))
                        elif subtype == "plain":
                            body = content
                            has_plain_text = True
                elif maintype in ("application", "image"):
                    if filename and attach:
                        attachments[filename] = part.get_payload(decode=True)
                    else:
                        res = part.get_payload(decode=True)
                        body += tools.ustr(res, encoding)

            msg["body"] = body
            msg["attachments"] = attachments

        res_ids = []
        attachment_ids = []
        new_res_id = False
        references = msg.get("references") or msg.get("in-reply-to")
        if references:
            if "\r\n" in references:
                references = references.split("\r\n")
            else:
                references = references.split(" ")
            # A reply may come without a Subject header; fall back to an
            # empty string so the subject lookup cannot fail.
            subject = msg.get("subject") or ""
            for ref in references:
                match = tools.misc.reference_re.search(ref.strip()) or tools.misc.res_re.search(subject)
                res_id = match.group(1) if match else None
                if not res_id:
                    continue
                res_id = int(res_id)
                if res_id in res_ids:
                    # Several references (or the subject fallback) can point
                    # to the same record: update and log it only once.
                    continue
                if model_pool.exists(cr, uid, res_id):
                    res_ids.append(res_id)
                    if not hasattr(model_pool, "message_update"):
                        raise NotImplementedError(
                            "model %s does not support updating records, mailgate API method message_update() is missing"
                            % model
                        )
                    model_pool.message_update(cr, uid, [res_id], {}, msg, context=context)

        if not res_ids:
            new_res_id, attachment_ids = create_record(msg)
            res_ids = [new_res_id]

        # Store messages
        context.update({"model": model})
        if hasattr(model_pool, "history"):
            model_pool.history(
                cr,
                uid,
                res_ids,
                _("receive"),
                history=True,
                subject=msg.get("subject"),
                email=msg.get("to"),
                details=msg.get("body"),
                email_from=msg.get("from"),
                email_cc=msg.get("cc"),
                message_id=msg.get("message-id"),
                references=msg.get("references", False) or msg.get("in-reply-to", False),
                attach=attachments.items(),
                email_date=msg.get("date"),
                context=context,
            )
        else:
            self.history(cr, uid, model, res_ids, msg, attachment_ids, context=context)
        self.email_forward(cr, uid, model, res_ids, msg_txt)
        return new_res_id

Пример #12

Показать файл

    def parse_message(self, message, save_original=False):
        """Parses a string or email.message.Message representing an
           RFC-2822 email, and returns a generic dict holding the
           message details.

           :param message: the message to parse
           :type message: email.message.Message | string | unicode
           :param bool save_original: whether the returned dict
               should include an ``original`` entry with the base64
               encoded source of the message.
           :rtype: dict
           :return: A dict with the following structure, where each
                    field may not be present if missing in original
                    message::

                    { 'message-id': msg_id,
                      'subject': subject,
                      'from': from,
                      'to': to,
                      'cc': cc,
                      'headers' : { 'X-Mailer': mailer,
                                    #.. all X- headers...
                                  },
                      'subtype': msg_mime_subtype,
                      'body_text': plaintext_body,
                      'body_html': html_body,
                      'attachments': [('file1', 'bytes'),
                                      ('file2', 'bytes')],
                      # ...
                      'original': source_of_email,
                    }

           ``date`` is stored as a ``%Y-%m-%d %H:%M:%S`` string in UTC.
           Text parts that carry a filename are returned as attachments
           only and never replace the message body. ``body`` and
           ``sub_type`` duplicate ``body_text`` and ``subtype`` for
           backwards compatibility.
        """
        msg_txt = message
        if isinstance(message, str):
            if message.startswith("undefinedFrom -"):
                # NOTE(review): presumably rewrites a malformed first line so
                # it parses as an ordinary header -- confirm against the
                # mail source that produces it.
                msg_txt = email.message_from_string(message.replace("undefinedFrom", "undefinedFromPop:", 1))
            else:
                msg_txt = email.message_from_string(message)

        # Warning: message_from_string doesn't always work correctly on unicode,
        # we must use utf-8 strings here :-(
        if isinstance(message, unicode):
            message = message.encode("utf-8")
            msg_txt = email.message_from_string(message)

        message_id = msg_txt.get("message-id", False)

        msg = {}

        if save_original:
            # save original, we need to be able to read the original email sometimes
            msg["original"] = message.as_string() if isinstance(message, Message) else message
            msg["original"] = base64.b64encode(msg["original"])  # binary fields are b64

        if not message_id:
            # Very unusual situation, but we should be fault-tolerant here
            message_id = str(time.time())  # KGB primax problem
            msg_txt["message-id"] = message_id
            _logger.info("Parsing Message without message-id, generating a random one: %s", message_id)

        msg["id"] = message_id
        msg["message-id"] = message_id

        # Header presence is tested on the Message object itself, which
        # matches names case-insensitively, so "CC", "Cc" and "cc" are all
        # recognised. Order matters: Delivered-To overrides To.
        decoded_headers = (
            ("Subject", "subject"),
            ("To", "to"),
            ("Delivered-To", "to"),
            ("CC", "cc"),
            ("Reply-To", "reply"),
        )
        for header, key in decoded_headers:
            if header in msg_txt:
                msg[key] = decode(msg_txt.get(header))

        if "From" in msg_txt:
            msg["from"] = decode(msg_txt.get("From") or msg_txt.get_unixfrom())

        if "Date" in msg_txt:
            try:
                date_hdr = decode(msg_txt.get("Date"))
                parsed_date = dateutil.parser.parse(date_hdr, fuzzy=True)
                if parsed_date.utcoffset() is None:
                    # naive datetime, so we arbitrarily decide to make it
                    # UTC, there's no better choice. Should not happen,
                    # as RFC2822 requires timezone offset in Date headers.
                    stored_date = parsed_date.replace(tzinfo=pytz.utc)
                else:
                    stored_date = parsed_date.astimezone(pytz.utc)
            except Exception:
                _logger.warning(
                    "Failed to parse Date header %r in incoming mail "
                    "with message-id %r, assuming current date/time.",
                    msg_txt.get("Date"),
                    message_id,
                )
                # Every other path stores UTC, so the fallback must be UTC
                # too rather than the server's local time.
                stored_date = datetime.datetime.utcnow()

            msg["date"] = stored_date.strftime("%Y-%m-%d %H:%M:%S")

        raw_headers = (
            ("Content-Type", "content-type"),
            ("Content-Transfer-Encoding", "encoding"),
            ("References", "references"),
            ("In-Reply-To", "in-reply-to"),
        )
        for header, key in raw_headers:
            if header in msg_txt:
                msg[key] = msg_txt.get(header)

        msg["headers"] = {}
        msg["subtype"] = "plain"
        for name, value in msg_txt.items():
            if name.startswith("X-"):
                msg["headers"][name] = value

        content_type = msg.get("content-type", "")
        if not msg_txt.is_multipart() or "text/plain" in content_type:
            encoding = msg_txt.get_content_charset()
            body = tools.ustr(msg_txt.get_payload(decode=True), encoding, errors="replace")
            if "text/html" in content_type:
                msg["body_html"] = body
                msg["subtype"] = "html"
                body = tools.html2plaintext(body)
            msg["body_text"] = tools.ustr(body, encoding, errors="replace")

        attachments = []
        if msg_txt.is_multipart() or "multipart/alternative" in content_type:
            body = ""
            if "multipart/alternative" in content_type:
                msg["subtype"] = "alternative"
            else:
                msg["subtype"] = "mixed"
            for part in msg_txt.walk():
                maintype = part.get_content_maintype()
                if maintype == "multipart":
                    continue

                encoding = part.get_content_charset()
                filename = part.get_filename()
                if maintype == "text":
                    content = part.get_payload(decode=True)
                    if filename:
                        # A named text part is an attached file: keep it as
                        # an attachment and do not let it replace the body.
                        attachments.append((filename, content))
                        continue
                    content = tools.ustr(content, encoding, errors="replace")
                    subtype = part.get_content_subtype()
                    if subtype == "html":
                        msg["body_html"] = content
                        msg["subtype"] = "html"  # html version prevails
                        body = tools.ustr(tools.html2plaintext(content))
                        body = body.replace("&#13;", "")
                    elif subtype == "plain":
                        body = content
                elif maintype in ("application", "image"):
                    if filename:
                        attachments.append((filename, part.get_payload(decode=True)))
                    else:
                        res = part.get_payload(decode=True)
                        body += tools.ustr(res, encoding, errors="replace")

            msg["body_text"] = body
        msg["attachments"] = attachments

        # for backwards compatibility:
        msg["body"] = msg["body_text"]
        msg["sub_type"] = msg["subtype"] or "plain"
        return msg

Пример #13

Показать файл

Файл: mail_message.py Проект: iw3hxn/addons

    def parse_message(self, message, save_original=False):
        """Parses a string or email.message.Message representing an
           RFC-2822 email, and returns a generic dict holding the
           message details.

           :param message: the message to parse
           :type message: email.message.Message | string | unicode
           :param bool save_original: whether the returned dict
               should include an ``original`` entry with the base64
               encoded source of the message.
           :rtype: dict
           :return: A dict with the following structure, where each
                    field may not be present if missing in original
                    message::

                    { 'message-id': msg_id,
                      'subject': subject,
                      'from': from,
                      'to': to,
                      'cc': cc,
                      'headers' : { 'X-Mailer': mailer,
                                    #.. all X- headers...
                                  },
                      'subtype': msg_mime_subtype,
                      'body_text': plaintext_body,
                      'body_html': html_body,
                      'attachments': [('file1', 'bytes'),
                                      ('file2', 'bytes')],
                      # ...
                      'original': source_of_email,
                    }

           ``date`` is stored as a ``%Y-%m-%d %H:%M:%S`` string in UTC.
           Attachment names are made unique within the message. Text parts
           that carry a filename are returned as attachments only and never
           replace the message body. ``body`` and ``sub_type`` duplicate
           ``body_text`` and ``subtype`` for backwards compatibility.
        """

        def unique_filename(attachments, file_name):
            """Return ``file_name``, or ``name-N.ext`` with the smallest
            N >= 1 not already used by an entry of ``attachments``.

            Names without an extension are returned without a trailing dot.
            """
            taken = set(name for name, _content in attachments)
            if '.' in file_name:
                stem, extension = file_name.rsplit('.', 1)
                suffix = '.' + extension
            else:
                stem, suffix = file_name, ''

            count = 0
            candidate = file_name
            while candidate in taken:
                count += 1
                # %-formatting (unlike str.format) also copes with unicode
                # file names under Python 2.
                candidate = '%s-%d%s' % (stem, count, suffix)
            return candidate

        msg_txt = message
        if isinstance(message, str):
            msg_txt = email.message_from_string(message)

        # Warning: message_from_string doesn't always work correctly on unicode,
        # we must use utf-8 strings here :-(
        if isinstance(message, unicode):
            message = message.encode('utf-8')
            msg_txt = email.message_from_string(message)

        message_id = msg_txt.get('message-id', False)
        msg = {}

        if save_original:
            # save original, we need to be able to read the original email sometimes
            msg['original'] = message.as_string() if isinstance(message, Message) \
                else message
            msg['original'] = base64.b64encode(msg['original'])  # binary fields are b64

        if not message_id:
            # Very unusual situation, but we should be fault-tolerant here.
            # The generated id is stringified: header values must be strings.
            message_id = str(time.time())
            msg_txt['message-id'] = message_id
            _logger.info('Parsing Message without message-id, generating a random one: %s', message_id)

        msg['id'] = message_id
        msg['message-id'] = message_id

        # Header presence is tested on the Message object itself, which
        # matches names case-insensitively, so 'CC', 'Cc' and 'cc' are all
        # recognised. Order matters: Delivered-To overrides To.
        decoded_headers = (
            ('Subject', 'subject'),
            ('To', 'to'),
            ('Delivered-To', 'to'),
            ('CC', 'cc'),
            ('Reply-To', 'reply'),
        )
        for header, key in decoded_headers:
            if header in msg_txt:
                msg[key] = decode(msg_txt.get(header))

        if 'From' in msg_txt:
            msg['from'] = decode(msg_txt.get('From') or msg_txt.get_unixfrom())

        if 'Date' in msg_txt:
            try:
                date_hdr = decode(msg_txt.get('Date'))
                parsed_date = dateutil.parser.parse(date_hdr, fuzzy=True)
                if parsed_date.utcoffset() is None:
                    # naive datetime, so we arbitrarily decide to make it
                    # UTC, there's no better choice. Should not happen,
                    # as RFC2822 requires timezone offset in Date headers.
                    stored_date = parsed_date.replace(tzinfo=pytz.utc)
                else:
                    stored_date = parsed_date.astimezone(pytz.utc)
            except Exception:
                _logger.warning('Failed to parse Date header %r in incoming mail '
                                'with message-id %r, assuming current date/time.',
                                msg_txt.get('Date'), message_id)
                # Every other path stores UTC, so the fallback must be UTC
                # too rather than the server's local time.
                stored_date = datetime.datetime.utcnow()

            msg['date'] = stored_date.strftime("%Y-%m-%d %H:%M:%S")

        raw_headers = (
            ('Content-Type', 'content-type'),
            ('Content-Transfer-Encoding', 'encoding'),
            ('References', 'references'),
            ('In-Reply-To', 'in-reply-to'),
        )
        for header, key in raw_headers:
            if header in msg_txt:
                msg[key] = msg_txt.get(header)

        msg['headers'] = {}
        msg['subtype'] = 'plain'
        for name, value in msg_txt.items():
            if name.startswith('X-'):
                msg['headers'][name] = value

        content_type = msg.get('content-type', '')
        if not msg_txt.is_multipart() or 'text/plain' in content_type:
            encoding = msg_txt.get_content_charset()
            body = tools.ustr(msg_txt.get_payload(decode=True), encoding, errors='replace')
            if 'text/html' in content_type:
                msg['body_html'] = body
                msg['subtype'] = 'html'
                body = tools.html2plaintext(body)
            msg['body_text'] = tools.ustr(body, encoding, errors='replace')

        attachments = []
        if msg_txt.is_multipart() or 'multipart/alternative' in content_type:
            body = ""
            if 'multipart/alternative' in content_type:
                msg['subtype'] = 'alternative'
            else:
                msg['subtype'] = 'mixed'
            for part in msg_txt.walk():
                maintype = part.get_content_maintype()
                if maintype == 'multipart':
                    continue

                encoding = part.get_content_charset()
                filename = part.get_filename()
                if maintype == 'text':
                    content = part.get_payload(decode=True)
                    if filename:
                        # A named text part is an attached file: keep it as
                        # an attachment and do not let it replace the body.
                        attachments.append((unique_filename(attachments, filename), content))
                        continue
                    content = tools.ustr(content, encoding, errors='replace')
                    subtype = part.get_content_subtype()
                    if subtype == 'html':
                        msg['body_html'] = content
                        msg['subtype'] = 'html'  # html version prevails
                        body = tools.ustr(tools.html2plaintext(content))
                        body = body.replace('&#13;', '')
                    elif subtype == 'plain':
                        body = content
                elif maintype in ('application', 'image'):
                    if filename:
                        attachments.append((unique_filename(attachments, filename), part.get_payload(decode=True)))
                    else:
                        res = part.get_payload(decode=True)
                        body += tools.ustr(res, encoding, errors='replace')

            msg['body_text'] = body
        msg['attachments'] = attachments

        # for backwards compatibility:
        msg['body'] = msg['body_text']
        msg['sub_type'] = msg['subtype'] or 'plain'
        return msg

Пример #14

Показать файл

def Qatnf(psr, Query=('RaJ', 'DecJ')):
    """Look up catalogue parameters for a pulsar via the ATNF psrcat web form.

    The request is sent to the ``proc_form.php`` page of the ATNF pulsar
    catalogue, the HTML answer is flattened with ``html2plaintext`` and the
    table rows are picked out with regular expressions.

    :param psr: pulsar name; a leading ``'PSR '`` token is dropped and
        ``'all'`` is turned into an empty name, which selects the branch
        that collects every matched row.
    :param Query: one parameter name or a sequence of parameter names; each
        is submitted as a ``name=name`` form field.
    :return: for a named pulsar, the value of ``_retrieve`` for the first
        matched row; for ``'all'``, a list with one ``_retrieve`` value per
        matched row.  If anything in the web query or the parsing raises,
        a notice is printed and ``LQatnf(psr, Query)`` is returned instead.
    """
    if psr == 'all':
        psr = ''
    name = psr.split()
    if len(name) == 2 and name[0] == 'PSR':
        psr = name[1]
    try:
        # Try to query the webserver first.
        url = 'http://www.atnf.csiro.au/research/pulsar/psrcat/proc_form.php'
        if isinstance(Query, basestring):
            Query = [Query]
        url_values = urllib.urlencode(dict((Q, Q) for Q in Query))

        # The remaining form fields are appended section by section, in the
        # same order the form segments were originally concatenated.
        form_sections = (
            {
                'ephemeris': 'short',
                'startUserDefined': 'true',
                'style': 'Publication quality',
                'sort_attr': 'jname',
                'sort_order': 'asc'
            },
            {'pulsar_names': psr},
            {
                'x_axis': '',
                'x_scale': 'linear',
                'y_axis': '',
                'y_scale': 'linear',
                'no_value': 'None',
                'coords_unit': 'raj/decj',
                'radius': '',
                'coords_1': '',
                'coords_2': '',
                'fsize': '3'
            },
            {'state': 'query'},
            {'table_bottom.x': '100', 'table_bottom.y': '100'},
        )
        for section in form_sections:
            url_values += '&' + urllib.urlencode(section)
        full_url = url + '?' + url_values

        # Order the requested keys by where they ended up in the encoded
        # query string; that order is handed to _retrieve with each row.
        order = sorted((url_values.find(key), key) for key in Query)
        keylist = [entry[1] for entry in order]

        the_page = urllib2.urlopen(full_url).read()
        text = html2plaintext(the_page, encoding='UTF-8')
        Nkey = len(keylist)
        # One table row: a running number followed by up to Nkey value groups.
        p = re.compile(
            r'(?P<Number>\d+)(?P<line>(\s+(?P<value>-*\+*(\d{2,2}\:)*(\d+\.*\d*(e-*\+*\d+)*)|NONE|([JB]\d{4}[-\+]\d{2,4}))(\s+(?P<uncertainty>\(\d{1,2}\))){0,1}(\s+(?P<ref>[a-z]+\+*\d+|\[\d+\]))*){1,%d})'
            % (Nkey), re.U)
        # One value group inside a row (value, optional uncertainty, refs).
        pattern = re.compile(
            r'(?P<number>(?P<value>-*\+*(\d{2,2}\:)*\d+(\.\d+){0,1}(e-*\+*\d+){0,1}|NONE|[JB]\d{4}[-\+]\d{2,4})(\s+(?P<uncertainty>\(\d{1,2}\))){0,1}(\s+(?P<ref>[a-z]+\+{0,1}\d+|\[\d+\]))*)'
        )

        if psr != '':
            # p.search() yields None when no row matches; the resulting
            # AttributeError is handled by the fallback below.
            m = p.search(text)
            return _retrieve(m.group('line'), keylist, pattern)

        results = []
        for m in p.finditer(text):
            results.append(_retrieve(m.group('line'), keylist, pattern))
        return results

    except Exception:
        # Deliberate best-effort fallback, but no longer swallowing
        # KeyboardInterrupt / SystemExit as the bare ``except:`` did.
        print('web server query failed, try local database.\n')
        return LQatnf(psr, Query)

Пример #15

Показать файл

    def parse_message(self, message):
        """Parse the source of an email into a plain dict.

        :param message: raw message source; unicode input is encoded to
            UTF-8 before being handed to ``email.message_from_string``.
        :return: dict with ``id`` and ``message-id`` (``False`` when the
            header is absent) and ``body``; plus, for each header that is
            present, ``subject``, ``content-type``, ``from``, ``to`` (taken
            from Delivered-To), ``cc``, ``reply``, ``date``, ``encoding``,
            ``references``, ``in-reply-to`` and ``priority``.  Multipart
            messages also get ``attachments``, a dict mapping file name to
            decoded payload.
        """
        #TOCHECK: put this function in mailgateway module
        if isinstance(message, unicode):
            message = message.encode('utf-8')
        msg_txt = email.message_from_string(message)
        message_id = msg_txt.get('message-id', False)
        msg = {'id': message_id, 'message-id': message_id}

        # Message.get() matches header names case-insensitively, whereas a
        # membership test on msg_txt.keys() is case-sensitive and would miss
        # e.g. "Cc" or "Reply-To" when probing for "CC" / "Reply-to".
        decoded_headers = (
            ('Subject', 'subject'),
            ('From', 'from'),
            ('Delivered-To', 'to'),
            ('CC', 'cc'),
            ('Reply-To', 'reply'),
            ('Date', 'date'),
        )
        for header, key in decoded_headers:
            value = msg_txt.get(header)
            if value is not None:
                msg[key] = self._decode_header(value)

        raw_headers = (
            ('Content-Type', 'content-type'),
            ('Content-Transfer-Encoding', 'encoding'),
            ('References', 'references'),
            ('In-Reply-To', 'in-reply-to'),
        )
        for header, key in raw_headers:
            value = msg_txt.get(header)
            if value is not None:
                msg[key] = value

        priority = msg_txt.get('X-Priority')
        if priority is not None:
            # keep only the leading token, e.g. "3" out of "3 (Normal)"
            msg['priority'] = priority.split(' ')[0]

        # The content type is stored under the lowercase key above, so it
        # has to be read back with that same key.
        content_type = msg.get('content-type', '')
        multipart = msg_txt.is_multipart()

        if not multipart or 'text/plain' in content_type:
            encoding = msg_txt.get_content_charset()
            body = msg_txt.get_payload(decode=True)
            msg['body'] = tools.ustr(body, encoding)

        attachments = {}
        has_plain_text = False
        if multipart or 'multipart/alternative' in content_type:
            body = ""
            for part in msg_txt.walk():
                maintype = part.get_content_maintype()
                if maintype == 'multipart':
                    continue

                encoding = part.get_content_charset()
                filename = part.get_filename()
                if maintype == 'text':
                    content = part.get_payload(decode=True)
                    if filename:
                        attachments[filename] = content
                    elif not has_plain_text:
                        # main content parts should have 'text' maintype
                        # and no filename. we ignore the html part if
                        # there is already a plaintext part without filename,
                        # because presumably these are alternatives.
                        content = tools.ustr(content, encoding)
                        subtype = part.get_content_subtype()
                        if subtype == 'html':
                            body = tools.ustr(tools.html2plaintext(content))
                        elif subtype == 'plain':
                            body = content
                            has_plain_text = True
                elif maintype in ('application', 'image'):
                    payload = part.get_payload(decode=True)
                    if filename:
                        attachments[filename] = payload
                    else:
                        # unnamed application/image parts are appended to
                        # the body text rather than stored as attachments
                        body += tools.ustr(payload, encoding)

            msg['body'] = body
            msg['attachments'] = attachments
        return msg

Пример #16

Показать файл

Файл: mail_message.py Проект: CloudWareChile/OpenChile

    def parse_message(self, message, save_original=False):
        """Parses a string or email.message.Message representing an
           RFC-2822 email, and returns a generic dict holding the
           message details.

           :param message: the message to parse
           :type message: email.message.Message | string | unicode
           :param bool save_original: whether the returned dict
               should include an ``original`` entry with the base64
               encoded source of the message.
           :rtype: dict
           :return: A dict with the following structure, where each
                    field may not be present if missing in original
                    message::

                    { 'message-id': msg_id,
                      'subject': subject,
                      'from': from,
                      'to': to,
                      'cc': cc,
                      'headers' : { 'X-Mailer': mailer,
                                    #.. all X- headers...
                                  },
                      'subtype': msg_mime_subtype,
                      'body_text': plaintext_body,
                      'body_html': html_body,
                      'attachments': [('file1', 'bytes'),
                                      ('file2', 'bytes')],
                      # ...
                      'original': source_of_email,
                    }
        """
        msg_txt = message
        if isinstance(message, str):
            msg_txt = email.message_from_string(message)

        # Warning: message_from_string doesn't always work correctly on unicode,
        # we must use utf-8 strings here :-(
        if isinstance(message, unicode):
            message = message.encode('utf-8')
            msg_txt = email.message_from_string(message)

        message_id = msg_txt.get('message-id', False)
        msg = {}

        if save_original:
            # save original, we need to be able to read the original email sometimes
            if isinstance(message, Message):
                original = message.as_string()
            else:
                original = message
            msg['original'] = base64.b64encode(original)  # binary fields are b64

        if not message_id:
            # Very unusual situation, but we should be fault-tolerant here
            message_id = time.time()
            msg_txt['message-id'] = message_id
            _logger.info('Parsing Message without message-id, generating a random one: %s', message_id)

        msg['id'] = message_id
        msg['message-id'] = message_id

        # Message.get() matches header names case-insensitively, whereas a
        # membership test on msg_txt.keys() is case-sensitive; probing with
        # get() therefore also covers spellings such as "Cc" vs "CC".
        # Order matters for 'to': Delivered-To overrides To when both exist.
        decoded_headers = (
            ('Subject', 'subject'),
            ('To', 'to'),
            ('Delivered-To', 'to'),
            ('Cc', 'cc'),
            ('Reply-To', 'reply'),
        )
        for header, key in decoded_headers:
            value = msg_txt.get(header)
            if value is not None:
                msg[key] = decode(value)

        from_hdr = msg_txt.get('From')
        if from_hdr is not None:
            # an empty From header falls back to the envelope "From " line
            msg['from'] = decode(from_hdr or msg_txt.get_unixfrom())

        date_value = msg_txt.get('Date')
        if date_value is not None:
            date_hdr = decode(date_value)
            msg['date'] = dateutil.parser.parse(date_hdr).strftime("%Y-%m-%d %H:%M:%S")

        raw_headers = (
            ('Content-Type', 'content-type'),
            ('Content-Transfer-Encoding', 'encoding'),
            ('References', 'references'),
            ('In-Reply-To', 'in-reply-to'),
        )
        for header, key in raw_headers:
            value = msg_txt.get(header)
            if value is not None:
                msg[key] = value

        msg['headers'] = dict((name, value) for name, value in msg_txt.items()
                              if name.startswith('X-'))
        msg['subtype'] = 'plain'

        content_type = msg.get('content-type', '')
        multipart = msg_txt.is_multipart()

        if not multipart or 'text/plain' in content_type:
            encoding = msg_txt.get_content_charset()
            # Decode with the declared charset BEFORE any HTML handling, the
            # same way the multipart branch below treats each text part;
            # previously the raw payload went to html2plaintext and the
            # charset was applied to the converter's output instead.
            body = tools.ustr(msg_txt.get_payload(decode=True), encoding)
            if 'text/html' in content_type:
                msg['body_html'] = body
                msg['subtype'] = 'html'
                body = tools.ustr(tools.html2plaintext(body))
            msg['body_text'] = body

        attachments = []
        if multipart or 'multipart/alternative' in content_type:
            body = ""
            msg['subtype'] = 'alternative' if 'multipart/alternative' in content_type else 'mixed'
            for part in msg_txt.walk():
                maintype = part.get_content_maintype()
                if maintype == 'multipart':
                    continue

                encoding = part.get_content_charset()
                filename = part.get_filename()
                if maintype == 'text':
                    content = part.get_payload(decode=True)
                    if filename:
                        # named text parts are attachments, but still feed
                        # the body below (unchanged original behaviour)
                        attachments.append((filename, content))
                    content = tools.ustr(content, encoding)
                    subtype = part.get_content_subtype()
                    if subtype == 'html':
                        msg['body_html'] = content
                        msg['subtype'] = 'html'  # html version prevails
                        body = tools.ustr(tools.html2plaintext(content))
                    elif subtype == 'plain':
                        body = content
                elif maintype in ('application', 'image'):
                    payload = part.get_payload(decode=True)
                    if filename:
                        attachments.append((filename, payload))
                    else:
                        body += tools.ustr(payload, encoding)

            msg['body_text'] = body
        msg['attachments'] = attachments

        # for backwards compatibility:
        msg['body'] = msg['body_text']
        msg['sub_type'] = msg['subtype'] or 'plain'
        return msg

Пример #17

Показать файл

Файл: PyATNF.py Проект: zhuww/psrtools

def Qatnf(psr, Query=('RaJ', 'DecJ'), condition=None):
    """Look up catalogue parameters via the ATNF psrcat web form.

    The request is sent to the ``proc_form.php`` page of the ATNF pulsar
    catalogue; the HTML answer is flattened with ``html2plaintext``,
    stripped of ``[n]`` markers and non-ASCII characters, trimmed, and then
    split into numbered rows that are handed to ``_retrieve``.

    :param psr: pulsar name; a leading ``'PSR '`` token is dropped and
        ``'all'`` is turned into an empty name, which selects the branch
        that collects every matched row.
    :param Query: one parameter name or a sequence of parameter names; each
        is submitted as a ``name=name`` form field.
    :param condition: when truthy, sent as the ``condition`` form field
        next to ``pulsar_names``.
    :return: for a named pulsar, the value of ``_retrieve`` for the first
        matched row; otherwise a list of the non-``None`` ``_retrieve``
        values, one per matched row.  Rows for which ``_retrieve`` raises
        are printed and skipped.
    """
    if psr == 'all':
        psr = ''
    name = psr.split()
    if len(name) == 2 and name[0] == 'PSR':
        psr = name[1]

    # Query the webserver.
    url = 'http://www.atnf.csiro.au/research/pulsar/psrcat/proc_form.php'
    if isinstance(Query, basestring):
        Query = [Query]
    url_values = urllib.urlencode(dict((Q, Q) for Q in Query))

    if condition:
        selection = {'pulsar_names':psr, 'condition':condition}
    else:
        selection = {'pulsar_names':psr}

    # Other 'style' values tried earlier: 'Publication quality' and
    # 'Long with last digit error'.
    # NOTE(review): 'erros' looks like a typo for 'errors', but the parsing
    # below may be tuned to what the server returns for this exact value --
    # confirm against the live form before changing it.
    form_sections = (
        {'ephemeris':'short', 'startUserDefined':'true','style':'Short without erros','sort_attr':'jname','sort_order':'asc'},
        selection,
        {'x_axis':'','x_scale':'linear','y_axis':'','y_scale':'linear',
         'no_value':'None','coords_unit':'raj/decj','radius':'','coords_1':'',
         'coords_2':'','fsize':'3' },
        {'state':'query'},
        {'table_bottom.x':'100', 'table_bottom.y':'100'},
    )
    for section in form_sections:
        url_values += '&'+urllib.urlencode(section)
    full_url = url + '?' + url_values

    # Order the requested keys by where they ended up in the encoded query
    # string; that order is handed to _retrieve with each row.
    order = sorted((url_values.find(key), key) for key in Query)
    keylist = [entry[1] for entry in order]

    the_page = urllib2.urlopen(full_url).read()
    text = html2plaintext(the_page, encoding='UTF-8')
    # filter the text for artifacts: "[n]" markers
    artifacts = re.compile(r'(\[\d+\])', re.VERBOSE)
    text = artifacts.sub('', text)
    text = text.encode('ascii', 'ignore')
    # Drop empty lines and lines starting with " http", then cut the first
    # and last 22 remaining lines (presumably page header/footer -- verify
    # against the current page layout).
    kept = [x for x in text.split('\n') if x != '' and not x.startswith(' http')]
    text = '\n'.join(kept[22:-22])+'\n'

    # One table row: a running number, then the rest of the line.
    p = re.compile(r"""(?P<Number>\d+)\s+
            (?P<line>(.*))\n""" , re.VERBOSE)
    # One value group inside a row (value, optional uncertainty, optional ref).
    pattern = re.compile(r"""
            (?<=\s)(?P<value>((-*\+*(\d{2}\:){1,2}){0,1}\d+(\.\d*){0,1}([eE]-*\+*\d+){0,1})|NONE|\*|([JB]\d{4}[-\+]\d{2,5}([A-Z]|-\d|[a-z]{1,2}){0,1})|(AXP|HE|NRAD|XDINS|XINS|RRAT)(\,(AXP|HE|NRAD|XDINS|XINS|RRAT)){0,1})
            ((\s+)(?P<uncertainty>\d+)(?=\s)){0,1}
            ((\s*|\[)(?P<ref>(([a-z]{2,4}\+{0,1}\d{2})(\,\s+([a-z]+\+{0,1}\d{2}))*)|(?<=\s)\*)\]*){0,1}(\s%){0,1}
            """, re.VERBOSE)

    if psr != '':
        # NOTE: p.search() returns None when no row matches, in which case
        # the next line raises AttributeError (unchanged behaviour).
        m = p.search(text)
        # the leading blank satisfies the (?<=\s) lookbehind of `pattern`
        line = ' '+m.group('line')
        return _retrieve(line, keylist, pattern)

    results = []
    for m in p.finditer(text):
        # bound before the try so the handler below can always print it
        line = m.group('line')
        try:
            info = _retrieve(line, keylist, pattern)
        except Exception:
            # best effort: report the offending row and carry on, without
            # swallowing KeyboardInterrupt / SystemExit
            print('line %s: \n%s' % (m.group('Number'),line))
            continue
        if info is not None:
            results.append(info)

    return results