Python parse_mail примеры, patchwork.parser.parse_mail Python примеры использования

Пример #1

0

Показать файл

Файл: parsemail.py Проект: seanfarley/patchwork

    def handle(self, *args, **options):
        """Parse a single mail, read from a file or stdin, and exit.

        The input is ``args[0]`` when positional arguments are given,
        otherwise ``options['infile']``; when that value is falsy the mail
        is read from stdin. ``options['list_id']`` is passed on to
        ``parse_mail``.

        Exit status:
            0 -- ``parse_mail`` returned a truthy result
            1 -- ``parse_mail`` returned a falsy result, or raised
        """
        infile = args[0] if args else options['infile']

        if infile:
            logger.info('Parsing mail loaded by filename')
            if six.PY3:
                # read raw bytes on Python 3 and let the email package decode
                with open(infile, 'rb') as file_:
                    mail = email.message_from_binary_file(file_)
            else:
                with open(infile) as file_:
                    mail = email.message_from_file(file_)
        else:
            logger.info('Parsing mail loaded from stdin')
            if six.PY3:
                mail = email.message_from_binary_file(sys.stdin.buffer)
            else:
                mail = email.message_from_file(sys.stdin)

        try:
            result = parse_mail(mail, options['list_id'])
        except Exception:
            logger.exception('Error when parsing incoming email',
                             extra={'mail': mail.as_string()})
            # Report the failure to the caller; without this the command
            # would end normally and look like a successful run.
            sys.exit(1)

        if result:
            sys.exit(0)
        logger.warning('Failed to parse mail')
        sys.exit(1)

Пример #2

0

Показать файл

Файл: parsearchive.py Проект: wengpingbo/patchwork

def parse_mbox(path, list_id):
    """Parse every message of the mbox at ``path`` and print a summary.

    Each message is passed to ``parse_mail`` together with ``list_id``.
    Truthy results are counted by their type, falsy results are counted as
    dropped, and messages for which ``parse_mail`` raises
    ``django.db.utils.IntegrityError`` are counted as duplicates.

    :param path: Path of the mbox file
    :param list_id: Value forwarded to ``parse_mail`` for every message
    """
    results = {
        models.Patch: 0,
        models.CoverLetter: 0,
        models.Comment: 0,
    }
    duplicates = 0
    dropped = 0

    mbox = mailbox.mbox(path)
    try:
        # take the size while the mailbox is still open; it is used twice
        total = len(mbox)
        for msg in mbox:
            try:
                obj = parse_mail(msg, list_id)
                if obj:
                    results[type(obj)] += 1
                else:
                    dropped += 1
            except django.db.utils.IntegrityError:
                duplicates += 1
    finally:
        # always release the underlying file, even if parsing blows up
        mbox.close()

    print('Processed %(total)d messages -->\n'
          '  %(covers)4d cover letters\n'
          '  %(patches)4d patches\n'
          '  %(comments)4d comments\n'
          '  %(duplicates)4d duplicates\n'
          '  %(dropped)4d dropped\n'
          'Total: %(new)s new entries' % {
              'total': total,
              'covers': results[models.CoverLetter],
              'patches': results[models.Patch],
              'comments': results[models.Comment],
              'duplicates': duplicates,
              'dropped': dropped,
              'new': total - duplicates - dropped,
          })

Пример #3

0

Показать файл

Файл: parsemail.py Проект: getpatchwork/patchwork

    def handle(self, *args, **options):
        """Read one mail from a file or stdin, parse it and exit.

        ``args[0]`` names the input file when positional arguments are
        present, otherwise ``options['infile']`` is used; a falsy value means
        "read from stdin". The mail is handed to ``parse_mail`` along with
        ``options['list_id']``.

        The process exits with 0 if ``parse_mail`` returns something truthy
        and with 1 if it returns something falsy or raises an exception.
        """
        infile = args[0] if args else options['infile']

        if infile:
            logger.info('Parsing mail loaded by filename')
            if six.PY3:
                # binary mode: the email package does the decoding on Python 3
                with open(infile, 'rb') as file_:
                    mail = email.message_from_binary_file(file_)
            else:
                with open(infile) as file_:
                    mail = email.message_from_file(file_)
        else:
            logger.info('Parsing mail loaded from stdin')
            if six.PY3:
                mail = email.message_from_binary_file(sys.stdin.buffer)
            else:
                mail = email.message_from_file(sys.stdin)

        try:
            result = parse_mail(mail, options['list_id'])
        except Exception:
            logger.exception('Error when parsing incoming email',
                             extra={'mail': mail.as_string()})
            # An unexpected error must not look like success to the caller.
            sys.exit(1)

        if result:
            sys.exit(0)
        logger.warning('Failed to parse mail')
        sys.exit(1)

Пример #4

0

Показать файл

    def handle(self, *args, **options):
        """Load one mail (file or stdin), parse it and set the exit status.

        The file name is ``args[0]`` if positional arguments were given,
        else ``options['infile']``; a falsy value selects stdin. An
        ``AttributeError`` while loading is logged and the method returns.
        """
        if args:
            infile = args[0]
        else:
            infile = options['infile']

        try:
            if not infile:
                logger.info('Parsing mail loaded from stdin')
                mail = email.message_from_binary_file(sys.stdin.buffer)
            else:
                logger.info('Parsing mail loaded by filename')
                with open(infile, 'rb') as stream:
                    mail = email.message_from_binary_file(stream)
        except AttributeError:
            logger.warning("Broken email ignored")
            return

        # Exit codes matter here: they separate real errors from expected
        # 'failures'.
        #
        # patch/comment parsed:        0
        # no parseable content found:  0
        # duplicate messages:          0
        # db integrity/other db error: 1
        # broken email (ValueError):   1 (possibly noisy; a different return
        #                                 code could be used if that hurts)
        list_id = options['list_id']
        try:
            if parse_mail(mail, list_id) is None:
                logger.warning('Nothing added to database')
        except DuplicateMailError as duplicate:
            logger.warning('Duplicate mail for message ID %s',
                           duplicate.msgid)
        except Exception as error:  # also covers ValueError
            logger.exception('Error when parsing incoming email: %s',
                             repr(error),
                             extra={'mail': mail.as_string()})
            sys.exit(1)

Пример #5

0

Показать файл

Файл: gitlab2email.py Проект: jeremycline/patchlab

def _record_bridging(
    listid: str, merge_id: int, email: EmailMessage
) -> "BridgedSubmission":
    """
    Create the Patchwork submission records. This would happen when the mail
    hit the mailing list, but doing so now lets us associate them with a
    BridgedSubmission so we can post follow-up comments.

    Args:
        listid: List ID passed to the Patchwork parser and used in the error
            log when no submission was saved.
        merge_id: Stored as ``merge_request`` on the new BridgedSubmission.
        email: The message to record; its ``Message-ID`` extra header is used
            to look up the resulting Submission.

    Returns:
        The saved BridgedSubmission linking the Submission to the merge
        request.

    Raises:
        ValueError: If the emails cannot be parsed by patchwork or is a duplicate.
        Submission.DoesNotExist: If the Submission object isn't created by
            patchwork; this indicates Patchwork has changed in some way or
            there's a bug in this function.
    """
    try:
        patchwork_parser.parse_mail(email.message(), list_id=listid)
    except patchwork_parser.DuplicateMailError:
        _log.error(
            "Message ID %s is already in the database; do not call "
            "_record_bridging twice with the same email",
            email.extra_headers["Message-ID"],
        )
        raise ValueError(email)

    try:
        submission = Submission.objects.get(
            msgid=email.extra_headers["Message-ID"])
    except Submission.DoesNotExist:
        _log.error(
            "Patchwork did not save the email which likely means the subject "
            "match field on the project with listid '%s' is filtering out "
            "emails with subjects like '%s'",
            listid,
            email.subject,
        )
        raise

    bridged_submission = BridgedSubmission(
        submission=submission,
        git_forge=submission.project.git_forge,
        merge_request=merge_id,
        commit=email.extra_headers.get("X-Patchlab-Commit"),
        # emails without an explicit series version are treated as version 1
        series_version=email.extra_headers.get("X-Patchlab-Series-Version", 1),
    )
    bridged_submission.save()
    return bridged_submission

Пример #6

0

Показать файл

Файл: parsearchive.py Проект: seanfarley/patchwork

    def handle(self, *args, **options):
        """Parse every mail of an mbox file and write a summary to stdout.

        The mbox path is ``args[0]`` when that is truthy, otherwise
        ``options['infile']``; the command exits with status 1 when the path
        does not exist. Every message is passed to ``parse_mail`` together
        with ``options['list_id']``. ``IntegrityError`` is counted as a
        duplicate, ``ValueError`` as an error and a falsy result as dropped.
        """
        results = {
            models.Patch: 0,
            models.CoverLetter: 0,
            models.Comment: 0,
        }
        duplicates = 0
        dropped = 0
        errors = 0

        # TODO(stephenfin): Support passing via stdin
        path = args and args[0] or options['infile']
        if not os.path.exists(path):
            self.stdout.write('Invalid path: %s' % path)
            sys.exit(1)

        mbox = mailbox.mbox(path)
        try:
            count = len(mbox)

            logger.info('Parsing %d mails', count)
            for i, msg in enumerate(mbox):
                try:
                    obj = parse_mail(msg, options['list_id'])
                    if obj:
                        results[type(obj)] += 1
                    else:
                        dropped += 1
                except django.db.utils.IntegrityError:
                    duplicates += 1
                except ValueError:
                    # TODO(stephenfin): Perhaps we should store the broken
                    # patch somewhere for future reference?
                    errors += 1

                # progress indicator, refreshed every tenth message
                if (i % 10) == 0:
                    self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                    self.stdout.flush()
        finally:
            # release the mbox file handle even when a message raises
            # something that is not handled above
            mbox.close()

        self.stdout.write(
            'Processed %(total)d messages -->\n'
            '  %(covers)4d cover letters\n'
            '  %(patches)4d patches\n'
            '  %(comments)4d comments\n'
            '  %(duplicates)4d duplicates\n'
            '  %(dropped)4d dropped\n'
            '  %(errors)4d errors\n'
            'Total: %(new)s new entries' % {
                'total': count,
                'covers': results[models.CoverLetter],
                'patches': results[models.Patch],
                'comments': results[models.Comment],
                'duplicates': duplicates,
                'dropped': dropped,
                'errors': errors,
                'new': count - duplicates - dropped - errors,
            })

Пример #7

0

Показать файл

    def test_nack_bridged(self):
        """Assert Nacked-by tags are bridged as labels."""
        comment = """Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Subject: Re: [TEST PATCH] Bring balance to the equals signs
From: Jeremy Cline <*****@*****.**>
To: [email protected]
Date: Mon, 04 Nov 2019 23:00:00 -0000
Message-ID: <*****@*****.**>
X-Patchlab-Patch-Author: Jeremy Cline <*****@*****.**>
X-Patchlab-Merge-Request: https://gitlab/root/patchlab_test/merge_requests/1
X-Patchlab-Commit: a958a0dff5e3c433eb99bc5f18cbcfad77433b0d
In-Reply-To: <*****@*****.**>
List-Id: patchlab.example.com

Hi,

> From: Jeremy Cline <*****@*****.**>
>
> This is a silly change so I can write a test.
>
> Signed-off-by: Jeremy Cline <*****@*****.**>

This is unacceptable.

Nacked-by: Jeremy Cline <*****@*****.**>
"""
        parse_mail(message_from_string(comment), "patchlab.example.com")
        comment = pw_models.Comment.objects.first()
        models.BridgedSubmission.objects.create(
            git_forge=models.GitForge.objects.get(pk=1),
            submission=comment.submission,
            merge_request=2,
        )

        merge_request, note = bridge.submit_gitlab_comment(
            self.gitlab, comment)

        self.assertEqual(merge_request.labels,
                         ["Nacked-by: [email protected]"])

Пример #8

0

Показать файл

Файл: test_models.py Проект: jeremycline/patchlab

 def setUp(self):
     """Create the project, forge, parsed mail and branch used by the tests."""
     super().setUp()

     list_id = "kernel.lists.fedoraproject.org"
     project_fields = {
         "linkname": "ark",
         "name": "ARK",
         "listid": list_id,
         "listemail": "*****@*****.**",
     }
     self.project = pw_models.Project.objects.create(**project_fields)
     self.forge = models.GitForge.objects.create(
         project=self.project,
         host="gitlab.example.com",
         forge_id=1,
     )

     # Feed the canned merge-request mail through the Patchwork parser.
     message = email.message_from_string(SINGLE_COMMIT_MR)
     parse_mail(message, list_id)

     models.Branch.objects.create(
         git_forge=self.forge,
         name="master",
         subject_prefix="TEST",
         subject_match=r"^.*\[.*TEST.*\].*$",
     )

Пример #9

0

Показать файл

Файл: parsemail.py Проект: daxtens/patchwork

    def handle(self, *args, **options):
        """Load one mail from a file or stdin, parse it, set the exit status.

        The file name is ``args[0]`` when positional arguments are given,
        otherwise ``options['infile']``; a falsy value means stdin. An
        ``AttributeError`` raised while loading is logged as a broken email
        and the method returns without parsing.
        """
        infile = args[0] if args else options['infile']

        try:
            if infile:
                logger.info('Parsing mail loaded by filename')
                if six.PY3:
                    with open(infile, 'rb') as file_:
                        mail = email.message_from_binary_file(file_)
                else:
                    with open(infile) as file_:
                        mail = email.message_from_file(file_)
            else:
                logger.info('Parsing mail loaded from stdin')
                if six.PY3:
                    mail = email.message_from_binary_file(sys.stdin.buffer)
                else:
                    mail = email.message_from_file(sys.stdin)
        except AttributeError:
            logger.warning("Broken email ignored")
            return

        # it's important to get exit codes correct here. The key is to allow
        # proper separation of real errors vs expected 'failures'.
        #
        # patch/comment parsed:        0
        # no parseable content found:  0
        # duplicate messages:          0
        # db integrity/other db error: 1
        # broken email (ValueError):   1 (this could be noisy, if it's an issue
        #                                 we could use a different return code)
        try:
            result = parse_mail(mail, options['list_id'])
            if result is None:
                logger.warning('Nothing added to database')
        except DuplicateMailError as exc:
            logger.warning('Duplicate mail for message ID %s', exc.msgid)
        except Exception as exc:  # includes ValueError
            # Python 3 exceptions have no ``.message`` attribute; reading it
            # here would raise AttributeError inside the handler and hide the
            # real error, so log the repr instead.
            logger.exception('Error when parsing incoming email: %s',
                             repr(exc),
                             extra={'mail': mail.as_string()})
            sys.exit(1)

Пример #10

0

Показать файл

Файл: test_series.py Проект: getpatchwork/patchwork

    def _parse_mbox(self, name, counts):
        """Parse an mbox file and return the results.

        :param name: Name of mbox file
        :param counts: A three-tuple of expected number of cover
            letters, patches and replies parsed
        :returns: A list of three lists holding, in order, the parsed cover
            letters, the parsed patches and everything else
        """
        results = [[], [], []]

        # create=False: a mistyped fixture name should fail loudly instead of
        # silently creating an empty mbox next to the real fixtures
        mbox = mailbox.mbox(os.path.join(TEST_SERIES_DIR, name), create=False)
        try:
            for msg in mbox:
                obj = parser.parse_mail(msg, self.project.listid)
                if isinstance(obj, models.CoverLetter):
                    results[0].append(obj)
                elif isinstance(obj, models.Patch):
                    results[1].append(obj)
                else:
                    results[2].append(obj)
        finally:
            # don't leak the fixture's file handle between tests
            mbox.close()

        self.assertParsed(results, counts)

        return results

Пример #11

0

Показать файл

    def _parse_mbox(self, name, counts):
        """Run every message of an mbox through the parser and sort the output.

        :param name: Name of mbox file
        :param counts: A three-tuple of expected number of cover
            letters, patches and replies parsed
        :returns: ``[cover letters, patches, everything else]``
        """
        covers, patches, replies = [], [], []
        # exact-type lookup; anything that is neither lands in ``replies``
        bucket_for = {models.CoverLetter: covers, models.Patch: patches}

        mbox_path = os.path.join(TEST_SERIES_DIR, name)
        for message in mailbox.mbox(mbox_path):
            parsed = parser.parse_mail(message, self.project.listid)
            bucket_for.get(type(parsed), replies).append(parsed)

        results = [covers, patches, replies]
        self.assertParsed(results, counts)
        return results

Пример #12

0

Показать файл

Файл: parsemail.py Проект: wengpingbo/patchwork

def main(args):
    """Parse one mail from a file or stdin and return an exit status.

    Returns 0 when ``parse_mail`` yields a truthy result and 1 otherwise.
    Any exception is logged (when an error logger is available) and then
    re-raised.

    NOTE(review): the ``args`` parameter is not consulted; ``parse_args()``
    is called without arguments.
    """
    django.setup()
    logger = setup_error_handler()
    parser = argparse.ArgumentParser()

    parser.add_argument(
        'infile',
        nargs='?',
        type=argparse.FileType('r'),
        default=sys.stdin,
        help='input mbox file (a filename or stdin)',
    )

    # level names ordered by the value they map to
    level_names = sorted(VERBOSITY_LEVELS.keys(),
                         key=VERBOSITY_LEVELS.__getitem__)

    group = parser.add_argument_group('Mail parsing configuration')
    group.add_argument(
        '--list-id',
        help='mailing list ID. If not supplied this will be extracted from '
             'the mail headers.',
    )
    group.add_argument('--verbosity', choices=level_names, default='info',
                       help='debug level')

    options = vars(parser.parse_args())

    logging.basicConfig(level=VERBOSITY_LEVELS[options['verbosity']])

    mail = message_from_file(options['infile'])
    try:
        return 0 if parse_mail(mail, options['list_id']) else 1
    except:
        # log whatever went wrong, then let it propagate unchanged
        if logger:
            logger.exception('Error when parsing incoming email',
                             extra={'mail': mail.as_string()})
        raise

Пример #13

0

Показать файл

    def handle(self, *args, **options):
        """Parse all mails of an mbox file or Maildir and report the totals.

        ``options['verbosity']`` selects the log level (0 CRITICAL, 1 ERROR,
        2 INFO, anything else DEBUG). The path is ``args[0]`` when truthy,
        otherwise ``options['infile']``; a missing path exits with status 1.
        The summary is skipped when verbosity is 0.
        """
        verbosity = int(options['verbosity'])
        level = {
            0: logging.CRITICAL,
            1: logging.ERROR,
            2: logging.INFO,
        }.get(verbosity, logging.DEBUG)

        if level:
            logger.setLevel(level)
            logging.getLogger('patchwork.parser').setLevel(level)

        # TODO(stephenfin): Support passing via stdin
        path = (args[0] if args else None) or options['infile']
        if not os.path.exists(path):
            logger.error('Invalid path: %s', path)
            sys.exit(1)

        # a regular file is an mbox; anything else is assumed to be a maildir
        opener = mailbox.mbox if os.path.isfile(path) else mailbox.Maildir
        mbox = opener(path, create=False)

        total = len(mbox)

        # Walk the whole mailbox once up front. A broken email part way
        # through only raises while iterating, and catching that inside the
        # enumerate() loop below is awkward. This works around a bug in the
        # Python 'email' library, described here:
        #
        #   https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
        #
        # Converting the mbox to a list of messages would also work, but
        # keeps everything in memory at once, which is wasteful.
        try:
            for _ in mbox:
                pass
        except AttributeError:
            logger.error('Broken mbox/Maildir, aborting')
            return

        tally = {
            models.Patch: 0,
            models.CoverLetter: 0,
            models.Comment: 0,
        }
        n_duplicates = 0
        n_dropped = 0
        n_errors = 0
        show_progress = verbosity < 3

        logger.info('Parsing %d mails', total)
        for index, message in enumerate(mbox):
            try:
                parsed = parse_mail(message, options['list_id'])
                if parsed:
                    tally[type(parsed)] += 1
                else:
                    n_dropped += 1
            except DuplicateMailError as duplicate:
                n_duplicates += 1
                logger.warning('Duplicate mail for message ID %s',
                               duplicate.msgid)
            except Exception as error:  # also covers ValueError
                n_errors += 1
                logger.warning('Invalid mail: %s', repr(error))

            if show_progress and index % 10 == 0:
                self.stdout.write('%06d/%06d\r' % (index, total), ending='')
                self.stdout.flush()

        mbox.close()

        if not verbosity:
            return

        summary = {
            'total': total,
            'covers': tally[models.CoverLetter],
            'patches': tally[models.Patch],
            'comments': tally[models.Comment],
            'duplicates': n_duplicates,
            'dropped': n_dropped,
            'errors': n_errors,
            'new': total - n_duplicates - n_dropped - n_errors,
        }
        self.stdout.write(
            'Processed %(total)d messages -->\n'
            '  %(covers)4d cover letters\n'
            '  %(patches)4d patches\n'
            '  %(comments)4d comments\n'
            '  %(duplicates)4d duplicates\n'
            '  %(dropped)4d dropped\n'
            '  %(errors)4d errors\n'
            'Total: %(new)s new entries' % summary)

Пример #14

0

Показать файл

Файл: gitlab2email.py Проект: jeremycline/patchlab

def email_comment(gitlab, forge_id, author, comment, merge_id=None) -> None:
    """Email a comment made on Gitlab.

    Looks up the GitForge by the hostname of ``gitlab.url`` and ``forge_id``,
    finds the bridged submission with the highest series version (narrowed by
    ``merge_id`` and the comment's ``commit_id`` when present), and sends a
    reply to the project's list address. The reply is also passed to the
    Patchwork parser before it is sent.

    Returns early, after logging, when the forge or a matching bridged
    submission cannot be found.

    Args:
        gitlab: Object whose ``url`` attribute identifies the forge host.
        forge_id: Forge-side project id used to look up the GitForge.
        author: Mapping with a ``"name"`` key.
        comment: Mapping with ``"url"`` and ``"note"`` keys and an optional
            ``"commit_id"`` key.
        merge_id: Optional merge request id used to narrow the lookup.
    """
    try:
        git_forge = GitForge.objects.get(
            host=urllib.parse.urlsplit(gitlab.url).hostname,
            forge_id=forge_id,
        )
    except GitForge.DoesNotExist:
        _log.error(
            "Got comment event for project id %d, which isn't in the database",
            forge_id)
        return

    commit = comment.get("commit_id")
    try:
        bridged_submission = BridgedSubmission.objects.filter(
            git_forge=git_forge).order_by("-series_version")
        if merge_id:
            bridged_submission = bridged_submission.filter(
                merge_request=merge_id)
        if commit:
            bridged_submission = bridged_submission.filter(commit=commit)
        bridged_submission = bridged_submission[0]
    except IndexError:
        # merge_id defaults to None, which "%d" cannot format; use "%s" so
        # the message is actually emitted in that case.
        _log.info(
            "Unable to find a bridged submission for comment on MR %s, commit %s, forge %r",
            merge_id,
            commit,
            git_forge,
        )
        return

    from_email = settings.PATCHLAB_FROM_EMAIL.format(forge_user=author["name"])
    # From the bridged_submission, find the in-reply-to, create email.
    headers = {
        "Date": email_utils.formatdate(localtime=settings.EMAIL_USE_LOCALTIME),
        "Message-ID": email_utils.make_msgid(domain=DNS_NAME),
        "In-Reply-To": bridged_submission.submission.msgid,
        "X-Patchlab-Comment": comment["url"],
    }
    # collapse a folded (multi-line) Subject header onto a single line
    subject = "Re: " + " ".join(
        message_from_string(
            bridged_submission.submission.headers)["Subject"].splitlines())
    # wrap each line of the note separately so existing line breaks survive
    wrapped_description = "\n".join([
        textwrap.fill(line, width=72, replace_whitespace=False)
        for line in comment["note"].splitlines()
    ])
    body = (
        f"From: {author['name']} on {git_forge.host}\n{comment['url']}\n\n{wrapped_description}\n"
    )
    message = EmailMessage(
        subject=subject,
        body=body,
        from_email=from_email,
        to=[git_forge.project.listemail],
        headers=headers,
        reply_to=[git_forge.project.listemail],
    )
    with get_connection(fail_silently=False) as conn:
        # record the comment in Patchwork first, then send it to the list
        patchwork_parser.parse_mail(message.message(),
                                    list_id=git_forge.project.listid)
        message.connection = conn
        message.send(fail_silently=False)

Пример #15

0

Показать файл

Файл: parsearchive.py Проект: ajdlinux/patchwork

    def handle(self, *args, **options):
        """Parse all mails of an mbox file or Maildir and write the totals.

        The path is ``args[0]`` when truthy, otherwise ``options['infile']``;
        a missing path exits with status 1. Each message goes to
        ``parse_mail`` with ``options['list_id']``.
        """
        # TODO(stephenfin): Support passing via stdin
        path = (args[0] if args else None) or options['infile']
        if not os.path.exists(path):
            self.stdout.write('Invalid path: %s' % path)
            sys.exit(1)

        # a regular file is an mbox; anything else is assumed to be a maildir
        opener = mailbox.mbox if os.path.isfile(path) else mailbox.Maildir
        mbox = opener(path)

        total = len(mbox)

        # Walk the whole mailbox once up front. A broken email part way
        # through only raises while iterating, and catching that inside the
        # enumerate() loop below is awkward. This works around a bug in the
        # Python 'email' library, described here:
        #
        #   https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
        #
        # Converting the mbox to a list of messages would also work, but
        # keeps everything in memory at once, which is wasteful.
        try:
            for _ in mbox:
                pass
        except AttributeError:
            logger.warning('Broken mbox/Maildir, aborting')
            return

        tally = {
            models.Patch: 0,
            models.CoverLetter: 0,
            models.Comment: 0,
        }
        n_duplicates = 0
        n_dropped = 0
        n_errors = 0

        logger.info('Parsing %d mails', total)
        for index, message in enumerate(mbox):
            try:
                parsed = parse_mail(message, options['list_id'])
                if parsed:
                    tally[type(parsed)] += 1
                else:
                    n_dropped += 1
            except django.db.utils.IntegrityError:
                n_duplicates += 1
            except ValueError:
                # TODO(stephenfin): Perhaps we should store the broken patch
                # somewhere for future reference?
                n_errors += 1

            # progress indicator, refreshed every tenth message
            if index % 10 == 0:
                self.stdout.write('%06d/%06d\r' % (index, total), ending='')
                self.stdout.flush()

        summary = {
            'total': total,
            'covers': tally[models.CoverLetter],
            'patches': tally[models.Patch],
            'comments': tally[models.Comment],
            'duplicates': n_duplicates,
            'dropped': n_dropped,
            'errors': n_errors,
            'new': total - n_duplicates - n_dropped - n_errors,
        }
        self.stdout.write(
            'Processed %(total)d messages -->\n'
            '  %(covers)4d cover letters\n'
            '  %(patches)4d patches\n'
            '  %(comments)4d comments\n'
            '  %(duplicates)4d duplicates\n'
            '  %(dropped)4d dropped\n'
            '  %(errors)4d errors\n'
            'Total: %(new)s new entries' % summary)
        mbox.close()

Пример #16

0

Показать файл

Файл: test_series.py Проект: getpatchwork/patchwork

 def _parse_mail(self, mail):
     """Pass ``mail`` to ``parser.parse_mail`` using this project's list ID."""
     list_id = self.project.listid
     return parser.parse_mail(mail, list_id)

Пример #17

0

Показать файл

Файл: parsearchive.py Проект: daxtens/patchwork

    def handle(self, *args, **options):
        """Parse all mails of an mbox file or Maildir and report the totals.

        ``options['verbosity']`` selects the log level (0 CRITICAL, 1 ERROR,
        2 INFO, anything else DEBUG). The path is ``args[0]`` when truthy,
        otherwise ``options['infile']``; a missing path exits with status 1.
        Each message goes to ``parse_mail`` with ``options['list_id']``. The
        summary is skipped when verbosity is 0.
        """
        results = {
            models.Patch: 0,
            models.CoverLetter: 0,
            models.Comment: 0,
        }
        duplicates = 0
        dropped = 0
        errors = 0

        verbosity = int(options['verbosity'])
        if not verbosity:
            level = logging.CRITICAL
        elif verbosity == 1:
            level = logging.ERROR
        elif verbosity == 2:
            level = logging.INFO
        else:  # verbosity == 3
            level = logging.DEBUG

        if level:
            logger.setLevel(level)
            logging.getLogger('patchwork.parser').setLevel(level)

        # TODO(stephenfin): Support passing via stdin
        path = args and args[0] or options['infile']
        if not os.path.exists(path):
            logger.error('Invalid path: %s', path)
            sys.exit(1)

        # assume if <infile> is a directory, then we're passing a maildir
        if os.path.isfile(path):
            mbox = mailbox.mbox(path, create=False)
        else:
            mbox = mailbox.Maildir(path, create=False)

        count = len(mbox)

        # Iterate through the mbox. This will pick up exceptions that are only
        # thrown when a broken email is found part way through. Without this
        # block, we'd get the exception thrown in enumerate(mbox) below, which
        # is harder to catch. This is due to a bug in the Python 'email'
        # library, as described here:
        #
        #   https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
        #
        # The alternative is converting the mbox to a list of messages, but
        # that requires holding the entire thing in memory, which is wasteful.
        try:
            for m in mbox:
                pass
        except AttributeError:
            logger.error('Broken mbox/Maildir, aborting')
            return

        logger.info('Parsing %d mails', count)
        for i, msg in enumerate(mbox):
            try:
                obj = parse_mail(msg, options['list_id'])
                if obj:
                    results[type(obj)] += 1
                else:
                    dropped += 1
            except DuplicateMailError as exc:
                duplicates += 1
                logger.warning('Duplicate mail for message ID %s', exc.msgid)
            except Exception as exc:  # includes ValueError
                errors += 1
                # Python 3 exceptions have no ``.message`` attribute; reading
                # it here would raise AttributeError and abort the whole run,
                # so log the repr instead.
                logger.warning('Invalid mail: %s', repr(exc))

            # progress indicator, suppressed at the highest verbosity
            if verbosity < 3 and (i % 10) == 0:
                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                self.stdout.flush()

        mbox.close()

        if not verbosity:
            return

        self.stdout.write(
            'Processed %(total)d messages -->\n'
            '  %(covers)4d cover letters\n'
            '  %(patches)4d patches\n'
            '  %(comments)4d comments\n'
            '  %(duplicates)4d duplicates\n'
            '  %(dropped)4d dropped\n'
            '  %(errors)4d errors\n'
            'Total: %(new)s new entries' % {
                'total': count,
                'covers': results[models.CoverLetter],
                'patches': results[models.Patch],
                'comments': results[models.Comment],
                'duplicates': duplicates,
                'dropped': dropped,
                'errors': errors,
                'new': count - duplicates - dropped - errors,
            })

Пример #18

0

Показать файл

 def _parse_mail(self, mail):
     """Parse ``mail`` against the list ID of the test's project."""
     project_list_id = self.project.listid
     parsed = parser.parse_mail(mail, project_list_id)
     return parsed

Пример #19

0

Показать файл

Файл: parsearchive.py Проект: alialnu/patchwork

    def handle(self, *args, **options):
        """Import an mbox file or Maildir mail by mail and print a summary.

        The path comes from ``args[0]`` when truthy, else from
        ``options['infile']``; the command exits with status 1 if it does not
        exist. ``options['list_id']`` is passed to ``parse_mail`` for every
        message.
        """
        # TODO(stephenfin): Support passing via stdin
        path = (args[0] if args else None) or options['infile']
        if not os.path.exists(path):
            self.stdout.write('Invalid path: %s' % path)
            sys.exit(1)

        # files are read as mbox; any other existing path is taken to be a
        # maildir
        if not os.path.isfile(path):
            mbox = mailbox.Maildir(path, create=False)
        else:
            mbox = mailbox.mbox(path, create=False)

        message_count = len(mbox)

        # Dry-run over the mailbox first: a broken email in the middle only
        # raises during iteration, and that is hard to catch once we are
        # inside the enumerate() loop below. The cause is a bug in the Python
        # 'email' library, see:
        #
        #   https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
        #
        # Turning the mbox into a list of messages would avoid the double
        # pass, but holding every message in memory is wasteful.
        try:
            for _ in mbox:
                pass
        except AttributeError:
            logger.warning('Broken mbox/Maildir, aborting')
            return

        counts_by_type = {
            models.Patch: 0,
            models.CoverLetter: 0,
            models.Comment: 0,
        }
        duplicate_count = 0
        dropped_count = 0
        error_count = 0

        logger.info('Parsing %d mails', message_count)
        for position, message in enumerate(mbox):
            try:
                parsed = parse_mail(message, options['list_id'])
                if not parsed:
                    dropped_count += 1
                else:
                    counts_by_type[type(parsed)] += 1
            except django.db.utils.IntegrityError:
                duplicate_count += 1
            except ValueError:
                # TODO(stephenfin): Perhaps we should store the broken patch
                # somewhere for future reference?
                error_count += 1

            # refresh the progress line on every tenth message
            if not position % 10:
                self.stdout.write(
                    '%06d/%06d\r' % (position, message_count), ending='')
                self.stdout.flush()

        new_entries = (
            message_count - duplicate_count - dropped_count - error_count)
        self.stdout.write(
            'Processed %(total)d messages -->\n'
            '  %(covers)4d cover letters\n'
            '  %(patches)4d patches\n'
            '  %(comments)4d comments\n'
            '  %(duplicates)4d duplicates\n'
            '  %(dropped)4d dropped\n'
            '  %(errors)4d errors\n'
            'Total: %(new)s new entries' % {
                'total': message_count,
                'covers': counts_by_type[models.CoverLetter],
                'patches': counts_by_type[models.Patch],
                'comments': counts_by_type[models.Comment],
                'duplicates': duplicate_count,
                'dropped': dropped_count,
                'errors': error_count,
                'new': new_entries,
            })
        mbox.close()

Python parse_mail примеры использования