def handle(self, *args, **options):
    """Parse one mail from a file or stdin and hand it to ``parse_mail``.

    The input path is the first positional argument or, failing that,
    ``options['infile']``; when that value is falsy the mail is read from
    stdin. ``options['list_id']`` is passed through to ``parse_mail``.

    Exit codes:
        0: ``parse_mail`` returned a truthy result.
        1: ``parse_mail`` returned a falsy result or raised an exception.
    """
    infile = args[0] if args else options['infile']

    if infile:
        logger.info('Parsing mail loaded by filename')
        if six.PY3:
            # Python 3: parse from the binary stream
            with open(infile, 'rb') as file_:
                mail = email.message_from_binary_file(file_)
        else:
            with open(infile) as file_:
                mail = email.message_from_file(file_)
    else:
        logger.info('Parsing mail loaded from stdin')
        if six.PY3:
            mail = email.message_from_binary_file(sys.stdin.buffer)
        else:
            mail = email.message_from_file(sys.stdin)

    try:
        result = parse_mail(mail, options['list_id'])
        if result:
            sys.exit(0)
        logger.warning('Failed to parse mail')
        sys.exit(1)
    except Exception:
        # SystemExit is not an Exception subclass, so the exits above pass
        # straight through this handler.
        logger.exception('Error when parsing incoming email',
                         extra={'mail': mail.as_string()})
        # Without an explicit exit the command would finish with status 0,
        # making a crash indistinguishable from a successfully parsed mail.
        sys.exit(1)
def parse_mbox(path, list_id):
    """Parse every message of the mbox at ``path`` and print a summary.

    Each message is passed to ``parse_mail`` together with ``list_id``.
    Truthy results are counted by their type, falsy results are counted as
    dropped and messages raising ``IntegrityError`` are counted as
    duplicates.
    """
    results = {
        models.Patch: 0,
        models.CoverLetter: 0,
        models.Comment: 0,
    }
    duplicates = 0
    dropped = 0

    mbox = mailbox.mbox(path)
    try:
        total = len(mbox)
        for msg in mbox:
            try:
                obj = parse_mail(msg, list_id)
            except django.db.utils.IntegrityError:
                duplicates += 1
                continue

            if obj:
                results[type(obj)] += 1
            else:
                dropped += 1
    finally:
        # release the underlying file even if parsing blows up part-way
        mbox.close()

    print('Processed %(total)d messages -->\n'
          ' %(covers)4d cover letters\n'
          ' %(patches)4d patches\n'
          ' %(comments)4d comments\n'
          ' %(duplicates)4d duplicates\n'
          ' %(dropped)4d dropped\n'
          'Total: %(new)s new entries' % {
              'total': total,
              'covers': results[models.CoverLetter],
              'patches': results[models.Patch],
              'comments': results[models.Comment],
              'duplicates': duplicates,
              'dropped': dropped,
              'new': total - duplicates - dropped,
          })
def handle(self, *args, **options):
    """Load one mail (from a file or stdin) and feed it to ``parse_mail``.

    The source is the first positional argument, else ``options['infile']``;
    a falsy source means stdin. An ``AttributeError`` while loading is logged
    as a broken email and the command returns normally.
    """
    if args:
        source = args[0]
    else:
        source = options['infile']

    try:
        if not source:
            logger.info('Parsing mail loaded from stdin')
            message = email.message_from_binary_file(sys.stdin.buffer)
        else:
            logger.info('Parsing mail loaded by filename')
            with open(source, 'rb') as stream:
                message = email.message_from_binary_file(stream)
    except AttributeError:
        logger.warning("Broken email ignored")
        return

    # it's important to get exit codes correct here. The key is to allow
    # proper separation of real errors vs expected 'failures'.
    #
    # patch/comment parsed:        0
    # no parseable content found:  0
    # duplicate messages:          0
    # db integrity/other db error: 1
    # broken email (ValueError):   1 (this could be noisy, if it's an issue
    #                                 we could use a different return code)
    try:
        if parse_mail(message, options['list_id']) is None:
            logger.warning('Nothing added to database')
    except DuplicateMailError as err:
        logger.warning('Duplicate mail for message ID %s', err.msgid)
    except Exception as err:  # ValueError is already covered by Exception
        logger.exception('Error when parsing incoming email: %s',
                         repr(err),
                         extra={'mail': message.as_string()})
        sys.exit(1)
def _record_bridging(listid: str, merge_id: int, email: EmailMessage) -> "BridgedSubmission":
    """
    Create the Patchwork submission records.

    This would happen when the mail hit the mailing list, but doing so now
    lets us associate them with a BridgedSubmission so we can post follow-up
    comments.

    Args:
        listid: List ID handed to the Patchwork parser.
        merge_id: Stored as ``merge_request`` on the new BridgedSubmission.
        email: The message to record; its ``Message-ID`` extra header is used
            to look up the resulting Submission.

    Returns:
        The saved BridgedSubmission.

    Raises:
        ValueError: If the email is a duplicate (chained from the parser's
            DuplicateMailError).
        Submission.DoesNotExist: If the Submission object isn't created by
            patchwork; this indicates Patchwork has changed in some way or
            there's a bug in this function.
    """
    try:
        patchwork_parser.parse_mail(email.message(), list_id=listid)
    except patchwork_parser.DuplicateMailError as exc:
        _log.error(
            "Message ID %s is already in the database; do not call "
            "_record_bridging twice with the same email",
            email.extra_headers["Message-ID"],
        )
        # keep the parser's exception attached as the cause for debugging
        raise ValueError(email) from exc

    try:
        submission = Submission.objects.get(
            msgid=email.extra_headers["Message-ID"])
    except Submission.DoesNotExist:
        _log.error(
            "Patchwork did not save the email which likely means the subject "
            "match field on the project with listid '%s' is filtering out "
            "emails with subjects like '%s'",
            listid,
            email.subject,
        )
        raise

    bridged_submission = BridgedSubmission(
        submission=submission,
        git_forge=submission.project.git_forge,
        merge_request=merge_id,
        commit=email.extra_headers.get("X-Patchlab-Commit"),
        series_version=email.extra_headers.get("X-Patchlab-Series-Version", 1),
    )
    bridged_submission.save()
    return bridged_submission
def handle(self, *args, **options):
    """Import every mail of an mbox file and print a summary of the results.

    The path is the first positional argument if it is non-empty, else
    ``options['infile']``. A missing or non-existent path is reported on
    stdout and the command exits with status 1.

    Per message: a truthy ``parse_mail`` result is counted by type, a falsy
    one as dropped, ``IntegrityError`` as a duplicate and ``ValueError`` as
    an error.
    """
    results = {
        models.Patch: 0,
        models.CoverLetter: 0,
        models.Comment: 0,
    }
    duplicates = 0
    dropped = 0
    errors = 0

    # TODO(stephenfin): Support passing via stdin
    path = args[0] if args and args[0] else options['infile']
    # guard against a missing path before handing it to os.path.exists()
    if not path or not os.path.exists(path):
        self.stdout.write('Invalid path: %s' % path)
        sys.exit(1)

    mbox = mailbox.mbox(path)
    try:
        count = len(mbox)

        logger.info('Parsing %d mails', count)
        for i, msg in enumerate(mbox):
            try:
                obj = parse_mail(msg, options['list_id'])
                if obj:
                    results[type(obj)] += 1
                else:
                    dropped += 1
            except django.db.utils.IntegrityError:
                duplicates += 1
            except ValueError:
                # TODO(stephenfin): Perhaps we should store the broken patch
                # somewhere for future reference?
                errors += 1

            # lightweight progress indicator, rewritten in place via '\r'
            if (i % 10) == 0:
                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                self.stdout.flush()
    finally:
        # always release the mailbox file, even on an unexpected exception
        mbox.close()

    self.stdout.write(
        'Processed %(total)d messages -->\n'
        ' %(covers)4d cover letters\n'
        ' %(patches)4d patches\n'
        ' %(comments)4d comments\n'
        ' %(duplicates)4d duplicates\n'
        ' %(dropped)4d dropped\n'
        ' %(errors)4d errors\n'
        'Total: %(new)s new entries' % {
            'total': count,
            'covers': results[models.CoverLetter],
            'patches': results[models.Patch],
            'comments': results[models.Comment],
            'duplicates': duplicates,
            'dropped': dropped,
            'errors': errors,
            'new': count - duplicates - dropped - errors,
        })
def test_nack_bridged(self):
    """Assert Nacked-by tags are bridged as labels."""
    # NOTE(review): the fixture below is reproduced exactly as it appears in
    # this view of the file; confirm its line breaks against the repository.
    raw_reply = """Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: Re: [TEST PATCH] Bring balance to the equals signs From: Jeremy Cline <*****@*****.**> To: [email protected] Date: Mon, 04 Nov 2019 23:00:00 -0000 Message-ID: <*****@*****.**> X-Patchlab-Patch-Author: Jeremy Cline <*****@*****.**> X-Patchlab-Merge-Request: https://gitlab/root/patchlab_test/merge_requests/1 X-Patchlab-Commit: a958a0dff5e3c433eb99bc5f18cbcfad77433b0d In-Reply-To: <*****@*****.**> List-Id: patchlab.example.com Hi, > From: Jeremy Cline <*****@*****.**> > > This is a silly change so I can write a test. > > Signed-off-by: Jeremy Cline <*****@*****.**> This is unacceptable. Nacked-by: Jeremy Cline <*****@*****.**> """

    # Store the reply through the Patchwork parser, then fetch the first
    # Comment row.
    parse_mail(message_from_string(raw_reply), "patchlab.example.com")
    pw_comment = pw_models.Comment.objects.first()

    # Tie the comment's submission to merge request 2 on forge pk=1.
    forge = models.GitForge.objects.get(pk=1)
    models.BridgedSubmission.objects.create(
        git_forge=forge,
        submission=pw_comment.submission,
        merge_request=2,
    )

    merge_request, _note = bridge.submit_gitlab_comment(
        self.gitlab, pw_comment)

    self.assertEqual(merge_request.labels, ["Nacked-by: [email protected]"])
def setUp(self):
    """Create the project, forge, parsed seed mail and branch used by tests."""
    super().setUp()

    list_id = "kernel.lists.fedoraproject.org"
    self.project = pw_models.Project.objects.create(
        linkname="ark",
        name="ARK",
        listid=list_id,
        listemail="*****@*****.**",
    )

    forge_fields = {
        "project": self.project,
        "host": "gitlab.example.com",
        "forge_id": 1,
    }
    self.forge = models.GitForge.objects.create(**forge_fields)

    # Seed the database with the single-commit merge request mail before the
    # branch row is created (same order as the records are needed above).
    seed_mail = email.message_from_string(SINGLE_COMMIT_MR)
    parse_mail(seed_mail, list_id)

    models.Branch.objects.create(
        git_forge=self.forge,
        name="master",
        subject_prefix="TEST",
        subject_match=r"^.*\[.*TEST.*\].*$",
    )
def handle(self, *args, **options):
    """Load one mail (from a file or stdin) and feed it to ``parse_mail``.

    The source is the first positional argument, else ``options['infile']``;
    a falsy source means stdin. An ``AttributeError`` while loading is logged
    as a broken email and the command returns normally. Any exception from
    ``parse_mail`` other than ``DuplicateMailError`` is logged and the
    command exits with status 1.
    """
    infile = args[0] if args else options['infile']

    try:
        if infile:
            logger.info('Parsing mail loaded by filename')
            if six.PY3:
                with open(infile, 'rb') as file_:
                    mail = email.message_from_binary_file(file_)
            else:
                with open(infile) as file_:
                    mail = email.message_from_file(file_)
        else:
            logger.info('Parsing mail loaded from stdin')
            if six.PY3:
                mail = email.message_from_binary_file(sys.stdin.buffer)
            else:
                mail = email.message_from_file(sys.stdin)
    except AttributeError:
        logger.warning("Broken email ignored")
        return

    # it's important to get exit codes correct here. The key is to allow
    # proper separation of real errors vs expected 'failures'.
    #
    # patch/comment parsed:        0
    # no parseable content found:  0
    # duplicate messages:          0
    # db integrity/other db error: 1
    # broken email (ValueError):   1 (this could be noisy, if it's an issue
    #                                 we could use a different return code)
    try:
        result = parse_mail(mail, options['list_id'])
        if result is None:
            logger.warning('Nothing added to database')
    except DuplicateMailError as exc:
        logger.warning('Duplicate mail for message ID %s', exc.msgid)
    except Exception as exc:
        # Exceptions have no ``.message`` attribute on Python 3; reading it
        # here would raise AttributeError and hide the real error. repr()
        # works on both Python 2 and 3.
        logger.exception('Error when parsing incoming email: %s',
                         repr(exc),
                         extra={'mail': mail.as_string()})
        sys.exit(1)
def _parse_mbox(self, name, counts):
    """Parse an mbox file and return the results.

    :param name: Name of mbox file
    :param counts: A three-tuple of expected number of cover letters,
        patches and replies parsed
    :returns: A list of three lists: parsed cover letters, patches and
        everything else, in that order
    """
    results = [[], [], []]
    # Bucket index per exact result type; any other result lands in bucket 2.
    # The dict lookup keeps the original exact-type comparison.
    bucket_for = {models.CoverLetter: 0, models.Patch: 1}

    mbox = mailbox.mbox(os.path.join(TEST_SERIES_DIR, name))
    try:
        for msg in mbox:
            obj = parser.parse_mail(msg, self.project.listid)
            results[bucket_for.get(type(obj), 2)].append(obj)
    finally:
        # close the mbox so its file handle is not left open
        mbox.close()

    self.assertParsed(results, counts)

    return results
def main(args):
    """Parse a single mail given on the command line and return an exit code.

    :param args: Not consulted; ``argparse`` reads the command line itself
        because ``parse_args()`` is called without arguments.
    :returns: 0 if ``parse_mail`` returned a truthy result, otherwise 1
    :raises Exception: Any exception from ``parse_mail`` is logged (when an
        error logger is configured) and re-raised.
    """
    django.setup()
    logger = setup_error_handler()
    parser = argparse.ArgumentParser()

    def list_logging_levels():
        """Give a summary of all available logging levels."""
        return sorted(VERBOSITY_LEVELS.keys(),
                      key=lambda x: VERBOSITY_LEVELS[x])

    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
                        default=sys.stdin, help='input mbox file (a filename '
                        'or stdin)')

    group = parser.add_argument_group('Mail parsing configuration')
    group.add_argument('--list-id', help='mailing list ID. If not supplied '
                       'this will be extracted from the mail headers.')
    group.add_argument('--verbosity', choices=list_logging_levels(),
                       help='debug level', default='info')

    # kept in a separate name so the ``args`` parameter is not rebound
    opts = vars(parser.parse_args())

    logging.basicConfig(level=VERBOSITY_LEVELS[opts['verbosity']])

    infile = opts['infile']
    try:
        mail = message_from_file(infile)
    finally:
        # argparse.FileType opened the file for us; close it once the message
        # has been read, but leave stdin alone
        if infile is not sys.stdin:
            infile.close()

    try:
        result = parse_mail(mail, opts['list_id'])
    except Exception:
        if logger:
            logger.exception('Error when parsing incoming email', extra={
                'mail': mail.as_string(),
            })
        raise

    return 0 if result else 1
def handle(self, *args, **options):
    """Import every mail of an mbox file or Maildir and report the results.

    The path is the first positional argument if it is non-empty, else
    ``options['infile']``; a regular file is opened as an mbox, anything
    else as a Maildir. ``options['verbosity']`` selects the log level, and
    a verbosity of 0 suppresses the final summary.

    Per message: a truthy ``parse_mail`` result is counted by type, a falsy
    one as dropped, ``DuplicateMailError`` as a duplicate and any other
    exception as an error.
    """
    results = {
        models.Patch: 0,
        models.CoverLetter: 0,
        models.Comment: 0,
    }
    duplicates = 0
    dropped = 0
    errors = 0

    verbosity = int(options['verbosity'])
    # 0 -> CRITICAL, 1 -> ERROR, 2 -> INFO, anything else -> DEBUG
    level = {
        0: logging.CRITICAL,
        1: logging.ERROR,
        2: logging.INFO,
    }.get(verbosity, logging.DEBUG)
    logger.setLevel(level)
    logging.getLogger('patchwork.parser').setLevel(level)

    # TODO(stephenfin): Support passing via stdin
    path = args[0] if args and args[0] else options['infile']
    # guard against a missing path before handing it to os.path.exists()
    if not path or not os.path.exists(path):
        logger.error('Invalid path: %s', path)
        sys.exit(1)

    # assume if <infile> is a directory, then we're passing a maildir
    if os.path.isfile(path):
        mbox = mailbox.mbox(path, create=False)
    else:
        mbox = mailbox.Maildir(path, create=False)

    try:
        count = len(mbox)

        # Iterate through the mbox. This will pick up exceptions that are
        # only thrown when a broken email is found part way through. Without
        # this block, we'd get the exception thrown in enumerate(mbox) below,
        # which is harder to catch. This is due to a bug in the Python
        # 'email' library, as described here:
        #
        # https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
        #
        # The alternative is converting the mbox to a list of messages, but
        # that requires holding the entire thing in memory, which is
        # wasteful.
        try:
            for _ in mbox:
                pass
        except AttributeError:
            logger.error('Broken mbox/Maildir, aborting')
            return

        logger.info('Parsing %d mails', count)
        for i, msg in enumerate(mbox):
            try:
                obj = parse_mail(msg, options['list_id'])
                if obj:
                    results[type(obj)] += 1
                else:
                    dropped += 1
            except DuplicateMailError as exc:
                duplicates += 1
                logger.warning('Duplicate mail for message ID %s', exc.msgid)
            except Exception as exc:  # includes ValueError
                errors += 1
                logger.warning('Invalid mail: %s', repr(exc))

            # in-place progress indicator, skipped at the highest verbosity
            if verbosity < 3 and (i % 10) == 0:
                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                self.stdout.flush()
    finally:
        # close on every path, including the broken-archive early return
        mbox.close()

    if not verbosity:
        return

    self.stdout.write(
        'Processed %(total)d messages -->\n'
        ' %(covers)4d cover letters\n'
        ' %(patches)4d patches\n'
        ' %(comments)4d comments\n'
        ' %(duplicates)4d duplicates\n'
        ' %(dropped)4d dropped\n'
        ' %(errors)4d errors\n'
        'Total: %(new)s new entries' % {
            'total': count,
            'covers': results[models.CoverLetter],
            'patches': results[models.Patch],
            'comments': results[models.Comment],
            'duplicates': duplicates,
            'dropped': dropped,
            'errors': errors,
            'new': count - duplicates - dropped - errors,
        })
def email_comment(gitlab, forge_id, author, comment, merge_id=None) -> None:
    """Email a comment made on Gitlab.

    Looks up the GitForge by the hostname of ``gitlab.url`` and ``forge_id``,
    picks the matching BridgedSubmission with the highest series version
    (optionally narrowed by ``merge_id`` and the comment's ``commit_id``),
    and sends a reply to the project's list address. The reply is also fed
    to the Patchwork parser before it is sent.

    Args:
        gitlab: Object exposing a ``url`` attribute.
        forge_id: Forge-side project ID used to find the GitForge.
        author: Mapping with a ``"name"`` key.
        comment: Mapping with ``"url"`` and ``"note"`` keys and an optional
            ``"commit_id"`` key.
        merge_id: Optional merge request ID to narrow the lookup.
    """
    try:
        git_forge = GitForge.objects.get(host=urllib.parse.urlsplit(
            gitlab.url).hostname,
                                         forge_id=forge_id)
    except GitForge.DoesNotExist:
        _log.error(
            "Got comment event for project id %d, which isn't in the database",
            forge_id)
        return

    commit = comment.get("commit_id")
    try:
        candidates = BridgedSubmission.objects.filter(
            git_forge=git_forge).order_by("-series_version")
        if merge_id:
            candidates = candidates.filter(merge_request=merge_id)
        if commit:
            candidates = candidates.filter(commit=commit)
        bridged_submission = candidates[0]
    except IndexError:
        # merge_id defaults to None, which "%d" cannot format; use "%s" so
        # the message is emitted instead of triggering a logging error.
        _log.info(
            "Unable to find a bridged submission for comment on MR %s, commit %s, forge %r",
            merge_id,
            commit,
            git_forge,
        )
        return

    from_email = settings.PATCHLAB_FROM_EMAIL.format(forge_user=author["name"])
    # From the bridged_submission, find the in-reply-to, create email.
    headers = {
        "Date": email_utils.formatdate(localtime=settings.EMAIL_USE_LOCALTIME),
        "Message-ID": email_utils.make_msgid(domain=DNS_NAME),
        "In-Reply-To": bridged_submission.submission.msgid,
        "X-Patchlab-Comment": comment["url"],
    }
    # Join the stored Subject's lines back into a single line.
    subject = "Re: " + " ".join(
        message_from_string(
            bridged_submission.submission.headers)["Subject"].splitlines())
    # Wrap each line of the note separately at 72 columns.
    wrapped_description = "\n".join([
        textwrap.fill(line, width=72, replace_whitespace=False)
        for line in comment["note"].splitlines()
    ])
    body = (
        f"From: {author['name']} on {git_forge.host}\n{comment['url']}\n\n{wrapped_description}\n"
    )
    reply = EmailMessage(
        subject=subject,
        body=body,
        from_email=from_email,
        to=[git_forge.project.listemail],
        headers=headers,
        reply_to=[git_forge.project.listemail],
    )
    with get_connection(fail_silently=False) as conn:
        # Record the reply in Patchwork first, then send it on the open
        # connection.
        patchwork_parser.parse_mail(reply.message(),
                                    list_id=git_forge.project.listid)
        reply.connection = conn
        reply.send(fail_silently=False)
def handle(self, *args, **options):
    """Import every mail of an mbox file or Maildir and print a summary.

    The path is the first positional argument if it is non-empty, else
    ``options['infile']``; a regular file is opened as an mbox, anything
    else as a Maildir. A missing or non-existent path is reported on stdout
    and the command exits with status 1.

    Per message: a truthy ``parse_mail`` result is counted by type, a falsy
    one as dropped, ``IntegrityError`` as a duplicate and ``ValueError`` as
    an error.
    """
    results = {
        models.Patch: 0,
        models.CoverLetter: 0,
        models.Comment: 0,
    }
    duplicates = 0
    dropped = 0
    errors = 0

    # TODO(stephenfin): Support passing via stdin
    path = args[0] if args and args[0] else options['infile']
    # guard against a missing path before handing it to os.path.exists()
    if not path or not os.path.exists(path):
        self.stdout.write('Invalid path: %s' % path)
        sys.exit(1)

    # assume if <infile> is a directory, then we're passing a maildir
    if os.path.isfile(path):
        mbox = mailbox.mbox(path)
    else:
        mbox = mailbox.Maildir(path)

    try:
        count = len(mbox)

        # Iterate through the mbox. This will pick up exceptions that are
        # only thrown when a broken email is found part way through. Without
        # this block, we'd get the exception thrown in enumerate(mbox) below,
        # which is harder to catch. This is due to a bug in the Python
        # 'email' library, as described here:
        #
        # https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
        #
        # The alternative is converting the mbox to a list of messages, but
        # that requires holding the entire thing in memory, which is
        # wasteful.
        try:
            for _ in mbox:
                pass
        except AttributeError:
            logger.warning('Broken mbox/Maildir, aborting')
            return

        logger.info('Parsing %d mails', count)
        for i, msg in enumerate(mbox):
            try:
                obj = parse_mail(msg, options['list_id'])
                if obj:
                    results[type(obj)] += 1
                else:
                    dropped += 1
            except django.db.utils.IntegrityError:
                duplicates += 1
            except ValueError:
                # TODO(stephenfin): Perhaps we should store the broken patch
                # somewhere for future reference?
                errors += 1

            # lightweight progress indicator, rewritten in place via '\r'
            if (i % 10) == 0:
                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                self.stdout.flush()

        self.stdout.write(
            'Processed %(total)d messages -->\n'
            ' %(covers)4d cover letters\n'
            ' %(patches)4d patches\n'
            ' %(comments)4d comments\n'
            ' %(duplicates)4d duplicates\n'
            ' %(dropped)4d dropped\n'
            ' %(errors)4d errors\n'
            'Total: %(new)s new entries' % {
                'total': count,
                'covers': results[models.CoverLetter],
                'patches': results[models.Patch],
                'comments': results[models.Comment],
                'duplicates': duplicates,
                'dropped': dropped,
                'errors': errors,
                'new': count - duplicates - dropped - errors,
            })
    finally:
        # close on every path, including the broken-archive early return
        mbox.close()
def _parse_mail(self, mail):
    """Run ``parser.parse_mail`` on ``mail`` with this test's project list ID.

    Returns whatever ``parser.parse_mail`` returns.
    """
    list_id = self.project.listid
    parsed = parser.parse_mail(mail, list_id)
    return parsed
def handle(self, *args, **options):
    """Import every mail of an mbox file or Maildir and report the results.

    The path is the first positional argument if it is non-empty, else
    ``options['infile']``; a regular file is opened as an mbox, anything
    else as a Maildir. ``options['verbosity']`` selects the log level, and
    a verbosity of 0 suppresses the final summary.

    Per message: a truthy ``parse_mail`` result is counted by type, a falsy
    one as dropped, ``DuplicateMailError`` as a duplicate and any other
    exception as an error.
    """
    results = {
        models.Patch: 0,
        models.CoverLetter: 0,
        models.Comment: 0,
    }
    duplicates = 0
    dropped = 0
    errors = 0

    verbosity = int(options['verbosity'])
    if not verbosity:
        level = logging.CRITICAL
    elif verbosity == 1:
        level = logging.ERROR
    elif verbosity == 2:
        level = logging.INFO
    else:  # verbosity == 3
        level = logging.DEBUG

    # every level above is a non-zero constant, so always apply it
    logger.setLevel(level)
    logging.getLogger('patchwork.parser').setLevel(level)

    # TODO(stephenfin): Support passing via stdin
    path = args[0] if args and args[0] else options['infile']
    # guard against a missing path before handing it to os.path.exists()
    if not path or not os.path.exists(path):
        logger.error('Invalid path: %s', path)
        sys.exit(1)

    # assume if <infile> is a directory, then we're passing a maildir
    if os.path.isfile(path):
        mbox = mailbox.mbox(path, create=False)
    else:
        mbox = mailbox.Maildir(path, create=False)

    try:
        count = len(mbox)

        # Iterate through the mbox. This will pick up exceptions that are
        # only thrown when a broken email is found part way through. Without
        # this block, we'd get the exception thrown in enumerate(mbox) below,
        # which is harder to catch. This is due to a bug in the Python
        # 'email' library, as described here:
        #
        # https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
        #
        # The alternative is converting the mbox to a list of messages, but
        # that requires holding the entire thing in memory, which is
        # wasteful.
        try:
            for _ in mbox:
                pass
        except AttributeError:
            logger.error('Broken mbox/Maildir, aborting')
            return

        logger.info('Parsing %d mails', count)
        for i, msg in enumerate(mbox):
            try:
                obj = parse_mail(msg, options['list_id'])
                if obj:
                    results[type(obj)] += 1
                else:
                    dropped += 1
            except DuplicateMailError as exc:
                duplicates += 1
                logger.warning('Duplicate mail for message ID %s', exc.msgid)
            except Exception as exc:  # includes ValueError
                errors += 1
                # Exceptions have no ``.message`` attribute on Python 3;
                # reading it would raise AttributeError and abort the whole
                # import, so log the repr() instead.
                logger.warning('Invalid mail: %s', repr(exc))

            # in-place progress indicator, skipped at the highest verbosity
            if verbosity < 3 and (i % 10) == 0:
                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                self.stdout.flush()
    finally:
        # close on every path, including the broken-archive early return
        mbox.close()

    if not verbosity:
        return

    self.stdout.write(
        'Processed %(total)d messages -->\n'
        ' %(covers)4d cover letters\n'
        ' %(patches)4d patches\n'
        ' %(comments)4d comments\n'
        ' %(duplicates)4d duplicates\n'
        ' %(dropped)4d dropped\n'
        ' %(errors)4d errors\n'
        'Total: %(new)s new entries' % {
            'total': count,
            'covers': results[models.CoverLetter],
            'patches': results[models.Patch],
            'comments': results[models.Comment],
            'duplicates': duplicates,
            'dropped': dropped,
            'errors': errors,
            'new': count - duplicates - dropped - errors,
        })
def handle(self, *args, **options):
    """Import every mail of an mbox file or Maildir and print a summary.

    The path is the first positional argument if it is non-empty, else
    ``options['infile']``; a regular file is opened as an mbox, anything
    else as a Maildir (neither is created if absent). A missing or
    non-existent path is reported on stdout and the command exits with
    status 1.

    Per message: a truthy ``parse_mail`` result is counted by type, a falsy
    one as dropped, ``IntegrityError`` as a duplicate and ``ValueError`` as
    an error.
    """
    results = {
        models.Patch: 0,
        models.CoverLetter: 0,
        models.Comment: 0,
    }
    duplicates = 0
    dropped = 0
    errors = 0

    # TODO(stephenfin): Support passing via stdin
    path = args[0] if args and args[0] else options['infile']
    # guard against a missing path before handing it to os.path.exists()
    if not path or not os.path.exists(path):
        self.stdout.write('Invalid path: %s' % path)
        sys.exit(1)

    # assume if <infile> is a directory, then we're passing a maildir
    if os.path.isfile(path):
        mbox = mailbox.mbox(path, create=False)
    else:
        mbox = mailbox.Maildir(path, create=False)

    try:
        count = len(mbox)

        # Iterate through the mbox. This will pick up exceptions that are
        # only thrown when a broken email is found part way through. Without
        # this block, we'd get the exception thrown in enumerate(mbox) below,
        # which is harder to catch. This is due to a bug in the Python
        # 'email' library, as described here:
        #
        # https://lists.ozlabs.org/pipermail/patchwork/2017-July/004486.html
        #
        # The alternative is converting the mbox to a list of messages, but
        # that requires holding the entire thing in memory, which is
        # wasteful.
        try:
            for _ in mbox:
                pass
        except AttributeError:
            logger.warning('Broken mbox/Maildir, aborting')
            return

        logger.info('Parsing %d mails', count)
        for i, msg in enumerate(mbox):
            try:
                obj = parse_mail(msg, options['list_id'])
                if obj:
                    results[type(obj)] += 1
                else:
                    dropped += 1
            except django.db.utils.IntegrityError:
                duplicates += 1
            except ValueError:
                # TODO(stephenfin): Perhaps we should store the broken patch
                # somewhere for future reference?
                errors += 1

            # lightweight progress indicator, rewritten in place via '\r'
            if (i % 10) == 0:
                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                self.stdout.flush()

        self.stdout.write(
            'Processed %(total)d messages -->\n'
            ' %(covers)4d cover letters\n'
            ' %(patches)4d patches\n'
            ' %(comments)4d comments\n'
            ' %(duplicates)4d duplicates\n'
            ' %(dropped)4d dropped\n'
            ' %(errors)4d errors\n'
            'Total: %(new)s new entries' % {
                'total': count,
                'covers': results[models.CoverLetter],
                'patches': results[models.Patch],
                'comments': results[models.Comment],
                'duplicates': duplicates,
                'dropped': dropped,
                'errors': errors,
                'new': count - duplicates - dropped - errors,
            })
    finally:
        # close on every path, including the broken-archive early return
        mbox.close()