def build_patches(notmuch_dir, search_days, mail_query, trees):
    """Scan a notmuch database for patch threads and build a list of series.

    Args:
        notmuch_dir: location handed to ``notmuch.Database``.
        search_days: size of the search window in days (converted with
            ``days_to_seconds``); the window ends at the current time.
        mail_query: query prefix placed in front of the subject/date filter.
        trees: passed through unchanged to ``gitcmd.get_commits`` and
            ``build_patch``.

    Returns:
        A list of series dicts.  Each one carries 'messages' (the list of
        patch dicts, thread leader first) and 'total_messages'; 'broken' is
        set when the number of collected patches does not match the count
        parsed from the leader's subject, and 'mbox_path'/'mbox_hash' are set
        for series that are still pending.  Pull-request series are whatever
        ``fixup_pull_request`` returns.
    """
    # Only needed for error reporting; imported locally because the
    # module-level imports are not guaranteed to provide them.
    import sys
    import traceback

    db = notmuch.Database(notmuch_dir)

    now = long(time())
    then = now - days_to_seconds(search_days)

    query = '%s (subject:PATCH or subject:PULL) %s..%s' % (mail_query,
                                                           then, now)
    q = notmuch.Query(db, query)

    oldest = build_thread_leaders(q, then)

    # A pull request may contain patches older than the posted commits.  That's
    # because a commit doesn't happen *after* the post like what normally
    # happens with a patch but rather the post happens after the commit.
    # There's no obvious way to handle this other than the hack below.

    # Give some extra time (30 days, in seconds) for pull request commits
    oldest -= (30 * 24 * 60 * 60)

    commits = gitcmd.get_commits(oldest, trees)
    merged_heads = gitcmd.get_merges(oldest)

    mbox.setup_mboxes()

    patches = []
    for thread in q.search_threads():
        try:
            top = list(thread.get_toplevel_messages())[0]
        except notmuch.errors.NullPointerError:
            continue

        if not message.is_patch(top):
            continue

        # The parser chokes on emails too often, simply report the error and
        # skip the thread so that scan can complete.  Catch Exception rather
        # than everything so that Ctrl-C / SystemExit still abort the scan.
        try:
            patch = build_patch(commits, merged_heads, top, trees, leader=True)
        except Exception:
            sys.stderr.write('Message-Id: %s\n' % top.get_message_id())
            traceback.print_exc()
            continue

        patch_list = [patch]
        message_list = []

        for reply in top.get_replies():
            # notmuch won't let us call get_replies twice so we have to do
            # everything in a single loop.

            # any first level replies are replies to the top level post.
            if not message.is_patch(reply):
                # A non-patch reply only contributes tags/recipients to the
                # thread leader.
                leader = patch_list[0]
                new_tags, to, cc = message.find_extra_tags(reply, False)
                leader['tags'] = message.merge_tags(leader['tags'], new_tags)
                leader['to'] = message.dedup(leader['to'] + to)
                leader['cc'] = message.dedup(leader['cc'] + cc)

                if message.is_thanks_applied(reply):
                    leader['applied-by'] = message.parse_email_address(
                        message.get_header(reply, 'From'))
            else:
                patch = build_patch(commits, merged_heads, reply, trees)
                patch_list.append(patch)
                message_list.append((reply, patch['tags']))

        # now we're done with replies so tags for the top patch are known
        if not message.is_cover(patch_list[0]):
            message_list.insert(0, (top, patch_list[0]['tags']))

        series = {'messages': patch_list,
                  'total_messages': thread.get_total_messages()}

        if series_.is_pull_request(series):
            series = fixup_pull_request(series, merged_heads)

        message_list.sort(message.cmp_patch)

        # The second field of the parsed subject is compared against the
        # number of patches actually collected from the thread.
        expected_count = message.parse_subject(top)[1]
        if len(message_list) != expected_count:
            series['broken'] = True

        # Only series that are still pending get an mbox generated.
        if (not series_.is_broken(series) and
                not series_.is_obsolete(series) and
                not series_.any_committed(series) and
                not series_.is_pull_request(series) and
                not series_.is_applied(series)):
            if message.is_cover(series['messages'][0]):
                tags = series['messages'][0]['tags']
            else:
                tags = {}
            series['mbox_path'] = mbox.generate_mbox(message_list, tags)
            series['mbox_hash'] = mbox.get_hash(series['mbox_path'])

        patches.append(series)

    return patches
def build_patches(notmuch_dir, search_days, mail_query, trees):
    """Scan a notmuch database for patch threads and build a list of series.

    Args:
        notmuch_dir: location handed to ``notmuch.Database``.
        search_days: size of the search window in days (converted with
            ``days_to_seconds``); the window ends at the current time.
        mail_query: query prefix placed in front of the subject/date filter.
        trees: passed through unchanged to ``gitcmd.get_commits`` and
            ``build_patch``.

    Returns:
        A list of series dicts.  Each one carries 'messages' (the list of
        patch dicts, thread leader first) and 'total_messages'; 'broken' is
        set when the number of collected patches does not match the count
        parsed from the leader's subject, and 'mbox_path'/'mbox_hash' are set
        for series that are still pending.  Pull-request series are whatever
        ``fixup_pull_request`` returns.
    """
    # Only needed for error reporting; imported locally because the
    # module-level imports are not guaranteed to provide them.
    import sys
    import traceback

    db = notmuch.Database(notmuch_dir)

    now = long(time())
    then = now - days_to_seconds(search_days)

    query = '%s (subject:PATCH or subject:PULL) %s..%s' % (mail_query,
                                                           then, now)
    q = notmuch.Query(db, query)

    oldest = build_thread_leaders(q, then)

    # A pull request may contain patches older than the posted commits.  That's
    # because a commit doesn't happen *after* the post like what normally
    # happens with a patch but rather the post happens after the commit.
    # There's no obvious way to handle this other than the hack below.

    # Give some extra time (30 days, in seconds) for pull request commits
    oldest -= (30 * 24 * 60 * 60)

    commits = gitcmd.get_commits(oldest, trees)
    merged_heads = gitcmd.get_merges(oldest)

    mbox.setup_mboxes()

    patches = []
    for thread in q.search_threads():
        try:
            top = list(thread.get_toplevel_messages())[0]
        except notmuch.errors.NullPointerError:
            continue

        if not message.is_patch(top):
            continue

        # The parser chokes on emails too often, simply report the error and
        # skip the thread so that scan can complete.  Catch Exception rather
        # than everything so that Ctrl-C / SystemExit still abort the scan.
        try:
            patch = build_patch(commits, merged_heads, top, trees, leader=True)
        except Exception:
            sys.stderr.write('Message-Id: %s\n' % top.get_message_id())
            traceback.print_exc()
            continue

        patch_list = [patch]
        message_list = []

        for reply in top.get_replies():
            # notmuch won't let us call get_replies twice so we have to do
            # everything in a single loop.

            # any first level replies are replies to the top level post.
            if not message.is_patch(reply):
                # A non-patch reply only contributes tags/recipients to the
                # thread leader.
                leader = patch_list[0]
                new_tags, to, cc = message.find_extra_tags(reply, False)
                leader['tags'] = message.merge_tags(leader['tags'], new_tags)
                leader['to'] = message.dedup(leader['to'] + to)
                leader['cc'] = message.dedup(leader['cc'] + cc)

                if message.is_thanks_applied(reply):
                    leader['applied-by'] = message.parse_email_address(
                        message.get_header(reply, 'From'))
            else:
                patch = build_patch(commits, merged_heads, reply, trees)
                patch_list.append(patch)
                message_list.append((reply, patch['tags']))

        # now we're done with replies so tags for the top patch are known
        if not message.is_cover(patch_list[0]):
            message_list.insert(0, (top, patch_list[0]['tags']))

        series = {'messages': patch_list,
                  'total_messages': thread.get_total_messages()}

        if series_.is_pull_request(series):
            series = fixup_pull_request(series, merged_heads)

        message_list.sort(message.cmp_patch)

        # The second field of the parsed subject is compared against the
        # number of patches actually collected from the thread.
        expected_count = message.parse_subject(top)[1]
        if len(message_list) != expected_count:
            series['broken'] = True

        # Only series that are still pending get an mbox generated.
        if (not series_.is_broken(series) and
                not series_.is_obsolete(series) and
                not series_.any_committed(series) and
                not series_.is_pull_request(series) and
                not series_.is_applied(series)):
            if message.is_cover(series['messages'][0]):
                tags = series['messages'][0]['tags']
            else:
                tags = {}
            series['mbox_path'] = mbox.generate_mbox(message_list, tags)
            series['mbox_hash'] = mbox.get_hash(series['mbox_path'])

        patches.append(series)

    return patches
mids[mid][name]['owner'] = builds['owner'] for series in patches: mid = series['messages'][0]['message-id'] if mid in mids: series['buildbots'] = mids[mid] print 'Fetching mboxes...' for series in patches: if 'mbox_path' not in series: continue mbox_path = series['mbox_path'] old_hash = mbox.get_hash(mbox_path) if 'mbox_hash' in series and series['mbox_hash'] == old_hash: continue print 'Fetching mbox for %s' % series['messages'][0]['subject'] base, _ = url.rsplit('/', 1) try: fp = urlopen(base + '/' + series['mbox_path']) except HTTPError, e: print 'Skipping mbox %s' % series['mbox_path'] continue try: mbox_data = fp.read()