def get(self):
    """Render a debug view of the stored webhook events for one issue/PR.

    Query parameters read from the request:
      repo, number: passed to models.GithubResource.make_key (JSON mode)
          and to emit_classified / emit_events (HTML mode).
      format: when equal to 'json', respond with a JSON array of the stored
          raw event bodies and return without writing any HTML.
      classify_only: when truthy, return right after emit_classified.

    In HTML mode the page contains the output of emit_classified and
    emit_events, a table of the statuses returned by
    models.GHStatus.query_for_sha for the merged 'head' SHA (when the merged
    dict has a 'head' key), and the merged dict as JSON. A memory reading
    from memory_usage().current() is written between stages.
    """
    repo = self.request.get('repo')
    number = self.request.get('number')

    if self.request.get('format') == 'json':
        ancestor = models.GithubResource.make_key(repo, number)
        events = list(models.GithubWebhookRaw.query(ancestor=ancestor))
        self.response.headers['content-type'] = 'application/json'
        self.response.write(json.dumps([e.body for e in events], indent=True))
        return

    self.response.write(
        '<style>td pre{max-height:200px;max-width:800px;overflow:scroll}</style>')
    self.response.write('<p>Memory: %s' % memory_usage().current())
    self.emit_classified(repo, number)
    self.response.write('<p>Memory: %s' % memory_usage().current())
    if self.request.get('classify_only'):
        return

    merged = self.emit_events(repo, number)
    self.response.write('<p>Memory: %s' % memory_usage().current())

    if 'head' in merged:
        sha = merged['head']['sha']
        results = models.GHStatus.query_for_sha(repo, sha)
        self.response.write('</table><table>')
        for res in results:
            # Status fields are interpolated into markup, so escape them.
            # quote=True is required because target_url is placed inside a
            # double-quoted href attribute. '%s' % (field,) keeps the
            # original rendering of non-string values such as None.
            cells = tuple(
                cgi.escape('%s' % (field,), quote=True)
                for field in (res.context, res.state,
                              res.target_url, res.description))
            self.response.write(
                '<tr><td>%s<td>%s<td><a href="%s">%s</a>\n' % cells)

    models.shrink(merged)
    self.response.write('</table><pre>%s</pre>' % cgi.escape(
        json.dumps(merged, indent=2, sort_keys=True)))
    self.response.write('<p>Memory: %s' % memory_usage().current())
def emit_events(self, repo, number):
    """Write the distilled and raw webhook events for one issue/PR as HTML.

    Loads every models.GithubWebhookRaw entity under the key built by
    models.GithubResource.make_key(repo, number), ordered by timestamp,
    then writes to self.response:
      * a <pre> block with one line per tuple yielded by
        classifier.distill_events, and
      * a table with one row per raw event (timestamp, event, action,
        sender login, shrunken JSON body).

    The table is left open; nothing in this method writes '</table>'.

    Args:
        repo: repository identifier, forwarded to make_key.
        number: issue/PR number, forwarded to make_key.

    Returns:
        A dict produced by updating, in event order, with each event
        body's 'issue' object, or its 'pull_request' object when there is
        no 'issue' key.
    """
    write = self.response.write

    ancestor = models.GithubResource.make_key(repo, number)
    events = list(models.GithubWebhookRaw.query(ancestor=ancestor)
                  .order(models.GithubWebhookRaw.timestamp))

    write('<h3>Distilled Events</h3>')
    write('<pre>')
    event_pairs = [event.to_tuple() for event in events]
    for ev in classifier.distill_events(event_pairs):
        write(cgi.escape('%s, %s %s\n' % ev))
    write('</pre>')

    write('<h3>%d Raw Events</h3>' % (len(events)))
    write('<table border=2>')
    write('<tr><th>Timestamp<th>Event<th>Action<th>Sender<th>Body</tr>')

    merged = {}
    for event in events:
        body_json = json.loads(event.body)
        models.shrink(body_json)
        if 'issue' in body_json:
            merged.update(body_json['issue'])
        elif 'pull_request' in body_json:
            merged.update(body_json['pull_request'])
        body = json.dumps(body_json, indent=2)
        action = body_json.get('action')
        sender = body_json.get('sender', {}).get('login')
        # Every cell is escaped, not just the body: event, action and
        # sender come from the stored webhook data and must not be able to
        # inject markup into the page.
        cells = [cgi.escape(str(x))
                 for x in (event.timestamp, event.event, action, sender)]
        cells.append('<pre>' + cgi.escape(body))
        write('<tr><td>%s\n' % '<td>'.join(cells))
    return merged
def get(self):
    """Render a debug view of the stored webhook events for one issue/PR.

    Query parameters read from the request:
      repo, number: passed to models.GithubResource.make_key (JSON mode)
          and to emit_classified / emit_events (HTML mode).
      format: when equal to 'json', respond with a JSON array of the stored
          raw event bodies and return without writing any HTML.
      classify_only: when truthy, return right after emit_classified.

    In HTML mode the page contains the output of emit_classified and
    emit_events, a table of the statuses returned by
    models.GHStatus.query_for_sha for the merged 'head' SHA (when the merged
    dict has a 'head' key), and the merged dict as JSON. A memory reading
    from memory_usage().current() is written between stages.
    """
    repo = self.request.get('repo')
    number = self.request.get('number')

    if self.request.get('format') == 'json':
        ancestor = models.GithubResource.make_key(repo, number)
        events = list(models.GithubWebhookRaw.query(ancestor=ancestor))
        self.response.headers['content-type'] = 'application/json'
        self.response.write(
            json.dumps([e.body for e in events], indent=True))
        return

    self.response.write(
        '<style>td pre{max-height:200px;max-width:800px;overflow:scroll}</style>'
    )
    self.response.write('<p>Memory: %s' % memory_usage().current())
    self.emit_classified(repo, number)
    self.response.write('<p>Memory: %s' % memory_usage().current())
    if self.request.get('classify_only'):
        return

    merged = self.emit_events(repo, number)
    self.response.write('<p>Memory: %s' % memory_usage().current())

    if 'head' in merged:
        sha = merged['head']['sha']
        results = models.GHStatus.query_for_sha(repo, sha)
        self.response.write('</table><table>')
        for res in results:
            # Status fields are interpolated into markup, so escape them.
            # quote=True is required because target_url is placed inside a
            # double-quoted href attribute. '%s' % (field,) keeps the
            # original rendering of non-string values such as None.
            cells = tuple(
                cgi.escape('%s' % (field,), quote=True)
                for field in (res.context, res.state,
                              res.target_url, res.description))
            self.response.write(
                '<tr><td>%s<td>%s<td><a href="%s">%s</a>\n' % cells)

    models.shrink(merged)
    self.response.write(
        '</table><pre>%s</pre>' %
        cgi.escape(json.dumps(merged, indent=2, sort_keys=True)))
    self.response.write('<p>Memory: %s' % memory_usage().current())
def emit_events(self, repo, number):
    """Write the distilled and raw webhook events for one issue/PR as HTML.

    Loads every models.GithubWebhookRaw entity under the key built by
    models.GithubResource.make_key(repo, number), ordered by timestamp,
    then writes to self.response:
      * a <pre> block with one line per tuple yielded by
        classifier.distill_events, and
      * a table with one row per raw event (timestamp, event, action,
        sender login, shrunken JSON body).

    The table is left open; nothing in this method writes '</table>'.

    Args:
        repo: repository identifier, forwarded to make_key.
        number: issue/PR number, forwarded to make_key.

    Returns:
        A dict produced by updating, in event order, with each event
        body's 'issue' object, or its 'pull_request' object when there is
        no 'issue' key.
    """
    write = self.response.write

    ancestor = models.GithubResource.make_key(repo, number)
    events = list(
        models.GithubWebhookRaw.query(ancestor=ancestor).order(
            models.GithubWebhookRaw.timestamp))

    write('<h3>Distilled Events</h3>')
    write('<pre>')
    event_pairs = [event.to_tuple() for event in events]
    for ev in classifier.distill_events(event_pairs):
        write(cgi.escape('%s, %s %s\n' % ev))
    write('</pre>')

    write('<h3>%d Raw Events</h3>' % (len(events)))
    write('<table border=2>')
    write('<tr><th>Timestamp<th>Event<th>Action<th>Sender<th>Body</tr>')

    merged = {}
    for event in events:
        body_json = json.loads(event.body)
        models.shrink(body_json)
        if 'issue' in body_json:
            merged.update(body_json['issue'])
        elif 'pull_request' in body_json:
            merged.update(body_json['pull_request'])
        body = json.dumps(body_json, indent=2)
        action = body_json.get('action')
        sender = body_json.get('sender', {}).get('login')
        # Every cell is escaped, not just the body: event, action and
        # sender come from the stored webhook data and must not be able to
        # inject markup into the page.
        cells = [cgi.escape(str(x))
                 for x in (event.timestamp, event.event, action, sender)]
        cells.append('<pre>' + cgi.escape(body))
        write('<tr><td>%s\n' % '<td>'.join(cells))
    return merged
def sync_repo(token, repo, write_html=None):
    """Reconcile the datastore's open PRs for a repo with GitHub's list.

    Compares the PR numbers returned by get_prs_from_github with those from
    models.GHIssueDigest.find_open_prs_for_repo. For each PR open only in
    the datastore a 'gh-sync-close' event is injected; for each PR present
    only on GitHub a 'gh-sync-open' event is injected (both via
    inject_event_and_reclassify).

    Args:
        token: forwarded to get_prs_from_github.
        repo: repository to sync.
        write_html: optional callable taking one HTML string, used to
            report progress. When None, HTML output is discarded.
    """
    # Imported locally so this function does not rely on a module-level
    # import of cgi being present.
    import cgi

    if write_html is None:
        write_html = lambda x: None

    logging.info('syncing repo %s', repo)
    write_html('<h1>%s</h1>' % repo)

    # There is a race condition here:
    # We can't atomically get a list of PRs from the database and GitHub,
    # so a PR might falsely be in stale_open_prs if it is opened after
    # we scan GitHub, or falsely be in missing_prs if a PR is made after we
    # got the list from GitHub, and before we get the list from the database.
    #
    # These cases will both be fixed the next time this code runs, so we don't
    # try to prevent it here.
    prs_gh = get_prs_from_github(token, repo)
    prs_gh_by_number = {pr['number']: pr for pr in prs_gh}

    prs_db = list(models.GHIssueDigest.find_open_prs_for_repo(repo))
    prs_db_by_number = {pr.number: pr for pr in prs_db}

    numbers_datastore = set(prs_db_by_number)
    numbers_github = set(prs_gh_by_number)

    stale_open_prs = sorted(numbers_datastore - numbers_github)
    missing_prs = sorted(numbers_github - numbers_datastore)

    if not stale_open_prs and not missing_prs:
        write_html('matched, no further work needed')
        logging.info('matched, no further work needed')
        return

    logging.info('PRs to close: %s', stale_open_prs)
    logging.info('PRs to open: %s', missing_prs)

    write_html('<br>')
    write_html('PRs that should be closed: %s<br>' % stale_open_prs)

    for number in stale_open_prs:
        pr = prs_db_by_number[number]
        # The digest's string form is escaped before being embedded in
        # HTML, since it is rendered from stored PR data.
        write_html('<b>%d</b><br>%s<br>'
                   % (number, cgi.escape('%s' % (pr,))))
        inject_event_and_reclassify(repo, number, 'gh-sync-close', {
            'state': 'closed',
            # These other 3 keys are injected because the classifier
            # expects them. This simplifies the testing code, and means we
            # don't have to inject fake webhooks.
            'user': {'login': pr.payload['author']},
            'assignees': [{'login': u} for u in pr.payload['assignees']],
            'title': pr.payload['title'],
        })

    write_html('PRs that should be opened: %s<br>' % missing_prs)

    for number in missing_prs:
        pr = models.shrink(prs_gh_by_number[number])
        # The heading uses <b>...</b> like the "closed" loop above; the
        # previous '<br>%d</br>' was malformed markup. The JSON is escaped
        # because it carries the PR data fetched from GitHub.
        write_html('<b>%d</b><pre>%s</pre><br>' % (
            number,
            cgi.escape(json.dumps(pr, indent=4, sort_keys=True))))
        inject_event_and_reclassify(repo, number, 'gh-sync-open', pr)