def process_pull_request_file(
    prf_data,
    via="webhook",
    fetched_at=None,
    commit=True,
    pull_request_id=None,
):
    """
    Create or update the PullRequestFile described by ``prf_data``.

    The object is looked up by the ``(pull_request_id, sha)`` pair and is
    created when that lookup returns nothing.

    :param prf_data: mapping of file attributes; must contain "sha"
    :param via: name interpolated into ``last_replicated_via_<via>_at``
    :param fetched_at: when the payload was obtained; defaults to
        ``datetime.now()``
    :param commit: commit the DB session before returning when true
    :param pull_request_id: ID of the pull request the file belongs to
    :returns: the PullRequestFile object
    :raises NothingToDo: ``prf_data`` has no "sha"
    :raises MissingData: ``pull_request_id`` is empty
    :raises StaleData: the object's ``last_replicated_at`` is later than
        ``fetched_at``
    """
    file_sha = prf_data.get("sha")
    if not file_sha:
        # A missing SHA is how Github reports a moved file (for example,
        # /tmp/a.txt renamed to /tmp/b.txt). It is unclear why moves are
        # reported this way, but it is not an error condition.
        raise NothingToDo("no pull request file SHA")
    if not pull_request_id:
        raise MissingData("no pull_request_id", obj=prf_data)

    # load the existing object, falling back to a brand new one
    record = PullRequestFile.query.get((pull_request_id, file_sha))
    if not record:
        record = PullRequestFile(
            sha=file_sha,
            pull_request_id=pull_request_id,
        )

    # refuse to overwrite data that is newer than this payload
    if not fetched_at:
        fetched_at = datetime.now()
    if record.last_replicated_at > fetched_at:
        raise StaleData()

    # copy over whichever plain attributes the payload carries
    plain_attrs = (
        "filename",
        "status",
        "additions",
        "deletions",
        "changes",
        "patch",
    )
    for attr in plain_attrs:
        if attr in prf_data:
            setattr(record, attr, prf_data[attr])

    # stamp the replication time for this channel, if the model tracks it
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(record, stamp_attr):
        setattr(record, stamp_attr, fetched_at)

    # queue the object on the session; commit only on request
    db.session.add(record)
    if commit:
        db.session.commit()
    return record
def process_repository(repo_data, via="webhook", fetched_at=None, commit=True,
                       requestor_id=None):
    """
    Create or update the Repository described by ``repo_data``.

    Users referenced under "owner" and "organization" are replicated through
    ``process_user``. When ``requestor_id`` is given and the payload has a
    non-empty "permissions" entry, the matching UserRepoAssociation is
    created or updated as well.

    :param repo_data: mapping of repository attributes; must contain "id"
    :param via: name interpolated into ``last_replicated_via_<via>_at``
    :param fetched_at: when the payload was obtained; defaults to
        ``datetime.now()``
    :param commit: commit the DB session before returning when true. The
        nested ``process_user`` calls are made with ``commit=False`` so that
        this flag alone decides whether this function commits.
    :param requestor_id: ID of the user whose permissions are in the payload
    :returns: the Repository object
    :raises MissingData: ``repo_data`` has no "id"
    :raises StaleData: the object's ``last_replicated_at`` is later than
        ``fetched_at``
    """
    repo_id = repo_data.get("id")
    if not repo_id:
        raise MissingData("no repo ID")

    # fetch the object from the database,
    # or create it if it doesn't exist in the DB
    repo = Repository.query.get(repo_id)
    if not repo:
        repo = Repository(id=repo_id)

    # should we update the object?
    fetched_at = fetched_at or datetime.now()
    if repo.last_replicated_at > fetched_at:
        raise StaleData()

    # update the object
    fields = (
        "name", "private", "description", "fork", "homepage", "size",
        "stargazers_count", "watchers_count", "language", "has_issues",
        "has_downloads", "has_wiki", "has_pages", "forks_count",
        "open_issues_count", "default_branch",
    )
    for field in fields:
        if field in repo_data:
            setattr(repo, field, repo_data[field])

    # timestamps are parsed and stored without tzinfo
    for field in ("created_at", "updated_at", "pushed_at"):
        raw = repo_data.get(field)
        if raw:
            setattr(repo, field, parse_date(raw).replace(tzinfo=None))

    # user references
    for user_field in ("owner", "organization"):
        if user_field not in repo_data:
            continue
        user_data = repo_data[user_field]
        id_field = "{}_id".format(user_field)
        login_field = "{}_login".format(user_field)
        if not user_data:
            # the payload explicitly carries an empty reference: clear it
            setattr(repo, id_field, None)
            if hasattr(repo, login_field):
                setattr(repo, login_field, None)
            continue
        setattr(repo, id_field, user_data["id"])
        if hasattr(repo, login_field):
            setattr(repo, login_field, user_data["login"])
        try:
            # commit=False: the nested call must not commit the session
            # behind the back of a caller that asked for commit=False
            process_user(
                user_data, via=via, fetched_at=fetched_at, commit=False,
            )
        except StaleData:
            # the user object is newer than this payload; keep it as is
            pass

    # update replication timestamp
    replicated_dt_field = "last_replicated_via_{}_at".format(via)
    if hasattr(repo, replicated_dt_field):
        setattr(repo, replicated_dt_field, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(repo)

    # if we have requestor_id and permissions, update the permissions object
    if requestor_id and repo_data.get("permissions"):
        permissions_data = repo_data["permissions"]
        assoc = UserRepoAssociation.query.get((requestor_id, repo_id))
        if not assoc:
            assoc = UserRepoAssociation(user_id=requestor_id, repo_id=repo_id)
        for perm in ("admin", "push", "pull"):
            if perm in permissions_data:
                perm_attr = "can_{perm}".format(perm=perm)
                setattr(assoc, perm_attr, permissions_data[perm])
        db.session.add(assoc)

    if commit:
        db.session.commit()

    return repo
def process_user(user_data, via="webhook", fetched_at=None, commit=True):
    """
    Create or update the User described by ``user_data``.

    :param user_data: mapping of user attributes; must contain "id"
    :param via: name interpolated into ``last_replicated_via_<via>_at``
    :param fetched_at: when the payload was obtained; defaults to
        ``datetime.now()``
    :param commit: commit the DB session before returning when true
    :returns: the User object
    :raises MissingData: ``user_data`` has no "id"
    :raises StaleData: the object's ``last_replicated_at`` is later than
        ``fetched_at``
    """
    uid = user_data.get("id")
    if not uid:
        raise MissingData("no user ID")

    # load the existing object, falling back to a brand new one
    account = User.query.get(uid)
    if not account:
        account = User(id=uid)

    # refuse to overwrite data that is newer than this payload
    if not fetched_at:
        fetched_at = datetime.now()
    if account.last_replicated_at > fetched_at:
        raise StaleData()

    # (payload key, model attribute) pairs. Most attributes share the name
    # used by Github's API; the four counters at the end are the exceptions.
    key_to_attr = (
        ("login", "login"),
        ("site_admin", "site_admin"),
        ("name", "name"),
        ("company", "company"),
        ("blog", "blog"),
        ("location", "location"),
        ("email", "email"),
        ("hireable", "hireable"),
        ("bio", "bio"),
        ("public_repos", "public_repos_count"),
        ("public_gists", "public_gists_count"),
        ("followers", "followers_count"),
        ("following", "following_count"),
    )
    for key, attr in key_to_attr:
        if key in user_data:
            setattr(account, attr, user_data[key])

    # timestamps are parsed and stored without tzinfo
    for stamp_key in ("created_at", "updated_at"):
        raw = user_data.get(stamp_key)
        if raw:
            setattr(account, stamp_key, parse_date(raw).replace(tzinfo=None))

    # stamp the replication time for this channel, if the model tracks it
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(account, stamp_attr):
        setattr(account, stamp_attr, fetched_at)

    # queue the object on the session; commit only on request
    db.session.add(account)
    if commit:
        db.session.commit()
    return account
def process_milestone(milestone_data, via="webhook", fetched_at=None,
                      commit=True, repo_id=None):
    """
    Create or update the Milestone described by ``milestone_data``.

    The object is looked up by the ``(repo_id, number)`` pair. When
    ``repo_id`` is not supplied, the repository owner and name are taken
    from the payload's "url" and resolved through ``Repository.get``.

    :param milestone_data: mapping of milestone attributes; must contain
        "number", and "url" when ``repo_id`` is not given
    :param via: name interpolated into ``last_replicated_via_<via>_at``
    :param fetched_at: when the payload was obtained; defaults to
        ``datetime.now()``
    :param commit: commit the DB session before returning when true. The
        nested ``process_user`` call is made with ``commit=False`` so that
        this flag alone decides whether this function commits.
    :param repo_id: ID of the repository that owns the milestone
    :returns: the Milestone object
    :raises MissingData: "number" is missing, or "url" is missing while
        ``repo_id`` was not given
    :raises DatabaseError: the repository lookup found multiple rows
    :raises NotFound: the repository lookup found nothing
    :raises StaleData: the object's ``last_replicated_at`` is later than
        ``fetched_at``
    """
    number = milestone_data.get("number")
    if not number:
        raise MissingData("no milestone number")

    if not repo_id:
        url = milestone_data.get("url")
        if not url:
            raise MissingData("no milestone url")

        # parse repo info from url
        path = URLObject(url).path
        # NOTE(review): this check disappears under `python -O`
        assert path.segments[0] == "repos"
        repo_owner = path.segments[1]
        repo_name = path.segments[2]

        # fetch repo from database
        try:
            repo = Repository.get(repo_owner, repo_name)
        except MultipleResultsFound:
            msg = "Repo {owner}/{repo} found multiple times!".format(
                owner=repo_owner, repo=repo_name,
            )
            raise DatabaseError(msg, {
                "type": "milestone",
                "owner": repo_owner,
                "repo": repo_name,
            })
        if not repo:
            msg = "Repo {owner}/{repo} not loaded in webhookdb".format(
                owner=repo_owner, repo=repo_name,
            )
            raise NotFound(msg, {
                "type": "milestone",
                "owner": repo_owner,
                "repo": repo_name,
            })
        repo_id = repo.id

    # fetch the object from the database,
    # or create it if it doesn't exist in the DB
    milestone = Milestone.query.get((repo_id, number))
    if not milestone:
        milestone = Milestone(repo_id=repo_id, number=number)

    # should we update the object?
    fetched_at = fetched_at or datetime.now()
    if milestone.last_replicated_at > fetched_at:
        raise StaleData()

    # Most fields have the same name in our model as they do in Github's API.
    # However, some are different. This mapping contains just the differences.
    field_to_model = {
        "open_issues": "open_issues_count",
        "closed_issues": "closed_issues_count",
        "due_on": "due_at",
    }

    # update the object
    fields = (
        "state", "title", "description", "open_issues", "closed_issues",
    )
    for field in fields:
        if field in milestone_data:
            mfield = field_to_model.get(field, field)
            setattr(milestone, mfield, milestone_data[field])

    # timestamps are parsed and stored without tzinfo
    for field in ("created_at", "updated_at", "closed_at", "due_on"):
        raw = milestone_data.get(field)
        if raw:
            mfield = field_to_model.get(field, field)
            setattr(milestone, mfield, parse_date(raw).replace(tzinfo=None))

    # user references
    for user_field in ("creator",):
        if user_field not in milestone_data:
            continue
        user_data = milestone_data[user_field]
        id_field = "{}_id".format(user_field)
        login_field = "{}_login".format(user_field)
        if not user_data:
            # the payload explicitly carries an empty reference: clear it
            setattr(milestone, id_field, None)
            if hasattr(milestone, login_field):
                setattr(milestone, login_field, None)
            continue
        setattr(milestone, id_field, user_data["id"])
        if hasattr(milestone, login_field):
            setattr(milestone, login_field, user_data["login"])
        try:
            # commit=False: the nested call must not commit the session
            # behind the back of a caller that asked for commit=False
            process_user(
                user_data, via=via, fetched_at=fetched_at, commit=False,
            )
        except StaleData:
            # the user object is newer than this payload; keep it as is
            pass

    # update replication timestamp
    replicated_dt_field = "last_replicated_via_{}_at".format(via)
    if hasattr(milestone, replicated_dt_field):
        setattr(milestone, replicated_dt_field, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(milestone)
    if commit:
        db.session.commit()

    return milestone
def process_repository_hook(hook_data, via="webhook", fetched_at=None,
                            commit=True, requestor_id=None, repo_id=None):
    """
    Create or update the RepositoryHook described by ``hook_data``.

    When ``repo_id`` is not supplied, the repository owner and name are
    taken from the payload's top-level "url" and the repository is looked
    up by owner login and name.

    :param hook_data: mapping of hook attributes; must contain "id", and
        "url" when ``repo_id`` is not given
    :param via: name interpolated into ``last_replicated_via_<via>_at``
    :param fetched_at: when the payload was obtained; defaults to
        ``datetime.now()``
    :param commit: commit the DB session before returning when true
    :param requestor_id: accepted but not used by this function
    :param repo_id: ID of the repository that owns the hook; only applied
        when a new RepositoryHook object is created
    :returns: the RepositoryHook object
    :raises MissingData: "id" is missing, or "url" is missing while
        ``repo_id`` was not given
    :raises NotFound: the repository lookup found nothing
    :raises DatabaseError: the repository lookup found multiple rows
    :raises StaleData: the object's ``last_replicated_at`` is later than
        ``fetched_at``
    """
    hook_id = hook_data.get("id")
    if not hook_id:
        raise MissingData("no hook ID")

    if not repo_id:
        url = hook_data.get("url")
        if not url:
            raise MissingData("no hook url")

        # parse repo info from url
        path = URLObject(url).path
        # NOTE(review): this check disappears under `python -O`
        assert path.segments[0] == "repos"
        repo_owner = path.segments[1]
        repo_name = path.segments[2]

        # fetch repo from database
        repo_query = (
            Repository.query
            .filter(Repository.owner_login == repo_owner)
            .filter(Repository.name == repo_name)
        )
        try:
            repo = repo_query.one()
        except NoResultFound:
            msg = "Repo {owner}/{repo} not loaded in webhookdb".format(
                owner=repo_owner, repo=repo_name,
            )
            raise NotFound(msg, {
                "type": "repo_hook",
                "owner": repo_owner,
                "repo": repo_name,
            })
        except MultipleResultsFound:
            msg = "Repo {owner}/{repo} found multiple times!".format(
                owner=repo_owner, repo=repo_name,
            )
            raise DatabaseError(msg, {
                "type": "repo_hook",
                "owner": repo_owner,
                "repo": repo_name,
            })
        repo_id = repo.id

    # fetch the object from the database,
    # or create it if it doesn't exist in the DB
    hook = RepositoryHook.query.get(hook_id)
    if not hook:
        hook = RepositoryHook(id=hook_id, repo_id=repo_id)

    # should we update the object?
    fetched_at = fetched_at or datetime.now()
    if hook.last_replicated_at > fetched_at:
        raise StaleData()

    # update the object
    fields = (
        "name", "config", "events", "active", "last_response",
    )
    for field in fields:
        if field in hook_data:
            setattr(hook, field, hook_data[field])

    # timestamps are parsed and stored without tzinfo
    for field in ("created_at", "updated_at"):
        raw = hook_data.get(field)
        if raw:
            setattr(hook, field, parse_date(raw).replace(tzinfo=None))

    # `url` is special -- it's the value in the `config` object,
    # NOT the top-level `url` property.
    # `or {}` also covers a payload whose "config" is present but null;
    # a plain .get("config", {}) default only applies to a missing key.
    hook.url = (hook_data.get("config") or {}).get("url")

    # update replication timestamp
    replicated_dt_field = "last_replicated_via_{}_at".format(via)
    if hasattr(hook, replicated_dt_field):
        setattr(hook, replicated_dt_field, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(hook)
    if commit:
        db.session.commit()

    return hook
def process_label(label_data, via="webhook", fetched_at=None, commit=True,
                  repo_id=None):
    """
    Create or update the IssueLabel described by ``label_data``.

    The object is looked up by the ``(repo_id, name)`` pair. When
    ``repo_id`` is not supplied, the repository owner and name are taken
    from the payload's "url" and resolved through ``Repository.get``.

    :param label_data: mapping of label attributes; must contain "name",
        and "url" when ``repo_id`` is not given
    :param via: name interpolated into ``last_replicated_via_<via>_at``
    :param fetched_at: when the payload was obtained; defaults to
        ``datetime.now()``
    :param commit: commit the DB session before returning when true
    :param repo_id: ID of the repository that owns the label
    :returns: the IssueLabel object
    :raises MissingData: "name" is missing, or "url" is missing while
        ``repo_id`` was not given
    :raises DatabaseError: the repository lookup found multiple rows
    :raises NotFound: the repository lookup found nothing
    :raises StaleData: the object's ``last_replicated_at`` is later than
        ``fetched_at``
    """
    label_name = label_data.get("name")
    if not label_name:
        raise MissingData("no label name")

    if not repo_id:
        label_url = label_data.get("url")
        if not label_url:
            raise MissingData("no label url")

        # the URL path is expected to look like /repos/<owner>/<repo>/...
        url_path = URLObject(label_url).path
        assert url_path.segments[0] == "repos"
        owner_login = url_path.segments[1]
        repository_name = url_path.segments[2]

        # resolve the repository through the database
        try:
            found = Repository.get(owner_login, repository_name)
        except MultipleResultsFound:
            raise DatabaseError(
                "Repo {owner}/{repo} found multiple times!".format(
                    owner=owner_login, repo=repository_name,
                ),
                {
                    "type": "label",
                    "owner": owner_login,
                    "repo": repository_name,
                },
            )
        if not found:
            raise NotFound(
                "Repo {owner}/{repo} not loaded in webhookdb".format(
                    owner=owner_login, repo=repository_name,
                ),
                {
                    "type": "label",
                    "owner": owner_login,
                    "repo": repository_name,
                },
            )
        repo_id = found.id

    # load the existing object, falling back to a brand new one
    record = IssueLabel.query.get((repo_id, label_name))
    if not record:
        record = IssueLabel(repo_id=repo_id, name=label_name)

    # refuse to overwrite data that is newer than this payload
    if not fetched_at:
        fetched_at = datetime.now()
    if record.last_replicated_at > fetched_at:
        raise StaleData()

    # color: an empty value in the payload clears the stored color
    if "color" in label_data:
        hex_digits = label_data["color"]
        record.color = (
            Color("#{hex}".format(hex=hex_digits)) if hex_digits else None
        )

    # stamp the replication time for this channel, if the model tracks it
    stamp_attr = "last_replicated_via_{}_at".format(via)
    if hasattr(record, stamp_attr):
        setattr(record, stamp_attr, fetched_at)

    # queue the object on the session; commit only on request
    db.session.add(record)
    if commit:
        db.session.commit()
    return record
def process_pull_request(pr_data, via="webhook", fetched_at=None, commit=True):
    """
    Create or update the PullRequest described by ``pr_data``.

    Users referenced under "user", "assignee" and "merged_by" are replicated
    through ``process_user``; repositories referenced under "base" and
    "head" are replicated through ``process_repository``.

    :param pr_data: mapping of pull request attributes; must contain "id"
    :param via: name interpolated into ``last_replicated_via_<via>_at``
    :param fetched_at: when the payload was obtained; defaults to
        ``datetime.now()``
    :param commit: commit the DB session before returning when true. The
        nested ``process_user`` / ``process_repository`` calls are made
        with ``commit=False`` rather than their default.
    :returns: the PullRequest object
    :raises MissingData: ``pr_data`` has no "id"
    :raises StaleData: the object's ``last_replicated_at`` is later than
        ``fetched_at``
    """
    pr_id = pr_data.get("id")
    if not pr_id:
        raise MissingData("no pull_request ID", obj=pr_data)

    # fetch the object from the database,
    # or create it if it doesn't exist in the DB
    pr = PullRequest.query.get(pr_id)
    if not pr:
        pr = PullRequest(id=pr_id)

    # should we update the object?
    fetched_at = fetched_at or datetime.now()
    if pr.last_replicated_at > fetched_at:
        raise StaleData()

    # Most fields have the same name in our model as they do in Github's API.
    # However, some are different. This mapping contains just the differences.
    field_to_model = {
        "comments": "comments_count",
        "review_comments": "review_comments_count",
        "commits": "commits_count",
    }

    # update the object
    fields = (
        "number", "state", "locked", "title", "body", "merged", "mergeable",
        "comments", "review_comments", "commits", "additions", "deletions",
        "changed_files",
    )
    for field in fields:
        if field in pr_data:
            mfield = field_to_model.get(field, field)
            setattr(pr, mfield, pr_data[field])

    # timestamps are parsed and stored without tzinfo; none of these names
    # appear in field_to_model, so they map straight onto the model
    for field in ("created_at", "updated_at", "closed_at", "merged_at"):
        raw = pr_data.get(field)
        if raw:
            setattr(pr, field, parse_date(raw).replace(tzinfo=None))

    # user references
    for user_field in ("user", "assignee", "merged_by"):
        if user_field not in pr_data:
            continue
        user_data = pr_data[user_field]
        id_field = "{}_id".format(user_field)
        login_field = "{}_login".format(user_field)
        if not user_data:
            # the payload explicitly carries an empty reference: clear it
            setattr(pr, id_field, None)
            if hasattr(pr, login_field):
                setattr(pr, login_field, None)
            continue
        setattr(pr, id_field, user_data["id"])
        if hasattr(pr, login_field):
            setattr(pr, login_field, user_data["login"])
        try:
            # commit=False: the nested call must not commit the session
            # behind the back of a caller that asked for commit=False
            process_user(
                user_data, via=via, fetched_at=fetched_at, commit=False,
            )
        except StaleData:
            # the user object is newer than this payload; keep it as is
            pass

    # repository references
    for ref in ("base", "head"):
        if ref not in pr_data:
            continue
        ref_data = pr_data[ref]
        setattr(pr, "{}_ref".format(ref), ref_data["ref"])
        repo_data = ref_data["repo"]
        repo_id_field = "{}_repo_id".format(ref)
        if not repo_data:
            setattr(pr, repo_id_field, None)
            continue
        setattr(pr, repo_id_field, repo_data["id"])
        try:
            # commit=False for the same reason as the user references above
            process_repository(
                repo_data, via=via, fetched_at=fetched_at, commit=False,
            )
        except StaleData:
            # the repository object is newer than this payload; keep it
            pass

    # update replication timestamp
    replicated_dt_field = "last_replicated_via_{}_at".format(via)
    if hasattr(pr, replicated_dt_field):
        setattr(pr, replicated_dt_field, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(pr)
    if commit:
        db.session.commit()

    return pr
def process_issue(issue_data, via="webhook", fetched_at=None, commit=True):
    """
    Create or update the Issue described by ``issue_data``.

    Users referenced under "user", "assignee" and "closed_by" are replicated
    through ``process_user``; entries under "labels" go through
    ``process_label`` and the "milestone" entry through
    ``process_milestone``.

    :param issue_data: mapping of issue attributes; must contain "id"
    :param via: name interpolated into ``last_replicated_via_<via>_at``
    :param fetched_at: when the payload was obtained; defaults to
        ``datetime.now()``
    :param commit: commit the DB session before returning when true. All
        nested ``process_*`` calls are made with ``commit=False``.
    :returns: the Issue object
    :raises MissingData: ``issue_data`` has no "id"
    :raises StaleData: the object's ``last_replicated_at`` is later than
        ``fetched_at``. StaleData raised by ``process_label`` or
        ``process_milestone`` is not caught here and also propagates.
    """
    issue_id = issue_data.get("id")
    if not issue_id:
        raise MissingData("no issue ID", obj=issue_data)

    # fetch the object from the database,
    # or create it if it doesn't exist in the DB
    issue = Issue.query.get(issue_id)
    if not issue:
        issue = Issue(id=issue_id)

    # should we update the object?
    fetched_at = fetched_at or datetime.now()
    if issue.last_replicated_at > fetched_at:
        raise StaleData()

    # Most fields have the same name in our model as they do in Github's API.
    # However, some are different. This mapping contains just the differences.
    field_to_model = {
        "comments": "comments_count",
    }

    # update the object
    fields = (
        "number", "state", "title", "body", "comments",
    )
    for field in fields:
        if field in issue_data:
            mfield = field_to_model.get(field, field)
            setattr(issue, mfield, issue_data[field])

    # timestamps are parsed and stored without tzinfo; none of these names
    # appear in field_to_model, so they map straight onto the model
    for field in ("created_at", "updated_at", "closed_at"):
        raw = issue_data.get(field)
        if raw:
            setattr(issue, field, parse_date(raw).replace(tzinfo=None))

    # user references
    for user_field in ("user", "assignee", "closed_by"):
        if user_field not in issue_data:
            continue
        user_data = issue_data[user_field]
        id_field = "{}_id".format(user_field)
        login_field = "{}_login".format(user_field)
        if not user_data:
            # the payload explicitly carries an empty reference: clear it
            setattr(issue, id_field, None)
            if hasattr(issue, login_field):
                setattr(issue, login_field, None)
            continue
        setattr(issue, id_field, user_data["id"])
        if hasattr(issue, login_field):
            setattr(issue, login_field, user_data["login"])
        try:
            # commit=False, like the label and milestone calls below: the
            # nested call must not commit the session behind the back of
            # a caller that asked for commit=False
            process_user(
                user_data, via=via, fetched_at=fetched_at, commit=False,
            )
        except StaleData:
            # the user object is newer than this payload; keep it as is
            pass

    # used for labels and milestone: once the first label has been
    # processed, its repo_id is handed to the following calls
    repo_id = None

    # label reference
    if "labels" in issue_data:
        labels = []
        for label_data in issue_data["labels"] or ():
            label = process_label(
                label_data, via=via, fetched_at=fetched_at, commit=False,
                repo_id=repo_id,
            )
            repo_id = repo_id or label.repo_id
            labels.append(label)
        # an empty or null "labels" entry leaves this as an empty list
        issue.labels = labels

    # milestone reference
    if "milestone" in issue_data:
        milestone_data = issue_data["milestone"]
        if milestone_data:
            milestone = process_milestone(
                milestone_data, via=via, fetched_at=fetched_at, commit=False,
                repo_id=repo_id,
            )
            issue.milestone_number = milestone.number
        else:
            # NOTE(review): cleared through `milestone`, while the branch
            # above assigns `milestone_number` -- confirm both attributes
            # describe the same reference on the model
            issue.milestone = None

    # update replication timestamp
    replicated_dt_field = "last_replicated_via_{}_at".format(via)
    if hasattr(issue, replicated_dt_field):
        setattr(issue, replicated_dt_field, fetched_at)

    # add to DB session, so that it will be committed
    db.session.add(issue)
    if commit:
        db.session.commit()

    return issue