def _find_commit(self, commit_id: str) -> Commit:
    """
    runs the commit query for one commit id and builds a Commit from the
    "node" object of the response

    :param commit_id: id handed to the commit query
    :return: the populated Commit, with one CommitEntry appended per tree entry
    """
    logging.debug(f'running query for commit: [{commit_id}]')
    response_json = self.graph_ql_client.execute_query(
        self._commit_query(commit_id))
    logging.debug(f'query complete for commit: [{commit_id}]')

    data = response_json["data"]["node"]
    result = Commit(data["id"])
    result.repository_id = self.repository.id

    # author / committer (and the user behind them) may be missing
    author_data = data["author"]
    if author_data is not None and author_data["user"] is not None:
        result.author = self._find_author_by_id(author_data["user"]["id"])
    result.authored_by_committer = data["authoredByCommitter"]
    result.total_authors = data["authors"]["totalCount"]

    committer_data = data["committer"]
    if committer_data is not None and committer_data["user"] is not None:
        result.committer = self._find_author_by_id(
            committer_data["user"]["id"])

    organization = data["onBehalfOf"]
    if organization is not None:
        result.for_organization_id = organization["id"]

    # dates: the pushed date is optional
    result.create_datetime = base.to_datetime_from_str(data["committedDate"])
    pushed_date = data["pushedDate"]
    if pushed_date is not None:
        result.push_datetime = base.to_datetime_from_str(pushed_date)

    # counts of the sub items
    result.total_comments = data["comments"]["totalCount"]
    result.total_associated_pull_requests = \
        data["associatedPullRequests"]["totalCount"]
    result.total_check_suites = data["checkSuites"]["totalCount"]

    # message and change statistics
    result.message_headline = data["messageHeadline"]
    result.message_body = data["messageBody"]
    result.additions = data["additions"]
    result.deletions = data["deletions"]
    result.changed_files = data["changedFiles"]

    # walk the commit tree, one CommitEntry per entry
    tree = data["tree"]
    result.tree_id = tree["id"]
    for tree_entry in tree["entries"]:
        item = CommitEntry()
        item.id = tree_entry["object"]["id"]
        item.name = tree_entry["name"]
        item.extension = tree_entry["extension"]
        item.path = tree_entry["path"]
        item.is_generated = tree_entry["isGenerated"]
        item.mode = tree_entry["mode"]
        item.type = tree_entry["type"]
        submodule = tree_entry["submodule"]
        if submodule is not None:
            item.submodule_name = submodule["name"]
        result.entries.append(item)

    status = data["status"]
    if status is not None:
        result.state = status["state"]

    logging.debug(f'query complete for commit: [{commit_id}]')
    return result
def run(self):
    """
    loads the comments for the pull request reviews for a specific repository

    Each saved pull request review of the repository is visited. While the
    review's 'total_comments' is greater than what _get_actual_comments
    reports, a page of comments is queried; every comment that is not yet
    in the collection is inserted and the review's 'comment_ids' field is
    updated after each page. Paging stops early when a page comes back
    empty, so the loop cannot repeat the same query forever.

    :return: None
    """
    pull_request_reviews_reviewed: int = 0
    pull_request_review_count = self._get_objects_saved_count(
        self.repository, base.ObjectType.PULL_REQUEST_REVIEW)
    pull_request_reviews = self._get_collection().find({
        'repository_id': self.repository.id,
        'object_type': base.ObjectType.PULL_REQUEST_REVIEW.name
    })

    for review in pull_request_reviews:
        pull_request_review_id: str = review['id']
        pull_request_id: str = review['pull_request_id']
        comments_expected: int = review['total_comments']
        comment_ids = []
        comment_cursor = None
        pull_request_reviews_reviewed += 1

        while comments_expected > self._get_actual_comments(
                pull_request_review_id):
            logging.debug(
                f'running query for comments for pull request review [{pull_request_review_id}] '
                f'against {self.repository}')
            query = self._pull_request_review_comments_query(
                pull_request_review_id, comment_cursor)
            response_json = self.graph_ql_client.execute_query(query)
            logging.debug(
                f'query complete for comments for pull request review [{pull_request_review_id}] '
                f'against {self.repository}')

            edges = response_json["data"]["node"]["comments"]["edges"]
            if not edges:
                # an empty page saves nothing new and leaves the cursor
                # unchanged, so the next iteration would be identical
                logging.warning(
                    f'no more comments returned for pull request review [{pull_request_review_id}] '
                    f'against {self.repository}; expected {comments_expected}')
                break

            # iterate over each comment of the returned page
            for edge in edges:
                node = edge["node"]
                comment_cursor = edge["cursor"]

                comment = PullRequestReviewComment(node["id"])
                comment_ids.append(comment.id)
                comment.repository_id = self.repository.id
                comment.pull_request_id = pull_request_id
                comment.pull_request_review_id = pull_request_review_id
                comment.state = node["state"]
                comment.body_text = node["bodyText"]

                # counts for the sub items of the comment
                comment.total_reactions = node["reactions"]["totalCount"]
                comment.total_edits = node["userContentEdits"]["totalCount"]

                comment.create_datetime = base.to_datetime_from_str(
                    node["createdAt"])
                comment.diff_hunk = node["diffHunk"]
                comment.path = node["path"]

                # optional values are only copied when present
                if node["originalPosition"] is not None:
                    comment.original_position = node["originalPosition"]
                if node["position"] is not None:
                    comment.position = node["position"]
                if node["isMinimized"] is True:
                    comment.minimized_status = node["minimizedReason"]

                # author can be None
                if node["author"] is not None:
                    comment.author = self._find_author_by_login(
                        node["author"]["login"])
                comment.author_association = node['authorAssociation']

                if node["originalCommit"] is not None:
                    comment.original_commit_id = node["originalCommit"]["id"]
                if node["commit"] is not None:
                    comment.commit_id = node["commit"]["id"]
                # id of the comment this one replies to
                if node["replyTo"] is not None:
                    comment.reply_to_comment_id = node["replyTo"]["id"]

                # only insert comments that are not in the database yet
                found_request = self._get_collection().find_one({
                    'id': comment.id,
                    'object_type':
                        base.ObjectType.PULL_REQUEST_REVIEW_COMMENT.name
                })
                if found_request is None:
                    self._get_collection().insert_one(
                        comment.to_dictionary())

            # written per page, and only when something was fetched, so an
            # already complete review is never overwritten with an empty list
            self._get_collection().update_one(
                {'id': pull_request_review_id},
                {'$set': {
                    'comment_ids': comment_ids
                }})

        logging.debug(
            f'pull request reviews reviewed for {self.repository} '
            f'{pull_request_reviews_reviewed}/{pull_request_review_count}')

    actual_count: int = self._get_actual_results()
    expected_count: int = self._get_expected_results()
    logging.debug(
        f'comments returned for {self.repository} '
        f'returned: [{actual_count}], expected: [{expected_count}]')
def run(self):
    """
    loads the reviews for the pull requests for a specific repository

    Each saved pull request of the repository is visited. While the pull
    request's 'total_reviews' is greater than what _get_actual_reviews
    reports, a page of reviews is queried; every review that is not yet in
    the collection is inserted and the pull request's 'review_ids' field is
    updated after each page. Paging stops early when a page comes back
    empty, so the loop cannot repeat the same query forever.

    :return: None
    """
    pull_request_reviewed: int = 0
    pull_request_count = self._get_objects_saved_count(
        self.repository, base.ObjectType.PULL_REQUEST)
    pull_requests = self._get_collection().find({
        'repository_id': self.repository.id,
        'object_type': base.ObjectType.PULL_REQUEST.name
    })

    for pr in pull_requests:
        pull_request_id: str = pr['id']
        reviews_expected: int = pr['total_reviews']
        review_ids = []
        review_cursor = None
        pull_request_reviewed += 1

        while reviews_expected > self._get_actual_reviews(pull_request_id):
            logging.debug(
                f'running query for reviews for pull request [{pull_request_id}] against {self.repository}'
            )
            query = self._pull_request_reviews_query(
                pull_request_id, review_cursor)
            response_json = self.graph_ql_client.execute_query(query)
            logging.debug(
                f'query complete for reviews for pull request [{pull_request_id}] against {self.repository}'
            )

            edges = response_json["data"]["node"]["reviews"]["edges"]
            if not edges:
                # an empty page saves nothing new and leaves the cursor
                # unchanged, so the next iteration would be identical
                logging.warning(
                    f'no more reviews returned for pull request [{pull_request_id}] '
                    f'against {self.repository}; expected {reviews_expected}')
                break

            # iterate over each review of the returned page
            for edge in edges:
                node = edge["node"]
                review_cursor = edge["cursor"]

                review = PullRequestReview(node["id"], pull_request_id)
                review_ids.append(review.id)
                review.repository_id = self.repository.id
                review.body_text = node["bodyText"]

                # the commit can be missing; indexing None would raise
                commit_node = node["commit"]
                review.commit_id = (commit_node["id"]
                                    if commit_node is not None else None)

                # counts for the sub items of the review
                review.total_comments = node["comments"]["totalCount"]
                review.total_edits = node["userContentEdits"]["totalCount"]
                review.total_reactions = node["reactions"]["totalCount"]
                review.total_for_teams = node["onBehalfOf"]["totalCount"]

                review.create_datetime = base.to_datetime_from_str(
                    node["createdAt"])
                review.state = node["state"]

                # author can be None
                if node["author"] is not None:
                    review.author = self._find_author_by_login(
                        node["author"]["login"])
                review.author_association = node["authorAssociation"]

                # only insert reviews that are not in the database yet
                found_request = self._get_collection().find_one({
                    'id': review.id,
                    'object_type': base.ObjectType.PULL_REQUEST_REVIEW.name
                })
                if found_request is None:
                    self._get_collection().insert_one(
                        review.to_dictionary())

            # written per page, and only when something was fetched, so an
            # already complete pull request is never overwritten with an
            # empty list
            self._get_collection().update_one(
                {'id': pull_request_id},
                {'$set': {
                    'review_ids': review_ids
                }})

        logging.debug(
            f'pull requests reviewed for {self.repository} {pull_request_reviewed}/{pull_request_count}'
        )

    actual_count: int = self._get_actual_results()
    expected_count: int = self._get_expected_results()
    logging.debug(
        f'reviews returned for {self.repository} '
        f'returned: [{actual_count}], expected: [{expected_count}]')
def run(self):
    """
    loads the check suite ids from the commits for a specific repository

    Each saved commit of the repository is visited. When the commit's
    'total_check_suites' is greater than the number of stored
    'check_suite_ids', all check suites are paged through from the start;
    every check suite that is not yet in the collection is inserted and the
    commit's 'check_suite_ids' field is replaced with the collected ids.
    Paging stops early when a page comes back empty, so the loop cannot
    repeat the same query forever.

    :return: None
    """
    commits_reviewed: int = 0
    commit_count = self._get_objects_saved_count(self.repository,
                                                 base.ObjectType.COMMIT)
    commits = self._get_collection().find({
        'repository_id': self.repository.id,
        'object_type': base.ObjectType.COMMIT.name
    })

    for item in commits:
        commit_id: str = item['id']
        check_suites_expected: int = item['total_check_suites']
        check_suite_ids = []
        check_suite_cursor = None
        commits_reviewed += 1

        if check_suites_expected > len(item['check_suite_ids']):
            while check_suites_expected > len(check_suite_ids):
                logging.debug(
                    f'running query for check suite ids for commit [{commit_id}] against {self.repository}'
                )
                query = self._commit_check_suite_query(
                    commit_id, check_suite_cursor)
                response_json = self.graph_ql_client.execute_query(query)
                logging.debug(
                    f'query complete for check suite ids for commit [{commit_id}] against {self.repository}'
                )

                edges = response_json["data"]["node"]["checkSuites"]["edges"]
                if not edges:
                    # an empty page cannot grow check_suite_ids, so the loop
                    # condition would never change
                    logging.warning(
                        f'no more check suites returned for commit [{commit_id}] '
                        f'against {self.repository}; expected {check_suites_expected}, '
                        f'loaded {len(check_suite_ids)}')
                    break

                # iterate over each check suite of the returned page
                for edge in edges:
                    node = edge["node"]
                    check_suite_cursor = edge["cursor"]

                    check_suite = CheckSuite(node["id"])
                    check_suite.commit_id = node["commit"]["id"]
                    check_suite.repository_id = node["repository"]["id"]
                    if node["app"] is not None:
                        check_suite.application_id = node["app"]["id"]
                    if node["branch"] is not None:
                        check_suite.branch_id = node["branch"]["id"]
                    check_suite.conclusion = node["conclusion"]
                    # NOTE: the push id is not loaded; that assignment was
                    # already disabled in the original code
                    check_suite.state = node["status"]

                    # load the counts
                    check_suite.total_check_runs = node["checkRuns"][
                        "totalCount"]
                    check_suite.total_matching_pull_requests = node[
                        "matchingPullRequests"]["totalCount"]

                    check_suite.create_datetime = base.to_datetime_from_str(
                        node["createdAt"])
                    check_suite_ids.append(check_suite.id)

                    # only insert check suites that are not in the database
                    found_request = self._get_collection().find_one({
                        'id': check_suite.id,
                        'object_type': base.ObjectType.CHECK_SUITE.name
                    })
                    if found_request is None:
                        self._get_collection().insert_one(
                            check_suite.to_dictionary())

            self._get_collection().update_one(
                {'id': commit_id},
                {'$set': {
                    'check_suite_ids': check_suite_ids
                }})

        logging.debug(
            f'commits reviewed for {self.repository} {commits_reviewed}/{commit_count}'
        )

    actual_count: int = self._get_actual_results()
    expected_count: int = self._get_expected_results()
    logging.debug(
        f'check suites returned for {self.repository} returned: [{actual_count}], expected: [{expected_count}]'
    )
def run(self):
    """
    loads the comments for the commits for a specific repository

    Each saved commit of the repository is visited. When the commit's
    'total_comments' is greater than the number of stored 'comment_ids',
    all comments are paged through from the start; every comment that is
    not yet in the collection is inserted and the commit's 'comment_ids'
    field is replaced with the collected ids. Paging stops early when a
    page comes back empty, so the loop cannot repeat the same query forever.

    :return: None
    """
    commits_reviewed: int = 0
    commit_count = self._get_objects_saved_count(self.repository,
                                                 base.ObjectType.COMMIT)
    commits = self._get_collection().find({
        'repository_id': self.repository.id,
        'object_type': base.ObjectType.COMMIT.name
    })

    for item in commits:
        commit_id: str = item['id']
        comments_expected: int = item['total_comments']
        comment_ids = []
        comment_cursor = None
        commits_reviewed += 1

        if comments_expected > len(item['comment_ids']):
            while comments_expected > len(comment_ids):
                logging.debug(
                    f'running query for comments for commit [{commit_id}] against {self.repository}'
                )
                query = self._commit_comment_query(commit_id, comment_cursor)
                response_json = self.graph_ql_client.execute_query(query)
                logging.debug(
                    f'query complete for comments for commit [{commit_id}] against {self.repository}'
                )

                edges = response_json["data"]["node"]["comments"]["edges"]
                if not edges:
                    # an empty page cannot grow comment_ids, so the loop
                    # condition would never change
                    logging.warning(
                        f'no more comments returned for commit [{commit_id}] '
                        f'against {self.repository}; expected {comments_expected}, '
                        f'loaded {len(comment_ids)}')
                    break

                # iterate over each comment of the returned page
                for edge in edges:
                    node = edge["node"]
                    comment_cursor = edge["cursor"]

                    commit_comment = CommitComment(node["id"])
                    comment_ids.append(commit_comment.id)
                    commit_comment.repository_id = self.repository.id
                    commit_comment.commit_id = commit_id

                    # counts for the sub items of the comment
                    commit_comment.total_reactions = node["reactions"][
                        "totalCount"]
                    commit_comment.total_edits = node["userContentEdits"][
                        "totalCount"]

                    commit_comment.body_text = node["bodyText"]
                    commit_comment.create_datetime = \
                        base.to_datetime_from_str(node["createdAt"])
                    commit_comment.path = node["path"]

                    # author can be None
                    if node["author"] is not None:
                        commit_comment.author = self._find_author_by_login(
                            node["author"]["login"])
                    commit_comment.author_association = node[
                        'authorAssociation']

                    # optional values are only copied when present
                    if node["position"] is not None:
                        commit_comment.position = node["position"]
                    if node["isMinimized"] is True:
                        commit_comment.minimized_status = node[
                            "minimizedReason"]

                    # only insert comments that are not in the database yet
                    found_request = self._get_collection().find_one({
                        'id': commit_comment.id,
                        'object_type': base.ObjectType.COMMIT_COMMENT.name
                    })
                    if found_request is None:
                        self._get_collection().insert_one(
                            commit_comment.to_dictionary())

            self._get_collection().update_one(
                {'id': commit_id},
                {'$set': {
                    'comment_ids': comment_ids
                }})

        # the messages below used to say "pull requests" / "participants",
        # left over from the loaders this one was copied from
        logging.debug(
            f'commits reviewed for {self.repository} {commits_reviewed}/{commit_count}'
        )

    actual_count: int = self._get_actual_results()
    expected_count: int = self._get_expected_results()
    logging.debug(
        f'comments returned for {self.repository} returned: [{actual_count}], expected: [{expected_count}]'
    )
def run(self):
    """
    loads the pull request for a specific repository

    Nothing is queried when _get_expected_results() already equals
    _get_actual_results(). Otherwise pull requests are paged through from
    the start until repository.total_pull_requests edges have been seen;
    every pull request that is not yet in the collection is inserted.
    Paging stops early when a page comes back empty, so the loop cannot
    repeat the same query forever.

    :return: None
    """
    if self._get_expected_results() != self._get_actual_results():
        pull_requests_loaded: int = 0
        pull_request_cursor = None

        # continue executing gets against git until we have all the PRs
        while self.repository.total_pull_requests > pull_requests_loaded:
            logging.debug(
                f'running query for pull requests against {self.repository}'
            )
            query = self._pull_request_query(pull_request_cursor)
            response_json = self.graph_ql_client.execute_query(query)
            logging.debug(
                f'query complete for pull requests against {self.repository}'
            )

            edges = response_json["data"]["repository"]["pullRequests"][
                "edges"]
            if not edges:
                # an empty page cannot advance pull_requests_loaded, so the
                # loop condition would never change
                logging.warning(
                    f'no more pull requests returned for {self.repository}; '
                    f'loaded {pull_requests_loaded}/{self.repository.total_pull_requests}'
                )
                break

            # iterate over each pull request of the returned page
            for edge in edges:
                node = edge["node"]
                pull_requests_loaded += 1
                pull_request_cursor = edge["cursor"]
                pull_request_id = node["id"]

                # check to see if pull request is in the database
                found_request = self._get_collection().find_one({
                    'id': pull_request_id,
                    'object_type': base.ObjectType.PULL_REQUEST.name
                })
                if found_request is None:
                    pr = PullRequest(pull_request_id, self.repository.id)
                    pr.body_text = node["bodyText"]
                    pr.state = node["state"]

                    # load the counts
                    pr.total_reviews = node["reviews"]["totalCount"]
                    pr.total_comments = node["comments"]["totalCount"]
                    pr.total_participants = node["participants"][
                        "totalCount"]
                    pr.total_edits = node["userContentEdits"]["totalCount"]
                    pr.total_reactions = node["reactions"]["totalCount"]
                    pr.total_commits = node["commits"]["totalCount"]

                    # author can be None. author_login used to receive the
                    # authorAssociation value through a chained assignment;
                    # it now holds the author's login (None without author)
                    author_node = node["author"]
                    if author_node is not None:
                        pr.author = self._find_author_by_login(
                            author_node["login"])
                        pr.author_login = author_node["login"]
                    else:
                        pr.author_login = None
                    pr.author_association = node["authorAssociation"]

                    # parse the datetime
                    pr.create_datetime = base.to_datetime_from_str(
                        node["createdAt"])

                    logging.debug(f'inserting record for {pr}')
                    self._get_collection().insert_one(pr.to_dictionary())
                    logging.debug(f'insert complete for {pr}')
                else:
                    logging.debug(
                        f'Pull Request [id: {pull_request_id}] already found in database'
                    )

            logging.debug(
                f'pull requests found for {self.repository} '
                f'{pull_requests_loaded}/{self.repository.total_pull_requests}'
            )

    actual_count: int = self._get_actual_results()
    logging.debug(
        f'pull requests returned for {self.repository} returned: [{actual_count}], '
        f'expected: [{self.repository.total_pull_requests}]')
def run(self):
    """
    loads the reactions for a document type for a specific repository

    Each saved document of self.object_type in the repository is visited.
    When the document's 'total_reactions' is greater than the number of
    stored 'reactions', all reactions are paged through from the start and
    the document's 'reactions' field is replaced with the collected ones.
    Paging stops early when a page comes back empty, so the loop cannot
    repeat the same query forever.

    :return: None
    """
    items_reviewed: int = 0
    item_count = self._get_objects_saved_count(self.repository,
                                               self.object_type)
    items = self._get_collection().find({
        'repository_id': self.repository.id,
        'object_type': self.object_type.name
    })

    for item in items:
        item_id: str = item['id']
        reactions_expected: int = item['total_reactions']
        reactions = []
        reaction_cursor = None
        items_reviewed += 1

        if reactions_expected > len(item['reactions']):
            while reactions_expected > len(reactions):
                logging.debug(
                    f'running query for reactions for {self.object_type.name} [{item_id}] against {self.repository}'
                )
                query = self._reactions_query(item_id, reaction_cursor)
                response_json = self.graph_ql_client.execute_query(query)
                logging.debug(
                    f'query complete for reactions for {self.object_type.name} [{item_id}] '
                    f'against {self.repository}')

                edges = response_json["data"]["node"]["reactions"]["edges"]
                if not edges:
                    # an empty page cannot grow reactions, so the loop
                    # condition would never change
                    logging.warning(
                        f'no more reactions returned for {self.object_type.name} [{item_id}] '
                        f'against {self.repository}; expected {reactions_expected}, '
                        f'loaded {len(reactions)}')
                    break

                # iterate over each reaction of the returned page
                for edge in edges:
                    node = edge["node"]
                    reaction_cursor = edge["cursor"]

                    reaction = Reaction(node["id"])
                    reaction.content = node["content"]
                    reaction.create_datetime = base.to_datetime_from_str(
                        node["createdAt"])
                    # the reacting user can be None
                    if node["user"] is not None:
                        reaction.author = self._find_author_by_id(
                            node["user"]["id"])
                    reactions.append(reaction)

            reaction_dictionaries = list(map(base.to_dictionary, reactions))
            self._get_collection().update_one(
                {'id': item_id},
                {'$set': {
                    'reactions': reaction_dictionaries
                }})

        logging.debug(
            f'{self.object_type.name} reviewed for {self.repository} {items_reviewed}/{item_count}'
        )

    actual_count: int = self._get_actual_results()
    expected_count: int = self._get_expected_results()
    logging.debug(
        f'reactions returned for {self.repository} returned: [{actual_count}], expected: [{expected_count}]'
    )
def run(self):
    """
    loads the edits for a specific object type for a specific repository

    Each saved document of self.object_type in the repository is visited.
    When the document's 'total_edits' is greater than the number of stored
    'edits', all content edits are paged through from the start and the
    document's 'edits' and 'is_deleted' fields are replaced. Paging stops
    early when a page comes back empty, so the loop cannot repeat the same
    query forever.

    :return: None
    """
    item_reviewed: int = 0
    item_count = self._get_objects_saved_count(self.repository,
                                               self.object_type)
    items = self._get_collection().find({
        'repository_id': self.repository.id,
        'object_type': self.object_type.name
    })

    for item in items:
        item_reviewed += 1
        item_id: str = item['id']
        edits_expected: int = item['total_edits']
        edits = []
        edit_cursor = None
        has_delete: bool = False

        if edits_expected > len(item['edits']):
            while edits_expected > len(edits):
                logging.debug(
                    f'running query for edits for {self.object_type.name} [{item_id}] '
                    f'against {self.repository}')
                query = self._edits_query(item_id, edit_cursor)
                response_json = self.graph_ql_client.execute_query(query)
                logging.debug(
                    f'query complete for edits for {self.object_type.name} [{item_id}] '
                    f'against {self.repository}')

                edges = response_json["data"]["node"]["userContentEdits"][
                    "edges"]
                if not edges:
                    # an empty page cannot grow edits, so the loop condition
                    # would never change
                    logging.warning(
                        f'no more edits returned for {self.object_type.name} [{item_id}] '
                        f'against {self.repository}; expected {edits_expected}, '
                        f'loaded {len(edits)}')
                    break

                # iterate over each edit of the returned page
                for edge in edges:
                    node = edge["node"]
                    edit_cursor = edge["cursor"]

                    edit = ContentEdit(node["id"])
                    edits.append(edit)
                    edit.edit_datetime = base.to_datetime_from_str(
                        node["editedAt"])
                    if node["editor"] is not None:
                        edit.editor = self._find_author_by_login(
                            node["editor"]["login"])

                    # a diff is only present for an actual edit
                    if node["diff"] is not None:
                        edit.difference = node["diff"]

                    # a deletion overrides editor and timestamp with the
                    # deleting user and the deletion time
                    if node["deletedAt"] is not None:
                        if node["deletedBy"] is not None:
                            edit.editor = self._find_author_by_login(
                                node["deletedBy"]["login"])
                        edit.edit_datetime = base.to_datetime_from_str(
                            node["deletedAt"])
                        edit.is_delete = True
                        has_delete = True

            edit_dictionaries = list(map(base.to_dictionary, edits))
            self._get_collection().update_one({'id': item_id}, {
                '$set': {
                    'edits': edit_dictionaries,
                    'is_deleted': has_delete
                }
            })

        logging.debug(
            f'{self.object_type.name} reviewed for {self.repository} {item_reviewed}/{item_count}'
        )

    actual_count: int = self._get_actual_results()
    expected_count: int = self._get_expected_results()
    logging.debug(
        f'edits returned for {self.repository} returned: [{actual_count}], expected: [{expected_count}]'
    )