class Issue(Entity):
    """GitHub Issue entity."""

    entity_schema = Schema(
        {
            "created_by": str,
            "created_at": int,
            "closed_by": Any(None, str),
            "closed_at": Any(None, int),
            "labels": {str: {str: Any(int, str)}},
            "interactions": {str: int},
        }
    )

    def analyse(self) -> PaginatedList:
        """Override :func:`~Entity.analyse`."""
        return self.get_only_new_entities()

    def store(self, issue: GithubIssue):
        """Override :func:`~Entity.store`."""
        if issue.pull_request is not None:
            return  # we analyse issues and PRs differently

        self.stored_entities[str(issue.number)] = {
            "created_by": issue.user.login,
            "created_at": int(issue.created_at.timestamp()),
            "closed_by": issue.closed_by.login if issue.closed_by is not None else None,
            "closed_at": int(issue.closed_at.timestamp()) if issue.closed_at is not None else None,
            "labels": GitHubKnowledge.get_labels(issue),
            "interactions": GitHubKnowledge.get_interactions(issue.get_comments()),
        }

    def previous_knowledge(self):
        """Override :func:`~Entity.previous_knowledge`."""
        return self.previous_knowledge

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return [issue for issue in self.repository.get_issues(state="all") if not issue.pull_request]
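# Hedged usage sketch (illustrative only, not part of the entity): a voluptuous
# Schema is callable, so a stored issue record can be validated by calling
# Issue.entity_schema on it. The record below is made-up sample data.
def _validate_sample_issue_record():
    sample_record = {
        "created_by": "octocat",
        "created_at": 1611676800,
        "closed_by": None,
        "closed_at": None,
        "labels": {"bug": {"color": "d73a4a"}},
        "interactions": {"reviewer1": 3},
    }
    # Returns the validated data, or raises voluptuous.MultipleInvalid on mismatch.
    return Issue.entity_schema(sample_record)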
def prediction_schema(exact_len, *args):
    return All([{
        'threshold': float,
        'label_id': int,
        'score': float,
        'label_name': Any(*six.string_types),
    }], ExactLen(exact_len), *args)
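# Hedged usage sketch: assuming All, ExactLen and Any are voluptuous-style
# validators (as the definition above suggests), the factory returns a callable
# schema that accepts a list of prediction dicts of exactly the requested
# length. The sample data below is made up for illustration.
def _validate_sample_predictions():
    two_predictions = prediction_schema(2)
    return two_predictions([
        {'threshold': 0.5, 'label_id': 1, 'score': 0.87, 'label_name': 'cat'},
        {'threshold': 0.5, 'label_id': 2, 'score': 0.91, 'label_name': 'dog'},
    ])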
def test_error_reporting():
    expected = S({
        'info': Partial({
            'package_url': 'http://pypi.python.org/pypi/pytest',
            'platform': 'INVALID VALUE',
            'description': Length(max=10),
            'downloads': list,
            'classifiers': dict,
        }),
        'urls': int,
    })

    _ = expected == TEST_DATA

    msgs = pytest_assertrepr_compare('==', expected, TEST_DATA)
    assert S(Unordered([
        "failed due to validation error(s):",
        "- info.platform: not a valid value (actual: 'unix')",
        "- info.description: length of value must be at most 10 (actual: 'lorem ipsum lorem ipsum')",
        "- info.downloads: expected list (actual: {'last_month': 0})",
        "- info.classifiers: expected dict (actual: ['Development Status :: 6 - Mature', 'Intended Audience :: Developers'])",
        "- urls: expected int (actual: [{}, {}])",
        Any(
            "- releases: extra keys not allowed (actual: {'3.0.7': [], '3.1.3': []})",
            "- releases: extra keys not allowed (actual: {'3.1.3': [], '3.0.7': []})",
        ),
    ])) == msgs
def NoneOr(Else):
    return Any(None, Else)
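# Hedged usage sketch (assumes Any above is voluptuous.Any, whose instances are
# callable validators): NoneOr(int) accepts either None or an int and rejects
# anything else with voluptuous.Invalid.
def _none_or_example():
    optional_int = NoneOr(int)
    optional_int(None)       # passes, returns None
    return optional_int(42)  # passes, returns 42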
class PullRequest(Entity):
    """GitHub PullRequest entity."""

    entity_schema = Schema(
        {
            "title": str,
            "body": Any(None, str),
            "size": str,
            "labels": [str],
            "created_by": str,
            "created_at": int,
            "closed_at": Any(None, int),
            "closed_by": Any(None, str),
            "merged_at": Any(None, int),
            "commits_number": int,
            "changed_files_number": int,
            "interactions": {str: int},
            "reviews": PullRequestReviews,
            "commits": [str],
            "files": [str],
        }
    )

    def analyse(self) -> PaginatedList:
        """Override :func:`~Entity.analyse`."""
        return self.get_raw_github_data()

    def store(self, pull_request: GithubPullRequest):
        """Override :func:`~Entity.store`."""
        _LOGGER.info("Extracting PR #%d", pull_request.number)

        if pull_request.number in self.previous_knowledge.index:
            _LOGGER.debug("PullRequest %s already analysed, skipping", pull_request.number)
            return

        created_at = int(pull_request.created_at.timestamp())
        closed_at = int(pull_request.closed_at.timestamp()) if pull_request.closed_at is not None else None
        merged_at = int(pull_request.merged_at.timestamp()) if pull_request.merged_at is not None else None

        closed_by = pull_request.as_issue().closed_by.login if pull_request.as_issue().closed_by is not None else None
        merged_by = pull_request.merged_by.login if pull_request.merged_by is not None else None

        labels = [label.name for label in pull_request.get_labels()]

        # Evaluate size of PR
        pull_request_size = None
        if labels:
            pull_request_size = GitHubKnowledge.get_labeled_size(labels)

        if not pull_request_size:
            lines_changes = pull_request.additions + pull_request.deletions
            pull_request_size = GitHubKnowledge.assign_pull_request_size(lines_changes=lines_changes)

        reviews = self.extract_pull_request_reviews(pull_request)

        self.stored_entities[str(pull_request.number)] = {
            "title": pull_request.title,
            "body": pull_request.body,
            "size": pull_request_size,
            "created_by": pull_request.user.login,
            "created_at": created_at,
            "closed_at": closed_at,
            "closed_by": closed_by,
            "merged_at": merged_at,
            "merged_by": merged_by,
            "commits_number": pull_request.commits,
            "changed_files_number": pull_request.changed_files,
            "interactions": GitHubKnowledge.get_interactions(pull_request.get_issue_comments()),
            "reviews": reviews,
            "labels": labels,
            "commits": [c.sha for c in pull_request.get_commits()],
            "changed_files": [f.filename for f in pull_request.get_files()],
            "first_review_at": get_first_review_time(reviews),
            "first_approve_at": get_approve_time(reviews),
        }

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_pulls(state="all")

    @staticmethod
    def extract_pull_request_review_requests(pull_request: GithubPullRequest) -> List[str]:
        """Extract features from requested reviews of the PR.

        GitHub represents review requests as requested reviewers rather than as
        actual request objects.

        Arguments:
            pull_request {PullRequest} -- PR from which the review requests are extracted.

        Returns:
            List[str] -- list of logins of the requested reviewers

        """
        requested_users = pull_request.get_review_requests()[0]

        extracted = []
        for user in requested_users:
            extracted.append(user.login)
        return extracted

    @staticmethod
    def extract_pull_request_reviews(pull_request: GithubPullRequest) -> Dict[str, Dict[str, Any]]:
        """Extract required features for each review from PR.

        Arguments:
            pull_request {PullRequest} -- Pull Request from which the reviews will be extracted

        Returns:
            Dict[str, Dict[str, Any]] -- dictionary of extracted reviews.
                                         Each review is stored under its review ID.

        """
        reviews = pull_request.get_reviews()
        _LOGGER.debug("  -num of reviews found: %d" % reviews.totalCount)

        results = {}
        for idx, review in enumerate(reviews, 1):
            _LOGGER.info("  -analysing review no. %d/%d" % (idx, reviews.totalCount))
            results[str(review.id)] = {
                "author": review.user.login if review.user and review.user.login else None,
                "words_count": len(review.body.split(" ")),
                "submitted_at": int(review.submitted_at.timestamp()),
                "state": review.state,
            }

        return results

    @staticmethod
    def get_referenced_issues(pull_request: GithubPullRequest) -> List[str]:
        """Scan all of the Pull Request comments and get referenced issues.

        Arguments:
            pull_request {PullRequest} -- Pull Request from which the referenced issues are extracted

        Returns:
            List[str] -- IDs of referenced issues within the Pull Request.

        """
        issues_referenced = []
        for comment in pull_request.get_issue_comments():
            for id in PullRequest.search_for_references(comment.body):
                issues_referenced.append(id)

        for id in PullRequest.search_for_references(pull_request.body):
            issues_referenced.append(id)

        _LOGGER.debug("   referenced issues: %s" % issues_referenced)
        return issues_referenced

    @staticmethod
    def search_for_references(body: str) -> Generator[str, None, None]:
        """Return generator for iterating through referenced IDs in a comment."""
        if body is None:
            return

        message = body.split(" ")
        for idx, word in enumerate(message):
            # skip words that are not closing keywords and keep scanning the comment
            if word.replace(":", "").lower() not in ISSUE_KEYWORDS:
                continue

            _LOGGER.info("      ...found keyword referencing issue")
            referenced_issue_number = message[idx + 1]

            if referenced_issue_number.startswith("https"):
                # last element of url is always the issue number
                ref_issue = referenced_issue_number.split("/")[-1]
            elif referenced_issue_number.startswith("#"):
                ref_issue = referenced_issue_number.replace("#", "")
            else:
                _LOGGER.info("      ...referenced issue number absent")
                _LOGGER.debug("      keyword message: %s" % body)
                return

            # validate the extracted issue number itself
            if not ref_issue.isnumeric():
                _LOGGER.info("      ...referenced issue number in incorrect format")
                return

            _LOGGER.info("      ...referenced issue number: %s" % ref_issue)
            yield ref_issue
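# Hedged usage sketch for the reference scanner above: the body text is made up,
# and the call relies only on string parsing, so no GitHub client is needed.
def _search_for_references_example():
    body = "This PR fixes #42 and resolves https://github.com/org/repo/issues/7"
    # With the keyword list defined in this module, this yields ["42", "7"].
    return list(PullRequest.search_for_references(body))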
import logging
from typing import Dict, Generator, List

from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest as GithubPullRequest
from voluptuous.schema_builder import Schema
from voluptuous.validators import Any

from srcopsmetrics.entities import Entity
from srcopsmetrics.entities.tools.knowledge import GitHubKnowledge

_LOGGER = logging.getLogger(__name__)

PullRequestReview = Schema({
    "author": Any(None, str),
    "words_count": int,
    "submitted_at": int,
    "state": str,
})

PullRequestReviews = Schema({str: PullRequestReview})

ISSUE_KEYWORDS = {
    "close",
    "closes",
    "closed",
    "fix",
    "fixes",
    "fixed",
    "resolve",
    "resolves",
    "resolved",
}


def get_first_review_time(reviews: Dict[Any, Any]):
    """Return timestamp of the first PR review."""
    rev_times = [int(rev["submitted_at"]) for rev in reviews.values()]
    # earliest submitted_at, or None when the PR has no reviews
    return min(rev_times) if rev_times else None
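# Hedged usage sketch: the reviews dict below mirrors the PullRequestReview
# schema above (keyed by review id); the ids and timestamps are made up.
def _first_review_time_example():
    reviews = {
        "301": {"author": "reviewer1", "words_count": 12, "submitted_at": 1611680400, "state": "COMMENTED"},
        "302": {"author": "reviewer2", "words_count": 3, "submitted_at": 1611684000, "state": "APPROVED"},
    }
    # Earliest submitted_at wins -> 1611680400
    return get_first_review_time(reviews)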