Пример #1
0
class Issue(Entity):
    """GitHub Issue entity.

    Each stored record is keyed by the issue number (as a string) and holds
    the author, creation/closing details, labels and interactions.
    """

    entity_schema = Schema({
        "created_by": str,
        "created_at": int,
        "closed_by": Any(None, str),
        "closed_at": Any(None, int),
        "labels": {str: {str: Any(int, str)}},
        "interactions": {str: int},
    })

    def analyse(self) -> PaginatedList:
        """Override :func:`~Entity.analyse`."""
        return self.get_only_new_entities()

    def store(self, issue: GithubIssue):
        """Override :func:`~Entity.store`.

        Issues that carry pull request data are ignored; every other issue is
        written into ``self.stored_entities`` under its stringified number.
        """
        if issue.pull_request is not None:
            return  # issues and pull requests are analysed separately

        author = issue.user.login
        opened_at = int(issue.created_at.timestamp())

        closer = None
        if issue.closed_by is not None:
            closer = issue.closed_by.login

        finished_at = None
        if issue.closed_at is not None:
            finished_at = int(issue.closed_at.timestamp())

        issue_labels = GitHubKnowledge.get_labels(issue)
        comments = issue.get_comments()
        issue_interactions = GitHubKnowledge.get_interactions(comments)

        record = {
            "created_by": author,
            "created_at": opened_at,
            "closed_by": closer,
            "closed_at": finished_at,
            "labels": issue_labels,
            "interactions": issue_interactions,
        }
        self.stored_entities[str(issue.number)] = record

    def previous_knowledge(self):
        """Override :func:`~Entity.previous_knowledge`."""
        # NOTE(review): this looks up the attribute with the method's own
        # name; unless an instance attribute shadows it, the bound method
        # itself is returned -- confirm against Entity.
        return self.previous_knowledge

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`.

        Returns a list of all repository issues (any state) that do not
        carry pull request data.
        """
        plain_issues = []
        for candidate in self.repository.get_issues(state="all"):
            if candidate.pull_request:
                continue
            plain_issues.append(candidate)
        return plain_issues
def prediction_schema(exact_len, *args):
    """Build the validator for a list of prediction dictionaries.

    The result is an ``All`` combination of a list schema whose items are
    dictionaries with ``threshold``, ``label_id``, ``score`` and
    ``label_name`` entries, ``ExactLen(exact_len)``, and any additional
    validators passed through ``args``.
    """
    single_prediction = {
        'threshold': float,
        'label_id': int,
        'score': float,
        'label_name': Any(*six.string_types),
    }
    length_check = ExactLen(exact_len)
    return All([single_prediction], length_check, *args)
Пример #3
0
def test_error_reporting():
    """Check the messages produced when ``TEST_DATA`` does not match a schema.

    The schema is compared against ``TEST_DATA`` first, then the lines from
    ``pytest_assertrepr_compare`` are matched, in any order, against the
    expected error report.
    """
    info_schema = Partial({
        'package_url': 'http://pypi.python.org/pypi/pytest',
        'platform': 'INVALID VALUE',
        'description': Length(max=10),
        'downloads': list,
        'classifiers': dict,
    })
    expected = S({'info': info_schema, 'urls': int})

    # The comparison result is discarded; the comparison itself is performed
    # before the report is requested.
    _ = expected == TEST_DATA
    msgs = pytest_assertrepr_compare('==', expected, TEST_DATA)

    # The dict repr of the extra "releases" key may come in either key order.
    releases_line = Any(
        "- releases: extra keys not allowed (actual: {'3.0.7': [], '3.1.3': []})",
        "- releases: extra keys not allowed (actual: {'3.1.3': [], '3.0.7': []})",
    )
    expected_lines = [
        "failed due to validation error(s):",
        "- info.platform: not a valid value (actual: 'unix')",
        "- info.description: length of value must be at most 10 (actual: 'lorem ipsum lorem ipsum')",
        "- info.downloads: expected list (actual: {'last_month': 0})",
        "- info.classifiers: expected dict (actual: ['Development Status :: 6 - Mature', 'Intended Audience :: Developers'])",
        "- urls: expected int (actual: [{}, {}])",
        releases_line,
    ]
    assert S(Unordered(expected_lines)) == msgs
Пример #4
0
def NoneOr(Else):
    """Return ``Any(None, Else)``, i.e. ``Else`` with ``None`` as an alternative."""
    alternatives = (None, Else)
    return Any(*alternatives)
Пример #5
0
class PullRequest(Entity):
    """GitHub PullRequest entity.

    Each stored record is keyed by the pull request number (as a string) and
    holds the fields listed in :attr:`entity_schema`.
    """

    entity_schema = Schema({
        "title": str,
        "body": Any(None, str),
        "size": str,
        "labels": [str],
        "created_by": str,
        "created_at": int,
        "closed_at": Any(None, int),
        "closed_by": Any(None, str),
        "merged_at": Any(None, int),
        "merged_by": Any(None, str),
        "commits_number": int,
        "changed_files_number": int,
        "interactions": {
            str: int
        },
        "reviews": PullRequestReviews,
        "commits": [str],
        # "files" is kept so records using the previous key name still pass;
        # store() writes the list of filenames under "changed_files".
        "files": [str],
        "changed_files": [str],
        # NOTE(review): assumes the review-time helpers return an integer
        # timestamp or None -- confirm against get_first_review_time and
        # get_approve_time.
        "first_review_at": Any(None, int),
        "first_approve_at": Any(None, int),
    })

    def analyse(self) -> PaginatedList:
        """Override :func:`~Entity.analyse`."""
        return self.get_raw_github_data()

    def store(self, pull_request: GithubPullRequest):
        """Override :func:`~Entity.store`.

        Pull requests whose number is already present in the index of
        ``self.previous_knowledge`` are skipped; any other pull request is
        written into ``self.stored_entities`` under its stringified number.
        """
        _LOGGER.info("Extracting PR #%d", pull_request.number)

        if pull_request.number in self.previous_knowledge.index:
            _LOGGER.debug("PullRequest %s already analysed, skipping",
                          pull_request.number)
            return

        created_at = int(pull_request.created_at.timestamp())
        closed_at = (int(pull_request.closed_at.timestamp())
                     if pull_request.closed_at is not None else None)
        merged_at = (int(pull_request.merged_at.timestamp())
                     if pull_request.merged_at is not None else None)

        # Convert to the issue view once and reuse it instead of calling
        # as_issue() for every access.
        issue = pull_request.as_issue()
        closed_by = issue.closed_by.login if issue.closed_by is not None else None
        merged_by = (pull_request.merged_by.login
                     if pull_request.merged_by is not None else None)

        labels = [label.name for label in pull_request.get_labels()]

        # Evaluate size of PR: prefer a size derived from the labels and fall
        # back to one computed from the number of changed lines.
        pull_request_size = None
        if labels:
            pull_request_size = GitHubKnowledge.get_labeled_size(labels)

        if not pull_request_size:
            lines_changes = pull_request.additions + pull_request.deletions
            pull_request_size = GitHubKnowledge.assign_pull_request_size(
                lines_changes=lines_changes)

        reviews = self.extract_pull_request_reviews(pull_request)

        self.stored_entities[str(pull_request.number)] = {
            "title": pull_request.title,
            "body": pull_request.body,
            "size": pull_request_size,
            "created_by": pull_request.user.login,
            "created_at": created_at,
            "closed_at": closed_at,
            "closed_by": closed_by,
            "merged_at": merged_at,
            "merged_by": merged_by,
            "commits_number": pull_request.commits,
            "changed_files_number": pull_request.changed_files,
            "interactions": GitHubKnowledge.get_interactions(
                pull_request.get_issue_comments()),
            "reviews": reviews,
            "labels": labels,
            "commits": [c.sha for c in pull_request.get_commits()],
            "changed_files": [f.filename for f in pull_request.get_files()],
            "first_review_at": get_first_review_time(reviews),
            "first_approve_at": get_approve_time(reviews),
        }

    def get_raw_github_data(self):
        """Override :func:`~Entity.get_raw_github_data`."""
        return self.repository.get_pulls(state="all")

    @staticmethod
    def extract_pull_request_review_requests(
            pull_request: GithubPullRequest) -> List[str]:
        """Extract features from requested reviews of the PR.

        GitHub understands review requests rather as requested reviewers than actual
        requests.

        Arguments:
            pull_request {PullRequest} -- PR of which we can extract review requests.

        Returns:
            List[str] -- list of logins of the requested reviewers

        """
        # Only the first element of the review requests is used here; it is
        # iterated as the requested users.
        requested_users = pull_request.get_review_requests()[0]
        return [user.login for user in requested_users]

    @staticmethod
    def extract_pull_request_reviews(
            pull_request: GithubPullRequest) -> Dict[str, Dict[str, Any]]:
        """Extract required features for each review from PR.

        Arguments:
            pull_request {PullRequest} -- Pull Request from which the reviews will be extracted

        Returns:
            Dict[str, Dict[str, Any]] -- dictionary of extracted reviews. Each review is stored
                                         under its ID (as a string) with its author,
                                         words count, submission timestamp and state.

        """
        reviews = pull_request.get_reviews()
        _LOGGER.debug("  -num of reviews found: %d", reviews.totalCount)

        results = {}
        for idx, review in enumerate(reviews, 1):
            _LOGGER.info("      -analysing review no. %d/%d", idx,
                         reviews.totalCount)
            results[str(review.id)] = {
                "author":
                review.user.login
                if review.user and review.user.login else None,
                "words_count":
                len(review.body.split(" ")),
                "submitted_at":
                int(review.submitted_at.timestamp()),
                "state":
                review.state,
            }
        return results

    @staticmethod
    def get_referenced_issues(pull_request: GithubPullRequest) -> List[str]:
        """Scan all of the Pull Request comments and get referenced issues.

        References found in the comments come first, followed by the ones
        found in the Pull Request body.

        Arguments:
            pull_request {PullRequest} -- Pull request for which the referenced
                                        issues are extracted

        Returns:
            List[str] -- IDs of referenced issues within the Pull Request.

        """
        issues_referenced = []
        for comment in pull_request.get_issue_comments():
            issues_referenced.extend(
                PullRequest.search_for_references(comment.body))

        issues_referenced.extend(
            PullRequest.search_for_references(pull_request.body))

        _LOGGER.debug("      referenced issues: %s", issues_referenced)
        return issues_referenced

    @staticmethod
    def search_for_references(body: str) -> Generator[str, None, None]:
        """Return generator for iterating through referenced IDs in a comment.

        The text is split on whitespace and every word matching one of
        ``ISSUE_KEYWORDS`` (case-insensitively, colons ignored) is expected
        to be followed by either ``#<number>`` or an ``https`` URL whose last
        path element is the number. Keywords not followed by such a
        reference are logged and skipped, so the remaining text is still
        scanned.

        Arguments:
            body {str} -- text to scan; ``None`` yields nothing.

        Yields:
            str -- number of each referenced issue, as a string of digits.

        """
        if body is None:
            return

        # Split on any whitespace so a reference that ends a line is not
        # glued to the first word of the following line.
        message = body.split()
        for idx, word in enumerate(message):
            if word.replace(":", "").lower() not in ISSUE_KEYWORDS:
                continue

            _LOGGER.info("      ...found keyword referencing issue")
            if idx + 1 >= len(message):
                # the keyword is the very last word, nothing can follow it
                _LOGGER.info("      ...referenced issue number absent")
                _LOGGER.debug("      keyword message: %s", body)
                continue

            referenced_issue_number = message[idx + 1]
            if referenced_issue_number.startswith("https"):
                # last element of url is always the issue number
                ref_issue = referenced_issue_number.split("/")[-1]
            elif referenced_issue_number.startswith("#"):
                ref_issue = referenced_issue_number.replace("#", "")
            else:
                _LOGGER.info("      ...referenced issue number absent")
                _LOGGER.debug("      keyword message: %s", body)
                continue

            # Validate the extracted number, not the raw token: the raw token
            # still carries the "#" or URL prefix and is never numeric.
            if not ref_issue.isnumeric():
                _LOGGER.info(
                    "      ...referenced issue number in incorrect format")
                continue

            _LOGGER.info("      ...referenced issue number: %s", ref_issue)
            yield ref_issue
Пример #6
0
import logging
from typing import Dict, Generator, List

from github.PaginatedList import PaginatedList
from github.PullRequest import PullRequest as GithubPullRequest
from voluptuous.schema_builder import Schema
from voluptuous.validators import Any

from srcopsmetrics.entities import Entity
from srcopsmetrics.entities.tools.knowledge import GitHubKnowledge

# Logger named after this module.
_LOGGER = logging.getLogger(__name__)

# Schema of a single review record: author login (or None), words count,
# submission timestamp and review state.
PullRequestReview = Schema(
    {
        "author": Any(None, str),
        "words_count": int,
        "submitted_at": int,
        "state": str,
    }
)

# Schema of a mapping from string keys to review records.
PullRequestReviews = Schema({str: PullRequestReview})

# Lower-case keywords grouped by verb family.
ISSUE_KEYWORDS = {
    "close",
    "closes",
    "closed",
    "fix",
    "fixes",
    "fixed",
    "resolve",
    "resolves",
    "resolved",
}


def get_first_review_time(reviews: Dict[Any, Any]):
    """Return timestamp of the first PR review."""
    rev_times = [int(rev["submitted_at"]) for rev in reviews.values()]