# Example no. 1
    def fetching_data(cls, parameters_dict):
        """Extract pull-request features from the GitHub API.

        Builds a flat feature dictionary combining repository-level
        metrics (recomputed only when ``cls.repo == "new"``) with
        pull-request-level metrics for a single pull request.

        :param parameters_dict: dict with keys ``'last_page'``,
            ``'contributor_url'``, ``'pulls_url'``, ``'files_url'``,
            ``'number'`` and, for a new repository, ``'repos_url'``.
        :return: dict of extracted features; an empty dict when the
            pulls API response contains ``Fetch.msg`` (rate limit hit).
        """
        pulls = Pulls()
        repository = Repository()
        labels = Label()
        last_page = parameters_dict['last_page']
        contributor_url = parameters_dict['contributor_url']
        if cls.repo == "new":
            # Repository-level features are only recomputed for a new repo;
            # otherwise the cached cls.dict_* values from a previous call
            # are reused below.
            repos_url = parameters_dict['repos_url']
            repos_response = requests.get(repos_url,
                                          auth=(Fetch.user_id, Fetch.password))
            # Time the rate limit resets, plus a 70-second safety margin.
            reset = int(repos_response.headers['X-RateLimit-Reset'])
            reset_datetime = (datetime.datetime.fromtimestamp(reset)
                              + datetime.timedelta(seconds=70))
            repos_response = repos_response.json()
            # Fetch.msg in the payload signals an API/rate-limit error.
            if Fetch.msg not in str(repos_response):
                pushed_at = repos_response['pushed_at']
                watchers_count = repos_response['watchers_count']
                forks_count = repos_response['forks_count']
                open_issues_count = repos_response['open_issues_count']
                cls.dict_total_contribution = Repository.total_contribution(
                    last_page, contributor_url)
                cls.dict_open_pull_request_count = Repository.open_pr_count(
                    repos_url, last_page)
                cls.dict_forks_count = repository.get_forks_count(forks_count)
                cls.dict_push_time = repository.pushed_time(pushed_at)
                cls.dict_watchers_count = repository.watchers_count(
                    watchers_count)
                cls.dict_issue_count = repository.get_open_issue_count(
                    open_issues_count)
                cls.dict_acceptance_rate = repository.get_repo_probability(
                    last_page, repos_url)
                cls.repository_name = repos_response['full_name']
            else:
                # Rate limited: sleep until the limit resets.
                # NOTE(review): the repos request is not retried after the
                # pause; execution falls through to the pulls request with
                # stale cls.dict_* values — confirm this is intended.
                print('in sleep till :', reset_datetime)
                print(cls.repository_name)
                pause.until(reset_datetime)
        pulls_url = parameters_dict['pulls_url']
        pulls_response = requests.get(pulls_url,
                                      auth=(Fetch.user_id, Fetch.password))
        reset = int(pulls_response.headers['X-RateLimit-Reset'])
        reset_datetime = (datetime.datetime.fromtimestamp(reset)
                          + datetime.timedelta(seconds=70))
        pulls_response = pulls_response.json()
        feature_dict = {}
        if Fetch.msg not in str(pulls_response):
            created_at = pulls_response['created_at']
            closed_at = pulls_response['closed_at']
            state = pulls_response['state']
            commit = pulls_response['commits']
            changed_file = pulls_response['changed_files']
            merged_at = pulls_response['merged_at']
            user = pulls_response["user"]["login"]
            dict_created_time = pulls.created_time(created_at, state)
            dict_closed_pull_request_time = pulls.closed_pull_request_time(
                created_at, closed_at)
            dict_get_commits = pulls.get_commits(commit)
            dict_changed_files = pulls.get_changed_files(changed_file)
            dict_labels = labels.get_label(state, merged_at)
            cls.dict_contributor_rate = pulls.contributor_probability_rate(
                user, cls.dict_total_contribution)
            cls.dict_pull_request_size = pulls.pull_request_size(
                pulls_response)
            files_url = parameters_dict['files_url']
            dict_changes_in_file = pulls.changed_lines_in_file(files_url)

            feature_dict['repository_name'] = cls.repository_name
            feature_dict['pull_numbers'] = parameters_dict['number']
            # An open PR contributes its open-time feature; a closed one
            # contributes its time-to-close instead.
            if dict_created_time['open_pr_time']:
                feature_dict.update(dict_created_time)
            else:
                feature_dict.update(dict_closed_pull_request_time)
            feature_dict.update(cls.dict_open_pull_request_count)
            feature_dict.update(cls.dict_forks_count)
            feature_dict.update(dict_get_commits)
            feature_dict.update(dict_changed_files)
            feature_dict.update(cls.dict_push_time)
            feature_dict.update(cls.dict_watchers_count)
            feature_dict.update(cls.dict_issue_count)
            feature_dict.update(cls.dict_acceptance_rate)
            feature_dict.update(cls.dict_contributor_rate)
            feature_dict.update(cls.dict_pull_request_size)
            feature_dict.update(dict_changes_in_file)
            feature_dict.update(dict_labels)
            logging.debug("Extracted features :{} ".format(feature_dict))
        else:
            # Rate limited on the pulls request: pause and return the
            # empty feature dict.
            print('in sleep till :', reset_datetime)
            print(cls.repository_name)
            pause.until(reset_datetime)
        return feature_dict
# Example no. 2
'''Entry point for Repository'''
from utils.utils import Utils
from repository.repository import Repository
if __name__ == '__main__':
    # Read repository client parameters from the project config file.
    CONFIG = Utils().get_config_file('config.ini')
    SECTION = 'repository_client_parameters'
    REPOSITORY_URL = CONFIG.get(SECTION, 'repo_url')
    PUSHED_AT = CONFIG.get(SECTION, 'pushed_at')
    WATCHERS_COUNT = CONFIG.get(SECTION, 'watchers_count')
    FORKS_COUNT = CONFIG.get(SECTION, 'forks_count')
    OPEN_ISSUE = CONFIG.get(SECTION, 'open_issue_count')
    CONTRIBUTOR_URL = CONFIG.get(SECTION, 'contributor_url')
    Utils().user_path()
    REPOSITORY = Repository()
    # The 'last_page' config entry was dead: its value was always
    # overwritten by pagination() before any use, so the read is dropped.
    # NOTE(review): the user id is hard-coded here — consider moving it
    # into config.ini.
    LAST_PAGE = Utils().pagination('sjain3097', 'new')
    # Compute repository-level metrics (config values are fed in as the
    # raw inputs; the names are rebound to the computed results).
    PUSHED_TIME = REPOSITORY.pushed_time(PUSHED_AT)
    OPEN_PR_COUNT = REPOSITORY.open_pr_count(REPOSITORY_URL, LAST_PAGE)
    WATCHERS_COUNT = REPOSITORY.watchers_count(WATCHERS_COUNT)
    FORKS_COUNT = REPOSITORY.get_forks_count(FORKS_COUNT)
    ISSUE_COUNT = REPOSITORY.get_open_issue_count(OPEN_ISSUE)
    TOTAL_CONTRIBUTOR_URL = REPOSITORY.total_contribution(
        LAST_PAGE, CONTRIBUTOR_URL)
    ACCEPTANCE_RATE = REPOSITORY.get_repo_probability(LAST_PAGE,
                                                      REPOSITORY_URL)