def collect_batch(self, save: bool = True):
    """
    Download the paginated listing of this endpoint, one request per page

    Pages are requested in increasing order, starting at page 1, and the
    loop stops at the first page for which the API handler returns an empty
    result (page size is presumably the API default of 30 elements - TODO
    confirm). A page whose ``<page>.json`` file is already reported as
    existing inside the ``all/`` folder is skipped: it is neither requested
    again nor added to the returned list.

    :param save: if it should persist the json downloaded on the hard drive
    :type save: bool
    :return: list with the content of every page downloaded by this call
    :rtype: list
    """
    base_url = ''.join(
        (self.api_url, self.owner, '/', self.repo, self.private_url, '?page='))
    folder = ''.join((self.path, self.repo, self.private_path, 'all/'))
    downloaded = []
    storage = JSONHandler(folder)

    page = 0
    while True:
        page += 1
        page_id = str(page)

        # Already on disk: move on to the next page without hitting the API.
        if storage.file_exists(folder + page_id + '.json'):
            continue

        content = self.apiHandler.request(base_url + page_id)
        if not content:
            # An empty page marks the end of the listing.
            return downloaded

        downloaded.append(content)
        if save:
            storage.save_json(content, page_id)
def collect_single(self, parameter: str, save: bool = True):
    """
    Fetch one element of the API, identified by ``parameter``

    When ``<parameter>.json`` is reported as existing inside the
    ``individual/`` folder, its content is opened and returned and no
    request is made. Otherwise the element is requested from the API; an
    empty response prints a warning and is replaced by an empty list, which
    is then the value handed to ``save_json`` (when ``save`` is true) and
    returned.

    :param parameter: parameter that will be used by the function to know
        which element it should download
    :type parameter: str
    :param save: if it should persist the json downloaded on the hard drive
    :type save: bool
    :return: json downloaded (an empty list when the API returned nothing)
    :rtype: dict
    """
    folder = ''.join((self.path, self.repo, self.private_path, 'individual/'))
    storage = JSONHandler(folder)
    element_id = str(parameter)
    file_name = element_id + '.json'

    # Cached copy available: read it through a separate handler instance.
    if storage.file_exists(folder + file_name):
        return JSONHandler(folder).open_json(file_name)

    url = ''.join(
        (self.api_url, self.owner, '/', self.repo, self.private_url, '/',
         element_id))
    response = self.apiHandler.request(url)

    if response:
        result = response
    else:
        print('JSON returned empty. Please check your parameters for URL: ' + url)
        result = []

    if save:
        storage.save_json(result, element_id)
    return result
def collect_commits_on_pulls(self, owner: str, project: str):
    """
    Collect Commits from Pull Requests from the GitHub API

    Steps performed:

    1. Read the pull request numbers from every JSON file found in
       ``<output_path><project>/pulls/all/``.
    2. For each pull request, obtain its pages of commits: from
       ``<output_path><project>/pulls_commits/commits/<number>.json`` when
       that file exists, otherwise from the API (the downloaded pages are
       then saved under that name).
    3. Gather the ``sha`` of every commit found, whether the pages came from
       disk or were just downloaded.
    4. Collect each non-empty sha individually through ``collect_single``,
       printing the progress percentage after each one.

    :param owner: repository owner
    :type owner: str
    :param project: project name
    :type project: str
    :return: list of commit hashes (sha) found in the pull requests
    :rtype: list
    """
    print('Collecting Pull Requests Commits')

    pulls_dir = self.config['output_path'] + project + '/pulls/all/'
    commits_dir = self.config['output_path'] + project + '/pulls_commits/commits/'
    pulls_json = JSONHandler(pulls_dir)
    commits_json = JSONHandler(commits_dir)

    # Every regular file in the pulls folder is one page of pull requests.
    pull_files = [f for f in listdir(pulls_dir) if isfile(join(pulls_dir, f))]
    pulls = []
    for filename in pull_files:
        for pull in pulls_json.open_json(filename):
            pulls.append(pull['number'])

    shas = []
    for pull in pulls:
        pull_id = str(pull)
        if JSONHandler.file_exists(commits_dir + pull_id + '.json'):
            pages = commits_json.open_json(pull_id + '.json')
        else:
            pulls_endpoint = PrototypeAPI(owner, project, '/pulls_commits/',
                                          '/pulls/' + pull_id + '/commits')
            pages = pulls_endpoint.collect_batch(False)
            commits_json.save_json(pages, pull_id)

        # Extract the hashes on both paths. Doing it only for cached files
        # would leave freshly downloaded pull requests out of the result
        # (and out of the individual commit collection) until a later run.
        for page in pages:
            for commit in page:
                shas.append(commit['sha'])

    commits_endpoint = PrototypeAPI(owner, project, '/pulls_commits/', '/commits')
    total = len(shas)
    done = 1
    for sha in shas:
        if not sha:
            continue
        commits_endpoint.collect_single(sha)
        print(str(done * 100 / total) + "%")
        done = done + 1

    return shas