import json
from datetime import timedelta

from tornado import gen
from tornado.httpclient import AsyncHTTPClient
from tornado.locks import BoundedSemaphore
from tornado.log import app_log
from tornado.queues import Queue
from tornado.web import HTTPError

# GitHubIssuesFilter and GitHubRepositoryDateFilter are project-local filter
# classes assumed to be importable from the surrounding package.

# Tunable limits; these values are assumed defaults, not the project's own.
FETCH_CONCURRENCY = 10
MAXIMUM_REQ_TIME = 300  # seconds


# The classmethod/coroutine decorators are assumed from the enclosing class
# context, since the method yields futures and writes through cls.
@classmethod
@gen.coroutine
def get_data(cls, account, source_filter, limit=100, skip=0):
    """
    Gathers issue information from GH
    GET https://api.github.com/repos/:owner/:repo/issues

    Header: Accept: application/vnd.github.v3+json
    """
    if not account or not account.enabled:
        raise ValueError('cannot gather information without a valid account')
    client = AsyncHTTPClient()

    source_filter = GitHubIssuesFilter(source_filter)

    if source_filter.repository is None:
        raise ValueError('required parameter repository missing')

    app_log.info("Starting retrieval of issues for {}".format(account._id))

    default_headers = {
        "Content-Type": "application/json",
        "Accept": "application/vnd.github.v3+json"
    }

    # GitHub caps per_page at 100, so clamp the page size to the limit
    page_size = limit if limit is not None and limit <= 100 else 100
    uri = "https://api.github.com/repos/{}/issues?per_page={}&{}".format(
        source_filter.repository, page_size, source_filter.get_qs())
    uri = uri.rstrip('&')  # remove trailing & in case filter has no QS elements

    # stream the issues out as one JSON array
    cls.write('[')
    count = 0
    while uri is not None:
        app_log.info("({}) Retrieving next page, received {} issues thus far"
                     .format(account._id, count))
        req = account.get_request(uri, headers=default_headers)
        response = yield client.fetch(req)

        page_data = json.loads(response.body.decode('utf-8'))
        for issue in page_data:
            if count > 0:
                cls.write(',')
            cls.write(cls.format_data_to_schema(issue))
            count += 1

        if limit is None or count < limit:
            # parse the Link header from GitHub
            # (https://developer.github.com/v3/#pagination)
            links = parse_link_header(response.headers.get('Link', ''))
            uri = links.get('next', None)
        else:
            break
    cls.write(']')
    app_log.info("[GitHub] Finished retrieving {} issues for repository {}"
                 .format(count, source_filter.repository))

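# parse_link_header is used throughout this module but not defined here; a
# minimal sketch of the assumed helper, which turns GitHub's pagination
# header (e.g. '<https://...?page=2>; rel="next", <...>; rel="last"') into a
# rel -> url mapping. The project's real helper may differ.
def parse_link_header(header):
    links = {}
    for part in header.split(','):
        section = part.split(';')
        if len(section) < 2:
            continue
        url = section[0].strip().lstrip('<').rstrip('>')
        rel = section[1].strip().replace('rel=', '').strip('"')
        links[rel] = url
    return links
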
@gen.coroutine
def get_milestone_options(account, repository, **kwargs):
    """
    Gathers milestone options for a GitHub repository
    """
    client = AsyncHTTPClient()
    # GitHub's pagination parameter is per_page, not page_size
    uri = "https://api.github.com/repos/{}/milestones?per_page=100".format(
        repository)
    data = []
    while uri is not None:
        req = account.get_request(uri)
        response = yield client.fetch(req)
        response_object = json.loads(response.body.decode('utf-8'))
        data += response_object
        # follow the Link header until the last page
        links = parse_link_header(response.headers.get('Link', ''))
        uri = links.get('next', None)

    return [{
        "title": "#{} - {}".format(milestone['number'], milestone['title']),
        "value": milestone['number']
    } for milestone in data]

@gen.coroutine
def get_repo_options(account, **kwargs):
    """
    Gathers repository options for GitHub commits source
    GET https://api.github.com/user/repos

    Special Accept header required: application/vnd.github.moondragon+json
    """
    client = AsyncHTTPClient()
    uri = "https://api.github.com/user/repos?per_page=100"
    data = []
    while uri is not None:
        req = account.get_request(
            uri, headers={"Accept": "application/vnd.github.moondragon+json"})
        response = yield client.fetch(req)
        response_object = json.loads(response.body.decode('utf-8'))
        data += response_object
        links = parse_link_header(response.headers.get('Link', ''))
        uri = links.get('next', None)

    return [{
        "title": repo['full_name'],
        "value": repo['full_name']
    } for repo in data]

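# A hypothetical driver showing how one of these option coroutines might be
# consumed outside a running IOLoop; `account` is assumed to be an
# authenticated object exposing get_request(), as used above.
from tornado.ioloop import IOLoop

def print_repo_titles(account):
    repos = IOLoop.current().run_sync(lambda: get_repo_options(account))
    for option in repos:
        print(option["title"])
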
# As above, the classmethod/coroutine decorators are assumed from the
# enclosing class context.
@classmethod
@gen.coroutine
def get_data(cls, account, source_filter, limit=100, skip=0):
    """
    Gathers commit information from GH
    GET https://api.github.com/repos/:owner/:repo/commits

    Header: Accept: application/vnd.github.v3+json
    """
    if not account or not account.enabled:
        raise ValueError('cannot gather information without a valid account')
    client = AsyncHTTPClient()

    source_filter = GitHubRepositoryDateFilter(source_filter)

    if source_filter.repository is None:
        raise ValueError('required parameter repository missing')

    default_headers = {
        "Content-Type": "application/json",
        "Accept": "application/vnd.github.v3+json"
    }

    # first we grab our list of commits; build the query string once so the
    # filter QS and per_page never produce a second '?'
    uri = "https://api.github.com/repos/{}/commits".format(
        source_filter.repository)
    params = []
    qs = source_filter.get_qs()
    if qs != '':
        params.append(qs)
    if limit is not None and limit <= 100:
        # we can handle our limit right here
        params.append("per_page={}".format(limit))
    else:
        params.append("per_page=100")  # maximum page size for the GitHub API
    uri += '?' + '&'.join(params)

    app_log.info(
        "Starting retrieval of commit list for account {}".format(account._id))

    taken = 0
    queue = Queue()
    sem = BoundedSemaphore(FETCH_CONCURRENCY)
    done, working = set(), set()

    while uri is not None:
        app_log.info("({}) Retrieving next page, received {} commits thus far"
                     .format(account._id, taken))
        req = account.get_request(uri, headers=default_headers)
        response = yield client.fetch(req)

        page_data = json.loads(response.body.decode('utf-8'))
        taken += len(page_data)
        for item in page_data:
            queue.put(item.get('url', None))

        if limit is None or taken < limit:
            # parse the Link header from GitHub
            # (https://developer.github.com/v3/#pagination)
            links = parse_link_header(response.headers.get('Link', ''))
            uri = links.get('next', None)
        else:
            break

        # refuse to fan out an unreasonably large detail fetch
        if queue.qsize() > 500:
            raise HTTPError(413, 'too many commits')
    app_log.info("({}) Commit list retrieved, fetching info for {} commits"
                 .format(account._id, taken))

    # open our list
    cls.write('[')

    # our worker to actually fetch the info for a single commit
    @gen.coroutine
    def fetch_url():
        current_url = yield queue.get()
        try:
            # the working set deduplicates URLs already being fetched
            if current_url in working:
                return
            page_no = len(working)
            app_log.info("Fetching page {}".format(page_no))
            working.add(current_url)
            req = account.get_request(current_url)
            client = AsyncHTTPClient()
            response = yield client.fetch(req)
            response_data = json.loads(response.body.decode('utf-8'))
            obj = {
                'date': response_data['commit']['author']['date'],
                'author': response_data['commit']['author']['name'],
                'added_files': len([f for f in response_data['files']
                                    if f['status'] == 'added']),
                'deleted_files': len([f for f in response_data['files']
                                      if f['status'] == 'deleted']),
                'modified_files': len([f for f in response_data['files']
                                       if f['status'] == 'modified']),
                'additions': response_data['stats']['additions'],
                'deletions': response_data['stats']['deletions']
            }
            if len(done) > 0:
                cls.write(',')
            cls.write(json.dumps(obj))
            done.add(current_url)
            app_log.info("Page {} downloaded".format(page_no))
        finally:
            queue.task_done()
            sem.release()

    @gen.coroutine
    def worker():
        while True:
            yield sem.acquire()
            # fire and forget: the semaphore bounds concurrency and is
            # released by fetch_url itself
            fetch_url()

    # start our concurrency worker
    worker()

    try:
        # wait until the queue drains or we hit the request time budget
        yield queue.join(timeout=timedelta(seconds=MAXIMUM_REQ_TIME))
    except gen.TimeoutError:
        app_log.warning("Request exceeds maximum time, cutting response short")
    finally:
        # close our list
        cls.write(']')
    app_log.info("Finished retrieving commits for {}".format(account._id))
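
# For reference, a minimal sketch of the account object these coroutines
# assume: something exposing _id, enabled, and get_request() returning an
# authenticated tornado HTTPRequest. The token attribute and auth scheme here
# are illustrative assumptions, not the project's actual implementation.
from tornado.httpclient import HTTPRequest

class ExampleAccount(object):
    def __init__(self, account_id, oauth_token, enabled=True):
        self._id = account_id
        self.oauth_token = oauth_token
        self.enabled = enabled

    def get_request(self, uri, headers=None):
        request_headers = {
            "Authorization": "token {}".format(self.oauth_token),
            "User-Agent": "github-data-source-example"
        }
        request_headers.update(headers or {})
        return HTTPRequest(uri, headers=request_headers)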