def update_metadata(self):
    """Refresh the library's cached GitHub metadata, contributors and stats.

    Three conditional GitHub requests are made, each keyed on the ETag
    stored on ``self.library``. A 304 leaves the cached value untouched; a
    200 replaces the cached body, ETag and timestamp and sets
    ``self.library_dirty``.

    Returns:
        ``None`` when all three requests were handled; otherwise the result
        of ``self.error(...)`` (unparseable body) or ``self.retry(...)``
        (unexpected status) for the first request that failed.

    Raises:
        RequestAborted: when the repo metadata reports a different
            owner/name (the library is deleted and a task from
            ``util.ensure_library_task`` is queued for the new name), or
            when GitHub answers 404 (the library is deleted).
    """
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo,
                               etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata")

        repo = metadata.get('name', '').lower()
        owner = metadata.get('owner', {}).get('login', '').lower()
        renamed = repo != self.repo or owner != self.owner
        if repo != '' and owner != '' and renamed:
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(owner, repo)
            util.new_task(task_url, target='manage')
            # Interpolate here: exceptions do not apply logging-style
            # format arguments, so the new id would otherwise be lost.
            raise RequestAborted('repo has been renamed to %s' % Library.id(owner, repo))

        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors',
                               etag=self.library.contributors_etag)
    if response.status_code == 200:
        # The body is stored raw; parsing only validates that it is JSON.
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors")

        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # The endpoint name previously carried a stray trailing space.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation',
                               etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation")

        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def update_pages(self):
    """Fetch, render and store each page listed under ``pages`` in bower.json.

    For every path in the ``pages`` mapping of the stored ``bower`` content,
    the file is requested from the GitHub contents API at ``self.sha``,
    base64-decoded, passed to ``util.github_markdown`` and the result is
    stored as a ``Content`` entity with id ``'page-' + path``.

    Paths that GitHub answers with 404, that are not files, or that carry no
    ``content`` field are skipped.

    Returns:
        ``None`` when there is no bower content or every page was handled;
        otherwise the result of ``self.retry(...)`` for the first
        unexpected status.
    """
    bower = Content.get_by_id('bower', parent=self.version_key)
    if bower is None:
        return

    bower_json = bower.get_json()

    # Only the mapping's values (paths) are used; its keys are not needed.
    for path in bower_json.get('pages', {}).itervalues():
        response = util.github_get('repos', self.owner, self.repo, 'contents/' + path,
                                   params={'ref': self.sha})
        if response.status_code == 404:
            continue
        if response.status_code != 200:
            return self.retry('error fetching page %s (%d)' % (path, response.status_code))

        response_json = json.loads(response.content)
        # Ensure a file was returned
        if not isinstance(response_json, dict) or response_json.get('type') != 'file':
            continue

        encoded = response_json.get('content')
        if encoded is None:
            # b64decode(None) would raise; treat a body-less file as absent.
            continue
        markdown = base64.b64decode(encoded)

        response = util.github_markdown(markdown)
        if response.status_code != 200:
            return self.retry('error converting page to markdown %s (%d)' % (path, response.status_code))

        Content(parent=self.version_key, id='page-' + path, content=response.content,
                status=Status.ready, etag=response.headers.get('ETag', None)).put()
def update_element_tags(self):
    """Refresh the library's tag list from GitHub and return the tag map.

    Requests the repo's ``tags`` endpoint conditionally on
    ``self.library.tags_etag``. Only tag names accepted by
    ``versiontag.is_valid`` are kept.

    Returns:
        A dict mapping tag name to commit sha. On 304 it is decoded from the
        stored ``self.library.tag_map``. On an unexpected status or an
        unparseable body, the result of ``self.retry(...)`` /
        ``self.error(...)`` is returned instead.
    """
    # Transition from when we didn't store tag_map
    if self.library.tag_map is None:
        self.library.tags_etag = None

    response = util.github_get('repos', self.owner, self.repo, 'tags', etag=self.library.tags_etag)

    if response.status_code == 304:
        return json.loads(self.library.tag_map)

    if response.status_code != 200:
        return self.retry('could not update git/refs/tags (%d)' % response.status_code)

    try:
        data = json.loads(response.content)
    except ValueError:
        return self.error("could not parse tags", ErrorCodes.Library_element_parse_tags)

    tag_map = dict((tag['name'], tag['commit']['sha'])
                   for tag in data if versiontag.is_valid(tag['name']))
    tags = tag_map.keys()
    tags.sort(versiontag.compare)

    # The dirty flag lives on the handler (as in the sibling update_*
    # methods), not on the library entity.
    self.library_dirty = True
    self.library.tags = tags
    self.library.tag_map = json.dumps(tag_map)
    self.library.tags_etag = response.headers.get('ETag', None)
    self.library.tags_updated = datetime.datetime.now()
    return tag_map
def update_readme(self, is_npm_package):
    """Fetch the version's readme, store it raw and store its HTML rendering.

    Args:
        is_npm_package: when true the readme is fetched through
            ``util.unpkg_get`` using the ``readmeFilename`` from the
            library's registry metadata (default ``README.md``); otherwise
            it comes from GitHub's ``readme`` endpoint at ``self.sha``.

    Returns:
        ``None`` when the readme was stored or does not exist (404);
        otherwise the result of ``self.retry(...)`` (unexpected status) or
        ``self.error(...)`` (the raw readme could not be stored).
    """
    if is_npm_package:
        # Load registry metadata to fetch readme path.
        library = Library.get_by_id(Library.id(self.owner, self.repo))
        raw_registry = library.registry_metadata if library is not None else None
        # Fall back to an empty mapping so the default filename applies
        # when no registry metadata has been stored.
        registry_metadata = json.loads(raw_registry) if raw_registry else {}
        readme_path = registry_metadata.get('readmeFilename') or 'README.md'
        response = util.unpkg_get(self.owner, self.repo, self.version, readme_path)
    else:
        # Load readme from GitHub endpoint.
        response = util.github_get('repos', self.owner, self.repo, 'readme', params={"ref": self.sha})

    # NOTE(review): assumes the unpkg response exposes status_code like the
    # GitHub one does (both are already read via .content/.headers) - confirm.
    if response.status_code == 404:
        return
    if response.status_code != 200:
        return self.retry('error fetching readme (%d)' % response.status_code)

    if is_npm_package:
        readme = response.content
    else:
        readme = base64.b64decode(json.loads(response.content)['content'])

    # Store the raw readme markdown.
    try:
        Content(parent=self.version_key, id='readme', content=readme,
                status=Status.ready, etag=response.headers.get('ETag', None)).put()
    except db.BadValueError:
        return self.error("Could not store README.md as a utf-8 string", ErrorCodes.Version_utf)

    # Convert markdown to HTML and store the result.
    response = util.github_markdown(readme)
    if response.status_code != 200:
        return self.retry('error converting readme to markdown (%d)' % response.status_code)

    Content(parent=self.version_key, id='readme.html', content=response.content,
            status=Status.ready, etag=response.headers.get('ETag', None)).put()
def update_element_tags(self):
    """Refresh the library's tag list from GitHub and return the tag map.

    Requests the repo's ``tags`` endpoint conditionally on
    ``self.library.tags_etag``. Only tag names accepted by
    ``versiontag.is_valid`` are kept.

    Returns:
        A dict mapping tag name to commit sha. On 304 it is decoded from the
        stored ``self.library.tag_map``. On an unexpected status or an
        unparseable body, the result of ``self.retry(...)`` /
        ``self.error(...)`` is returned instead.
    """
    # Transition from when we didn't store tag_map
    if self.library.tag_map is None:
        self.library.tags_etag = None

    response = util.github_get('repos', self.owner, self.repo, 'tags', etag=self.library.tags_etag)

    if response.status_code == 304:
        return json.loads(self.library.tag_map)

    if response.status_code != 200:
        return self.retry('could not update git/refs/tags (%d)' % response.status_code)

    try:
        data = json.loads(response.content)
    except ValueError:
        return self.error("could not parse tags")

    tag_map = dict((tag['name'], tag['commit']['sha'])
                   for tag in data if versiontag.is_valid(tag['name']))
    tags = tag_map.keys()
    tags.sort(versiontag.compare)

    # The dirty flag lives on the handler (as in the sibling update_*
    # methods), not on the library entity.
    self.library_dirty = True
    self.library.tags = tags
    self.library.tag_map = json.dumps(tag_map)
    self.library.tags_etag = response.headers.get('ETag', None)
    self.library.tags_updated = datetime.datetime.now()
    return tag_map
def post(self):
    """Install a ``pull_request`` webhook on the repo named in the request.

    Reads the ``repo`` request parameter (``owner/name``), obtains an access
    token via ``exchange_token``, checks that the token has admin permission
    on the repo, and creates a webhook pointing at this host's
    ``/api/preview-event`` unless an active hook with the same config is
    already listed. On success a task from ``util.ingest_webhook_task`` is
    queued with the access token.

    Responses written: 400 for a malformed repo name, 401 when the repo
    cannot be read or the token lacks admin access, 500 when GitHub refuses
    to create the hook; otherwise a short confirmation message.
    """
    full_name = self.request.get('repo').lower()
    split = full_name.split('/')
    if len(split) != 2:
        self.response.set_status(400)
        self.response.write('Bad request, not repo')
        return
    owner, repo = split

    access_token = exchange_token(self)
    if access_token is None:
        return

    # Validate access token against repo
    repos_response = util.github_get('repos/%s' % full_name, access_token=access_token)
    if repos_response.status_code != 200:
        self.response.set_status(401)
        self.response.write('Cannot access repo')
        return

    info = json.loads(repos_response.content)
    # A missing 'permissions' entry is treated as "no admin access".
    has_access = info.get('permissions', {}).get('admin', False)

    if not has_access:
        self.response.set_status(401)
        self.response.write('Do not have access to the repo')
        return

    parsed_url = urlparse(self.request.url)
    params = {
        'name': 'web',
        'events': ['pull_request'],
        'config': {
            'url': '%s://%s/api/preview-event' % (parsed_url.scheme, parsed_url.netloc),
            'content_type': 'json',
        },
    }

    # Check if the webhook exists. Listing hooks is a read, so it must be a
    # GET; a POST to the same path is the create call issued further down.
    list_webhooks_response = util.github_get('repos', owner, repo, 'hooks', access_token=access_token)
    if list_webhooks_response.status_code != 200:
        logging.error('Unable to query existing webhooks, continuing anyway. Github %s: %s',
                      list_webhooks_response.status_code, list_webhooks_response.content)
    else:
        webhooks = json.loads(list_webhooks_response.content)
        # NOTE(review): this is an exact dict comparison; if GitHub echoes
        # extra config keys the match will never succeed - confirm.
        for webhook in webhooks:
            if webhook.get('active') and webhook.get('config') == params['config']:
                self.response.write('Webhook is already configured')
                return

    # Create the webhook
    create_webhook_response = util.github_post('repos', owner, repo, 'hooks', params, access_token)
    if create_webhook_response.status_code != 201:
        self.response.set_status(500)
        self.response.write('Failed to create webhook.')
        logging.error('Failed to create webhook. Github %s: %s',
                      create_webhook_response.status_code, create_webhook_response.content)
        return

    # Trigger shallow ingestion of the library so we can store the access token.
    util.new_task(util.ingest_webhook_task(owner, repo), params={'access_token': access_token}, target='manage')
    self.response.write('Created webhook')
def update_readme(self):
    """Fetch the readme at ``self.sha``, store it raw, then store its HTML.

    Returns:
        ``None`` when the readme was stored or GitHub answered 404;
        otherwise the result of ``self.retry(...)`` (unexpected status) or
        ``self.error(...)`` (the raw readme could not be stored).
    """
    fetched = util.github_get('repos', self.owner, self.repo, 'readme', params={"ref": self.sha})
    fetch_status = fetched.status_code

    if fetch_status == 404:
        # No readme for this version: nothing to store or render.
        return None
    if fetch_status != 200:
        return self.retry('error fetching readme (%d)' % fetched.status_code)

    payload = json.loads(fetched.content)
    readme = base64.b64decode(payload['content'])
    try:
        raw_entity = Content(parent=self.version_key,
                             id='readme',
                             content=readme,
                             status=Status.ready,
                             etag=fetched.headers.get('ETag', None))
        raw_entity.put()
    except db.BadValueError:
        return self.error(
            "Could not store README.md as a utf-8 string", ErrorCodes.Version_utf)

    rendered = util.github_markdown(readme)
    if rendered.status_code != 200:
        return self.retry('error converting readme to markdown (%d)' % rendered.status_code)

    html_entity = Content(parent=self.version_key,
                          id='readme.html',
                          content=rendered.content,
                          status=Status.ready,
                          etag=rendered.headers.get('ETag', None))
    html_entity.put()
def update_metadata(self):
    """Refresh the author's cached GitHub user metadata.

    The request is conditional on ``self.author.metadata_etag``; a 304
    leaves the author untouched.

    Returns:
        ``None`` on 200 or 304; the result of ``self.retry(...)`` for any
        other status except 404.

    Raises:
        RequestAborted: when GitHub answers 404 (the author is deleted
            first).
    """
    reply = util.github_get('users', self.author.key.id(), etag=self.author.metadata_etag)
    status = reply.status_code

    if status == 304:
        # Cached copy is still current.
        return None

    if status == 404:
        logging.info('deleting non-existing author %s', self.author.key.id())
        delete_author(self.author.key)
        raise RequestAborted('author no longer exists')

    if status != 200:
        return self.retry('could not update author metadata (%d)' % reply.status_code)

    self.author.metadata = reply.content
    self.author.metadata_etag = reply.headers.get('ETag', None)
    self.author_dirty = True
def post(self):
    """Queue a preview ingestion for the GitHub URL given in the request.

    The ``url`` request parameter must be a ``https://github.com/`` URL of a
    repo, optionally followed by a commit, pull request or tree path. A
    commit sha found in the URL is used directly; otherwise the sha is
    looked up through the ``git/refs`` endpoint (``heads/master`` for a
    bare repo URL). On success ``owner/repo/sha`` is written and a task from
    ``util.ingest_preview_task`` is queued.

    Responses written: 400 when the URL is not understood or the ref cannot
    be resolved. Nothing is written by this method when
    ``validate_captcha`` fails.
    """
    if not validate_captcha(self):
        return

    url = self.request.get('url')
    match = re.match(r'https://github\.com/(.*?)/([^/]*)(.*)', url)
    if match is None:
        self.response.set_status(400)
        self.response.write('Unable to understand url (%s)' % url)
        # Stop here: there is no match object to read groups from.
        return

    owner, repo, tail = match.groups()

    # SHA already defined
    match = re.match(r'.*commits?/(.*)', tail)
    if match:
        self.response.headers['Access-Control-Allow-Origin'] = '*'
        self.response.headers['Content-Type'] = 'application/json'
        self.response.write('%s/%s/%s' % (owner, repo, match.group(1)))
        util.new_task(util.ingest_preview_task(owner, repo), params={
            'commit': match.group(1),
            'url': url
        }, target='manage')
        return

    # Resolve SHA using these patterns and Github API
    tail = re.sub(r'/pull/(.*)', r'pull/\1/head', tail)
    tail = re.sub(r'/tree/(.*)', r'heads/\1', tail)
    tail = re.sub(r'^$', r'heads/master', tail)

    # After the rewrites above a recognised tail always starts with one of
    # these prefixes; anything else is a URL shape we cannot resolve.
    if not tail.startswith(('pull/', 'heads/')):
        self.response.set_status(400)
        self.response.write('Unable to understand url (%s)' % url)
        return

    response = util.github_get('repos', owner, repo, 'git/refs/' + tail)
    if response.status_code != 200:
        self.response.set_status(400)
        self.response.write('Error resolving url (%s)' % url)
        return

    try:
        sha = json.loads(response.content)['object']['sha']
    except (ValueError, KeyError, TypeError):
        # Body was not the single-ref object we expect.
        self.response.set_status(400)
        self.response.write('Error resolving url (%s)' % url)
        return

    util.new_task(util.ingest_preview_task(owner, repo), params={
        'commit': sha,
        'url': url
    }, target='manage')
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    self.response.headers['Content-Type'] = 'application/json'
    self.response.write('%s/%s/%s' % (owner, repo, sha))
def update_collection_tags(self):
    """Track the master branch as a synthetic, incrementing version tag.

    Requests ``git/refs/heads/master`` conditionally on
    ``self.library.tags_etag``. When the sha differs from the stored tag
    map, ``self.library.collection_sequence_number`` is incremented and a
    single tag ``v0.0.<n>`` pointing at the new sha replaces the stored
    tags.

    Returns:
        A dict mapping the version tag to the master sha (decoded from the
        stored ``tag_map`` when nothing changed). On an unexpected status,
        an unparseable body or a missing master ref, the result of
        ``self.retry(...)`` / ``self.error(...)`` is returned instead.
    """
    # Transition from when we didn't store tag_map
    if self.library.tag_map is None:
        self.library.tags_etag = None

    response = util.github_get('repos', self.owner, self.repo, 'git/refs/heads/master',
                               etag=self.library.tags_etag)
    if response.status_code == 304:
        return json.loads(self.library.tag_map)

    if response.status_code != 200:
        return self.retry('could not update git/refs/heads/master (%d)' % response.status_code)

    try:
        data = json.loads(response.content)
    except ValueError:
        return self.error("could not parse git/refs/heads/master", ErrorCodes.Library_collection_parse_tags)

    # A body that is not a JSON object (for example a list of refs) cannot
    # describe the master ref; report it rather than failing on .get().
    if not isinstance(data, dict) or data.get('ref', None) != 'refs/heads/master':
        return self.error('could not find master branch', ErrorCodes.Library_collection_master)

    master_sha = data['object']['sha']

    if self.library.tag_map is not None:
        # Even though we got a reply the master_sha might not have changed.
        tag_map = json.loads(self.library.tag_map)
        # Membership test is also safe for an empty stored map.
        if master_sha in tag_map.values():
            return tag_map

    self.library.collection_sequence_number = self.library.collection_sequence_number + 1
    version = 'v0.0.%d' % self.library.collection_sequence_number
    tag_map = {version: master_sha}

    self.library_dirty = True
    self.library.tags = [version]
    self.library.tag_map = json.dumps(tag_map)
    self.library.tags_etag = response.headers.get('ETag', None)
    self.library.tags_updated = datetime.datetime.now()
    return tag_map
def update_readme(self):
    """Fetch the readme at ``self.sha``, store it raw, then store its HTML.

    Returns:
        ``None`` when the readme was stored or GitHub answered 404;
        otherwise the result of ``self.retry(...)`` (unexpected status) or
        ``self.error(...)`` (the raw readme could not be stored).
    """
    fetched = util.github_get('repos', self.owner, self.repo, 'readme', params={"ref": self.sha})
    fetch_status = fetched.status_code

    if fetch_status == 404:
        # No readme for this version: nothing to store or render.
        return None
    if fetch_status != 200:
        return self.retry('error fetching readme (%d)' % fetched.status_code)

    payload = json.loads(fetched.content)
    readme = base64.b64decode(payload['content'])
    try:
        raw_entity = Content(parent=self.version_key,
                             id='readme',
                             content=readme,
                             status=Status.ready,
                             etag=fetched.headers.get('ETag', None))
        raw_entity.put()
    except db.BadValueError:
        return self.error("Could not store README.md as a utf-8 string")

    rendered = util.github_markdown(readme)
    if rendered.status_code != 200:
        return self.retry('error converting readme to markdown (%d)' % rendered.status_code)

    html_entity = Content(parent=self.version_key,
                          id='readme.html',
                          content=rendered.content,
                          status=Status.ready,
                          etag=rendered.headers.get('ETag', None))
    html_entity.put()
def update_collection_tags(self):
    """Track the master branch as a synthetic, incrementing version tag.

    Requests ``git/refs/heads/master`` conditionally on
    ``self.library.tags_etag``. When the sha differs from the stored tag
    map, ``self.library.collection_sequence_number`` is incremented and a
    single tag ``v0.0.<n>`` pointing at the new sha replaces the stored
    tags.

    Returns:
        A dict mapping the version tag to the master sha (decoded from the
        stored ``tag_map`` when nothing changed). On an unexpected status,
        an unparseable body or a missing master ref, the result of
        ``self.retry(...)`` / ``self.error(...)`` is returned instead.
    """
    # Transition from when we didn't store tag_map
    if self.library.tag_map is None:
        self.library.tags_etag = None

    response = util.github_get('repos', self.owner, self.repo, 'git/refs/heads/master',
                               etag=self.library.tags_etag)
    if response.status_code == 304:
        return json.loads(self.library.tag_map)

    if response.status_code != 200:
        return self.retry('could not update git/refs/heads/master (%d)' % response.status_code)

    try:
        data = json.loads(response.content)
    except ValueError:
        return self.error("could not parse git/refs/heads/master")

    # A body that is not a JSON object (for example a list of refs) cannot
    # describe the master ref; report it rather than failing on .get().
    if not isinstance(data, dict) or data.get('ref', None) != 'refs/heads/master':
        return self.error('could not find master branch')

    master_sha = data['object']['sha']

    if self.library.tag_map is not None:
        # Even though we got a reply the master_sha might not have changed.
        tag_map = json.loads(self.library.tag_map)
        # Membership test is also safe for an empty stored map.
        if master_sha in tag_map.values():
            return tag_map

    self.library.collection_sequence_number = self.library.collection_sequence_number + 1
    version = 'v0.0.%d' % self.library.collection_sequence_number
    tag_map = {version: master_sha}

    self.library_dirty = True
    self.library.tags = [version]
    self.library.tag_map = json.dumps(tag_map)
    self.library.tags_etag = response.headers.get('ETag', None)
    self.library.tags_updated = datetime.datetime.now()
    return tag_map
def update_metadata(self):
    """Refresh the library's cached GitHub metadata, contributors and stats.

    A scope starting with ``@`` marks an NPM package, for which
    ``self.update_registry_info()`` is called first; otherwise
    ``self.owner`` / ``self.repo`` are taken from ``self.scope`` /
    ``self.package``. Three conditional GitHub requests follow, each keyed
    on the ETag stored on ``self.library``. A 304 leaves the cached value
    untouched; a 200 replaces the cached body, ETag and timestamp and sets
    ``self.library_dirty``.

    Returns:
        ``None`` when all three requests were handled; otherwise the result
        of ``self.error(...)`` (unparseable body) or ``self.retry(...)``
        (unexpected status) for the first request that failed.

    Raises:
        RequestAborted: when a GitHub-backed library reports a different
            owner/name (the library is deleted and a task from
            ``util.ensure_library_task`` is queued for the new name), or
            when GitHub answers 404 (the library is deleted).
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
        self.update_registry_info()
    else:
        self.owner = self.scope
        self.repo = self.package

    # Fetch GitHub metadata
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo,
                               etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)

        self.owner = metadata.get('owner', {}).get('login', '').lower()
        self.repo = metadata.get('name', '').lower()

        # Deleting is only necessary if Library entity is a GitHub repo
        renamed = self.repo != self.package or self.owner != self.scope
        if (not is_npm_package) and self.repo != '' and self.owner != '' and renamed:
            # self.owner/self.repo already hold the new name, so the old
            # identity being deleted is scope/package.
            logging.info('deleting renamed repo %s', Library.id(self.scope, self.package))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(self.owner, self.repo)
            util.new_task(task_url, target='manage')
            # Interpolate here: exceptions do not apply logging-style
            # format arguments, so the new id would otherwise be lost.
            raise RequestAborted('repo has been renamed to %s' % Library.id(self.owner, self.repo))

        # If adding a NPM package that a Bower repo already points to, remove the bower one.
        # NOTE(review): this only checks that an id could be built, not that
        # such a library is stored - confirm whether a lookup was intended.
        bower_library_id = Library.id(self.owner, self.repo)
        if is_npm_package and bower_library_id is not None:
            logging.info('removing bower repo %s', Library.id(self.owner, self.repo))
            task_url = util.suppress_library_task(self.owner, self.repo)
            util.new_task(task_url, target='manage')

        self.library.github_owner = self.owner
        self.library.github_repo = self.repo

        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors',
                               etag=self.library.contributors_etag)
    if response.status_code == 200:
        # The body is stored raw; parsing only validates that it is JSON.
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)

        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # The endpoint name previously carried a stray trailing space.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation',
                               etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)

        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def update_metadata(self):
    """Refresh the library's cached GitHub metadata, contributors and stats.

    Three conditional GitHub requests are made, each keyed on the ETag
    stored on ``self.library``. A 304 leaves the cached value untouched; a
    200 replaces the cached body, ETag and timestamp and sets
    ``self.library_dirty``.

    Returns:
        ``None`` when all three requests were handled; otherwise the result
        of ``self.error(...)`` (unparseable body) or ``self.retry(...)``
        (unexpected status) for the first request that failed.

    Raises:
        RequestAborted: when the repo metadata reports a different
            owner/name (the library is deleted and a task from
            ``util.ensure_library_task`` is queued for the new name), or
            when GitHub answers 404 (the library is deleted).
    """
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo,
                               etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)

        repo = metadata.get('name', '').lower()
        owner = metadata.get('owner', {}).get('login', '').lower()
        renamed = repo != self.repo or owner != self.owner
        if repo != '' and owner != '' and renamed:
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(owner, repo)
            util.new_task(task_url, target='manage')
            # Interpolate here: exceptions do not apply logging-style
            # format arguments, so the new id would otherwise be lost.
            raise RequestAborted('repo has been renamed to %s' % Library.id(owner, repo))

        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors',
                               etag=self.library.contributors_etag)
    if response.status_code == 200:
        # The body is stored raw; parsing only validates that it is JSON.
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)

        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # The endpoint name previously carried a stray trailing space.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation',
                               etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)

        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def update_metadata(self):
    """Refresh the library's cached GitHub metadata, contributors and stats.

    A scope starting with ``@`` marks an NPM package, for which
    ``self.update_registry_info()`` is called first; otherwise
    ``self.owner`` / ``self.repo`` are taken from ``self.scope`` /
    ``self.package``. Three conditional GitHub requests follow, each keyed
    on the ETag stored on ``self.library``. A 304 leaves the cached value
    untouched; a 200 replaces the cached body, ETag and timestamp and sets
    ``self.library_dirty``.

    Returns:
        ``None`` when all three requests were handled; otherwise the result
        of ``self.error(...)`` (unparseable body) or ``self.retry(...)``
        (unexpected status) for the first request that failed.

    Raises:
        RequestAborted: when a GitHub-backed library reports a different
            owner/name (the library is deleted and a task from
            ``util.ensure_library_task`` is queued for the new name), or
            when GitHub answers 404 (the library is deleted).
    """
    # Query NPM registry API for packages
    is_npm_package = self.scope.startswith('@')
    if is_npm_package:
        self.update_registry_info()
    else:
        self.owner = self.scope
        self.repo = self.package

    # Fetch GitHub metadata
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo,
                               etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata", ErrorCodes.Library_parse_metadata)

        self.owner = metadata.get('owner', {}).get('login', '').lower()
        self.repo = metadata.get('name', '').lower()

        # Deleting is only necessary if Library entity is a GitHub repo
        renamed = self.repo != self.package or self.owner != self.scope
        if (not is_npm_package) and self.repo != '' and self.owner != '' and renamed:
            # self.owner/self.repo already hold the new name, so the old
            # identity being deleted is scope/package.
            logging.info('deleting renamed repo %s', Library.id(self.scope, self.package))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(self.owner, self.repo)
            util.new_task(task_url, target='manage')
            # Interpolate here: exceptions do not apply logging-style
            # format arguments, so the new id would otherwise be lost.
            raise RequestAborted('repo has been renamed to %s' % Library.id(self.owner, self.repo))

        # If adding a NPM package that a Bower repo already points to, remove the bower one.
        # NOTE(review): this only checks that an id could be built, not that
        # such a library is stored - confirm whether a lookup was intended.
        bower_library_id = Library.id(self.owner, self.repo)
        if is_npm_package and bower_library_id is not None:
            task_url = util.migrate_library_task(self.owner, self.repo, self.scope, self.package)
            util.new_task(task_url, target='manage')

        self.library.github_owner = self.owner
        self.library.github_repo = self.repo

        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors',
                               etag=self.library.contributors_etag)
    if response.status_code == 200:
        # The body is stored raw; parsing only validates that it is JSON.
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors", ErrorCodes.Library_parse_contributors)

        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # The endpoint name previously carried a stray trailing space.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation',
                               etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation", ErrorCodes.Library_parse_stats)

        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)