def update_license_and_kind(self):
    """Recompute the library's kind and SPDX license identifier.

    When the scope does not start with '@', bower.json is fetched from the
    repository's default branch ('master' if the stored metadata names none).
    The kind is 'collection' when bower.json lists the 'element-collection'
    keyword and 'element' otherwise.

    License candidates are tried in order until licenses.validate_spdx
    returns something other than None: the license in the stored GitHub
    metadata, the 'license' field of bower.json, and (for '@' scopes only)
    the 'license' field of the stored registry metadata.

    self.library_dirty is set whenever the kind or the SPDX identifier
    changes.

    Returns:
        The result of self.error(...) when bower.json cannot be parsed or no
        license is detected, the result of self.retry(...) when the fetch
        answers with a status other than 200 or 404, and None otherwise.
    """
    metadata = json.loads(self.library.metadata)
    default_branch = metadata.get('default_branch', 'master')
    is_scoped = self.scope.startswith('@')

    kind = 'element'
    bower_json = None
    if not is_scoped:
        response = urlfetch.fetch(
            util.content_url(self.owner, self.repo, default_branch, 'bower.json'),
            validate_certificate=True)
        if response.status_code == 200:
            try:
                bower_json = json.loads(response.content)
            except ValueError:
                # Name the branch that was actually fetched rather than a
                # hard-coded "master".
                return self.error(
                    "Could not parse %s/bower.json" % default_branch,
                    ErrorCodes.Library_parse_bower)
        elif response.status_code != 404:
            # A 404 simply means there is no bower.json; anything else is
            # handed to self.retry.
            return self.retry(
                'error fetching %s/bower.json (%d)'
                % (default_branch, response.status_code))

        if bower_json is not None and 'element-collection' in bower_json.get('keywords', []):
            kind = 'collection'

    if self.library.kind != kind:
        self.library.kind = kind
        self.library_dirty = True

    spdx_identifier = None
    github_license = metadata.get('license')
    # GitHub may now return as a license object instead.
    if isinstance(github_license, dict):
        github_license = github_license.get('spdx_id', 'MISSING')
    if github_license is not None:
        spdx_identifier = licenses.validate_spdx(github_license)

    if spdx_identifier is None and bower_json is not None:
        license_name = bower_json.get('license')
        if license_name is not None:
            spdx_identifier = licenses.validate_spdx(license_name)

    if spdx_identifier is None and is_scoped:
        registry_metadata = json.loads(self.library.registry_metadata)
        spdx_identifier = licenses.validate_spdx(registry_metadata.get('license', ''))

    if self.library.spdx_identifier != spdx_identifier:
        self.library.spdx_identifier = spdx_identifier
        self.library_dirty = True

    if self.library.spdx_identifier is None:
        if is_scoped:
            return self.error(
                'Could not detect an OSI approved license on GitHub or in package info',
                ErrorCodes.Library_license)
        return self.error(
            'Could not detect an OSI approved license on GitHub or in %s/bower.json' % default_branch,
            ErrorCodes.Library_license)
def update_bower(self):
    """Fetch bower.json at self.sha and store its raw text as 'bower' Content.

    Returns:
        The parsed bower.json on success. Otherwise the result of
        self.error(...) for a 404 or unparseable file, or the result of
        self.retry(...) for any other non-200 status.
    """
    bower_url = util.content_url(self.owner, self.repo, self.sha, 'bower.json')
    response = urlfetch.fetch(bower_url, validate_certificate=True)
    status = response.status_code

    if status == 404:
        return self.error("missing bower.json")
    if status != 200:
        return self.retry('could not access bower.json (%d)' % status)

    try:
        parsed = json.loads(response.content)
    except ValueError:
        return self.error("could not parse bower.json")

    # Store the raw response text alongside its ETag.
    entity = Content(
        parent=self.version_key,
        id='bower',
        content=response.content,
        status=Status.ready,
        etag=response.headers.get('ETag', None))
    entity.put()
    return parsed
def update_bower(self):
    """Fetch bower.json at self.sha and store the parsed JSON as 'bower' Content.

    Returns:
        The parsed bower.json on success. Otherwise the result of
        self.error(...) with ErrorCodes.Version_missing_bower for a 404 or
        ErrorCodes.Version_parse_bower for an unparseable file, or the result
        of self.retry(...) for any other non-200 status.
    """
    bower_url = util.content_url(self.owner, self.repo, self.sha, 'bower.json')
    response = urlfetch.fetch(bower_url, validate_certificate=True)
    status = response.status_code

    if status == 404:
        return self.error("missing bower.json", ErrorCodes.Version_missing_bower)
    if status != 200:
        return self.retry('could not access bower.json (%d)' % status)

    try:
        parsed = json.loads(response.content)
    except ValueError:
        return self.error("could not parse bower.json", ErrorCodes.Version_parse_bower)

    # Store the parsed document (not the raw text) alongside its ETag.
    entity = Content(
        parent=self.version_key,
        id='bower',
        json=parsed,
        status=Status.ready,
        etag=response.headers.get('ETag', None))
    entity.put()
    return parsed
def update_license_and_kind(self):
    """Recompute the library's kind and SPDX license identifier.

    bower.json is fetched from the repository's default branch ('master' if
    the stored metadata names none). The kind is 'collection' when bower.json
    lists the 'element-collection' keyword and 'element' otherwise.

    The license comes from the 'key' of the license object in the stored
    GitHub metadata; if licenses.validate_spdx returns None for it (or there
    is no license object), the 'license' field of bower.json is tried.

    self.library_dirty is set whenever the kind or the SPDX identifier
    changes.

    Returns:
        The result of self.error(...) when bower.json cannot be parsed or no
        license is detected, the result of self.retry(...) when the fetch
        answers with a status other than 200 or 404, and None otherwise.
    """
    metadata = json.loads(self.library.metadata)
    default_branch = metadata.get('default_branch', 'master')

    response = urlfetch.fetch(
        util.content_url(self.owner, self.repo, default_branch, 'bower.json'),
        validate_certificate=True)

    bower_json = None
    if response.status_code == 200:
        try:
            bower_json = json.loads(response.content)
        except ValueError:
            return self.error("Could not parse %s/bower.json" % default_branch)
    elif response.status_code != 404:
        # The original format string had no conversion specifier, so the
        # '%' operation raised TypeError instead of scheduling a retry.
        return self.retry(
            'error fetching %s/bower.json (%d)'
            % (default_branch, response.status_code))

    kind = 'element'
    if bower_json is not None and 'element-collection' in bower_json.get('keywords', []):
        kind = 'collection'

    if self.library.kind != kind:
        self.library.kind = kind
        self.library_dirty = True

    spdx_identifier = None
    github_license = metadata.get('license')
    if github_license is not None:
        spdx_identifier = licenses.validate_spdx(github_license.get('key', 'MISSING'))

    if spdx_identifier is None and bower_json is not None:
        license_name = bower_json.get('license')
        if license_name is not None:
            spdx_identifier = licenses.validate_spdx(license_name)

    if self.library.spdx_identifier != spdx_identifier:
        self.library.spdx_identifier = spdx_identifier
        self.library_dirty = True

    if self.library.spdx_identifier is None:
        return self.error(
            'Could not detect an OSI approved license on GitHub or in %s/bower.json' % default_branch)
def update_license_and_kind(self):
    """Recompute the library's kind and SPDX license identifier.

    bower.json is fetched from the repository's default branch ('master' if
    the stored metadata names none). The kind is 'collection' when bower.json
    lists the 'element-collection' keyword and 'element' otherwise.

    The license comes from the 'key' of the license object in the stored
    GitHub metadata; if licenses.validate_spdx returns None for it (or there
    is no license object), the 'license' field of bower.json is tried.

    self.library_dirty is set whenever the kind or the SPDX identifier
    changes.

    Returns:
        The result of self.error(...) when bower.json cannot be parsed or no
        license is detected, the result of self.retry(...) when the fetch
        answers with a status other than 200 or 404, and None otherwise.
    """
    metadata = json.loads(self.library.metadata)
    default_branch = metadata.get('default_branch', 'master')

    bower_url = util.content_url(self.owner, self.repo, default_branch, 'bower.json')
    response = urlfetch.fetch(bower_url, validate_certificate=True)

    bower_json = None
    if response.status_code == 200:
        try:
            bower_json = json.loads(response.content)
        except ValueError:
            return self.error("Could not parse %s/bower.json" % default_branch)
    elif response.status_code != 404:
        # Fixed: the message previously lacked a '%d' placeholder, which made
        # the string formatting raise TypeError before self.retry was called.
        return self.retry(
            'error fetching %s/bower.json (%d)'
            % (default_branch, response.status_code))

    kind = 'element'
    if bower_json is not None and 'element-collection' in bower_json.get('keywords', []):
        kind = 'collection'
    if self.library.kind != kind:
        self.library.kind = kind
        self.library_dirty = True

    spdx_identifier = None
    github_license = metadata.get('license')
    if github_license is not None:
        spdx_identifier = licenses.validate_spdx(github_license.get('key', 'MISSING'))
    if spdx_identifier is None and bower_json is not None:
        license_name = bower_json.get('license')
        if license_name is not None:
            spdx_identifier = licenses.validate_spdx(license_name)

    if self.library.spdx_identifier != spdx_identifier:
        self.library.spdx_identifier = spdx_identifier
        self.library_dirty = True

    if self.library.spdx_identifier is None:
        return self.error(
            'Could not detect an OSI approved license on GitHub or in %s/bower.json' % default_branch)
def get(self, owner, repo, version):
    """Ingest one version of a library: README, rendered README, bower.json.

    Steps, in order:
      1. Reserve one unit of GitHub quota; respond 500 if that fails.
      2. Fetch README.md and store it as 'readme' Content. If storing raises
         db.BadValueError, record an error on the version and respond 200.
      3. Render the README through github.markdown and store 'readme.html'.
      4. Fetch bower.json; if it is not valid JSON, record an error on the
         version and respond 200. Otherwise store it as 'bower' Content.
      5. If this version is the last entry of Library.versions_for_key,
         queue dependency ingestion for collections and put a search
         document into the 'repo' index.
    """
    logging.info('ingesting version %s/%s/%s', owner, repo, version)

    github = quota.GitHub()
    if not github.reserve(1):
        self.response.set_status(500)
        return

    key = ndb.Key(Library, '%s/%s' % (owner, repo), Version, version)

    def record_failure(message):
        # Persist the message on the Version entity and answer 200.
        ver = key.get()
        ver.error = message
        ver.put()
        self.response.set_status(200)

    readme_response = urlfetch.fetch(util.content_url(owner, repo, version, 'README.md'))
    readme = readme_response.content
    try:
        readme_content = Content(parent=key, id='readme', content=readme)
        readme_content.etag = readme_response.headers.get('ETag', None)
        readme_content.put()
    except db.BadValueError:
        record_failure("Could not store README.md as a utf-8 string")
        return

    rendered = github.markdown(readme)
    Content(parent=key, id='readme.html', content=rendered.content).put()

    bower_response = urlfetch.fetch(util.content_url(owner, repo, version, 'bower.json'))
    try:
        bower = json.loads(bower_response.content)
    except ValueError:
        record_failure("This version has a missing or broken bower.json")
        return

    bower_content = Content(parent=key, id='bower', content=bower_response.content)
    bower_content.etag = bower_response.headers.get('ETag', None)
    bower_content.put()

    versions = Library.versions_for_key(key.parent())
    if versions[-1] == version:
        library = key.parent().get()
        if library.kind == "collection":
            util.new_task(util.ingest_dependencies_task(owner, repo, version))

        metadata = json.loads(library.metadata)
        logging.info('adding search index for %s', version)
        # bower.json's description wins; GitHub metadata is the fallback.
        description = bower.get("description", metadata.get("description", ""))
        updated_at = datetime.datetime.strptime(metadata.get('updated_at'), TIME_FORMAT)

        fields = [
            search.AtomField(name='full_name', value=metadata['full_name']),
            search.TextField(name='owner', value=owner),
            search.TextField(name='repo', value=repo),
            search.TextField(name='version', value=version),
            search.TextField(name='repoparts', value=repo.replace('-', ' ')),
            search.TextField(name='description', value=description),
            search.TextField(name='keywords', value=' '.join(bower.get('keywords', []))),
            search.NumberField(name='stars', value=metadata.get('stargazers_count')),
            search.NumberField(name='subscribers', value=metadata.get('subscribers_count')),
            search.NumberField(name='forks', value=metadata.get('forks')),
            search.NumberField(name='contributors', value=library.contributor_count),
            search.DateField(name='updated_at', value=updated_at),
        ]
        document = search.Document(doc_id='%s/%s' % (owner, repo), fields=fields)
        search.Index('repo').put(document)

    self.response.set_status(200)