def handle_get(self):
    """Rebuild the 'elements', 'collections' and 'authors' Sitemap entities.

    The pages of each sitemap are the ids of the keys returned by an
    eventually consistent, keys-only query: libraries of the matching kind
    that are ready and not shallowly ingested for the two library sitemaps,
    and every Author for the authors sitemap.
    """

    def ready_library_keys(kind):
        # Keys of ready, non-shallow libraries of the requested kind.
        query = Library.query().filter(Library.kind == kind)
        # pylint: disable=singleton-comparison
        query = query.filter(Library.shallow_ingestion == False)
        query = query.filter(Library.status == Status.ready)
        return query.fetch(keys_only=True, read_policy=ndb.EVENTUAL_CONSISTENCY)

    def store_sitemap(sitemap_id, keys, log_format):
        # Persist the key ids as the sitemap's pages and log how many there are.
        sitemap = Sitemap(id=sitemap_id)
        sitemap.pages = [key.id() for key in keys]
        sitemap.put()
        logging.info(log_format, len(sitemap.pages))

    store_sitemap('elements', ready_library_keys('element'), '%d elements')
    store_sitemap('collections', ready_library_keys('collection'), '%d collections')

    author_keys = Author.query().fetch(keys_only=True, read_policy=ndb.EVENTUAL_CONSISTENCY)
    store_sitemap('authors', author_keys, '%d authors')
def handle_post(self):
    """Store an analysis result delivered in the POST body.

    The body is URL-unquoted, stripped of trailing '=' characters and parsed
    as JSON. Its 'message' carries base64 'data' (the analysis JSON, possibly
    empty) and 'attributes' naming the owner, repo, version and an optional
    error. Nothing happens if no 'analysis' Content exists for that version.
    When the version is the library's default version, an index update task is
    queued.
    """
    raw_body = urllib.unquote(self.request.body).rstrip('=')
    message = json.loads(raw_body)['message']
    data = base64.b64decode(str(message['data']))

    attributes = message['attributes']
    owner = attributes['owner']
    repo = attributes['repo']
    version = attributes['version']
    error = attributes.get('error', None)

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)
    content = Content.get_by_id('analysis', parent=version_key)
    if content is None:
        return

    # An empty payload clears the stored JSON instead of being parsed.
    content.set_json(None if data == '' else json.loads(data))
    content.status = Status.ready if error is None else Status.error
    content.error = error
    content.put()

    default_version = Library.default_version_for_key_async(version_key.parent()).get_result()
    if version_key.id() == default_version:
        util.new_task(util.update_indexes_task(owner, repo), target='manage')
def test_ingest_version(self):
    """Ingesting a version marks it ready and stores readme, readme.html and bower content."""
    metadata = '{"full_name": "NSS Bob", "stargazers_count": 420, "subscribers_count": 419, "forks": 418, "updated_at": "2011-8-10T13:47:12Z"}'
    library_key = Library(id='org/repo', metadata=metadata).put()
    Version(id='v1.0.0', parent=library_key, sha='sha').put()

    # Canned upstream responses for the readme, bower.json and markdown rendering.
    self.respond_to_github(r'https://api.github.com/repos/org/repo/readme\?ref=sha',
                           '{"content":"%s"}' % b64encode('README'))
    self.respond_to('https://raw.githubusercontent.com/org/repo/sha/bower.json', '{}')
    self.respond_to_github('https://api.github.com/markdown', '<html>README</html>')

    task_url = util.ingest_version_task('org', 'repo', 'v1.0.0')
    response = self.app.get(task_url, headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    version = Version.get_by_id('v1.0.0', parent=library_key)
    self.assertIsNone(version.error)
    self.assertEqual(version.status, Status.ready)
    self.assertFalse(version.preview)

    versions = Library.versions_for_key_async(library_key).get_result()
    self.assertEqual(['v1.0.0'], versions)

    def stored_content(content_id):
        # Load a Content entity stored under the ingested version.
        return ndb.Key(Library, 'org/repo', Version, 'v1.0.0', Content, content_id).get()

    readme = stored_content('readme')
    self.assertEqual(readme.content, 'README')

    readme_html = stored_content('readme.html')
    self.assertEqual(readme_html.content, '<html>README</html>')

    bower = stored_content('bower')
    self.assertEqual(bower.get_json(), {})
def update_readme(self, is_npm_package):
    """Fetch the version's readme and store it as raw and rendered Content.

    For an npm package the readme path comes from the library's registry
    metadata ('readmeFilename', defaulting to 'README.md') and the file is
    fetched through util.unpkg_get. Otherwise the readme comes from the
    GitHub readme endpoint at self.sha; a 404 there means there is no readme.

    Args:
        is_npm_package: True to load the readme via unpkg, False to use GitHub.

    Returns:
        The result of self.retry(...) or self.error(...) when fetching,
        storing or converting the readme fails; otherwise None.
    """
    if is_npm_package:
        # Load registry metadata to fetch readme path.
        library = Library.get_by_id(Library.id(self.owner, self.repo))
        # A missing library or empty registry metadata falls back to the
        # default path instead of failing on None.get(...).
        if library is not None and library.registry_metadata:
            registry_metadata = json.loads(library.registry_metadata)
        else:
            registry_metadata = {}
        readme_path = registry_metadata.get('readmeFilename', 'README.md')
        response = util.unpkg_get(self.owner, self.repo, self.version, readme_path)
        readme = response.content
    else:
        # Load readme from GitHub endpoint.
        response = util.github_get('repos', self.owner, self.repo, 'readme', params={"ref": self.sha})
        if response.status_code == 200:
            readme = base64.b64decode(json.loads(response.content)['content'])
        elif response.status_code == 404:
            readme = None
        else:
            return self.retry('error fetching readme (%d)' % response.status_code)

    if readme is not None:
        # Store the raw readme markdown.
        try:
            Content(parent=self.version_key, id='readme', content=readme,
                    status=Status.ready, etag=response.headers.get('ETag', None)).put()
        except db.BadValueError:
            return self.error("Could not store README.md as a utf-8 string", ErrorCodes.Version_utf)

        # Convert markdown to HTML and store the result.
        response = util.github_markdown(readme)
        if response.status_code == 200:
            Content(parent=self.version_key, id='readme.html', content=response.content,
                    status=Status.ready, etag=response.headers.get('ETag', None)).put()
        else:
            return self.retry('error converting readme to markdown (%d)' % response.status_code)
def get(self, owner, repo, ver=None):
    """Write the stored analysis of a library version as JSON.

    Owner and repo are lowercased. When no version is given, the library's
    latest version is used. Responds 404 when there is no version or no
    'analysis' Content, and 400 (with the JSON body) when the analysis status
    is not ready.
    """
    headers = self.response.headers
    headers['Access-Control-Allow-Origin'] = '*'

    owner = owner.lower()
    repo = repo.lower()
    library_id = Library.id(owner, repo)
    library_key = ndb.Key(Library, library_id)

    if ver is None:
        ver = yield Library.latest_version_for_key_async(library_key)
    if ver is None:
        self.response.set_status(404)
        return

    version_key = ndb.Key(Library, Library.id(owner, repo), Version, ver)
    analysis = Content.get_by_id('analysis', parent=version_key,
                                 read_policy=ndb.EVENTUAL_CONSISTENCY)
    if analysis is None:
        self.response.set_status(404)
        return

    headers['Content-Type'] = 'application/json'

    status = analysis.status
    result = {'status': status}
    if status == Status.ready:
        result['content'] = json.loads(analysis.content)
    if status == Status.error:
        result['error'] = analysis.error
    if status != Status.ready:
        self.response.set_status(400)

    headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(result))
def test_version_cache(self):
    """The cached version list is empty until VersionCache.update runs and follows later updates."""
    library_key = ndb.Key(Library, 'a/b')

    seeded_versions = (
        ('v2.0.0', Status.ready),
        ('v1.0.0', Status.ready),
        ('v3.0.0', Status.ready),
        ('v3.0.X', Status.ready),
        ('v4.0.0', Status.error),
        ('v5.0.0', Status.pending),
        ('xxx', Status.ready),
    )
    for version_id, status in seeded_versions:
        Version(id=version_id, sha='x', status=status, parent=library_key).put()

    # Nothing is cached before the first update.
    cached = yield Library.versions_for_key_async(library_key)
    self.assertEqual(cached, [])

    self.assertTrue(VersionCache.update(library_key))
    cached = yield Library.versions_for_key_async(library_key)
    self.assertEqual(cached, ['v1.0.0', 'v2.0.0', 'v3.0.0', 'v4.0.0'])

    Version(id='v6.0.0', sha='x', status=Status.ready, parent=library_key).put()
    self.assertTrue(VersionCache.update(library_key))
    cached = yield Library.versions_for_key_async(library_key)
    self.assertEqual(cached, ['v1.0.0', 'v2.0.0', 'v3.0.0', 'v4.0.0', 'v6.0.0'])
def test_update_collection(self):
    """Updating a collection whose master sha changed creates pending version v0.0.2."""
    library = Library(id='org/repo', tags=['v0.0.1'], collection_sequence_number=1,
                      kind='collection', spdx_identifier='MIT')
    library_key = library.put()
    Version(id='v0.0.1', parent=library_key, sha="old", status=Status.ready).put()

    not_modified = {'status': 304}
    self.respond_to_github('https://api.github.com/repos/org/repo', not_modified)
    self.respond_to_github('https://api.github.com/repos/org/repo/contributors', not_modified)
    self.respond_to_github('https://api.github.com/repos/org/repo/stats/participation', '{}')
    self.respond_to_github('https://api.github.com/repos/org/repo/git/refs/heads/master',
                           """{ "ref": "refs/heads/master", "object": {"sha": "new-master-sha"} }""")

    response = self.app.get(util.update_library_task('org/repo'),
                            headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    updated_library = library_key.get()
    self.assertEqual(updated_library.error, None)
    self.assertEqual(updated_library.status, Status.ready)

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    expected_urls = [
        util.ingest_analysis_task('org', 'repo', 'v0.0.2', 'new-master-sha'),
        util.ingest_version_task('org', 'repo', 'v0.0.2'),
    ]
    self.assertEqual(expected_urls, queued_urls)

    new_version = Version.get_by_id('v0.0.2', parent=library_key)
    self.assertEqual(new_version.sha, 'new-master-sha')
    self.assertEqual(new_version.status, Status.pending)
def handle_get(self):
    """Regenerate the Sitemap entities for elements, collections and authors.

    Library sitemaps list the ids of libraries of the matching kind that are
    ready and not shallowly ingested; the authors sitemap lists every Author
    id. All queries are keys-only and eventually consistent.
    """
    fetch_options = {'keys_only': True, 'read_policy': ndb.EVENTUAL_CONSISTENCY}

    library_sitemaps = (
        ('element', 'elements', '%d elements'),
        ('collection', 'collections', '%d collections'),
    )
    for kind, sitemap_id, log_format in library_sitemaps:
        query = Library.query().filter(Library.kind == kind)
        # pylint: disable=singleton-comparison
        query = query.filter(Library.shallow_ingestion == False)
        query = query.filter(Library.status == Status.ready)
        sitemap = Sitemap(id=sitemap_id)
        sitemap.pages = [key.id() for key in query.fetch(**fetch_options)]
        sitemap.put()
        logging.info(log_format, len(sitemap.pages))

    author_sitemap = Sitemap(id='authors')
    author_sitemap.pages = [key.id() for key in Author.query().fetch(**fetch_options)]
    author_sitemap.put()
    logging.info('%d authors', len(author_sitemap.pages))
def get(self, owner, repo, version=None):
    """Write brief metadata for the collections associated with a library version.

    Falls back to the library's default version when none is given and
    responds 404 if there is none. Collection entries whose brief metadata is
    None are left out of the JSON result.
    """
    response_headers = self.response.headers
    response_headers['Access-Control-Allow-Origin'] = '*'
    response_headers['Content-Type'] = 'application/json'

    library_key = ndb.Key(Library, Library.id(owner, repo))
    if version is None:
        version = yield Library.default_version_for_key_async(library_key)
        if version is None:
            self.response.set_status(404)
            return

    version_key = ndb.Key(Library, library_key.id(), Version, version)
    collection_versions = yield Version.collections_for_key_async(version_key)

    # Start every metadata lookup before waiting on any of them.
    pending = [
        LibraryMetadata.brief_async(entry.key.parent(), entry.key.id())
        for entry in collection_versions
    ]

    collections = []
    for future in pending:
        brief = yield future
        if brief is None:
            continue
        collections.append(brief)

    self.response.write(json.dumps({
        'results': collections,
        'count': len(collections),
    }))
def test_normal(self):
    """An 'opened' pull_request event for a stored library queues exactly one task."""
    base_repository = {
        'owner': {'login': '******'},
        'name': 'repo',
        'full_name': 'owner/repo'
    }
    head_repository = {
        'owner': {'login': '******'},
        'name': 'pull_repo',
        'full_name': 'pull_owner/pull_repo'
    }
    payload = {
        'action': 'opened',
        'repository': base_repository,
        'pull_request': {
            'head': {'sha': 'sha', 'repo': head_repository},
            'url': 'github_pr_url'
        }
    }

    Library(id='owner/repo').put()
    self.respond_to('https://api.github.com/repos/owner/repo/statuses', {'status': 201})

    self.app.post('/api/preview-event', params=json.dumps(payload),
                  headers={'X-Github-Event': 'pull_request'}, status=200)

    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 1)
def test_renamed_repo_is_renamed(self):
    """Updating a repo that GitHub reports under a new name deletes it and ensures the new one."""
    library = Library(id='org/repo', metadata_etag='a', contributors_etag='b', tags_etag='c',
                      tag_map='{}', spdx_identifier='MIT')
    library.put()

    # The reported owner login must match the owner the ensure task below is
    # expected to be queued for.
    self.respond_to_github(
        'https://api.github.com/repos/org/repo',
        json.dumps({
            "name": "newname",
            "owner": {
                "login": "newowner"
            },
        }))

    response = self.app.get('/task/update/org/repo', headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    library = library.key.get()
    self.assertIsNone(library)

    tasks = self.tasks.get_filtered_tasks()
    self.assertEqual([
        util.ensure_library_task('newowner', 'newname'),
    ], [task.url for task in tasks])
def handle_get(self, owner, repo, latest=False):
    """Trigger analysis for a library's versions.

    Responds 404 when the library cannot be found. With latest set, only the
    library's default version is analyzed (if it exists); otherwise every
    version whose status is ready is analyzed.
    """
    self.init_library(owner, repo)
    if self.library is None:
        self.response.set_status(404)
        self.response.write('could not find library: %s' % Library.id(owner, repo))
        return

    library_key = self.library.key

    if not latest:
        ready_versions = Version.query(Version.status == Status.ready, ancestor=library_key).fetch()
        for ready_version in ready_versions:
            self.trigger_analysis(ready_version.key.id(), ready_version.sha, transactional=False)
        return

    default_id = Library.default_version_for_key_async(library_key).get_result()
    if not default_id:
        return
    default_version = Version.get_by_id(default_id, parent=library_key)
    if default_version is None:
        return
    self.trigger_analysis(default_id, default_version.sha, transactional=False)
def get(self, owner, repo, version=None):
    """Write brief metadata for the bower dependencies of a library version.

    Falls back to the library's default version when none is given. Responds
    404 when there is no version or no 'bower' Content. For each dependency
    that Dependency.from_string can parse, the last entry of the dependency
    library's version list that matches the requested spec is selected;
    dependencies with no matching version or no brief metadata are omitted.
    """
    self.response.headers['Access-Control-Allow-Origin'] = '*'
    self.response.headers['Content-Type'] = 'application/json'

    library_key = ndb.Key(Library, Library.id(owner, repo))
    if version is None:
        version = yield Library.default_version_for_key_async(library_key)
        if version is None:
            self.response.set_status(404)
            return

    version_key = ndb.Key(Library, library_key.id(), Version, version)
    bower = yield Content.get_by_id_async('bower', parent=version_key)
    if bower is None:
        self.response.set_status(404)
        return

    # Treat an empty/None bower JSON as having no dependencies.
    bower_json = bower.get_json() or {}
    bower_dependencies = bower_json.get('dependencies', {})

    def matches(candidate, spec):
        try:
            return versiontag.match(candidate, spec)
        except ValueError:
            # FIXME: What other cases do we need to support here?
            return False

    # Kick off all version lookups first so they can run concurrently.
    lookups = []
    for spec_string in bower_dependencies.values():
        dependency = Dependency.from_string(spec_string)
        if dependency is None:
            continue
        # Use one lowercased key for both the version lookup and the metadata
        # lookup so mixed-case dependency names resolve consistently.
        dependency_library_key = ndb.Key(
            Library, Library.id(dependency.owner.lower(), dependency.repo.lower()))
        versions_future = Library.versions_for_key_async(dependency_library_key)
        lookups.append((dependency, dependency_library_key, versions_future))

    dependency_futures = []
    for dependency, dependency_library_key, versions_future in lookups:
        versions = yield versions_future
        # Scan from the end without mutating the list handed back by the lookup.
        matching = [candidate for candidate in reversed(versions)
                    if matches(candidate, dependency.version)]
        if matching:
            dependency_futures.append(
                LibraryMetadata.brief_async(dependency_library_key, matching[0]))

    results = []
    for future in dependency_futures:
        dependency_result = yield future
        if dependency_result is not None:
            results.append(dependency_result)

    result = {
        'results': results,
        'count': len(results),
    }
    self.response.write(json.dumps(result))
def test_ensure_when_present(self):
    """Ensuring a library that is already stored queues no tasks."""
    Library(id=Library.id('owner', 'repo')).put()

    task_url = util.ensure_library_task('owner', 'repo')
    response = self.app.get(task_url, headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual([], queued_urls)
def init_library(self, owner, repo, kind=None, create=True):
    """Set self.owner, self.repo and self.library for the given repository.

    Args:
        owner: Repository owner; stored lowercased on self.owner.
        repo: Repository name; stored lowercased on self.repo.
        kind: Library kind; required (asserted) when create is true.
        create: When true, the library is obtained through
            Library.maybe_create_with_kind; otherwise it is looked up by id
            and self.library is whatever Library.get_by_id returns.
    """
    self.owner = owner.lower()
    self.repo = repo.lower()
    if create:
        assert kind is not None
        self.library = Library.maybe_create_with_kind(self.owner, self.repo, kind)
    else:
        # Look up with the lowercased names, matching the create path above.
        self.library = Library.get_by_id('%s/%s' % (self.owner, self.repo))
def update_metadata(self):
    """Refresh the library's GitHub metadata, contributors and participation stats.

    Each resource is requested with its stored ETag. A 200 response replaces
    the stored payload, ETag and update timestamp and marks the library dirty;
    a 304 leaves the stored data untouched. For participation stats a 202 is
    also accepted and skipped.

    Returns:
        The result of self.error(...) when a payload is not valid JSON, or of
        self.retry(...) on an unexpected status code; otherwise None.

    Raises:
        RequestAborted: when the metadata names a different owner/repo (the
            library is deleted and an ensure task is queued for the new name)
            or the repo returns 404 (the library is deleted).
    """
    headers = {'Accept': 'application/vnd.github.drax-preview+json'}
    response = util.github_get('repos', self.owner, self.repo,
                               etag=self.library.metadata_etag, headers=headers)
    if response.status_code == 200:
        try:
            metadata = json.loads(response.content)
        except ValueError:
            return self.error("could not parse metadata")

        repo = metadata.get('name', '').lower()
        owner = metadata.get('owner', {}).get('login', '').lower()
        if repo != '' and owner != '' and (repo != self.repo or owner != self.owner):
            logging.info('deleting renamed repo %s', Library.id(self.owner, self.repo))
            delete_library(self.library.key)
            task_url = util.ensure_library_task(owner, repo)
            util.new_task(task_url, target='manage')
            # Format the message here: unlike logging calls, exceptions do
            # not interpolate extra positional arguments.
            raise RequestAborted('repo has been renamed to %s' % Library.id(owner, repo))

        self.library.metadata = response.content
        self.library.metadata_etag = response.headers.get('ETag', None)
        self.library.metadata_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 404:
        logging.info('deleting non-existing repo %s', Library.id(self.owner, self.repo))
        delete_library(self.library.key)
        raise RequestAborted('repo no longer exists')
    elif response.status_code != 304:
        return self.retry('could not update repo metadata (%d)' % response.status_code)

    response = util.github_get('repos', self.owner, self.repo, 'contributors',
                               etag=self.library.contributors_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse contributors")

        self.library.contributors = response.content
        self.library.contributors_etag = response.headers.get('ETag', None)
        self.library.contributors_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code != 304:
        return self.retry('could not update contributors (%d)' % response.status_code)

    # The path must not carry a trailing space.
    response = util.github_get('repos', self.owner, self.repo, 'stats/participation',
                               etag=self.library.participation_etag)
    if response.status_code == 200:
        try:
            json.loads(response.content)
        except ValueError:
            return self.error("could not parse stats/participation")

        self.library.participation = response.content
        self.library.participation_etag = response.headers.get('ETag', None)
        self.library.participation_updated = datetime.datetime.now()
        self.library_dirty = True
    elif response.status_code == 202:
        # GitHub is "computing" the data. We'll try again next update cycle.
        # TODO: Alternatively we could retry this task
        pass
    elif response.status_code != 304:
        return self.retry('could not update stats/participation (%d)' % response.status_code)
def test_from_url(self):
    """github_from_url extracts (owner, repo) from shorthand, git+https and git URLs."""
    expected = ('owner', 'repo')
    urls = (
        'owner/repo',
        'git+https://github.com/owner/repo.git',
        'git://github.com/owner/repo.git',
    )
    for url in urls:
        self.assertEqual(Library.github_from_url(url), expected)
def init_library(self, owner, repo, create=True):
    """Set self.owner, self.repo and self.library for the given repository.

    Args:
        owner: Repository owner; stored lowercased on self.owner.
        repo: Repository name; stored lowercased on self.repo.
        create: When true, the library is fetched or inserted and self.is_new
            records whether it has neither metadata nor an error yet. When
            false, the library is only looked up and self.library is whatever
            Library.get_by_id returns.

    Raises:
        RequestAborted: if the library exists and its status is suppressed.
    """
    self.owner = owner.lower()
    self.repo = repo.lower()
    # Build the id from the lowercased names stored on self.
    library_id = Library.id(self.owner, self.repo)
    if create:
        self.library = Library.get_or_insert(library_id)
        self.is_new = self.library.metadata is None and self.library.error is None
    else:
        self.library = Library.get_by_id(library_id)
    # Guard against a missing library in the lookup-only path.
    if self.library is not None and self.library.status == Status.suppressed:
        raise RequestAborted('library is suppressed')
def test_update_suppressed_is_noop(self):
    """Updating a suppressed library queues no tasks and leaves it suppressed."""
    suppressed = Library(id='org/repo', status=Status.suppressed, spdx_identifier='MIT')
    suppressed.put()

    queue_headers = {'X-AppEngine-QueueName': 'default'}
    response = self.app.get('/task/update/org/repo', headers=queue_headers)
    self.assertEqual(response.status_int, 200)

    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 0)

    reloaded = suppressed.key.get()
    self.assertEqual(reloaded.status, Status.suppressed)
def init_library(self, scope, package, create=True):
    """Set self.scope, self.package and self.library for the given package.

    Args:
        scope: Package scope; stored lowercased on self.scope.
        package: Package name; stored lowercased on self.package.
        create: When true, the library is fetched or inserted and self.is_new
            records whether it has neither metadata nor an error yet. When
            false, the library is only looked up and self.library is whatever
            Library.get_by_id returns.

    Raises:
        RequestAborted: if the library exists and its status is suppressed.
    """
    self.scope = scope.lower()
    self.package = package.lower()
    library_id = Library.id(self.scope, self.package)
    if create:
        self.library = Library.get_or_insert(library_id)
        self.is_new = self.library.metadata is None and self.library.error is None
    else:
        self.library = Library.get_by_id(library_id)
    # Guard against a missing library in the lookup-only path.
    if self.library is not None and self.library.status == Status.suppressed:
        raise RequestAborted('library is suppressed')
def test_update_respects_304(self):
    """An update whose metadata, contributors and tags requests return 304 queues no tasks."""
    Library(id='org/repo', metadata_etag='a', contributors_etag='b', tags_etag='c',
            tag_map='{}', spdx_identifier='MIT').put()

    not_modified_urls = (
        'https://api.github.com/repos/org/repo',
        'https://api.github.com/repos/org/repo/contributors',
        'https://api.github.com/repos/org/repo/tags',
    )
    for url in not_modified_urls:
        self.respond_to_github(url, {'status': 304})
    self.respond_to_github('https://api.github.com/repos/org/repo/stats/participation', '{}')

    response = self.app.get('/task/update/org/repo',
                            headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 0)
def test_update_all(self):
    """update-all queues a library update task followed by an author update task."""
    library_key = Library(id='owner/repo').put()
    author_key = Author(id='owner').put()

    response = self.app.get('/manage/update-all',
                            headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    expected_urls = [
        util.update_library_task(library_key.id()),
        util.update_author_task(author_key.id()),
    ]
    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual(expected_urls, queued_urls)
def test_update_deletes_missing_repo(self):
    """Updating a repo that GitHub reports as 404 deletes the library and its version."""
    stored_library = Library(id='org/repo', metadata_etag='a', contributors_etag='b',
                             tags_etag='c', spdx_identifier='MIT')
    stored_library.put()
    stored_version = Version(parent=stored_library.key, id='v1.0.0', sha='lol')
    stored_version.put()

    self.respond_to_github('https://api.github.com/repos/org/repo', {'status': 404})

    response = self.app.get('/task/update/org/repo',
                            headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    reloaded_version = stored_version.key.get()
    reloaded_library = stored_library.key.get()
    self.assertIsNone(reloaded_library)
    self.assertIsNone(reloaded_version)
def test_ingest_preview(self):
    """Ingesting a preview creates a shallow library and a pending preview version."""
    self.respond_to_github('https://api.github.com/repos/org/repo',
                           '{"owner":{"login":"******"},"name":"repo"}')
    self.respond_to_github(
        'https://api.github.com/repos/org/repo/contributors', '["a"]')
    self.respond_to_github(
        'https://api.github.com/repos/org/repo/stats/participation', '{}')
    self.respond_to_github(
        'https://raw.githubusercontent.com/org/repo/master/bower.json',
        '{"license": "MIT"}')

    response = self.app.get(util.ingest_preview_task('org', 'repo'),
                            params={
                                'commit': 'commit-sha',
                                'url': 'url'
                            },
                            headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    library = Library.get_by_id('org/repo')
    self.assertIsNotNone(library)
    self.assertIsNone(library.error)
    self.assertTrue(library.shallow_ingestion)

    # assertEqual replaces the deprecated assertEquals alias, matching the
    # rest of this test.
    version = Version.get_by_id('commit-sha', parent=library.key)
    self.assertEqual(version.status, Status.pending)
    self.assertEqual(version.sha, 'commit-sha')
    self.assertEqual(version.url, 'url')
    self.assertTrue(version.preview)

    tasks = self.tasks.get_filtered_tasks()
    self.assertEqual(len(tasks), 2)
    self.assertEqual([
        util.ingest_analysis_task('org', 'repo', 'commit-sha'),
        util.ingest_version_task('org', 'repo', 'commit-sha'),
    ], [task.url for task in tasks])
def test_ingest_element_no_versions(self):
    """Ingesting an element with no tags records a coded error and only ensures the author."""
    metadata_json = '{"owner":{"login":"******"},"name":"repo"}'
    self.respond_to_github(
        'https://raw.githubusercontent.com/org/repo/master/bower.json',
        '{"license": "MIT"}')
    self.respond_to_github('https://api.github.com/repos/org/repo', metadata_json)
    self.respond_to_github('https://api.github.com/repos/org/repo/contributors', '["a"]')
    self.respond_to_github('https://api.github.com/repos/org/repo/tags', '''[]''')
    self.respond_to_github('https://api.github.com/repos/org/repo/stats/participation', '{}')

    task_url = util.ingest_library_task('org', 'repo')
    response = self.app.get(task_url, headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    library = Library.get_by_id('org/repo')
    self.assertIsNotNone(library)
    self.assertIsNotNone(library.error)
    # The stored error is JSON that carries a 'code' entry.
    error_code = json.loads(library.error).get('code', None)
    self.assertIsNotNone(error_code)

    self.assertEqual(library.metadata, '{"owner":{"login":"******"},"name":"repo"}')
    self.assertEqual(library.contributors, '["a"]')
    self.assertEqual(library.tags, [])

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual([
        util.ensure_author_task('org'),
    ], queued_urls)
def test_update_doesnt_ingest_older_versions(self):
    """A newly reported tag older than the ingested version queues no tasks."""
    library_key = Library(id='org/repo',
                          tags=['v0.1.0', 'v1.0.0', 'v2.0.0'],
                          spdx_identifier='MIT').put()
    Version(id='v1.0.0', parent=library_key, sha="old", status=Status.ready).put()
    VersionCache.update(library_key)

    repo_url = 'https://api.github.com/repos/org/repo'
    self.respond_to_github('https://api.github.com/repos/org/repo', {'status': 304})
    self.respond_to_github('https://api.github.com/repos/org/repo/contributors', {'status': 304})
    self.respond_to_github(
        'https://api.github.com/repos/org/repo/tags',
        """[ {"name": "v0.5.0", "commit": {"sha": "new"}}, {"name": "v1.0.0", "commit": {"sha": "old"}} ]""")
    self.respond_to_github('https://api.github.com/repos/org/repo/stats/participation', '{}')
    del repo_url  # kept only as a reading aid for the URLs above

    task_url = util.update_library_task('org/repo')
    response = self.app.get(task_url, headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual([], queued_urls)
def test_analyze_leaves_existing_content_when_reanalyzing(self):
    """Re-running analysis keeps the stored analysis content and queues one analysis task."""
    library_key = Library(id='owner/repo').put()
    version_key = Version(id='v1.1.1', parent=library_key, sha='sha', status='ready').put()

    existing = Content(id='analysis', parent=version_key, status=Status.pending)
    existing.content = 'existing data'
    existing.status = Status.ready
    existing.put()

    response = self.app.get('/task/analyze/owner/repo',
                            headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    reloaded = Content.get_by_id('analysis', parent=version_key)
    self.assertEqual(reloaded.content, 'existing data')
    self.assertEqual(reloaded.status, Status.ready)

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual([
        util.ingest_analysis_task('owner', 'repo', 'v1.1.1'),
    ], queued_urls)
def test_ingest_element(self):
    """Ingesting an element stores its metadata and tags and queues work for v1.0.0."""
    canned_responses = (
        ('https://raw.githubusercontent.com/org/repo/master/bower.json', '{"license": "MIT"}'),
        ('https://api.github.com/repos/org/repo', '{"owner":{"login":"******"},"name":"repo"}'),
        ('https://api.github.com/repos/org/repo/contributors', '["a"]'),
        ('https://api.github.com/repos/org/repo/tags',
         '''[{"name": "v0.5.0", "commit": {"sha": "old"}},{"name": "v1.0.0", "commit": {"sha": "lol"}}]'''),
        ('https://api.github.com/repos/org/repo/stats/participation', '{}'),
    )
    for url, body in canned_responses:
        self.respond_to_github(url, body)

    task_url = util.ingest_library_task('org', 'repo')
    response = self.app.get(task_url, headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    library = Library.get_by_id('org/repo')
    self.assertIsNotNone(library)
    self.assertIsNone(library.error)
    self.assertEqual(library.metadata, '{"owner":{"login":"******"},"name":"repo"}')
    self.assertEqual(library.contributors, '["a"]')
    self.assertEqual(library.tags, ['v0.5.0', 'v1.0.0'])

    version_key = ndb.Key(Library, 'org/repo', Version, 'v1.0.0')
    version = version_key.get()
    self.assertIsNotNone(version)
    self.assertIsNone(version.error)
    self.assertEqual(version.sha, 'lol')

    expected_urls = [
        util.ingest_analysis_task('org', 'repo', 'v1.0.0'),
        util.ensure_author_task('org'),
        util.ingest_version_task('org', 'repo', 'v1.0.0'),
    ]
    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual(expected_urls, queued_urls)
def test_ingest_collection(self):
    """Ingesting a collection creates pending version v0.0.1 at the master sha."""
    canned_responses = (
        ('https://raw.githubusercontent.com/org/repo/master/bower.json',
         '{"keywords": ["element-collection"], "license": "MIT"}'),
        ('https://api.github.com/repos/org/repo', '{"owner":{"login":"******"},"name":"repo"}'),
        ('https://api.github.com/repos/org/repo/contributors', '["a"]'),
        ('https://api.github.com/repos/org/repo/git/refs/heads/master',
         '{"ref": "refs/heads/master", "object": {"sha": "master-sha"}}'),
        ('https://api.github.com/repos/org/repo/stats/participation', '{}'),
    )
    for url, body in canned_responses:
        self.respond_to_github(url, body)

    task_url = util.ingest_library_task('org', 'repo')
    response = self.app.get(task_url, headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    library = Library.get_by_id('org/repo')
    self.assertIsNotNone(library)
    self.assertIsNone(library.error)
    self.assertEqual(library.metadata, '{"owner":{"login":"******"},"name":"repo"}')
    self.assertEqual(library.contributors, '["a"]')
    self.assertEqual(library.tags, ['v0.0.1'])

    version_key = ndb.Key(Library, 'org/repo', Version, 'v0.0.1')
    version = version_key.get()
    self.assertIsNone(version.error)
    self.assertEqual(version.status, Status.pending)
    self.assertEqual(version.sha, 'master-sha')

    expected_urls = [
        util.ingest_analysis_task('org', 'repo', 'v0.0.1', 'master-sha'),
        util.ensure_author_task('org'),
        util.ingest_version_task('org', 'repo', 'v0.0.1'),
    ]
    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual(expected_urls, queued_urls)
def test_add_element(self):
    """Adding an element queues its ingestion, which then stores it and queues v1.0.0."""
    add_response = self.app.get('/manage/add/element/org/repo')
    self.assertEqual(add_response.status_int, 200)
    self.assertEqual(add_response.normal_body, 'OK')

    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 1)
    self.assertEqual(queued[0].url, util.ingest_library_task('org', 'repo', 'element'))

    self.respond_to_github('https://api.github.com/repos/org/repo', 'metadata bits')
    self.respond_to_github('https://api.github.com/repos/org/repo/contributors', '["a"]')
    self.respond_to_github('https://api.github.com/repos/org/repo/git/refs/tags',
                           '[{"ref": "refs/tags/v1.0.0", "object": {"sha": "lol"}}]')

    # Run the queued ingestion task by requesting its URL directly.
    ingest_response = self.app.get(util.ingest_library_task('org', 'repo', 'element'))
    self.assertEqual(ingest_response.status_int, 200)

    library = Library.get_by_id('org/repo')
    self.assertIsNotNone(library)
    self.assertIsNone(library.error)
    self.assertEqual(library.kind, 'element')
    self.assertEqual(library.metadata, 'metadata bits')
    self.assertEqual(library.contributors, '["a"]')
    self.assertEqual(library.contributor_count, 1)

    version = ndb.Key(Library, 'org/repo', Version, 'v1.0.0').get()
    self.assertIsNone(version.error)
    self.assertEqual(version.sha, 'lol')

    queued = self.tasks.get_filtered_tasks()
    self.assertEqual(len(queued), 2)
    expected_url = util.ingest_version_task('org', 'repo', 'v1.0.0') + '?latestVersion=True'
    self.assertEqual(queued[1].url, expected_url)
def handle_get(self):
    """Delete every Library and Author, then clear the 'repo' search index.

    Libraries and authors are deleted in batches of up to 10 keys per query
    until a pass deletes nothing. Remaining search documents are then removed
    batch by batch, writing each batch of ids to the response, and 'Finished'
    is written at the end.
    """
    while True:
        deleted_something = False
        for library_key in Library.query().fetch(keys_only=True, limit=10):
            delete_library(library_key, response_for_logging=self.response)
            deleted_something = True
        for author_key in Author.query().fetch(keys_only=True, limit=10):
            delete_author(author_key, response_for_logging=self.response)
            deleted_something = True
        if not deleted_something:
            break

    # Delete any remaining entries in the search index.
    index = search.Index('repo')
    while True:
        docs = [document.doc_id for document in index.get_range(ids_only=True)]
        if not docs:
            break
        # Interpolate the ids into the message; concatenating them would
        # leave a literal '%s' in the output.
        self.response.write('search docs: %s\n' % repr(docs))
        index.delete(docs)

    self.response.write('Finished')
def test_analyzer_index_empty(self):
    """Indexing empty analyzer data yields a document with no element or behavior fields."""
    library_key = Library(id='owner/repo', metadata="""{ "full_name": "full-name" }""").put()
    version_key = Version(id='v1.1.1', parent=library_key, sha='sha', status='ready').put()

    analysis = Content(id='analysis', parent=version_key, status=Status.pending)
    analysis.json = {"analyzerData": {}}
    analysis.status = Status.ready
    analysis.put()

    VersionCache.update(library_key)

    task_url = util.update_indexes_task('owner', 'repo')
    response = self.app.get(task_url, headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    document = search.Index('repo').get('owner/repo')
    self.assertIsNotNone(document)
    self.assertTrue(len(document.fields) > 0)

    def fields_named(name):
        # All fields of the indexed document carrying the given name.
        return [field for field in document.fields if field.name == name]

    self.assertEqual(len(fields_named('element')), 0)
    self.assertEqual(len(fields_named('behavior')), 0)
def handle_get(self):
    """Queue an update task for every Library and then every Author.

    Does nothing except write a notice when the 'update' queue already holds
    tasks. Keys are read in pages of 50 and each task goes to the 'update'
    queue with target 'manage'; the number queued per entity type is logged.
    """
    if taskqueue.Queue('update').fetch_statistics().tasks > 0:
        self.response.write('update already in progress')
        return

    def queue_updates(query, task_url_for):
        # Walk the query page by page, queueing one task per key; returns the count.
        queued = 0
        cursor = None
        has_more = True
        while has_more:
            keys, cursor, has_more = query.fetch_page(50, keys_only=True, start_cursor=cursor)
            for key in keys:
                queued += 1
                util.new_task(task_url_for(key.id()), target='manage', queue_name='update')
        return queued

    library_count = queue_updates(Library.query(), util.update_library_task)
    logging.info('triggered %d library updates', library_count)

    author_count = queue_updates(Author.query(), util.update_author_task)
    logging.info('triggered %d author updates', author_count)
def handle_get(self, owner, repo, version):
    """Delete a version with all entities under it and refresh the version cache.

    An index update task is queued when VersionCache.update returns a truthy
    value for the library.
    """
    # FIXME: Make deletion transactional with check on library that tag is excluded.
    version_key = ndb.Key(Library, Library.id(owner, repo), Version, version)

    # The ancestor query covers the version entity and everything beneath it.
    doomed_keys = ndb.Query(ancestor=version_key).iter(keys_only=True)
    ndb.delete_multi(doomed_keys)

    cache_changed = VersionCache.update(version_key.parent())
    if not cache_changed:
        return
    util.new_task(util.update_indexes_task(owner, repo), target='manage')
def handle_get(self):
    """Delete every Library and Author, then clear the 'repo' search index.

    Libraries and authors are removed in batches of up to 10 keys per query
    until a full pass deletes nothing. Search documents are then deleted
    batch by batch, with each batch of ids written to the response, followed
    by a final 'Finished'.
    """
    while True:
        deleted_something = False
        for library_key in Library.query().fetch(keys_only=True, limit=10):
            delete_library(library_key, response_for_logging=self.response)
            deleted_something = True
        for author_key in Author.query().fetch(keys_only=True, limit=10):
            delete_author(author_key, response_for_logging=self.response)
            deleted_something = True
        if not deleted_something:
            break

    # Delete any remaining entries in the search index.
    index = search.Index('repo')
    while True:
        docs = [document.doc_id for document in index.get_range(ids_only=True)]
        if not docs:
            break
        # Format the ids into the message rather than appending them after a
        # literal '%s'.
        self.response.write('search docs: %s\n' % repr(docs))
        index.delete(docs)

    self.response.write('Finished')
def test_ingest_preview(self):
    # Running the preview ingestion task for org/repo should create a
    # shallow-ingested library, a pending preview version keyed by the commit
    # sha, and queue the analysis and version ingestion tasks.
    self.respond_to_github(
        'https://api.github.com/repos/org/repo',
        '{"owner":{"login":"******"},"name":"repo"}')
    self.respond_to_github(
        'https://api.github.com/repos/org/repo/contributors', '["a"]')
    self.respond_to_github(
        'https://api.github.com/repos/org/repo/stats/participation', '{}')
    self.respond_to_github(
        'https://raw.githubusercontent.com/org/repo/master/bower.json',
        '{"license": "MIT"}')

    response = self.app.get(
        util.ingest_preview_task('org', 'repo'),
        params={'commit': 'commit-sha', 'url': 'url'},
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    library = Library.get_by_id('org/repo')
    self.assertIsNotNone(library)
    self.assertIsNone(library.error)
    self.assertTrue(library.shallow_ingestion)

    # assertEqual is used throughout: assertEquals is a deprecated alias and
    # was inconsistent with the other assertions in this test.
    version = Version.get_by_id('commit-sha', parent=library.key)
    self.assertEqual(version.status, Status.pending)
    self.assertEqual(version.sha, 'commit-sha')
    self.assertEqual(version.url, 'url')
    self.assertTrue(version.preview)

    tasks = self.tasks.get_filtered_tasks()
    self.assertEqual(len(tasks), 2)
    self.assertEqual([
        util.ingest_analysis_task('org', 'repo', 'commit-sha'),
        util.ingest_version_task('org', 'repo', 'commit-sha'),
    ], [task.url for task in tasks])
def get(self, terms):
    """Run a search against the 'repo' index and write a JSON response.

    Request parameters read here:
      noscore   - when present, match scoring is not applied.
      noresults - when present, results are omitted; scoring is disabled and
                  the count is always included.
      count     - when present, the number of matches is included.
      cursor    - web-safe cursor string used to continue a previous search.
      limit     - page size, capped at 20; a non-integer value yields a 400.

    A search.QueryError yields a 400 with the body 'bad query'. This function
    is a generator: it yields each LibraryMetadata.brief_async future and
    receives the resolved value back.
    """
    request = self.request
    self.response.headers['Access-Control-Allow-Origin'] = '*'

    include_results = request.get('noresults', None) is None
    if include_results:
        scoring = request.get('noscore', None) is None
        include_count = request.get('count', None) is not None
    else:
        # Count-only requests never score and always report the count.
        scoring = False
        include_count = True
    request_cursor = request.get('cursor', None)

    try:
        limit = min(20, int(request.get('limit', 20)))
    except ValueError:
        self.response.set_status(400)
        return

    index = search.Index('repo')
    cursor = search.Cursor(web_safe_string=request_cursor)
    try:
        # Accuracy refers to accurate till n results.
        accuracy = None
        if include_count:
            accuracy = 2000
        sort_options = None
        if scoring:
            sort_options = search.SortOptions(match_scorer=search.MatchScorer())
        query_options = search.QueryOptions(
            limit=limit,
            number_found_accuracy=accuracy,
            sort_options=sort_options,
            cursor=cursor)
        query = search.Query(query_string=terms, options=query_options)
        search_results = index.search(query)
        cursor = search_results.cursor
    except search.QueryError:
        self.response.set_status(400)
        self.response.write('bad query')
        return

    count = search_results.number_found
    if include_results:
        # Start every metadata lookup before waiting on any of them.
        pending = []
        for hit in search_results.results:
            (owner, repo) = hit.doc_id.split('/')
            # Value of the first field named 'version', or None when absent.
            version = next(
                (field.value for field in hit.fields if field.name == 'version'),
                None)
            library_key = ndb.Key(Library, Library.id(owner, repo))
            pending.append(
                LibraryMetadata.brief_async(library_key, version, assume_latest=True))

        results = []
        for future in pending:
            brief = yield future
            if brief is None:
                # Fixup count when we skip over incomplete entries.
                count -= 1
            else:
                results.append(brief)

    payload = {
        'cursor': cursor.web_safe_string if cursor and include_results else None,
    }
    if include_count:
        payload['count'] = count
    if include_results:
        payload['results'] = results

    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(payload))
def test_update_indexes(self):
    # Updating indexes for a collection whose bower content lists two
    # dependencies should queue an ensure-library task for each, create a
    # CollectionReference under each dependency, and index the collection.
    collection_library_key = Library(
        id='my/collection',
        status=Status.ready,
        kind='collection',
        metadata="""{ "full_name": "full-name" }""").put()
    collection_version_key = Version(
        id='v1.0.0',
        parent=collection_library_key,
        sha='sha',
        status=Status.ready).put()
    Content(
        id='bower',
        parent=collection_version_key,
        content="""{"dependencies": { "a": "org/element-1#1.0.0", "b": "org/element-2#1.0.0" }}""").put()
    VersionCache.update(collection_library_key)

    response = self.app.get(
        util.update_indexes_task('my', 'collection'),
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    # Triggers ingestions
    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual([
        util.ensure_library_task('org', 'element-1'),
        util.ensure_library_task('org', 'element-2'),
    ], queued_urls)

    # Ensures collection references
    for element_id in ("org/element-1", "org/element-2"):
        reference = CollectionReference.get_by_id(
            id="my/collection/v1.0.0", parent=ndb.Key(Library, element_id))
        self.assertIsNotNone(reference)

    # Validate search index
    document = search.Index('repo').get('my/collection')
    self.assertIsNotNone(document)
    self.assertTrue(len(document.fields) > 0)
def test_ingest_version(self):
    # Ingesting v1.0.0 of org/repo should mark the version ready (not a
    # preview), list it among the library's versions, and store the readme,
    # rendered readme and bower content beneath it.
    library_key = Library(
        id='org/repo',
        metadata='{"full_name": "NSS Bob", "stargazers_count": 420, "subscribers_count": 419, "forks": 418, "updated_at": "2011-8-10T13:47:12Z"}').put()
    Version(id='v1.0.0', parent=library_key, sha='sha').put()

    self.respond_to_github(
        r'https://api.github.com/repos/org/repo/readme\?ref=sha',
        '{"content":"%s"}' % b64encode('README'))
    self.respond_to(
        'https://raw.githubusercontent.com/org/repo/sha/bower.json', '{}')
    self.respond_to_github(
        'https://api.github.com/markdown', '<html>README</html>')

    response = self.app.get(
        util.ingest_version_task('org', 'repo', 'v1.0.0'),
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    version = Version.get_by_id('v1.0.0', parent=library_key)
    self.assertIsNone(version.error)
    self.assertEqual(version.status, Status.ready)
    self.assertFalse(version.preview)

    versions = Library.versions_for_key_async(library_key).get_result()
    self.assertEqual(['v1.0.0'], versions)

    def stored_content(content_id):
        # Fetch a Content entity stored under org/repo v1.0.0.
        return ndb.Key(
            Library, 'org/repo', Version, 'v1.0.0', Content, content_id).get()

    readme = stored_content('readme')
    self.assertEqual(readme.content, 'README')

    readme_html = stored_content('readme.html')
    self.assertEqual(readme_html.content, '<html>README</html>')

    bower = stored_content('bower')
    self.assertEqual(bower.content, '{}')
def handle_get(self, owner, repo, version):
    """Load the Version entity and run the readme, bower and ready steps.

    Stores owner, repo, version, the Version entity, its sha and its key on
    the handler. When the Version entity does not exist, returns the result of
    self.error(...) without running any further step.
    """
    self.owner, self.repo, self.version = owner, repo, version

    parent_key = ndb.Key(Library, Library.id(owner, repo))
    self.version_object = Version.get_by_id(version, parent=parent_key)
    if self.version_object is None:
        message = 'Version entity does not exist: %s/%s' % (
            Library.id(owner, repo), version)
        return self.error(message)

    self.sha = self.version_object.sha
    self.version_key = self.version_object.key

    self.update_readme()
    self.update_bower()
    self.set_ready()
def handle_get(self, scope, package, latest=False):
    """Trigger analysis for a library's versions.

    Responds 404 when the library cannot be initialised. With latest set,
    only the library's default version is analysed (and only if it resolves
    to an existing Version entity); otherwise every version whose status is
    ready is analysed.
    """
    self.init_library(scope, package)
    if self.library is None:
        self.response.set_status(404)
        self.response.write('could not find library: %s' % Library.id(scope, package))
        return

    if not latest:
        ready_versions = Version.query(
            Version.status == Status.ready, ancestor=self.library.key).fetch()
        for version in ready_versions:
            self.trigger_analysis(version.key.id(), version.sha, transactional=False)
        return

    version_id = Library.default_version_for_key_async(self.library.key).get_result()
    if not version_id:
        return
    version = Version.get_by_id(version_id, parent=self.library.key)
    if version is None:
        return
    self.trigger_analysis(version_id, version.sha, transactional=False)
def handle_get(self, owner, repo, scope, package):
    """Record that the owner/repo library has an npm package counterpart.

    Sets npm_package on the owner/repo library, removes that library's
    document from the 'repo' search index and, when a library exists for
    scope/package, flags it as migrated_from_bower. Does nothing when the
    owner/repo library does not exist.
    """
    source_id = Library.id(owner, repo)
    library = Library.get_by_id(source_id)
    if library is None:
        return

    library.npm_package = scope + '/' + package
    library.put()

    # Remove from search indexes.
    search.Index('repo').delete(Library.id(owner, repo))

    npm_library = Library.get_by_id(Library.id(scope, package))
    if npm_library is None:
        return
    npm_library.migrated_from_bower = True
    npm_library.put()
def test_renamed_repo_is_renamed(self):
    # When GitHub reports a different name/owner for org/repo, the update task
    # should remove the old library entity and queue an ensure-library task
    # for the new location.
    library_key = Library(
        id='org/repo',
        metadata_etag='a',
        contributors_etag='b',
        tags_etag='c',
        tag_map='{}',
        spdx_identifier='MIT').put()

    github_payload = json.dumps({
        "name": "newname",
        "owner": {"login": "******"},
    })
    self.respond_to_github('https://api.github.com/repos/org/repo', github_payload)

    response = self.app.get(
        '/task/update/org/repo',
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    self.assertIsNone(library_key.get())

    queued_urls = [task.url for task in self.tasks.get_filtered_tasks()]
    self.assertEqual([
        util.ensure_library_task('newowner', 'newname'),
    ], queued_urls)
def test_delete_version(self):
    # The delete task should remove the Version entity and leave the library
    # with no listed versions.
    library_key = ndb.Key(Library, 'owner/repo')
    version_key = Version(
        id='v1.0.0', parent=library_key, sha='1', status=Status.ready).put()
    VersionCache.update(library_key)

    response = self.app.get(
        '/task/delete/owner/repo/v1.0.0',
        headers={'X-AppEngine-QueueName': 'default'})
    self.assertEqual(response.status_int, 200)

    self.assertIsNone(version_key.get())
    remaining = Library.versions_for_key_async(library_key).get_result()
    self.assertEqual(remaining, [])
def handle_get(self, owner, repo):
    """Trigger analysis for every ready version of the owner/repo library.

    Responds 404 with an explanatory message when the library cannot be
    initialised.
    """
    self.init_library(owner, repo)
    if self.library is None:
        self.response.set_status(404)
        self.response.write('could not find library: %s' % Library.id(owner, repo))
        return

    ready_query = Version.query(
        Version.status == Status.ready, ancestor=self.library.key)
    for ready_version in ready_query.fetch():
        self.trigger_analysis(
            ready_version.key.id(), ready_version.sha, transactional=False)
def handle_get(self, owner, repo):
    """Write the fields and rank of a library's search document.

    Looks the document up in the 'repo' index by library id and responds 404
    when there is none. Each field is written as 'name: value<br>', followed
    by a final 'rank: ...<br>' line.
    """
    document = search.Index('repo').get(Library.id(owner, repo))
    if document is None:
        self.response.set_status(404)
        return

    write = self.response.write
    for field in document.fields:
        write('%s: %s<br>' % (field.name, field.value))
    write('rank: %s<br>' % (document.rank))
def test_versions_for_key(self):
    # Of the versions stored below, only v1.0.0, v2.0.0 and v3.0.0 are
    # expected back, in that order.
    library_key = ndb.Key(Library, 'a/b')

    # (version id, status), stored in this order.
    fixtures = [
        ('v2.0.0', Status.ready),
        ('v1.0.0', Status.ready),
        ('v3.0.0', Status.ready),
        ('v3.0.X', Status.ready),
        ('v4.0.0', Status.error),
        ('v5.0.0', Status.pending),
        ('xxx', Status.ready),
    ]
    for version_id, status in fixtures:
        Version(id=version_id, sha='x', status=status, parent=library_key).put()

    versions = yield Library.uncached_versions_for_key_async(library_key)
    self.assertEqual(versions, ['v1.0.0', 'v2.0.0', 'v3.0.0'])
def update_collection_dependencies(self, collection_version_key, bower):
    """Register a collection's bower dependencies.

    For each entry under bower['dependencies'] that Dependency.from_string
    can parse, ensures a CollectionReference from the dependency's library to
    the collection version and schedules an ensure-library task for it.
    Entries that parse to None are skipped.
    """
    dependency_specs = bower.get('dependencies', {})
    for spec in dependency_specs.values():
        dependency = Dependency.from_string(spec)
        if dependency is None:
            continue

        target_key = ndb.Key(Library, Library.id(dependency.owner, dependency.repo))
        CollectionReference.ensure(
            target_key, collection_version_key, semver=dependency.version)

        ensure_url = util.ensure_library_task(
            dependency.owner.lower(), dependency.repo.lower())
        util.new_task(ensure_url, target='manage')